@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +81 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +7 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +54 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1171 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +190 -0
- package/src/routes/local-inference-asr-route.ts +213 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +423 -0
- package/src/routes/local-inference-compat-routes.ts +782 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1398 -0
- package/src/runtime/index.d.ts +14 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +27 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/asr/errors.d.ts +21 -0
- package/src/services/asr/errors.d.ts.map +1 -0
- package/src/services/asr/errors.ts +50 -0
- package/src/services/asr/hash.d.ts +28 -0
- package/src/services/asr/hash.d.ts.map +1 -0
- package/src/services/asr/hash.ts +49 -0
- package/src/services/asr/index.d.ts +76 -0
- package/src/services/asr/index.d.ts.map +1 -0
- package/src/services/asr/index.ts +178 -0
- package/src/services/asr/types.d.ts +91 -0
- package/src/services/asr/types.d.ts.map +1 -0
- package/src/services/asr/types.ts +95 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +240 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +724 -0
- package/src/services/downloader.ts +899 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +534 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1891 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +183 -0
- package/src/services/hardware.ts +404 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +281 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +30 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +225 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +693 -0
- package/src/services/manifest/schema.d.ts +715 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +655 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +569 -0
- package/src/services/memory-arbiter.d.ts +343 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +1000 -0
- package/src/services/memory-monitor.d.ts +119 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +296 -0
- package/src/services/memory-pressure.d.ts +127 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +413 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +672 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +376 -0
- package/src/services/routing-policy.d.ts +55 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.ts +228 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +15 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/tts/errors.ts +46 -0
- package/src/services/tts/index.ts +214 -0
- package/src/services/tts/tts-audio-cache.ts +235 -0
- package/src/services/tts/types.ts +157 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +148 -0
- package/src/services/voice/embedding.ts +244 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +746 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2226 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +636 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +671 -0
- package/src/services/voice/ffi-bindings.ts +3050 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.ts +105 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +420 -0
- package/src/services/voice/voice-budget.ts +656 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import type { SpeakerPreset } from "./types";
|
|
4
|
+
import {
|
|
5
|
+
type RefAudioTokens,
|
|
6
|
+
readVoicePresetFile,
|
|
7
|
+
type VoicePresetSeedPhrase,
|
|
8
|
+
} from "./voice-preset-format";
|
|
9
|
+
|
|
10
|
+
/** Where to find a voice preset on disk for a given bundle. */
export interface PresetBundlePaths {
	/** Root directory of the bundle; preset paths are joined onto it. */
	bundleRoot: string;
	/**
	 * Optional override for the location of the *default* voice preset,
	 * expressed relative to `bundleRoot`.
	 */
	cacheRelPath?: string;
}
|
|
15
|
+
|
|
16
|
+
/** Result of loading one voice preset file. */
export interface LoadedPresetBundle {
	/** The parsed speaker preset. */
	preset: SpeakerPreset;
	/**
	 * Phrase-cache seed entries parsed from the same preset file as the
	 * embedding. The engine bridge passes these to `PhraseCache.seed(...)`
	 * before it constructs the scheduler.
	 */
	phrases: readonly VoicePresetSeedPhrase[];
}
|
|
23
|
+
|
|
24
|
+
/** Construction options for `SpeakerPresetCache`. */
export interface SpeakerPresetCacheOptions {
	/**
	 * Upper bound on how many distinct voices stay resident in RAM.
	 *
	 * Presets are KB-scale (an embedding vector plus the seed-phrase PCM), so
	 * the default is generous. The bound exists so that a connector which
	 * switches voice per speaker (e.g. a Discord room with many users) cannot
	 * grow the cache without limit: inserting past the bound evicts the
	 * least-recently-used voice.
	 *
	 * Defaults to 8 hot voices (R6 §1: "LRU 8 hot, mmap-backed").
	 */
	maxVoices?: number;
}
|
|
36
|
+
|
|
37
|
+
/** Voice id that selects the bundle's default preset. */
export const DEFAULT_VOICE_ID = "default";

/** Cache bound used when `maxVoices` is not supplied. */
const DEFAULT_MAX_VOICES = 8;

/** Bundle-relative location of the default voice preset file. */
export const DEFAULT_VOICE_PRESET_REL_PATH = path.join("cache", "voice-preset-default.bin");
|
|
44
|
+
|
|
45
|
+
/**
 * Compute where a voice preset lives on disk inside a bundle.
 *
 * The default voice maps to `cache/voice-preset-default.bin`; every other
 * voice maps to `cache/voice-preset-<voiceId>.bin`.
 *
 * @param bundleRoot Root directory of the bundle.
 * @param voiceId Voice identifier; anything other than the default id must
 *   consist only of `A-Z a-z 0-9 . _ -` and must not contain `..`.
 * @returns The joined preset path under `bundleRoot`.
 * @throws Error when a non-default `voiceId` is not a single path-safe segment.
 */
export function voicePresetPath(bundleRoot: string, voiceId: string): string {
	if (voiceId !== DEFAULT_VOICE_ID) {
		const isSafeSegment = /^[A-Za-z0-9._-]+$/.test(voiceId);
		if (!isSafeSegment || voiceId.includes("..")) {
			throw new Error(
				`[voice] Invalid voiceId ${JSON.stringify(voiceId)} — must be a single path-safe segment.`,
			);
		}
		const fileName = `voice-preset-${voiceId}.bin`;
		return path.join(bundleRoot, "cache", fileName);
	}
	return path.join(bundleRoot, DEFAULT_VOICE_PRESET_REL_PATH);
}
|
|
62
|
+
|
|
63
|
+
/** One resident cache slot: a parsed preset plus its seed phrases. */
interface CacheEntry {
	preset: SpeakerPreset;
	phrases: readonly VoicePresetSeedPhrase[];
}
|
|
67
|
+
|
|
68
|
+
/**
 * LRU cache of parsed speaker presets keyed by `voiceId`. Each entry holds the
 * speaker embedding, the raw preset bytes (for FFI handoff), and the
 * phrase-cache seed list parsed from the preset file. Multi-voice:
 * `load(bundleRoot, voiceId)` reads `cache/voice-preset-<voiceId>.bin` from
 * the bundle on a miss.
 *
 * v2 preset fields (`refAudioTokens`, `refText`, `instruct`) are surfaced on
 * the `SpeakerPreset` shape so the FFI bridge can pass them through to
 * `ov_tts_params` without going through the legacy "instruct == voiceId"
 * misreading.
 *
 * Note: the cache key is the `voiceId` alone. A hit returns the resident
 * entry without consulting the requested path, so the same `voiceId` loaded
 * from a different bundle root yields the first bundle's preset until it is
 * evicted or `clear()` is called.
 */
export class SpeakerPresetCache {
	// `Map` iterates in insertion order; entries are re-inserted on access so
	// the first key is always the least-recently-used one.
	private readonly entries = new Map<string, CacheEntry>();
	private readonly maxVoices: number;

	/**
	 * @param opts Optional cache bound. The bound is floored and clamped to at
	 *   least 1; a `NaN` bound falls back to the default.
	 */
	constructor(opts: SpeakerPresetCacheOptions = {}) {
		const requested = opts.maxVoices ?? DEFAULT_MAX_VOICES;
		// `Math.max(1, NaN)` is `NaN`, and `size > NaN` is never true — that
		// would silently disable eviction, so treat NaN as "not supplied".
		const bound = Number.isNaN(requested) ? DEFAULT_MAX_VOICES : requested;
		this.maxVoices = Math.max(1, Math.floor(bound));
	}

	/**
	 * Load a voice preset from a bundle and return both the speaker embedding
	 * and the phrase-cache seed entries. By default this is the bundle's
	 * default voice (`cache/voice-preset-default.bin`). The result is cached
	 * under `voiceId` and marked most-recently-used.
	 *
	 * When `paths.cacheRelPath` is set it is used as the file location
	 * regardless of `voiceId`; otherwise the location is resolved through
	 * `voicePresetPath`, which rejects ids that are not a single path-safe
	 * segment.
	 *
	 * @throws Error when `voiceId` is not path-safe (no override given) or
	 *   the preset file does not exist.
	 */
	loadFromBundle(
		paths: PresetBundlePaths,
		voiceId = DEFAULT_VOICE_ID,
	): LoadedPresetBundle {
		const override = paths.cacheRelPath;
		const fullPath =
			override != null
				? path.join(paths.bundleRoot, override)
				: voicePresetPath(paths.bundleRoot, voiceId);
		return this.loadFile(fullPath, voiceId);
	}

	/**
	 * Load an arbitrary voice by id from a bundle root, reading
	 * `cache/voice-preset-<voiceId>.bin` (or `cache/voice-preset-default.bin`
	 * for `"default"`). Returns the cached entry on a hit (marked MRU).
	 *
	 * @throws Error when `voiceId` is not path-safe or the file is missing.
	 */
	load(bundleRoot: string, voiceId: string): LoadedPresetBundle {
		return this.loadFile(voicePresetPath(bundleRoot, voiceId), voiceId);
	}

	/** True if `voiceId` is currently resident in the cache. */
	has(voiceId: string): boolean {
		return this.entries.has(voiceId);
	}

	/**
	 * Insert or replace the preset for `preset.voiceId`, keeping any seed
	 * phrases already cached for that voice. The entry becomes
	 * most-recently-used and overflow is evicted.
	 */
	put(preset: SpeakerPreset): void {
		const existing = this.entries.get(preset.voiceId);
		this.entries.delete(preset.voiceId);
		this.entries.set(preset.voiceId, {
			preset,
			phrases: existing?.phrases ?? [],
		});
		this.evictOverflow();
	}

	/** Return the cached preset for `voiceId` (marking it MRU), or `undefined`. */
	get(voiceId: string): SpeakerPreset | undefined {
		const entry = this.entries.get(voiceId);
		if (!entry) return undefined;
		this.touch(voiceId, entry);
		return entry.preset;
	}

	/** Seed entries previously loaded for a voice, if any (does not touch LRU order). */
	getSeed(voiceId: string): ReadonlyArray<VoicePresetSeedPhrase> {
		return this.entries.get(voiceId)?.phrases ?? [];
	}

	/** Number of voices currently resident. */
	size(): number {
		return this.entries.size;
	}

	/** Drop every cached preset. */
	clear(): void {
		this.entries.clear();
	}

	/** Move `voiceId` to the most-recently-used end of the map. */
	private touch(voiceId: string, entry: CacheEntry): void {
		this.entries.delete(voiceId);
		this.entries.set(voiceId, entry);
	}

	/**
	 * Return the resident entry for `voiceId`, or read and parse `fullPath`,
	 * cache the result under `voiceId`, and evict overflow.
	 */
	private loadFile(fullPath: string, voiceId: string): LoadedPresetBundle {
		const cached = this.entries.get(voiceId);
		if (cached) {
			this.touch(voiceId, cached);
			return { preset: cached.preset, phrases: cached.phrases };
		}
		if (!existsSync(fullPath)) {
			throw new Error(
				`[voice] Speaker preset for voice ${JSON.stringify(voiceId)} not found at ${fullPath}.`,
			);
		}
		const bytes = new Uint8Array(readFileSync(fullPath));
		const parsed = readVoicePresetFile(bytes);
		const refTokens: RefAudioTokens = parsed.refAudioTokens;
		const preset: SpeakerPreset = {
			voiceId,
			embedding: parsed.embedding,
			bytes,
			version: parsed.version,
			refAudioTokens: refTokens,
			refText: parsed.refText,
			instruct: parsed.instruct,
			metadata: parsed.metadata,
		};
		const entry: CacheEntry = { preset, phrases: parsed.phrases };
		this.entries.set(voiceId, entry);
		this.evictOverflow();
		return { preset, phrases: parsed.phrases };
	}

	/** Delete least-recently-used entries until the cache fits `maxVoices`. */
	private evictOverflow(): void {
		while (this.entries.size > this.maxVoices) {
			const lru = this.entries.keys().next().value;
			if (lru === undefined) return;
			this.entries.delete(lru);
		}
	}
}
|
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Streaming-ASR pipeline adapter (item A1 / W7).
|
|
3
|
+
*
|
|
4
|
+
* The base `VoicePipeline` in `voice/pipeline.ts` drives a `StreamingTranscriber`
|
|
5
|
+
* as a batch: it pushes the WHOLE VAD-gated utterance buffer in a single
|
|
6
|
+
* `feed()` call, awaits `flush()`, and only then splits the final transcript
|
|
7
|
+
* into tokens for the drafter/verifier loop. That works for the fused
|
|
8
|
+
* batch decoder, but it leaves the biggest
|
|
9
|
+
* H2 UX seam (incremental partials → planner / barge-in word-confirm /
|
|
10
|
+
* speculative-on-pause) untapped when the fused build is in streaming mode.
|
|
11
|
+
*
|
|
12
|
+
* Rather than rewrite `pipeline.ts` (the Phase 0/1 agent owns it), this
|
|
13
|
+
* module is a small WRAPPER the engine bridge can use to deliver PCM
|
|
14
|
+
* chunks to a `StreamingTranscriber` as they arrive from the mic / VAD,
|
|
15
|
+
* surface every `partial` event to the turn controller via `onPartial`,
|
|
16
|
+
* and only finalize on `speech-end`. Behind a flag — when the fused
|
|
17
|
+
* library advertises `asrStreamSupported() === false` or the flag is off
|
|
18
|
+
* the caller keeps using `VoicePipeline.transcribeAll` exactly as today.
|
|
19
|
+
*
|
|
20
|
+
* Integration point (documented for the Phase 0/1 agent so they can wire
|
|
21
|
+
* it without merge friction):
|
|
22
|
+
*
|
|
23
|
+
* 1. `EngineVoiceBridge` decides whether streaming is available via
|
|
24
|
+
*      `pickStreamingMode({ ffiSupportsStreaming, asrBundlePresent, enableStreaming })`. When the
|
|
25
|
+
* mode is `"streaming"`, the bridge constructs `StreamingAsrFeeder`
|
|
26
|
+
* once per turn (passing the same transcriber that would have been
|
|
27
|
+
* handed to `VoicePipeline`) and routes mic PCM frames through
|
|
28
|
+
* `feeder.feedFrame(frame)` instead of buffering them.
|
|
29
|
+
* 2. The feeder forwards every transcriber `partial` event to
|
|
30
|
+
* `onPartial(update)`. When VAD reports `speech-end` the caller
|
|
31
|
+
* calls `await feeder.finalize()`; the returned `TranscriptUpdate`
|
|
32
|
+
* is the final and is used to seed the drafter/verifier loop exactly
|
|
33
|
+
* as before (`splitTranscriptToTokens(final.partial, 0, final.tokens)`).
|
|
34
|
+
* 3. The batch path (`VoicePipeline.transcribeAll`) is unchanged for
|
|
35
|
+
* every other adapter — there is no fork in `pipeline.ts` itself.
|
|
36
|
+
*
|
|
37
|
+
* This file is intentionally small and side-effect-free so it can land
|
|
38
|
+
* during the merge window without touching files other agents own.
|
|
39
|
+
*/
|
|
40
|
+
|
|
41
|
+
import { splitTranscriptToTokens } from "../pipeline";
|
|
42
|
+
import type {
|
|
43
|
+
PcmFrame,
|
|
44
|
+
StreamingTranscriber,
|
|
45
|
+
TextToken,
|
|
46
|
+
TranscriptUpdate,
|
|
47
|
+
} from "../types";
|
|
48
|
+
|
|
49
|
+
/* ==================================================================== *
|
|
50
|
+
* LocalAgreementBuffer — word-level streaming-ASR partial stabilizer.
|
|
51
|
+
*
|
|
52
|
+
* Streaming ASR emits a fresh word-sequence hypothesis on every audio
|
|
53
|
+
* frame. Individual words near the end of the hypothesis can change
|
|
54
|
+
* across frames ("sat" → "cap" → "sat") before settling. This buffer
|
|
55
|
+
* applies LocalAgreement-n (n=2 default) at the word level: a word is
|
|
56
|
+
* emitted to downstream only when it appears at the same position in n
|
|
57
|
+
* consecutive hypotheses. The committed stable prefix is monotonically
|
|
58
|
+
* non-decreasing — once a word is committed it is never retracted.
|
|
59
|
+
*
|
|
60
|
+
* Word-level (not character-level): suited for the VAD pipeline adapter
|
|
61
|
+
* where downstream consumers (drafter, verifier) operate on word tokens.
|
|
62
|
+
* For the character-level prefix variant, see `partial-stabilizer.ts`.
|
|
63
|
+
* ==================================================================== */
|
|
64
|
+
|
|
65
|
+
/**
 * LocalAgreement-n word-level partial stabilizer.
 *
 * A word is committed once it sits at the same position in `n` consecutive
 * hypotheses. Committed words are append-only: later agreement can only add
 * words after the committed prefix, never rewrite words already handed out.
 *
 * Usage:
 *   const buf = new LocalAgreementBuffer();
 *   const stable = buf.stable(["hello", "there", "world"]);
 *   // → [] on first call (need n=2 consecutive identical prefix)
 *   const stable2 = buf.stable(["hello", "there", "how"]);
 *   // → ["hello", "there"] (matched across two consecutive hypotheses)
 */
export class LocalAgreementBuffer {
	private readonly n: number;
	/** Rolling window of the last `n` hypotheses, oldest first. */
	private window: string[][] = [];
	/** Append-only committed word list. */
	private committed: string[] = [];

	/**
	 * @param n Number of consecutive hypotheses that must agree (floored).
	 * @throws Error when `n` is not finite or is below 1.
	 */
	constructor(n = 2) {
		if (!Number.isFinite(n) || n < 1) {
			throw new Error(
				`[LocalAgreementBuffer] n must be a finite integer >= 1; got ${String(n)}`,
			);
		}
		this.n = Math.floor(n);
	}

	/**
	 * Feed the latest word-level hypothesis and get the committed prefix back.
	 *
	 * The last `n` hypotheses are kept in a rolling window. Once the window is
	 * full, the agreed prefix is the longest run of leading words that is
	 * identical in every hypothesis in the window. If that run is longer than
	 * what is already committed, only the words beyond the committed length
	 * are appended — previously committed words are never replaced.
	 *
	 * @param current Latest hypothesis; it is copied, so the caller may reuse
	 *   or mutate the array afterwards.
	 * @returns A copy of the committed word list (never shrinks between calls
	 *   until `reset()`).
	 */
	stable(current: string[]): string[] {
		// Snapshot the hypothesis so later caller-side mutation cannot change
		// what the window compares against.
		this.window.push([...current]);
		if (this.window.length > this.n) {
			this.window.shift();
		}
		// Need a full window of `n` hypotheses before any word can be agreed.
		if (this.window.length < this.n) {
			return [...this.committed];
		}
		const [first, ...rest] = this.window;
		if (!first) {
			throw new Error("hypothesis window unexpectedly empty");
		}
		// Shrink the candidate length to the common leading prefix of the window.
		let agreedLen = first.length;
		for (const hypothesis of rest) {
			const limit = Math.min(agreedLen, hypothesis.length);
			let matchLen = 0;
			while (matchLen < limit && first[matchLen] === hypothesis[matchLen]) {
				matchLen++;
			}
			agreedLen = matchLen;
			if (agreedLen === 0) break;
		}
		// Append only the newly agreed tail. Replacing the whole list with
		// `first.slice(0, agreedLen)` could rewrite words that downstream
		// consumers have already received.
		if (agreedLen > this.committed.length) {
			this.committed = this.committed.concat(
				first.slice(this.committed.length, agreedLen),
			);
		}
		return [...this.committed];
	}

	/** Clear all state. Call at utterance boundaries. */
	reset(): void {
		this.window = [];
		this.committed = [];
	}

	/** A copy of the current committed word list. */
	getCommitted(): string[] {
		return [...this.committed];
	}
}
|
|
149
|
+
|
|
150
|
+
/** The two ways transcription can be driven: whole-utterance batch, or chunked streaming. */
export type StreamingPipelineMode = "batch" | "streaming";
|
|
152
|
+
|
|
153
|
+
/** Inputs to `pickStreamingMode`; all three must be true to select streaming. */
export interface PickStreamingModeArgs {
	/** Whether the loaded fused library advertises a working streaming decoder. */
	ffiSupportsStreaming: boolean;
	/** Whether the bundled ASR model is present on disk. */
	asrBundlePresent: boolean;
	/**
	 * Opt-in feature flag supplied by the engine bridge; streaming is never
	 * selected while this is false. Intended to stay off until the Phase 0/1
	 * partial-stabilizer wiring lands, after which the engine bridge is
	 * expected to turn it on by default.
	 */
	enableStreaming: boolean;
}
|
|
165
|
+
|
|
166
|
+
/**
 * Decide how transcription is driven.
 *
 * `"streaming"` is returned only when all of the following hold:
 *   - the engine bridge opted in via `enableStreaming`,
 *   - the loaded fused library advertises a working streaming decoder
 *     (`asr_stream_supported() === 1`), and
 *   - the bundled ASR model is present.
 *
 * Every other combination yields `"batch"`, i.e. the existing
 * `VoicePipeline.transcribeAll` path.
 *
 * @param args The three gating conditions.
 * @returns The selected drive mode.
 */
export function pickStreamingMode(
	args: PickStreamingModeArgs,
): StreamingPipelineMode {
	const { enableStreaming, ffiSupportsStreaming, asrBundlePresent } = args;
	const canStream = enableStreaming && ffiSupportsStreaming && asrBundlePresent;
	return canStream ? "streaming" : "batch";
}
|
|
184
|
+
|
|
185
|
+
/** Optional callbacks a `StreamingAsrFeeder` invokes while driving a segment. */
export interface StreamingAsrFeederEvents {
	/**
	 * Invoked with each transcriber `partial` event the feeder observes.
	 * The update carries the running `partial` text, `isFinal: false`, and
	 * (when the fused build supplied them) the shared text-model token ids.
	 */
	onPartial?(update: TranscriptUpdate): void;
	/**
	 * Invoked with each transcriber `words` event. Per the design notes this
	 * fires the first time ≥1 real word is recognized in the segment and is
	 * wired into the turn controller's word-confirm gate, so the agent only
	 * barge-in-cancels on real speech, not blips.
	 */
	onWords?(words: readonly string[]): void;
	/**
	 * Invoked by `finalize()` once the transcriber's `flush()` has resolved,
	 * with the final transcript split into text tokens
	 * (`splitTranscriptToTokens`). The batch path delivers the same shape via
	 * `transcribeAll`, so the downstream drafter/verifier loop sees an
	 * identical signal.
	 */
	onFinalTokens?(
		tokens: readonly TextToken[],
		final: TranscriptUpdate,
	): void;
}
|
|
210
|
+
|
|
211
|
+
/**
 * Drives a `StreamingTranscriber` chunk-by-chunk on behalf of the engine
 * bridge / turn controller. One instance per active speech segment:
 * `finalize()` returns the final transcript and detaches the feeder from the
 * transcriber.
 *
 * Construction takes a `StreamingTranscriber` (already constructed via
 * `createStreamingTranscriber` with the same options used for batch).
 * The feeder does NOT own the transcriber's lifecycle — disposal of the
 * transcriber still runs through the engine bridge so the same path is used
 * when the batch fallback is taken.
 */
export class StreamingAsrFeeder {
	private readonly transcriber: StreamingTranscriber;
	private readonly events: StreamingAsrFeederEvents;
	private latestPartial: TranscriptUpdate | null = null;
	private finalized = false;
	private unsubscribe: (() => void) | null = null;

	/**
	 * Subscribe to the transcriber's events and forward `partial` / `words`
	 * events to the supplied callbacks.
	 *
	 * @param args.transcriber Transcriber to drive; not owned by the feeder.
	 * @param args.events Optional callbacks; omitted callbacks are skipped.
	 */
	constructor(args: {
		transcriber: StreamingTranscriber;
		events?: StreamingAsrFeederEvents;
	}) {
		this.transcriber = args.transcriber;
		this.events = args.events ?? {};
		this.unsubscribe = this.transcriber.on((event) => {
			switch (event.kind) {
				case "partial":
					this.latestPartial = event.update;
					this.events.onPartial?.(event.update);
					break;
				case "words":
					this.events.onWords?.(event.words);
					break;
				case "final":
					// Final events are surfaced via `finalize()`'s return value so
					// the caller has a single point of truth. We do not re-emit
					// them here.
					break;
			}
		});
	}

	/**
	 * Feed one PCM frame as it arrives from the mic / connector. Drops
	 * frames received after `finalize()` (the segment is over).
	 */
	feedFrame(frame: PcmFrame): void {
		if (this.finalized) return;
		this.transcriber.feed(frame);
	}

	/**
	 * Force-finalize on `speech-end`. Flushes the transcriber, emits
	 * `onFinalTokens` with the split final transcript so the caller can seed
	 * the drafter / verifier loop without re-running the surface split
	 * itself, and resolves with the final transcript.
	 *
	 * The transcriber subscription is detached on the way out, whether the
	 * flush succeeded or failed. The transcriber is shared across turns, so a
	 * feeder left subscribed would keep forwarding the next segment's events
	 * to this segment's callbacks.
	 *
	 * Calling `finalize()` twice is a hard error — the segment is over.
	 *
	 * @returns The final transcript produced by `flush()`.
	 * @throws Error on a second call; also propagates any `flush()` rejection.
	 */
	async finalize(): Promise<TranscriptUpdate> {
		if (this.finalized) {
			throw new Error(
				"[streaming-asr] finalize() called twice on the same feeder",
			);
		}
		this.finalized = true;
		try {
			const final = await this.transcriber.flush();
			const tokens = splitTranscriptToTokens(final.partial, 0, final.tokens);
			this.events.onFinalTokens?.(tokens, final);
			return final;
		} finally {
			this.dispose();
		}
	}

	/** The most recent `partial` snapshot observed, or `null` until the first decode lands. */
	getLatestPartial(): TranscriptUpdate | null {
		return this.latestPartial;
	}

	/** Detach the transcriber subscription. Idempotent. Does NOT dispose the transcriber itself. */
	dispose(): void {
		this.unsubscribe?.();
		this.unsubscribe = null;
	}
}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* System audio sinks for the interactive voice harness.
|
|
3
|
+
*
|
|
4
|
+
* The voice scheduler writes synthesized PCM (`Float32Array` mono in
|
|
5
|
+
* [-1, 1] at the bridge sample rate) into an {@link AudioSink}. For tests
|
|
6
|
+
* and headless runs `InMemoryAudioSink` (in `./ring-buffer`) captures the
|
|
7
|
+
* samples; for an interactive session the harness needs the audio to
|
|
8
|
+
* actually come out of the speakers.
|
|
9
|
+
*
|
|
10
|
+
* `SystemAudioSink` shells out to a long-lived player that reads raw 16-bit
|
|
11
|
+
* signed-LE PCM on stdin. Per-platform selection (priority order):
|
|
12
|
+
* - Linux: `aplay` (alsa-utils), else `paplay` (PulseAudio), else
|
|
13
|
+
* `play`/`sox` (sox), else `ffplay` (ffmpeg).
|
|
14
|
+
* - macOS: `play`/`sox` (sox), else `ffplay` (ffmpeg). `afplay` needs a
|
|
15
|
+
* file (no stdin) so it cannot be used for streaming.
|
|
16
|
+
* - Windows: `ffplay` (ffmpeg), else `play`/`sox` if installed. PowerShell's
|
|
17
|
+
* `Media.SoundPlayer` also needs a file, not a stream, so it's not
|
|
18
|
+
* used here — the renderer's `AudioContext` path (feeding nothing
|
|
19
|
+
* here; the renderer plays directly) is the no-ffmpeg route.
|
|
20
|
+
* If no player is on `PATH`, `available()` returns false and the harness
|
|
21
|
+
* falls back to `WavFileAudioSink` (writes a rolling WAV) — never silence.
|
|
22
|
+
*
|
|
23
|
+
* `WavFileAudioSink` accumulates everything written and serializes a
|
|
24
|
+
* single mono PCM16 WAV on `finalize()` — used by `--no-audio` and as the
|
|
25
|
+
* no-player fallback.
|
|
26
|
+
*/
|
|
27
|
+
import type { AudioSink } from "./types";
|
|
28
|
+
/**
|
|
29
|
+
* Exported view of {@link resolvePlayer} for the cross-platform preflight
|
|
30
|
+
* (`voice:interactive --platform-report`). Returns the bare program name
|
|
31
|
+
* (no args) the host would stream synthesized audio to, or `null`.
|
|
32
|
+
*/
|
|
33
|
+
export declare function resolveSystemPlayerName(sampleRate?: number): string | null;
|
|
34
|
+
export interface SystemAudioSinkOptions {
|
|
35
|
+
sampleRate: number;
|
|
36
|
+
}
|
|
37
|
+
export declare class SystemAudioSink implements AudioSink {
|
|
38
|
+
private readonly sampleRate;
|
|
39
|
+
private readonly playerSpec;
|
|
40
|
+
private proc;
|
|
41
|
+
private buffered;
|
|
42
|
+
constructor(opts: SystemAudioSinkOptions);
|
|
43
|
+
available(): boolean;
|
|
44
|
+
player(): string;
|
|
45
|
+
private ensureProc;
|
|
46
|
+
write(pcm: Float32Array, _sampleRate: number): void;
|
|
47
|
+
drain(): void;
|
|
48
|
+
bufferedSamples(): number;
|
|
49
|
+
/** Flush + close the player. Idempotent. */
|
|
50
|
+
dispose(): Promise<void>;
|
|
51
|
+
}
|
|
52
|
+
export interface WavFileAudioSinkOptions {
|
|
53
|
+
sampleRate: number;
|
|
54
|
+
filePath: string;
|
|
55
|
+
}
|
|
56
|
+
/**
|
|
57
|
+
* Accumulates all written PCM and serializes a single mono PCM16 WAV on
|
|
58
|
+
* {@link finalize}. Used by `--no-audio` and as the no-player fallback so
|
|
59
|
+
* a headless run still produces an inspectable artifact (never silence).
|
|
60
|
+
*/
|
|
61
|
+
export declare class WavFileAudioSink implements AudioSink { // AudioSink that collects PCM and emits one WAV file on finalize()
|
|
62
|
+
private readonly sampleRate;
|
|
63
|
+
private readonly filePath;
|
|
64
|
+
private readonly chunks; // NOTE(review): presumably the accumulated PCM chunks - element type is erased in this .d.ts
|
|
65
|
+
private buffered;
|
|
66
|
+
constructor(opts: WavFileAudioSinkOptions);
|
|
67
|
+
write(pcm: Float32Array, _sampleRate: number): void; // the underscore prefix suggests _sampleRate is unused - confirm in the implementation
|
|
68
|
+
drain(): void; // per the accompanying test: bufferedSamples() reads 0 afterwards, yet earlier writes still reach the WAV
|
|
69
|
+
bufferedSamples(): number; // per the accompanying test: 3 after writing three samples, 0 after drain()
|
|
70
|
+
totalSamples(): number;
|
|
71
|
+
finalize(): Promise<string>; // writes the WAV file; NOTE(review): the resolved string is presumably the file path - confirm
|
|
72
|
+
}
|
|
73
|
+
//# sourceMappingURL=system-audio-sink.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"system-audio-sink.d.ts","sourceRoot":"","sources":["system-audio-sink.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAQH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAoIzC;;;;GAIG;AACH,wBAAgB,uBAAuB,CAAC,UAAU,SAAS,GAAG,MAAM,GAAG,IAAI,CAG1E;AAED,MAAM,WAAW,sBAAsB;IACtC,UAAU,EAAE,MAAM,CAAC;CACnB;AAWD,qBAAa,eAAgB,YAAW,SAAS;IAChD,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;IACpC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAyC;IACpE,OAAO,CAAC,IAAI,CAAiC;IAC7C,OAAO,CAAC,QAAQ,CAAK;gBAET,IAAI,EAAE,sBAAsB;IAKxC,SAAS,IAAI,OAAO;IAIpB,MAAM,IAAI,MAAM;IAIhB,OAAO,CAAC,UAAU;IAmBlB,KAAK,CAAC,GAAG,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,GAAG,IAAI;IAUnD,KAAK,IAAI,IAAI;IAmBb,eAAe,IAAI,MAAM;IAIzB,4CAA4C;IACtC,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CA6B9B;AAED,MAAM,WAAW,uBAAuB;IACvC,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;CACjB;AAED;;;;GAIG;AACH,qBAAa,gBAAiB,YAAW,SAAS;IACjD,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;IACpC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAsB;IAC7C,OAAO,CAAC,QAAQ,CAAK;gBAET,IAAI,EAAE,uBAAuB;IAKzC,KAAK,CAAC,GAAG,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,GAAG,IAAI;IAKnD,KAAK,IAAI,IAAI;IAMb,eAAe,IAAI,MAAM;IAIzB,YAAY,IAAI,MAAM;IAMhB,QAAQ,IAAI,OAAO,CAAC,MAAM,CAAC;CAgCjC"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import { mkdtempSync, readFileSync, rmSync } from "node:fs";
|
|
2
|
+
import { tmpdir } from "node:os";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
import { describe, expect, it } from "vitest";
|
|
5
|
+
import { WavFileAudioSink } from "./system-audio-sink";
|
|
6
|
+
|
|
7
|
+
describe("WavFileAudioSink", () => { // checks that drain() and finalize() cooperate on the WAV artifact
|
|
8
|
+
it("drain resets buffered samples without dropping artifact chunks", async () => {
|
|
9
|
+
const dir = mkdtempSync(path.join(tmpdir(), "eliza-audio-sink-")); // fresh temp dir so the run never touches a shared path
|
|
10
|
+
const filePath = path.join(dir, "out.wav");
|
|
11
|
+
try {
|
|
12
|
+
const sink = new WavFileAudioSink({ sampleRate: 24_000, filePath });
|
|
13
|
+
sink.write(new Float32Array([0.1, 0.2, 0.3]), 24_000); // three samples in
|
|
14
|
+
expect(sink.bufferedSamples()).toBe(3);
|
|
15
|
+
|
|
16
|
+
sink.drain(); // must only reset the buffered counter, not discard the written audio
|
|
17
|
+
expect(sink.bufferedSamples()).toBe(0);
|
|
18
|
+
await sink.finalize(); // serializes everything written so far to filePath
|
|
19
|
+
|
|
20
|
+
const wav = readFileSync(filePath);
|
|
21
|
+
expect(wav.subarray(0, 4).toString("ascii")).toBe("RIFF");
|
|
22
|
+
expect(wav.subarray(8, 12).toString("ascii")).toBe("WAVE");
|
|
23
|
+
expect(wav.readUInt32LE(40)).toBe(6); // 3 samples x 2 bytes (PCM16) = 6 bytes recorded in the size field at offset 40
|
|
24
|
+
expect(wav.length).toBe(50); // 50 = 44 bytes ahead of the audio data + the 6 data bytes
|
|
25
|
+
} finally {
|
|
26
|
+
rmSync(dir, { recursive: true, force: true }); // always remove the temp dir, even when an assertion throws
|
|
27
|
+
}
|
|
28
|
+
});
|
|
29
|
+
});
|