@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +81 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +7 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +54 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1171 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +190 -0
- package/src/routes/local-inference-asr-route.ts +213 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +423 -0
- package/src/routes/local-inference-compat-routes.ts +782 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1398 -0
- package/src/runtime/index.d.ts +14 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +27 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/asr/errors.d.ts +21 -0
- package/src/services/asr/errors.d.ts.map +1 -0
- package/src/services/asr/errors.ts +50 -0
- package/src/services/asr/hash.d.ts +28 -0
- package/src/services/asr/hash.d.ts.map +1 -0
- package/src/services/asr/hash.ts +49 -0
- package/src/services/asr/index.d.ts +76 -0
- package/src/services/asr/index.d.ts.map +1 -0
- package/src/services/asr/index.ts +178 -0
- package/src/services/asr/types.d.ts +91 -0
- package/src/services/asr/types.d.ts.map +1 -0
- package/src/services/asr/types.ts +95 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +240 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +724 -0
- package/src/services/downloader.ts +899 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +534 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1891 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +183 -0
- package/src/services/hardware.ts +404 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +281 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +30 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +225 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +693 -0
- package/src/services/manifest/schema.d.ts +715 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +655 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +569 -0
- package/src/services/memory-arbiter.d.ts +343 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +1000 -0
- package/src/services/memory-monitor.d.ts +119 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +296 -0
- package/src/services/memory-pressure.d.ts +127 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +413 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +672 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +376 -0
- package/src/services/routing-policy.d.ts +55 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.ts +228 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +15 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/tts/errors.ts +46 -0
- package/src/services/tts/index.ts +214 -0
- package/src/services/tts/tts-audio-cache.ts +235 -0
- package/src/services/tts/types.ts +157 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +148 -0
- package/src/services/voice/embedding.ts +244 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +746 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2226 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +636 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +671 -0
- package/src/services/voice/ffi-bindings.ts +3050 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.ts +105 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +420 -0
- package/src/services/voice/voice-budget.ts +656 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Voice on/off state machine.
|
|
3
|
+
*
|
|
4
|
+
* Per `packages/inference/AGENTS.md` §4 + this scope's design goals,
|
|
5
|
+
* voice is OFF by default. Text + native MTP are hot; TTS, ASR, the
|
|
6
|
+
* speaker preset cache and phrase cache, the chunker, the rollback
|
|
7
|
+
* queue, the barge-in controller, and the ring buffer are NOT in RAM.
|
|
8
|
+
*
|
|
9
|
+
* Transitions are explicit. Illegal transitions throw — no
|
|
10
|
+
* "log-and-continue" (AGENTS.md §9). The transition to `voice-off`
|
|
11
|
+
* MUST issue a real page-eviction call on the TTS/ASR mmap regions
|
|
12
|
+
* (see `MmapRegionHandle.evictPages()` in `shared-resources.ts`) so
|
|
13
|
+
* the OS can reclaim those pages.
|
|
14
|
+
*
|
|
15
|
+
*   voice-off ──arm()──▶ voice-arming ──all loads succeed──▶ voice-on
*       ▲                     │                                 │
*       │                     │ a load fails                    │ disarm()
*       │                     ▼                                 ▼
*       ├────reset()──── voice-error ◀───disarm fails─── voice-disarming
*       │                                                       │
*       └───────────────────── disarmed ◀───────────────────────┘
|
|
25
|
+
*
|
|
26
|
+
* `voice-error` is terminal until `reset()` is called. There is no
|
|
27
|
+
* automatic retry: a missing kernel, mmap fail, or RAM-pressure
|
|
28
|
+
* refusal MUST surface to the caller.
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
import type {
|
|
32
|
+
KernelSet,
|
|
33
|
+
MmapRegionHandle,
|
|
34
|
+
MtpDraftHandle,
|
|
35
|
+
RefCountedResource,
|
|
36
|
+
SchedulerSlot,
|
|
37
|
+
SharedResourceRegistry,
|
|
38
|
+
SharedTokenizer,
|
|
39
|
+
} from "./shared-resources";
|
|
40
|
+
|
|
41
|
+
/**
 * Lifecycle state, modelled as a discriminated union tagged by `kind`. The
 * tag is a closed set of string literals and is never widened to `string`.
 *
 * `voice-on` and `voice-disarming` carry the {@link ArmedResources} taken by
 * `arm()`; that payload is how the disarm path reaches the TTS/ASR mmap
 * regions to call `evictPages()` on them. `voice-error` carries the
 * structured failure that put the machine there.
 */
export type VoiceLifecycleState =
	| Readonly<{ kind: "voice-off" }>
	| Readonly<{ kind: "voice-arming" }>
	| Readonly<{ kind: "voice-on"; resources: ArmedResources }>
	| Readonly<{ kind: "voice-disarming"; resources: ArmedResources }>
	| Readonly<{ kind: "voice-error"; error: VoiceLifecycleError }>;
|
|
52
|
+
|
|
53
|
+
/**
 * The four resources held while voice is armed.
 *
 * `disarm()` gives them back in the reverse of the order `arm()` acquired
 * them. The two mmap regions additionally get an explicit `evictPages()`
 * call before `release()`, so the OS can reclaim their pages even if the FFI
 * keeps the file descriptor open for the next re-arm.
 */
export interface ArmedResources {
	/** TTS model mmap region (evicted, then released, on disarm). */
	readonly tts: MmapRegionHandle;
	/** ASR model mmap region (evicted, then released, on disarm). */
	readonly asr: MmapRegionHandle;
	/** Speaker preset + phrase cache — kept in a small LRU after disarm. */
	readonly voiceCaches: RefCountedResource;
	/** Voice-specific scheduler nodes (chunker, rollback, ring buffer, barge-in). */
	readonly voiceSchedulerNodes: RefCountedResource;
}
|
|
67
|
+
|
|
68
|
+
/**
 * The resources that back text inference. Per the original design note, the
 * engine acquires these when the bundle is activated and voice piggy-backs
 * on them instead of loading its own copies.
 */
export interface TextResources {
	/** Tokenizer shared between the text and voice paths. */
	readonly tokenizer: SharedTokenizer;
	/** Mmap region holding the text model weights. */
	readonly textWeights: MmapRegionHandle;
	/** Compute kernel set. */
	readonly kernels: KernelSet;
	/** Scheduler slot. */
	readonly scheduler: SchedulerSlot;
	/** MTP draft handle. */
	readonly mtp: MtpDraftHandle;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Structured failure handed to the caller in place of a generic `Error`.
 *
 * The `code` lets the engine and UI tell RAM pressure apart from a missing
 * kernel, an mmap failure, an illegal state transition, or an otherwise
 * unclassified arm/disarm failure (AGENTS.md §3).
 */
export class VoiceLifecycleError extends Error {
	/**
	 * @param code    Machine-readable failure category.
	 * @param message Human-readable description, forwarded to `Error`.
	 */
	constructor(
		readonly code:
			| "ram-pressure"
			| "mmap-fail"
			| "kernel-missing"
			| "illegal-transition"
			| "arm-failed"
			| "disarm-failed",
		message: string,
	) {
		super(message);
		this.name = "VoiceLifecycleError";
	}
}
|
|
100
|
+
|
|
101
|
+
/**
 * Loader callbacks supplied at construction time, so `VoiceLifecycle` stays
 * independent of FFI specifics: the engine wires real loaders at runtime and
 * tests inject mocks.
 *
 * `arm()` awaits them one after another in the order declared here. Each
 * loader MUST reject on failure — there is no silent fallback (AGENTS.md §3).
 */
export interface VoiceLifecycleLoaders {
	/** Load the TTS model mmap region. */
	loadTtsRegion(): Promise<MmapRegionHandle>;
	/** Load the ASR model mmap region. */
	loadAsrRegion(): Promise<MmapRegionHandle>;
	/** Load the voice caches (speaker preset + phrase cache). */
	loadVoiceCaches(): Promise<RefCountedResource>;
	/** Load the voice-specific scheduler nodes. */
	loadVoiceSchedulerNodes(): Promise<RefCountedResource>;
}
|
|
113
|
+
|
|
114
|
+
/** Optional observer hooks for a `VoiceLifecycle`. */
export interface VoiceLifecycleEvents {
	/**
	 * Invoked synchronously on every state change, after the new state has
	 * already been stored.
	 *
	 * @param prev State the machine was in before the change.
	 * @param next State the machine is in now.
	 */
	onTransition?(prev: VoiceLifecycleState, next: VoiceLifecycleState): void;
}
|
|
117
|
+
|
|
118
|
+
/**
 * Drives the voice on/off lifecycle and owns the registry references taken
 * while voice is armed.
 *
 * Legal transitions:
 *   - `arm()`:    `voice-off → voice-arming → voice-on`    (or `→ voice-error`)
 *   - `disarm()`: `voice-on → voice-disarming → voice-off` (or `→ voice-error`)
 *   - `reset()`:  `voice-error → voice-off`
 *
 * Any other call throws a `VoiceLifecycleError` with code
 * `illegal-transition` and leaves the state untouched. Failures raised by
 * `registry.release()` are collected and reported through `voice-error`
 * instead of escaping mid-transition, because neither `arm()`, `disarm()` nor
 * `reset()` can leave `voice-arming` / `voice-disarming`.
 */
export class VoiceLifecycle {
	private state: VoiceLifecycleState = { kind: "voice-off" };
	private readonly registry: SharedResourceRegistry;
	private readonly loaders: VoiceLifecycleLoaders;
	private readonly events: VoiceLifecycleEvents;

	/**
	 * @param args.registry Registry that ref-counts every acquired resource.
	 * @param args.loaders  Loader callbacks used by `arm()`.
	 * @param args.events   Optional observer hooks; defaults to none.
	 */
	constructor(args: {
		registry: SharedResourceRegistry;
		loaders: VoiceLifecycleLoaders;
		events?: VoiceLifecycleEvents;
	}) {
		this.registry = args.registry;
		this.loaders = args.loaders;
		this.events = args.events ?? {};
	}

	/** @returns The current lifecycle state (the stored object, not a copy). */
	current(): VoiceLifecycleState {
		return this.state;
	}

	/**
	 * Transition `voice-off → voice-arming → voice-on`.
	 *
	 * Loads and acquires, in order, the TTS region, the ASR region, the voice
	 * caches and the voice scheduler nodes. There is no partial arm: if any
	 * step throws, everything acquired so far is evicted/released, the state
	 * becomes `voice-error`, and the structured error is thrown.
	 *
	 * @returns The four armed resources (also stored in the `voice-on` state).
	 * @throws VoiceLifecycleError `illegal-transition` when not in `voice-off`;
	 *   otherwise the loader's own `VoiceLifecycleError`, or one classified by
	 *   `toLifecycleError` with `arm-failed` as the fallback code.
	 */
	async arm(): Promise<ArmedResources> {
		if (this.state.kind !== "voice-off") {
			throw new VoiceLifecycleError(
				"illegal-transition",
				`[voice-lifecycle] arm() called in state ${this.state.kind} — must be voice-off`,
			);
		}
		this.transition({ kind: "voice-arming" });

		let tts: MmapRegionHandle | null = null;
		let asr: MmapRegionHandle | null = null;
		let voiceCaches: RefCountedResource | null = null;
		let voiceSchedulerNodes: RefCountedResource | null = null;
		try {
			tts = this.registry.acquire(await this.loaders.loadTtsRegion());
			asr = this.registry.acquire(await this.loaders.loadAsrRegion());
			voiceCaches = this.registry.acquire(await this.loaders.loadVoiceCaches());
			voiceSchedulerNodes = this.registry.acquire(
				await this.loaders.loadVoiceSchedulerNodes(),
			);
		} catch (err) {
			// Evict the heavy mmap regions before releasing them: release() only
			// drops the refcount and may intentionally keep file descriptors
			// alive for the next re-page. Eviction is best-effort here — the
			// load failure is the error worth reporting.
			await Promise.allSettled([
				tts?.evictPages() ?? Promise.resolve(),
				asr?.evictPages() ?? Promise.resolve(),
			]);
			// Roll back in reverse acquisition order. A failing release must not
			// escape: it would mask the load error and strand the machine in
			// `voice-arming`.
			const releaseFailures = await this.releaseAll([
				voiceSchedulerNodes,
				voiceCaches,
				asr,
				tts,
			]);
			const lifecycleErr = VoiceLifecycle.withReleaseFailures(
				toLifecycleError("arm-failed", err),
				releaseFailures,
			);
			this.transition({ kind: "voice-error", error: lifecycleErr });
			throw lifecycleErr;
		}

		const resources: ArmedResources = {
			tts,
			asr,
			voiceCaches,
			voiceSchedulerNodes,
		};
		this.transition({ kind: "voice-on", resources });
		return resources;
	}

	/**
	 * Transition `voice-on → voice-disarming → voice-off`.
	 *
	 * Calls `evictPages()` on the TTS and ASR regions first (they are still
	 * mapped, so the pages can still be dropped), then releases all four
	 * registry references in reverse acquisition order. Every release is
	 * attempted even when eviction or an earlier release failed, so one bad
	 * step cannot leak the remaining references.
	 *
	 * @throws VoiceLifecycleError `illegal-transition` when not in `voice-on`.
	 *   If eviction or any release failed, the state becomes `voice-error` and
	 *   the first failure is thrown: a `VoiceLifecycleError` keeps its own code
	 *   (e.g. `kernel-missing`), anything else is classified by
	 *   `toLifecycleError` with `disarm-failed` as the fallback code.
	 */
	async disarm(): Promise<void> {
		if (this.state.kind !== "voice-on") {
			throw new VoiceLifecycleError(
				"illegal-transition",
				`[voice-lifecycle] disarm() called in state ${this.state.kind} — must be voice-on`,
			);
		}
		const resources = this.state.resources;
		this.transition({ kind: "voice-disarming", resources });

		// Per the original implementation notes: on production handles
		// `evictPages()` wires through to the `libelizainference` FFI
		// (`ffi.mmapEvict(ctx, "tts" | "asr")`, declared in
		// `tools/omnivoice/include/eliza-inference-ffi.h`). The fused build
		// implements it by tearing down the OmniVoice / ASR model context
		// (`ov_free` + `eliza_free_asr`), letting those destructors run the
		// platform-appropriate unmap (`munmap` on POSIX, `UnmapViewOfFile` on
		// Windows). The TS layer stays platform-agnostic. The stub library
		// returns ELIZA_ERR_NOT_IMPLEMENTED, which the binding raises as
		// `VoiceLifecycleError({code:"kernel-missing"})`.
		const evictResults = await Promise.allSettled([
			resources.tts.evictPages(),
			resources.asr.evictPages(),
		]);
		const evictionRejection = evictResults.find(
			(r): r is PromiseRejectedResult => r.status === "rejected",
		);

		// Release in reverse acquisition order, regardless of eviction outcome.
		const releaseFailures = await this.releaseAll([
			resources.voiceSchedulerNodes,
			resources.voiceCaches,
			resources.asr,
			resources.tts,
		]);

		if (evictionRejection !== undefined || releaseFailures.length > 0) {
			// An eviction failure is the primary cause when there is one;
			// otherwise the first release failure is.
			const primary: unknown =
				evictionRejection !== undefined
					? evictionRejection.reason
					: releaseFailures[0];
			const secondary =
				evictionRejection !== undefined
					? releaseFailures
					: releaseFailures.slice(1);
			const err = VoiceLifecycle.withReleaseFailures(
				toLifecycleError("disarm-failed", primary),
				secondary,
			);
			this.transition({ kind: "voice-error", error: err });
			throw err;
		}
		this.transition({ kind: "voice-off" });
	}

	/**
	 * Acknowledge a failure: `voice-error → voice-off`. `voice-error` is
	 * terminal until this is called; there is no automatic retry path.
	 *
	 * @throws VoiceLifecycleError `illegal-transition` when not in `voice-error`.
	 */
	reset(): void {
		if (this.state.kind !== "voice-error") {
			throw new VoiceLifecycleError(
				"illegal-transition",
				`[voice-lifecycle] reset() called in state ${this.state.kind} — must be voice-error`,
			);
		}
		this.transition({ kind: "voice-off" });
	}

	/**
	 * Release each non-null resource through the registry, in the given
	 * order, without letting one failure stop the rest.
	 *
	 * @returns The errors thrown by failed releases, in order (empty on success).
	 */
	private async releaseAll(
		held: ReadonlyArray<RefCountedResource | null>,
	): Promise<unknown[]> {
		const failures: unknown[] = [];
		for (const res of held) {
			if (res === null) continue;
			try {
				await this.registry.release(res.id);
			} catch (err) {
				failures.push(err);
			}
		}
		return failures;
	}

	/**
	 * Fold secondary release failures into the primary error's message so
	 * they are surfaced rather than dropped. Returns `primary` itself when
	 * there is nothing to add; otherwise a new error with the same code.
	 */
	private static withReleaseFailures(
		primary: VoiceLifecycleError,
		failures: readonly unknown[],
	): VoiceLifecycleError {
		if (failures.length === 0) return primary;
		const detail = failures
			.map((f) => (f instanceof Error ? f.message : String(f)))
			.join("; ");
		return new VoiceLifecycleError(
			primary.code,
			`${primary.message} (registry release also failed: ${detail})`,
		);
	}

	/**
	 * Store `next` as the current state, then notify the optional listener.
	 * The listener runs synchronously and is not guarded, so an exception it
	 * throws propagates to whoever triggered the transition.
	 */
	private transition(next: VoiceLifecycleState): void {
		const prev = this.state;
		this.state = next;
		this.events.onTransition?.(prev, next);
	}
}
|
|
280
|
+
|
|
281
|
+
/**
 * Normalise an arbitrary thrown value into a `VoiceLifecycleError`.
 *
 * A value that already is a `VoiceLifecycleError` is returned unchanged, so
 * its original code is preserved. Otherwise the message (`Error#message`, or
 * `String(err)` for non-Error values) is matched against a few
 * platform-level signals, checked in this order:
 *
 *   1. `ENOMEM`, "out of memory", or the standalone word "RAM" → `ram-pressure`
 *   2. `mmap` / `MAP_FAILED`                                   → `mmap-fail`
 *   3. `kernel`                                                → `kernel-missing`
 *
 * Anything that matches none of these gets `fallbackCode` — the lifecycle
 * never fabricates a code it has no evidence for.
 *
 * @param fallbackCode Code to use when the message matches no known signal.
 * @param err          The caught value.
 * @returns A `VoiceLifecycleError` carrying the classified code and message.
 */
function toLifecycleError(
	fallbackCode: VoiceLifecycleError["code"],
	err: unknown,
): VoiceLifecycleError {
	if (err instanceof VoiceLifecycleError) return err;
	const message = err instanceof Error ? err.message : String(err);
	// "RAM" must be a whole word: matching is case-insensitive, so a bare
	// substring test would also hit "parameter", "frame", "program", … and
	// misreport those failures as RAM pressure.
	if (/ENOMEM|out of memory|\bRAM\b/i.test(message)) {
		return new VoiceLifecycleError("ram-pressure", message);
	}
	if (/mmap|MAP_FAILED/i.test(message)) {
		return new VoiceLifecycleError("mmap-fail", message);
	}
	if (/kernel/i.test(message)) {
		return new VoiceLifecycleError("kernel-missing", message);
	}
	return new VoiceLifecycleError(fallbackCode, message);
}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Live on-device diarization session — the agent-process owner of an
|
|
3
|
+
* {@link AudioFrameConsumer} wired to the REAL fused VAD / encoder / diarizer /
|
|
4
|
+
* attribution stack.
|
|
5
|
+
*
|
|
6
|
+
* The Android `audioFrame` PCM stream is produced in the Capacitor WebView
|
|
7
|
+
* (JS renderer) but the voice FFI runs in the embedded bun agent process. The
|
|
8
|
+
* agent's `/api/voice/audio-frames` route pumps batched frames into the single
|
|
9
|
+
* session this module owns, where the consumer segments turns, runs
|
|
10
|
+
* diarization + speaker attribution, and emits VOICE_TURN_OBSERVED.
|
|
11
|
+
*
|
|
12
|
+
* This module is the agent-side mirror of the host smoke harness
|
|
13
|
+
* (`packages/app-core/scripts/voice-attribution-smoke.ts`): same real models,
|
|
14
|
+
* same consumer, fed live frames over HTTP instead of a WAV.
|
|
15
|
+
*
|
|
16
|
+
* Single fused engine: VAD, the WeSpeaker speaker encoder, and the pyannote
|
|
17
|
+
* diarizer all run through the ONE fused `libelizainference` handle via its
|
|
18
|
+
* `eliza_inference_vad_*` / `_speaker_*` / `_diariz_*` ABI (the user directive:
|
|
19
|
+
* no separate bun:ffi-musl libs). Resolution:
|
|
20
|
+
* - fused lib: `$ELIZA_INFERENCE_LIBRARY` (exact) or `$ELIZA_INFERENCE_LIB_DIR`
|
|
21
|
+
* (dir) — both exported by ElizaAgentService on Android to the app
|
|
22
|
+
* nativeLibraryDir.
|
|
23
|
+
* - context bundle root: `$ELIZA_VOICE_MODEL_DIR` (the same dir the GGUFs
|
|
24
|
+
* live under); the fused runtime resolves the per-model GGUFs from there.
|
|
25
|
+
*/
|
|
26
|
+
import { type AudioFrameEvent, type RuntimeEventSink } from "./audio-frame-consumer.js";
|
|
27
|
+
export type { RuntimeEventSink } from "./audio-frame-consumer.js";
|
|
28
|
+
export interface LiveDiarizationStatus {
|
|
29
|
+
/** True once the consumer + real fused deps are loaded and accepting frames. */
|
|
30
|
+
ready: boolean;
|
|
31
|
+
/** Resolved fused-library path (null when it could not be resolved). */
|
|
32
|
+
libs: {
|
|
33
|
+
fusedInference: string | null;
|
|
34
|
+
};
|
|
35
|
+
/** Resolved context-bundle dir for the fused runtime. */
|
|
36
|
+
models: {
|
|
37
|
+
dir: string;
|
|
38
|
+
};
|
|
39
|
+
/** Frames received from the WebView across this session. */
|
|
40
|
+
framesReceived: number;
|
|
41
|
+
/** Frames dropped at the decode boundary. */
|
|
42
|
+
framesDropped: number;
|
|
43
|
+
/** Turns segmented + attributed so far. */
|
|
44
|
+
turnsObserved: number;
|
|
45
|
+
/** The most recent attributed turns (capped), for device-evidence reads. */
|
|
46
|
+
recentTurns: LiveDiarizationTurnSummary[];
|
|
47
|
+
/** Populated only when readiness failed — the precise blocker. */
|
|
48
|
+
error?: string;
|
|
49
|
+
}
|
|
50
|
+
/** A compact, JSON-safe summary of one attributed turn (no PCM/embeddings). */
|
|
51
|
+
export interface LiveDiarizationTurnSummary {
|
|
52
|
+
turnId: string;
|
|
53
|
+
startedAtMs: number;
|
|
54
|
+
endedAtMs: number;
|
|
55
|
+
samples: number;
|
|
56
|
+
durationMs: number;
|
|
57
|
+
hasSpeaker: boolean;
|
|
58
|
+
speakerEntityId: string | null;
|
|
59
|
+
speakerConfidence: number | null;
|
|
60
|
+
segments: number;
|
|
61
|
+
agentShouldSpeak: boolean | null;
|
|
62
|
+
nextSpeaker: string | null;
|
|
63
|
+
}
|
|
64
|
+
/**
|
|
65
|
+
* Owns the single live diarization consumer for the agent process. Built
|
|
66
|
+
* lazily on first frame batch so it does not load voice models at boot.
|
|
67
|
+
*/
|
|
68
|
+
export declare class LiveDiarizationSession {
|
|
69
|
+
private readonly runtime;
|
|
70
|
+
private consumer;
|
|
71
|
+
private ffi;
|
|
72
|
+
private ctx;
|
|
73
|
+
private encoder;
|
|
74
|
+
private diarizer;
|
|
75
|
+
private vad;
|
|
76
|
+
private building;
|
|
77
|
+
private framesReceived;
|
|
78
|
+
private turnsObserved;
|
|
79
|
+
private readonly recentTurns;
|
|
80
|
+
private resolvedLibPath;
|
|
81
|
+
private buildError;
|
|
82
|
+
constructor(runtime: RuntimeEventSink);
|
|
83
|
+
/** Ensure the real-deps consumer exists; idempotent + concurrency-safe. */
|
|
84
|
+
private ensureBuilt;
|
|
85
|
+
private build;
|
|
86
|
+
private recordTurn;
|
|
87
|
+
/** Feed a batch of WebView-captured frames; resolves once VAD has processed them. */
|
|
88
|
+
ingest(frames: AudioFrameEvent[]): Promise<void>;
|
|
89
|
+
/** Flush any open segment (call on stopAudioFrames) and await attribution. */
|
|
90
|
+
flush(): Promise<void>;
|
|
91
|
+
/** Build (if needed) and report status — the device-evidence read. */
|
|
92
|
+
status(): Promise<LiveDiarizationStatus>;
|
|
93
|
+
/** Release native handles + listeners. */
|
|
94
|
+
close(): Promise<void>;
|
|
95
|
+
}
|
|
96
|
+
//# sourceMappingURL=live-diarization-session.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"live-diarization-session.d.ts","sourceRoot":"","sources":["live-diarization-session.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAKH,OAAO,EAIN,KAAK,eAAe,EACpB,KAAK,gBAAgB,EACrB,MAAM,2BAA2B,CAAC;AAYnC,YAAY,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAC;AAqClE,MAAM,WAAW,qBAAqB;IACrC,gFAAgF;IAChF,KAAK,EAAE,OAAO,CAAC;IACf,wEAAwE;IACxE,IAAI,EAAE;QACL,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;KAC9B,CAAC;IACF,yDAAyD;IACzD,MAAM,EAAE;QACP,GAAG,EAAE,MAAM,CAAC;KACZ,CAAC;IACF,4DAA4D;IAC5D,cAAc,EAAE,MAAM,CAAC;IACvB,6CAA6C;IAC7C,aAAa,EAAE,MAAM,CAAC;IACtB,2CAA2C;IAC3C,aAAa,EAAE,MAAM,CAAC;IACtB,4EAA4E;IAC5E,WAAW,EAAE,0BAA0B,EAAE,CAAC;IAC1C,kEAAkE;IAClE,KAAK,CAAC,EAAE,MAAM,CAAC;CACf;AAED,+EAA+E;AAC/E,MAAM,WAAW,0BAA0B;IAC1C,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,OAAO,CAAC;IACpB,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,iBAAiB,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,QAAQ,EAAE,MAAM,CAAC;IACjB,gBAAgB,EAAE,OAAO,GAAG,IAAI,CAAC;IACjC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;CAC3B;AAID;;;GAGG;AACH,qBAAa,sBAAsB;IActB,OAAO,CAAC,QAAQ,CAAC,OAAO;IAbpC,OAAO,CAAC,QAAQ,CAAmC;IACnD,OAAO,CAAC,GAAG,CAAkC;IAC7C,OAAO,CAAC,GAAG,CAA4C;IACvD,OAAO,CAAC,OAAO,CAAoC;IACnD,OAAO,CAAC,QAAQ,CAA8B;IAC9C,OAAO,CAAC,GAAG,CAA8B;IACzC,OAAO,CAAC,QAAQ,CAA8B;IAC9C,OAAO,CAAC,cAAc,CAAK;IAC3B,OAAO,CAAC,aAAa,CAAK;IAC1B,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAoC;IAChE,OAAO,CAAC,eAAe,CAAuB;IAC9C,OAAO,CAAC,UAAU,CAAuB;gBAEZ,OAAO,EAAE,gBAAgB;IAEtD,2EAA2E;IAC3E,OAAO,CAAC,WAAW;YAUL,KAAK;IAmEnB,OAAO,CAAC,UAAU;IAoBlB,qFAAqF;IAC/E,MAAM,CAAC,MAAM,EAAE,eAAe,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAStD,8EAA8E;IACxE,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAI5B,sEAAsE;IAChE,MAAM,IAAI,OAAO,CAAC,qBAAqB,CAAC;IAkB9C,0CAA0C;IACpC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAW5B"}
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Live on-device diarization session — the agent-process owner of an
|
|
3
|
+
* {@link AudioFrameConsumer} wired to the REAL fused VAD / encoder / diarizer /
|
|
4
|
+
* attribution stack.
|
|
5
|
+
*
|
|
6
|
+
* The Android `audioFrame` PCM stream is produced in the Capacitor WebView
|
|
7
|
+
* (JS renderer) but the voice FFI runs in the embedded bun agent process. The
|
|
8
|
+
* agent's `/api/voice/audio-frames` route pumps batched frames into the single
|
|
9
|
+
* session this module owns, where the consumer segments turns, runs
|
|
10
|
+
* diarization + speaker attribution, and emits VOICE_TURN_OBSERVED.
|
|
11
|
+
*
|
|
12
|
+
* This module is the agent-side mirror of the host smoke harness
|
|
13
|
+
* (`packages/app-core/scripts/voice-attribution-smoke.ts`): same real models,
|
|
14
|
+
* same consumer, fed live frames over HTTP instead of a WAV.
|
|
15
|
+
*
|
|
16
|
+
* Single fused engine: VAD, the WeSpeaker speaker encoder, and the pyannote
|
|
17
|
+
* diarizer all run through the ONE fused `libelizainference` handle via its
|
|
18
|
+
* `eliza_inference_vad_*` / `_speaker_*` / `_diariz_*` ABI (the user directive:
|
|
19
|
+
* no separate bun:ffi-musl libs). Resolution:
|
|
20
|
+
* - fused lib: `$ELIZA_INFERENCE_LIBRARY` (exact) or `$ELIZA_INFERENCE_LIB_DIR`
|
|
21
|
+
* (dir) — both exported by ElizaAgentService on Android to the app
|
|
22
|
+
* nativeLibraryDir.
|
|
23
|
+
* - context bundle root: `$ELIZA_VOICE_MODEL_DIR` (the same dir the GGUFs
|
|
24
|
+
* live under); the fused runtime resolves the per-model GGUFs from there.
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
import { existsSync } from "node:fs";
|
|
28
|
+
import path from "node:path";
|
|
29
|
+
import { resolveStateDir } from "@elizaos/core";
|
|
30
|
+
import {
|
|
31
|
+
type AttributedTurn,
|
|
32
|
+
AudioFrameConsumer,
|
|
33
|
+
type AudioFrameConsumerConfig,
|
|
34
|
+
type AudioFrameEvent,
|
|
35
|
+
type RuntimeEventSink,
|
|
36
|
+
} from "./audio-frame-consumer.js";
|
|
37
|
+
import type {
|
|
38
|
+
ElizaInferenceContextHandle,
|
|
39
|
+
ElizaInferenceFfi,
|
|
40
|
+
} from "./ffi-bindings.js";
|
|
41
|
+
import { loadElizaInferenceFfi } from "./ffi-bindings.js";
|
|
42
|
+
import { VoiceProfileStore } from "./profile-store.js";
|
|
43
|
+
import { VoiceAttributionPipeline } from "./speaker/attribution-pipeline.js";
|
|
44
|
+
import { FusedDiarizer } from "./speaker/diarizer-fused.js";
|
|
45
|
+
import { FusedSpeakerEncoder } from "./speaker/encoder-fused.js";
|
|
46
|
+
import { GgmlSileroVad, VadDetector } from "./vad.js";
|
|
47
|
+
|
|
48
|
+
export type { RuntimeEventSink } from "./audio-frame-consumer.js";
|
|
49
|
+
|
|
50
|
+
/**
 * Resolve the on-device voice-model directory, which doubles as the fused
 * context bundle root (the runtime resolves per-model GGUFs from it).
 *
 * @returns The trimmed `$ELIZA_VOICE_MODEL_DIR` when it is non-empty;
 *   otherwise `<state dir>/models/voice`.
 */
function voiceModelDir(): string {
	const configured = process.env.ELIZA_VOICE_MODEL_DIR?.trim();
	return configured
		? configured
		: path.join(resolveStateDir(process.env), "models", "voice");
}
|
|
57
|
+
|
|
58
|
+
/**
 * Candidate filenames for the fused library on the current platform, in
 * lookup order.
 *
 * @returns `.dylib` on macOS, the two `.dll` spellings on Windows, and the
 *   `.so` name everywhere else.
 */
function fusedLibraryFilenames(): string[] {
	switch (process.platform) {
		case "darwin":
			return ["libelizainference.dylib"];
		case "win32":
			return ["elizainference.dll", "libelizainference.dll"];
		default:
			return ["libelizainference.so"];
	}
}
|
|
66
|
+
|
|
67
|
+
/**
 * Locate the fused `libelizainference` library from the environment.
 *
 * Lookup order:
 *   1. `$ELIZA_INFERENCE_LIBRARY` — an exact path, used when it exists.
 *   2. `$ELIZA_INFERENCE_LIB_DIR` — a directory probed for each platform
 *      filename in turn; the first one that exists wins.
 *
 * Both variables are trimmed before use.
 *
 * @returns The resolved path, or `null` when neither lookup finds an
 *   existing path — the session then surfaces that as a structured build
 *   error.
 */
function resolveFusedLibrary(): string | null {
	const exactPath = process.env.ELIZA_INFERENCE_LIBRARY?.trim();
	if (exactPath && existsSync(exactPath)) return exactPath;

	const libDir = process.env.ELIZA_INFERENCE_LIB_DIR?.trim();
	if (!libDir) return null;

	const found = fusedLibraryFilenames()
		.map((name) => path.join(libDir, name))
		.find((candidate) => existsSync(candidate));
	return found ?? null;
}
|
|
84
|
+
|
|
85
|
+
/** Status snapshot of the live diarization session. */
export interface LiveDiarizationStatus {
	/** True once the consumer + real fused deps are loaded and accepting frames. */
	ready: boolean;
	/** Resolved fused-library path (`null` when it could not be resolved). */
	libs: { fusedInference: string | null };
	/** Resolved context-bundle dir for the fused runtime. */
	models: { dir: string };
	/** Frames received from the WebView across this session. */
	framesReceived: number;
	/** Frames dropped at the decode boundary. */
	framesDropped: number;
	/** Turns segmented + attributed so far. */
	turnsObserved: number;
	/** The most recent attributed turns (capped), for device-evidence reads. */
	recentTurns: LiveDiarizationTurnSummary[];
	/** Populated only when readiness failed — the precise blocker. */
	error?: string;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Compact, JSON-safe summary of one attributed turn. Holds only primitives
 * and `null` — no PCM and no embeddings.
 */
export interface LiveDiarizationTurnSummary {
	turnId: string;
	startedAtMs: number;
	endedAtMs: number;
	samples: number;
	durationMs: number;
	hasSpeaker: boolean;
	speakerEntityId: string | null;
	speakerConfidence: number | null;
	segments: number;
	agentShouldSpeak: boolean | null;
	nextSpeaker: string | null;
}
|
|
122
|
+
|
|
123
|
+
/** Upper bound on how many turn summaries the session keeps for status reads. */
const MAX_RECENT_TURNS = 20;
|
|
124
|
+
|
|
125
|
+
/**
 * Owns the single live diarization consumer for the agent process.
 *
 * The consumer and its native dependencies (fused library, context, VAD,
 * speaker encoder, diarizer) are built lazily — on the first `ingest()` or
 * `status()` call — so no voice models are loaded at boot.
 *
 * Lifecycle notes:
 *  - A failed build is cached: the rejected build promise is reused, and the
 *    failure message is reported through `status().error`.
 *  - `close()` is idempotent. After it, the session is inert: `ingest()`
 *    becomes a no-op and `status()` reports `ready: false`.
 */
export class LiveDiarizationSession {
  private consumer: AudioFrameConsumer | null = null;
  private ffi: ElizaInferenceFfi | null = null;
  private ctx: ElizaInferenceContextHandle | null = null;
  private encoder: FusedSpeakerEncoder | null = null;
  private diarizer: FusedDiarizer | null = null;
  private vad: GgmlSileroVad | null = null;
  /** In-flight (or settled) build; shared so concurrent callers build once. */
  private building: Promise<void> | null = null;
  private framesReceived = 0;
  private turnsObserved = 0;
  private readonly recentTurns: LiveDiarizationTurnSummary[] = [];
  private resolvedLibPath: string | null = null;
  private buildError: string | null = null;

  /**
   * @param runtime Event sink handed to the audio-frame consumer.
   */
  constructor(private readonly runtime: RuntimeEventSink) {}

  /**
   * Ensure the real-deps consumer exists; idempotent + concurrency-safe.
   *
   * @returns A promise that settles with the (single) build attempt.
   * @throws Rejects with the build error; the message is also stored for
   *   `status()`.
   */
  private ensureBuilt(): Promise<void> {
    if (this.consumer) return Promise.resolve();
    if (this.building) return this.building;
    this.building = this.build().catch((err: unknown) => {
      this.buildError = err instanceof Error ? err.message : String(err);
      throw err;
    });
    return this.building;
  }

  /**
   * Load the fused library, verify the required ABIs, and wire up the
   * VAD → attribution pipeline → consumer chain.
   *
   * Each native handle is stored on the instance as soon as it is created,
   * so `close()` can release whatever was acquired even if a later step
   * throws.
   *
   * @throws Error when the library cannot be found or lacks a required ABI;
   *   also propagates any error raised while loading a component.
   */
  private async build(): Promise<void> {
    const dir = voiceModelDir();
    const libPath = resolveFusedLibrary();
    if (!libPath) {
      throw new Error(
        `fused libelizainference not found on device. Set $ELIZA_INFERENCE_LIBRARY (exact path) or $ELIZA_INFERENCE_LIB_DIR (containing one of ${fusedLibraryFilenames().join(", ")}).`,
      );
    }
    this.resolvedLibPath = libPath;
    const ffi = loadElizaInferenceFfi(libPath);
    this.ffi = ffi;
    // One context anchored at the voice-model dir; the fused runtime resolves
    // the VAD / speaker / diarizer GGUFs from it.
    const ctx = ffi.create(dir);
    this.ctx = ctx;

    // Fail fast, with a specific message, for each ABI the pipeline needs.
    if (!GgmlSileroVad.isSupported(ffi)) {
      throw new Error(
        "fused libelizainference does not export the VAD ABI (eliza_inference_vad_supported() == 0). Rebuild with the fused voice runtime linked in.",
      );
    }
    if (!FusedSpeakerEncoder.isSupported(ffi)) {
      throw new Error(
        "fused libelizainference does not export the speaker ABI (eliza_inference_speaker_supported() == 0).",
      );
    }
    if (!FusedDiarizer.isSupported(ffi)) {
      throw new Error(
        "fused libelizainference does not export the diarizer ABI (eliza_inference_diariz_supported() == 0).",
      );
    }

    const vad = await GgmlSileroVad.load({ ffi, ctx });
    this.vad = vad;
    const detector = new VadDetector(vad, {
      onsetThreshold: 0.5,
      pauseHangoverMs: 120,
      endHangoverMs: 500,
      minSpeechMs: 250,
    });
    const encoder = await FusedSpeakerEncoder.load({ ffi, ctx });
    this.encoder = encoder;
    const diarizer = await FusedDiarizer.load({ ffi, ctx });
    this.diarizer = diarizer;
    const store = new VoiceProfileStore({
      rootDir: path.join(resolveStateDir(process.env), "voice-profiles"),
    });
    await store.init();

    const pipeline = new VoiceAttributionPipeline({
      encoder,
      diarizer,
      profileStore: store,
    });
    const config: AudioFrameConsumerConfig = {
      source: { kind: "local_mic", deviceId: "android-audioframe" },
      preRollSeconds: 0.3,
      maxTurnSeconds: 30,
    };
    const consumer = new AudioFrameConsumer(
      { vad: detector, pipeline, runtime: this.runtime },
      config,
    );
    consumer.onTurn((turn: AttributedTurn) => this.recordTurn(turn));
    // Assigned last: a non-null consumer is what marks the session ready.
    this.consumer = consumer;
  }

  /**
   * Count an attributed turn and keep a compact summary of it, evicting the
   * oldest summary once more than `MAX_RECENT_TURNS` are held.
   */
  private recordTurn(turn: AttributedTurn): void {
    this.turnsObserved += 1;
    const speaker = turn.output.primarySpeaker;
    const summary: LiveDiarizationTurnSummary = {
      turnId: turn.turnId,
      startedAtMs: turn.startedAtMs,
      endedAtMs: turn.endedAtMs,
      samples: turn.samples,
      // Converts the sample count to milliseconds at 16 000 samples/second.
      durationMs: Math.round((turn.samples / 16_000) * 1000),
      hasSpeaker: speaker != null,
      speakerEntityId: speaker?.entityId ?? null,
      speakerConfidence: speaker?.confidence ?? null,
      segments: turn.output.segments.length,
      agentShouldSpeak: turn.signal.agentShouldSpeak,
      nextSpeaker: turn.signal.nextSpeaker ?? null,
    };
    this.recentTurns.push(summary);
    if (this.recentTurns.length > MAX_RECENT_TURNS) this.recentTurns.shift();
  }

  /**
   * Feed a batch of WebView-captured frames; resolves once VAD has processed them.
   *
   * Frames are handed to the consumer one at a time, in order.
   *
   * @throws Rejects when the lazy build fails.
   */
  async ingest(frames: AudioFrameEvent[]): Promise<void> {
    await this.ensureBuilt();
    // Only reachable with a null consumer after `close()`.
    if (!this.consumer) return;
    for (const frame of frames) {
      this.framesReceived += 1;
      await this.consumer.onAudioFrame(frame);
    }
  }

  /** Flush any open segment (call on stopAudioFrames) and await attribution. */
  async flush(): Promise<void> {
    if (this.consumer) await this.consumer.flush();
  }

  /**
   * Build (if needed) and report status — the device-evidence read.
   *
   * Never rejects on a build failure: the blocker is returned in `error`.
   */
  async status(): Promise<LiveDiarizationStatus> {
    try {
      await this.ensureBuilt();
    } catch {
      // Surface the blocker in the status payload rather than throwing.
    }
    return {
      ready: this.consumer != null,
      libs: { fusedInference: this.resolvedLibPath },
      models: { dir: voiceModelDir() },
      framesReceived: this.framesReceived,
      framesDropped: this.consumer?.droppedFrames ?? 0,
      turnsObserved: this.turnsObserved,
      recentTurns: [...this.recentTurns],
      ...(this.buildError ? { error: this.buildError } : {}),
    };
  }

  /**
   * Release native handles + listeners.
   *
   * Safe to call more than once: every handle is detached from the instance
   * before teardown starts, so a repeated (or overlapping) call finds
   * nothing left to release and cannot dispose a handle twice.
   *
   * Teardown order is consumer → encoder → diarizer → VAD → context →
   * library. The context and library are released in a `finally` so the
   * native library is not leaked when an earlier step throws; the original
   * error still propagates to the caller.
   */
  async close(): Promise<void> {
    const { consumer, encoder, diarizer, vad, ffi, ctx } = this;
    this.consumer = null;
    this.encoder = null;
    this.diarizer = null;
    this.vad = null;
    this.ffi = null;
    this.ctx = null;

    try {
      await consumer?.close();
      await encoder?.dispose();
      await diarizer?.dispose();
      vad?.close();
    } finally {
      if (ffi) {
        // Explicit null check: a context handle may be a falsy value.
        if (ctx !== null) ffi.destroy(ctx);
        ffi.close();
      }
    }
  }
}
|