@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +81 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +7 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +54 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1171 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +190 -0
- package/src/routes/local-inference-asr-route.ts +213 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +423 -0
- package/src/routes/local-inference-compat-routes.ts +782 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1398 -0
- package/src/runtime/index.d.ts +14 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +27 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/asr/errors.d.ts +21 -0
- package/src/services/asr/errors.d.ts.map +1 -0
- package/src/services/asr/errors.ts +50 -0
- package/src/services/asr/hash.d.ts +28 -0
- package/src/services/asr/hash.d.ts.map +1 -0
- package/src/services/asr/hash.ts +49 -0
- package/src/services/asr/index.d.ts +76 -0
- package/src/services/asr/index.d.ts.map +1 -0
- package/src/services/asr/index.ts +178 -0
- package/src/services/asr/types.d.ts +91 -0
- package/src/services/asr/types.d.ts.map +1 -0
- package/src/services/asr/types.ts +95 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +240 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +724 -0
- package/src/services/downloader.ts +899 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +534 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1891 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +183 -0
- package/src/services/hardware.ts +404 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +281 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +30 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +225 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +693 -0
- package/src/services/manifest/schema.d.ts +715 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +655 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +569 -0
- package/src/services/memory-arbiter.d.ts +343 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +1000 -0
- package/src/services/memory-monitor.d.ts +119 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +296 -0
- package/src/services/memory-pressure.d.ts +127 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +413 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +672 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +376 -0
- package/src/services/routing-policy.d.ts +55 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.ts +228 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +15 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/tts/errors.ts +46 -0
- package/src/services/tts/index.ts +214 -0
- package/src/services/tts/tts-audio-cache.ts +235 -0
- package/src/services/tts/types.ts +157 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +148 -0
- package/src/services/voice/embedding.ts +244 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +746 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2226 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +636 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +671 -0
- package/src/services/voice/ffi-bindings.ts +3050 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.ts +105 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +420 -0
- package/src/services/voice/voice-budget.ts +656 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,375 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wiring / cancel / shape test for the two-agents-talking-endlessly path the
|
|
3
|
+
* `voice:duet` harness drives — run headlessly with fake backends + the
|
|
4
|
+
* `DuetAudioBridge`:
|
|
5
|
+
*
|
|
6
|
+
* agent A `replyText` → A's fake TTS → DuetSink (24 kHz → 16 kHz) → a ring
|
|
7
|
+
* → B's `PushMicSource` → B's VAD/ASR → B's `generate` → B's `replyText` →
|
|
8
|
+
* B's TTS → A's ring → … (3 round-trips).
|
|
9
|
+
*
|
|
10
|
+
* Assertions (UNCONDITIONAL — no real model, no native code):
|
|
11
|
+
* (a) A's TTS PCM lands in B's ring (B's `PushMicSource` emits frames)
|
|
12
|
+
* (b) B's VAD/transcriber see it → B's `generate` fires → B's reply PCM
|
|
13
|
+
* lands in A's ring
|
|
14
|
+
* (c) `--turns 3` runs without a deadlock — three A→B→A round-trips
|
|
15
|
+
* (d) both latency tracers recorded ≥1 turn each, incl. the duet checkpoints
|
|
16
|
+
* `peer-utterance-end` / `audio-first-into-peer-ring` and the headline
|
|
17
|
+
* `ttftFromUtteranceEndMs` / `firstAudioIntoPeerRingFromUtteranceEndMs`
|
|
18
|
+
* (e) the cross-ring stays bounded (the `DuetSink` is drained by the
|
|
19
|
+
* `PushMicSource`'s re-framing — no unbounded growth)
|
|
20
|
+
* (f) a cancel mid-`generate` (the producer's `AbortSignal`) stops the turn
|
|
21
|
+
* and doesn't wedge the loop
|
|
22
|
+
*
|
|
23
|
+
* The *real-output* run (a real `eliza-1-0_8b` duet, gated behind the catalog
|
|
24
|
+
* +-fused-build probe) lives in `voice-duet.e2e.test.ts`.
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
import { describe, expect, it } from "vitest";
|
|
28
|
+
import {
|
|
29
|
+
DuetAudioBridge,
|
|
30
|
+
resampleLinear,
|
|
31
|
+
} from "../../../../../packages/app-core/scripts/lib/duet-bridge.mjs";
|
|
32
|
+
import { EndToEndLatencyTracer, type LatencyTrace } from "../latency-trace";
|
|
33
|
+
import { parseExpressiveTags } from "./expressive-tags";
|
|
34
|
+
import { PushMicSource } from "./mic-source";
|
|
35
|
+
import type { VoiceGenerateRequest, VoiceTurnOutcome } from "./turn-controller";
|
|
36
|
+
import type {
|
|
37
|
+
PcmFrame,
|
|
38
|
+
StreamingTranscriber,
|
|
39
|
+
TranscriberEventListener,
|
|
40
|
+
TranscriptUpdate,
|
|
41
|
+
VadEvent,
|
|
42
|
+
VadEventListener,
|
|
43
|
+
VadEventSource,
|
|
44
|
+
} from "./types";
|
|
45
|
+
|
|
46
|
+
const TTS_RATE = 24_000;
|
|
47
|
+
const ASR_RATE = 16_000;
|
|
48
|
+
|
|
49
|
+
/**
 * Test-only stand-in for a TTS backend on the wiring path.
 *
 * Each `speak(text)` call synthesises a deterministic burst of NON-ZERO
 * 24 kHz PCM and hands it to the injected sink (the `DuetSink` in these
 * tests). (AGENTS.md §3 bans silent production fallbacks; a clearly
 * test-only fake that emits real PCM is fine here.)
 */
class FakeTts {
	private readonly sink: { write(pcm: Float32Array, sr: number): void };

	constructor(sink: { write(pcm: Float32Array, sr: number): void }) {
		this.sink = sink;
	}

	/**
	 * Write a 220 Hz sine burst (amplitude 0.3) for `text` into the sink.
	 *
	 * The burst lasts 0.12 s per whitespace-separated word, with a minimum
	 * of one word so that empty text still produces audio.
	 *
	 * @param text Utterance to "speak"; only its word count matters.
	 * @returns The number of samples written to the sink.
	 */
	speak(text: string): number {
		const wordCount = text.trim().split(/\s+/).filter(Boolean).length;
		const samplesPerWord = Math.round(TTS_RATE * 0.12);
		const samples = Math.max(1, wordCount) * samplesPerWord;
		const burst = Float32Array.from(
			{ length: samples },
			(_, i) => 0.3 * Math.sin((2 * Math.PI * 220 * i) / TTS_RATE),
		);
		this.sink.write(burst, TTS_RATE);
		return samples;
	}
}
|
|
68
|
+
|
|
69
|
+
/**
 * Scriptable `StreamingTranscriber` double.
 *
 * It ignores the audio it is fed and instead "recognises" a preset text:
 * the second fed frame triggers one `partial` event (the first two words)
 * followed by a `words` event, and `flush()` emits the full text as `final`.
 */
class TestTranscriber implements StreamingTranscriber {
	private readonly subscribers = new Set<TranscriberEventListener>();
	private framesFed = 0;
	private partialSent = false;
	private closed = false;

	constructor(private text: string) {}

	/** Arm the transcriber for the next utterance and reset per-turn state. */
	setNext(text: string): void {
		this.text = text;
		this.partialSent = false;
		this.framesFed = 0;
	}

	/**
	 * Count one incoming frame. The frame content is never inspected; once
	 * two frames have arrived the single per-utterance partial is emitted.
	 */
	feed(_frame: PcmFrame): void {
		if (this.closed) return;
		this.framesFed += 1;
		if (this.partialSent || this.framesFed < 2) return;

		this.partialSent = true;
		const prefix = this.text.split(/\s+/).slice(0, 2).join(" ");
		const update: TranscriptUpdate = { partial: prefix, isFinal: false };
		this.subscribers.forEach((notify) => {
			notify({ kind: "partial", update });
		});

		const words = prefix.split(/\s+/).filter(Boolean);
		if (words.length === 0) return;
		this.subscribers.forEach((notify) => {
			notify({ kind: "words", words });
		});
	}

	/** Emit the whole preset text as the final transcript and return it. */
	async flush(): Promise<TranscriptUpdate> {
		const update: TranscriptUpdate = { partial: this.text, isFinal: true };
		this.subscribers.forEach((notify) => {
			notify({ kind: "final", update });
		});
		return update;
	}

	/** Subscribe to transcriber events; the returned function unsubscribes. */
	on(listener: TranscriberEventListener): () => void {
		this.subscribers.add(listener);
		const unsubscribe = () => this.subscribers.delete(listener);
		return unsubscribe;
	}

	/** Stop accepting frames and drop every subscriber. */
	dispose(): void {
		this.closed = true;
		this.subscribers.clear();
	}
}
|
|
107
|
+
|
|
108
|
+
/**
 * Hand-driven `VadEventSource`: the test decides exactly which VAD events
 * fire and when. Every emitted event is also recorded in `seen`.
 */
class ScriptableVad implements VadEventSource {
	private readonly subscribers = new Set<VadEventListener>();
	/** Every event passed to `emit`, in emission order. */
	readonly seen: VadEvent[] = [];

	/** Register a listener; the returned function removes it again. */
	onVadEvent(listener: VadEventListener): () => void {
		this.subscribers.add(listener);
		const unsubscribe = () => this.subscribers.delete(listener);
		return unsubscribe;
	}

	/** Record `e`, then deliver it to every current listener. */
	emit(e: VadEvent): void {
		this.seen.push(e);
		this.subscribers.forEach((notify) => {
			notify(e);
		});
	}
}
|
|
120
|
+
|
|
121
|
+
/** Build a `speech-start` VAD event at `ms` with a fixed 0.9 probability. */
const vadStart = (ms: number): VadEvent => {
	const event: VadEvent = {
		type: "speech-start",
		timestampMs: ms,
		probability: 0.9,
	};
	return event;
};
|
|
126
|
+
/**
 * Build a `speech-active` VAD event at `ms` (probability fixed at 0.9)
 * reporting `dur` milliseconds of speech so far.
 */
const vadActive = (ms: number, dur: number): VadEvent => {
	const event: VadEvent = {
		type: "speech-active",
		timestampMs: ms,
		probability: 0.9,
		speechDurationMs: dur,
	};
	return event;
};
|
|
132
|
+
/**
 * Build a `speech-end` VAD event at `ms` for an utterance that lasted
 * `dur` milliseconds. Unlike the start/active events it has no probability.
 */
const vadEnd = (ms: number, dur: number): VadEvent => {
	const event: VadEvent = {
		type: "speech-end",
		timestampMs: ms,
		speechDurationMs: dur,
	};
	return event;
};
|
|
137
|
+
|
|
138
|
+
describe("voice:duet — wiring (fake backends + DuetAudioBridge)", () => {
|
|
139
|
+
it("resampleLinear: 24 kHz → 16 kHz keeps the 3:2 sample ratio; no-op when rates match", () => {
	// Two seconds of 24 kHz signal. The waveform itself is irrelevant —
	// only the output length and the pass-through identity are checked.
	const input = Float32Array.from({ length: 48_000 }, (_, i) =>
		Math.sin(i / 100),
	);

	// 24 kHz → 16 kHz must shrink the buffer by exactly 3:2.
	const downsampled = resampleLinear(input, TTS_RATE, ASR_RATE);
	expect(downsampled.length).toBe(32_000);

	// Equal rates must hand back the very same buffer, not a copy.
	const passthrough = resampleLinear(input, 16_000, 16_000);
	expect(passthrough).toBe(input);
});
|
|
145
|
+
|
|
146
|
+
// End-to-end wiring check for the duet loop with fake backends: A seeds the
// conversation, then B and A alternate for three round-trips. Each agent's
// fake TTS writes into its bridge sink, which forwards the PCM to the peer's
// `PushMicSource`. Covers assertions (a)–(e) from the file header.
it("(a)(b)(c)(d)(e) A's TTS PCM crosses to B → B replies → 3 round-trips; both tracers record the duet checkpoints; cross-ring bounded", async () => {
	const tracerA = new EndToEndLatencyTracer();
	const tracerB = new EndToEndLatencyTracer();
	const pushA = new PushMicSource({ sampleRate: ASR_RATE });
	const pushB = new PushMicSource({ sampleRate: ASR_RATE });
	// Running totals: samples the bridge forwarded in each direction, and
	// frames each mic source re-emitted to its consumer.
	let aToBSamples = 0;
	let bToASamples = 0;
	let pushBFrames = 0;
	let pushAFrames = 0;
	pushB.onFrame(() => {
		pushBFrames += 1;
	});
	pushA.onFrame(() => {
		pushAFrames += 1;
	});
	const bridge = new DuetAudioBridge({
		micSourceA: pushA,
		micSourceB: pushB,
		opts: {
			ringMs: 220,
			targetRate: ASR_RATE,
			onForward: (dir: "aToB" | "bToA", pcm: Float32Array) => {
				if (dir === "aToB") aToBSamples += pcm.length;
				else bToASamples += pcm.length;
			},
		},
	});
	await pushA.start();
	await pushB.start();

	// Everything after start() runs under try/finally so both mic sources
	// are stopped even when an expectation throws part-way through.
	try {
		const ttsA = new FakeTts(bridge.sinkForA());
		const ttsB = new FakeTts(bridge.sinkForB());
		const vadB = new ScriptableVad();
		const transcriberB = new TestTranscriber("that's an interesting point");
		const vadA = new ScriptableVad();
		const transcriberA = new TestTranscriber("yeah and another thought");

		// B's fake `generate`: honours an already-aborted signal, otherwise
		// speaks a fixed emotion-tagged reply through B's TTS.
		let bTurns = 0;
		const generateB = async (
			request: VoiceGenerateRequest,
		): Promise<VoiceTurnOutcome> => {
			if (request.signal.aborted) {
				const e = new Error("aborted");
				e.name = "AbortError";
				throw e;
			}
			bTurns += 1;
			const reply = "[calm] yes — i was thinking the same thing";
			const parsed = parseExpressiveTags(reply);
			expect(parsed.dominantEmotion).toBe("calm");
			ttsB.speak(reply);
			return { transcript: request.transcript, replyText: reply };
		};
		// A's fake `generate`: same shape, different fixed reply.
		let aTurns = 0;
		const generateA = async (
			request: VoiceGenerateRequest,
		): Promise<VoiceTurnOutcome> => {
			if (request.signal.aborted) {
				const e = new Error("aborted");
				e.name = "AbortError";
				throw e;
			}
			aTurns += 1;
			const reply = "okay so [excited] here's the next question";
			ttsA.speak(reply);
			return { transcript: request.transcript, replyText: reply };
		};

		// Drive one listener-side turn: script the VAD, feed four frames
		// to the transcriber, flush it, call `generate`, and mark every
		// latency checkpoint on the listener's tracer along the way.
		const runConsumerTurn = async (args: {
			vad: ScriptableVad;
			transcriber: TestTranscriber;
			myTracer: EndToEndLatencyTracer;
			roomId: string;
			generate: (r: VoiceGenerateRequest) => Promise<VoiceTurnOutcome>;
		}): Promise<{ outcome: VoiceTurnOutcome; trace: LatencyTrace | null }> => {
			const turnId = args.myTracer.beginTurn({ roomId: args.roomId });
			// peer-utterance-end = the producer drained (synchronous for the fake).
			args.myTracer.mark(turnId, "peer-utterance-end");
			args.vad.emit(vadStart(0));
			for (let i = 0; i < 4; i++) {
				args.transcriber.feed({
					pcm: new Float32Array(512),
					sampleRate: ASR_RATE,
					timestampMs: i * 32,
				});
				args.vad.emit(vadActive(40 + i * 30, 40 + i * 30));
			}
			args.vad.emit(vadEnd(200, 200));
			const final = await args.transcriber.flush();
			args.myTracer.mark(turnId, "vad-trigger");
			args.myTracer.mark(turnId, "asr-final");
			args.myTracer.mark(turnId, "llm-first-token");
			const outcome = await args.generate({
				transcript: final.partial,
				final: true,
				signal: new AbortController().signal,
			});
			args.myTracer.mark(turnId, "llm-first-replytext-char");
			const parsed = parseExpressiveTags(outcome.replyText);
			if (parsed.hasTags)
				args.myTracer.mark(turnId, "replyText-first-emotion-tag");
			args.myTracer.mark(turnId, "phrase-1-to-tts");
			args.myTracer.mark(turnId, "tts-first-audio-chunk");
			args.myTracer.mark(turnId, "audio-first-into-peer-ring");
			const trace = args.myTracer.endTurn(turnId);
			return { outcome, trace };
		};

		// ── Round-trip 1 starts with A's seed turn (no incoming PCM) ──────────
		const seedTurnId = tracerA.beginTurn({ roomId: "duet-A" });
		tracerA.mark(seedTurnId, "vad-trigger");
		tracerA.mark(seedTurnId, "asr-final");
		tracerA.mark(seedTurnId, "llm-first-token");
		await generateA({
			transcript: "hey what's the most interesting thing you've thought about",
			final: true,
			signal: new AbortController().signal,
		});
		tracerA.mark(seedTurnId, "tts-first-audio-chunk");
		tracerA.endTurn(seedTurnId);
		// (a) A's TTS PCM is in B's ring now.
		expect(aToBSamples).toBeGreaterThan(0);
		expect(pushBFrames).toBeGreaterThan(0);

		for (let rt = 1; rt <= 3; rt++) {
			transcriberB.setNext(`turn ${rt}: that's an interesting point`);
			transcriberA.setNext(`turn ${rt}: yeah and another thought`);
			const framesBeforeBReply = pushAFrames;
			const bResult = await runConsumerTurn({
				vad: vadB,
				transcriber: transcriberB,
				myTracer: tracerB,
				roomId: "duet-B",
				generate: generateB,
			});
			// (b) B's reply PCM landed in A's ring.
			expect(bToASamples).toBeGreaterThan(0);
			expect(pushAFrames).toBeGreaterThan(framesBeforeBReply);
			expect(bResult.trace?.derived.ttftFromUtteranceEndMs).not.toBeNull();
			expect(
				bResult.trace?.derived.firstAudioIntoPeerRingFromUtteranceEndMs,
			).not.toBeNull();
			expect(bResult.trace?.derived.emotionTagOverheadMs).not.toBeNull();
			// A hears B's reply → A's turn.
			const aResult = await runConsumerTurn({
				vad: vadA,
				transcriber: transcriberA,
				myTracer: tracerA,
				roomId: "duet-A",
				generate: generateA,
			});
			expect(aResult.outcome.replyText.length).toBeGreaterThan(0);
		}

		// (c) three round-trips, no deadlock.
		expect(bTurns).toBe(3);
		expect(aTurns).toBe(1 /* seed */ + 3 /* responses */);
		// (d) both tracers have the duet checkpoints + the headline histograms.
		expect(tracerB.recentTraces().length).toBeGreaterThanOrEqual(3);
		expect(tracerA.recentTraces().length).toBeGreaterThanOrEqual(4);
		const someB = tracerB.recentTraces()[0] as LatencyTrace;
		expect(someB.checkpoints.map((c) => c.name)).toContain(
			"peer-utterance-end",
		);
		expect(someB.checkpoints.map((c) => c.name)).toContain(
			"audio-first-into-peer-ring",
		);
		expect(
			tracerB.histogramSummaries().ttftFromUtteranceEndMs.count,
		).toBeGreaterThanOrEqual(3);
		expect(
			tracerB.histogramSummaries().firstAudioIntoPeerRingFromUtteranceEndMs
				.count,
		).toBeGreaterThanOrEqual(3);
		// (e) cross-ring bounded — the DuetSink forwarded everything (the
		// PushMicSource re-frames and drains it; residual < one frame).
		expect(bridge.aToB.totalForwarded()).toBeGreaterThan(0);
		expect(bridge.bToA.totalForwarded()).toBeGreaterThan(0);
	} finally {
		await pushA.stop();
		await pushB.stop();
	}
});
|
|
327
|
+
|
|
328
|
+
it("(f) a cancel mid-generate (the producer's AbortSignal) stops the turn and doesn't wedge the loop", async () => {
	// Capture everything the fake TTS writes so we can prove that audio was
	// produced before the cancel took effect.
	const written: Array<{ pcm: Float32Array; sr: number }> = [];
	const tts = new FakeTts({
		write: (pcm: Float32Array, sr: number) => written.push({ pcm, sr }),
	});
	const firstTurn = new AbortController();
	let abortObserved = false;

	// Word-by-word producer: checks the signal before each word, speaks the
	// word, aborts the FIRST turn's controller after the second word, then
	// yields to the event loop for 1 ms.
	const generate = async (
		request: VoiceGenerateRequest,
	): Promise<VoiceTurnOutcome> => {
		const words = "this reply will be cancelled before it finishes".split(
			" ",
		);
		let position = 0;
		for (const word of words) {
			if (request.signal.aborted) {
				abortObserved = true;
				const abortError = new Error("aborted");
				abortError.name = "AbortError";
				throw abortError;
			}
			tts.speak(word);
			if (position === 1) firstTurn.abort();
			position += 1;
			await new Promise((resolve) => setTimeout(resolve, 1));
		}
		return { transcript: request.transcript, replyText: words.join(" ") };
	};

	// Turn 1 is cancelled from inside the producer and must reject with an
	// AbortError after at least one chunk reached the sink.
	let rejectedWithAbort = false;
	try {
		await generate({
			transcript: "x",
			final: true,
			signal: firstTurn.signal,
		});
	} catch (e) {
		rejectedWithAbort = (e as Error).name === "AbortError";
	}
	expect(abortObserved).toBe(true);
	expect(rejectedWithAbort).toBe(true);
	expect(written.length).toBeGreaterThan(0);

	// A subsequent turn with a fresh (un-aborted) signal runs to completion —
	// the loop is not wedged after the cancel.
	const secondTurn = new AbortController();
	const result = await generate({
		transcript: "y",
		final: true,
		signal: secondTurn.signal,
	});
	expect(typeof result.replyText).toBe("string");
	expect(result.transcript).toBe("y");
});
|
|
375
|
+
});
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Voice-emotion types and pure projection functions — consumed by the
|
|
3
|
+
* attribution pipeline (`emotion-attribution.ts`).
|
|
4
|
+
*
|
|
5
|
+
* The ONNX-backed `VoiceEmotionClassifier` class was removed when
|
|
6
|
+
* `onnxruntime-node` was dropped. No resident voice-emotion classifier
|
|
7
|
+
* remains; only the pure types and projection helpers below are kept.
|
|
8
|
+
*
|
|
9
|
+
* Pure exports here:
|
|
10
|
+
* - Model id constants (`WAV2SMALL_INT8_MODEL_ID`, etc.)
|
|
11
|
+
* - Sample rate / window constants
|
|
12
|
+
* - `VoiceEmotionVad`, `VoiceEmotionClassifierOutput` interfaces
|
|
13
|
+
* - `VoiceEmotionHead` type
|
|
14
|
+
* - `VoiceEmotionClassifierError` error class
|
|
15
|
+
* - `projectVadToExpressiveEmotion` — V-A-D → ExpressiveEmotion projection
|
|
16
|
+
* - `interpretCls7Output` — 7-class logit → structured output
|
|
17
|
+
*/
|
|
18
|
+
import { type ExpressiveEmotion } from "./expressive-tags";
|
|
19
|
+
/** Stable identifier of the Wav2Small student head that is shipped. */
export declare const WAV2SMALL_INT8_MODEL_ID: "wav2small-msp-dim-int8";

/** Stable identifier of the floating-point parent model used in eval. */
export declare const WAV2SMALL_FP32_MODEL_ID: "wav2small-msp-dim-fp32";

/** Union of the model ids a voice-emotion inference can be attributed to. */
export type VoiceEmotionModelId =
	| typeof WAV2SMALL_INT8_MODEL_ID
	| typeof WAV2SMALL_FP32_MODEL_ID;

/** Sample rate the Wav2Small log-mel front-end requires. */
export declare const WAV2SMALL_SAMPLE_RATE = 16000;

/** Hard minimum window, in samples; anything shorter is rejected. */
export declare const WAV2SMALL_MIN_SAMPLES = 16000;

/**
 * Soft maximum window, in samples; longer inputs are truncated to the
 * trailing window.
 */
export declare const WAV2SMALL_MAX_SAMPLES: number;
|
|
30
|
+
/**
 * Error raised when the bundled model file can not be loaded or run.
 */
export declare class VoiceEmotionClassifierError extends Error {
	/** @param message Human-readable description of the failure. */
	constructor(message: string);
}
|
|
34
|
+
/**
 * Continuous valence / arousal / dominance (V-A-D) output.
 * All three components are in [0, 1].
 */
export interface VoiceEmotionVad {
	/** Valence component, in [0, 1]. */
	valence: number;
	/** Arousal component, in [0, 1]. */
	arousal: number;
	/** Dominance component, in [0, 1]. */
	dominance: number;
}
|
|
40
|
+
/** Result of a single classifier inference. */
export interface VoiceEmotionClassifierOutput {
	/** Continuous V-A-D triple for this inference. */
	vad: VoiceEmotionVad;
	/** Projected discrete label, or null when no projection is confident. */
	emotion: ExpressiveEmotion | null;
	/** Confidence in the projected discrete label, in [0, 1]. */
	confidence: number;
	/** Per-class soft scores aligned with `EXPRESSIVE_EMOTION_TAGS`. */
	scores: Record<ExpressiveEmotion, number>;
	/** Id of the model that produced this output (for the attribution evidence row). */
	modelId: VoiceEmotionModelId;
	/** Inference wall-time in ms (CPU side; useful for the bench harness). */
	latencyMs: number;
}
|
|
54
|
+
/**
 * Map a continuous V-A-D triple onto the 7-class `ExpressiveEmotion` tag
 * set, returning a soft score for every tag together with the best discrete
 * pick and its confidence.
 *
 * The projection is Plutchik-aligned and deterministic. Its thresholds are
 * tuned against the MSP-Podcast V-A-D mean/std reported in the audeering
 * model card and the Wav2Small paper: small enough to be stable, yet wide
 * enough to give every class some mass on conversational speech.
 *
 * Sign convention (audeering teacher, mirrored by Wav2Small):
 *   valence   — high = positive affect (happy, calm), low = negative (sad, angry).
 *   arousal   — high = energetic (excited, angry), low = subdued (calm, sad).
 *   dominance — high = assertive (angry), low = submissive (nervous, whisper).
 *
 * @param vad The V-A-D triple to project.
 * @returns `emotion` (the discrete pick, or null), `confidence` in that
 *   pick, and `scores` holding one soft score per tag.
 */
export declare function projectVadToExpressiveEmotion(
	vad: VoiceEmotionVad,
): {
	emotion: ExpressiveEmotion | null;
	confidence: number;
	scores: Record<ExpressiveEmotion, number>;
};
|
|
74
|
+
/**
 * Stable model-head identifier: states which kind of output a model emits.
 *
 *   - `vad`  — a continuous [valence, arousal, dominance] triple.
 *   - `cls7` — 7-class logits in `EXPRESSIVE_EMOTION_TAGS` order.
 */
export type VoiceEmotionHead = "vad" | "cls7";
|
|
80
|
+
/**
 * Turn the 7-class logits of a `cls7` head into a structured emotion read.
 * A numerically-stable softmax (max-subtraction) is applied over
 * `EXPRESSIVE_EMOTION_TAGS` and the argmax is selected.
 *
 * Confidence is the softmax probability of the picked class (in [0, 1]),
 * giving downstream consumers a calibrated mass to compare against the
 * 0.35 abstain floor of the V-A-D-projection path.
 *
 * The `vad` field is synthesised at the neutral midpoint (0.5, 0.5, 0.5):
 * the cls7 head is the ground truth for the picked emotion, and no V-A-D
 * target is regressed any more. Consumers that need a real V-A-D triple
 * must use a `head=vad` model.
 *
 * @param logits 7-class logits in `EXPRESSIVE_EMOTION_TAGS` order.
 * @param modelId Id of the model that produced the logits.
 * @param latencyMs Inference wall-time in ms.
 * @returns The structured classifier output for these logits.
 */
export declare function interpretCls7Output(
	logits: Float32Array,
	modelId: VoiceEmotionModelId,
	latencyMs: number,
): VoiceEmotionClassifierOutput;
|
|
95
|
+
//# sourceMappingURL=voice-emotion-classifier.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"voice-emotion-classifier.d.ts","sourceRoot":"","sources":["voice-emotion-classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,OAAO,EAEN,KAAK,iBAAiB,EACtB,MAAM,mBAAmB,CAAC;AAE3B,gEAAgE;AAChE,eAAO,MAAM,uBAAuB,EAAG,wBAAiC,CAAC;AACzE,sEAAsE;AACtE,eAAO,MAAM,uBAAuB,EAAG,wBAAiC,CAAC;AACzE,MAAM,MAAM,mBAAmB,GAC5B,OAAO,uBAAuB,GAC9B,OAAO,uBAAuB,CAAC;AAElC,gEAAgE;AAChE,eAAO,MAAM,qBAAqB,QAAS,CAAC;AAC5C,yDAAyD;AACzD,eAAO,MAAM,qBAAqB,QAAwB,CAAC;AAC3D,+EAA+E;AAC/E,eAAO,MAAM,qBAAqB,QAA6B,CAAC;AAEhE,mEAAmE;AACnE,qBAAa,2BAA4B,SAAQ,KAAK;gBACzC,OAAO,EAAE,MAAM;CAI3B;AAED,wDAAwD;AACxD,MAAM,WAAW,eAAe;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;CAClB;AAED,uCAAuC;AACvC,MAAM,WAAW,4BAA4B;IAC5C,GAAG,EAAE,eAAe,CAAC;IACrB,yEAAyE;IACzE,OAAO,EAAE,iBAAiB,GAAG,IAAI,CAAC;IAClC,0DAA0D;IAC1D,UAAU,EAAE,MAAM,CAAC;IACnB,oEAAoE;IACpE,MAAM,EAAE,MAAM,CAAC,iBAAiB,EAAE,MAAM,CAAC,CAAC;IAC1C,2EAA2E;IAC3E,OAAO,EAAE,mBAAmB,CAAC;IAC7B,0EAA0E;IAC1E,SAAS,EAAE,MAAM,CAAC;CAClB;AAaD;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,6BAA6B,CAAC,GAAG,EAAE,eAAe,GAAG;IACpE,OAAO,EAAE,iBAAiB,GAAG,IAAI,CAAC;IAClC,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC,iBAAiB,EAAE,MAAM,CAAC,CAAC;CAC1C,CA6EA;AAED;;;;GAIG;AACH,MAAM,MAAM,gBAAgB,GAAG,KAAK,GAAG,MAAM,CAAC;AAE9C;;;;;;;;;;;;;GAaG;AACH,wBAAgB,mBAAmB,CAClC,MAAM,EAAE,YAAY,EACpB,OAAO,EAAE,mBAAmB,EAC5B,SAAS,EAAE,MAAM,GACf,4BAA4B,CA0D9B"}
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure-function tests for voice-emotion-classifier.ts.
|
|
3
|
+
*
|
|
4
|
+
* Covers the V-A-D → `ExpressiveEmotion` projection table and the 7-class
|
|
5
|
+
* logit interpreter. The ONNX-backed `VoiceEmotionClassifier` class was
|
|
6
|
+
* removed when `onnxruntime-node` was dropped; only these pure projection
|
|
7
|
+
* helpers remain.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { describe, expect, it } from "vitest";
|
|
11
|
+
import {
|
|
12
|
+
interpretCls7Output,
|
|
13
|
+
projectVadToExpressiveEmotion,
|
|
14
|
+
VoiceEmotionClassifierError,
|
|
15
|
+
WAV2SMALL_INT8_MODEL_ID,
|
|
16
|
+
WAV2SMALL_MIN_SAMPLES,
|
|
17
|
+
WAV2SMALL_SAMPLE_RATE,
|
|
18
|
+
} from "./voice-emotion-classifier";
|
|
19
|
+
|
|
20
|
+
describe("projectVadToExpressiveEmotion", () => {
	// Every discrete label the projection is expected to report a score for.
	const ALL_TAGS = [
		"happy",
		"sad",
		"angry",
		"nervous",
		"calm",
		"excited",
		"whisper",
	] as const;

	it("projects neutral V-A-D centre to a null discrete label", () => {
		const centre = { valence: 0.5, arousal: 0.5, dominance: 0.5 };
		const { emotion, confidence, scores } =
			projectVadToExpressiveEmotion(centre);

		// Nothing stands out at the midpoint, so the projection abstains and
		// reports a confidence below the 0.35 bound.
		expect(emotion).toBeNull();
		expect(confidence).toBeLessThan(0.35);

		// Each label still carries a score inside the unit interval.
		ALL_TAGS.forEach((tag) => {
			expect(scores[tag]).toBeGreaterThanOrEqual(0);
			expect(scores[tag]).toBeLessThanOrEqual(1);
		});
	});

	it("projects high valence + high arousal to excited or happy", () => {
		const upbeat = { valence: 0.9, arousal: 0.9, dominance: 0.55 };
		const { emotion, confidence } = projectVadToExpressiveEmotion(upbeat);

		// Either positive high-energy label is accepted here.
		expect(["excited", "happy"]).toContain(emotion);
		expect(confidence).toBeGreaterThanOrEqual(0.5);
	});

	it("projects low valence + low arousal to sad", () => {
		const subdued = { valence: 0.1, arousal: 0.1, dominance: 0.2 };
		const { emotion, confidence } = projectVadToExpressiveEmotion(subdued);

		expect(emotion).toBe("sad");
		expect(confidence).toBeGreaterThanOrEqual(0.5);
	});

	it("projects low valence + high arousal + high dominance to angry", () => {
		const hostile = { valence: 0.1, arousal: 0.9, dominance: 0.9 };
		const { emotion, confidence } = projectVadToExpressiveEmotion(hostile);

		expect(emotion).toBe("angry");
		expect(confidence).toBeGreaterThanOrEqual(0.5);
	});

	it("projects high valence + low arousal to calm", () => {
		const relaxed = { valence: 0.85, arousal: 0.15, dominance: 0.5 };
		const { emotion, confidence } = projectVadToExpressiveEmotion(relaxed);

		expect(emotion).toBe("calm");
		expect(confidence).toBeGreaterThanOrEqual(0.5);
	});

	it("projects low arousal + low dominance to whisper", () => {
		const hushed = { valence: 0.5, arousal: 0.05, dominance: 0.05 };
		const { emotion, confidence } = projectVadToExpressiveEmotion(hushed);

		expect(emotion).toBe("whisper");
		expect(confidence).toBeGreaterThanOrEqual(0.5);
	});

	it("clamps inputs outside [0, 1]", () => {
		const outOfRange = { valence: 2, arousal: -1, dominance: 0.5 };
		const { emotion } = projectVadToExpressiveEmotion(outOfRange);

		// After clamping this is V=1, A=0, D=0.5: high valence, low arousal.
		expect(emotion).toBe("calm");
	});

	it("treats non-finite inputs as zero", () => {
		const nonFinite = {
			valence: Number.NaN,
			arousal: Number.POSITIVE_INFINITY,
			dominance: Number.NEGATIVE_INFINITY,
		};
		const { emotion, confidence } = projectVadToExpressiveEmotion(nonFinite);

		// Once sanitised, no axis is expected to lift any class above 0.35, so
		// the projection abstains while still reporting a finite confidence.
		// NOTE(review): a literal V=A=D=0 sits close to the "sad"/"whisper"
		// fixtures above, so "zero" in the title presumably means a neutral
		// contribution rather than the value 0 — confirm against the
		// implementation.
		expect(emotion).toBeNull();
		expect(Number.isFinite(confidence)).toBe(true);
	});
});
|
|
116
|
+
|
|
117
|
+
describe("interpretCls7Output (cls7 head)", () => {
	// Class order the logits are aligned with (EXPRESSIVE_EMOTION_TAGS):
	// index 0 is "happy", index 6 is "whisper".
	const CLASS_ORDER = [
		"happy",
		"sad",
		"angry",
		"nervous",
		"calm",
		"excited",
		"whisper",
	] as const;

	it("picks the argmax class with calibrated softmax confidence", () => {
		// A single dominant logit at index 2, i.e. a strong "angry" signal.
		const strongAngry = Float32Array.of(0, 0, 5, 0, 0, 0, 0);
		const result = interpretCls7Output(strongAngry, WAV2SMALL_INT8_MODEL_ID, 7);

		expect(result.emotion).toBe("angry");
		expect(result.confidence).toBeGreaterThan(0.9);

		// Every class has a probability in [0, 1] and together they sum to ≈ 1.
		CLASS_ORDER.forEach((tag) => {
			expect(result.scores[tag]).toBeGreaterThanOrEqual(0);
			expect(result.scores[tag]).toBeLessThanOrEqual(1);
		});
		const probabilityMass = CLASS_ORDER.reduce(
			(sum, tag) => sum + result.scores[tag],
			0,
		);
		expect(probabilityMass).toBeGreaterThan(0.999);
		expect(probabilityMass).toBeLessThan(1.001);

		// The cls7 head has no V-A-D output, so each axis reports the midpoint.
		for (const axis of ["valence", "arousal", "dominance"] as const) {
			expect(result.vad[axis]).toBeCloseTo(0.5, 5);
		}

		// Model id and latency are passed through untouched.
		expect(result.modelId).toBe(WAV2SMALL_INT8_MODEL_ID);
		expect(result.latencyMs).toBe(7);
	});

	it("equal logits return a near-uniform distribution", () => {
		const flat = new Float32Array(7).fill(1);
		const result = interpretCls7Output(flat, WAV2SMALL_INT8_MODEL_ID, 0);

		// Ties resolve to the first index, which is "happy".
		expect(result.emotion).toBe("happy");
		// With seven equal logits each class gets probability ≈ 1/7.
		expect(result.confidence).toBeCloseTo(1 / 7, 5);
	});

	it("falls back to abstain when all logits are non-finite", () => {
		const allNaN = new Float32Array(7).fill(Number.NaN);
		const result = interpretCls7Output(allNaN, WAV2SMALL_INT8_MODEL_ID, 0);

		expect(result.emotion).toBeNull();
		expect(result.confidence).toBe(0);
	});

	it("rejects logit arrays of the wrong length", () => {
		// Three values is the V-A-D shape, not the seven-class shape.
		const vadSized = Float32Array.of(0, 0, 0);
		const attempt = () =>
			interpretCls7Output(vadSized, WAV2SMALL_INT8_MODEL_ID, 0);

		expect(attempt).toThrow(VoiceEmotionClassifierError);
	});

	it("respects the EXPRESSIVE_EMOTION_TAGS class order", () => {
		// Spike one index at a time and check the matching tag comes back.
		CLASS_ORDER.forEach((expectedTag, index) => {
			const oneHot = new Float32Array(CLASS_ORDER.length);
			oneHot[index] = 5;
			const result = interpretCls7Output(oneHot, WAV2SMALL_INT8_MODEL_ID, 0);
			expect(result.emotion).toBe(expectedTag);
		});
	});
});
|
|
201
|
+
|
|
202
|
+
describe("model id constants", () => {
	it("WAV2SMALL_INT8_MODEL_ID is the expected stable string", () => {
		// Pins the identifier so an accidental rename fails this test.
		const expectedId = "wav2small-msp-dim-int8";
		expect(WAV2SMALL_INT8_MODEL_ID).toBe(expectedId);
	});

	it("WAV2SMALL_MIN_SAMPLES is at least one second at 16 kHz", () => {
		// One second of audio holds exactly WAV2SMALL_SAMPLE_RATE samples.
		const samplesPerSecond = WAV2SMALL_SAMPLE_RATE;
		expect(WAV2SMALL_MIN_SAMPLES).toBeGreaterThanOrEqual(samplesPerSecond);
	});
});
|