@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +81 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +7 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +54 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1171 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +190 -0
- package/src/routes/local-inference-asr-route.ts +213 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +423 -0
- package/src/routes/local-inference-compat-routes.ts +782 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1398 -0
- package/src/runtime/index.d.ts +14 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +27 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/asr/errors.d.ts +21 -0
- package/src/services/asr/errors.d.ts.map +1 -0
- package/src/services/asr/errors.ts +50 -0
- package/src/services/asr/hash.d.ts +28 -0
- package/src/services/asr/hash.d.ts.map +1 -0
- package/src/services/asr/hash.ts +49 -0
- package/src/services/asr/index.d.ts +76 -0
- package/src/services/asr/index.d.ts.map +1 -0
- package/src/services/asr/index.ts +178 -0
- package/src/services/asr/types.d.ts +91 -0
- package/src/services/asr/types.d.ts.map +1 -0
- package/src/services/asr/types.ts +95 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +240 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +724 -0
- package/src/services/downloader.ts +899 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +534 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1891 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +183 -0
- package/src/services/hardware.ts +404 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +281 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +30 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +225 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +693 -0
- package/src/services/manifest/schema.d.ts +715 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +655 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +569 -0
- package/src/services/memory-arbiter.d.ts +343 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +1000 -0
- package/src/services/memory-monitor.d.ts +119 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +296 -0
- package/src/services/memory-pressure.d.ts +127 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +413 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +672 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +376 -0
- package/src/services/routing-policy.d.ts +55 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.ts +228 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +15 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/tts/errors.ts +46 -0
- package/src/services/tts/index.ts +214 -0
- package/src/services/tts/tts-audio-cache.ts +235 -0
- package/src/services/tts/types.ts +157 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +148 -0
- package/src/services/voice/embedding.ts +244 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +746 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2226 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +636 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +671 -0
- package/src/services/voice/ffi-bindings.ts +3050 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.ts +105 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +420 -0
- package/src/services/voice/voice-budget.ts +656 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,844 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* End-to-end voice-loop latency tracing.
|
|
3
|
+
*
|
|
4
|
+
* One `LatencyTrace` per voice turn — a span recorder with named
|
|
5
|
+
* checkpoints from "user makes a sound" to "agent's first audio plays".
|
|
6
|
+
* The checkpoint set is fixed (`VOICE_CHECKPOINTS`) and ordered; each
|
|
7
|
+
* checkpoint is recorded at most once per turn. Missing checkpoints are
|
|
8
|
+
* surfaced as missing-checkpoint state — never synthesized — and derived
|
|
9
|
+
* metrics that depend on a missing checkpoint stay `null` (AGENTS.md §3 / §7:
|
|
10
|
+
* a missing measurement is recorded as missing, not faked).
|
|
11
|
+
*
|
|
12
|
+
* Ownership / lifecycle:
|
|
13
|
+
* - The turn controller (`voice/turn-controller.ts`, W9) is the natural
|
|
14
|
+
* owner of the per-turn tracer: it calls `tracer.beginTurn({...})` when
|
|
15
|
+
* a turn opens and `tracer.endTurn(turnId)` when it finalizes/aborts.
|
|
16
|
+
* Until that lands, callers can use the module-level
|
|
17
|
+
* `voiceLatencyTracer` singleton + the `markVoiceLatency()` helper —
|
|
18
|
+
* the singleton lazily opens a turn keyed by `roomId` on first mark.
|
|
19
|
+
* - Components that produce a checkpoint either (a) hold a `tracer` and
|
|
20
|
+
* call `tracer.mark(turnId, checkpoint)`, or (b) call the context-free
|
|
21
|
+
* `markVoiceLatency(roomId, checkpoint)` helper. `bindVadDetector()`
|
|
22
|
+
* bridges a `VadEventSource` onto the tracer without touching `vad.ts`.
|
|
23
|
+
*
|
|
24
|
+
* Hook points (where each checkpoint is meant to be recorded):
|
|
25
|
+
* - `peer-utterance-end` — (DUET ONLY) the producing agent's
|
|
26
|
+
* scheduler drained its last PCM chunk into
|
|
27
|
+
* the cross ring — the headline `t0` for a
|
|
28
|
+
* two-agents-talking run (`voice-duet.mjs`).
|
|
29
|
+
* Not recorded in the single-agent path.
|
|
30
|
+
* - `vad-trigger` — `VadDetector` energy-rise edge / the
|
|
31
|
+
* turn controller's wake instant.
|
|
32
|
+
* - `vad-speech-start` — `VadDetector` Silero speech-start.
|
|
33
|
+
* - `prewarm-fired` — the turn controller (W9) when it calls
|
|
34
|
+
* W6's `prewarmConversation`.
|
|
35
|
+
* - `asr-first-partial` — `StreamingTranscriber` first `partial`.
|
|
36
|
+
* - `asr-final` — `StreamingTranscriber` `final`.
|
|
37
|
+
* - `llm-first-token` — the engine generate path's first
|
|
38
|
+
* `onTextChunk` (W4).
|
|
39
|
+
* - `llm-first-replytext-char` — `StructuredFieldStreamExtractor`'s
|
|
40
|
+
* `onFieldStart("replyText")`.
|
|
41
|
+
* - `replyText-first-emotion-tag` — the field extractor / `parseExpressiveTags`
|
|
42
|
+
* on the first inline expressive tag (`[happy]`
|
|
43
|
+
* …) in `replyText` — emotion-markup overhead,
|
|
44
|
+
* measured the way `envelopeToReplyTextMs`
|
|
45
|
+
* measures envelope overhead.
|
|
46
|
+
* - `phrase-1-to-tts` — the scheduler/chunker (W9) on the first
|
|
47
|
+
* phrase handed to the TTS backend.
|
|
48
|
+
* - `tts-first-audio-chunk` — the TTS backend's first PCM chunk (W7).
|
|
49
|
+
* - `audio-first-played` — the audio sink on the first written
|
|
50
|
+
* sample (W9/W13) — single-agent path.
|
|
51
|
+
* - `audio-first-into-peer-ring` — (DUET ONLY) the responding agent's first
|
|
52
|
+
* TTS PCM chunk landed in the peer's ring
|
|
53
|
+
* (the duet replacement for
|
|
54
|
+
* `audio-first-played` — no speakers).
|
|
55
|
+
*
|
|
56
|
+
* Logger only, `[LatencyTracer]` prefix (AGENTS.md §9).
|
|
57
|
+
*/
|
|
58
|
+
|
|
59
|
+
import { logger } from "@elizaos/core";
|
|
60
|
+
import type { VadEvent, VadEventSource } from "./voice/types";
|
|
61
|
+
|
|
62
|
+
// ---------------------------------------------------------------------------
|
|
63
|
+
// Checkpoint set (ordered)
|
|
64
|
+
// ---------------------------------------------------------------------------
|
|
65
|
+
|
|
66
|
+
/**
 * Every latency checkpoint a voice turn can record, listed in pipeline
 * order. The order is advisory: a mark whose timestamp contradicts it is
 * stored exactly as given and flagged as an anomaly — timestamps are never
 * rewritten or clamped.
 */
export const VOICE_CHECKPOINTS = [
	"peer-utterance-end",
	"vad-trigger",
	"vad-speech-start",
	"prewarm-fired",
	"asr-first-partial",
	"asr-final",
	"llm-first-token",
	"llm-first-replytext-char",
	"replyText-first-emotion-tag",
	"phrase-1-to-tts",
	"tts-first-audio-chunk",
	"audio-first-played",
	"audio-first-into-peer-ring",
] as const;

/** Union of the checkpoint names listed in `VOICE_CHECKPOINTS`. */
export type VoiceCheckpoint = (typeof VOICE_CHECKPOINTS)[number];

/** Checkpoint name -> its index in `VOICE_CHECKPOINTS`. */
const CHECKPOINT_ORDER: Readonly<Record<VoiceCheckpoint, number>> = (() => {
	const positions = {} as Record<VoiceCheckpoint, number>;
	VOICE_CHECKPOINTS.forEach((name, position) => {
		positions[name] = position;
	});
	return positions;
})();

/**
 * Checkpoints that only show up in particular run shapes:
 * `peer-utterance-end` and `audio-first-into-peer-ring` come from the
 * two-agents duet harness, and `replyText-first-emotion-tag` only exists when
 * the model emits an inline expressive tag. A trace can be `complete` without
 * them, but they are still reported in `missing` when absent so the duet
 * harness can see which ones it did not get.
 */
const OPTIONAL_CHECKPOINTS: ReadonlySet<VoiceCheckpoint> =
	new Set<VoiceCheckpoint>([
		"peer-utterance-end",
		"replyText-first-emotion-tag",
		"audio-first-into-peer-ring",
	]);

/**
 * The single-agent "core" checkpoints: everything in `VOICE_CHECKPOINTS`
 * that is not optional, in the same order. A trace is `complete` iff all of
 * these were recorded.
 */
export const CORE_VOICE_CHECKPOINTS = VOICE_CHECKPOINTS.filter(
	(name) => OPTIONAL_CHECKPOINTS.has(name) === false,
);
|
|
115
|
+
|
|
116
|
+
// ---------------------------------------------------------------------------
|
|
117
|
+
// Derived metrics
|
|
118
|
+
// ---------------------------------------------------------------------------
|
|
119
|
+
|
|
120
|
+
/**
 * Derived per-turn metrics. Every field is the duration between two
 * checkpoints; `null` whenever either endpoint checkpoint is missing for
 * the turn — there is no fallback estimate.
 */
export interface LatencyDerived {
	/** vad-trigger → llm-first-token (time-to-first-token). */
	ttftMs: number | null;
	/** vad-trigger → tts-first-audio-chunk (time-to-first-audio). */
	ttfaMs: number | null;
	/** vad-trigger → audio-first-played (time-to-audio-played; the headline). */
	ttapMs: number | null;
	/** vad-speech-start → asr-final (ASR finalization latency). */
	asrFinalLatencyMs: number | null;
	/** vad-trigger → asr-first-partial (how fast the first words appear). */
	asrFirstPartialMs: number | null;
	/** vad-trigger → prewarm-fired (how fast the prewarm kicks off). */
	prewarmLatencyMs: number | null;
	/** asr-final → llm-first-token (LLM latency once the prompt is complete). */
	llmFirstTokenAfterAsrMs: number | null;
	/** llm-first-token → llm-first-replytext-char (envelope-skip overhead). */
	envelopeToReplyTextMs: number | null;
	/** llm-first-replytext-char → phrase-1-to-tts (chunker hand-off lag). */
	replyTextToPhrase1Ms: number | null;
	/** phrase-1-to-tts → tts-first-audio-chunk (TTS first-chunk latency). */
	ttsFirstChunkMs: number | null;
	/** tts-first-audio-chunk → audio-first-played (sink/playback lag). */
	audioSinkLatencyMs: number | null;
	// ── Duet (cross-agent) spans — `null` outside the duet harness. ──────────
	/**
	 * peer-utterance-end → llm-first-token — **THE headline number** for the
	 * two-agents-talking benchmark: how long after the peer stopped speaking
	 * the responding agent emits its first token (TTFT-from-last-utterance).
	 */
	ttftFromUtteranceEndMs: number | null;
	/** peer-utterance-end → llm-first-replytext-char. */
	replyTextFirstCharFromUtteranceEndMs: number | null;
	/** peer-utterance-end → tts-first-audio-chunk. */
	firstTtsPcmFromUtteranceEndMs: number | null;
	/**
	 * peer-utterance-end → audio-first-into-peer-ring — the **duet round-trip**:
	 * peer stops speaking → responding agent's first audio is back in the
	 * peer's ear (the `duet_round_trip_ms` gate reads `.p50` of this).
	 */
	firstAudioIntoPeerRingFromUtteranceEndMs: number | null;
	// ── Emotion-markup span — not duet-specific. ─────────────────────────────
	/** llm-first-token → replyText-first-emotion-tag (emotion-markup overhead);
	 * `null` when the model emitted no inline expressive tag. */
	emotionTagOverheadMs: number | null;
}

/**
 * The derived-metric keys, in display order (duet headline numbers first).
 *
 * Each key must appear exactly once: the tracer loops over this list to fold
 * a closed turn into its histograms, so a repeated key would be sampled twice
 * per turn.
 */
export const LATENCY_DERIVED_KEYS = [
	"ttftFromUtteranceEndMs",
	"firstAudioIntoPeerRingFromUtteranceEndMs",
	"ttftMs",
	"ttfaMs",
	"ttapMs",
	"asrFinalLatencyMs",
	"asrFirstPartialMs",
	"prewarmLatencyMs",
	"llmFirstTokenAfterAsrMs",
	"envelopeToReplyTextMs",
	"emotionTagOverheadMs",
	"replyTextToPhrase1Ms",
	"ttsFirstChunkMs",
	"audioSinkLatencyMs",
	"replyTextFirstCharFromUtteranceEndMs",
	"firstTtsPcmFromUtteranceEndMs",
] as const satisfies ReadonlyArray<keyof LatencyDerived>;

/** Union of the metric names listed in `LATENCY_DERIVED_KEYS`. */
export type LatencyDerivedKey = (typeof LATENCY_DERIVED_KEYS)[number];
|
|
194
|
+
|
|
195
|
+
/**
 * For each derived metric, the `[from, to]` checkpoint pair whose timestamps
 * bound the span. The pairs mirror the per-field descriptions on
 * `LatencyDerived`.
 */
const DERIVED_SPANS: Readonly<
	Record<LatencyDerivedKey, readonly [VoiceCheckpoint, VoiceCheckpoint]>
> = {
	// Single-agent spans.
	ttftMs: ["vad-trigger", "llm-first-token"],
	ttfaMs: ["vad-trigger", "tts-first-audio-chunk"],
	ttapMs: ["vad-trigger", "audio-first-played"],
	asrFinalLatencyMs: ["vad-speech-start", "asr-final"],
	asrFirstPartialMs: ["vad-trigger", "asr-first-partial"],
	prewarmLatencyMs: ["vad-trigger", "prewarm-fired"],
	llmFirstTokenAfterAsrMs: ["asr-final", "llm-first-token"],
	envelopeToReplyTextMs: ["llm-first-token", "llm-first-replytext-char"],
	replyTextToPhrase1Ms: ["llm-first-replytext-char", "phrase-1-to-tts"],
	ttsFirstChunkMs: ["phrase-1-to-tts", "tts-first-audio-chunk"],
	audioSinkLatencyMs: ["tts-first-audio-chunk", "audio-first-played"],
	// Duet spans, all measured from `peer-utterance-end`.
	ttftFromUtteranceEndMs: ["peer-utterance-end", "llm-first-token"],
	replyTextFirstCharFromUtteranceEndMs: [
		"peer-utterance-end",
		"llm-first-replytext-char",
	],
	firstTtsPcmFromUtteranceEndMs: [
		"peer-utterance-end",
		"tts-first-audio-chunk",
	],
	firstAudioIntoPeerRingFromUtteranceEndMs: [
		"peer-utterance-end",
		"audio-first-into-peer-ring",
	],
	// Emotion-markup span.
	emotionTagOverheadMs: ["llm-first-token", "replyText-first-emotion-tag"],
};
|
|
224
|
+
|
|
225
|
+
// ---------------------------------------------------------------------------
|
|
226
|
+
// Trace shape
|
|
227
|
+
// ---------------------------------------------------------------------------
|
|
228
|
+
|
|
229
|
+
/** One recorded checkpoint inside a `LatencyTrace`. */
export interface LatencyCheckpoint {
	/** Which checkpoint this is. */
	name: VoiceCheckpoint;
	/**
	 * Milliseconds relative to the trace's `t0EpochMs`
	 * (`atEpochMs - t0EpochMs`). Can be negative when a checkpoint was marked
	 * with an explicit timestamp earlier than the turn's first mark.
	 */
	tMs: number;
	/** Absolute epoch ms when the checkpoint was recorded. */
	atEpochMs: number;
}
|
|
236
|
+
|
|
237
|
+
/** Snapshot of one voice turn: its checkpoints, derived spans and diagnostics. */
export interface LatencyTrace {
	/** Identifier of the turn this trace describes. */
	turnId: string;
	/** Room the turn was opened for, or `null` when none was given. */
	roomId: string | null;
	/**
	 * Epoch ms of the first checkpoint recorded for this turn (the t=0 ref).
	 * When the turn has no checkpoints at all, the snapshot falls back to the
	 * close time, or to the time of the snapshot for a still-open turn.
	 */
	t0EpochMs: number;
	/** Epoch ms when `endTurn` was called, or null while still open. */
	closedAtEpochMs: number | null;
	/** Recorded checkpoints, sorted by ascending `atEpochMs`. */
	checkpoints: LatencyCheckpoint[];
	/** Spans computed from the recorded checkpoints. */
	derived: LatencyDerived;
	/**
	 * Names of checkpoints that were never recorded for this turn. Includes
	 * optional (duet-only / emotion-tag) checkpoints, so it can be non-empty
	 * on a `complete` trace.
	 */
	missing: VoiceCheckpoint[];
	/**
	 * True when every core (non-optional) checkpoint — i.e. every entry of
	 * `CORE_VOICE_CHECKPOINTS` — was recorded. Optional checkpoints do not
	 * affect this flag.
	 */
	complete: boolean;
	/**
	 * Non-empty when the recorder saw something it could not reconcile —
	 * a duplicate mark, an out-of-order timestamp, an unknown checkpoint.
	 * Diagnostic only; the trace is still emitted.
	 */
	anomalies: string[];
}
|
|
257
|
+
|
|
258
|
+
// ---------------------------------------------------------------------------
|
|
259
|
+
// Histograms
|
|
260
|
+
// ---------------------------------------------------------------------------
|
|
261
|
+
|
|
262
|
+
/**
 * Digest of a histogram's retained samples. Every statistic is `null` when
 * `count` is 0.
 */
export interface HistogramSummary {
	count: number;
	p50: number | null;
	p90: number | null;
	p99: number | null;
	min: number | null;
	max: number | null;
	mean: number | null;
}

/**
 * Sliding-window histogram: remembers at most `capacity` of the most recent
 * finite samples (oldest dropped first) and derives percentiles lazily when
 * `summary()` is called, so a long-running process stays bounded.
 *
 * Exported so sibling accumulators (e.g. the Mobile Resource Workbench's
 * `DeviceResourceMetrics`) can share this percentile logic rather than
 * duplicating it.
 */
export class BoundedHistogram {
	private readonly samples: number[] = [];
	constructor(private readonly capacity: number) {}

	/** Record one sample; NaN and ±Infinity are silently dropped. */
	add(value: number): void {
		if (Number.isFinite(value)) {
			this.samples.push(value);
			if (this.samples.length > this.capacity) {
				// Over capacity: forget the oldest sample.
				this.samples.shift();
			}
		}
	}

	/** Summarize the retained window (nearest-rank percentiles). */
	summary(): HistogramSummary {
		const count = this.samples.length;
		if (count === 0) {
			return {
				count: 0,
				p50: null,
				p90: null,
				p99: null,
				min: null,
				max: null,
				mean: null,
			};
		}
		const ascending = this.samples.slice().sort((lhs, rhs) => lhs - rhs);
		// Nearest-rank percentile: 1-based rank ceil(p% * count), clamped into
		// the valid index range.
		const nearestRank = (percent: number): number => {
			const rank = Math.ceil((percent / 100) * count);
			const index = Math.min(count - 1, Math.max(0, rank - 1));
			return ascending[index] as number;
		};
		let total = 0;
		for (const sample of ascending) {
			total += sample;
		}
		return {
			count,
			p50: nearestRank(50),
			p90: nearestRank(90),
			p99: nearestRank(99),
			min: ascending[0] as number,
			max: ascending[count - 1] as number,
			mean: total / count,
		};
	}
}
|
|
323
|
+
|
|
324
|
+
// ---------------------------------------------------------------------------
|
|
325
|
+
// Tracer
|
|
326
|
+
// ---------------------------------------------------------------------------
|
|
327
|
+
|
|
328
|
+
/** Construction-time limits for `EndToEndLatencyTracer`; all optional. */
export interface TracerOptions {
	/** Max number of completed traces to retain in the ring. Default 64. */
	ringCapacity?: number;
	/** Max samples per derived-metric histogram. Default 256. */
	histogramCapacity?: number;
	/**
	 * Max number of concurrently-open turns. A new `beginTurn` past this cap
	 * evicts the oldest still-open turn (it is closed and emitted with whatever
	 * checkpoints it had). Guards against a leaked turn never being closed.
	 * Default 16.
	 */
	maxOpenTurns?: number;
}
|
|
341
|
+
|
|
342
|
+
/** Mutable bookkeeping for a turn that has been opened but not yet closed. */
interface OpenTurn {
	turnId: string;
	/** Room this turn was opened for, or `null` when none was given. */
	roomId: string | null;
	/** Timestamp of the first mark on this turn; `null` until one arrives. */
	t0EpochMs: number | null;
	/** name -> atEpochMs for recorded checkpoints. */
	marks: Map<VoiceCheckpoint, number>;
	/** Diagnostics collected while marking (duplicates, out-of-order marks). */
	anomalies: string[];
}
|
|
350
|
+
|
|
351
|
+
/** Per-module sequence number; keeps ids minted in the same millisecond distinct. */
let TURN_COUNTER = 0;

/** Mint a fresh turn id of the form `vt-<now, base36>-<sequence, base36>`. */
function nextTurnId(): string {
	TURN_COUNTER += 1;
	const stamp = Date.now().toString(36);
	const sequence = TURN_COUNTER.toString(36);
	return ["vt", stamp, sequence].join("-");
}
|
|
356
|
+
|
|
357
|
+
export class EndToEndLatencyTracer {
|
|
358
|
+
private readonly ring: LatencyTrace[] = [];
|
|
359
|
+
private readonly open = new Map<string, OpenTurn>();
|
|
360
|
+
private readonly byRoom = new Map<string, string>();
|
|
361
|
+
private readonly histograms = new Map<LatencyDerivedKey, BoundedHistogram>();
|
|
362
|
+
private readonly ringCapacity: number;
|
|
363
|
+
private readonly histogramCapacity: number;
|
|
364
|
+
private readonly maxOpenTurns: number;
|
|
365
|
+
|
|
366
|
+
/**
 * Create a tracer with one empty histogram per derived metric.
 *
 * Each limit is clamped to at least 1. A missing or NaN limit falls back to
 * its default (64 retained traces, 256 samples per histogram, 16 open
 * turns).
 */
constructor(opts: TracerOptions = {}) {
	// `Math.max(1, NaN)` is NaN, and every later `size > NaN` / `size >= NaN`
	// bound check is false — a NaN option would silently disable the cap, so
	// treat it like an omitted option.
	const atLeastOne = (value: number | undefined, fallback: number): number => {
		const resolved = value ?? fallback;
		return Number.isNaN(resolved) ? fallback : Math.max(1, resolved);
	};
	this.ringCapacity = atLeastOne(opts.ringCapacity, 64);
	this.histogramCapacity = atLeastOne(opts.histogramCapacity, 256);
	this.maxOpenTurns = atLeastOne(opts.maxOpenTurns, 16);
	for (const key of LATENCY_DERIVED_KEYS) {
		this.histograms.set(key, new BoundedHistogram(this.histogramCapacity));
	}
}
|
|
374
|
+
|
|
375
|
+
/**
 * Start tracking a turn and return its id (generated when `turnId` is not
 * supplied). Asking for an id that is already open is a no-op that returns
 * the same id. When a `roomId` is provided, room-addressed marks for that
 * room are routed to this turn until it closes.
 *
 * If the open-turn cap is already reached, the longest-open turn is closed
 * (and emitted as a partial trace) first.
 */
beginTurn(args: { turnId?: string; roomId?: string | null } = {}): string {
	const id = args.turnId ?? nextTurnId();
	if (this.open.has(id)) {
		return id;
	}
	if (this.open.size >= this.maxOpenTurns) {
		// A Map iterates in insertion order, so the first key is the turn
		// that has been open the longest. Emitting it partially beats leaking.
		const [stalest] = this.open.keys();
		if (stalest) {
			logger.warn(
				`[LatencyTracer] evicting stale open turn ${stalest} (maxOpenTurns=${this.maxOpenTurns})`,
			);
			this.endTurn(stalest);
		}
	}
	const room = args.roomId ?? null;
	const fresh: OpenTurn = {
		turnId: id,
		roomId: room,
		t0EpochMs: null,
		marks: new Map(),
		anomalies: [],
	};
	this.open.set(id, fresh);
	if (room) {
		this.byRoom.set(room, id);
	}
	return id;
}
|
|
404
|
+
|
|
405
|
+
/**
 * Return the id of the turn currently bound to `roomId`, opening a new turn
 * for the room when none is bound or the bound one is no longer open.
 */
turnForRoom(roomId: string): string {
	const bound = this.byRoom.get(roomId);
	if (!bound || !this.open.has(bound)) {
		return this.beginTurn({ roomId });
	}
	return bound;
}
|
|
411
|
+
|
|
412
|
+
/**
 * Record a checkpoint on an open turn.
 *
 * - Unknown checkpoint names and unknown/closed turns are ignored with a
 *   warning — a late mark on a finalized turn is a caller bug, not something
 *   to retroactively patch into history.
 * - The first mark on a turn fixes the turn's `t0`.
 * - A repeated checkpoint keeps its first timestamp; the repeat is noted as
 *   an anomaly.
 * - A timestamp that contradicts the `VOICE_CHECKPOINTS` order is stored
 *   as-is and flagged (at most one anomaly per mark). Both directions are
 *   checked: an earlier-order checkpoint stamped after a later-order one, and
 *   a later-order checkpoint stamped before an earlier-order one.
 *
 * @param turnId      Turn to record on.
 * @param checkpoint  Checkpoint being recorded.
 * @param atEpochMs   Timestamp to record; defaults to `Date.now()`.
 */
mark(turnId: string, checkpoint: VoiceCheckpoint, atEpochMs?: number): void {
	if (!VOICE_CHECKPOINTS.includes(checkpoint)) {
		logger.warn(`[LatencyTracer] unknown checkpoint "${checkpoint}" ignored`);
		return;
	}
	const turn = this.open.get(turnId);
	if (!turn) {
		logger.warn(
			`[LatencyTracer] mark("${checkpoint}") for unknown/closed turn ${turnId} ignored`,
		);
		return;
	}
	const now = atEpochMs ?? Date.now();
	if (turn.t0EpochMs === null) turn.t0EpochMs = now;
	if (turn.marks.has(checkpoint)) {
		turn.anomalies.push(
			`duplicate mark for "${checkpoint}" (kept first, ignored ${now})`,
		);
		return;
	}
	const order = CHECKPOINT_ORDER[checkpoint];
	for (const [seen, at] of turn.marks) {
		const seenOrder = CHECKPOINT_ORDER[seen];
		if (seenOrder > order && at < now) {
			// This checkpoint belongs before `seen` but is stamped after it.
			turn.anomalies.push(
				`"${checkpoint}" recorded after later checkpoint "${seen}" (clock skew?)`,
			);
			break;
		}
		if (seenOrder < order && at > now) {
			// This checkpoint belongs after `seen` but is stamped before it;
			// any span between the two would come out negative.
			turn.anomalies.push(
				`"${checkpoint}" timestamped before earlier checkpoint "${seen}" (clock skew?)`,
			);
			break;
		}
	}
	turn.marks.set(checkpoint, now);
}
|
|
451
|
+
|
|
452
|
+
/**
 * Room-addressed variant of `mark`: resolves the room's current turn
 * (opening one if there is none) and records the checkpoint on it.
 */
markByRoom(
	roomId: string,
	checkpoint: VoiceCheckpoint,
	atEpochMs?: number,
): void {
	const turnId = this.turnForRoom(roomId);
	this.mark(turnId, checkpoint, atEpochMs);
}
|
|
460
|
+
|
|
461
|
+
/**
 * Close an open turn: snapshot it into a `LatencyTrace`, push it onto the
 * ring (evicting the oldest entries past `ringCapacity`), and fold each
 * non-null derived metric into its histogram exactly once.
 *
 * @returns The emitted trace, or `null` when `turnId` is not an open turn
 *   (which makes a repeated call harmless).
 */
endTurn(turnId: string): LatencyTrace | null {
	const turn = this.open.get(turnId);
	if (!turn) return null;
	this.open.delete(turnId);
	// Only unbind the room if it still points at this turn; a newer turn may
	// have taken the room over in the meantime.
	if (turn.roomId && this.byRoom.get(turn.roomId) === turnId) {
		this.byRoom.delete(turn.roomId);
	}
	const trace = this.snapshotTurn(turn, Date.now());
	this.ring.push(trace);
	while (this.ring.length > this.ringCapacity) this.ring.shift();
	// Walk the histogram map rather than the key list: the map holds one
	// entry per distinct metric, so a metric cannot be sampled twice for the
	// same turn even if the key list ever repeats a name.
	for (const [key, histogram] of this.histograms) {
		const value = trace.derived[key];
		if (value !== null) histogram.add(value);
	}
	return trace;
}
|
|
483
|
+
|
|
484
|
+
/**
 * Snapshot a still-open turn without closing it (`closedAtEpochMs` is
 * `null`). Returns `null` when the turn is not open.
 */
peekTurn(turnId: string): LatencyTrace | null {
	const live = this.open.get(turnId);
	return live ? this.snapshotTurn(live, null) : null;
}
|
|
490
|
+
|
|
491
|
+
/**
 * Copy of the last `n` closed traces, oldest first / newest last. Defaults
 * to everything the ring retains.
 */
recentTraces(n = this.ringCapacity): LatencyTrace[] {
	const retained = this.ring.length;
	return n >= retained ? [...this.ring] : this.ring.slice(retained - n);
}
|
|
496
|
+
|
|
497
|
+
/**
 * Summarize every derived-metric histogram over its retained samples. A
 * metric without a histogram reports an all-`null` summary with `count` 0.
 */
histogramSummaries(): Record<LatencyDerivedKey, HistogramSummary> {
	const summaries = {} as Record<LatencyDerivedKey, HistogramSummary>;
	LATENCY_DERIVED_KEYS.forEach((key) => {
		const summarized = this.histograms.get(key)?.summary();
		summaries[key] = summarized ?? {
			count: 0,
			p50: null,
			p90: null,
			p99: null,
			min: null,
			max: null,
			mean: null,
		};
	});
	return summaries;
}
|
|
513
|
+
|
|
514
|
+
/**
 * Forget everything: retained traces, open turns, room bindings, and all
 * histogram samples (each histogram is replaced with an empty one).
 */
reset(): void {
	this.ring.splice(0, this.ring.length);
	this.open.clear();
	this.byRoom.clear();
	LATENCY_DERIVED_KEYS.forEach((key) => {
		this.histograms.set(key, new BoundedHistogram(this.histogramCapacity));
	});
}
|
|
523
|
+
|
|
524
|
+
/** How many turns have been opened and not yet closed. */
get openTurnCount(): number {
	const { size } = this.open;
	return size;
}
|
|
528
|
+
|
|
529
|
+
/**
 * Subscribe this tracer to a VAD event source. On every `speech-start`
 * event it opens a turn (bound to `args.roomId` when given), marks both
 * `vad-trigger` and `vad-speech-start` with the event's `timestampMs` (the
 * current time is used when that value is falsy), then invokes
 * `args.onTurnOpen` with the new turn id. Other event types are ignored.
 *
 * This is the seam that lets the tracer observe the VAD without editing
 * `voice/vad.ts`. The true energy-rise "wake" instant belongs to the turn
 * controller (W9), which marks `vad-trigger` directly; this bridge is the
 * fallback for plain VAD-only setups.
 *
 * @returns Whatever `source.onVadEvent` returns — the unsubscribe function.
 */
bindVadDetector(
	source: VadEventSource,
	args: {
		roomId?: string | null;
		onTurnOpen?: (turnId: string) => void;
	} = {},
): () => void {
	const onEvent = (event: VadEvent): void => {
		if (event.type !== "speech-start") {
			return;
		}
		const turnId = this.beginTurn({ roomId: args.roomId ?? null });
		// A falsy timestamp (0 / missing) means "stamp with the current time".
		const stamp = event.timestampMs || undefined;
		this.mark(turnId, "vad-trigger", stamp);
		this.mark(turnId, "vad-speech-start", stamp);
		args.onTurnOpen?.(turnId);
	};
	return source.onVadEvent(onEvent);
}
|
|
556
|
+
|
|
557
|
+
// -------------------------------------------------------------------------
|
|
558
|
+
// Internal
|
|
559
|
+
// -------------------------------------------------------------------------
|
|
560
|
+
|
|
561
|
+
/**
 * Freeze a turn's marks into an immutable-looking `LatencyTrace`.
 *
 * @param turn The turn whose marks and anomalies are copied.
 * @param closedAtEpochMs Close time, or `null` if the turn is still open.
 * @returns The trace: time-ordered checkpoints with offsets from `t0`,
 *   derived spans, the list of unrecorded checkpoints, and a `complete` flag.
 */
private snapshotTurn(
  turn: OpenTurn,
  closedAtEpochMs: number | null,
): LatencyTrace {
  // Origin for the relative offsets: the turn's own t0, else the close
  // time, else "now".
  const originEpochMs = turn.t0EpochMs ?? closedAtEpochMs ?? Date.now();

  const recorded: LatencyCheckpoint[] = [];
  for (const name of VOICE_CHECKPOINTS) {
    const atEpochMs = turn.marks.get(name);
    if (atEpochMs !== undefined) {
      recorded.push({ name, atEpochMs, tMs: atEpochMs - originEpochMs });
    }
  }
  // Present checkpoints chronologically rather than in canonical order.
  const byTimestamp = (a: LatencyCheckpoint, b: LatencyCheckpoint): number =>
    a.atEpochMs - b.atEpochMs;
  recorded.sort(byTimestamp);

  const isRecorded = (checkpoint: VoiceCheckpoint): boolean =>
    turn.marks.has(checkpoint);
  const missing = VOICE_CHECKPOINTS.filter(
    (checkpoint) => !isRecorded(checkpoint),
  );
  // "Complete" only requires the *core* (non-optional) checkpoints — a
  // single-agent voice turn is complete without the duet-only / emotion-tag
  // checkpoints.
  const complete = CORE_VOICE_CHECKPOINTS.every(isRecorded);
  const derived = this.computeDerived(turn.marks);

  return {
    turnId: turn.turnId,
    roomId: turn.roomId,
    t0EpochMs: originEpochMs,
    closedAtEpochMs,
    checkpoints: recorded,
    derived,
    missing,
    complete,
    anomalies: [...turn.anomalies],
  };
}
|
|
590
|
+
|
|
591
|
+
/**
 * Compute every derived latency span from a turn's marks.
 *
 * For each key in `LATENCY_DERIVED_KEYS`, `DERIVED_SPANS[key]` names a
 * `[from, to]` checkpoint pair; the span is `to - from`, or `null` when
 * either checkpoint was never marked.
 */
private computeDerived(marks: Map<VoiceCheckpoint, number>): LatencyDerived {
  const derived = {} as LatencyDerived;
  for (const key of LATENCY_DERIVED_KEYS) {
    const [startName, endName] = DERIVED_SPANS[key];
    const startedAt = marks.get(startName);
    const endedAt = marks.get(endName);
    const bothMarked = startedAt !== undefined && endedAt !== undefined;
    derived[key] = bothMarked ? endedAt - startedAt : null;
  }
  return derived;
}
|
|
608
|
+
}
|
|
609
|
+
|
|
610
|
+
// ---------------------------------------------------------------------------
|
|
611
|
+
// Module-level singleton + context-free helper
|
|
612
|
+
// ---------------------------------------------------------------------------
|
|
613
|
+
|
|
614
|
+
/**
 * The process-wide tracer instance.
 *
 * The turn controller (W9) owns the per-turn lifecycle through `beginTurn` /
 * `endTurn`. Components that only know a `roomId` call
 * `markVoiceLatency(roomId, checkpoint)`, which routes through `markByRoom`.
 * The dev endpoint (`GET /api/dev/voice-latency`) reads this singleton.
 */
export const voiceLatencyTracer = new EndToEndLatencyTracer();
|
|
621
|
+
|
|
622
|
+
/**
 * Record `checkpoint` on the process-wide tracer for the given room.
 *
 * With a `roomId` the mark is routed through `markByRoom` (which opens a
 * turn for that room on first call). Without one, a fresh anonymous turn is
 * opened so the mark is not lost.
 *
 * Never throws: instrumentation must not break the voice loop, so any error
 * is downgraded to a warning log. This is the seam every component (VAD,
 * turn controller, engine, field extractor, chunker, TTS backend, audio
 * sink) can call without threading a tracer reference.
 *
 * @param roomId Room the mark belongs to; `null`/`undefined`/`""` means none.
 * @param checkpoint Checkpoint to record.
 * @param atEpochMs Optional explicit timestamp, forwarded unchanged.
 */
export function markVoiceLatency(
  roomId: string | null | undefined,
  checkpoint: VoiceCheckpoint,
  atEpochMs?: number,
): void {
  try {
    if (roomId) {
      voiceLatencyTracer.markByRoom(roomId, checkpoint, atEpochMs);
      return;
    }
    // NOTE(review): the anonymous turn's id is discarded here, so nothing in
    // this function can close it later — confirm the tracer bounds or evicts
    // open turns.
    const anonymousTurnId = voiceLatencyTracer.beginTurn({});
    voiceLatencyTracer.mark(anonymousTurnId, checkpoint, atEpochMs);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    logger.warn(
      `[LatencyTracer] markVoiceLatency("${checkpoint}") failed: ${reason}`,
    );
  }
}
|
|
650
|
+
|
|
651
|
+
/**
 * Close the process-wide tracer's turn for `roomId`.
 *
 * @returns Whatever `endTurn` yields for that room's turn, or `null` if the
 *   lookup or the close throws (the failure is logged as a warning).
 */
export function endVoiceLatencyTurn(roomId: string): LatencyTrace | null {
  try {
    return voiceLatencyTracer.endTurn(voiceLatencyTracer.turnForRoom(roomId));
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    logger.warn(
      `[LatencyTracer] endVoiceLatencyTurn(${roomId}) failed: ${reason}`,
    );
    return null;
  }
}
|
|
665
|
+
|
|
666
|
+
// ---------------------------------------------------------------------------
|
|
667
|
+
// JSON payload for the dev endpoint
|
|
668
|
+
// ---------------------------------------------------------------------------
|
|
669
|
+
|
|
670
|
+
/** JSON body served by the voice-latency dev endpoint. */
export interface VoiceLatencyDevPayload {
  /** Epoch milliseconds at which this payload was assembled. */
  generatedAtEpochMs: number;
  /** Checkpoint names, in canonical order — so consumers can render headers. */
  checkpoints: readonly VoiceCheckpoint[];
  /** Keys of the derived latency spans (the keys of `histograms`). */
  derivedKeys: readonly LatencyDerivedKey[];
  /** Turns that were still open when the payload was built. */
  openTurnCount: number;
  /** Recent traces, as returned by the tracer's `recentTraces`. */
  traces: LatencyTrace[];
  /** One histogram summary per derived latency key. */
  histograms: Record<LatencyDerivedKey, HistogramSummary>;
}
|
|
679
|
+
|
|
680
|
+
/**
 * Assemble the JSON body for `GET /api/dev/voice-latency`.
 *
 * @param tracer Tracer to read from; defaults to the process-wide singleton.
 * @param limit Passed to `recentTraces` (default 50).
 */
export function buildVoiceLatencyDevPayload(
  tracer: EndToEndLatencyTracer = voiceLatencyTracer,
  limit = 50,
): VoiceLatencyDevPayload {
  const generatedAtEpochMs = Date.now();
  const openTurnCount = tracer.openTurnCount;
  const traces = tracer.recentTraces(limit);
  const histograms = tracer.histogramSummaries();
  return {
    generatedAtEpochMs,
    checkpoints: VOICE_CHECKPOINTS,
    derivedKeys: LATENCY_DERIVED_KEYS,
    openTurnCount,
    traces,
    histograms,
  };
}
|
|
694
|
+
|
|
695
|
+
// ---------------------------------------------------------------------------
|
|
696
|
+
// VoiceRunMetrics — non-latency accumulator over a long voice run
|
|
697
|
+
// ---------------------------------------------------------------------------
|
|
698
|
+
|
|
699
|
+
/**
 * One turn's worth of observations, as passed to `VoiceRunMetrics.recordTurn`.
 *
 * Every field is optional: a turn that could not measure a quantity leaves it
 * out (or `null`) — it is recorded as missing, never as a fabricated zero
 * (AGENTS.md §3 / §7).
 */
export interface VoiceTurnMetrics {
  /**
   * MTP drafter token-acceptance rate (n_drafted_accepted / n_drafted) for
   * this turn's generation, from the llama-server `/metrics` deltas.
   */
  mtpAcceptRate?: number | null;
  /**
   * Tokens accepted from the drafter this turn. Feeds the run-level
   * accept-rate, which weights by token count rather than by turn count.
   */
  mtpAccepted?: number | null;
  /** Tokens drafted this turn. */
  mtpDrafted?: number | null;
  /**
   * Structured-decode token-savings % for this turn: tokens the grammar
   * force-filled ÷ tokens that would otherwise have been generated, ×100
   * (WS-4's `guided_decode_token_bench.mjs` counter; ≈28% aggregate forced
   * on the synthetic action set).
   */
  structuredDecodeTokenSavingsPct?: number | null;
  /** Decode throughput (tokens / second) for this turn's generation. */
  tokensPerSecond?: number | null;
  /**
   * Server resident-set high-water mark in MB at the end of this turn
   * (`VmHWM` from `/proc/<pid>/status`).
   */
  serverRssMb?: number | null;
}
|
|
722
|
+
|
|
723
|
+
/** Aggregate produced by `VoiceRunMetrics.summary()`. */
export interface VoiceRunMetricsSummary {
  /** Number of turns recorded. */
  turns: number;
  /**
   * MTP accept-rate, token-weighted across the run (Σaccepted / Σdrafted);
   * `null` when nothing was drafted / no drafter present.
   */
  mtpAcceptRate: number | null;
  /** Σ of per-turn accepted drafter tokens. */
  mtpAccepted: number;
  /** Σ of per-turn drafted tokens. */
  mtpDrafted: number;
  /** Per-turn accept-rate histogram (p50/p90/p99 etc. — bounded sample). */
  mtpAcceptRateHistogram: HistogramSummary;
  /** Mean / histogram of the structured-decode token-savings %. */
  structuredDecodeTokenSavingsPct: HistogramSummary;
  /** Mean / histogram of decode tok/s. */
  tokensPerSecond: HistogramSummary;
  /**
   * Server RSS over the run: first / last / max in MB, the sample count, the
   * growth (`lastMb - firstMb`), and the `leakSuspected` flag — true when RSS
   * is monotone non-decreasing across ≥4 RSS samples and grew by more than
   * `leakGrowthMbThreshold`.
   */
  rss: {
    firstMb: number | null;
    lastMb: number | null;
    maxMb: number | null;
    samples: number;
    leakSuspected: boolean;
    growthMb: number | null;
  };
}
|
|
748
|
+
|
|
749
|
+
// Constructor argument for every `BoundedHistogram` owned by `VoiceRunMetrics`
// (presumably the per-histogram retained-sample cap — confirm against
// `BoundedHistogram`).
const VOICE_RUN_HISTOGRAM_CAPACITY = 512;
|
|
750
|
+
|
|
751
|
+
/**
 * Accumulates the non-latency signals over a long voice run (the duet harness
 * feeds it per-turn). Sibling to `EndToEndLatencyTracer` (which is per-turn
 * spans only). The duet bench report (`voice-duet-bench-<model>.json`) writes
 * `summary()` next to the latency histograms; `eliza1_gates_collect.mjs`
 * ingests the gate-named fields.
 *
 * A field that is absent, `null`, or not a finite number on a given turn is
 * skipped for that turn — it never contributes a fabricated zero.
 */
export class VoiceRunMetrics {
  private turns = 0;
  private mtpAccepted = 0;
  private mtpDrafted = 0;
  // Deliberately not `readonly`: `reset()` swaps in fresh, empty instances.
  private acceptRateHist = VoiceRunMetrics.newHistogram();
  private savingsHist = VoiceRunMetrics.newHistogram();
  private tokSecHist = VoiceRunMetrics.newHistogram();
  private readonly rssSamples: number[] = [];

  /**
   * @param opts.leakGrowthMbThreshold RSS growth (MB) above which a monotone
   *   run is flagged as a suspected leak. Defaults to 256.
   */
  constructor(private readonly opts: { leakGrowthMbThreshold?: number } = {}) {}

  /** Build an empty histogram with the shared run-level capacity. */
  private static newHistogram(): BoundedHistogram {
    return new BoundedHistogram(VOICE_RUN_HISTOGRAM_CAPACITY);
  }

  /** True only for real measurements: a finite `number` (not NaN/±Infinity). */
  private static isMeasured(value: number | null | undefined): value is number {
    return typeof value === "number" && Number.isFinite(value);
  }

  /**
   * Fold one turn's observations into the run. Always counts the turn; each
   * individual field is folded in only when it is a finite number.
   */
  recordTurn(m: VoiceTurnMetrics): void {
    this.turns += 1;
    if (VoiceRunMetrics.isMeasured(m.mtpAccepted)) {
      this.mtpAccepted += m.mtpAccepted;
    }
    if (VoiceRunMetrics.isMeasured(m.mtpDrafted)) {
      this.mtpDrafted += m.mtpDrafted;
    }
    if (VoiceRunMetrics.isMeasured(m.mtpAcceptRate)) {
      this.acceptRateHist.add(m.mtpAcceptRate);
    }
    if (VoiceRunMetrics.isMeasured(m.structuredDecodeTokenSavingsPct)) {
      this.savingsHist.add(m.structuredDecodeTokenSavingsPct);
    }
    if (VoiceRunMetrics.isMeasured(m.tokensPerSecond)) {
      this.tokSecHist.add(m.tokensPerSecond);
    }
    if (VoiceRunMetrics.isMeasured(m.serverRssMb)) {
      this.rssSamples.push(m.serverRssMb);
    }
  }

  /**
   * Snapshot the run so far.
   *
   * @returns Turn count, token-weighted MTP accept-rate (`null` when nothing
   *   was drafted), the three histogram summaries, and the RSS roll-up with
   *   its leak warning flag.
   */
  summary(): VoiceRunMetricsSummary {
    const rssN = this.rssSamples.length;
    const firstMb = this.rssSamples[0] ?? null;
    const lastMb = this.rssSamples[rssN - 1] ?? null;

    // One pass for both the maximum and the monotonicity check. A loop is
    // used instead of `Math.max(...samples)` because spreading a very long
    // sample array into call arguments can throw a RangeError.
    // Leak heuristic: ≥4 samples, monotone non-decreasing, and grew by more
    // than the threshold (default 256 MB). This is a warning flag.
    let maxMb: number | null = null;
    let monotone = rssN >= 4;
    let previous: number | null = null;
    for (const sample of this.rssSamples) {
      maxMb = maxMb === null ? sample : Math.max(maxMb, sample);
      if (previous !== null && sample < previous) monotone = false;
      previous = sample;
    }

    const threshold = this.opts.leakGrowthMbThreshold ?? 256;
    const growthMb =
      firstMb !== null && lastMb !== null ? lastMb - firstMb : null;
    const leakSuspected = monotone && growthMb !== null && growthMb > threshold;

    return {
      turns: this.turns,
      mtpAcceptRate:
        this.mtpDrafted > 0 ? this.mtpAccepted / this.mtpDrafted : null,
      mtpAccepted: this.mtpAccepted,
      mtpDrafted: this.mtpDrafted,
      mtpAcceptRateHistogram: this.acceptRateHist.summary(),
      structuredDecodeTokenSavingsPct: this.savingsHist.summary(),
      tokensPerSecond: this.tokSecHist.summary(),
      rss: {
        firstMb,
        lastMb,
        maxMb,
        samples: rssN,
        leakSuspected,
        growthMb,
      },
    };
  }

  /**
   * Return the accumulator to its freshly-constructed state: counters zeroed,
   * RSS samples dropped, and every histogram replaced with an empty one so a
   * later `summary()` cannot mix in samples from before the reset.
   */
  reset(): void {
    this.turns = 0;
    this.mtpAccepted = 0;
    this.mtpDrafted = 0;
    this.rssSamples.length = 0;
    this.acceptRateHist = VoiceRunMetrics.newHistogram();
    this.savingsHist = VoiceRunMetrics.newHistogram();
    this.tokSecHist = VoiceRunMetrics.newHistogram();
  }
}
|