@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +81 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +7 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +54 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1171 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +190 -0
- package/src/routes/local-inference-asr-route.ts +213 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +423 -0
- package/src/routes/local-inference-compat-routes.ts +782 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1398 -0
- package/src/runtime/index.d.ts +14 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +27 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/asr/errors.d.ts +21 -0
- package/src/services/asr/errors.d.ts.map +1 -0
- package/src/services/asr/errors.ts +50 -0
- package/src/services/asr/hash.d.ts +28 -0
- package/src/services/asr/hash.d.ts.map +1 -0
- package/src/services/asr/hash.ts +49 -0
- package/src/services/asr/index.d.ts +76 -0
- package/src/services/asr/index.d.ts.map +1 -0
- package/src/services/asr/index.ts +178 -0
- package/src/services/asr/types.d.ts +91 -0
- package/src/services/asr/types.d.ts.map +1 -0
- package/src/services/asr/types.ts +95 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +240 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +724 -0
- package/src/services/downloader.ts +899 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +534 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1891 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +183 -0
- package/src/services/hardware.ts +404 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +281 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +30 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +225 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +693 -0
- package/src/services/manifest/schema.d.ts +715 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +655 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +569 -0
- package/src/services/memory-arbiter.d.ts +343 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +1000 -0
- package/src/services/memory-monitor.d.ts +119 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +296 -0
- package/src/services/memory-pressure.d.ts +127 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +413 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +672 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +376 -0
- package/src/services/routing-policy.d.ts +55 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.ts +228 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +15 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/tts/errors.ts +46 -0
- package/src/services/tts/index.ts +214 -0
- package/src/services/tts/tts-audio-cache.ts +235 -0
- package/src/services/tts/types.ts +157 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +148 -0
- package/src/services/voice/embedding.ts +244 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +746 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2226 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +636 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +671 -0
- package/src/services/voice/ffi-bindings.ts +3050 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.ts +105 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +420 -0
- package/src/services/voice/voice-budget.ts +656 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,386 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Speaker-ID + diarization attribution pipeline.
|
|
3
|
+
*
|
|
4
|
+
* Wraps a `StreamingTranscriber` so the partial / final
|
|
5
|
+
* `TranscriptUpdate`s carry diarized `VoiceSegment[]` and a
|
|
6
|
+
* `primarySpeaker`. The attribution runs in parallel with ASR — the
|
|
7
|
+
* encoder fires the moment ≥ 1 s of audio is available, and the
|
|
8
|
+
* profile store's `beginMatch` starts at speech-start.
|
|
9
|
+
*
|
|
10
|
+
* This module owns *only* the attribution logic. It does NOT replace
|
|
11
|
+
* the transcriber; callers feed PCM through both the transcriber and
|
|
12
|
+
* the attributor in parallel, then attach the resolved metadata via
|
|
13
|
+
* `BaseStreamingTranscriber.setMetadataDefaults()` once it lands.
|
|
14
|
+
*
|
|
15
|
+
* Why a separate module: the existing `VoicePipeline` is large and
|
|
16
|
+
* already handles a lot. Putting attribution behind a small adapter
|
|
17
|
+
* lets the voice pipeline opt in without entangling the diarizer /
|
|
18
|
+
* encoder / profile-store dependencies into the streaming-ASR contract.
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import type {
|
|
22
|
+
VoiceProfileObservation,
|
|
23
|
+
VoiceProfileStore,
|
|
24
|
+
} from "../profile-store";
|
|
25
|
+
import { voiceSpeakerFromImprintMatch } from "../speaker-imprint";
|
|
26
|
+
import type {
|
|
27
|
+
VoiceInputSource,
|
|
28
|
+
VoiceSegment,
|
|
29
|
+
VoiceSpeaker,
|
|
30
|
+
VoiceTurnMetadata,
|
|
31
|
+
} from "../types";
|
|
32
|
+
import type { Diarizer, LocalSpeakerSegment } from "./diarizer";
|
|
33
|
+
import type { SpeakerEncoder } from "./encoder";
|
|
34
|
+
import { WESPEAKER_MIN_SAMPLES } from "./encoder";
|
|
35
|
+
|
|
36
|
+
/** Collaborators handed to `VoiceAttributionPipeline` at construction time. */
export interface VoiceAttributionPipelineDeps {
	/** Turns PCM into a speaker embedding; its `sampleRate` and `modelId` are read by the pipeline. */
	encoder: SpeakerEncoder;
	/** Optional diarizer. When absent the whole turn is treated as a single local speaker. */
	diarizer?: Diarizer;
	/** Store queried for the best match and updated via `refine` / `createProfile`. */
	profileStore: VoiceProfileStore;
}
|
|
41
|
+
|
|
42
|
+
/** One turn's worth of input for `VoiceAttributionPipeline.attribute()`. */
export interface VoiceAttributionRequest {
	/** Turn identifier; echoed on the output and on the turn metadata. */
	turnId: string;
	/** Input source, copied onto the produced segments, speakers and observation. */
	source?: VoiceInputSource;
	/** Concatenated mono 16 kHz PCM for the entire turn. */
	pcm: Float32Array;
	/** Turn start time; forwarded to the turn metadata only when provided. */
	startedAtMs?: number;
	/** Turn end time; forwarded to the turn metadata only when provided. */
	endedAtMs?: number;
	/** When set, the attributor will only run if the abort signal isn't yet fired. */
	signal?: AbortSignal;
}
|
|
52
|
+
|
|
53
|
+
/** Result of attributing one turn. */
export interface VoiceAttributionOutput {
	/** Same `turnId` as the request. */
	turnId: string;
	/** Resolved speaker for the dominant local speaker; omitted when no embedding was produced. */
	primarySpeaker?: VoiceSpeaker;
	/** One entry per local diarizer segment (empty for an aborted turn). */
	segments: VoiceSegment[];
	/** Turn-level metadata for downstream consumers. */
	turn: VoiceTurnMetadata;
	/** Profile-store observation for this turn, or `null` when none was recorded. */
	observation: VoiceProfileObservation | null;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Keep only the segments that are not flagged `hasOverlap` and return them
 * ordered by `startMs`, with equal starts ordered by `endMs`.
 *
 * The input array is left untouched; a new array is returned.
 */
function nonOverlappingSegments(
	local: ReadonlyArray<LocalSpeakerSegment>,
): LocalSpeakerSegment[] {
	const kept: LocalSpeakerSegment[] = [];
	for (const seg of local) {
		if (!seg.hasOverlap) kept.push(seg);
	}
	kept.sort((left, right) => {
		if (left.startMs !== right.startMs) return left.startMs - right.startMs;
		return left.endMs - right.endMs;
	});
	return kept;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Return the `localSpeakerId` with the largest cumulative segment duration,
 * or `null` when there are no segments.
 *
 * Negative-length segments contribute zero. On a tie the speaker that
 * appeared first in `local` wins, because only a strictly larger total
 * replaces the current leader.
 */
function pickPrimaryLocalSpeaker(
	local: ReadonlyArray<LocalSpeakerSegment>,
): number | null {
	const totals = new Map<number, number>();
	for (const { localSpeakerId, startMs, endMs } of local) {
		const soFar = totals.get(localSpeakerId) ?? 0;
		totals.set(localSpeakerId, soFar + Math.max(0, endMs - startMs));
	}

	let leaderId: number | null = null;
	let leaderMs = 0;
	for (const [id, ms] of totals) {
		if (leaderId === null || ms > leaderMs) {
			leaderId = id;
			leaderMs = ms;
		}
	}
	return leaderId;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Run the diarizer + encoder + profile-store against a complete turn's
 * audio. The caller is responsible for slicing the audio buffer (the
 * pipeline's prefix queue already buffers the entire utterance for
 * the streaming-ASR path).
 *
 * The high-level flow:
 *   1. Diarizer runs on the full PCM, producing per-segment speaker
 *      tags (window-local ids). Overlapping segments are dropped; if the
 *      diarizer is missing, fails, or yields nothing usable, the whole
 *      turn becomes one segment with `localSpeakerId=0`.
 *   2. We pick the local speaker with the largest cumulative duration,
 *      concatenate all of that speaker's spans, and run the encoder on
 *      the result (only when it has at least `WESPEAKER_MIN_SAMPLES`
 *      samples).
 *   3. The embedding is matched against the profile store. On hit,
 *      the matched profile is refined and the turn is attributed to it.
 *      On miss, a new cluster profile is created (no entity binding — that
 *      happens at the LifeOps layer based on utterance text).
 *   4. Build `VoiceSegment[]` with the resolved speaker, plus a
 *      `VoiceTurnMetadata` for downstream consumers.
 *
 * The abort signal is honoured before every step that does work on behalf
 * of the turn, including immediately before the profile store is mutated.
 */
export class VoiceAttributionPipeline {
	constructor(private readonly deps: VoiceAttributionPipelineDeps) {}

	/**
	 * Attribute a single turn.
	 *
	 * @param req Turn audio plus identifiers; see `VoiceAttributionRequest`.
	 * @returns Segments, turn metadata and (when an embedding was produced)
	 *   the primary speaker and profile observation. An aborted turn yields
	 *   an empty output with `observation: null`.
	 *
	 * Diarizer failures are swallowed on purpose (whole-turn fallback).
	 * Errors from the encoder or the profile store propagate to the caller.
	 */
	async attribute(
		req: VoiceAttributionRequest,
	): Promise<VoiceAttributionOutput> {
		if (req.signal?.aborted) return this.buildEmptyOutput(req);

		const local = await this.resolveLocalSegments(req.pcm);
		const primaryLocal = pickPrimaryLocalSpeaker(local);
		if (primaryLocal === null) return this.buildEmptyOutput(req);

		// Concatenate the primary local speaker's spans into a single PCM
		// buffer for the embedding.
		const primarySpans = local.filter(
			(seg) => seg.localSpeakerId === primaryLocal,
		);
		const primaryPcm = this.spliceSpans(req.pcm, primarySpans);
		const diarization = this.diarizationInfo();

		if (primaryPcm.length < WESPEAKER_MIN_SAMPLES) {
			// Not enough audio for a stable embedding — emit
			// "unknown speaker" segments, no profile observation.
			const unknownSegments = this.localToUnknownSegments(local, req.source);
			const shortTurn: VoiceTurnMetadata = {
				turnId: req.turnId,
				source: req.source,
				segments: unknownSegments,
				...this.turnTimestamps(req),
				diarization,
			};
			return {
				turnId: req.turnId,
				segments: unknownSegments,
				turn: shortTurn,
				observation: null,
			};
		}
		if (req.signal?.aborted) return this.buildEmptyOutput(req);

		const embedding = await this.deps.encoder.encode(primaryPcm);
		if (req.signal?.aborted) return this.buildEmptyOutput(req);

		const embeddingModel = this.deps.encoder.modelId ?? "";
		const match = await this.deps.profileStore.findBestMatch({
			embedding,
			embeddingModel,
		});
		// The lookup is read-only; everything below mutates the profile store,
		// so a turn cancelled while the lookup was in flight must stop here.
		if (req.signal?.aborted) return this.buildEmptyOutput(req);

		const durationMs = this.spanMsTotal(primarySpans);
		const startMs = primarySpans[0]?.startMs;
		const endMs = primarySpans[primarySpans.length - 1]?.endMs;

		let observation: VoiceProfileObservation;
		let speaker: VoiceSpeaker;
		if (match) {
			// Update the existing profile with the new observation.
			const refined = await this.deps.profileStore.refine({
				profileId: match.profile.id,
				embedding,
				durationMs,
				confidence: match.confidence,
			});
			observation = {
				profileId: match.profile.id,
				imprintClusterId: match.profile.sourceScopeId ?? match.profile.id,
				entityId: refined?.entityId ?? match.profile.entityId ?? null,
				embedding,
				embeddingModel,
				confidence: match.confidence,
				source: req.source,
				startMs,
				endMs,
			};
			speaker = voiceSpeakerFromImprintMatch({
				match,
				source: req.source,
				observationId: req.turnId,
			});
		} else {
			// Create a new cluster.
			const created = await this.deps.profileStore.createProfile({
				centroid: embedding,
				embeddingModel,
				entityId: null,
				confidence: 0.5,
				durationMs,
			});
			observation = {
				profileId: created.profileId,
				imprintClusterId: created.imprintClusterId,
				entityId: null,
				embedding,
				embeddingModel,
				confidence: 0.5,
				source: req.source,
				startMs,
				endMs,
			};
			speaker = {
				id: created.imprintClusterId,
				imprintClusterId: created.imprintClusterId,
				imprintObservationId: req.turnId,
				entityId: undefined,
				source: req.source,
				confidence: 0.5,
				metadata: {
					attributionOnly: true,
					evidenceKind: "voice_imprint_attribution",
					identityAuthority: false,
					synthesisAuthorization: false,
					embeddingModel,
					profileId: created.profileId,
				},
			};
		}

		const segments = this.localToVoiceSegments(
			local,
			primaryLocal,
			speaker,
			req.source,
		);

		const turn: VoiceTurnMetadata = {
			turnId: req.turnId,
			source: req.source,
			primarySpeaker: speaker,
			segments,
			...this.turnTimestamps(req),
			diarization: diarization
				? { ...diarization, confidence: match?.confidence }
				: undefined,
		};

		return {
			turnId: req.turnId,
			primarySpeaker: speaker,
			segments,
			turn,
			observation,
		};
	}

	/**
	 * Diarize the turn, falling back to a single whole-turn segment
	 * (`localSpeakerId=0`, confidence 0.5) when no diarizer is configured,
	 * when it throws, or when every segment it returned was overlapping.
	 */
	private async resolveLocalSegments(
		pcm: Float32Array,
	): Promise<LocalSpeakerSegment[]> {
		const { diarizer, encoder } = this.deps;
		if (diarizer) {
			try {
				const out = await diarizer.diarizeWindow(pcm);
				const usable = nonOverlappingSegments(out.segments);
				if (usable.length > 0) return usable;
			} catch {
				// Deliberate best-effort: a diarizer failure degrades to the
				// whole-turn fallback below instead of failing attribution.
			}
		}
		return [
			{
				startMs: 0,
				endMs: Math.round((pcm.length / encoder.sampleRate) * 1000),
				localSpeakerId: 0,
				confidence: 0.5,
				hasOverlap: false,
			},
		];
	}

	/** Optional turn timestamps, included only when the request carries them. */
	private turnTimestamps(req: VoiceAttributionRequest): {
		startedAtMs?: number;
		endedAtMs?: number;
	} {
		return {
			...(req.startedAtMs !== undefined
				? { startedAtMs: req.startedAtMs }
				: {}),
			...(req.endedAtMs !== undefined ? { endedAtMs: req.endedAtMs } : {}),
		};
	}

	/** Diarization provenance for the turn, or `undefined` without a diarizer. */
	private diarizationInfo(): VoiceTurnMetadata["diarization"] {
		const { diarizer } = this.deps;
		return diarizer
			? {
					provider: "local",
					model: diarizer.modelId,
					version: "v1",
				}
			: undefined;
	}

	/** Output for a turn that produced nothing (aborted, or no local speaker). */
	private buildEmptyOutput(
		req: VoiceAttributionRequest,
	): VoiceAttributionOutput {
		const turn: VoiceTurnMetadata = {
			turnId: req.turnId,
			source: req.source,
			segments: [],
			...this.turnTimestamps(req),
		};
		return { turnId: req.turnId, segments: [], turn, observation: null };
	}

	/**
	 * Copy the samples covered by `spans` out of `pcm` into one contiguous
	 * buffer, in span order. Span times are converted to sample indices at
	 * the encoder's sample rate and clamped to the bounds of `pcm`; spans
	 * that end up empty are skipped.
	 */
	private spliceSpans(
		pcm: Float32Array,
		spans: ReadonlyArray<LocalSpeakerSegment>,
	): Float32Array {
		const sr = this.deps.encoder.sampleRate;
		// Resolve every sample range once so the output can be allocated in
		// a single step and then filled.
		const ranges: Array<[from: number, to: number]> = [];
		let total = 0;
		for (const span of spans) {
			const from = Math.max(0, Math.floor((span.startMs / 1000) * sr));
			const to = Math.min(pcm.length, Math.ceil((span.endMs / 1000) * sr));
			if (to > from) {
				ranges.push([from, to]);
				total += to - from;
			}
		}
		const out = new Float32Array(total);
		let cursor = 0;
		for (const [from, to] of ranges) {
			out.set(pcm.subarray(from, to), cursor);
			cursor += to - from;
		}
		return out;
	}

	/** Sum of span durations in milliseconds; inverted spans count as zero. */
	private spanMsTotal(spans: ReadonlyArray<LocalSpeakerSegment>): number {
		let total = 0;
		for (const span of spans) total += Math.max(0, span.endMs - span.startMs);
		return total;
	}

	/**
	 * Convert local segments into `VoiceSegment`s. Segments of the primary
	 * local speaker carry `primarySpeaker`; all others get a placeholder
	 * `local_<id>` speaker marked as diarization-only evidence.
	 */
	private localToVoiceSegments(
		local: ReadonlyArray<LocalSpeakerSegment>,
		primaryLocalId: number,
		primarySpeaker: VoiceSpeaker,
		source?: VoiceInputSource,
	): VoiceSegment[] {
		return local.map<VoiceSegment>((seg, i) => {
			const isPrimary = seg.localSpeakerId === primaryLocalId;
			const speaker: VoiceSpeaker = isPrimary
				? primarySpeaker
				: {
						id: `local_${seg.localSpeakerId}`,
						label: `Speaker ${seg.localSpeakerId}`,
						source,
						confidence: seg.confidence,
						metadata: {
							attributionOnly: true,
							evidenceKind: "voice_imprint_attribution",
							identityAuthority: false,
							synthesisAuthorization: false,
							diarizationOnly: true,
						},
					};
			return {
				id: `seg_${i}`,
				text: "",
				startMs: seg.startMs,
				endMs: seg.endMs,
				speaker,
				speakerId: speaker.id,
				...(source ? { source } : {}),
				confidence: seg.confidence,
				metadata: {
					localSpeakerId: seg.localSpeakerId,
					primary: isPrimary,
				},
			};
		});
	}

	/** Convert local segments into speaker-less `VoiceSegment`s (no embedding available). */
	private localToUnknownSegments(
		local: ReadonlyArray<LocalSpeakerSegment>,
		source?: VoiceInputSource,
	): VoiceSegment[] {
		return local.map<VoiceSegment>((seg, i) => ({
			id: `seg_${i}`,
			text: "",
			startMs: seg.startMs,
			endMs: seg.endMs,
			...(source ? { source } : {}),
			confidence: seg.confidence,
			metadata: { localSpeakerId: seg.localSpeakerId, primary: false },
		}));
	}
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pyannote-segmentation-3.0 diarizer — fused `libelizainference` binding
|
|
3
|
+
* (ABI v6).
|
|
4
|
+
*
|
|
5
|
+
* Drives the native pyannote diarizer through the single fused-FFI
|
|
6
|
+
* `libelizainference` handle (the merged llama.cpp fork — see
|
|
7
|
+
* `plugins/plugin-local-inference/native/CLAUDE.md` §1) via the
|
|
8
|
+
* `eliza_inference_diariz_*` ABI. This is the SOLE on-device diarizer runtime —
|
|
9
|
+
* the same `ffi`/`ctx` pair powers VAD / wake-word / speaker / TTS / ASR.
|
|
10
|
+
*
|
|
11
|
+
* The native call returns a per-frame powerset-label sequence (293 int8
|
|
12
|
+
* labels per 5 s window, each in `[0, 7)`). Agglomerative clustering and the
|
|
13
|
+
* frame→segment reduction stay JS-side: this class one-hots the labels and
|
|
14
|
+
* feeds them through the shared pure `classifyFramesToSegments` reducer.
|
|
15
|
+
*
|
|
16
|
+
* No silent fallback: when the fused build does not export the diarizer ABI
|
|
17
|
+
* (`eliza_inference_diariz_supported() == 0`) `load()` throws a structured
|
|
18
|
+
* `DiarizerUnavailableError` (AGENTS.md §3 — never fabricate a label
|
|
19
|
+
* sequence, no standalone-lib fallback).
|
|
20
|
+
*/
|
|
21
|
+
import type { ElizaInferenceContextHandle, ElizaInferenceFfi } from "../ffi-bindings";
|
|
22
|
+
import { type Diarizer, type DiarizerOutput, type PyannoteDiarizerModelId } from "./diarizer";
|
|
23
|
+
/** Options accepted by `FusedDiarizer.load()`. */
export interface FusedDiarizerOptions {
	/** Fused `libelizainference` FFI binding. */
	ffi: ElizaInferenceFfi;
	/** Inference context handle, or a function that returns one. */
	ctx: ElizaInferenceContextHandle | (() => ElizaInferenceContextHandle);
	/**
	 * Explicit path to the pyannote GGUF (optional). Passing `null` leaves it
	 * to the native runtime to resolve the bundle's `diariz/` dir, which is
	 * the default.
	 */
	ggufPath?: string | null;
	/** Model id to record on the instance; informational only. */
	modelId?: PyannoteDiarizerModelId;
}
|
|
34
|
+
/**
 * pyannote-3 diarizer backed by the fused `libelizainference` library.
 * Each instance owns a single `eliza_inference_diariz_*` session, and
 * `diarizeWindow()` performs one forward pass over a ~5 s window before
 * reducing the powerset labels into speaker segments.
 */
export declare class FusedDiarizer implements Diarizer {
	private readonly ffi;
	private readonly handle;
	/** Sample rate of the PCM this diarizer works with, in Hz. */
	readonly sampleRate = 16000;
	/** Model id recorded for this instance. */
	readonly modelId: PyannoteDiarizerModelId;
	private disposed;
	/** Not publicly constructible; obtain instances through `load()`. */
	private constructor();
	/**
	 * Reports whether the fused `libelizainference` build both exports the
	 * diarizer ABI and advertises support at runtime.
	 */
	static isSupported(ffi: ElizaInferenceFfi | null | undefined): boolean;
	/**
	 * Opens a native diarizer session. A `DiarizerUnavailableError` is
	 * thrown when the runtime is not present.
	 */
	static load(opts: FusedDiarizerOptions): Promise<FusedDiarizer>;
	/** Diarizes one window of PCM and resolves with the reduced output. */
	diarizeWindow(pcm: Float32Array): Promise<DiarizerOutput>;
	/** Disposes this diarizer. */
	dispose(): Promise<void>;
}
|
|
59
|
+
//# sourceMappingURL=diarizer-fused.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"diarizer-fused.d.ts","sourceRoot":"","sources":["diarizer-fused.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,KAAK,EACX,2BAA2B,EAC3B,iBAAiB,EAEjB,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAEN,KAAK,QAAQ,EACb,KAAK,cAAc,EAMnB,KAAK,uBAAuB,EAC5B,MAAM,YAAY,CAAC;AAEpB,MAAM,WAAW,oBAAoB;IACpC,GAAG,EAAE,iBAAiB,CAAC;IACvB,GAAG,EAAE,2BAA2B,GAAG,CAAC,MAAM,2BAA2B,CAAC,CAAC;IACvE;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,8CAA8C;IAC9C,OAAO,CAAC,EAAE,uBAAuB,CAAC;CAClC;AAED;;;;GAIG;AACH,qBAAa,aAAc,YAAW,QAAQ;IAM5C,OAAO,CAAC,QAAQ,CAAC,GAAG;IACpB,OAAO,CAAC,QAAQ,CAAC,MAAM;IANxB,QAAQ,CAAC,UAAU,SAAwB;IAC3C,QAAQ,CAAC,OAAO,EAAE,uBAAuB,CAAC;IAC1C,OAAO,CAAC,QAAQ,CAAS;IAEzB,OAAO;IAQP;;;OAGG;IACH,MAAM,CAAC,WAAW,CAAC,GAAG,EAAE,iBAAiB,GAAG,IAAI,GAAG,SAAS,GAAG,OAAO;IAKtE;;;OAGG;WACU,IAAI,CAAC,IAAI,EAAE,oBAAoB,GAAG,OAAO,CAAC,aAAa,CAAC;IA6B/D,aAAa,CAAC,GAAG,EAAE,YAAY,GAAG,OAAO,CAAC,cAAc,CAAC;IAsCzD,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAK9B"}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Real-FFI tests for `FusedDiarizer`: run against the ACTUAL fused
|
|
3
|
+
* `libelizainference` — loaded, `create`d, and probed for `diarizSupported()`
|
|
4
|
+
* — never a stub. The pyannote diarizer is the SOLE on-device diarization
|
|
5
|
+
* runtime (the `eliza_inference_diariz_*` ABI off the one fused handle), feeding
|
|
6
|
+
* its per-frame powerset labels through the shared pure `classifyFramesToSegments`
|
|
7
|
+
* reducer.
|
|
8
|
+
*
|
|
9
|
+
* Skipped (not faked) when the fused lib is not resolvable, or when it does not
|
|
10
|
+
* link the pyannote diarizer graph. To run them, point `ELIZA_INFERENCE_LIBRARY`
|
|
11
|
+
* (or `ELIZA_INFERENCE_LIB_DIR`) at a built `libelizainference` with the diarizer
|
|
12
|
+
* ABI, or build one via `packages/app-core/scripts/build-llama-cpp-mtp.mjs`.
|
|
13
|
+
* Runs in the post-merge `bun test` lane (`*.real.test.ts` is excluded from the
|
|
14
|
+
* default lane in `vitest.config.ts`).
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { existsSync, mkdtempSync, rmSync } from "node:fs";
|
|
18
|
+
import os from "node:os";
|
|
19
|
+
import path from "node:path";
|
|
20
|
+
import {
|
|
21
|
+
afterAll,
|
|
22
|
+
afterEach,
|
|
23
|
+
beforeAll,
|
|
24
|
+
beforeEach,
|
|
25
|
+
describe,
|
|
26
|
+
expect,
|
|
27
|
+
it,
|
|
28
|
+
} from "vitest";
|
|
29
|
+
|
|
30
|
+
import { resolveFusedLibraryPath } from "../../desktop-fused-ffi-backend-runtime";
|
|
31
|
+
import {
|
|
32
|
+
type ElizaInferenceContextHandle,
|
|
33
|
+
type ElizaInferenceFfi,
|
|
34
|
+
loadElizaInferenceFfi,
|
|
35
|
+
} from "../ffi-bindings";
|
|
36
|
+
import { FusedDiarizer } from "./diarizer-fused";
|
|
37
|
+
|
|
38
|
+
const DIARIZ_WINDOW_SAMPLES = 16_000 * 5; // 5 s @ 16 kHz

const isBun = typeof (globalThis as { Bun?: unknown }).Bun !== "undefined";
const LIB_PATH = resolveFusedLibraryPath(null, process.env);
// The native diariz_open needs a pyannote-segmentation GGUF. Provide one via
// ELIZA_TEST_DIARIZ_GGUF; the diarize assertion skips honestly when it isn't
// supplied — it is never faked.
const DIARIZ_GGUF = process.env.ELIZA_TEST_DIARIZ_GGUF?.trim();
const HAVE_MODEL = !!DIARIZ_GGUF && existsSync(DIARIZ_GGUF);

describe.skipIf(!isBun || !LIB_PATH)("FusedDiarizer — real FFI", () => {
	let ffi: ElizaInferenceFfi;
	let ctx: ElizaInferenceContextHandle;
	let tmp: string;

	beforeAll(() => {
		// LIB_PATH is non-null inside the skipIf-guarded block.
		ffi = loadElizaInferenceFfi(LIB_PATH as string);
	});
	afterAll(() => {
		ffi?.close();
	});
	beforeEach(() => {
		tmp = mkdtempSync(path.join(os.tmpdir(), "diarizer-fused-real-"));
		ctx = ffi.create(tmp);
	});
	afterEach(() => {
		// Remove the temp dir even when tearing down the native context throws.
		try {
			ffi.destroy(ctx);
		} finally {
			rmSync(tmp, { recursive: true, force: true });
		}
	});

	it("isSupported() reflects the loaded build's diarizer ABI", () => {
		expect(typeof FusedDiarizer.isSupported(ffi)).toBe("boolean");
	});

	it.skipIf(!HAVE_MODEL)(
		"diarizeWindow() reduces real native labels into bounded segments",
		async () => {
			const dia = await FusedDiarizer.load({ ffi, ctx, ggufPath: DIARIZ_GGUF });
			// Dispose in `finally`: a failed assertion must not leave the native
			// session open when `afterEach` destroys the context it belongs to.
			try {
				expect(dia.sampleRate).toBe(16_000);
				expect(dia.modelId).toBe("pyannote-segmentation-3.0-int8");
				// 5 s of a 180 Hz tone — a real, finite window the native graph accepts.
				const pcm = new Float32Array(DIARIZ_WINDOW_SAMPLES);
				for (let i = 0; i < pcm.length; i += 1) {
					pcm[i] = 0.2 * Math.sin((2 * Math.PI * 180 * i) / 16_000);
				}
				const out = await dia.diarizeWindow(pcm);
				// Every reduced segment must be well-formed: start <= end, a
				// non-negative local speaker id, and a confidence in [0, 1]. The
				// exact speaker count is content-dependent and not asserted here;
				// only its agreement with the distinct ids in `segments` is.
				expect(out.localSpeakerCount).toBe(
					new Set(out.segments.map((s) => s.localSpeakerId)).size,
				);
				for (const seg of out.segments) {
					expect(seg.endMs).toBeGreaterThanOrEqual(seg.startMs);
					expect(seg.localSpeakerId).toBeGreaterThanOrEqual(0);
					expect(seg.confidence).toBeGreaterThanOrEqual(0);
					expect(seg.confidence).toBeLessThanOrEqual(1);
				}
			} finally {
				await dia.dispose();
			}
		},
	);
});
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pyannote-segmentation-3.0 diarizer — fused `libelizainference` binding
|
|
3
|
+
* (ABI v6).
|
|
4
|
+
*
|
|
5
|
+
* Drives the native pyannote diarizer through the single fused-FFI
|
|
6
|
+
* `libelizainference` handle (the merged llama.cpp fork — see
|
|
7
|
+
* `plugins/plugin-local-inference/native/CLAUDE.md` §1) via the
|
|
8
|
+
* `eliza_inference_diariz_*` ABI. This is the SOLE on-device diarizer runtime —
|
|
9
|
+
* the same `ffi`/`ctx` pair powers VAD / wake-word / speaker / TTS / ASR.
|
|
10
|
+
*
|
|
11
|
+
* The native call returns a per-frame powerset-label sequence (293 int8
|
|
12
|
+
* labels per 5 s window, each in `[0, 7)`). Agglomerative clustering and the
|
|
13
|
+
* frame→segment reduction stay JS-side: this class one-hots the labels and
|
|
14
|
+
* feeds them through the shared pure `classifyFramesToSegments` reducer.
|
|
15
|
+
*
|
|
16
|
+
* No silent fallback: when the fused build does not export the diarizer ABI
|
|
17
|
+
* (`eliza_inference_diariz_supported() == 0`) `load()` throws a structured
|
|
18
|
+
* `DiarizerUnavailableError` (AGENTS.md §3 — never fabricate a label
|
|
19
|
+
* sequence, no standalone-lib fallback).
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import type {
|
|
23
|
+
ElizaInferenceContextHandle,
|
|
24
|
+
ElizaInferenceFfi,
|
|
25
|
+
NativeDiarizHandle,
|
|
26
|
+
} from "../ffi-bindings";
|
|
27
|
+
import {
|
|
28
|
+
classifyFramesToSegments,
|
|
29
|
+
type Diarizer,
|
|
30
|
+
type DiarizerOutput,
|
|
31
|
+
DiarizerUnavailableError,
|
|
32
|
+
PYANNOTE_CLASS_COUNT,
|
|
33
|
+
PYANNOTE_FRAME_STRIDE_MS,
|
|
34
|
+
PYANNOTE_SAMPLE_RATE,
|
|
35
|
+
PYANNOTE_SEGMENTATION_3_INT8_MODEL_ID,
|
|
36
|
+
type PyannoteDiarizerModelId,
|
|
37
|
+
} from "./diarizer";
|
|
38
|
+
|
|
39
|
+
/**
 * Inputs for `FusedDiarizer.load()`.
 */
export interface FusedDiarizerOptions {
	/** Fused `libelizainference` binding the diarizer session is opened on. */
	ffi: ElizaInferenceFfi;
	/**
	 * Inference context handle, or a thunk producing one. `load()` invokes a
	 * thunk exactly once, right before opening the native session.
	 */
	ctx: (() => ElizaInferenceContextHandle) | ElizaInferenceContextHandle;
	/**
	 * Explicit path to the pyannote GGUF file. Leaving it unset or passing
	 * `null` (the default) lets the native runtime resolve the bundle's
	 * `diariz/` dir on its own.
	 */
	ggufPath?: null | string;
	/**
	 * Model id recorded on the diarizer instance. Purely informational;
	 * `load()` falls back to `PYANNOTE_SEGMENTATION_3_INT8_MODEL_ID` when
	 * it is omitted.
	 */
	modelId?: PyannoteDiarizerModelId;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Fused-`libelizainference` pyannote-3 diarizer. Owns one
 * `eliza_inference_diariz_*` session; `diarizeWindow()` runs one forward pass
 * over a ~5 s window and reduces the powerset labels into speaker segments.
 *
 * Instances are created through {@link FusedDiarizer.load}; the constructor is
 * private so a diarizer can never exist without an open native handle.
 */
export class FusedDiarizer implements Diarizer {
	readonly sampleRate = PYANNOTE_SAMPLE_RATE;
	readonly modelId: PyannoteDiarizerModelId;
	/** Set by `dispose()`; guards against use-after-close and double close. */
	private disposed = false;

	private constructor(
		private readonly ffi: ElizaInferenceFfi,
		private readonly handle: NativeDiarizHandle,
		modelId: PyannoteDiarizerModelId,
	) {
		this.modelId = modelId;
	}

	/**
	 * True only when the fused `libelizainference` build exports the diarizer
	 * ABI and advertises support at runtime.
	 *
	 * @param ffi - Binding to probe; `null`/`undefined` reports unsupported.
	 * @returns `false` when the binding is absent or has no `diarizSupported`
	 *   function, otherwise whatever `diarizSupported()` reports.
	 */
	static isSupported(ffi: ElizaInferenceFfi | null | undefined): boolean {
		if (!ffi || typeof ffi.diarizSupported !== "function") return false;
		return ffi.diarizSupported();
	}

	/**
	 * Open a native diarizer session.
	 *
	 * @param opts - Binding, context (or context thunk), optional GGUF path
	 *   and optional model id.
	 * @returns A diarizer owning the freshly opened native handle.
	 * @throws DiarizerUnavailableError `"native-missing"` when the support
	 *   probe fails, `"model-load-failed"` when the probe passes but
	 *   `diarizOpen` / `diarizSegment` / `diarizClose` are not all present.
	 */
	static async load(opts: FusedDiarizerOptions): Promise<FusedDiarizer> {
		if (!FusedDiarizer.isSupported(opts.ffi)) {
			throw new DiarizerUnavailableError(
				"native-missing",
				"[diarizer-fused] The native diarizer is not present in this libelizainference build. Rebuild with the pyannote forward graph linked in (eliza_inference_diariz_* symbols).",
			);
		}
		// Check the whole method set up front so a partially-wired binding
		// fails here rather than on the first window.
		if (
			!opts.ffi.diarizOpen ||
			!opts.ffi.diarizSegment ||
			!opts.ffi.diarizClose
		) {
			throw new DiarizerUnavailableError(
				"model-load-failed",
				"[diarizer-fused] Diarizer support probe succeeded, but the required FFI methods are missing on the binding.",
			);
		}
		const ctx = typeof opts.ctx === "function" ? opts.ctx() : opts.ctx;
		const handle = opts.ffi.diarizOpen({
			ctx,
			ggufPath: opts.ggufPath ?? null,
		});
		return new FusedDiarizer(
			opts.ffi,
			handle,
			opts.modelId ?? PYANNOTE_SEGMENTATION_3_INT8_MODEL_ID,
		);
	}

	/**
	 * Run the native diarizer over one PCM window and reduce the per-frame
	 * labels to segments via `classifyFramesToSegments`.
	 *
	 * @param pcm - Audio samples handed to the native `diarizSegment` call.
	 * @returns The reducer's output for the one-hot encoded label sequence.
	 * @throws DiarizerUnavailableError when called after `dispose()`, when the
	 *   binding has no `diarizSegment`, or when the native call emits a label
	 *   outside `[0, PYANNOTE_CLASS_COUNT)`.
	 */
	async diarizeWindow(pcm: Float32Array): Promise<DiarizerOutput> {
		if (this.disposed) {
			throw new DiarizerUnavailableError(
				"model-load-failed",
				"[diarizer-fused] diarizeWindow called after dispose()",
			);
		}
		const { ffi } = this;
		if (!ffi.diarizSegment) {
			throw new DiarizerUnavailableError(
				"model-load-failed",
				"[diarizer-fused] diarizeWindow missing FFI method",
			);
		}
		// Invoke through the binding (not a detached function reference) so the
		// method keeps its receiver, matching how `load()` and `dispose()` call
		// `diarizOpen` / `diarizClose`.
		const labels = ffi.diarizSegment({ diariz: this.handle, pcm });
		const frames = labels.length;
		// One-hot the powerset labels into the frame×class tensor the shared
		// pure reducer expects (it argmaxes back out, so the one-hot is exact).
		const probs = new Float32Array(frames * PYANNOTE_CLASS_COUNT);
		for (let frame = 0; frame < frames; frame += 1) {
			const label = labels[frame] ?? -1;
			if (label < 0 || label >= PYANNOTE_CLASS_COUNT) {
				throw new DiarizerUnavailableError(
					"model-load-failed",
					`[diarizer-fused] native diarizer emitted invalid class ${label} at frame ${frame}`,
				);
			}
			probs[frame * PYANNOTE_CLASS_COUNT + label] = 1;
		}
		return classifyFramesToSegments(
			probs,
			frames,
			PYANNOTE_CLASS_COUNT,
			0,
			PYANNOTE_FRAME_STRIDE_MS,
		);
	}

	/**
	 * Close the native session. Idempotent: only the first call reaches
	 * `diarizClose`; later calls return immediately.
	 */
	async dispose(): Promise<void> {
		if (this.disposed) return;
		this.disposed = true;
		this.ffi.diarizClose?.(this.handle);
	}
}
|