@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +81 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +7 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +54 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1171 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +190 -0
- package/src/routes/local-inference-asr-route.ts +213 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +423 -0
- package/src/routes/local-inference-compat-routes.ts +782 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1398 -0
- package/src/runtime/index.d.ts +14 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +27 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/asr/errors.d.ts +21 -0
- package/src/services/asr/errors.d.ts.map +1 -0
- package/src/services/asr/errors.ts +50 -0
- package/src/services/asr/hash.d.ts +28 -0
- package/src/services/asr/hash.d.ts.map +1 -0
- package/src/services/asr/hash.ts +49 -0
- package/src/services/asr/index.d.ts +76 -0
- package/src/services/asr/index.d.ts.map +1 -0
- package/src/services/asr/index.ts +178 -0
- package/src/services/asr/types.d.ts +91 -0
- package/src/services/asr/types.d.ts.map +1 -0
- package/src/services/asr/types.ts +95 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +240 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +724 -0
- package/src/services/downloader.ts +899 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +534 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1891 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +183 -0
- package/src/services/hardware.ts +404 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +281 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +30 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +225 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +693 -0
- package/src/services/manifest/schema.d.ts +715 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +655 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +569 -0
- package/src/services/memory-arbiter.d.ts +343 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +1000 -0
- package/src/services/memory-monitor.d.ts +119 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +296 -0
- package/src/services/memory-pressure.d.ts +127 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +413 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +672 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +376 -0
- package/src/services/routing-policy.d.ts +55 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.ts +228 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +15 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/tts/errors.ts +46 -0
- package/src/services/tts/index.ts +214 -0
- package/src/services/tts/tts-audio-cache.ts +235 -0
- package/src/services/tts/types.ts +157 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +148 -0
- package/src/services/voice/embedding.ts +244 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +746 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2226 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +636 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +671 -0
- package/src/services/voice/ffi-bindings.ts +3050 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.ts +105 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +420 -0
- package/src/services/voice/voice-budget.ts +656 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,887 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Content-addressed voice-profile store with hot LRU + cold disk tiers.
|
|
3
|
+
*
|
|
4
|
+
* Each profile is one WeSpeaker ResNet34-LM centroid plus running
|
|
5
|
+
* variance (Welford), consent flags, and an entity binding. Profiles
|
|
6
|
+
* are content-addressed by `sha256(centroid_bytes)` so duplicate
|
|
7
|
+
* captures collapse and entity merges are safe.
|
|
8
|
+
*
|
|
9
|
+
* Layout under `$ELIZA_STATE_DIR/voice-profiles/`:
|
|
10
|
+
*
|
|
11
|
+
* index.json — entityId/cluster index + LRU order
|
|
12
|
+
* profiles/vp_<sha>.json — one record per profile
|
|
13
|
+
* audio/vp_<sha>/sample-*.wav — optional, consent-gated
|
|
14
|
+
*
|
|
15
|
+
* The contract:
|
|
16
|
+
* - **Hot LRU 30** in-memory records (default `hotCacheSize`).
|
|
17
|
+
* - **Cold disk cap 200** (default `coldDiskMax`).
|
|
18
|
+
* - `beginMatch()` starts at speech-start and resolves once minSpeechMs
|
|
19
|
+
* of audio has been encoded — runs in parallel with ASR.
|
|
20
|
+
* - `refine()` uses online running-mean + Welford variance.
|
|
21
|
+
* - Profiles with a non-null `entityId` are never auto-evicted.
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
import crypto from "node:crypto";
|
|
25
|
+
import fs from "node:fs";
|
|
26
|
+
import fsp from "node:fs/promises";
|
|
27
|
+
import path from "node:path";
|
|
28
|
+
import {
|
|
29
|
+
cosineSimilarity,
|
|
30
|
+
DEFAULT_VOICE_IMPRINT_MATCH_THRESHOLD,
|
|
31
|
+
type VoiceImprintMatch,
|
|
32
|
+
type VoiceImprintProfile,
|
|
33
|
+
} from "./speaker-imprint";
|
|
34
|
+
import type { VoiceInputSource } from "./types";
|
|
35
|
+
|
|
36
|
+
/**
 * Schema tag stamped onto each `vp_*.json` profile record so a reader can
 * tell which record layout it is looking at.
 */
export const VOICE_PROFILE_RECORD_SCHEMA_VERSION =
  "eliza.voice_profile_record.v1" as const;
|
|
39
|
+
|
|
40
|
+
/**
 * Consent flags carried on a voice profile. `createProfile` falls back to
 * `false` for both booleans when the caller does not provide them.
 */
export interface VoiceProfileConsentState {
  /** Whether attribution has been authorized for this profile. */
  attributionAuthorized: boolean;
  /** Whether synthesis has been authorized for this profile. */
  synthesisAuthorized: boolean;
  /** When consent was granted (presumably an ISO-8601 string — confirm with callers). */
  grantedAt?: string;
  /** Who granted consent; the identifier format is not fixed in this file. */
  grantedBy?: string;
}
|
|
46
|
+
|
|
47
|
+
/** Reference to one stored audio sample that belongs to a profile. */
export interface VoiceProfileAudioRef {
  /** Identifier of the sample. */
  sampleId: string;
  /** SHA-256 of the WAV payload (presumably hex-encoded — confirm with writer). */
  wavSha256: string;
  /** Sample length in milliseconds. */
  durationMs: number;
  /** When the sample was recorded. */
  recordedAt: string;
  /** Optional text associated with the sample. */
  referenceText?: string;
}
|
|
54
|
+
|
|
55
|
+
/** On-disk / in-memory shape of one voice profile (`profiles/vp_<sha>.json`). */
export interface VoiceProfileRecord {
  /** Always {@link VOICE_PROFILE_RECORD_SCHEMA_VERSION}. */
  schemaVersion: typeof VOICE_PROFILE_RECORD_SCHEMA_VERSION;
  /** `vp_` + hash prefix derived from the centroid the profile was created with. */
  profileId: string;
  /** Name of the embedding model; matching only compares equal models. */
  embeddingModel: string;
  /** Number of dimensions in `centroid`. */
  embeddingDim: number;
  /** L2-normalized centroid; cosine == dot. */
  centroid: number[];
  /** Welford per-dim variance accumulator (M2 / max(1, n-1)). */
  variance: number[];
  /** Welford `M2` running sum (per-dim squared diff from running mean). */
  welfordM2: number[];
  /** Observations folded in so far; a new profile starts at 1. */
  sampleCount: number;
  /** Sum of the (rounded, non-negative) durations of all observations, in ms. */
  totalDurationMs: number;
  /** ISO timestamp set when the profile is created. */
  firstObservedAt: string;
  /** ISO timestamp refreshed by every mutation in this store. */
  lastObservedAt: string;
  /** ISO timestamp of creation or of the most recent accepted `refine()`. */
  lastRefinedAt: string;
  /** Bound entity, or `null` while the profile is unbound. */
  entityId: string | null;
  /** Cluster identifier; `createProfile` generates `cluster_<uuid>` when omitted. */
  imprintClusterId: string;
  /** Profile confidence, clamped to `[0, 1]` by the store. */
  confidence: number;
  /** Consent flags for this profile. */
  consent: VoiceProfileConsentState;
  /** Audio samples attached to the profile, if any. */
  audioRefs?: VoiceProfileAudioRef[];
  /** Free-form metadata (e.g. `label` written by `bindEntity`). */
  metadata?: Record<string, unknown>;
}
|
|
78
|
+
|
|
79
|
+
/** Construction options for `VoiceProfileStore`. */
export interface VoiceProfileStoreOptions {
  /** Directory that holds `index.json` and the `profiles/` folder. */
  rootDir: string;
  /** Hot (in-memory) cache capacity; defaults to 30 and is floored at 1. */
  hotCacheSize?: number;
  /** Cold (disk) capacity; defaults to 200 and is never below the hot size. */
  coldDiskMax?: number;
  /** Minimum cosine similarity for a match; defaults to the imprint default. */
  matchThreshold?: number;
  /** Below this we open a new cluster instead of attributing. */
  unmatchedClusterThreshold?: number;
}
|
|
87
|
+
|
|
88
|
+
/** Handle returned by `beginMatch()` for an in-flight speculative lookup. */
export interface VoiceImprintMatchHandle {
  /** Resolves once minSpeechMs of audio is encoded, or `null` if no match. */
  result: Promise<VoiceImprintMatch | null>;
  /** Synchronous polling for the latest match — null until first resolve. */
  current(): VoiceImprintMatch | null;
  /** Marks the lookup cancelled; a cancelled lookup resolves to `null`. */
  cancel(): void;
}
|
|
95
|
+
|
|
96
|
+
/** Hot-cache capacity used when `hotCacheSize` is not supplied. */
const DEFAULT_HOT_CACHE = 30;
/** Disk capacity used when `coldDiskMax` is not supplied. */
const DEFAULT_COLD_DISK = 200;
/** Value used when `unmatchedClusterThreshold` is not supplied. */
const DEFAULT_UNMATCHED_THRESHOLD = 0.55;
|
|
99
|
+
|
|
100
|
+
/** Returns the current time formatted by `Date.prototype.toISOString`. */
function iso(): string {
  const now = new Date();
  return now.toISOString();
}
|
|
103
|
+
|
|
104
|
+
/** Returns the hex-encoded SHA-256 digest of `buf`. */
function sha256(buf: Buffer | Uint8Array | string): string {
  return crypto
    .createHash("sha256")
    .update(buf as Buffer)
    .digest("hex");
}
|
|
109
|
+
|
|
110
|
+
/**
 * Packs a centroid into float32 values and exposes the backing bytes as a
 * `Buffer` (a view over the typed array's memory, not a copy).
 */
function centroidToBuffer(centroid: readonly number[]): Buffer {
  const floats = Float32Array.from(centroid);
  const { buffer, byteOffset, byteLength } = floats;
  return Buffer.from(buffer, byteOffset, byteLength);
}
|
|
114
|
+
|
|
115
|
+
/**
 * Builds the content-addressed profile id: `vp_` followed by the first 32
 * hex characters of the SHA-256 of the centroid's float32 bytes.
 */
function deriveProfileId(centroid: readonly number[]): string {
  const digest = sha256(centroidToBuffer(centroid));
  return `vp_${digest.slice(0, 32)}`;
}
|
|
118
|
+
|
|
119
|
+
/** Summary row kept in `index.json` for one profile. */
interface IndexEntry {
  /** Id of the profile this row describes. */
  profileId: string;
  /** Bound entity, or `null`; only `null` rows are eviction candidates. */
  entityId: string | null;
  /** Cluster id copied from the record. */
  imprintClusterId: string;
  /** Embedding model copied from the record; used to pre-filter matches. */
  embeddingModel: string;
  /** Embedding dimension copied from the record; used to pre-filter matches. */
  embeddingDim: number;
  /** `lastObservedAt` copied from the record. */
  lastObservedAt: string;
  /** `sampleCount` copied from the record. */
  sampleCount: number;
  /** LRU order — higher = more recently touched. */
  lruRank: number;
}
|
|
130
|
+
|
|
131
|
+
/** Shape of `index.json`. */
interface IndexFile {
  /** Index format version; only `1` exists. */
  version: 1;
  /** Rank that will be handed to the next upserted entry. */
  nextLruRank: number;
  /** One row per known profile. */
  entries: IndexEntry[];
}
|
|
136
|
+
|
|
137
|
+
/** Template for an empty index: no entries, LRU counter starting at 1. */
const INITIAL_INDEX: IndexFile = {
  version: 1,
  nextLruRank: 1,
  entries: [],
};
|
|
142
|
+
|
|
143
|
+
/**
 * One step of Welford's online mean/variance recurrence, applied per
 * dimension.
 *
 * `mean` and `m2` describe the state *before* the observation; either may be
 * passed as an empty array, in which case it is treated as all zeros. The
 * inputs are not modified — fresh arrays are returned so the caller can
 * persist `m2` and derive the variance later.
 *
 * @param args.count number of observations already folded in
 * @param args.mean running mean before this observation
 * @param args.m2 running sum of squared deviations before this observation
 * @param args.observation the new sample (callers pass an L2-normalized embedding)
 * @returns the updated `(mean, m2, count)` triple
 * @throws Error when a non-empty `mean` or `m2` differs in length from `observation`
 */
export function welfordUpdate(args: {
  count: number;
  mean: readonly number[];
  m2: readonly number[];
  observation: ReadonlyArray<number>;
}): { mean: number[]; m2: number[]; count: number } {
  const { observation } = args;
  const dim = observation.length;
  const nextCount = args.count + 1;

  const meanFits = args.mean.length === 0 || args.mean.length === dim;
  const m2Fits = args.m2.length === 0 || args.m2.length === dim;
  if (!meanFits || !m2Fits) {
    throw new Error("[welfordUpdate] dim mismatch");
  }

  const mean: number[] =
    args.mean.length === dim ? [...args.mean] : new Array<number>(dim).fill(0);
  const m2: number[] =
    args.m2.length === dim ? [...args.m2] : new Array<number>(dim).fill(0);

  for (const [i, x] of observation.entries()) {
    const before = x - mean[i];
    mean[i] += before / nextCount;
    // Second factor uses the *updated* mean, per Welford's recurrence.
    m2[i] += before * (x - mean[i]);
  }

  return { mean, m2, count: nextCount };
}
|
|
175
|
+
|
|
176
|
+
/**
 * Converts a Welford `M2` accumulator into per-dimension sample variance by
 * dividing by `count - 1`, with the divisor floored at 1.
 */
export function welfordVariance(
  m2: readonly number[],
  count: number,
): number[] {
  const divisor = Math.max(1, count - 1);
  const scale = (sumSq: number): number => sumSq / divisor;
  return m2.map(scale);
}
|
|
183
|
+
|
|
184
|
+
/**
 * Decides whether an observation should be rejected as an outlier.
 *
 * A dimension counts against the observation when its absolute distance from
 * the centroid exceeds `sigmaThreshold` standard deviations (default 4).
 * Dimensions whose variance is missing or at most `1e-12` are ignored. The
 * observation is an outlier when strictly more than half of *all*
 * dimensions count against it. Used to drop cough / cross-talk samples that
 * would corrupt the centroid.
 */
export function isOutlier(args: {
  centroid: readonly number[];
  variance: readonly number[];
  observation: readonly number[];
  sigmaThreshold?: number;
}): boolean {
  const { centroid, variance, observation } = args;
  const zLimit = args.sigmaThreshold ?? 4;
  const dim = observation.length;

  let flagged = 0;
  for (let i = 0; i < dim; i += 1) {
    const v = variance[i] ?? 0;
    if (v > 1e-12) {
      const z = Math.abs(observation[i] - centroid[i]) / Math.sqrt(v);
      if (z > zLimit) flagged += 1;
    }
  }
  return flagged * 2 > dim;
}
|
|
208
|
+
|
|
209
|
+
export class VoiceProfileStore {
|
|
210
|
+
private readonly hotCacheSize: number;
|
|
211
|
+
private readonly coldDiskMax: number;
|
|
212
|
+
private readonly matchThreshold: number;
|
|
213
|
+
private readonly unmatchedThreshold: number;
|
|
214
|
+
private readonly rootDir: string;
|
|
215
|
+
private readonly profilesDir: string;
|
|
216
|
+
private readonly indexPath: string;
|
|
217
|
+
/** Hot cache: profileId → record. Insertion order = LRU order. */
|
|
218
|
+
private hot = new Map<string, VoiceProfileRecord>();
|
|
219
|
+
private indexCache: IndexFile | null = null;
|
|
220
|
+
|
|
221
|
+
/**
 * Resolves paths and limits from `options`. No filesystem access happens
 * here; call `init()` before using the store.
 */
constructor(options: VoiceProfileStoreOptions) {
  const requestedHot = options.hotCacheSize ?? DEFAULT_HOT_CACHE;
  const requestedCold = options.coldDiskMax ?? DEFAULT_COLD_DISK;

  this.rootDir = options.rootDir;
  this.profilesDir = path.join(this.rootDir, "profiles");
  this.indexPath = path.join(this.rootDir, "index.json");

  // The hot cache holds at least one record; disk capacity never drops
  // below the hot-cache capacity.
  this.hotCacheSize = Math.max(1, requestedHot);
  this.coldDiskMax = Math.max(this.hotCacheSize, requestedCold);

  this.matchThreshold =
    options.matchThreshold ?? DEFAULT_VOICE_IMPRINT_MATCH_THRESHOLD;
  this.unmatchedThreshold =
    options.unmatchedClusterThreshold ?? DEFAULT_UNMATCHED_THRESHOLD;
}
|
|
235
|
+
|
|
236
|
+
/** Similarity a profile must reach to be returned by `findBestMatch`. */
get matchThresholdValue(): number {
  const { matchThreshold } = this;
  return matchThreshold;
}
|
|
239
|
+
|
|
240
|
+
/** Configured `unmatchedClusterThreshold` (or its default). */
get unmatchedClusterThresholdValue(): number {
  const { unmatchedThreshold } = this;
  return unmatchedThreshold;
}
|
|
243
|
+
|
|
244
|
+
/** Effective hot-cache capacity. Public for tests / management UI. */
get hotCacheSizeValue(): number {
  const { hotCacheSize } = this;
  return hotCacheSize;
}
|
|
248
|
+
|
|
249
|
+
/** Effective cap on the number of indexed profiles kept on disk. */
get coldDiskMaxValue(): number {
  const { coldDiskMax } = this;
  return coldDiskMax;
}
|
|
252
|
+
|
|
253
|
+
/**
 * Prepares the store for use: creates the `profiles/` directory, writes an
 * empty `index.json` when none exists, and loads the index into memory.
 */
async init(): Promise<void> {
  await fsp.mkdir(this.profilesDir, { recursive: true });
  if (!fs.existsSync(this.indexPath)) {
    // Hand writeIndex a fresh object rather than the module-level
    // INITIAL_INDEX: writeIndex caches whatever it is given, and later
    // upserts mutate that cached object in place. Sharing the constant
    // would leak one store's entries and LRU counter into every other
    // store initialised in the same process.
    await this.writeIndex({ ...INITIAL_INDEX, entries: [] });
  }
  await this.readIndex();
}
|
|
260
|
+
|
|
261
|
+
/**
 * Returns the in-memory index, loading it from `index.json` on first use.
 *
 * A missing, unreadable, or non-object index file yields a fresh empty
 * index (best-effort, as before). A parsed index is repaired in place:
 * `entries` must be an array, and `nextLruRank` must be an integer greater
 * than every rank already handed out, so a damaged counter cannot produce
 * colliding LRU ranks.
 */
private async readIndex(): Promise<IndexFile> {
  if (this.indexCache) return this.indexCache;

  let parsed: unknown;
  try {
    parsed = JSON.parse(await fsp.readFile(this.indexPath, "utf8"));
  } catch {
    parsed = null;
  }

  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    this.indexCache = { ...INITIAL_INDEX, entries: [] };
    return this.indexCache;
  }

  const index = parsed as IndexFile;
  // A truthy non-array `entries` would crash later lookups and iteration.
  if (!Array.isArray(index.entries)) index.entries = [];

  let highestRank = 0;
  for (const entry of index.entries) {
    const rank: unknown = entry?.lruRank;
    if (typeof rank === "number" && Number.isFinite(rank) && rank > highestRank) {
      highestRank = rank;
    }
  }
  if (!Number.isInteger(index.nextLruRank) || index.nextLruRank <= highestRank) {
    index.nextLruRank = highestRank + 1;
  }

  index.version = 1;
  this.indexCache = index;
  return index;
}
|
|
276
|
+
|
|
277
|
+
/**
 * Adopts `index` as the cached index and persists it: the JSON is written
 * to `index.json.tmp` and then renamed over `index.json`.
 */
private async writeIndex(index: IndexFile): Promise<void> {
  this.indexCache = index;
  const stagingPath = `${this.indexPath}.tmp`;
  const serialized = JSON.stringify(index, null, 2);
  await fsp.writeFile(stagingPath, serialized, "utf8");
  await fsp.rename(stagingPath, this.indexPath);
}
|
|
283
|
+
|
|
284
|
+
/**
 * Maps a profile id to its JSON file under `profiles/`. Any character
 * outside `[a-zA-Z0-9._-]` is replaced by `_`, so the id cannot introduce
 * path separators.
 */
private profilePath(profileId: string): string {
  const fileName = `${profileId.replace(/[^a-zA-Z0-9._-]/g, "_")}.json`;
  return path.join(this.profilesDir, fileName);
}
|
|
288
|
+
|
|
289
|
+
/**
 * Loads and parses a profile file. Any failure (missing file, unreadable
 * file, invalid JSON) is reported as `null`; the parsed value is not
 * validated against `VoiceProfileRecord`.
 */
private async readProfileFromDisk(
  profileId: string,
): Promise<VoiceProfileRecord | null> {
  try {
    const file = this.profilePath(profileId);
    const text = await fsp.readFile(file, "utf8");
    return JSON.parse(text) as VoiceProfileRecord;
  } catch {
    return null;
  }
}
|
|
299
|
+
|
|
300
|
+
/**
 * Persists a record: the JSON is written to `<profile path>.tmp` and then
 * renamed over the profile file.
 */
private async writeProfileToDisk(record: VoiceProfileRecord): Promise<void> {
  const stagingPath = `${this.profilePath(record.profileId)}.tmp`;
  const serialized = JSON.stringify(record, null, 2);
  await fsp.writeFile(stagingPath, serialized, "utf8");
  await fsp.rename(stagingPath, this.profilePath(record.profileId));
}
|
|
305
|
+
|
|
306
|
+
/**
 * Marks `record` as most recently used in the hot cache and trims the cache
 * back to `hotCacheSize`. Relies on `Map` preserving insertion order: the
 * first key is always the least recently touched.
 */
private touchHot(record: VoiceProfileRecord): void {
  const { profileId } = record;
  // Delete-then-set moves the key to the end of the Map's order.
  this.hot.delete(profileId);
  this.hot.set(profileId, record);

  // Evicted records stay on disk; this only drops the in-memory copy.
  for (const stalest of this.hot.keys()) {
    if (!(this.hot.size > this.hotCacheSize)) break;
    this.hot.delete(stalest);
  }
}
|
|
319
|
+
|
|
320
|
+
/**
 * Inserts or replaces the index row for `record`, assigning it the next LRU
 * rank, then enforces the disk cap and persists the index.
 */
private async upsertIndexEntry(record: VoiceProfileRecord): Promise<void> {
  const index = await this.readIndex();

  // Every upsert consumes a rank, so the touched row becomes the newest.
  const lruRank = index.nextLruRank;
  index.nextLruRank = lruRank + 1;

  const row: IndexEntry = {
    profileId: record.profileId,
    entityId: record.entityId,
    imprintClusterId: record.imprintClusterId,
    embeddingModel: record.embeddingModel,
    embeddingDim: record.embeddingDim,
    lastObservedAt: record.lastObservedAt,
    sampleCount: record.sampleCount,
    lruRank,
  };

  const slot = index.entries.findIndex(
    (candidate) => candidate.profileId === record.profileId,
  );
  if (slot < 0) {
    index.entries.push(row);
  } else {
    index.entries[slot] = row;
  }

  await this.enforceColdLimit(index);
  await this.writeIndex(index);
}
|
|
345
|
+
|
|
346
|
+
/**
 * Trims the index back toward `coldDiskMax` by evicting unbound profiles,
 * least recently used first.
 *
 * A candidate is kept when its on-disk record turns out to be bound to an
 * entity, or has confidence >= 0.5, or has at least 3 samples. A candidate
 * whose file cannot be read is evicted. If too few candidates qualify, the
 * index is left above the cap. `index.entries` is reassigned in place; the
 * caller is responsible for persisting the index.
 */
private async enforceColdLimit(index: IndexFile): Promise<void> {
  if (index.entries.length <= this.coldDiskMax) return;

  const unbound = index.entries.filter((entry) => entry.entityId === null);
  unbound.sort((a, b) => a.lruRank - b.lruRank);

  for (const victim of unbound) {
    if (!(index.entries.length > this.coldDiskMax)) break;
    if (!victim) break;

    const onDisk = await this.readProfileFromDisk(victim.profileId);
    if (onDisk) {
      const bound = onDisk.entityId !== null;
      const established = onDisk.confidence >= 0.5 || onDisk.sampleCount >= 3;
      if (bound || established) continue;
    }

    // Best-effort unlink: a file that is already gone is not an error.
    await fsp.unlink(this.profilePath(victim.profileId)).catch(() => {});
    index.entries = index.entries.filter(
      (entry) => entry.profileId !== victim.profileId,
    );
    this.hot.delete(victim.profileId);
  }
}
|
|
371
|
+
|
|
372
|
+
/**
 * Fetches a record from the hot cache, falling back to disk. A record that
 * is found either way is moved to the most-recently-used position.
 *
 * @returns the record, or `null` when it is neither cached nor readable
 */
private async ensureLoaded(
  profileId: string,
): Promise<VoiceProfileRecord | null> {
  const record =
    this.hot.get(profileId) ?? (await this.readProfileFromDisk(profileId));
  if (record) this.touchHot(record);
  return record;
}
|
|
384
|
+
|
|
385
|
+
/**
 * Scans every indexed profile with the same embedding model and dimension
 * and returns the one with the highest cosine similarity, provided it is
 * at least `matchThreshold`; otherwise `null`. On equal similarity the
 * earlier index entry wins.
 *
 * The reported confidence is the similarity's position between the
 * threshold and 1, scaled by the profile's own confidence, clamped to
 * `[0, 1]`. Every profile inspected is loaded through the hot cache.
 */
async findBestMatch(args: {
  embedding: Float32Array;
  embeddingModel: string;
}): Promise<VoiceImprintMatch | null> {
  const clamp01 = (v: number): number => Math.max(0, Math.min(1, v));
  // Width of the range above the threshold, guarded against division by ~0.
  const headroom = Math.max(0.0001, 1 - this.matchThreshold);

  const { entries } = await this.readIndex();
  let best: VoiceImprintMatch | null = null;

  for (const entry of entries) {
    const comparable =
      entry.embeddingModel === args.embeddingModel &&
      entry.embeddingDim === args.embedding.length;
    if (!comparable) continue;

    const record = await this.ensureLoaded(entry.profileId);
    if (!record) continue;

    const similarity = cosineSimilarity(args.embedding, record.centroid);
    if (similarity < this.matchThreshold) continue;

    const confidence = clamp01(
      ((similarity - this.matchThreshold) / headroom) *
        clamp01(record.confidence),
    );
    if (best === null || similarity > best.similarity) {
      best = {
        profile: this.recordToImprintProfile(record),
        similarity,
        confidence,
      };
    }
  }
  return best;
}
|
|
418
|
+
|
|
419
|
+
/**
 * Projects a stored record onto the `VoiceImprintProfile` shape. The
 * cluster id is exposed as `sourceScopeId`; `label`, `displayName` and
 * `sourceKind` are always left `undefined`.
 */
private recordToImprintProfile(
  record: VoiceProfileRecord,
): VoiceImprintProfile {
  const {
    profileId,
    centroid,
    embeddingModel,
    sampleCount,
    confidence,
    entityId,
    imprintClusterId,
    metadata,
  } = record;
  return {
    id: profileId,
    centroidEmbedding: centroid,
    embeddingModel,
    sampleCount,
    confidence,
    label: undefined,
    displayName: undefined,
    entityId,
    sourceKind: undefined,
    sourceScopeId: imprintClusterId,
    metadata,
  };
}
|
|
436
|
+
|
|
437
|
+
/**
 * Starts a speculative speaker lookup and returns a handle to it.
 *
 * `args.embed` is invoked immediately, during this call; once it resolves
 * with an embedding the store is searched via `findBestMatch`. The handle's
 * `result` resolves to the match, or to `null` when `embed` yields nothing,
 * nothing matches, or the lookup was cancelled (through `cancel()` or the
 * optional `signal`). Cancellation does not interrupt `embed` itself — it
 * only discards the outcome. A rejection from `embed` or the lookup
 * propagates through `result`.
 */
beginMatch(args: {
  embed: () => Promise<{
    embedding: Float32Array;
    embeddingModel: string;
  } | null>;
  signal?: AbortSignal;
}): VoiceImprintMatchHandle {
  const { signal } = args;
  let latest: VoiceImprintMatch | null = null;
  let cancelled = false;
  const markCancelled = (): void => {
    cancelled = true;
  };

  if (signal) {
    if (signal.aborted) {
      cancelled = true;
    } else {
      signal.addEventListener("abort", markCancelled, { once: true });
    }
  }

  const lookup = async (): Promise<VoiceImprintMatch | null> => {
    try {
      const embedded = await args.embed();
      if (cancelled || !embedded) return null;
      const match = await this.findBestMatch(embedded);
      if (cancelled) return null;
      latest = match;
      return match;
    } finally {
      // Detach the listener once the lookup has settled either way.
      if (signal) signal.removeEventListener("abort", markCancelled);
    }
  };

  return {
    result: lookup(),
    current: () => latest,
    cancel: () => {
      markCancelled();
    },
  };
}
|
|
480
|
+
|
|
481
|
+
/**
 * Creates a profile from a single capture, writes it to disk, caches it,
 * and registers it in the index.
 *
 * The profile id is derived from the centroid bytes. Variance and `M2`
 * start at zero, `sampleCount` at 1. Confidence is clamped to `[0, 1]`,
 * the duration is rounded and floored at 0, and consent flags default to
 * `false`.
 *
 * NOTE(review): if a record with the same derived id already exists, the
 * write and index upsert below replace it — confirm callers match/refine
 * before creating.
 */
async createProfile(args: {
  centroid: Float32Array;
  embeddingModel: string;
  entityId?: string | null;
  imprintClusterId?: string;
  confidence: number;
  durationMs: number;
  consent?: Partial<VoiceProfileConsentState>;
  audioRef?: VoiceProfileAudioRef;
  metadata?: Record<string, unknown>;
}): Promise<VoiceProfileRecord> {
  const now = iso();
  const centroid = Array.from(args.centroid);
  const dim = centroid.length;

  const consent: VoiceProfileConsentState = {
    attributionAuthorized: args.consent?.attributionAuthorized ?? false,
    synthesisAuthorized: args.consent?.synthesisAuthorized ?? false,
  };
  if (args.consent?.grantedAt) consent.grantedAt = args.consent.grantedAt;
  if (args.consent?.grantedBy) consent.grantedBy = args.consent.grantedBy;

  const record: VoiceProfileRecord = {
    schemaVersion: VOICE_PROFILE_RECORD_SCHEMA_VERSION,
    profileId: deriveProfileId(centroid),
    embeddingModel: args.embeddingModel,
    embeddingDim: dim,
    centroid,
    variance: new Array<number>(dim).fill(0),
    welfordM2: new Array<number>(dim).fill(0),
    sampleCount: 1,
    totalDurationMs: Math.max(0, Math.round(args.durationMs)),
    firstObservedAt: now,
    lastObservedAt: now,
    lastRefinedAt: now,
    entityId: args.entityId ?? null,
    imprintClusterId:
      args.imprintClusterId ?? `cluster_${crypto.randomUUID()}`,
    confidence: Math.max(0, Math.min(1, args.confidence)),
    consent,
  };
  // Optional fields are only materialised when supplied.
  if (args.audioRef) record.audioRefs = [args.audioRef];
  if (args.metadata) record.metadata = args.metadata;

  await this.writeProfileToDisk(record);
  this.touchHot(record);
  await this.upsertIndexEntry(record);
  return record;
}
|
|
531
|
+
|
|
532
|
+
/**
 * Folds one new embedding into an existing profile.
 *
 * The centroid moves by the online running mean and is re-normalized to
 * unit length; the Welford `M2` accumulator and derived variance are
 * updated, and confidence becomes the sample-count-weighted average of the
 * old value and the (clamped) new one. The profile id does not change.
 *
 * When `dropOutliers` is not `false` and the profile already has at least
 * four samples, an observation flagged by `isOutlier` is ignored and the
 * record is returned unchanged (nothing is written).
 *
 * @returns the updated record, the untouched record for a rejected
 *   outlier, or `null` when the profile does not exist
 * @throws Error when the embedding length differs from the profile's dimension
 */
async refine(args: {
  profileId: string;
  embedding: Float32Array;
  durationMs: number;
  confidence: number;
  audioRef?: VoiceProfileAudioRef;
  dropOutliers?: boolean;
}): Promise<VoiceProfileRecord | null> {
  const record = await this.ensureLoaded(args.profileId);
  if (!record) return null;
  if (record.embeddingDim !== args.embedding.length) {
    throw new Error(
      `[VoiceProfileStore.refine] embedding dim mismatch: ${record.embeddingDim} vs ${args.embedding.length}`,
    );
  }

  const obs = Array.from(args.embedding);
  const screenOutliers = args.dropOutliers ?? true;
  if (
    screenOutliers &&
    record.sampleCount >= 4 &&
    isOutlier({
      centroid: record.centroid,
      variance: record.variance,
      observation: obs,
    })
  ) {
    return record;
  }

  const stats = welfordUpdate({
    count: record.sampleCount,
    mean: record.centroid,
    m2: record.welfordM2,
    observation: obs,
  });

  // Re-normalize the mean (kept on the unit sphere for cosine).
  const sumSq = stats.mean.reduce((acc, v) => acc + v * v, 0);
  const inv = sumSq > 0 ? 1 / Math.sqrt(sumSq) : 1;
  const centroid = stats.mean.map((v) => v * inv);

  const incomingConfidence = Math.max(0, Math.min(1, args.confidence));
  const blendedConfidence =
    (record.confidence * record.sampleCount + incomingConfidence) /
    (record.sampleCount + 1);

  const now = iso();
  const updated: VoiceProfileRecord = {
    ...record,
    centroid,
    welfordM2: stats.m2,
    variance: welfordVariance(stats.m2, stats.count),
    sampleCount: stats.count,
    totalDurationMs:
      record.totalDurationMs + Math.max(0, Math.round(args.durationMs)),
    confidence: Math.max(0, Math.min(1, blendedConfidence)),
    lastRefinedAt: now,
    lastObservedAt: now,
    audioRefs: args.audioRef
      ? [...(record.audioRefs ?? []), args.audioRef]
      : record.audioRefs,
  };

  await this.writeProfileToDisk(updated);
  this.touchHot(updated);
  await this.upsertIndexEntry(updated);
  return updated;
}
|
|
604
|
+
|
|
605
|
+
/**
 * Binds a profile to an entity and persists the change. A non-empty
 * `label` is stored under `metadata.label`; existing metadata is kept.
 *
 * @returns the updated record, or `null` when the profile does not exist
 */
async bindEntity(args: {
  profileId: string;
  entityId: string;
  label?: string;
}): Promise<VoiceProfileRecord | null> {
  const record = await this.ensureLoaded(args.profileId);
  if (!record) return null;

  const metadata: Record<string, unknown> = { ...(record.metadata ?? {}) };
  if (args.label) metadata.label = args.label;

  const updated: VoiceProfileRecord = {
    ...record,
    entityId: args.entityId,
    lastObservedAt: iso(),
    metadata,
  };

  await this.writeProfileToDisk(updated);
  this.touchHot(updated);
  await this.upsertIndexEntry(updated);
  return updated;
}
|
|
626
|
+
|
|
627
|
+
/**
 * Applies a metadata patch to a profile and persists it. Keys whose patch
 * value is `null` or `undefined` are removed; every other key overwrites
 * the stored value. Used by the management routes for rename /
 * relationship / retention edits.
 *
 * @returns the updated record, or `null` when the profile does not exist
 */
async updateMetadata(
  profileId: string,
  patch: Record<string, unknown>,
): Promise<VoiceProfileRecord | null> {
  const record = await this.ensureLoaded(profileId);
  if (!record) return null;

  const metadata: Record<string, unknown> = { ...(record.metadata ?? {}) };
  for (const key of Object.keys(patch)) {
    const value = patch[key];
    if (value == null) {
      delete metadata[key];
    } else {
      metadata[key] = value;
    }
  }

  const updated: VoiceProfileRecord = {
    ...record,
    metadata,
    lastObservedAt: iso(),
  };

  await this.writeProfileToDisk(updated);
  this.touchHot(updated);
  await this.upsertIndexEntry(updated);
  return updated;
}
|
|
653
|
+
|
|
654
|
+
/**
 * Clear the entity binding of a profile.
 *
 * Sets `entityId` to `null`, refreshes `lastObservedAt`, then persists the
 * record, passes it to `touchHot`, and upserts its index entry.
 *
 * @returns The updated record, or `null` when the profile could not be loaded.
 */
async unbindEntity(profileId: string): Promise<VoiceProfileRecord | null> {
  const existing = await this.ensureLoaded(profileId);
  if (!existing) return null;

  const unbound: VoiceProfileRecord = {
    ...existing,
    entityId: null,
    lastObservedAt: iso(),
  };
  await this.writeProfileToDisk(unbound);
  this.touchHot(unbound);
  await this.upsertIndexEntry(unbound);
  return unbound;
}
|
|
667
|
+
|
|
668
|
+
/**
 * Fetch a single profile by id.
 *
 * @returns Whatever `ensureLoaded` resolves to for `profileId`: the record,
 * or `null`.
 */
async get(profileId: string): Promise<VoiceProfileRecord | null> {
  const record = await this.ensureLoaded(profileId);
  return record;
}
|
|
671
|
+
|
|
672
|
+
/**
 * Return every profile referenced by the index, in index order.
 *
 * Entries are loaded one at a time through `ensureLoaded`; entries whose
 * profile cannot be loaded are left out of the result.
 */
async list(): Promise<VoiceProfileRecord[]> {
  const { entries } = await this.readIndex();
  const records: VoiceProfileRecord[] = [];
  for (const { profileId } of entries) {
    const loaded = await this.ensureLoaded(profileId);
    if (!loaded) continue;
    records.push(loaded);
  }
  return records;
}
|
|
681
|
+
|
|
682
|
+
/**
 * For tests / management: drop a profile.
 *
 * Removes the profile file (best-effort: a failed unlink is ignored), evicts
 * the profile from the hot cache, and rewrites the index without its entry.
 *
 * @returns `false` when the profile could not be loaded, `true` once it has
 * been removed.
 * @throws Error when the profile has an `entityId` and
 * `args.allowBoundEntity` is not set.
 */
async deleteProfile(args: {
  profileId: string;
  allowBoundEntity?: boolean;
}): Promise<boolean> {
  const { profileId, allowBoundEntity } = args;
  const record = await this.ensureLoaded(profileId);
  if (!record) return false;

  const boundTo = record.entityId;
  if (boundTo && !allowBoundEntity) {
    throw new Error(
      `[VoiceProfileStore.deleteProfile] refusing to delete ${profileId}: bound to entity ${boundTo}`,
    );
  }

  // Resolve the path outside the try so only the unlink itself is best-effort.
  const filePath = this.profilePath(profileId);
  try {
    await fsp.unlink(filePath);
  } catch {
    // Deliberately ignored: the cache and index are cleaned up regardless.
  }
  this.hot.delete(profileId);

  const index = await this.readIndex();
  index.entries = index.entries.filter(
    (entry) => entry.profileId !== profileId,
  );
  await this.writeIndex(index);
  return true;
}
|
|
701
|
+
|
|
702
|
+
/**
 * Merge `sourceId` into `targetId`: a sample-count-weighted centroid
 * combine (with the Chan parallel-variance update for Welford M2), union
 * of audio refs, summed counts/durations, and confidence average. The
 * target's metadata + entity binding win; an unbound target inherits the
 * source's `entityId`. The source profile is deleted. Returns the merged
 * target, or `null` if either profile is missing.
 *
 * Refuses when both carry a *different* `entityId` unless
 * `allowEntityOverwrite` is set — merging two bound identities is a
 * destructive operation the caller must opt into.
 *
 * Audio refs are unioned by `sampleId`: the target's refs are kept as-is and
 * each source `sampleId` not already present is appended exactly once.
 *
 * @throws Error when `sourceId === targetId`, when the two profiles differ in
 * `embeddingModel` or `embeddingDim`, or on an un-opted-in entity conflict.
 */
async mergeProfiles(args: {
  sourceId: string;
  targetId: string;
  allowEntityOverwrite?: boolean;
}): Promise<VoiceProfileRecord | null> {
  if (args.sourceId === args.targetId) {
    throw new Error(
      "[VoiceProfileStore.mergeProfiles] source and target are identical",
    );
  }
  const source = await this.ensureLoaded(args.sourceId);
  const target = await this.ensureLoaded(args.targetId);
  if (!source || !target) return null;
  if (
    source.embeddingModel !== target.embeddingModel ||
    source.embeddingDim !== target.embeddingDim
  ) {
    throw new Error(
      `[VoiceProfileStore.mergeProfiles] embedding mismatch: ${target.embeddingModel}/${target.embeddingDim} vs ${source.embeddingModel}/${source.embeddingDim}`,
    );
  }
  if (
    source.entityId &&
    target.entityId &&
    source.entityId !== target.entityId &&
    !args.allowEntityOverwrite
  ) {
    throw new Error(
      `[VoiceProfileStore.mergeProfiles] entity conflict: target ${target.entityId} vs source ${source.entityId}`,
    );
  }

  const dim = target.embeddingDim;
  // Each side counts as at least one sample so the weights never sum to zero.
  const nA = Math.max(1, target.sampleCount);
  const nB = Math.max(1, source.sampleCount);
  const total = nA + nB;
  const mean = new Array<number>(dim).fill(0);
  const m2 = new Array<number>(dim).fill(0);
  for (let i = 0; i < dim; i += 1) {
    const a = target.centroid[i] ?? 0;
    const b = source.centroid[i] ?? 0;
    mean[i] = (a * nA + b * nB) / total;
    // Chan et al. pairwise update: M2 = M2_a + M2_b + delta^2 * nA * nB / n.
    const delta = b - a;
    m2[i] =
      (target.welfordM2[i] ?? 0) +
      (source.welfordM2[i] ?? 0) +
      (delta * delta * nA * nB) / total;
  }

  // Rescale the combined mean to unit length; an all-zero mean is left as-is.
  let sumSq = 0;
  for (let i = 0; i < dim; i += 1) sumSq += mean[i] * mean[i];
  const inv = sumSq > 0 ? 1 / Math.sqrt(sumSq) : 1;
  const centroid = mean.map((v) => v * inv);

  const mergedAudio = [...(target.audioRefs ?? [])];
  const seen = new Set(mergedAudio.map((r) => r.sampleId));
  for (const ref of source.audioRefs ?? []) {
    if (seen.has(ref.sampleId)) continue;
    // Record the id so a sampleId repeated within the source is added once.
    seen.add(ref.sampleId);
    mergedAudio.push(ref);
  }

  const now = iso();
  const updated: VoiceProfileRecord = {
    ...target,
    centroid,
    welfordM2: m2,
    variance: welfordVariance(m2, total),
    sampleCount: total,
    totalDurationMs: target.totalDurationMs + source.totalDurationMs,
    confidence: Math.max(
      0,
      Math.min(1, (target.confidence * nA + source.confidence * nB) / total),
    ),
    entityId: target.entityId ?? source.entityId,
    // Timestamps are compared as strings: earliest first-seen, latest last-seen.
    firstObservedAt:
      target.firstObservedAt < source.firstObservedAt
        ? target.firstObservedAt
        : source.firstObservedAt,
    lastObservedAt:
      target.lastObservedAt > source.lastObservedAt
        ? target.lastObservedAt
        : source.lastObservedAt,
    lastRefinedAt: now,
    // Target keys are spread last so they override the source's on conflict.
    metadata: { ...(source.metadata ?? {}), ...(target.metadata ?? {}) },
    ...(mergedAudio.length ? { audioRefs: mergedAudio } : {}),
  };
  await this.writeProfileToDisk(updated);
  this.touchHot(updated);
  await this.upsertIndexEntry(updated);
  // The source may itself be bound; the merge has already resolved that.
  await this.deleteProfile({
    profileId: source.profileId,
    allowBoundEntity: true,
  });
  return updated;
}
|
|
804
|
+
|
|
805
|
+
/**
 * Move the audio samples listed in `sampleIds` from `profileId` into a newly
 * created profile.
 *
 * Because only the running centroid and Welford accumulators are stored (no
 * per-utterance embeddings), nothing is re-clustered: the new profile starts
 * as a copy of the parent record and the split is purely a reassignment of
 * audio samples. Both profiles should be re-refined from fresh captures to
 * diverge. The new profile is unbound (`entityId: null`), receives a fresh
 * imprint cluster id, and records its parent under the `splitFrom` metadata
 * key. Its id is derived from a hash of the sorted moved sample ids.
 *
 * @returns The updated original plus the new split profile, or `null` when
 * the profile could not be loaded.
 * @throws Error when none of `sampleIds` match an audio ref of the profile.
 */
async splitProfile(args: {
  profileId: string;
  sampleIds: string[];
}): Promise<{
  original: VoiceProfileRecord;
  split: VoiceProfileRecord;
} | null> {
  const parent = await this.ensureLoaded(args.profileId);
  if (!parent) return null;

  const wanted = new Set(args.sampleIds);
  const allRefs = parent.audioRefs ?? [];
  const moved = allRefs.filter((ref) => wanted.has(ref.sampleId));
  const kept = allRefs.filter((ref) => !wanted.has(ref.sampleId));
  if (moved.length === 0) {
    throw new Error(
      "[VoiceProfileStore.splitProfile] no matching sampleIds to split out",
    );
  }

  const now = iso();

  let movedDuration = 0;
  for (const ref of moved) movedDuration += ref.durationMs || 0;
  const movedDurationRounded = Math.round(movedDuration);

  // Sorting makes the id independent of the order the samples appear in.
  const sortedIds = moved.map((ref) => ref.sampleId).sort();
  const splitId = `vp_split_${sha256(sortedIds.join("|")).slice(0, 28)}`;

  const splitRecord: VoiceProfileRecord = {
    ...parent,
    profileId: splitId,
    sampleCount: Math.max(1, moved.length),
    totalDurationMs: Math.max(0, movedDurationRounded),
    entityId: null,
    firstObservedAt: now,
    lastObservedAt: now,
    lastRefinedAt: now,
    imprintClusterId: `cluster_${crypto.randomUUID()}`,
    metadata: { ...(parent.metadata ?? {}), splitFrom: parent.profileId },
    audioRefs: moved,
  };

  // Counts and durations are floored so the parent never drops below 1 / 0.
  const original: VoiceProfileRecord = {
    ...parent,
    sampleCount: Math.max(1, parent.sampleCount - moved.length),
    totalDurationMs: Math.max(
      0,
      parent.totalDurationMs - movedDurationRounded,
    ),
    lastObservedAt: now,
    audioRefs: kept,
  };

  // Persist the new profile first, then the trimmed parent.
  await this.writeProfileToDisk(splitRecord);
  this.touchHot(splitRecord);
  await this.upsertIndexEntry(splitRecord);

  await this.writeProfileToDisk(original);
  this.touchHot(original);
  await this.upsertIndexEntry(original);

  return { original, split: splitRecord };
}
|
|
874
|
+
}
|
|
875
|
+
|
|
876
|
+
/**
 * Snapshot of a single observation, handed to downstream attribution code.
 */
export interface VoiceProfileObservation {
  /** Id of the profile this observation is attributed to. */
  profileId: string;
  /** Imprint cluster id associated with the observation. */
  imprintClusterId: string;
  /** Entity id carried by the observation, or `null` when there is none. */
  entityId: string | null;
  /** Embedding vector for the observation. */
  embedding: Float32Array;
  /** Identifier of the model that produced `embedding`. */
  embeddingModel: string;
  /** Confidence score. NOTE(review): presumably in [0, 1] — confirm. */
  confidence: number;
  /** Input source of the audio, when provided. */
  source?: VoiceInputSource;
  /** Start of the observed span. NOTE(review): presumably milliseconds — confirm reference point. */
  startMs?: number;
  /** End of the observed span. NOTE(review): presumably milliseconds — confirm reference point. */
  endMs?: number;
}
|