@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +81 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +7 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +54 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1171 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +190 -0
- package/src/routes/local-inference-asr-route.ts +213 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +423 -0
- package/src/routes/local-inference-compat-routes.ts +782 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1398 -0
- package/src/runtime/index.d.ts +14 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +27 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/asr/errors.d.ts +21 -0
- package/src/services/asr/errors.d.ts.map +1 -0
- package/src/services/asr/errors.ts +50 -0
- package/src/services/asr/hash.d.ts +28 -0
- package/src/services/asr/hash.d.ts.map +1 -0
- package/src/services/asr/hash.ts +49 -0
- package/src/services/asr/index.d.ts +76 -0
- package/src/services/asr/index.d.ts.map +1 -0
- package/src/services/asr/index.ts +178 -0
- package/src/services/asr/types.d.ts +91 -0
- package/src/services/asr/types.d.ts.map +1 -0
- package/src/services/asr/types.ts +95 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +240 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +724 -0
- package/src/services/downloader.ts +899 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +534 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1891 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +183 -0
- package/src/services/hardware.ts +404 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +281 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +30 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +225 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +693 -0
- package/src/services/manifest/schema.d.ts +715 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +655 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +569 -0
- package/src/services/memory-arbiter.d.ts +343 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +1000 -0
- package/src/services/memory-monitor.d.ts +119 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +296 -0
- package/src/services/memory-pressure.d.ts +127 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +413 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +672 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +376 -0
- package/src/services/routing-policy.d.ts +55 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.ts +228 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +15 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/tts/errors.ts +46 -0
- package/src/services/tts/index.ts +214 -0
- package/src/services/tts/tts-audio-cache.ts +235 -0
- package/src/services/tts/types.ts +157 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +148 -0
- package/src/services/voice/embedding.ts +244 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +746 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2226 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +636 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +671 -0
- package/src/services/voice/ffi-bindings.ts +3050 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.ts +105 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +420 -0
- package/src/services/voice/voice-budget.ts +656 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,656 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Voice-budget allocator — single arbiter of the co-resident memory budget
|
|
3
|
+
* for the whole voice + text bundle (text LM, drafter, ASR, TTS, embedding,
|
|
4
|
+
* VAD, wake-word, turn-detector, emotion classifier, speaker encoder).
|
|
5
|
+
*
|
|
6
|
+
* Today's `ram-budget.ts` is per-tier: it decides whether ONE text bundle
|
|
7
|
+
* fits a host. `voice-budget.ts` is the cross-model layer the brief
|
|
8
|
+
* mandated in `.swarm/VOICE_WAVE_2.md` §H4 and R9 §4 — every model loader
|
|
9
|
+
* calls `reserve()` before it loads weights, releases on unload, and
|
|
10
|
+
* `reserve()` walks the residents under contention by eviction priority
|
|
11
|
+
* (cold → warm → hot) until the requested amount fits.
|
|
12
|
+
*
|
|
13
|
+
* Priorities (from R9 §4.1, mapped to `ResidentModelRole`):
|
|
14
|
+
*
|
|
15
|
+
* - **hot** (priority ≥ 40): `text-target`, `tts`, `asr` — never load
|
|
16
|
+
* on demand, never evicted before pressure-of-last-resort.
|
|
17
|
+
* - **warm** (priority 25–35): `vad`, `embedding` — may be evicted but
|
|
18
|
+
* reload is expensive.
|
|
19
|
+
* - **cold** (priority ≤ 20): `speaker-id` (18), `emotion` (15),
|
|
20
|
+
* `vision` (20), `drafter` (10) — load-on-demand; first to evict.
|
|
21
|
+
*
|
|
22
|
+
* Eviction policy: walk ascending priority (cheapest first) until enough
|
|
23
|
+
* memory has been reclaimed. The text target evicts only when it is
|
|
24
|
+
* literally the only resident role and pressure persists (matches
|
|
25
|
+
* `SharedResourceRegistry.evictLowestPriorityRole` semantics).
|
|
26
|
+
*
|
|
27
|
+
* The allocator is **memory-only** — it does not load weights. The caller
|
|
28
|
+
* (TTS engine, ASR loader, etc.) holds the typed reservation and runs
|
|
29
|
+
* `release()` on unload.
|
|
30
|
+
*
|
|
31
|
+
* Wire-up plan (handed to follow-up commits, NOT done by I9):
|
|
32
|
+
* - `ffi-streaming-backend.ts` → `reserve(role="text-target")` + `reserve(role="drafter")` at spawn.
|
|
33
|
+
* - `voice/pipeline.ts` → `reserve(role="tts", bytes=transientPeakMb*MB)` per synth.
|
|
34
|
+
* - `voice/wake-word.ts`, `vad.ts`, `eot-classifier.ts` → reserve at session arm.
|
|
35
|
+
* - I2/I3 add `emotion` + `speaker-id` reservations when those models register.
|
|
36
|
+
*
|
|
37
|
+
* NOTE: the wire-up is intentionally separate from the allocator
|
|
38
|
+
* implementation because the in-flight I-agents (I1/I2/I3/I5) own those
|
|
39
|
+
* loader files and we must not race their edits. The allocator + the
|
|
40
|
+
* `evictionPriority` hooks are in place; the loaders adopt it as they
|
|
41
|
+
* land.
|
|
42
|
+
*/
|
|
43
|
+
|
|
44
|
+
import {
|
|
45
|
+
classifyDeviceTier,
|
|
46
|
+
type DeviceTier,
|
|
47
|
+
type DeviceTierAssessment,
|
|
48
|
+
effectiveModelMemoryGb,
|
|
49
|
+
} from "../device-tier";
|
|
50
|
+
import type { HardwareProbe } from "../types";
|
|
51
|
+
import {
|
|
52
|
+
RESIDENT_ROLE_PRIORITY,
|
|
53
|
+
type ResidentModelRole,
|
|
54
|
+
} from "./shared-resources";
|
|
55
|
+
|
|
56
|
+
const BYTES_PER_MB = 1024 * 1024;
|
|
57
|
+
const BYTES_PER_GB = 1024 ** 3;
|
|
58
|
+
|
|
59
|
+
/** Coarse priority class consumed by `reserve()`. Internally we map this
|
|
60
|
+
* back to the per-role priority number in `RESIDENT_ROLE_PRIORITY`. */
|
|
61
|
+
export type AllocationPriority = "hot" | "warm" | "cold";
|
|
62
|
+
|
|
63
|
+
/**
 * Collapse a role's numeric rank (looked up in `RESIDENT_ROLE_PRIORITY`)
 * into its coarse allocation class.
 *
 * @param role - Resident model role to classify.
 * @returns `"hot"` for rank >= 40, `"warm"` for rank >= 25, otherwise `"cold"`.
 */
export function priorityClassForRole(
  role: ResidentModelRole,
): AllocationPriority {
  const rank = RESIDENT_ROLE_PRIORITY[role];
  // Thresholds are tested from the top down; anything that fails both
  // comparisons lands in the cold bucket.
  return rank >= 40 ? "hot" : rank >= 25 ? "warm" : "cold";
}
|
|
71
|
+
|
|
72
|
+
export interface BudgetReservation {
|
|
73
|
+
readonly id: string;
|
|
74
|
+
readonly role: ResidentModelRole;
|
|
75
|
+
readonly bytes: number;
|
|
76
|
+
readonly priority: AllocationPriority;
|
|
77
|
+
/** Per-role priority number (R9 §4.1 / `RESIDENT_ROLE_PRIORITY`). */
|
|
78
|
+
readonly priorityRank: number;
|
|
79
|
+
/** Idempotent. Multi-release is a no-op (release happens from teardown
|
|
80
|
+
* paths that may race). */
|
|
81
|
+
release(): void;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
/** Diagnostic snapshot row for `VoiceBudget.snapshot()`. */
|
|
85
|
+
export interface ReservationSnapshot {
|
|
86
|
+
id: string;
|
|
87
|
+
role: ResidentModelRole;
|
|
88
|
+
bytes: number;
|
|
89
|
+
priority: AllocationPriority;
|
|
90
|
+
priorityRank: number;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
/**
 * Error raised when a reservation request cannot be satisfied. Carries the
 * numbers behind the failure in `details` and a stable `code` for
 * programmatic matching.
 */
export class BudgetExhaustedError extends Error {
  readonly code = "voice-budget-exhausted";
  readonly details: {
    requestedBytes: number;
    freeBytes: number;
    totalBytes: number;
    role: ResidentModelRole;
    priority: AllocationPriority;
    evictedRoles: ReadonlyArray<ResidentModelRole>;
    evictionCandidate: ResidentModelRole | null;
  };

  /** Render the human-readable message; byte counts are shown as whole MB. */
  private static describe(info: BudgetExhaustedError["details"]): string {
    const asMb = (byteCount: number): string =>
      (byteCount / BYTES_PER_MB).toFixed(0);
    const sentences = [
      `[voice-budget] Cannot fit ${asMb(info.requestedBytes)} MB reservation for role "${info.role}" (priority ${info.priority}).`,
      `Free: ${asMb(info.freeBytes)} MB / total: ${asMb(info.totalBytes)} MB.`,
      `Evicted: [${info.evictedRoles.join(", ")}].`,
      `Next candidate: ${info.evictionCandidate ?? "none (only hot reservations remain)"}.`,
    ];
    return sentences.join(" ");
  }

  /**
   * @param details - Snapshot of the request and budget state at failure time.
   */
  constructor(details: BudgetExhaustedError["details"]) {
    super(BudgetExhaustedError.describe(details));
    this.name = "BudgetExhaustedError";
    this.details = details;
  }
}
|
|
117
|
+
|
|
118
|
+
export interface VoiceBudget {
|
|
119
|
+
/**
|
|
120
|
+
* Reserve `bytes` for `modelId` with `priority`. Returns a handle the
|
|
121
|
+
* caller MUST `.release()` to give the memory back. Throws
|
|
122
|
+
* `BudgetExhaustedError` when the requested amount cannot fit even after
|
|
123
|
+
* evicting every available lower-priority reservation.
|
|
124
|
+
*
|
|
125
|
+
* `evictHook` is optional: when present, the allocator will call it for
|
|
126
|
+
* each role that needs to be evicted (one at a time, ascending priority)
|
|
127
|
+
* before recording the new reservation. When omitted, the allocator just
|
|
128
|
+
* walks its own internal table — the caller is expected to drive the
|
|
129
|
+
* actual weight unload (the loader/eviction path lives in the model's
|
|
130
|
+
* own service, not here).
|
|
131
|
+
*/
|
|
132
|
+
reserve(args: {
|
|
133
|
+
modelId: string;
|
|
134
|
+
role: ResidentModelRole;
|
|
135
|
+
bytes: number;
|
|
136
|
+
/** Optional; defaults to `priorityClassForRole(role)`. */
|
|
137
|
+
priority?: AllocationPriority;
|
|
138
|
+
/** Optional eviction callback. When provided, called once per evicted
|
|
139
|
+
* role in ascending-priority order before the new reservation is
|
|
140
|
+
* recorded. The callback should drop the weights and return the
|
|
141
|
+
* bytes actually reclaimed (must be >= the reservation's recorded
|
|
142
|
+
* bytes). When omitted, the allocator only drops the internal
|
|
143
|
+
* reservation entry (eviction-by-accounting). */
|
|
144
|
+
evictHook?: (role: ResidentModelRole, id: string) => Promise<number>;
|
|
145
|
+
}): Promise<BudgetReservation>;
|
|
146
|
+
|
|
147
|
+
/** Best-effort current free budget, in bytes. */
|
|
148
|
+
freeBytes(): number;
|
|
149
|
+
/** Total budget on this device, in bytes. */
|
|
150
|
+
totalBytes(): number;
|
|
151
|
+
/** All current reservations, ordered by priority ascending. */
|
|
152
|
+
snapshot(): ReadonlyArray<ReservationSnapshot>;
|
|
153
|
+
/** The tier this budget was sized to. */
|
|
154
|
+
tier(): DeviceTier;
|
|
155
|
+
/** The original assessment. */
|
|
156
|
+
assessment(): DeviceTierAssessment;
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
/**
 * Default total budget, in bytes, for a device tier.
 *
 * Every tier has a ceiling sized to the R9 §2.3 co-resident roll-up
 * (weights + KV + TTS transient peak, with an OS reserve):
 *
 *   - MAX:  24 GB (9b + drafter + omnivoice-Q8 + ASR + embed + warm/cold
 *           path co-resident).
 *   - GOOD: 12 GB (2b/4b co-resident + transient).
 *   - OKAY:  6 GB (0.8b LM only resident; ASR/TTS swap).
 *   - POOR:  3 GB (turn + VAD + wake only, no LM/TTS local).
 *
 * The result is the smaller of that ceiling and the device's effective model
 * memory as reported by `effectiveModelMemoryGb(probe)`. POOR additionally
 * floors the effective memory at 1 GB before applying its ceiling. The
 * `maxRamMB` user override (R9 §5.3) can cap the budget lower; that override
 * is not applied here.
 *
 * @param probe - Hardware probe handed to `effectiveModelMemoryGb`.
 * @param tier - Device tier whose ceiling applies.
 * @returns Budget in bytes.
 */
function defaultTierBudgetBytes(
  probe: HardwareProbe,
  tier: DeviceTier,
): number {
  const availableGb = effectiveModelMemoryGb(probe);
  // Clamp the available memory between an optional floor and the tier ceiling,
  // then convert GB to bytes.
  const clampToBytes = (
    ceilingGb: number,
    floorGb: number = Number.NEGATIVE_INFINITY,
  ): number => Math.min(ceilingGb, Math.max(floorGb, availableGb)) * BYTES_PER_GB;

  switch (tier) {
    case "MAX":
      return clampToBytes(24);
    case "GOOD":
      return clampToBytes(12);
    case "OKAY":
      return clampToBytes(6);
    case "POOR":
      return clampToBytes(3, 1);
  }
}
|
|
190
|
+
|
|
191
|
+
/**
|
|
192
|
+
* Co-resident voice-ensemble RSS estimate in MB. Sourced from R9 §2.3,
|
|
193
|
+
* keyed off the LM-tier slot (the text model that anchors the bundle).
|
|
194
|
+
*
|
|
195
|
+
* Each row is the steady-state weights + KV at default context for the
|
|
196
|
+
* whole voice + text bundle running at once:
|
|
197
|
+
*
|
|
198
|
+
* LM + LM KV + drafter + TTS (omnivoice base + tokenizer or kokoro-q8) +
|
|
199
|
+
* ASR + ASR mmproj + embedding + VAD + wake-word + turn-detector +
|
|
200
|
+
* emotion classifier + speaker encoder.
|
|
201
|
+
*
|
|
202
|
+
* The `transientTtsBufferMb` field is the OmniVoice MaskGIT decode peak
|
|
203
|
+
* (~1.17 GB measured on Metal). Backends that don't run OmniVoice locally
|
|
204
|
+
* (kokoro-only, cloud TTS) have a much smaller transient — kept at 100 MB
|
|
205
|
+
* to leave room for kokoro's ONNX compute path. Mobile defaults to no
|
|
206
|
+
* local TTS, so transient = 0.
|
|
207
|
+
*
|
|
208
|
+
* The figures are MEASURED on-disk (Q4_K_M GGUFs in
|
|
209
|
+
* `<stateDir>/local-inference/models/eliza-1-{0_8b,2b}.bundle/`) plus
|
|
210
|
+
* model-card sizes for VAD, wake-word, turn-detector, emotion, speaker-id.
|
|
211
|
+
* See R9 §2.1 + §2.2 + §2.3 for the per-component breakdown.
|
|
212
|
+
*/
|
|
213
|
+
export interface VoiceEnsembleBudget {
|
|
214
|
+
readonly tierSlot: VoiceTierSlot;
|
|
215
|
+
readonly lmMb: number;
|
|
216
|
+
readonly lmKvMb: number;
|
|
217
|
+
readonly drafterMb: number;
|
|
218
|
+
readonly ttsMb: number;
|
|
219
|
+
readonly asrMb: number;
|
|
220
|
+
readonly asrMmprojMb: number;
|
|
221
|
+
readonly embeddingMb: number;
|
|
222
|
+
readonly vadMb: number;
|
|
223
|
+
readonly wakeWordMb: number;
|
|
224
|
+
readonly turnDetectorMb: number;
|
|
225
|
+
readonly emotionMb: number;
|
|
226
|
+
readonly speakerEncoderMb: number;
|
|
227
|
+
readonly transientTtsBufferMb: number;
|
|
228
|
+
/** Sum of weights + KV (steady-state). Excludes transient TTS buffer. */
|
|
229
|
+
readonly steadyStateMb: number;
|
|
230
|
+
/** Sum of steady-state + transient TTS peak. */
|
|
231
|
+
readonly peakMb: number;
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
/**
|
|
235
|
+
* The voice ensemble's LM tier slot. We key the table off the LM size +
|
|
236
|
+
* the surrounding voice profile (mobile-cloud vs desktop-omnivoice) since
|
|
237
|
+
* the largest co-resident knob is the LM itself.
|
|
238
|
+
*/
|
|
239
|
+
export type VoiceTierSlot =
|
|
240
|
+
| "mobile-0_8b" // mobile profile: kokoro-q8 + turnsense + ASR-0.6B + LM-0.8B, no embedding
|
|
241
|
+
| "desktop-0_8b" // desktop profile: omnivoice + livekit-turn + ASR-0.6B + LM-0.8B
|
|
242
|
+
| "desktop-2b" // 2b LM + full voice stack + embedding
|
|
243
|
+
| "desktop-4b" // 4b LM + full voice stack + embedding
|
|
244
|
+
| "workstation-9b" // 9b LM + omnivoice-Q8 + ASR-0.6B + embedding
|
|
245
|
+
| "workstation-27b"; // 27b LM + omnivoice-Q8 + ASR-1.7B + embedding
|
|
246
|
+
|
|
247
|
+
const _MB = 1; // alias for readability inside the table
|
|
248
|
+
const _GB = 1024;
|
|
249
|
+
|
|
250
|
+
/** R9 §2.3 — measured co-resident bundle for every supported tier slot. */
|
|
251
|
+
export const VOICE_ENSEMBLE_BUDGETS: Readonly<
|
|
252
|
+
Record<VoiceTierSlot, VoiceEnsembleBudget>
|
|
253
|
+
> = {
|
|
254
|
+
"mobile-0_8b": buildEnsemble({
|
|
255
|
+
tierSlot: "mobile-0_8b",
|
|
256
|
+
lmMb: 0.5 * _GB,
|
|
257
|
+
lmKvMb: 0.044 * _GB,
|
|
258
|
+
drafterMb: 0.31 * _GB,
|
|
259
|
+
ttsMb: 0.08 * _GB, // kokoro-q8 ONNX
|
|
260
|
+
asrMb: 0.4 * _GB, // qwen3-asr-0.6B documented Q4-equiv
|
|
261
|
+
asrMmprojMb: 0.2 * _GB,
|
|
262
|
+
embeddingMb: 0, // pools from LM on the 0.8B tier
|
|
263
|
+
vadMb: 2 * _MB, // silero-vad documented baseline
|
|
264
|
+
wakeWordMb: 4 * _MB,
|
|
265
|
+
turnDetectorMb: 60 * _MB, // turnsense 135M int8 mobile
|
|
266
|
+
emotionMb: 40 * _MB, // wav2small int8 acoustic
|
|
267
|
+
speakerEncoderMb: 10 * _MB, // wespeaker / x-vector int8
|
|
268
|
+
transientTtsBufferMb: 0, // mobile defaults to cloud TTS or kokoro burst
|
|
269
|
+
}),
|
|
270
|
+
"desktop-0_8b": buildEnsemble({
|
|
271
|
+
tierSlot: "desktop-0_8b",
|
|
272
|
+
lmMb: 0.5 * _GB,
|
|
273
|
+
lmKvMb: 0.044 * _GB,
|
|
274
|
+
drafterMb: 0.31 * _GB,
|
|
275
|
+
ttsMb: 0.65 * _GB, // omnivoice base (Q4_K_M = 388.6 MB) + tokenizer (240.8 MB)
|
|
276
|
+
asrMb: 0.4 * _GB,
|
|
277
|
+
asrMmprojMb: 0.2 * _GB,
|
|
278
|
+
embeddingMb: 0,
|
|
279
|
+
vadMb: 2 * _MB,
|
|
280
|
+
wakeWordMb: 4 * _MB,
|
|
281
|
+
turnDetectorMb: 100 * _MB, // livekit/turn-detector v1.2.2-en SmolLM2-135M
|
|
282
|
+
emotionMb: 40 * _MB,
|
|
283
|
+
speakerEncoderMb: 10 * _MB,
|
|
284
|
+
transientTtsBufferMb: 1.17 * _GB, // omnivoice MaskGIT compute peak
|
|
285
|
+
}),
|
|
286
|
+
"desktop-2b": buildEnsemble({
|
|
287
|
+
tierSlot: "desktop-2b",
|
|
288
|
+
lmMb: 1.4 * _GB,
|
|
289
|
+
lmKvMb: 0.075 * _GB,
|
|
290
|
+
drafterMb: 0.5 * _GB,
|
|
291
|
+
ttsMb: 0.65 * _GB,
|
|
292
|
+
asrMb: 0.4 * _GB,
|
|
293
|
+
asrMmprojMb: 0.2 * _GB,
|
|
294
|
+
embeddingMb: 0.4 * _GB, // eliza-1-embedding.gguf 0.6B Q4-ish
|
|
295
|
+
vadMb: 2 * _MB,
|
|
296
|
+
wakeWordMb: 4 * _MB,
|
|
297
|
+
turnDetectorMb: 100 * _MB,
|
|
298
|
+
emotionMb: 40 * _MB,
|
|
299
|
+
speakerEncoderMb: 10 * _MB,
|
|
300
|
+
transientTtsBufferMb: 1.17 * _GB,
|
|
301
|
+
}),
|
|
302
|
+
"desktop-4b": buildEnsemble({
|
|
303
|
+
tierSlot: "desktop-4b",
|
|
304
|
+
lmMb: 2.6 * _GB,
|
|
305
|
+
lmKvMb: 0.3 * _GB,
|
|
306
|
+
drafterMb: 0.7 * _GB,
|
|
307
|
+
ttsMb: 0.65 * _GB,
|
|
308
|
+
asrMb: 0.4 * _GB,
|
|
309
|
+
asrMmprojMb: 0.2 * _GB,
|
|
310
|
+
embeddingMb: 0.4 * _GB,
|
|
311
|
+
vadMb: 2 * _MB,
|
|
312
|
+
wakeWordMb: 4 * _MB,
|
|
313
|
+
turnDetectorMb: 400 * _MB, // livekit/turn-detector v0.4.1-intl Qwen2.5-0.5B
|
|
314
|
+
emotionMb: 40 * _MB,
|
|
315
|
+
speakerEncoderMb: 10 * _MB,
|
|
316
|
+
transientTtsBufferMb: 1.17 * _GB,
|
|
317
|
+
}),
|
|
318
|
+
"workstation-9b": buildEnsemble({
|
|
319
|
+
tierSlot: "workstation-9b",
|
|
320
|
+
lmMb: 5.4 * _GB,
|
|
321
|
+
lmKvMb: 0.56 * _GB,
|
|
322
|
+
drafterMb: 1.4 * _GB,
|
|
323
|
+
ttsMb: 1.28 * _GB, // omnivoice Q8_0 on 9B+ tiers per voiceQuantForTier()
|
|
324
|
+
asrMb: 0.4 * _GB,
|
|
325
|
+
asrMmprojMb: 0.2 * _GB,
|
|
326
|
+
embeddingMb: 0.4 * _GB,
|
|
327
|
+
vadMb: 2 * _MB,
|
|
328
|
+
wakeWordMb: 4 * _MB,
|
|
329
|
+
turnDetectorMb: 400 * _MB,
|
|
330
|
+
emotionMb: 40 * _MB,
|
|
331
|
+
speakerEncoderMb: 10 * _MB,
|
|
332
|
+
transientTtsBufferMb: 1.17 * _GB,
|
|
333
|
+
}),
|
|
334
|
+
"workstation-27b": buildEnsemble({
|
|
335
|
+
tierSlot: "workstation-27b",
|
|
336
|
+
lmMb: 16.8 * _GB,
|
|
337
|
+
lmKvMb: 2.75 * _GB,
|
|
338
|
+
drafterMb: 2.6 * _GB,
|
|
339
|
+
ttsMb: 1.28 * _GB,
|
|
340
|
+
asrMb: 1.1 * _GB, // qwen3-asr-1.7B on the 27B tier
|
|
341
|
+
asrMmprojMb: 0.3 * _GB,
|
|
342
|
+
embeddingMb: 0.4 * _GB,
|
|
343
|
+
vadMb: 2 * _MB,
|
|
344
|
+
wakeWordMb: 4 * _MB,
|
|
345
|
+
turnDetectorMb: 400 * _MB,
|
|
346
|
+
emotionMb: 40 * _MB,
|
|
347
|
+
speakerEncoderMb: 10 * _MB,
|
|
348
|
+
transientTtsBufferMb: 1.17 * _GB,
|
|
349
|
+
}),
|
|
350
|
+
};
|
|
351
|
+
|
|
352
|
+
/**
 * Complete a tier-slot row with its two derived totals.
 *
 * @param rows - Per-component sizes in MB, without the derived fields.
 * @returns The same row plus `steadyStateMb` (sum of every resident
 *   component) and `peakMb` (steady state + `transientTtsBufferMb`).
 */
function buildEnsemble(
  rows: Omit<VoiceEnsembleBudget, "steadyStateMb" | "peakMb">,
): VoiceEnsembleBudget {
  // Fixed order, folded left to right, so the floating-point total is
  // reproducible.
  const residentComponentsMb = [
    rows.lmMb,
    rows.lmKvMb,
    rows.drafterMb,
    rows.ttsMb,
    rows.asrMb,
    rows.asrMmprojMb,
    rows.embeddingMb,
    rows.vadMb,
    rows.wakeWordMb,
    rows.turnDetectorMb,
    rows.emotionMb,
    rows.speakerEncoderMb,
  ];
  const steadyStateMb = residentComponentsMb.reduce(
    (runningMb, componentMb) => runningMb + componentMb,
  );
  const peakMb = steadyStateMb + rows.transientTtsBufferMb;
  return { ...rows, steadyStateMb, peakMb };
}
|
|
374
|
+
|
|
375
|
+
/**
 * Peak resident footprint, in MB, recorded for a tier slot in
 * `VOICE_ENSEMBLE_BUDGETS` (steady state plus the transient TTS buffer).
 * The original note says `assertVoiceBundleFitsHost` compares this against
 * the device's host RAM.
 *
 * @param slot - Tier slot to look up.
 * @returns The slot's `peakMb`.
 */
export function voiceEnsemblePeakMb(slot: VoiceTierSlot): number {
  const { peakMb } = VOICE_ENSEMBLE_BUDGETS[slot];
  return peakMb;
}
|
|
382
|
+
|
|
383
|
+
/**
 * Steady-state footprint, in MB, recorded for a tier slot in
 * `VOICE_ENSEMBLE_BUDGETS`: weights + KV, without the transient TTS buffer.
 *
 * @param slot - Tier slot to look up.
 * @returns The slot's `steadyStateMb`.
 */
export function voiceEnsembleSteadyStateMb(slot: VoiceTierSlot): number {
  const { steadyStateMb } = VOICE_ENSEMBLE_BUDGETS[slot];
  return steadyStateMb;
}
|
|
387
|
+
|
|
388
|
+
/**
 * Pick the canonical voice-tier slot for an installed text model.
 *
 * Resolution order:
 *   1. `mobile` truthy → always `"mobile-0_8b"`; the model id is not
 *      inspected. The brief defaults mobile to cloud TTS+ASR with only the
 *      0.8B local LM.
 *   2. Otherwise the lower-cased `textModelId` is scanned for a size marker,
 *      largest first: `27b` → `workstation-27b`, `9b` → `workstation-9b`,
 *      `4b` → `desktop-4b`, `2b` or `1_7b` → `desktop-2b`.
 *   3. Anything else (0.8B / 0.6B / unknown) → `"desktop-0_8b"`.
 *
 * Markers are plain substring matches, so the order of the table matters.
 * `deviceTier` is accepted for interface stability but does not currently
 * change the result.
 *
 * @param args.textModelId - Identifier of the installed text model.
 * @param args.deviceTier - Device tier (currently not consulted).
 * @param args.mobile - Force the mobile profile when truthy.
 * @returns The selected tier slot.
 */
export function pickVoiceTierSlot(args: {
  textModelId: string;
  deviceTier: DeviceTier;
  mobile?: boolean;
}): VoiceTierSlot {
  if (args.mobile) return "mobile-0_8b";

  const modelId = args.textModelId.toLowerCase();
  const slotsByMarker: ReadonlyArray<{
    markers: ReadonlyArray<string>;
    slot: VoiceTierSlot;
  }> = [
    { markers: ["27b"], slot: "workstation-27b" },
    { markers: ["9b"], slot: "workstation-9b" },
    { markers: ["4b"], slot: "desktop-4b" },
    { markers: ["2b", "1_7b"], slot: "desktop-2b" },
  ];
  const match = slotsByMarker.find(({ markers }) =>
    markers.some((marker) => modelId.includes(marker)),
  );
  // Small or unrecognised models share the desktop 0.8B slot on every tier.
  return match ? match.slot : "desktop-0_8b";
}
|
|
413
|
+
|
|
414
|
+
/**
|
|
415
|
+
* Decision returned by `assertVoiceBundleFitsHost`. Mirrors the shape of
|
|
416
|
+
* `RamFitDecision` in `ram-budget.ts` but at the bundle level.
|
|
417
|
+
*/
|
|
418
|
+
export interface VoiceBundleFitDecision {
|
|
419
|
+
tierSlot: VoiceTierSlot;
|
|
420
|
+
deviceTier: DeviceTier;
|
|
421
|
+
/** Steady-state weights + KV, MB. */
|
|
422
|
+
steadyStateMb: number;
|
|
423
|
+
/** Steady-state + transient TTS peak, MB. */
|
|
424
|
+
peakMb: number;
|
|
425
|
+
/** RAM available to the bundle (host MB - OS reserve). */
|
|
426
|
+
usableMb: number;
|
|
427
|
+
/** True iff at least the steady-state footprint fits (`steadyStateMb <= usableMb`), i.e. `level` is "fits" or "tight". */
|
|
428
|
+
fits: boolean;
|
|
429
|
+
/** "fits" when peak fits, "tight" when only steady-state fits, "wontfit"
|
|
430
|
+
* when not even steady-state fits. */
|
|
431
|
+
level: "fits" | "tight" | "wontfit";
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
/** Default OS reserve subtracted from the host before the bundle check. */
|
|
435
|
+
export const DEFAULT_VOICE_BUNDLE_RESERVE_MB = 1536;
|
|
436
|
+
|
|
437
|
+
/**
 * Decide whether the whole voice ensemble for a tier slot fits a host.
 *
 * Intended for voice-session start, so the runtime can refuse local voice up
 * front instead of starting it and watching `MemoryMonitor` evict loaders
 * mid-session (R9 §1.4). This function returns the raw decision and never
 * throws for a poor fit; per the original note, a typed-error wrapper
 * `assertVoiceBundleFitsHost` lives in `active-model.ts`.
 *
 * Usable memory is `hostRamMb - reserveMb`, clamped at 0. The level is:
 *   - `"fits"`    — usable memory covers the slot's peak footprint;
 *   - `"tight"`   — it covers only the steady-state footprint;
 *   - `"wontfit"` — it covers neither.
 * `fits` is true for both `"fits"` and `"tight"`.
 *
 * @param args.tierSlot - Slot looked up in `VOICE_ENSEMBLE_BUDGETS`.
 * @param args.deviceTier - Echoed back in the decision; not used in the math.
 * @param args.hostRamMb - Host RAM in MB.
 * @param args.reserveMb - OS reserve in MB; defaults to
 *   `DEFAULT_VOICE_BUNDLE_RESERVE_MB`.
 * @returns The fit decision for the slot on this host.
 */
export function assessVoiceBundleFits(args: {
  tierSlot: VoiceTierSlot;
  deviceTier: DeviceTier;
  hostRamMb: number;
  reserveMb?: number;
}): VoiceBundleFitDecision {
  const { tierSlot, deviceTier, hostRamMb } = args;
  const osReserveMb = args.reserveMb ?? DEFAULT_VOICE_BUNDLE_RESERVE_MB;
  const usableMb = Math.max(0, hostRamMb - osReserveMb);
  const { steadyStateMb, peakMb } = VOICE_ENSEMBLE_BUDGETS[tierSlot];

  const level: VoiceBundleFitDecision["level"] =
    usableMb >= peakMb
      ? "fits"
      : usableMb >= steadyStateMb
        ? "tight"
        : "wontfit";

  return {
    tierSlot,
    deviceTier,
    steadyStateMb,
    peakMb,
    usableMb,
    fits: level !== "wontfit",
    level,
  };
}
|
|
471
|
+
|
|
472
|
+
interface InternalReservation {
|
|
473
|
+
id: string;
|
|
474
|
+
role: ResidentModelRole;
|
|
475
|
+
bytes: number;
|
|
476
|
+
priority: AllocationPriority;
|
|
477
|
+
priorityRank: number;
|
|
478
|
+
released: boolean;
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
class VoiceBudgetImpl implements VoiceBudget {
|
|
482
|
+
private readonly _totalBytes: number;
|
|
483
|
+
private readonly _assessment: DeviceTierAssessment;
|
|
484
|
+
private readonly _reservations = new Map<string, InternalReservation>();
|
|
485
|
+
private _usedBytes = 0;
|
|
486
|
+
|
|
487
|
+
constructor(args: {
|
|
488
|
+
totalBytes: number;
|
|
489
|
+
assessment: DeviceTierAssessment;
|
|
490
|
+
}) {
|
|
491
|
+
this._totalBytes = args.totalBytes;
|
|
492
|
+
this._assessment = args.assessment;
|
|
493
|
+
}
|
|
494
|
+
|
|
495
|
+
freeBytes(): number {
|
|
496
|
+
return Math.max(0, this._totalBytes - this._usedBytes);
|
|
497
|
+
}
|
|
498
|
+
|
|
499
|
+
totalBytes(): number {
|
|
500
|
+
return this._totalBytes;
|
|
501
|
+
}
|
|
502
|
+
|
|
503
|
+
tier(): DeviceTier {
|
|
504
|
+
return this._assessment.tier;
|
|
505
|
+
}
|
|
506
|
+
|
|
507
|
+
assessment(): DeviceTierAssessment {
|
|
508
|
+
return this._assessment;
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
snapshot(): ReadonlyArray<ReservationSnapshot> {
|
|
512
|
+
return Array.from(this._reservations.values())
|
|
513
|
+
.filter((r) => !r.released)
|
|
514
|
+
.sort((a, b) => a.priorityRank - b.priorityRank)
|
|
515
|
+
.map(({ id, role, bytes, priority, priorityRank }) => ({
|
|
516
|
+
id,
|
|
517
|
+
role,
|
|
518
|
+
bytes,
|
|
519
|
+
priority,
|
|
520
|
+
priorityRank,
|
|
521
|
+
}));
|
|
522
|
+
}
|
|
523
|
+
|
|
524
|
+
/**
 * Reserve `args.bytes` of the budget for a model, evicting reservations of
 * strictly lower priority rank when the free pool is too small.
 *
 * @param args.modelId Model identifier; becomes part of the reservation id.
 * @param args.role Resident role; its entry in `RESIDENT_ROLE_PRIORITY` is
 *   the rank used for eviction decisions.
 * @param args.bytes Requested size. Floored to an integer and clamped to be
 *   non-negative.
 * @param args.priority Optional priority class recorded on the reservation;
 *   defaults to `priorityClassForRole(args.role)`. It does not change the
 *   eviction rank, which always comes from the role.
 * @param args.evictHook Optional callback awaited for each eviction
 *   candidate so the caller can unload the weights. Its return value is
 *   ignored; accounting always drops the candidate's recorded `bytes`.
 * @returns A reservation handle whose `release()` is idempotent.
 * @throws RangeError when `args.bytes` is NaN.
 * @throws BudgetExhaustedError when the request exceeds the total budget, or
 *   when nothing of strictly lower rank is left to evict.
 */
async reserve(args: {
  modelId: string;
  role: ResidentModelRole;
  bytes: number;
  priority?: AllocationPriority;
  evictHook?: (role: ResidentModelRole, id: string) => Promise<number>;
}): Promise<BudgetReservation> {
  // NaN compares false against everything, so it would slip past both
  // budget checks below and then turn `_usedBytes` into NaN for good.
  if (Number.isNaN(args.bytes)) {
    throw new RangeError("reserve: bytes must not be NaN");
  }

  const priority = args.priority ?? priorityClassForRole(args.role);
  const priorityRank = RESIDENT_ROLE_PRIORITY[args.role];
  const requestedBytes = Math.max(0, Math.floor(args.bytes));

  // A request larger than the whole budget can never fit; fail before
  // evicting anything.
  if (requestedBytes > this._totalBytes) {
    throw new BudgetExhaustedError({
      requestedBytes,
      freeBytes: this.freeBytes(),
      totalBytes: this._totalBytes,
      role: args.role,
      priority,
      evictedRoles: [],
      evictionCandidate: null,
    });
  }

  const evictedRoles: ResidentModelRole[] = [];

  // Walk evictable reservations in ascending priority (cheapest first)
  // until enough memory fits. We only evict reservations with a STRICTLY
  // LOWER priority rank than the request; equal or higher priority
  // reservations stay put.
  while (this.freeBytes() < requestedBytes) {
    const candidate = this.lowestPriorityEvictableReservation(priorityRank);
    if (!candidate) {
      throw new BudgetExhaustedError({
        requestedBytes,
        freeBytes: this.freeBytes(),
        totalBytes: this._totalBytes,
        role: args.role,
        priority,
        evictedRoles,
        evictionCandidate: null,
      });
    }
    if (args.evictHook) {
      // Let the caller actually unload the weights. The hook returns the
      // bytes it reclaimed; we still drop the accounting entry by the
      // recorded `bytes` field — partial reclamation is treated as
      // success (the loader, not the allocator, owns the side effect).
      await args.evictHook(candidate.role, candidate.id);
    }
    // The await above yields control, so the candidate's owner (or a
    // concurrent reserve() that picked the same candidate) may already have
    // released it. Drop the accounting entry only once, otherwise its bytes
    // are subtracted twice and `_usedBytes` under-counts.
    if (!candidate.released) {
      candidate.released = true;
      this._reservations.delete(candidate.id);
      this._usedBytes = Math.max(0, this._usedBytes - candidate.bytes);
    }
    evictedRoles.push(candidate.role);
  }

  // Timestamp plus random suffix keeps ids unique across repeated
  // reservations of the same model and role.
  const id = `${args.modelId}#${args.role}#${Date.now().toString(36)}-${Math.random()
    .toString(36)
    .slice(2, 8)}`;
  const entry: InternalReservation = {
    id,
    role: args.role,
    bytes: requestedBytes,
    priority,
    priorityRank,
    released: false,
  };
  this._reservations.set(id, entry);
  this._usedBytes += requestedBytes;

  // Idempotent: a reservation that was already released or evicted is a
  // no-op here.
  const release = (): void => {
    if (entry.released) return;
    entry.released = true;
    this._reservations.delete(id);
    this._usedBytes = Math.max(0, this._usedBytes - entry.bytes);
  };

  return {
    id,
    role: entry.role,
    bytes: entry.bytes,
    priority: entry.priority,
    priorityRank: entry.priorityRank,
    release,
  };
}
|
|
612
|
+
|
|
613
|
+
/**
 * Pick the eviction victim for a requester of rank `requesterRank`: the
 * live reservation with the smallest priority rank among those ranked
 * strictly below the requester. On equal ranks the one encountered first
 * while iterating `_reservations` wins.
 *
 * @returns The chosen reservation, or `null` when nothing is evictable.
 */
private lowestPriorityEvictableReservation(
  requesterRank: number,
): InternalReservation | null {
  let victim: InternalReservation | null = null;
  for (const reservation of this._reservations.values()) {
    // Evictable means still live and not ranked at or above the requester.
    const evictable =
      !reservation.released && !(reservation.priorityRank >= requesterRank);
    if (!evictable) continue;
    if (victim === null || reservation.priorityRank < victim.priorityRank) {
      victim = reservation;
    }
  }
  return victim;
}
|
|
626
|
+
}
|
|
627
|
+
|
|
628
|
+
/**
 * Public factory: builds a voice budget for the probed device.
 *
 * The assessment is taken from `args.assessment` when supplied, otherwise
 * computed with `classifyDeviceTier(args.probe)`. The budget total is the
 * tier default from `defaultTierBudgetBytes`, lowered to `maxRamMb` when
 * that override is a positive number below the default.
 */
export function createVoiceBudget(args: {
  probe: HardwareProbe;
  /** Optional user cap on the budget, in MB. Only a positive number takes
   *  effect; it can lower the tier's default total but never raise it. */
  maxRamMb?: number;
  /** Optional pre-computed assessment (skips classifying the probe again). */
  assessment?: DeviceTierAssessment;
}): VoiceBudget {
  const assessment = args.assessment ?? classifyDeviceTier(args.probe);
  const tierDefaultBytes = defaultTierBudgetBytes(args.probe, assessment.tier);
  const overrideMb = args.maxRamMb;
  const totalBytes =
    typeof overrideMb === "number" && overrideMb > 0
      ? Math.min(tierDefaultBytes, Math.floor(overrideMb * BYTES_PER_MB))
      : tierDefaultBytes;
  return new VoiceBudgetImpl({ totalBytes, assessment });
}
|
|
646
|
+
|
|
647
|
+
/**
 * Test seam: builds a budget directly from an explicit byte total and
 * assessment, with no hardware probe and no tier-default computation.
 */
export function createVoiceBudgetForTest(args: {
  totalBytes: number;
  assessment: DeviceTierAssessment;
}): VoiceBudget {
  const { totalBytes, assessment } = args;
  return new VoiceBudgetImpl({ totalBytes, assessment });
}
|