@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +81 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +7 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +54 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1171 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +190 -0
- package/src/routes/local-inference-asr-route.ts +213 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +423 -0
- package/src/routes/local-inference-compat-routes.ts +782 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1398 -0
- package/src/runtime/index.d.ts +14 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +27 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/asr/errors.d.ts +21 -0
- package/src/services/asr/errors.d.ts.map +1 -0
- package/src/services/asr/errors.ts +50 -0
- package/src/services/asr/hash.d.ts +28 -0
- package/src/services/asr/hash.d.ts.map +1 -0
- package/src/services/asr/hash.ts +49 -0
- package/src/services/asr/index.d.ts +76 -0
- package/src/services/asr/index.d.ts.map +1 -0
- package/src/services/asr/index.ts +178 -0
- package/src/services/asr/types.d.ts +91 -0
- package/src/services/asr/types.d.ts.map +1 -0
- package/src/services/asr/types.ts +95 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +240 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +724 -0
- package/src/services/downloader.ts +899 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +534 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1891 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +183 -0
- package/src/services/hardware.ts +404 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +281 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +30 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +225 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +693 -0
- package/src/services/manifest/schema.d.ts +715 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +655 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +569 -0
- package/src/services/memory-arbiter.d.ts +343 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +1000 -0
- package/src/services/memory-monitor.d.ts +119 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +296 -0
- package/src/services/memory-pressure.d.ts +127 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +413 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +672 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +376 -0
- package/src/services/routing-policy.d.ts +55 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.ts +228 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +15 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/tts/errors.ts +46 -0
- package/src/services/tts/index.ts +214 -0
- package/src/services/tts/tts-audio-cache.ts +235 -0
- package/src/services/tts/types.ts +157 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +148 -0
- package/src/services/voice/embedding.ts +244 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +746 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2226 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +636 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +671 -0
- package/src/services/voice/ffi-bindings.ts +3050 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.ts +105 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +420 -0
- package/src/services/voice/voice-budget.ts +656 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,1000 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Memory Arbiter — single in-process owner of every model handle (text,
|
|
3
|
+
* embedding, vision-language, ASR, TTS, image generation) for the local
|
|
4
|
+
* inference stack. WS1 deliverable.
|
|
5
|
+
*
|
|
6
|
+
* Why this exists
|
|
7
|
+
* ---------------
|
|
8
|
+
* The current code has every plugin loading independently:
|
|
9
|
+
*
|
|
10
|
+
* - `plugin-local-inference` owns the text + voice GGUFs through
|
|
11
|
+
* `LocalInferenceEngine` + `SharedResourceRegistry`.
|
|
12
|
+
* - `plugin-vision` loads its own TF.js / face-api models with no
|
|
13
|
+
* shared budget.
|
|
14
|
+
* - `plugin-aosp-local-inference` runs the bun:ffi llama.cpp binding
|
|
15
|
+
* in its own world, also with no shared budget.
|
|
16
|
+
*
|
|
17
|
+
* On a 6 GB iPhone or an 8 GB low-tier Android, that means loading a
|
|
18
|
+
* vision model on top of a text model gets the app jetsam'd / lmkd-killed
|
|
19
|
+
* before the planner even runs.
|
|
20
|
+
*
|
|
21
|
+
* The arbiter is the single seam every consumer goes through to acquire
|
|
22
|
+
* a model. It owns the eviction policy across modalities (the existing
|
|
23
|
+
* `ResidentModelRole` priority table + memory-pressure signals from the
|
|
24
|
+
* platform), it owns the queue for capability swaps (a vision-describe
|
|
25
|
+
* arriving while the text model is generating waits its turn rather than
|
|
26
|
+
* triggering a parallel load that OOMs), and it owns the
|
|
27
|
+
* `vision-embedding-cache` so repeat frames don't re-pay the projector.
|
|
28
|
+
*
|
|
29
|
+
* What this module does NOT do
|
|
30
|
+
* ----------------------------
|
|
31
|
+
* - It does not implement any model loader. Loaders are passed in via
|
|
32
|
+
* `registerCapability(...)` by the plugins that own the binding
|
|
33
|
+
* (plugin-local-inference for text/embedding, plugin-vision for
|
|
34
|
+
* vision-describe, plugin-image-gen for diffusion, etc.).
|
|
35
|
+
* - It does not download models, probe hardware, or render UI. Those
|
|
36
|
+
* are the existing `Downloader`, `probeHardware`, and Settings UI
|
|
37
|
+
* concerns.
|
|
38
|
+
* - It does not run on a worker thread. One process, one event loop —
|
|
39
|
+
* the arbiter coordinates async work via promises only.
|
|
40
|
+
*
|
|
41
|
+
* Consumer contract
|
|
42
|
+
* -----------------
|
|
43
|
+
* Capability handlers register themselves at boot:
|
|
44
|
+
*
|
|
45
|
+
* ```ts
|
|
46
|
+
* arbiter.registerCapability({
|
|
47
|
+
* capability: "vision-describe",
|
|
48
|
+
* residentRole: "vision",
|
|
49
|
+
* load: async (modelKey) => loadQwen3VL(modelKey),
|
|
50
|
+
* unload: async (handle) => handle.dispose(),
|
|
51
|
+
* run: async (handle, req) => handle.describe(req.imageBytes),
|
|
52
|
+
* });
|
|
53
|
+
* ```
|
|
54
|
+
*
|
|
55
|
+
* Then anyone can call:
|
|
56
|
+
*
|
|
57
|
+
* ```ts
|
|
58
|
+
* const result = await arbiter.requestVisionDescribe({
|
|
59
|
+
* modelKey: "qwen3-vl-4b",
|
|
60
|
+
* imageBytes: pixels,
|
|
61
|
+
* });
|
|
62
|
+
* ```
|
|
63
|
+
*
|
|
64
|
+
* The arbiter handles:
|
|
65
|
+
* 1. Acquiring (or reusing) the handle for `qwen3-vl-4b`.
|
|
66
|
+
* 2. If a different capability holds the active model and we need to
|
|
67
|
+
* swap, evicting it first.
|
|
68
|
+
* 3. Running the request.
|
|
69
|
+
* 4. Releasing the handle (refcounted; the handle stays loaded until
|
|
70
|
+
* pressure or idle eviction reclaims it).
|
|
71
|
+
*
|
|
72
|
+
* Telemetry
|
|
73
|
+
* ---------
|
|
74
|
+
* The arbiter emits typed events:
|
|
75
|
+
* - `model_load` — a handle came online (capability, modelKey, ms)
|
|
76
|
+
* - `model_unload` — a handle went offline (capability, modelKey, reason)
|
|
77
|
+
* - `memory_pressure` — pressure level changed (level, source, freeMb?)
|
|
78
|
+
* - `eviction` — a role was evicted (capability, modelKey, reason)
|
|
79
|
+
* - `capability_run` — a request completed (capability, modelKey, ms)
|
|
80
|
+
*
|
|
81
|
+
* The runtime observability layer subscribes via `onEvent(...)`.
|
|
82
|
+
*/
|
|
83
|
+
|
|
84
|
+
import type {
|
|
85
|
+
MemoryPressureEvent,
|
|
86
|
+
MemoryPressureLevel,
|
|
87
|
+
MemoryPressureSource,
|
|
88
|
+
} from "./memory-pressure";
|
|
89
|
+
import {
|
|
90
|
+
VisionEmbeddingCache,
|
|
91
|
+
type VisionEmbeddingEntry,
|
|
92
|
+
} from "./vision-embedding-cache";
|
|
93
|
+
import {
|
|
94
|
+
createEvictableModelRole,
|
|
95
|
+
type EvictableModelRole,
|
|
96
|
+
RESIDENT_ROLE_PRIORITY,
|
|
97
|
+
type ResidentModelRole,
|
|
98
|
+
type SharedResourceRegistry,
|
|
99
|
+
} from "./voice/shared-resources";
|
|
100
|
+
|
|
101
|
+
/**
 * Closed set of capability identifiers the arbiter knows how to route.
 * There is one member per consumer surface. Keep the union small: adding
 * a capability is a deliberate design decision, and every member must
 * also have an entry in `CAPABILITY_ROLE` so its eviction priority is
 * well-defined.
 */
export type ArbiterCapability =
  | "text"
  | "embedding"
  | "vision-describe"
  | "image-gen"
  | "transcribe"
  | "speak";
|
|
113
|
+
|
|
114
|
+
/**
 * Default resident-role bucket for each capability, i.e. the
 * `ResidentModelRole` under which the shared registry tracks (and
 * prioritises eviction of) a model loaded for that capability. A
 * registration may override this via `residentRole`. Every member of
 * `ArbiterCapability` MUST appear here; the `Record` type enforces it.
 */
const CAPABILITY_ROLE: Readonly<Record<ArbiterCapability, ResidentModelRole>> = {
  text: "text-target",
  embedding: "embedding",
  "vision-describe": "vision",
  // `ResidentModelRole` has no dedicated image-generation slot today, so
  // image-gen shares the `vision` priority and co-evicts with the VL
  // model: both are GPU-heavy weights with similar lifecycles.
  "image-gen": "vision",
  transcribe: "asr",
  speak: "tts",
};
|
|
131
|
+
|
|
132
|
+
/**
 * Opaque handle returned by `acquire`. It pins the underlying model in
 * memory until the caller calls `release()`; callers MUST release it.
 */
export interface ArbiterHandle<TBackend = unknown> {
  /** Capability this handle was acquired for. */
  readonly capability: ArbiterCapability;
  /** Model key this handle was acquired for. */
  readonly modelKey: string;
  /** The loaded backend object produced by the capability's `load`. */
  readonly backend: TBackend;
  /**
   * Increment the refcount so this handle can be shared, e.g. when one
   * consumer hands it to another mid-flight. Returns nothing; the
   * handle object itself is what gets shared.
   */
  retain(): void;
  /** Decrement the refcount. When it hits zero the role becomes evictable. */
  release(): Promise<void>;
}
|
|
146
|
+
|
|
147
|
+
/**
 * Everything a capability handler declares about itself when it
 * registers. The arbiter uses these callbacks to load a backend on
 * demand, run requests against it, and unload it when it is evicted.
 */
export interface CapabilityRegistration<TBackend, TRequest, TResult> {
  /** Capability this handler serves; at most one handler per capability. */
  capability: ArbiterCapability;
  /**
   * Optional override for the resident-role bucket. When omitted the
   * arbiter falls back to the `CAPABILITY_ROLE` map; set it when a
   * specific binding needs different eviction semantics than the
   * default for its capability.
   */
  residentRole?: ResidentModelRole;
  /**
   * Best-effort estimate (MB) of the RAM/VRAM the model occupies once
   * loaded. Reported in telemetry and handed to the fit-to-budget
   * eviction step before a load; omitted or 0 means "unknown".
   */
  estimatedMb?: number;
  /**
   * Load the backend for a model key. Concurrent acquirers of the same
   * (capability, modelKey) share one in-flight call; the model may be
   * loaded again later if it has been evicted in the meantime.
   */
  load: (modelKey: string) => Promise<TBackend>;
  /** Tear the backend down. The arbiter stops referencing it after this resolves. */
  unload: (backend: TBackend) => Promise<void>;
  /** Run a single request through a loaded backend. */
  run: (backend: TBackend, request: TRequest) => Promise<TResult>;
}
|
|
172
|
+
|
|
173
|
+
/** Internal bookkeeping record for one loaded (capability, modelKey) pair. */
interface ResidentEntry {
  /** Capability the backend was loaded for. */
  capability: ArbiterCapability;
  /** Model key the backend was loaded for. */
  modelKey: string;
  /** Backend object returned by the registration's `load`. */
  backend: unknown;
  /** Role bucket used for conflict detection and eviction priority. */
  residentRole: ResidentModelRole;
  /** Footprint estimate copied from the registration (0 when unknown). */
  estimatedMb: number;
  /** Number of outstanding holders; 0 means the entry is evictable. */
  refCount: number;
  /** Arbiter-clock timestamp taken when the load completed. */
  loadedAtMs: number;
  /**
   * Arbiter-clock timestamp of the most recent `acquire`. Drives the
   * fit-to-budget LRU eviction path (`evictToFit`): when a new load
   * would exceed the usable RAM budget, the least-recently-used
   * evictable entries are dropped first.
   */
  lastUsedAt: number;
  /** Identifier under which the entry is registered in the shared registry. */
  roleId: string;
}
|
|
189
|
+
|
|
190
|
+
/**
 * Telemetry events published by the arbiter; the runtime observability
 * layer subscribes through `onEvent(...)`. The `type` field is the
 * discriminant and every variant carries an `atMs` timestamp.
 */
export type ArbiterEvent =
  // A handle came online; `loadMs` is the time the load took.
  | {
      type: "model_load";
      capability: ArbiterCapability;
      modelKey: string;
      loadMs: number;
      atMs: number;
    }
  // A handle went offline, with the reason it was torn down.
  | {
      type: "model_unload";
      capability: ArbiterCapability;
      modelKey: string;
      reason: "release" | "swap" | "pressure" | "shutdown" | "fit";
      atMs: number;
    }
  // The observed memory-pressure level changed.
  | {
      type: "memory_pressure";
      level: MemoryPressureLevel;
      source: string;
      freeMb?: number;
      atMs: number;
    }
  // A role was evicted to make room.
  | {
      type: "eviction";
      capability: ArbiterCapability;
      modelKey: string;
      reason: "pressure" | "swap" | "fit";
      estimatedMb: number;
      atMs: number;
    }
  // A request completed; `runMs` is the time it took.
  | {
      type: "capability_run";
      capability: ArbiterCapability;
      modelKey: string;
      runMs: number;
      atMs: number;
    };
|
|
228
|
+
|
|
229
|
+
/**
 * Callback signature accepted by `onEvent`. It is invoked synchronously
 * for each emitted event; a listener that throws is removed from the
 * subscriber set.
 */
export type ArbiterEventListener = (event: ArbiterEvent) => void;
|
|
230
|
+
|
|
231
|
+
/**
 * One pending request held in a per-capability run queue (see the
 * `queues` field on the arbiter).
 */
interface QueueEntry<TRequest, TResult> {
  /** Capability the request targets. */
  capability: ArbiterCapability;
  /** Model the request should run against. */
  modelKey: string;
  /** Request payload handed to the capability's `run`. */
  request: TRequest;
  /** Presumably settles the caller's promise with the result — confirm at the use site. */
  resolve: (value: TResult) => void;
  /** Presumably rejects the caller's promise — confirm at the use site. */
  reject: (err: unknown) => void;
}
|
|
238
|
+
|
|
239
|
+
/** Construction options for `MemoryArbiter`. Only `registry` is required. */
export interface MemoryArbiterOptions {
  /** Shared registry in which loaded models are registered as evictable roles. */
  registry: SharedResourceRegistry;
  /** Source of memory-pressure events. When omitted, `start()` observes nothing. */
  pressureSource?: MemoryPressureSource;
  /** Vision embedding cache to use; a fresh `VisionEmbeddingCache` is created when omitted. */
  visionCache?: VisionEmbeddingCache;
  /** Optional logger; every method on it is itself optional. */
  logger?: {
    info?: (m: string) => void;
    warn?: (m: string) => void;
    debug?: (m: string) => void;
  };
  /** Clock used for all timestamps; defaults to `Date.now()`. */
  now?: () => number;
  /**
   * Usable RAM budget (MB) for the proactive fit-to-budget LRU eviction
   * path. Before loading a model whose `estimatedMb` would push the sum
   * of resident footprints past this budget, the arbiter evicts the
   * least-recently-used evictable entries (refcount 0, never the text
   * target) until it fits. Returning `null` disables the fit path
   * entirely. That is the default, because an arbiter with no host-RAM
   * knowledge must not guess. Production wiring passes
   * `os.totalmem()/MB - ramHeadroomReserveMb()`.
   */
  budgetMb?: () => number | null;
}
|
|
260
|
+
|
|
261
|
+
/**
|
|
262
|
+
* The arbiter. One instance per process; the plugin owns the singleton
|
|
263
|
+
* (see `index.ts`), and any consumer calls `getMemoryArbiter()` rather
|
|
264
|
+
* than newing one up.
|
|
265
|
+
*/
|
|
266
|
+
export class MemoryArbiter {
|
|
267
|
+
private readonly registry: SharedResourceRegistry;
|
|
268
|
+
private readonly pressureSource: MemoryPressureSource | null;
|
|
269
|
+
private readonly visionCache: VisionEmbeddingCache;
|
|
270
|
+
private readonly log?: MemoryArbiterOptions["logger"];
|
|
271
|
+
private readonly now: () => number;
|
|
272
|
+
private readonly budgetMb: () => number | null;
|
|
273
|
+
|
|
274
|
+
private readonly capabilities = new Map<
|
|
275
|
+
ArbiterCapability,
|
|
276
|
+
CapabilityRegistration<unknown, unknown, unknown>
|
|
277
|
+
>();
|
|
278
|
+
private readonly resident = new Map<string, ResidentEntry>();
|
|
279
|
+
|
|
280
|
+
private readonly listeners = new Set<ArbiterEventListener>();
|
|
281
|
+
private pressureUnsubscribe: (() => void) | null = null;
|
|
282
|
+
private currentPressure: MemoryPressureLevel = "nominal";
|
|
283
|
+
|
|
284
|
+
/**
|
|
285
|
+
* One serialized in-flight load per (capability, modelKey) so concurrent
|
|
286
|
+
* `requestX` calls share a single load promise instead of triggering
|
|
287
|
+
* duplicate weights into RAM.
|
|
288
|
+
*/
|
|
289
|
+
private readonly inFlightLoads = new Map<string, Promise<ResidentEntry>>();
|
|
290
|
+
|
|
291
|
+
/**
|
|
292
|
+
* Per-capability run queue. The arbiter does NOT serialize across
|
|
293
|
+
* capabilities; what it serializes is the *swap*: when a request needs
|
|
294
|
+
* to evict another resident role first, the ongoing run on that role is
|
|
295
|
+
* allowed to finish, then the swap proceeds. Concurrent runs against the
|
|
296
|
+
* same loaded handle pass through directly.
|
|
297
|
+
*/
|
|
298
|
+
private readonly queues = new Map<
|
|
299
|
+
ArbiterCapability,
|
|
300
|
+
QueueEntry<unknown, unknown>[]
|
|
301
|
+
>();
|
|
302
|
+
private readonly running = new Map<ArbiterCapability, boolean>();
|
|
303
|
+
|
|
304
|
+
private shuttingDown = false;
|
|
305
|
+
|
|
306
|
+
/**
 * Store the injected collaborators, filling in defaults for everything
 * optional: no pressure source, a fresh vision cache, the wall clock,
 * and a budget function that disables fit-to-budget eviction.
 */
constructor(opts: MemoryArbiterOptions) {
  const { registry, pressureSource, visionCache, logger, now, budgetMb } = opts;
  this.registry = registry;
  this.pressureSource = pressureSource ?? null;
  this.visionCache = visionCache ?? new VisionEmbeddingCache();
  this.log = logger;
  this.now = now ?? (() => Date.now());
  this.budgetMb = budgetMb ?? (() => null);
}
|
|
314
|
+
|
|
315
|
+
/**
 * Begin observing memory pressure. Idempotent: calling it while a
 * subscription is already active does nothing, and it is a no-op when
 * no `pressureSource` was configured.
 *
 * @throws Error when the arbiter has already been shut down.
 */
start(): void {
  if (this.shuttingDown) {
    throw new Error("[memory-arbiter] cannot start after shutdown");
  }
  const source = this.pressureSource;
  if (this.pressureUnsubscribe !== null || source === null) return;

  // Pressure handling is fire-and-forget; failures are logged, never thrown
  // back into the pressure source.
  const reportFailure = (err: unknown): void => {
    this.log?.warn?.(
      `[memory-arbiter] pressure handler failed: ${err instanceof Error ? err.message : String(err)}`,
    );
  };

  source.start();
  this.pressureUnsubscribe = source.subscribe((event) => {
    void this.handlePressure(event).catch(reportFailure);
  });
}
|
|
332
|
+
|
|
333
|
+
/**
 * Stop observing memory pressure: drop the subscription (if any) and
 * stop the pressure source. Resident handles are left loaded.
 */
stop(): void {
  this.pressureUnsubscribe?.();
  this.pressureUnsubscribe = null;
  this.pressureSource?.stop();
}
|
|
341
|
+
|
|
342
|
+
/**
 * Tear the arbiter down: mark it as shutting down, stop pressure
 * observation, then unload every resident handle one after another.
 * A failure to evict one entry is logged and does not stop the rest.
 */
async shutdown(): Promise<void> {
  this.shuttingDown = true;
  this.stop();

  // Snapshot the keys first; eviction mutates the map while we iterate.
  const residentKeys = [...this.resident.keys()];
  for (const key of residentKeys) {
    const entry = this.resident.get(key);
    if (entry === undefined) continue;
    try {
      await this.evictEntry(entry, "shutdown");
    } catch (err) {
      this.log?.warn?.(
        `[memory-arbiter] shutdown evict ${key} failed: ${err instanceof Error ? err.message : String(err)}`,
      );
    }
  }

  this.resident.clear();
  this.inFlightLoads.clear();
}
|
|
359
|
+
|
|
360
|
+
/**
 * Subscribe a listener to telemetry events.
 *
 * @returns a function that removes the listener again.
 */
onEvent(listener: ArbiterEventListener): () => void {
  this.listeners.add(listener);
  const unsubscribe = (): void => {
    this.listeners.delete(listener);
  };
  return unsubscribe;
}
|
|
367
|
+
|
|
368
|
+
/**
 * Deliver `event` synchronously to every subscribed listener.
 *
 * A listener that throws must not break the arbiter or starve the other
 * listeners, so the error is contained and the offending listener is
 * unsubscribed. The removal is logged as a warning; otherwise telemetry
 * for that subscriber would stop with no trace of why. Deleting from a
 * `Set` while iterating it with `for...of` is well-defined.
 */
private emit(event: ArbiterEvent): void {
  for (const listener of this.listeners) {
    try {
      listener(event);
    } catch (err) {
      this.listeners.delete(listener);
      this.log?.warn?.(
        `[memory-arbiter] event listener threw on "${event.type}" and was removed: ${err instanceof Error ? err.message : String(err)}`,
      );
    }
  }
}
|
|
377
|
+
|
|
378
|
+
/**
 * Register the handler for one capability.
 *
 * @throws Error when a handler for the same capability already exists.
 */
registerCapability<TBackend, TRequest, TResult>(
  registration: CapabilityRegistration<TBackend, TRequest, TResult>,
): void {
  const { capability } = registration;
  if (this.capabilities.has(capability)) {
    throw new Error(
      `[memory-arbiter] capability "${capability}" is already registered`,
    );
  }
  // The map stores handlers with their type parameters erased; the
  // typed view is restored by the caller-facing generic methods.
  const erased = registration as unknown as CapabilityRegistration<
    unknown,
    unknown,
    unknown
  >;
  this.capabilities.set(capability, erased);
}
|
|
396
|
+
|
|
397
|
+
/** Report whether a handler has been registered for `capability`. */
hasCapability(capability: ArbiterCapability): boolean {
  const registration = this.capabilities.get(capability);
  return registration !== undefined;
}
|
|
401
|
+
|
|
402
|
+
/**
 * Diagnostic snapshot of every resident handle. Each element is a fresh
 * plain object, so callers cannot mutate the arbiter's bookkeeping; the
 * backend object and the registry role id are deliberately left out.
 */
residentSnapshot(): ReadonlyArray<{
  capability: ArbiterCapability;
  modelKey: string;
  residentRole: ResidentModelRole;
  estimatedMb: number;
  refCount: number;
  loadedAtMs: number;
  lastUsedAt: number;
}> {
  const snapshot: Array<{
    capability: ArbiterCapability;
    modelKey: string;
    residentRole: ResidentModelRole;
    estimatedMb: number;
    refCount: number;
    loadedAtMs: number;
    lastUsedAt: number;
  }> = [];
  for (const entry of this.resident.values()) {
    const {
      capability,
      modelKey,
      residentRole,
      estimatedMb,
      refCount,
      loadedAtMs,
      lastUsedAt,
    } = entry;
    snapshot.push({
      capability,
      modelKey,
      residentRole,
      estimatedMb,
      refCount,
      loadedAtMs,
      lastUsedAt,
    });
  }
  return snapshot;
}
|
|
422
|
+
|
|
423
|
+
/** Most recently recorded memory-pressure level (`"nominal"` until one is observed). */
currentPressureLevel(): MemoryPressureLevel {
  return this.currentPressure;
}
|
|
426
|
+
|
|
427
|
+
/**
 * Acquire a handle for `(capability, modelKey)`.
 *
 * When the model is already resident its refcount is bumped and the
 * handle is returned straight away; otherwise the model is loaded, with
 * concurrent acquirers sharing the single in-flight load.
 *
 * Under critical memory pressure every capability except `"text"` is
 * refused, so nothing is stacked on top of a system the OS has already
 * flagged as in trouble. Text is always allowed, because without text
 * the agent is a brick.
 *
 * @throws Error when the capability is not registered, the arbiter is
 *   shutting down, or pressure is critical for a non-text capability.
 */
async acquire<TBackend>(
  capability: ArbiterCapability,
  modelKey: string,
): Promise<ArbiterHandle<TBackend>> {
  const registration = this.capabilities.get(capability);
  if (registration === undefined) {
    throw new Error(
      `[memory-arbiter] no capability registered for "${capability}"`,
    );
  }
  if (this.shuttingDown) {
    throw new Error(
      `[memory-arbiter] arbiter is shutting down; cannot acquire ${capability}`,
    );
  }
  const pressureIsCritical = this.currentPressure === "critical";
  if (pressureIsCritical && capability !== "text") {
    throw new Error(
      `[memory-arbiter] memory pressure is critical; refusing to load capability "${capability}". Free RAM and retry.`,
    );
  }

  const entry = await this.loadOrReuse(registration, modelKey);
  entry.refCount += 1;
  entry.lastUsedAt = this.now();
  return this.handleFor<TBackend>(entry);
}
|
|
461
|
+
|
|
462
|
+
/**
 * Build the caller-facing handle for a resident entry. The caller
 * (`acquire`) has already counted one reference on `entry.refCount`
 * for this handle.
 *
 * The handle keeps its own count of outstanding holds (`holds`),
 * starting at 1 for the acquire. `retain()` adds a hold and bumps the
 * entry refcount; `release()` drops one hold and one refcount. Keeping
 * the two in lockstep means every `retain()` can be balanced by a
 * `release()`. With a single boolean "released" flag only the first
 * `release()` would take effect, so any retained reference would pin
 * the model forever.
 *
 * Once all holds are released, further `release()` calls are no-ops
 * and `retain()` throws.
 */
private handleFor<TBackend>(entry: ResidentEntry): ArbiterHandle<TBackend> {
  // Holds owned by THIS handle object; never exceeds its share of refCount.
  let holds = 1;

  const retain = (): void => {
    if (holds === 0) {
      throw new Error(
        `[memory-arbiter] cannot retain ${entry.capability}/${entry.modelKey} after release`,
      );
    }
    holds += 1;
    entry.refCount += 1;
  };

  const release = async (): Promise<void> => {
    if (holds === 0) return;
    holds -= 1;
    entry.refCount = Math.max(0, entry.refCount - 1);
    // We don't unload at refcount=0; the role merely becomes evictable
    // and the pressure / idle path reclaims it. Keeps warm paths fast.
    this.log?.debug?.(
      `[memory-arbiter] release ${entry.capability}/${entry.modelKey} refcount=${entry.refCount}`,
    );
  };

  return {
    capability: entry.capability,
    modelKey: entry.modelKey,
    backend: entry.backend as TBackend,
    retain,
    release,
  };
}
|
|
490
|
+
|
|
491
|
+
/** Compose the map key that identifies one (capability, modelKey) pair. */
private residentKey(capability: ArbiterCapability, modelKey: string): string {
  const separator = "::";
  return capability + separator + modelKey;
}
|
|
494
|
+
|
|
495
|
+
/**
 * Return the resident entry for `(registration.capability, modelKey)`,
 * loading the model when it is not resident yet.
 *
 * Resolution order:
 *   1. An already-resident entry is returned as-is.
 *   2. A load already in flight for the same key is shared.
 *   3. Otherwise a new load starts and is recorded in `inFlightLoads`
 *      until it settles.
 *
 * A new load first evicts every resident entry that occupies the same
 * resident role under a different (capability, modelKey), waiting for
 * active holders to drain. It then runs the fit-to-budget step and
 * only then calls the registration's `load`.
 *
 * Shutdown safety: `shutdown()` unloads what is resident at that moment
 * and clears the bookkeeping maps, so a load that completes afterwards
 * would register a backend nobody ever unloads. The shutdown flag is
 * therefore checked right before and right after `load`; a backend
 * produced after shutdown is unloaded immediately and the load rejects.
 *
 * The returned entry has `refCount` 0; the caller is responsible for
 * counting its own reference.
 *
 * @throws Error when draining a conflicting entry times out, when the
 *   arbiter shuts down mid-load, or when `load` itself rejects.
 */
private async loadOrReuse(
  registration: CapabilityRegistration<unknown, unknown, unknown>,
  modelKey: string,
): Promise<ResidentEntry> {
  const key = this.residentKey(registration.capability, modelKey);
  const existing = this.resident.get(key);
  if (existing) return existing;
  const inFlight = this.inFlightLoads.get(key);
  if (inFlight) return inFlight;

  // Before loading, decide whether the new role conflicts with what's
  // currently resident. The conservative policy: if the same
  // `residentRole` is held by a different modelKey, evict the existing
  // one first (one model per role). Different roles can co-exist; the
  // pressure path is what rebalances them.
  const role =
    registration.residentRole ?? CAPABILITY_ROLE[registration.capability];
  const conflicts = this.findConflictingRole(
    role,
    registration.capability,
    modelKey,
  );

  const promise = (async (): Promise<ResidentEntry> => {
    for (const conflict of conflicts) {
      if (conflict.refCount > 0) {
        // A different consumer is actively using the conflicting model.
        // Wait for it to drain; the arbiter does NOT cancel in-flight
        // work for a swap.
        await this.waitForRefcountZero(conflict);
      }
      await this.evictEntry(conflict, "swap");
    }
    // Proactively make room for the incoming weights: evict the
    // least-recently-used evictable models until this one fits the
    // usable RAM budget. No-op when no budget is configured or the
    // incoming footprint is unknown.
    await this.evictToFit(registration.estimatedMb ?? 0);

    // Don't start loading weights for an arbiter that is going away.
    if (this.shuttingDown) {
      throw new Error(
        `[memory-arbiter] arbiter is shutting down; aborting load of ${registration.capability}/${modelKey}`,
      );
    }

    const startMs = this.now();
    const backend = await registration.load(modelKey);

    // shutdown() may have run while the load was awaiting. Registering
    // the backend now would leak it, so tear it down and reject.
    if (this.shuttingDown) {
      try {
        await registration.unload(backend);
      } catch (err) {
        this.log?.warn?.(
          `[memory-arbiter] unload after shutdown failed for ${registration.capability}/${modelKey}: ${err instanceof Error ? err.message : String(err)}`,
        );
      }
      throw new Error(
        `[memory-arbiter] arbiter shut down while loading ${registration.capability}/${modelKey}`,
      );
    }

    const loadedAtMs = this.now();
    const entry: ResidentEntry = {
      capability: registration.capability,
      modelKey,
      backend,
      residentRole: role,
      estimatedMb: registration.estimatedMb ?? 0,
      refCount: 0,
      loadedAtMs,
      lastUsedAt: loadedAtMs,
      roleId: `arbiter:${registration.capability}:${modelKey}`,
    };
    const evictable = this.makeEvictable(entry, registration);
    this.registry.acquire(evictable);
    this.resident.set(key, entry);
    this.emit({
      type: "model_load",
      capability: registration.capability,
      modelKey,
      loadMs: loadedAtMs - startMs,
      atMs: loadedAtMs,
    });
    this.log?.info?.(
      `[memory-arbiter] loaded ${registration.capability}/${modelKey} in ${loadedAtMs - startMs}ms`,
    );
    return entry;
  })().finally(() => {
    // Success or failure, the key is no longer "loading".
    this.inFlightLoads.delete(key);
  });
  this.inFlightLoads.set(key, promise);
  return promise;
}
|
|
567
|
+
|
|
568
|
+
/**
 * Collect the resident entries that occupy `role` but are NOT the
 * `(capability, modelKey)` pair being requested. These are the entries
 * that must be swapped out before the requested model is loaded.
 */
private findConflictingRole(
  role: ResidentModelRole,
  capability: ArbiterCapability,
  modelKey: string,
): ResidentEntry[] {
  return Array.from(this.resident.values()).filter((candidate) => {
    const isRequestedModel =
      candidate.capability === capability && candidate.modelKey === modelKey;
    return candidate.residentRole === role && !isRequestedModel;
  });
}
|
|
582
|
+
|
|
583
|
+
/**
 * Wait until nobody holds `entry` any more (`refCount` reaches 0).
 *
 * The arbiter has no per-entry condition variable, so this is a
 * cooperative poll: each iteration yields to the event loop with a
 * zero-delay timer (a macrotask, so pending I/O and other timers get
 * to run) and then re-reads the refcount. `release()` decrements the
 * refcount synchronously, so a drop is visible on the next iteration.
 *
 * The deadline is checked BEFORE each sleep and the refcount is
 * re-checked by the loop condition AFTER each sleep. A holder that
 * released during the final sleep therefore counts as drained rather
 * than being reported as a timeout.
 *
 * @throws Error when the entry is still held after 10 seconds; the
 *   arbiter refuses to swap a model out from under an active consumer.
 */
private async waitForRefcountZero(entry: ResidentEntry): Promise<void> {
  const drainTimeoutMs = 10_000;
  const start = this.now();
  while (entry.refCount > 0) {
    if (this.now() - start > drainTimeoutMs) {
      throw new Error(
        `[memory-arbiter] timeout waiting for ${entry.capability}/${entry.modelKey} to drain (refcount=${entry.refCount}); refusing to swap mid-flight`,
      );
    }
    await new Promise<void>((resolve) => setTimeout(resolve, 0));
  }
}
|
|
598
|
+
|
|
599
|
+
/**
 * Wraps a resident entry in an `EvictableModelRole` built by
 * `createEvictableModelRole`.
 *
 * @param entry - Resident entry the role describes.
 * @param registration - Capability registration forwarded to `evictEntry`.
 * @returns The role object produced by `createEvictableModelRole`.
 */
private makeEvictable(
  entry: ResidentEntry,
  registration: CapabilityRegistration<unknown, unknown, unknown>,
): EvictableModelRole {
  // Residency is looked up on every call rather than captured once.
  const isResident = (): boolean =>
    this.resident.has(this.residentKey(entry.capability, entry.modelKey));

  // Eviction callback (per the original note, invoked by the shared
  // registry's monitor). A positive refcount means someone is holding the
  // handle, so it is left alone and the caller can move on to another role.
  const evict = async (): Promise<void> => {
    const inUse = entry.refCount > 0;
    if (!inUse) {
      await this.evictEntry(entry, "pressure", registration);
    }
  };

  return createEvictableModelRole({
    id: entry.roleId,
    role: entry.residentRole,
    evictionPriority: RESIDENT_ROLE_PRIORITY[entry.residentRole],
    estimatedMb: entry.estimatedMb,
    isResident,
    evict,
  });
}
|
|
620
|
+
|
|
621
|
+
/**
 * Removes `entry` from the resident map, releases its registry role, unloads
 * its backend and emits the matching telemetry events.
 *
 * No-op when the entry is not currently resident. Failures from the registry
 * release or the unload are logged as warnings and do not abort the eviction.
 *
 * @param entry - Resident entry to drop.
 * @param reason - Why the entry is being dropped; `"pressure"`, `"swap"` and
 *   `"fit"` additionally emit an `eviction` event.
 * @param registration - Registration whose `unload` is used; when omitted the
 *   registration is looked up in `this.capabilities` by the entry's capability.
 */
private async evictEntry(
  entry: ResidentEntry,
  reason: "release" | "swap" | "pressure" | "shutdown" | "fit",
  registration?: CapabilityRegistration<unknown, unknown, unknown>,
): Promise<void> {
  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  const key = this.residentKey(entry.capability, entry.modelKey);
  if (!this.resident.has(key)) {
    return;
  }
  // Drop the map entry first so the entry is no longer visible as resident
  // while the asynchronous release/unload below is in progress.
  this.resident.delete(key);

  try {
    await this.registry.release(entry.roleId);
  } catch (err) {
    this.log?.warn?.(
      `[memory-arbiter] registry release failed for ${entry.roleId}: ${describeError(err)}`,
    );
  }

  const reg = registration ?? this.capabilities.get(entry.capability);
  try {
    await reg?.unload(entry.backend);
  } catch (err) {
    this.log?.warn?.(
      `[memory-arbiter] unload failed for ${entry.capability}/${entry.modelKey}: ${describeError(err)}`,
    );
  }

  this.emit({
    type: "model_unload",
    capability: entry.capability,
    modelKey: entry.modelKey,
    reason,
    atMs: this.now(),
  });

  switch (reason) {
    case "pressure":
    case "swap":
    case "fit":
      this.emit({
        type: "eviction",
        capability: entry.capability,
        modelKey: entry.modelKey,
        reason,
        estimatedMb: entry.estimatedMb,
        atMs: this.now(),
      });
      break;
    default:
      break;
  }

  this.log?.info?.(
    `[memory-arbiter] evicted ${entry.capability}/${entry.modelKey} reason=${reason}`,
  );
}
|
|
665
|
+
|
|
666
|
+
/**
 * Proactive fit-to-budget eviction: before a model needing `incomingMb` is
 * loaded, drop least-recently-used evictable residents until the projected
 * resident footprint fits within `budgetMb()`.
 *
 * Policy:
 *  - Skipped entirely when no budget is configured (`budgetMb()` is null or
 *    ≤ 0) or when the incoming footprint is unknown (`incomingMb` ≤ 0).
 *  - Candidates come from `lruEvictionCandidate()`, which never offers the
 *    text target or an entry with a live refcount.
 *  - Best-effort: once no candidate remains the method returns even if the
 *    projection still exceeds the budget. Per the original note, the
 *    OS-pressure path and the `active-model` admission gate are the
 *    backstops; this path only avoids predictable overcommit.
 *
 * @param incomingMb - Estimated footprint, in MB, of the model about to load.
 */
private async evictToFit(incomingMb: number): Promise<void> {
  const budget = this.budgetMb();
  if (budget === null || budget <= 0 || incomingMb <= 0) {
    return;
  }

  // Recomputed on every pass because each eviction shrinks `this.resident`.
  const projectedMb = (): number =>
    Array.from(this.resident.values()).reduce(
      (total, resident) => total + resident.estimatedMb,
      0,
    ) + incomingMb;

  for (;;) {
    const overBudget = projectedMb() > budget;
    if (!overBudget) return;
    const victim = this.lruEvictionCandidate();
    if (!victim) return;
    await this.evictEntry(victim, "fit");
  }
}
|
|
699
|
+
|
|
700
|
+
/**
 * Picks the entry the fit path should drop next.
 *
 * Only entries with a zero refcount that are not the `"text-target"` role are
 * considered. Among those the winner has the oldest `lastUsedAt`; ties go to
 * the lower `RESIDENT_ROLE_PRIORITY` value, then to the older `loadedAtMs`.
 *
 * @returns The chosen entry, or `null` when nothing is evictable.
 */
private lruEvictionCandidate(): ResidentEntry | null {
  // True when `challenger` should be evicted ahead of `incumbent`.
  const evictsBefore = (
    challenger: ResidentEntry,
    incumbent: ResidentEntry,
  ): boolean => {
    if (challenger.lastUsedAt !== incumbent.lastUsedAt) {
      return challenger.lastUsedAt < incumbent.lastUsedAt;
    }
    const challengerPriority = RESIDENT_ROLE_PRIORITY[challenger.residentRole];
    const incumbentPriority = RESIDENT_ROLE_PRIORITY[incumbent.residentRole];
    if (challengerPriority !== incumbentPriority) {
      return challengerPriority < incumbentPriority;
    }
    return challenger.loadedAtMs < incumbent.loadedAtMs;
  };

  let chosen: ResidentEntry | null = null;
  for (const resident of this.resident.values()) {
    const pinned =
      resident.refCount > 0 || resident.residentRole === "text-target";
    if (pinned) continue;
    if (chosen === null || evictsBefore(resident, chosen)) {
      chosen = resident;
    }
  }
  return chosen;
}
|
|
728
|
+
|
|
729
|
+
/**
 * Reacts to a memory-pressure notification.
 *
 * Always records the level and emits a `memory_pressure` event. For any
 * level other than `"nominal"` it first purges expired vision-embedding
 * cache entries, then:
 *  - `"low"`: asks the registry to evict its lowest-priority role (one role
 *    per pressure tick).
 *  - otherwise (critical): evicts every resident entry that is not the
 *    `"text-target"` and has no live refcount, in ascending
 *    `RESIDENT_ROLE_PRIORITY` order.
 *
 * @param event - Pressure event carrying level, source, timestamp and an
 *   optional `freeMb` reading.
 */
private async handlePressure(event: MemoryPressureEvent): Promise<void> {
  const { level } = event;
  this.currentPressure = level;

  // `freeMb` is only included in the emitted event when the source reported it.
  const freeMbField = event.freeMb === undefined ? {} : { freeMb: event.freeMb };
  this.emit({
    type: "memory_pressure",
    level: event.level,
    source: event.source,
    ...freeMbField,
    atMs: event.atMs,
  });

  if (level === "nominal") return;

  // Cheap reclaim first: drop any expired vision-embedding cache entries.
  const purgedCount = this.visionCache.purgeExpired(this.now());
  if (purgedCount > 0) {
    this.log?.debug?.(
      `[memory-arbiter] purged ${purgedCount} expired vision-embedding entries on pressure`,
    );
  }

  if (level === "low") {
    // Gentle path: a single registry-chosen eviction per tick.
    await this.registry.evictLowestPriorityRole();
    return;
  }

  // Critical path. The text target is never evicted here — per the original
  // note, losing it bricks the agent and would not rescue an OOM that is
  // already past the critical line.
  const byAscendingPriority = (a: ResidentEntry, b: ResidentEntry): number =>
    RESIDENT_ROLE_PRIORITY[a.residentRole] -
    RESIDENT_ROLE_PRIORITY[b.residentRole];
  const victims = [...this.resident.values()]
    .filter((resident) => resident.residentRole !== "text-target")
    .sort(byAscendingPriority);

  for (const victim of victims) {
    // Checked at eviction time, not at snapshot time: a holder may have
    // acquired or released the entry while an earlier eviction was awaited.
    if (victim.refCount > 0) continue;
    await this.evictEntry(victim, "pressure");
  }
}
|
|
771
|
+
|
|
772
|
+
// ---------------------------------------------------------------------
|
|
773
|
+
// Capability-specific request fns. Thin wrappers around the queue —
|
|
774
|
+
// each one calls `enqueueRequest` with its capability tag and the
|
|
775
|
+
// caller's request payload. Plugins call these instead of `acquire`
|
|
776
|
+
// directly when they don't need to keep a long-lived handle.
|
|
777
|
+
// ---------------------------------------------------------------------
|
|
778
|
+
|
|
779
|
+
/**
 * Queues `req.payload` for the `"text"` capability using model `req.modelKey`.
 *
 * @returns A promise for the value passed through by `enqueueRequest`.
 */
requestText<TRequest, TResult>(req: {
  modelKey: string;
  payload: TRequest;
}): Promise<TResult> {
  const { modelKey, payload } = req;
  return this.enqueueRequest("text", modelKey, payload);
}
|
|
785
|
+
|
|
786
|
+
/**
 * Queues `req.payload` for the `"embedding"` capability using model
 * `req.modelKey`.
 *
 * @returns A promise for the value passed through by `enqueueRequest`.
 */
requestEmbedding<TRequest, TResult>(req: {
  modelKey: string;
  payload: TRequest;
}): Promise<TResult> {
  const { modelKey, payload } = req;
  return this.enqueueRequest("embedding", modelKey, payload);
}
|
|
792
|
+
|
|
793
|
+
/**
 * Queues `req.payload` for the `"vision-describe"` capability using model
 * `req.modelKey`.
 *
 * @returns A promise for the value passed through by `enqueueRequest`.
 */
requestVisionDescribe<TRequest, TResult>(req: {
  modelKey: string;
  payload: TRequest;
}): Promise<TResult> {
  const { modelKey, payload } = req;
  return this.enqueueRequest("vision-describe", modelKey, payload);
}
|
|
799
|
+
|
|
800
|
+
/**
 * Queues `req.payload` for the `"image-gen"` capability using model
 * `req.modelKey`.
 *
 * @returns A promise for the value passed through by `enqueueRequest`.
 */
requestImageGen<TRequest, TResult>(req: {
  modelKey: string;
  payload: TRequest;
}): Promise<TResult> {
  const { modelKey, payload } = req;
  return this.enqueueRequest("image-gen", modelKey, payload);
}
|
|
806
|
+
|
|
807
|
+
/**
 * Queues `req.payload` for the `"transcribe"` capability using model
 * `req.modelKey`.
 *
 * @returns A promise for the value passed through by `enqueueRequest`.
 */
requestTranscribe<TRequest, TResult>(req: {
  modelKey: string;
  payload: TRequest;
}): Promise<TResult> {
  const { modelKey, payload } = req;
  return this.enqueueRequest("transcribe", modelKey, payload);
}
|
|
813
|
+
|
|
814
|
+
/**
 * Queues `req.payload` for the `"speak"` capability using model
 * `req.modelKey`.
 *
 * @returns A promise for the value passed through by `enqueueRequest`.
 */
requestSpeak<TRequest, TResult>(req: {
  modelKey: string;
  payload: TRequest;
}): Promise<TResult> {
  const { modelKey, payload } = req;
  return this.enqueueRequest("speak", modelKey, payload);
}
|
|
820
|
+
|
|
821
|
+
/**
 * Text-to-speech entry point that enqueues on the same `"speak"` capability
 * and queue as {@link requestSpeak}; the two names are interchangeable.
 *
 * Per the original note, the name matches the `requestTextToSpeech` naming
 * used by `provider.ts`'s `ModelType.TEXT_TO_SPEECH` handler and by external
 * WS5 callers that don't import the `ArbiterCapability` type, mirroring the
 * `requestVisionDescribe` ergonomic.
 *
 * @returns A promise for the value passed through by `enqueueRequest`.
 */
requestTextToSpeech<TRequest, TResult>(req: {
  modelKey: string;
  payload: TRequest;
}): Promise<TResult> {
  const { modelKey, payload } = req;
  return this.enqueueRequest("speak", modelKey, payload);
}
|
|
834
|
+
|
|
835
|
+
/**
 * Appends a request to the per-capability queue and kicks off a drain.
 *
 * @param capability - Capability whose queue receives the request.
 * @param modelKey - Model the request should run against.
 * @param payload - Caller-supplied request body, stored as the job's `request`.
 * @returns A promise settled by the queued job's `resolve`/`reject` callbacks.
 * @throws Error (as a rejection) when no registration exists for `capability`.
 */
private async enqueueRequest<TRequest, TResult>(
  capability: ArbiterCapability,
  modelKey: string,
  payload: TRequest,
): Promise<TResult> {
  const registered = this.capabilities.get(capability);
  if (!registered) {
    throw new Error(
      `[memory-arbiter] no capability registered for "${capability}"`,
    );
  }

  return new Promise<TResult>((resolve, reject) => {
    // Reuse the existing queue array when there is one.
    let pending = this.queues.get(capability);
    if (pending == null) {
      pending = [];
    }
    pending.push({
      capability,
      modelKey,
      request: payload,
      resolve: resolve as (value: unknown) => void,
      reject,
    });
    this.queues.set(capability, pending);

    // Fire-and-forget: per-job failures reach the caller through `reject`;
    // only a failure of the drain loop itself is logged here.
    void this.drainQueue(capability).catch((err) => {
      this.log?.warn?.(
        `[memory-arbiter] queue drain failed for ${capability}: ${err instanceof Error ? err.message : String(err)}`,
      );
    });
  });
}
|
|
863
|
+
|
|
864
|
+
/**
 * Processes the queued requests of one capability, one at a time.
 *
 * Returns immediately when a drain for the capability is already marked as
 * running. Each job acquires a handle for its model, runs the capability's
 * `run` against the handle's backend, emits a `capability_run` event,
 * resolves the job, and always releases the handle. Any error in that
 * sequence rejects the job and the loop moves on.
 *
 * @param capability - Capability whose queue should be drained.
 */
private async drainQueue(capability: ArbiterCapability): Promise<void> {
  if (this.running.get(capability)) return;
  this.running.set(capability, true);
  try {
    const pending = this.queues.get(capability);
    if (!pending) return;

    // `shift()` yields undefined once the queue is empty, which ends the loop.
    for (let job = pending.shift(); job; job = pending.shift()) {
      // Looked up per job so a deregistration that happens while earlier
      // jobs were running is noticed.
      const registration = this.capabilities.get(capability);
      if (!registration) {
        job.reject(
          new Error(
            `[memory-arbiter] capability "${capability}" was deregistered mid-queue`,
          ),
        );
        continue;
      }

      try {
        const handle = await this.acquire(capability, job.modelKey);
        const startedAtMs = this.now();
        try {
          const result = await registration.run(handle.backend, job.request);
          const runMs = this.now() - startedAtMs;
          this.emit({
            type: "capability_run",
            capability,
            modelKey: job.modelKey,
            runMs,
            atMs: this.now(),
          });
          job.resolve(result);
        } finally {
          // Release the handle whether the run succeeded or threw.
          await handle.release();
        }
      } catch (err) {
        job.reject(err);
      }
    }
  } finally {
    this.running.set(capability, false);
  }
}
|
|
906
|
+
|
|
907
|
+
// ---------------------------------------------------------------------
|
|
908
|
+
// Vision-embedding cache passthroughs.
|
|
909
|
+
// ---------------------------------------------------------------------
|
|
910
|
+
|
|
911
|
+
/**
 * Looks up a vision embedding in `this.visionCache` by content hash.
 *
 * @param hash - Cache key for the embedding.
 * @returns Whatever `visionCache.get` yields for `hash` (an entry or `null`).
 */
getCachedVisionEmbedding(hash: string): VisionEmbeddingEntry | null {
  const cached = this.visionCache.get(hash);
  return cached;
}
|
|
914
|
+
|
|
915
|
+
/**
 * Stores a vision embedding in `this.visionCache`.
 *
 * @param hash - Cache key for the embedding.
 * @param entry - Embedding tokens with their token count and hidden size.
 * @param ttlMs - Optional time-to-live forwarded unchanged to the cache.
 */
setCachedVisionEmbedding(
  hash: string,
  entry: { tokens: Float32Array; tokenCount: number; hiddenSize: number },
  ttlMs?: number,
): void {
  const cache = this.visionCache;
  cache.set(hash, entry, ttlMs);
}
|
|
922
|
+
|
|
923
|
+
// ---------------------------------------------------------------------
|
|
924
|
+
// ASR transcript cache passthroughs.
|
|
925
|
+
//
|
|
926
|
+
// Re-transcribing the same audio is a frequent test/dev pattern (the
|
|
927
|
+
// dashboard's "play the WAV back" view, the streaming-audio handler
|
|
928
|
+
// flushing duplicate frames at segment boundaries). The cache is
|
|
929
|
+
// content-hashed by `services/asr/hash.ts`, with a hard cap so a long
|
|
930
|
+
// session can't memory-leak. Default TTL is 1 hour; entries are
|
|
931
|
+
// evicted on touch when stale.
|
|
932
|
+
// ---------------------------------------------------------------------
|
|
933
|
+
|
|
934
|
+
/** Cached transcripts keyed by hash, each with an absolute expiry timestamp. */
private readonly asrTranscriptCache: Map<
  string,
  { text: string; expiresAt: number }
> = new Map();
/** Maximum number of transcripts kept before the oldest key is dropped. */
private static readonly ASR_TRANSCRIPT_CACHE_MAX = 256;
/** Default transcript time-to-live: one hour, in milliseconds. */
private static readonly ASR_TRANSCRIPT_DEFAULT_TTL_MS = 3_600_000;
|
|
940
|
+
|
|
941
|
+
/**
 * Returns a cached transcript if one exists and has not expired.
 *
 * An expired entry is removed on lookup. A live entry is moved to the end of
 * the map's iteration order so it is the last to be dropped by the size cap.
 *
 * @param hash - Cache key for the audio.
 * @returns `{ text, live: true }` on a hit, otherwise `null`.
 */
getCachedAsrTranscript(
  hash: string,
): { text: string; live?: boolean } | null {
  const cached = this.asrTranscriptCache.get(hash);
  if (!cached) {
    return null;
  }
  const expired = cached.expiresAt <= this.now();
  // Both outcomes start by removing the key: stale entries stay removed,
  // live ones are re-inserted below to refresh their position.
  this.asrTranscriptCache.delete(hash);
  if (expired) {
    return null;
  }
  this.asrTranscriptCache.set(hash, cached);
  return { text: cached.text, live: true };
}
|
|
955
|
+
|
|
956
|
+
/**
 * Stores a transcript under `hash` and enforces the cache size cap.
 *
 * @param hash - Cache key for the audio.
 * @param entry - Transcript text to store.
 * @param ttlMs - Time-to-live in milliseconds; defaults to
 *   `MemoryArbiter.ASR_TRANSCRIPT_DEFAULT_TTL_MS`.
 */
setCachedAsrTranscript(
  hash: string,
  entry: { text: string },
  ttlMs?: number,
): void {
  const ttl = ttlMs ?? MemoryArbiter.ASR_TRANSCRIPT_DEFAULT_TTL_MS;
  // `Map.set` on an existing key keeps the key's original insertion position.
  // Delete first so a rewritten transcript counts as the most recent entry,
  // matching the delete-then-set touch performed on cache reads; otherwise a
  // freshly rewritten entry could be the first one dropped by the cap below.
  this.asrTranscriptCache.delete(hash);
  this.asrTranscriptCache.set(hash, {
    text: entry.text,
    expiresAt: this.now() + ttl,
  });
  // Drop keys from the front of the iteration order (the least recently
  // written or read) until the cache is back within its cap.
  while (
    this.asrTranscriptCache.size > MemoryArbiter.ASR_TRANSCRIPT_CACHE_MAX
  ) {
    const oldest = this.asrTranscriptCache.keys().next().value;
    if (oldest === undefined) break;
    this.asrTranscriptCache.delete(oldest);
  }
}
|
|
974
|
+
}
|
|
975
|
+
|
|
976
|
+
/**
 * Module-level slot holding the process-wide arbiter singleton.
 *
 * Per the original note, the plugin's `index.ts` calls `setMemoryArbiter`
 * once at boot and consumers read the value through `getMemoryArbiter`,
 * which throws while the slot is still empty — the runtime is expected to
 * fill it before any consumer touches it.
 */
let globalArbiter: MemoryArbiter | null = null;
|
|
983
|
+
|
|
984
|
+
/**
 * Installs (or, with `null`, clears) the process-wide arbiter.
 *
 * @param arbiter - Instance to install, or `null` to clear the slot.
 */
export function setMemoryArbiter(arbiter: MemoryArbiter | null): void {
  globalArbiter = arbiter;
}
|
|
987
|
+
|
|
988
|
+
/**
 * Returns the process-wide arbiter.
 *
 * @returns The arbiter previously installed with `setMemoryArbiter`.
 * @throws Error when no arbiter is currently installed.
 */
export function getMemoryArbiter(): MemoryArbiter {
  const arbiter = globalArbiter;
  if (arbiter) {
    return arbiter;
  }
  throw new Error(
    "[memory-arbiter] no arbiter configured; call setMemoryArbiter() at plugin init",
  );
}
|
|
996
|
+
|
|
997
|
+
/**
 * Test/diagnostic accessor: returns the current singleton without throwing.
 *
 * @returns The installed arbiter, or `null` when none is configured.
 */
export function tryGetMemoryArbiter(): MemoryArbiter | null {
  const current = globalArbiter;
  return current;
}
|