@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +81 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +7 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +54 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1171 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +190 -0
- package/src/routes/local-inference-asr-route.ts +213 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +423 -0
- package/src/routes/local-inference-compat-routes.ts +782 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1398 -0
- package/src/runtime/index.d.ts +14 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +27 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/asr/errors.d.ts +21 -0
- package/src/services/asr/errors.d.ts.map +1 -0
- package/src/services/asr/errors.ts +50 -0
- package/src/services/asr/hash.d.ts +28 -0
- package/src/services/asr/hash.d.ts.map +1 -0
- package/src/services/asr/hash.ts +49 -0
- package/src/services/asr/index.d.ts +76 -0
- package/src/services/asr/index.d.ts.map +1 -0
- package/src/services/asr/index.ts +178 -0
- package/src/services/asr/types.d.ts +91 -0
- package/src/services/asr/types.d.ts.map +1 -0
- package/src/services/asr/types.ts +95 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +240 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +724 -0
- package/src/services/downloader.ts +899 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +534 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1891 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +183 -0
- package/src/services/hardware.ts +404 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +281 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +30 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +225 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +693 -0
- package/src/services/manifest/schema.d.ts +715 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +655 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +569 -0
- package/src/services/memory-arbiter.d.ts +343 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +1000 -0
- package/src/services/memory-monitor.d.ts +119 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +296 -0
- package/src/services/memory-pressure.d.ts +127 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +413 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +672 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +376 -0
- package/src/services/routing-policy.d.ts +55 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.ts +228 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +15 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/tts/errors.ts +46 -0
- package/src/services/tts/index.ts +214 -0
- package/src/services/tts/tts-audio-cache.ts +235 -0
- package/src/services/tts/types.ts +157 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +148 -0
- package/src/services/voice/embedding.ts +244 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +746 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2226 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +636 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +671 -0
- package/src/services/voice/ffi-bindings.ts +3050 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.ts +105 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +420 -0
- package/src/services/voice/voice-budget.ts +656 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RAM-pressure monitor for the local-inference path (W10 / J2).
|
|
3
|
+
*
|
|
4
|
+
* Polls `os.freemem()` / `os.totalmem()` on an interval. When free RAM
|
|
5
|
+
* crosses a low-water threshold, the monitor walks the
|
|
6
|
+
* `SharedResourceRegistry`'s evictable model roles in *ascending priority*
|
|
7
|
+
* — `vision/mmproj < embedding < vad < ASR < TTS < text-target` —
|
|
8
|
+
* and evicts the cheapest one. Cheap evictions are the voice TTS/ASR weights
|
|
9
|
+
* (`MmapRegionHandle.evictPages()`), the vision projector, and unloading the
|
|
10
|
+
* embedding model. Every eviction is logged (observable) and reversible
|
|
11
|
+
* (roles re-load lazily on next use).
|
|
12
|
+
*
|
|
13
|
+
* The monitor never *loads* anything — it only frees memory. Re-load is the
|
|
14
|
+
* caller's job, on demand. It also never evicts the text target (priority
|
|
15
|
+
* 100) unless it's literally the only resident role and pressure persists,
|
|
16
|
+
* which is the intended "nothing left to give" behaviour.
|
|
17
|
+
*
|
|
18
|
+
* No fallback sludge: when there is nothing to evict and pressure persists,
|
|
19
|
+
* the monitor logs a warning and stops trying for a back-off window — it does
|
|
20
|
+
* not pretend it fixed anything.
|
|
21
|
+
*/
|
|
22
|
+
import type { ResidentModelRole, SharedResourceRegistry } from "./voice/shared-resources";
|
|
23
|
+
/** Minimal structural logger — keeps this module dependency-free. */
|
|
24
|
+
export interface MemoryMonitorLogger {
|
|
25
|
+
debug?(message: string): void;
|
|
26
|
+
info?(message: string): void;
|
|
27
|
+
warn?(message: string): void;
|
|
28
|
+
}
|
|
29
|
+
export interface MemorySample {
|
|
30
|
+
totalMb: number;
|
|
31
|
+
freeMb: number;
|
|
32
|
+
/**
|
|
33
|
+
* Resident-set size in MB of the inference host. On the in-process FFI path
|
|
34
|
+
* this is the current process's RSS (`process.memoryUsage().rss`); the
|
|
35
|
+
* device-bridge path injects a phone-sourced figure. `null` only when no
|
|
36
|
+
* probe could read it.
|
|
37
|
+
*/
|
|
38
|
+
serverRssMb: number | null;
|
|
39
|
+
/** Effective free memory used for the pressure decision (min of OS-free and total-minus-RSS-style headroom). */
|
|
40
|
+
effectiveFreeMb: number;
|
|
41
|
+
/** Free as a fraction of total (0..1), based on `effectiveFreeMb`. */
|
|
42
|
+
freeFraction: number;
|
|
43
|
+
}
|
|
44
|
+
export interface MemoryMonitorConfig {
|
|
45
|
+
/** Poll interval, ms. Default 30 s; min 1 s. */
|
|
46
|
+
intervalMs: number;
|
|
47
|
+
/**
|
|
48
|
+
* Evict when free RAM drops below `max(lowWaterMb, lowWaterFraction*total)`.
|
|
49
|
+
* Defaults: 768 MB / 8% of total.
|
|
50
|
+
*/
|
|
51
|
+
lowWaterMb: number;
|
|
52
|
+
lowWaterFraction: number;
|
|
53
|
+
/**
|
|
54
|
+
* After an eviction, wait this long before the next eviction so the OS
|
|
55
|
+
* has time to reflect the reclaimed pages. Default 5 s.
|
|
56
|
+
*/
|
|
57
|
+
evictionCooldownMs: number;
|
|
58
|
+
/**
|
|
59
|
+
* After "nothing left to evict", back off for this long before warning
|
|
60
|
+
* again. Default 60 s.
|
|
61
|
+
*/
|
|
62
|
+
exhaustedBackoffMs: number;
|
|
63
|
+
}
|
|
64
|
+
export declare function resolveMemoryMonitorConfig(overrides?: Partial<MemoryMonitorConfig>): MemoryMonitorConfig;
|
|
65
|
+
/** Pluggable sources so the monitor stays unit-testable without OS state. */
|
|
66
|
+
export interface MemoryMonitorSources {
|
|
67
|
+
/** OS free/total memory in bytes. Defaults to `os.freemem()/os.totalmem()`. */
|
|
68
|
+
osMemory?: () => {
|
|
69
|
+
freeBytes: number;
|
|
70
|
+
totalBytes: number;
|
|
71
|
+
};
|
|
72
|
+
/** Running external runtime RSS in MB, or null. */
|
|
73
|
+
serverRssMb?: () => Promise<number | null>;
|
|
74
|
+
}
|
|
75
|
+
export interface MemoryPressureAction {
|
|
76
|
+
sample: MemorySample;
|
|
77
|
+
/** What got evicted this tick, if anything. */
|
|
78
|
+
evicted: {
|
|
79
|
+
id: string;
|
|
80
|
+
role: ResidentModelRole;
|
|
81
|
+
estimatedMb: number;
|
|
82
|
+
} | null;
|
|
83
|
+
/** True when pressure was detected but nothing could be evicted. */
|
|
84
|
+
exhausted: boolean;
|
|
85
|
+
}
|
|
86
|
+
export declare class MemoryMonitor {
|
|
87
|
+
private readonly config;
|
|
88
|
+
private readonly registry;
|
|
89
|
+
private readonly log?;
|
|
90
|
+
private readonly osMemory;
|
|
91
|
+
private readonly serverRssMb;
|
|
92
|
+
private timer;
|
|
93
|
+
private ticking;
|
|
94
|
+
private lastEvictionAtMs;
|
|
95
|
+
private exhaustedUntilMs;
|
|
96
|
+
constructor(args: {
|
|
97
|
+
registry: SharedResourceRegistry;
|
|
98
|
+
config?: Partial<MemoryMonitorConfig>;
|
|
99
|
+
logger?: MemoryMonitorLogger;
|
|
100
|
+
sources?: MemoryMonitorSources;
|
|
101
|
+
});
|
|
102
|
+
/** Begin polling. Idempotent. The interval is unref'd so it never holds the process open. */
|
|
103
|
+
start(): void;
|
|
104
|
+
stop(): void;
|
|
105
|
+
/** Whether the polling timer is running. */
|
|
106
|
+
isRunning(): boolean;
|
|
107
|
+
/** Take a memory sample now (no side effects). */
|
|
108
|
+
sample(): Promise<MemorySample>;
|
|
109
|
+
/** Low-water line for the current sample, in MB. */
|
|
110
|
+
private lowWaterMb;
|
|
111
|
+
isUnderPressure(sample: MemorySample): boolean;
|
|
112
|
+
/**
|
|
113
|
+
* One monitor step: sample, and if under pressure (and not in cooldown),
|
|
114
|
+
* evict the lowest-priority resident role. Returns what it did so callers
|
|
115
|
+
* (and tests) can assert. Public so tests don't have to wait on a timer.
|
|
116
|
+
*/
|
|
117
|
+
tick(now?: number): Promise<MemoryPressureAction>;
|
|
118
|
+
}
|
|
119
|
+
//# sourceMappingURL=memory-monitor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"memory-monitor.d.ts","sourceRoot":"","sources":["memory-monitor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAGH,OAAO,KAAK,EACX,iBAAiB,EACjB,sBAAsB,EACtB,MAAM,0BAA0B,CAAC;AAElC,qEAAqE;AACrE,MAAM,WAAW,mBAAmB;IACnC,KAAK,CAAC,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,IAAI,CAAC,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,IAAI,CAAC,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;CAC7B;AAED,MAAM,WAAW,YAAY;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf;;;;;OAKG;IACH,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,gHAAgH;IAChH,eAAe,EAAE,MAAM,CAAC;IACxB,sEAAsE;IACtE,YAAY,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,mBAAmB;IACnC,gDAAgD;IAChD,UAAU,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,UAAU,EAAE,MAAM,CAAC;IACnB,gBAAgB,EAAE,MAAM,CAAC;IACzB;;;OAGG;IACH,kBAAkB,EAAE,MAAM,CAAC;IAC3B;;;OAGG;IACH,kBAAkB,EAAE,MAAM,CAAC;CAC3B;AA4BD,wBAAgB,0BAA0B,CACzC,SAAS,GAAE,OAAO,CAAC,mBAAmB,CAAM,GAC1C,mBAAmB,CA0BrB;AAED,6EAA6E;AAC7E,MAAM,WAAW,oBAAoB;IACpC,+EAA+E;IAC/E,QAAQ,CAAC,EAAE,MAAM;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,CAAC;IAC3D,mDAAmD;IACnD,WAAW,CAAC,EAAE,MAAM,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;CAC3C;AAED,MAAM,WAAW,oBAAoB;IACpC,MAAM,EAAE,YAAY,CAAC;IACrB,+CAA+C;IAC/C,OAAO,EAAE;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,iBAAiB,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;IAC7E,oEAAoE;IACpE,SAAS,EAAE,OAAO,CAAC;CACnB;AAED,qBAAa,aAAa;IACzB,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAsB;IAC7C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAyB;IAClD,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAsB;IAC3C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAkD;IAC3E,OAAO,CAAC,QAAQ,CAAC,WAAW,CAA+B;IAC3D,OAAO,CAAC,KAAK,CAA+B;IAC5C,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,gBAAgB,CAAK;IAC7B,OAAO,CAAC,gBAAgB,CAAK;gBAEjB,IAAI,EAAE;QACjB,QAAQ,EAAE,sBAAsB,CAAC;QACjC,MAAM,CAAC,EAAE,OAAO,CAAC,mBAAmB,CAAC,CAAC;QACtC,MAAM,CAAC,EAAE,mBAAmB,CAAC;QAC7B,OAAO,CAAC,EAAE,oBAAoB,CAAC;KAC/B;IAWD,6FAA6F;IAC7F,KAAK,IAAI,IAAI;IAab,IAAI,IAAI,IAAI;IAOZ,4CAA4C;IAC5C,SAAS,IAAI,OAAO;IAIpB,kDAAkD;IAC5C,MAAM,IAAI,OAAO,CAAC,YAAY,CAAC;IAuBrC,oDAAoD;IACpD,OAAO,CAAC,UAAU;IAOlB,eAAe,CAAC,MAAM
,EAAE,YAAY,GAAG,OAAO;IAI9C;;;;OAIG;IACG,IAAI,CAAC,GAAG,GAAE,MAAmB,GAAG,OAAO,CAAC,oBAAoB,CAAC;CAoCnE"}
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { MemoryMonitor } from "./memory-monitor";
|
|
3
|
+
import {
|
|
4
|
+
createEvictableModelRole,
|
|
5
|
+
type ResidentModelRole,
|
|
6
|
+
SharedResourceRegistry,
|
|
7
|
+
} from "./voice/shared-resources";
|
|
8
|
+
|
|
9
|
+
const MB = 1024 * 1024;
|
|
10
|
+
|
|
11
|
+
/** A controllable evictable role for the eviction-order tests. */
|
|
12
|
+
function fakeRole(
	role: ResidentModelRole,
	estimatedMb: number,
): {
	resource: ReturnType<typeof createEvictableModelRole>;
	evictCount: () => number;
	reload: () => void;
} {
	// Mutable test state shared by the resource callbacks and the accessors
	// handed back to the test: the role starts resident with no evictions.
	const state = { resident: true, evictions: 0 };

	const resource = createEvictableModelRole({
		id: `fake:${role}`,
		role,
		estimatedMb,
		isResident: () => state.resident,
		evict: async () => {
			state.resident = false;
			state.evictions += 1;
		},
	});

	// How many times `evict` has run so far.
	const evictCount = (): number => state.evictions;
	// Simulates the caller lazily re-loading the role after an eviction.
	const reload = (): void => {
		state.resident = true;
	};

	return { resource, evictCount, reload };
}
|
|
40
|
+
|
|
41
|
+
function monitorWithFreeMb(
	registry: SharedResourceRegistry,
	freeMb: number,
	totalMb = 16 * 1024,
): MemoryMonitor {
	// Fixed OS reading: the monitor sees exactly `freeMb` of `totalMb`.
	const osMemory = () => ({
		freeBytes: freeMb * MB,
		totalBytes: totalMb * MB,
	});
	// No RSS figure, so only the OS-free number feeds the pressure decision.
	const serverRssMb = async () => null;
	// Zero cooldown lets consecutive ticks evict back-to-back.
	const config = {
		lowWaterMb: 768,
		lowWaterFraction: 0.08,
		evictionCooldownMs: 0,
	};
	return new MemoryMonitor({
		registry,
		config,
		sources: { osMemory, serverRssMb },
	});
}
|
|
55
|
+
|
|
56
|
+
describe("MemoryMonitor", () => {
|
|
57
|
+
it("does nothing while free RAM is above the low-water line", async () => {
|
|
58
|
+
const registry = new SharedResourceRegistry();
|
|
59
|
+
const text = fakeRole("text-target", 2000);
|
|
60
|
+
registry.acquire(text.resource);
|
|
61
|
+
const monitor = monitorWithFreeMb(registry, 8 * 1024); // plenty free
|
|
62
|
+
const action = await monitor.tick();
|
|
63
|
+
expect(action.evicted).toBeNull();
|
|
64
|
+
expect(action.exhausted).toBe(false);
|
|
65
|
+
expect(text.evictCount()).toBe(0);
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
it("under pressure, evicts the lowest-priority resident role first", async () => {
|
|
69
|
+
const registry = new SharedResourceRegistry();
|
|
70
|
+
const emotion = fakeRole("emotion", 800);
|
|
71
|
+
const tts = fakeRole("tts", 300);
|
|
72
|
+
const text = fakeRole("text-target", 4000);
|
|
73
|
+
// Register out of priority order on purpose.
|
|
74
|
+
registry.acquire(text.resource);
|
|
75
|
+
registry.acquire(tts.resource);
|
|
76
|
+
registry.acquire(emotion.resource);
|
|
77
|
+
const monitor = monitorWithFreeMb(registry, 200); // hard pressure
|
|
78
|
+
|
|
79
|
+
const first = await monitor.tick();
|
|
80
|
+
expect(first.evicted?.role).toBe("emotion");
|
|
81
|
+
expect(emotion.evictCount()).toBe(1);
|
|
82
|
+
expect(tts.evictCount()).toBe(0);
|
|
83
|
+
expect(text.evictCount()).toBe(0);
|
|
84
|
+
|
|
85
|
+
// Still under pressure → next-lowest (tts) goes.
|
|
86
|
+
const second = await monitor.tick();
|
|
87
|
+
expect(second.evicted?.role).toBe("tts");
|
|
88
|
+
expect(tts.evictCount()).toBe(1);
|
|
89
|
+
expect(text.evictCount()).toBe(0);
|
|
90
|
+
|
|
91
|
+
// Then the text target — the last thing to go.
|
|
92
|
+
const third = await monitor.tick();
|
|
93
|
+
expect(third.evicted?.role).toBe("text-target");
|
|
94
|
+
expect(text.evictCount()).toBe(1);
|
|
95
|
+
|
|
96
|
+
// Nothing left.
|
|
97
|
+
const fourth = await monitor.tick();
|
|
98
|
+
expect(fourth.evicted).toBeNull();
|
|
99
|
+
expect(fourth.exhausted).toBe(true);
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
it("re-loaded roles become evictable again on the next pressure tick", async () => {
|
|
103
|
+
const registry = new SharedResourceRegistry();
|
|
104
|
+
const emotion = fakeRole("emotion", 800);
|
|
105
|
+
registry.acquire(emotion.resource);
|
|
106
|
+
const monitor = monitorWithFreeMb(registry, 100);
|
|
107
|
+
|
|
108
|
+
const first = await monitor.tick();
|
|
109
|
+
expect(first.evicted?.role).toBe("emotion");
|
|
110
|
+
expect(emotion.evictCount()).toBe(1);
|
|
111
|
+
|
|
112
|
+
// Nothing else resident → exhausted now.
|
|
113
|
+
const exhausted = await monitor.tick();
|
|
114
|
+
expect(exhausted.evicted).toBeNull();
|
|
115
|
+
|
|
116
|
+
// Caller re-loads the role on demand; pressure persists → it can be evicted again.
|
|
117
|
+
emotion.reload();
|
|
118
|
+
const second = await monitor.tick();
|
|
119
|
+
expect(second.evicted?.role).toBe("emotion");
|
|
120
|
+
expect(emotion.evictCount()).toBe(2);
|
|
121
|
+
});
|
|
122
|
+
|
|
123
|
+
it("honours the eviction cooldown between ticks", async () => {
|
|
124
|
+
const registry = new SharedResourceRegistry();
|
|
125
|
+
registry.acquire(fakeRole("emotion", 100).resource);
|
|
126
|
+
registry.acquire(fakeRole("tts", 100).resource);
|
|
127
|
+
const monitor = new MemoryMonitor({
|
|
128
|
+
registry,
|
|
129
|
+
config: {
|
|
130
|
+
lowWaterMb: 768,
|
|
131
|
+
lowWaterFraction: 0,
|
|
132
|
+
evictionCooldownMs: 10_000,
|
|
133
|
+
},
|
|
134
|
+
sources: {
|
|
135
|
+
osMemory: () => ({ freeBytes: 100 * MB, totalBytes: 16 * 1024 * MB }),
|
|
136
|
+
serverRssMb: async () => null,
|
|
137
|
+
},
|
|
138
|
+
});
|
|
139
|
+
const t0 = 1_000_000;
|
|
140
|
+
const a = await monitor.tick(t0);
|
|
141
|
+
expect(a.evicted?.role).toBe("emotion");
|
|
142
|
+
// Within the cooldown — no further eviction even though still under pressure.
|
|
143
|
+
const b = await monitor.tick(t0 + 5_000);
|
|
144
|
+
expect(b.evicted).toBeNull();
|
|
145
|
+
// After the cooldown — the next role goes.
|
|
146
|
+
const c = await monitor.tick(t0 + 11_000);
|
|
147
|
+
expect(c.evicted?.role).toBe("tts");
|
|
148
|
+
});
|
|
149
|
+
|
|
150
|
+
it("treats a huge llama-server RSS as pressure even when OS free looks fine", async () => {
|
|
151
|
+
const registry = new SharedResourceRegistry();
|
|
152
|
+
const emotion = fakeRole("emotion", 800);
|
|
153
|
+
registry.acquire(emotion.resource);
|
|
154
|
+
const monitor = new MemoryMonitor({
|
|
155
|
+
registry,
|
|
156
|
+
config: {
|
|
157
|
+
lowWaterMb: 1024,
|
|
158
|
+
lowWaterFraction: 0.05,
|
|
159
|
+
evictionCooldownMs: 0,
|
|
160
|
+
},
|
|
161
|
+
sources: {
|
|
162
|
+
// OS reports 4 GB free (looks fine), but the server is 15 GB on a 16 GB box.
|
|
163
|
+
osMemory: () => ({
|
|
164
|
+
freeBytes: 4 * 1024 * MB,
|
|
165
|
+
totalBytes: 16 * 1024 * MB,
|
|
166
|
+
}),
|
|
167
|
+
serverRssMb: async () => 15 * 1024,
|
|
168
|
+
},
|
|
169
|
+
});
|
|
170
|
+
const sample = await monitor.sample();
|
|
171
|
+
expect(monitor.isUnderPressure(sample)).toBe(true);
|
|
172
|
+
const action = await monitor.tick();
|
|
173
|
+
expect(action.evicted?.role).toBe("emotion");
|
|
174
|
+
});
|
|
175
|
+
|
|
176
|
+
it("start()/stop() arm and disarm the polling timer", () => {
|
|
177
|
+
const registry = new SharedResourceRegistry();
|
|
178
|
+
const monitor = monitorWithFreeMb(registry, 8 * 1024);
|
|
179
|
+
expect(monitor.isRunning()).toBe(false);
|
|
180
|
+
monitor.start();
|
|
181
|
+
expect(monitor.isRunning()).toBe(true);
|
|
182
|
+
monitor.start(); // idempotent
|
|
183
|
+
expect(monitor.isRunning()).toBe(true);
|
|
184
|
+
monitor.stop();
|
|
185
|
+
expect(monitor.isRunning()).toBe(false);
|
|
186
|
+
});
|
|
187
|
+
|
|
188
|
+
it("defaults serverRssMb to the real in-process RSS on the FFI path", async () => {
|
|
189
|
+
// No `serverRssMb` source injected → the default probe reads
|
|
190
|
+
// `process.memoryUsage().rss`, the in-process FFI host's resident set.
|
|
191
|
+
const registry = new SharedResourceRegistry();
|
|
192
|
+
const monitor = new MemoryMonitor({
|
|
193
|
+
registry,
|
|
194
|
+
config: { lowWaterMb: 768, lowWaterFraction: 0.08 },
|
|
195
|
+
sources: {
|
|
196
|
+
osMemory: () => ({
|
|
197
|
+
freeBytes: 8 * 1024 * MB,
|
|
198
|
+
totalBytes: 16 * 1024 * MB,
|
|
199
|
+
}),
|
|
200
|
+
},
|
|
201
|
+
});
|
|
202
|
+
const sample = await monitor.sample();
|
|
203
|
+
expect(sample.serverRssMb).not.toBeNull();
|
|
204
|
+
expect(sample.serverRssMb as number).toBeGreaterThan(0);
|
|
205
|
+
// The in-process RSS is bounded by total RAM (sanity, not a fabricated value).
|
|
206
|
+
expect(sample.serverRssMb as number).toBeLessThan(sample.totalMb);
|
|
207
|
+
});
|
|
208
|
+
});
|
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RAM-pressure monitor for the local-inference path (W10 / J2).
|
|
3
|
+
*
|
|
4
|
+
* Polls `os.freemem()` / `os.totalmem()` on an interval. When free RAM
|
|
5
|
+
* crosses a low-water threshold, the monitor walks the
|
|
6
|
+
* `SharedResourceRegistry`'s evictable model roles in *ascending priority*
|
|
7
|
+
* — `vision/mmproj < embedding < vad < ASR < TTS < text-target` —
|
|
8
|
+
* and evicts the cheapest one. Cheap evictions are the voice TTS/ASR weights
|
|
9
|
+
* (`MmapRegionHandle.evictPages()`), the vision projector, and unloading the
|
|
10
|
+
* embedding model. Every eviction is logged (observable) and reversible
|
|
11
|
+
* (roles re-load lazily on next use).
|
|
12
|
+
*
|
|
13
|
+
* The monitor never *loads* anything — it only frees memory. Re-load is the
|
|
14
|
+
* caller's job, on demand. It also never evicts the text target (priority
|
|
15
|
+
* 100) unless it's literally the only resident role and pressure persists,
|
|
16
|
+
* which is the intended "nothing left to give" behaviour.
|
|
17
|
+
*
|
|
18
|
+
* No fallback sludge: when there is nothing to evict and pressure persists,
|
|
19
|
+
* the monitor logs a warning and stops trying for a back-off window — it does
|
|
20
|
+
* not pretend it fixed anything.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import os from "node:os";
|
|
24
|
+
import type {
|
|
25
|
+
ResidentModelRole,
|
|
26
|
+
SharedResourceRegistry,
|
|
27
|
+
} from "./voice/shared-resources";
|
|
28
|
+
|
|
29
|
+
/** Minimal structural logger — keeps this module dependency-free. */
|
|
30
|
+
export interface MemoryMonitorLogger {
|
|
31
|
+
debug?(message: string): void;
|
|
32
|
+
info?(message: string): void;
|
|
33
|
+
warn?(message: string): void;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
export interface MemorySample {
|
|
37
|
+
totalMb: number;
|
|
38
|
+
freeMb: number;
|
|
39
|
+
/**
|
|
40
|
+
* Resident-set size in MB of the inference host. On the in-process FFI path
|
|
41
|
+
* this is the current process's RSS (`process.memoryUsage().rss`); the
|
|
42
|
+
* device-bridge path injects a phone-sourced figure. `null` only when no
|
|
43
|
+
* probe could read it.
|
|
44
|
+
*/
|
|
45
|
+
serverRssMb: number | null;
|
|
46
|
+
/** Effective free memory in MB used for the pressure decision: the smaller of OS-free and the RSS headroom (`total - serverRss - low-water reserve`). Can be negative when the host's RSS leaves no headroom. */
|
|
47
|
+
effectiveFreeMb: number;
|
|
48
|
+
/** Free as a fraction of total, based on `effectiveFreeMb`: normally 0..1, below 0 when `effectiveFreeMb` is negative, and 1 when total memory is reported as 0. */
|
|
49
|
+
freeFraction: number;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
export interface MemoryMonitorConfig {
|
|
53
|
+
/** Poll interval, ms. Default 30 s; min 1 s. */
|
|
54
|
+
intervalMs: number;
|
|
55
|
+
/**
|
|
56
|
+
* Evict when free RAM drops below `max(lowWaterMb, lowWaterFraction*total)`.
|
|
57
|
+
* Defaults: 768 MB / 8% of total.
|
|
58
|
+
*/
|
|
59
|
+
lowWaterMb: number;
|
|
60
|
+
lowWaterFraction: number;
|
|
61
|
+
/**
|
|
62
|
+
* After an eviction, wait this long before the next eviction so the OS
|
|
63
|
+
* has time to reflect the reclaimed pages. Default 5 s.
|
|
64
|
+
*/
|
|
65
|
+
evictionCooldownMs: number;
|
|
66
|
+
/**
|
|
67
|
+
* After "nothing left to evict", back off for this long before warning
|
|
68
|
+
* again. Default 60 s.
|
|
69
|
+
*/
|
|
70
|
+
exhaustedBackoffMs: number;
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
const DEFAULT_CONFIG: MemoryMonitorConfig = {
|
|
74
|
+
intervalMs: 30_000,
|
|
75
|
+
lowWaterMb: 768,
|
|
76
|
+
lowWaterFraction: 0.08,
|
|
77
|
+
evictionCooldownMs: 5_000,
|
|
78
|
+
exhaustedBackoffMs: 60_000,
|
|
79
|
+
};
|
|
80
|
+
|
|
81
|
+
const BYTES_PER_MB = 1024 * 1024;
|
|
82
|
+
|
|
83
|
+
/**
 * Read a non-negative integer from the environment variable `name`.
 *
 * Returns `undefined` when the variable is unset, blank, negative, or not a
 * plain base-10 integer. The whole (trimmed) value must be an optional sign
 * followed by digits: `Number.parseInt` on its own stops at the first
 * non-digit, so `"1e3"` would yield 1 and `"30s"` would yield 30 — a
 * misleading number instead of a rejected setting.
 */
function envInt(name: string): number | undefined {
	const raw = process.env[name]?.trim();
	if (!raw) return undefined;
	// Validate the full string up front; parseInt alone ignores trailing junk.
	if (!/^[+-]?\d+$/.test(raw)) return undefined;
	const parsed = Number.parseInt(raw, 10);
	return Number.isFinite(parsed) && parsed >= 0 ? parsed : undefined;
}
|
|
89
|
+
|
|
90
|
+
/**
 * Read a fraction in the closed range [0, 1] from the environment variable
 * `name`.
 *
 * Returns `undefined` when the variable is unset, blank, out of range, or
 * not a plain decimal number. The whole (trimmed) value is validated before
 * parsing: `Number.parseFloat` stops at the first unexpected character, so a
 * decimal comma such as `"0,08"` would parse as 0 and silently switch the
 * fractional threshold off.
 */
function envFloat(name: string): number | undefined {
	const raw = process.env[name]?.trim();
	if (!raw) return undefined;
	// Optional sign, digits with an optional fractional part (or a bare
	// fractional part), optional exponent — and nothing else.
	if (!/^[+-]?(?:\d+\.?\d*|\.\d+)(?:e[+-]?\d+)?$/i.test(raw)) {
		return undefined;
	}
	const parsed = Number.parseFloat(raw);
	return Number.isFinite(parsed) && parsed >= 0 && parsed <= 1
		? parsed
		: undefined;
}
|
|
98
|
+
|
|
99
|
+
/**
 * Build the effective monitor configuration.
 *
 * Precedence per field: explicit override, then environment variable, then
 * the built-in default. Only `intervalMs`
 * (`ELIZA_LOCAL_MEMORY_MONITOR_INTERVAL_MS`), `lowWaterMb`
 * (`ELIZA_LOCAL_MEMORY_LOW_WATER_MB`) and `lowWaterFraction`
 * (`ELIZA_LOCAL_MEMORY_LOW_WATER_FRACTION`) have an environment variable.
 *
 * Sanitisation applied to the result:
 * - an override that is `NaN` (or not a number at all) is ignored and the
 *   next source is used — `NaN` passes `??` and would otherwise poison every
 *   `Math.max` below;
 * - `intervalMs` is clamped to [1 s, 2^31-1 ms]; Node timers coerce a delay
 *   that is `NaN` or above 2^31-1 to 1 ms, which would turn the monitor into
 *   a busy loop;
 * - `lowWaterMb`, `evictionCooldownMs` and `exhaustedBackoffMs` are floored
 *   at 0;
 * - `lowWaterFraction` is clamped to [0, 1].
 *
 * @param overrides Caller-supplied values; omitted fields fall through.
 * @returns A fully populated configuration.
 */
export function resolveMemoryMonitorConfig(
	overrides: Partial<MemoryMonitorConfig> = {},
): MemoryMonitorConfig {
	// Largest delay Node timers honour before coercing the value to 1 ms.
	const maxTimerDelayMs = 2_147_483_647;
	// Treat NaN / non-numeric overrides as "not provided".
	const usable = (value: number | undefined): number | undefined =>
		typeof value === "number" && !Number.isNaN(value) ? value : undefined;

	const intervalMs = Math.min(
		maxTimerDelayMs,
		Math.max(
			1_000,
			usable(overrides.intervalMs) ??
				envInt("ELIZA_LOCAL_MEMORY_MONITOR_INTERVAL_MS") ??
				DEFAULT_CONFIG.intervalMs,
		),
	);
	const lowWaterMb = Math.max(
		0,
		usable(overrides.lowWaterMb) ??
			envInt("ELIZA_LOCAL_MEMORY_LOW_WATER_MB") ??
			DEFAULT_CONFIG.lowWaterMb,
	);
	const lowWaterFraction = Math.min(
		1,
		Math.max(
			0,
			usable(overrides.lowWaterFraction) ??
				envFloat("ELIZA_LOCAL_MEMORY_LOW_WATER_FRACTION") ??
				DEFAULT_CONFIG.lowWaterFraction,
		),
	);
	const evictionCooldownMs = Math.max(
		0,
		usable(overrides.evictionCooldownMs) ?? DEFAULT_CONFIG.evictionCooldownMs,
	);
	const exhaustedBackoffMs = Math.max(
		0,
		usable(overrides.exhaustedBackoffMs) ?? DEFAULT_CONFIG.exhaustedBackoffMs,
	);

	return {
		intervalMs,
		lowWaterMb,
		lowWaterFraction,
		evictionCooldownMs,
		exhaustedBackoffMs,
	};
}
|
|
128
|
+
|
|
129
|
+
/** Pluggable sources so the monitor stays unit-testable without OS state. */
|
|
130
|
+
export interface MemoryMonitorSources {
|
|
131
|
+
/** OS free/total memory in bytes. Defaults to `os.freemem()/os.totalmem()`. */
|
|
132
|
+
osMemory?: () => { freeBytes: number; totalBytes: number };
|
|
133
|
+
/** Inference-host RSS in MB, or null when it cannot be read. Defaults to `defaultServerRssMb()`. */
|
|
134
|
+
serverRssMb?: () => Promise<number | null>;
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
export interface MemoryPressureAction {
|
|
138
|
+
sample: MemorySample;
|
|
139
|
+
/** What got evicted this tick, if anything. */
|
|
140
|
+
evicted: { id: string; role: ResidentModelRole; estimatedMb: number } | null;
|
|
141
|
+
/** True when pressure was detected but nothing could be evicted. */
|
|
142
|
+
exhausted: boolean;
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
export class MemoryMonitor {
|
|
146
|
+
private readonly config: MemoryMonitorConfig;
|
|
147
|
+
private readonly registry: SharedResourceRegistry;
|
|
148
|
+
private readonly log?: MemoryMonitorLogger;
|
|
149
|
+
private readonly osMemory: () => { freeBytes: number; totalBytes: number };
|
|
150
|
+
private readonly serverRssMb: () => Promise<number | null>;
|
|
151
|
+
private timer: NodeJS.Timeout | null = null;
|
|
152
|
+
private ticking = false;
|
|
153
|
+
private lastEvictionAtMs = 0;
|
|
154
|
+
private exhaustedUntilMs = 0;
|
|
155
|
+
|
|
156
|
+
constructor(args: {
	registry: SharedResourceRegistry;
	config?: Partial<MemoryMonitorConfig>;
	logger?: MemoryMonitorLogger;
	sources?: MemoryMonitorSources;
}) {
	const { registry, config, logger, sources } = args;

	this.registry = registry;
	// Partial config is completed from env vars / defaults by the resolver.
	this.config = resolveMemoryMonitorConfig(config);
	this.log = logger;

	// Injected probes win; otherwise read the live OS figures and the
	// default RSS probe.
	const liveOsMemory = () => ({
		freeBytes: os.freemem(),
		totalBytes: os.totalmem(),
	});
	const liveServerRssMb = async () => defaultServerRssMb();

	this.osMemory = sources?.osMemory ?? liveOsMemory;
	this.serverRssMb = sources?.serverRssMb ?? liveServerRssMb;
}
|
|
171
|
+
|
|
172
|
+
/** Begin polling. Idempotent. The interval is unref'd so it never holds the process open. */
|
|
173
|
+
start(): void {
	// Already armed — calling start() twice must not create a second timer.
	if (this.timer) return;

	// A failed tick is reported through the optional logger and never
	// escapes the timer callback.
	const reportFailure = (err: unknown): void => {
		const reason = err instanceof Error ? err.message : String(err);
		this.log?.warn?.(`[MemoryMonitor] tick failed: ${reason}`);
	};

	const handle = setInterval(() => {
		void this.tick().catch(reportFailure);
	}, this.config.intervalMs);
	// unref() so a pending poll never keeps the process alive.
	handle.unref();
	this.timer = handle;
}
|
|
185
|
+
|
|
186
|
+
stop(): void {
	// Nothing to do when the monitor was never started (or already stopped).
	const active = this.timer;
	if (!active) return;
	clearInterval(active);
	// Reset so isRunning() reports false and start() can re-arm.
	this.timer = null;
}
|
|
192
|
+
|
|
193
|
+
/** Whether the polling timer is running. */
|
|
194
|
+
isRunning(): boolean {
	// start() stores the interval handle; stop() resets it to null.
	const armed = this.timer !== null;
	return armed;
}
|
|
197
|
+
|
|
198
|
+
/** Take a memory sample now (no side effects). */
|
|
199
|
+
async sample(): Promise<MemorySample> {
	const { freeBytes, totalBytes } = this.osMemory();
	const totalMb = Math.round(totalBytes / BYTES_PER_MB);
	const freeMb = Math.round(freeBytes / BYTES_PER_MB);

	// The RSS probe is best-effort: any failure degrades to "unknown" (null)
	// rather than failing the sample. try/catch (instead of `.catch`) also
	// covers a source that throws synchronously before returning a promise.
	let serverRssMb: number | null = null;
	try {
		const probed = await this.serverRssMb();
		// Reject NaN / ±Infinity / negative / non-numeric readings: a NaN
		// would propagate through Math.min below, and `NaN < lowWater` is
		// always false, silently switching pressure detection off.
		if (typeof probed === "number" && Number.isFinite(probed) && probed >= 0) {
			serverRssMb = probed;
		}
	} catch {
		serverRssMb = null;
	}

	// If the server process is huge relative to total RAM, treat the
	// headroom (total - RSS - what other things need) as a tighter free
	// estimate than the OS free figure alone. "What other things need" is
	// approximated by the low-water reserve, so this only kicks in when the
	// server itself is the problem.
	const reserveMb = this.lowWaterMb(totalMb);
	const serverHeadroomMb =
		serverRssMb !== null
			? totalMb - serverRssMb - reserveMb
			: Number.POSITIVE_INFINITY;
	const effectiveFreeMb = Math.min(freeMb, serverHeadroomMb);
	// A zero total would divide by zero; report "fully free" in that case.
	const freeFraction = totalMb > 0 ? effectiveFreeMb / totalMb : 1;
	return { totalMb, freeMb, serverRssMb, effectiveFreeMb, freeFraction };
}
|
|
221
|
+
|
|
222
|
+
/**
 * Low-water threshold in MB for a machine with `totalMb` of RAM: the larger
 * of the configured absolute floor and the configured fraction of total RAM
 * (rounded to whole MB).
 */
private lowWaterMb(totalMb: number): number {
  const { lowWaterMb: absoluteFloorMb, lowWaterFraction } = this.config;
  const proportionalMb = Math.round(totalMb * lowWaterFraction);
  return Math.max(absoluteFloorMb, proportionalMb);
}
|
|
229
|
+
|
|
230
|
+
/** True when the sample's effective free memory is strictly below the low-water line for its total RAM. */
isUnderPressure(sample: MemorySample): boolean {
  const thresholdMb = this.lowWaterMb(sample.totalMb);
  return sample.effectiveFreeMb < thresholdMb;
}
|
|
233
|
+
|
|
234
|
+
/**
 * Run a single monitoring pass.
 *
 * Takes a sample; when it is below the low-water line and the eviction
 * cooldown has elapsed, asks the registry to evict its lowest-priority role.
 * When nothing can be evicted, a warning is logged at most once per
 * `exhaustedBackoffMs`. A call that arrives while another pass is still in
 * flight only samples and reports no action. Public so tests can drive it
 * directly instead of waiting on the interval.
 *
 * @param now - Timestamp (ms) used for the cooldown and back-off checks; defaults to `Date.now()`.
 * @returns The sample taken, the evicted role (or `null`), and whether pressure persisted with nothing evictable.
 */
async tick(now: number = Date.now()): Promise<MemoryPressureAction> {
  if (this.ticking) {
    // Another pass is mid-flight: observe only, never evict concurrently.
    return { sample: await this.sample(), evicted: null, exhausted: false };
  }

  this.ticking = true;
  try {
    const sample = await this.sample();

    const pressured = this.isUnderPressure(sample);
    if (!pressured) {
      // Healthy again: clear the warn back-off so the next shortage logs right away.
      this.exhaustedUntilMs = 0;
      return { sample, evicted: null, exhausted: false };
    }

    const msSinceEviction = now - this.lastEvictionAtMs;
    if (msSinceEviction < this.config.evictionCooldownMs) {
      return { sample, evicted: null, exhausted: false };
    }

    const evicted = await this.registry.evictLowestPriorityRole();
    if (!evicted) {
      // Nothing evictable. Warn, rate-limited so the log is not spammed.
      if (now >= this.exhaustedUntilMs) {
        this.exhaustedUntilMs = now + this.config.exhaustedBackoffMs;
        this.log?.warn?.(
          `[MemoryMonitor] RAM pressure (free ${sample.effectiveFreeMb} MB) but no evictable model role — only the text target is resident. Consider a smaller tier (ELIZA_LOCAL_RAM_HEADROOM_MB / model selection).`,
        );
      }
      return { sample, evicted: null, exhausted: true };
    }

    this.lastEvictionAtMs = now;
    this.exhaustedUntilMs = 0;
    this.log?.info?.(
      `[MemoryMonitor] RAM pressure (free ${sample.effectiveFreeMb} MB < ${this.lowWaterMb(sample.totalMb)} MB low-water) — evicted ${evicted.role} (~${evicted.estimatedMb} MB)`,
    );
    return { sample, evicted, exhausted: false };
  } finally {
    // Always release the re-entrancy flag, including when sampling or eviction throws.
    this.ticking = false;
  }
}
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
/**
 * Default RSS probe for the in-process FFI path.
 *
 * Text inference runs in-process via FFI llama.cpp, so the model weights sit
 * in this process's address space and `process.memoryUsage().rss` is the
 * resident-set size of the inference host itself; there is no separate
 * server to scrape.
 *
 * The device-bridge topology (agent in a container, inference on a paired
 * phone) is the exception: the container's RSS says nothing about the phone,
 * so that bootstrap supplies a device-sourced `serverRssMb` through
 * `MemoryMonitorSources` instead of relying on this default.
 *
 * @returns Current process RSS in MB (bytes / 1024², not rounded), or `null`
 *   when `process.memoryUsage` is unavailable or reports a non-finite value.
 */
async function defaultServerRssMb(): Promise<number | null> {
  const rssBytes = process.memoryUsage?.()?.rss;
  if (typeof rssBytes !== "number" || !Number.isFinite(rssBytes)) {
    return null;
  }
  const bytesPerMb = 1024 * 1024;
  return rssBytes / bytesPerMb;
}
|