@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +82 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.d.ts +29 -0
- package/src/actions/transcription-control.d.ts.map +1 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +8 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1082 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +205 -0
- package/src/routes/local-inference-asr-route.ts +163 -0
- package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +808 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/transcript-audio-store.d.ts +15 -0
- package/src/routes/transcript-audio-store.d.ts.map +1 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.d.ts +36 -0
- package/src/routes/transcripts-routes.d.ts.map +1 -0
- package/src/routes/transcripts-routes.test.ts +144 -0
- package/src/routes/transcripts-routes.ts +159 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1448 -0
- package/src/runtime/index.d.ts +15 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +33 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bionic-host-loader.d.ts +46 -0
- package/src/services/bionic-host-loader.d.ts.map +1 -0
- package/src/services/bionic-host-loader.test.ts +133 -0
- package/src/services/bionic-host-loader.ts +180 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +238 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +747 -0
- package/src/services/downloader.ts +925 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +540 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1909 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.d.ts +56 -0
- package/src/services/gpu-detect.d.ts.map +1 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +231 -0
- package/src/services/hardware.ts +410 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +29 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +211 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +689 -0
- package/src/services/manifest/schema.d.ts +713 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +653 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +567 -0
- package/src/services/memory-arbiter.d.ts +318 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +925 -0
- package/src/services/memory-monitor.d.ts +122 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +297 -0
- package/src/services/memory-pressure.d.ts +130 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +407 -0
- package/src/services/routing-policy.d.ts +69 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.test.ts +164 -0
- package/src/services/routing-policy.ts +297 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.d.ts +33 -0
- package/src/services/system-memory.d.ts.map +1 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/asr-timed.real.test.ts +141 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +243 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +759 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2302 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +674 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +728 -0
- package/src/services/voice/ffi-bindings.ts +3225 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.d.ts +37 -0
- package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.d.ts +41 -0
- package/src/services/voice/transcript-service.d.ts.map +1 -0
- package/src/services/voice/transcript-service.test.ts +137 -0
- package/src/services/voice/transcript-service.ts +141 -0
- package/src/services/voice/transcript-store.d.ts +53 -0
- package/src/services/voice/transcript-store.d.ts.map +1 -0
- package/src/services/voice/transcript-store.test.ts +153 -0
- package/src/services/voice/transcript-store.ts +132 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +418 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,1213 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Coordinates which model is currently loaded into the plugin-local-ai
|
|
3
|
+
* runtime. Eliza runs one inference model at a time; switching models
|
|
4
|
+
* unloads the previous one first so we don't double-allocate VRAM.
|
|
5
|
+
*
|
|
6
|
+
* This module *does not* talk to `capacitor-llama` directly. The plugin
|
|
7
|
+
* owns the native binding; we ask it to swap via a small runtime service
|
|
8
|
+
* registered under the name "localInferenceLoader". When the plugin is not
|
|
9
|
+
* enabled, we still track the user's preferred active model so the
|
|
10
|
+
* preference survives enabling the plugin later.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
14
|
+
import {
|
|
15
|
+
dirname as pathDirname,
|
|
16
|
+
join as pathJoin,
|
|
17
|
+
resolve as pathResolve,
|
|
18
|
+
} from "node:path";
|
|
19
|
+
import type { AgentRuntime } from "@elizaos/core";
|
|
20
|
+
import {
|
|
21
|
+
ELIZA_1_PLACEHOLDER_IDS,
|
|
22
|
+
FIRST_RUN_DEFAULT_MODEL_ID,
|
|
23
|
+
findCatalogModel,
|
|
24
|
+
} from "./catalog";
|
|
25
|
+
import { localInferenceEngine } from "./engine";
|
|
26
|
+
import { probeHardware } from "./hardware";
|
|
27
|
+
import type { Eliza1Manifest } from "./manifest";
|
|
28
|
+
import {
|
|
29
|
+
assessRamFit,
|
|
30
|
+
defaultManifestLoader,
|
|
31
|
+
type ManifestLoader,
|
|
32
|
+
pickFittingContextVariant,
|
|
33
|
+
type RamFitOptions,
|
|
34
|
+
} from "./ram-budget";
|
|
35
|
+
import { recommendForFirstRun } from "./recommendation";
|
|
36
|
+
import { touchElizaModel } from "./registry";
|
|
37
|
+
import type {
|
|
38
|
+
ActiveModelState,
|
|
39
|
+
CatalogModel,
|
|
40
|
+
HardwareProbe,
|
|
41
|
+
InstalledModel,
|
|
42
|
+
} from "./types";
|
|
43
|
+
import {
|
|
44
|
+
assessVoiceBundleFits,
|
|
45
|
+
VOICE_ENSEMBLE_BUDGETS,
|
|
46
|
+
type VoiceTierSlot,
|
|
47
|
+
} from "./voice/voice-budget";
|
|
48
|
+
|
|
49
|
+
export type { KvOffloadMode, LocalInferenceLoadArgs } from "./load-args.js";
|
|
50
|
+
export {
|
|
51
|
+
ELIZA_1_PLACEHOLDER_IDS,
|
|
52
|
+
FIRST_RUN_DEFAULT_MODEL_ID,
|
|
53
|
+
recommendForFirstRun,
|
|
54
|
+
};
|
|
55
|
+
|
|
56
|
+
import type { KvOffloadMode, LocalInferenceLoadArgs } from "./load-args.js";
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Allow-list for KV cache type strings. The eliza fork of node-llama-cpp
|
|
60
|
+
* (v3.18.1-eliza.3+) extends `GgmlType` with TBQ3_0 (43), TBQ4_0 (44),
|
|
61
|
+
* QJL1_256 (46), Q4_POLAR (47) so the binding accepts the lowercase
|
|
62
|
+
* aliases below. Whether the C++ kernel actually runs depends on the
|
|
63
|
+
* loaded native binary — the elizaOS/llama.cpp
|
|
64
|
+
* prebuild ships the kernels; upstream's prebuild does not.
|
|
65
|
+
*
|
|
66
|
+
* `validateLocalInferenceLoadArgs({ allowFork: false })` (the route-layer
|
|
67
|
+
* default) still throws on these strings so a UI/API caller can't land
|
|
68
|
+
* the desktop on a kernel that won't run; `allowFork: true` (the AOSP +
|
|
69
|
+
* resolved-args path) lets them through.
|
|
70
|
+
*/
|
|
71
|
+
// Lowercase KV cache type names that are only accepted when fork kernels are
// allowed. Insertion order is preserved because the set is joined into
// user-facing error messages.
const FORK_ONLY_KV_CACHE_TYPES = new Set<string>([
	// tbq* names
	"tbq1_0",
	"tbq2_0",
	"tbq3_0",
	"tbq4_0",
	"tbq3_0_tcq",
	// turbo* names
	"turbo2",
	"turbo3",
	"turbo4",
	"turbo2_0",
	"turbo3_0",
	"turbo4_0",
	"turbo2_tcq",
	"turbo3_tcq",
	// qjl* / polar names
	"qjl1_256",
	"qjl1_512",
	"q4_polar",
]);
|
|
89
|
+
|
|
90
|
+
// Lowercase KV cache type names accepted without fork kernels. Insertion
// order is preserved because the set is joined into user-facing error
// messages.
const STOCK_KV_CACHE_TYPES = new Set<string>([
	// float names
	"f16",
	"f32",
	"bf16",
	// q*_0 / q*_1 names
	"q4_0",
	"q4_1",
	"q5_0",
	"q5_1",
	"q8_0",
	// q*_k names
	"q4_k",
	"q5_k",
	"q6_k",
	"q8_k",
	// iq* names
	"iq4_nl",
]);
|
|
105
|
+
|
|
106
|
+
/**
 * Report whether `name` is one of the fork-only KV cache type names.
 * Matching ignores surrounding whitespace and letter case; a missing or
 * empty name is never a match.
 */
export function isForkOnlyKvCacheType(name: string | undefined): boolean {
	const normalized = name ? name.trim().toLowerCase() : "";
	return normalized !== "" && FORK_ONLY_KV_CACHE_TYPES.has(normalized);
}
|
|
110
|
+
|
|
111
|
+
/**
 * Report whether `name` is one of the stock KV cache type names.
 * Matching ignores surrounding whitespace and letter case; a missing or
 * empty name is never a match.
 */
export function isStockKvCacheType(name: string | undefined): boolean {
	const normalized = name ? name.trim().toLowerCase() : "";
	return normalized !== "" && STOCK_KV_CACHE_TYPES.has(normalized);
}
|
|
115
|
+
|
|
116
|
+
/**
 * Validate per-load overrides against what the in-process backend can
 * honour. The AOSP loader has its own (broader) acceptance set — pass
 * `{ allowFork: true }` to skip the desktop-only restriction.
 *
 * Only fields that are present (not `undefined`) are checked:
 *  - `cacheTypeK` / `cacheTypeV`: non-empty string naming a stock KV cache
 *    type, or a fork-only type when `allowFork` is true.
 *  - `contextSize`: integer >= 256.
 *  - `gpuLayers`: non-negative integer.
 *  - `kvOffload`: `"cpu"`, `"gpu"`, `"split"`, or `{ gpuLayers }` where
 *    `gpuLayers` is a non-negative integer (same rule as top-level
 *    `gpuLayers`, so NaN / negative / fractional values are rejected).
 *  - `flashAttention` / `mmap` / `mlock`: boolean.
 *
 * @param args - Candidate load arguments.
 * @param options - `allowFork: true` also accepts fork-only KV cache types.
 * @throws Error on the first illegal value, so the caller (the API route)
 *   can surface a 400 with a useful message instead of letting the load
 *   slip through and silently degrade to fp16.
 */
export function validateLocalInferenceLoadArgs(
	args: Partial<LocalInferenceLoadArgs>,
	options: { allowFork?: boolean } = {},
): void {
	const allowFork = options.allowFork === true;
	for (const field of ["cacheTypeK", "cacheTypeV"] as const) {
		const value = args[field];
		if (value === undefined) continue;
		if (typeof value !== "string" || value.length === 0) {
			throw new Error(`${field} must be a non-empty string`);
		}
		// Fork-only names get a dedicated message explaining why they are refused.
		if (!allowFork && isForkOnlyKvCacheType(value)) {
			throw new Error(
				`${field}="${value}" requires the elizaOS/llama.cpp kernel from the elizaOS fork. The elizaOS/capacitor-llama binding accepts the string at the TS layer, but the upstream @node-llama-cpp/<platform> prebuild does not implement the underlying ggml type. Pass through the AOSP path or load the elizaOS/llama.cpp prebuilt binary. Stock-only types accepted here: ${[...STOCK_KV_CACHE_TYPES].join(", ")}.`,
			);
		}
		if (!allowFork && !isStockKvCacheType(value)) {
			throw new Error(
				`${field}="${value}" is not a recognised KV cache type. Stock builds accept ${[...STOCK_KV_CACHE_TYPES].join(", ")}.`,
			);
		}
		if (
			allowFork &&
			!isStockKvCacheType(value) &&
			!isForkOnlyKvCacheType(value)
		) {
			throw new Error(
				`${field}="${value}" is not a recognised KV cache type. Accepted stock types: ${[...STOCK_KV_CACHE_TYPES].join(", ")}. Accepted elizaOS fork types: ${[...FORK_ONLY_KV_CACHE_TYPES].join(", ")}.`,
			);
		}
	}
	if (args.contextSize !== undefined) {
		if (
			typeof args.contextSize !== "number" ||
			!Number.isInteger(args.contextSize) ||
			args.contextSize < 256
		) {
			throw new Error(
				`contextSize must be a positive integer >= 256 (got ${String(args.contextSize)})`,
			);
		}
	}
	if (args.gpuLayers !== undefined) {
		if (
			typeof args.gpuLayers !== "number" ||
			!Number.isInteger(args.gpuLayers) ||
			args.gpuLayers < 0
		) {
			throw new Error(
				`gpuLayers must be a non-negative integer (got ${String(args.gpuLayers)})`,
			);
		}
	}
	if (args.kvOffload !== undefined) {
		const v = args.kvOffload;
		if (typeof v === "string") {
			if (v !== "cpu" && v !== "gpu" && v !== "split") {
				throw new Error(
					`kvOffload must be "cpu", "gpu", "split", or { gpuLayers: number } (got "${v}")`,
				);
			}
		} else if (
			!v ||
			typeof v !== "object" ||
			typeof (v as { gpuLayers?: unknown }).gpuLayers !== "number"
		) {
			throw new Error(
				`kvOffload must be "cpu", "gpu", "split", or { gpuLayers: number }`,
			);
		} else {
			// `typeof NaN === "number"`, so the shape check above is not enough:
			// hold the nested layer count to the same rule as `args.gpuLayers`.
			const layers = (v as { gpuLayers: number }).gpuLayers;
			if (!Number.isInteger(layers) || layers < 0) {
				throw new Error(
					`kvOffload.gpuLayers must be a non-negative integer (got ${String(layers)})`,
				);
			}
		}
	}
	for (const field of ["flashAttention", "mmap", "mlock"] as const) {
		const value = args[field];
		if (value === undefined) continue;
		if (typeof value !== "boolean") {
			throw new Error(`${field} must be a boolean`);
		}
	}
}
|
|
204
|
+
|
|
205
|
+
/**
 * Contract implemented by a model loader. Only the three load-state members
 * are required; `generate` and `embed` are optional capability surfaces.
 * NOTE(review): the file header describes a runtime service registered as
 * "localInferenceLoader" — presumably that service implements this
 * interface; confirm against the registration site.
 */
export interface LocalInferenceLoader {
	/** Load the model described by `args`; resolves with no value. */
	loadModel(args: LocalInferenceLoadArgs): Promise<void>;
	/** Unload the current model; resolves with no value. */
	unloadModel(): Promise<void>;
	/** Path of the current model, or `null` (presumably when nothing is loaded). */
	currentModelPath(): string | null;
	/**
	 * Optional generation surface. When a loader implements this, the runtime
	 * handler (`ensure-local-inference-handler.ts`) routes TEXT_SMALL /
	 * TEXT_LARGE requests through it instead of the standalone engine. Mobile
	 * builds populate this via the Capacitor adapter; desktop omits it and falls
	 * back to the `LocalInferenceEngine`.
	 */
	generate?(args: {
		prompt: string;
		stopSequences?: string[];
		maxTokens?: number;
		temperature?: number;
		/**
		 * Optional `promptCacheKey` from the runtime cache plan. Loaders
		 * that implement prefix caching (the in-process llama.cpp FFI slot
		 * pool or node-llama-cpp session pool) use this to pin
		 * subsequent calls with the same key to the same KV cache slot.
		 * Loaders without prefix caching can ignore the field.
		 */
		cacheKey?: string;
	}): Promise<string>;
	/**
	 * Optional embedding surface. When a loader implements this, the runtime
	 * handler routes `TEXT_EMBEDDING` requests through it. The AOSP bun:ffi
	 * loader populates this directly via `llama_get_embeddings_seq`; the
	 * device-bridge loader populates it by dispatching an `embed` frame to
	 * the connected device. Loaders that cannot embed leave this undefined,
	 * and the runtime falls back to its non-local embedding provider chain.
	 */
	embed?(args: { input: string }): Promise<{
		embedding: number[];
		tokens: number;
	}>;
}
|
|
243
|
+
|
|
244
|
+
/**
 * Per-load override fields the caller can set. Subset of `LocalInferenceLoadArgs`
 * minus `modelPath` (which the coordinator owns) and minus speculative
 * fields (which the catalog `runtime.mtp` block owns end-to-end). The
 * route layer accepts this shape on `POST /api/local-inference/active`.
 *
 * Every field is optional; `mergeOverrides` copies only the fields that are
 * not `undefined` onto the resolved load args.
 */
export interface LocalInferenceLoadOverrides {
	contextSize?: number;
	cacheTypeK?: string;
	cacheTypeV?: string;
	gpuLayers?: number;
	kvOffload?: KvOffloadMode;
	flashAttention?: boolean;
	mmap?: boolean;
	mlock?: boolean;
	useGpu?: boolean;
	maxThreads?: number;
}
|
|
262
|
+
|
|
263
|
+
/** Optional collaborators for `resolveLocalInferenceLoadArgs`. */
interface ResolveLocalInferenceLoadArgsOptions {
	/** Manifest lookup to use; `defaultManifestLoader` is used when omitted. */
	manifestLoader?: ManifestLoader;
}
|
|
266
|
+
|
|
267
|
+
/**
 * Bundle root for an installed model: the recorded `bundleRoot` when
 * present, otherwise the directory two levels above the model file.
 */
function bundleRootForInstalledModel(installed: InstalledModel): string {
	if (installed.bundleRoot != null) {
		return installed.bundleRoot;
	}
	const modelDir = pathDirname(installed.path);
	return pathDirname(modelDir);
}
|
|
270
|
+
|
|
271
|
+
/**
 * Find the context size the manifest declares for this installed model file.
 *
 * Scans `manifest.files.text` in order and returns the `ctx` of the first
 * entry that has an integer `ctx >= 256` and whose path, resolved against
 * the bundle root, equals the resolved installed path. Returns `undefined`
 * when no entry qualifies.
 */
function manifestTextContextForInstalledPath(
	installed: InstalledModel,
	manifest: Eliza1Manifest,
): number | undefined {
	const target = pathResolve(installed.path);
	const root = bundleRootForInstalledModel(installed);
	const hit = manifest.files.text.find((entry) => {
		const ctx = entry.ctx;
		const ctxUsable =
			typeof ctx === "number" && Number.isInteger(ctx) && ctx >= 256;
		// Entries without a usable ctx are skipped before any path work.
		return ctxUsable && pathResolve(root, entry.path) === target;
	});
	return hit?.ctx;
}
|
|
291
|
+
|
|
292
|
+
/**
 * Ordered, de-duplicated list of places to look for the bundle manifest:
 * the recorded `manifestPath`, the recorded `bundleRoot`, the directory two
 * levels above the model file, then the model file's own directory.
 */
function candidateManifestPaths(installed: InstalledModel): string[] {
	const manifestName = "eliza-1.manifest.json";
	const modelDir = pathDirname(installed.path);
	// A Set keeps first-seen order while dropping repeats.
	const ordered = new Set<string>();
	if (installed.manifestPath) {
		ordered.add(installed.manifestPath);
	}
	if (installed.bundleRoot) {
		ordered.add(pathJoin(installed.bundleRoot, manifestName));
	}
	ordered.add(pathJoin(pathDirname(modelDir), manifestName));
	ordered.add(pathJoin(modelDir, manifestName));
	return Array.from(ordered);
}
|
|
303
|
+
|
|
304
|
+
/**
 * Fallback context-size lookup that reads the manifest JSON straight from
 * disk, for models whose `source` is `"eliza-download"`.
 *
 * Each candidate manifest path is tried in order. A manifest is used only
 * when it parses to an object, its `id` (if a string) matches the installed
 * id, and it is staged — `defaultEligible === false` or a `version`
 * containing candidate/staged/dev/local. From such a manifest, the first
 * `files.text` entry with a string `path`, an integer `ctx >= 256`, and a
 * resolved path equal to the installed model path supplies the result.
 * Unreadable or malformed manifests are skipped rather than reported.
 */
function readLegacyStagedManifestTextContext(
	installed: InstalledModel,
): number | undefined {
	if (installed.source !== "eliza-download") return undefined;

	const stagedVersion = /(?:candidate|staged|dev|local)/i;
	const target = pathResolve(installed.path);
	const root = bundleRootForInstalledModel(installed);

	for (const manifestPath of candidateManifestPaths(installed)) {
		let json: unknown;
		try {
			json = JSON.parse(readFileSync(manifestPath, "utf8"));
		} catch {
			// Missing file or invalid JSON: try the next candidate.
			continue;
		}
		if (typeof json !== "object" || json === null) continue;

		const manifest = json as {
			id?: unknown;
			version?: unknown;
			defaultEligible?: unknown;
			files?: { text?: unknown };
		};
		const belongsToOtherModel =
			typeof manifest.id === "string" && manifest.id !== installed.id;
		if (belongsToOtherModel) continue;

		const versionText =
			typeof manifest.version === "string" ? manifest.version : "";
		const staged =
			manifest.defaultEligible === false || stagedVersion.test(versionText);
		if (!staged) continue;

		const textEntries = manifest.files?.text;
		if (!Array.isArray(textEntries)) continue;

		for (const candidate of textEntries) {
			if (typeof candidate !== "object" || candidate === null) continue;
			const { path: relPath, ctx } = candidate as {
				path?: unknown;
				ctx?: unknown;
			};
			if (
				typeof relPath === "string" &&
				typeof ctx === "number" &&
				Number.isInteger(ctx) &&
				ctx >= 256 &&
				pathResolve(root, relPath) === target
			) {
				return ctx;
			}
		}
	}
	return undefined;
}
|
|
350
|
+
|
|
351
|
+
/**
 * Context size declared for an installed bundle file. The manifest returned
 * by `manifestLoader` is consulted first; when it is absent or has no
 * matching entry, the legacy staged-manifest reader is used instead.
 */
function installedBundleContextSize(
	installed: InstalledModel,
	manifestLoader: ManifestLoader,
): number | undefined {
	const manifest = manifestLoader(installed.id, installed);
	const fromManifest = manifest
		? manifestTextContextForInstalledPath(installed, manifest)
		: undefined;
	return fromManifest !== undefined
		? fromManifest
		: readLegacyStagedManifestTextContext(installed);
}
|
|
365
|
+
|
|
366
|
+
/**
 * Fill `args` in place with defaults from the catalog entry (and, for the
 * context size, from the installed bundle's manifest).
 *
 * KV cache types are written whenever the catalog declares them. All other
 * fields are written only when `args` does not already carry a value.
 */
function applyCatalogDefaults(
	args: LocalInferenceLoadArgs,
	installed: InstalledModel,
	catalog: CatalogModel | undefined,
	manifestLoader: ManifestLoader,
): void {
	const kvCache = catalog?.runtime?.kvCache;
	const optimizations = catalog?.runtime?.optimizations;

	// KV cache types: catalog value wins here; per-call overrides are merged
	// in by the caller afterwards.
	if (kvCache?.typeK) args.cacheTypeK = kvCache.typeK;
	if (kvCache?.typeV) args.cacheTypeV = kvCache.typeV;

	// Context window: prefer what the installed bundle declares for this
	// file, then the catalog's `contextLength`, so the loader does not fall
	// back to whatever default the binding happens to use.
	if (args.contextSize === undefined) {
		const bundleContext = installedBundleContextSize(installed, manifestLoader);
		args.contextSize = bundleContext ?? catalog?.contextLength;
	}

	// GPU offload: only a numeric catalog value is applied; `"auto"` is the
	// loader's default and needs nothing set.
	if (args.gpuLayers === undefined && typeof catalog?.gpuLayers === "number") {
		args.gpuLayers = catalog.gpuLayers;
	}

	if (
		args.flashAttention === undefined &&
		optimizations?.flashAttention !== undefined
	) {
		args.flashAttention = optimizations.flashAttention;
	}

	// The catalog expresses mmap negatively (`noMmap`), so invert it.
	if (args.mmap === undefined && optimizations?.noMmap !== undefined) {
		args.mmap = !optimizations.noMmap;
	}

	if (args.mlock === undefined && optimizations?.mlock !== undefined) {
		args.mlock = optimizations.mlock;
	}
}
|
|
420
|
+
|
|
421
|
+
/**
 * Copy caller-supplied overrides onto `args` in place. Only the known
 * override fields are considered, and a field whose value is `undefined`
 * leaves the existing `args` value untouched.
 */
function mergeOverrides(
	args: LocalInferenceLoadArgs,
	overrides: LocalInferenceLoadOverrides | undefined,
): void {
	if (!overrides) return;
	// Explicit pick so unrelated properties on `overrides` are never copied.
	const picked: LocalInferenceLoadOverrides = {
		contextSize: overrides.contextSize,
		cacheTypeK: overrides.cacheTypeK,
		cacheTypeV: overrides.cacheTypeV,
		gpuLayers: overrides.gpuLayers,
		kvOffload: overrides.kvOffload,
		flashAttention: overrides.flashAttention,
		mmap: overrides.mmap,
		mlock: overrides.mlock,
		useGpu: overrides.useGpu,
		maxThreads: overrides.maxThreads,
	};
	for (const [field, value] of Object.entries(picked)) {
		if (value !== undefined) {
			Object.assign(args, { [field]: value });
		}
	}
}
|
|
443
|
+
|
|
444
|
+
/**
 * Locate the vision projector (mmproj) GGUF for an installed model.
 *
 * A path is returned only when all of the following hold:
 *  - the catalog entry declares `sourceModel.components.vision.file`,
 *  - the installed model records a `bundleRoot` (manifest-validated bundles
 *    are the only ones that land a vision component; external-scan models
 *    such as LM Studio or Jan do not), and
 *  - the file exists on disk under that bundle root.
 * Otherwise the result is `undefined` — either the tier is text-only or the
 * file has not been downloaded yet. In the latter case the coordinator
 * emits a one-shot warning and the text load still succeeds without vision.
 *
 * Path layout: the catalog file is a Hugging Face-relative path such as
 * `bundles/2b/vision/mmproj-2b.gguf`, while the local `bundleRoot` already
 * is the per-tier `bundles/<tier>` subtree. The leading `bundles/<tier>/`
 * segment is therefore stripped before joining; a path without that prefix
 * (e.g. a custom bundle layout) is joined unchanged.
 */
export function resolveMmprojPath(
	installed: InstalledModel,
	catalog: CatalogModel | undefined,
): string | undefined {
	if (!catalog) return undefined;

	const visionFile = catalog.sourceModel?.components?.vision?.file;
	if (!visionFile) return undefined;

	const root = installed.bundleRoot;
	if (!root) return undefined;

	const onDisk = pathJoin(root, stripBundlePrefix(visionFile, installed.id));
	return existsSync(onDisk) ? onDisk : undefined;
}
|
|
481
|
+
|
|
482
|
+
/**
 * Locate the MTP drafter file for an installed model.
 *
 * Requires a recorded `bundleRoot`. Manifest `files.mtp` entries are tried
 * first, in order, and the first one that exists on disk wins. If none
 * exists, the catalog's `runtime.mtp.drafterFile` (or, failing that,
 * `sourceModel.components.mtp.file`) is resolved under the bundle root after
 * stripping its `bundles/<tier>/` prefix. Returns `undefined` when no
 * drafter file is found on disk.
 */
function resolveMtpDrafterPath(
	installed: InstalledModel,
	catalog: CatalogModel | undefined,
	manifestLoader: ManifestLoader,
): string | undefined {
	const root = installed.bundleRoot;
	if (!root) return undefined;

	const manifestEntries =
		manifestLoader(installed.id, installed)?.files.mtp ?? [];
	const present = manifestEntries.find((entry) =>
		existsSync(pathJoin(root, entry.path)),
	);
	if (present) return pathJoin(root, present.path);

	const declared =
		catalog?.runtime?.mtp?.drafterFile ??
		catalog?.sourceModel?.components?.mtp?.file;
	if (!declared) return undefined;

	const fallback = pathJoin(root, stripBundlePrefix(declared, installed.id));
	return existsSync(fallback) ? fallback : undefined;
}
|
|
505
|
+
|
|
506
|
+
/**
 * Convert a catalog (Hugging Face-relative) file path into a path relative
 * to the local bundle root by dropping a leading `bundles/<tier-slug>/`.
 * The tier slug is the model id with any `eliza-1-` prefix removed. A path
 * that does not start with that prefix is returned unchanged.
 */
function stripBundlePrefix(catalogFile: string, modelId: string): string {
	const idPrefix = "eliza-1-";
	const tierSlug = modelId.startsWith(idPrefix)
		? modelId.substring(idPrefix.length)
		: modelId;
	const hfPrefix = "bundles/" + tierSlug + "/";
	if (!catalogFile.startsWith(hfPrefix)) {
		return catalogFile;
	}
	return catalogFile.substring(hfPrefix.length);
}
|
|
521
|
+
|
|
522
|
+
// Context ceiling returned by `mobileContextCeiling()` when the
// ELIZA_MOBILE_CONTEXT_CEILING env var is unset or not an integer >= 256.
const DEFAULT_MOBILE_CONTEXT_CEILING = 8192;
|
|
523
|
+
|
|
524
|
+
/**
 * Detect a memory-constrained mobile runtime (iOS/Android).
 *
 * The host injects a platform marker into the process env at start:
 * `ELIZA_MOBILE_PLATFORM` is read first, then `ELIZA_PLATFORM`. The marker
 * is trimmed and lower-cased and must be exactly `ios` or `android`.
 * Desktop and server have no marker, so they keep the full catalog context
 * ceiling. Environments without `process` / `process.env` report `false`.
 */
function isMobileLocalInferenceRuntime(): boolean {
	if (typeof process === "undefined" || !process.env) return false;
	const { ELIZA_MOBILE_PLATFORM, ELIZA_PLATFORM } = process.env;
	// `||` (not `??`) so an empty first marker falls through to the second.
	const marker = (ELIZA_MOBILE_PLATFORM || ELIZA_PLATFORM || "")
		.trim()
		.toLowerCase();
	return ["ios", "android"].includes(marker);
}
|
|
541
|
+
|
|
542
|
+
/**
 * Context-window ceiling to use on mobile runtimes.
 *
 * Reads `ELIZA_MOBILE_CONTEXT_CEILING` from the process env, trims it and
 * parses it as a base-10 integer (`Number.parseInt`, so trailing
 * non-numeric characters are ignored).
 *
 * @returns The parsed value when it is an integer >= 256; otherwise
 *   `DEFAULT_MOBILE_CONTEXT_CEILING` (also when `process` / `process.env`
 *   is unavailable).
 */
function mobileContextCeiling(): number {
	// `process.env?.X` only guards a missing `env`; a missing `process`
	// binding still throws a ReferenceError. Use the same guard as
	// `isMobileLocalInferenceRuntime` so this helper is safe on its own.
	if (typeof process === "undefined" || !process.env) {
		return DEFAULT_MOBILE_CONTEXT_CEILING;
	}
	const raw = process.env.ELIZA_MOBILE_CONTEXT_CEILING?.trim();
	const parsed = raw ? Number.parseInt(raw, 10) : Number.NaN;
	return Number.isInteger(parsed) && parsed >= 256
		? parsed
		: DEFAULT_MOBILE_CONTEXT_CEILING;
}
|
|
549
|
+
|
|
550
|
+
/**
 * Build the final loader arguments for an installed model.
 *
 * Resolution order (later steps win):
 *  1. catalog/manifest defaults via `applyCatalogDefaults`,
 *  2. the mmproj path when one resolves for this install,
 *  3. native MTP launch defaults when the catalog runtime declares `mtp`,
 *  4. caller `overrides` via `mergeOverrides`,
 *  5. the mobile context ceiling (iOS/Android only),
 *  6. KV cache type normalization (trim + lower-case),
 *  7. validation with `allowFork: true`.
 *
 * @param installed The installed model to load.
 * @param overrides Optional caller-supplied load overrides.
 * @param options Optional hooks; `manifestLoader` replaces the default
 *   manifest loader.
 * @returns The merged, normalized and validated load arguments.
 * @throws Error when the catalog declares a separate-drafter MTP, the install
 *   has a `bundleRoot`, and no drafter GGUF path could be resolved. Anything
 *   thrown by `validateLocalInferenceLoadArgs` also propagates.
 */
export async function resolveLocalInferenceLoadArgs(
  installed: InstalledModel,
  overrides?: LocalInferenceLoadOverrides,
  options: ResolveLocalInferenceLoadArgsOptions = {},
): Promise<LocalInferenceLoadArgs> {
  const args: LocalInferenceLoadArgs = { modelPath: installed.path };
  const catalog = findCatalogModel(installed.id);
  const runtime = catalog?.runtime;
  const manifestLoader = options.manifestLoader ?? defaultManifestLoader;

  applyCatalogDefaults(args, installed, catalog, manifestLoader);

  // WS2: when the tier declares vision and the per-tier mmproj GGUF is
  // already on disk, plumb the path. The text load is never gated on
  // mmproj — a missing file only degrades vision for the session.
  const mmprojPath = resolveMmprojPath(installed, catalog);
  if (mmprojPath) {
    args.mmprojPath = mmprojPath;
  }

  const mtp = runtime?.mtp;
  if (mtp) {
    // Native MTP launch defaults. Do NOT replace the catalog context here;
    // `applyCatalogDefaults` owns the chat-side context and this block only
    // owns the speculative draft window.
    //
    // Two MTP shapes: same-file MTP embeds the NextN head in the text GGUF
    // (no `drafterFile` in the catalog) and runs without a separate draft
    // model; separate-drafter MTP declares a `drafterFile` and requires the
    // bundled drafter GGUF to be present on disk.
    const sameFileMtp = !mtp.drafterFile;
    const drafterPath = sameFileMtp
      ? undefined
      : resolveMtpDrafterPath(installed, catalog, manifestLoader);
    if (!sameFileMtp && installed.bundleRoot && !drafterPath) {
      throw new Error(
        `[local-inference] ${installed.id} declares a separate-drafter MTP but no bundled drafter GGUF was found under ${installed.bundleRoot}`,
      );
    }
    args.useGpu = true;
    args.draftModelPath = drafterPath;
    args.draftMin = mtp.draftMin;
    args.draftMax = mtp.draftMax;
    args.speculativeSamples = mtp.draftMax;
    args.mobileSpeculative = true;
  }

  mergeOverrides(args, overrides);

  // Mobile context ceiling. A 128k-trained model's catalog context (e.g.
  // 131072) implies a multi-GB KV cache; loading it at full width on a phone
  // is impractically slow and OOMs. On iOS/Android clamp the context window
  // and the speculative draft window to a mobile-sane ceiling; desktop/server
  // keep the full catalog ceiling. Override with ELIZA_MOBILE_CONTEXT_CEILING
  // for capable devices.
  //
  // The two clamps are independent: a draft window supplied without a chat
  // context size must still be bounded on mobile.
  if (isMobileLocalInferenceRuntime()) {
    const ceiling = mobileContextCeiling();
    if (args.contextSize !== undefined && args.contextSize > ceiling) {
      args.contextSize = ceiling;
    }
    if (
      args.draftContextSize !== undefined &&
      args.draftContextSize > ceiling
    ) {
      args.draftContextSize = ceiling;
    }
  }

  if (args.cacheTypeK) args.cacheTypeK = args.cacheTypeK.trim().toLowerCase();
  if (args.cacheTypeV) args.cacheTypeV = args.cacheTypeV.trim().toLowerCase();

  // Validate the final merged args. The route layer is the one that calls
  // `validateLocalInferenceLoadArgs` with `allowFork: false` against just the
  // overrides — see `local-inference-compat-routes.ts`.
  validateLocalInferenceLoadArgs(args, { allowFork: true });
  return args;
}
|
|
627
|
+
|
|
628
|
+
// Megabytes per gigabyte; used to convert a hardware probe's `totalRamGb`
// into the megabyte figures the RAM admission checks work with.
const MB_PER_GB = 1024;
|
|
629
|
+
|
|
630
|
+
/**
 * Typed refusal raised when a model's RAM floor exceeds what the host can
 * provide. Carries the numbers behind the decision so callers can render a
 * structured error instead of parsing the message.
 *
 * `fittingVariantId` is the id of a variant that would fit, or `null` when
 * none does. The message adds a variant hint unless the fitting variant is
 * the requested model itself.
 */
export class ModelDoesNotFitError extends Error {
  readonly modelId: string;
  readonly requiredMb: number;
  readonly usableMb: number;
  readonly hostRamMb: number;
  readonly fittingVariantId: string | null;

  constructor(args: {
    modelId: string;
    requiredMb: number;
    usableMb: number;
    hostRamMb: number;
    fittingVariantId: string | null;
  }) {
    const { modelId, requiredMb, usableMb, hostRamMb, fittingVariantId } =
      args;

    // Three cases: nothing fits, the requested model is itself the fitting
    // variant (no hint), or a different variant fits.
    let variantHint: string;
    if (!fittingVariantId) {
      variantHint = " No context variant of this tier fits this host.";
    } else if (fittingVariantId === modelId) {
      variantHint = "";
    } else {
      variantHint = ` The largest context variant of this tier that would fit is "${fittingVariantId}".`;
    }

    super(
      `[local-inference] Model "${modelId}" needs ~${requiredMb} MB RAM to boot, but only ~${usableMb} MB are usable on this host (${hostRamMb} MB total, after the OS/runtime headroom reserve). Refusing to load it.${variantHint} Pick a smaller tier in Settings → Model Hub, or set ELIZA_LOCAL_RAM_HEADROOM_MB lower if you accept running closer to the limit.`,
    );
    this.name = "ModelDoesNotFitError";
    this.modelId = modelId;
    this.requiredMb = requiredMb;
    this.usableMb = usableMb;
    this.hostRamMb = hostRamMb;
    this.fittingVariantId = fittingVariantId;
  }
}
|
|
660
|
+
|
|
661
|
+
/**
 * RAM admission gate for a single model load.
 *
 * Looks the model up in the catalog and asks `assessRamFit` whether it fits
 * `hostRamMb` (the host's total RAM in megabytes). `installed` is forwarded
 * in the fit options so a manifest-declared budget can take precedence over
 * the catalog scalar.
 *
 * Models without a catalog entry are not gated: there is no RAM budget to
 * check against, so the operator's explicit pick is trusted and a zeroed
 * `"fits"` decision is returned.
 *
 * @param installed The installed model about to be loaded.
 * @param hostRamMb Total host RAM in megabytes.
 * @param options Extra options forwarded to the fit helpers.
 * @returns The advisory fit decision (`"fits"` or `"tight"`) with the budget
 *   floor and recommendation, so callers can log a `tight` warning.
 * @throws ModelDoesNotFitError when the assessment reports no fit; the error
 *   carries the id of the variant `pickFittingContextVariant` selects, if any.
 */
export function assertModelFitsHost(
  installed: InstalledModel,
  hostRamMb: number,
  options: RamFitOptions = {},
): { level: "fits" | "tight"; minMb: number; recommendedMb: number } {
  const catalog = findCatalogModel(installed.id);
  if (!catalog) {
    return { level: "fits", minMb: 0, recommendedMb: 0 };
  }

  const fit = assessRamFit(catalog, hostRamMb, { ...options, installed });

  if (!fit.fits) {
    const fitting = pickFittingContextVariant(catalog, hostRamMb, {
      ...options,
      installed,
    });
    throw new ModelDoesNotFitError({
      modelId: installed.id,
      requiredMb: fit.budget.minMb,
      usableMb: fit.usableMb,
      hostRamMb,
      fittingVariantId: fitting?.id ?? null,
    });
  }

  // A fitting decision should never be labelled "wontfit"; if it is, report
  // it as "tight" so the return type stays within the advisory levels.
  const level = fit.level === "wontfit" ? "tight" : fit.level;
  return {
    level,
    minMb: fit.budget.minMb,
    recommendedMb: fit.budget.recommendedMb,
  };
}
|
|
699
|
+
|
|
700
|
+
/**
 * Typed refusal for local-voice sessions: the bundle-level counterpart of
 * `ModelDoesNotFitError`. `assertVoiceBundleFitsHost` raises it when the
 * whole co-resident voice + text stack cannot fit the host's RAM
 * (per R9 §2.3 / §3.2).
 *
 * Catch it at the runtime's voice-session-start boundary and surface the
 * tier-warning copy (`TIER_WARNING_COPY[<tier>]`). Do NOT load weights and
 * let `MemoryMonitor` evict mid-session instead.
 *
 * The message reports the steady-state requirement and, separately, the
 * transient TTS peak as `requiredPeakMb - requiredSteadyStateMb`.
 */
export class VoiceBundleDoesNotFitError extends Error {
  readonly tierSlot: string;
  readonly deviceTier: string;
  readonly requiredPeakMb: number;
  readonly requiredSteadyStateMb: number;
  readonly usableMb: number;
  readonly hostRamMb: number;

  constructor(args: {
    tierSlot: string;
    deviceTier: string;
    requiredPeakMb: number;
    requiredSteadyStateMb: number;
    usableMb: number;
    hostRamMb: number;
  }) {
    const {
      tierSlot,
      deviceTier,
      requiredPeakMb,
      requiredSteadyStateMb,
      usableMb,
      hostRamMb,
    } = args;
    const transientPeakMb = requiredPeakMb - requiredSteadyStateMb;

    super(
      `[local-inference] The voice bundle for tier "${tierSlot}" needs ~${requiredSteadyStateMb} MB steady-state (+~${transientPeakMb} MB transient TTS peak) but only ~${usableMb} MB are usable on this host (${hostRamMb} MB total, after the OS/runtime headroom reserve). Refusing to start local voice; the runtime should fall back to cloud TTS+ASR or refuse the user-facing action.`,
    );
    this.name = "VoiceBundleDoesNotFitError";
    this.tierSlot = tierSlot;
    this.deviceTier = deviceTier;
    this.requiredPeakMb = requiredPeakMb;
    this.requiredSteadyStateMb = requiredSteadyStateMb;
    this.usableMb = usableMb;
    this.hostRamMb = hostRamMb;
  }
}
|
|
738
|
+
|
|
739
|
+
/**
 * Cross-model admission gate for the local-voice session. Delegates to
 * `assessVoiceBundleFits`, which sizes the whole co-resident bundle (LM +
 * ASR + TTS + embedding + VAD + wake-word + turn-detector + emotion +
 * speaker-encoder + transient TTS peak), and refuses entry when the host
 * can't fit it.
 *
 * Behaviour:
 *  - Unknown `tierSlot` (no own entry in `VOICE_ENSEMBLE_BUDGETS`): permissive
 *    `"fits"` decision with zeroed requirements; the per-tier
 *    `assertModelFitsHost` check remains the effective gate.
 *  - `wontfit` with `strict` unset or `true`: throws.
 *  - `wontfit` with `strict === false`: returns the `wontfit` decision so the
 *    runtime can log and degrade.
 *  - Otherwise returns the decision as-is.
 *
 * Pair with `TIER_WARNING_COPY[deviceTier]` for user-facing UX.
 * R9 §1.4 + §2.3 + §3.2 spec.
 *
 * @param args.tierSlot Voice tier slot key to look up.
 * @param args.deviceTier Device tier label forwarded to the assessment.
 * @param args.hostRamMb Total host RAM in megabytes.
 * @param args.reserveMb Optional OS/runtime reserve in megabytes.
 * @param args.strict When `false`, never throws on `wontfit`.
 * @returns The fit decision with steady-state/peak requirements and usable MB.
 * @throws VoiceBundleDoesNotFitError on `wontfit` unless `strict === false`;
 *   the error carries rounded megabyte figures.
 */
export function assertVoiceBundleFitsHost(args: {
  tierSlot: string;
  deviceTier: string;
  hostRamMb: number;
  reserveMb?: number;
  strict?: boolean;
}): {
  level: "fits" | "tight" | "wontfit";
  steadyStateMb: number;
  peakMb: number;
  usableMb: number;
  fits: boolean;
} {
  // Own-property check on purpose: the `in` operator also matches keys
  // inherited from Object.prototype ("toString", "constructor", ...), which
  // would let a bogus slot skip the permissive branch and reach
  // `assessVoiceBundleFits` with a slot that has no budget.
  const isKnownSlot = Object.prototype.hasOwnProperty.call(
    VOICE_ENSEMBLE_BUDGETS,
    args.tierSlot,
  );
  if (!isKnownSlot) {
    // Unknown tier slot — be permissive: the runtime hasn't built a
    // canonical slot for this combination yet, and falling through to
    // `assertModelFitsHost` (the per-tier check) is the right default.
    return {
      level: "fits",
      steadyStateMb: 0,
      peakMb: 0,
      // 1536 MB is the fallback reserve when the caller supplies none.
      usableMb: Math.max(0, args.hostRamMb - (args.reserveMb ?? 1536)),
      fits: true,
    };
  }

  const decision = assessVoiceBundleFits({
    tierSlot: args.tierSlot as VoiceTierSlot,
    deviceTier: args.deviceTier as "MAX" | "GOOD" | "OKAY" | "POOR",
    hostRamMb: args.hostRamMb,
    reserveMb: args.reserveMb,
  });

  // `strict` defaults to true: only an explicit `false` suppresses the throw.
  if (decision.level === "wontfit" && args.strict !== false) {
    throw new VoiceBundleDoesNotFitError({
      tierSlot: args.tierSlot,
      deviceTier: args.deviceTier,
      requiredPeakMb: Math.round(decision.peakMb),
      requiredSteadyStateMb: Math.round(decision.steadyStateMb),
      usableMb: Math.round(decision.usableMb),
      hostRamMb: args.hostRamMb,
    });
  }

  return {
    level: decision.level,
    steadyStateMb: decision.steadyStateMb,
    peakMb: decision.peakMb,
    usableMb: decision.usableMb,
    fits: decision.fits,
  };
}
|
|
802
|
+
|
|
803
|
+
/**
 * Convert a hardware probe's total RAM from gigabytes to whole megabytes.
 *
 * @param probe Hardware probe exposing `totalRamGb`.
 * @returns `totalRamGb * MB_PER_GB`, rounded to the nearest integer.
 */
function hostRamMbFromProbe(probe: HardwareProbe): number {
  const { totalRamGb } = probe;
  return Math.round(MB_PER_GB * totalRamGb);
}
|
|
806
|
+
|
|
807
|
+
/**
 * Refusal raised when activation is requested for a model whose own
 * `eliza-1.manifest.json` says its text eval has not passed (`candidate.*` /
 * `weights-staged.*` tiers).
 *
 * The structured fields are what the route layer surfaces verbatim to the
 * API consumer: `manifestVersion` lets the UI say "this tier isn't ready"
 * with the real version string, and `failedEvals` lists the checks that are
 * still red. The message includes a "Failed evals" sentence only when that
 * list is non-empty.
 *
 * Why the gate exists at activation and not only at download: the bundle may
 * already be on disk (hand-staged, manually copied, or downloaded before a
 * fail-state was recorded), so a download-only gate leaves a window where a
 * candidate-only bundle can be flipped into the active slot and silently
 * emit `[unused]` tokens.
 *
 * See issue #7679 for the original symptom: a candidate `1.0.0-candidate.1`
 * bundle with every `evals.*.passed` set to `false` was activated and then
 * served BERT/WordPiece reserved tokens (`[unused0..99]` / `[PAD]`) as chat
 * output with no actionable error.
 */
export class CandidateModelActivationError extends Error {
  readonly modelId: string;
  readonly manifestVersion: string;
  readonly failedEvals: ReadonlyArray<string>;

  constructor(args: {
    modelId: string;
    manifestVersion: string;
    failedEvals: ReadonlyArray<string>;
  }) {
    const { modelId, manifestVersion, failedEvals } = args;

    let evalSuffix = "";
    if (failedEvals.length > 0) {
      evalSuffix = ` Failed evals: ${failedEvals.join(", ")}.`;
    }

    super(
      `Model "${modelId}" is candidate-only — its manifest (version ${manifestVersion}) reports evals.textEval.passed=false. Refusing to activate.${evalSuffix} Wait for the publisher to flip the manifest off candidate/weights-staged and re-fetch the bundle.`,
    );
    this.name = "CandidateModelActivationError";
    this.modelId = modelId;
    this.manifestVersion = manifestVersion;
    this.failedEvals = failedEvals;
  }
}
|
|
849
|
+
|
|
850
|
+
/**
 * Activation eval gate.
 *
 * Loads the installed bundle's manifest through `manifestLoader` and refuses
 * activation unless `evals.textEval.passed` is strictly `true`. When the
 * loader yields no manifest (third-party HF GGUFs, external scans,
 * pre-bundle installs) the model is *not* gated: only bundles that ship a
 * published manifest are subject to the check, because the manifest is the
 * source of truth for publish state.
 *
 * @param installed The installed model being activated.
 * @param manifestLoader Loader used to read the manifest; defaults to
 *   `defaultManifestLoader`.
 * @throws CandidateModelActivationError when a manifest exists and its text
 *   eval has not passed; the error lists every failing eval.
 */
export function assertManifestEvalsPassed(
  installed: InstalledModel,
  manifestLoader: ManifestLoader = defaultManifestLoader,
): void {
  const manifest = manifestLoader(installed.id, installed);

  if (manifest && manifest.evals.textEval.passed !== true) {
    throw new CandidateModelActivationError({
      modelId: installed.id,
      manifestVersion: manifest.version,
      failedEvals: collectFailedEvalNames(manifest),
    });
  }
}
|
|
874
|
+
|
|
875
|
+
/**
 * List the names of the manifest evals that have not passed.
 *
 * `textEval`, `voiceRtf`, `e2eLoopOk` and `thirtyTurnOk` are always checked.
 * `asrWer`, `embedMteb`, `vadLatencyMs`, `expressive` and `turnDetector` are
 * checked only when present in the manifest. An eval counts as passed only
 * when its flag is strictly `true`.
 *
 * @param manifest The bundle manifest to inspect.
 * @returns Failing eval names, in the fixed order listed above.
 */
function collectFailedEvalNames(manifest: Eliza1Manifest): string[] {
  const { evals } = manifest;

  // Optional evals are skipped when absent and fail unless `passed === true`.
  const optionalFailed = (
    entry: { passed?: unknown } | null | undefined,
  ): boolean => (entry ? entry.passed !== true : false);

  const verdicts: Array<[name: string, failed: boolean]> = [
    ["textEval", evals.textEval.passed !== true],
    ["voiceRtf", evals.voiceRtf.passed !== true],
    ["e2eLoopOk", evals.e2eLoopOk !== true],
    ["thirtyTurnOk", evals.thirtyTurnOk !== true],
    ["asrWer", optionalFailed(evals.asrWer)],
    ["embedMteb", optionalFailed(evals.embedMteb)],
    ["vadLatencyMs", optionalFailed(evals.vadLatencyMs)],
    ["expressive", optionalFailed(evals.expressive)],
    ["turnDetector", optionalFailed(evals.turnDetector)],
  ];

  return verdicts.filter(([, failed]) => failed).map(([name]) => name);
}
|
|
897
|
+
|
|
898
|
+
/**
 * Type guard for a runtime-registered local inference loader.
 *
 * A value qualifies when it is a non-null object whose `loadModel`,
 * `unloadModel` and `currentModelPath` members are all functions.
 *
 * @param value Arbitrary value, typically a service looked up by name.
 * @returns `true` when `value` exposes the three loader methods.
 */
function isLoader(value: unknown): value is LocalInferenceLoader {
  if (typeof value !== "object" || value === null) return false;

  const shape = value as Record<string, unknown>;
  const requiredMethods = ["loadModel", "unloadModel", "currentModelPath"];
  return requiredMethods.every(
    (method) => typeof shape[method] === "function",
  );
}
|
|
907
|
+
|
|
908
|
+
/**
 * Owns the "which local model is active" state machine.
 *
 * It performs model switches and unloads against either a runtime-registered
 * loader service or the standalone `localInferenceEngine`, keeps the current
 * `ActiveModelState`, remembers the last model that reached `"ready"` so a
 * failed switch can roll back to it, and notifies subscribers on every state
 * change.
 */
export class ActiveModelCoordinator {
  /** Current state. Callers only ever receive shallow copies of it. */
  private state: ActiveModelState = {
    modelId: null,
    loadedAt: null,
    status: "idle",
  };

  /**
   * The last model that successfully reached `status: "ready"`, plus the
   * inputs needed to re-load it. switchTo() tears the active model down
   * before loading the new one (unload-then-load); if the new load fails we
   * restore this so a failed switch never leaves the host with zero models
   * loaded while a working one existed moments earlier. `null` until the
   * first successful load (or after an unload).
   */
  private lastReady: {
    installed: InstalledModel;
    overrides?: LocalInferenceLoadOverrides;
    state: ActiveModelState;
  } | null = null;

  /** Subscribers notified with a state copy on every `emit()`. */
  private readonly listeners = new Set<(state: ActiveModelState) => void>();

  /** Return a shallow copy of the current state. */
  snapshot(): ActiveModelState {
    return { ...this.state };
  }

  /**
   * Register a state listener.
   *
   * @param listener Called with a copy of the state on each change.
   * @returns A function that removes the listener.
   */
  subscribe(listener: (state: ActiveModelState) => void): () => void {
    this.listeners.add(listener);
    return () => {
      this.listeners.delete(listener);
    };
  }

  /**
   * Notify every listener with one shared copy of the current state.
   * A listener that throws is dropped so it cannot break later
   * notifications; its error is intentionally not propagated.
   */
  private emit(): void {
    const current = { ...this.state };
    for (const listener of this.listeners) {
      try {
        listener(current);
      } catch {
        this.listeners.delete(listener);
      }
    }
  }

  /**
   * WS2: one-shot warning latch per (modelId) — when the tier declares
   * vision but no mmproj GGUF was found on disk, log once so the
   * operator sees that vision is degraded for this session. The
   * arbiter's vision-describe capability stays unregistered for this
   * session; plugin-vision falls back to its non-eliza-1 path.
   */
  private readonly warnedDegradedVisionFor = new Set<string>();

  /**
   * Log a single warning per model id when the catalog declares a vision
   * component for the tier but no mmproj path was resolved.
   *
   * @param installed The model being loaded.
   * @param resolvedMmprojPath The mmproj path from the resolved load args.
   */
  private warnIfVisionDegraded(
    installed: InstalledModel,
    resolvedMmprojPath: string | undefined,
  ): void {
    const catalog = findCatalogModel(installed.id);
    const tierClaimsVision = Boolean(
      catalog?.sourceModel?.components?.vision?.file,
    );
    if (!tierClaimsVision) return;
    if (resolvedMmprojPath) return;
    if (this.warnedDegradedVisionFor.has(installed.id)) return;
    this.warnedDegradedVisionFor.add(installed.id);
    console.warn(
      `[local-inference] vision capability unavailable for tier "${installed.id}" — the bundle declares vision/mmproj but the projector GGUF is not on disk under "${installed.bundleRoot ?? "<no-bundleRoot>"}". Text and voice will continue to load; plugin-vision will fall back to its Florence-2 path. Download the per-tier mmproj-<tier>.gguf to enable native vision-describe.`,
    );
  }

  /** Return the loader service from the current runtime, if registered. */
  private getLoader(runtime: AgentRuntime | null): LocalInferenceLoader | null {
    if (!runtime) return null;
    const candidate = (
      runtime as {
        getService?: (name: string) => unknown;
      }
    ).getService?.("localInferenceLoader");
    return isLoader(candidate) ? candidate : null;
  }

  /**
   * Make `installed` the active model.
   *
   * Flow: eval gate → emit `"loading"` → unload-then-load via `performLoad`.
   * On failure the previously-ready model is restored when one exists: it is
   * re-loaded if the failed attempt displaced it, otherwise only the
   * coordinator state is put back. If nothing can be restored the state
   * becomes `"error"` with `modelId: null`.
   *
   * @param runtime Agent runtime used to look up a registered loader; `null`
   *   selects the standalone engine.
   * @param installed The model to activate.
   * @param overrides Optional load overrides.
   * @param opts `hardware` skips the hardware probe; `manifestLoader`
   *   replaces the default manifest loader for both the eval gate and
   *   load-argument resolution.
   * @returns A snapshot of the resulting state.
   * @throws CandidateModelActivationError (from the eval gate) before any
   *   state change. Load failures are reported through the returned state.
   */
  async switchTo(
    runtime: AgentRuntime | null,
    installed: InstalledModel,
    overrides?: LocalInferenceLoadOverrides,
    opts: { hardware?: HardwareProbe; manifestLoader?: ManifestLoader } = {},
  ): Promise<ActiveModelState> {
    // Activation eval gate (#7679). Refuse to flip a candidate-only /
    // weights-staged bundle into the active model slot. Runs BEFORE the
    // loading state is emitted so the UI never shows "loading → error" for a
    // known-bad bundle; it sees the refusal from the route layer directly.
    assertManifestEvalsPassed(installed, opts.manifestLoader);

    this.state = {
      modelId: installed.id,
      loadedAt: null,
      status: "loading",
    };
    this.emit();

    // Prefer a runtime-registered loader (plugin-local-ai or equivalent)
    // when present; otherwise fall back to the standalone engine.
    const loader = this.getLoader(runtime);

    // Snapshot the previously-active model BEFORE the unload-then-load tears
    // it down, so a failed switch can restore it instead of leaving zero
    // models loaded under the requested id.
    const previous = this.lastReady;
    let previousDisplaced = false;
    // Tracks whether the REQUESTED model actually loaded; only then may its
    // last-used metadata be touched below.
    let loadSucceeded = false;

    try {
      const ready = await this.performLoad(
        loader,
        installed,
        overrides,
        opts,
        () => {
          previousDisplaced = true;
        },
      );
      this.state = ready;
      this.lastReady = { installed, overrides, state: ready };
      loadSucceeded = true;
    } catch (err) {
      const failure = err instanceof Error ? err.message : String(err);
      if (previous) {
        // Ground truth beats the callback flag: the previous model counts as
        // displaced when the backend no longer reports its path as loaded.
        previousDisplaced =
          (loader?.currentModelPath() ??
            localInferenceEngine.currentModelPath()) !==
          previous.installed.path;
      }
      // Attempt to restore the previously-active model. The unload-then-load
      // already tore it down, so without this the host has no model loaded.
      if (previous && previousDisplaced) {
        try {
          const restored = await this.performLoad(
            loader,
            previous.installed,
            previous.overrides,
            opts,
            () => {},
          );
          this.state = restored;
          this.lastReady = {
            installed: previous.installed,
            overrides: previous.overrides,
            state: restored,
          };
          console.warn(
            `[local-inference] Failed to switch to "${installed.id}" (${failure}); restored previously-active model "${previous.installed.id}".`,
          );
          this.emit();
          return this.snapshot();
        } catch (restoreErr) {
          const restoreFailure =
            restoreErr instanceof Error
              ? restoreErr.message
              : String(restoreErr);
          console.error(
            `[local-inference] Failed to switch to "${installed.id}" (${failure}) AND failed to restore "${previous.installed.id}" (${restoreFailure}). No model is loaded.`,
          );
        }
      } else if (previous) {
        // Admission/load-arg errors happen before unload, so the previous
        // model is still live. Restore the coordinator state without touching
        // the loader and surface the failed request only as a warning.
        this.state = previous.state;
        this.lastReady = previous;
        console.warn(
          `[local-inference] Refused to switch to "${installed.id}" before unloading the active model "${previous.installed.id}" (${failure}).`,
        );
        this.emit();
        return this.snapshot();
      }
      // No prior model to restore (or restore also failed): report honestly
      // that nothing is loaded rather than attributing a phantom id.
      this.lastReady = null;
      this.state = {
        modelId: null,
        loadedAt: null,
        status: "error",
        error: failure,
      };
    }

    this.emit();
    // Only a model that actually loaded gets its last-used metadata bumped;
    // a failed load must not be recorded as a use.
    if (loadSucceeded && installed.source === "eliza-download") {
      try {
        await touchElizaModel(installed.id);
      } catch (err) {
        console.warn(
          `[local-inference] Model "${installed.id}" loaded, but failed to update last-used metadata: ${err instanceof Error ? err.message : String(err)}`,
        );
      }
    }
    return this.snapshot();
  }

  /**
   * Run the unload-then-load against the loader (or standalone engine) and
   * build the `status: "ready"` state. Throws on any load failure; never
   * mutates `this.state`/`this.lastReady` so callers control rollback.
   *
   * @param loader Registered loader, or `null` to use the standalone engine.
   * @param installed The model to load.
   * @param overrides Optional load overrides.
   * @param opts `hardware` skips the probe; `manifestLoader` is forwarded to
   *   load-argument resolution.
   * @param markPreviousDisplaced Invoked just before the loader unloads the
   *   currently-loaded model (loader path only).
   * @returns The `"ready"` state describing the effective load config.
   */
  private async performLoad(
    loader: LocalInferenceLoader | null,
    installed: InstalledModel,
    overrides: LocalInferenceLoadOverrides | undefined,
    opts: { hardware?: HardwareProbe; manifestLoader?: ManifestLoader },
    markPreviousDisplaced: () => void,
  ): Promise<ActiveModelState> {
    // RAM-budget admission control (W10 / J1): refuse a model that won't
    // fit this host *before* touching the loader, so we never half-load
    // and OOM. `assertModelFitsHost` throws `ModelDoesNotFitError` with
    // the specific numbers + the largest fitting variant of the tier.
    const probe = opts.hardware ?? (await probeHardware());
    const hostRamMb = hostRamMbFromProbe(probe);
    const admission = assertModelFitsHost(installed, hostRamMb);
    if (admission.level === "tight") {
      console.warn(
        `[local-inference] Loading "${installed.id}" with tight RAM headroom (~${admission.minMb} MB floor, ${admission.recommendedMb} MB recommended; ${hostRamMb} MB host). Expect swapping under sustained load.`,
      );
    }

    // Resolve load args with the same manifest loader the eval gate used, so
    // both read one manifest. The key is omitted when no loader was supplied
    // so the resolver falls back to its own default.
    const resolved = await resolveLocalInferenceLoadArgs(
      installed,
      overrides,
      opts.manifestLoader ? { manifestLoader: opts.manifestLoader } : {},
    );

    // WS2: warn one-shot when the tier declares vision but the per-tier
    // mmproj GGUF isn't on disk yet. The text load still proceeds; vision
    // capability is degraded for this session.
    this.warnIfVisionDegraded(installed, resolved.mmprojPath);

    if (loader) {
      markPreviousDisplaced();
      await loader.unloadModel();
      await loader.loadModel(resolved);
    } else {
      await localInferenceEngine.load(installed.path, resolved);
    }

    // Only the standalone engine reports its effective runtime config; with
    // a registered loader we fall back to the resolved (requested) values.
    const runtimeLoad = loader
      ? null
      : localInferenceEngine.currentRuntimeLoadConfig();

    // Surface the effective load config so consumers (the benchmark
    // harness, the Settings UI, the active-model SSE) can verify the
    // requested overrides actually took hold instead of silently
    // falling back to a smaller context or fp16 KV.
    return {
      modelId: installed.id,
      loadedAt: new Date().toISOString(),
      status: "ready",
      loadedContextSize:
        runtimeLoad?.contextSize ?? resolved.contextSize ?? null,
      loadedCacheTypeK: runtimeLoad
        ? runtimeLoad.cacheTypeK
        : (resolved.cacheTypeK ?? null),
      loadedCacheTypeV: runtimeLoad
        ? runtimeLoad.cacheTypeV
        : (resolved.cacheTypeV ?? null),
      loadedGpuLayers:
        runtimeLoad !== null
          ? runtimeLoad.gpuLayers
          : typeof resolved.gpuLayers === "number"
            ? resolved.gpuLayers
            : null,
    };
  }

  /**
   * Unload the active model through the registered loader or the standalone
   * engine.
   *
   * @param runtime Agent runtime used to look up a registered loader.
   * @returns A snapshot of the resulting state: `"idle"` on success, or
   *   `"error"` (with the failure message) when the unload throws.
   */
  async unload(runtime: AgentRuntime | null): Promise<ActiveModelState> {
    const loader = this.getLoader(runtime);
    try {
      if (loader) {
        await loader.unloadModel();
      } else {
        await localInferenceEngine.unload();
      }
    } catch (err) {
      this.state = {
        modelId: null,
        loadedAt: null,
        status: "error",
        error: err instanceof Error ? err.message : String(err),
        loadedContextSize: null,
        loadedCacheTypeK: null,
        loadedCacheTypeV: null,
        loadedGpuLayers: null,
      };
      this.emit();
      return this.snapshot();
    }
    // The model was deliberately unloaded — drop the restore snapshot so a
    // later failed switch doesn't silently re-load a model the operator
    // asked to unload.
    this.lastReady = null;
    this.state = {
      modelId: null,
      loadedAt: null,
      status: "idle",
      loadedContextSize: null,
      loadedCacheTypeK: null,
      loadedCacheTypeV: null,
      loadedGpuLayers: null,
    };
    this.emit();
    return this.snapshot();
  }
}
|