@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +82 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.d.ts +29 -0
- package/src/actions/transcription-control.d.ts.map +1 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +8 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1082 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +205 -0
- package/src/routes/local-inference-asr-route.ts +163 -0
- package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +808 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/transcript-audio-store.d.ts +15 -0
- package/src/routes/transcript-audio-store.d.ts.map +1 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.d.ts +36 -0
- package/src/routes/transcripts-routes.d.ts.map +1 -0
- package/src/routes/transcripts-routes.test.ts +144 -0
- package/src/routes/transcripts-routes.ts +159 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1448 -0
- package/src/runtime/index.d.ts +15 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +33 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bionic-host-loader.d.ts +46 -0
- package/src/services/bionic-host-loader.d.ts.map +1 -0
- package/src/services/bionic-host-loader.test.ts +133 -0
- package/src/services/bionic-host-loader.ts +180 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +238 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +747 -0
- package/src/services/downloader.ts +925 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +540 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1909 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.d.ts +56 -0
- package/src/services/gpu-detect.d.ts.map +1 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +231 -0
- package/src/services/hardware.ts +410 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +29 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +211 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +689 -0
- package/src/services/manifest/schema.d.ts +713 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +653 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +567 -0
- package/src/services/memory-arbiter.d.ts +318 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +925 -0
- package/src/services/memory-monitor.d.ts +122 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +297 -0
- package/src/services/memory-pressure.d.ts +130 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +407 -0
- package/src/services/routing-policy.d.ts +69 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.test.ts +164 -0
- package/src/services/routing-policy.ts +297 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.d.ts +33 -0
- package/src/services/system-memory.d.ts.map +1 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/asr-timed.real.test.ts +141 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +243 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +759 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2302 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +674 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +728 -0
- package/src/services/voice/ffi-bindings.ts +3225 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.d.ts +37 -0
- package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.d.ts +41 -0
- package/src/services/voice/transcript-service.d.ts.map +1 -0
- package/src/services/voice/transcript-service.test.ts +137 -0
- package/src/services/voice/transcript-service.ts +141 -0
- package/src/services/voice/transcript-store.d.ts +53 -0
- package/src/services/voice/transcript-store.d.ts.map +1 -0
- package/src/services/voice/transcript-store.test.ts +153 -0
- package/src/services/voice/transcript-store.ts +132 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +418 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,671 @@
|
|
|
1
|
+
import {
|
|
2
|
+
DEFAULT_ELIGIBLE_MODEL_IDS,
|
|
3
|
+
type Eliza1TierId,
|
|
4
|
+
eliza1TierPublishStatus,
|
|
5
|
+
FIRST_RUN_DEFAULT_MODEL_ID,
|
|
6
|
+
MODEL_CATALOG,
|
|
7
|
+
} from "./catalog";
|
|
8
|
+
import {
|
|
9
|
+
canSetAsDefault,
|
|
10
|
+
type Eliza1Backend,
|
|
11
|
+
type Eliza1DeviceCaps,
|
|
12
|
+
type Eliza1Manifest,
|
|
13
|
+
SUPPORTED_BACKENDS_BY_TIER,
|
|
14
|
+
} from "./manifest";
|
|
15
|
+
import {
|
|
16
|
+
assessRamFit,
|
|
17
|
+
defaultManifestLoader,
|
|
18
|
+
type ManifestLoader,
|
|
19
|
+
} from "./ram-budget";
|
|
20
|
+
import type {
|
|
21
|
+
CatalogModel,
|
|
22
|
+
CatalogQuantizationVariant,
|
|
23
|
+
HardwareFitLevel,
|
|
24
|
+
HardwareProbe,
|
|
25
|
+
InstalledModel,
|
|
26
|
+
TextGenerationSlot,
|
|
27
|
+
} from "./types";
|
|
28
|
+
|
|
29
|
+
// Catalog ids of the Eliza-1 tiers, smallest to largest. They are the rungs
// used by the per-platform slot ladders (`SLOT_LADDERS`).
const TIER_2B: Eliza1TierId = "eliza-1-2b";
const TIER_4B: Eliza1TierId = "eliza-1-4b";
const TIER_9B: Eliza1TierId = "eliza-1-9b";
const TIER_27B: Eliza1TierId = "eliza-1-27b";
|
|
33
|
+
|
|
34
|
+
/**
 * Coarse host category produced by `classifyRecommendationPlatform`. Each
 * member keys one entry of `SLOT_LADDERS`.
 */
export type RecommendationPlatformClass =
  | "mobile"
  | "apple-silicon"
  | "linux-gpu"
  | "linux-cpu"
  | "desktop-gpu"
  | "desktop-cpu";
|
|
41
|
+
|
|
42
|
+
/** Result of picking a model for one text-generation slot on one host. */
export interface RecommendedModelSelection {
  /** Slot the recommendation was computed for. */
  slot: TextGenerationSlot;
  /** Platform class the host was classified as. */
  platformClass: RecommendationPlatformClass;
  /** Chosen model (the first entry of `alternatives`), or `null` when nothing qualified. */
  model: CatalogModel | null;
  /** Fit level of `model` on this host; `null` when `model` is `null`. */
  fit: HardwareFitLevel | null;
  /** Human-readable explanation of the outcome. */
  reason: string;
  /** Every qualifying candidate in preference order. */
  alternatives: CatalogModel[];
}
|
|
50
|
+
|
|
51
|
+
/** Bytes in one binary gigabyte (GiB), used to convert catalog sizes to bytes. */
const BYTES_PER_GB = 1024 * 1024 * 1024;
|
|
52
|
+
|
|
53
|
+
/**
 * Preference-ordered tier ladders, keyed by platform class and then by slot.
 * Every rung is an Eliza-1 tier (the only default-eligible line — see
 * catalog.ts and `packages/inference/AGENTS.md` §2). `TEXT_SMALL` ladders
 * start at the smallest tier; `TEXT_LARGE` ladders start at the largest tier
 * the platform class is allowed to try and step down from there. CPU-only
 * classes never list the 27B tier, and mobile tops out at 4B.
 */
const SLOT_LADDERS: Record<
  RecommendationPlatformClass,
  Record<TextGenerationSlot, ReadonlyArray<Eliza1TierId>>
> = {
  mobile: {
    TEXT_SMALL: [TIER_2B],
    TEXT_LARGE: [TIER_4B, TIER_2B],
  },
  "apple-silicon": {
    TEXT_SMALL: [TIER_2B, TIER_4B],
    TEXT_LARGE: [TIER_27B, TIER_9B, TIER_4B, TIER_2B],
  },
  "linux-gpu": {
    TEXT_SMALL: [TIER_2B, TIER_4B],
    TEXT_LARGE: [TIER_27B, TIER_9B, TIER_4B, TIER_2B],
  },
  "linux-cpu": {
    TEXT_SMALL: [TIER_2B, TIER_4B],
    TEXT_LARGE: [TIER_9B, TIER_4B, TIER_2B],
  },
  "desktop-gpu": {
    TEXT_SMALL: [TIER_2B, TIER_4B],
    TEXT_LARGE: [TIER_27B, TIER_9B, TIER_4B, TIER_2B],
  },
  "desktop-cpu": {
    TEXT_SMALL: [TIER_2B, TIER_4B],
    TEXT_LARGE: [TIER_9B, TIER_4B, TIER_2B],
  },
};
|
|
89
|
+
|
|
90
|
+
/**
 * Builds an id → model lookup for the given catalog.
 *
 * @param catalog Models to index.
 * @returns A map keyed by `model.id`; when ids repeat, the later entry wins.
 */
function catalogById(catalog: CatalogModel[]): Map<string, CatalogModel> {
  const index = new Map<string, CatalogModel>();
  for (const entry of catalog) {
    index.set(entry.id, entry);
  }
  return index;
}
|
|
93
|
+
|
|
94
|
+
/**
 * Returns the catalog entries that are not flagged `hiddenFromCatalog`,
 * in their original order. The input array is not modified.
 */
function chatCandidates(catalog: CatalogModel[]): CatalogModel[] {
  const isVisible = (entry: CatalogModel): boolean => !entry.hiddenFromCatalog;
  return catalog.filter(isVisible);
}
|
|
97
|
+
|
|
98
|
+
/**
 * Maps a hardware probe onto the platform class whose ladder should be used.
 *
 * Precedence, first match wins:
 *  1. the mobile probe reports `"android"` or `"ios"` → `"mobile"`
 *     (other mobile-probe values, e.g. `"web"`, fall through);
 *  2. `appleSilicon` is set → `"apple-silicon"`;
 *  3. `platform === "linux"` → `"linux-gpu"` / `"linux-cpu"` depending on
 *     whether a GPU was probed;
 *  4. anything else → `"desktop-gpu"` / `"desktop-cpu"`.
 *
 * Mobile detection reads the typed `hardware.mobile.platform` field rather
 * than the host platform string, which does not carry mobile values.
 */
export function classifyRecommendationPlatform(
  hardware: HardwareProbe,
): RecommendationPlatformClass {
  const mobileOs = hardware.mobile?.platform;
  if (mobileOs === "android" || mobileOs === "ios") {
    return "mobile";
  }

  if (hardware.appleSilicon) {
    return "apple-silicon";
  }

  const hasGpu = Boolean(hardware.gpu);
  if (hardware.platform === "linux") {
    return hasGpu ? "linux-gpu" : "linux-cpu";
  }
  return hasGpu ? "desktop-gpu" : "desktop-cpu";
}
|
|
117
|
+
|
|
118
|
+
/**
 * Download size of a catalog model in GB.
 *
 * @param model Model whose `sizeGb` is reported.
 * @param catalog Accepted as part of the signature but not consulted.
 * @returns `model.sizeGb`, unchanged.
 */
export function catalogDownloadSizeGb(
  model: CatalogModel,
  catalog: CatalogModel[] = MODEL_CATALOG,
): number {
  // Mark the parameter as intentionally unused.
  void catalog;
  const { sizeGb } = model;
  return sizeGb;
}
|
|
125
|
+
|
|
126
|
+
/**
 * Download size of a catalog model in bytes.
 *
 * @param model Model to size.
 * @param catalog Forwarded to `catalogDownloadSizeGb`.
 * @returns The GB size multiplied by `BYTES_PER_GB`, rounded to the nearest
 *   integer.
 */
export function catalogDownloadSizeBytes(
  model: CatalogModel,
  catalog: CatalogModel[] = MODEL_CATALOG,
): number {
  const sizeGb = catalogDownloadSizeGb(model, catalog);
  return Math.round(sizeGb * BYTES_PER_GB);
}
|
|
132
|
+
|
|
133
|
+
/**
 * Picks the quantization variant to use for a model.
 *
 * Preference order: the variant whose id equals
 * `quantization.defaultVariantId`, then the first variant with status
 * `"published"`, then the first listed variant.
 *
 * @returns The chosen variant, or `null` when the model has no quantization
 *   block or its variant list is empty.
 */
export function selectBestQuantizationVariant(
  model: CatalogModel,
): CatalogQuantizationVariant | null {
  const { quantization } = model;
  if (!quantization) return null;

  const { variants } = quantization;

  const declaredDefault = variants.find(
    (candidate) => candidate.id === quantization.defaultVariantId,
  );
  if (declaredDefault != null) return declaredDefault;

  const firstPublished = variants.find(
    (candidate) => candidate.status === "published",
  );
  if (firstPublished != null) return firstPublished;

  return variants[0] ?? null;
}
|
|
147
|
+
|
|
148
|
+
/** Megabytes per gigabyte (binary), for converting GB budgets to MB. */
const MB_PER_GB = 2 ** 10;
|
|
149
|
+
|
|
150
|
+
/**
 * Memory, in GB, that a model is assumed to be able to use on this host.
 *
 * - Apple Silicon: the full `totalRamGb` (the GPU shares system RAM).
 * - Host with a probed GPU: the larger of the GPU's `totalVramGb` and half
 *   of system RAM.
 * - CPU-only host: half of system RAM, on the premise that a model can take
 *   about half of RAM before paging hurts.
 */
function effectiveMemoryGb(probe: HardwareProbe): number {
  if (probe.appleSilicon) return probe.totalRamGb;

  const halfOfRam = probe.totalRamGb * 0.5;
  return probe.gpu ? Math.max(probe.gpu.totalVramGb, halfOfRam) : halfOfRam;
}
|
|
164
|
+
|
|
165
|
+
/**
 * Download-size sanity check layered on top of the RAM-budget decision: a
 * bundle whose on-disk size is a large fraction of the memory available to
 * it is downgraded even if the RAM-budget floor is met.
 *
 * The memory figure is total RAM on mobile and `effectiveMemoryGb` elsewhere.
 * Thresholds (size strictly greater than fraction × memory):
 *  - mobile: 0.8 → `"wontfit"`, 0.65 → `"tight"`;
 *  - other hosts: 0.9 → `"wontfit"`, 0.7 → `"tight"`.
 * The source notes these ratios match the historical `assessFit` (desktop)
 * and `mobileFit` (mobile) thresholds.
 *
 * @returns `"wontfit"`, `"tight"`, or `null` when the size raises no concern
 *   and the caller should defer to the RAM-budget level.
 */
function downloadSizeGuardrail(
  hardware: HardwareProbe,
  model: CatalogModel,
  catalog: CatalogModel[],
  isMobile: boolean,
): HardwareFitLevel | null {
  const downloadGb = catalogDownloadSizeGb(model, catalog);
  const availableGb = isMobile
    ? hardware.totalRamGb
    : effectiveMemoryGb(hardware);
  const ratios = isMobile
    ? { wontFit: 0.8, tight: 0.65 }
    : { wontFit: 0.9, tight: 0.7 };

  if (downloadGb > availableGb * ratios.wontFit) {
    return "wontfit";
  }
  return downloadGb > availableGb * ratios.tight ? "tight" : null;
}
|
|
187
|
+
|
|
188
|
+
/**
 * Rates how well a catalog model fits this host.
 *
 * Two checks are combined:
 *  1. `assessRamFit` (from `./ram-budget`) against the memory available to
 *     the model — total RAM on mobile, `effectiveMemoryGb` elsewhere. That
 *     figure is already discounted for OS headroom, hence `reserveMb: 0`.
 *  2. `downloadSizeGuardrail` on the bundle's download size.
 *
 * @param hardware Probe of the host.
 * @param model Model being assessed.
 * @param catalog Catalog forwarded to the size guardrail.
 * @param options Optional installed record and manifest loader forwarded to
 *   `assessRamFit`; the loader defaults to `defaultManifestLoader`.
 * @returns `"wontfit"` when either check rejects the model, `"tight"` when
 *   either check reports tight, otherwise `"fits"`.
 */
export function assessCatalogModelFit(
  hardware: HardwareProbe,
  model: CatalogModel,
  catalog: CatalogModel[] = MODEL_CATALOG,
  options: { installed?: InstalledModel; manifestLoader?: ManifestLoader } = {},
): HardwareFitLevel {
  const isMobile = classifyRecommendationPlatform(hardware) === "mobile";
  const budgetGb = isMobile
    ? hardware.totalRamGb
    : effectiveMemoryGb(hardware);

  const ramFit = assessRamFit(model, budgetGb * MB_PER_GB, {
    installed: options.installed,
    manifestLoader: options.manifestLoader ?? defaultManifestLoader,
    reserveMb: 0,
  });
  if (!ramFit.fits) {
    return "wontfit";
  }

  const sizeLevel = downloadSizeGuardrail(hardware, model, catalog, isMobile);
  if (sizeLevel === "wontfit") {
    return "wontfit";
  }

  const isTight = sizeLevel === "tight" || ramFit.level === "tight";
  return isTight ? "tight" : "fits";
}
|
|
211
|
+
|
|
212
|
+
/**
 * Whether a model may be recommended on this host at all: the host must pass
 * `hasUsableCpuBackendForRecommendation`, and `assessCatalogModelFit` must
 * return something other than `"wontfit"`. The fit assessment is skipped when
 * the backend check fails.
 */
function canFit(
  hardware: HardwareProbe,
  model: CatalogModel,
  catalog: CatalogModel[],
  options: { installed?: InstalledModel; manifestLoader?: ManifestLoader } = {},
): boolean {
  return (
    hasUsableCpuBackendForRecommendation(hardware) &&
    assessCatalogModelFit(hardware, model, catalog, options) !== "wontfit"
  );
}
|
|
221
|
+
|
|
222
|
+
/**
 * Whether the host has a compute path the recommender is willing to rely on.
 *
 * A probed GPU always qualifies. Without one, non-ARM architectures qualify
 * unconditionally, while `arm` / `arm64` hosts qualify only when the probe
 * reports `cpuFeatures.neon === true`.
 */
function hasUsableCpuBackendForRecommendation(
  hardware: HardwareProbe,
): boolean {
  if (hardware.gpu) return true;

  const isArm = hardware.arch === "arm64" || hardware.arch === "arm";
  return !isArm || hardware.cpuFeatures?.neon === true;
}
|
|
229
|
+
|
|
230
|
+
/**
 * Checks a model's kernel declarations against the kernels the installed
 * binary advertises (its CAPABILITIES.json kernels map).
 *
 * - `binaryKernels === null`: no probe is available (older binary, or
 *   llama-server not installed). The catalog is trusted and the result is
 *   `true`; filtering here would hide every kernel-required model, and the
 *   dispatcher's load-time check reports the real error on activation.
 * - The model lists `requiresKernel` entries: every one of them must be
 *   advertised as exactly `true`. `unsupportedKernels` is not consulted in
 *   this case, so a binary that satisfies the requirements stays eligible
 *   even if it also advertises an unsupported backend (the dispatcher steers
 *   the spawn away at runtime via `applyUnsupportedKernelEnv`).
 * - The model lists no required kernels: it is rejected when the binary
 *   advertises any entry of `unsupportedKernels` as `true` (e.g. an
 *   OpenVINO-only build for an Eliza-1 text tier).
 *
 * @param model Catalog model whose `runtime.optimizations` is inspected.
 * @param binaryKernels Advertised kernel map, or `null` when unknown.
 */
function kernelRequirementsSatisfied(
  model: CatalogModel,
  binaryKernels: Partial<Record<string, boolean>> | null,
): boolean {
  const optimizations = model.runtime?.optimizations;
  const required = optimizations?.requiresKernel ?? [];
  if (!binaryKernels) return true;

  if (required.length > 0) {
    const hasMissingKernel = required.some(
      (kernel) => binaryKernels[kernel] !== true,
    );
    return !hasMissingKernel;
  }

  const unsupported = optimizations?.unsupportedKernels ?? [];
  const advertisesUnsupported = unsupported.some(
    (kernel) => binaryKernels[kernel] === true,
  );
  return !advertisesUnsupported;
}
|
|
261
|
+
|
|
262
|
+
/**
 * Resolves a ladder of model ids to catalog entries.
 *
 * @param ids Ladder ids in preference order.
 * @param catalog Catalog to look the ids up in.
 * @returns The matching models in ladder order; ids with no catalog entry
 *   are dropped.
 */
function modelsFromLadder(
  ids: ReadonlyArray<string>,
  catalog: CatalogModel[],
): CatalogModel[] {
  const lookup = catalogById(catalog);
  const resolved: CatalogModel[] = [];
  for (const id of ids) {
    const match = lookup.get(id);
    if (match) resolved.push(match);
  }
  return resolved;
}
|
|
272
|
+
|
|
273
|
+
/**
 * Memory threshold, in GB, at or above which a host is treated as having
 * headroom for a long-context KV cache. The source attributes the value to
 * the "16 GB workstation" line of the porting plan; below it the historical
 * short-context preference is kept.
 */
const LONG_CONTEXT_RAM_BUMP_THRESHOLD_GB = 16;

/** Minimum context length (64k tokens) for a model to count as long-context. */
const LONG_CONTEXT_MIN_LENGTH = 64 * 1024;

/**
 * True when the host clears `LONG_CONTEXT_RAM_BUMP_THRESHOLD_GB`: either the
 * probed GPU's total VRAM or the system's total RAM is at least the
 * threshold. A host without a probed GPU is judged on RAM alone.
 */
function hasLongContextHeadroom(hardware: HardwareProbe): boolean {
  const vramGb = hardware.gpu?.totalVramGb ?? 0;
  return (
    vramGb >= LONG_CONTEXT_RAM_BUMP_THRESHOLD_GB ||
    hardware.totalRamGb >= LONG_CONTEXT_RAM_BUMP_THRESHOLD_GB
  );
}
|
|
291
|
+
|
|
292
|
+
/**
 * True when the model declares a numeric `contextLength` of at least
 * `LONG_CONTEXT_MIN_LENGTH`. A missing or non-numeric length is treated as
 * not long-context.
 */
function isLongContextModel(model: CatalogModel): boolean {
  const { contextLength } = model;
  if (typeof contextLength !== "number") return false;
  return contextLength >= LONG_CONTEXT_MIN_LENGTH;
}
|
|
298
|
+
|
|
299
|
+
/**
 * Candidate list used when no ladder entry qualifies: every visible,
 * default-eligible catalog model that passes `canFit`, sorted by preference.
 *
 * Sort order:
 *  1. on hosts with long-context headroom, long-context models come first;
 *  2. then by download size — largest first for `TEXT_LARGE`, smallest first
 *     for any other slot.
 *
 * @param slot Slot being filled; decides the size direction.
 * @param hardware Probe of the host.
 * @param catalog Catalog to draw candidates from.
 * @param budgetOptions Installed models and manifest loader used for the
 *   per-model fit check.
 */
function fallbackCandidates(
  slot: TextGenerationSlot,
  hardware: HardwareProbe,
  catalog: CatalogModel[],
  budgetOptions: BudgetOptions,
): CatalogModel[] {
  const fitting = chatCandidates(catalog).filter((model) => {
    if (!DEFAULT_ELIGIBLE_MODEL_IDS.has(model.id)) return false;
    return canFit(
      hardware,
      model,
      catalog,
      budgetOptionsForModel(model, budgetOptions),
    );
  });

  const preferLongContext = hasLongContextHeadroom(hardware);
  const largestFirst = slot === "TEXT_LARGE";

  // `fitting` is a fresh array from `filter`, so sorting in place is safe.
  return fitting.sort((left, right) => {
    if (preferLongContext) {
      const leftIsLong = isLongContextModel(left);
      const rightIsLong = isLongContextModel(right);
      if (leftIsLong !== rightIsLong) return rightIsLong ? 1 : -1;
    }
    const descendingBySize =
      catalogDownloadSizeGb(right, catalog) -
      catalogDownloadSizeGb(left, catalog);
    return largestFirst ? descendingBySize : -descendingBySize;
  });
}
|
|
328
|
+
|
|
329
|
+
/** Optional inputs that refine a recommendation beyond the hardware probe. */
export interface RecommendationOptions {
  /**
   * Kernel map advertised by the installed llama-server binary (parsed from
   * the CAPABILITIES.json next to it). When supplied, models whose
   * `requiresKernel` entries are not all satisfied by this map are filtered
   * out, so a model the binary cannot run is never recommended. Pass `null`
   * or omit it when no probe is available; the recommender then trusts the
   * catalog and the dispatcher's load-time check.
   */
  binaryKernels?: Partial<Record<string, boolean>> | null;
  /**
   * Models already installed by the user. For an installed Eliza-1 tier that
   * has a published `eliza-1.manifest.json` next to its bundle, the
   * recommender uses `manifest.ramBudgetMb` rather than the catalog's coarse
   * `minRamGb` scalar. See `./ram-budget.ts`.
   */
  installed?: ReadonlyArray<InstalledModel>;
  /**
   * Test-only replacement for the manifest reader. Production callers leave
   * it unset, in which case `eliza-1.manifest.json` is read from disk.
   */
  manifestLoader?: ManifestLoader;
}
|
|
352
|
+
|
|
353
|
+
/**
 * `RecommendationOptions` reduced to the fields the RAM-budget checks need,
 * with defaults already applied (see `resolveBudgetOptions`).
 */
interface BudgetOptions {
  /** Installed models to match against catalog ids. */
  installed: ReadonlyArray<InstalledModel>;
  /** Manifest reader handed to the fit checks. */
  manifestLoader: ManifestLoader;
}
|
|
357
|
+
|
|
358
|
+
/**
 * Narrows the shared budget options to a single model.
 *
 * @param model Catalog model being assessed.
 * @param budget Resolved budget options for the whole recommendation.
 * @returns The first installed record whose id matches `model.id`
 *   (`undefined` when none does) together with the shared manifest loader.
 */
function budgetOptionsForModel(
  model: CatalogModel,
  budget: BudgetOptions,
): { installed?: InstalledModel; manifestLoader: ManifestLoader } {
  const { installed: installedModels, manifestLoader } = budget;
  const installed = installedModels.find(
    (candidate) => candidate.id === model.id,
  );
  return { installed, manifestLoader };
}
|
|
367
|
+
|
|
368
|
+
/**
 * Applies defaults to the budget-related recommendation options: an empty
 * installed list and `defaultManifestLoader` when the caller supplied none.
 */
function resolveBudgetOptions(options: RecommendationOptions): BudgetOptions {
  const installed = options.installed ?? [];
  const manifestLoader = options.manifestLoader ?? defaultManifestLoader;
  return { installed, manifestLoader };
}
|
|
374
|
+
|
|
375
|
+
/**
 * Recommends a model for one text-generation slot on this host.
 *
 * Steps:
 *  1. Classify the host and resolve that platform's ladder for the slot
 *     against the catalog.
 *  2. Keep the ladder entries that pass `canFit` and
 *     `kernelRequirementsSatisfied`.
 *  3. For `TEXT_LARGE` on hosts with long-context headroom, move
 *     long-context entries ahead of the rest; ladder order still breaks ties.
 *  4. If no ladder entry survives, use `fallbackCandidates` filtered by the
 *     same kernel check.
 *  5. The first surviving candidate is the recommendation; its fit level is
 *     re-assessed for the result.
 *
 * @param slot Slot to fill.
 * @param hardware Probe of the host.
 * @param catalog Catalog to choose from (defaults to `MODEL_CATALOG`).
 * @param options Binary kernel map, installed models and manifest loader.
 * @returns The selection; `model` and `fit` are `null` when nothing qualifies.
 */
export function selectRecommendedModelForSlot(
  slot: TextGenerationSlot,
  hardware: HardwareProbe,
  catalog: CatalogModel[] = MODEL_CATALOG,
  options: RecommendationOptions = {},
): RecommendedModelSelection {
  const platformClass = classifyRecommendationPlatform(hardware);
  const ladder = modelsFromLadder(SLOT_LADDERS[platformClass][slot], catalog);
  const binaryKernels = options.binaryKernels ?? null;
  const budget = resolveBudgetOptions(options);

  const passesKernelCheck = (candidate: CatalogModel): boolean =>
    kernelRequirementsSatisfied(candidate, binaryKernels);

  const eligible = ladder.filter(
    (candidate) =>
      canFit(
        hardware,
        candidate,
        catalog,
        budgetOptionsForModel(candidate, budget),
      ) && passesKernelCheck(candidate),
  );

  let alternatives: CatalogModel[];
  if (eligible.length === 0) {
    // Nothing on the ladder qualifies: widen to every default-eligible model.
    alternatives = fallbackCandidates(slot, hardware, catalog, budget).filter(
      (candidate) => passesKernelCheck(candidate),
    );
  } else if (slot === "TEXT_LARGE" && hasLongContextHeadroom(hardware)) {
    // Hosts with >= 16 GB RAM/VRAM surface long-context (>= 64k) entries first.
    alternatives = rankLadderByLongContext(eligible);
  } else {
    alternatives = eligible;
  }

  const model = alternatives[0] ?? null;

  let fit: HardwareFitLevel | null = null;
  let reason = `${platformClass} ${slot} ladder has no fitting catalog model`;
  if (model) {
    fit = assessCatalogModelFit(
      hardware,
      model,
      catalog,
      budgetOptionsForModel(model, budget),
    );
    reason = `${platformClass} ${slot} ladder selected ${model.id}`;
  }

  return { slot, platformClass, model, fit, reason, alternatives };
}
|
|
428
|
+
|
|
429
|
+
/**
 * Stable partition of a ladder: long-context models first, everything else
 * after, with the original relative order preserved inside each group so the
 * ladder order stays the tie-breaker. Returns a new array; the input is not
 * modified.
 */
function rankLadderByLongContext(ladder: CatalogModel[]): CatalogModel[] {
  const longContext: CatalogModel[] = [];
  const shortContext: CatalogModel[] = [];
  for (const model of ladder) {
    const bucket = isLongContextModel(model) ? longContext : shortContext;
    bucket.push(model);
  }
  return [...longContext, ...shortContext];
}
|
|
444
|
+
|
|
445
|
+
// ---------------------------------------------------------------------------
|
|
446
|
+
// Default-eligibility on this device — the recommendation-engine gate that
|
|
447
|
+
// consults the bundle's `eliza-1.manifest.json` (`kernels.verifiedBackends`,
|
|
448
|
+
// `evals`, `defaultEligible`) against the device hardware + the bundle's
|
|
449
|
+
// on-device verify state. See `packages/inference/AGENTS.md` §6 + §7.
|
|
450
|
+
// ---------------------------------------------------------------------------
|
|
451
|
+
|
|
452
|
+
/**
 * Projects a `HardwareProbe` onto the `Eliza1DeviceCaps` shape consumed by
 * the manifest validator's `canSetAsDefault`.
 *
 * - `availableBackends`: `"cpu"` is listed for every non-ARM host, and for
 *   `arm` / `arm64` hosts only when the probe reports
 *   `cpuFeatures.neon === true`. A probed GPU appends its own backend.
 * - `ramMb`: total device RAM in MB (GB × 1024, rounded). This is the device
 *   total rather than the headroom-discounted figure the ladder uses,
 *   because `canSetAsDefault` compares it with the manifest's
 *   `ramBudgetMb.min` floor.
 * - `cpuFeatures`: passed through from the probe unchanged.
 */
export function deviceCapsFromProbe(hardware: HardwareProbe): Eliza1DeviceCaps {
  const isArm = hardware.arch === "arm64" || hardware.arch === "arm";
  const cpuUsable = !isArm || hardware.cpuFeatures?.neon === true;

  const availableBackends: Eliza1Backend[] = cpuUsable ? ["cpu"] : [];
  if (hardware.gpu) {
    availableBackends.push(hardware.gpu.backend);
  }

  const ramMb = Math.round(hardware.totalRamGb * 1024);
  return { availableBackends, ramMb, cpuFeatures: hardware.cpuFeatures };
}
|
|
474
|
+
|
|
475
|
+
/**
 * Outcome of `canBundleBeDefaultOnDevice`, discriminated on `canBeDefault`.
 * A rejection carries a machine-readable `reason` plus a human-readable
 * `detail` string for UI and logs.
 */
export type BundleDefaultEligibility =
  | { canBeDefault: true }
  | {
      canBeDefault: false;
      reason:
        | "no-manifest"
        | "not-default-eligible"
        | "ram-below-floor"
        | "kernels-unverified-on-device"
        | "not-verified-on-device";
      detail: string;
    };
|
|
487
|
+
|
|
488
|
+
/**
 * Decides whether an installed Eliza-1 bundle may be offered as the
 * recommended default on this device. Any one failing condition rejects it:
 *
 *  - the bundle ships a validated `eliza-1.manifest.json` (the loader
 *    returned one);
 *  - the bundle has passed the one-time on-device verify pass
 *    (`InstalledModel.bundleVerifiedAt` is set) — a materialized but
 *    unverified bundle is never auto-selected, per AGENTS.md §7;
 *  - `canSetAsDefault(manifest, caps)` accepts it. Per the source, that
 *    covers contract validity (required kernels declared, required evals
 *    green for a strict release, lineage/files consistent), the
 *    `ramBudgetMb.min` floor, and a backend verified `pass` on this device.
 *
 * When `canSetAsDefault` refuses, the cause is narrowed for reporting, in
 * this order: RAM below the manifest floor; no device backend that is both
 * in the tier's supported set and verified `pass`; otherwise a manifest
 * contract failure.
 *
 * `manifest.defaultEligible: true` is NOT required at this gate — a
 * `base-v1-candidate` bundle that meets every condition may fill an empty
 * default slot. The recommender prefers a strict release over a candidate
 * when both are installed.
 *
 * @param installed Installed bundle under consideration.
 * @param hardware Probe of the device.
 * @param options Optional manifest loader override (defaults to
 *   `defaultManifestLoader`).
 */
export function canBundleBeDefaultOnDevice(
  installed: InstalledModel,
  hardware: HardwareProbe,
  options: { manifestLoader?: ManifestLoader } = {},
): BundleDefaultEligibility {
  type Rejection = Extract<BundleDefaultEligibility, { canBeDefault: false }>;
  const reject = (
    reason: Rejection["reason"],
    detail: string,
  ): BundleDefaultEligibility => ({ canBeDefault: false, reason, detail });

  const loadManifest = options.manifestLoader ?? defaultManifestLoader;
  const manifest: Eliza1Manifest | null = loadManifest(installed.id, installed);
  if (!manifest) {
    return reject(
      "no-manifest",
      `${installed.id}: no validated eliza-1.manifest.json next to the bundle`,
    );
  }

  if (!installed.bundleVerifiedAt) {
    return reject(
      "not-verified-on-device",
      `${installed.id}: bundle materialized but the on-device verify pass (load → 1-token text → 1-phrase voice → barge-in) has not run`,
    );
  }

  const caps = deviceCapsFromProbe(hardware);
  if (canSetAsDefault(manifest, caps)) {
    return { canBeDefault: true };
  }

  // The validator refused — work out why so the UI/log message is precise.
  if (manifest.ramBudgetMb.min > caps.ramMb) {
    return reject(
      "ram-below-floor",
      `${installed.id}: device RAM ${caps.ramMb} MB is below the manifest floor ${manifest.ramBudgetMb.min} MB`,
    );
  }

  const tierBackends = new Set<Eliza1Backend>(
    SUPPORTED_BACKENDS_BY_TIER[manifest.tier],
  );
  let hasVerifiedBackend = false;
  for (const backend of caps.availableBackends) {
    if (!tierBackends.has(backend)) continue;
    if (manifest.kernels.verifiedBackends[backend].status === "pass") {
      hasVerifiedBackend = true;
      break;
    }
  }
  if (!hasVerifiedBackend) {
    const deviceBackends = caps.availableBackends.join(", ");
    return reject(
      "kernels-unverified-on-device",
      `${installed.id}: no backend the device exposes (${deviceBackends}) has a 'pass' kernel-verify report in the manifest`,
    );
  }

  // RAM and backend are fine, so the refusal came from a manifest-contract
  // rule (e.g. a required eval gate, a lineage/files mismatch, or an
  // inconsistent provenance block). Any such failure blocks the default.
  return reject(
    "not-default-eligible",
    `${installed.id}: manifest failed the contract check (an eval gate, kernel-coverage rule, or lineage/files consistency rule)`,
  );
}
|
|
567
|
+
|
|
568
|
+
/**
 * Build the recommendation for every text-generation slot in one call.
 *
 * Each slot is resolved independently through `selectRecommendedModelForSlot`
 * with the same hardware probe, catalog and options.
 *
 * @param hardware - Probe describing the device the recommendation is for.
 * @param catalog - Models to choose from; defaults to `MODEL_CATALOG`.
 * @param options - Recommendation options forwarded unchanged to each slot.
 * @returns One selection per slot, keyed by `TEXT_SMALL` and `TEXT_LARGE`.
 */
export function selectRecommendedModels(
  hardware: HardwareProbe,
  catalog: CatalogModel[] = MODEL_CATALOG,
  options: RecommendationOptions = {},
): Record<TextGenerationSlot, RecommendedModelSelection> {
  const pickFor = (slot: TextGenerationSlot): RecommendedModelSelection =>
    selectRecommendedModelForSlot(slot, hardware, catalog, options);

  return {
    TEXT_SMALL: pickFor("TEXT_SMALL"),
    TEXT_LARGE: pickFor("TEXT_LARGE"),
  };
}
|
|
588
|
+
|
|
589
|
+
/**
 * Choose the model to auto-load on first run, when the user has expressed no
 * preference. Only visible catalog entries whose id is in
 * `DEFAULT_ELIGIBLE_MODEL_IDS` are ever returned.
 *
 * Resolution order:
 *   1. The `FIRST_RUN_DEFAULT_MODEL_ID` entry, when it is in the catalog,
 *      eligible, and its publish status is "published".
 *   2. Otherwise the first eligible entry, in catalog order, whose publish
 *      status is "published". This lets `FIRST_RUN_DEFAULT_MODEL_ID` keep
 *      pointing at the intended default while its bundle is still being
 *      published (elizaOS/eliza#7629).
 *   3. Otherwise (every eligible entry is pending) the preferred entry when
 *      it is eligible, else the first eligible entry in catalog order. The
 *      download is then expected to fail with a clear manifest 404 instead
 *      of a non-Eliza model being chosen silently.
 *
 * An entry's publish status is its own `publishStatus` field when set, and
 * `eliza1TierPublishStatus(id)` otherwise.
 *
 * @param catalog - Models to choose from; defaults to `MODEL_CATALOG`.
 * @returns The chosen entry, or `null` when the catalog has no eligible entry
 *   at all — a misconfiguration the caller should surface as a hard error.
 */
export function recommendForFirstRun(
  catalog: CatalogModel[] = MODEL_CATALOG,
): CatalogModel | null {
  function isEligibleChat(model: CatalogModel): boolean {
    if (model.hiddenFromCatalog) return false;
    return DEFAULT_ELIGIBLE_MODEL_IDS.has(model.id);
  }

  function isPublishedEligibleChat(model: CatalogModel): boolean {
    if (!isEligibleChat(model)) return false;
    // The cast is only reached for ids already known to be default-eligible.
    const status: "published" | "pending" =
      model.publishStatus ?? eliza1TierPublishStatus(model.id as Eliza1TierId);
    return status === "published";
  }

  const preferred = catalogById(catalog).get(FIRST_RUN_DEFAULT_MODEL_ID);

  // Step 1: the intended default wins outright when it is published.
  if (preferred && isPublishedEligibleChat(preferred)) return preferred;

  // Step 2: single pass over the catalog — return the first published
  // eligible entry, remembering the first eligible one for the last resort.
  let firstEligible: CatalogModel | null = null;
  for (const model of catalog) {
    if (!isEligibleChat(model)) continue;
    if (isPublishedEligibleChat(model)) return model;
    if (firstEligible === null) firstEligible = model;
  }

  // Step 3: everything eligible is pending. Prefer the intended default so
  // the downloader reports a clear "manifest 404" for the right bundle.
  if (preferred && isEligibleChat(preferred)) return preferred;
  return firstEligible;
}
|
|
638
|
+
|
|
639
|
+
/**
 * Find a strictly smaller model to fall back to from `currentModelId`.
 *
 * The platform/slot ladder is tried first: the first ladder entry that is not
 * the current model, has a smaller download size, and passes `canFit` on this
 * hardware wins. If the ladder yields nothing, the first entry from
 * `fallbackCandidates` that is not the current model and is smaller is used.
 *
 * When `currentModelId` is not in the catalog its size is treated as
 * infinite, so any other candidate counts as "smaller".
 *
 * @param currentModelId - Id of the model to step down from.
 * @param hardware - Probe describing the device.
 * @param slot - Text-generation slot being filled; defaults to "TEXT_LARGE".
 * @param catalog - Models to choose from; defaults to `MODEL_CATALOG`.
 * @param options - Recommendation options used to resolve the budget.
 * @returns The fallback model, or `null` when no smaller candidate exists.
 */
export function chooseSmallerFallbackModel(
  currentModelId: string,
  hardware: HardwareProbe,
  slot: TextGenerationSlot = "TEXT_LARGE",
  catalog: CatalogModel[] = MODEL_CATALOG,
  options: RecommendationOptions = {},
): CatalogModel | null {
  const current = catalogById(catalog).get(currentModelId);
  const sizeCeilingGb = current
    ? catalogDownloadSizeGb(current, catalog)
    : Number.POSITIVE_INFINITY;
  const platformClass = classifyRecommendationPlatform(hardware);
  const budget = resolveBudgetOptions(options);

  // Shared by both lookups: a different model with a smaller download.
  const isSmallerAlternative = (model: CatalogModel): boolean =>
    model.id !== currentModelId &&
    catalogDownloadSizeGb(model, catalog) < sizeCeilingGb;

  const fromLadder = modelsFromLadder(
    SLOT_LADDERS[platformClass][slot],
    catalog,
  ).find(
    (model) =>
      isSmallerAlternative(model) &&
      canFit(hardware, model, catalog, budgetOptionsForModel(model, budget)),
  );
  if (fromLadder) return fromLadder;

  const fromCandidates = fallbackCandidates(
    slot,
    hardware,
    catalog,
    budget,
  ).find(isSmallerAlternative);
  return fromCandidates ?? null;
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* On-disk registry of installed models.
|
|
3
|
+
*
|
|
4
|
+
* Two sources feed the registry:
|
|
5
|
+
* 1. Eliza-owned downloads (source: "eliza-download") — written on
|
|
6
|
+
* successful completion by the downloader.
|
|
7
|
+
* 2. External scans (source: "external-scan") — merged in at read time
|
|
8
|
+
* from `scanExternalModels()`. These are never persisted to the
|
|
9
|
+
* registry file; a rescan runs whenever we read.
|
|
10
|
+
*
|
|
11
|
+
* The JSON file only holds Eliza-owned entries. That way, if a user
|
|
12
|
+
* cleans up LM Studio models we don't show stale ghosts.
|
|
13
|
+
*/
|
|
14
|
+
import type { InstalledModel } from "./types";
|
|
15
|
+
/**
|
|
16
|
+
* Return all models currently usable: persisted Eliza downloads plus a
|
|
17
|
+
* fresh external-tool scan. External duplicates of Eliza-owned files are
|
|
18
|
+
* filtered out by path.
|
|
19
|
+
*/
|
|
20
|
+
export declare function listInstalledModels(): Promise<InstalledModel[]>;
|
|
21
|
+
/** Add or update an Eliza-owned entry. External entries are rejected. */
|
|
22
|
+
export declare function upsertElizaModel(model: InstalledModel): Promise<void>;
|
|
23
|
+
/** Mark an existing Eliza-owned model as most-recently-used. */
|
|
24
|
+
export declare function touchElizaModel(id: string): Promise<void>;
|
|
25
|
+
/**
|
|
26
|
+
* Delete an Eliza-owned model from the registry and from disk.
|
|
27
|
+
*
|
|
28
|
+
* Refuses if the model was discovered from another tool — Eliza must not
|
|
29
|
+
* touch files it doesn't own. Callers surface that refusal as a 4xx.
|
|
30
|
+
*/
|
|
31
|
+
export declare function removeElizaModel(id: string): Promise<{
|
|
32
|
+
removed: boolean;
|
|
33
|
+
reason?: "external" | "not-found";
|
|
34
|
+
}>;
|
|
35
|
+
//# sourceMappingURL=registry.d.ts.map
|