@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +82 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.d.ts +29 -0
- package/src/actions/transcription-control.d.ts.map +1 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +8 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1082 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +205 -0
- package/src/routes/local-inference-asr-route.ts +163 -0
- package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +808 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/transcript-audio-store.d.ts +15 -0
- package/src/routes/transcript-audio-store.d.ts.map +1 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.d.ts +36 -0
- package/src/routes/transcripts-routes.d.ts.map +1 -0
- package/src/routes/transcripts-routes.test.ts +144 -0
- package/src/routes/transcripts-routes.ts +159 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1448 -0
- package/src/runtime/index.d.ts +15 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +33 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bionic-host-loader.d.ts +46 -0
- package/src/services/bionic-host-loader.d.ts.map +1 -0
- package/src/services/bionic-host-loader.test.ts +133 -0
- package/src/services/bionic-host-loader.ts +180 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +238 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +747 -0
- package/src/services/downloader.ts +925 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +540 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1909 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.d.ts +56 -0
- package/src/services/gpu-detect.d.ts.map +1 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +231 -0
- package/src/services/hardware.ts +410 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +29 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +211 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +689 -0
- package/src/services/manifest/schema.d.ts +713 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +653 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +567 -0
- package/src/services/memory-arbiter.d.ts +318 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +925 -0
- package/src/services/memory-monitor.d.ts +122 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +297 -0
- package/src/services/memory-pressure.d.ts +130 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +407 -0
- package/src/services/routing-policy.d.ts +69 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.test.ts +164 -0
- package/src/services/routing-policy.ts +297 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.d.ts +33 -0
- package/src/services/system-memory.d.ts.map +1 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/asr-timed.real.test.ts +141 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +243 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +759 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2302 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +674 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +728 -0
- package/src/services/voice/ffi-bindings.ts +3225 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.d.ts +37 -0
- package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.d.ts +41 -0
- package/src/services/voice/transcript-service.d.ts.map +1 -0
- package/src/services/voice/transcript-service.test.ts +137 -0
- package/src/services/voice/transcript-service.ts +141 -0
- package/src/services/voice/transcript-store.d.ts +53 -0
- package/src/services/voice/transcript-store.d.ts.map +1 -0
- package/src/services/voice/transcript-store.test.ts +153 -0
- package/src/services/voice/transcript-store.ts +132 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +418 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Resolve a model's RAM budget and decide whether it fits the host.
|
|
3
|
+
*
|
|
4
|
+
* For installed Eliza-1 tiers WITH a published `eliza-1.manifest.json`
|
|
5
|
+
* on disk we prefer the manifest's `ramBudgetMb.{min, recommended}`
|
|
6
|
+
* block — that's what packages/inference/AGENTS.md §3 + §6 designate
|
|
7
|
+
* as the source of truth for per-bundle memory expectations. For every
|
|
8
|
+
* other model (non-Eliza-1, uninstalled tiers, or Eliza-1 bundles that
|
|
9
|
+
* predate the manifest publish) we fall back to the catalog scalar
|
|
10
|
+
* `model.minRamGb` for the floor and synthesize a `recommendedMb` from
|
|
11
|
+
* the floor plus the bundle's per-token KV-cache footprint at its default
|
|
12
|
+
* context window — the same `KvGeometry` figure `kv-spill.ts` uses. That
|
|
13
|
+
* fixes the degenerate `minMb == recommendedMb` catalog case: a long
|
|
14
|
+
* session grows into the KV cache, so "boots" and "runs comfortably" are
|
|
15
|
+
* genuinely different lines.
|
|
16
|
+
*
|
|
17
|
+
* The manifest read is best-effort: a missing or malformed manifest
|
|
18
|
+
* never throws — recommendation runs at runtime and a broken manifest
|
|
19
|
+
* must not crash the dashboard. Build-time gates live in the publish
|
|
20
|
+
* script (packages/training/scripts/manifest/eliza1_manifest.py) and
|
|
21
|
+
* the validator (`./manifest/validator.ts`).
|
|
22
|
+
*
|
|
23
|
+
* This module is the single source of truth for the "does model X fit a
|
|
24
|
+
* host with N MB usable RAM" decision (`assessRamFit`). Both the Model
|
|
25
|
+
* Hub recommender (`recommendation.ts`) and the model-load admission gate
|
|
26
|
+
* (`active-model.ts`) call it — neither re-derives the math.
|
|
27
|
+
*/
|
|
28
|
+
import { type Eliza1Manifest } from "./manifest";
|
|
29
|
+
import type { CatalogModel, InstalledModel, RamBudget } from "./types";
|
|
30
|
+
/**
 * Headroom reserved for the OS/runtime, in MB. This is the value
 * `RamFitOptions.reserveMb` defaults to when a caller does not override it.
 */
export declare function ramHeadroomReserveMb(): number;
|
|
31
|
+
export type { RamBudget } from "./types.js";
|
|
32
|
+
/**
|
|
33
|
+
* Loader contract — keeps the helper testable without touching disk.
|
|
34
|
+
* Production callers pass `defaultManifestLoader`; tests inject a fake loader.
|
|
35
|
+
*/
|
|
36
|
+
export type ManifestLoader = (modelId: string, installed: InstalledModel | undefined) => Eliza1Manifest | null;
|
|
37
|
+
/** Test-only: drop memoized manifests so fixtures with reused SHAs don't leak. */
|
|
38
|
+
export declare function __resetManifestCacheForTests(): void;
|
|
39
|
+
/**
|
|
40
|
+
* Production manifest loader — reads `eliza-1.manifest.json` from the
|
|
41
|
+
* installed bundle's directory. Two candidate paths are probed:
|
|
42
|
+
*
|
|
43
|
+
* 1. `dirname(dirname(model.path))` — the canonical bundle root when
|
|
44
|
+
* the GGUF lives in a `text/` subdir per AGENTS.md §2.
|
|
45
|
+
* 2. `dirname(model.path)` — flat layout used by some test fixtures
|
|
46
|
+
* and pre-bundle installs.
|
|
47
|
+
*
|
|
48
|
+
* Returns `null` for any failure: missing file, JSON parse error,
|
|
49
|
+
* manifest validation error, or tier mismatch.
|
|
50
|
+
*/
|
|
51
|
+
export declare function defaultManifestLoader(modelId: string, installed: InstalledModel | undefined): Eliza1Manifest | null;
|
|
52
|
+
/**
|
|
53
|
+
* Resolve a `RamBudget` for `model`, optionally consulting the on-disk
|
|
54
|
+
* manifest of an installed Eliza-1 bundle.
|
|
55
|
+
*
|
|
56
|
+
* `installed` and `manifestLoader` are both optional — passing neither
|
|
57
|
+
* always returns the catalog-scalar fallback. The recommendation engine
|
|
58
|
+
* passes both at call sites where it has the installed-models list.
|
|
59
|
+
*/
|
|
60
|
+
export declare function resolveRamBudget(model: CatalogModel, installed?: InstalledModel, manifestLoader?: ManifestLoader): RamBudget;
|
|
61
|
+
export interface RamFitOptions {
|
|
62
|
+
installed?: InstalledModel;
|
|
63
|
+
manifestLoader?: ManifestLoader;
|
|
64
|
+
/**
|
|
65
|
+
* Override the headroom reserved for the OS/runtime. Defaults to
|
|
66
|
+
* `ramHeadroomReserveMb()`. Pass 0 to assess against raw memory (the
|
|
67
|
+
* recommender does this — it works in "effective memory available to
|
|
68
|
+
* the model" terms, where the OS reserve is already discounted).
|
|
69
|
+
*/
|
|
70
|
+
reserveMb?: number;
|
|
71
|
+
}
|
|
72
|
+
/**
 * Outcome of a RAM fit assessment:
 * `fits` (comfortable headroom), `tight` (boots, but usable RAM is below the
 * recommended budget) or `wontfit` (usable RAM is below the boot floor,
 * `budget.minMb`).
 */
export type RamFitLevel = "fits" | "tight" | "wontfit";
|
|
73
|
+
export interface RamFitDecision {
|
|
74
|
+
level: RamFitLevel;
|
|
75
|
+
/** True for `fits` and `tight`; false only for `wontfit`. */
|
|
76
|
+
fits: boolean;
|
|
77
|
+
budget: RamBudget;
|
|
78
|
+
/** Memory after subtracting the headroom reserve, in MB. */
|
|
79
|
+
usableMb: number;
|
|
80
|
+
/** Headroom reserve applied, in MB. */
|
|
81
|
+
reserveMb: number;
|
|
82
|
+
}
|
|
83
|
+
/**
|
|
84
|
+
* The one fit decision. `hostRamMb` is the memory being assessed against,
|
|
85
|
+
* in megabytes (`os.totalmem() / 2**20`, or the probe's `totalRamGb * 1024`,
|
|
86
|
+
* or — for the recommender on a GPU host — the effective model-available
|
|
87
|
+
* memory in MB).
|
|
88
|
+
*
|
|
89
|
+
* - `wontfit` : usable RAM (host minus headroom) is below the bundle's
|
|
90
|
+
* boot floor (`budget.minMb`). A load MUST be refused.
|
|
91
|
+
* - `tight` : boots, but usable RAM is below the recommended budget —
|
|
92
|
+
* a long session will swap or stutter under load.
|
|
93
|
+
* - `fits` : comfortable headroom.
|
|
94
|
+
*/
|
|
95
|
+
export declare function assessRamFit(model: CatalogModel, hostRamMb: number, options?: RamFitOptions): RamFitDecision;
|
|
96
|
+
/**
|
|
97
|
+
* Given a catalog entry, pick the variant of the same model "line" (same
|
|
98
|
+
* param count and display-name stem — e.g. `eliza-1-27b` / `eliza-1-27b-256k`)
|
|
99
|
+
* with the largest context window that still fits a
|
|
100
|
+
* host with `hostRamMb` of RAM. Returns `model` itself when it's already
|
|
101
|
+
* the best fit (or the only variant), or `null` when not even `model` fits
|
|
102
|
+
* — callers turn that into a refusal.
|
|
103
|
+
*
|
|
104
|
+
* Variants are matched by `(params, displayNameStem)` where the stem is the
|
|
105
|
+
* display name with any trailing `-<ctx>` suffix (`-256k`, `-1m`, `-128k`)
|
|
106
|
+
* stripped. This keeps `eliza-1-27b*` together without conflating `9b` and
|
|
107
|
+
* `27b`.
|
|
108
|
+
*/
|
|
109
|
+
export declare function pickFittingContextVariant(model: CatalogModel, hostRamMb: number, options?: RamFitOptions, catalog?: ReadonlyArray<CatalogModel>): CatalogModel | null;
|
|
110
|
+
//# sourceMappingURL=ram-budget.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ram-budget.d.ts","sourceRoot":"","sources":["ram-budget.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAMH,OAAO,EAAE,KAAK,cAAc,EAAoB,MAAM,YAAY,CAAC;AACnE,OAAO,KAAK,EAAE,YAAY,EAAE,cAAc,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAoBvE,wBAAgB,oBAAoB,IAAI,MAAM,CAO7C;AAED,YAAY,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAE5C;;;GAGG;AACH,MAAM,MAAM,cAAc,GAAG,CAC5B,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,cAAc,GAAG,SAAS,KACjC,cAAc,GAAG,IAAI,CAAC;AAmB3B,kFAAkF;AAClF,wBAAgB,4BAA4B,IAAI,IAAI,CAEnD;AAOD;;;;;;;;;;;GAWG;AACH,wBAAgB,qBAAqB,CACpC,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,cAAc,GAAG,SAAS,GACnC,cAAc,GAAG,IAAI,CAyCvB;AAkBD;;;;;;;GAOG;AACH,wBAAgB,gBAAgB,CAC/B,KAAK,EAAE,YAAY,EACnB,SAAS,CAAC,EAAE,cAAc,EAC1B,cAAc,GAAE,cAAsC,GACpD,SAAS,CAiBX;AAED,MAAM,WAAW,aAAa;IAC7B,SAAS,CAAC,EAAE,cAAc,CAAC;IAC3B,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC;;;;;OAKG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,MAAM,WAAW,GAAG,MAAM,GAAG,OAAO,GAAG,SAAS,CAAC;AAEvD,MAAM,WAAW,cAAc;IAC9B,KAAK,EAAE,WAAW,CAAC;IACnB,6DAA6D;IAC7D,IAAI,EAAE,OAAO,CAAC;IACd,MAAM,EAAE,SAAS,CAAC;IAClB,4DAA4D;IAC5D,QAAQ,EAAE,MAAM,CAAC;IACjB,uCAAuC;IACvC,SAAS,EAAE,MAAM,CAAC;CAClB;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,YAAY,CAC3B,KAAK,EAAE,YAAY,EACnB,SAAS,EAAE,MAAM,EACjB,OAAO,GAAE,aAAkB,GACzB,cAAc,CAmBhB;AAOD;;;;;;;;;;;;GAYG;AACH,wBAAgB,yBAAyB,CACxC,KAAK,EAAE,YAAY,EACnB,SAAS,EAAE,MAAM,EACjB,OAAO,GAAE,aAAkB,EAC3B,OAAO,GAAE,aAAa,CAAC,YAAY,CAAiB,GAClD,YAAY,GAAG,IAAI,CAkBrB"}
|
|
Binary file
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { ActiveModelState, CatalogModel, DownloadJob, InstalledModel, LocalInferenceReadiness, ModelAssignments } from "./types";
|
|
2
|
+
export declare function buildTextGenerationReadiness(input: {
|
|
3
|
+
assignments: ModelAssignments;
|
|
4
|
+
installed: InstalledModel[];
|
|
5
|
+
active: ActiveModelState;
|
|
6
|
+
downloads: DownloadJob[];
|
|
7
|
+
catalog?: CatalogModel[];
|
|
8
|
+
}): LocalInferenceReadiness;
|
|
9
|
+
//# sourceMappingURL=readiness.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"readiness.d.ts","sourceRoot":"","sources":["readiness.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EACX,gBAAgB,EAChB,YAAY,EACZ,WAAW,EAEX,cAAc,EAEd,uBAAuB,EAEvB,gBAAgB,EAEhB,MAAM,SAAS,CAAC;AA8LjB,wBAAgB,4BAA4B,CAAC,KAAK,EAAE;IACnD,WAAW,EAAE,gBAAgB,CAAC;IAC9B,SAAS,EAAE,cAAc,EAAE,CAAC;IAC5B,MAAM,EAAE,gBAAgB,CAAC;IACzB,SAAS,EAAE,WAAW,EAAE,CAAC;IACzB,OAAO,CAAC,EAAE,YAAY,EAAE,CAAC;CACzB,GAAG,uBAAuB,CA4B1B"}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { buildTextGenerationReadiness } from "./readiness";
|
|
3
|
+
import type { ActiveModelState, DownloadJob, InstalledModel } from "./types";
|
|
4
|
+
|
|
5
|
+
/** Active-model state used when no model is loaded. */
const activeIdle: ActiveModelState = {
	modelId: null,
	loadedAt: null,
	status: "idle",
};

describe("local inference text readiness", () => {
	/** Fresh ISO-8601 timestamp for fixture fields. */
	const isoNow = (): string => new Date().toISOString();

	/** Installed-model fixture shaped like an Eliza download. */
	const installedFixture = (
		id: string,
		displayName: string,
		path: string,
		sizeBytes: number,
	): InstalledModel => ({
		id,
		displayName,
		path,
		sizeBytes,
		installedAt: isoNow(),
		lastUsedAt: null,
		source: "eliza-download",
	});

	it("reports assigned download terminal error state", () => {
		// A download that stopped a quarter of the way through with an error.
		const failedDownload: DownloadJob = {
			jobId: "job-1",
			modelId: "eliza-1-2b",
			state: "failed",
			received: 128,
			total: 512,
			bytesPerSec: 0,
			etaMs: null,
			startedAt: isoNow(),
			updatedAt: isoNow(),
			error: "HTTP 503 from HuggingFace",
		};
		const nothingInstalled: InstalledModel[] = [];

		const { slots } = buildTextGenerationReadiness({
			assignments: { TEXT_LARGE: "eliza-1-2b" },
			installed: nothingInstalled,
			active: activeIdle,
			downloads: [failedDownload],
		});
		const large = slots.TEXT_LARGE;

		expect(large.assigned).toBe(true);
		expect(large.primaryDownloaded).toBe(false);
		expect(large.downloaded).toBe(false);
		expect(large.state).toBe("failed");
		expect(large.missingModelIds).toContain("eliza-1-2b");
		expect(large.download.percent).toBe(25);
		expect(large.errors).toContain("HTTP 503 from HuggingFace");
	});

	it("marks a downloaded active assignment ready", () => {
		const onDisk: InstalledModel[] = [
			installedFixture(
				"eliza-1-2b",
				"eliza-1-2b",
				"/tmp/eliza-1-2b.gguf",
				2048,
			),
			installedFixture(
				"eliza-1-2b-drafter",
				"eliza-1-2b drafter",
				"/tmp/eliza-1-2b-drafter.gguf",
				512,
			),
		];
		// The assigned model is also the one currently loaded and ready.
		const loaded: ActiveModelState = {
			modelId: "eliza-1-2b",
			loadedAt: isoNow(),
			status: "ready",
		};

		const { slots } = buildTextGenerationReadiness({
			assignments: { TEXT_SMALL: "eliza-1-2b" },
			installed: onDisk,
			active: loaded,
			downloads: [],
		});
		const small = slots.TEXT_SMALL;

		expect(small.downloaded).toBe(true);
		expect(small.active).toBe(true);
		expect(small.ready).toBe(true);
		expect(small.state).toBe("active");
	});
});
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
import { MODEL_CATALOG } from "./catalog";
|
|
2
|
+
import { catalogDownloadSizeBytes } from "./recommendation";
|
|
3
|
+
import type {
|
|
4
|
+
ActiveModelState,
|
|
5
|
+
CatalogModel,
|
|
6
|
+
DownloadJob,
|
|
7
|
+
DownloadState,
|
|
8
|
+
InstalledModel,
|
|
9
|
+
LocalInferenceDownloadStatus,
|
|
10
|
+
LocalInferenceReadiness,
|
|
11
|
+
LocalInferenceSlotReadiness,
|
|
12
|
+
ModelAssignments,
|
|
13
|
+
TextGenerationSlot,
|
|
14
|
+
} from "./types";
|
|
15
|
+
|
|
16
|
+
/** Download states treated as terminal: the job is no longer queued or downloading. */
const TERMINAL_STATES = new Set<DownloadState>(["completed", "failed", "cancelled"]);
|
|
21
|
+
|
|
22
|
+
/**
 * Index catalog entries by their `id`.
 *
 * When two entries share an id, the later one wins while the key keeps the
 * position of its first occurrence.
 */
function catalogById(catalog: CatalogModel[]): Map<string, CatalogModel> {
	const index = new Map<string, CatalogModel>();
	for (const entry of catalog) {
		index.set(entry.id, entry);
	}
	return index;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Index installed models by their `id`.
 *
 * When two entries share an id, the later one wins while the key keeps the
 * position of its first occurrence.
 */
function installedById(
	installed: InstalledModel[],
): Map<string, InstalledModel> {
	const index = new Map<string, InstalledModel>();
	for (const entry of installed) {
		index.set(entry.id, entry);
	}
	return index;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Reduce a list of download jobs to one job per model id: the one with the
 * greatest `updatedAt` (compared with `localeCompare`). On a tie the job seen
 * first is kept.
 */
function latestJobByModel(downloads: DownloadJob[]): Map<string, DownloadJob> {
	const latest = new Map<string, DownloadJob>();
	for (const candidate of downloads) {
		const existing = latest.get(candidate.modelId);
		const isNewer =
			!existing ||
			candidate.updatedAt.localeCompare(existing.updatedAt) > 0;
		if (isNewer) {
			latest.set(candidate.modelId, candidate);
		}
	}
	return latest;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Model ids that must be installed for `assignedModelId` to count as
 * downloaded. Currently this is always just the assigned model itself.
 *
 * The previous body looked the model up in the catalog and then returned
 * `[assignedModelId]` from both arms of a ternary, so the lookup had no
 * effect. It is removed here; the catalog parameter is kept (underscored as
 * unused) so positional call sites keep working.
 *
 * @param assignedModelId - Id of the model assigned to the slot.
 * @param _catalog - Catalog index; currently unused.
 * @returns A single-element list containing `assignedModelId`.
 */
function requiredModelIds(
	assignedModelId: string,
	_catalog: Map<string, CatalogModel>,
): string[] {
	return [assignedModelId];
}
|
|
50
|
+
|
|
51
|
+
/**
 * Collapse the download jobs of one slot into a single aggregate status.
 *
 * Queued/downloading jobs take precedence: terminal jobs are only considered
 * when nothing is in flight. Within the considered jobs the reported state is
 * the first present of failed, cancelled, downloading, queued, completed.
 * With no considered jobs the state is `"missing"` when any model id is
 * missing and `"completed"` otherwise.
 *
 * @param jobs - Jobs belonging to the slot's required models.
 * @param missingModelIds - Required model ids that are not installed.
 * @returns Summed byte counters, a 0–100 percentage with one decimal (or
 *   `null` when the total is unknown), throughput, ETA, the latest
 *   `updatedAt`, and the collected job error messages.
 */
function statusFromJobs(
	jobs: DownloadJob[],
	missingModelIds: string[],
): LocalInferenceDownloadStatus {
	const inFlight: DownloadJob[] = [];
	const finished: DownloadJob[] = [];
	for (const job of jobs) {
		if (job.state === "queued" || job.state === "downloading") {
			inFlight.push(job);
		}
		if (TERMINAL_STATES.has(job.state)) {
			finished.push(job);
		}
	}
	const considered = inFlight.length > 0 ? inFlight : finished;

	// Highest-priority state present among the considered jobs.
	const statesPresent = new Set(considered.map((job) => job.state));
	const priority = [
		"failed",
		"cancelled",
		"downloading",
		"queued",
		"completed",
	] as const;
	const state =
		priority.find((candidate) => statesPresent.has(candidate)) ??
		(missingModelIds.length > 0 ? "missing" : "completed");

	let receivedBytes = 0;
	let totalBytes = 0;
	let updatedAt: string | null = null;
	const errors: string[] = [];
	for (const job of considered) {
		receivedBytes += job.received;
		totalBytes += job.total;
		if (updatedAt === null || job.updatedAt.localeCompare(updatedAt) > 0) {
			updatedAt = job.updatedAt;
		}
		if (job.error) {
			errors.push(job.error);
		}
	}

	// Throughput and ETA only make sense for jobs that are still running.
	let bytesPerSec = 0;
	const knownEtas: number[] = [];
	for (const job of inFlight) {
		bytesPerSec += job.bytesPerSec;
		if (job.etaMs !== null) {
			knownEtas.push(job.etaMs);
		}
	}
	let etaMs: number | null;
	if (knownEtas.length > 0) {
		// The slowest in-flight job determines the overall ETA.
		etaMs = Math.max(...knownEtas);
	} else {
		etaMs = considered[0]?.etaMs ?? null;
	}

	let percent: number | null = null;
	if (totalBytes > 0) {
		const oneDecimal = Math.round((receivedBytes / totalBytes) * 1000) / 10;
		percent = Math.max(0, Math.min(100, oneDecimal));
	}

	return {
		state,
		receivedBytes,
		totalBytes,
		percent,
		bytesPerSec,
		etaMs,
		updatedAt,
		errors,
	};
}
|
|
105
|
+
|
|
106
|
+
/**
 * Build the readiness report for one text-generation slot.
 *
 * An unassigned slot yields an inert report (`state: "unassigned"`, empty
 * download status). For an assigned slot the state is chosen in this order:
 * a failed/cancelled job, an in-flight job, the model being loaded and ready,
 * every required model installed, otherwise `"missing"`.
 *
 * @param slot - Slot being reported on.
 * @param assignments - Slot-to-model-id assignments.
 * @param installed - Installed models indexed by id.
 * @param catalog - Catalog entries indexed by id.
 * @param catalogList - The catalog as a list; forwarded to
 *   `catalogDownloadSizeBytes` to compute `expectedBytes`.
 * @param downloads - Latest download job per model id.
 * @param active - State of the currently loaded model.
 */
function slotReadiness(
	slot: TextGenerationSlot,
	assignments: ModelAssignments,
	installed: Map<string, InstalledModel>,
	catalog: Map<string, CatalogModel>,
	catalogList: CatalogModel[],
	downloads: Map<string, DownloadJob>,
	active: ActiveModelState,
): LocalInferenceSlotReadiness {
	const assignedModelId = assignments[slot] ?? null;

	// Nothing assigned: there is nothing to download, load or report on.
	if (!assignedModelId) {
		return {
			slot,
			assigned: false,
			assignedModelId: null,
			displayName: null,
			primaryDownloaded: false,
			downloaded: false,
			active: false,
			ready: false,
			state: "unassigned",
			requiredModelIds: [],
			missingModelIds: [],
			installedBytes: 0,
			expectedBytes: 0,
			download: {
				state: "missing",
				receivedBytes: 0,
				totalBytes: 0,
				percent: null,
				bytesPerSec: 0,
				etaMs: null,
				updatedAt: null,
				errors: [],
			},
			errors: [],
		};
	}

	const catalogEntry = catalog.get(assignedModelId);
	const requiredIds = requiredModelIds(assignedModelId, catalog);

	// One pass over the required ids: what is missing, what is on disk, and
	// which download jobs belong to this slot.
	const missingModelIds: string[] = [];
	const slotJobs: DownloadJob[] = [];
	let installedBytes = 0;
	for (const id of requiredIds) {
		if (!installed.has(id)) {
			missingModelIds.push(id);
		}
		installedBytes += installed.get(id)?.sizeBytes ?? 0;
		const job = downloads.get(id);
		if (job) {
			slotJobs.push(job);
		}
	}

	const download = statusFromJobs(slotJobs, missingModelIds);
	const downloaded = missingModelIds.length === 0;

	const isAssignedModelActive = active.modelId === assignedModelId;
	const activeReady = isAssignedModelActive && active.status === "ready";
	// A load error only counts against this slot when it concerns its model.
	const loadErrors =
		isAssignedModelActive && active.status === "error" && active.error
			? [active.error]
			: [];

	const inFlight = slotJobs.some(
		(job) => job.state === "queued" || job.state === "downloading",
	);
	const failedOrCancelled = slotJobs.find(
		(job) => job.state === "failed" || job.state === "cancelled",
	);

	let state: LocalInferenceSlotReadiness["state"];
	if (failedOrCancelled) {
		state = failedOrCancelled.state === "failed" ? "failed" : "cancelled";
	} else if (inFlight) {
		state = "downloading";
	} else if (activeReady) {
		state = "active";
	} else if (downloaded) {
		state = "downloaded";
	} else {
		state = "missing";
	}

	return {
		slot,
		assigned: true,
		assignedModelId,
		displayName:
			catalogEntry?.displayName ??
			installed.get(assignedModelId)?.displayName ??
			null,
		primaryDownloaded: installed.has(assignedModelId),
		downloaded,
		active: activeReady,
		ready: downloaded && activeReady,
		state,
		requiredModelIds: requiredIds,
		missingModelIds,
		installedBytes,
		expectedBytes: catalogEntry
			? catalogDownloadSizeBytes(catalogEntry, catalogList)
			: 0,
		download,
		errors: [...download.errors, ...loadErrors],
	};
}
|
|
203
|
+
|
|
204
|
+
/**
 * Produce the readiness report for the `TEXT_SMALL` and `TEXT_LARGE` slots.
 *
 * The inputs are indexed once (catalog and installed models by id, latest
 * download job per model) and the same indexes are used for both slots.
 *
 * @param input.assignments - Slot-to-model-id assignments.
 * @param input.installed - Models currently installed.
 * @param input.active - State of the currently loaded model.
 * @param input.downloads - Known download jobs.
 * @param input.catalog - Catalog override; defaults to `MODEL_CATALOG`.
 * @returns The per-slot reports plus the ISO timestamp of this evaluation.
 */
export function buildTextGenerationReadiness(input: {
	assignments: ModelAssignments;
	installed: InstalledModel[];
	active: ActiveModelState;
	downloads: DownloadJob[];
	catalog?: CatalogModel[];
}): LocalInferenceReadiness {
	const catalogList = input.catalog ?? MODEL_CATALOG;
	const catalogIndex = catalogById(catalogList);
	const installedIndex = installedById(input.installed);
	const latestJobs = latestJobByModel(input.downloads);

	// Both slots are evaluated against the same indexes and active state.
	const readinessFor = (slot: TextGenerationSlot) =>
		slotReadiness(
			slot,
			input.assignments,
			installedIndex,
			catalogIndex,
			catalogList,
			latestJobs,
			input.active,
		);

	const updatedAt = new Date().toISOString();
	return {
		updatedAt,
		slots: {
			TEXT_SMALL: readinessFor("TEXT_SMALL"),
			TEXT_LARGE: readinessFor("TEXT_LARGE"),
		},
	};
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import { type Eliza1DeviceCaps } from "./manifest";
|
|
2
|
+
import { type ManifestLoader } from "./ram-budget";
|
|
3
|
+
import type { CatalogModel, CatalogQuantizationVariant, HardwareFitLevel, HardwareProbe, InstalledModel, TextGenerationSlot } from "./types";
|
|
4
|
+
export type RecommendationPlatformClass = "mobile" | "apple-silicon" | "linux-gpu" | "linux-cpu" | "desktop-gpu" | "desktop-cpu";
|
|
5
|
+
export interface RecommendedModelSelection {
|
|
6
|
+
slot: TextGenerationSlot;
|
|
7
|
+
platformClass: RecommendationPlatformClass;
|
|
8
|
+
model: CatalogModel | null;
|
|
9
|
+
fit: HardwareFitLevel | null;
|
|
10
|
+
reason: string;
|
|
11
|
+
alternatives: CatalogModel[];
|
|
12
|
+
}
|
|
13
|
+
export declare function classifyRecommendationPlatform(hardware: HardwareProbe): RecommendationPlatformClass;
|
|
14
|
+
export declare function catalogDownloadSizeGb(model: CatalogModel, catalog?: CatalogModel[]): number;
|
|
15
|
+
export declare function catalogDownloadSizeBytes(model: CatalogModel, catalog?: CatalogModel[]): number;
|
|
16
|
+
export declare function selectBestQuantizationVariant(model: CatalogModel): CatalogQuantizationVariant | null;
|
|
17
|
+
export declare function assessCatalogModelFit(hardware: HardwareProbe, model: CatalogModel, catalog?: CatalogModel[], options?: {
|
|
18
|
+
installed?: InstalledModel;
|
|
19
|
+
manifestLoader?: ManifestLoader;
|
|
20
|
+
}): HardwareFitLevel;
|
|
21
|
+
export interface RecommendationOptions {
|
|
22
|
+
/**
|
|
23
|
+
* Kernels actually advertised by the installed llama-server binary
|
|
24
|
+
* (parsed from CAPABILITIES.json next to it). When provided, models
|
|
25
|
+
* declaring `requiresKernel` not satisfied by this map are filtered
|
|
26
|
+
* out so we don't recommend a model the user can't actually run on
|
|
27
|
+
* this binary. Pass null/omit when no probe is available — recommender
|
|
28
|
+
* trusts the catalog and the dispatcher's load-time check.
|
|
29
|
+
*/
|
|
30
|
+
binaryKernels?: Partial<Record<string, boolean>> | null;
|
|
31
|
+
/**
|
|
32
|
+
* Models the user has already installed. When an Eliza-1 tier in this
|
|
33
|
+
* list has a published `eliza-1.manifest.json` next to its bundle,
|
|
34
|
+
* the recommender consults `manifest.ramBudgetMb` instead of the
|
|
35
|
+
* catalog's coarse `minRamGb` scalar. See `./ram-budget.ts`.
|
|
36
|
+
*/
|
|
37
|
+
installed?: ReadonlyArray<InstalledModel>;
|
|
38
|
+
/**
|
|
39
|
+
* Test-only override for the manifest reader. Production callers leave
|
|
40
|
+
* this unset and the helper reads `eliza-1.manifest.json` from disk.
|
|
41
|
+
*/
|
|
42
|
+
manifestLoader?: ManifestLoader;
|
|
43
|
+
}
|
|
44
|
+
/**
 * Produces the `RecommendedModelSelection` for a single text-generation slot
 * on the probed hardware.
 *
 * @param slot The text-generation slot to fill.
 * @param hardware The probed hardware the recommendation is made for.
 * @param catalog Optional catalog list.
 * @param options Optional `RecommendationOptions` (binary kernels, installed
 *   models, manifest loader override).
 * @returns The selection for `slot`.
 */
export declare function selectRecommendedModelForSlot(slot: TextGenerationSlot, hardware: HardwareProbe, catalog?: CatalogModel[], options?: RecommendationOptions): RecommendedModelSelection;
|
|
45
|
+
/**
|
|
46
|
+
* Project a `HardwareProbe` onto the `Eliza1DeviceCaps` shape the manifest
|
|
47
|
+
* validator's `canSetAsDefault` consumes. CPU is always available; a probed
|
|
48
|
+
* GPU contributes exactly its one backend (`cuda` / `metal` / `vulkan`). RAM
|
|
49
|
+
* is the device total, in MB — `canSetAsDefault` compares against the
|
|
50
|
+
* manifest's `ramBudgetMb.min` floor, not the headroom-discounted figure the
|
|
51
|
+
* ladder uses, because the floor is "will it boot at all".
|
|
52
|
+
*
* @param hardware The hardware probe to project.
* @returns The `Eliza1DeviceCaps` derived from `hardware`.
*/
|
|
53
|
+
export declare function deviceCapsFromProbe(hardware: HardwareProbe): Eliza1DeviceCaps;
|
|
54
|
+
/**
 * Outcome of the default-eligibility gate (`canBundleBeDefaultOnDevice`),
 * discriminated on `canBeDefault`:
 *
 * - `canBeDefault: true` carries no further fields.
 * - `canBeDefault: false` carries a machine-readable `reason` code plus a
 *   free-form `detail` string.
 */
export type BundleDefaultEligibility = {
|
|
55
|
+
canBeDefault: true;
|
|
56
|
+
} | {
|
|
57
|
+
canBeDefault: false;
|
|
58
|
+
reason: "no-manifest" | "not-default-eligible" | "ram-below-floor" | "kernels-unverified-on-device" | "not-verified-on-device";
|
|
59
|
+
detail: string;
|
|
60
|
+
};
|
|
61
|
+
/**
|
|
62
|
+
* Decides whether this installed Eliza-1 bundle may be offered as the recommended
|
|
63
|
+
* default on this device. The full set of conditions (any one failing →
|
|
64
|
+
* not default):
|
|
65
|
+
*
|
|
66
|
+
* - the bundle ships a validated `eliza-1.manifest.json`,
|
|
67
|
+
* - the manifest is contract-valid (every required kernel declared, every
|
|
68
|
+
* required eval green for a strict release, lineage/files consistent —
|
|
69
|
+
* enforced by `canSetAsDefault` → `collectContractErrors`),
|
|
70
|
+
* - the device exposes at least one backend the manifest verified `pass` on
|
|
71
|
+
* out of the tier's supported set,
|
|
72
|
+
* - the device RAM meets the manifest's `ramBudgetMb.min` floor,
|
|
73
|
+
* - the bundle has passed the one-time on-device verify pass
|
|
74
|
+
* (`InstalledModel.bundleVerifiedAt` is set) — a materialized-but-unverified
|
|
75
|
+
* bundle is never auto-selected, per AGENTS.md §7.
|
|
76
|
+
*
|
|
77
|
+
* `manifest.defaultEligible: true` is NOT required at the gate level — a
|
|
78
|
+
* `base-v1-candidate` bundle that passes every above condition is allowed
|
|
79
|
+
* to fill an empty default slot. The recommender prefers a strict release
|
|
80
|
+
* (`defaultEligible: true`) over a candidate when both are installed.
|
|
81
|
+
*
* @param installed The installed bundle being evaluated.
* @param hardware The probed hardware of the current device.
* @param options Optional `manifestLoader` override for reading the manifest.
* @returns `{ canBeDefault: true }` when the bundle may be the default, otherwise
*   `{ canBeDefault: false, reason, detail }` identifying the failure.
*/
|
|
82
|
+
export declare function canBundleBeDefaultOnDevice(installed: InstalledModel, hardware: HardwareProbe, options?: {
|
|
83
|
+
manifestLoader?: ManifestLoader;
|
|
84
|
+
}): BundleDefaultEligibility;
|
|
85
|
+
/**
 * Produces a `RecommendedModelSelection` for every `TextGenerationSlot`,
 * returned as a record keyed by slot.
 *
 * @param hardware The probed hardware the recommendations are made for.
 * @param catalog Optional catalog list.
 * @param options Optional `RecommendationOptions` (binary kernels, installed
 *   models, manifest loader override).
 * @returns One selection per text-generation slot.
 */
export declare function selectRecommendedModels(hardware: HardwareProbe, catalog?: CatalogModel[], options?: RecommendationOptions): Record<TextGenerationSlot, RecommendedModelSelection>;
|
|
86
|
+
/**
|
|
87
|
+
* Pick the model the engine should auto-load on first run when no user
|
|
88
|
+
* preference exists. Only ever resolves to an Eliza-1 default-eligible
|
|
89
|
+
* tier — never a non-Eliza catalog entry, never an HF-search result.
|
|
90
|
+
*
|
|
91
|
+
* Resolution order:
|
|
92
|
+
* 1. `FIRST_RUN_DEFAULT_MODEL_ID` when present in the catalog, in the
|
|
93
|
+
* default-eligible set, and not marked `publishStatus: "pending"`.
|
|
94
|
+
* 2. The first default-eligible, non-pending chat entry in the catalog
|
|
95
|
+
* as a fallback when the preferred id is missing or its HF bundle
|
|
96
|
+
* isn't published yet (elizaOS/eliza#7629). The fall-through walks
|
|
97
|
+
* the catalog in order, so the maintainer can keep
|
|
98
|
+
* `FIRST_RUN_DEFAULT_MODEL_ID` pointed at the *intended* default
|
|
99
|
+
* while the publish pipeline catches up.
|
|
100
|
+
* 3. If every default-eligible tier is pending, last-resort to ANY
|
|
101
|
+
* default-eligible tier — the device download path will fail
|
|
102
|
+
* cleanly with a 404 rather than silently picking a private
|
|
103
|
+
* non-Eliza model.
|
|
104
|
+
*
|
|
105
|
+
* Returns null only when no default-eligible entry exists at all —
|
|
106
|
+
* which means the catalog is misconfigured and the caller should
|
|
107
|
+
* surface a hard error rather than degrade silently.
|
|
108
|
+
*
* @param catalog Optional catalog list to resolve against.
* @returns The catalog entry to auto-load, or `null` when the catalog has no
*   default-eligible entry.
*/
|
|
109
|
+
export declare function recommendForFirstRun(catalog?: CatalogModel[]): CatalogModel | null;
|
|
110
|
+
/**
 * Chooses a fallback catalog model relative to `currentModelId` for the
 * probed hardware; per the function name the fallback is a smaller model.
 *
 * NOTE(review): what "smaller" is measured by is not visible from this
 * declaration — confirm in `recommendation.ts`.
 *
 * @param currentModelId Id of the model to fall back from.
 * @param hardware The probed hardware the fallback must suit.
 * @param slot Optional text-generation slot.
 * @param catalog Optional catalog list.
 * @param options Optional `RecommendationOptions` (binary kernels, installed
 *   models, manifest loader override).
 * @returns The fallback `CatalogModel`, or `null` when none is chosen.
 */
export declare function chooseSmallerFallbackModel(currentModelId: string, hardware: HardwareProbe, slot?: TextGenerationSlot, catalog?: CatalogModel[], options?: RecommendationOptions): CatalogModel | null;
|
|
111
|
+
//# sourceMappingURL=recommendation.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"recommendation.d.ts","sourceRoot":"","sources":["recommendation.ts"],"names":[],"mappings":"AAOA,OAAO,EAGN,KAAK,gBAAgB,EAGrB,MAAM,YAAY,CAAC;AACpB,OAAO,EAGN,KAAK,cAAc,EACnB,MAAM,cAAc,CAAC;AACtB,OAAO,KAAK,EACX,YAAY,EACZ,0BAA0B,EAC1B,gBAAgB,EAChB,aAAa,EACb,cAAc,EACd,kBAAkB,EAClB,MAAM,SAAS,CAAC;AAOjB,MAAM,MAAM,2BAA2B,GACpC,QAAQ,GACR,eAAe,GACf,WAAW,GACX,WAAW,GACX,aAAa,GACb,aAAa,CAAC;AAEjB,MAAM,WAAW,yBAAyB;IACzC,IAAI,EAAE,kBAAkB,CAAC;IACzB,aAAa,EAAE,2BAA2B,CAAC;IAC3C,KAAK,EAAE,YAAY,GAAG,IAAI,CAAC;IAC3B,GAAG,EAAE,gBAAgB,GAAG,IAAI,CAAC;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,YAAY,EAAE,CAAC;CAC7B;AAiDD,wBAAgB,8BAA8B,CAC7C,QAAQ,EAAE,aAAa,GACrB,2BAA2B,CAgB7B;AAED,wBAAgB,qBAAqB,CACpC,KAAK,EAAE,YAAY,EACnB,OAAO,GAAE,YAAY,EAAkB,GACrC,MAAM,CAGR;AAED,wBAAgB,wBAAwB,CACvC,KAAK,EAAE,YAAY,EACnB,OAAO,GAAE,YAAY,EAAkB,GACrC,MAAM,CAER;AAED,wBAAgB,6BAA6B,CAC5C,KAAK,EAAE,YAAY,GACjB,0BAA0B,GAAG,IAAI,CAWnC;AA0CD,wBAAgB,qBAAqB,CACpC,QAAQ,EAAE,aAAa,EACvB,KAAK,EAAE,YAAY,EACnB,OAAO,GAAE,YAAY,EAAkB,EACvC,OAAO,GAAE;IAAE,SAAS,CAAC,EAAE,cAAc,CAAC;IAAC,cAAc,CAAC,EAAE,cAAc,CAAA;CAAO,GAC3E,gBAAgB,CAiBlB;AAuHD,MAAM,WAAW,qBAAqB;IACrC;;;;;;;OAOG;IACH,aAAa,CAAC,EAAE,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,GAAG,IAAI,CAAC;IACxD;;;;;OAKG;IACH,SAAS,CAAC,EAAE,aAAa,CAAC,cAAc,CAAC,CAAC;IAC1C;;;OAGG;IACH,cAAc,CAAC,EAAE,cAAc,CAAC;CAChC;AAwBD,wBAAgB,6BAA6B,CAC5C,IAAI,EAAE,kBAAkB,EACxB,QAAQ,EAAE,aAAa,EACvB,OAAO,GAAE,YAAY,EAAkB,EACvC,OAAO,GAAE,qBAA0B,GACjC,yBAAyB,CA+C3B;AAyBD;;;;;;;GAOG;AACH,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,aAAa,GAAG,gBAAgB,CAa7E;AAED,MAAM,MAAM,wBAAwB,GACjC;IAAE,YAAY,EAAE,IAAI,CAAA;CAAE,GACtB;IACA,YAAY,EAAE,KAAK,CAAC;IACpB,MAAM,EACH,aAAa,GACb,sBAAsB,GACtB,iBAAiB,GACjB,8BAA8B,GAC9B,wBAAwB,CAAC;IAC5B,MAAM,EAAE,MAAM,CAAC;CACd,CAAC;AAEL;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,0BAA0B,CACzC,SAAS,EAAE,cAAc,EACzB,QAAQ,EAAE,aAAa,EACvB,OAAO,GAAE;IAAE,cAAc,CAAC,EAAE,cAAc,CAAA;CAAO,GAC/C,wBAAwB,CAqD1B;AAED,wBAAgB,uBAAuB,CACtC,QAAQ,EAAE,aAAa,EACvB,OAAO,GAAE,YAAY,EAAkB,EACvC,OAAO,GAAE,qBAA0B,GACjC,
MAAM,CAAC,kBAAkB,EAAE,yBAAyB,CAAC,CAevD;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,wBAAgB,oBAAoB,CACnC,OAAO,GAAE,YAAY,EAAkB,GACrC,YAAY,GAAG,IAAI,CAuBrB;AAED,wBAAgB,0BAA0B,CACzC,cAAc,EAAE,MAAM,EACtB,QAAQ,EAAE,aAAa,EACvB,IAAI,GAAE,kBAAiC,EACvC,OAAO,GAAE,YAAY,EAAkB,EACvC,OAAO,GAAE,qBAA0B,GACjC,YAAY,GAAG,IAAI,CA0BrB"}
|