@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +81 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +7 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +54 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1171 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +190 -0
- package/src/routes/local-inference-asr-route.ts +213 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +423 -0
- package/src/routes/local-inference-compat-routes.ts +782 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1398 -0
- package/src/runtime/index.d.ts +14 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +27 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/asr/errors.d.ts +21 -0
- package/src/services/asr/errors.d.ts.map +1 -0
- package/src/services/asr/errors.ts +50 -0
- package/src/services/asr/hash.d.ts +28 -0
- package/src/services/asr/hash.d.ts.map +1 -0
- package/src/services/asr/hash.ts +49 -0
- package/src/services/asr/index.d.ts +76 -0
- package/src/services/asr/index.d.ts.map +1 -0
- package/src/services/asr/index.ts +178 -0
- package/src/services/asr/types.d.ts +91 -0
- package/src/services/asr/types.d.ts.map +1 -0
- package/src/services/asr/types.ts +95 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +240 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +724 -0
- package/src/services/downloader.ts +899 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +534 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1891 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +183 -0
- package/src/services/hardware.ts +404 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +281 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +30 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +225 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +693 -0
- package/src/services/manifest/schema.d.ts +715 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +655 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +569 -0
- package/src/services/memory-arbiter.d.ts +343 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +1000 -0
- package/src/services/memory-monitor.d.ts +119 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +296 -0
- package/src/services/memory-pressure.d.ts +127 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +413 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +672 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +376 -0
- package/src/services/routing-policy.d.ts +55 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.ts +228 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +15 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/tts/errors.ts +46 -0
- package/src/services/tts/index.ts +214 -0
- package/src/services/tts/tts-audio-cache.ts +235 -0
- package/src/services/tts/types.ts +157 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +148 -0
- package/src/services/voice/embedding.ts +244 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +746 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2226 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +636 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +671 -0
- package/src/services/voice/ffi-bindings.ts +3050 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.ts +105 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +420 -0
- package/src/services/voice/voice-budget.ts +656 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CPU-offloaded KV-cache spill policy.
|
|
3
|
+
*
|
|
4
|
+
* packages/inference/AGENTS.md §3 item 7 mandates that for context > 64k on a
|
|
5
|
+
* device whose RAM cannot hold the full KV cache, the runtime MUST implement
|
|
6
|
+
* *spill* — keep the hot KV pages resident, page the cold ones out to CPU RAM
|
|
7
|
+
* (or, when even that is insufficient, to disk) — rather than refusing the
|
|
8
|
+
* request. AGENTS.md §3 "Failure handling" is equally explicit that the spill
|
|
9
|
+
* is gated by a real latency budget: a device where paging the cold KV back in
|
|
10
|
+
* would miss the voice first-audio-latency target must HARD-FAIL with a
|
|
11
|
+
* structured error, not silently serve a slow session.
|
|
12
|
+
*
|
|
13
|
+
* This module is the policy core. It is pure arithmetic — no llama-server
|
|
14
|
+
* process management, no native binding. `ffi-streaming-backend.ts` consults
|
|
15
|
+
* `planKvSpill()` at activation time:
|
|
16
|
+
* - `mode: "resident"` → no spill needed; load normally.
|
|
17
|
+
* - `mode: "spill"` → pass the resulting `residentPages` /
|
|
18
|
+
* `spillBytes` / tier ("cpu" | "disk") down to the
|
|
19
|
+
* backend as a `--kv-spill` hint.
|
|
20
|
+
* - budget missed → `planKvSpill()` throws `KvSpillUnsupportedError` so the engine
|
|
21
|
+
* surfaces a structured 4xx to the UI.
|
|
22
|
+
*
|
|
23
|
+
* Model parameters (page size, per-page bandwidth, voice latency budget) are
|
|
24
|
+
* documented constants below — the only "measured" inputs are the device's
|
|
25
|
+
* memory bandwidth class and the KV geometry of the loaded bundle. We do not
|
|
26
|
+
* pretend to micro-benchmark the disk here; the bandwidth tiers are coarse
|
|
27
|
+
* and conservative, and the gate fails *closed*.
|
|
28
|
+
*/
|
|
29
|
+
import type { RamBudget } from "./types";
|
|
30
|
+
/** Context length below which spill never applies (AGENTS.md §3 item 7). */
|
|
31
|
+
export declare const KV_SPILL_MIN_CONTEXT = 65536;
|
|
32
|
+
/**
|
|
33
|
+
* KV-cache page granularity, in tokens. The runtime evicts/restores KV in
|
|
34
|
+
* page units, not per-token, so spill accounting is page-aligned. 256 tokens
|
|
35
|
+
* is the buun-llama-cpp fork's default `--kv-page-size` for the spillable
|
|
36
|
+
* cache; keep this in sync if that default changes.
|
|
37
|
+
*/
|
|
38
|
+
export declare const KV_PAGE_TOKENS = 256;
|
|
39
|
+
/**
|
|
40
|
+
* First-audio-latency budget for voice mode, in milliseconds. The streaming
|
|
41
|
+
* contract (AGENTS.md §4) wants the phrase chunker handing the first chunk to
|
|
42
|
+
* TTS inside a scheduler tick; a cold KV restore at decode time eats directly
|
|
43
|
+
* into this budget. If the worst-case restore for the spilled pages exceeds
|
|
44
|
+
* this, spill is not viable for a voice-enabled bundle and we hard-fail.
|
|
45
|
+
*
|
|
46
|
+
* Text-only bundles get the looser `KV_SPILL_TEXT_LATENCY_BUDGET_MS`.
|
|
47
|
+
*/
|
|
48
|
+
export declare const KV_SPILL_VOICE_LATENCY_BUDGET_MS = 200;
|
|
49
|
+
export declare const KV_SPILL_TEXT_LATENCY_BUDGET_MS = 1500;
|
|
50
|
+
/**
|
|
51
|
+
* Effective KV transfer bandwidth back into the attention kernel, by storage
|
|
52
|
+
* tier and host class, in bytes per millisecond (1_000_000 B/ms = 1 GB/s). Conservative — the
|
|
53
|
+
* gate fails closed, so under-estimating bandwidth only makes us refuse more
|
|
54
|
+
* aggressively, never serve something too slow.
|
|
55
|
+
*
|
|
56
|
+
* - `cpu`/`apple` : Apple Silicon shared memory — "spilling to CPU" is
|
|
57
|
+
* mostly an accounting move (same physical RAM, different
|
|
58
|
+
* residency bookkeeping); effective restore bandwidth is
|
|
59
|
+
* high.
|
|
60
|
+
* - `cpu`/`pcie` : discrete-GPU x86 — cold KV pages live in host RAM and
|
|
61
|
+
* ride the PCIe bus back to VRAM. PCIe 4.0 x16 ≈ 25 GB/s
|
|
62
|
+
* after framing; we budget 12.
|
|
63
|
+
* - `disk`/`nvme` : NVMe SSD — sequential read ≈ 3 GB/s; we budget 1.5.
|
|
64
|
+
* - `disk`/`sata` : SATA SSD / spinning rust fallback — ≈ 0.4 GB/s; we
|
|
65
|
+
* budget 0.25. (Mostly here so the math is defined; in
|
|
66
|
+
* practice this tier fails the gate immediately.)
|
|
67
|
+
*/
|
|
68
|
+
declare const KV_RESTORE_BANDWIDTH_BYTES_PER_MS: {
|
|
69
|
+
readonly "cpu-apple": 40000000;
|
|
70
|
+
readonly "cpu-pcie": 12000000;
|
|
71
|
+
readonly "disk-nvme": 1500000;
|
|
72
|
+
readonly "disk-sata": 250000;
|
|
73
|
+
};
|
|
74
|
+
export type KvRestoreClass = keyof typeof KV_RESTORE_BANDWIDTH_BYTES_PER_MS;
|
|
75
|
+
/**
|
|
76
|
+
* Per-token KV-cache footprint of a loaded bundle, summed across all
|
|
77
|
+
* full-attention layers, for the *quantized* cache it actually ships with
|
|
78
|
+
* (QJL K + PolarQuant/TurboQuant V — see packages/training/AGENTS.md §3).
|
|
79
|
+
* Callers derive this from the bundle's manifest / catalog runtime block;
|
|
80
|
+
* `estimateQuantizedKvBytesPerToken()` is the fallback when only the param
|
|
81
|
+
* count is known.
|
|
82
|
+
*/
|
|
83
|
+
export interface KvGeometry {
|
|
84
|
+
/** Bytes of compressed KV the cache grows by, per generated token. */
|
|
85
|
+
bytesPerToken: number;
|
|
86
|
+
/** True when the loaded bundle has voice enabled (tighter latency gate). */
|
|
87
|
+
voiceEnabled: boolean;
|
|
88
|
+
}
|
|
89
|
+
export declare function estimateQuantizedKvBytesPerToken(params: string): number;
|
|
90
|
+
/**
|
|
91
|
+
* Where the spilled pages land. `"cpu"` = host RAM (still RAM, just not
|
|
92
|
+
* counted against the resident budget); `"disk"` = the local-inference cache
|
|
93
|
+
* directory on persistent storage.
|
|
94
|
+
*/
|
|
95
|
+
export type KvSpillTier = "cpu" | "disk";
|
|
96
|
+
export interface KvSpillPlanResident {
|
|
97
|
+
mode: "resident";
|
|
98
|
+
/** The whole KV cache fits in the resident budget; nothing spills. */
|
|
99
|
+
totalKvBytes: number;
|
|
100
|
+
residentBytes: number;
|
|
101
|
+
}
|
|
102
|
+
export interface KvSpillPlanSpill {
|
|
103
|
+
mode: "spill";
|
|
104
|
+
tier: KvSpillTier;
|
|
105
|
+
/** Pages kept resident (the hot tail of the context). */
|
|
106
|
+
residentPages: number;
|
|
107
|
+
/** Pages paged out to `tier`. */
|
|
108
|
+
spillPages: number;
|
|
109
|
+
/** Bytes of KV held resident. */
|
|
110
|
+
residentBytes: number;
|
|
111
|
+
/** Bytes of KV spilled to `tier`. */
|
|
112
|
+
spillBytes: number;
|
|
113
|
+
/** Total compressed KV footprint at full context. */
|
|
114
|
+
totalKvBytes: number;
|
|
115
|
+
/** Worst-case latency to restore one cold page, in ms. */
|
|
116
|
+
worstCaseRestoreMs: number;
|
|
117
|
+
/** The latency budget this plan was checked against, in ms. */
|
|
118
|
+
latencyBudgetMs: number;
|
|
119
|
+
}
|
|
120
|
+
export type KvSpillPlan = KvSpillPlanResident | KvSpillPlanSpill;
|
|
121
|
+
/**
|
|
122
|
+
* Structured error thrown when spill cannot meet the latency budget. The
|
|
123
|
+
* engine catches this and surfaces it to the UI as a 4xx with `code` and
|
|
124
|
+
* `details` intact — there is NO silent-slow fallback (AGENTS.md §3).
|
|
125
|
+
*/
|
|
126
|
+
export declare class KvSpillUnsupportedError extends Error {
|
|
127
|
+
readonly code = "kv-spill-unsupported";
|
|
128
|
+
readonly details: {
|
|
129
|
+
requestedContext: number;
|
|
130
|
+
totalKvBytes: number;
|
|
131
|
+
residentBytes: number;
|
|
132
|
+
spillBytes: number;
|
|
133
|
+
worstCaseRestoreMs: number;
|
|
134
|
+
latencyBudgetMs: number;
|
|
135
|
+
restoreClass: KvRestoreClass;
|
|
136
|
+
voiceEnabled: boolean;
|
|
137
|
+
};
|
|
138
|
+
constructor(details: KvSpillUnsupportedError["details"]);
|
|
139
|
+
}
|
|
140
|
+
/**
|
|
141
|
+
* Inputs to `planKvSpill`. `residentKvBudgetBytes` is the slice of the RAM
|
|
142
|
+
* budget the runtime is willing to hand to the *resident* KV cache after
|
|
143
|
+
* weights + activations + the TTS/ASR working sets are accounted for; callers
|
|
144
|
+
* derive it from `RamBudget` via `residentKvBudgetFromRamBudget()`.
|
|
145
|
+
*/
|
|
146
|
+
export interface KvSpillInput {
|
|
147
|
+
requestedContext: number;
|
|
148
|
+
geometry: KvGeometry;
|
|
149
|
+
residentKvBudgetBytes: number;
|
|
150
|
+
restoreClass: KvRestoreClass;
|
|
151
|
+
/**
|
|
152
|
+
* True when the host can spill to CPU RAM (host RAM available beyond the
|
|
153
|
+
* resident budget). When false the spill tier degrades to `"disk"`.
|
|
154
|
+
*/
|
|
155
|
+
cpuSpillAvailable: boolean;
|
|
156
|
+
}
|
|
157
|
+
/**
|
|
158
|
+
* Slice the resident-KV budget out of a model's `RamBudget`. The recommended
|
|
159
|
+
* budget covers weights + activations + voice working sets + KV; we reserve a
|
|
160
|
+
* fixed fraction for KV. This mirrors what `recommendation.ts` already assumes
|
|
161
|
+
* implicitly when it sizes tiers — kept as one constant so the spill policy
|
|
162
|
+
* and the recommender agree.
|
|
163
|
+
*/
|
|
164
|
+
export declare const RESIDENT_KV_BUDGET_FRACTION = 0.25;
|
|
165
|
+
export declare function residentKvBudgetFromRamBudget(budget: RamBudget): number;
|
|
166
|
+
/**
|
|
167
|
+
* Decide the KV-cache placement for a requested context.
|
|
168
|
+
*
|
|
169
|
+
* Returns `{ mode: "resident" }` when the whole compressed KV fits the
|
|
170
|
+
* resident budget; `{ mode: "spill", ... }` when it fits with paging and the
|
|
171
|
+
* cold-page restore stays inside the latency budget; throws
|
|
172
|
+
* `KvSpillUnsupportedError` when spill would miss the budget.
|
|
173
|
+
*
|
|
174
|
+
* Below `KV_SPILL_MIN_CONTEXT` the plan is `{ mode: "resident" }` when the KV fits and a thrown `KvSpillUnsupportedError` when it does not — there
|
|
175
|
+
* is no spill at short context, by contract.
|
|
176
|
+
*/
|
|
177
|
+
export declare function planKvSpill(input: KvSpillInput): KvSpillPlan;
|
|
178
|
+
/**
|
|
179
|
+
* Map a `HardwareProbe`-shaped descriptor to the KV restore bandwidth class.
|
|
180
|
+
* Apple Silicon → unified-memory class; discrete-GPU x86 → PCIe class;
|
|
181
|
+
* CPU-only → NVMe class (no GPU to page back to, so "restore" is a host-RAM
|
|
182
|
+
* memcpy bounded by the same order as a fast SSD on the conservative side).
|
|
183
|
+
*/
|
|
184
|
+
export declare function restoreClassForHardware(input: {
|
|
185
|
+
appleSilicon: boolean;
|
|
186
|
+
hasDiscreteGpu: boolean;
|
|
187
|
+
}): KvRestoreClass;
|
|
188
|
+
export {};
|
|
189
|
+
//# sourceMappingURL=kv-spill.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"kv-spill.d.ts","sourceRoot":"","sources":["kv-spill.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAEzC,4EAA4E;AAC5E,eAAO,MAAM,oBAAoB,QAAQ,CAAC;AAE1C;;;;;GAKG;AACH,eAAO,MAAM,cAAc,MAAM,CAAC;AAElC;;;;;;;;GAQG;AACH,eAAO,MAAM,gCAAgC,MAAM,CAAC;AACpD,eAAO,MAAM,+BAA+B,OAAO,CAAC;AAEpD;;;;;;;;;;;;;;;;;GAiBG;AACH,QAAA,MAAM,iCAAiC;;;;;CAK7B,CAAC;AAEX,MAAM,MAAM,cAAc,GAAG,MAAM,OAAO,iCAAiC,CAAC;AAE5E;;;;;;;GAOG;AACH,MAAM,WAAW,UAAU;IAC1B,sEAAsE;IACtE,aAAa,EAAE,MAAM,CAAC;IACtB,4EAA4E;IAC5E,YAAY,EAAE,OAAO,CAAC;CACtB;AAmBD,wBAAgB,gCAAgC,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAOvE;AAED;;;;GAIG;AACH,MAAM,MAAM,WAAW,GAAG,KAAK,GAAG,MAAM,CAAC;AAEzC,MAAM,WAAW,mBAAmB;IACnC,IAAI,EAAE,UAAU,CAAC;IACjB,sEAAsE;IACtE,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,gBAAgB;IAChC,IAAI,EAAE,OAAO,CAAC;IACd,IAAI,EAAE,WAAW,CAAC;IAClB,yDAAyD;IACzD,aAAa,EAAE,MAAM,CAAC;IACtB,iCAAiC;IACjC,UAAU,EAAE,MAAM,CAAC;IACnB,iCAAiC;IACjC,aAAa,EAAE,MAAM,CAAC;IACtB,qCAAqC;IACrC,UAAU,EAAE,MAAM,CAAC;IACnB,qDAAqD;IACrD,YAAY,EAAE,MAAM,CAAC;IACrB,0DAA0D;IAC1D,kBAAkB,EAAE,MAAM,CAAC;IAC3B,+DAA+D;IAC/D,eAAe,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,MAAM,WAAW,GAAG,mBAAmB,GAAG,gBAAgB,CAAC;AAEjE;;;;GAIG;AACH,qBAAa,uBAAwB,SAAQ,KAAK;IACjD,QAAQ,CAAC,IAAI,0BAA0B;IACvC,QAAQ,CAAC,OAAO,EAAE;QACjB,gBAAgB,EAAE,MAAM,CAAC;QACzB,YAAY,EAAE,MAAM,CAAC;QACrB,aAAa,EAAE,MAAM,CAAC;QACtB,UAAU,EAAE,MAAM,CAAC;QACnB,kBAAkB,EAAE,MAAM,CAAC;QAC3B,eAAe,EAAE,MAAM,CAAC;QACxB,YAAY,EAAE,cAAc,CAAC;QAC7B,YAAY,EAAE,OAAO,CAAC;KACtB,CAAC;gBAEU,OAAO,EAAE,uBAAuB,CAAC,SAAS,CAAC;CAgBvD;AAED;;;;;GAKG;AACH,MAAM,WAAW,YAAY;IAC5B,gBAAgB,EAAE,MAAM,CAAC;IACzB,QAAQ,EAAE,UAAU,CAAC;IACrB,qBAAqB,EAAE,MAAM,CAAC;IAC9B,YAAY,EAAE,cAAc,CAAC;IAC7B;;;OAGG;IACH,iBAAiB,EAAE,OAAO,CAAC;CAC3B;AAED;;;;;;GAMG;AACH,eAAO,MAAM,2BAA2B,OAAO,CAAC;AAEhD,wBAAgB,6BAA6B,CAAC,MAAM,EAAE,SAAS,GAAG,MAAM,CAIvE;AAMD;;;;;;;;;;GAUG;AACH,wBAAgB,WAAW,CAAC,KAAK,EAAE,YAAY,GAAG,WAAW,CAmG5D;AAED;;;;;GAKG;AACH,wBAAgB,uBAAuB,CAAC,KAAK,EA
AE;IAC9C,YAAY,EAAE,OAAO,CAAC;IACtB,cAAc,EAAE,OAAO,CAAC;CACxB,GAAG,cAAc,CAIjB"}
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import {
|
|
3
|
+
estimateQuantizedKvBytesPerToken,
|
|
4
|
+
KV_PAGE_TOKENS,
|
|
5
|
+
KV_SPILL_MIN_CONTEXT,
|
|
6
|
+
KV_SPILL_VOICE_LATENCY_BUDGET_MS,
|
|
7
|
+
type KvGeometry,
|
|
8
|
+
KvSpillUnsupportedError,
|
|
9
|
+
planKvSpill,
|
|
10
|
+
residentKvBudgetFromRamBudget,
|
|
11
|
+
restoreClassForHardware,
|
|
12
|
+
} from "./kv-spill";
|
|
13
|
+
import type { RamBudget } from "./ram-budget";
|
|
14
|
+
|
|
15
|
+
const MIB = 1024 * 1024;
|
|
16
|
+
|
|
17
|
+
function geometry(overrides: Partial<KvGeometry> = {}): KvGeometry {
|
|
18
|
+
return { bytesPerToken: 2_000, voiceEnabled: false, ...overrides };
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
describe("planKvSpill", () => {
|
|
22
|
+
it("returns resident when the whole compressed KV fits the budget", () => {
|
|
23
|
+
// 32k tokens * 2000 B/token = 62.5 MiB; budget 128 MiB.
|
|
24
|
+
const plan = planKvSpill({
|
|
25
|
+
requestedContext: 32_768,
|
|
26
|
+
geometry: geometry(),
|
|
27
|
+
residentKvBudgetBytes: 128 * MIB,
|
|
28
|
+
restoreClass: "cpu-pcie",
|
|
29
|
+
cpuSpillAvailable: true,
|
|
30
|
+
});
|
|
31
|
+
expect(plan.mode).toBe("resident");
|
|
32
|
+
if (plan.mode !== "resident") throw new Error("unreachable");
|
|
33
|
+
expect(plan.residentBytes).toBe(plan.totalKvBytes);
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
it("spills the cold pages on a long context that overruns the resident budget", () => {
|
|
37
|
+
// 256k tokens * 2000 B/token = 500 MiB; budget 64 MiB → most pages spill.
|
|
38
|
+
const plan = planKvSpill({
|
|
39
|
+
requestedContext: 262_144,
|
|
40
|
+
geometry: geometry(),
|
|
41
|
+
residentKvBudgetBytes: 64 * MIB,
|
|
42
|
+
restoreClass: "cpu-pcie",
|
|
43
|
+
cpuSpillAvailable: true,
|
|
44
|
+
});
|
|
45
|
+
expect(plan.mode).toBe("spill");
|
|
46
|
+
if (plan.mode !== "spill") throw new Error("unreachable");
|
|
47
|
+
expect(plan.tier).toBe("cpu");
|
|
48
|
+
expect(plan.spillPages).toBeGreaterThan(0);
|
|
49
|
+
expect(plan.residentPages).toBeGreaterThanOrEqual(1);
|
|
50
|
+
expect(plan.residentBytes + plan.spillBytes).toBe(plan.totalKvBytes);
|
|
51
|
+
// 256-token page * 2000 B = 512_000 B; PCIe budget 12_000_000 B/ms →
|
|
52
|
+
// ~0.043 ms per page restore, comfortably under any budget.
|
|
53
|
+
expect(plan.worstCaseRestoreMs).toBeLessThan(plan.latencyBudgetMs);
|
|
54
|
+
expect(plan.latencyBudgetMs).toBe(1_500);
|
|
55
|
+
});
|
|
56
|
+
|
|
57
|
+
it("uses the tighter voice budget when the bundle has voice enabled", () => {
|
|
58
|
+
const plan = planKvSpill({
|
|
59
|
+
requestedContext: 262_144,
|
|
60
|
+
geometry: geometry({ voiceEnabled: true }),
|
|
61
|
+
residentKvBudgetBytes: 64 * MIB,
|
|
62
|
+
restoreClass: "cpu-pcie",
|
|
63
|
+
cpuSpillAvailable: true,
|
|
64
|
+
});
|
|
65
|
+
expect(plan.mode).toBe("spill");
|
|
66
|
+
if (plan.mode !== "spill") throw new Error("unreachable");
|
|
67
|
+
expect(plan.latencyBudgetMs).toBe(KV_SPILL_VOICE_LATENCY_BUDGET_MS);
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
it("hard-fails with a structured error when cold-page restore misses the voice budget", () => {
|
|
71
|
+
// Big per-token KV + slow SATA disk → a single page restore blows the
|
|
72
|
+
// 200ms voice budget. Page = 256 * 200_000 B = 51.2 MB; SATA budget
|
|
73
|
+
// 250_000 B/ms → ~205 ms > 200 ms.
|
|
74
|
+
let thrown: unknown;
|
|
75
|
+
try {
|
|
76
|
+
planKvSpill({
|
|
77
|
+
requestedContext: 262_144,
|
|
78
|
+
geometry: geometry({ bytesPerToken: 200_000, voiceEnabled: true }),
|
|
79
|
+
residentKvBudgetBytes: 64 * MIB,
|
|
80
|
+
restoreClass: "disk-sata",
|
|
81
|
+
cpuSpillAvailable: false,
|
|
82
|
+
});
|
|
83
|
+
} catch (err) {
|
|
84
|
+
thrown = err;
|
|
85
|
+
}
|
|
86
|
+
expect(thrown).toBeInstanceOf(KvSpillUnsupportedError);
|
|
87
|
+
const e = thrown as KvSpillUnsupportedError;
|
|
88
|
+
expect(e.code).toBe("kv-spill-unsupported");
|
|
89
|
+
expect(e.details.voiceEnabled).toBe(true);
|
|
90
|
+
expect(e.details.restoreClass).toBe("disk-sata");
|
|
91
|
+
expect(e.details.worstCaseRestoreMs).toBeGreaterThan(
|
|
92
|
+
e.details.latencyBudgetMs,
|
|
93
|
+
);
|
|
94
|
+
expect(e.message).toContain("voice latency budget");
|
|
95
|
+
});
|
|
96
|
+
|
|
97
|
+
it("degrades the spill tier to disk and downgrades a cpu restore class when CPU spill is unavailable", () => {
|
|
98
|
+
const plan = planKvSpill({
|
|
99
|
+
requestedContext: 262_144,
|
|
100
|
+
geometry: geometry(),
|
|
101
|
+
residentKvBudgetBytes: 64 * MIB,
|
|
102
|
+
restoreClass: "cpu-pcie",
|
|
103
|
+
cpuSpillAvailable: false,
|
|
104
|
+
});
|
|
105
|
+
expect(plan.mode).toBe("spill");
|
|
106
|
+
if (plan.mode !== "spill") throw new Error("unreachable");
|
|
107
|
+
expect(plan.tier).toBe("disk");
|
|
108
|
+
// worstCaseRestoreMs reflects the NVMe class, not the cpu-pcie one.
|
|
109
|
+
expect(plan.worstCaseRestoreMs).toBeGreaterThan(
|
|
110
|
+
// page (512_000 B) / cpu-pcie (12e6) ≈ 0.043 ms
|
|
111
|
+
512_000 / 12_000_000,
|
|
112
|
+
);
|
|
113
|
+
});
|
|
114
|
+
|
|
115
|
+
it("page-aligns the resident/spill split", () => {
|
|
116
|
+
const plan = planKvSpill({
|
|
117
|
+
requestedContext: 200_000,
|
|
118
|
+
geometry: geometry(),
|
|
119
|
+
residentKvBudgetBytes: 30 * MIB,
|
|
120
|
+
restoreClass: "cpu-apple",
|
|
121
|
+
cpuSpillAvailable: true,
|
|
122
|
+
});
|
|
123
|
+
expect(plan.mode).toBe("spill");
|
|
124
|
+
if (plan.mode !== "spill") throw new Error("unreachable");
|
|
125
|
+
const pageBytes = 2_000 * KV_PAGE_TOKENS;
|
|
126
|
+
expect(plan.residentBytes % pageBytes).toBe(0);
|
|
127
|
+
expect(plan.spillBytes % pageBytes).toBe(0);
|
|
128
|
+
const totalPages = Math.ceil(200_000 / KV_PAGE_TOKENS);
|
|
129
|
+
expect(plan.residentPages + plan.spillPages).toBe(totalPages);
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
it("treats a short context that overruns the resident budget as unsupported (wrong tier for device)", () => {
|
|
133
|
+
// 64k - 1 tokens, but a per-token KV so large the cache won't fit and
|
|
134
|
+
// we're below KV_SPILL_MIN_CONTEXT → structured error, no half-load.
|
|
135
|
+
let thrown: unknown;
|
|
136
|
+
try {
|
|
137
|
+
planKvSpill({
|
|
138
|
+
requestedContext: KV_SPILL_MIN_CONTEXT - 1,
|
|
139
|
+
geometry: geometry({ bytesPerToken: 50_000 }),
|
|
140
|
+
residentKvBudgetBytes: 16 * MIB,
|
|
141
|
+
restoreClass: "cpu-apple",
|
|
142
|
+
cpuSpillAvailable: true,
|
|
143
|
+
});
|
|
144
|
+
} catch (err) {
|
|
145
|
+
thrown = err;
|
|
146
|
+
}
|
|
147
|
+
expect(thrown).toBeInstanceOf(KvSpillUnsupportedError);
|
|
148
|
+
expect((thrown as KvSpillUnsupportedError).details.requestedContext).toBe(
|
|
149
|
+
KV_SPILL_MIN_CONTEXT - 1,
|
|
150
|
+
);
|
|
151
|
+
});
|
|
152
|
+
|
|
153
|
+
it("rejects degenerate inputs loudly", () => {
|
|
154
|
+
expect(() =>
|
|
155
|
+
planKvSpill({
|
|
156
|
+
requestedContext: 0,
|
|
157
|
+
geometry: geometry(),
|
|
158
|
+
residentKvBudgetBytes: MIB,
|
|
159
|
+
restoreClass: "cpu-apple",
|
|
160
|
+
cpuSpillAvailable: true,
|
|
161
|
+
}),
|
|
162
|
+
).toThrow(/positive context/);
|
|
163
|
+
expect(() =>
|
|
164
|
+
planKvSpill({
|
|
165
|
+
requestedContext: 100_000,
|
|
166
|
+
geometry: geometry(),
|
|
167
|
+
residentKvBudgetBytes: 0,
|
|
168
|
+
restoreClass: "cpu-apple",
|
|
169
|
+
cpuSpillAvailable: true,
|
|
170
|
+
}),
|
|
171
|
+
).toThrow(/residentKvBudgetBytes must be positive/);
|
|
172
|
+
});
|
|
173
|
+
});
|
|
174
|
+
|
|
175
|
+
describe("residentKvBudgetFromRamBudget", () => {
|
|
176
|
+
it("reserves the documented KV fraction of the recommended budget", () => {
|
|
177
|
+
const budget: RamBudget = {
|
|
178
|
+
minMb: 7000,
|
|
179
|
+
recommendedMb: 9600,
|
|
180
|
+
source: "manifest",
|
|
181
|
+
};
|
|
182
|
+
// 9600 MiB * 0.25 = 2400 MiB
|
|
183
|
+
expect(residentKvBudgetFromRamBudget(budget)).toBe(2400 * MIB);
|
|
184
|
+
});
|
|
185
|
+
});
|
|
186
|
+
|
|
187
|
+
describe("estimateQuantizedKvBytesPerToken", () => {
|
|
188
|
+
it("returns the per-tier figure for known param strings", () => {
|
|
189
|
+
expect(estimateQuantizedKvBytesPerToken("0.8B")).toBeLessThan(
|
|
190
|
+
estimateQuantizedKvBytesPerToken("9B"),
|
|
191
|
+
);
|
|
192
|
+
expect(estimateQuantizedKvBytesPerToken("2B")).toBeGreaterThan(
|
|
193
|
+
estimateQuantizedKvBytesPerToken("0.8B"),
|
|
194
|
+
);
|
|
195
|
+
expect(estimateQuantizedKvBytesPerToken("4B")).toBeGreaterThan(
|
|
196
|
+
estimateQuantizedKvBytesPerToken("2B"),
|
|
197
|
+
);
|
|
198
|
+
expect(estimateQuantizedKvBytesPerToken("27B")).toBeGreaterThan(
|
|
199
|
+
estimateQuantizedKvBytesPerToken("9B"),
|
|
200
|
+
);
|
|
201
|
+
});
|
|
202
|
+
|
|
203
|
+
it("fails closed (largest tier) for unknown param strings", () => {
|
|
204
|
+
expect(estimateQuantizedKvBytesPerToken("999B")).toBe(
|
|
205
|
+
estimateQuantizedKvBytesPerToken("27B"),
|
|
206
|
+
);
|
|
207
|
+
});
|
|
208
|
+
});
|
|
209
|
+
|
|
210
|
+
describe("restoreClassForHardware", () => {
|
|
211
|
+
it("maps Apple Silicon, discrete GPU, and CPU-only to the right classes", () => {
|
|
212
|
+
expect(
|
|
213
|
+
restoreClassForHardware({ appleSilicon: true, hasDiscreteGpu: false }),
|
|
214
|
+
).toBe("cpu-apple");
|
|
215
|
+
expect(
|
|
216
|
+
restoreClassForHardware({ appleSilicon: false, hasDiscreteGpu: true }),
|
|
217
|
+
).toBe("cpu-pcie");
|
|
218
|
+
expect(
|
|
219
|
+
restoreClassForHardware({ appleSilicon: false, hasDiscreteGpu: false }),
|
|
220
|
+
).toBe("disk-nvme");
|
|
221
|
+
});
|
|
222
|
+
});
|