@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +82 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.d.ts +29 -0
- package/src/actions/transcription-control.d.ts.map +1 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +8 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1082 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +205 -0
- package/src/routes/local-inference-asr-route.ts +163 -0
- package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +808 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/transcript-audio-store.d.ts +15 -0
- package/src/routes/transcript-audio-store.d.ts.map +1 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.d.ts +36 -0
- package/src/routes/transcripts-routes.d.ts.map +1 -0
- package/src/routes/transcripts-routes.test.ts +144 -0
- package/src/routes/transcripts-routes.ts +159 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1448 -0
- package/src/runtime/index.d.ts +15 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +33 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bionic-host-loader.d.ts +46 -0
- package/src/services/bionic-host-loader.d.ts.map +1 -0
- package/src/services/bionic-host-loader.test.ts +133 -0
- package/src/services/bionic-host-loader.ts +180 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +238 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +747 -0
- package/src/services/downloader.ts +925 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +540 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1909 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.d.ts +56 -0
- package/src/services/gpu-detect.d.ts.map +1 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +231 -0
- package/src/services/hardware.ts +410 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +29 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +211 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +689 -0
- package/src/services/manifest/schema.d.ts +713 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +653 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +567 -0
- package/src/services/memory-arbiter.d.ts +318 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +925 -0
- package/src/services/memory-monitor.d.ts +122 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +297 -0
- package/src/services/memory-pressure.d.ts +130 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +407 -0
- package/src/services/routing-policy.d.ts +69 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.test.ts +164 -0
- package/src/services/routing-policy.ts +297 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.d.ts +33 -0
- package/src/services/system-memory.d.ts.map +1 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/asr-timed.real.test.ts +141 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +243 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +759 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2302 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +674 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +728 -0
- package/src/services/voice/ffi-bindings.ts +3225 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.d.ts +37 -0
- package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.d.ts +41 -0
- package/src/services/voice/transcript-service.d.ts.map +1 -0
- package/src/services/voice/transcript-service.test.ts +137 -0
- package/src/services/voice/transcript-service.ts +141 -0
- package/src/services/voice/transcript-store.d.ts +53 -0
- package/src/services/voice/transcript-store.d.ts.map +1 -0
- package/src/services/voice/transcript-store.test.ts +153 -0
- package/src/services/voice/transcript-store.ts +132 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +418 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,423 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cache bridge for the local-inference path.
|
|
3
|
+
*
|
|
4
|
+
* Translates the runtime's `ProviderCachePlan` (a provider-neutral cache
|
|
5
|
+
* plan emitted by `@elizaos/core`'s `buildProviderCachePlan`) into
|
|
6
|
+
* concrete behaviour for the two local backends:
|
|
7
|
+
*
|
|
8
|
+
* 1. Out-of-process llama-server (MTP / buun-llama-cpp): stable
|
|
9
|
+
* slot-id derivation + on-disk slot KV save/restore directory layout
|
|
10
|
+
* + TTL-based eviction by mtime.
|
|
11
|
+
* 2. In-process node-llama-cpp: a session pool (see
|
|
12
|
+
* `session-pool.ts`) keyed by `promptCacheKey`.
|
|
13
|
+
*
|
|
14
|
+
* This module is pure logic — no llama-server process management, no
|
|
15
|
+
* node-llama-cpp imports. All filesystem state is rooted under
|
|
16
|
+
* `local-inference/llama-cache/` so cleanup is easy and explicit.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { createHash } from "node:crypto";
|
|
20
|
+
import fs from "node:fs/promises";
|
|
21
|
+
import path from "node:path";
|
|
22
|
+
import { localInferenceRoot } from "./paths";
|
|
23
|
+
|
|
24
|
+
/**
 * TTLs for cached prefix data, mirroring the cloud-side semantics:
 *   - `short`: roughly the "default" Anthropic ephemeral cache window.
 *   - `long`: roughly the "1h" Anthropic ephemeral cache window.
 *   - `extended`: the OpenAI 24h prompt-cache retention window.
 *
 * Values are in milliseconds. Eviction uses file mtime, not access time,
 * so a slot that is read repeatedly without being rewritten still ages
 * out — which matches how llama-server writes the slot file each turn.
 */
export interface CacheTtls {
	/** Horizon, in milliseconds, for the `short` class. */
	short: number;
	/** Horizon, in milliseconds, for the `long` class. */
	long: number;
	/** Horizon, in milliseconds, for the `extended` class; optional. */
	extended?: number;
}
|
|
39
|
+
|
|
40
|
+
/** Default retention windows: 5 minutes, 1 hour and 24 hours, in milliseconds. */
export const DEFAULT_CACHE_TTLS: CacheTtls = {
	short: 300_000,
	long: 3_600_000,
	extended: 86_400_000,
};
|
|
45
|
+
|
|
46
|
+
/**
 * Directory that holds every piece of local llama-cache state: the
 * `llama-cache` folder directly under the local-inference root. Its
 * contents are Eliza-owned, so deleting it is a safe way to reset the cache.
 */
export function llamaCacheRoot(): string {
	const inferenceRoot = localInferenceRoot();
	return path.join(inferenceRoot, "llama-cache");
}
|
|
53
|
+
|
|
54
|
+
/**
 * Cache directory dedicated to one model hash, located directly under
 * `llamaCacheRoot()`. Keeping one directory per hash means slot files of
 * different models never collide, so switching the active model does not
 * require wiping the cache.
 *
 * @throws Error when `modelHash` is empty.
 */
export function cacheRoot(modelHash: string): string {
	if (modelHash) {
		return path.join(llamaCacheRoot(), modelHash);
	}
	throw new Error("[cache-bridge] cacheRoot requires a non-empty modelHash");
}
|
|
65
|
+
|
|
66
|
+
/**
 * Value for llama-server's `--slot-save-path` argument: the directory
 * llama-server writes per-slot KV state into when a request includes
 * `cache_prompt: true`. It is the per-model-hash directory returned by
 * `cacheRoot`, so an empty `modelHash` throws here as well.
 */
export function slotSavePath(modelHash: string): string {
	const slotDirectory = cacheRoot(modelHash);
	return slotDirectory;
}
|
|
74
|
+
|
|
75
|
+
/**
 * Stable model-fingerprint hash. Combines the paths of the target /
 * drafter GGUFs and the cache-type knobs so two distinct configurations
 * don't share a slot directory.
 *
 * Missing (`null` / `undefined`) optional fields are hashed as the empty
 * string. Fields are joined with a NUL byte so that the position of each
 * value is part of the fingerprint — without a real separator,
 * `{ cacheTypeK: "q8_0" }` and `{ cacheTypeV: "q8_0" }` would feed the
 * same byte stream into the hash and collide.
 *
 * @returns The first 16 hex characters of the SHA-256 digest.
 */
export function buildModelHash(input: {
	targetModelPath: string;
	drafterModelPath?: string | null;
	cacheTypeK?: string | null;
	cacheTypeV?: string | null;
	/** Optional extra discriminator (context size, parallel, etc.). */
	extra?: string | null;
}): string {
	const fields: string[] = [
		input.targetModelPath,
		input.drafterModelPath ?? "",
		input.cacheTypeK ?? "",
		input.cacheTypeV ?? "",
		input.extra ?? "",
	];
	// Written as an escape (not a literal control character) so the
	// separator survives editors and tooling that strip NUL bytes.
	return createHash("sha256")
		.update(fields.join("\0"))
		.digest("hex")
		.slice(0, 16);
}
|
|
100
|
+
|
|
101
|
+
/**
 * Deterministically map a `promptCacheKey` onto a llama-server slot id in
 * `[0, parallel)`.
 *
 * llama-server's `--parallel N` flag pre-allocates N decoding slots and
 * accepts a `slot_id` integer in `[0, N-1]` on each request. Hashing the
 * cache key into that range means:
 *
 *   - the same key always lands on the same slot, so the in-RAM KV cache
 *     from the previous turn is reused;
 *   - different keys spread across slots instead of fighting for the
 *     same KV memory.
 *
 * Returns -1 (the llama-server "any free slot" sentinel) when `parallel`
 * is non-finite or `<= 0`, or when `promptCacheKey` is empty. A fractional
 * `parallel` is floored, with a minimum of one slot.
 */
export function deriveSlotId(promptCacheKey: string, parallel: number): number {
	const pinningDisabled = !Number.isFinite(parallel) || parallel <= 0;
	if (pinningDisabled || !promptCacheKey) {
		return -1;
	}

	const slotCount = Math.max(1, Math.floor(parallel));
	if (slotCount === 1) {
		return 0;
	}

	// The leading 32 bits of the SHA-256 digest, read as an unsigned
	// big-endian integer, are reduced modulo the slot count.
	const leadingWord = createHash("sha256")
		.update(promptCacheKey)
		.digest()
		.readUInt32BE(0);
	return leadingWord % slotCount;
}
|
|
127
|
+
|
|
128
|
+
/**
 * Translate a TTL class into a concrete horizon in milliseconds, taken
 * from `ttls`. The eviction sweep uses this to decide whether a slot file
 * is still live.
 *
 *   - `"long"` → `ttls.long`
 *   - `"extended"` → `ttls.extended`, or `ttls.long` when that is unset
 *   - anything else (including `undefined`) → `ttls.short`
 */
export function ttlMsForKey(
	ttl: "short" | "long" | "extended" | undefined,
	ttls: CacheTtls = DEFAULT_CACHE_TTLS,
): number {
	switch (ttl) {
		case "long":
			return ttls.long;
		case "extended":
			return ttls.extended ?? ttls.long;
		default:
			return ttls.short;
	}
}
|
|
141
|
+
|
|
142
|
+
/**
 * TTL classes that can be encoded into a slot `.bin` filename, as the
 * middle component of `<base>.<ttl>.bin`.
 */
export type SlotCacheTtlClass = "short" | "long" | "extended";
|
|
144
|
+
|
|
145
|
+
/**
 * Compose the basename of a persisted slot/conversation `.bin` file,
 * embedding the TTL class as the middle component: `<base>.<ttl>.bin`.
 *
 * The eviction sweep recovers that component with
 * `parseSlotCacheTtlClass`, so a slot persisted with the long retention
 * window isn't deleted on the short horizon (and vice versa). Pass
 * `"long"` for cross-restart conversation KV — that matches the prior
 * global (long-only) behaviour for those files.
 */
export function slotCacheFileName(
	base: string,
	ttl: SlotCacheTtlClass,
): string {
	const classifiedSuffix = `.${ttl}.bin`;
	return `${base}${classifiedSuffix}`;
}
|
|
160
|
+
|
|
161
|
+
/**
 * Recover the TTL class that `slotCacheFileName` encoded into a slot
 * filename (`<base>.<ttl>.bin`).
 *
 * A trailing `.bin` is dropped if present, then the last dot-separated
 * component of what remains is checked against the known classes.
 * Returns `undefined` when there is no such component or it is not a
 * known class (legacy / hand-written filenames); the eviction sweep
 * treats those as `long`.
 */
export function parseSlotCacheTtlClass(
	fileName: string,
): SlotCacheTtlClass | undefined {
	const binSuffix = ".bin";
	const stem = fileName.endsWith(binSuffix)
		? fileName.slice(0, fileName.length - binSuffix.length)
		: fileName;

	const separatorIndex = stem.lastIndexOf(".");
	if (separatorIndex === -1) {
		return undefined;
	}

	const ttlComponent = stem.slice(separatorIndex + 1);
	switch (ttlComponent) {
		case "short":
		case "long":
		case "extended":
			return ttlComponent;
		default:
			return undefined;
	}
}
|
|
186
|
+
|
|
187
|
+
/**
 * Delete every regular file directly inside `rootDir` whose age
 * (`now - mtime`) exceeds the TTL horizon for that file.
 *
 * The TTL class is read from the filename (`<base>.<ttl>.bin` — see
 * `slotCacheFileName`); files without an encoded class use the `long`
 * horizon. Mtime is the watermark; llama-server rewrites the slot file on
 * every save, so a slot that's actively used keeps a fresh mtime.
 *
 * Entries that cannot be stat'ed, and non-file entries, are skipped.
 * A failed delete is ignored and not counted. A missing `rootDir`
 * (ENOENT) yields 0; any other `readdir` failure is rethrown.
 *
 * @returns The number of files actually deleted.
 */
export async function evictExpired(
	rootDir: string,
	ttls: CacheTtls = DEFAULT_CACHE_TTLS,
	now: number = Date.now(),
): Promise<number> {
	const names = await fs.readdir(rootDir).catch((err: unknown) => {
		if ((err as NodeJS.ErrnoException).code === "ENOENT") return null;
		throw err;
	});
	if (names === null) return 0;

	let removedCount = 0;
	for (const name of names) {
		const filePath = path.join(rootDir, name);
		const info = await fs.stat(filePath).catch(() => null);
		if (info === null || !info.isFile()) continue;

		const horizonMs = ttlMsForKey(parseSlotCacheTtlClass(name) ?? "long", ttls);
		const isExpired = now - info.mtimeMs > horizonMs;
		if (!isExpired) continue;

		// Best-effort cleanup; another process may already have removed it.
		const removed = await fs.unlink(filePath).then(
			() => true,
			() => false,
		);
		if (removed) removedCount += 1;
	}
	return removedCount;
}
|
|
233
|
+
|
|
234
|
+
/** One regular file in the slot-save directory, as reported by `readCacheStats`. */
export interface CacheStatsEntry {
	/** Directory entry name (basename, not a full path). */
	file: string;
	/** File size in bytes, from `stat.size`. */
	sizeBytes: number;
	/** Last-modified time in milliseconds, from `stat.mtimeMs`. */
	mtimeMs: number;
	/** `now - mtimeMs`, clamped to be non-negative. */
	ageMs: number;
}
|
|
240
|
+
|
|
241
|
+
/**
 * Snapshot of the on-disk slot-save directory. Used by the public
 * `getLocalCacheStats()` debugging endpoint.
 *
 * Lists the regular files directly inside `rootDir` with their size,
 * mtime and age relative to `now`, sorted by filename via
 * `localeCompare`. Entries that cannot be stat'ed and non-file entries
 * are omitted. A missing `rootDir` (ENOENT) yields an empty list; any
 * other `readdir` failure is rethrown.
 */
export async function readCacheStats(
	rootDir: string,
	now: number = Date.now(),
): Promise<CacheStatsEntry[]> {
	const names = await fs.readdir(rootDir).catch((err: unknown) => {
		if ((err as NodeJS.ErrnoException).code === "ENOENT") return null;
		throw err;
	});
	if (names === null) return [];

	const snapshot: CacheStatsEntry[] = [];
	for (const name of names) {
		const info = await fs.stat(path.join(rootDir, name)).catch(() => null);
		if (info === null || !info.isFile()) continue;

		const { size, mtimeMs } = info;
		snapshot.push({
			file: name,
			sizeBytes: size,
			mtimeMs,
			ageMs: Math.max(0, now - mtimeMs),
		});
	}
	return snapshot.sort((a, b) => a.file.localeCompare(b.file));
}
|
|
276
|
+
|
|
277
|
+
/**
 * Read `providerOptions.eliza.promptCacheKey`, the location the runtime's
 * `buildProviderCachePlan` stores the key under.
 *
 * Returns `null` when `providerOptions` or its `eliza` member is not an
 * object, or when the key is missing or not a non-empty string — callers
 * fall back to the default "_default" session in that case.
 */
export function extractPromptCacheKey(providerOptions: unknown): string | null {
	if (typeof providerOptions !== "object" || providerOptions === null) {
		return null;
	}
	const namespace = (providerOptions as Record<string, unknown>).eliza;
	if (typeof namespace !== "object" || namespace === null) {
		return null;
	}
	const candidate = (namespace as Record<string, unknown>).promptCacheKey;
	return typeof candidate === "string" && candidate !== "" ? candidate : null;
}
|
|
292
|
+
|
|
293
|
+
/**
 * Read `providerOptions.eliza.prefixHash`. Same contract as
 * `extractPromptCacheKey`: the result is `null` unless the value is a
 * non-empty string nested under an object-valued `eliza` member.
 *
 * The prefix hash covers ONLY the stable prompt prefix (system prompt +
 * tool definitions + large constant context), so a runtime timestamp in
 * the unstable tail does not invalidate it.
 *
 * Local backends prefer this over `promptCacheKey` when available because
 * it gives the strongest "same prefix → same slot" guarantee: two
 * conversations with byte-identical stable prefixes will land on the same
 * slot regardless of how their tail content differs.
 */
export function extractPrefixHash(providerOptions: unknown): string | null {
	const isObject = (value: unknown): value is Record<string, unknown> =>
		typeof value === "object" && value !== null;

	if (!isObject(providerOptions)) return null;
	const elizaOptions = providerOptions.eliza;
	if (!isObject(elizaOptions)) return null;

	const value = elizaOptions.prefixHash;
	if (typeof value === "string" && value.length > 0) {
		return value;
	}
	return null;
}
|
|
313
|
+
|
|
314
|
+
/**
 * Stable annotation describing a single segment of the prompt as it was
 * emitted by the runtime planner. The cache-bridge consumes this to
 * compute a stable-prefix-only hash for slot pinning, without having to
 * look at the (timestamp-laden) tail.
 *
 * Mirrors `PromptSegment` in @elizaos/core/src/types/model.ts but is kept
 * standalone so the cache-bridge can be imported by the local-inference
 * backends without a hard dep on `@elizaos/core`.
 */
export interface AnnotatedPromptSegment {
	/** Text of the segment; this is what `hashStablePrefix` feeds into the hash. */
	content: string;
	/** Whether the segment is stable; `hashStablePrefix` stops at the first `false`. */
	stable: boolean;
}
|
|
328
|
+
|
|
329
|
+
/**
 * Hash the longest stable prefix of `segments`. Stops at the first
 * unstable segment, so a runtime timestamp in the unstable tail never
 * shifts the hash. Returns `null` when no stable segment exists (empty
 * input, or the first segment is unstable), signaling to the caller that
 * prefix-cache reuse cannot be derived purely from the prompt structure
 * (fall back to the prompt-cache-key path instead).
 *
 * Each segment's content is followed by a NUL byte so that segment
 * boundaries are part of the hash: `["ab", "c"]` and `["a", "bc"]` yield
 * different digests.
 *
 * The hash is sha256-truncated to 16 hex chars, matching `buildModelHash`
 * — short enough for log lines, wide enough that collision is not a
 * realistic concern for any plausible number of concurrent prefixes.
 */
export function hashStablePrefix(
	segments: readonly AnnotatedPromptSegment[],
): string | null {
	const firstUnstable = segments.findIndex((segment) => !segment.stable);
	const stablePrefix =
		firstUnstable === -1 ? segments : segments.slice(0, firstUnstable);
	if (stablePrefix.length === 0) return null;

	const hash = createHash("sha256");
	for (const segment of stablePrefix) {
		hash.update(segment.content);
		// Written as an escape (not a literal control character) so the
		// separator survives editors and tooling that strip NUL bytes.
		hash.update("\0");
	}
	return hash.digest("hex").slice(0, 16);
}
|
|
355
|
+
|
|
356
|
+
/**
 * Pull the per-segment stable annotations out of
 * `providerOptions.eliza.promptSegments`. The runtime emits these when a
 * structured prompt is available — local backends use them to compute
 * `hashStablePrefix` directly, without having to re-parse the prompt text.
 *
 * Each returned segment is a fresh `{ content, stable }` object; any
 * other properties on the input entries are dropped.
 *
 * Returns `null` when the field is absent or malformed — not an array,
 * or any entry is not an object with a string `content` and a boolean
 * `stable`. Callers then fall back to `extractPromptCacheKey` /
 * `extractPrefixHash`. An empty array yields an empty array.
 */
export function extractAnnotatedSegments(
	providerOptions: unknown,
): AnnotatedPromptSegment[] | null {
	if (typeof providerOptions !== "object" || providerOptions === null) {
		return null;
	}
	const namespace = (providerOptions as Record<string, unknown>).eliza;
	if (typeof namespace !== "object" || namespace === null) {
		return null;
	}
	const rawSegments = (namespace as Record<string, unknown>).promptSegments;
	if (!Array.isArray(rawSegments)) {
		return null;
	}

	const segments: AnnotatedPromptSegment[] = [];
	for (const candidate of rawSegments) {
		if (typeof candidate !== "object" || candidate === null) {
			return null;
		}
		const { content, stable } = candidate as {
			content?: unknown;
			stable?: unknown;
		};
		const wellFormed =
			typeof content === "string" && typeof stable === "boolean";
		if (!wellFormed) {
			return null;
		}
		segments.push({ content, stable });
	}
	return segments;
}
|
|
383
|
+
|
|
384
|
+
/**
 * Read the conversation handle id from
 * `providerOptions.eliza.conversationId`. The runtime sets it when the
 * calling context represents a long-lived conversation (chat handler,
 * planner loop). When present, local backends should use it as the
 * primary slot key — it's stable across turns regardless of prompt
 * content drift, which gives the strongest possible cache reuse for
 * agentic loops.
 *
 * Returns `null` unless the value is a non-empty string nested under an
 * object-valued `eliza` member.
 */
export function extractConversationId(providerOptions: unknown): string | null {
	const elizaOptions =
		typeof providerOptions === "object" && providerOptions !== null
			? (providerOptions as Record<string, unknown>).eliza
			: null;
	if (typeof elizaOptions !== "object" || elizaOptions === null) {
		return null;
	}

	const id = (elizaOptions as Record<string, unknown>).conversationId;
	const isUsable = typeof id === "string" && id.length !== 0;
	return isUsable ? id : null;
}
|
|
401
|
+
|
|
402
|
+
/**
 * Pick the stable per-call cache key for the local backends. The first
 * available source wins:
 *   1. Conversation id → `conv:<id>` — strongest signal, identical
 *      across turns.
 *   2. Annotated stable-prefix hash → `seg:<hash>` — survives
 *      unstable-tail drift.
 *   3. `prefixHash` from the runtime cache plan → `pfx:<hash>` — already
 *      stable-only via `cachePrefixSegments` upstream.
 *   4. `promptCacheKey` (`v5:<prefixHash>`), returned unprefixed —
 *      back-compat fallback.
 * Returns null when none are available.
 */
export function resolveLocalCacheKey(providerOptions: unknown): string | null {
	const conversationId = extractConversationId(providerOptions);
	if (conversationId !== null) {
		return `conv:${conversationId}`;
	}

	const annotated = extractAnnotatedSegments(providerOptions);
	const segmentHash = annotated === null ? null : hashStablePrefix(annotated);
	if (segmentHash !== null) {
		return `seg:${segmentHash}`;
	}

	const prefixHash = extractPrefixHash(providerOptions);
	return prefixHash !== null
		? `pfx:${prefixHash}`
		: extractPromptCacheKey(providerOptions);
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local inference catalog re-exports.
|
|
3
|
+
*
|
|
4
|
+
* The canonical catalog (Eliza-1 tier ids, default-eligibility set,
|
|
5
|
+
* `MODEL_CATALOG`, HuggingFace URL builders) lives in
|
|
6
|
+
* `@elizaos/shared/local-inference`. This shim preserves the historical
|
|
7
|
+
* import path `./catalog` for server-side code.
|
|
8
|
+
*/
|
|
9
|
+
export { buildHuggingFaceResolveUrl, buildHuggingFaceResolveUrlForPath, DEFAULT_ELIGIBLE_MODEL_IDS, ELIZA_1_HF_REPO, ELIZA_1_MTP_TIER_IDS, ELIZA_1_PLACEHOLDER_IDS, ELIZA_1_RELEASE_TIER_IDS, ELIZA_1_TIER_IDS, ELIZA_1_TIER_PUBLISH_STATUS, ELIZA_1_VISION_TIER_IDS, type Eliza1TierId, eliza1TierPublishStatus, FIRST_RUN_DEFAULT_MODEL_ID, findCatalogModel, isDefaultEligibleId, MODEL_CATALOG, } from "@elizaos/shared";
|
|
10
|
+
//# sourceMappingURL=catalog.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"catalog.d.ts","sourceRoot":"","sources":["catalog.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EACN,0BAA0B,EAC1B,iCAAiC,EACjC,0BAA0B,EAC1B,eAAe,EACf,oBAAoB,EACpB,uBAAuB,EACvB,wBAAwB,EACxB,gBAAgB,EAChB,2BAA2B,EAC3B,uBAAuB,EACvB,KAAK,YAAY,EACjB,uBAAuB,EACvB,0BAA0B,EAC1B,gBAAgB,EAChB,mBAAmB,EACnB,aAAa,GACb,MAAM,iBAAiB,CAAC"}
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import {
|
|
3
|
+
buildHuggingFaceResolveUrl,
|
|
4
|
+
DEFAULT_ELIGIBLE_MODEL_IDS,
|
|
5
|
+
ELIZA_1_MTP_TIER_IDS,
|
|
6
|
+
ELIZA_1_TIER_IDS,
|
|
7
|
+
FIRST_RUN_DEFAULT_MODEL_ID,
|
|
8
|
+
findCatalogModel,
|
|
9
|
+
MODEL_CATALOG,
|
|
10
|
+
} from "./catalog";
|
|
11
|
+
import { recommendForFirstRun } from "./recommendation";
|
|
12
|
+
import { localInferenceService } from "./service";
|
|
13
|
+
|
|
14
|
+
describe("local inference catalog", () => {
|
|
15
|
+
it("ships exactly the visible Eliza-1 tiers", () => {
|
|
16
|
+
const visible = MODEL_CATALOG.filter((m) => !m.hiddenFromCatalog);
|
|
17
|
+
expect(visible.map((m) => m.id).sort()).toEqual(
|
|
18
|
+
[...ELIZA_1_TIER_IDS].sort(),
|
|
19
|
+
);
|
|
20
|
+
});
|
|
21
|
+
|
|
22
|
+
it("marks ONLY the Eliza-1 tiers as default-eligible", () => {
|
|
23
|
+
expect([...DEFAULT_ELIGIBLE_MODEL_IDS].sort()).toEqual(
|
|
24
|
+
[...ELIZA_1_TIER_IDS].sort(),
|
|
25
|
+
);
|
|
26
|
+
for (const id of ELIZA_1_TIER_IDS) {
|
|
27
|
+
expect(DEFAULT_ELIGIBLE_MODEL_IDS.has(id), `${id} not eligible`).toBe(
|
|
28
|
+
true,
|
|
29
|
+
);
|
|
30
|
+
}
|
|
31
|
+
for (const model of MODEL_CATALOG.filter((m) => !m.hiddenFromCatalog)) {
|
|
32
|
+
expect(model.id.startsWith("eliza-1-")).toBe(true);
|
|
33
|
+
}
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
it("uses eliza-1 size ids as user-facing display names", () => {
|
|
37
|
+
for (const id of ELIZA_1_TIER_IDS) {
|
|
38
|
+
const model = findCatalogModel(id);
|
|
39
|
+
expect(model, `${id} missing`).toBeTruthy();
|
|
40
|
+
expect(model?.displayName).toMatch(/^(?:Eliza-1\b|eliza-1-)/);
|
|
41
|
+
expect(model?.blurb).toMatch(/^(?:Eliza-1\b|eliza-1-)/);
|
|
42
|
+
expect(`${model?.displayName} ${model?.blurb}`).not.toMatch(
|
|
43
|
+
/\b(?:Qwen|Llama)\b/i,
|
|
44
|
+
);
|
|
45
|
+
}
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
it("uses the single elizaOS HuggingFace repo for every visible Eliza-1 tier", () => {
|
|
49
|
+
for (const model of MODEL_CATALOG.filter((m) => !m.hiddenFromCatalog)) {
|
|
50
|
+
const tier = model.id.slice("eliza-1-".length);
|
|
51
|
+
expect(model.hfRepo).toBe("elizaos/eliza-1");
|
|
52
|
+
expect(model.hfPathPrefix).toBe(`bundles/${tier}`);
|
|
53
|
+
expect(buildHuggingFaceResolveUrl(model)).toContain(
|
|
54
|
+
`/elizaos/eliza-1/resolve/main/bundles/${tier}/`,
|
|
55
|
+
);
|
|
56
|
+
}
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
// The service-level catalog must contain no entry flagged hiddenFromCatalog,
// and none of its entries may list companion model ids.
it("does not expose hidden companion entries in the hub", () => {
  const hubEntries = localInferenceService.getCatalog();
  const hubIds = new Set(hubEntries.map((entry) => entry.id));

  // Hidden ids that nevertheless appear in the hub listing.
  const leakedHiddenIds = MODEL_CATALOG.filter(
    (entry) => entry.hiddenFromCatalog,
  )
    .map((entry) => entry.id)
    .filter((hiddenId) => hubIds.has(hiddenId));
  expect(leakedHiddenIds).toEqual([]);

  const advertisedCompanions = hubEntries.flatMap(
    (entry) => entry.companionModelIds ?? [],
  );
  expect(advertisedCompanions).toEqual([]);
});
|
|
70
|
+
|
|
71
|
+
// The hub listing must be exactly the Eliza-1 tier ids, and every listed entry
// must be default-eligible.
it("keeps the visible model hub focused on Eliza-1 only", () => {
  const hubEntries = localInferenceService.getCatalog();

  const sortedHubIds = hubEntries.map((entry) => entry.id).sort();
  const sortedTierIds = [...ELIZA_1_TIER_IDS].sort();
  expect(sortedHubIds).toEqual(sortedTierIds);

  const eligibleCount = hubEntries.filter((entry) =>
    DEFAULT_ELIGIBLE_MODEL_IDS.has(entry.id),
  ).length;
  expect(eligibleCount).toBe(hubEntries.length);
});
|
|
81
|
+
|
|
82
|
+
// Any catalog entry whose blurb advertises a long context window must declare
// a numeric contextLength of at least 65536 tokens.
it("declares contextLength on every entry whose blurb claims a long window", () => {
  const longContextRegex =
    /\b(?:128k|256k|long.*context|long-context|128 ?k tokens?)\b/i;

  const offenders: string[] = MODEL_CATALOG.filter((entry) =>
    longContextRegex.test(entry.blurb),
  )
    .filter(
      (entry) =>
        typeof entry.contextLength !== "number" || entry.contextLength < 65536,
    )
    .map(
      (entry) =>
        `${entry.id} claims long context in blurb but contextLength=${String(entry.contextLength)}`,
    );

  // Collect every violation so one run reports them all.
  expect(offenders).toEqual([]);
});
|
|
99
|
+
|
|
100
|
+
// Pins the expected context window for each listed Eliza-1 tier id.
it("sets contextLength on every Eliza-1 tier per the tier matrix", () => {
  const tierMatrix: ReadonlyArray<readonly [string, number]> = [
    ["eliza-1-2b", 131072],
    ["eliza-1-4b", 131072],
    ["eliza-1-9b", 131072],
    ["eliza-1-27b", 131072],
    ["eliza-1-27b-256k", 262144],
  ];

  for (const [tierId, wantLength] of tierMatrix) {
    const entry = findCatalogModel(tierId);
    expect(entry, `${tierId} missing from catalog`).toBeTruthy();
    expect(entry?.contextLength, `${tierId} contextLength mismatch`).toBe(
      wantLength,
    );
  }
});
|
|
116
|
+
|
|
117
|
+
// Every entry in MODEL_CATALOG must carry a truthy tokenizerFamily; the ids of
// any entries without one are reported together.
it("sets a tokenizerFamily on every chat/code/reasoning entry", () => {
  const missingFamilyIds: string[] = MODEL_CATALOG.filter(
    (entry) => !entry.tokenizerFamily,
  ).map((entry) => entry.id);

  expect(missingFamilyIds).toEqual([]);
});
|
|
126
|
+
|
|
127
|
+
// Each id in ELIZA_1_MTP_TIER_IDS must declare the "draft-mtp" spec type and
// must not list companion model ids.
it("declares native MTP on every Eliza-1 tier", () => {
  for (const tierId of ELIZA_1_MTP_TIER_IDS) {
    const entry = findCatalogModel(tierId);

    const specType = entry?.runtime?.mtp?.specType;
    expect(specType, `${tierId} mtp`).toBe("draft-mtp");

    const companions = entry?.companionModelIds;
    expect(companions, `${tierId} companions`).toBeUndefined();
  }
});
|
|
134
|
+
|
|
135
|
+
// For every Eliza-1 tier: the preferred backend is "llama-cpp", all base
// kernels are required, MTP is "draft-mtp" with no companions, entries with a
// context window of 65536 tokens or more also require "turbo3_tcq", and
// "openvino" is never a required kernel.
it("declares the mandatory local runtime contract for every default tier", () => {
  const baseKernels = ["turbo3", "turbo4", "qjl_full", "polarquant"];
  for (const id of ELIZA_1_TIER_IDS) {
    const model = findCatalogModel(id);
    expect(model?.runtime?.preferredBackend, `${id} backend`).toBe(
      "llama-cpp",
    );
    for (const kernel of baseKernels) {
      expect(
        model?.runtime?.optimizations?.requiresKernel,
        `${id} kernel ${kernel}`,
      ).toContain(kernel);
    }
    expect(model?.runtime?.mtp?.specType, `${id} mtp`).toBe("draft-mtp");
    expect(model?.companionModelIds, `${id} companions`).toBeUndefined();
    if ((model?.contextLength ?? 0) >= 65536) {
      // Tagged with the tier id, matching the other assertions in this loop.
      expect(
        model?.runtime?.optimizations?.requiresKernel,
        `${id} kernel turbo3_tcq`,
      ).toContain("turbo3_tcq");
    }
    expect(
      model?.runtime?.optimizations?.requiresKernel,
      `${id} must not require openvino`,
    ).not.toContain("openvino");
  }
});
|
|
160
|
+
|
|
161
|
+
// No catalog entry, visible or hidden, may list a non-empty companionModelIds.
it("does not publish external speculative drafter companions", () => {
  const entriesWithCompanions = MODEL_CATALOG.filter(
    (entry) => entry.companionModelIds?.length,
  );

  expect(entriesWithCompanions).toEqual([]);
});
|
|
165
|
+
|
|
166
|
+
// Pins the quantization variants offered by every Eliza-1 tier (default
// "q4_k_m") and the ordered voice backends each tier declares.
it("declares the text quantization matrix and voice boundary by tier", () => {
  const expectedVariantIds = ["q3_k_m", "q4_k_m", "q5_k_m", "q6_k", "q8_0"];
  for (const id of ELIZA_1_TIER_IDS) {
    const model = findCatalogModel(id);
    // Messages carry the tier id so a failure names the tier that drifted.
    expect(
      model?.quantization?.defaultVariantId,
      `${id} default variant`,
    ).toBe("q4_k_m");
    expect(
      model?.quantization?.variants.map((v) => v.id),
      `${id} variants`,
    ).toEqual(expectedVariantIds);
  }

  // Mobile-class tiers (2b/4b) ship Kokoro only: it is smaller and faster and
  // is the exclusive mobile TTS. 9B keeps OmniVoice first with Kokoro bundled;
  // large tiers are OmniVoice-only.
  // See catalog.ts ELIZA_1_VOICE_BACKENDS for the policy rationale.
  const expectedVoiceBackends: ReadonlyArray<readonly [string, string[]]> = [
    ["eliza-1-2b", ["kokoro"]],
    ["eliza-1-4b", ["kokoro"]],
    ["eliza-1-9b", ["omnivoice", "kokoro"]],
    ["eliza-1-27b", ["omnivoice"]],
    ["eliza-1-27b-256k", ["omnivoice"]],
  ];
  for (const [id, backends] of expectedVoiceBackends) {
    expect(
      findCatalogModel(id)?.voiceBackends,
      `${id} voiceBackends`,
    ).toEqual(backends);
  }
});
|
|
196
|
+
|
|
197
|
+
// Visible entries must not mention implementation-family names in their
// display name, quant label, or blurb.
it("does not leak implementation-family names in visible catalog copy", () => {
  const banned = /\b(?:qwen|llama|turboquant|qjl|polarquant)\b/i;
  for (const model of MODEL_CATALOG.filter((m) => !m.hiddenFromCatalog)) {
    // Each message names the model and field so a leak can be located.
    expect(model.displayName, `${model.id} displayName`).not.toMatch(banned);
    expect(model.quant, `${model.id} quant`).not.toMatch(banned);
    expect(model.blurb, `${model.id} blurb`).not.toMatch(banned);
  }
});
|
|
205
|
+
|
|
206
|
+
// Every catalog id, hidden entries included, must start with "eliza-1-"; the
// ids that do not are reported together.
it("does not ship non-Eliza local model entries", () => {
  const foreignIds: string[] = MODEL_CATALOG.map((entry) => entry.id).filter(
    (entryId) => !entryId.startsWith("eliza-1-"),
  );

  expect(foreignIds).toEqual([]);
});
|
|
215
|
+
|
|
216
|
+
// A HuggingFace-search-shaped id must not be default-eligible and must not
// carry the "eliza-1-" id prefix.
it("keeps external HF search-shaped ids custom-only", () => {
  const externalId = "hf:some-org/custom-model::model.Q4_K_M.gguf";

  const isDefaultEligible = DEFAULT_ELIGIBLE_MODEL_IDS.has(externalId);
  const hasElizaPrefix = externalId.startsWith("eliza-1-");

  expect(isDefaultEligible).toBe(false);
  expect(hasElizaPrefix).toBe(false);
});
|
|
221
|
+
|
|
222
|
+
// The first-run default id must exist in the catalog and belong to the
// default-eligible set.
it("FIRST_RUN_DEFAULT_MODEL_ID resolves to a default-eligible Eliza-1 tier", () => {
  const firstRunEntry = findCatalogModel(FIRST_RUN_DEFAULT_MODEL_ID);
  expect(firstRunEntry, `${FIRST_RUN_DEFAULT_MODEL_ID} missing`).toBeTruthy();

  const isEligible = DEFAULT_ELIGIBLE_MODEL_IDS.has(FIRST_RUN_DEFAULT_MODEL_ID);
  expect(isEligible).toBe(true);
});
|
|
229
|
+
|
|
230
|
+
// The first-run recommendation must be non-null, equal the configured default
// id, be default-eligible, and carry an Eliza-1 branded display name.
it("recommendForFirstRun resolves to a default-eligible Eliza-1 tier", () => {
  const recommendation = recommendForFirstRun();
  expect(recommendation).not.toBeNull();
  // Explicit guard so the type is narrowed for the assertions below.
  if (!recommendation) throw new Error("missing first-run recommendation");

  const { id: recommendedId, displayName } = recommendation;
  expect(recommendedId).toBe(FIRST_RUN_DEFAULT_MODEL_ID);
  expect(DEFAULT_ELIGIBLE_MODEL_IDS.has(recommendedId)).toBe(true);
  expect(displayName).toMatch(/^(?:Eliza-1\b|eliza-1-)/);
});
|
|
238
|
+
});
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local inference catalog re-exports.
|
|
3
|
+
*
|
|
4
|
+
* The canonical catalog (Eliza-1 tier ids, default-eligibility set,
|
|
5
|
+
* `MODEL_CATALOG`, HuggingFace URL builders) lives in
|
|
6
|
+
* `@elizaos/shared/local-inference`. This shim preserves the historical
|
|
7
|
+
* import path `./catalog` for server-side code.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
export {
|
|
11
|
+
buildHuggingFaceResolveUrl,
|
|
12
|
+
buildHuggingFaceResolveUrlForPath,
|
|
13
|
+
DEFAULT_ELIGIBLE_MODEL_IDS,
|
|
14
|
+
ELIZA_1_HF_REPO,
|
|
15
|
+
ELIZA_1_MTP_TIER_IDS,
|
|
16
|
+
ELIZA_1_PLACEHOLDER_IDS,
|
|
17
|
+
ELIZA_1_RELEASE_TIER_IDS,
|
|
18
|
+
ELIZA_1_TIER_IDS,
|
|
19
|
+
ELIZA_1_TIER_PUBLISH_STATUS,
|
|
20
|
+
ELIZA_1_VISION_TIER_IDS,
|
|
21
|
+
type Eliza1TierId,
|
|
22
|
+
eliza1TierPublishStatus,
|
|
23
|
+
FIRST_RUN_DEFAULT_MODEL_ID,
|
|
24
|
+
findCatalogModel,
|
|
25
|
+
isDefaultEligibleId,
|
|
26
|
+
MODEL_CATALOG,
|
|
27
|
+
} from "@elizaos/shared";
|