@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +82 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.d.ts +29 -0
- package/src/actions/transcription-control.d.ts.map +1 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +8 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1082 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +205 -0
- package/src/routes/local-inference-asr-route.ts +163 -0
- package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +808 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/transcript-audio-store.d.ts +15 -0
- package/src/routes/transcript-audio-store.d.ts.map +1 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.d.ts +36 -0
- package/src/routes/transcripts-routes.d.ts.map +1 -0
- package/src/routes/transcripts-routes.test.ts +144 -0
- package/src/routes/transcripts-routes.ts +159 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1448 -0
- package/src/runtime/index.d.ts +15 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +33 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bionic-host-loader.d.ts +46 -0
- package/src/services/bionic-host-loader.d.ts.map +1 -0
- package/src/services/bionic-host-loader.test.ts +133 -0
- package/src/services/bionic-host-loader.ts +180 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +238 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +747 -0
- package/src/services/downloader.ts +925 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +540 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1909 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.d.ts +56 -0
- package/src/services/gpu-detect.d.ts.map +1 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +231 -0
- package/src/services/hardware.ts +410 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +29 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +211 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +689 -0
- package/src/services/manifest/schema.d.ts +713 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +653 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +567 -0
- package/src/services/memory-arbiter.d.ts +318 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +925 -0
- package/src/services/memory-monitor.d.ts +122 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +297 -0
- package/src/services/memory-pressure.d.ts +130 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +407 -0
- package/src/services/routing-policy.d.ts +69 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.test.ts +164 -0
- package/src/services/routing-policy.ts +297 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.d.ts +33 -0
- package/src/services/system-memory.d.ts.map +1 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/asr-timed.real.test.ts +141 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +243 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +759 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2302 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +674 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +728 -0
- package/src/services/voice/ffi-bindings.ts +3225 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.d.ts +37 -0
- package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.d.ts +41 -0
- package/src/services/voice/transcript-service.d.ts.map +1 -0
- package/src/services/voice/transcript-service.test.ts +137 -0
- package/src/services/voice/transcript-service.ts +141 -0
- package/src/services/voice/transcript-store.d.ts +53 -0
- package/src/services/voice/transcript-store.d.ts.map +1 -0
- package/src/services/voice/transcript-store.test.ts +153 -0
- package/src/services/voice/transcript-store.ts +132 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +418 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import net from "node:net";
|
|
2
|
+
import { afterEach, describe, expect, it } from "vitest";
|
|
3
|
+
import { BionicHostLoader } from "./bionic-host-loader";
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Real-IPC test: stand up an actual abstract-namespace AF_UNIX server (the same
|
|
7
|
+
* transport ElizaBionicInferenceServer.java binds on the device) and drive the
|
|
8
|
+
* loader against it. No mocks — this exercises the real node:net framing.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
/**
 * Encode `json` as one wire frame: a 4-byte big-endian byte length followed
 * by the UTF-8 payload.
 */
function frame(json: string): Buffer {
  const body = Buffer.from(json, "utf8");
  const header = Buffer.alloc(4);
  header.writeUInt32BE(body.length, 0);
  return Buffer.concat([header, body]);
}
|
|
18
|
+
|
|
19
|
+
/**
 * Start a test host listening on the abstract-namespace socket `name`.
 *
 * Each connection buffers incoming bytes until one complete length-prefixed
 * request frame is available, parses its JSON payload, and writes back
 * `respond(req)` wrapped in a frame.
 */
function startHost(
  name: string,
  respond: (req: Record<string, unknown>) => string,
): net.Server {
  const server = net.createServer((conn) => {
    let received = Buffer.alloc(0);
    // Stays at -1 until the 4-byte length header has been read.
    let bodyLength = -1;
    conn.on("data", (chunk) => {
      received = Buffer.concat([received, chunk]);
      if (bodyLength < 0 && received.length >= 4) {
        bodyLength = received.readUInt32BE(0);
      }
      const frameComplete =
        bodyLength >= 0 && received.length >= 4 + bodyLength;
      if (!frameComplete) return;
      const body = received.subarray(4, 4 + bodyLength).toString("utf8");
      conn.write(frame(respond(JSON.parse(body))));
    });
  });
  // A leading NUL byte selects the abstract socket namespace.
  server.listen({ path: `\0${name}` });
  return server;
}
|
|
39
|
+
|
|
40
|
+
/** Server started by the current test, if any; closed after every test. */
let host: net.Server | null = null;

afterEach(() => {
  if (host !== null) host.close();
  host = null;
});

/** Abstract socket name shared by the tests; the PID keeps it unique per process. */
const SOCK = `eliza-bionic-test-${process.pid}`;
|
|
47
|
+
|
|
48
|
+
describe("BionicHostLoader (real abstract-UDS)", () => {
  it("round-trips a buffered generate and returns the host completion", async () => {
    let seen: Record<string, unknown> | null = null;
    host = startHost(SOCK, (req) => {
      seen = req;
      return JSON.stringify({
        ok: true,
        text: "Two plus two equals four.",
        tokens: 7,
        ms: 500,
        tokS: 14,
      });
    });
    const loader = new BionicHostLoader(SOCK);
    await loader.loadModel({
      modelPath: "/data/x/eliza-1/bundle/text/model.gguf",
    });
    expect(loader.currentModelPath()).toBe(
      "/data/x/eliza-1/bundle/text/model.gguf",
    );
    const out = await loader.generate({
      prompt: "what is 2+2?",
      maxTokens: 32,
    });
    expect(out).toBe("Two plus two equals four.");
    // bundleDir derived from the .../text/<model>.gguf layout.
    expect(seen).toMatchObject({
      op: "generate",
      prompt: "what is 2+2?",
      maxTokens: 32,
      bundleDir: "/data/x/eliza-1/bundle",
    });
  });

  it("forwards an empty bundleDir when the model is not in a text/ bundle", async () => {
    let seen: Record<string, unknown> | null = null;
    host = startHost(SOCK, (req) => {
      seen = req;
      return JSON.stringify({ ok: true, text: "hi" });
    });
    const loader = new BionicHostLoader(SOCK);
    await loader.loadModel({ modelPath: "/models/flat-model.gguf" });
    await loader.generate({ prompt: "hi" });
    expect((seen as { bundleDir?: string } | null)?.bundleDir).toBe("");
  });

  it("throws when the host returns ok:false", async () => {
    host = startHost(SOCK, () =>
      JSON.stringify({ ok: false, error: "no vulkan device" }),
    );
    const loader = new BionicHostLoader(SOCK);
    await loader.loadModel({ modelPath: "/m/text/x.gguf" });
    await expect(loader.generate({ prompt: "x" })).rejects.toThrow(
      /no vulkan device/,
    );
  });

  it("survives a response split across multiple data chunks (multibyte safe)", async () => {
    const text = `héllo 🌊 ünïcode ${"x".repeat(5000)}`;
    host = net.createServer((sock) => {
      let buf = Buffer.alloc(0);
      let expected = -1;
      sock.on("data", (d) => {
        buf = Buffer.concat([buf, d]);
        if (expected < 0 && buf.length >= 4) expected = buf.readUInt32BE(0);
        if (expected >= 0 && buf.length >= 4 + expected) {
          const full = frame(JSON.stringify({ ok: true, text }));
          // Split right after the first UTF-8 lead byte of the payload so the
          // two writes cut a multibyte character in half. A fixed small
          // offset would land in the ASCII `{"ok":` prefix and never exercise
          // multibyte reassembly. The 4-byte length header is skipped because
          // it can itself contain bytes >= 0x80.
          const leadByteAt = full.findIndex((b, i) => i >= 4 && b >= 0x80);
          const splitAt = leadByteAt + 1;
          sock.write(full.subarray(0, splitAt));
          setTimeout(() => sock.write(full.subarray(splitAt)), 5);
        }
      });
    });
    host.listen({ path: `\0${SOCK}` });
    const loader = new BionicHostLoader(SOCK);
    await loader.loadModel({ modelPath: "/m/text/x.gguf" });
    const out = await loader.generate({ prompt: "x" });
    expect(out).toBe(text);
  });

  it("rejects when the host is unreachable", async () => {
    const loader = new BionicHostLoader(`eliza-bionic-absent-${process.pid}`);
    await loader.loadModel({ modelPath: "/m/text/x.gguf" });
    await expect(loader.generate({ prompt: "x" })).rejects.toThrow();
  });
});
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BionicHostLoader — the agent-side half of the on-device GPU delegation path.
|
|
3
|
+
*
|
|
4
|
+
* On Android the elizaOS agent runs as embedded bun under the musl loader, whose
|
|
5
|
+
* restricted linker namespace cannot load the bionic Android Vulkan driver (its
|
|
6
|
+
* HIDL/HAL closure) — so the musl agent can only run inference on the CPU. The
|
|
7
|
+
* GPU is reachable only from the normal bionic `ai.elizaos.app` process, where
|
|
8
|
+
* `ElizaBionicInferenceServer` (Java) has loaded `libelizainference.so` +
|
|
9
|
+
* `libggml-vulkan.so` and offloads the model to the Mali GPU.
|
|
10
|
+
*
|
|
11
|
+
* This loader implements the standard {@link LocalInferenceLoader} contract, so
|
|
12
|
+
* the TEXT_SMALL / TEXT_LARGE handlers in `ensure-local-inference-handler.ts`
|
|
13
|
+
* route through it transparently. `generate()` sends the prompt to the bionic
|
|
14
|
+
* host over an abstract-namespace `AF_UNIX` socket and gets the GPU completion
|
|
15
|
+
* back — the whole decode loop runs server-side, so there is no per-token
|
|
16
|
+
* two-process round trip.
|
|
17
|
+
*
|
|
18
|
+
* This is the buffered first slice (one GENERATE request → one full completion).
|
|
19
|
+
* Server-push per-step streaming, embed, and cancel are layered on later via the
|
|
20
|
+
* shared `LlmStreamingBinding`; the wire framing already carries an `op`
|
|
21
|
+
* discriminator for that.
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
import net from "node:net";
|
|
25
|
+
import path from "node:path";
|
|
26
|
+
import { logger } from "@elizaos/core";
|
|
27
|
+
import type {
|
|
28
|
+
LocalInferenceLoadArgs,
|
|
29
|
+
LocalInferenceLoader,
|
|
30
|
+
} from "./active-model";
|
|
31
|
+
|
|
32
|
+
/**
 * Budget in milliseconds (two minutes) covering connect plus the full round
 * trip. A cold GPU decode of a long reply fits.
 */
const REQUEST_TIMEOUT_MS = 2 * 60 * 1000;
/** Defensive ceiling in bytes (64 MiB) on a single response frame (a full completion). */
const MAX_FRAME_BYTES = 64 * 2 ** 20;
|
|
36
|
+
|
|
37
|
+
/** JSON payload the host sends back for a `generate` request. */
interface BionicGenerateResponse {
  /** `false` when the host reports a failure; `error` then carries the reason. */
  ok: boolean;
  /** Completion text on success. */
  text?: string;
  /** Failure description, surfaced in the thrown error message. */
  error?: string;
  /** Generated token count as reported by the host (only used for logging). */
  tokens?: number;
  /** Presumably the elapsed decode time in milliseconds; not read in this file. */
  ms?: number;
  /** Throughput in tokens per second (only used for logging). */
  tokS?: number;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Work out the fused-bundle root for a model GGUF path.
 *
 * The host's `eliza_inference_create(bundleDir)` wants the directory that
 * contains `text/<model>.gguf`. When the model file sits directly inside a
 * directory named `text`, that directory's parent is returned. For any other
 * layout, or an empty path, the result is `""` so the host falls back to its
 * default bundle.
 *
 * @param modelPath path of the model GGUF file
 * @returns the bundle root, or `""` when it cannot be derived
 */
function deriveBundleDir(modelPath: string): string {
  if (!modelPath) return "";
  const parent = path.dirname(modelPath);
  return path.basename(parent) === "text" ? path.dirname(parent) : "";
}
|
|
58
|
+
|
|
59
|
+
/**
 * {@link LocalInferenceLoader} that delegates text generation to the bionic
 * host process over an abstract-namespace `AF_UNIX` socket.
 *
 * `loadModel()` / `unloadModel()` only record state on this object; nothing is
 * sent to the host until `generate()` is called, which opens one fresh
 * connection per request.
 */
export class BionicHostLoader implements LocalInferenceLoader {
  private modelPath: string | null = null;
  private bundleDir = "";

  /** @param socketName abstract-namespace socket name (no leading NUL). */
  constructor(private readonly socketName: string) {}

  /**
   * Record the active model path and derive the bundle directory that is
   * forwarded with every subsequent `generate()` request.
   */
  async loadModel(args: LocalInferenceLoadArgs): Promise<void> {
    this.modelPath = args.modelPath;
    this.bundleDir = deriveBundleDir(args.modelPath);
    logger.info(
      `[BionicHostLoader] active model ${args.modelPath} (bundle ${this.bundleDir || "<host-default>"})`,
    );
  }

  /** Forget the active model and the bundle directory derived from it. */
  async unloadModel(): Promise<void> {
    this.modelPath = null;
    // Clear the derived bundle as well, so a generate() issued after unload
    // cannot keep sending the unloaded model's directory to the host.
    this.bundleDir = "";
  }

  /** @returns the path passed to the last `loadModel()`, or `null` when unloaded. */
  currentModelPath(): string | null {
    return this.modelPath;
  }

  /**
   * Send one buffered `generate` request to the host and return the completion.
   *
   * Only `prompt`, `maxTokens` (default 256) and `temperature` (default 0) are
   * forwarded, together with the current bundle directory. `stopSequences` and
   * `cacheKey` are accepted for interface compatibility but are not sent.
   *
   * @returns the completion text, or `""` when the host reply has no string `text`
   * @throws Error when the host replies `ok: false`, or when the round trip
   *   fails (socket error, timeout, oversized or malformed frame)
   */
  async generate(args: {
    prompt: string;
    stopSequences?: string[];
    maxTokens?: number;
    temperature?: number;
    cacheKey?: string;
  }): Promise<string> {
    const res = await this.roundTrip<BionicGenerateResponse>({
      op: "generate",
      bundleDir: this.bundleDir,
      prompt: args.prompt,
      maxTokens: args.maxTokens ?? 256,
      temperature: args.temperature ?? 0,
    });
    if (!res.ok) {
      throw new Error(
        `[BionicHostLoader] host generate failed: ${res.error ?? "unknown error"}`,
      );
    }
    if (typeof res.tokS === "number") {
      logger.debug(
        `[BionicHostLoader] generated ${res.tokens ?? "?"} tok @ ${res.tokS.toFixed(1)} tok/s on the bionic GPU host`,
      );
    }
    // The reply is unvalidated JSON: never hand a non-string back to callers.
    return typeof res.text === "string" ? res.text : "";
  }

  /**
   * One request → one response over a fresh connection. Length-prefixed frames:
   * `[int32 BE byte length][UTF-8 JSON]` in each direction.
   *
   * The promise settles exactly once: on a complete JSON-object response, an
   * oversized or malformed frame, a socket error, an early close, or after
   * `REQUEST_TIMEOUT_MS`. The socket is destroyed when it settles.
   */
  private roundTrip<T>(request: Record<string, unknown>): Promise<T> {
    const payload = Buffer.from(JSON.stringify(request), "utf8");
    const frame = Buffer.allocUnsafe(4 + payload.length);
    frame.writeUInt32BE(payload.length, 0);
    payload.copy(frame, 4);

    return new Promise<T>((resolve, reject) => {
      // Abstract-namespace socket: a leading NUL byte in the path.
      const sock = net.connect({ path: `\0${this.socketName}` });
      let settled = false;
      // Chunks are collected and joined only when needed, so a large reply is
      // not re-copied on every `data` event.
      const parts: Buffer[] = [];
      let received = 0;
      // Payload length from the header; -1 until the header has arrived.
      let expected = -1;

      const finish = (err: Error | null, value?: T) => {
        if (settled) return;
        settled = true;
        clearTimeout(timer);
        sock.destroy();
        if (err) reject(err);
        else resolve(value as T);
      };

      const timer = setTimeout(
        () => finish(new Error("[BionicHostLoader] request timed out")),
        REQUEST_TIMEOUT_MS,
      );

      sock.on("connect", () => sock.write(frame));
      sock.on("data", (d: Buffer) => {
        if (settled) return;
        parts.push(d);
        received += d.length;

        if (expected < 0) {
          if (received < 4) return;
          const head = Buffer.concat(parts, received);
          parts.length = 0;
          parts.push(head);
          // Read unsigned: a negative int32 written by the host shows up as a
          // value above 2^31 and is rejected by the ceiling check below.
          expected = head.readUInt32BE(0);
          if (expected > MAX_FRAME_BYTES) {
            finish(
              new Error(
                `[BionicHostLoader] bad response frame length ${expected}`,
              ),
            );
            return;
          }
        }

        if (received < 4 + expected) return;

        const json = Buffer.concat(parts, received)
          .subarray(4, 4 + expected)
          .toString("utf8");
        let parsed: unknown;
        try {
          parsed = JSON.parse(json);
        } catch (e) {
          finish(
            new Error(
              `[BionicHostLoader] malformed response: ${e instanceof Error ? e.message : String(e)}`,
            ),
          );
          return;
        }
        // Callers read fields off the reply, so anything but a JSON object
        // (e.g. `null`, a number, an array) is a protocol violation.
        if (
          typeof parsed !== "object" ||
          parsed === null ||
          Array.isArray(parsed)
        ) {
          finish(
            new Error(
              "[BionicHostLoader] malformed response: expected a JSON object",
            ),
          );
          return;
        }
        finish(null, parsed as T);
      });
      sock.on("error", (e: Error) =>
        finish(new Error(`[BionicHostLoader] socket error: ${e.message}`)),
      );
      sock.on("close", () => {
        if (!settled)
          finish(
            new Error(
              "[BionicHostLoader] host closed the connection before responding",
            ),
          );
      });
    });
  }
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bundled-models bootstrap for AOSP / on-device installs.
|
|
3
|
+
*
|
|
4
|
+
* The AOSP build pipeline stages Eliza-1 models into the APK at
|
|
5
|
+
* `assets/agent/models/{file}.gguf` plus a
|
|
6
|
+
* `manifest.json` describing each one (id, role, sha256, sizeBytes).
|
|
7
|
+
* `ElizaAgentService.extractAssetsIfNeeded()` copies those files into
|
|
8
|
+
* `$ELIZA_STATE_DIR/local-inference/models/` on first launch.
|
|
9
|
+
*
|
|
10
|
+
* This module reads the manifest at runtime startup and registers each
|
|
11
|
+
* file as an eliza-owned model in the local-inference registry, so the
|
|
12
|
+
* auto-assign pass picks them up for TEXT_LARGE / TEXT_SMALL /
|
|
13
|
+
* TEXT_EMBEDDING slots without needing the user to download anything.
|
|
14
|
+
*
|
|
15
|
+
* Idempotent: re-running with the registry already populated is a
|
|
16
|
+
* no-op for unchanged entries (`upsertElizaModel` overwrites entries
|
|
17
|
+
* with the same id, so updated sha256s on a later re-bundle replace
|
|
18
|
+
* the old metadata cleanly).
|
|
19
|
+
*
|
|
20
|
+
* Source classification: the runtime treats bundled models as
|
|
21
|
+
* `source: "eliza-download"` because Eliza ships the file and Eliza
|
|
22
|
+
* owns it on disk — same lifecycle as a user-initiated download
|
|
23
|
+
* (uninstall removes the file, the registry tracks the install). The
|
|
24
|
+
* only difference is the file arrived via APK extraction rather than
|
|
25
|
+
* an HTTP transfer.
|
|
26
|
+
*/
|
|
27
|
+
/**
|
|
28
|
+
* Walk the manifest and register every bundled GGUF file in the
|
|
29
|
+
* local-inference registry. Returns the number of entries successfully
|
|
30
|
+
* registered. A missing manifest is normal on Capacitor / desktop /
|
|
31
|
+
* non-AOSP installs and returns 0 silently.
|
|
32
|
+
*/
|
|
33
|
+
export declare function registerBundledModels(): Promise<number>;
|
|
34
|
+
//# sourceMappingURL=bundled-models.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bundled-models.d.ts","sourceRoot":"","sources":["bundled-models.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AA2DH;;;;;GAKG;AACH,wBAAsB,qBAAqB,IAAI,OAAO,CAAC,MAAM,CAAC,CAsC7D"}
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bundled-models bootstrap for AOSP / on-device installs.
|
|
3
|
+
*
|
|
4
|
+
* The AOSP build pipeline stages Eliza-1 models into the APK at
|
|
5
|
+
* `assets/agent/models/{file}.gguf` plus a
|
|
6
|
+
* `manifest.json` describing each one (id, role, sha256, sizeBytes).
|
|
7
|
+
* `ElizaAgentService.extractAssetsIfNeeded()` copies those files into
|
|
8
|
+
* `$ELIZA_STATE_DIR/local-inference/models/` on first launch.
|
|
9
|
+
*
|
|
10
|
+
* This module reads the manifest at runtime startup and registers each
|
|
11
|
+
* file as an eliza-owned model in the local-inference registry, so the
|
|
12
|
+
* auto-assign pass picks them up for TEXT_LARGE / TEXT_SMALL /
|
|
13
|
+
* TEXT_EMBEDDING slots without needing the user to download anything.
|
|
14
|
+
*
|
|
15
|
+
* Idempotent: re-running with the registry already populated is a
|
|
16
|
+
* no-op for unchanged entries (`upsertElizaModel` overwrites entries
|
|
17
|
+
* with the same id, so updated sha256s on a later re-bundle replace
|
|
18
|
+
* the old metadata cleanly).
|
|
19
|
+
*
|
|
20
|
+
* Source classification: the runtime treats bundled models as
|
|
21
|
+
* `source: "eliza-download"` because Eliza ships the file and Eliza
|
|
22
|
+
* owns it on disk — same lifecycle as a user-initiated download
|
|
23
|
+
* (uninstall removes the file, the registry tracks the install). The
|
|
24
|
+
* only difference is the file arrived via APK extraction rather than
|
|
25
|
+
* an HTTP transfer.
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
import fs from "node:fs/promises";
|
|
29
|
+
import path from "node:path";
|
|
30
|
+
import {
|
|
31
|
+
ensureDefaultAssignment,
|
|
32
|
+
readAssignments,
|
|
33
|
+
writeAssignments,
|
|
34
|
+
} from "./assignments";
|
|
35
|
+
import { elizaModelsDir } from "./paths";
|
|
36
|
+
import { upsertElizaModel } from "./registry";
|
|
37
|
+
import type { InstalledModel } from "./types";
|
|
38
|
+
|
|
39
|
+
/** One model record inside the bundled-models `manifest.json`. */
interface BundledModelEntry {
  /** Registry id the model is registered and assigned under. */
  id: string;
  /** Human-readable name copied into the registry entry. */
  displayName: string;
  /** Hugging Face repository recorded on the registry entry. */
  hfRepo: string;
  /** GGUF file name, resolved relative to the Eliza models directory. */
  ggufFile: string;
  /** `"embedding"` targets the TEXT_EMBEDDING slot; `"chat"` uses the default assignment. */
  role: "chat" | "embedding";
  /** Size declared by the manifest; the registry entry records the on-disk size instead. */
  sizeBytes: number;
  /** Checksum from the manifest, or `null` when none was recorded. */
  sha256: string | null;
}
|
|
48
|
+
|
|
49
|
+
/** Top-level shape of the bundled-models `manifest.json`. */
interface BundledModelManifest {
  /** Schema version; `1` is the only value the reader accepts. */
  version: 1;
  /** Every model the bundle describes. */
  models: BundledModelEntry[];
}
|
|
53
|
+
|
|
54
|
+
/** Location of `manifest.json` inside the directory returned by `elizaModelsDir()`. */
function manifestPath(): string {
  const modelsDir = elizaModelsDir();
  return path.join(modelsDir, "manifest.json");
}
|
|
57
|
+
|
|
58
|
+
/**
 * Load and parse the bundled-models manifest.
 *
 * @returns the manifest, or `null` when the file cannot be read or parsed, or
 *   when it is not a version-1 manifest with a `models` array
 */
async function readManifest(): Promise<BundledModelManifest | null> {
  try {
    const text = await fs.readFile(manifestPath(), "utf8");
    const manifest = JSON.parse(text) as BundledModelManifest;
    const supported =
      manifest.version === 1 && Array.isArray(manifest.models);
    return supported ? manifest : null;
  } catch {
    // Missing, unreadable or unparseable manifest: treated as "no bundle".
    return null;
  }
}
|
|
70
|
+
|
|
71
|
+
/**
 * Assign a bundled model to the slot implied by its manifest role.
 *
 * Embedding models fill `TEXT_EMBEDDING` only when that slot is still empty;
 * every other role is delegated to `ensureDefaultAssignment`.
 */
async function ensureBundledAssignment(
  modelId: string,
  role: BundledModelEntry["role"],
): Promise<void> {
  if (role === "embedding") {
    const assignments = await readAssignments();
    if (!assignments.TEXT_EMBEDDING) {
      await writeAssignments({ ...assignments, TEXT_EMBEDDING: modelId });
    }
    return;
  }

  await ensureDefaultAssignment(modelId);
}
|
|
84
|
+
|
|
85
|
+
/**
 * Walk the manifest and register every bundled GGUF file in the
 * local-inference registry, then assign it to the slot for its manifest role.
 *
 * Entries are skipped (not counted) when they lack a usable `id` / `ggufFile`,
 * or when the referenced path is missing or is not a regular file. A missing
 * manifest is normal on Capacitor / desktop / non-AOSP installs and returns 0
 * silently.
 *
 * Note: every call writes a fresh `installedAt` timestamp and
 * `lastUsedAt: null` for each registered entry.
 *
 * @returns the number of entries successfully registered
 */
export async function registerBundledModels(): Promise<number> {
  const manifest = await readManifest();
  if (!manifest) return 0;
  const dir = elizaModelsDir();
  let registered = 0;
  for (const entry of manifest.models) {
    // The manifest is unvalidated JSON: one malformed entry must not make
    // path.join throw and abort registration of the remaining models.
    if (
      typeof entry?.id !== "string" ||
      entry.id.length === 0 ||
      typeof entry.ggufFile !== "string" ||
      entry.ggufFile.length === 0
    ) {
      continue;
    }
    const filePath = path.join(dir, entry.ggufFile);
    let sizeBytes: number;
    try {
      const stat = await fs.stat(filePath);
      // A directory (or other non-regular file) at this path is not a model.
      if (!stat.isFile()) continue;
      sizeBytes = stat.size;
    } catch {
      // File didn't extract — manifest references something the APK
      // didn't ship. Skip this entry rather than registering a broken
      // path. AOSP build's stage-default-models.mjs is the source of
      // truth; if a file is missing the build is broken upstream.
      continue;
    }
    const installed: InstalledModel = {
      id: entry.id,
      displayName: entry.displayName,
      path: filePath,
      sizeBytes,
      hfRepo: entry.hfRepo,
      installedAt: new Date().toISOString(),
      lastUsedAt: null,
      source: "eliza-download",
      sha256: entry.sha256 ?? undefined,
    };
    await upsertElizaModel(installed);
    // Auto-assign each bundled model to its manifest role if the user
    // hasn't already assigned that slot. This keeps Eliza-1 chat models
    // eligible for TEXT_EMBEDDING when the bundle explicitly marks them
    // as the local embedding bootstrap model.
    await ensureBundledAssignment(entry.id, entry.role);
    registered += 1;
  }
  return registered;
}
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cache bridge for the local-inference path.
|
|
3
|
+
*
|
|
4
|
+
* Translates the runtime's `ProviderCachePlan` (a provider-neutral cache
|
|
5
|
+
* plan emitted by `@elizaos/core`'s `buildProviderCachePlan`) into
|
|
6
|
+
* concrete behaviour for the two local backends:
|
|
7
|
+
*
|
|
8
|
+
* 1. Out-of-process llama-server (MTP / buun-llama-cpp): stable
|
|
9
|
+
* slot-id derivation + on-disk slot KV save/restore directory layout
|
|
10
|
+
* + TTL-based eviction by mtime.
|
|
11
|
+
* 2. In-process node-llama-cpp: a session pool (see
|
|
12
|
+
* `session-pool.ts`) keyed by `promptCacheKey`.
|
|
13
|
+
*
|
|
14
|
+
* This module is pure logic — no llama-server process management, no
|
|
15
|
+
* node-llama-cpp imports. All filesystem state is rooted under
|
|
16
|
+
* `local-inference/llama-cache/` so cleanup is easy and explicit.
|
|
17
|
+
*/
|
|
18
|
+
/**
 * Retention windows for cached prefix data, in milliseconds, mirroring the
 * cloud-side semantics.
 *
 * Eviction is driven by file mtime rather than access time, so a slot that is
 * read repeatedly without being rewritten still ages out — which matches how
 * llama-server writes the slot file each turn.
 */
export interface CacheTtls {
  /** Roughly the "default" Anthropic ephemeral cache window. */
  short: number;
  /** Roughly the "1h" Anthropic ephemeral cache window. */
  long: number;
  /** The OpenAI 24h prompt-cache retention window. */
  extended?: number;
}
|
|
33
|
+
export declare const DEFAULT_CACHE_TTLS: CacheTtls;
|
|
34
|
+
/**
|
|
35
|
+
* Root directory for all local llama-cache state. Anything inside is
|
|
36
|
+
* Eliza-owned and safe to delete to reset the cache.
|
|
37
|
+
*/
|
|
38
|
+
export declare function llamaCacheRoot(): string;
|
|
39
|
+
/**
|
|
40
|
+
* Per-model-hash cache directory. Slot save files for one model never
|
|
41
|
+
* collide with another model's; switching active model does not need to
|
|
42
|
+
* wipe the cache.
|
|
43
|
+
*/
|
|
44
|
+
export declare function cacheRoot(modelHash: string): string;
|
|
45
|
+
/**
|
|
46
|
+
* llama-server `--slot-save-path` argument: the directory llama-server
|
|
47
|
+
* writes per-slot KV state into when a request includes
|
|
48
|
+
* `cache_prompt: true`. One directory per model hash.
|
|
49
|
+
*/
|
|
50
|
+
export declare function slotSavePath(modelHash: string): string;
|
|
51
|
+
/**
|
|
52
|
+
* Stable model-fingerprint hash. Combines the absolute paths of the
|
|
53
|
+
* target / drafter GGUFs and the cache-type knobs so two distinct
|
|
54
|
+
* configurations don't share a slot directory.
|
|
55
|
+
*/
|
|
56
|
+
export declare function buildModelHash(input: {
|
|
57
|
+
/** Path of the target GGUF; the fingerprint is documented as using absolute paths. */
targetModelPath: string;
|
|
58
|
+
/** Path of the drafter GGUF, if any. May be omitted or `null`. */
drafterModelPath?: string | null;
|
|
59
|
+
/** Cache-type knob for K. May be omitted or `null`. Presumably the K-cache quantization type passed to llama-server — confirm. */
cacheTypeK?: string | null;
|
|
60
|
+
/** Cache-type knob for V. May be omitted or `null`. Presumably the V-cache quantization type passed to llama-server — confirm. */
cacheTypeV?: string | null;
|
|
61
|
+
/** Optional extra discriminator (context size, parallel, etc.). */
|
|
62
|
+
extra?: string | null;
|
|
63
|
+
}): string;
|
|
64
|
+
/**
|
|
65
|
+
* Map a `promptCacheKey` to a llama-server slot id in [0, parallel).
|
|
66
|
+
*
|
|
67
|
+
* llama-server's `--parallel N` flag pre-allocates N decoding slots and
|
|
68
|
+
* accepts a `slot_id` integer in `[0, N-1]` on each request. By hashing
|
|
69
|
+
* the cache key into that range we get:
|
|
70
|
+
*
|
|
71
|
+
* - The same prefix hash always lands on the same slot, so the in-RAM
|
|
72
|
+
* KV cache from the previous turn is reused.
|
|
73
|
+
* - Different prefix hashes spread across slots and don't fight for
|
|
74
|
+
* the same KV memory.
|
|
75
|
+
*
|
|
76
|
+
* Pass `parallel <= 0` to disable slot pinning (returns -1, the
|
|
77
|
+
* llama-server "any free slot" sentinel).
|
|
78
|
+
*/
|
|
79
|
+
// `parallel` is the llama-server `--parallel N` slot count.
// NOTE(review): handling of an empty `promptCacheKey` or a non-integer
// `parallel` is not specified by this declaration — confirm in cache-bridge.ts.
export declare function deriveSlotId(promptCacheKey: string, parallel: number): number;
|
|
80
|
+
/**
|
|
81
|
+
* Convert the runtime-side `CacheTTL` enum + OpenAI extended retention
|
|
82
|
+
* hint into a concrete TTL in milliseconds. This is what the eviction
|
|
83
|
+
* sweep uses when deciding whether a slot file is still live.
|
|
84
|
+
*/
|
|
85
|
+
// NOTE(review): the result for `ttl === undefined`, and for "extended" when
// `ttls.extended` is unset, is not visible here — confirm in cache-bridge.ts.
// `ttls` is optional; presumably it falls back to `DEFAULT_CACHE_TTLS` — confirm.
export declare function ttlMsForKey(ttl: "short" | "long" | "extended" | undefined, ttls?: CacheTtls): number;
|
|
86
|
+
/** TTL classes that can be encoded into a slot `.bin` filename. */
|
|
87
|
+
// The same three literals that `ttlMsForKey` accepts for `ttl` (minus
// `undefined`), and that name the fields of `CacheTtls`.
export type SlotCacheTtlClass = "short" | "long" | "extended";
|
|
88
|
+
/**
|
|
89
|
+
* Build the basename for a persisted slot/conversation `.bin` file with
|
|
90
|
+
* its TTL class encoded as a middle component: `<base>.<ttl>.bin`. The
|
|
91
|
+
* eviction sweep reads that component back via `parseSlotCacheTtlClass`
|
|
92
|
+
* so a slot persisted with the long retention window isn't deleted on
|
|
93
|
+
* the short horizon (and vice versa). Pass `"long"` for cross-restart
|
|
94
|
+
* conversation KV — that matches the prior global (long-only) behaviour
|
|
95
|
+
* for those files.
|
|
96
|
+
*/
|
|
97
|
+
// Returns a basename of the form `<base>.<ttl>.bin`.
// NOTE(review): whether `base` is validated (e.g. for embedded dots or path
// separators) is not visible from this declaration.
export declare function slotCacheFileName(base: string, ttl: SlotCacheTtlClass): string;
|
|
98
|
+
/**
|
|
99
|
+
* Parse the TTL class encoded into a slot `.bin` filename by
|
|
100
|
+
* `slotCacheFileName`. Returns `undefined` for legacy / hand-written
|
|
101
|
+
* filenames without an encoded class — those keep the `long` horizon
|
|
102
|
+
* (the prior global behaviour for persisted slot files).
|
|
103
|
+
*/
|
|
104
|
+
// `undefined` is the "no encoded class" result; the eviction sweep is
// documented as applying the `long` horizon to such files.
export declare function parseSlotCacheTtlClass(fileName: string): SlotCacheTtlClass | undefined;
|
|
105
|
+
/**
|
|
106
|
+
* Sweep the slot-save directory and delete files older than their
|
|
107
|
+
* per-file TTL horizon. The TTL class is read from the filename
|
|
108
|
+
* (`<base>.<ttl>.bin` — see `slotCacheFileName`); files without an
|
|
109
|
+
* encoded class use the `long` horizon (the prior global behaviour).
|
|
110
|
+
* Mtime is the watermark; llama-server rewrites the slot file on every
|
|
111
|
+
* save, so a slot that's actively used keeps a fresh mtime.
|
|
112
|
+
*
|
|
113
|
+
* Returns the number of files deleted. Missing directories are not
|
|
114
|
+
* errors — eviction on a clean install just no-ops.
|
|
115
|
+
*/
|
|
116
|
+
// Resolves to the number of files deleted.
// `now` is presumably the reference time (epoch milliseconds) that file mtimes
// are compared against — confirm. The defaults applied when `ttls` / `now` are
// omitted are not visible from this declaration.
export declare function evictExpired(rootDir: string, ttls?: CacheTtls, now?: number): Promise<number>;
|
|
117
|
+
export interface CacheStatsEntry {
|
|
118
|
+
/** Identifies the file. NOTE(review): basename vs. full path is not visible here — confirm. */
file: string;
|
|
119
|
+
/** File size; presumably in bytes, going by the name — confirm. */
sizeBytes: number;
|
|
120
|
+
/** File modification time; presumably epoch milliseconds — confirm. */
mtimeMs: number;
|
|
121
|
+
/** Age of the file; presumably `now - mtimeMs` in milliseconds, using the `now` given to `readCacheStats` — confirm. */
ageMs: number;
|
|
122
|
+
}
|
|
123
|
+
/**
|
|
124
|
+
* Snapshot of the on-disk slot-save directory. Used by the public
|
|
125
|
+
* `getLocalCacheStats()` debugging endpoint.
|
|
126
|
+
*/
|
|
127
|
+
// Resolves to an array of `CacheStatsEntry`.
// NOTE(review): result ordering, the default for `now`, and the behaviour when
// `rootDir` does not exist are not visible from this declaration.
export declare function readCacheStats(rootDir: string, now?: number): Promise<CacheStatsEntry[]>;
|
|
128
|
+
/**
|
|
129
|
+
* Resolve `promptCacheKey` from a `providerOptions` payload as emitted
|
|
130
|
+
* by `buildProviderCachePlan`. The runtime stuffs it under
|
|
131
|
+
* `providerOptions.eliza.promptCacheKey`. Returns `null` when the key is
|
|
132
|
+
* missing, empty, or not a string — callers fall back to the default
|
|
133
|
+
* "_default" session in that case.
|
|
134
|
+
*/
|
|
135
|
+
// Accepts `unknown`, so callers can pass a raw `providerOptions` payload
// without narrowing it first.
export declare function extractPromptCacheKey(providerOptions: unknown): string | null;
|
|
136
|
+
/**
|
|
137
|
+
* Resolve `prefixHash` from `providerOptions.eliza.prefixHash`. Mirrors
|
|
138
|
+
* `extractPromptCacheKey` — returns `null` unless the value is a non-empty
|
|
139
|
+
* string. The prefix hash covers ONLY the stable prompt prefix (system
|
|
140
|
+
* prompt + tool definitions + large constant context), so a runtime
|
|
141
|
+
* timestamp in the unstable tail does not invalidate it.
|
|
142
|
+
*
|
|
143
|
+
* Local backends prefer this over `promptCacheKey` when available because
|
|
144
|
+
* it gives the strongest "same prefix → same slot" guarantee: two
|
|
145
|
+
* conversations with byte-identical stable prefixes will land on the same
|
|
146
|
+
* slot regardless of how their tail content differs.
|
|
147
|
+
*/
|
|
148
|
+
// Same signature shape as `extractPromptCacheKey`: `unknown` in,
// `string | null` out.
export declare function extractPrefixHash(providerOptions: unknown): string | null;
|
|
149
|
+
/**
|
|
150
|
+
* Stable annotation describing a single segment of the prompt as it was
|
|
151
|
+
* emitted by the runtime planner. The cache-bridge consumes this to
|
|
152
|
+
* compute a stable-prefix-only hash for slot pinning, without having to
|
|
153
|
+
* look at the (timestamp-laden) tail.
|
|
154
|
+
*
|
|
155
|
+
* Mirrors `PromptSegment` in @elizaos/core/src/types/model.ts but is kept
|
|
156
|
+
* standalone so the cache-bridge can be imported by the local-inference
|
|
157
|
+
* backends without a hard dep on `@elizaos/core`.
|
|
158
|
+
*/
|
|
159
|
+
export interface AnnotatedPromptSegment {
|
|
160
|
+
/** Text of this prompt segment. */
content: string;
|
|
161
|
+
/** Whether the segment is stable; `hashStablePrefix` is documented as stopping at the first segment where this is `false`. */
stable: boolean;
|
|
162
|
+
}
|
|
163
|
+
/**
|
|
164
|
+
* Hash the longest stable prefix of `segments`. Stops at the first
|
|
165
|
+
* unstable segment, so a runtime timestamp in the unstable tail never
|
|
166
|
+
* shifts the hash. Returns `null` when no stable segment exists, signaling
|
|
167
|
+
* to the caller that prefix-cache reuse cannot be derived purely from the
|
|
168
|
+
* prompt structure (fall back to the prompt-cache-key path instead).
|
|
169
|
+
*
|
|
170
|
+
* The hash is sha256-truncated to 16 hex chars, matching `buildModelHash`
|
|
171
|
+
* — short enough for log lines, wide enough that collision is not a
|
|
172
|
+
* realistic concern for any plausible number of concurrent prefixes.
|
|
173
|
+
*/
|
|
174
|
+
// `segments` is typed `readonly`, so the signature does not permit mutating
// the caller's array. A non-null result is the truncated hex digest string.
export declare function hashStablePrefix(segments: readonly AnnotatedPromptSegment[]): string | null;
|
|
175
|
+
/**
|
|
176
|
+
* Extract the per-segment stable annotations from a `providerOptions`
|
|
177
|
+
* payload. The runtime emits these as `providerOptions.eliza.promptSegments`
|
|
178
|
+
* when a structured prompt is available — local backends use it to compute
|
|
179
|
+
* `hashStablePrefix` directly, without having to re-parse the prompt text.
|
|
180
|
+
*
|
|
181
|
+
* Returns `null` when the field is absent or malformed; callers fall back
|
|
182
|
+
* to `extractPromptCacheKey` / `extractPrefixHash`.
|
|
183
|
+
*/
|
|
184
|
+
// NOTE(review): what counts as "malformed" (a non-array value vs. a single
// invalid element) is not visible from this declaration — confirm in
// cache-bridge.ts.
export declare function extractAnnotatedSegments(providerOptions: unknown): AnnotatedPromptSegment[] | null;
|
|
185
|
+
/**
|
|
186
|
+
* Resolve the conversation handle id from a `providerOptions` payload.
|
|
187
|
+
* The runtime stuffs it under `providerOptions.eliza.conversationId` when
|
|
188
|
+
* the calling context represents a long-lived conversation (chat handler,
|
|
189
|
+
* planner loop). When present, local backends should use it as the
|
|
190
|
+
* primary slot key — it's stable across turns regardless of prompt
|
|
191
|
+
* content drift, which gives the strongest possible cache reuse for
|
|
192
|
+
* agentic loops.
|
|
193
|
+
*/
|
|
194
|
+
// NOTE(review): the conditions for returning `null` are not stated for this
// function; presumably the same rule as `extractPromptCacheKey` — confirm.
export declare function extractConversationId(providerOptions: unknown): string | null;
|
|
195
|
+
/**
|
|
196
|
+
* Resolve the stable per-call cache key for the local backends. Order of
|
|
197
|
+
* precedence:
|
|
198
|
+
* 1. Conversation id — strongest signal, identical across turns.
|
|
199
|
+
* 2. Annotated stable-prefix hash — survives unstable-tail drift.
|
|
200
|
+
* 3. `prefixHash` from the runtime cache plan — already stable-only via
|
|
201
|
+
* `cachePrefixSegments` upstream.
|
|
202
|
+
* 4. `promptCacheKey` (`v5:<prefixHash>`) — back-compat fallback.
|
|
203
|
+
* Returns null when none are available.
|
|
204
|
+
*/
|
|
205
|
+
// NOTE(review): whether the returned key is the raw extracted value or a
// prefixed / re-hashed form depending on which source matched is not visible
// from this declaration — confirm in cache-bridge.ts.
export declare function resolveLocalCacheKey(providerOptions: unknown): string | null;
|
|
206
|
+
//# sourceMappingURL=cache-bridge.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cache-bridge.d.ts","sourceRoot":"","sources":["cache-bridge.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAOH;;;;;;;;;GASG;AACH,MAAM,WAAW,SAAS;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,eAAO,MAAM,kBAAkB,EAAE,SAIhC,CAAC;AAEF;;;GAGG;AACH,wBAAgB,cAAc,IAAI,MAAM,CAEvC;AAED;;;;GAIG;AACH,wBAAgB,SAAS,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CAKnD;AAED;;;;GAIG;AACH,wBAAgB,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CAEtD;AAED;;;;GAIG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE;IACrC,eAAe,EAAE,MAAM,CAAC;IACxB,gBAAgB,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,mEAAmE;IACnE,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACtB,GAAG,MAAM,CAYT;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,YAAY,CAAC,cAAc,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,CAU7E;AAED;;;;GAIG;AACH,wBAAgB,WAAW,CAC1B,GAAG,EAAE,OAAO,GAAG,MAAM,GAAG,UAAU,GAAG,SAAS,EAC9C,IAAI,GAAE,SAA8B,GAClC,MAAM,CAIR;AAED,mEAAmE;AACnE,MAAM,MAAM,iBAAiB,GAAG,OAAO,GAAG,MAAM,GAAG,UAAU,CAAC;AAE9D;;;;;;;;GAQG;AACH,wBAAgB,iBAAiB,CAChC,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,iBAAiB,GACpB,MAAM,CAER;AAED;;;;;GAKG;AACH,wBAAgB,sBAAsB,CACrC,QAAQ,EAAE,MAAM,GACd,iBAAiB,GAAG,SAAS,CAgB/B;AAED;;;;;;;;;;GAUG;AACH,wBAAsB,YAAY,CACjC,OAAO,EAAE,MAAM,EACf,IAAI,GAAE,SAA8B,EACpC,GAAG,GAAE,MAAmB,GACtB,OAAO,CAAC,MAAM,CAAC,CA8BjB;AAED,MAAM,WAAW,eAAe;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;CACd;AAED;;;GAGG;AACH,wBAAsB,cAAc,CACnC,OAAO,EAAE,MAAM,EACf,GAAG,GAAE,MAAmB,GACtB,OAAO,CAAC,eAAe,EAAE,CAAC,CA2B5B;AAED;;;;;;GAMG;AACH,wBAAgB,qBAAqB,CAAC,eAAe,EAAE,OAAO,GAAG,MAAM,GAAG,IAAI,CAO7E;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,iBAAiB,CAAC,eAAe,EAAE,OAAO,GAAG,MAAM,GAAG,IAAI,CAOzE;AAED;;;;;;;;;GASG;AACH,MAAM,WAAW,sBAAsB;IACtC,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,OAAO,CAAC;CAChB;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,gBAAgB,CAC/B,QAAQ,EAAE,SAAS,sBAAsB,EAAE,GACzC,MAAM,GAAG,IAAI,CAYf;AAED;;;;;;;;GAQG;AACH,wBAAgB,wBAAwB,CACvC,eAAe,EAAE,OAAO,GAC
tB,sBAAsB,EAAE,GAAG,IAAI,CAejC;AAED;;;;;;;;GAQG;AACH,wBAAgB,qBAAqB,CAAC,eAAe,EAAE,OAAO,GAAG,MAAM,GAAG,IAAI,CAO7E;AAED;;;;;;;;;GASG;AACH,wBAAgB,oBAAoB,CAAC,eAAe,EAAE,OAAO,GAAG,MAAM,GAAG,IAAI,CAW5E"}
|