@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +82 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.d.ts +29 -0
- package/src/actions/transcription-control.d.ts.map +1 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +8 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1082 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +205 -0
- package/src/routes/local-inference-asr-route.ts +163 -0
- package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +808 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/transcript-audio-store.d.ts +15 -0
- package/src/routes/transcript-audio-store.d.ts.map +1 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.d.ts +36 -0
- package/src/routes/transcripts-routes.d.ts.map +1 -0
- package/src/routes/transcripts-routes.test.ts +144 -0
- package/src/routes/transcripts-routes.ts +159 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1448 -0
- package/src/runtime/index.d.ts +15 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +33 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bionic-host-loader.d.ts +46 -0
- package/src/services/bionic-host-loader.d.ts.map +1 -0
- package/src/services/bionic-host-loader.test.ts +133 -0
- package/src/services/bionic-host-loader.ts +180 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +238 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +747 -0
- package/src/services/downloader.ts +925 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +540 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1909 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.d.ts +56 -0
- package/src/services/gpu-detect.d.ts.map +1 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +231 -0
- package/src/services/hardware.ts +410 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +29 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +211 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +689 -0
- package/src/services/manifest/schema.d.ts +713 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +653 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +567 -0
- package/src/services/memory-arbiter.d.ts +318 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +925 -0
- package/src/services/memory-monitor.d.ts +122 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +297 -0
- package/src/services/memory-pressure.d.ts +130 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +407 -0
- package/src/services/routing-policy.d.ts +69 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.test.ts +164 -0
- package/src/services/routing-policy.ts +297 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.d.ts +33 -0
- package/src/services/system-memory.d.ts.map +1 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/asr-timed.real.test.ts +141 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +243 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +759 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2302 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +674 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +728 -0
- package/src/services/voice/ffi-bindings.ts +3225 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.d.ts +37 -0
- package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.d.ts +41 -0
- package/src/services/voice/transcript-service.d.ts.map +1 -0
- package/src/services/voice/transcript-service.test.ts +137 -0
- package/src/services/voice/transcript-service.ts +141 -0
- package/src/services/voice/transcript-store.d.ts +53 -0
- package/src/services/voice/transcript-store.d.ts.map +1 -0
- package/src/services/voice/transcript-store.test.ts +153 -0
- package/src/services/voice/transcript-store.ts +132 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +418 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,735 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Public facade for the local-inference service.
|
|
3
|
+
*
|
|
4
|
+
* Single entry point used by the API routes, settings UI, and orchestration
|
|
5
|
+
* code. Holds singleton instances of the downloader
|
|
6
|
+
* and active-model coordinator so subscribers receive the same event
|
|
7
|
+
* stream across the process.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { existsSync } from "node:fs";
|
|
11
|
+
import { totalmem } from "node:os";
|
|
12
|
+
import { join as pathJoin } from "node:path";
|
|
13
|
+
import {
|
|
14
|
+
type AgentRuntime,
|
|
15
|
+
logger,
|
|
16
|
+
renderMessageHandlerStablePrefix,
|
|
17
|
+
type UUID,
|
|
18
|
+
} from "@elizaos/core";
|
|
19
|
+
import {
|
|
20
|
+
ActiveModelCoordinator,
|
|
21
|
+
type LocalInferenceLoadOverrides,
|
|
22
|
+
} from "./active-model";
|
|
23
|
+
import { readEffectiveAssignments, setAssignment } from "./assignments";
|
|
24
|
+
import { registerBundledModels } from "./bundled-models";
|
|
25
|
+
import { MODEL_CATALOG } from "./catalog";
|
|
26
|
+
import { Downloader } from "./downloader";
|
|
27
|
+
import { localInferenceEngine } from "./engine";
|
|
28
|
+
import { probeHardware } from "./hardware";
|
|
29
|
+
import { searchHuggingFaceGguf, searchModelHubGguf } from "./hf-search";
|
|
30
|
+
import {
|
|
31
|
+
createImageGenCapabilityRegistration,
|
|
32
|
+
type ImageGenBackend,
|
|
33
|
+
type ImageGenLoadArgs,
|
|
34
|
+
loadAospImageGenBackend,
|
|
35
|
+
loadCoreMlImageGenBackend,
|
|
36
|
+
loadMfluxImageGenBackend,
|
|
37
|
+
loadSdCppImageGenBackend,
|
|
38
|
+
loadTensorRtImageGenBackend,
|
|
39
|
+
resolveDefaultImageGenModel,
|
|
40
|
+
selectImageGenBackends,
|
|
41
|
+
} from "./imagegen";
|
|
42
|
+
import { isImageGenUnavailable } from "./imagegen/errors";
|
|
43
|
+
import {
|
|
44
|
+
MemoryArbiter,
|
|
45
|
+
setMemoryArbiter,
|
|
46
|
+
tryGetMemoryArbiter,
|
|
47
|
+
} from "./memory-arbiter";
|
|
48
|
+
import {
|
|
49
|
+
capacitorPressureSource,
|
|
50
|
+
compositePressureSource,
|
|
51
|
+
type MemoryPressureSource,
|
|
52
|
+
nodeOsPressureSource,
|
|
53
|
+
} from "./memory-pressure";
|
|
54
|
+
import { ramHeadroomReserveMb } from "./ram-budget";
|
|
55
|
+
import { buildTextGenerationReadiness } from "./readiness";
|
|
56
|
+
import {
|
|
57
|
+
chooseSmallerFallbackModel,
|
|
58
|
+
type RecommendedModelSelection,
|
|
59
|
+
selectRecommendedModelForSlot,
|
|
60
|
+
selectRecommendedModels,
|
|
61
|
+
} from "./recommendation";
|
|
62
|
+
import {
|
|
63
|
+
listInstalledModels,
|
|
64
|
+
removeElizaModel,
|
|
65
|
+
upsertElizaModel,
|
|
66
|
+
} from "./registry";
|
|
67
|
+
import {
|
|
68
|
+
type RoutingPreferences,
|
|
69
|
+
readRoutingPreferences,
|
|
70
|
+
writeRoutingPreferences,
|
|
71
|
+
} from "./routing-preferences";
|
|
72
|
+
import type {
|
|
73
|
+
ActiveModelState,
|
|
74
|
+
AgentModelSlot,
|
|
75
|
+
CatalogModel,
|
|
76
|
+
DownloadEvent,
|
|
77
|
+
DownloadJob,
|
|
78
|
+
HardwareProbe,
|
|
79
|
+
LocalInferenceReadiness,
|
|
80
|
+
ModelAssignments,
|
|
81
|
+
ModelHubSnapshot,
|
|
82
|
+
TextGenerationSlot,
|
|
83
|
+
} from "./types";
|
|
84
|
+
import { type VerifyResult, verifyInstalledModel } from "./verify";
|
|
85
|
+
import { verifyBundleOnDevice } from "./verify-on-device";
|
|
86
|
+
import { createVisionCapabilityRegistration } from "./vision";
|
|
87
|
+
import type {
|
|
88
|
+
VisionDescribeBackend,
|
|
89
|
+
VisionDescribeRequest,
|
|
90
|
+
VisionDescribeResult,
|
|
91
|
+
} from "./vision/types";
|
|
92
|
+
import { prewarmLocalVoiceStackForModel } from "./voice-prewarm";
|
|
93
|
+
|
|
94
|
+
/** Conversation id used when prewarming the shared system-prompt prefix. */
const SYSTEM_PREFIX_CONVERSATION_ID = "__system_prefix__";

/** Provider id written into routing preferences for local inference. */
const LOCAL_INFERENCE_PROVIDER_ID = "eliza-local-inference";

/** Text slots whose routing is re-pointed at the local provider on activation. */
const ACTIVATED_TEXT_ROUTING_SLOTS: AgentModelSlot[] = ["TEXT_SMALL", "TEXT_LARGE"];

/**
 * Provider ids that are also treated as "local" when deciding whether a slot
 * may be routed to `LOCAL_INFERENCE_PROVIDER_ID`.
 */
const LEGACY_LOCAL_ROUTING_PROVIDERS = new Set([
  "capacitor-llama",
  "eliza-device-bridge",
  "eliza-aosp-llama",
]);
|
|
105
|
+
|
|
106
|
+
/**
 * Decide whether a slot's current preferred provider may be replaced by the
 * local-inference provider.
 *
 * @param provider - The provider currently stored for the slot, if any.
 * @returns `true` when no provider is set (including an empty string), when
 *   the provider already is the local one, or when it is one of the legacy
 *   local provider ids; `false` for any other provider.
 */
function shouldRouteActivatedModelToLocal(
  provider: string | undefined,
): boolean {
  if (!provider) return true;
  if (provider === LOCAL_INFERENCE_PROVIDER_ID) return true;
  return LEGACY_LOCAL_ROUTING_PROVIDERS.has(provider);
}
|
|
115
|
+
|
|
116
|
+
/**
 * Point the activated text slots at the local-inference provider.
 *
 * For every slot in `ACTIVATED_TEXT_ROUTING_SLOTS` whose stored provider is
 * unset, already local, or a legacy local id, the preferred provider is set
 * to `LOCAL_INFERENCE_PROVIDER_ID` and the policy to `"manual"`. Slots that
 * point at any other provider are left alone. Preferences are only written
 * back when at least one value actually changed.
 */
async function routeActivatedModelToLocalText(): Promise<void> {
  const stored = await readRoutingPreferences();
  // Work on shallow copies so the object returned by the reader is not mutated.
  const updated: RoutingPreferences = {
    preferredProvider: { ...stored.preferredProvider },
    policy: { ...stored.policy },
  };
  let dirty = false;

  for (const slot of ACTIVATED_TEXT_ROUTING_SLOTS) {
    const existing = updated.preferredProvider[slot];
    if (shouldRouteActivatedModelToLocal(existing)) {
      const providerStale = existing !== LOCAL_INFERENCE_PROVIDER_ID;
      const policyStale = updated.policy[slot] !== "manual";
      if (providerStale) {
        updated.preferredProvider[slot] = LOCAL_INFERENCE_PROVIDER_ID;
      }
      if (policyStale) {
        updated.policy[slot] = "manual";
      }
      dirty = dirty || providerStale || policyStale;
    }
  }

  if (!dirty) return;
  await writeRoutingPreferences(updated);
}
|
|
141
|
+
|
|
142
|
+
export class LocalInferenceService {
|
|
143
|
+
// The downloader runs the engine-backed on-device verify pass
|
|
144
|
+
// (`packages/inference/AGENTS.md` §7: load → 1-token text → 1-phrase voice
|
|
145
|
+
// → barge-in cancel) after a bundle's bytes check out; a bundle that does
|
|
146
|
+
// not pass does not auto-fill an empty default slot.
|
|
147
|
+
private readonly downloader = new Downloader({
|
|
148
|
+
verifyOnDevice: verifyBundleOnDevice,
|
|
149
|
+
});
|
|
150
|
+
private readonly activeModel = new ActiveModelCoordinator();
|
|
151
|
+
private bundledBootstrap: Promise<void> | null = null;
|
|
152
|
+
/**
|
|
153
|
+
* Memory Arbiter (WS1). Lazily created on first access so the heavy
|
|
154
|
+
* pressure-source machinery doesn't run for processes that never load
|
|
155
|
+
* a local model (CI, dev shells, etc.). Once created, the arbiter is
|
|
156
|
+
* also published via `setMemoryArbiter` so cross-plugin consumers
|
|
157
|
+
* (plugin-vision, plugin-image-gen) can use `getMemoryArbiter()`.
|
|
158
|
+
*/
|
|
159
|
+
private memoryArbiter: MemoryArbiter | null = null;
|
|
160
|
+
/**
|
|
161
|
+
* Mobile pressure bridge — populated by the Capacitor host (iOS / Android
|
|
162
|
+
* onTrimMemory) so a native pressure callback can reach the arbiter.
|
|
163
|
+
* Stays null on desktop until WS2/WS8 wire the native side.
|
|
164
|
+
*/
|
|
165
|
+
private mobilePressureBridge: ReturnType<
|
|
166
|
+
typeof capacitorPressureSource
|
|
167
|
+
> | null = null;
|
|
168
|
+
private imageGenCapabilityRegistered = false;
|
|
169
|
+
|
|
170
|
+
/**
 * Catalog entries exposed to callers: every `MODEL_CATALOG` entry that is
 * not flagged `hiddenFromCatalog`.
 */
getCatalog() {
  const visible = MODEL_CATALOG.filter((entry) => !entry.hiddenFromCatalog);
  return visible;
}
|
|
173
|
+
|
|
174
|
+
/**
 * Register any bundled GGUF files staged by the AOSP build (or any other
 * install path that drops a `manifest.json` next to the model files) into
 * the registry.
 *
 * The registration promise is created at most once per service instance and
 * cached, so concurrent first callers wait on the same work. The returned
 * promise never rejects: a registration failure is logged at debug level and
 * otherwise ignored, because bundled models are a best-effort addition to
 * the installed list.
 *
 * @returns A promise that settles once the single registration attempt has
 *   finished (successfully or not).
 */
private bootstrapBundled(): Promise<void> {
  if (!this.bundledBootstrap) {
    this.bundledBootstrap = registerBundledModels().then(
      () => undefined,
      (err: unknown) => {
        // Best-effort: surface the reason instead of dropping it silently,
        // but keep the cached promise resolved so callers are not blocked.
        logger.debug(
          "[local-inference] bundled model registration failed (best-effort):",
          err instanceof Error ? err.message : String(err),
        );
      },
    );
  }
  return this.bundledBootstrap;
}
|
|
189
|
+
|
|
190
|
+
/**
 * List installed models, after making sure the one-time bundled-model
 * registration has finished.
 */
async getInstalled() {
  return this.bootstrapBundled().then(() => listInstalledModels());
}
|
|
194
|
+
|
|
195
|
+
/** Run the hardware probe and return its result. */
async getHardware(): Promise<HardwareProbe> {
  const probe = await probeHardware();
  return probe;
}
|
|
198
|
+
|
|
199
|
+
/** Current download jobs as reported by the downloader's snapshot. */
getDownloads(): DownloadJob[] {
  const jobs = this.downloader.snapshot();
  return jobs;
}
|
|
202
|
+
|
|
203
|
+
/** Current active-model state as reported by the coordinator's snapshot. */
getActive(): ActiveModelState {
  const state = this.activeModel.snapshot();
  return state;
}
|
|
206
|
+
|
|
207
|
+
/** Read the effective slot-to-model assignments. */
async getAssignments(): Promise<ModelAssignments> {
  const assignments = await readEffectiveAssignments();
  return assignments;
}
|
|
210
|
+
|
|
211
|
+
/**
 * Store an assignment for one slot and return the resulting effective
 * assignments.
 *
 * @param slot - The agent model slot to assign.
 * @param modelId - The model id to assign, or `null` (forwarded unchanged to
 *   `setAssignment`).
 * @returns The effective assignments re-read after the write.
 */
async setSlotAssignment(
  slot: AgentModelSlot,
  modelId: string | null,
): Promise<ModelAssignments> {
  await setAssignment(slot, modelId);
  const effective = await readEffectiveAssignments();
  return effective;
}
|
|
218
|
+
|
|
219
|
+
/**
 * Build a full model-hub snapshot: visible catalog, installed models, active
 * model, downloads, hardware probe, assignments, and text-generation
 * readiness.
 *
 * Installed models, hardware, and assignments are fetched concurrently. The
 * readiness computation receives the full `MODEL_CATALOG`, whereas the
 * returned `catalog` field is the filtered list from `getCatalog()`.
 */
async snapshot(): Promise<ModelHubSnapshot> {
  const pendingInstalled = this.getInstalled();
  const pendingHardware = this.getHardware();
  const pendingAssignments = this.getAssignments();
  const [installed, hardware, assignments] = await Promise.all([
    pendingInstalled,
    pendingHardware,
    pendingAssignments,
  ]);

  // Synchronous state is read only after the async lookups have settled.
  const active = this.getActive();
  const downloads = this.getDownloads();

  const catalog = this.getCatalog();
  const readinessInput = {
    assignments,
    installed,
    active,
    downloads,
    catalog: MODEL_CATALOG,
  };
  const textReadiness = buildTextGenerationReadiness(readinessInput);

  return {
    catalog,
    installed,
    active,
    downloads,
    hardware,
    assignments,
    textReadiness,
  };
}
|
|
243
|
+
|
|
244
|
+
/**
 * Compute text-generation readiness from the installed models, the effective
 * assignments, the current active-model and download state, and the full
 * `MODEL_CATALOG`.
 */
async getTextReadiness(): Promise<LocalInferenceReadiness> {
  const pendingInstalled = this.getInstalled();
  const pendingAssignments = this.getAssignments();
  const [installed, assignments] = await Promise.all([
    pendingInstalled,
    pendingAssignments,
  ]);

  const active = this.getActive();
  const downloads = this.getDownloads();
  const readinessInput = {
    assignments,
    installed,
    active,
    downloads,
    catalog: MODEL_CATALOG,
  };
  return buildTextGenerationReadiness(readinessInput);
}
|
|
257
|
+
|
|
258
|
+
/**
 * Pick the recommended catalog model for one text-generation slot.
 *
 * @param slot - The slot to recommend a model for.
 * @param hardware - Optional pre-computed hardware probe; when omitted the
 *   hardware is probed via `getHardware()`.
 * @returns The selection produced by `selectRecommendedModelForSlot`.
 */
async getRecommendedModel(
  slot: TextGenerationSlot,
  hardware?: HardwareProbe,
): Promise<RecommendedModelSelection> {
  const probe = hardware ?? (await this.getHardware());
  const binaryKernels = this.installedBinaryKernels();
  return selectRecommendedModelForSlot(slot, probe, MODEL_CATALOG, {
    binaryKernels,
  });
}
|
|
269
|
+
|
|
270
|
+
/**
 * Pick the recommended catalog model for every text-generation slot.
 *
 * @param hardware - Optional pre-computed hardware probe; when omitted the
 *   hardware is probed via `getHardware()`.
 * @returns One selection per slot, as produced by `selectRecommendedModels`.
 */
async getRecommendedModels(
  hardware?: HardwareProbe,
): Promise<Record<TextGenerationSlot, RecommendedModelSelection>> {
  const probe = hardware ?? (await this.getHardware());
  const binaryKernels = this.installedBinaryKernels();
  return selectRecommendedModels(probe, MODEL_CATALOG, { binaryKernels });
}
|
|
279
|
+
|
|
280
|
+
/**
 * Static kernel-capability map handed to the recommendation helpers.
 *
 * Kernel capability probing is owned by the native FFI runtime, so this
 * always returns `null`, meaning "no static CAPABILITIES.json probe". The
 * dispatcher still enforces runtime-required kernels at load time.
 */
private installedBinaryKernels(): Partial<Record<string, boolean>> | null {
  const staticProbe: Partial<Record<string, boolean>> | null = null;
  return staticProbe;
}
|
|
288
|
+
|
|
289
|
+
/**
 * Start a download through the shared downloader.
 *
 * @param modelIdOrSpec - A model id or a full catalog entry, forwarded
 *   unchanged to the downloader.
 * @returns The download job returned by the downloader.
 */
async startDownload(
  modelIdOrSpec: string | CatalogModel,
): Promise<DownloadJob> {
  const job = await this.downloader.start(modelIdOrSpec);
  return job;
}
|
|
294
|
+
|
|
295
|
+
/**
 * Choose a smaller fallback for `currentModelId` and start downloading it.
 *
 * @param currentModelId - The model to find a smaller replacement for.
 * @param slot - The text slot the fallback is for; defaults to `"TEXT_LARGE"`.
 * @param hardware - Optional pre-computed hardware probe; probed when omitted.
 * @returns The chosen model and its download job, or `null` when
 *   `chooseSmallerFallbackModel` yields no candidate.
 */
async startSmallerFallbackDownload(
  currentModelId: string,
  slot: TextGenerationSlot = "TEXT_LARGE",
  hardware?: HardwareProbe,
): Promise<{ model: CatalogModel; job: DownloadJob } | null> {
  const probe = hardware ?? (await this.getHardware());
  const fallback = chooseSmallerFallbackModel(
    currentModelId,
    probe,
    slot,
    MODEL_CATALOG,
  );
  if (!fallback) return null;

  const job = await this.startDownload(fallback.id);
  return { model: fallback, job };
}
|
|
312
|
+
|
|
313
|
+
/**
 * Search Hugging Face for GGUF models.
 *
 * @param query - Search text, forwarded to `searchHuggingFaceGguf`.
 * @param limit - Optional result limit, forwarded unchanged.
 */
async searchHuggingFace(
  query: string,
  limit?: number,
): Promise<CatalogModel[]> {
  const matches = await searchHuggingFaceGguf(query, limit);
  return matches;
}
|
|
319
|
+
|
|
320
|
+
/**
 * Search the given model hub for GGUF models.
 *
 * @param query - Search text, forwarded to `searchModelHubGguf`.
 * @param hub - Which hub to query.
 * @param limit - Optional result limit, forwarded unchanged.
 */
async searchModelHub(
  query: string,
  hub: "huggingface" | "modelscope",
  limit?: number,
): Promise<CatalogModel[]> {
  const matches = await searchModelHubGguf(query, hub, limit);
  return matches;
}
|
|
327
|
+
|
|
328
|
+
/**
 * Verify an installed model's file integrity.
 *
 * Only registry entries whose `source` is `"eliza-download"` are ever
 * written back; other entries are returned as verified and never mutated.
 * For Eliza downloads:
 * - when verification reports `"unknown"` but produced a hash (legacy entry
 *   without a stored sha256), that hash is persisted as the baseline and the
 *   result is reported as `"ok"`;
 * - when verification reports `"ok"`, `lastVerifiedAt` is refreshed.
 *
 * @param id - Id of the installed model to verify.
 * @returns The verification result (upgraded to `"ok"` in the self-heal case).
 * @throws Error when no installed model has the given id.
 */
async verifyModel(id: string): Promise<VerifyResult> {
  const registry = await listInstalledModels();
  const entry = registry.find((candidate) => candidate.id === id);
  if (!entry) {
    throw new Error(`Model not installed: ${id}`);
  }

  const outcome = await verifyInstalledModel(entry);

  // External models are never mutated, whatever the verification says.
  if (entry.source !== "eliza-download") return outcome;

  // Self-heal: pin the freshly computed hash as the baseline.
  if (outcome.state === "unknown" && outcome.currentSha256) {
    await upsertElizaModel({
      ...entry,
      sha256: outcome.currentSha256,
      lastVerifiedAt: new Date().toISOString(),
    });
    return {
      ...outcome,
      state: "ok",
      expectedSha256: outcome.currentSha256,
    };
  }

  if (outcome.state === "ok") {
    await upsertElizaModel({
      ...entry,
      lastVerifiedAt: new Date().toISOString(),
    });
  }
  return outcome;
}
|
|
368
|
+
|
|
369
|
+
/**
 * Ask the downloader to cancel the download for `modelId`.
 *
 * @returns The boolean reported by the downloader's `cancel`.
 */
cancelDownload(modelId: string): boolean {
  const outcome = this.downloader.cancel(modelId);
  return outcome;
}
|
|
372
|
+
|
|
373
|
+
/**
 * Subscribe to download events from the shared downloader.
 *
 * @returns The function returned by the downloader's `subscribe`
 *   (presumably an unsubscribe handle — confirm against `Downloader`).
 */
subscribeDownloads(listener: (event: DownloadEvent) => void): () => void {
  const release = this.downloader.subscribe(listener);
  return release;
}
|
|
376
|
+
|
|
377
|
+
/**
 * Subscribe to active-model state changes from the coordinator.
 *
 * @returns The function returned by the coordinator's `subscribe`
 *   (presumably an unsubscribe handle — confirm against
 *   `ActiveModelCoordinator`).
 */
subscribeActive(listener: (state: ActiveModelState) => void): () => void {
  const release = this.activeModel.subscribe(listener);
  return release;
}
|
|
380
|
+
|
|
381
|
+
/**
 * Activate an installed model.
 *
 * Switches the active-model coordinator to `modelId`. When the resulting
 * state is `"ready"`, the text slots are routed to the local provider and,
 * if a runtime was supplied, voice and system-prefix prewarms are started in
 * the background. Activation does not wait for the prewarms.
 *
 * @param runtime - Agent runtime, or `null` when none is available; the
 *   background prewarms only run when a runtime is given.
 * @param modelId - Id of the installed model to activate.
 * @param overrides - Optional load overrides forwarded to the coordinator.
 * @returns The active-model state reported by the coordinator.
 * @throws Error when `modelId` is not among the installed models.
 */
async setActive(
  runtime: AgentRuntime | null,
  modelId: string,
  overrides?: LocalInferenceLoadOverrides,
): Promise<ActiveModelState> {
  const installed = (await this.getInstalled()).find((m) => m.id === modelId);
  if (!installed) {
    throw new Error(`Model not installed: ${modelId}`);
  }
  const state = await this.activeModel.switchTo(runtime, installed, overrides);
  if (state.status !== "ready") return state;

  await routeActivatedModelToLocalText();

  if (runtime) {
    void (async () => {
      // The two warmups are independent: a voice-prewarm failure must not
      // prevent the system-prefix prewarm from running.
      try {
        await this.prewarmActiveVoice(modelId);
      } catch (err) {
        logger.debug(
          "[local-inference] activation prewarmActiveVoice failed (best-effort):",
          err instanceof Error ? err.message : String(err),
        );
      }
      await this.prewarmSystemPrefix(runtime);
    })().catch(() => {
      // Individual prewarm helpers log their own failures; activation
      // should not regress to a blocking path if a best-effort warmup misses.
    });
  }
  return state;
}
|
|
409
|
+
|
|
410
|
+
/**
 * Prewarm the local voice stack for `modelId`.
 *
 * @returns The boolean reported by `prewarmLocalVoiceStackForModel`.
 */
async prewarmActiveVoice(modelId: string): Promise<boolean> {
  const warmed = await prewarmLocalVoiceStackForModel(modelId);
  return warmed;
}
|
|
413
|
+
|
|
414
|
+
/**
 * Warm the Stage-1 stable prefix after an explicit model activation.
 *
 * `ensureLocalInferenceHandler` also attempts this at runtime boot, but
 * desktop activation often happens later through
 * `/api/local-inference/active`; at boot there may be no resident model, so
 * the early warmup correctly stays inactive. Running it here closes that gap.
 *
 * The warmup is skipped (returns `false`) unless the engine has a loaded
 * model and its active backend is `"llama-cpp"`. Errors raised while
 * rendering or prewarming are logged at debug level and reported as `false`.
 *
 * @param runtime - Runtime used to render the stable prefix.
 * @returns The result of the engine's `prewarmConversation`, or `false` when
 *   the warmup was skipped, the prefix was empty, or an error occurred.
 */
async prewarmSystemPrefix(runtime: AgentRuntime): Promise<boolean> {
  const engineEligible =
    localInferenceEngine.hasLoadedModel() &&
    localInferenceEngine.activeBackendId() === "llama-cpp";
  if (!engineEligible) return false;

  try {
    // Falls back to the fixed conversation id when the runtime has no agent id.
    const roomId = (runtime.agentId ?? SYSTEM_PREFIX_CONVERSATION_ID) as UUID;
    const stablePrefix = await renderMessageHandlerStablePrefix(runtime, roomId);
    if (!stablePrefix) return false;

    const warmed = await localInferenceEngine.prewarmConversation(
      SYSTEM_PREFIX_CONVERSATION_ID,
      stablePrefix,
    );
    return warmed;
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    logger.debug(
      "[local-inference] activation prewarmSystemPrefix failed (best-effort):",
      reason,
    );
    return false;
  }
}
|
|
445
|
+
|
|
446
|
+
/**
 * Unload the active model through the coordinator.
 *
 * @param runtime - Agent runtime, or `null`; forwarded to `unload`.
 * @returns The active-model state reported after unloading.
 */
async clearActive(runtime: AgentRuntime | null): Promise<ActiveModelState> {
  const state = await this.activeModel.unload(runtime);
  return state;
}
|
|
449
|
+
|
|
450
|
+
/**
 * Diagnostic snapshot of the local prefix-cache state.
 *
 * @returns An object whose `engine` field is the engine's session-pool
 *   description (size, max size, and live cache keys), or `null` when the
 *   engine reports none. Used by the API layer to render a "local cache"
 *   debug panel.
 */
async getLocalCacheStats(): Promise<{
  engine: { size: number; maxSize: number; keys: string[] } | null;
}> {
  const engine = localInferenceEngine.describeSessionPool();
  return { engine };
}
|
|
462
|
+
|
|
463
|
+
/**
 * Memory Arbiter (WS1). Returns the arbiter for this service, creating it on
 * first call.
 *
 * Resolution order:
 * 1. the arbiter already cached on this instance;
 * 2. an arbiter already published process-wide (`tryGetMemoryArbiter`), which
 *    is adopted and given the image-gen capability if it lacks one;
 * 3. otherwise a new arbiter, built against the engine's shared resource
 *    registry so eviction policy is consistent across the voice/text paths
 *    and the cross-plugin handles.
 *
 * A newly built arbiter uses a composite pressure source made of
 * `nodeOsPressureSource()` (desktop) and a `capacitorPressureSource()` bridge
 * that the Capacitor host populates on iOS/Android. It is started, published
 * via `setMemoryArbiter`, and then has the vision-describe and image-gen
 * capabilities registered.
 */
getMemoryArbiter(): MemoryArbiter {
  if (this.memoryArbiter) return this.memoryArbiter;

  const shared = tryGetMemoryArbiter();
  if (shared) {
    this.memoryArbiter = shared;
    this.registerImageGenCapability(shared);
    return shared;
  }

  const mobileBridge = capacitorPressureSource();
  this.mobilePressureBridge = mobileBridge;
  const pressureSource: MemoryPressureSource = compositePressureSource([
    nodeOsPressureSource(),
    mobileBridge,
  ]);

  // Usable RAM for the proactive fit-to-budget LRU path: host RAM minus the
  // OS/runtime headroom reserve, floored at zero. On mobile the OS-pressure
  // bridge is the primary signal; this is the desktop multi-model backstop
  // that evicts the coldest model before an overcommit.
  const usableRamMb = (): number => {
    const hostMb = Math.floor(totalmem() / (1024 * 1024));
    return Math.max(0, hostMb - ramHeadroomReserveMb());
  };

  const arbiter = new MemoryArbiter({
    registry: localInferenceEngine.getSharedResources(),
    pressureSource,
    budgetMb: usableRamMb,
  });
  arbiter.start();
  setMemoryArbiter(arbiter);
  this.memoryArbiter = arbiter;

  // WS2: the vision-describe capability lets plugin-vision and the
  // IMAGE_DESCRIPTION runtime handler dispatch through llama.cpp's mtmd path
  // (the running llama-server's `--mmproj`-loaded projector). The backend is
  // a thin wrapper over `localInferenceEngine.describeImage` — there is no
  // separate model load: the projector is co-resident with the active text
  // bundle and lives or dies with it. Florence-2 has been removed entirely
  // (see VISION_MIGRATION.md).
  const loadVisionBackend = async (): Promise<VisionDescribeBackend> => {
    return {
      id: "llama-server",
      describe: async (
        request: VisionDescribeRequest,
      ): Promise<VisionDescribeResult> => {
        const { resolveImageBytes } = await import("./vision/hash");
        const { bytes, mimeType } = resolveImageBytes(request.image);
        const result = await localInferenceEngine.describeImage({
          bytes,
          mimeType,
          prompt: request.prompt,
          maxTokens: request.maxTokens,
          temperature: request.temperature,
          signal: request.signal,
        });

        const description = result.text.trim();
        if (!description) {
          throw new Error("[vision/llama-server] describe returned empty text");
        }
        // Title is the text before the first sentence terminator, or a
        // generic label when that piece is empty.
        const title = description.split(/[.!?]/, 1)[0]?.trim() || "Image";
        return {
          title,
          description,
          projectorMs: result.projectorMs,
          decodeMs: result.decodeMs,
          cacheHit: false,
        };
      },
      dispose: async () => {
        // Lifetime owned by the engine; nothing to free here.
      },
    };
  };

  arbiter.registerCapability(
    createVisionCapabilityRegistration({
      arbiterCache: arbiter,
      estimatedMb: 600,
      loader: loadVisionBackend,
    }),
  );
  this.registerImageGenCapability(arbiter);
  return arbiter;
}
|
|
558
|
+
|
|
559
|
+
/**
 * Register the image-gen capability on `arbiter` at most once.
 *
 * Registration is skipped when this instance has already registered it or
 * the arbiter already reports an `"image-gen"` capability; in every
 * non-throwing path the instance flag ends up `true`.
 *
 * @param arbiter - The arbiter to register the capability on.
 */
private registerImageGenCapability(arbiter: MemoryArbiter): void {
  const alreadyPresent =
    this.imageGenCapabilityRegistered || arbiter.hasCapability("image-gen");

  if (!alreadyPresent) {
    const registration = createImageGenCapabilityRegistration({
      estimatedMb: 1100,
      loader: async (modelKey) => this.loadImageGenBackend(modelKey),
    });
    arbiter.registerCapability(registration);
  }
  this.imageGenCapabilityRegistered = true;
}
|
|
575
|
+
|
|
576
|
+
/**
 * Load an image-generation backend for `modelKey`.
 *
 * Builds a host profile from `process.platform`, `process.arch`, and the
 * `ELIZA_IMAGEGEN_ACCELERATOR`, `ELIZA_PLATFORM`, and `ELIZA_LOCAL_LLAMA`
 * environment variables, then tries the candidates returned by
 * `selectImageGenBackends` in order. A candidate that fails with an
 * "image-gen unavailable" error is recorded and the next one is tried; any
 * other error propagates immediately.
 *
 * @param modelKey - Image generation model key to resolve and load.
 * @returns The first backend that loads successfully.
 * @throws Error listing every collected "unavailable" message when no
 *   candidate could be loaded.
 */
private async loadImageGenBackend(
  modelKey: string,
): Promise<ImageGenBackend> {
  const loadArgs = await this.resolveImageGenLoadArgs(modelKey);
  const env = process.env;
  const profile = {
    platform: process.platform,
    arch: process.arch,
    gpu: undefined,
    requiredAccelerator: parseImageGenRequiredAccelerator(
      env.ELIZA_IMAGEGEN_ACCELERATOR,
    ),
    isIos: env.ELIZA_PLATFORM === "ios",
    isAndroid:
      env.ELIZA_PLATFORM === "android" || env.ELIZA_LOCAL_LLAMA === "1",
  } as const;

  const unavailable: string[] = [];
  for (const choice of selectImageGenBackends(profile)) {
    const backendId = choice.backendId;
    const request = {
      loadArgs: { ...loadArgs, accelerator: choice.accelerator },
      modelKey: loadArgs.modelKey,
    };
    try {
      // `return await` keeps loader rejections inside this try block.
      if (backendId === "aosp") {
        return await loadAospImageGenBackend(request);
      }
      if (backendId === "coreml") {
        return await loadCoreMlImageGenBackend(request);
      }
      if (backendId === "mflux") {
        return await loadMfluxImageGenBackend(request);
      }
      if (backendId === "tensorrt") {
        return await loadTensorRtImageGenBackend(request);
      }
      if (backendId === "sd-cpp") {
        return await loadSdCppImageGenBackend(request);
      }
    } catch (err) {
      if (!isImageGenUnavailable(err)) throw err;
      unavailable.push(err.message);
    }
  }

  throw new Error(
    `[imagegen] no backend available for ${loadArgs.modelKey}: ${unavailable.join("; ")}`,
  );
}
|
|
632
|
+
|
|
633
|
+
/**
 * Resolves the on-disk load arguments for an image generation model key.
 *
 * The active model's bundle root is preferred when it contains the resolved
 * model file; otherwise the first installed model whose bundle root contains
 * that file is used.
 *
 * @param modelKey - Key passed to `resolveDefaultImageGenModel`.
 * @returns Load arguments with absolute paths and `accelerator: "auto"`.
 * @throws Error when the key is unknown, when no installed bundle contains
 *   the model file, or when a declared companion asset (`vae` / `llm`) is
 *   missing from the chosen bundle root.
 */
private async resolveImageGenLoadArgs(
  modelKey: string,
): Promise<ImageGenLoadArgs & { modelKey: string }> {
  const resolved = resolveDefaultImageGenModel(modelKey);
  if (!resolved) {
    throw new Error(
      `[imagegen] unknown image generation model key: ${modelKey}`,
    );
  }

  const activeId = this.activeModel.snapshot().modelId;
  const installed = await this.getInstalled();

  // Start from the active model, then fall back to scanning every install.
  let owner = activeId
    ? installed.find((model) => model.id === activeId)
    : undefined;
  if (
    !owner?.bundleRoot ||
    !this.imageGenFileExists(owner.bundleRoot, resolved.file)
  ) {
    owner = installed.find((candidate) =>
      candidate.bundleRoot
        ? this.imageGenFileExists(candidate.bundleRoot, resolved.file)
        : false,
    );
  }

  const bundleRoot = owner?.bundleRoot;
  if (!bundleRoot) {
    throw new Error(
      `[imagegen] ${resolved.modelId} is not installed. Expected ${resolved.file} under the active Eliza-1 bundle root.`,
    );
  }

  // Maps an optional companion file name to its absolute path, insisting
  // that the file is present whenever a name was declared.
  const requireCompanion = (file: string | undefined): string | undefined => {
    if (!file) return undefined;
    const fullPath = pathJoin(bundleRoot, file);
    if (existsSync(fullPath)) return fullPath;
    throw new Error(
      `[imagegen] ${resolved.modelId} companion asset is not installed. Expected ${file} under the active Eliza-1 bundle root.`,
    );
  };

  return {
    modelKey: resolved.modelId,
    modelPath: pathJoin(bundleRoot, resolved.file),
    splitDiffusionModel: resolved.splitDiffusionModel,
    vae: requireCompanion(resolved.vae),
    llm: requireCompanion(resolved.llm),
    accelerator: "auto",
  };
}
|
|
683
|
+
|
|
684
|
+
/**
 * Reports whether `file` exists beneath `bundleRoot`.
 *
 * @param bundleRoot - Directory the file is joined onto.
 * @param file - Path of the asset relative to `bundleRoot`.
 * @returns `true` when `existsSync` finds the joined path.
 */
private imageGenFileExists(bundleRoot: string, file: string): boolean {
  const candidatePath = pathJoin(bundleRoot, file);
  return existsSync(candidatePath);
}
|
|
687
|
+
|
|
688
|
+
/**
 * Entry point for the Capacitor bridge: the iOS / Android host invokes this
 * from its native memory-pressure callback.
 *
 * It may be called before a memory arbiter exists; `getMemoryArbiter()` is
 * invoked first so the arbiter is created on demand. The event is forwarded
 * only when a mobile pressure bridge is present afterwards.
 *
 * @param level - Pressure level reported by the host.
 * @param freeMb - Optional free-memory figure, passed through unchanged.
 */
dispatchMobilePressure(
  level: "nominal" | "low" | "critical",
  freeMb?: number,
): void {
  this.getMemoryArbiter();
  const bridge = this.mobilePressureBridge;
  if (bridge) {
    bridge.dispatch(level, freeMb);
  }
}
|
|
700
|
+
|
|
701
|
+
/**
 * Uninstalls the model identified by `modelId`.
 *
 * @param modelId - Identifier of the model to remove.
 * @returns The outcome reported by `removeElizaModel`.
 */
async uninstall(
  modelId: string,
): Promise<{ removed: boolean; reason?: "external" | "not-found" }> {
  const targetIsActive = this.activeModel.snapshot().modelId === modelId;
  if (targetIsActive) {
    // Unload before deleting so the plugin is not left holding a handle to
    // a file that no longer exists.
    await this.activeModel.unload(null);
  }
  const outcome = removeElizaModel(modelId);
  return outcome;
}
|
|
711
|
+
}
|
|
712
|
+
|
|
713
|
+
/**
 * Parses an accelerator name into one of the recognised accelerator ids.
 *
 * Matching is case-insensitive and otherwise exact; surrounding whitespace
 * is not stripped.
 *
 * @param value - Raw accelerator name, or `undefined` when none was given.
 * @returns The lower-cased accelerator id, or `undefined` when `value` is
 *   missing or not one of the recognised names.
 */
function parseImageGenRequiredAccelerator(
  value: string | undefined,
): "cpu" | "cuda" | "vulkan" | "metal" | "coreml" | "tensorrt" | undefined {
  const recognised = [
    "cpu",
    "cuda",
    "vulkan",
    "metal",
    "coreml",
    "tensorrt",
  ] as const;
  const normalised = value?.toLowerCase();
  return recognised.find((candidate) => candidate === normalised);
}
|
|
734
|
+
|
|
735
|
+
/** Module-level `LocalInferenceService` instance, constructed when this module is evaluated. */
export const localInferenceService = new LocalInferenceService();
|