@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +82 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.d.ts +29 -0
- package/src/actions/transcription-control.d.ts.map +1 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +8 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1082 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +205 -0
- package/src/routes/local-inference-asr-route.ts +163 -0
- package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +808 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/transcript-audio-store.d.ts +15 -0
- package/src/routes/transcript-audio-store.d.ts.map +1 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.d.ts +36 -0
- package/src/routes/transcripts-routes.d.ts.map +1 -0
- package/src/routes/transcripts-routes.test.ts +144 -0
- package/src/routes/transcripts-routes.ts +159 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1448 -0
- package/src/runtime/index.d.ts +15 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +33 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bionic-host-loader.d.ts +46 -0
- package/src/services/bionic-host-loader.d.ts.map +1 -0
- package/src/services/bionic-host-loader.test.ts +133 -0
- package/src/services/bionic-host-loader.ts +180 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +238 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +747 -0
- package/src/services/downloader.ts +925 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +540 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1909 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.d.ts +56 -0
- package/src/services/gpu-detect.d.ts.map +1 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +231 -0
- package/src/services/hardware.ts +410 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +29 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +211 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +689 -0
- package/src/services/manifest/schema.d.ts +713 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +653 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +567 -0
- package/src/services/memory-arbiter.d.ts +318 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +925 -0
- package/src/services/memory-monitor.d.ts +122 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +297 -0
- package/src/services/memory-pressure.d.ts +130 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +407 -0
- package/src/services/routing-policy.d.ts +69 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.test.ts +164 -0
- package/src/services/routing-policy.ts +297 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.d.ts +33 -0
- package/src/services/system-memory.d.ts.map +1 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/asr-timed.real.test.ts +141 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +243 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +759 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2302 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +674 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +728 -0
- package/src/services/voice/ffi-bindings.ts +3225 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.d.ts +37 -0
- package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.d.ts +41 -0
- package/src/services/voice/transcript-service.d.ts.map +1 -0
- package/src/services/voice/transcript-service.test.ts +137 -0
- package/src/services/voice/transcript-service.ts +141 -0
- package/src/services/voice/transcript-store.d.ts +53 -0
- package/src/services/voice/transcript-store.d.ts.map +1 -0
- package/src/services/voice/transcript-store.test.ts +153 -0
- package/src/services/voice/transcript-store.ts +132 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +418 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,612 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local-inference backend interface and dispatcher.
|
|
3
|
+
*
|
|
4
|
+
* One shipping implementation lives behind this interface:
|
|
5
|
+
*
|
|
6
|
+
* - `llama-cpp` → the optimized in-process FFI llama.cpp path.
|
|
7
|
+
* MTP, n-gram drafter, lookahead, `-ot` MoE offload, TurboQuant KV
|
|
8
|
+
* cache, mlock/no-mmap/mmproj, etc. all live here.
|
|
9
|
+
*
|
|
10
|
+
* The dispatcher decides which one to use per-load based on:
|
|
11
|
+
*
|
|
12
|
+
* 1. Catalog `runtime.optimizations.requiresKernel` — if any specialised
|
|
13
|
+
* llama.cpp kernel is required (e.g. `turbo3`), the
|
|
14
|
+
* dispatcher MUST pick `llama-cpp`. Legacy bindings cannot
|
|
15
|
+
* provide these kernels at all.
|
|
16
|
+
* 2. Catalog `runtime.preferredBackend` — retained for metadata
|
|
17
|
+
* compatibility, but generation still routes through `llama-cpp`.
|
|
18
|
+
* 3. Default: optimized llama.cpp FFI.
|
|
19
|
+
*
|
|
20
|
+
* The dispatcher does NOT own backend internals. It owns selection only,
|
|
21
|
+
* plus a small load-state
|
|
22
|
+
* cache so callers can swap models without touching either backend
|
|
23
|
+
* directly.
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
import { findCatalogModel } from "./catalog";
|
|
27
|
+
import type { StructuredGenerateParams } from "./structured-output";
|
|
28
|
+
import type { CatalogModel, LocalRuntimeKernel } from "./types";
|
|
29
|
+
import type { VerifierStreamEvent } from "./voice/types";
|
|
30
|
+
|
|
31
|
+
/**
 * Runtime knobs that apply to a single model load. The dispatcher hands
 * them to the backend that performs the load.
 *
 * The shape mirrors the matching fields of `LocalInferenceLoadArgs` in
 * `active-model.ts`. It is redeclared here instead of imported so that
 * `backend.ts` does not join an import cycle (active-model imports engine,
 * engine imports backend).
 */
export interface BackendLoadOverrides {
  contextSize?: number;
  cacheTypeK?: string;
  cacheTypeV?: string;
  gpuLayers?: number | "auto" | "max";
  kvOffload?: "cpu" | "gpu" | "split" | { gpuLayers: number };
  flashAttention?: boolean;
  mmap?: boolean;
  mlock?: boolean;
  useGpu?: boolean;
  /** Multimodal projector GGUF handed to the FFI runtime (absolute path). */
  mmprojPath?: string;
  /** MTP drafter GGUF handed to the FFI runtime (absolute path). */
  draftModelPath?: string;
  /** Eliza-1 bundle root, for direct bundle loads that are not in the registry. */
  bundleRoot?: string;
  /** Manifest path, for direct bundle loads that are not in the registry. */
  manifestPath?: string;
}
|
|
57
|
+
|
|
58
|
+
/** Everything the dispatcher needs to know about one requested model load. */
export interface BackendPlan {
  /** Location of the GGUF on disk (absolute path). */
  modelPath: string;
  /**
   * Catalog id of the model, if the caller knows it. It lets the dispatcher
   * look up the catalog entry and read `runtime.optimizations` and
   * `runtime.mtp`. When neither this nor `catalog` is supplied there is no
   * catalog entry to consult, so only the env override can influence the
   * decision.
   */
  modelId?: string;
  /** Already-resolved catalog entry; takes precedence over `modelId`. */
  catalog?: CatalogModel;
  /**
   * Per-load overrides produced by the active-model coordinator. They are
   * forwarded to the selected backend so the in-process binding can apply
   * cache-type and contextSize requests rather than silently ignoring them.
   */
  overrides?: BackendLoadOverrides;
}
|
|
77
|
+
|
|
78
|
+
/** Arguments for one text generation request against a loaded backend. */
export interface GenerateArgs extends StructuredGenerateParams {
  prompt: string;
  stopSequences?: string[];
  /** Cap on the number of output tokens (default 2048). */
  maxTokens?: number;
  /** Sampling temperature in 0..1 (default 0.7). */
  temperature?: number;
  /** Nucleus-sampling threshold (default 0.9). */
  topP?: number;
  /**
   * Cache key taken from the runtime's `ProviderCachePlan`, if any. Requests
   * that share a key reuse one KV cache prefix: the `llama-cpp` FFI backend
   * maps the key to a deterministic slot, so equal keys hit the same
   * persisted KV state. A missing or empty key selects the historical
   * stateless path.
   */
  cacheKey?: string;
  /**
   * Abort signal scoped to this request. The `llama-cpp` FFI backend reacts
   * cooperatively by cancelling the active FFI stream. For a hard cancel
   * (app pause, kill-switch, ...) pass the same signal that is given to
   * `runtime.useModel`.
   */
  signal?: AbortSignal;
  /**
   * Backend transport budget for this request. Keep it no shorter than the
   * caller's user-visible generation timeout: a shorter inner timeout aborts
   * long local-prefill turns before the chat route gets to make the
   * user-facing decision.
   */
  requestTimeoutMs?: number;
  /**
   * Receives accepted text incrementally. The `llama-cpp` FFI backend
   * invokes it once per `llmStreamNext` step as accepted chunks arrive, and
   * keeps streaming even when a `grammar` is set.
   */
  onTextChunk?: (chunk: string) => void | Promise<void>;
  /**
   * Marks whether the output is user-visible text (eligible for voice-mode
   * TTS) or an internal JSON / planner call, which must not be spoken.
   */
  voiceOutput?: "user-visible" | "internal";
  /**
   * Receives the native verifier stream from speculative MTP. The exact
   * accept/reject token ranges let voice TTS rollback work without having to
   * infer state from text chunks.
   */
  onVerifierEvent?: (event: VerifierStreamEvent) => void | Promise<void>;
}
|
|
126
|
+
|
|
127
|
+
/** Plain `generate()` yields just the generated text. */
export type GenerateResult = string;

/** Generated text together with the instrumentation the backend collected. */
export interface LocalGenerateWithUsageResult {
  text: string;
  /** Token accounting; extra backend-specific keys are allowed. */
  usage?: {
    prompt_tokens?: number;
    completion_tokens?: number;
    total_tokens?: number;
    [key: string]: unknown;
  };
  slotId?: number;
  firstTokenMs?: number | null;
  /** MTP speculative-decoding counters for this turn. */
  mtpStats?: {
    drafted: number;
    accepted: number;
    acceptanceRate: number | null;
  };
}
|
|
145
|
+
|
|
146
|
+
/**
 * Snapshot of how a backend is currently configured. Fields that do not
 * apply or are not known are reported as `null`.
 */
export interface LocalRuntimeLoadConfig {
  modelId: string | null;
  modelPath: string | null;
  contextSize: number | null;
  cacheTypeK: string | null;
  cacheTypeV: string | null;
  gpuLayers: number | null;
  parallel: number;
  binaryPath: string | null;
  backend: "capacitor-llama" | "llama-cpp" | null;
  /** MTP drafting parameters, or `null` when there are none to report. */
  mtp: {
    specType: "draft-mtp";
    draftMin: number;
    draftMax: number;
  } | null;
}
|
|
162
|
+
|
|
163
|
+
/**
 * Contract implemented by every local-inference backend.
 *
 * `available()` is only a soft probe: it must not spawn anything and merely
 * reports whether the backend is usable at all (for example, whether the
 * binding can be loaded or the binary exists on disk). Loading a particular
 * model is the job of `load()`.
 */
export interface LocalInferenceBackend {
  /** Names the concrete backend implementation. */
  readonly id: "capacitor-llama" | "llama-cpp";
  available(): Promise<boolean>;
  load(plan: BackendPlan): Promise<void>;
  unload(): Promise<void>;
  generate(args: GenerateArgs): Promise<GenerateResult>;
  hasLoadedModel(): boolean;
  currentModelPath(): string | null;

  // --- Optional surface ---------------------------------------------------
  // A backend may leave any of the members below unimplemented. `engine.ts`
  // reaches them through `dispatcher.X?.()` calls, using safe fallback
  // values for query methods and actionable throws for required operations.
  //
  // They exist so that engine.ts can drive every optimized
  // llama.cpp-specific feature through the dispatcher, keeping FFI as the
  // single runtime implementation surface.

  /**
   * `generate` with instrumentation: the result carries a token usage block
   * and, when available, per-turn MTP stats.
   */
  generateWithUsage?(
    args: GenerateArgs & { slotId?: number },
  ): Promise<LocalGenerateWithUsageResult>;

  /** Describe an image through mmproj; needs an mmproj-loaded backend. */
  describeImage?(args: {
    bytes: Uint8Array;
    mimeType?: string;
    prompt?: string;
    maxTokens?: number;
    temperature?: number;
    signal?: AbortSignal;
  }): Promise<{
    text: string;
    projectorMs?: number;
    decodeMs?: number;
  }>;

  /** Write a slot's KV cache to disk under the conversation directory. */
  persistConversationKv?(conversationId: string, slotId: number): Promise<void>;

  /** Load a slot's KV cache from disk back into the running backend. */
  restoreConversationKv?(
    conversationId: string,
    slotId: number,
  ): Promise<boolean>;

  /**
   * Decode `promptPrefix` ahead of time into the given slot/cache key so the
   * next `generate` with that key does not have to prefill again. Resolves
   * to false when nothing was warmed (already cached, no model loaded, etc).
   */
  prewarmConversation?(
    promptPrefix: string,
    opts: { slotId: number; cacheKey: string },
  ): Promise<boolean>;

  /**
   * Change the size of the backend's parallel slot pool. Resolves to true
   * after a real restart/resize and to false when none was needed
   * (target ≤ current, etc).
   */
  resizeParallel?(target: number): Promise<boolean>;

  /** Number of active parallel slots; `1` by default without pooling. */
  parallelSlots?(): number;

  /** Whether native MTP speculative decoding is switched on. */
  mtpEnabled?(): boolean;

  /** Absolute path of the loaded mmproj (vision) GGUF, or null. */
  currentMmprojPath?(): string | null;

  /**
   * Current load configuration of the backend (ctx, cache types, parallel,
   * binary path). Consumed by engine introspection and
   * /api/local-inference/active.
   */
  currentRuntimeLoadConfig?(): LocalRuntimeLoadConfig | null;
}
|
|
251
|
+
|
|
252
|
+
/** Backend selections that can be requested through the environment. */
export type BackendOverride = "auto" | "llama-cpp";

/**
 * Reads the backend override from `ELIZA_INFERENCE_BACKEND`.
 *
 * The value is trimmed and lower-cased before comparison. Only `llama-cpp`
 * produces an explicit override; an unset variable, `auto`, or any
 * unrecognised value all resolve to `"auto"`.
 */
export function readBackendOverride(): BackendOverride {
  const requested = process.env.ELIZA_INFERENCE_BACKEND?.trim().toLowerCase();
  return requested === "llama-cpp" ? "llama-cpp" : "auto";
}
|
|
262
|
+
|
|
263
|
+
/**
 * Interprets the environment variable `name` as a boolean switch.
 *
 * After trimming and lower-casing, the values `1`, `true`, `yes` and `on`
 * count as enabled. Anything else, including an unset variable, is disabled.
 */
function envFlag(name: string): boolean {
  const normalized = process.env[name]?.trim().toLowerCase();
  switch (normalized) {
    case "1":
    case "true":
    case "yes":
    case "on":
      return true;
    default:
      return false;
  }
}
|
|
267
|
+
|
|
268
|
+
/**
 * Reports whether the opt-in "reduced-optimization local mode" is enabled,
 * i.e. whether `ELIZA_LOCAL_ALLOW_STOCK_KV` is set to a truthy flag value.
 *
 * This is the cross-platform escape hatch documented in
 * `docs/voice-interactive.md` and `packages/inference/AGENTS.md` §4. If the
 * installed llama.cpp runtime does not advertise the custom Eliza-1 KV
 * kernels (`turbo3`/`qjl_full`/`polarquant`/…) because the fork has not been
 * built with those kernels dispatched on this backend yet, enabling the flag
 * lets the model load with stock `f16` KV cache rather than being refused
 * outright. The voice pipeline still runs, only without the KV-compression
 * speedups on that backend, and a loud one-time warning is printed (see
 * `warnReducedOptimizationLocalMode`).
 *
 * How this squares with AGENTS.md §3, which calls the kernels *mandatory*
 * and rules out any "fallback to unoptimized" path, while the SA-1 directive
 * is "works everywhere regardless of GPU": the kernels DO build on every
 * backend where they can be dispatched (Metal, CUDA, Vulkan-source-patched,
 * CPU SIMD TUs). This fallback is the *opt-in*, *loudly-warned*,
 * *non-publishable* mode for backends where dispatch is not wired up yet. It
 * is not a silent downgrade, and `defaultEligible` bundles still require the
 * verified kernels.
 */
export function localAllowStockKv(): boolean {
  const stockKvAllowed = envFlag("ELIZA_LOCAL_ALLOW_STOCK_KV");
  return stockKvAllowed;
}
|
|
291
|
+
|
|
292
|
+
/** Latch that keeps the reduced-mode banner from being printed twice. */
let reducedModeWarned = false;

/**
 * Prints the reduced-optimization banner through `console.warn`, at most
 * once until the latch is reset.
 *
 * @param detail Explanation of why the reduced mode was entered; it is
 *   interpolated into the first line of the banner.
 */
export function warnReducedOptimizationLocalMode(detail: string): void {
  if (!reducedModeWarned) {
    // Flip the latch before emitting, matching the original ordering.
    reducedModeWarned = true;
    const banner =
      `\n[local-inference] ⚠️ REDUCED-OPTIMIZATION LOCAL MODE — ${detail}\n` +
      ` ELIZA_LOCAL_ALLOW_STOCK_KV=1 is set, so the model is loading with stock\n` +
      ` f16 KV cache instead of the Eliza-1 TurboQuant/QJL/PolarQuant KV kernels.\n` +
      ` The voice pipeline will run, but slower and using more memory than a build\n` +
      ` with the kernels dispatched (Metal: all 5; CUDA: ships them; Vulkan: source-\n` +
      ` patched; CPU: SIMD TUs). Rebuild the bundled llama.cpp FFI runtime\n` +
      ` to get the optimized path. This mode is NOT publishable and NOT a default.\n`;
    console.warn(banner);
  }
}

/** Test-only hook: re-arms the one-time warning latch. */
export function __resetReducedModeWarnedForTests(): void {
  reducedModeWarned = false;
}
|
|
311
|
+
|
|
312
|
+
/** Outcome of backend selection for one load. */
export interface BackendDecision {
  backend: "llama-cpp";
  /** Explains the choice; used for diagnostics and warnings. */
  reason: "env-override" | "kernel-required" | "preferred-backend" | "default";
  /** Kernels the catalog entry declares as required (possibly empty). */
  kernels: LocalRuntimeKernel[];
  /**
   * Populated when a kernel mismatch was detected: the catalog model
   * declares `requiresKernel: [...]`, yet the CAPABILITIES.json shipped next
   * to the installed binary reports some of those kernels as unavailable.
   * Selection still points at optimized llama.cpp, the only backend able to
   * provide such kernels, but the load is expected to fail. Callers should
   * tell the operator to rebuild the binary rather than let the model
   * silently misbehave.
   */
  unsatisfiedKernels?: LocalRuntimeKernel[];
}
|
|
329
|
+
|
|
330
|
+
/**
 * Pure backend-selection function; it spawns nothing, which keeps it easy to
 * unit-test.
 *
 * All inputs are explicit: the caller resolves the catalog entry, the binary
 * availability and the env override beforehand. Note that
 * `llamaCppAvailable` is accepted but not read here, and that the
 * `"preferred-backend"` reason is never produced by this function.
 *
 * `binaryKernels`, if given, is the parsed CAPABILITIES.json kernels map of
 * the installed llama.cpp FFI runtime and is used to derive
 * `unsatisfiedKernels`. `null` means the binary is older or has no
 * capabilities probe; the model's declaration is then trusted and the load
 * attempt is left to clarify.
 *
 * @returns A decision whose `backend` is always `"llama-cpp"`. `reason` is
 *   `"env-override"` when the override asks for llama-cpp, otherwise
 *   `"kernel-required"` when the catalog lists required kernels, otherwise
 *   `"default"`.
 */
export function decideBackend(input: {
  override: BackendOverride;
  catalog: CatalogModel | undefined;
  llamaCppAvailable: boolean;
  binaryKernels?: Partial<Record<LocalRuntimeKernel | string, boolean>> | null;
}): BackendDecision {
  const kernels = input.catalog?.runtime?.optimizations?.requiresKernel ?? [];
  const unsatisfiedKernels = computeUnsatisfiedKernels(
    kernels,
    input.binaryKernels ?? null,
  );

  // Every branch selects llama-cpp; only the recorded reason differs.
  let reason: BackendDecision["reason"];
  if (input.override === "llama-cpp") {
    reason = "env-override";
  } else if (kernels.length > 0) {
    reason = "kernel-required";
  } else {
    reason = "default";
  }

  return { backend: "llama-cpp", reason, kernels, unsatisfiedKernels };
}
|
|
380
|
+
|
|
381
|
+
/**
 * Lists the `required` kernels that the binary's CAPABILITIES.json does not
 * report as `true`.
 *
 * @returns `undefined` when nothing is required or no probe result is
 *   available; otherwise the missing kernels in their original order, where
 *   an empty array means every required kernel is satisfied.
 */
function computeUnsatisfiedKernels(
  required: LocalRuntimeKernel[],
  binaryKernels: Partial<Record<LocalRuntimeKernel | string, boolean>> | null,
): LocalRuntimeKernel[] | undefined {
  if (required.length === 0 || !binaryKernels) return undefined;

  const missing: LocalRuntimeKernel[] = [];
  for (const kernel of required) {
    // Absent, false and any other non-`true` entry all count as missing.
    if (binaryKernels[kernel] !== true) missing.push(kernel);
  }
  return missing;
}
|
|
394
|
+
|
|
395
|
+
/**
 * Finds the catalog entry that belongs to a `BackendPlan`.
 *
 * An entry carried on the plan wins. Failing that, a non-empty `modelId` is
 * looked up with `findCatalogModel`. A plan with neither yields `undefined`.
 */
export function resolveCatalogForPlan(
  plan: BackendPlan,
): CatalogModel | undefined {
  const { catalog, modelId } = plan;
  if (catalog) return catalog;
  return modelId ? findCatalogModel(modelId) : undefined;
}
|
|
407
|
+
|
|
408
|
+
/**
|
|
409
|
+
* Dispatcher that fronts the in-process FFI llama.cpp backend behind the
|
|
410
|
+
* `LocalInferenceBackend` contract. Holds at most one active backend at a
|
|
411
|
+
* time — load() unloads the previous backend before loading the new one if
|
|
412
|
+
* they differ.
|
|
413
|
+
*/
|
|
414
|
+
export class BackendDispatcher implements LocalInferenceBackend {
|
|
415
|
+
readonly id = "capacitor-llama" as const;
|
|
416
|
+
// The dispatcher's `id` is informational; the active backend's id is what
|
|
417
|
+
// matters for diagnostics. We expose `activeBackendId()` for that.
|
|
418
|
+
|
|
419
|
+
private active: LocalInferenceBackend | null = null;
|
|
420
|
+
|
|
421
|
+
constructor(
  /** Backend that `load()` targets and `available()` delegates to. */
  private readonly ffiStreaming: LocalInferenceBackend,
  /** Synchronous probe consulted by `decide()` and `load()`. */
  private readonly probeFfiAvailable: () => boolean,
  /**
   * Optional capabilities probe. It yields the kernels map of the installed
   * llama.cpp FFI runtime, or null when no probe is available. The result
   * feeds `unsatisfiedKernels` in the BackendDecision ahead of load(), so
   * callers can report a clean "rebuild your fork binary" error rather than
   * hit a kernel SIGSEGV at generation time.
   */
  private readonly probeBinaryKernels?: () => Partial<
    Record<string, boolean>
  > | null,
) {}
|
|
436
|
+
|
|
437
|
+
/** Reports whether the wrapped FFI backend says it is available. */
async available(): Promise<boolean> {
  const usable = await this.ffiStreaming.available();
  return usable;
}
|
|
440
|
+
|
|
441
|
+
/** Id of the backend currently held as active, or null when there is none. */
activeBackendId(): "capacitor-llama" | "llama-cpp" | null {
  const backend = this.active;
  if (!backend) return null;
  return backend.id;
}
|
|
444
|
+
|
|
445
|
+
/** False without an active backend; otherwise whatever that backend reports. */
hasLoadedModel(): boolean {
  const backend = this.active;
  if (!backend) return false;
  return backend.hasLoadedModel() ?? false;
}
|
|
448
|
+
|
|
449
|
+
/** Model path reported by the active backend, or null when there is none. */
currentModelPath(): string | null {
  const backend = this.active;
  if (!backend) return null;
  return backend.currentModelPath() ?? null;
}
|
|
452
|
+
|
|
453
|
+
/**
 * Gathers the inputs for `decideBackend` (catalog entry, env override, FFI
 * availability probe, optional kernel probe) and returns its decision.
 */
decide(plan: BackendPlan): BackendDecision {
  const catalog = resolveCatalogForPlan(plan);
  const override = readBackendOverride();
  const llamaCppAvailable = this.probeFfiAvailable();
  // No probe configured, or a probe that yields nothing, both become null.
  const binaryKernels = this.probeBinaryKernels?.() ?? null;
  return decideBackend({ override, catalog, llamaCppAvailable, binaryKernels });
}
|
|
462
|
+
|
|
463
|
+
/**
 * Loads `plan` into the FFI backend.
 *
 * If the decision lists unsatisfied kernels, the load is refused unless
 * `localAllowStockKv()` is true. When it is allowed, the reduced-mode
 * warning is issued and any `cacheTypeK` / `cacheTypeV` overrides are
 * removed from the plan handed to the backend.
 *
 * @throws Error when required kernels are missing and stock KV is not
 *   allowed, or when the FFI availability probe returns false.
 */
async load(plan: BackendPlan): Promise<void> {
  const decision = this.decide(plan);
  const unsatisfied = decision.unsatisfiedKernels ?? [];
  let planToLoad = plan;

  if (unsatisfied.length > 0) {
    const missing = unsatisfied.join(", ");
    if (!localAllowStockKv()) {
      throw new Error(
        `[local-inference] Catalog model requires kernel(s) {${missing}}, but the installed llama.cpp FFI runtime does not advertise them. Rebuild the bundled runtime for this target, pick a different model, or set ELIZA_LOCAL_ALLOW_STOCK_KV=1 to load with stock f16 KV (reduced-optimization local mode — loud warning, not publishable).`,
      );
    }
    // Reduced-optimization local mode: these kernels are not dispatched on
    // this backend in the current build, and the user chose to run with
    // stock f16 KV rather than be refused. Warn (once), then drop custom
    // cache-type overrides so the FFI runtime uses f16.
    warnReducedOptimizationLocalMode(
      `catalog model requires kernel(s) {${missing}}, not advertised by the installed llama.cpp FFI runtime`,
    );
    const overrides = plan.overrides;
    if (
      overrides &&
      (overrides.cacheTypeK !== undefined || overrides.cacheTypeV !== undefined)
    ) {
      const { cacheTypeK: _droppedK, cacheTypeV: _droppedV, ...kept } = overrides;
      planToLoad = { ...plan, overrides: kept };
    }
  }

  if (decision.backend === "llama-cpp" && !this.probeFfiAvailable()) {
    throw new Error(
      "[local-inference] Optimized llama.cpp requires the in-process FFI backend. " +
        "Install/rebuild libelizainference with streaming-LLM + MTP support; " +
        "server backends are not supported.",
    );
  }

  const target = this.ffiStreaming;
  const previous = this.active;
  // Only a different, previously active backend is unloaded first.
  if (previous && previous !== target) {
    await previous.unload();
  }
  this.active = target;
  await target.load(planToLoad);
}
|
|
505
|
+
|
|
506
|
+
/**
 * Clears the active backend reference and then unloads that backend. The
 * reference is cleared first, before the unload is awaited.
 */
async unload(): Promise<void> {
  const previous = this.active;
  this.active = null;
  if (!previous) return;
  await previous.unload();
}
|
|
511
|
+
|
|
512
|
+
/**
 * Run a generation on the active backend.
 *
 * @param args generation arguments, forwarded unchanged.
 * @returns whatever the active backend's `generate()` resolves to.
 * @throws Error when no backend has been loaded yet.
 */
async generate(args: GenerateArgs): Promise<GenerateResult> {
  const backend = this.active;
  if (backend) {
    return backend.generate(args);
  }
  throw new Error(
    "[local-inference] No backend loaded. Call load() before generate().",
  );
}
|
|
520
|
+
|
|
521
|
+
// === Forwarders for the optional methods on LocalInferenceBackend.
|
|
522
|
+
// === Of the ops below, generateWithUsage / describeImage throw an actionable
|
|
523
|
+
// === error when the active backend doesn't implement them (FFI parity gap);
|
|
524
|
+
// === persist / restore / prewarm / resize quietly no-op or return false.
|
|
525
|
+
// === Query getters return safe defaults that match the engine's
|
|
526
|
+
// === existing guard expectations.
|
|
527
|
+
|
|
528
|
+
/**
 * Forward a usage-reporting generation to the active backend.
 *
 * @param args generation arguments, optionally pinned to a `slotId`.
 * @throws Error when no backend is loaded, or when the active backend does
 *   not provide `generateWithUsage`.
 */
async generateWithUsage(
  args: GenerateArgs & { slotId?: number },
): Promise<LocalGenerateWithUsageResult> {
  this.ensureLoaded();
  const backend = this.active;
  if (backend?.generateWithUsage) {
    return backend.generateWithUsage(args);
  }
  throw this.notSupported("generateWithUsage");
}
|
|
537
|
+
|
|
538
|
+
/**
 * Forward an image-describe request to the active backend.
 *
 * @param args the backend's own `describeImage` argument, passed through.
 * @throws Error when no backend is loaded, or when the active backend does
 *   not provide `describeImage` (the message explains the mmproj requirement).
 */
async describeImage(
  args: Parameters<NonNullable<LocalInferenceBackend["describeImage"]>>[0],
): ReturnType<NonNullable<LocalInferenceBackend["describeImage"]>> {
  this.ensureLoaded();
  const backend = this.active;
  if (backend?.describeImage) {
    return backend.describeImage(args);
  }
  const hint =
    "vision describe requires an mmproj-loaded llama.cpp FFI runtime. Load an Eliza-1 bundle with its vision projector.";
  throw this.notSupported("describeImage", hint);
}
|
|
550
|
+
|
|
551
|
+
/**
 * Forward `persistConversationKv` to the active backend.
 *
 * Resolves without doing anything when the active backend does not provide
 * the method.
 *
 * @throws Error when no backend is loaded.
 */
async persistConversationKv(
  conversationId: string,
  slotId: number,
): Promise<void> {
  this.ensureLoaded();
  const backend = this.active;
  if (backend?.persistConversationKv) {
    await backend.persistConversationKv(conversationId, slotId);
  }
}
|
|
559
|
+
|
|
560
|
+
/**
 * Forward `restoreConversationKv` to the active backend.
 *
 * @returns the backend's result, or `false` when the active backend does not
 *   provide the method.
 * @throws Error when no backend is loaded.
 */
async restoreConversationKv(
  conversationId: string,
  slotId: number,
): Promise<boolean> {
  this.ensureLoaded();
  const backend = this.active;
  return backend?.restoreConversationKv
    ? backend.restoreConversationKv(conversationId, slotId)
    : false;
}
|
|
568
|
+
|
|
569
|
+
/**
 * Forward `prewarmConversation` to the active backend.
 *
 * @param promptPrefix prompt prefix handed to the backend unchanged.
 * @param opts slot and cache key handed to the backend unchanged.
 * @returns the backend's result, or `false` when the active backend does not
 *   provide the method.
 * @throws Error when no backend is loaded.
 */
async prewarmConversation(
  promptPrefix: string,
  opts: { slotId: number; cacheKey: string },
): Promise<boolean> {
  this.ensureLoaded();
  const backend = this.active;
  return backend?.prewarmConversation
    ? backend.prewarmConversation(promptPrefix, opts)
    : false;
}
|
|
577
|
+
|
|
578
|
+
/**
 * Forward `resizeParallel` to the active backend.
 *
 * @param target value handed to the backend's `resizeParallel` unchanged.
 * @returns the backend's result, or `false` when the active backend does not
 *   provide the method.
 * @throws Error when no backend is loaded.
 */
async resizeParallel(target: number): Promise<boolean> {
  this.ensureLoaded();
  const backend = this.active;
  return backend?.resizeParallel ? backend.resizeParallel(target) : false;
}
|
|
583
|
+
|
|
584
|
+
/**
 * Value of the active backend's `parallelSlots()`.
 *
 * Falls back to `1` when nothing is loaded, when the backend has no
 * `parallelSlots`, or when it returns a nullish value.
 */
parallelSlots(): number {
  const reported = this.active?.parallelSlots?.();
  return reported ?? 1;
}
|
|
587
|
+
|
|
588
|
+
/**
 * Value of the active backend's `mtpEnabled()`.
 *
 * Falls back to `false` when nothing is loaded, when the backend has no
 * `mtpEnabled`, or when it returns a nullish value.
 */
mtpEnabled(): boolean {
  const reported = this.active?.mtpEnabled?.();
  return reported ?? false;
}
|
|
591
|
+
|
|
592
|
+
/**
 * Value of the active backend's `currentMmprojPath()`.
 *
 * Falls back to `null` when nothing is loaded, when the backend has no
 * `currentMmprojPath`, or when it returns a nullish value.
 */
currentMmprojPath(): string | null {
  const reported = this.active?.currentMmprojPath?.();
  return reported ?? null;
}
|
|
595
|
+
|
|
596
|
+
/**
 * Value of the active backend's `currentRuntimeLoadConfig()`.
 *
 * Falls back to `null` when nothing is loaded, when the backend has no
 * `currentRuntimeLoadConfig`, or when it returns a nullish value.
 */
currentRuntimeLoadConfig(): LocalRuntimeLoadConfig | null {
  const reported = this.active?.currentRuntimeLoadConfig?.();
  return reported ?? null;
}
|
|
599
|
+
|
|
600
|
+
/**
 * Guard used by the forwarders: does nothing when a backend is active.
 *
 * @throws Error when no backend is currently loaded.
 */
private ensureLoaded(): void {
  if (this.active) return;
  throw new Error("[local-inference] No backend loaded. Call load() first.");
}
|
|
607
|
+
|
|
608
|
+
/**
 * Build (but do not throw) the error for a method the active backend lacks.
 *
 * @param method name of the missing backend method, quoted in the message.
 * @param detail optional extra guidance appended after a single space.
 * @returns an `Error` naming the active backend id (`<none>` when unset).
 */
private notSupported(method: string, detail?: string): Error {
  const backendId = this.active?.id ?? "<none>";
  const parts = [
    `[local-inference] Active backend (${backendId}) does not implement ${method}.`,
  ];
  if (detail) parts.push(detail);
  return new Error(parts.join(" "));
}
|
|
612
|
+
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BionicHostLoader — the agent-side half of the on-device GPU delegation path.
|
|
3
|
+
*
|
|
4
|
+
* On Android the elizaOS agent runs as embedded bun under the musl loader, whose
|
|
5
|
+
* restricted linker namespace cannot load the bionic Android Vulkan driver (its
|
|
6
|
+
* HIDL/HAL closure) — so the musl agent can only run inference on the CPU. The
|
|
7
|
+
* GPU is reachable only from the normal bionic `ai.elizaos.app` process, where
|
|
8
|
+
* `ElizaBionicInferenceServer` (Java) has loaded `libelizainference.so` +
|
|
9
|
+
* `libggml-vulkan.so` and offloads the model to the Mali GPU.
|
|
10
|
+
*
|
|
11
|
+
* This loader implements the standard {@link LocalInferenceLoader} contract, so
|
|
12
|
+
* the TEXT_SMALL / TEXT_LARGE handlers in `ensure-local-inference-handler.ts`
|
|
13
|
+
* route through it transparently. `generate()` sends the prompt to the bionic
|
|
14
|
+
* host over an abstract-namespace `AF_UNIX` socket and gets the GPU completion
|
|
15
|
+
* back — the whole decode loop runs server-side, so there is no per-token
|
|
16
|
+
* two-process round trip.
|
|
17
|
+
*
|
|
18
|
+
* This is the buffered first slice (one GENERATE request → one full completion).
|
|
19
|
+
* Server-push per-step streaming, embed, and cancel are layered on later via the
|
|
20
|
+
* shared `LlmStreamingBinding`; the wire framing already carries an `op`
|
|
21
|
+
* discriminator for that.
|
|
22
|
+
*/
|
|
23
|
+
import type { LocalInferenceLoadArgs, LocalInferenceLoader } from "./active-model";
|
|
24
|
+
/**
 * Declaration of the loader that implements the {@link LocalInferenceLoader}
 * contract by talking to the bionic host over an abstract-namespace socket.
 */
export declare class BionicHostLoader implements LocalInferenceLoader {
  private readonly socketName;
  private modelPath;
  private bundleDir;
  /**
   * @param socketName name of the abstract-namespace socket, given without
   *   the leading NUL byte.
   */
  constructor(socketName: string);
  loadModel(args: LocalInferenceLoadArgs): Promise<void>;
  unloadModel(): Promise<void>;
  currentModelPath(): string | null;
  generate(args: {
    prompt: string;
    stopSequences?: Array<string>;
    maxTokens?: number;
    temperature?: number;
    cacheKey?: string;
  }): Promise<string>;
  /**
   * Performs a single request/response exchange on a fresh connection. Both
   * directions use length-prefixed frames of the form
   * `[int32 BE byte length][UTF-8 JSON]`.
   */
  private roundTrip;
}
|
|
46
|
+
//# sourceMappingURL=bionic-host-loader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bionic-host-loader.d.ts","sourceRoot":"","sources":["bionic-host-loader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAKH,OAAO,KAAK,EACX,sBAAsB,EACtB,oBAAoB,EACpB,MAAM,gBAAgB,CAAC;AA6BxB,qBAAa,gBAAiB,YAAW,oBAAoB;IAKhD,OAAO,CAAC,QAAQ,CAAC,UAAU;IAJvC,OAAO,CAAC,SAAS,CAAuB;IACxC,OAAO,CAAC,SAAS,CAAM;IAEvB,yEAAyE;gBAC5C,UAAU,EAAE,MAAM;IAEzC,SAAS,CAAC,IAAI,EAAE,sBAAsB,GAAG,OAAO,CAAC,IAAI,CAAC;IAQtD,WAAW,IAAI,OAAO,CAAC,IAAI,CAAC;IAIlC,gBAAgB,IAAI,MAAM,GAAG,IAAI;IAI3B,QAAQ,CAAC,IAAI,EAAE;QACpB,MAAM,EAAE,MAAM,CAAC;QACf,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;QACzB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;KAClB,GAAG,OAAO,CAAC,MAAM,CAAC;IAqBnB;;;OAGG;IACH,OAAO,CAAC,SAAS;CAmEjB"}
|