@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +82 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.d.ts +29 -0
- package/src/actions/transcription-control.d.ts.map +1 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +8 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1082 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +205 -0
- package/src/routes/local-inference-asr-route.ts +163 -0
- package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +808 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/transcript-audio-store.d.ts +15 -0
- package/src/routes/transcript-audio-store.d.ts.map +1 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.d.ts +36 -0
- package/src/routes/transcripts-routes.d.ts.map +1 -0
- package/src/routes/transcripts-routes.test.ts +144 -0
- package/src/routes/transcripts-routes.ts +159 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1448 -0
- package/src/runtime/index.d.ts +15 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +33 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bionic-host-loader.d.ts +46 -0
- package/src/services/bionic-host-loader.d.ts.map +1 -0
- package/src/services/bionic-host-loader.test.ts +133 -0
- package/src/services/bionic-host-loader.ts +180 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +238 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +747 -0
- package/src/services/downloader.ts +925 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +540 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1909 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.d.ts +56 -0
- package/src/services/gpu-detect.d.ts.map +1 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +231 -0
- package/src/services/hardware.ts +410 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +29 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +211 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +689 -0
- package/src/services/manifest/schema.d.ts +713 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +653 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +567 -0
- package/src/services/memory-arbiter.d.ts +318 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +925 -0
- package/src/services/memory-monitor.d.ts +122 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +297 -0
- package/src/services/memory-pressure.d.ts +130 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +407 -0
- package/src/services/routing-policy.d.ts +69 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.test.ts +164 -0
- package/src/services/routing-policy.ts +297 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.d.ts +33 -0
- package/src/services/system-memory.d.ts.map +1 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/asr-timed.real.test.ts +141 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +243 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +759 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2302 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +674 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +728 -0
- package/src/services/voice/ffi-bindings.ts +3225 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.d.ts +37 -0
- package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.d.ts +41 -0
- package/src/services/voice/transcript-service.d.ts.map +1 -0
- package/src/services/voice/transcript-service.test.ts +137 -0
- package/src/services/voice/transcript-service.ts +141 -0
- package/src/services/voice/transcript-store.d.ts +53 -0
- package/src/services/voice/transcript-store.d.ts.map +1 -0
- package/src/services/voice/transcript-store.test.ts +153 -0
- package/src/services/voice/transcript-store.ts +132 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +418 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Policy engine + latency + cost tracking for cross-provider routing.
|
|
3
|
+
*
|
|
4
|
+
* The policy engine sits on top of the `handlerRegistry` and, given a
|
|
5
|
+
* model type and a user-selected policy, decides which provider's handler
|
|
6
|
+
* should serve the next request. The router-handler (registered at top
|
|
7
|
+
* priority) calls `pickProvider` to make that decision.
|
|
8
|
+
*
|
|
9
|
+
* Policies:
|
|
10
|
+
* - manual — honour `preferredProvider`; when no pref set, fall
|
|
11
|
+
* through to the runtime's native priority order
|
|
12
|
+
* (highest registered priority wins).
|
|
13
|
+
* - auto — capability-driven: consult the device-tier assessment.
|
|
14
|
+
* When the device can comfortably run local inference
|
|
15
|
+
* (recommended mode "local", or a MAX/GOOD tier) AND a
|
|
16
|
+
* local handler is registered for the slot, pick local.
|
|
17
|
+
* Otherwise pick the highest-priority cloud provider.
|
|
18
|
+
* - cheapest — pick the provider with the lowest per-token cost.
|
|
19
|
+
* - fastest — pick the provider with the lowest tracked p50 latency
|
|
20
|
+
* (providers with no samples yet rank last; when none have samples it falls back to native priority).
|
|
21
|
+
* - prefer-local — try local first; if it fails or has no handler,
|
|
22
|
+
* fall through to the next-best non-local. A POOR device
|
|
23
|
+
* tier (cannot run a local LM) softly demotes the local
|
|
24
|
+
* pick so an unusable on-device path isn't forced.
|
|
25
|
+
* - round-robin — distribute load evenly across eligible providers.
|
|
26
|
+
*
|
|
27
|
+
* Latency is tracked in a ring buffer per provider per model type. Cost
|
|
28
|
+
* is a static table of published per-million-token rates; local providers
|
|
29
|
+
* are $0. Neither is exact — the goal is "good enough to discriminate"
|
|
30
|
+
* rather than dollar-accurate billing.
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
import type { DeviceTierAssessment } from "./device-tier";
|
|
34
|
+
import type { HandlerRegistration } from "./handler-registry";
|
|
35
|
+
import type { RoutingPolicy } from "./routing-preferences";
|
|
36
|
+
|
|
37
|
+
/** Maximum number of latency samples a single `RingBuffer` retains before evicting the oldest. */
const RING_SIZE = 32;
|
|
38
|
+
|
|
39
|
+
/**
 * IDs of the providers that run inference on the device itself, i.e. without
 * a network round-trip. Membership is what `isLocalProvider` tests against.
 */
const LOCAL_PROVIDERS: ReadonlySet<string> = new Set<string>([
	"eliza-local-inference",
	"capacitor-llama",
	"eliza-device-bridge",
]);
|
|
45
|
+
|
|
46
|
+
/** True when `provider` is one of the on-device provider IDs in `LOCAL_PROVIDERS`. */
function isLocalProvider(provider: string): boolean {
	const servesOnDevice = LOCAL_PROVIDERS.has(provider);
	return servesOnDevice;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Locate the local handler to use among `candidates`, or null when none is
 * registered.
 *
 * The list is scanned in the order given (callers pass it priority-sorted).
 * An in-process / Capacitor backend always beats the device bridge, which
 * mirrors the precedence used by the `prefer-local` policy.
 */
function findLocalCandidate(
	candidates: HandlerRegistration[],
): HandlerRegistration | null {
	// First entry, in list order, whose provider ID is one of `ids`.
	const firstWithProvider = (...ids: string[]) =>
		candidates.find((entry) => ids.includes(entry.provider));

	return (
		firstWithProvider("eliza-local-inference", "capacitor-llama") ??
		firstWithProvider("eliza-device-bridge") ??
		null
	);
}
|
|
66
|
+
|
|
67
|
+
/**
 * Decide whether the device should default to local inference.
 *
 * Returns false when there is no assessment or the device cannot run a local
 * LM at all. Otherwise returns true when the classifier recommends the
 * "local" mode or places the device in the MAX or GOOD tier; any other
 * combination returns false, which makes the `auto` policy route to cloud.
 */
function deviceFavoursLocal(assessment: DeviceTierAssessment | null): boolean {
	if (!assessment || !assessment.canRunLocalLm) {
		return false;
	}
	const { recommendedMode, tier } = assessment;
	if (recommendedMode === "local") return true;
	return tier === "MAX" || tier === "GOOD";
}
|
|
82
|
+
|
|
83
|
+
/** One observed request latency, as stored in a `RingBuffer`. */
interface LatencySample {
	/** `Date.now()` value captured when the sample was recorded. */
	at: number;
	/** Measured request duration in milliseconds. */
	durationMs: number;
}
|
|
87
|
+
|
|
88
|
+
/**
 * Bounded FIFO of latency samples. Holds at most `RING_SIZE` entries; pushing
 * beyond that drops the oldest sample.
 */
class RingBuffer {
	private buf: LatencySample[] = [];

	/** Append `sample`, evicting the oldest entry once the capacity is exceeded. */
	push(sample: LatencySample): void {
		this.buf.push(sample);
		if (this.buf.length > RING_SIZE) {
			this.buf.shift();
		}
	}

	/**
	 * Median duration of the retained samples, or null when there are none.
	 * For an even number of samples this is the upper of the two middle values.
	 */
	p50(): number | null {
		const count = this.buf.length;
		if (count === 0) return null;
		// `map` yields a fresh array, so the in-place sort leaves `buf` untouched.
		const durations = this.buf.map((entry) => entry.durationMs);
		durations.sort((x, y) => x - y);
		const middle = durations[Math.floor(count / 2)];
		return middle ?? null;
	}

	/** Number of samples currently retained. */
	size(): number {
		return this.buf.length;
	}
}
|
|
103
|
+
|
|
104
|
+
/**
 * Relative cost per million tokens, keyed by provider ID.
 *
 * The figures are deliberately conservative: the policy only needs the
 * ordering to be right for product defaults. Providers absent from the table
 * have no known cost.
 */
const COST_PER_MILLION_TOKENS: Partial<
	Record<string, { input: number; output: number }>
> = {
	// On-device / device-bridge: zero, because the user already paid for the hardware.
	"eliza-local-inference": { input: 0, output: 0 },
	"eliza-device-bridge": { input: 0, output: 0 },
	"capacitor-llama": { input: 0, output: 0 },

	// Subscriptions: a small marginal cost.
	"anthropic-subscription": { input: 0.1, output: 0.1 },
	"openai-codex": { input: 0.1, output: 0.1 },
	"openai-subscription": { input: 0.1, output: 0.1 },

	// Direct APIs sit above subscriptions.
	"anthropic": { input: 3, output: 15 },
	"openai": { input: 2.5, output: 10 },
	"grok": { input: 5, output: 15 },
	"google": { input: 1.25, output: 5 },
	"google-genai": { input: 1.25, output: 5 },
	"moonshot": { input: 1.25, output: 5 },
	"kimi": { input: 1.25, output: 5 },
	"nearai": { input: 0.85, output: 3.3 },
	"zai": { input: 1.25, output: 5 },
	"glm": { input: 1.25, output: 5 },
	"mistral": { input: 2, output: 6 },

	// Eliza Cloud is last: managed fallback is the most expensive path for the user.
	"elizacloud": { input: 30, output: 60 },
};
|
|
134
|
+
|
|
135
|
+
/** Mutable bookkeeping the policy engine keeps for a single provider. */
interface ProviderStats {
	/** Value stored by the most recent `recordPick`, keyed by model type. */
	lastPicked: Map<string /* modelType */, number /* timestamp */>;
	/** Recent latency samples, keyed by model type. */
	latency: Map<string /* modelType */, RingBuffer>;
}
|
|
139
|
+
|
|
140
|
+
/**
 * Routing policy engine. Keeps per-provider latency samples and last-pick
 * stamps, and combines them with the static cost table and the device-tier
 * assessment to choose which registered handler serves a request.
 */
class PolicyEngine {
	private readonly stats = new Map<string /* provider */, ProviderStats>();

	/** Most recent stamp issued by `recordPick`; keeps stamps strictly increasing. */
	private lastPickStamp = 0;

	/** Get-or-create the stats record for `provider`. Used by write paths only. */
	private statsFor(provider: string): ProviderStats {
		let s = this.stats.get(provider);
		if (!s) {
			s = { latency: new Map(), lastPicked: new Map() };
			this.stats.set(provider, s);
		}
		return s;
	}

	/**
	 * Return the candidate with the lowest `score`, breaking ties by highest
	 * priority, or null when `items` is empty. Does not mutate `items`.
	 */
	private static lowestScore(
		items: HandlerRegistration[],
		score: (candidate: HandlerRegistration) => number,
	): HandlerRegistration | null {
		const ranked = [...items].sort((a, b) => {
			const sa = score(a);
			const sb = score(b);
			// Compare before subtracting: Infinity - Infinity would be NaN.
			if (sa !== sb) return sa - sb;
			return b.priority - a.priority;
		});
		return ranked[0] ?? null;
	}

	/** Record one observed latency (in ms) for `provider` serving `modelType`. */
	recordLatency(provider: string, modelType: string, durationMs: number): void {
		const s = this.statsFor(provider);
		let buf = s.latency.get(modelType);
		if (!buf) {
			buf = new RingBuffer();
			s.latency.set(modelType, buf);
		}
		buf.push({ durationMs, at: Date.now() });
	}

	/**
	 * Record that `provider` was just chosen for `modelType`.
	 *
	 * The stored stamp is `Date.now()`, bumped by 1 ms whenever the clock has
	 * not advanced past the previous stamp. Without that, several picks inside
	 * the same millisecond would tie and `round-robin` would fall back to
	 * priority order, selecting the same provider repeatedly.
	 */
	recordPick(provider: string, modelType: string): void {
		const now = Date.now();
		this.lastPickStamp =
			now > this.lastPickStamp ? now : this.lastPickStamp + 1;
		this.statsFor(provider).lastPicked.set(modelType, this.lastPickStamp);
	}

	/**
	 * Median tracked latency for (`provider`, `modelType`), or null when no
	 * samples exist. Read-only: never creates a stats entry.
	 */
	p50(provider: string, modelType: string): number | null {
		return this.stats.get(provider)?.latency.get(modelType)?.p50() ?? null;
	}

	/**
	 * Stamp stored by the most recent `recordPick` for (`provider`,
	 * `modelType`), or null when it was never picked. Read-only: never creates
	 * a stats entry.
	 */
	lastPicked(provider: string, modelType: string): number | null {
		return this.stats.get(provider)?.lastPicked.get(modelType) ?? null;
	}

	/**
	 * Blended relative cost for `provider`, or null when the provider has no
	 * entry in `COST_PER_MILLION_TOKENS`.
	 */
	costOf(provider: string): number | null {
		const c = COST_PER_MILLION_TOKENS[provider];
		if (!c) return null;
		// Weighted sum: output is weighted 3:1 against input (0.75 vs 0.25).
		return c.input * 0.25 + c.output * 0.75;
	}

	/**
	 * Pick a provider for this (modelType, policy) given the registry.
	 * Returns the HandlerRegistration whose handler the router-handler
	 * should dispatch to, or null if no eligible handler exists.
	 *
	 * `preferredProvider` is only honoured for policy === "manual".
	 * This method does not call `recordPick` itself.
	 */
	pickProvider(args: {
		modelType: string;
		policy: RoutingPolicy;
		preferredProvider: string | null;
		candidates: HandlerRegistration[];
		/** Provider ID of the router itself — always excluded from candidates. */
		selfProvider: string;
		/**
		 * Device-capability assessment from `classifyDeviceTier()`. Consulted by
		 * the `auto` policy and used as a soft hint by `prefer-local`. Null when
		 * the host hasn't probed hardware yet — `auto` then falls back to cloud.
		 */
		deviceTier?: DeviceTierAssessment | null;
	}): HandlerRegistration | null {
		// `filter` returns a fresh array, so the in-place sort below cannot
		// reorder the caller's `candidates`.
		const eligible = args.candidates
			.filter((c) => c.provider !== args.selfProvider)
			// Defensive sort — real callers already sort, but test fixtures and
			// non-registry callers might not, and a silent "pick-wrong" would be
			// worse than the extra O(n log n).
			.sort((a, b) => b.priority - a.priority);
		if (eligible.length === 0) return null;

		switch (args.policy) {
			case "manual": {
				if (args.preferredProvider) {
					const match = eligible.find(
						(c) => c.provider === args.preferredProvider,
					);
					if (match) return match;
				}
				// Fallback: highest native priority.
				return eligible[0] ?? null;
			}
			case "auto": {
				// Capability-driven: route to local only when the device can
				// comfortably run it AND a local handler is registered for the
				// slot. Otherwise pick the highest-priority cloud provider.
				const local = findLocalCandidate(eligible);
				if (local && deviceFavoursLocal(args.deviceTier ?? null)) {
					return local;
				}
				const cloud = eligible.find((c) => !isLocalProvider(c.provider));
				return cloud ?? eligible[0] ?? null;
			}
			case "cheapest": {
				// Providers with no known cost rank last.
				return PolicyEngine.lowestScore(
					eligible,
					(c) => this.costOf(c.provider) ?? Number.POSITIVE_INFINITY,
				);
			}
			case "fastest": {
				// Untracked providers get Infinity → deprioritised until we
				// have samples. First call always falls through to native
				// priority via the tie-break.
				return PolicyEngine.lowestScore(
					eligible,
					(c) =>
						this.p50(c.provider, args.modelType) ?? Number.POSITIVE_INFINITY,
				);
			}
			case "prefer-local": {
				const local = findLocalCandidate(eligible);
				// Soft capability hint: a known-POOR device (cannot run a local
				// LM) demotes the local pick so we don't force an unusable
				// on-device path. When no assessment is present, keep the
				// historical local-first behaviour.
				const tier = args.deviceTier ?? null;
				const localUnviable = tier !== null && !tier.canRunLocalLm;
				if (local && !localUnviable) return local;
				const cloud = eligible.find((c) => !isLocalProvider(c.provider));
				if (cloud) return cloud;
				return local ?? eligible[0] ?? null;
			}
			case "round-robin": {
				// Pick the one least-recently-picked (never-picked counts as 0).
				// Ties broken by priority.
				return PolicyEngine.lowestScore(
					eligible,
					(c) => this.lastPicked(c.provider, args.modelType) ?? 0,
				);
			}
			default: {
				// A value outside the cases above can only arrive when the policy
				// originates from untyped data. Degrade to native priority rather
				// than falling off the switch and returning `undefined`.
				return eligible[0] ?? null;
			}
		}
	}

	/**
	 * For tests and diagnostics: p50 latency per provider per model type, for
	 * every provider that has a stats record.
	 */
	snapshot(): Record<string, Record<string, number | null>> {
		const out: Record<string, Record<string, number | null>> = {};
		for (const [provider, stats] of this.stats) {
			const row: Record<string, number | null> = {};
			for (const [modelType, buf] of stats.latency) {
				row[modelType] = buf.p50();
			}
			out[provider] = row;
		}
		return out;
	}
}
|
|
296
|
+
|
|
297
|
+
/** Module-level `PolicyEngine` instance; the class itself is not exported, so importers share this one. */
export const policyEngine = new PolicyEngine();
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Re-export of the shared routing-preferences module. The canonical
|
|
3
|
+
* implementation lives in `@elizaos/shared/local-inference` because both
|
|
4
|
+
* the server (`@elizaos/app-core`) and the UI client (`@elizaos/ui`)
|
|
5
|
+
* read/write the same routing.json with identical semantics.
|
|
6
|
+
*/
|
|
7
|
+
export { DEFAULT_ROUTING_POLICY, isRoutingPolicy, ROUTING_POLICIES, type RoutingPolicy, type RoutingPreferences, readRoutingPreferences, setPolicy, setPreferredProvider, writeRoutingPreferences, } from "@elizaos/shared/local-inference/routing-preferences";
|
|
8
|
+
//# sourceMappingURL=routing-preferences.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"routing-preferences.d.ts","sourceRoot":"","sources":["routing-preferences.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AACH,OAAO,EACN,sBAAsB,EACtB,eAAe,EACf,gBAAgB,EAChB,KAAK,aAAa,EAClB,KAAK,kBAAkB,EACvB,sBAAsB,EACtB,SAAS,EACT,oBAAoB,EACpB,uBAAuB,GACvB,MAAM,qDAAqD,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Re-export of the shared routing-preferences module. The canonical
|
|
3
|
+
* implementation lives in `@elizaos/shared/local-inference` because both
|
|
4
|
+
* the server (`@elizaos/app-core`) and the UI client (`@elizaos/ui`)
|
|
5
|
+
* read/write the same routing.json with identical semantics.
|
|
6
|
+
*/
|
|
7
|
+
export {
|
|
8
|
+
DEFAULT_ROUTING_POLICY,
|
|
9
|
+
isRoutingPolicy,
|
|
10
|
+
ROUTING_POLICIES,
|
|
11
|
+
type RoutingPolicy,
|
|
12
|
+
type RoutingPreferences,
|
|
13
|
+
readRoutingPreferences,
|
|
14
|
+
setPolicy,
|
|
15
|
+
setPreferredProvider,
|
|
16
|
+
writeRoutingPreferences,
|
|
17
|
+
} from "@elizaos/shared/local-inference/routing-preferences";
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Inference runtime-target detection.
|
|
3
|
+
*
|
|
4
|
+
* The local-inference subsystem has one shipping operational shape:
|
|
5
|
+
*
|
|
6
|
+
* - `"ffi"` — in-process bun:ffi streaming bindings against the
|
|
7
|
+
* `libelizainference` (fused omnivoice + llama.cpp)
|
|
8
|
+
* shared library. Lives in `voice/ffi-bindings.ts` and
|
|
9
|
+
* `ffi-streaming-runner.ts`. This is mandatory on
|
|
10
|
+
* desktop and mobile so all generation runs in the
|
|
11
|
+
* app's own address space.
|
|
12
|
+
* - `"native-bridge"`— delegate to a Capacitor / JNI-side native runtime
|
|
13
|
+
* plugin (e.g. a Swift / Kotlin host wrapping
|
|
14
|
+
* llama.cpp directly). Reserved for builds where the
|
|
15
|
+
* FFI layer cannot be used (e.g. a build that disables
|
|
16
|
+
* `bun:ffi`); only the env-override path selects it.
|
|
17
|
+
*
|
|
18
|
+
* This module is the single source of truth for the platform → runtime
|
|
19
|
+
* mapping.
|
|
20
|
+
*
|
|
21
|
+
* Detection inputs (in priority order):
|
|
22
|
+
* 1. `ELIZA_INFERENCE_MODE` env var. Values: `ffi` / `native-bridge`.
|
|
23
|
+
* Wins over every heuristic so operators can force a branch from a CI
|
|
24
|
+
* shell or a debug build without recompiling.
|
|
25
|
+
* 2. Capacitor native marker — when `globalThis.Capacitor.isNativePlatform()`
|
|
26
|
+
* returns `true`, we are inside a Capacitor shell on iOS or Android.
|
|
27
|
+
* Force `"ffi"` regardless of Node's `process.platform`.
|
|
28
|
+
* 3. `process.platform` — all recognised and unknown platforms map to
|
|
29
|
+
* `"ffi"` unless explicitly overridden to `"native-bridge"`.
|
|
30
|
+
*
|
|
31
|
+
* The function is pure: same inputs → same answer. All inputs are explicit
|
|
32
|
+
* arguments so tests can replay the decision offline without poking the live
|
|
33
|
+
* `process` / `globalThis`.
|
|
34
|
+
*
|
|
35
|
+
* NOTE: this module does NOT decide whether the FFI library is actually
|
|
36
|
+
* loaded or whether the FFI symbols are present. `backend-selector.ts`
|
|
37
|
+
* handles that (with a hard throw if the mobile build is missing the
|
|
38
|
+
* streaming-LLM symbols). The two work together:
|
|
39
|
+
*
|
|
40
|
+
* inferenceRuntimeMode() === "ffi" → use `ffi-streaming-runner.ts`
|
|
41
|
+
* inferenceRuntimeMode() === "native-bridge"→ use the Capacitor plugin shim
|
|
42
|
+
*/
|
|
43
|
+
export type InferenceRuntimeMode = "ffi" | "native-bridge";
|
|
44
|
+
/**
|
|
45
|
+
* Node's `process.platform` values, narrowed to the set we care about.
|
|
46
|
+
* `unknown` covers exotic platforms (aix, freebsd, …) without baking them
|
|
47
|
+
* into the public API of this module.
|
|
48
|
+
*/
|
|
49
|
+
export type SupportedHostPlatform = "darwin" | "linux" | "win32" | "ios" | "android" | "unknown";
|
|
50
|
+
export interface InferenceRuntimeModeInput {
|
|
51
|
+
/**
|
|
52
|
+
* Raw `process.platform` value (or a synthetic one in tests). Optional —
|
|
53
|
+
* defaults to the live `process.platform`. Anything other than the
|
|
54
|
+
* recognised set is treated as `"unknown"` and routed to `"ffi"`
|
|
55
|
+
* unless an env override selects the native bridge.
|
|
56
|
+
*/
|
|
57
|
+
platform?: SupportedHostPlatform | NodeJS.Platform;
|
|
58
|
+
/**
|
|
59
|
+
* Whether the JS runtime is currently embedded inside a Capacitor
|
|
60
|
+
* native shell. Defaults to inspecting `globalThis.Capacitor`. Tests
|
|
61
|
+
* pass `false` to keep the env-var / platform branches deterministic.
|
|
62
|
+
*/
|
|
63
|
+
isCapacitorNative?: boolean;
|
|
64
|
+
/**
|
|
65
|
+
* Environment-variable bag. Defaults to `process.env`. The function
|
|
66
|
+
* reads `ELIZA_INFERENCE_MODE` for the override.
|
|
67
|
+
*/
|
|
68
|
+
env?: NodeJS.ProcessEnv;
|
|
69
|
+
}
|
|
70
|
+
/**
|
|
71
|
+
* Read and normalise the `ELIZA_INFERENCE_MODE` env override. Returns `null`
|
|
72
|
+
* when unset or unrecognised.
|
|
73
|
+
*/
|
|
74
|
+
export declare function readRuntimeModeEnvOverride(env?: NodeJS.ProcessEnv): InferenceRuntimeMode | null;
|
|
75
|
+
/**
|
|
76
|
+
* Synchronous Capacitor probe. Reads `globalThis.Capacitor.isNativePlatform()`
|
|
77
|
+
* defensively — neither the property nor the call site is guaranteed to
|
|
78
|
+
* exist in every build (desktop, plain Node tests, web-only Vite dev).
|
|
79
|
+
*
|
|
80
|
+
* Surfaces no errors: any failure means "not native". The throw-on-bad-build
|
|
81
|
+
* policy belongs to the backend-selector, not to platform detection.
|
|
82
|
+
*/
|
|
83
|
+
export declare function isCapacitorNativeRuntime(global?: typeof globalThis): boolean;
|
|
84
|
+
/**
|
|
85
|
+
* Decide which inference runtime mode the host should use. Pure: caller
|
|
86
|
+
* controls every input. See file header for the decision rules.
|
|
87
|
+
*/
|
|
88
|
+
export declare function inferenceRuntimeMode(input?: InferenceRuntimeModeInput): InferenceRuntimeMode;
|
|
89
|
+
/**
|
|
90
|
+
* Convenience wrapper for the `backend-selector.ts` boundary: maps the
|
|
91
|
+
* runtime mode onto the `"desktop" | "mobile"` slot the selector expects.
|
|
92
|
+
*
|
|
93
|
+
* `native-bridge` is treated as `"mobile"` because in every shipping
|
|
94
|
+
* configuration where we'd pick it the host has already classified itself
|
|
95
|
+
* as a mobile device (Capacitor shell that opted out of `bun:ffi`).
|
|
96
|
+
*/
|
|
97
|
+
export declare function inferencePlatformClass(mode?: InferenceRuntimeMode): "desktop" | "mobile";
|
|
98
|
+
//# sourceMappingURL=runtime-target.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"runtime-target.d.ts","sourceRoot":"","sources":["runtime-target.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAyCG;AAEH,MAAM,MAAM,oBAAoB,GAAG,KAAK,GAAG,eAAe,CAAC;AAE3D;;;;GAIG;AACH,MAAM,MAAM,qBAAqB,GAC9B,QAAQ,GACR,OAAO,GACP,OAAO,GACP,KAAK,GACL,SAAS,GACT,SAAS,CAAC;AAEb,MAAM,WAAW,yBAAyB;IACzC;;;;;OAKG;IACH,QAAQ,CAAC,EAAE,qBAAqB,GAAG,MAAM,CAAC,QAAQ,CAAC;IACnD;;;;OAIG;IACH,iBAAiB,CAAC,EAAE,OAAO,CAAC;IAC5B;;;OAGG;IACH,GAAG,CAAC,EAAE,MAAM,CAAC,UAAU,CAAC;CACxB;AAED;;;GAGG;AACH,wBAAgB,0BAA0B,CACzC,GAAG,GAAE,MAAM,CAAC,UAAwB,GAClC,oBAAoB,GAAG,IAAI,CAa7B;AAED;;;;;;;GAOG;AACH,wBAAgB,wBAAwB,CACvC,MAAM,GAAE,OAAO,UAAuB,GACpC,OAAO,CAST;AAED;;;GAGG;AACH,wBAAgB,oBAAoB,CACnC,KAAK,GAAE,yBAA8B,GACnC,oBAAoB,CAYtB;AAED;;;;;;;GAOG;AACH,wBAAgB,sBAAsB,CACrC,IAAI,GAAE,oBAA6C,GACjD,SAAS,GAAG,QAAQ,CAEtB"}
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Inference runtime-target detection.
|
|
3
|
+
*
|
|
4
|
+
* The local-inference subsystem has one shipping mode (`"ffi"`) plus one reserved, override-only mode (`"native-bridge"`):
|
|
5
|
+
*
|
|
6
|
+
* - `"ffi"` — in-process bun:ffi streaming bindings against the
|
|
7
|
+
* `libelizainference` (fused omnivoice + llama.cpp)
|
|
8
|
+
* shared library. Lives in `voice/ffi-bindings.ts` and
|
|
9
|
+
* `ffi-streaming-runner.ts`. This is mandatory on
|
|
10
|
+
* desktop and mobile so all generation runs in the
|
|
11
|
+
* app's own address space.
|
|
12
|
+
* - `"native-bridge"`— delegate to a Capacitor / JNI-side native runtime
|
|
13
|
+
* plugin (e.g. a Swift / Kotlin host wrapping
|
|
14
|
+
* llama.cpp directly). Reserved for builds where the
|
|
15
|
+
* FFI layer cannot be used (e.g. a build that disables
|
|
16
|
+
* `bun:ffi`); only the env-override path selects it.
|
|
17
|
+
*
|
|
18
|
+
* This module is the single source of truth for the platform → runtime
|
|
19
|
+
* mapping.
|
|
20
|
+
*
|
|
21
|
+
* Detection inputs (in priority order):
|
|
22
|
+
* 1. `ELIZA_INFERENCE_MODE` env var. Values: `ffi` / `native-bridge`.
|
|
23
|
+
* Wins over every heuristic so operators can force a branch from a CI
|
|
24
|
+
* shell or a debug build without recompiling.
|
|
25
|
+
* 2. Capacitor native marker — when `globalThis.Capacitor.isNativePlatform()`
|
|
26
|
+
* returns `true`, we are inside a Capacitor shell on iOS or Android.
|
|
27
|
+
* Force `"ffi"` regardless of Node's `process.platform`.
|
|
28
|
+
* 3. `process.platform` — all recognised and unknown platforms map to
|
|
29
|
+
* `"ffi"` unless explicitly overridden to `"native-bridge"`.
|
|
30
|
+
*
|
|
31
|
+
* The function is pure: same inputs → same answer. All inputs are explicit
|
|
32
|
+
* arguments so tests can replay the decision offline without poking the live
|
|
33
|
+
* `process` / `globalThis`.
|
|
34
|
+
*
|
|
35
|
+
* NOTE: this module does NOT decide whether the FFI library is actually
|
|
36
|
+
* loaded or whether the FFI symbols are present. `backend-selector.ts`
|
|
37
|
+
* handles that (with a hard throw if the mobile build is missing the
|
|
38
|
+
* streaming-LLM symbols). The two work together:
|
|
39
|
+
*
|
|
40
|
+
* inferenceRuntimeMode() === "ffi" → use `ffi-streaming-runner.ts`
|
|
41
|
+
* inferenceRuntimeMode() === "native-bridge"→ use the Capacitor plugin shim
|
|
42
|
+
*/
|
|
43
|
+
|
|
44
|
+
/**
 * Which inference runtime the host should drive.
 *
 * - `"ffi"` — the in-process FFI streaming runner.
 * - `"native-bridge"` — a Capacitor / JNI-side native runtime plugin.
 */
export type InferenceRuntimeMode = "ffi" | "native-bridge";
|
|
45
|
+
|
|
46
|
+
/**
 * Host platform identifiers this module cares about.
 *
 * `darwin`, `linux`, `win32` and `android` are values Node's
 * `process.platform` can report. `ios` is not a Node platform value; it is
 * a synthetic identifier that callers (or tests) supply themselves.
 * `unknown` covers exotic platforms (aix, freebsd, …) without baking them
 * into the public API of this module.
 */
export type SupportedHostPlatform =
  | "darwin"
  | "linux"
  | "win32"
  | "ios"
  | "android"
  | "unknown";
|
|
58
|
+
|
|
59
|
+
/**
 * Explicit inputs for `inferenceRuntimeMode`. Every field is optional so
 * production callers can pass nothing while tests can pin each input.
 */
export interface InferenceRuntimeModeInput {
  /**
   * Raw `process.platform` value (or a synthetic one in tests).
   *
   * NOTE: `inferenceRuntimeMode` does not currently read this field. Every
   * platform, recognised or not, resolves to `"ffi"` unless the env
   * override selects the native bridge.
   */
  platform?: SupportedHostPlatform | NodeJS.Platform;
  /**
   * Whether the JS runtime is currently embedded inside a Capacitor
   * native shell. When omitted, `inferenceRuntimeMode` probes
   * `globalThis.Capacitor` via `isCapacitorNativeRuntime()`. Tests pass
   * `false` to keep the env-var / platform branches deterministic.
   */
  isCapacitorNative?: boolean;
  /**
   * Environment-variable bag. Defaults to `process.env`. Only
   * `ELIZA_INFERENCE_MODE` is read, for the override.
   */
  env?: NodeJS.ProcessEnv;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Read and normalise the `ELIZA_INFERENCE_MODE` env override.
 *
 * The raw value is trimmed and lower-cased before matching:
 * - `ffi`, `ffi-streaming` → `"ffi"`
 * - `native-bridge`, `native`, `bridge`, `capacitor` → `"native-bridge"`
 *
 * @param env - Environment-variable bag. Defaults to `process.env`, or to an
 *   empty bag on hosts that expose no `process` global, so the default call
 *   cannot throw a `ReferenceError` there.
 * @returns The selected mode, or `null` when the variable is unset, blank,
 *   or unrecognised.
 */
export function readRuntimeModeEnvOverride(
  env: NodeJS.ProcessEnv = typeof process !== "undefined" ? process.env : {},
): InferenceRuntimeMode | null {
  const raw = (env.ELIZA_INFERENCE_MODE ?? "").trim().toLowerCase();
  switch (raw) {
    case "ffi":
    case "ffi-streaming":
      return "ffi";
    case "native-bridge":
    case "native":
    case "bridge":
    case "capacitor":
      return "native-bridge";
    default:
      // Unset, blank, or a value we do not recognise: no override.
      return null;
  }
}
|
|
100
|
+
|
|
101
|
+
/**
 * Synchronous Capacitor probe. Reads `Capacitor.isNativePlatform()` off the
 * supplied global object defensively — neither the property nor the method
 * is guaranteed to exist in every build (desktop, plain Node tests,
 * web-only Vite dev).
 *
 * Surfaces no errors: any failure means "not native". That covers a missing
 * `Capacitor`, a non-function `isNativePlatform`, a throwing accessor, and a
 * throwing call. The throw-on-bad-build policy belongs to the
 * backend-selector, not to platform detection.
 *
 * @param global - Object to probe; defaults to the live `globalThis`.
 * @returns `true` only when `isNativePlatform()` returns exactly `true`.
 */
export function isCapacitorNativeRuntime(
  global: typeof globalThis = globalThis,
): boolean {
  try {
    // The property read sits inside the `try` so a throwing getter on
    // `Capacitor` is treated like any other failure.
    const cap = (global as { Capacitor?: { isNativePlatform?: () => boolean } })
      .Capacitor;
    if (!cap || typeof cap.isNativePlatform !== "function") return false;
    // Strict comparison: truthy non-boolean results do not count as native.
    return cap.isNativePlatform() === true;
  } catch {
    return false;
  }
}
|
|
121
|
+
|
|
122
|
+
/**
 * Decide which inference runtime mode the host should use.
 *
 * Decision order:
 *  1. A recognised `ELIZA_INFERENCE_MODE` override wins outright.
 *  2. Otherwise a Capacitor-native host resolves to `"ffi"`.
 *  3. Otherwise every platform resolves to `"ffi"`.
 *
 * Steps 2 and 3 currently give the same answer, so `"native-bridge"` is
 * reachable only through the env override and `input.platform` is not
 * consulted.
 *
 * @param input - Explicit inputs. Omitted fields fall back to the live
 *   environment: `process.env` (an empty bag when no `process` global
 *   exists) and the `globalThis.Capacitor` probe.
 * @returns The selected runtime mode.
 */
export function inferenceRuntimeMode(
  input: InferenceRuntimeModeInput = {},
): InferenceRuntimeMode {
  // Guard the `process` lookup so hosts without a `process` global do not
  // hit a ReferenceError before any decision is made.
  const env =
    input.env ?? (typeof process !== "undefined" ? process.env : {});
  const override = readRuntimeModeEnvOverride(env);
  if (override !== null) return override;

  const capacitor = input.isCapacitorNative ?? isCapacitorNativeRuntime();
  if (capacitor) return "ffi";

  return "ffi";
}
|
|
141
|
+
|
|
142
|
+
/**
 * Convenience wrapper for the `backend-selector.ts` boundary: maps the
 * runtime mode onto the `"desktop" | "mobile"` slot the selector expects.
 *
 * `"ffi"` maps to `"desktop"`; anything else maps to `"mobile"`.
 * `native-bridge` is treated as `"mobile"` because in every shipping
 * configuration where we'd pick it the host has already classified itself
 * as a mobile device (Capacitor shell that opted out of `bun:ffi`).
 *
 * NOTE(review): a Capacitor-native host without an env override resolves to
 * `"ffi"` and is therefore classified as `"desktop"` here — confirm that is
 * what the backend selector expects.
 *
 * @param mode - Runtime mode to classify; defaults to `inferenceRuntimeMode()`.
 * @returns The platform class for the given mode.
 */
export function inferencePlatformClass(
  mode: InferenceRuntimeMode = inferenceRuntimeMode(),
): "desktop" | "mobile" {
  return mode === "ffi" ? "desktop" : "mobile";
}
|