@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +81 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +7 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +54 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1171 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +190 -0
- package/src/routes/local-inference-asr-route.ts +213 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +423 -0
- package/src/routes/local-inference-compat-routes.ts +782 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1398 -0
- package/src/runtime/index.d.ts +14 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +27 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/asr/errors.d.ts +21 -0
- package/src/services/asr/errors.d.ts.map +1 -0
- package/src/services/asr/errors.ts +50 -0
- package/src/services/asr/hash.d.ts +28 -0
- package/src/services/asr/hash.d.ts.map +1 -0
- package/src/services/asr/hash.ts +49 -0
- package/src/services/asr/index.d.ts +76 -0
- package/src/services/asr/index.d.ts.map +1 -0
- package/src/services/asr/index.ts +178 -0
- package/src/services/asr/types.d.ts +91 -0
- package/src/services/asr/types.d.ts.map +1 -0
- package/src/services/asr/types.ts +95 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +240 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +724 -0
- package/src/services/downloader.ts +899 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +534 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1891 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +183 -0
- package/src/services/hardware.ts +404 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +281 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +30 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +225 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +693 -0
- package/src/services/manifest/schema.d.ts +715 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +655 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +569 -0
- package/src/services/memory-arbiter.d.ts +343 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +1000 -0
- package/src/services/memory-monitor.d.ts +119 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +296 -0
- package/src/services/memory-pressure.d.ts +127 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +413 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +672 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +376 -0
- package/src/services/routing-policy.d.ts +55 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.ts +228 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +15 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/tts/errors.ts +46 -0
- package/src/services/tts/index.ts +214 -0
- package/src/services/tts/tts-audio-cache.ts +235 -0
- package/src/services/tts/types.ts +157 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +148 -0
- package/src/services/voice/embedding.ts +244 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +746 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2226 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +636 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +671 -0
- package/src/services/voice/ffi-bindings.ts +3050 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.ts +105 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +420 -0
- package/src/services/voice/voice-budget.ts +656 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,1237 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Device-bridge: agent-side half of the "inference on the user's phone,
|
|
3
|
+
* agent in a container" architecture.
|
|
4
|
+
*
|
|
5
|
+
* Multi-device aware. Any number of devices can dial in; each `generate`
|
|
6
|
+
* is routed to the highest-scoring connected device at call time. A phone
|
|
7
|
+
* and a Mac paired to the same agent → requests go to the Mac; when the
|
|
8
|
+
* Mac disconnects, new requests fall through to the phone automatically.
|
|
9
|
+
*
|
|
10
|
+
* Scoring (higher = preferred):
|
|
11
|
+
* - desktop / electrobun: 100 base
|
|
12
|
+
* - ios / android: 10 base
|
|
13
|
+
* - per GB of total RAM: +2
|
|
14
|
+
* - per GB of VRAM: +5 (dedicated GPU wins big)
|
|
15
|
+
* - has loaded the right model already: +50 (avoid a swap)
|
|
16
|
+
*
|
|
17
|
+
* Disconnect tolerance
|
|
18
|
+
* --------------------
|
|
19
|
+
* A pending request stays in `pendingGenerates` until either (a) a device
|
|
20
|
+
* (same or different) returns a matching correlation-id, or (b) the
|
|
21
|
+
* timeout fires. On any device (re)connect we re-route orphaned
|
|
22
|
+
* generates to the new best device.
|
|
23
|
+
*
|
|
24
|
+
* Durability
|
|
25
|
+
* ----------
|
|
26
|
+
* Pending requests are best-effort persisted to a JSON log under
|
|
27
|
+
* `$ELIZA_STATE_DIR/local-inference/pending-requests.json` so a brief
|
|
28
|
+
* agent restart doesn't lose the queue. Persistence is async and
|
|
29
|
+
* non-blocking — failures fall back to in-memory only.
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
import { randomUUID } from "node:crypto";
|
|
33
|
+
import fs from "node:fs/promises";
|
|
34
|
+
import type { Server as HttpServer, IncomingMessage } from "node:http";
|
|
35
|
+
import path from "node:path";
|
|
36
|
+
import type { Duplex } from "node:stream";
|
|
37
|
+
import type { AgentRuntime } from "@elizaos/core";
|
|
38
|
+
import { logger } from "@elizaos/core";
|
|
39
|
+
import {
|
|
40
|
+
computeGenerationThroughput,
|
|
41
|
+
type GenerationThroughput,
|
|
42
|
+
} from "@elizaos/shared/local-inference";
|
|
43
|
+
import type {
|
|
44
|
+
LocalInferenceLoadArgs,
|
|
45
|
+
LocalInferenceLoader,
|
|
46
|
+
} from "./active-model";
|
|
47
|
+
import { localInferenceRoot } from "./paths";
|
|
48
|
+
|
|
49
|
+
const DEFAULT_CALL_TIMEOUT_MS = 60_000;
|
|
50
|
+
const DEFAULT_LOAD_TIMEOUT_MS = 120_000;
|
|
51
|
+
const HEARTBEAT_INTERVAL_MS = 15_000;
|
|
52
|
+
const PENDING_LOG_FILENAME = "pending-requests.json";
|
|
53
|
+
|
|
54
|
+
/**
 * Hardware/OS snapshot a device reports in its registration payload.
 * `scoreDevice` reads several of these fields to rank devices for routing.
 */
interface DeviceCapabilities {
  /** Desktop-class platforms ("desktop", "electrobun") get the largest base score; "web" gets none. */
  platform: "ios" | "android" | "web" | "electrobun" | "desktop";
  deviceModel: string;
  machineId?: string;
  osVersion?: string;
  isSimulator?: boolean;
  /** Total RAM in GB; also the VRAM fallback when `gpu.totalVramGb` is absent. */
  totalRamGb: number;
  /** When a positive number, the RAM score uses max(available, 60% of total), capped at total. */
  availableRamGb?: number | null;
  freeStorageGb?: number | null;
  cpuCores: number;
  /** GPU description, or null when the device reports none. Only scored when `available` is true. */
  gpu: {
    backend: "metal" | "vulkan" | "gpu-delegate" | "cuda";
    available: boolean;
    totalVramGb?: number;
  } | null;
  gpuSupported?: boolean;
  /** Low-power mode lowers the routing score. */
  lowPowerMode?: boolean;
  /** "serious" and "critical" lower the routing score; other values do not. */
  thermalState?: "nominal" | "fair" | "serious" | "critical" | "unknown";
  mtpSupported?: boolean;
  mtpReason?: string;
}
|
|
75
|
+
|
|
76
|
+
/** Payload of the "register" frame, which must be the first frame a device sends. */
interface DeviceRegistration {
  /** Key under which the connection is tracked; a later registration with the same id supersedes the earlier socket. */
  deviceId: string;
  /** Compared against `ELIZA_DEVICE_PAIRING_TOKEN` when that variable is set. */
  pairingToken?: string;
  capabilities: DeviceCapabilities;
  /** Model path the device reports as already loaded, or null. */
  loadedPath: string | null;
}
|
|
82
|
+
|
|
83
|
+
// Wire types — kept in sync by hand with the device-side bridge client.
|
|
84
|
+
|
|
85
|
+
/**
 * Frames sent from a device to the agent. Every `*Result` variant is a
 * success/failure pair discriminated by `ok` and carries the `correlationId`
 * of the request it answers.
 */
type DeviceOutbound =
  | { type: "register"; payload: DeviceRegistration }
  | { type: "loadResult"; correlationId: string; ok: true; loadedPath: string }
  | { type: "loadResult"; correlationId: string; ok: false; error: string }
  | { type: "unloadResult"; correlationId: string; ok: true }
  | { type: "unloadResult"; correlationId: string; ok: false; error: string }
  | {
      type: "generateResult";
      correlationId: string;
      ok: true;
      text: string;
      promptTokens: number;
      outputTokens: number;
      durationMs: number;
      /**
       * Time-to-first-token in ms, when the device measured it. Equals the
       * prefill wall-clock; lets the agent difference prefill vs decode tok/s.
       * Optional — absent on the non-streaming path (older device clients).
       */
      ttftMs?: number;
    }
  | { type: "generateResult"; correlationId: string; ok: false; error: string }
  | {
      type: "embedResult";
      correlationId: string;
      ok: true;
      embedding: number[];
      tokens: number;
    }
  | { type: "embedResult"; correlationId: string; ok: false; error: string }
  | { type: "pong"; at: number };
|
|
116
|
+
|
|
117
|
+
/**
 * Frames sent from the agent to a device. Every request except "ping"
 * carries a `correlationId` that the device echoes in its result frame.
 */
type AgentOutbound =
  | ({ type: "load"; correlationId: string } & LocalInferenceLoadArgs)
  | { type: "unload"; correlationId: string }
  | {
      type: "generate";
      correlationId: string;
      prompt: string;
      stopSequences?: string[];
      maxTokens?: number;
      temperature?: number;
      /**
       * Forwarded promptCacheKey from `ProviderCachePlan`. The receiving
       * device's local-inference layer can use this to derive a stable
       * slot_id (llama-server) or to look up a session in its session
       * pool (node-llama-cpp). Old clients ignore the field; new clients
       * get prefix-cache reuse across calls with the same key.
       */
      cacheKey?: string;
    }
  | { type: "embed"; correlationId: string; input: string }
  | { type: "ping"; at: number };
|
|
138
|
+
|
|
139
|
+
/**
 * The subset of a WebSocket connection this module relies on, declared
 * locally rather than taken from the dynamically imported "ws" package.
 */
interface MinimalWebSocket {
  /** Compared against the constructor's `OPEN` constant before sending heartbeats. */
  readyState: number;
  send(data: string): void;
  close(code?: number, reason?: string): void;
  on(event: "message", listener: (data: Buffer | string) => void): unknown;
  on(event: "close", listener: () => void): unknown;
  on(event: "error", listener: (err: Error) => void): unknown;
  on(event: "pong", listener: () => void): unknown;
}
|
|
148
|
+
|
|
149
|
+
/** Static ready-state constants read from the `WebSocket` export of "ws". */
interface WsConstructor {
  readonly OPEN: number;
  readonly CLOSED: number;
}
|
|
153
|
+
|
|
154
|
+
/** The subset of a `WebSocketServer` instance this module calls. */
interface WssInstance {
  /** Completes an HTTP upgrade and hands the resulting socket to `cb`. */
  handleUpgrade(
    request: IncomingMessage,
    socket: Duplex,
    head: Buffer,
    cb: (ws: MinimalWebSocket) => void,
  ): void;
  on(event: "error", listener: (err: Error) => void): unknown;
}
|
|
163
|
+
|
|
164
|
+
/** Constructor shape of the `WebSocketServer` export, limited to the options this module passes. */
interface WssConstructor {
  new (options: { noServer: boolean; maxPayload?: number }): WssInstance;
}
|
|
167
|
+
|
|
168
|
+
/** Shape the dynamically imported "ws" module must have; checked at runtime by `isWsModule`. */
interface WsModule {
  WebSocketServer: WssConstructor;
  WebSocket: WsConstructor;
}
|
|
172
|
+
|
|
173
|
+
/**
 * Runtime type guard for the dynamically imported "ws" module.
 *
 * Accepts a non-null object whose `WebSocketServer` and `WebSocket`
 * properties are both functions and whose `WebSocket` carries numeric
 * `OPEN` and `CLOSED` constants.
 */
function isWsModule(value: unknown): value is WsModule {
  if (typeof value !== "object" || value === null) {
    return false;
  }

  const serverCtor: unknown = Reflect.get(value, "WebSocketServer");
  const socketCtor: unknown = Reflect.get(value, "WebSocket");
  if (typeof serverCtor !== "function") return false;
  if (typeof socketCtor !== "function") return false;

  // Both ready-state constants must be present as numbers, checked in order.
  return ["OPEN", "CLOSED"].every(
    (constant) => typeof Reflect.get(socketCtor, constant) === "number",
  );
}
|
|
188
|
+
|
|
189
|
+
/**
 * Dynamically import "ws" and verify it exposes the expected exports.
 *
 * @returns the imported module, narrowed to `WsModule`.
 * @throws Error when the module lacks `WebSocketServer`/`WebSocket`.
 */
async function importWsModule(): Promise<WsModule> {
  const candidate: unknown = await import("ws");
  if (isWsModule(candidate)) {
    return candidate;
  }
  throw new Error("ws module did not expose WebSocketServer/WebSocket");
}
|
|
196
|
+
|
|
197
|
+
/**
 * Bookkeeping for an in-flight model-load request. Loads are tied to one
 * device: when another device registers and the routed device is no longer
 * connected, the entry is rejected rather than re-routed.
 */
interface PendingLoad {
  correlationId: string;
  modelPath: string;
  resolve: () => void;
  reject: (err: Error) => void;
  /** Timer handle; cleared before the entry is rejected on reconnect. */
  timeout: ReturnType<typeof setTimeout>;
  /** Device the load was sent to. Never null, unlike generates and embeds. */
  routedDeviceId: string;
}
|
|
205
|
+
|
|
206
|
+
/**
 * Bookkeeping for an in-flight model-unload request. Like loads, unloads are
 * rejected (not re-routed) when their routed device is gone at the time
 * another device registers.
 */
interface PendingUnload {
  correlationId: string;
  resolve: () => void;
  reject: (err: Error) => void;
  /** Timer handle; cleared before the entry is rejected on reconnect. */
  timeout: ReturnType<typeof setTimeout>;
  /** Device the unload was sent to. */
  routedDeviceId: string;
}
|
|
213
|
+
|
|
214
|
+
/** Bookkeeping for a generate request that has not yet been answered. */
interface PendingGenerate {
  correlationId: string;
  resolve: (text: string) => void;
  reject: (err: Error) => void;
  timeout: ReturnType<typeof setTimeout>;
  /** The original outbound frame, kept so it can be re-sent to another device. */
  request: AgentOutbound;
  /**
   * Device the request was routed to most recently. On device disconnect
   * this is cleared; the request sits orphaned until another device
   * connects, at which point it's re-routed.
   */
  routedDeviceId: string | null;
  /** ISO timestamp captured on first submission; used to purge stale entries on restart. */
  submittedAt: string;
}
|
|
229
|
+
|
|
230
|
+
/** Bookkeeping for an embed request that has not yet been answered. */
interface PendingEmbed {
  correlationId: string;
  resolve: (result: { embedding: number[]; tokens: number }) => void;
  reject: (err: Error) => void;
  timeout: ReturnType<typeof setTimeout>;
  /** The original outbound frame, kept so it can be re-sent to another device. */
  request: AgentOutbound;
  /** Same disconnect semantics as PendingGenerate — null when orphaned. */
  routedDeviceId: string | null;
  /**
   * ISO timestamp captured on first submission. Mirrors PendingGenerate
   * for symmetry; embeds are NOT persisted to disk (they're short-lived
   * and the caller's process holding the result promise has to be alive
   * for the answer to mean anything), so this field is purely
   * observational (status snapshots, debugging) today.
   */
  submittedAt: string;
}
|
|
247
|
+
|
|
248
|
+
/** Live state for one registered device connection. */
interface ConnectedDevice {
  deviceId: string;
  socket: MinimalWebSocket;
  capabilities: DeviceCapabilities;
  /** Model path the device reported as loaded at registration, or null. */
  loadedPath: string | null;
  /** Epoch milliseconds (`Date.now()`) captured at registration. */
  connectedAt: number;
  /** Epoch milliseconds; initialised to the registration time. */
  lastHeartbeatAt: number;
  /** Interval that sends a "ping" frame every `HEARTBEAT_INTERVAL_MS` while the socket is open. */
  heartbeatTimer: ReturnType<typeof setInterval>;
}
|
|
257
|
+
|
|
258
|
+
/** Per-device entry in a `DeviceBridgeStatus` snapshot. */
export interface DeviceSummary {
  deviceId: string;
  capabilities: DeviceCapabilities;
  loadedPath: string | null;
  /** ISO-8601 rendering of the device's connection time. */
  connectedSince: string;
  /** Routing score; higher is preferred. */
  score: number;
  /** Number of pending generates, embeds, loads and unloads currently routed to this device. */
  activeRequests: number;
  /** True only for the highest-scoring device in the snapshot. */
  isPrimary: boolean;
}
|
|
267
|
+
|
|
268
|
+
/** Snapshot of the bridge returned by `status()` and pushed to status listeners. */
export interface DeviceBridgeStatus {
  /** True if any device is currently connected. */
  connected: boolean;
  /** Connected devices, sorted by descending score. */
  devices: DeviceSummary[];
  /** Device id of the current best-score device, or null when none. */
  primaryDeviceId: string | null;
  /** Total generates/embeds/loads/unloads queued (either in-flight or awaiting a device). */
  pendingRequests: number;
  // Legacy single-device fields — kept for UI backward compat. These mirror
  // the primary device so old `DeviceBridgeStatusBar` code keeps working.
  deviceId: string | null;
  capabilities: DeviceCapabilities | null;
  loadedPath: string | null;
  connectedSince: string | null;
}
|
|
283
|
+
|
|
284
|
+
/**
 * Serialisable subset of a `PendingGenerate`: the same three fields without
 * the promise callbacks, timer or routing state.
 * NOTE(review): presumably the on-disk record shape of the pending-requests
 * log — confirm against the persistence code.
 */
interface PersistedGenerateRequest {
  correlationId: string;
  request: AgentOutbound;
  submittedAt: string;
}
|
|
289
|
+
|
|
290
|
+
/**
 * One on-device generation's measured resource signal, emitted to
 * `subscribeGenerationMetrics` listeners after every successful `generateResult`.
 * The Mobile Resource Workbench folds these into a `DeviceResourceMetrics`
 * accumulator (prefill/decode tok/s, TTFT, per-tier aggregation). All
 * throughput fields are `null` when the device could not measure the inputs.
 */
export interface DeviceGenerationMetrics {
  deviceId: string;
  /** Platform of the reporting device, or null when not available. */
  platform: DeviceCapabilities["platform"] | null;
  /** Device model identifier (e.g. `iPhone17,2`) for per-device baselines. */
  deviceModel: string | null;
  promptTokens: number;
  outputTokens: number;
  durationMs: number;
  /** Time-to-first-token in ms, or null when the device did not report it. */
  ttftMs: number | null;
  throughput: GenerationThroughput;
}
|
|
308
|
+
|
|
309
|
+
/**
 * Scoring function — pick the most powerful device available.
 * Pure, synchronous, and easy to test.
 *
 * Score = platform base + RAM score + VRAM score + loaded-model bonus
 *         − health penalty, where:
 *  - platform base: 100 for desktop/electrobun, 10 for ios/android, else 0;
 *  - RAM score: 2 per usable GB. Usable RAM is the total, unless a positive
 *    `availableRamGb` is reported, in which case it is
 *    max(available, 60% of total) capped at total;
 *  - VRAM score: 5 per GB when the GPU is available, using `totalRamGb`
 *    when the device does not report `totalVramGb`;
 *  - loaded-model bonus: 50 when the device already holds `preferLoadedPath`;
 *  - health penalty: 100 for a critical thermal state, otherwise 15 for
 *    low-power mode or a serious thermal state.
 *
 * @param device - connected device to score.
 * @param opts.preferLoadedPath - model path that earns the loaded-model bonus.
 * @returns the score; higher is preferred, and it may be negative.
 */
function scoreDevice(
  device: ConnectedDevice,
  opts: { preferLoadedPath?: string } = {},
): number {
  const cap = device.capabilities;
  const platformBase =
    cap.platform === "desktop" || cap.platform === "electrobun"
      ? 100
      : cap.platform === "ios" || cap.platform === "android"
        ? 10
        : 0;
  const usableRamGb =
    typeof cap.availableRamGb === "number" && cap.availableRamGb > 0
      ? Math.min(
          cap.totalRamGb,
          Math.max(cap.availableRamGb, cap.totalRamGb * 0.6),
        )
      : cap.totalRamGb;
  const ramScore = usableRamGb * 2;
  const vramScore = cap.gpu?.available
    ? (cap.gpu.totalVramGb ?? cap.totalRamGb) * 5
    : 0;
  // Critical must be tested first: a critically hot device that is also in
  // low-power mode would otherwise only receive the mild penalty.
  const healthPenalty =
    cap.thermalState === "critical"
      ? 100
      : cap.lowPowerMode || cap.thermalState === "serious"
        ? 15
        : 0;
  const loadedBonus =
    opts.preferLoadedPath && device.loadedPath === opts.preferLoadedPath
      ? 50
      : 0;
  return platformBase + ramScore + vramScore + loadedBonus - healthPenalty;
}
|
|
347
|
+
|
|
348
|
+
export class DeviceBridge {
|
|
349
|
+
private readonly devices = new Map<string, ConnectedDevice>();
|
|
350
|
+
private wss: WssInstance | null = null;
|
|
351
|
+
private restored = false;
|
|
352
|
+
|
|
353
|
+
private readonly pendingLoads = new Map<string, PendingLoad>();
|
|
354
|
+
private readonly pendingUnloads = new Map<string, PendingUnload>();
|
|
355
|
+
private readonly pendingGenerates = new Map<string, PendingGenerate>();
|
|
356
|
+
private readonly pendingEmbeds = new Map<string, PendingEmbed>();
|
|
357
|
+
|
|
358
|
+
private readonly statusListeners = new Set<
|
|
359
|
+
(status: DeviceBridgeStatus) => void
|
|
360
|
+
>();
|
|
361
|
+
|
|
362
|
+
private readonly generationMetricsListeners = new Set<
|
|
363
|
+
(metrics: DeviceGenerationMetrics) => void
|
|
364
|
+
>();
|
|
365
|
+
|
|
366
|
+
/** The most recent successful generation's metrics, or null. */
|
|
367
|
+
private lastGenerationMetrics: DeviceGenerationMetrics | null = null;
|
|
368
|
+
|
|
369
|
+
/** Bounded ring buffer of recent generation metrics for the dev endpoint. */
|
|
370
|
+
private readonly recentGenerations: DeviceGenerationMetrics[] = [];
|
|
371
|
+
private static readonly RECENT_GENERATIONS_CAP = 200;
|
|
372
|
+
|
|
373
|
+
private readonly expectedPairingToken: string | null =
|
|
374
|
+
process.env.ELIZA_DEVICE_PAIRING_TOKEN?.trim() || null;
|
|
375
|
+
|
|
376
|
+
/**
 * Build a point-in-time snapshot of the bridge.
 *
 * Devices are scored without a preferred model path and listed in
 * descending score order; the first entry is flagged as primary. The legacy
 * single-device fields mirror the primary device and are `null` — never
 * `undefined` — when no device is connected, as the declared type requires.
 *
 * @returns the current status snapshot.
 */
status(): DeviceBridgeStatus {
  const summaries: DeviceSummary[] = [];
  for (const device of this.devices.values()) {
    const score = scoreDevice(device);
    const activeRequests =
      this.countRouted(this.pendingGenerates, device.deviceId) +
      this.countRouted(this.pendingEmbeds, device.deviceId) +
      this.countRouted(this.pendingLoads, device.deviceId) +
      this.countRouted(this.pendingUnloads, device.deviceId);
    summaries.push({
      deviceId: device.deviceId,
      capabilities: device.capabilities,
      loadedPath: device.loadedPath,
      connectedSince: new Date(device.connectedAt).toISOString(),
      score,
      activeRequests,
      isPrimary: false,
    });
  }
  // Sort desc by score so the UI can just render in order.
  summaries.sort((a, b) => b.score - a.score);

  const primary = summaries[0] ?? null;
  if (primary) primary.isPrimary = true;

  const pendingRequests =
    this.pendingGenerates.size +
    this.pendingEmbeds.size +
    this.pendingLoads.size +
    this.pendingUnloads.size;

  return {
    connected: summaries.length > 0,
    devices: summaries,
    // Optional chaining yields undefined when there is no primary; coalesce
    // so the serialised snapshot carries explicit nulls.
    primaryDeviceId: primary?.deviceId ?? null,
    pendingRequests,
    deviceId: primary?.deviceId ?? null,
    capabilities: primary?.capabilities ?? null,
    loadedPath: primary?.loadedPath ?? null,
    connectedSince: primary?.connectedSince ?? null,
  };
}
|
|
417
|
+
|
|
418
|
+
/**
 * Count the entries of a pending-request map currently routed to a device.
 *
 * @param map - pending-request map keyed by correlation id.
 * @param deviceId - device whose routed requests are counted.
 * @returns the number of entries whose `routedDeviceId` equals `deviceId`.
 */
private countRouted<T extends { routedDeviceId: string | null }>(
  map: Map<string, T>,
  deviceId: string,
): number {
  let matches = 0;
  map.forEach((entry) => {
    if (entry.routedDeviceId === deviceId) {
      matches += 1;
    }
  });
  return matches;
}
|
|
428
|
+
|
|
429
|
+
/**
 * Register a listener for status snapshots.
 *
 * @param listener - callback invoked with each emitted snapshot.
 * @returns a function that removes the listener again.
 */
subscribeStatus(listener: (status: DeviceBridgeStatus) => void): () => void {
  const listeners = this.statusListeners;
  listeners.add(listener);
  const unsubscribe = (): void => {
    listeners.delete(listener);
  };
  return unsubscribe;
}
|
|
435
|
+
|
|
436
|
+
/**
 * Push one freshly computed status snapshot to every status listener.
 * A listener that throws is unsubscribed; the remaining listeners still run.
 */
private emitStatus(): void {
  const current = this.status();
  this.statusListeners.forEach((notify) => {
    try {
      notify(current);
    } catch {
      // Drop the failing listener so it cannot break later emissions.
      this.statusListeners.delete(notify);
    }
  });
}
|
|
446
|
+
|
|
447
|
+
/**
 * Register a listener for per-generation metrics. Listeners are called from
 * `emitGenerationMetrics` with each metrics record it is handed.
 *
 * @param listener - callback receiving each metrics record.
 * @returns a function that removes the listener again.
 */
subscribeGenerationMetrics(
  listener: (metrics: DeviceGenerationMetrics) => void,
): () => void {
  const listeners = this.generationMetricsListeners;
  listeners.add(listener);
  const unsubscribe = (): void => {
    listeners.delete(listener);
  };
  return unsubscribe;
}
|
|
460
|
+
|
|
461
|
+
/**
 * Return the metrics record most recently passed to `emitGenerationMetrics`,
 * or null when nothing has been emitted yet.
 */
latestGenerationMetrics(): DeviceGenerationMetrics | null {
  const { lastGenerationMetrics: latest } = this;
  return latest;
}
|
|
465
|
+
|
|
466
|
+
/**
 * Most recent generation metrics (newest last), capped at `limit`.
 *
 * @param limit - maximum number of records to return; fractional values are
 *   truncated. Zero, negative and NaN limits yield an empty array.
 * @returns a new array holding up to `limit` of the newest records.
 */
recentGenerationMetrics(limit = 50): DeviceGenerationMetrics[] {
  const n = Math.trunc(limit);
  // slice(-0) is slice(0) and would return the whole buffer, so anything
  // that is not a positive count (including NaN) short-circuits here.
  if (!(n > 0)) return [];
  return this.recentGenerations.slice(-n);
}
|
|
471
|
+
|
|
472
|
+
/**
 * Record a generation's metrics and fan them out to subscribers.
 *
 * The record becomes the "latest" value and is appended to the recent
 * buffer, which drops its oldest entry once it exceeds
 * `RECENT_GENERATIONS_CAP`. A listener that throws is unsubscribed.
 *
 * @param metrics - the metrics record to store and broadcast.
 */
private emitGenerationMetrics(metrics: DeviceGenerationMetrics): void {
  this.lastGenerationMetrics = metrics;

  const buffer = this.recentGenerations;
  buffer.push(metrics);
  const overCapacity = buffer.length > DeviceBridge.RECENT_GENERATIONS_CAP;
  if (overCapacity) {
    buffer.shift();
  }

  this.generationMetricsListeners.forEach((notify) => {
    try {
      notify(metrics);
    } catch {
      // Drop the failing listener so it cannot break later emissions.
      this.generationMetricsListeners.delete(notify);
    }
  });
}
|
|
486
|
+
|
|
487
|
+
/**
 * Attach the device-bridge WebSocket endpoint to an HTTP server.
 *
 * Creates a `noServer` WebSocketServer (1 MiB max payload) and handles
 * upgrade requests for `/api/local-inference/device-bridge`. Upgrades for
 * any other path are left untouched so other upgrade handlers can claim
 * them. Does nothing when a WebSocket server is already attached. On the
 * first attachment in this process, persisted pending generates are
 * restored.
 *
 * @param server - HTTP server whose "upgrade" events should be handled.
 * @throws Error when the "ws" module cannot be imported or lacks the
 *   expected exports.
 */
async attachToHttpServer(server: HttpServer): Promise<void> {
  if (this.wss) return;
  const ws = await importWsModule();
  // Another call may have finished attaching while the import was pending;
  // re-check so the upgrade listener is never registered twice.
  if (this.wss) return;
  const wss = new ws.WebSocketServer({
    noServer: true,
    maxPayload: 1024 * 1024,
  });
  this.wss = wss;

  wss.on("error", (err) => {
    logger.warn("[device-bridge] WSS error:", err.message);
  });

  server.on("upgrade", (request, socket, head) => {
    let url: URL;
    try {
      url = new URL(request.url ?? "/", "http://localhost");
    } catch {
      // A malformed request target (e.g. "//") makes the URL constructor
      // throw. It cannot be our path, and letting the exception escape this
      // listener would surface as an uncaught exception.
      return;
    }
    if (url.pathname !== "/api/local-inference/device-bridge") return;
    wss.handleUpgrade(request, socket, head, (client) => {
      this.handleConnection(client, ws.WebSocket, url);
    });
  });

  // Restore persisted pending generates the first time a server attaches.
  // We only restore once per process — avoids double-resubmit on repeated
  // server restarts inside the same worker.
  if (!this.restored) {
    this.restored = true;
    await this.restorePendingGenerates();
  }
}
|
|
516
|
+
|
|
517
|
+
/**
 * Wire up a freshly upgraded device socket.
 *
 * Protocol:
 *  1. When a pairing token is configured, the `token` query parameter must
 *     match it, otherwise the socket is closed with 4001.
 *  2. The first frame must be a "register" frame (else close 4002). Its
 *     payload must carry the pairing token when one is configured (else
 *     close 4001) and be an object with a non-empty string `deviceId` and an
 *     object `capabilities` (else close 4004).
 *  3. Every later frame is passed to `handleDeviceMessage`.
 *
 * Frames arrive from the network, so they are checked before any property
 * access: a JSON `null` or a register frame without a payload must not throw
 * inside the message listener.
 *
 * @param socket - the upgraded WebSocket connection.
 * @param WsCtor - WebSocket constructor, passed on for its ready-state constants.
 * @param url - parsed upgrade URL, read for the `token` query parameter.
 */
private handleConnection(
  socket: MinimalWebSocket,
  WsCtor: WsConstructor,
  url: URL,
): void {
  // Attach before any early return: an "error" event with no listener is
  // thrown by the emitter, and that can happen on a socket being rejected.
  socket.on("error", (err) => {
    logger.warn("[device-bridge] Socket error:", err.message);
  });

  const queryToken = url.searchParams.get("token")?.trim();
  if (this.expectedPairingToken && queryToken !== this.expectedPairingToken) {
    logger.warn("[device-bridge] Rejecting connection: bad query token");
    socket.close(4001, "unauthorized");
    return;
  }

  // Null until a valid register frame has been accepted.
  let registeredDeviceId: string | null = null;

  socket.on("message", (raw) => {
    let parsed: unknown;
    try {
      const text = typeof raw === "string" ? raw : raw.toString("utf8");
      parsed = JSON.parse(text);
    } catch {
      logger.warn("[device-bridge] Ignoring non-JSON frame");
      return;
    }

    // Valid JSON that is not an object (null, number, string, boolean)
    // cannot be a protocol frame.
    if (typeof parsed !== "object" || parsed === null) {
      if (registeredDeviceId === null) {
        logger.warn("[device-bridge] First frame must be register");
        socket.close(4002, "must-register-first");
      } else {
        logger.warn("[device-bridge] Ignoring malformed frame");
      }
      return;
    }
    const msg = parsed as DeviceOutbound;

    if (registeredDeviceId !== null) {
      this.handleDeviceMessage(msg);
      return;
    }

    if (msg.type !== "register") {
      logger.warn("[device-bridge] First frame must be register");
      socket.close(4002, "must-register-first");
      return;
    }

    // The wire type promises a payload, but the peer is untrusted.
    const payload = msg.payload as DeviceRegistration | null | undefined;
    if (
      this.expectedPairingToken &&
      payload?.pairingToken !== this.expectedPairingToken
    ) {
      logger.warn("[device-bridge] Rejecting register: bad pairing token");
      socket.close(4001, "unauthorized");
      return;
    }
    if (
      typeof payload !== "object" ||
      payload === null ||
      typeof payload.deviceId !== "string" ||
      payload.deviceId.length === 0 ||
      typeof payload.capabilities !== "object" ||
      payload.capabilities === null
    ) {
      logger.warn("[device-bridge] Rejecting register: malformed payload");
      socket.close(4004, "invalid-register");
      return;
    }

    registeredDeviceId = payload.deviceId;
    this.onDeviceRegistered(socket, WsCtor, payload);
  });

  socket.on("close", () => {
    if (registeredDeviceId === null) return;
    // Only evict if THIS socket is still the current one for the
    // deviceId. When a newer connection supersedes us, its registration
    // already replaced the map entry; the delayed close event from our
    // superseded socket must not tear that down.
    const current = this.devices.get(registeredDeviceId);
    if (current && current.socket === socket) {
      this.onDeviceDisconnected(registeredDeviceId);
    }
  });
}
|
|
581
|
+
|
|
582
|
+
/**
 * Track a newly registered device and hand it any work that is waiting.
 *
 * Steps, in order:
 *  1. Close (code 4003, "superseded") and replace any connection already
 *     registered under the same deviceId.
 *  2. Store the new device together with a heartbeat interval that pings it
 *     while its socket is OPEN.
 *  3. Reject pending loads/unloads whose routed device is not connected.
 *  4. Send every orphaned generate/embed (`routedDeviceId === null`) to the
 *     best available device.
 *
 * If sending an orphaned request fails, the request is rejected and also
 * removed from its pending map (timer cleared, generate log rewritten) so a
 * settled entry does not linger until its timeout fires.
 *
 * @param socket Socket the device registered on.
 * @param WsCtor WebSocket constructor; only its `OPEN` constant is read.
 * @param registration Device id, capabilities and currently loaded model path.
 */
private onDeviceRegistered(
  socket: MinimalWebSocket,
  WsCtor: WsConstructor,
  registration: DeviceRegistration,
): void {
  // Supersede any existing connection under the same deviceId.
  const existing = this.devices.get(registration.deviceId);
  if (existing) {
    try {
      existing.socket.close(4003, "superseded");
    } catch {
      /* best effort */
    }
    clearInterval(existing.heartbeatTimer);
  }

  const device: ConnectedDevice = {
    deviceId: registration.deviceId,
    socket,
    capabilities: registration.capabilities,
    loadedPath: registration.loadedPath,
    connectedAt: Date.now(),
    lastHeartbeatAt: Date.now(),
    heartbeatTimer: setInterval(() => {
      if (socket.readyState !== WsCtor.OPEN) return;
      try {
        this.sendToDevice(device.deviceId, { type: "ping", at: Date.now() });
      } catch {
        /* ignore after close */
      }
    }, HEARTBEAT_INTERVAL_MS),
  };
  // unref() the heartbeat when the runtime's timer objects support it (in
  // Node.js this stops the interval from keeping the process alive).
  if (
    typeof device.heartbeatTimer === "object" &&
    device.heartbeatTimer &&
    "unref" in device.heartbeatTimer
  ) {
    (device.heartbeatTimer as { unref(): void }).unref();
  }
  this.devices.set(device.deviceId, device);

  logger.info(
    `[device-bridge] Device connected: ${device.deviceId} (${device.capabilities.platform}, score=${scoreDevice(device)})`,
  );

  // Load/unload orphans reject: they depend on device-specific state, so the
  // caller has to retry them explicitly.
  for (const pending of this.pendingLoads.values()) {
    if (pending.routedDeviceId === device.deviceId) continue;
    if (!this.devices.has(pending.routedDeviceId)) {
      clearTimeout(pending.timeout);
      this.pendingLoads.delete(pending.correlationId);
      pending.reject(
        new Error("DEVICE_RECONNECTED: retry model load after reconnect"),
      );
    }
  }
  for (const pending of this.pendingUnloads.values()) {
    if (!this.devices.has(pending.routedDeviceId)) {
      clearTimeout(pending.timeout);
      this.pendingUnloads.delete(pending.correlationId);
      pending.reject(
        new Error("DEVICE_RECONNECTED: retry model unload after reconnect"),
      );
    }
  }

  // Sends one orphaned request to the best device. Returns false when the
  // send failed and the request was rejected, so the caller can drop it from
  // the map it came from.
  const dispatchOrphan = (pending: PendingGenerate | PendingEmbed): boolean => {
    if (pending.routedDeviceId !== null) return true;
    const best = this.pickBestDevice();
    if (!best) return true;
    pending.routedDeviceId = best.deviceId;
    try {
      this.sendToDevice(best.deviceId, pending.request);
      return true;
    } catch (err) {
      clearTimeout(pending.timeout);
      pending.reject(
        err instanceof Error
          ? err
          : new Error("Failed to re-route after reconnect"),
      );
      return false;
    }
  };

  // Re-route orphaned generates (the ones whose prior routed device
  // disconnected, or that were parked with no device available).
  let generateLogDirty = false;
  for (const pending of this.pendingGenerates.values()) {
    if (!dispatchOrphan(pending)) {
      this.pendingGenerates.delete(pending.correlationId);
      generateLogDirty = true;
    }
  }
  if (generateLogDirty) {
    // Keep the persisted log in step with the in-memory map.
    void this.persistPendingGenerates();
  }

  // Same re-route logic for orphaned embeds. Embeds are short-lived and
  // idempotent (the device just runs llama_get_embeddings), so we can
  // safely retarget them on reconnect.
  for (const pending of this.pendingEmbeds.values()) {
    if (!dispatchOrphan(pending)) {
      this.pendingEmbeds.delete(pending.correlationId);
    }
  }

  this.emitStatus();
}
|
|
690
|
+
|
|
691
|
+
/**
 * Forget a disconnected device and orphan the generate/embed requests that
 * were routed to it.
 *
 * Orphaned requests get `routedDeviceId = null`. When at least one other
 * device is still connected they are immediately re-sent to the best one;
 * otherwise they stay parked until a device registers.
 *
 * A failed re-send puts the request back into the orphaned state. Without
 * that reset the request would keep pointing at the device that failed and
 * would not be picked up by later re-route passes.
 *
 * @param deviceId Id of the device whose connection ended. Unknown ids are
 *   ignored.
 */
private onDeviceDisconnected(deviceId: string): void {
  const device = this.devices.get(deviceId);
  if (!device) return;
  clearInterval(device.heartbeatTimer);
  this.devices.delete(deviceId);

  // Orphan any generates / embeds routed to this device so they can be
  // re-routed to a surviving device (or await a reconnect).
  let orphaned = 0;
  for (const pending of this.pendingGenerates.values()) {
    if (pending.routedDeviceId === deviceId) {
      pending.routedDeviceId = null;
      orphaned += 1;
    }
  }
  for (const pending of this.pendingEmbeds.values()) {
    if (pending.routedDeviceId === deviceId) {
      pending.routedDeviceId = null;
      orphaned += 1;
    }
  }

  logger.info(
    `[device-bridge] Device disconnected: ${deviceId}; ${orphaned} request(s) orphaned`,
  );

  // Fast-path: if there are other connected devices, re-route now.
  if (this.devices.size > 0) {
    const retarget = (pending: PendingGenerate | PendingEmbed): void => {
      if (pending.routedDeviceId !== null) return;
      const best = this.pickBestDevice();
      if (!best) return;
      pending.routedDeviceId = best.deviceId;
      try {
        this.sendToDevice(best.deviceId, pending.request);
      } catch {
        // Leave the request orphaned so it is retried on the next
        // (re)connect instead of waiting on a device that never got it.
        pending.routedDeviceId = null;
      }
    };
    for (const pending of this.pendingGenerates.values()) retarget(pending);
    for (const pending of this.pendingEmbeds.values()) retarget(pending);
  }

  this.emitStatus();
}
|
|
749
|
+
|
|
750
|
+
/**
 * Settle the pending request that an inbound device message answers.
 *
 * Result messages are matched to their pending entry by `correlationId`;
 * a result with no matching entry is ignored. For every match the entry's
 * timeout is cleared and the entry is removed before it is resolved or
 * rejected. Message types not listed here are ignored.
 *
 * @param msg Message received from a device.
 */
private handleDeviceMessage(msg: DeviceOutbound): void {
  switch (msg.type) {
    case "pong":
      // Heartbeat round-trip — could update lastHeartbeatAt per device, but
      // we don't currently use it for eviction.
      return;

    case "loadResult": {
      const entry = this.pendingLoads.get(msg.correlationId);
      if (!entry) return;
      clearTimeout(entry.timeout);
      this.pendingLoads.delete(msg.correlationId);
      if (msg.ok === false) {
        entry.reject(new Error(msg.error));
        return;
      }
      // Record the model the device reports as loaded.
      const owner = this.devices.get(entry.routedDeviceId);
      if (owner) owner.loadedPath = msg.loadedPath;
      entry.resolve();
      this.emitStatus();
      return;
    }

    case "unloadResult": {
      const entry = this.pendingUnloads.get(msg.correlationId);
      if (!entry) return;
      clearTimeout(entry.timeout);
      this.pendingUnloads.delete(msg.correlationId);
      if (msg.ok === false) {
        entry.reject(new Error(msg.error));
        return;
      }
      const owner = this.devices.get(entry.routedDeviceId);
      if (owner) owner.loadedPath = null;
      entry.resolve();
      this.emitStatus();
      return;
    }

    case "generateResult": {
      const entry = this.pendingGenerates.get(msg.correlationId);
      if (!entry) return;
      clearTimeout(entry.timeout);
      this.pendingGenerates.delete(msg.correlationId);
      // Best-effort purge the persisted copy.
      void this.persistPendingGenerates();
      if (msg.ok === false) {
        entry.reject(new Error(msg.error));
        return;
      }
      // Difference the raw counters into prefill/decode tok/s and surface
      // them to profiling subscribers. The loader contract is unchanged —
      // callers still get the text; metrics are a side channel.
      const ttftMs = typeof msg.ttftMs === "number" ? msg.ttftMs : null;
      const throughput = computeGenerationThroughput({
        promptTokens: msg.promptTokens,
        outputTokens: msg.outputTokens,
        durationMs: msg.durationMs,
        ttftMs,
      });
      const source = entry.routedDeviceId
        ? this.devices.get(entry.routedDeviceId)
        : null;
      this.emitGenerationMetrics({
        deviceId: entry.routedDeviceId ?? "unknown",
        platform: source?.capabilities.platform ?? null,
        deviceModel: source?.capabilities.deviceModel ?? null,
        promptTokens: msg.promptTokens,
        outputTokens: msg.outputTokens,
        durationMs: msg.durationMs,
        ttftMs,
        throughput,
      });
      entry.resolve(msg.text);
      return;
    }

    case "embedResult": {
      const entry = this.pendingEmbeds.get(msg.correlationId);
      if (!entry) return;
      clearTimeout(entry.timeout);
      this.pendingEmbeds.delete(msg.correlationId);
      if (msg.ok === false) {
        entry.reject(new Error(msg.error));
        return;
      }
      entry.resolve({ embedding: msg.embedding, tokens: msg.tokens });
      return;
    }

    default:
      return;
  }
}
|
|
840
|
+
|
|
841
|
+
/**
 * Serialize `msg` as JSON and send it over the socket of a connected device.
 *
 * @param deviceId Id of the target device.
 * @param msg Message to deliver.
 * @throws Error prefixed `DEVICE_DISCONNECTED:` when no device with that id
 *   is currently connected.
 */
private sendToDevice(deviceId: string, msg: AgentOutbound): void {
  const target = this.devices.get(deviceId);
  if (target) {
    const payload = JSON.stringify(msg);
    target.socket.send(payload);
    return;
  }
  throw new Error(`DEVICE_DISCONNECTED: ${deviceId}`);
}
|
|
846
|
+
|
|
847
|
+
/**
 * Return the connected device with the highest `scoreDevice` result.
 *
 * `opts` is forwarded to `scoreDevice` unchanged. On equal scores the device
 * seen first in the device map's iteration order wins, because only a
 * strictly higher score replaces the current pick.
 *
 * @param opts Optional scoring hints, e.g. the model path the caller would
 *   like to find already loaded.
 * @returns The best device, or `null` when no device is connected.
 */
private pickBestDevice(opts?: {
  preferLoadedPath?: string;
}): ConnectedDevice | null {
  let winner: ConnectedDevice | null = null;
  let topScore = Number.NEGATIVE_INFINITY;
  for (const [, candidate] of this.devices) {
    const candidateScore = scoreDevice(candidate, opts);
    if (candidateScore > topScore) {
      topScore = candidateScore;
      winner = candidate;
    }
  }
  return winner;
}
|
|
862
|
+
|
|
863
|
+
// ── LocalInferenceLoader surface ──────────────────────────────────────
|
|
864
|
+
|
|
865
|
+
/**
 * Ask the best available device to load a model and wait for its answer.
 *
 * The device is chosen with `preferLoadedPath` set to the requested model
 * path. The returned promise settles when the matching `loadResult` is
 * handled, when sending the request fails, or when the load deadline
 * (`DEFAULT_LOAD_TIMEOUT_MS`) passes.
 *
 * @param args Load arguments; they are spread into the `load` message.
 * @throws Error prefixed `DEVICE_DISCONNECTED:` when no device is connected.
 */
async loadModel(args: LocalInferenceLoadArgs): Promise<void> {
  const target = this.pickBestDevice({ preferLoadedPath: args.modelPath });
  if (target === null) {
    throw new Error(
      "DEVICE_DISCONNECTED: no mobile / desktop bridge device attached",
    );
  }

  const correlationId = randomUUID();
  return new Promise<void>((resolve, reject) => {
    const onDeadline = (): void => {
      this.pendingLoads.delete(correlationId);
      reject(new Error("DEVICE_TIMEOUT: model load exceeded deadline"));
    };
    const timer = setTimeout(onDeadline, DEFAULT_LOAD_TIMEOUT_MS);
    if (timer !== null && typeof timer === "object" && "unref" in timer) {
      (timer as { unref(): void }).unref();
    }

    // Register before sending so the reply can always find its entry.
    this.pendingLoads.set(correlationId, {
      correlationId,
      modelPath: args.modelPath,
      resolve,
      reject,
      timeout: timer,
      routedDeviceId: target.deviceId,
    });

    try {
      this.sendToDevice(target.deviceId, {
        type: "load",
        correlationId,
        ...args,
      });
    } catch (err) {
      // The request never left: undo the registration and fail right away.
      clearTimeout(timer);
      this.pendingLoads.delete(correlationId);
      reject(err instanceof Error ? err : new Error(String(err)));
    }
  });
}
|
|
902
|
+
|
|
903
|
+
/**
 * Ask every connected device that reports a loaded model to unload it.
 *
 * Requests are issued to all such devices and awaited together with
 * `Promise.allSettled`, so one device failing or timing out
 * (`DEFAULT_CALL_TIMEOUT_MS`) does not reject this call or stop the others.
 * Returns immediately when no device has a model loaded.
 */
async unloadModel(): Promise<void> {
  const loaded = Array.from(this.devices.values()).filter(
    (candidate) => candidate.loadedPath,
  );
  if (loaded.length === 0) return;

  // Issues a single unload request and settles with that device's answer.
  const requestUnload = (deviceId: string): Promise<void> =>
    new Promise<void>((resolve, reject) => {
      const correlationId = randomUUID();
      const onDeadline = (): void => {
        this.pendingUnloads.delete(correlationId);
        reject(new Error("DEVICE_TIMEOUT: unload exceeded deadline"));
      };
      const timer = setTimeout(onDeadline, DEFAULT_CALL_TIMEOUT_MS);
      if (timer !== null && typeof timer === "object" && "unref" in timer) {
        (timer as { unref(): void }).unref();
      }

      this.pendingUnloads.set(correlationId, {
        correlationId,
        resolve,
        reject,
        timeout: timer,
        routedDeviceId: deviceId,
      });

      try {
        this.sendToDevice(deviceId, {
          type: "unload",
          correlationId,
        });
      } catch (err) {
        clearTimeout(timer);
        this.pendingUnloads.delete(correlationId);
        reject(err instanceof Error ? err : new Error(String(err)));
      }
    });

  await Promise.allSettled(
    loaded.map((device) => requestUnload(device.deviceId)),
  );
}
|
|
941
|
+
|
|
942
|
+
/**
 * Model path loaded on the device `pickBestDevice()` currently selects.
 *
 * `generate` picks its device the same way, so this is the device the next
 * generate would be sent to.
 *
 * @returns The loaded path, or `null` when no device is connected or the
 *   selected device has no model loaded.
 */
currentModelPath(): string | null {
  const primary = this.pickBestDevice();
  if (primary === null) return null;
  return primary.loadedPath ?? null;
}
|
|
948
|
+
|
|
949
|
+
/**
 * Request an embedding for `args.input` from the best connected device.
 *
 * The request is always registered as pending. With a device available it
 * is sent immediately; with none (or if the send throws) it stays parked
 * with `routedDeviceId = null` for the re-route logic to pick up when a
 * device connects. The promise rejects with `DEVICE_TIMEOUT` if nothing
 * answers within the timeout.
 *
 * The timeout is read from `ELIZA_DEVICE_GENERATE_TIMEOUT_MS` when that
 * parses to a positive integer, otherwise `DEFAULT_CALL_TIMEOUT_MS`.
 *
 * @param args Text to embed.
 * @returns The embedding vector and token count reported by the device.
 */
async embed(args: {
  input: string;
}): Promise<{ embedding: number[]; tokens: number }> {
  const rawTimeout = process.env.ELIZA_DEVICE_GENERATE_TIMEOUT_MS?.trim() ?? "";
  const parsedTimeout = Number.parseInt(rawTimeout, 10);
  const timeoutMs =
    !Number.isFinite(parsedTimeout) || parsedTimeout <= 0
      ? DEFAULT_CALL_TIMEOUT_MS
      : parsedTimeout;

  const correlationId = randomUUID();
  const { input } = args;
  const request: AgentOutbound = { type: "embed", correlationId, input };

  const target = this.pickBestDevice();

  return new Promise<{ embedding: number[]; tokens: number }>(
    (resolve, reject) => {
      const onDeadline = (): void => {
        this.pendingEmbeds.delete(correlationId);
        reject(
          new Error(
            `DEVICE_TIMEOUT: no device responded to embed within ${timeoutMs}ms`,
          ),
        );
      };
      const timer = setTimeout(onDeadline, timeoutMs);
      if (timer !== null && typeof timer === "object" && "unref" in timer) {
        (timer as { unref(): void }).unref();
      }

      const entry: PendingEmbed = {
        correlationId,
        resolve,
        reject,
        timeout: timer,
        request,
        routedDeviceId: target?.deviceId ?? null,
        submittedAt: new Date().toISOString(),
      };
      this.pendingEmbeds.set(correlationId, entry);

      if (!target) {
        logger.debug(
          `[device-bridge] No device available; parking embed ${correlationId} pending connection`,
        );
        return;
      }

      try {
        this.sendToDevice(target.deviceId, request);
      } catch {
        // Routed device went away between pickBestDevice and send.
        // Mark as orphaned; reroute logic will pick it up on the next
        // device (re)connect.
        entry.routedDeviceId = null;
      }
    },
  );
}
|
|
1011
|
+
|
|
1012
|
+
/**
 * Run a text generation on the best connected device.
 *
 * The request is registered as pending and written to the pending-generate
 * log. With a device available it is sent immediately; with none (or if the
 * send throws) it stays parked with `routedDeviceId = null` until the
 * re-route logic hands it to a device. The promise rejects with
 * `DEVICE_TIMEOUT` if nothing answers within the timeout.
 *
 * The timeout is read from `ELIZA_DEVICE_GENERATE_TIMEOUT_MS` when that
 * parses to a positive integer, otherwise `DEFAULT_CALL_TIMEOUT_MS`.
 *
 * @param args Prompt plus optional sampling and caching parameters; all are
 *   copied into the `generate` message.
 * @returns The generated text reported by the device.
 */
async generate(args: {
  prompt: string;
  stopSequences?: string[];
  maxTokens?: number;
  temperature?: number;
  cacheKey?: string;
}): Promise<string> {
  const rawTimeout = process.env.ELIZA_DEVICE_GENERATE_TIMEOUT_MS?.trim() ?? "";
  const parsedTimeout = Number.parseInt(rawTimeout, 10);
  const timeoutMs =
    !Number.isFinite(parsedTimeout) || parsedTimeout <= 0
      ? DEFAULT_CALL_TIMEOUT_MS
      : parsedTimeout;

  const correlationId = randomUUID();
  const { prompt, stopSequences, maxTokens, temperature, cacheKey } = args;
  const request: AgentOutbound = {
    type: "generate",
    correlationId,
    prompt,
    stopSequences,
    maxTokens,
    temperature,
    cacheKey,
  };

  const target = this.pickBestDevice();

  return new Promise<string>((resolve, reject) => {
    const onDeadline = (): void => {
      this.pendingGenerates.delete(correlationId);
      void this.persistPendingGenerates();
      reject(
        new Error(`DEVICE_TIMEOUT: no device responded within ${timeoutMs}ms`),
      );
    };
    const timer = setTimeout(onDeadline, timeoutMs);
    if (timer !== null && typeof timer === "object" && "unref" in timer) {
      (timer as { unref(): void }).unref();
    }

    const entry: PendingGenerate = {
      correlationId,
      resolve,
      reject,
      timeout: timer,
      request,
      routedDeviceId: target?.deviceId ?? null,
      submittedAt: new Date().toISOString(),
    };
    this.pendingGenerates.set(correlationId, entry);
    void this.persistPendingGenerates();

    if (!target) {
      logger.debug(
        `[device-bridge] No device available; parking generate ${correlationId} pending connection`,
      );
      return;
    }

    try {
      this.sendToDevice(target.deviceId, request);
    } catch {
      // The send failed; leave the request orphaned for a later re-route.
      entry.routedDeviceId = null;
    }
  });
}
|
|
1079
|
+
|
|
1080
|
+
// ── Durability ────────────────────────────────────────────────────────
|
|
1081
|
+
|
|
1082
|
+
/** Path of the pending-generate log file inside the local-inference root. */
private pendingLogPath(): string {
  const root = localInferenceRoot();
  return path.join(root, PENDING_LOG_FILENAME);
}
|
|
1085
|
+
|
|
1086
|
+
/**
 * Rewrite the pending-generate log. Called after every mutation to the
 * pendingGenerates map. We only persist `generate` — loads/unloads are
 * bound to a specific device's current state and aren't safely replayable
 * across restart.
 *
 * The snapshot is written to a temp file and renamed over the log. Callers
 * invoke this without awaiting, so several writes can be in flight at once;
 * each call therefore uses its own temp file rather than a shared
 * `<log>.tmp`, which overlapping calls could interleave writes into or
 * rename out from under each other.
 *
 * Never rejects: failures are logged at debug level and the temp file is
 * removed on a best-effort basis.
 */
private async persistPendingGenerates(): Promise<void> {
  let tmp: string | null = null;
  try {
    await fs.mkdir(localInferenceRoot(), { recursive: true });
    const payload: PersistedGenerateRequest[] = [
      ...this.pendingGenerates.values(),
    ].map((p) => ({
      correlationId: p.correlationId,
      request: p.request,
      submittedAt: p.submittedAt,
    }));
    const target = this.pendingLogPath();
    // Unique per call (and per process) so concurrent writers never share it.
    tmp = `${target}.${process.pid}.${randomUUID()}.tmp`;
    await fs.writeFile(tmp, JSON.stringify(payload, null, 2), "utf8");
    await fs.rename(tmp, target);
  } catch (err) {
    if (tmp !== null) {
      // Don't leave a stray temp file behind; it may not exist yet.
      await fs.unlink(tmp).catch(() => undefined);
    }
    logger.debug(
      "[device-bridge] Failed to persist pending generates:",
      err instanceof Error ? err.message : String(err),
    );
  }
}
|
|
1112
|
+
|
|
1113
|
+
/**
 * On startup, read persisted pending generate requests back into memory.
 *
 * The promises of the original callers no longer exist, so restored entries
 * get no-op `resolve`/`reject` callbacks and a fresh timeout of
 * `DEFAULT_CALL_TIMEOUT_MS`. They are queued with `routedDeviceId = null`,
 * which lets the re-route logic send them to a device once one is
 * available. An entry still pending when its timeout fires is dropped and
 * the log is rewritten.
 *
 * Skipped without being restored:
 *  - a missing, unreadable or non-JSON log, or one whose top level is not
 *    an array (the whole restore is a no-op);
 *  - array elements that are not objects (e.g. `null` in a damaged log),
 *    which previously threw while being inspected;
 *  - entries without a string correlationId or whose request is not a
 *    `generate`;
 *  - entries with an unparseable timestamp or older than 24h;
 *  - entries whose correlationId is already pending.
 */
private async restorePendingGenerates(): Promise<void> {
  let raw: string;
  try {
    raw = await fs.readFile(this.pendingLogPath(), "utf8");
  } catch {
    return;
  }
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return;
  }
  if (!Array.isArray(parsed)) return;

  const cutoff = Date.now() - 24 * 60 * 60 * 1000;
  let restored = 0;
  for (const entry of parsed as unknown[]) {
    // The log is external input: validate the shape before touching fields.
    if (typeof entry !== "object" || entry === null) continue;
    const item = entry as PersistedGenerateRequest;
    if (
      typeof item.correlationId !== "string" ||
      !item.correlationId ||
      typeof item.request !== "object" ||
      !item.request ||
      item.request.type !== "generate"
    ) {
      continue;
    }
    const submittedAt = Date.parse(item.submittedAt);
    if (!Number.isFinite(submittedAt) || submittedAt < cutoff) continue;
    if (this.pendingGenerates.has(item.correlationId)) continue;

    // The original caller's promise is gone. Queue the request so the
    // first connecting device processes it; if nobody picks it up within
    // the default timeout, drop it.
    const timeout = setTimeout(() => {
      this.pendingGenerates.delete(item.correlationId);
      void this.persistPendingGenerates();
    }, DEFAULT_CALL_TIMEOUT_MS);
    if (typeof timeout === "object" && timeout && "unref" in timeout) {
      (timeout as { unref(): void }).unref();
    }
    this.pendingGenerates.set(item.correlationId, {
      correlationId: item.correlationId,
      request: item.request,
      submittedAt: item.submittedAt,
      routedDeviceId: null,
      timeout,
      resolve: () => {
        /* no caller to resolve */
      },
      reject: () => {
        /* no caller to reject */
      },
    });
    restored += 1;
  }
  if (restored > 0) {
    logger.info(
      `[device-bridge] Restored ${restored} pending generate(s) from persistent log`,
    );
  }
}
|
|
1181
|
+
}
|
|
1182
|
+
|
|
1183
|
+
/**
 * Module-level `DeviceBridge` instance. It is the default bridge used by
 * `buildDeviceResourceMetricsDevPayload` and the backend that the loader
 * registered by `registerDeviceBridgeLoader` delegates to.
 */
export const deviceBridge = new DeviceBridge();
|
|
1184
|
+
|
|
1185
|
+
/**
 * Response body of `GET /api/dev/device-resource-metrics`, as assembled by
 * `buildDeviceResourceMetricsDevPayload`.
 */
export interface DeviceResourceMetricsDevPayload {
  /** `Date.now()` at the moment the payload was built. */
  generatedAtEpochMs: number;
  /** Result of the bridge's `status()` call. */
  status: DeviceBridgeStatus;
  /** Result of the bridge's `latestGenerationMetrics()` call; may be `null`. */
  latest: DeviceGenerationMetrics | null;
  /** Result of the bridge's `recentGenerationMetrics(limit)` call. */
  recentGenerations: DeviceGenerationMetrics[];
}
|
|
1192
|
+
|
|
1193
|
+
/**
 * Assemble the JSON body served by `GET /api/dev/device-resource-metrics`.
 *
 * The Mobile Resource Workbench consumes this to collect per-generation
 * prefill/decode tok/s (already differenced by the bridge) without having
 * to drive the device WebView.
 *
 * @param bridge Bridge to query; defaults to the module-level `deviceBridge`.
 * @param limit Passed to `recentGenerationMetrics`; defaults to 50.
 * @returns A timestamped snapshot of bridge status and generation metrics.
 */
export function buildDeviceResourceMetricsDevPayload(
  bridge: DeviceBridge = deviceBridge,
  limit = 50,
): DeviceResourceMetricsDevPayload {
  const generatedAtEpochMs = Date.now();
  const status = bridge.status();
  const latest = bridge.latestGenerationMetrics();
  const recentGenerations = bridge.recentGenerationMetrics(limit);
  return { generatedAtEpochMs, status, latest, recentGenerations };
}
|
|
1209
|
+
|
|
1210
|
+
/**
 * Register a `localInferenceLoader` service on the runtime that forwards
 * every loader call to the module-level `deviceBridge`.
 *
 * Does nothing when the runtime has no `registerService` function.
 *
 * @param runtime Agent runtime, optionally exposing `registerService`.
 */
export function registerDeviceBridgeLoader(
  runtime: AgentRuntime & {
    registerService?: (name: string, impl: unknown) => unknown;
  },
): void {
  if (typeof runtime.registerService !== "function") return;

  // Thin adapter: each member delegates straight to the bridge.
  const loader: LocalInferenceLoader = {
    loadModel: async (args: LocalInferenceLoadArgs) => {
      await deviceBridge.loadModel(args);
    },
    unloadModel: async () => {
      await deviceBridge.unloadModel();
    },
    currentModelPath: () => deviceBridge.currentModelPath(),
    generate: async (args) => deviceBridge.generate(args),
    embed: async (args) => deviceBridge.embed(args),
  };

  runtime.registerService("localInferenceLoader", loader);
  logger.info(
    "[device-bridge] Registered device-bridge loader for remote on-device inference",
  );
}
|