@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (676):
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +81 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  11. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  12. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  13. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  14. package/src/adapters/capacitor-llama/environment.ts +71 -0
  15. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  16. package/src/adapters/capacitor-llama/index.ts +807 -0
  17. package/src/adapters/capacitor-llama/loader.ts +109 -0
  18. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  19. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  20. package/src/adapters/capacitor-llama/types.ts +374 -0
  21. package/src/backends/apple-foundation.ts +127 -0
  22. package/src/index.d.ts +7 -0
  23. package/src/index.d.ts.map +1 -0
  24. package/src/index.ts +54 -0
  25. package/src/local-inference-routes.d.ts +38 -0
  26. package/src/local-inference-routes.d.ts.map +1 -0
  27. package/src/local-inference-routes.test.ts +344 -0
  28. package/src/local-inference-routes.ts +1543 -0
  29. package/src/provider.d.ts +21 -0
  30. package/src/provider.d.ts.map +1 -0
  31. package/src/provider.ts +1171 -0
  32. package/src/routes/compat-helpers.d.ts +18 -0
  33. package/src/routes/compat-helpers.d.ts.map +1 -0
  34. package/src/routes/compat-helpers.ts +274 -0
  35. package/src/routes/family-member-route.d.ts +62 -0
  36. package/src/routes/family-member-route.d.ts.map +1 -0
  37. package/src/routes/family-member-route.ts +353 -0
  38. package/src/routes/index.d.ts +19 -0
  39. package/src/routes/index.d.ts.map +1 -0
  40. package/src/routes/index.ts +60 -0
  41. package/src/routes/live-diarization-route.d.ts +26 -0
  42. package/src/routes/live-diarization-route.d.ts.map +1 -0
  43. package/src/routes/live-diarization-route.test.ts +213 -0
  44. package/src/routes/live-diarization-route.ts +122 -0
  45. package/src/routes/local-inference-asr-route.d.ts +4 -0
  46. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  47. package/src/routes/local-inference-asr-route.test.ts +190 -0
  48. package/src/routes/local-inference-asr-route.ts +213 -0
  49. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  50. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  51. package/src/routes/local-inference-compat-routes.test.ts +423 -0
  52. package/src/routes/local-inference-compat-routes.ts +782 -0
  53. package/src/routes/local-inference-tts-route.d.ts +7 -0
  54. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/src/routes/local-inference-tts-route.test.ts +179 -0
  56. package/src/routes/local-inference-tts-route.ts +230 -0
  57. package/src/routes/voice-first-run-routes.d.ts +62 -0
  58. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  59. package/src/routes/voice-first-run-routes.ts +524 -0
  60. package/src/routes/voice-models-routes.d.ts +62 -0
  61. package/src/routes/voice-models-routes.d.ts.map +1 -0
  62. package/src/routes/voice-models-routes.ts +554 -0
  63. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  64. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  65. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  66. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  67. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  68. package/src/routes/voice-profiles-management-routes.ts +476 -0
  69. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  72. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  73. package/src/runtime/capacitor-llama.d.ts +25 -0
  74. package/src/runtime/embedding-manager-support.d.ts +77 -0
  75. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  76. package/src/runtime/embedding-manager-support.ts +497 -0
  77. package/src/runtime/embedding-presets.d.ts +16 -0
  78. package/src/runtime/embedding-presets.d.ts.map +1 -0
  79. package/src/runtime/embedding-presets.ts +81 -0
  80. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  81. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  82. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  83. package/src/runtime/embedding-warmup-policy.ts +48 -0
  84. package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
  85. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  86. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  87. package/src/runtime/ensure-local-inference-handler.ts +1398 -0
  88. package/src/runtime/index.d.ts +14 -0
  89. package/src/runtime/index.d.ts.map +1 -0
  90. package/src/runtime/index.ts +27 -0
  91. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  92. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  93. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  94. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  95. package/src/runtime/voice-entity-binding.d.ts +103 -0
  96. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  97. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  98. package/src/runtime/voice-entity-binding.ts +328 -0
  99. package/src/services/README.md +71 -0
  100. package/src/services/__tests__/backend-selector.test.ts +101 -0
  101. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  102. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  103. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  104. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  105. package/src/services/__tests__/runtime-target.test.ts +176 -0
  106. package/src/services/active-model-switch-rollback.test.ts +183 -0
  107. package/src/services/active-model.d.ts +282 -0
  108. package/src/services/active-model.d.ts.map +1 -0
  109. package/src/services/active-model.ts +1213 -0
  110. package/src/services/asr/errors.d.ts +21 -0
  111. package/src/services/asr/errors.d.ts.map +1 -0
  112. package/src/services/asr/errors.ts +50 -0
  113. package/src/services/asr/hash.d.ts +28 -0
  114. package/src/services/asr/hash.d.ts.map +1 -0
  115. package/src/services/asr/hash.ts +49 -0
  116. package/src/services/asr/index.d.ts +76 -0
  117. package/src/services/asr/index.d.ts.map +1 -0
  118. package/src/services/asr/index.ts +178 -0
  119. package/src/services/asr/types.d.ts +91 -0
  120. package/src/services/asr/types.d.ts.map +1 -0
  121. package/src/services/asr/types.ts +95 -0
  122. package/src/services/assignments.d.ts +71 -0
  123. package/src/services/assignments.d.ts.map +1 -0
  124. package/src/services/assignments.test.ts +80 -0
  125. package/src/services/assignments.ts +230 -0
  126. package/src/services/backend-selector.ts +95 -0
  127. package/src/services/backend.d.ts +346 -0
  128. package/src/services/backend.d.ts.map +1 -0
  129. package/src/services/backend.ts +612 -0
  130. package/src/services/bundled-models.d.ts +34 -0
  131. package/src/services/bundled-models.d.ts.map +1 -0
  132. package/src/services/bundled-models.ts +129 -0
  133. package/src/services/cache-bridge.d.ts +206 -0
  134. package/src/services/cache-bridge.d.ts.map +1 -0
  135. package/src/services/cache-bridge.test.ts +516 -0
  136. package/src/services/cache-bridge.ts +423 -0
  137. package/src/services/catalog.d.ts +10 -0
  138. package/src/services/catalog.d.ts.map +1 -0
  139. package/src/services/catalog.test.ts +240 -0
  140. package/src/services/catalog.ts +27 -0
  141. package/src/services/checkpoint-client.d.ts +109 -0
  142. package/src/services/checkpoint-client.d.ts.map +1 -0
  143. package/src/services/checkpoint-client.ts +258 -0
  144. package/src/services/checkpoint-manager.ts +474 -0
  145. package/src/services/cloud-fallback.d.ts +102 -0
  146. package/src/services/cloud-fallback.d.ts.map +1 -0
  147. package/src/services/cloud-fallback.ts +230 -0
  148. package/src/services/conversation-registry.d.ts +142 -0
  149. package/src/services/conversation-registry.d.ts.map +1 -0
  150. package/src/services/conversation-registry.test.ts +235 -0
  151. package/src/services/conversation-registry.ts +264 -0
  152. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
  153. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  154. package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
  155. package/src/services/device-bridge.d.ts +188 -0
  156. package/src/services/device-bridge.d.ts.map +1 -0
  157. package/src/services/device-bridge.ts +1237 -0
  158. package/src/services/device-resource-metrics.d.ts +149 -0
  159. package/src/services/device-resource-metrics.d.ts.map +1 -0
  160. package/src/services/device-resource-metrics.test.ts +98 -0
  161. package/src/services/device-resource-metrics.ts +346 -0
  162. package/src/services/device-tier.d.ts +115 -0
  163. package/src/services/device-tier.d.ts.map +1 -0
  164. package/src/services/device-tier.test.ts +371 -0
  165. package/src/services/device-tier.ts +410 -0
  166. package/src/services/downloader.d.ts +82 -0
  167. package/src/services/downloader.d.ts.map +1 -0
  168. package/src/services/downloader.test.ts +724 -0
  169. package/src/services/downloader.ts +899 -0
  170. package/src/services/engine-direct-bundle.test.ts +58 -0
  171. package/src/services/engine-streaming.test.ts +80 -0
  172. package/src/services/engine.d.ts +534 -0
  173. package/src/services/engine.d.ts.map +1 -0
  174. package/src/services/engine.ts +1891 -0
  175. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  176. package/src/services/ensure-local-artifacts.test.ts +368 -0
  177. package/src/services/ensure-local-artifacts.ts +351 -0
  178. package/src/services/external-scanner.d.ts +17 -0
  179. package/src/services/external-scanner.d.ts.map +1 -0
  180. package/src/services/external-scanner.ts +312 -0
  181. package/src/services/ffi-llm-mock.ts +354 -0
  182. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  183. package/src/services/ffi-streaming-backend.d.ts +180 -0
  184. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  185. package/src/services/ffi-streaming-backend.ts +382 -0
  186. package/src/services/ffi-streaming-runner.d.ts +122 -0
  187. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  188. package/src/services/ffi-streaming-runner.test.ts +60 -0
  189. package/src/services/ffi-streaming-runner.ts +354 -0
  190. package/src/services/ffi-unload-ordering.test.ts +162 -0
  191. package/src/services/gpu-autotune.ts +534 -0
  192. package/src/services/gpu-detect.ts +139 -0
  193. package/src/services/handler-registry.d.ts +72 -0
  194. package/src/services/handler-registry.d.ts.map +1 -0
  195. package/src/services/handler-registry.ts +240 -0
  196. package/src/services/hardware.d.ts +63 -0
  197. package/src/services/hardware.d.ts.map +1 -0
  198. package/src/services/hardware.test.ts +183 -0
  199. package/src/services/hardware.ts +404 -0
  200. package/src/services/hf-search.d.ts +26 -0
  201. package/src/services/hf-search.d.ts.map +1 -0
  202. package/src/services/hf-search.test.ts +69 -0
  203. package/src/services/hf-search.ts +420 -0
  204. package/src/services/image-description-runtime.d.ts +14 -0
  205. package/src/services/image-description-runtime.d.ts.map +1 -0
  206. package/src/services/image-description-runtime.test.ts +61 -0
  207. package/src/services/image-description-runtime.ts +118 -0
  208. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  209. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  210. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  211. package/src/services/imagegen/backend-selector.d.ts +118 -0
  212. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  213. package/src/services/imagegen/backend-selector.ts +281 -0
  214. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  215. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  216. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  217. package/src/services/imagegen/errors.d.ts +16 -0
  218. package/src/services/imagegen/errors.d.ts.map +1 -0
  219. package/src/services/imagegen/errors.ts +40 -0
  220. package/src/services/imagegen/index.d.ts +58 -0
  221. package/src/services/imagegen/index.d.ts.map +1 -0
  222. package/src/services/imagegen/index.ts +144 -0
  223. package/src/services/imagegen/mflux.d.ts +74 -0
  224. package/src/services/imagegen/mflux.d.ts.map +1 -0
  225. package/src/services/imagegen/mflux.ts +313 -0
  226. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  227. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  228. package/src/services/imagegen/sd-cpp.ts +718 -0
  229. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  230. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  231. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  232. package/src/services/imagegen/types.d.ts +181 -0
  233. package/src/services/imagegen/types.d.ts.map +1 -0
  234. package/src/services/imagegen/types.ts +193 -0
  235. package/src/services/index.d.ts +30 -0
  236. package/src/services/index.d.ts.map +1 -0
  237. package/src/services/index.ts +225 -0
  238. package/src/services/inference-capabilities.d.ts +132 -0
  239. package/src/services/inference-capabilities.d.ts.map +1 -0
  240. package/src/services/inference-capabilities.test.ts +75 -0
  241. package/src/services/inference-capabilities.ts +204 -0
  242. package/src/services/inference-telemetry.d.ts +59 -0
  243. package/src/services/inference-telemetry.d.ts.map +1 -0
  244. package/src/services/inference-telemetry.ts +143 -0
  245. package/src/services/ios-llama-streaming.ts +248 -0
  246. package/src/services/kv-spill.d.ts +189 -0
  247. package/src/services/kv-spill.d.ts.map +1 -0
  248. package/src/services/kv-spill.test.ts +222 -0
  249. package/src/services/kv-spill.ts +356 -0
  250. package/src/services/latency-trace.d.ts +346 -0
  251. package/src/services/latency-trace.d.ts.map +1 -0
  252. package/src/services/latency-trace.test.ts +266 -0
  253. package/src/services/latency-trace.ts +844 -0
  254. package/src/services/llama-server-metrics.ts +304 -0
  255. package/src/services/llm-streaming-binding.d.ts +96 -0
  256. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  257. package/src/services/llm-streaming-binding.ts +136 -0
  258. package/src/services/load-args.d.ts +82 -0
  259. package/src/services/load-args.d.ts.map +1 -0
  260. package/src/services/load-args.ts +81 -0
  261. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  262. package/src/services/manifest/index.d.ts +4 -0
  263. package/src/services/manifest/index.d.ts.map +1 -0
  264. package/src/services/manifest/index.ts +66 -0
  265. package/src/services/manifest/manifest.test.ts +693 -0
  266. package/src/services/manifest/schema.d.ts +715 -0
  267. package/src/services/manifest/schema.d.ts.map +1 -0
  268. package/src/services/manifest/schema.ts +655 -0
  269. package/src/services/manifest/types.d.ts +30 -0
  270. package/src/services/manifest/types.d.ts.map +1 -0
  271. package/src/services/manifest/types.ts +55 -0
  272. package/src/services/manifest/validator.d.ts +66 -0
  273. package/src/services/manifest/validator.d.ts.map +1 -0
  274. package/src/services/manifest/validator.ts +569 -0
  275. package/src/services/memory-arbiter.d.ts +343 -0
  276. package/src/services/memory-arbiter.d.ts.map +1 -0
  277. package/src/services/memory-arbiter.test.ts +419 -0
  278. package/src/services/memory-arbiter.ts +1000 -0
  279. package/src/services/memory-monitor.d.ts +119 -0
  280. package/src/services/memory-monitor.d.ts.map +1 -0
  281. package/src/services/memory-monitor.test.ts +208 -0
  282. package/src/services/memory-monitor.ts +296 -0
  283. package/src/services/memory-pressure.d.ts +127 -0
  284. package/src/services/memory-pressure.d.ts.map +1 -0
  285. package/src/services/memory-pressure.ts +413 -0
  286. package/src/services/mtp-doctor.d.ts +13 -0
  287. package/src/services/mtp-doctor.d.ts.map +1 -0
  288. package/src/services/mtp-doctor.ts +78 -0
  289. package/src/services/network-policy.d.ts +127 -0
  290. package/src/services/network-policy.d.ts.map +1 -0
  291. package/src/services/network-policy.ts +346 -0
  292. package/src/services/paths.d.ts +6 -0
  293. package/src/services/paths.d.ts.map +1 -0
  294. package/src/services/paths.ts +25 -0
  295. package/src/services/planner-skeleton.d.ts +124 -0
  296. package/src/services/planner-skeleton.d.ts.map +1 -0
  297. package/src/services/planner-skeleton.ts +175 -0
  298. package/src/services/providers.d.ts +38 -0
  299. package/src/services/providers.d.ts.map +1 -0
  300. package/src/services/providers.ts +507 -0
  301. package/src/services/ram-budget-cache.test.ts +163 -0
  302. package/src/services/ram-budget.d.ts +110 -0
  303. package/src/services/ram-budget.d.ts.map +1 -0
  304. package/src/services/ram-budget.ts +0 -0
  305. package/src/services/readiness.d.ts +9 -0
  306. package/src/services/readiness.d.ts.map +1 -0
  307. package/src/services/readiness.test.ts +87 -0
  308. package/src/services/readiness.ts +238 -0
  309. package/src/services/recommendation.d.ts +111 -0
  310. package/src/services/recommendation.d.ts.map +1 -0
  311. package/src/services/recommendation.ts +672 -0
  312. package/src/services/registry.d.ts +35 -0
  313. package/src/services/registry.d.ts.map +1 -0
  314. package/src/services/registry.ts +151 -0
  315. package/src/services/router-handler.d.ts +92 -0
  316. package/src/services/router-handler.d.ts.map +1 -0
  317. package/src/services/router-handler.test.ts +45 -0
  318. package/src/services/router-handler.ts +376 -0
  319. package/src/services/routing-policy.d.ts +55 -0
  320. package/src/services/routing-policy.d.ts.map +1 -0
  321. package/src/services/routing-policy.ts +228 -0
  322. package/src/services/routing-preferences.d.ts +8 -0
  323. package/src/services/routing-preferences.d.ts.map +1 -0
  324. package/src/services/routing-preferences.ts +15 -0
  325. package/src/services/runtime-target.d.ts +98 -0
  326. package/src/services/runtime-target.d.ts.map +1 -0
  327. package/src/services/runtime-target.ts +154 -0
  328. package/src/services/service.d.ts +128 -0
  329. package/src/services/service.d.ts.map +1 -0
  330. package/src/services/service.test.ts +223 -0
  331. package/src/services/service.ts +735 -0
  332. package/src/services/session-pool.d.ts +72 -0
  333. package/src/services/session-pool.d.ts.map +1 -0
  334. package/src/services/session-pool.ts +153 -0
  335. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  336. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  337. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  338. package/src/services/structured-output/deterministic-repair.ts +443 -0
  339. package/src/services/structured-output/index.ts +4 -0
  340. package/src/services/structured-output.d.ts +311 -0
  341. package/src/services/structured-output.d.ts.map +1 -0
  342. package/src/services/structured-output.test.ts +483 -0
  343. package/src/services/structured-output.ts +712 -0
  344. package/src/services/transcription-priority.test.ts +211 -0
  345. package/src/services/tts/errors.ts +46 -0
  346. package/src/services/tts/index.ts +214 -0
  347. package/src/services/tts/tts-audio-cache.ts +235 -0
  348. package/src/services/tts/types.ts +157 -0
  349. package/src/services/types.d.ts +19 -0
  350. package/src/services/types.d.ts.map +1 -0
  351. package/src/services/types.ts +55 -0
  352. package/src/services/verify-on-device.d.ts +34 -0
  353. package/src/services/verify-on-device.d.ts.map +1 -0
  354. package/src/services/verify-on-device.test.ts +87 -0
  355. package/src/services/verify-on-device.ts +127 -0
  356. package/src/services/verify.d.ts +8 -0
  357. package/src/services/verify.d.ts.map +1 -0
  358. package/src/services/verify.ts +13 -0
  359. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  360. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  361. package/src/services/vision/aosp-unavailable.ts +163 -0
  362. package/src/services/vision/capacitor-llama.d.ts +99 -0
  363. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  364. package/src/services/vision/capacitor-llama.ts +255 -0
  365. package/src/services/vision/cloud-fallback.d.ts +47 -0
  366. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  367. package/src/services/vision/cloud-fallback.test.ts +243 -0
  368. package/src/services/vision/cloud-fallback.ts +268 -0
  369. package/src/services/vision/fallback-chain.test.ts +86 -0
  370. package/src/services/vision/hash.d.ts +71 -0
  371. package/src/services/vision/hash.d.ts.map +1 -0
  372. package/src/services/vision/hash.ts +157 -0
  373. package/src/services/vision/index.d.ts +95 -0
  374. package/src/services/vision/index.d.ts.map +1 -0
  375. package/src/services/vision/index.ts +251 -0
  376. package/src/services/vision/llama-server.d.ts +73 -0
  377. package/src/services/vision/llama-server.d.ts.map +1 -0
  378. package/src/services/vision/llama-server.ts +177 -0
  379. package/src/services/vision/types.d.ts +153 -0
  380. package/src/services/vision/types.d.ts.map +1 -0
  381. package/src/services/vision/types.ts +154 -0
  382. package/src/services/vision/vast-fallback.d.ts +18 -0
  383. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  384. package/src/services/vision/vast-fallback.ts +127 -0
  385. package/src/services/vision-embedding-cache.d.ts +98 -0
  386. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  387. package/src/services/vision-embedding-cache.ts +189 -0
  388. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  389. package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
  390. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  391. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  392. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  393. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  394. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  395. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  396. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  397. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  398. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  399. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  400. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  401. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
  402. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  403. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  404. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  405. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  406. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  407. package/src/services/voice/audio-frame-consumer.ts +491 -0
  408. package/src/services/voice/barge-in.d.ts +112 -0
  409. package/src/services/voice/barge-in.d.ts.map +1 -0
  410. package/src/services/voice/barge-in.test.ts +244 -0
  411. package/src/services/voice/barge-in.ts +336 -0
  412. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  413. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  414. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  415. package/src/services/voice/cancellation-coordinator.ts +269 -0
  416. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  417. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  418. package/src/services/voice/checkpoint-manager.ts +401 -0
  419. package/src/services/voice/checkpoint-policy.ts +336 -0
  420. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  421. package/src/services/voice/e2e-harness.test.ts +182 -0
  422. package/src/services/voice/e2e-harness.ts +743 -0
  423. package/src/services/voice/eager-context-builder.d.ts +170 -0
  424. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  425. package/src/services/voice/eager-context-builder.ts +262 -0
  426. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  427. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  428. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  429. package/src/services/voice/embedding-server.ts +200 -0
  430. package/src/services/voice/embedding.d.ts +133 -0
  431. package/src/services/voice/embedding.d.ts.map +1 -0
  432. package/src/services/voice/embedding.test.ts +148 -0
  433. package/src/services/voice/embedding.ts +244 -0
  434. package/src/services/voice/emotion-attribution.d.ts +68 -0
  435. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  436. package/src/services/voice/emotion-attribution.test.ts +129 -0
  437. package/src/services/voice/emotion-attribution.ts +361 -0
  438. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  439. package/src/services/voice/engine-bridge.d.ts +746 -0
  440. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  441. package/src/services/voice/engine-bridge.test.ts +384 -0
  442. package/src/services/voice/engine-bridge.ts +2226 -0
  443. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  444. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  445. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  446. package/src/services/voice/eot-classifier.d.ts +214 -0
  447. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  448. package/src/services/voice/eot-classifier.ts +533 -0
  449. package/src/services/voice/errors.d.ts +20 -0
  450. package/src/services/voice/errors.d.ts.map +1 -0
  451. package/src/services/voice/errors.ts +32 -0
  452. package/src/services/voice/expressive-tags.d.ts +158 -0
  453. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  454. package/src/services/voice/expressive-tags.ts +405 -0
  455. package/src/services/voice/ffi-bindings.d.ts +636 -0
  456. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  457. package/src/services/voice/ffi-bindings.test.ts +671 -0
  458. package/src/services/voice/ffi-bindings.ts +3050 -0
  459. package/src/services/voice/first-line-cache.d.ts +181 -0
  460. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  461. package/src/services/voice/first-line-cache.ts +725 -0
  462. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  463. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  464. package/src/services/voice/fused-eot-scorer.ts +135 -0
  465. package/src/services/voice/index.d.ts +91 -0
  466. package/src/services/voice/index.d.ts.map +1 -0
  467. package/src/services/voice/index.ts +481 -0
  468. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  469. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  470. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  471. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  472. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  473. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  474. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  475. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  476. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  477. package/src/services/voice/kokoro/index.ts +79 -0
  478. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  479. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  480. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  481. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  482. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  483. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  484. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  485. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  486. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  487. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  488. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  489. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  490. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  491. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  492. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  493. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  494. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  495. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  496. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  497. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  498. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  499. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  501. package/src/services/voice/kokoro/types.d.ts +82 -0
  502. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  503. package/src/services/voice/kokoro/types.ts +95 -0
  504. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  505. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  506. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  507. package/src/services/voice/kokoro/voices.d.ts +30 -0
  508. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  509. package/src/services/voice/kokoro/voices.ts +64 -0
  510. package/src/services/voice/lifecycle.d.ts +135 -0
  511. package/src/services/voice/lifecycle.d.ts.map +1 -0
  512. package/src/services/voice/lifecycle.test.ts +315 -0
  513. package/src/services/voice/lifecycle.ts +301 -0
  514. package/src/services/voice/live-diarization-session.d.ts +96 -0
  515. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  516. package/src/services/voice/live-diarization-session.ts +289 -0
  517. package/src/services/voice/mic-source.d.ts +136 -0
  518. package/src/services/voice/mic-source.d.ts.map +1 -0
  519. package/src/services/voice/mic-source.test.ts +210 -0
  520. package/src/services/voice/mic-source.ts +503 -0
  521. package/src/services/voice/optimistic-policy.d.ts +109 -0
  522. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  523. package/src/services/voice/optimistic-policy.test.ts +101 -0
  524. package/src/services/voice/optimistic-policy.ts +192 -0
  525. package/src/services/voice/optimistic-rollback.ts +343 -0
  526. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  527. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  528. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  529. package/src/services/voice/partial-stabilizer.ts +140 -0
  530. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  531. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  532. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  533. package/src/services/voice/phrase-cache.d.ts +76 -0
  534. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  535. package/src/services/voice/phrase-cache.test.ts +242 -0
  536. package/src/services/voice/phrase-cache.ts +186 -0
  537. package/src/services/voice/phrase-chunker.d.ts +62 -0
  538. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  539. package/src/services/voice/phrase-chunker.test.ts +239 -0
  540. package/src/services/voice/phrase-chunker.ts +281 -0
  541. package/src/services/voice/pipeline-impls.d.ts +151 -0
  542. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  543. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  544. package/src/services/voice/pipeline-impls.test.ts +292 -0
  545. package/src/services/voice/pipeline-impls.ts +315 -0
  546. package/src/services/voice/pipeline.d.ts +216 -0
  547. package/src/services/voice/pipeline.d.ts.map +1 -0
  548. package/src/services/voice/pipeline.ts +505 -0
  549. package/src/services/voice/prefill-client.d.ts +123 -0
  550. package/src/services/voice/prefill-client.d.ts.map +1 -0
  551. package/src/services/voice/prefill-client.ts +316 -0
  552. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  553. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  554. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  555. package/src/services/voice/profile-store.d.ts +248 -0
  556. package/src/services/voice/profile-store.d.ts.map +1 -0
  557. package/src/services/voice/profile-store.ts +887 -0
  558. package/src/services/voice/ring-buffer.d.ts +40 -0
  559. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  560. package/src/services/voice/ring-buffer.ts +105 -0
  561. package/src/services/voice/rollback-queue.d.ts +24 -0
  562. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  563. package/src/services/voice/rollback-queue.ts +74 -0
  564. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  565. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  566. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  567. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  568. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  569. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  570. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  571. package/src/services/voice/scheduler.d.ts +146 -0
  572. package/src/services/voice/scheduler.d.ts.map +1 -0
  573. package/src/services/voice/scheduler.t2.test.ts +141 -0
  574. package/src/services/voice/scheduler.ts +927 -0
  575. package/src/services/voice/shared-resources.d.ts +190 -0
  576. package/src/services/voice/shared-resources.d.ts.map +1 -0
  577. package/src/services/voice/shared-resources.ts +320 -0
  578. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  579. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  580. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  581. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  582. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  583. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  584. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  585. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  586. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  587. package/src/services/voice/speaker/diarizer.ts +218 -0
  588. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  589. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  590. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  591. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  592. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  593. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  594. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  595. package/src/services/voice/speaker/encoder.d.ts +37 -0
  596. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  597. package/src/services/voice/speaker/encoder.ts +105 -0
  598. package/src/services/voice/speaker-imprint.d.ts +83 -0
  599. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  600. package/src/services/voice/speaker-imprint.test.ts +185 -0
  601. package/src/services/voice/speaker-imprint.ts +312 -0
  602. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  603. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  604. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  605. package/src/services/voice/speaker-preset-cache.ts +195 -0
  606. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  607. package/src/services/voice/system-audio-sink.d.ts +73 -0
  608. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  609. package/src/services/voice/system-audio-sink.test.ts +29 -0
  610. package/src/services/voice/system-audio-sink.ts +366 -0
  611. package/src/services/voice/transcriber.d.ts +244 -0
  612. package/src/services/voice/transcriber.d.ts.map +1 -0
  613. package/src/services/voice/transcriber.test.ts +392 -0
  614. package/src/services/voice/transcriber.ts +704 -0
  615. package/src/services/voice/turn-controller.d.ts +183 -0
  616. package/src/services/voice/turn-controller.d.ts.map +1 -0
  617. package/src/services/voice/turn-controller.test.ts +575 -0
  618. package/src/services/voice/turn-controller.ts +596 -0
  619. package/src/services/voice/types.d.ts +643 -0
  620. package/src/services/voice/types.d.ts.map +1 -0
  621. package/src/services/voice/types.ts +699 -0
  622. package/src/services/voice/vad.d.ts +282 -0
  623. package/src/services/voice/vad.d.ts.map +1 -0
  624. package/src/services/voice/vad.test.ts +480 -0
  625. package/src/services/voice/vad.ts +827 -0
  626. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  627. package/src/services/voice/voice-budget.d.ts +241 -0
  628. package/src/services/voice/voice-budget.d.ts.map +1 -0
  629. package/src/services/voice/voice-budget.test.ts +420 -0
  630. package/src/services/voice/voice-budget.ts +656 -0
  631. package/src/services/voice/voice-duet.test.ts +375 -0
  632. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  633. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  634. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  635. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  636. package/src/services/voice/voice-preset-format.d.ts +158 -0
  637. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  638. package/src/services/voice/voice-preset-format.ts +700 -0
  639. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  640. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  641. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  642. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  643. package/src/services/voice/voice-profile-artifact.ts +518 -0
  644. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  645. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  646. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  647. package/src/services/voice/voice-profile-routes.ts +425 -0
  648. package/src/services/voice/voice-scenario.ts +154 -0
  649. package/src/services/voice/voice-settings.d.ts +82 -0
  650. package/src/services/voice/voice-settings.d.ts.map +1 -0
  651. package/src/services/voice/voice-settings.ts +172 -0
  652. package/src/services/voice/voice-state-machine.d.ts +364 -0
  653. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  654. package/src/services/voice/voice-state-machine.ts +727 -0
  655. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  656. package/src/services/voice/voice-workbench-report.ts +326 -0
  657. package/src/services/voice/voice-workbench.test.ts +158 -0
  658. package/src/services/voice/voice.test.ts +1070 -0
  659. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  660. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  661. package/src/services/voice/wake-word-ggml.ts +320 -0
  662. package/src/services/voice/wake-word.d.ts +255 -0
  663. package/src/services/voice/wake-word.d.ts.map +1 -0
  664. package/src/services/voice/wake-word.test.ts +298 -0
  665. package/src/services/voice/wake-word.ts +554 -0
  666. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  667. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  668. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  669. package/src/services/voice-model-updater.d.ts +240 -0
  670. package/src/services/voice-model-updater.d.ts.map +1 -0
  671. package/src/services/voice-model-updater.ts +724 -0
  672. package/src/services/voice-prewarm.d.ts +3 -0
  673. package/src/services/voice-prewarm.d.ts.map +1 -0
  674. package/src/services/voice-prewarm.ts +51 -0
  675. package/dist/index.d.ts +0 -37
  676. package/dist/index.js +0 -1098
@@ -0,0 +1,1237 @@
1
+ /**
2
+ * Device-bridge: agent-side half of the "inference on the user's phone,
3
+ * agent in a container" architecture.
4
+ *
5
+ * Multi-device aware. Any number of devices can dial in; each `generate`
6
+ * is routed to the highest-scoring connected device at call time. A phone
7
+ * and a Mac paired to the same agent → requests go to the Mac; when the
8
+ * Mac disconnects, new requests fall through to the phone automatically.
9
+ *
10
+ * Scoring (higher = preferred):
11
+ * - desktop / electrobun: 100 base
12
+ * - ios / android: 10 base
13
+ * - per GB of total RAM: +2
14
+ * - per GB of VRAM: +5 (dedicated GPU wins big)
15
+ * - has loaded the right model already: +50 (avoid a swap)
16
+ *
17
+ * Disconnect tolerance
18
+ * --------------------
19
+ * A pending request stays in `pendingGenerates` until either (a) a device
20
+ * (same or different) returns a matching correlation-id, or (b) the
21
+ * timeout fires. On any device (re)connect we re-route orphaned
22
+ * generates to the new best device.
23
+ *
24
+ * Durability
25
+ * ----------
26
+ * Pending requests are best-effort persisted to a JSON log under
27
+ * `$ELIZA_STATE_DIR/local-inference/pending-requests.json` so a brief
28
+ * agent restart doesn't lose the queue. Persistence is async and
29
+ * non-blocking — failures fall back to in-memory only.
30
+ */
31
+
32
+ import { randomUUID } from "node:crypto";
33
+ import fs from "node:fs/promises";
34
+ import type { Server as HttpServer, IncomingMessage } from "node:http";
35
+ import path from "node:path";
36
+ import type { Duplex } from "node:stream";
37
+ import type { AgentRuntime } from "@elizaos/core";
38
+ import { logger } from "@elizaos/core";
39
+ import {
40
+ computeGenerationThroughput,
41
+ type GenerationThroughput,
42
+ } from "@elizaos/shared/local-inference";
43
+ import type {
44
+ LocalInferenceLoadArgs,
45
+ LocalInferenceLoader,
46
+ } from "./active-model";
47
+ import { localInferenceRoot } from "./paths";
48
+
49
/** Default time budget (ms) for a single device call — NOTE(review): call sites are outside this excerpt; confirm which requests use it. */
const DEFAULT_CALL_TIMEOUT_MS = 60 * 1_000;
/** Default time budget (ms) for a model load — NOTE(review): usage is outside this excerpt. */
const DEFAULT_LOAD_TIMEOUT_MS = 2 * 60 * 1_000;
/** Period (ms) of the agent → device `ping` heartbeat started when a device registers. */
const HEARTBEAT_INTERVAL_MS = 15 * 1_000;
/** File name of the JSON log that holds pending requests. */
const PENDING_LOG_FILENAME = "pending-requests.json";
53
+
54
/**
 * Hardware / OS description a device reports about itself when it registers.
 * The routing score is derived from `platform`, the RAM fields, `gpu`,
 * `lowPowerMode` and `thermalState`.
 */
interface DeviceCapabilities {
  /** Host platform; desktop-class platforms receive a much higher base score. */
  platform: "ios" | "android" | "web" | "electrobun" | "desktop";
  /** Device model identifier as reported by the device. */
  deviceModel: string;
  machineId?: string;
  osVersion?: string;
  isSimulator?: boolean;
  /** Installed RAM in GB. */
  totalRamGb: number;
  /** Currently available RAM in GB, when the device can measure it. */
  availableRamGb?: number | null;
  freeStorageGb?: number | null;
  cpuCores: number;
  /** GPU description, or `null` when the device reports none. */
  gpu: {
    backend: "metal" | "vulkan" | "gpu-delegate" | "cuda";
    /** Only an available GPU contributes to the score. */
    available: boolean;
    /** Dedicated VRAM in GB; when absent, scoring falls back to `totalRamGb`. */
    totalVramGb?: number;
  } | null;
  gpuSupported?: boolean;
  /** Low-power mode lowers the device's routing score. */
  lowPowerMode?: boolean;
  /** Thermal pressure; "serious" and "critical" lower the routing score. */
  thermalState?: "nominal" | "fair" | "serious" | "critical" | "unknown";
  // NOTE(review): meaning of "mtp" is not established in this file — confirm with the device client.
  mtpSupported?: boolean;
  mtpReason?: string;
}
75
+
76
/** Payload of the `register` frame — the first frame a device must send. */
interface DeviceRegistration {
  /** Identity of the device; a later registration with the same id supersedes the earlier connection. */
  deviceId: string;
  /** Compared against the agent's expected pairing token when one is configured. */
  pairingToken?: string;
  capabilities: DeviceCapabilities;
  /** Path of the model the device reports as loaded, or `null` when none. */
  loadedPath: string | null;
}
82
+
83
+ // Wire types — kept in sync by hand with the device-side bridge client.
84
+
85
/**
 * Frames sent by a device to the agent. Every `*Result` frame carries the
 * `correlationId` of the request it answers and is discriminated on `ok`.
 */
type DeviceOutbound =
  // Registration handshake.
  | { type: "register"; payload: DeviceRegistration }
  // Model load outcome.
  | { type: "loadResult"; correlationId: string; ok: true; loadedPath: string }
  | { type: "loadResult"; correlationId: string; ok: false; error: string }
  // Model unload outcome.
  | { type: "unloadResult"; correlationId: string; ok: true }
  | { type: "unloadResult"; correlationId: string; ok: false; error: string }
  // Text generation outcome.
  | {
      type: "generateResult";
      correlationId: string;
      ok: true;
      text: string;
      promptTokens: number;
      outputTokens: number;
      durationMs: number;
      /**
       * Time-to-first-token in ms, when the device measured it. Equals the
       * prefill wall-clock, which lets the agent separate prefill from decode
       * tok/s. Optional — absent on the non-streaming path (older device
       * clients).
       */
      ttftMs?: number;
    }
  | { type: "generateResult"; correlationId: string; ok: false; error: string }
  // Embedding outcome.
  | {
      type: "embedResult";
      correlationId: string;
      ok: true;
      embedding: number[];
      tokens: number;
    }
  | { type: "embedResult"; correlationId: string; ok: false; error: string }
  // Heartbeat reply.
  | { type: "pong"; at: number };
116
+
117
/**
 * Frames sent by the agent to a device. Every request except `ping` carries a
 * `correlationId` that the device echoes back in the matching result frame.
 */
type AgentOutbound =
  | ({ type: "load"; correlationId: string } & LocalInferenceLoadArgs)
  | { type: "unload"; correlationId: string }
  | {
      type: "generate";
      correlationId: string;
      prompt: string;
      stopSequences?: string[];
      maxTokens?: number;
      temperature?: number;
      /**
       * promptCacheKey forwarded from `ProviderCachePlan`. The device's
       * local-inference layer may derive a stable slot_id from it
       * (llama-server) or use it to look up a session in its session pool
       * (node-llama-cpp). Old clients ignore the field; new clients get
       * prefix-cache reuse across calls that share a key.
       */
      cacheKey?: string;
    }
  | { type: "embed"; correlationId: string; input: string }
  | { type: "ping"; at: number };
138
+
139
/** The subset of a WebSocket connection's surface that the bridge relies on. */
interface MinimalWebSocket {
  /** Compared against the constructor's `OPEN` constant before sending a heartbeat. */
  readyState: number;
  send(data: string): void;
  close(code?: number, reason?: string): void;
  // Event subscriptions used by the bridge.
  on(event: "message", listener: (data: Buffer | string) => void): unknown;
  on(event: "close", listener: () => void): unknown;
  on(event: "error", listener: (err: Error) => void): unknown;
  on(event: "pong", listener: () => void): unknown;
}
148
+
149
/** Static ready-state constants read from the `WebSocket` constructor. */
interface WsConstructor {
  readonly OPEN: number;
  readonly CLOSED: number;
}
153
+
154
/** The subset of a WebSocket server's surface that the bridge relies on. */
interface WssInstance {
  /** Completes an HTTP upgrade and hands the resulting socket to `cb`. */
  handleUpgrade(
    request: IncomingMessage,
    socket: Duplex,
    head: Buffer,
    cb: (ws: MinimalWebSocket) => void,
  ): void;
  on(event: "error", listener: (err: Error) => void): unknown;
}
163
+
164
/** Constructor shape of the WebSocket server class. */
interface WssConstructor {
  new (options: { noServer: boolean; maxPayload?: number }): WssInstance;
}
167
+
168
/** The two exports the bridge needs from the dynamically imported `ws` module. */
interface WsModule {
  WebSocketServer: WssConstructor;
  WebSocket: WsConstructor;
}
172
+
173
/**
 * Runtime guard for the dynamically imported `ws` module.
 *
 * @param value - Whatever the dynamic import produced.
 * @returns `true` when `value` is an object whose `WebSocketServer` and
 *   `WebSocket` exports are functions and `WebSocket` exposes numeric `OPEN`
 *   and `CLOSED` constants.
 */
function isWsModule(value: unknown): value is WsModule {
  if (typeof value !== "object" || value === null) return false;

  const serverCtor: unknown = Reflect.get(value, "WebSocketServer");
  const socketCtor: unknown = Reflect.get(value, "WebSocket");
  if (typeof serverCtor !== "function") return false;
  if (typeof socketCtor !== "function") return false;

  // Both ready-state constants must be present as numbers.
  return ["OPEN", "CLOSED"].every(
    (key) => typeof Reflect.get(socketCtor, key) === "number",
  );
}
188
+
189
/**
 * Dynamically imports `ws` and verifies it has the expected shape.
 *
 * @returns The imported module, narrowed to `WsModule`.
 * @throws Error when the module does not expose `WebSocketServer` / `WebSocket`.
 */
async function importWsModule(): Promise<WsModule> {
  const imported: unknown = await import("ws");
  if (isWsModule(imported)) {
    return imported;
  }
  throw new Error("ws module did not expose WebSocketServer/WebSocket");
}
196
+
197
/** Bookkeeping for a model-load request that has not yet been settled. */
interface PendingLoad {
  correlationId: string;
  modelPath: string;
  /** Settles the caller's promise on success. */
  resolve: () => void;
  /** Settles the caller's promise on failure. */
  reject: (err: Error) => void;
  /** Timer handle; cleared when the request is settled early. */
  timeout: ReturnType<typeof setTimeout>;
  /** Device the load was sent to. Loads are never re-routed to another device. */
  routedDeviceId: string;
}
205
+
206
/** Bookkeeping for a model-unload request that has not yet been settled. */
interface PendingUnload {
  correlationId: string;
  /** Settles the caller's promise on success. */
  resolve: () => void;
  /** Settles the caller's promise on failure. */
  reject: (err: Error) => void;
  /** Timer handle; cleared when the request is settled early. */
  timeout: ReturnType<typeof setTimeout>;
  /** Device the unload was sent to. Unloads are never re-routed to another device. */
  routedDeviceId: string;
}
213
+
214
/** Bookkeeping for a generate request that has not yet been settled. */
interface PendingGenerate {
  correlationId: string;
  /** Settles the caller's promise with the generated text. */
  resolve: (text: string) => void;
  reject: (err: Error) => void;
  timeout: ReturnType<typeof setTimeout>;
  /** The exact frame to (re)send to a device. */
  request: AgentOutbound;
  /**
   * Device this request was most recently routed to. Cleared when that
   * device disconnects; the request then waits, orphaned, until a device
   * connects and it is re-routed.
   */
  routedDeviceId: string | null;
  /** ISO timestamp captured on first submission; used to purge stale entries on restart. */
  submittedAt: string;
}
229
+
230
/** Bookkeeping for an embed request that has not yet been settled. */
interface PendingEmbed {
  correlationId: string;
  /** Settles the caller's promise with the embedding vector and its token count. */
  resolve: (result: { embedding: number[]; tokens: number }) => void;
  reject: (err: Error) => void;
  timeout: ReturnType<typeof setTimeout>;
  /** The exact frame to (re)send to a device. */
  request: AgentOutbound;
  /** Same disconnect semantics as PendingGenerate — `null` while orphaned. */
  routedDeviceId: string | null;
  /**
   * ISO timestamp captured on first submission, mirroring PendingGenerate.
   * Embeds are NOT persisted to disk: they are short-lived, and the answer
   * only matters while the caller holding the result promise is alive. The
   * field is therefore purely observational today (status snapshots,
   * debugging).
   */
  submittedAt: string;
}
247
+
248
/** Live state the bridge keeps for each registered device connection. */
interface ConnectedDevice {
  deviceId: string;
  /** The socket currently serving this deviceId. */
  socket: MinimalWebSocket;
  capabilities: DeviceCapabilities;
  /** Model path the device reported as loaded, or `null`. */
  loadedPath: string | null;
  /** Epoch milliseconds at registration. */
  connectedAt: number;
  /** Epoch milliseconds; initialised at registration. */
  lastHeartbeatAt: number;
  /** Interval handle of the periodic `ping`; cleared on supersede or disconnect. */
  heartbeatTimer: ReturnType<typeof setInterval>;
}
257
+
258
/** Per-device entry in a `DeviceBridgeStatus` snapshot. */
export interface DeviceSummary {
  deviceId: string;
  capabilities: DeviceCapabilities;
  loadedPath: string | null;
  /** ISO timestamp of when the device connected. */
  connectedSince: string;
  /** Routing score; higher is preferred. */
  score: number;
  /** Pending generates, embeds, loads and unloads currently routed to this device. */
  activeRequests: number;
  /** True only for the highest-scoring device of the snapshot. */
  isPrimary: boolean;
}
267
+
268
/** Point-in-time snapshot of the bridge, as handed to status listeners. */
export interface DeviceBridgeStatus {
  /** True if any device is currently connected. */
  connected: boolean;
  /** All connected devices, sorted by descending score. */
  devices: DeviceSummary[];
  /** Device id of the current best-score device, or null when none. */
  primaryDeviceId: string | null;
  /** Total generates/embeds/loads/unloads queued (either in-flight or awaiting a device). */
  pendingRequests: number;

  // Legacy single-device fields — kept for UI backward compatibility. They
  // mirror the primary device so old `DeviceBridgeStatusBar` code keeps
  // working.
  deviceId: string | null;
  capabilities: DeviceCapabilities | null;
  loadedPath: string | null;
  connectedSince: string | null;
}
283
+
284
/** Serializable subset of a pending generate: everything except the promise callbacks and timer. */
interface PersistedGenerateRequest {
  correlationId: string;
  /** The frame to send to a device. */
  request: AgentOutbound;
  /** ISO timestamp of the original submission. */
  submittedAt: string;
}
289
+
290
/**
 * Measured resource signal for a single on-device generation. One record is
 * delivered to `subscribeGenerationMetrics` listeners after each successful
 * `generateResult`. The Mobile Resource Workbench folds these records into a
 * `DeviceResourceMetrics` accumulator (prefill/decode tok/s, TTFT, per-tier
 * aggregation). Throughput fields are `null` whenever the device could not
 * measure the inputs they are derived from.
 */
export interface DeviceGenerationMetrics {
  deviceId: string;
  platform: DeviceCapabilities["platform"] | null;
  /** Device model identifier (e.g. `iPhone17,2`) for per-device baselines. */
  deviceModel: string | null;
  promptTokens: number;
  outputTokens: number;
  durationMs: number;
  /** Time-to-first-token in ms, or `null` when the device did not report it. */
  ttftMs: number | null;
  throughput: GenerationThroughput;
}
308
+
309
/**
 * Scoring function — rank a connected device by how suitable it is for
 * inference; the highest score wins. Pure, synchronous, and easy to test.
 *
 * score = platform base + 2 × usable RAM (GB) + 5 × VRAM (GB)
 *         + loaded-model bonus − health penalty
 *
 * - Platform base: 100 for desktop / electrobun, 10 for ios / android, else 0.
 * - Usable RAM: total RAM when availability is unknown; otherwise the reported
 *   available RAM, floored at 60% of total and capped at total.
 * - VRAM: counted only when the GPU is available; falls back to total RAM when
 *   the device does not report dedicated VRAM.
 * - Health penalty: 100 for a critical thermal state, 15 for low-power mode or
 *   a serious thermal state. Critical takes precedence, so a critically hot
 *   device in low-power mode is not under-penalised.
 *
 * Capabilities arrive over the wire, so non-numeric, non-finite or negative
 * memory figures are treated as 0 rather than poisoning the score with NaN.
 *
 * @param device - The connected device to score.
 * @param opts - `preferLoadedPath`: model path that earns a +50 bonus when the
 *   device already has it loaded (avoids a model swap).
 * @returns The device's score; higher is preferred.
 */
function scoreDevice(
  device: ConnectedDevice,
  opts: { preferLoadedPath?: string } = {},
): number {
  const cap = device.capabilities;

  // Sanitise a memory figure coming from the untrusted registration payload.
  const gigabytes = (value: unknown): number =>
    typeof value === "number" && Number.isFinite(value) && value > 0
      ? value
      : 0;

  let platformBase = 0;
  if (cap.platform === "desktop" || cap.platform === "electrobun") {
    platformBase = 100;
  } else if (cap.platform === "ios" || cap.platform === "android") {
    platformBase = 10;
  }

  const totalRamGb = gigabytes(cap.totalRamGb);
  const availableRamGb = gigabytes(cap.availableRamGb);
  const usableRamGb =
    availableRamGb > 0
      ? Math.min(totalRamGb, Math.max(availableRamGb, totalRamGb * 0.6))
      : totalRamGb;
  const ramScore = usableRamGb * 2;

  let vramScore = 0;
  if (cap.gpu?.available) {
    const vramGb =
      typeof cap.gpu.totalVramGb === "number"
        ? gigabytes(cap.gpu.totalVramGb)
        : totalRamGb;
    vramScore = vramGb * 5;
  }

  // Check the most severe condition first so it cannot be masked.
  let healthPenalty = 0;
  if (cap.thermalState === "critical") {
    healthPenalty = 100;
  } else if (cap.lowPowerMode || cap.thermalState === "serious") {
    healthPenalty = 15;
  }

  const loadedBonus =
    opts.preferLoadedPath && device.loadedPath === opts.preferLoadedPath
      ? 50
      : 0;

  return platformBase + ramScore + vramScore + loadedBonus - healthPenalty;
}
347
+
348
+ export class DeviceBridge {
349
+ private readonly devices = new Map<string, ConnectedDevice>();
350
+ private wss: WssInstance | null = null;
351
+ private restored = false;
352
+
353
+ private readonly pendingLoads = new Map<string, PendingLoad>();
354
+ private readonly pendingUnloads = new Map<string, PendingUnload>();
355
+ private readonly pendingGenerates = new Map<string, PendingGenerate>();
356
+ private readonly pendingEmbeds = new Map<string, PendingEmbed>();
357
+
358
+ private readonly statusListeners = new Set<
359
+ (status: DeviceBridgeStatus) => void
360
+ >();
361
+
362
+ private readonly generationMetricsListeners = new Set<
363
+ (metrics: DeviceGenerationMetrics) => void
364
+ >();
365
+
366
+ /** The most recent successful generation's metrics, or null. */
367
+ private lastGenerationMetrics: DeviceGenerationMetrics | null = null;
368
+
369
+ /** Bounded ring buffer of recent generation metrics for the dev endpoint. */
370
+ private readonly recentGenerations: DeviceGenerationMetrics[] = [];
371
+ private static readonly RECENT_GENERATIONS_CAP = 200;
372
+
373
+ private readonly expectedPairingToken: string | null =
374
+ process.env.ELIZA_DEVICE_PAIRING_TOKEN?.trim() || null;
375
+
376
/**
 * Build a point-in-time snapshot of the bridge: every connected device
 * (sorted by descending score, best one flagged as primary), the total number
 * of pending requests, and legacy single-device fields mirroring the primary.
 *
 * All "no primary device" fields are reported as `null` — never `undefined` —
 * so the result matches the declared `DeviceBridgeStatus` contract and
 * serialises with stable keys.
 *
 * @returns The current bridge status.
 */
status(): DeviceBridgeStatus {
  const summaries: DeviceSummary[] = [];
  for (const device of this.devices.values()) {
    const activeRequests =
      this.countRouted(this.pendingGenerates, device.deviceId) +
      this.countRouted(this.pendingEmbeds, device.deviceId) +
      this.countRouted(this.pendingLoads, device.deviceId) +
      this.countRouted(this.pendingUnloads, device.deviceId);
    summaries.push({
      deviceId: device.deviceId,
      capabilities: device.capabilities,
      loadedPath: device.loadedPath,
      connectedSince: new Date(device.connectedAt).toISOString(),
      score: scoreDevice(device),
      activeRequests,
      isPrimary: false,
    });
  }

  // Sort desc by score so the UI can just render in order.
  summaries.sort((a, b) => b.score - a.score);
  const primary = summaries[0] ?? null;
  if (primary) primary.isPrimary = true;

  const pendingRequests =
    this.pendingGenerates.size +
    this.pendingEmbeds.size +
    this.pendingLoads.size +
    this.pendingUnloads.size;

  return {
    connected: summaries.length > 0,
    devices: summaries,
    primaryDeviceId: primary?.deviceId ?? null,
    pendingRequests,
    deviceId: primary?.deviceId ?? null,
    capabilities: primary?.capabilities ?? null,
    loadedPath: primary?.loadedPath ?? null,
    connectedSince: primary?.connectedSince ?? null,
  };
}
417
+
418
/**
 * Count the entries of a pending-request map that are currently routed to
 * the given device.
 *
 * @param map - Pending requests keyed by correlation id.
 * @param deviceId - Device to count for.
 * @returns Number of entries whose `routedDeviceId` equals `deviceId`.
 */
private countRouted<T extends { routedDeviceId: string | null }>(
  map: Map<string, T>,
  deviceId: string,
): number {
  let matches = 0;
  map.forEach((entry) => {
    if (entry.routedDeviceId === deviceId) {
      matches++;
    }
  });
  return matches;
}
428
+
429
/**
 * Register a listener for bridge status snapshots.
 *
 * @param listener - Called with a fresh snapshot whenever status is emitted.
 * @returns A function that removes the listener again.
 */
subscribeStatus(listener: (status: DeviceBridgeStatus) => void): () => void {
  const listeners = this.statusListeners;
  listeners.add(listener);
  const unsubscribe = (): void => {
    listeners.delete(listener);
  };
  return unsubscribe;
}
435
+
436
/**
 * Deliver one shared status snapshot to every status listener. A listener
 * that throws is unsubscribed so it cannot break later emissions.
 */
private emitStatus(): void {
  const snapshot = this.status();
  this.statusListeners.forEach((notify) => {
    try {
      notify(snapshot);
    } catch {
      // Drop the misbehaving listener; removing during iteration is safe for a Set.
      this.statusListeners.delete(notify);
    }
  });
}
446
+
447
+ /**
448
+ * Subscribe to per-generation throughput metrics. Fires once per successful
449
+ * on-device generation with the differenced prefill/decode tok/s. Returns an
450
+ * unsubscribe function.
451
+ */
452
subscribeGenerationMetrics(
  listener: (metrics: DeviceGenerationMetrics) => void,
): () => void {
  const listeners = this.generationMetricsListeners;
  listeners.add(listener);
  // The returned closure undoes exactly this registration.
  const unsubscribe = (): void => {
    listeners.delete(listener);
  };
  return unsubscribe;
}
460
+
461
+ /** The most recent successful generation's measured metrics, or null. */
462
latestGenerationMetrics(): DeviceGenerationMetrics | null {
  // Stays null until the first generation metrics record has been emitted.
  const latest = this.lastGenerationMetrics;
  return latest;
}
465
+
466
+ /** Most recent generation metrics (newest last), capped at `limit`. */
467
recentGenerationMetrics(limit = 50): DeviceGenerationMetrics[] {
  const count = Math.trunc(limit);
  // `slice(-0)` is `slice(0)` and would return the WHOLE buffer, so a limit of
  // zero — and likewise a negative or NaN limit — must short-circuit to an
  // empty result instead of falling through to `slice`.
  if (!(count > 0)) return [];
  // Newest entries live at the end of the buffer; always returns a copy.
  return this.recentGenerations.slice(-count);
}
471
+
472
/**
 * Record a generation's metrics and fan them out to subscribers.
 *
 * Updates the "latest" slot, appends to the bounded history (evicting the
 * oldest entry once the cap is exceeded), then notifies every listener. A
 * listener that throws is unsubscribed.
 *
 * @param metrics - Metrics of the generation that just completed.
 */
private emitGenerationMetrics(metrics: DeviceGenerationMetrics): void {
  this.lastGenerationMetrics = metrics;

  const history = this.recentGenerations;
  history.push(metrics);
  const overflow = history.length - DeviceBridge.RECENT_GENERATIONS_CAP;
  if (overflow > 0) {
    // One push per call means at most one entry has to go.
    history.shift();
  }

  this.generationMetricsListeners.forEach((notify) => {
    try {
      notify(metrics);
    } catch {
      this.generationMetricsListeners.delete(notify);
    }
  });
}
486
+
487
/**
 * Attach the device bridge to an HTTP server: creates a `noServer` WebSocket
 * server and handles `upgrade` requests for
 * `/api/local-inference/device-bridge`. Upgrade requests for any other path
 * are left untouched so other handlers can claim them.
 *
 * Idempotent: once a WebSocket server exists, later calls are no-ops — this
 * includes calls that race with an attach still awaiting the `ws` import.
 *
 * @param server - The HTTP server whose upgrade requests should be handled.
 * @throws Error when the `ws` module cannot be imported or has an unexpected shape.
 */
async attachToHttpServer(server: HttpServer): Promise<void> {
  if (this.wss) return;
  const ws = await importWsModule();
  // Re-check after the await: a concurrent call may have attached while the
  // import was pending. Without this, both calls would register an `upgrade`
  // listener and the same socket would be upgraded twice.
  if (this.wss) return;

  const wss = new ws.WebSocketServer({
    noServer: true,
    maxPayload: 1024 * 1024,
  });
  this.wss = wss;

  wss.on("error", (err) => {
    logger.warn("[device-bridge] WSS error:", err.message);
  });

  server.on("upgrade", (request, socket, head) => {
    const url = new URL(request.url ?? "/", "http://localhost");
    if (url.pathname !== "/api/local-inference/device-bridge") return;
    wss.handleUpgrade(request, socket, head, (client) => {
      this.handleConnection(client, ws.WebSocket, url);
    });
  });

  // Restore persisted pending generates the first time a server attaches.
  // We only restore once per process — avoids double-resubmit on repeated
  // server restarts inside the same worker.
  if (!this.restored) {
    this.restored = true;
    await this.restorePendingGenerates();
  }
}
516
+
517
/**
 * Wire up a freshly upgraded device socket.
 *
 * Protocol enforced here:
 * 1. When a pairing token is configured, the `token` query parameter must
 *    match it, otherwise the socket is closed with 4001.
 * 2. The first frame must be a well-formed `register` frame (object payload
 *    with a non-empty string `deviceId` and an object `capabilities`);
 *    anything else closes the socket with 4002. A configured pairing token
 *    must also match `payload.pairingToken` (4001 otherwise).
 * 3. Every later frame is forwarded to `handleDeviceMessage`.
 *
 * Frames come from the network, so their shape is checked before any field
 * is dereferenced — a frame such as `null` or `{"type":"register"}` must not
 * throw inside the socket's message listener.
 *
 * @param socket - The upgraded WebSocket connection.
 * @param WsCtor - WebSocket constructor, passed on for its ready-state constants.
 * @param url - Parsed request URL; only its `token` query parameter is read.
 */
private handleConnection(
  socket: MinimalWebSocket,
  WsCtor: WsConstructor,
  url: URL,
): void {
  const queryToken = url.searchParams.get("token")?.trim();
  if (this.expectedPairingToken && queryToken !== this.expectedPairingToken) {
    logger.warn("[device-bridge] Rejecting connection: bad query token");
    socket.close(4001, "unauthorized");
    return;
  }

  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null;

  // Set once the register handshake succeeds; null means "not registered".
  let registeredDeviceId: string | null = null;

  socket.on("message", (raw) => {
    let parsed: unknown;
    try {
      const text = typeof raw === "string" ? raw : raw.toString("utf8");
      parsed = JSON.parse(text);
    } catch {
      logger.warn("[device-bridge] Ignoring non-JSON frame");
      return;
    }

    if (!isRecord(parsed) || typeof parsed.type !== "string") {
      if (registeredDeviceId === null) {
        // A malformed first frame is, by definition, not a register frame.
        logger.warn("[device-bridge] First frame must be register");
        socket.close(4002, "must-register-first");
      } else {
        logger.warn("[device-bridge] Ignoring malformed frame");
      }
      return;
    }
    const msg = parsed as DeviceOutbound;

    if (registeredDeviceId === null) {
      if (msg.type !== "register") {
        logger.warn("[device-bridge] First frame must be register");
        socket.close(4002, "must-register-first");
        return;
      }
      const payload: unknown = msg.payload;
      if (
        !isRecord(payload) ||
        typeof payload.deviceId !== "string" ||
        payload.deviceId === "" ||
        !isRecord(payload.capabilities)
      ) {
        logger.warn("[device-bridge] Rejecting register: malformed payload");
        socket.close(4002, "invalid-register");
        return;
      }
      if (
        this.expectedPairingToken &&
        payload.pairingToken !== this.expectedPairingToken
      ) {
        logger.warn("[device-bridge] Rejecting register: bad pairing token");
        socket.close(4001, "unauthorized");
        return;
      }
      registeredDeviceId = msg.payload.deviceId;
      this.onDeviceRegistered(socket, WsCtor, msg.payload);
      return;
    }

    this.handleDeviceMessage(msg);
  });

  socket.on("close", () => {
    if (registeredDeviceId === null) return;
    // Only evict if THIS socket is still the current one for the
    // deviceId. When a newer connection supersedes us, its registration
    // already replaced the map entry; the delayed close event from our
    // superseded socket must not tear that down.
    const current = this.devices.get(registeredDeviceId);
    if (current && current.socket === socket) {
      this.onDeviceDisconnected(registeredDeviceId);
    }
  });

  socket.on("error", (err) => {
    logger.warn("[device-bridge] Socket error:", err.message);
  });
}
581
+
582
/**
 * Complete a device's registration.
 *
 * Steps, in order: supersede any previous connection for the same deviceId,
 * start the heartbeat, store the device, reject pending loads/unloads whose
 * target device is no longer connected, re-route orphaned generates and
 * embeds to the best available device, and emit a status update.
 *
 * @param socket - The socket that sent the register frame.
 * @param WsCtor - WebSocket constructor, used for its `OPEN` constant.
 * @param registration - The register frame's payload.
 */
private onDeviceRegistered(
  socket: MinimalWebSocket,
  WsCtor: WsConstructor,
  registration: DeviceRegistration,
): void {
  // A newer connection for the same deviceId replaces the old one: close the
  // stale socket (best effort) and stop its heartbeat.
  const superseded = this.devices.get(registration.deviceId);
  if (superseded) {
    try {
      superseded.socket.close(4003, "superseded");
    } catch {
      /* best effort */
    }
    clearInterval(superseded.heartbeatTimer);
  }

  // Runs on every heartbeat tick; `device` is initialised before the first tick.
  const sendHeartbeat = (): void => {
    if (socket.readyState !== WsCtor.OPEN) return;
    try {
      this.sendToDevice(device.deviceId, { type: "ping", at: Date.now() });
    } catch {
      /* ignore after close */
    }
  };

  const device: ConnectedDevice = {
    deviceId: registration.deviceId,
    socket,
    capabilities: registration.capabilities,
    loadedPath: registration.loadedPath,
    connectedAt: Date.now(),
    lastHeartbeatAt: Date.now(),
    heartbeatTimer: setInterval(sendHeartbeat, HEARTBEAT_INTERVAL_MS),
  };

  // Where the timer handle is an object exposing `unref`, call it.
  const timerHandle: unknown = device.heartbeatTimer;
  if (
    typeof timerHandle === "object" &&
    timerHandle !== null &&
    "unref" in timerHandle
  ) {
    (timerHandle as { unref(): void }).unref();
  }
  this.devices.set(device.deviceId, device);

  logger.info(
    `[device-bridge] Device connected: ${device.deviceId} (${device.capabilities.platform}, score=${scoreDevice(device)})`,
  );

  // Loads and unloads are tied to device-specific state, so they are never
  // re-routed: any whose target device is gone is rejected for the caller to
  // retry.
  for (const load of this.pendingLoads.values()) {
    const targetGone =
      load.routedDeviceId !== device.deviceId &&
      !this.devices.has(load.routedDeviceId);
    if (!targetGone) continue;
    clearTimeout(load.timeout);
    this.pendingLoads.delete(load.correlationId);
    load.reject(
      new Error("DEVICE_RECONNECTED: retry model load after reconnect"),
    );
  }
  for (const unload of this.pendingUnloads.values()) {
    if (this.devices.has(unload.routedDeviceId)) continue;
    clearTimeout(unload.timeout);
    this.pendingUnloads.delete(unload.correlationId);
    unload.reject(
      new Error("DEVICE_RECONNECTED: retry model unload after reconnect"),
    );
  }

  // Generates and embeds whose routed device disconnected (routedDeviceId is
  // null) are handed to whichever device currently scores best. Embeds are
  // short-lived and idempotent (the device just runs llama_get_embeddings),
  // so retargeting them is as safe as it is for generates.
  // NOTE(review): when the re-send throws, the request is rejected but stays
  // in its map with a live timer — confirm the timeout path cleans it up.
  const rerouteOrphans = (
    orphans: Iterable<{
      routedDeviceId: string | null;
      request: AgentOutbound;
      reject: (err: Error) => void;
    }>,
  ): void => {
    for (const orphan of orphans) {
      if (orphan.routedDeviceId !== null) continue;
      const target = this.pickBestDevice();
      if (!target) continue;
      orphan.routedDeviceId = target.deviceId;
      try {
        this.sendToDevice(target.deviceId, orphan.request);
      } catch (err) {
        const failure =
          err instanceof Error
            ? err
            : new Error("Failed to re-route after reconnect");
        orphan.reject(failure);
      }
    }
  };
  rerouteOrphans(this.pendingGenerates.values());
  rerouteOrphans(this.pendingEmbeds.values());

  this.emitStatus();
}
690
+
691
/**
 * Remove a device from the registry and deal with the work routed to it.
 *
 * Generates and embeds routed to the device are marked orphaned
 * (`routedDeviceId = null`). If other devices are still connected they are
 * re-sent immediately; otherwise they stay parked until a device connects
 * or their own timeout fires. No-op when the device id is not registered.
 *
 * @param deviceId - Id of the device whose connection ended.
 */
private onDeviceDisconnected(deviceId: string): void {
  const device = this.devices.get(deviceId);
  if (!device) return;
  clearInterval(device.heartbeatTimer);
  this.devices.delete(deviceId);

  // Orphan any generates / embeds routed to this device so they can be
  // re-routed to a surviving device (or await a reconnect).
  let orphaned = 0;
  const orphan = (pending: { routedDeviceId: string | null }): void => {
    if (pending.routedDeviceId !== deviceId) return;
    pending.routedDeviceId = null;
    orphaned += 1;
  };
  for (const pending of this.pendingGenerates.values()) orphan(pending);
  for (const pending of this.pendingEmbeds.values()) orphan(pending);

  logger.info(
    `[device-bridge] Device disconnected: ${deviceId}; ${orphaned} request(s) orphaned`,
  );

  // Fast-path: if there are other connected devices, re-route now.
  if (this.devices.size > 0) {
    const reroute = (pending: {
      routedDeviceId: string | null;
      request: AgentOutbound;
    }): void => {
      if (pending.routedDeviceId !== null) return;
      const best = this.pickBestDevice();
      if (!best) return;
      pending.routedDeviceId = best.deviceId;
      try {
        this.sendToDevice(best.deviceId, pending.request);
      } catch {
        // The send failed, so the request never reached `best`. Put it back
        // in the orphaned state; leaving routedDeviceId set would hide it
        // from every later re-route pass and it could only time out.
        pending.routedDeviceId = null;
      }
    };
    for (const pending of this.pendingGenerates.values()) reroute(pending);
    for (const pending of this.pendingEmbeds.values()) reroute(pending);
  }

  this.emitStatus();
}
749
+
750
/**
 * Dispatch one message received from a device.
 *
 * Result messages are matched to their pending request by `correlationId`;
 * a result with no matching pending entry is ignored. For a matched result
 * the request's timer is cleared, the entry is removed, and the caller's
 * promise is settled. Message types not listed here are ignored.
 *
 * @param msg - Message sent by a device.
 */
private handleDeviceMessage(msg: DeviceOutbound): void {
  switch (msg.type) {
    case "pong":
      // Heartbeat round-trip — could update lastHeartbeatAt per device, but
      // it is not currently used for eviction.
      return;

    case "loadResult": {
      const waiter = this.pendingLoads.get(msg.correlationId);
      if (!waiter) return;
      clearTimeout(waiter.timeout);
      this.pendingLoads.delete(msg.correlationId);
      if (msg.ok === false) {
        waiter.reject(new Error(msg.error));
        return;
      }
      // Record what the device reports as loaded, if it is still connected.
      const loadedOn = this.devices.get(waiter.routedDeviceId);
      if (loadedOn) loadedOn.loadedPath = msg.loadedPath;
      waiter.resolve();
      this.emitStatus();
      return;
    }

    case "unloadResult": {
      const waiter = this.pendingUnloads.get(msg.correlationId);
      if (!waiter) return;
      clearTimeout(waiter.timeout);
      this.pendingUnloads.delete(msg.correlationId);
      if (msg.ok === false) {
        waiter.reject(new Error(msg.error));
        return;
      }
      const unloadedOn = this.devices.get(waiter.routedDeviceId);
      if (unloadedOn) unloadedOn.loadedPath = null;
      waiter.resolve();
      this.emitStatus();
      return;
    }

    case "generateResult": {
      const waiter = this.pendingGenerates.get(msg.correlationId);
      if (!waiter) return;
      clearTimeout(waiter.timeout);
      this.pendingGenerates.delete(msg.correlationId);
      // Best-effort purge the persisted copy.
      void this.persistPendingGenerates();
      if (msg.ok === false) {
        waiter.reject(new Error(msg.error));
        return;
      }
      // Turn the raw counters into throughput figures and publish them to
      // metrics subscribers. Callers still only receive the text; metrics
      // are a side channel.
      const ttftMs = typeof msg.ttftMs === "number" ? msg.ttftMs : null;
      const throughput = computeGenerationThroughput({
        promptTokens: msg.promptTokens,
        outputTokens: msg.outputTokens,
        durationMs: msg.durationMs,
        ttftMs,
      });
      const servedBy = waiter.routedDeviceId
        ? this.devices.get(waiter.routedDeviceId)
        : null;
      this.emitGenerationMetrics({
        deviceId: waiter.routedDeviceId ?? "unknown",
        platform: servedBy?.capabilities.platform ?? null,
        deviceModel: servedBy?.capabilities.deviceModel ?? null,
        promptTokens: msg.promptTokens,
        outputTokens: msg.outputTokens,
        durationMs: msg.durationMs,
        ttftMs,
        throughput,
      });
      waiter.resolve(msg.text);
      return;
    }

    case "embedResult": {
      const waiter = this.pendingEmbeds.get(msg.correlationId);
      if (!waiter) return;
      clearTimeout(waiter.timeout);
      this.pendingEmbeds.delete(msg.correlationId);
      if (msg.ok === false) {
        waiter.reject(new Error(msg.error));
        return;
      }
      waiter.resolve({ embedding: msg.embedding, tokens: msg.tokens });
      return;
    }

    default:
      return;
  }
}
840
+
841
/**
 * Serialize `msg` as JSON and write it to the given device's socket.
 *
 * @param deviceId - Id of the target device.
 * @param msg - Message to send.
 * @throws Error with a `DEVICE_DISCONNECTED:` prefix when the device is not
 *   in the registry. Errors thrown by the socket's `send` propagate as-is.
 */
private sendToDevice(deviceId: string, msg: AgentOutbound): void {
  const target = this.devices.get(deviceId);
  if (target) {
    target.socket.send(JSON.stringify(msg));
    return;
  }
  throw new Error(`DEVICE_DISCONNECTED: ${deviceId}`);
}
846
+
847
/**
 * Return the connected device with the highest `scoreDevice` result, or
 * `null` when no device qualifies. `opts` is forwarded to `scoreDevice`
 * unchanged. On a tie the device seen first in the registry wins.
 */
private pickBestDevice(opts?: {
  preferLoadedPath?: string;
}): ConnectedDevice | null {
  let topScore = -Infinity;
  let winner: ConnectedDevice | null = null;
  for (const candidate of this.devices.values()) {
    const candidateScore = scoreDevice(candidate, opts);
    // Only a strictly greater score replaces the current winner.
    if (!(candidateScore > topScore)) continue;
    topScore = candidateScore;
    winner = candidate;
  }
  return winner;
}
862
+
863
+ // ── LocalInferenceLoader surface ──────────────────────────────────────
864
+
865
/**
 * Ask the best available device to load a model and wait for its result.
 *
 * The device is chosen with `preferLoadedPath` set to the requested model
 * path. The returned promise settles when a `loadResult` message with the
 * same correlation id is handled, or rejects on timeout / send failure.
 *
 * @param args - Load arguments; forwarded to the device in the `load` message.
 * @throws Error prefixed `DEVICE_DISCONNECTED:` when no device is connected.
 * @throws Error prefixed `DEVICE_TIMEOUT:` after `DEFAULT_LOAD_TIMEOUT_MS`.
 */
async loadModel(args: LocalInferenceLoadArgs): Promise<void> {
  const target = this.pickBestDevice({ preferLoadedPath: args.modelPath });
  if (!target) {
    throw new Error(
      "DEVICE_DISCONNECTED: no mobile / desktop bridge device attached",
    );
  }
  const correlationId = randomUUID();
  return new Promise<void>((resolve, reject) => {
    const deadline = setTimeout(() => {
      this.pendingLoads.delete(correlationId);
      reject(new Error("DEVICE_TIMEOUT: model load exceeded deadline"));
    }, DEFAULT_LOAD_TIMEOUT_MS);
    // Where timers expose unref(), don't let this one keep the process alive.
    if (typeof deadline === "object" && deadline && "unref" in deadline) {
      (deadline as { unref(): void }).unref();
    }

    // Register before sending so the result can always be matched.
    this.pendingLoads.set(correlationId, {
      correlationId,
      modelPath: args.modelPath,
      resolve,
      reject,
      timeout: deadline,
      routedDeviceId: target.deviceId,
    });

    const message: AgentOutbound = {
      type: "load",
      correlationId,
      ...args,
    };
    try {
      this.sendToDevice(target.deviceId, message);
    } catch (err) {
      // Nothing was sent: undo the registration and fail the caller now.
      clearTimeout(deadline);
      this.pendingLoads.delete(correlationId);
      reject(err instanceof Error ? err : new Error(String(err)));
    }
  });
}
902
+
903
/**
 * Ask every device that currently reports a loaded model to unload it.
 *
 * Best-effort: each device gets its own request and timeout, and the
 * results are gathered with `Promise.allSettled`, so one device failing or
 * timing out neither blocks the others nor rejects this call. Resolves
 * immediately when no device has a model loaded.
 */
async unloadModel(): Promise<void> {
  const loaded: ConnectedDevice[] = [];
  for (const candidate of this.devices.values()) {
    if (candidate.loadedPath) loaded.push(candidate);
  }
  if (loaded.length === 0) return;

  const requestUnload = (device: ConnectedDevice): Promise<void> =>
    new Promise<void>((resolve, reject) => {
      const correlationId = randomUUID();
      const deadline = setTimeout(() => {
        this.pendingUnloads.delete(correlationId);
        reject(new Error("DEVICE_TIMEOUT: unload exceeded deadline"));
      }, DEFAULT_CALL_TIMEOUT_MS);
      if (typeof deadline === "object" && deadline && "unref" in deadline) {
        (deadline as { unref(): void }).unref();
      }
      this.pendingUnloads.set(correlationId, {
        correlationId,
        resolve,
        reject,
        timeout: deadline,
        routedDeviceId: device.deviceId,
      });
      try {
        this.sendToDevice(device.deviceId, {
          type: "unload",
          correlationId,
        });
      } catch (err) {
        // Nothing was sent: undo the registration and fail this request.
        clearTimeout(deadline);
        this.pendingUnloads.delete(correlationId);
        reject(err instanceof Error ? err : new Error(String(err)));
      }
    });

  await Promise.allSettled(loaded.map((device) => requestUnload(device)));
}
941
+
942
/**
 * Model path loaded on the device `pickBestDevice()` currently selects —
 * the same selection `generate()` uses — or `null` when no device is
 * connected or that device has nothing loaded.
 */
currentModelPath(): string | null {
  const primary = this.pickBestDevice();
  if (!primary) return null;
  return primary.loadedPath ?? null;
}
948
+
949
/**
 * Request an embedding for `args.input` from the best available device.
 *
 * When no device is connected, or the send fails, the request is parked
 * with `routedDeviceId = null` so the re-route logic can dispatch it when a
 * device (re)connects. The timeout comes from
 * `ELIZA_DEVICE_GENERATE_TIMEOUT_MS` when that parses to a positive
 * integer, otherwise `DEFAULT_CALL_TIMEOUT_MS`.
 *
 * @param args - Text to embed.
 * @returns The embedding vector and token count reported by the device.
 * @throws Error prefixed `DEVICE_TIMEOUT:` when no result arrives in time.
 */
async embed(args: {
  input: string;
}): Promise<{ embedding: number[]; tokens: number }> {
  const rawTimeout = process.env.ELIZA_DEVICE_GENERATE_TIMEOUT_MS?.trim() ?? "";
  const parsedTimeout = Number.parseInt(rawTimeout, 10);
  let timeoutMs = DEFAULT_CALL_TIMEOUT_MS;
  if (Number.isFinite(parsedTimeout) && parsedTimeout > 0) {
    timeoutMs = parsedTimeout;
  }

  const correlationId = randomUUID();
  const request: AgentOutbound = {
    type: "embed",
    correlationId,
    input: args.input,
  };

  const target = this.pickBestDevice();

  return new Promise<{ embedding: number[]; tokens: number }>(
    (resolve, reject) => {
      const deadline = setTimeout(() => {
        this.pendingEmbeds.delete(correlationId);
        reject(
          new Error(
            `DEVICE_TIMEOUT: no device responded to embed within ${timeoutMs}ms`,
          ),
        );
      }, timeoutMs);
      if (typeof deadline === "object" && deadline && "unref" in deadline) {
        (deadline as { unref(): void }).unref();
      }

      const entry: PendingEmbed = {
        correlationId,
        resolve,
        reject,
        timeout: deadline,
        request,
        routedDeviceId: target?.deviceId ?? null,
        submittedAt: new Date().toISOString(),
      };
      this.pendingEmbeds.set(correlationId, entry);

      if (!target) {
        logger.debug(
          `[device-bridge] No device available; parking embed ${correlationId} pending connection`,
        );
        return;
      }
      try {
        this.sendToDevice(target.deviceId, request);
      } catch {
        // The chosen device went away between selection and send. Mark the
        // request orphaned so the re-route logic picks it up on the next
        // device (re)connect.
        entry.routedDeviceId = null;
      }
    },
  );
}
1011
+
1012
/**
 * Run a text generation on the best available device.
 *
 * The request is recorded in `pendingGenerates` (and the pending log is
 * rewritten) before it is sent. When no device is connected, or the send
 * fails, it is parked with `routedDeviceId = null` so the re-route logic
 * can dispatch it when a device (re)connects. The timeout comes from
 * `ELIZA_DEVICE_GENERATE_TIMEOUT_MS` when that parses to a positive
 * integer, otherwise `DEFAULT_CALL_TIMEOUT_MS`.
 *
 * @param args - Prompt and optional sampling / caching parameters, forwarded
 *   to the device in the `generate` message.
 * @returns The generated text reported by the device.
 * @throws Error prefixed `DEVICE_TIMEOUT:` when no result arrives in time.
 */
async generate(args: {
  prompt: string;
  stopSequences?: string[];
  maxTokens?: number;
  temperature?: number;
  cacheKey?: string;
}): Promise<string> {
  const rawTimeout = process.env.ELIZA_DEVICE_GENERATE_TIMEOUT_MS?.trim() ?? "";
  const parsedTimeout = Number.parseInt(rawTimeout, 10);
  let timeoutMs = DEFAULT_CALL_TIMEOUT_MS;
  if (Number.isFinite(parsedTimeout) && parsedTimeout > 0) {
    timeoutMs = parsedTimeout;
  }

  const correlationId = randomUUID();
  const request: AgentOutbound = {
    type: "generate",
    correlationId,
    prompt: args.prompt,
    stopSequences: args.stopSequences,
    maxTokens: args.maxTokens,
    temperature: args.temperature,
    cacheKey: args.cacheKey,
  };

  const target = this.pickBestDevice();

  return new Promise<string>((resolve, reject) => {
    const deadline = setTimeout(() => {
      this.pendingGenerates.delete(correlationId);
      void this.persistPendingGenerates();
      reject(
        new Error(
          `DEVICE_TIMEOUT: no device responded within ${timeoutMs}ms`,
        ),
      );
    }, timeoutMs);
    if (typeof deadline === "object" && deadline && "unref" in deadline) {
      (deadline as { unref(): void }).unref();
    }

    const entry: PendingGenerate = {
      correlationId,
      resolve,
      reject,
      timeout: deadline,
      request,
      routedDeviceId: target?.deviceId ?? null,
      submittedAt: new Date().toISOString(),
    };
    this.pendingGenerates.set(correlationId, entry);
    void this.persistPendingGenerates();

    if (!target) {
      logger.debug(
        `[device-bridge] No device available; parking generate ${correlationId} pending connection`,
      );
      return;
    }
    try {
      this.sendToDevice(target.deviceId, request);
    } catch {
      // Send failed: treat the request as orphaned so it can be re-routed.
      entry.routedDeviceId = null;
    }
  });
}
1079
+
1080
+ // ── Durability ────────────────────────────────────────────────────────
1081
+
1082
/** Path of the pending-generate log file inside the local-inference root. */
private pendingLogPath(): string {
  const root = localInferenceRoot();
  return path.join(root, PENDING_LOG_FILENAME);
}
1085
+
1086
/**
 * Rewrite the pending-generate log from the current `pendingGenerates` map.
 * Called after every mutation to that map. Only `generate` requests are
 * persisted — loads/unloads are bound to a specific device's current state
 * and aren't safely replayable across restart.
 *
 * The snapshot is written to a temp file and then renamed over the log, so
 * a reader never sees a half-written file. Callers invoke this
 * fire-and-forget, so several writes can be in flight at once; each call
 * therefore uses its own temp file instead of sharing one path. Which
 * in-flight snapshot ends up on disk is not ordered.
 *
 * Never rejects: failures are logged at debug level and swallowed.
 */
private async persistPendingGenerates(): Promise<void> {
  let tmp: string | null = null;
  try {
    await fs.mkdir(localInferenceRoot(), { recursive: true });
    const payload: PersistedGenerateRequest[] = [
      ...this.pendingGenerates.values(),
    ].map((p) => ({
      correlationId: p.correlationId,
      request: p.request,
      submittedAt: p.submittedAt,
    }));
    const target = this.pendingLogPath();
    // Unique per call: overlapping writes to one shared temp path could
    // interleave, and the slower call's rename would fail.
    tmp = `${target}.${process.pid}.${randomUUID()}.tmp`;
    await fs.writeFile(tmp, JSON.stringify(payload, null, 2), "utf8");
    await fs.rename(tmp, target);
  } catch (err) {
    if (tmp !== null) {
      // Best effort: don't leave an orphaned temp file behind.
      try {
        await fs.unlink(tmp);
      } catch {
        /* temp file was never created or is already gone */
      }
    }
    logger.debug(
      "[device-bridge] Failed to persist pending generates:",
      err instanceof Error ? err.message : String(err),
    );
  }
}
1112
+
1113
/**
 * On startup, read persisted pending generate requests back into memory.
 *
 * The promises of the original callers are gone (that process is dead), so
 * restored entries get no-op `resolve` / `reject` callbacks and a fresh
 * `DEFAULT_CALL_TIMEOUT_MS` timer. They are queued unrouted
 * (`routedDeviceId = null`) so the re-route logic can dispatch them when a
 * device connects. An entry nobody consumes before its timer fires is
 * silently removed from the map and the log.
 *
 * Skipped rather than restored: entries that are malformed, are not
 * `generate` requests, are older than 24h, or whose correlation id is
 * already pending. A missing, unreadable or unparsable log restores nothing.
 */
private async restorePendingGenerates(): Promise<void> {
  let raw: string;
  try {
    raw = await fs.readFile(this.pendingLogPath(), "utf8");
  } catch {
    return;
  }
  // The file is external input: parse to `unknown` and validate before use.
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return;
  }
  if (!Array.isArray(parsed)) return;

  const cutoff = Date.now() - 24 * 60 * 60 * 1000;
  let restored = 0;
  for (const entry of parsed as unknown[]) {
    // A null or primitive element would throw on property access below.
    if (entry === null || typeof entry !== "object") continue;
    const candidate = entry as Partial<PersistedGenerateRequest>;
    const correlationId = candidate.correlationId;
    const request = candidate.request;
    const submittedAtIso = candidate.submittedAt;
    if (
      typeof correlationId !== "string" ||
      correlationId === "" ||
      !request ||
      request.type !== "generate" ||
      typeof submittedAtIso !== "string"
    ) {
      continue;
    }
    const submittedAt = Date.parse(submittedAtIso);
    if (!Number.isFinite(submittedAt) || submittedAt < cutoff) continue;
    if (this.pendingGenerates.has(correlationId)) continue;

    // The original caller's promise is gone. Queue the request so a
    // connecting device can process it; if nobody picks it up within the
    // default timeout, drop it.
    const timeout = setTimeout(() => {
      this.pendingGenerates.delete(correlationId);
      void this.persistPendingGenerates();
    }, DEFAULT_CALL_TIMEOUT_MS);
    if (typeof timeout === "object" && timeout && "unref" in timeout) {
      (timeout as { unref(): void }).unref();
    }
    this.pendingGenerates.set(correlationId, {
      correlationId,
      request,
      submittedAt: submittedAtIso,
      routedDeviceId: null,
      timeout,
      resolve: () => {
        /* no caller to resolve */
      },
      reject: () => {
        /* no caller to reject */
      },
    });
    restored += 1;
  }
  if (restored > 0) {
    logger.info(
      `[device-bridge] Restored ${restored} pending generate(s) from persistent log`,
    );
  }
}
1181
+ }
1182
+
1183
/**
 * Module-level `DeviceBridge` instance. It is the default bridge for
 * `buildDeviceResourceMetricsDevPayload` and the instance the loader
 * registered by `registerDeviceBridgeLoader` delegates to.
 */
export const deviceBridge = new DeviceBridge();
1184
+
1185
/**
 * Shape returned by `GET /api/dev/device-resource-metrics`.
 *
 * Assembled by `buildDeviceResourceMetricsDevPayload`.
 */
export interface DeviceResourceMetricsDevPayload {
  /** `Date.now()` at the moment the payload was built. */
  generatedAtEpochMs: number;
  /** Bridge status as returned by `bridge.status()`. */
  status: DeviceBridgeStatus;
  /** Result of `bridge.latestGenerationMetrics()`; `null` when there is none. */
  latest: DeviceGenerationMetrics | null;
  /** Result of `bridge.recentGenerationMetrics(limit)`. */
  recentGenerations: DeviceGenerationMetrics[];
}
1192
+
1193
/**
 * Assemble the JSON body for `GET /api/dev/device-resource-metrics`. The
 * Mobile Resource Workbench reads it to collect per-generation
 * prefill/decode tok/s (already computed by the bridge) without having to
 * drive the device WebView.
 *
 * @param bridge - Bridge to read from; defaults to the module-level `deviceBridge`.
 * @param limit - Passed to `bridge.recentGenerationMetrics`; defaults to 50.
 * @returns Timestamp, bridge status, latest metrics and recent metrics.
 */
export function buildDeviceResourceMetricsDevPayload(
  bridge: DeviceBridge = deviceBridge,
  limit = 50,
): DeviceResourceMetricsDevPayload {
  const generatedAtEpochMs = Date.now();
  const status = bridge.status();
  const latest = bridge.latestGenerationMetrics();
  const recentGenerations = bridge.recentGenerationMetrics(limit);
  return { generatedAtEpochMs, status, latest, recentGenerations };
}
1209
+
1210
/**
 * Register a `LocalInferenceLoader` backed by the module-level
 * `deviceBridge` under the service name `"localInferenceLoader"`.
 *
 * Every loader method simply delegates to the bridge. Does nothing when the
 * runtime has no callable `registerService`.
 *
 * @param runtime - Agent runtime, optionally exposing `registerService`.
 */
export function registerDeviceBridgeLoader(
  runtime: AgentRuntime & {
    registerService?: (name: string, impl: unknown) => unknown;
  },
): void {
  if (typeof runtime.registerService !== "function") return;

  const loader: LocalInferenceLoader = {
    loadModel: async (args: LocalInferenceLoadArgs) => {
      await deviceBridge.loadModel(args);
    },
    unloadModel: async () => {
      await deviceBridge.unloadModel();
    },
    currentModelPath: () => deviceBridge.currentModelPath(),
    generate: async (args) => deviceBridge.generate(args),
    embed: async (args) => deviceBridge.embed(args),
  };

  runtime.registerService("localInferenceLoader", loader);
  logger.info(
    "[device-bridge] Registered device-bridge loader for remote on-device inference",
  );
}