@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (676)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +81 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  11. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  12. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  13. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  14. package/src/adapters/capacitor-llama/environment.ts +71 -0
  15. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  16. package/src/adapters/capacitor-llama/index.ts +807 -0
  17. package/src/adapters/capacitor-llama/loader.ts +109 -0
  18. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  19. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  20. package/src/adapters/capacitor-llama/types.ts +374 -0
  21. package/src/backends/apple-foundation.ts +127 -0
  22. package/src/index.d.ts +7 -0
  23. package/src/index.d.ts.map +1 -0
  24. package/src/index.ts +54 -0
  25. package/src/local-inference-routes.d.ts +38 -0
  26. package/src/local-inference-routes.d.ts.map +1 -0
  27. package/src/local-inference-routes.test.ts +344 -0
  28. package/src/local-inference-routes.ts +1543 -0
  29. package/src/provider.d.ts +21 -0
  30. package/src/provider.d.ts.map +1 -0
  31. package/src/provider.ts +1171 -0
  32. package/src/routes/compat-helpers.d.ts +18 -0
  33. package/src/routes/compat-helpers.d.ts.map +1 -0
  34. package/src/routes/compat-helpers.ts +274 -0
  35. package/src/routes/family-member-route.d.ts +62 -0
  36. package/src/routes/family-member-route.d.ts.map +1 -0
  37. package/src/routes/family-member-route.ts +353 -0
  38. package/src/routes/index.d.ts +19 -0
  39. package/src/routes/index.d.ts.map +1 -0
  40. package/src/routes/index.ts +60 -0
  41. package/src/routes/live-diarization-route.d.ts +26 -0
  42. package/src/routes/live-diarization-route.d.ts.map +1 -0
  43. package/src/routes/live-diarization-route.test.ts +213 -0
  44. package/src/routes/live-diarization-route.ts +122 -0
  45. package/src/routes/local-inference-asr-route.d.ts +4 -0
  46. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  47. package/src/routes/local-inference-asr-route.test.ts +190 -0
  48. package/src/routes/local-inference-asr-route.ts +213 -0
  49. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  50. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  51. package/src/routes/local-inference-compat-routes.test.ts +423 -0
  52. package/src/routes/local-inference-compat-routes.ts +782 -0
  53. package/src/routes/local-inference-tts-route.d.ts +7 -0
  54. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/src/routes/local-inference-tts-route.test.ts +179 -0
  56. package/src/routes/local-inference-tts-route.ts +230 -0
  57. package/src/routes/voice-first-run-routes.d.ts +62 -0
  58. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  59. package/src/routes/voice-first-run-routes.ts +524 -0
  60. package/src/routes/voice-models-routes.d.ts +62 -0
  61. package/src/routes/voice-models-routes.d.ts.map +1 -0
  62. package/src/routes/voice-models-routes.ts +554 -0
  63. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  64. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  65. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  66. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  67. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  68. package/src/routes/voice-profiles-management-routes.ts +476 -0
  69. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  72. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  73. package/src/runtime/capacitor-llama.d.ts +25 -0
  74. package/src/runtime/embedding-manager-support.d.ts +77 -0
  75. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  76. package/src/runtime/embedding-manager-support.ts +497 -0
  77. package/src/runtime/embedding-presets.d.ts +16 -0
  78. package/src/runtime/embedding-presets.d.ts.map +1 -0
  79. package/src/runtime/embedding-presets.ts +81 -0
  80. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  81. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  82. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  83. package/src/runtime/embedding-warmup-policy.ts +48 -0
  84. package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
  85. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  86. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  87. package/src/runtime/ensure-local-inference-handler.ts +1398 -0
  88. package/src/runtime/index.d.ts +14 -0
  89. package/src/runtime/index.d.ts.map +1 -0
  90. package/src/runtime/index.ts +27 -0
  91. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  92. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  93. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  94. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  95. package/src/runtime/voice-entity-binding.d.ts +103 -0
  96. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  97. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  98. package/src/runtime/voice-entity-binding.ts +328 -0
  99. package/src/services/README.md +71 -0
  100. package/src/services/__tests__/backend-selector.test.ts +101 -0
  101. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  102. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  103. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  104. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  105. package/src/services/__tests__/runtime-target.test.ts +176 -0
  106. package/src/services/active-model-switch-rollback.test.ts +183 -0
  107. package/src/services/active-model.d.ts +282 -0
  108. package/src/services/active-model.d.ts.map +1 -0
  109. package/src/services/active-model.ts +1213 -0
  110. package/src/services/asr/errors.d.ts +21 -0
  111. package/src/services/asr/errors.d.ts.map +1 -0
  112. package/src/services/asr/errors.ts +50 -0
  113. package/src/services/asr/hash.d.ts +28 -0
  114. package/src/services/asr/hash.d.ts.map +1 -0
  115. package/src/services/asr/hash.ts +49 -0
  116. package/src/services/asr/index.d.ts +76 -0
  117. package/src/services/asr/index.d.ts.map +1 -0
  118. package/src/services/asr/index.ts +178 -0
  119. package/src/services/asr/types.d.ts +91 -0
  120. package/src/services/asr/types.d.ts.map +1 -0
  121. package/src/services/asr/types.ts +95 -0
  122. package/src/services/assignments.d.ts +71 -0
  123. package/src/services/assignments.d.ts.map +1 -0
  124. package/src/services/assignments.test.ts +80 -0
  125. package/src/services/assignments.ts +230 -0
  126. package/src/services/backend-selector.ts +95 -0
  127. package/src/services/backend.d.ts +346 -0
  128. package/src/services/backend.d.ts.map +1 -0
  129. package/src/services/backend.ts +612 -0
  130. package/src/services/bundled-models.d.ts +34 -0
  131. package/src/services/bundled-models.d.ts.map +1 -0
  132. package/src/services/bundled-models.ts +129 -0
  133. package/src/services/cache-bridge.d.ts +206 -0
  134. package/src/services/cache-bridge.d.ts.map +1 -0
  135. package/src/services/cache-bridge.test.ts +516 -0
  136. package/src/services/cache-bridge.ts +423 -0
  137. package/src/services/catalog.d.ts +10 -0
  138. package/src/services/catalog.d.ts.map +1 -0
  139. package/src/services/catalog.test.ts +240 -0
  140. package/src/services/catalog.ts +27 -0
  141. package/src/services/checkpoint-client.d.ts +109 -0
  142. package/src/services/checkpoint-client.d.ts.map +1 -0
  143. package/src/services/checkpoint-client.ts +258 -0
  144. package/src/services/checkpoint-manager.ts +474 -0
  145. package/src/services/cloud-fallback.d.ts +102 -0
  146. package/src/services/cloud-fallback.d.ts.map +1 -0
  147. package/src/services/cloud-fallback.ts +230 -0
  148. package/src/services/conversation-registry.d.ts +142 -0
  149. package/src/services/conversation-registry.d.ts.map +1 -0
  150. package/src/services/conversation-registry.test.ts +235 -0
  151. package/src/services/conversation-registry.ts +264 -0
  152. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
  153. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  154. package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
  155. package/src/services/device-bridge.d.ts +188 -0
  156. package/src/services/device-bridge.d.ts.map +1 -0
  157. package/src/services/device-bridge.ts +1237 -0
  158. package/src/services/device-resource-metrics.d.ts +149 -0
  159. package/src/services/device-resource-metrics.d.ts.map +1 -0
  160. package/src/services/device-resource-metrics.test.ts +98 -0
  161. package/src/services/device-resource-metrics.ts +346 -0
  162. package/src/services/device-tier.d.ts +115 -0
  163. package/src/services/device-tier.d.ts.map +1 -0
  164. package/src/services/device-tier.test.ts +371 -0
  165. package/src/services/device-tier.ts +410 -0
  166. package/src/services/downloader.d.ts +82 -0
  167. package/src/services/downloader.d.ts.map +1 -0
  168. package/src/services/downloader.test.ts +724 -0
  169. package/src/services/downloader.ts +899 -0
  170. package/src/services/engine-direct-bundle.test.ts +58 -0
  171. package/src/services/engine-streaming.test.ts +80 -0
  172. package/src/services/engine.d.ts +534 -0
  173. package/src/services/engine.d.ts.map +1 -0
  174. package/src/services/engine.ts +1891 -0
  175. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  176. package/src/services/ensure-local-artifacts.test.ts +368 -0
  177. package/src/services/ensure-local-artifacts.ts +351 -0
  178. package/src/services/external-scanner.d.ts +17 -0
  179. package/src/services/external-scanner.d.ts.map +1 -0
  180. package/src/services/external-scanner.ts +312 -0
  181. package/src/services/ffi-llm-mock.ts +354 -0
  182. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  183. package/src/services/ffi-streaming-backend.d.ts +180 -0
  184. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  185. package/src/services/ffi-streaming-backend.ts +382 -0
  186. package/src/services/ffi-streaming-runner.d.ts +122 -0
  187. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  188. package/src/services/ffi-streaming-runner.test.ts +60 -0
  189. package/src/services/ffi-streaming-runner.ts +354 -0
  190. package/src/services/ffi-unload-ordering.test.ts +162 -0
  191. package/src/services/gpu-autotune.ts +534 -0
  192. package/src/services/gpu-detect.ts +139 -0
  193. package/src/services/handler-registry.d.ts +72 -0
  194. package/src/services/handler-registry.d.ts.map +1 -0
  195. package/src/services/handler-registry.ts +240 -0
  196. package/src/services/hardware.d.ts +63 -0
  197. package/src/services/hardware.d.ts.map +1 -0
  198. package/src/services/hardware.test.ts +183 -0
  199. package/src/services/hardware.ts +404 -0
  200. package/src/services/hf-search.d.ts +26 -0
  201. package/src/services/hf-search.d.ts.map +1 -0
  202. package/src/services/hf-search.test.ts +69 -0
  203. package/src/services/hf-search.ts +420 -0
  204. package/src/services/image-description-runtime.d.ts +14 -0
  205. package/src/services/image-description-runtime.d.ts.map +1 -0
  206. package/src/services/image-description-runtime.test.ts +61 -0
  207. package/src/services/image-description-runtime.ts +118 -0
  208. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  209. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  210. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  211. package/src/services/imagegen/backend-selector.d.ts +118 -0
  212. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  213. package/src/services/imagegen/backend-selector.ts +281 -0
  214. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  215. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  216. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  217. package/src/services/imagegen/errors.d.ts +16 -0
  218. package/src/services/imagegen/errors.d.ts.map +1 -0
  219. package/src/services/imagegen/errors.ts +40 -0
  220. package/src/services/imagegen/index.d.ts +58 -0
  221. package/src/services/imagegen/index.d.ts.map +1 -0
  222. package/src/services/imagegen/index.ts +144 -0
  223. package/src/services/imagegen/mflux.d.ts +74 -0
  224. package/src/services/imagegen/mflux.d.ts.map +1 -0
  225. package/src/services/imagegen/mflux.ts +313 -0
  226. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  227. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  228. package/src/services/imagegen/sd-cpp.ts +718 -0
  229. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  230. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  231. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  232. package/src/services/imagegen/types.d.ts +181 -0
  233. package/src/services/imagegen/types.d.ts.map +1 -0
  234. package/src/services/imagegen/types.ts +193 -0
  235. package/src/services/index.d.ts +30 -0
  236. package/src/services/index.d.ts.map +1 -0
  237. package/src/services/index.ts +225 -0
  238. package/src/services/inference-capabilities.d.ts +132 -0
  239. package/src/services/inference-capabilities.d.ts.map +1 -0
  240. package/src/services/inference-capabilities.test.ts +75 -0
  241. package/src/services/inference-capabilities.ts +204 -0
  242. package/src/services/inference-telemetry.d.ts +59 -0
  243. package/src/services/inference-telemetry.d.ts.map +1 -0
  244. package/src/services/inference-telemetry.ts +143 -0
  245. package/src/services/ios-llama-streaming.ts +248 -0
  246. package/src/services/kv-spill.d.ts +189 -0
  247. package/src/services/kv-spill.d.ts.map +1 -0
  248. package/src/services/kv-spill.test.ts +222 -0
  249. package/src/services/kv-spill.ts +356 -0
  250. package/src/services/latency-trace.d.ts +346 -0
  251. package/src/services/latency-trace.d.ts.map +1 -0
  252. package/src/services/latency-trace.test.ts +266 -0
  253. package/src/services/latency-trace.ts +844 -0
  254. package/src/services/llama-server-metrics.ts +304 -0
  255. package/src/services/llm-streaming-binding.d.ts +96 -0
  256. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  257. package/src/services/llm-streaming-binding.ts +136 -0
  258. package/src/services/load-args.d.ts +82 -0
  259. package/src/services/load-args.d.ts.map +1 -0
  260. package/src/services/load-args.ts +81 -0
  261. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  262. package/src/services/manifest/index.d.ts +4 -0
  263. package/src/services/manifest/index.d.ts.map +1 -0
  264. package/src/services/manifest/index.ts +66 -0
  265. package/src/services/manifest/manifest.test.ts +693 -0
  266. package/src/services/manifest/schema.d.ts +715 -0
  267. package/src/services/manifest/schema.d.ts.map +1 -0
  268. package/src/services/manifest/schema.ts +655 -0
  269. package/src/services/manifest/types.d.ts +30 -0
  270. package/src/services/manifest/types.d.ts.map +1 -0
  271. package/src/services/manifest/types.ts +55 -0
  272. package/src/services/manifest/validator.d.ts +66 -0
  273. package/src/services/manifest/validator.d.ts.map +1 -0
  274. package/src/services/manifest/validator.ts +569 -0
  275. package/src/services/memory-arbiter.d.ts +343 -0
  276. package/src/services/memory-arbiter.d.ts.map +1 -0
  277. package/src/services/memory-arbiter.test.ts +419 -0
  278. package/src/services/memory-arbiter.ts +1000 -0
  279. package/src/services/memory-monitor.d.ts +119 -0
  280. package/src/services/memory-monitor.d.ts.map +1 -0
  281. package/src/services/memory-monitor.test.ts +208 -0
  282. package/src/services/memory-monitor.ts +296 -0
  283. package/src/services/memory-pressure.d.ts +127 -0
  284. package/src/services/memory-pressure.d.ts.map +1 -0
  285. package/src/services/memory-pressure.ts +413 -0
  286. package/src/services/mtp-doctor.d.ts +13 -0
  287. package/src/services/mtp-doctor.d.ts.map +1 -0
  288. package/src/services/mtp-doctor.ts +78 -0
  289. package/src/services/network-policy.d.ts +127 -0
  290. package/src/services/network-policy.d.ts.map +1 -0
  291. package/src/services/network-policy.ts +346 -0
  292. package/src/services/paths.d.ts +6 -0
  293. package/src/services/paths.d.ts.map +1 -0
  294. package/src/services/paths.ts +25 -0
  295. package/src/services/planner-skeleton.d.ts +124 -0
  296. package/src/services/planner-skeleton.d.ts.map +1 -0
  297. package/src/services/planner-skeleton.ts +175 -0
  298. package/src/services/providers.d.ts +38 -0
  299. package/src/services/providers.d.ts.map +1 -0
  300. package/src/services/providers.ts +507 -0
  301. package/src/services/ram-budget-cache.test.ts +163 -0
  302. package/src/services/ram-budget.d.ts +110 -0
  303. package/src/services/ram-budget.d.ts.map +1 -0
  304. package/src/services/ram-budget.ts +0 -0
  305. package/src/services/readiness.d.ts +9 -0
  306. package/src/services/readiness.d.ts.map +1 -0
  307. package/src/services/readiness.test.ts +87 -0
  308. package/src/services/readiness.ts +238 -0
  309. package/src/services/recommendation.d.ts +111 -0
  310. package/src/services/recommendation.d.ts.map +1 -0
  311. package/src/services/recommendation.ts +672 -0
  312. package/src/services/registry.d.ts +35 -0
  313. package/src/services/registry.d.ts.map +1 -0
  314. package/src/services/registry.ts +151 -0
  315. package/src/services/router-handler.d.ts +92 -0
  316. package/src/services/router-handler.d.ts.map +1 -0
  317. package/src/services/router-handler.test.ts +45 -0
  318. package/src/services/router-handler.ts +376 -0
  319. package/src/services/routing-policy.d.ts +55 -0
  320. package/src/services/routing-policy.d.ts.map +1 -0
  321. package/src/services/routing-policy.ts +228 -0
  322. package/src/services/routing-preferences.d.ts +8 -0
  323. package/src/services/routing-preferences.d.ts.map +1 -0
  324. package/src/services/routing-preferences.ts +15 -0
  325. package/src/services/runtime-target.d.ts +98 -0
  326. package/src/services/runtime-target.d.ts.map +1 -0
  327. package/src/services/runtime-target.ts +154 -0
  328. package/src/services/service.d.ts +128 -0
  329. package/src/services/service.d.ts.map +1 -0
  330. package/src/services/service.test.ts +223 -0
  331. package/src/services/service.ts +735 -0
  332. package/src/services/session-pool.d.ts +72 -0
  333. package/src/services/session-pool.d.ts.map +1 -0
  334. package/src/services/session-pool.ts +153 -0
  335. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  336. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  337. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  338. package/src/services/structured-output/deterministic-repair.ts +443 -0
  339. package/src/services/structured-output/index.ts +4 -0
  340. package/src/services/structured-output.d.ts +311 -0
  341. package/src/services/structured-output.d.ts.map +1 -0
  342. package/src/services/structured-output.test.ts +483 -0
  343. package/src/services/structured-output.ts +712 -0
  344. package/src/services/transcription-priority.test.ts +211 -0
  345. package/src/services/tts/errors.ts +46 -0
  346. package/src/services/tts/index.ts +214 -0
  347. package/src/services/tts/tts-audio-cache.ts +235 -0
  348. package/src/services/tts/types.ts +157 -0
  349. package/src/services/types.d.ts +19 -0
  350. package/src/services/types.d.ts.map +1 -0
  351. package/src/services/types.ts +55 -0
  352. package/src/services/verify-on-device.d.ts +34 -0
  353. package/src/services/verify-on-device.d.ts.map +1 -0
  354. package/src/services/verify-on-device.test.ts +87 -0
  355. package/src/services/verify-on-device.ts +127 -0
  356. package/src/services/verify.d.ts +8 -0
  357. package/src/services/verify.d.ts.map +1 -0
  358. package/src/services/verify.ts +13 -0
  359. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  360. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  361. package/src/services/vision/aosp-unavailable.ts +163 -0
  362. package/src/services/vision/capacitor-llama.d.ts +99 -0
  363. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  364. package/src/services/vision/capacitor-llama.ts +255 -0
  365. package/src/services/vision/cloud-fallback.d.ts +47 -0
  366. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  367. package/src/services/vision/cloud-fallback.test.ts +243 -0
  368. package/src/services/vision/cloud-fallback.ts +268 -0
  369. package/src/services/vision/fallback-chain.test.ts +86 -0
  370. package/src/services/vision/hash.d.ts +71 -0
  371. package/src/services/vision/hash.d.ts.map +1 -0
  372. package/src/services/vision/hash.ts +157 -0
  373. package/src/services/vision/index.d.ts +95 -0
  374. package/src/services/vision/index.d.ts.map +1 -0
  375. package/src/services/vision/index.ts +251 -0
  376. package/src/services/vision/llama-server.d.ts +73 -0
  377. package/src/services/vision/llama-server.d.ts.map +1 -0
  378. package/src/services/vision/llama-server.ts +177 -0
  379. package/src/services/vision/types.d.ts +153 -0
  380. package/src/services/vision/types.d.ts.map +1 -0
  381. package/src/services/vision/types.ts +154 -0
  382. package/src/services/vision/vast-fallback.d.ts +18 -0
  383. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  384. package/src/services/vision/vast-fallback.ts +127 -0
  385. package/src/services/vision-embedding-cache.d.ts +98 -0
  386. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  387. package/src/services/vision-embedding-cache.ts +189 -0
  388. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  389. package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
  390. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  391. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  392. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  393. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  394. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  395. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  396. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  397. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  398. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  399. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  400. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  401. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
  402. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  403. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  404. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  405. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  406. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  407. package/src/services/voice/audio-frame-consumer.ts +491 -0
  408. package/src/services/voice/barge-in.d.ts +112 -0
  409. package/src/services/voice/barge-in.d.ts.map +1 -0
  410. package/src/services/voice/barge-in.test.ts +244 -0
  411. package/src/services/voice/barge-in.ts +336 -0
  412. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  413. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  414. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  415. package/src/services/voice/cancellation-coordinator.ts +269 -0
  416. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  417. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  418. package/src/services/voice/checkpoint-manager.ts +401 -0
  419. package/src/services/voice/checkpoint-policy.ts +336 -0
  420. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  421. package/src/services/voice/e2e-harness.test.ts +182 -0
  422. package/src/services/voice/e2e-harness.ts +743 -0
  423. package/src/services/voice/eager-context-builder.d.ts +170 -0
  424. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  425. package/src/services/voice/eager-context-builder.ts +262 -0
  426. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  427. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  428. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  429. package/src/services/voice/embedding-server.ts +200 -0
  430. package/src/services/voice/embedding.d.ts +133 -0
  431. package/src/services/voice/embedding.d.ts.map +1 -0
  432. package/src/services/voice/embedding.test.ts +148 -0
  433. package/src/services/voice/embedding.ts +244 -0
  434. package/src/services/voice/emotion-attribution.d.ts +68 -0
  435. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  436. package/src/services/voice/emotion-attribution.test.ts +129 -0
  437. package/src/services/voice/emotion-attribution.ts +361 -0
  438. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  439. package/src/services/voice/engine-bridge.d.ts +746 -0
  440. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  441. package/src/services/voice/engine-bridge.test.ts +384 -0
  442. package/src/services/voice/engine-bridge.ts +2226 -0
  443. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  444. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  445. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  446. package/src/services/voice/eot-classifier.d.ts +214 -0
  447. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  448. package/src/services/voice/eot-classifier.ts +533 -0
  449. package/src/services/voice/errors.d.ts +20 -0
  450. package/src/services/voice/errors.d.ts.map +1 -0
  451. package/src/services/voice/errors.ts +32 -0
  452. package/src/services/voice/expressive-tags.d.ts +158 -0
  453. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  454. package/src/services/voice/expressive-tags.ts +405 -0
  455. package/src/services/voice/ffi-bindings.d.ts +636 -0
  456. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  457. package/src/services/voice/ffi-bindings.test.ts +671 -0
  458. package/src/services/voice/ffi-bindings.ts +3050 -0
  459. package/src/services/voice/first-line-cache.d.ts +181 -0
  460. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  461. package/src/services/voice/first-line-cache.ts +725 -0
  462. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  463. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  464. package/src/services/voice/fused-eot-scorer.ts +135 -0
  465. package/src/services/voice/index.d.ts +91 -0
  466. package/src/services/voice/index.d.ts.map +1 -0
  467. package/src/services/voice/index.ts +481 -0
  468. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  469. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  470. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  471. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  472. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  473. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  474. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  475. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  476. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  477. package/src/services/voice/kokoro/index.ts +79 -0
  478. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  479. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  480. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  481. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  482. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  483. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  484. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  485. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  486. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  487. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  488. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  489. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  490. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  491. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  492. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  493. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  494. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  495. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  496. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  497. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  498. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  499. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  501. package/src/services/voice/kokoro/types.d.ts +82 -0
  502. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  503. package/src/services/voice/kokoro/types.ts +95 -0
  504. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  505. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  506. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  507. package/src/services/voice/kokoro/voices.d.ts +30 -0
  508. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  509. package/src/services/voice/kokoro/voices.ts +64 -0
  510. package/src/services/voice/lifecycle.d.ts +135 -0
  511. package/src/services/voice/lifecycle.d.ts.map +1 -0
  512. package/src/services/voice/lifecycle.test.ts +315 -0
  513. package/src/services/voice/lifecycle.ts +301 -0
  514. package/src/services/voice/live-diarization-session.d.ts +96 -0
  515. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  516. package/src/services/voice/live-diarization-session.ts +289 -0
  517. package/src/services/voice/mic-source.d.ts +136 -0
  518. package/src/services/voice/mic-source.d.ts.map +1 -0
  519. package/src/services/voice/mic-source.test.ts +210 -0
  520. package/src/services/voice/mic-source.ts +503 -0
  521. package/src/services/voice/optimistic-policy.d.ts +109 -0
  522. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  523. package/src/services/voice/optimistic-policy.test.ts +101 -0
  524. package/src/services/voice/optimistic-policy.ts +192 -0
  525. package/src/services/voice/optimistic-rollback.ts +343 -0
  526. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  527. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  528. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  529. package/src/services/voice/partial-stabilizer.ts +140 -0
  530. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  531. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  532. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  533. package/src/services/voice/phrase-cache.d.ts +76 -0
  534. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  535. package/src/services/voice/phrase-cache.test.ts +242 -0
  536. package/src/services/voice/phrase-cache.ts +186 -0
  537. package/src/services/voice/phrase-chunker.d.ts +62 -0
  538. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  539. package/src/services/voice/phrase-chunker.test.ts +239 -0
  540. package/src/services/voice/phrase-chunker.ts +281 -0
  541. package/src/services/voice/pipeline-impls.d.ts +151 -0
  542. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  543. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  544. package/src/services/voice/pipeline-impls.test.ts +292 -0
  545. package/src/services/voice/pipeline-impls.ts +315 -0
  546. package/src/services/voice/pipeline.d.ts +216 -0
  547. package/src/services/voice/pipeline.d.ts.map +1 -0
  548. package/src/services/voice/pipeline.ts +505 -0
  549. package/src/services/voice/prefill-client.d.ts +123 -0
  550. package/src/services/voice/prefill-client.d.ts.map +1 -0
  551. package/src/services/voice/prefill-client.ts +316 -0
  552. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  553. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  554. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  555. package/src/services/voice/profile-store.d.ts +248 -0
  556. package/src/services/voice/profile-store.d.ts.map +1 -0
  557. package/src/services/voice/profile-store.ts +887 -0
  558. package/src/services/voice/ring-buffer.d.ts +40 -0
  559. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  560. package/src/services/voice/ring-buffer.ts +105 -0
  561. package/src/services/voice/rollback-queue.d.ts +24 -0
  562. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  563. package/src/services/voice/rollback-queue.ts +74 -0
  564. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  565. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  566. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  567. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  568. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  569. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  570. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  571. package/src/services/voice/scheduler.d.ts +146 -0
  572. package/src/services/voice/scheduler.d.ts.map +1 -0
  573. package/src/services/voice/scheduler.t2.test.ts +141 -0
  574. package/src/services/voice/scheduler.ts +927 -0
  575. package/src/services/voice/shared-resources.d.ts +190 -0
  576. package/src/services/voice/shared-resources.d.ts.map +1 -0
  577. package/src/services/voice/shared-resources.ts +320 -0
  578. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  579. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  580. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  581. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  582. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  583. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  584. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  585. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  586. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  587. package/src/services/voice/speaker/diarizer.ts +218 -0
  588. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  589. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  590. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  591. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  592. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  593. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  594. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  595. package/src/services/voice/speaker/encoder.d.ts +37 -0
  596. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  597. package/src/services/voice/speaker/encoder.ts +105 -0
  598. package/src/services/voice/speaker-imprint.d.ts +83 -0
  599. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  600. package/src/services/voice/speaker-imprint.test.ts +185 -0
  601. package/src/services/voice/speaker-imprint.ts +312 -0
  602. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  603. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  604. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  605. package/src/services/voice/speaker-preset-cache.ts +195 -0
  606. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  607. package/src/services/voice/system-audio-sink.d.ts +73 -0
  608. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  609. package/src/services/voice/system-audio-sink.test.ts +29 -0
  610. package/src/services/voice/system-audio-sink.ts +366 -0
  611. package/src/services/voice/transcriber.d.ts +244 -0
  612. package/src/services/voice/transcriber.d.ts.map +1 -0
  613. package/src/services/voice/transcriber.test.ts +392 -0
  614. package/src/services/voice/transcriber.ts +704 -0
  615. package/src/services/voice/turn-controller.d.ts +183 -0
  616. package/src/services/voice/turn-controller.d.ts.map +1 -0
  617. package/src/services/voice/turn-controller.test.ts +575 -0
  618. package/src/services/voice/turn-controller.ts +596 -0
  619. package/src/services/voice/types.d.ts +643 -0
  620. package/src/services/voice/types.d.ts.map +1 -0
  621. package/src/services/voice/types.ts +699 -0
  622. package/src/services/voice/vad.d.ts +282 -0
  623. package/src/services/voice/vad.d.ts.map +1 -0
  624. package/src/services/voice/vad.test.ts +480 -0
  625. package/src/services/voice/vad.ts +827 -0
  626. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  627. package/src/services/voice/voice-budget.d.ts +241 -0
  628. package/src/services/voice/voice-budget.d.ts.map +1 -0
  629. package/src/services/voice/voice-budget.test.ts +420 -0
  630. package/src/services/voice/voice-budget.ts +656 -0
  631. package/src/services/voice/voice-duet.test.ts +375 -0
  632. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  633. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  634. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  635. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  636. package/src/services/voice/voice-preset-format.d.ts +158 -0
  637. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  638. package/src/services/voice/voice-preset-format.ts +700 -0
  639. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  640. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  641. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  642. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  643. package/src/services/voice/voice-profile-artifact.ts +518 -0
  644. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  645. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  646. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  647. package/src/services/voice/voice-profile-routes.ts +425 -0
  648. package/src/services/voice/voice-scenario.ts +154 -0
  649. package/src/services/voice/voice-settings.d.ts +82 -0
  650. package/src/services/voice/voice-settings.d.ts.map +1 -0
  651. package/src/services/voice/voice-settings.ts +172 -0
  652. package/src/services/voice/voice-state-machine.d.ts +364 -0
  653. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  654. package/src/services/voice/voice-state-machine.ts +727 -0
  655. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  656. package/src/services/voice/voice-workbench-report.ts +326 -0
  657. package/src/services/voice/voice-workbench.test.ts +158 -0
  658. package/src/services/voice/voice.test.ts +1070 -0
  659. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  660. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  661. package/src/services/voice/wake-word-ggml.ts +320 -0
  662. package/src/services/voice/wake-word.d.ts +255 -0
  663. package/src/services/voice/wake-word.d.ts.map +1 -0
  664. package/src/services/voice/wake-word.test.ts +298 -0
  665. package/src/services/voice/wake-word.ts +554 -0
  666. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  667. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  668. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  669. package/src/services/voice-model-updater.d.ts +240 -0
  670. package/src/services/voice-model-updater.d.ts.map +1 -0
  671. package/src/services/voice-model-updater.ts +724 -0
  672. package/src/services/voice-prewarm.d.ts +3 -0
  673. package/src/services/voice-prewarm.d.ts.map +1 -0
  674. package/src/services/voice-prewarm.ts +51 -0
  675. package/dist/index.d.ts +0 -37
  676. package/dist/index.js +0 -1098
@@ -0,0 +1,1000 @@
1
+ /**
2
+ * Memory Arbiter — single in-process owner of every model handle (text,
3
+ * embedding, vision-language, ASR, TTS, image generation) for the local
4
+ * inference stack. WS1 deliverable.
5
+ *
6
+ * Why this exists
7
+ * ---------------
8
+ * The current code has every plugin loading independently:
9
+ *
10
+ * - `plugin-local-inference` owns the text + voice GGUFs through
11
+ * `LocalInferenceEngine` + `SharedResourceRegistry`.
12
+ * - `plugin-vision` loads its own TF.js / face-api models with no
13
+ * shared budget.
14
+ * - `plugin-aosp-local-inference` runs the bun:ffi llama.cpp binding
15
+ * in its own world, also with no shared budget.
16
+ *
17
+ * On a 6 GB iPhone or an 8 GB low-tier Android, that means loading a
18
+ * vision model on top of a text model gets the app jetsam'd / lmkd-killed
19
+ * before the planner even runs.
20
+ *
21
+ * The arbiter is the single seam every consumer goes through to acquire
22
+ * a model. It owns the eviction policy across modalities (the existing
23
+ * `ResidentModelRole` priority table + memory-pressure signals from the
24
+ * platform), it owns the queue for capability swaps (a vision-describe
25
+ * arriving while the text model is generating waits its turn rather than
26
+ * triggering a parallel load that OOMs), and it owns the
27
+ * `vision-embedding-cache` so repeat frames don't re-pay the projector.
28
+ *
29
+ * What this module does NOT do
30
+ * ----------------------------
31
+ * - It does not implement any model loader. Loaders are passed in via
32
+ * `registerCapability(...)` by the plugins that own the binding
33
+ * (plugin-local-inference for text/embedding, plugin-vision for
34
+ * vision-describe, plugin-image-gen for diffusion, etc.).
35
+ * - It does not download models, probe hardware, or render UI. Those
36
+ * are the existing `Downloader`, `probeHardware`, and Settings UI
37
+ * concerns.
38
+ * - It does not run on a worker thread. One process, one event loop —
39
+ * the arbiter coordinates async work via promises only.
40
+ *
41
+ * Consumer contract
42
+ * -----------------
43
+ * Capability handlers register themselves at boot:
44
+ *
45
+ * ```ts
46
+ * arbiter.registerCapability({
47
+ * capability: "vision-describe",
48
+ * residentRole: "vision",
49
+ * load: async (modelKey) => loadQwen3VL(modelKey),
50
+ * unload: async (handle) => handle.dispose(),
51
+ * run: async (handle, req) => handle.describe(req.imageBytes),
52
+ * });
53
+ * ```
54
+ *
55
+ * Then anyone can call:
56
+ *
57
+ * ```ts
58
+ * const result = await arbiter.requestVisionDescribe({
59
+ * modelKey: "qwen3-vl-4b",
60
+ * imageBytes: pixels,
61
+ * });
62
+ * ```
63
+ *
64
+ * The arbiter handles:
65
+ * 1. Acquiring (or reusing) the handle for `qwen3-vl-4b`.
66
+ * 2. If a different capability holds the active model and we need to
67
+ * swap, evicting it first.
68
+ * 3. Running the request.
69
+ * 4. Releasing the handle (refcounted; the handle stays loaded until
70
+ * pressure or idle eviction reclaims it).
71
+ *
72
+ * Telemetry
73
+ * ---------
74
+ * The arbiter emits typed events:
75
+ * - `model_load` — a handle came online (capability, modelKey, ms)
76
+ * - `model_unload` — a handle went offline (capability, modelKey, reason)
77
+ * - `memory_pressure` — pressure level changed (level, source, freeMb?)
78
+ * - `eviction` — a role was evicted (capability, modelKey, reason)
79
+ * - `capability_run` — a request completed (capability, modelKey, ms)
80
+ *
81
+ * The runtime observability layer subscribes via `onEvent(...)`.
82
+ */
83
+
84
+ import type {
85
+ MemoryPressureEvent,
86
+ MemoryPressureLevel,
87
+ MemoryPressureSource,
88
+ } from "./memory-pressure";
89
+ import {
90
+ VisionEmbeddingCache,
91
+ type VisionEmbeddingEntry,
92
+ } from "./vision-embedding-cache";
93
+ import {
94
+ createEvictableModelRole,
95
+ type EvictableModelRole,
96
+ RESIDENT_ROLE_PRIORITY,
97
+ type ResidentModelRole,
98
+ type SharedResourceRegistry,
99
+ } from "./voice/shared-resources";
100
+
101
+ /**
102
+ * Capability identifiers the arbiter routes between. One per consumer
103
+ * surface — keep this list short; new capabilities should be added
104
+ * deliberately, not on a whim.
105
+ */
106
+ export type ArbiterCapability =
107
+ | "text"
108
+ | "embedding"
109
+ | "vision-describe"
110
+ | "image-gen"
111
+ | "transcribe"
112
+ | "speak";
113
+
114
+ /**
115
+ * Map a capability to the resident-role bucket the existing
116
+ * `SharedResourceRegistry` already tracks. Adding a new capability MUST
117
+ * extend this map so the eviction priority is well-defined.
118
+ */
119
+ const CAPABILITY_ROLE: Readonly<Record<ArbiterCapability, ResidentModelRole>> =
120
+ {
121
+ text: "text-target",
122
+ embedding: "embedding",
123
+ "vision-describe": "vision",
124
+ // Image-gen has no slot in `ResidentModelRole` today. We park it on
125
+ // `vision` priority so it co-evicts with the VL model — both are
126
+ // GPU-heavy weights with similar lifecycles.
127
+ "image-gen": "vision",
128
+ transcribe: "asr",
129
+ speak: "tts",
130
+ };
131
+
132
+ /** The opaque handle returned by `acquire`. Callers MUST `release` it. */
133
+ export interface ArbiterHandle<TBackend = unknown> {
134
+ readonly capability: ArbiterCapability;
135
+ readonly modelKey: string;
136
+ readonly backend: TBackend;
137
+ /**
138
+ * Increment the refcount so the handle can be shared. Returns nothing; the
139
+ * same handle object is passed along. Useful when one consumer hands the handle to
140
+ * another mid-flight.
141
+ */
142
+ retain(): void;
143
+ /** Decrement the refcount. When it hits zero the role becomes evictable. */
144
+ release(): Promise<void>;
145
+ }
146
+
147
+ /**
148
+ * What a capability handler tells the arbiter about itself. The arbiter
149
+ * uses these to load on demand, run requests, and unload under pressure.
150
+ */
151
+ export interface CapabilityRegistration<TBackend, TRequest, TResult> {
152
+ capability: ArbiterCapability;
153
+ /**
154
+ * Optional override for the resident-role priority. Defaults to the
155
+ * `CAPABILITY_ROLE` map; pass when a specific binding has different
156
+ * eviction semantics than the default for its capability.
157
+ */
158
+ residentRole?: ResidentModelRole;
159
+ /**
160
+ * Best-effort estimate, in MB, of the RAM/VRAM the model occupies once
161
+ * loaded. Reported in telemetry and passed to the fit-to-budget path
162
+ * (`evictToFit`) before a load, so keep it roughly accurate. 0 when unknown.
163
+ */
164
+ estimatedMb?: number;
165
+ /** Load the backend for a given model key. Concurrent loads of the same (capability, modelKey) share one call; it is called again after an eviction. */
166
+ load: (modelKey: string) => Promise<TBackend>;
167
+ /** Tear the backend down. The arbiter stops referencing it after this resolves. */
168
+ unload: (backend: TBackend) => Promise<void>;
169
+ /** Run one request through the backend. */
170
+ run: (backend: TBackend, request: TRequest) => Promise<TResult>;
171
+ }
172
+
173
+ interface ResidentEntry {
174
+ capability: ArbiterCapability;
175
+ modelKey: string;
176
+ backend: unknown;
177
+ residentRole: ResidentModelRole;
178
+ estimatedMb: number;
179
+ refCount: number;
180
+ loadedAtMs: number;
181
+ /**
182
+ * Wall-clock of the most recent `acquire`. Drives the fit-to-budget LRU
183
+ * eviction path (`evictToFit`): when a new load would exceed the usable
184
+ * RAM budget, the least-recently-used evictable entries are dropped first.
185
+ */
186
+ lastUsedAt: number;
187
+ roleId: string;
188
+ }
189
+
190
+ /** Telemetry event the runtime observability layer can subscribe to. */
191
+ export type ArbiterEvent =
192
+ | {
193
+ type: "model_load";
194
+ capability: ArbiterCapability;
195
+ modelKey: string;
196
+ loadMs: number;
197
+ atMs: number;
198
+ }
199
+ | {
200
+ type: "model_unload";
201
+ capability: ArbiterCapability;
202
+ modelKey: string;
203
+ reason: "release" | "swap" | "pressure" | "shutdown" | "fit";
204
+ atMs: number;
205
+ }
206
+ | {
207
+ type: "memory_pressure";
208
+ level: MemoryPressureLevel;
209
+ source: string;
210
+ freeMb?: number;
211
+ atMs: number;
212
+ }
213
+ | {
214
+ type: "eviction";
215
+ capability: ArbiterCapability;
216
+ modelKey: string;
217
+ reason: "pressure" | "swap" | "fit";
218
+ estimatedMb: number;
219
+ atMs: number;
220
+ }
221
+ | {
222
+ type: "capability_run";
223
+ capability: ArbiterCapability;
224
+ modelKey: string;
225
+ runMs: number;
226
+ atMs: number;
227
+ };
228
+
229
+ export type ArbiterEventListener = (event: ArbiterEvent) => void;
230
+
231
+ interface QueueEntry<TRequest, TResult> {
232
+ capability: ArbiterCapability;
233
+ modelKey: string;
234
+ request: TRequest;
235
+ resolve: (value: TResult) => void;
236
+ reject: (err: unknown) => void;
237
+ }
238
+
239
+ export interface MemoryArbiterOptions {
240
+ registry: SharedResourceRegistry;
241
+ pressureSource?: MemoryPressureSource;
242
+ visionCache?: VisionEmbeddingCache;
243
+ logger?: {
244
+ info?: (m: string) => void;
245
+ warn?: (m: string) => void;
246
+ debug?: (m: string) => void;
247
+ };
248
+ now?: () => number;
249
+ /**
250
+ * Usable RAM budget (MB) for the proactive fit-to-budget LRU eviction
251
+ * path. Before loading a model whose `estimatedMb` would push the sum of
252
+ * resident footprints past this budget, the arbiter evicts the
253
+ * least-recently-used evictable entries (refcount 0, never the text
254
+ * target) until it fits. Return `null` to disable the fit path entirely —
255
+ * the default, since an arbiter with no host-RAM knowledge must not guess.
256
+ * Production wiring passes `os.totalmem()/MB - ramHeadroomReserveMb()`.
257
+ */
258
+ budgetMb?: () => number | null;
259
+ }
260
+
261
+ /**
262
+ * The arbiter. One instance per process; the plugin owns the singleton
263
+ * (see `index.ts`), and any consumer calls `getMemoryArbiter()` rather
264
+ * than newing one up.
265
+ */
266
+ export class MemoryArbiter {
267
+ private readonly registry: SharedResourceRegistry;
268
+ private readonly pressureSource: MemoryPressureSource | null;
269
+ private readonly visionCache: VisionEmbeddingCache;
270
+ private readonly log?: MemoryArbiterOptions["logger"];
271
+ private readonly now: () => number;
272
+ private readonly budgetMb: () => number | null;
273
+
274
+ private readonly capabilities = new Map<
275
+ ArbiterCapability,
276
+ CapabilityRegistration<unknown, unknown, unknown>
277
+ >();
278
+ private readonly resident = new Map<string, ResidentEntry>();
279
+
280
+ private readonly listeners = new Set<ArbiterEventListener>();
281
+ private pressureUnsubscribe: (() => void) | null = null;
282
+ private currentPressure: MemoryPressureLevel = "nominal";
283
+
284
+ /**
285
+ * One serialized in-flight load per (capability, modelKey) so concurrent
286
+ * `requestX` calls share a single load promise instead of loading
287
+ * duplicate weights into RAM.
288
+ */
289
+ private readonly inFlightLoads = new Map<string, Promise<ResidentEntry>>();
290
+
291
+ /**
292
+ * Per-capability run queue. The arbiter does NOT serialize across
293
+ * capabilities; what it serializes is the *swap*: when a request needs
294
+ * to evict another resident role first, the ongoing run on that role is
295
+ * allowed to finish, then the swap proceeds. Concurrent runs against the
296
+ * same loaded handle pass through directly.
297
+ */
298
+ private readonly queues = new Map<
299
+ ArbiterCapability,
300
+ QueueEntry<unknown, unknown>[]
301
+ >();
302
+ private readonly running = new Map<ArbiterCapability, boolean>();
303
+
304
+ private shuttingDown = false;
305
+
306
+ constructor(opts: MemoryArbiterOptions) {
307
+ this.registry = opts.registry;
308
+ this.pressureSource = opts.pressureSource ?? null;
309
+ this.visionCache = opts.visionCache ?? new VisionEmbeddingCache();
310
+ this.log = opts.logger;
311
+ this.now = opts.now ?? (() => Date.now());
312
+ this.budgetMb = opts.budgetMb ?? (() => null);
313
+ }
314
+
315
+ /** Begin observing memory pressure. Idempotent. */
316
+ start(): void {
317
+ if (this.shuttingDown) {
318
+ throw new Error("[memory-arbiter] cannot start after shutdown");
319
+ }
320
+ if (this.pressureUnsubscribe) return;
321
+ const source = this.pressureSource;
322
+ if (!source) return;
323
+ source.start();
324
+ this.pressureUnsubscribe = source.subscribe((event) => {
325
+ void this.handlePressure(event).catch((err) => {
326
+ this.log?.warn?.(
327
+ `[memory-arbiter] pressure handler failed: ${err instanceof Error ? err.message : String(err)}`,
328
+ );
329
+ });
330
+ });
331
+ }
332
+
333
+ /** Stop observing pressure. Does NOT evict resident handles. */
334
+ stop(): void {
335
+ if (this.pressureUnsubscribe) {
336
+ this.pressureUnsubscribe();
337
+ this.pressureUnsubscribe = null;
338
+ }
339
+ this.pressureSource?.stop();
340
+ }
341
+
342
+ /** Tear down: stop pressure observation and unload every resident handle. */
343
+ async shutdown(): Promise<void> {
344
+ this.shuttingDown = true;
345
+ this.stop();
346
+ const keys = Array.from(this.resident.keys());
347
+ for (const key of keys) {
348
+ const entry = this.resident.get(key);
349
+ if (!entry) continue;
350
+ await this.evictEntry(entry, "shutdown").catch((err) => {
351
+ this.log?.warn?.(
352
+ `[memory-arbiter] shutdown evict ${key} failed: ${err instanceof Error ? err.message : String(err)}`,
353
+ );
354
+ });
355
+ }
356
+ this.resident.clear();
357
+ this.inFlightLoads.clear();
358
+ }
359
+
360
+ /** Subscribe to telemetry events. Returns the unsubscribe fn. */
361
+ onEvent(listener: ArbiterEventListener): () => void {
362
+ this.listeners.add(listener);
363
+ return () => {
364
+ this.listeners.delete(listener);
365
+ };
366
+ }
367
+
368
+ private emit(event: ArbiterEvent): void {
369
+ for (const listener of this.listeners) {
370
+ try {
371
+ listener(event);
372
+ } catch {
373
+ this.listeners.delete(listener);
374
+ }
375
+ }
376
+ }
377
+
378
+ /** Register a capability handler. Throws on duplicate registration. */
379
+ registerCapability<TBackend, TRequest, TResult>(
380
+ registration: CapabilityRegistration<TBackend, TRequest, TResult>,
381
+ ): void {
382
+ if (this.capabilities.has(registration.capability)) {
383
+ throw new Error(
384
+ `[memory-arbiter] capability "${registration.capability}" is already registered`,
385
+ );
386
+ }
387
+ this.capabilities.set(
388
+ registration.capability,
389
+ registration as unknown as CapabilityRegistration<
390
+ unknown,
391
+ unknown,
392
+ unknown
393
+ >,
394
+ );
395
+ }
396
+
397
+ /** Whether a capability has been registered. */
398
+ hasCapability(capability: ArbiterCapability): boolean {
399
+ return this.capabilities.has(capability);
400
+ }
401
+
402
+ /** Diagnostic snapshot of all resident handles. */
403
+ residentSnapshot(): ReadonlyArray<{
404
+ capability: ArbiterCapability;
405
+ modelKey: string;
406
+ residentRole: ResidentModelRole;
407
+ estimatedMb: number;
408
+ refCount: number;
409
+ loadedAtMs: number;
410
+ lastUsedAt: number;
411
+ }> {
412
+ return Array.from(this.resident.values()).map((e) => ({
413
+ capability: e.capability,
414
+ modelKey: e.modelKey,
415
+ residentRole: e.residentRole,
416
+ estimatedMb: e.estimatedMb,
417
+ refCount: e.refCount,
418
+ loadedAtMs: e.loadedAtMs,
419
+ lastUsedAt: e.lastUsedAt,
420
+ }));
421
+ }
422
+
423
+ currentPressureLevel(): MemoryPressureLevel {
424
+ return this.currentPressure;
425
+ }
426
+
427
+ /**
428
+ * Acquire a handle for `(capability, modelKey)`. If the model is already
429
+ * resident the refcount is bumped and we return immediately; otherwise we
430
+ * load it (sharing the in-flight promise across concurrent acquirers).
431
+ *
432
+ * Critical pressure causes acquire to throw for non-text capabilities so
433
+ * we don't load on top of a system the OS has already flagged as in
434
+ * trouble. Text always loads — without text the agent is a brick.
435
+ */
436
+ async acquire<TBackend>(
437
+ capability: ArbiterCapability,
438
+ modelKey: string,
439
+ ): Promise<ArbiterHandle<TBackend>> {
440
+ const registration = this.capabilities.get(capability);
441
+ if (!registration) {
442
+ throw new Error(
443
+ `[memory-arbiter] no capability registered for "${capability}"`,
444
+ );
445
+ }
446
+ if (this.shuttingDown) {
447
+ throw new Error(
448
+ `[memory-arbiter] arbiter is shutting down; cannot acquire ${capability}`,
449
+ );
450
+ }
451
+ if (this.currentPressure === "critical" && capability !== "text") {
452
+ throw new Error(
453
+ `[memory-arbiter] memory pressure is critical; refusing to load capability "${capability}". Free RAM and retry.`,
454
+ );
455
+ }
456
+ const entry = await this.loadOrReuse(registration, modelKey);
457
+ entry.refCount++;
458
+ entry.lastUsedAt = this.now();
459
+ return this.handleFor<TBackend>(entry);
460
+ }
461
+
462
+ private handleFor<TBackend>(entry: ResidentEntry): ArbiterHandle<TBackend> {
463
+ const arbiter = this;
464
+ let released = false;
465
+ return {
466
+ capability: entry.capability,
467
+ modelKey: entry.modelKey,
468
+ backend: entry.backend as TBackend,
469
+ retain(): void {
470
+ if (released) {
471
+ throw new Error(
472
+ `[memory-arbiter] cannot retain ${entry.capability}/${entry.modelKey} after release`,
473
+ );
474
+ }
475
+ entry.refCount++;
476
+ },
477
+ async release(): Promise<void> {
478
+ if (released) return;
479
+ released = true;
480
+ entry.refCount = Math.max(0, entry.refCount - 1);
481
+ // We don't unload at refcount=0; the role becomes evictable, and
482
+ // the pressure / idle path is what reclaims it. Keeps warm-paths
483
+ // fast.
484
+ arbiter.log?.debug?.(
485
+ `[memory-arbiter] release ${entry.capability}/${entry.modelKey} refcount=${entry.refCount}`,
486
+ );
487
+ },
488
+ };
489
+ }
490
+
491
+ private residentKey(capability: ArbiterCapability, modelKey: string): string {
492
+ return `${capability}::${modelKey}`;
493
+ }
494
+
495
+ private async loadOrReuse(
496
+ registration: CapabilityRegistration<unknown, unknown, unknown>,
497
+ modelKey: string,
498
+ ): Promise<ResidentEntry> {
499
+ const key = this.residentKey(registration.capability, modelKey);
500
+ const existing = this.resident.get(key);
501
+ if (existing) return existing;
502
+ const inFlight = this.inFlightLoads.get(key);
503
+ if (inFlight) return inFlight;
504
+
505
+ // Before loading, decide whether the new role conflicts with what's
506
+ // currently resident. The conservative policy: if the same
507
+ // `residentRole` is held by a different modelKey, we evict the
508
+ // existing one first (one model per role). Different roles can co-
509
+ // exist; the pressure path is what rebalances them.
510
+ const role =
511
+ registration.residentRole ?? CAPABILITY_ROLE[registration.capability];
512
+ const conflicts = this.findConflictingRole(
513
+ role,
514
+ registration.capability,
515
+ modelKey,
516
+ );
517
+
518
+ const promise = (async (): Promise<ResidentEntry> => {
519
+ for (const conflict of conflicts) {
520
+ if (conflict.refCount > 0) {
521
+ // A different consumer is actively using the conflicting model.
522
+ // Wait for it to drain rather than yanking the rug out — the
523
+ // arbiter does NOT cancel in-flight work for a swap.
524
+ await this.waitForRefcountZero(conflict);
525
+ }
526
+ await this.evictEntry(conflict, "swap");
527
+ }
528
+ // Proactively make room for the incoming weights: evict the
529
+ // least-recently-used evictable models until this one fits the
530
+ // usable RAM budget. No-op when no budget is configured or the
531
+ // incoming footprint is unknown.
532
+ await this.evictToFit(registration.estimatedMb ?? 0);
533
+ const startMs = this.now();
534
+ const backend = await registration.load(modelKey);
535
+ const loadedAtMs = this.now();
536
+ const entry: ResidentEntry = {
537
+ capability: registration.capability,
538
+ modelKey,
539
+ backend,
540
+ residentRole: role,
541
+ estimatedMb: registration.estimatedMb ?? 0,
542
+ refCount: 0,
543
+ loadedAtMs,
544
+ lastUsedAt: loadedAtMs,
545
+ roleId: `arbiter:${registration.capability}:${modelKey}`,
546
+ };
547
+ const evictable = this.makeEvictable(entry, registration);
548
+ this.registry.acquire(evictable);
549
+ this.resident.set(key, entry);
550
+ this.emit({
551
+ type: "model_load",
552
+ capability: registration.capability,
553
+ modelKey,
554
+ loadMs: loadedAtMs - startMs,
555
+ atMs: loadedAtMs,
556
+ });
557
+ this.log?.info?.(
558
+ `[memory-arbiter] loaded ${registration.capability}/${modelKey} in ${loadedAtMs - startMs}ms`,
559
+ );
560
+ return entry;
561
+ })().finally(() => {
562
+ this.inFlightLoads.delete(key);
563
+ });
564
+ this.inFlightLoads.set(key, promise);
565
+ return promise;
566
+ }
567
+
568
+ private findConflictingRole(
569
+ role: ResidentModelRole,
570
+ capability: ArbiterCapability,
571
+ modelKey: string,
572
+ ): ResidentEntry[] {
573
+ const out: ResidentEntry[] = [];
574
+ for (const entry of this.resident.values()) {
575
+ if (entry.residentRole !== role) continue;
576
+ if (entry.capability === capability && entry.modelKey === modelKey)
577
+ continue;
578
+ out.push(entry);
579
+ }
580
+ return out;
581
+ }
582
+
583
+ private async waitForRefcountZero(entry: ResidentEntry): Promise<void> {
584
+ // Cooperative wait — the arbiter doesn't have a per-entry condvar, so
585
+ // we poll on a zero-delay timer (macrotask) cadence. Refcount drops happen synchronously
586
+ // inside `release()`, so this terminates within at most one extra
587
+ // run-to-completion cycle when the holder has already released.
588
+ const start = this.now();
589
+ while (entry.refCount > 0) {
590
+ await new Promise<void>((resolve) => setTimeout(resolve, 0));
591
+ if (this.now() - start > 10_000) {
592
+ throw new Error(
593
+ `[memory-arbiter] timeout waiting for ${entry.capability}/${entry.modelKey} to drain (refcount=${entry.refCount}); refusing to swap mid-flight`,
594
+ );
595
+ }
596
+ }
597
+ }
598
+
599
/**
 * Wraps a resident entry in the `EvictableModelRole` shape produced by
 * `createEvictableModelRole`, so it can be handed to `this.registry`.
 *
 * @param entry - Resident entry to expose.
 * @param registration - Capability registration forwarded to `evictEntry`
 *   when the eviction callback fires.
 * @returns The evictable role descriptor for `entry`.
 */
private makeEvictable(
  entry: ResidentEntry,
  registration: CapabilityRegistration<unknown, unknown, unknown>,
): EvictableModelRole {
  // Resident means "still present in the arbiter's resident map".
  const stillResident = () =>
    this.resident.has(this.residentKey(entry.capability, entry.modelKey));

  // A handle with live holders is never evicted from this callback; it
  // simply returns, leaving the caller free to try another role.
  const evictIfIdle = async (): Promise<void> => {
    if (entry.refCount > 0) return;
    await this.evictEntry(entry, "pressure", registration);
  };

  return createEvictableModelRole({
    id: entry.roleId,
    role: entry.residentRole,
    evictionPriority: RESIDENT_ROLE_PRIORITY[entry.residentRole],
    estimatedMb: entry.estimatedMb,
    isResident: stillResident,
    evict: evictIfIdle,
  });
}
620
+
621
/**
 * Removes `entry` from the resident set, releases its registry role,
 * unloads its backend and emits the matching telemetry.
 *
 * The entry is dropped from `this.resident` before any `await`, so a second
 * call for the same key returns immediately. Failures from the registry
 * release and from `unload` are logged as warnings and do not abort the
 * eviction.
 *
 * @param entry - Resident entry to evict.
 * @param reason - Why the entry is going away; `pressure`, `swap` and `fit`
 *   additionally emit an `eviction` event.
 * @param registration - Registration to unload with; falls back to the one
 *   registered for `entry.capability`.
 */
private async evictEntry(
  entry: ResidentEntry,
  reason: "release" | "swap" | "pressure" | "shutdown" | "fit",
  registration?: CapabilityRegistration<unknown, unknown, unknown>,
): Promise<void> {
  const describe = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  // `Map.delete` reports whether the key existed, which doubles as the
  // "already evicted" guard.
  const residentKey = this.residentKey(entry.capability, entry.modelKey);
  if (!this.resident.delete(residentKey)) return;

  try {
    await this.registry.release(entry.roleId);
  } catch (err) {
    this.log?.warn?.(
      `[memory-arbiter] registry release failed for ${entry.roleId}: ${describe(err)}`,
    );
  }

  // Resolved only after the registry release has settled, as before.
  const owner = registration ?? this.capabilities.get(entry.capability);
  try {
    await owner?.unload(entry.backend);
  } catch (err) {
    this.log?.warn?.(
      `[memory-arbiter] unload failed for ${entry.capability}/${entry.modelKey}: ${describe(err)}`,
    );
  }

  this.emit({
    type: "model_unload",
    capability: entry.capability,
    modelKey: entry.modelKey,
    reason,
    atMs: this.now(),
  });

  // Only involuntary removals count as evictions.
  if (reason !== "release" && reason !== "shutdown") {
    this.emit({
      type: "eviction",
      capability: entry.capability,
      modelKey: entry.modelKey,
      reason,
      estimatedMb: entry.estimatedMb,
      atMs: this.now(),
    });
  }

  this.log?.info?.(
    `[memory-arbiter] evicted ${entry.capability}/${entry.modelKey} reason=${reason}`,
  );
}
665
+
666
/**
 * Makes room for an incoming model before it is loaded: evicts residents,
 * one LRU candidate at a time, until the summed `estimatedMb` of what is
 * left plus `incomingMb` no longer exceeds `budgetMb()`.
 *
 * Rules:
 *  - No-op when the budget is null or not positive, or when `incomingMb`
 *    is not positive (the footprint is unknown, so nothing is guessed).
 *  - Candidates come from `lruEvictionCandidate()`, which skips the text
 *    target and every entry with a live refcount.
 *  - Best-effort: when no candidate remains the loop stops and the caller
 *    proceeds regardless of whether the budget is met.
 *
 * @param incomingMb - Estimated footprint of the model about to be loaded.
 */
private async evictToFit(incomingMb: number): Promise<void> {
  const budget = this.budgetMb();
  if (budget === null || budget <= 0 || incomingMb <= 0) return;

  // Recomputed on every pass because each eviction shrinks the resident set.
  const footprintMb = (): number =>
    Array.from(this.resident.values()).reduce(
      (total, resident) => total + resident.estimatedMb,
      0,
    );

  while (footprintMb() + incomingMb > budget) {
    const victim = this.lruEvictionCandidate();
    if (victim === null) break;
    await this.evictEntry(victim, "fit");
  }
}
699
+
700
/**
 * Picks the entry the fit path should drop next.
 *
 * Only entries with refcount 0 that are not the text target are eligible.
 * Among those the winner is the smallest `lastUsedAt`; ties go to the
 * smaller `RESIDENT_ROLE_PRIORITY` value, then to the smaller `loadedAtMs`.
 * On a complete tie the entry seen first in map order is kept.
 *
 * @returns The chosen entry, or null when nothing is eligible.
 */
private lruEvictionCandidate(): ResidentEntry | null {
  // True when `challenger` should replace `incumbent` as the pick.
  const outranks = (
    challenger: ResidentEntry,
    incumbent: ResidentEntry,
  ): boolean => {
    if (challenger.lastUsedAt !== incumbent.lastUsedAt) {
      return challenger.lastUsedAt < incumbent.lastUsedAt;
    }
    const challengerPriority = RESIDENT_ROLE_PRIORITY[challenger.residentRole];
    const incumbentPriority = RESIDENT_ROLE_PRIORITY[incumbent.residentRole];
    if (challengerPriority !== incumbentPriority) {
      return challengerPriority < incumbentPriority;
    }
    return challenger.loadedAtMs < incumbent.loadedAtMs;
  };

  let pick: ResidentEntry | null = null;
  for (const resident of this.resident.values()) {
    const pinned =
      resident.refCount > 0 || resident.residentRole === "text-target";
    if (pinned) continue;
    if (pick === null || outranks(resident, pick)) pick = resident;
  }
  return pick;
}
728
+
729
/**
 * Reacts to a memory-pressure event.
 *
 * Records the level, re-emits the event as `memory_pressure` telemetry and
 * then, for anything other than `nominal`:
 *  1. purges expired vision-embedding cache entries;
 *  2. on `low`, asks the registry to evict its lowest-priority role once;
 *  3. otherwise, evicts every idle resident that is not the text target,
 *     in ascending `RESIDENT_ROLE_PRIORITY` order.
 *
 * @param event - Pressure event to handle.
 */
private async handlePressure(event: MemoryPressureEvent): Promise<void> {
  const { level } = event;
  this.currentPressure = level;
  this.emit({
    type: "memory_pressure",
    level,
    source: event.source,
    // `freeMb` is only included when the source actually reported it.
    ...(event.freeMb === undefined ? {} : { freeMb: event.freeMb }),
    atMs: event.atMs,
  });
  if (level === "nominal") return;

  // Cheapest reclaim first: stale cache entries cost nothing to drop.
  const purgedCount = this.visionCache.purgeExpired(this.now());
  if (purgedCount > 0) {
    this.log?.debug?.(
      `[memory-arbiter] purged ${purgedCount} expired vision-embedding entries on pressure`,
    );
  }

  if (level === "low") {
    // Gentle path: a single registry-driven eviction per event.
    await this.registry.evictLowestPriorityRole();
    return;
  }

  // Remaining levels: sweep everything except the text target. The
  // candidate list is a snapshot, but the refcount is re-read right before
  // each eviction because earlier awaits may have let a holder acquire.
  const byPriority = (a: ResidentEntry, b: ResidentEntry): number =>
    RESIDENT_ROLE_PRIORITY[a.residentRole] -
    RESIDENT_ROLE_PRIORITY[b.residentRole];
  const sweepable = Array.from(this.resident.values())
    .filter((resident) => resident.residentRole !== "text-target")
    .sort(byPriority);
  for (const resident of sweepable) {
    if (resident.refCount > 0) continue;
    await this.evictEntry(resident, "pressure");
  }
}
771
+
772
+ // ---------------------------------------------------------------------
773
+ // Capability-specific request fns. Thin wrappers around the queue —
774
+ // each one calls `enqueueRequest` with its capability tag and the
775
+ // caller's request payload. Plugins call these instead of `acquire`
776
+ // directly when they don't need to keep a long-lived handle.
777
+ // ---------------------------------------------------------------------
778
+
779
/**
 * Queues a one-shot request on the `"text"` capability.
 *
 * @param req - Model key to run against and the caller's payload.
 * @returns Whatever the queued request resolves with.
 */
requestText<TRequest, TResult>(req: {
  modelKey: string;
  payload: TRequest;
}): Promise<TResult> {
  const { modelKey, payload } = req;
  return this.enqueueRequest("text", modelKey, payload);
}
785
+
786
/**
 * Queues a one-shot request on the `"embedding"` capability.
 *
 * @param req - Model key to run against and the caller's payload.
 * @returns Whatever the queued request resolves with.
 */
requestEmbedding<TRequest, TResult>(req: {
  modelKey: string;
  payload: TRequest;
}): Promise<TResult> {
  const { modelKey, payload } = req;
  return this.enqueueRequest("embedding", modelKey, payload);
}
792
+
793
/**
 * Queues a one-shot request on the `"vision-describe"` capability.
 *
 * @param req - Model key to run against and the caller's payload.
 * @returns Whatever the queued request resolves with.
 */
requestVisionDescribe<TRequest, TResult>(req: {
  modelKey: string;
  payload: TRequest;
}): Promise<TResult> {
  const { modelKey, payload } = req;
  return this.enqueueRequest("vision-describe", modelKey, payload);
}
799
+
800
/**
 * Queues a one-shot request on the `"image-gen"` capability.
 *
 * @param req - Model key to run against and the caller's payload.
 * @returns Whatever the queued request resolves with.
 */
requestImageGen<TRequest, TResult>(req: {
  modelKey: string;
  payload: TRequest;
}): Promise<TResult> {
  const { modelKey, payload } = req;
  return this.enqueueRequest("image-gen", modelKey, payload);
}
806
+
807
/**
 * Queues a one-shot request on the `"transcribe"` capability.
 *
 * @param req - Model key to run against and the caller's payload.
 * @returns Whatever the queued request resolves with.
 */
requestTranscribe<TRequest, TResult>(req: {
  modelKey: string;
  payload: TRequest;
}): Promise<TResult> {
  const { modelKey, payload } = req;
  return this.enqueueRequest("transcribe", modelKey, payload);
}
813
+
814
/**
 * Queues a one-shot request on the `"speak"` capability.
 *
 * @param req - Model key to run against and the caller's payload.
 * @returns Whatever the queued request resolves with.
 */
requestSpeak<TRequest, TResult>(req: {
  modelKey: string;
  payload: TRequest;
}): Promise<TResult> {
  const { modelKey, payload } = req;
  return this.enqueueRequest("speak", modelKey, payload);
}
820
+
821
/**
 * Text-to-speech flavoured name for {@link requestSpeak}.
 *
 * Enqueues on the very same `"speak"` capability and queue, so the two
 * methods are interchangeable; this one exists for callers that use the
 * `requestTextToSpeech` naming (e.g. a `ModelType.TEXT_TO_SPEECH` handler).
 *
 * @param req - Model key to run against and the caller's payload.
 * @returns Whatever the queued request resolves with.
 */
requestTextToSpeech<TRequest, TResult>(req: {
  modelKey: string;
  payload: TRequest;
}): Promise<TResult> {
  const { modelKey, payload } = req;
  return this.enqueueRequest("speak", modelKey, payload);
}
834
+
835
/**
 * Appends a request to the capability's queue and kicks the drain loop.
 *
 * @param capability - Capability whose queue receives the request.
 * @param modelKey - Model the request should run against.
 * @param payload - Caller payload, stored as the queue item's `request`.
 * @returns A promise settled by the drain loop through the queued
 *   `resolve` / `reject` callbacks.
 * @throws Error (as a rejection) when nothing is registered for
 *   `capability` at enqueue time.
 */
private async enqueueRequest<TRequest, TResult>(
  capability: ArbiterCapability,
  modelKey: string,
  payload: TRequest,
): Promise<TResult> {
  if (!this.capabilities.get(capability)) {
    throw new Error(
      `[memory-arbiter] no capability registered for "${capability}"`,
    );
  }

  return new Promise<TResult>((resolve, reject) => {
    const pending = this.queues.get(capability) ?? [];
    pending.push({
      capability,
      modelKey,
      request: payload,
      resolve: resolve as (value: unknown) => void,
      reject,
    });
    // Stored back so a freshly created queue becomes visible to the drain.
    this.queues.set(capability, pending);

    // Fire-and-forget: the drain settles each item itself; a failure of
    // the drain loop as a whole is only logged.
    void this.drainQueue(capability).catch((err) => {
      const detail = err instanceof Error ? err.message : String(err);
      this.log?.warn?.(
        `[memory-arbiter] queue drain failed for ${capability}: ${detail}`,
      );
    });
  });
}
863
+
864
/**
 * Runs the queued requests of one capability strictly one after another.
 *
 * A `running` flag per capability makes the call a no-op while another
 * drain for the same capability is active; that drain picks up items
 * enqueued in the meantime because it keeps shifting from the same array.
 *
 * For each item the model is acquired, `reg.run` is invoked, a
 * `capability_run` event is emitted, the item's promise is settled, and
 * the handle is released. Acquire/run failures reject only that item; the
 * loop continues with the next one.
 *
 * @param capability - Capability whose queue should be drained.
 */
private async drainQueue(capability: ArbiterCapability): Promise<void> {
  if (this.running.get(capability)) return;
  this.running.set(capability, true);
  try {
    const queue = this.queues.get(capability);
    while (queue && queue.length > 0) {
      const next = queue.shift();
      if (!next) break;
      // Looked up per item: the registration may disappear mid-queue.
      const reg = this.capabilities.get(capability);
      if (!reg) {
        next.reject(
          new Error(
            `[memory-arbiter] capability "${capability}" was deregistered mid-queue`,
          ),
        );
        continue;
      }
      try {
        const handle = await this.acquire(capability, next.modelKey);
        const startMs = this.now();
        try {
          const result = await reg.run(handle.backend, next.request);
          const runMs = this.now() - startMs;
          this.emit({
            type: "capability_run",
            capability,
            modelKey: next.modelKey,
            runMs,
            atMs: this.now(),
          });
          next.resolve(result);
        } finally {
          // A throw from inside `finally` would replace the run error the
          // caller needs to see (and would vanish silently after a
          // successful run), so release failures are contained and logged.
          try {
            await handle.release();
          } catch (releaseErr) {
            this.log?.warn?.(
              `[memory-arbiter] handle release failed for ${capability}/${next.modelKey}: ${releaseErr instanceof Error ? releaseErr.message : String(releaseErr)}`,
            );
          }
        }
      } catch (err) {
        next.reject(err);
      }
    }
  } finally {
    this.running.set(capability, false);
  }
}
906
+
907
+ // ---------------------------------------------------------------------
908
+ // Vision-embedding cache passthroughs.
909
+ // ---------------------------------------------------------------------
910
+
911
/**
 * Looks up a vision embedding in the arbiter's vision cache.
 *
 * @param hash - Cache key of the embedding.
 * @returns Whatever `visionCache.get` yields for `hash`.
 */
getCachedVisionEmbedding(hash: string): VisionEmbeddingEntry | null {
  const cached = this.visionCache.get(hash);
  return cached;
}
914
+
915
/**
 * Stores a vision embedding in the arbiter's vision cache.
 *
 * @param hash - Cache key of the embedding.
 * @param entry - Embedding tokens plus their count and hidden size.
 * @param ttlMs - Optional time-to-live, forwarded unchanged to the cache.
 */
setCachedVisionEmbedding(
  hash: string,
  entry: { tokens: Float32Array; tokenCount: number; hiddenSize: number },
  ttlMs?: number,
): void {
  const cache = this.visionCache;
  cache.set(hash, entry, ttlMs);
}
922
+
923
+ // ---------------------------------------------------------------------
924
+ // ASR transcript cache passthroughs.
925
+ //
926
+ // Re-transcribing the same audio is a frequent test/dev pattern (the
927
+ // dashboard's "play the WAV back" view, the streaming-audio handler
928
+ // flushing duplicate frames at segment boundaries). The cache is
929
+ // content-hashed by `services/asr/hash.ts`, with a hard cap so a long
930
+ // session can't memory-leak. Default TTL is 1 hour; entries are
931
+ // evicted on touch when stale.
932
+ // ---------------------------------------------------------------------
933
+
934
/** Upper bound on the number of cached transcripts. */
private static readonly ASR_TRANSCRIPT_CACHE_MAX = 256;
/** Time-to-live applied when the caller passes none: one hour, in ms. */
private static readonly ASR_TRANSCRIPT_DEFAULT_TTL_MS = 60 * 60 * 1_000;

/**
 * Transcript cache keyed by hash. Map insertion order is used as the
 * recency order when trimming to the cap.
 */
private readonly asrTranscriptCache = new Map<
  string,
  { text: string; expiresAt: number }
>();
940
+
941
/**
 * Reads a cached transcript.
 *
 * A hit is re-inserted so it moves to the newest end of the map; an entry
 * whose `expiresAt` is not in the future is removed and reported as a miss.
 *
 * @param hash - Cache key of the transcript.
 * @returns `{ text, live: true }` on a fresh hit, otherwise null.
 */
getCachedAsrTranscript(
  hash: string,
): { text: string; live?: boolean } | null {
  const cached = this.asrTranscriptCache.get(hash);
  if (cached === undefined) return null;

  const stale = cached.expiresAt <= this.now();
  // Removed in both cases: a stale entry is dropped for good, a live one
  // is put back below so it becomes the most recent key.
  this.asrTranscriptCache.delete(hash);
  if (stale) return null;

  this.asrTranscriptCache.set(hash, cached);
  return { text: cached.text, live: true };
}
955
+
956
/**
 * Stores (or refreshes) a transcript and trims the cache to its cap.
 *
 * @param hash - Cache key of the transcript.
 * @param entry - Transcript text to store.
 * @param ttlMs - Time-to-live in milliseconds; defaults to
 *   `ASR_TRANSCRIPT_DEFAULT_TTL_MS`.
 */
setCachedAsrTranscript(
  hash: string,
  entry: { text: string },
  ttlMs?: number,
): void {
  const ttl = ttlMs ?? MemoryArbiter.ASR_TRANSCRIPT_DEFAULT_TTL_MS;
  // `Map.set` on an existing key keeps the key's original insertion
  // position. Without this delete, a transcript that was just refreshed
  // would still sit at the "oldest" end and be the first one trimmed.
  this.asrTranscriptCache.delete(hash);
  this.asrTranscriptCache.set(hash, {
    text: entry.text,
    expiresAt: this.now() + ttl,
  });
  // Trim from the oldest end until the cap is respected again.
  while (
    this.asrTranscriptCache.size > MemoryArbiter.ASR_TRANSCRIPT_CACHE_MAX
  ) {
    const oldest = this.asrTranscriptCache.keys().next().value;
    if (oldest === undefined) break;
    this.asrTranscriptCache.delete(oldest);
  }
}
974
+ }
975
+
976
/**
 * Module-level slot holding the process-wide arbiter. It starts out empty
 * and is written only through `setMemoryArbiter`; `getMemoryArbiter` and
 * `tryGetMemoryArbiter` read it.
 */
let globalArbiter: MemoryArbiter | null = null;

/**
 * Installs the process-wide arbiter, or clears it when given null.
 *
 * @param arbiter - Instance to publish, or null to unset.
 */
export function setMemoryArbiter(arbiter: MemoryArbiter | null): void {
  globalArbiter = arbiter;
}
987
+
988
/**
 * Returns the process-wide arbiter.
 *
 * @returns The instance previously passed to `setMemoryArbiter`.
 * @throws Error when no arbiter is currently configured.
 */
export function getMemoryArbiter(): MemoryArbiter {
  const arbiter = globalArbiter;
  if (arbiter) return arbiter;
  throw new Error(
    "[memory-arbiter] no arbiter configured; call setMemoryArbiter() at plugin init",
  );
}
996
+
997
/**
 * Non-throwing variant of `getMemoryArbiter`, intended for tests and
 * diagnostics.
 *
 * @returns The configured arbiter, or null when none has been set.
 */
export function tryGetMemoryArbiter(): MemoryArbiter | null {
  const arbiter = globalArbiter;
  return arbiter;
}