@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (893)
  1. package/LICENSE +21 -0
  2. package/README.md +157 -0
  3. package/dist/actions/generate-media.d.ts +59 -0
  4. package/dist/actions/generate-media.d.ts.map +1 -0
  5. package/dist/actions/identify-speaker.d.ts +23 -0
  6. package/dist/actions/identify-speaker.d.ts.map +1 -0
  7. package/dist/actions/transcription-control.d.ts +29 -0
  8. package/dist/actions/transcription-control.d.ts.map +1 -0
  9. package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
  10. package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
  11. package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
  12. package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
  13. package/dist/adapters/capacitor-llama/index.d.ts +18 -0
  14. package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
  15. package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
  16. package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
  17. package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
  18. package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
  19. package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
  20. package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
  21. package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
  22. package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
  23. package/dist/adapters/capacitor-llama/types.d.ts +338 -0
  24. package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
  25. package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
  26. package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
  27. package/dist/backends/apple-foundation.d.ts +56 -0
  28. package/dist/backends/apple-foundation.d.ts.map +1 -0
  29. package/dist/index.d.ts +8 -37
  30. package/dist/index.d.ts.map +1 -0
  31. package/dist/index.js +38979 -430
  32. package/dist/index.js.map +217 -0
  33. package/dist/local-inference-routes.d.ts +47 -0
  34. package/dist/local-inference-routes.d.ts.map +1 -0
  35. package/dist/provider.d.ts +21 -0
  36. package/dist/provider.d.ts.map +1 -0
  37. package/dist/routes/compat-helpers.d.ts +18 -0
  38. package/dist/routes/compat-helpers.d.ts.map +1 -0
  39. package/dist/routes/family-member-route.d.ts +62 -0
  40. package/dist/routes/family-member-route.d.ts.map +1 -0
  41. package/dist/routes/index.d.ts +20 -0
  42. package/dist/routes/index.d.ts.map +1 -0
  43. package/dist/routes/index.js +42040 -0
  44. package/dist/routes/index.js.map +236 -0
  45. package/dist/routes/live-diarization-route.d.ts +33 -0
  46. package/dist/routes/live-diarization-route.d.ts.map +1 -0
  47. package/dist/routes/local-inference-asr-route.d.ts +4 -0
  48. package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
  49. package/dist/routes/local-inference-asr-transcribe.d.ts +20 -0
  50. package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  51. package/dist/routes/local-inference-compat-routes.d.ts +16 -0
  52. package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
  53. package/dist/routes/local-inference-tts-route.d.ts +7 -0
  54. package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/dist/routes/native-pcm-turn-route.d.ts +3 -0
  56. package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
  57. package/dist/routes/transcript-audio-store.d.ts +15 -0
  58. package/dist/routes/transcript-audio-store.d.ts.map +1 -0
  59. package/dist/routes/transcripts-routes.d.ts +44 -0
  60. package/dist/routes/transcripts-routes.d.ts.map +1 -0
  61. package/dist/routes/voice-first-run-routes.d.ts +62 -0
  62. package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
  63. package/dist/routes/voice-models-routes.d.ts +62 -0
  64. package/dist/routes/voice-models-routes.d.ts.map +1 -0
  65. package/dist/routes/voice-profile-plugin-routes.d.ts +19 -0
  66. package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  67. package/dist/routes/voice-profiles-management-routes.d.ts +52 -0
  68. package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
  69. package/dist/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/dist/runtime/embedding-manager-support.d.ts +77 -0
  72. package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
  73. package/dist/runtime/embedding-presets.d.ts +16 -0
  74. package/dist/runtime/embedding-presets.d.ts.map +1 -0
  75. package/dist/runtime/embedding-warmup-policy.d.ts +14 -0
  76. package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
  77. package/dist/runtime/ensure-local-inference-handler.d.ts +70 -0
  78. package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  79. package/dist/runtime/index.d.ts +15 -0
  80. package/dist/runtime/index.d.ts.map +1 -0
  81. package/dist/runtime/index.js +38768 -0
  82. package/dist/runtime/index.js.map +217 -0
  83. package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
  84. package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  85. package/dist/runtime/voice-entity-binding.d.ts +113 -0
  86. package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
  87. package/dist/services/active-model.d.ts +310 -0
  88. package/dist/services/active-model.d.ts.map +1 -0
  89. package/dist/services/asr-provenance.d.ts +5 -0
  90. package/dist/services/asr-provenance.d.ts.map +1 -0
  91. package/dist/services/assignments.d.ts +84 -0
  92. package/dist/services/assignments.d.ts.map +1 -0
  93. package/dist/services/backend-selector.d.ts +55 -0
  94. package/dist/services/backend-selector.d.ts.map +1 -0
  95. package/dist/services/backend.d.ts +440 -0
  96. package/dist/services/backend.d.ts.map +1 -0
  97. package/dist/services/bionic-host-loader.d.ts +67 -0
  98. package/dist/services/bionic-host-loader.d.ts.map +1 -0
  99. package/dist/services/bundled-models.d.ts +34 -0
  100. package/dist/services/bundled-models.d.ts.map +1 -0
  101. package/dist/services/cache-bridge.d.ts +206 -0
  102. package/dist/services/cache-bridge.d.ts.map +1 -0
  103. package/dist/services/catalog.d.ts +10 -0
  104. package/dist/services/catalog.d.ts.map +1 -0
  105. package/dist/services/checkpoint-client.d.ts +109 -0
  106. package/dist/services/checkpoint-client.d.ts.map +1 -0
  107. package/dist/services/checkpoint-manager.d.ts +217 -0
  108. package/dist/services/checkpoint-manager.d.ts.map +1 -0
  109. package/dist/services/cloud-fallback.d.ts +102 -0
  110. package/dist/services/cloud-fallback.d.ts.map +1 -0
  111. package/dist/services/context-fit.d.ts +36 -0
  112. package/dist/services/context-fit.d.ts.map +1 -0
  113. package/dist/services/conversation-registry.d.ts +142 -0
  114. package/dist/services/conversation-registry.d.ts.map +1 -0
  115. package/dist/services/desktop-fused-ffi-backend-runtime.d.ts +111 -0
  116. package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  117. package/dist/services/device-bridge.d.ts +188 -0
  118. package/dist/services/device-bridge.d.ts.map +1 -0
  119. package/dist/services/device-resource-metrics.d.ts +149 -0
  120. package/dist/services/device-resource-metrics.d.ts.map +1 -0
  121. package/dist/services/device-tier.d.ts +133 -0
  122. package/dist/services/device-tier.d.ts.map +1 -0
  123. package/dist/services/downloader.d.ts +94 -0
  124. package/dist/services/downloader.d.ts.map +1 -0
  125. package/dist/services/engine.d.ts +579 -0
  126. package/dist/services/engine.d.ts.map +1 -0
  127. package/dist/services/ensure-local-artifacts.d.ts +82 -0
  128. package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
  129. package/dist/services/external-scanner.d.ts +17 -0
  130. package/dist/services/external-scanner.d.ts.map +1 -0
  131. package/dist/services/ffi-llm-mock.d.ts +90 -0
  132. package/dist/services/ffi-llm-mock.d.ts.map +1 -0
  133. package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
  134. package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
  135. package/dist/services/ffi-streaming-backend.d.ts +201 -0
  136. package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
  137. package/dist/services/ffi-streaming-runner.d.ts +146 -0
  138. package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
  139. package/dist/services/gpu-autotune.d.ts +150 -0
  140. package/dist/services/gpu-autotune.d.ts.map +1 -0
  141. package/dist/services/gpu-detect.d.ts +56 -0
  142. package/dist/services/gpu-detect.d.ts.map +1 -0
  143. package/dist/services/handler-registry.d.ts +72 -0
  144. package/dist/services/handler-registry.d.ts.map +1 -0
  145. package/dist/services/hardware.d.ts +63 -0
  146. package/dist/services/hardware.d.ts.map +1 -0
  147. package/dist/services/image-description-runtime.d.ts +14 -0
  148. package/dist/services/image-description-runtime.d.ts.map +1 -0
  149. package/dist/services/imagegen/aosp-unavailable.d.ts +134 -0
  150. package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  151. package/dist/services/imagegen/backend-selector.d.ts +118 -0
  152. package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
  153. package/dist/services/imagegen/coreml-unavailable.d.ts +105 -0
  154. package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  155. package/dist/services/imagegen/errors.d.ts +16 -0
  156. package/dist/services/imagegen/errors.d.ts.map +1 -0
  157. package/dist/services/imagegen/index.d.ts +58 -0
  158. package/dist/services/imagegen/index.d.ts.map +1 -0
  159. package/dist/services/imagegen/mflux.d.ts +74 -0
  160. package/dist/services/imagegen/mflux.d.ts.map +1 -0
  161. package/dist/services/imagegen/sd-cpp.d.ts +181 -0
  162. package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
  163. package/dist/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  164. package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  165. package/dist/services/imagegen/types.d.ts +181 -0
  166. package/dist/services/imagegen/types.d.ts.map +1 -0
  167. package/dist/services/index.d.ts +31 -0
  168. package/dist/services/index.d.ts.map +1 -0
  169. package/dist/services/index.js +39453 -0
  170. package/dist/services/index.js.map +227 -0
  171. package/dist/services/inference-capabilities.d.ts +132 -0
  172. package/dist/services/inference-capabilities.d.ts.map +1 -0
  173. package/dist/services/inference-telemetry.d.ts +59 -0
  174. package/dist/services/inference-telemetry.d.ts.map +1 -0
  175. package/dist/services/ios-llama-streaming.d.ts +119 -0
  176. package/dist/services/ios-llama-streaming.d.ts.map +1 -0
  177. package/dist/services/kv-spill.d.ts +189 -0
  178. package/dist/services/kv-spill.d.ts.map +1 -0
  179. package/dist/services/latency-trace.d.ts +346 -0
  180. package/dist/services/latency-trace.d.ts.map +1 -0
  181. package/dist/services/lib-target.d.ts +55 -0
  182. package/dist/services/lib-target.d.ts.map +1 -0
  183. package/dist/services/live-signals.d.ts +86 -0
  184. package/dist/services/live-signals.d.ts.map +1 -0
  185. package/dist/services/llama-server-metrics.d.ts +114 -0
  186. package/dist/services/llama-server-metrics.d.ts.map +1 -0
  187. package/dist/services/llm-streaming-binding.d.ts +96 -0
  188. package/dist/services/llm-streaming-binding.d.ts.map +1 -0
  189. package/dist/services/load-args.d.ts +82 -0
  190. package/dist/services/load-args.d.ts.map +1 -0
  191. package/dist/services/manifest/index.d.ts +4 -0
  192. package/dist/services/manifest/index.d.ts.map +1 -0
  193. package/dist/services/manifest/schema.d.ts +903 -0
  194. package/dist/services/manifest/schema.d.ts.map +1 -0
  195. package/dist/services/manifest/types.d.ts +32 -0
  196. package/dist/services/manifest/types.d.ts.map +1 -0
  197. package/dist/services/manifest/validator.d.ts +66 -0
  198. package/dist/services/manifest/validator.d.ts.map +1 -0
  199. package/dist/services/memory-arbiter.d.ts +348 -0
  200. package/dist/services/memory-arbiter.d.ts.map +1 -0
  201. package/dist/services/memory-benchmark.d.ts +76 -0
  202. package/dist/services/memory-benchmark.d.ts.map +1 -0
  203. package/dist/services/memory-monitor.d.ts +128 -0
  204. package/dist/services/memory-monitor.d.ts.map +1 -0
  205. package/dist/services/memory-pressure.d.ts +130 -0
  206. package/dist/services/memory-pressure.d.ts.map +1 -0
  207. package/dist/services/mtp-doctor.d.ts +13 -0
  208. package/dist/services/mtp-doctor.d.ts.map +1 -0
  209. package/dist/services/network-policy.d.ts +127 -0
  210. package/dist/services/network-policy.d.ts.map +1 -0
  211. package/dist/services/paths.d.ts +6 -0
  212. package/dist/services/paths.d.ts.map +1 -0
  213. package/dist/services/planner-skeleton.d.ts +124 -0
  214. package/dist/services/planner-skeleton.d.ts.map +1 -0
  215. package/dist/services/providers.d.ts +38 -0
  216. package/dist/services/providers.d.ts.map +1 -0
  217. package/dist/services/ram-budget.d.ts +110 -0
  218. package/dist/services/ram-budget.d.ts.map +1 -0
  219. package/dist/services/readiness.d.ts +9 -0
  220. package/dist/services/readiness.d.ts.map +1 -0
  221. package/dist/services/recommendation.d.ts +111 -0
  222. package/dist/services/recommendation.d.ts.map +1 -0
  223. package/dist/services/registry.d.ts +33 -0
  224. package/dist/services/registry.d.ts.map +1 -0
  225. package/dist/services/router-handler.d.ts +92 -0
  226. package/dist/services/router-handler.d.ts.map +1 -0
  227. package/dist/services/routing-policy.d.ts +92 -0
  228. package/dist/services/routing-policy.d.ts.map +1 -0
  229. package/dist/services/routing-preferences.d.ts +8 -0
  230. package/dist/services/routing-preferences.d.ts.map +1 -0
  231. package/dist/services/runtime-target.d.ts +98 -0
  232. package/dist/services/runtime-target.d.ts.map +1 -0
  233. package/dist/services/service.d.ts +128 -0
  234. package/dist/services/service.d.ts.map +1 -0
  235. package/dist/services/session-pool.d.ts +72 -0
  236. package/dist/services/session-pool.d.ts.map +1 -0
  237. package/dist/services/structured-output/deterministic-repair.d.ts +23 -0
  238. package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
  239. package/dist/services/structured-output/index.d.ts +2 -0
  240. package/dist/services/structured-output/index.d.ts.map +1 -0
  241. package/dist/services/structured-output.d.ts +311 -0
  242. package/dist/services/structured-output.d.ts.map +1 -0
  243. package/dist/services/system-memory.d.ts +33 -0
  244. package/dist/services/system-memory.d.ts.map +1 -0
  245. package/dist/services/types.d.ts +19 -0
  246. package/dist/services/types.d.ts.map +1 -0
  247. package/dist/services/verify-on-device.d.ts +34 -0
  248. package/dist/services/verify-on-device.d.ts.map +1 -0
  249. package/dist/services/verify.d.ts +8 -0
  250. package/dist/services/verify.d.ts.map +1 -0
  251. package/dist/services/vision/aosp-unavailable.d.ts +115 -0
  252. package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
  253. package/dist/services/vision/capacitor-llama.d.ts +99 -0
  254. package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
  255. package/dist/services/vision/cloud-fallback.d.ts +47 -0
  256. package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
  257. package/dist/services/vision/hash.d.ts +71 -0
  258. package/dist/services/vision/hash.d.ts.map +1 -0
  259. package/dist/services/vision/index.d.ts +95 -0
  260. package/dist/services/vision/index.d.ts.map +1 -0
  261. package/dist/services/vision/llama-server.d.ts +73 -0
  262. package/dist/services/vision/llama-server.d.ts.map +1 -0
  263. package/dist/services/vision/types.d.ts +162 -0
  264. package/dist/services/vision/types.d.ts.map +1 -0
  265. package/dist/services/vision/vast-fallback.d.ts +18 -0
  266. package/dist/services/vision/vast-fallback.d.ts.map +1 -0
  267. package/dist/services/vision-embedding-cache.d.ts +98 -0
  268. package/dist/services/vision-embedding-cache.d.ts.map +1 -0
  269. package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
  270. package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
  271. package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
  272. package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
  273. package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
  274. package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
  275. package/dist/services/voice/audio-frame-consumer.d.ts +294 -0
  276. package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
  277. package/dist/services/voice/barge-in.d.ts +112 -0
  278. package/dist/services/voice/barge-in.d.ts.map +1 -0
  279. package/dist/services/voice/cancellation-coordinator.d.ts +127 -0
  280. package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
  281. package/dist/services/voice/checkpoint-manager.d.ts +199 -0
  282. package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
  283. package/dist/services/voice/checkpoint-policy.d.ts +178 -0
  284. package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
  285. package/dist/services/voice/corpus-augment.d.ts +111 -0
  286. package/dist/services/voice/corpus-augment.d.ts.map +1 -0
  287. package/dist/services/voice/corpus-generator.d.ts +134 -0
  288. package/dist/services/voice/corpus-generator.d.ts.map +1 -0
  289. package/dist/services/voice/diarization-error-rate.d.ts +40 -0
  290. package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
  291. package/dist/services/voice/e2e-harness.d.ts +297 -0
  292. package/dist/services/voice/e2e-harness.d.ts.map +1 -0
  293. package/dist/services/voice/eager-context-builder.d.ts +170 -0
  294. package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
  295. package/dist/services/voice/echo-delay.d.ts +67 -0
  296. package/dist/services/voice/echo-delay.d.ts.map +1 -0
  297. package/dist/services/voice/echo-metrics.d.ts +7 -0
  298. package/dist/services/voice/echo-metrics.d.ts.map +1 -0
  299. package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
  300. package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
  301. package/dist/services/voice/eliza1-eot-scorer.d.ts +124 -0
  302. package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  303. package/dist/services/voice/embedding-server.d.ts +37 -0
  304. package/dist/services/voice/embedding-server.d.ts.map +1 -0
  305. package/dist/services/voice/embedding.d.ts +132 -0
  306. package/dist/services/voice/embedding.d.ts.map +1 -0
  307. package/dist/services/voice/emotion-attribution.d.ts +68 -0
  308. package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
  309. package/dist/services/voice/engine-bridge.d.ts +762 -0
  310. package/dist/services/voice/engine-bridge.d.ts.map +1 -0
  311. package/dist/services/voice/eot-classifier-ggml.d.ts +179 -0
  312. package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  313. package/dist/services/voice/eot-classifier.d.ts +211 -0
  314. package/dist/services/voice/eot-classifier.d.ts.map +1 -0
  315. package/dist/services/voice/errors.d.ts +20 -0
  316. package/dist/services/voice/errors.d.ts.map +1 -0
  317. package/dist/services/voice/expressive-tags.d.ts +158 -0
  318. package/dist/services/voice/expressive-tags.d.ts.map +1 -0
  319. package/dist/services/voice/ffi-bindings.d.ts +696 -0
  320. package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
  321. package/dist/services/voice/first-line-cache.d.ts +181 -0
  322. package/dist/services/voice/first-line-cache.d.ts.map +1 -0
  323. package/dist/services/voice/fused-eot-scorer.d.ts +51 -0
  324. package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
  325. package/dist/services/voice/index.d.ts +96 -0
  326. package/dist/services/voice/index.d.ts.map +1 -0
  327. package/dist/services/voice/kokoro/index.d.ts +24 -0
  328. package/dist/services/voice/kokoro/index.d.ts.map +1 -0
  329. package/dist/services/voice/kokoro/kokoro-backend.d.ts +87 -0
  330. package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  331. package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  332. package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  333. package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  334. package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  335. package/dist/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  336. package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  337. package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
  338. package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
  339. package/dist/services/voice/kokoro/phonemizer.d.ts +50 -0
  340. package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  341. package/dist/services/voice/kokoro/pick-runtime.d.ts +61 -0
  342. package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  343. package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
  344. package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  345. package/dist/services/voice/kokoro/types.d.ts +82 -0
  346. package/dist/services/voice/kokoro/types.d.ts.map +1 -0
  347. package/dist/services/voice/kokoro/voice-presets.d.ts +23 -0
  348. package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  349. package/dist/services/voice/kokoro/voices.d.ts +30 -0
  350. package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
  351. package/dist/services/voice/lifecycle.d.ts +135 -0
  352. package/dist/services/voice/lifecycle.d.ts.map +1 -0
  353. package/dist/services/voice/live-diarization-session.d.ts +196 -0
  354. package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
  355. package/dist/services/voice/metric-math.d.ts +10 -0
  356. package/dist/services/voice/metric-math.d.ts.map +1 -0
  357. package/dist/services/voice/mic-source.d.ts +136 -0
  358. package/dist/services/voice/mic-source.d.ts.map +1 -0
  359. package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
  360. package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
  361. package/dist/services/voice/optimistic-policy.d.ts +109 -0
  362. package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
  363. package/dist/services/voice/optimistic-rollback.d.ts +151 -0
  364. package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
  365. package/dist/services/voice/partial-stabilizer.d.ts +73 -0
  366. package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
  367. package/dist/services/voice/phoneme-tokenizer.d.ts +49 -0
  368. package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  369. package/dist/services/voice/phrase-cache.d.ts +76 -0
  370. package/dist/services/voice/phrase-cache.d.ts.map +1 -0
  371. package/dist/services/voice/phrase-chunker.d.ts +62 -0
  372. package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
  373. package/dist/services/voice/pipeline-impls.d.ts +151 -0
  374. package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
  375. package/dist/services/voice/pipeline.d.ts +216 -0
  376. package/dist/services/voice/pipeline.d.ts.map +1 -0
  377. package/dist/services/voice/prefill-client.d.ts +123 -0
  378. package/dist/services/voice/prefill-client.d.ts.map +1 -0
  379. package/dist/services/voice/prefix-preserving-queue.d.ts +113 -0
  380. package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  381. package/dist/services/voice/profile-store.d.ts +248 -0
  382. package/dist/services/voice/profile-store.d.ts.map +1 -0
  383. package/dist/services/voice/ring-buffer.d.ts +40 -0
  384. package/dist/services/voice/ring-buffer.d.ts.map +1 -0
  385. package/dist/services/voice/rollback-queue.d.ts +24 -0
  386. package/dist/services/voice/rollback-queue.d.ts.map +1 -0
  387. package/dist/services/voice/samantha-preset-placeholder.d.ts +67 -0
  388. package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  389. package/dist/services/voice/samantha-preset-regenerator.d.ts +87 -0
  390. package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  391. package/dist/services/voice/scheduler.d.ts +146 -0
  392. package/dist/services/voice/scheduler.d.ts.map +1 -0
  393. package/dist/services/voice/self-voice-imprint.d.ts +33 -0
  394. package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
  395. package/dist/services/voice/shared-resources.d.ts +204 -0
  396. package/dist/services/voice/shared-resources.d.ts.map +1 -0
  397. package/dist/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  398. package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  399. package/dist/services/voice/speaker/diarizer-fused.d.ts +59 -0
  400. package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  401. package/dist/services/voice/speaker/diarizer.d.ts +75 -0
  402. package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
  403. package/dist/services/voice/speaker/encoder-fused.d.ts +60 -0
  404. package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  405. package/dist/services/voice/speaker/encoder-ggml.d.ts +33 -0
  406. package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  407. package/dist/services/voice/speaker/encoder.d.ts +37 -0
  408. package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
  409. package/dist/services/voice/speaker-imprint.d.ts +83 -0
  410. package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
  411. package/dist/services/voice/speaker-preset-cache.d.ts +77 -0
  412. package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
  413. package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
  414. package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
  415. package/dist/services/voice/system-audio-sink.d.ts +73 -0
  416. package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
  417. package/dist/services/voice/transcriber.d.ts +244 -0
  418. package/dist/services/voice/transcriber.d.ts.map +1 -0
  419. package/dist/services/voice/transcript-knowledge.d.ts +37 -0
  420. package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
  421. package/dist/services/voice/transcript-service.d.ts +60 -0
  422. package/dist/services/voice/transcript-service.d.ts.map +1 -0
  423. package/dist/services/voice/transcript-store.d.ts +64 -0
  424. package/dist/services/voice/transcript-store.d.ts.map +1 -0
  425. package/dist/services/voice/turn-controller.d.ts +183 -0
  426. package/dist/services/voice/turn-controller.d.ts.map +1 -0
  427. package/dist/services/voice/types.d.ts +643 -0
  428. package/dist/services/voice/types.d.ts.map +1 -0
  429. package/dist/services/voice/vad.d.ts +283 -0
  430. package/dist/services/voice/vad.d.ts.map +1 -0
  431. package/dist/services/voice/voice-budget.d.ts +241 -0
  432. package/dist/services/voice/voice-budget.d.ts.map +1 -0
  433. package/dist/services/voice/voice-emotion-classifier.d.ts +95 -0
  434. package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  435. package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
  436. package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
  437. package/dist/services/voice/voice-preset-format.d.ts +158 -0
  438. package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
  439. package/dist/services/voice/voice-profile-artifact.d.ts +116 -0
  440. package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
  441. package/dist/services/voice/voice-profile-routes.d.ts +83 -0
  442. package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
  443. package/dist/services/voice/voice-scenario.d.ts +131 -0
  444. package/dist/services/voice/voice-scenario.d.ts.map +1 -0
  445. package/dist/services/voice/voice-state-machine.d.ts +364 -0
  446. package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
  447. package/dist/services/voice/voice-workbench-report.d.ts +117 -0
  448. package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
  449. package/dist/services/voice/wake-word-ggml.d.ts +100 -0
  450. package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
  451. package/dist/services/voice/wake-word.d.ts +255 -0
  452. package/dist/services/voice/wake-word.d.ts.map +1 -0
  453. package/dist/services/voice/wav-codec.d.ts +11 -0
  454. package/dist/services/voice/wav-codec.d.ts.map +1 -0
  455. package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
  456. package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
  457. package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
  458. package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
  459. package/dist/services/voice/workbench-logic-services.d.ts +36 -0
  460. package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
  461. package/dist/services/voice/workbench-real-services.d.ts +17 -0
  462. package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
  463. package/dist/services/voice/workbench-scenarios.d.ts +24 -0
  464. package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
  465. package/dist/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  466. package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  467. package/dist/services/voice-model-updater.d.ts +240 -0
  468. package/dist/services/voice-model-updater.d.ts.map +1 -0
  469. package/dist/services/voice-prewarm.d.ts +3 -0
  470. package/dist/services/voice-prewarm.d.ts.map +1 -0
  471. package/dist/voice-workbench.d.ts +18 -0
  472. package/dist/voice-workbench.d.ts.map +1 -0
  473. package/dist/voice-workbench.js +5259 -0
  474. package/dist/voice-workbench.js.map +34 -0
  475. package/package.json +101 -15
  476. package/registry-entry.json +137 -0
  477. package/src/actions/generate-media.ts +647 -0
  478. package/src/actions/identify-speaker.ts +171 -0
  479. package/src/actions/transcription-control.test.ts +100 -0
  480. package/src/actions/transcription-control.ts +127 -0
  481. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  482. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  483. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  484. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  485. package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
  486. package/src/adapters/capacitor-llama/environment.ts +71 -0
  487. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  488. package/src/adapters/capacitor-llama/index.ts +831 -0
  489. package/src/adapters/capacitor-llama/loader.ts +109 -0
  490. package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
  491. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  492. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  493. package/src/adapters/capacitor-llama/types.ts +374 -0
  494. package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
  495. package/src/backends/apple-foundation.ts +127 -0
  496. package/src/index.ts +62 -0
  497. package/src/local-inference-routes.test.ts +390 -0
  498. package/src/local-inference-routes.ts +1625 -0
  499. package/src/provider.ts +1111 -0
  500. package/src/routes/compat-helpers.ts +275 -0
  501. package/src/routes/family-member-route.ts +353 -0
  502. package/src/routes/index.ts +61 -0
  503. package/src/routes/live-diarization-route.test.ts +347 -0
  504. package/src/routes/live-diarization-route.ts +198 -0
  505. package/src/routes/local-inference-asr-route.test.ts +246 -0
  506. package/src/routes/local-inference-asr-route.ts +166 -0
  507. package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
  508. package/src/routes/local-inference-asr-transcribe.ts +97 -0
  509. package/src/routes/local-inference-compat-routes.test.ts +485 -0
  510. package/src/routes/local-inference-compat-routes.ts +775 -0
  511. package/src/routes/local-inference-tts-route.test.ts +179 -0
  512. package/src/routes/local-inference-tts-route.ts +230 -0
  513. package/src/routes/native-pcm-turn-route.test.ts +136 -0
  514. package/src/routes/native-pcm-turn-route.ts +121 -0
  515. package/src/routes/transcript-audio-store.ts +27 -0
  516. package/src/routes/transcripts-routes.test.ts +195 -0
  517. package/src/routes/transcripts-routes.ts +191 -0
  518. package/src/routes/voice-first-run-routes.ts +524 -0
  519. package/src/routes/voice-models-routes.ts +554 -0
  520. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  521. package/src/routes/voice-profiles-management-routes.ts +476 -0
  522. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  523. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  524. package/src/runtime/bionic-wire-encoding.test.ts +147 -0
  525. package/src/runtime/capacitor-llama.d.ts +25 -0
  526. package/src/runtime/embedding-manager-support.ts +497 -0
  527. package/src/runtime/embedding-presets.ts +81 -0
  528. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  529. package/src/runtime/embedding-warmup-policy.ts +48 -0
  530. package/src/runtime/ensure-local-inference-handler.test.ts +726 -0
  531. package/src/runtime/ensure-local-inference-handler.ts +1640 -0
  532. package/src/runtime/index.ts +36 -0
  533. package/src/runtime/mobile-local-inference-gate.test.ts +152 -0
  534. package/src/runtime/mobile-local-inference-gate.ts +99 -0
  535. package/src/runtime/voice-entity-binding.transcript.test.ts +98 -0
  536. package/src/runtime/voice-entity-binding.ts +368 -0
  537. package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
  538. package/src/services/README.md +71 -0
  539. package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
  540. package/src/services/__tests__/backend-selector.test.ts +101 -0
  541. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  542. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  543. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  544. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  545. package/src/services/__tests__/runtime-target.test.ts +176 -0
  546. package/src/services/active-model-context-fit.test.ts +125 -0
  547. package/src/services/active-model-switch-rollback.test.ts +183 -0
  548. package/src/services/active-model.ts +1416 -0
  549. package/src/services/asr-provenance.ts +68 -0
  550. package/src/services/assignment-validation.test.ts +118 -0
  551. package/src/services/assignments.test.ts +106 -0
  552. package/src/services/assignments.ts +278 -0
  553. package/src/services/backend-selector.ts +95 -0
  554. package/src/services/backend.test.ts +84 -0
  555. package/src/services/backend.ts +791 -0
  556. package/src/services/bionic-host-loader.test.ts +226 -0
  557. package/src/services/bionic-host-loader.ts +252 -0
  558. package/src/services/bundled-models.ts +129 -0
  559. package/src/services/cache-bridge.test.ts +516 -0
  560. package/src/services/cache-bridge.ts +423 -0
  561. package/src/services/catalog.test.ts +259 -0
  562. package/src/services/catalog.ts +33 -0
  563. package/src/services/checkpoint-client.ts +258 -0
  564. package/src/services/checkpoint-manager.ts +474 -0
  565. package/src/services/cloud-fallback.ts +230 -0
  566. package/src/services/context-fit.test.ts +121 -0
  567. package/src/services/context-fit.ts +113 -0
  568. package/src/services/conversation-registry.test.ts +235 -0
  569. package/src/services/conversation-registry.ts +264 -0
  570. package/src/services/desktop-fused-ffi-backend-runtime.ts +431 -0
  571. package/src/services/device-bridge.ts +1237 -0
  572. package/src/services/device-resource-metrics.test.ts +98 -0
  573. package/src/services/device-resource-metrics.ts +346 -0
  574. package/src/services/device-tier.test.ts +458 -0
  575. package/src/services/device-tier.ts +502 -0
  576. package/src/services/downloader.test.ts +888 -0
  577. package/src/services/downloader.ts +1039 -0
  578. package/src/services/engine-direct-bundle.test.ts +90 -0
  579. package/src/services/engine-streaming.test.ts +80 -0
  580. package/src/services/engine.ts +2096 -0
  581. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  582. package/src/services/ensure-local-artifacts.test.ts +368 -0
  583. package/src/services/ensure-local-artifacts.ts +351 -0
  584. package/src/services/external-scanner.ts +312 -0
  585. package/src/services/ffi-llm-mock.ts +354 -0
  586. package/src/services/ffi-llm-streaming-abi.ts +445 -0
  587. package/src/services/ffi-streaming-backend.ts +418 -0
  588. package/src/services/ffi-streaming-runner.test.ts +220 -0
  589. package/src/services/ffi-streaming-runner.ts +407 -0
  590. package/src/services/ffi-unload-ordering.test.ts +166 -0
  591. package/src/services/fused-eliza1-no-regression.test.ts +144 -0
  592. package/src/services/gpu-autotune.ts +534 -0
  593. package/src/services/gpu-detect.ts +139 -0
  594. package/src/services/handler-registry.ts +240 -0
  595. package/src/services/hardware.test.ts +236 -0
  596. package/src/services/hardware.ts +438 -0
  597. package/src/services/image-description-runtime.test.ts +61 -0
  598. package/src/services/image-description-runtime.ts +118 -0
  599. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  600. package/src/services/imagegen/backend-selector.test.ts +190 -0
  601. package/src/services/imagegen/backend-selector.ts +277 -0
  602. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  603. package/src/services/imagegen/errors.ts +40 -0
  604. package/src/services/imagegen/index.ts +144 -0
  605. package/src/services/imagegen/mflux.ts +313 -0
  606. package/src/services/imagegen/sd-cpp.ts +715 -0
  607. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  608. package/src/services/imagegen/types.ts +193 -0
  609. package/src/services/index.ts +229 -0
  610. package/src/services/inference-capabilities.test.ts +75 -0
  611. package/src/services/inference-capabilities.ts +204 -0
  612. package/src/services/inference-telemetry.ts +143 -0
  613. package/src/services/ios-llama-streaming.ts +248 -0
  614. package/src/services/kv-spill.test.ts +222 -0
  615. package/src/services/kv-spill.ts +357 -0
  616. package/src/services/latency-trace.test.ts +266 -0
  617. package/src/services/latency-trace.ts +844 -0
  618. package/src/services/lib-target.test.ts +145 -0
  619. package/src/services/lib-target.ts +102 -0
  620. package/src/services/live-signals.test.ts +132 -0
  621. package/src/services/live-signals.ts +177 -0
  622. package/src/services/llama-server-metrics.test.ts +168 -0
  623. package/src/services/llama-server-metrics.ts +304 -0
  624. package/src/services/llm-streaming-binding.ts +136 -0
  625. package/src/services/load-args.ts +81 -0
  626. package/src/services/manifest/eliza-1.manifest.v1.json +790 -0
  627. package/src/services/manifest/index.ts +72 -0
  628. package/src/services/manifest/manifest.test.ts +791 -0
  629. package/src/services/manifest/schema.ts +761 -0
  630. package/src/services/manifest/types.ts +61 -0
  631. package/src/services/manifest/validator.ts +633 -0
  632. package/src/services/memory-arbiter.test.ts +558 -0
  633. package/src/services/memory-arbiter.ts +991 -0
  634. package/src/services/memory-benchmark.test.ts +91 -0
  635. package/src/services/memory-benchmark.ts +354 -0
  636. package/src/services/memory-monitor.test.ts +232 -0
  637. package/src/services/memory-monitor.ts +309 -0
  638. package/src/services/memory-pressure.ts +414 -0
  639. package/src/services/mtp-doctor.ts +86 -0
  640. package/src/services/network-policy.ts +346 -0
  641. package/src/services/paths.ts +25 -0
  642. package/src/services/planner-skeleton.ts +175 -0
  643. package/src/services/providers.ts +507 -0
  644. package/src/services/ram-budget-cache.test.ts +164 -0
  645. package/src/services/ram-budget.ts +309 -0
  646. package/src/services/readiness.test.ts +87 -0
  647. package/src/services/readiness.ts +238 -0
  648. package/src/services/recommendation.test.ts +216 -0
  649. package/src/services/recommendation.ts +671 -0
  650. package/src/services/registry.ts +157 -0
  651. package/src/services/required-kernels-gate.test.ts +64 -0
  652. package/src/services/router-handler.test.ts +45 -0
  653. package/src/services/router-handler.ts +426 -0
  654. package/src/services/routing-policy.test.ts +352 -0
  655. package/src/services/routing-policy.ts +367 -0
  656. package/src/services/routing-preferences.ts +17 -0
  657. package/src/services/runtime-target.ts +154 -0
  658. package/src/services/service.test.ts +223 -0
  659. package/src/services/service.ts +750 -0
  660. package/src/services/session-pool.ts +153 -0
  661. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  662. package/src/services/structured-output/deterministic-repair.ts +443 -0
  663. package/src/services/structured-output/index.ts +4 -0
  664. package/src/services/structured-output.test.ts +483 -0
  665. package/src/services/structured-output.ts +712 -0
  666. package/src/services/system-memory.test.ts +47 -0
  667. package/src/services/system-memory.ts +67 -0
  668. package/src/services/transcription-priority.test.ts +211 -0
  669. package/src/services/types.ts +59 -0
  670. package/src/services/verify-on-device.test.ts +87 -0
  671. package/src/services/verify-on-device.ts +127 -0
  672. package/src/services/verify.ts +13 -0
  673. package/src/services/vision/aosp-unavailable.ts +163 -0
  674. package/src/services/vision/capacitor-llama.ts +255 -0
  675. package/src/services/vision/cloud-fallback.test.ts +243 -0
  676. package/src/services/vision/cloud-fallback.ts +268 -0
  677. package/src/services/vision/fallback-chain.test.ts +86 -0
  678. package/src/services/vision/hash.ts +157 -0
  679. package/src/services/vision/index.ts +251 -0
  680. package/src/services/vision/llama-server.ts +177 -0
  681. package/src/services/vision/types.ts +163 -0
  682. package/src/services/vision/vast-fallback.ts +127 -0
  683. package/src/services/vision-embedding-cache.ts +189 -0
  684. package/src/services/voice/VOICE_WORKBENCH.md +133 -0
  685. package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
  686. package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
  687. package/src/services/voice/__test-helpers__/synthetic-speech.ts +194 -0
  688. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  689. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  690. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  691. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  692. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  693. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  694. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  695. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  696. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  697. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  698. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
  699. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  700. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  701. package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
  702. package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
  703. package/src/services/voice/asr-timed.real.test.ts +139 -0
  704. package/src/services/voice/audio-frame-consumer.test.ts +669 -0
  705. package/src/services/voice/audio-frame-consumer.ts +651 -0
  706. package/src/services/voice/barge-in.test.ts +244 -0
  707. package/src/services/voice/barge-in.ts +335 -0
  708. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  709. package/src/services/voice/cancellation-coordinator.ts +269 -0
  710. package/src/services/voice/checkpoint-manager.ts +401 -0
  711. package/src/services/voice/checkpoint-policy.ts +336 -0
  712. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  713. package/src/services/voice/corpus-augment.test.ts +276 -0
  714. package/src/services/voice/corpus-augment.ts +451 -0
  715. package/src/services/voice/corpus-generator.test.ts +201 -0
  716. package/src/services/voice/corpus-generator.ts +413 -0
  717. package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
  718. package/src/services/voice/diarization-error-rate.test.ts +100 -0
  719. package/src/services/voice/diarization-error-rate.ts +249 -0
  720. package/src/services/voice/e2e-harness.der.test.ts +94 -0
  721. package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
  722. package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
  723. package/src/services/voice/e2e-harness.test.ts +182 -0
  724. package/src/services/voice/e2e-harness.ts +902 -0
  725. package/src/services/voice/eager-context-builder.ts +262 -0
  726. package/src/services/voice/echo-delay.test.ts +118 -0
  727. package/src/services/voice/echo-delay.ts +135 -0
  728. package/src/services/voice/echo-metrics.test.ts +17 -0
  729. package/src/services/voice/echo-metrics.ts +20 -0
  730. package/src/services/voice/echo-reference-buffer.test.ts +86 -0
  731. package/src/services/voice/echo-reference-buffer.ts +165 -0
  732. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  733. package/src/services/voice/embedding-server.ts +200 -0
  734. package/src/services/voice/embedding.test.ts +131 -0
  735. package/src/services/voice/embedding.ts +242 -0
  736. package/src/services/voice/emotion-attribution.test.ts +129 -0
  737. package/src/services/voice/emotion-attribution.ts +361 -0
  738. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  739. package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
  740. package/src/services/voice/engine-bridge.test.ts +384 -0
  741. package/src/services/voice/engine-bridge.ts +2343 -0
  742. package/src/services/voice/eot-classifier-ggml.ts +569 -0
  743. package/src/services/voice/eot-classifier.test.ts +98 -0
  744. package/src/services/voice/eot-classifier.ts +422 -0
  745. package/src/services/voice/errors.ts +34 -0
  746. package/src/services/voice/expressive-tags.asr.test.ts +77 -0
  747. package/src/services/voice/expressive-tags.test.ts +102 -0
  748. package/src/services/voice/expressive-tags.ts +405 -0
  749. package/src/services/voice/ffi-bindings.test.ts +735 -0
  750. package/src/services/voice/ffi-bindings.ts +3387 -0
  751. package/src/services/voice/first-line-cache.ts +725 -0
  752. package/src/services/voice/fused-eot-scorer.ts +139 -0
  753. package/src/services/voice/index.ts +502 -0
  754. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +262 -0
  755. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +236 -0
  756. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  757. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  758. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  759. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  760. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  761. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +67 -0
  762. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  763. package/src/services/voice/kokoro/index.ts +79 -0
  764. package/src/services/voice/kokoro/kokoro-backend.ts +223 -0
  765. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  766. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  767. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  768. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  769. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  770. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  771. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  772. package/src/services/voice/kokoro/runtime-selection.ts +64 -0
  773. package/src/services/voice/kokoro/types.ts +95 -0
  774. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  775. package/src/services/voice/kokoro/voices.ts +64 -0
  776. package/src/services/voice/lifecycle.test.ts +315 -0
  777. package/src/services/voice/lifecycle.ts +301 -0
  778. package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
  779. package/src/services/voice/live-diarization-session.ts +622 -0
  780. package/src/services/voice/metric-math.test.ts +61 -0
  781. package/src/services/voice/metric-math.ts +25 -0
  782. package/src/services/voice/mic-source.test.ts +210 -0
  783. package/src/services/voice/mic-source.ts +503 -0
  784. package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
  785. package/src/services/voice/nlms-echo-canceller.ts +317 -0
  786. package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
  787. package/src/services/voice/optimistic-policy.test.ts +101 -0
  788. package/src/services/voice/optimistic-policy.ts +192 -0
  789. package/src/services/voice/optimistic-rollback.ts +343 -0
  790. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  791. package/src/services/voice/partial-stabilizer.ts +140 -0
  792. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  793. package/src/services/voice/phrase-cache.test.ts +242 -0
  794. package/src/services/voice/phrase-cache.ts +186 -0
  795. package/src/services/voice/phrase-chunker.test.ts +239 -0
  796. package/src/services/voice/phrase-chunker.ts +281 -0
  797. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  798. package/src/services/voice/pipeline-impls.test.ts +292 -0
  799. package/src/services/voice/pipeline-impls.ts +315 -0
  800. package/src/services/voice/pipeline.ts +504 -0
  801. package/src/services/voice/prefill-client.ts +316 -0
  802. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  803. package/src/services/voice/profile-store.ts +887 -0
  804. package/src/services/voice/real-audio-decode.test.ts +148 -0
  805. package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
  806. package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
  807. package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
  808. package/src/services/voice/ring-buffer.test.ts +129 -0
  809. package/src/services/voice/ring-buffer.ts +123 -0
  810. package/src/services/voice/rollback-queue.ts +74 -0
  811. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  812. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  813. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  814. package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
  815. package/src/services/voice/scheduler.t2.test.ts +141 -0
  816. package/src/services/voice/scheduler.ts +927 -0
  817. package/src/services/voice/self-voice-imprint.test.ts +59 -0
  818. package/src/services/voice/self-voice-imprint.ts +102 -0
  819. package/src/services/voice/shared-resources.ts +343 -0
  820. package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
  821. package/src/services/voice/speaker/attribution-pipeline.ts +449 -0
  822. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  823. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  824. package/src/services/voice/speaker/diarizer.ts +218 -0
  825. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  826. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  827. package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
  828. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  829. package/src/services/voice/speaker/encoder.ts +105 -0
  830. package/src/services/voice/speaker-imprint.test.ts +185 -0
  831. package/src/services/voice/speaker-imprint.ts +312 -0
  832. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  833. package/src/services/voice/speaker-preset-cache.ts +195 -0
  834. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  835. package/src/services/voice/system-audio-sink.test.ts +29 -0
  836. package/src/services/voice/system-audio-sink.ts +366 -0
  837. package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
  838. package/src/services/voice/transcriber.test.ts +392 -0
  839. package/src/services/voice/transcriber.ts +704 -0
  840. package/src/services/voice/transcript-knowledge.test.ts +68 -0
  841. package/src/services/voice/transcript-knowledge.ts +75 -0
  842. package/src/services/voice/transcript-service.test.ts +195 -0
  843. package/src/services/voice/transcript-service.ts +205 -0
  844. package/src/services/voice/transcript-store.test.ts +189 -0
  845. package/src/services/voice/transcript-store.ts +164 -0
  846. package/src/services/voice/turn-controller.test.ts +575 -0
  847. package/src/services/voice/turn-controller.ts +596 -0
  848. package/src/services/voice/types.ts +699 -0
  849. package/src/services/voice/vad.test.ts +498 -0
  850. package/src/services/voice/vad.ts +832 -0
  851. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  852. package/src/services/voice/voice-budget.test.ts +415 -0
  853. package/src/services/voice/voice-budget.ts +635 -0
  854. package/src/services/voice/voice-duet.test.ts +375 -0
  855. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  856. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  857. package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
  858. package/src/services/voice/voice-preload-predictor.test.ts +130 -0
  859. package/src/services/voice/voice-preload-predictor.ts +113 -0
  860. package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
  861. package/src/services/voice/voice-preset-format.test.ts +75 -0
  862. package/src/services/voice/voice-preset-format.ts +713 -0
  863. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  864. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  865. package/src/services/voice/voice-profile-artifact.ts +518 -0
  866. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  867. package/src/services/voice/voice-profile-routes.ts +425 -0
  868. package/src/services/voice/voice-scenario.test.ts +159 -0
  869. package/src/services/voice/voice-scenario.ts +280 -0
  870. package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
  871. package/src/services/voice/voice-state-machine.ts +727 -0
  872. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  873. package/src/services/voice/voice-workbench-report.ts +367 -0
  874. package/src/services/voice/voice-workbench.test.ts +158 -0
  875. package/src/services/voice/voice.test.ts +1070 -0
  876. package/src/services/voice/wake-word-ggml.ts +319 -0
  877. package/src/services/voice/wake-word.test.ts +298 -0
  878. package/src/services/voice/wake-word.ts +554 -0
  879. package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
  880. package/src/services/voice/wav-codec.test.ts +32 -0
  881. package/src/services/voice/wav-codec.ts +101 -0
  882. package/src/services/voice/workbench-entrypoint.test.ts +55 -0
  883. package/src/services/voice/workbench-entrypoint.ts +88 -0
  884. package/src/services/voice/workbench-headless-runner.test.ts +162 -0
  885. package/src/services/voice/workbench-headless-runner.ts +396 -0
  886. package/src/services/voice/workbench-logic-services.test.ts +225 -0
  887. package/src/services/voice/workbench-logic-services.ts +184 -0
  888. package/src/services/voice/workbench-real-services.ts +629 -0
  889. package/src/services/voice/workbench-scenarios.ts +407 -0
  890. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  891. package/src/services/voice-model-updater.ts +724 -0
  892. package/src/services/voice-prewarm.ts +51 -0
  893. package/src/voice-workbench.ts +71 -0
@@ -0,0 +1,991 @@
1
+ /**
2
+ * Memory Arbiter — single in-process owner of every model handle (text,
3
+ * embedding, vision-language, ASR, TTS, image generation) for the local
4
+ * inference stack. WS1 deliverable.
5
+ *
6
+ * Why this exists
7
+ * ---------------
8
+ * The current code has every plugin loading independently:
9
+ *
10
+ * - `plugin-local-inference` owns the text + voice GGUFs through
11
+ * `LocalInferenceEngine` + `SharedResourceRegistry`.
12
+ * - `plugin-vision` loads its own TF.js / face-api models with no
13
+ * shared budget.
14
+ * - `plugin-aosp-local-inference` runs the bun:ffi llama.cpp binding
15
+ * in its own world, also with no shared budget.
16
+ *
17
+ * On a 6 GB iPhone or an 8 GB low-tier Android, that means loading a
18
+ * vision model on top of a text model gets the app jetsam'd / lmkd-killed
19
+ * before the planner even runs.
20
+ *
21
+ * The arbiter is the single seam every consumer goes through to acquire
22
+ * a model. It owns the eviction policy across modalities (the existing
23
+ * `ResidentModelRole` priority table + memory-pressure signals from the
24
+ * platform), it owns the queue for capability swaps (a vision-describe
25
+ * arriving while the text model is generating waits its turn rather than
26
+ * triggering a parallel load that OOMs), and it owns the
27
+ * `vision-embedding-cache` so repeat frames don't re-pay the projector.
28
+ *
29
+ * What this module does NOT do
30
+ * ----------------------------
31
+ * - It does not implement any model loader. Loaders are passed in via
32
+ * `registerCapability(...)` by the plugins that own the binding
33
+ * (plugin-local-inference for text/embedding, plugin-vision for
34
+ * vision-describe, plugin-image-gen for diffusion, etc.).
35
+ * - It does not download models, probe hardware, or render UI. Those
36
+ * are the existing `Downloader`, `probeHardware`, and Settings UI
37
+ * concerns.
38
+ * - It does not run on a worker thread. One process, one event loop —
39
+ * the arbiter coordinates async work via promises only.
40
+ *
41
+ * Consumer contract
42
+ * -----------------
43
+ * Capability handlers register themselves at boot:
44
+ *
45
+ * ```ts
46
+ * arbiter.registerCapability({
47
+ * capability: "vision-describe",
48
+ * residentRole: "vision",
49
+ * load: async (modelKey) => loadVisionModel(modelKey),
50
+ * unload: async (handle) => handle.dispose(),
51
+ * run: async (handle, req) => handle.describe(req.imageBytes),
52
+ * });
53
+ * ```
54
+ *
55
+ * Then anyone can call:
56
+ *
57
+ * ```ts
58
+ * const result = await arbiter.requestVisionDescribe({
59
+ * modelKey: "gemma-vl-4b",
60
+ * imageBytes: pixels,
61
+ * });
62
+ * ```
63
+ *
64
+ * The arbiter handles:
65
+ * 1. Acquiring (or reusing) the handle for `gemma-vl-4b`.
66
+ * 2. If a different capability holds the active model and we need to
67
+ * swap, evicting it first.
68
+ * 3. Running the request.
69
+ * 4. Releasing the handle (refcounted; the handle stays loaded until
70
+ * pressure or idle eviction reclaims it).
71
+ *
72
+ * Telemetry
73
+ * ---------
74
+ * The arbiter emits typed events:
75
+ * - `model_load` — a handle came online (capability, modelKey, loadMs)
76
+ * - `model_unload` — a handle went offline (capability, modelKey, reason)
77
+ * - `memory_pressure` — pressure level changed (level, source, freeMb?)
78
+ * - `eviction` — a role was evicted (capability, modelKey, reason, estimatedMb)
79
+ * - `capability_run` — a request completed (capability, modelKey, runMs)
80
+ *
81
+ * The runtime observability layer subscribes via `onEvent(...)`.
82
+ */
83
+
84
+ import type {
85
+ MemoryPressureEvent,
86
+ MemoryPressureLevel,
87
+ MemoryPressureSource,
88
+ } from "./memory-pressure";
89
+ import {
90
+ VisionEmbeddingCache,
91
+ type VisionEmbeddingEntry,
92
+ } from "./vision-embedding-cache";
93
+ import {
94
+ createEvictableModelRole,
95
+ type EvictableModelRole,
96
+ RESIDENT_ROLE_PRIORITY,
97
+ type ResidentModelRole,
98
+ type SharedResourceRegistry,
99
+ } from "./voice/shared-resources";
100
+
101
+ /**
102
+ * Capability identifiers the arbiter routes between. One per consumer
103
+ * surface — keep this list short; new capabilities should be added
104
+ * deliberately, not on a whim.
105
+ */
106
+ export type ArbiterCapability =
107
+ | "text"
108
+ | "embedding"
109
+ | "vision-describe"
110
+ | "image-gen"
111
+ | "transcribe";
112
+
113
+ /** Identifies the arbiter as the registry's eviction-decision owner (#8809 AC#2). */
114
+ const MEMORY_ARBITER_EVICTION_OWNER = "memory-arbiter";
115
+
116
+ /**
117
+ * Map a capability to the resident-role bucket the existing
118
+ * `SharedResourceRegistry` already tracks. Adding a new capability MUST
119
+ * extend this map so the eviction priority is well-defined.
120
+ */
121
+ const CAPABILITY_ROLE: Readonly<Record<ArbiterCapability, ResidentModelRole>> =
122
+ {
123
+ text: "text-target",
124
+ embedding: "embedding",
125
+ "vision-describe": "vision",
126
+ // Image-gen has no slot in `ResidentModelRole` today. We park it on
127
+ // `vision` priority so it co-evicts with the VL model — both are
128
+ // GPU-heavy weights with similar lifecycles.
129
+ "image-gen": "vision",
130
+ transcribe: "asr",
131
+ };
132
+
133
+ /** The opaque handle returned by `acquire`. Callers MUST `release` it. */
134
+ export interface ArbiterHandle<TBackend = unknown> {
135
+ readonly capability: ArbiterCapability;
136
+ readonly modelKey: string;
137
+ readonly backend: TBackend;
138
+ /**
139
+ * Increment the refcount so the handle is shared. Returns nothing —
140
+ * keep using this same handle object. Useful when one consumer hands
141
+ * the handle to another mid-flight.
142
+ */
143
+ retain(): void;
144
+ /** Decrement the refcount. When it hits zero the role becomes evictable. */
145
+ release(): Promise<void>;
146
+ }
147
+
148
+ /**
149
+ * What a capability handler tells the arbiter about itself. The arbiter
150
+ * uses these to load on demand, run requests, and unload under pressure.
151
+ */
152
+ export interface CapabilityRegistration<TBackend, TRequest, TResult> {
153
+ capability: ArbiterCapability;
154
+ /**
155
+ * Optional override for the resident-role priority. Defaults to the
156
+ * `CAPABILITY_ROLE` map; pass when a specific binding has different
157
+ * eviction semantics than the default for its capability.
158
+ */
159
+ residentRole?: ResidentModelRole;
160
+ /**
161
+ * Best-effort estimate, in MB, of the RAM/VRAM the model occupies once
162
+ * loaded. Pressure eviction picks by *priority*, not by size, but the
163
+ * fit-to-budget path weighs this against the RAM budget. 0 when unknown.
164
+ */
165
+ estimatedMb?: number;
166
+ /** Load the backend for a given model key. Called at most once per (capability, modelKey). */
167
+ load: (modelKey: string) => Promise<TBackend>;
168
+ /** Tear the backend down. The arbiter stops referencing it after this resolves. */
169
+ unload: (backend: TBackend) => Promise<void>;
170
+ /** Run one request through the backend. */
171
+ run: (backend: TBackend, request: TRequest) => Promise<TResult>;
172
+ }
173
+
174
/** Book-keeping record for one loaded (capability, modelKey) backend. */
interface ResidentEntry {
  /** Capability the backend was loaded for. */
  capability: ArbiterCapability;
  /** Model key the backend was loaded for. */
  modelKey: string;
  /** Opaque backend object returned by the registration's `load`. */
  backend: unknown;
  /** Role used for same-role swap conflicts and priority ordering. */
  residentRole: ResidentModelRole;
  /** Footprint estimate copied from the registration (0 when unknown). */
  estimatedMb: number;
  /** Outstanding holds; an entry with `refCount > 0` is skipped by eviction. */
  refCount: number;
  /** Clock reading (from the arbiter's `now`) taken when the load finished. */
  loadedAtMs: number;
  /**
   * Wall-clock of the most recent `acquire`. Drives the fit-to-budget LRU
   * eviction path (`evictToFit`): when a new load would exceed the usable
   * RAM budget, the least-recently-used evictable entries are dropped first.
   */
  lastUsedAt: number;
  /** Id under which this entry is registered with the shared registry. */
  roleId: string;
}
190
+
191
/** Telemetry event the runtime observability layer can subscribe to. */
export type ArbiterEvent =
  // Emitted once a backend has finished loading and is resident.
  | {
      type: "model_load";
      capability: ArbiterCapability;
      modelKey: string;
      loadMs: number;
      atMs: number;
    }
  // Emitted for every entry the arbiter drops, whatever the reason.
  | {
      type: "model_unload";
      capability: ArbiterCapability;
      modelKey: string;
      reason: "release" | "swap" | "pressure" | "shutdown" | "fit";
      atMs: number;
    }
  // Emitted for every pressure notification, including a return to nominal.
  | {
      type: "memory_pressure";
      level: MemoryPressureLevel;
      source: string;
      freeMb?: number;
      atMs: number;
    }
  // Emitted in addition to `model_unload` when the drop was forced by
  // pressure, a same-role swap, or fit-to-budget.
  | {
      type: "eviction";
      capability: ArbiterCapability;
      modelKey: string;
      reason: "pressure" | "swap" | "fit";
      estimatedMb: number;
      atMs: number;
    }
  // Emitted after a queued request has run successfully on its backend.
  | {
      type: "capability_run";
      capability: ArbiterCapability;
      modelKey: string;
      runMs: number;
      atMs: number;
    };
229
+
230
/**
 * Callback invoked synchronously for each telemetry event. A listener that
 * throws is removed from the arbiter's listener set.
 */
export type ArbiterEventListener = (event: ArbiterEvent) => void;
231
+
232
/** One pending request in a per-capability run queue. */
interface QueueEntry<TRequest, TResult> {
  /** Capability whose queue this entry sits in. */
  capability: ArbiterCapability;
  /** Model the request must run against. */
  modelKey: string;
  /** Caller payload, handed unchanged to the registration's `run`. */
  request: TRequest;
  /** Settles the caller's promise with the run result. */
  resolve: (value: TResult) => void;
  /** Settles the caller's promise with an acquire or run failure. */
  reject: (err: unknown) => void;
}
239
+
240
/** Construction options for `MemoryArbiter`. */
export interface MemoryArbiterOptions {
  /** Shared registry the arbiter registers its evictable roles with. */
  registry: SharedResourceRegistry;
  /** Memory-pressure feed. When omitted, `start()` subscribes to nothing. */
  pressureSource?: MemoryPressureSource;
  /** Vision-embedding cache; a fresh `VisionEmbeddingCache` is created when omitted. */
  visionCache?: VisionEmbeddingCache;
  /** Optional logger; every method is optional and is called only when present. */
  logger?: {
    info?: (m: string) => void;
    warn?: (m: string) => void;
    debug?: (m: string) => void;
  };
  /** Clock used for all timestamps and durations. Defaults to `Date.now`. */
  now?: () => number;
  /**
   * Usable RAM budget (MB) for the proactive fit-to-budget LRU eviction
   * path. Before loading a model whose `estimatedMb` would push the sum of
   * resident footprints past this budget, the arbiter evicts the
   * least-recently-used evictable entries (refcount 0, never the text
   * target) until it fits. Return `null` to disable the fit path entirely —
   * the default, since an arbiter with no host-RAM knowledge must not guess.
   * Production wiring passes `os.totalmem()/MB - ramHeadroomReserveMb()`.
   */
  budgetMb?: () => number | null;
  /**
   * Resident footprint (MB) the arbiter does NOT own in its `resident` map
   * but which still consumes the host budget — the text-target and embedding
   * weights loaded by `LocalInferenceEngine` on its own
   * `SharedResourceRegistry` (#8809 M10b). Without this hook the arbiter's
   * `residentFootprintMb()` sees only vision/image-gen and the proactive
   * `evictToFit` path never trips (the two dominant resident consumers are
   * invisible to it). Production wiring (service.ts) reads the engine's
   * `text-target` + `embedding` resident estimates. These weights are owned by
   * the engine and are never the arbiter's eviction target — they are pure
   * budget accounting, exactly like the pinned text-target in the resident map.
   * Defaults to 0 (no external footprint known). Non-finite or non-positive
   * readings are ignored.
   */
  externalFootprintMb?: () => number;
}
275
+
276
/**
 * The arbiter. One instance per process; the plugin owns the singleton
 * (see `index.ts`), and any consumer calls `getMemoryArbiter()` rather
 * than newing one up.
 *
 * It loads capability backends on demand, hands out refcounted handles,
 * keeps at most one model per resident role (swap), evicts under memory
 * pressure or to fit a configured RAM budget, and serializes queued
 * requests per capability.
 */
export class MemoryArbiter {
  private readonly registry: SharedResourceRegistry;
  private readonly pressureSource: MemoryPressureSource | null;
  private readonly visionCache: VisionEmbeddingCache;
  private readonly log?: MemoryArbiterOptions["logger"];
  private readonly now: () => number;
  private readonly budgetMb: () => number | null;
  private readonly externalFootprintMb: () => number;

  /** Registered handlers, one per capability tag. */
  private readonly capabilities = new Map<
    ArbiterCapability,
    CapabilityRegistration<unknown, unknown, unknown>
  >();
  /** Loaded backends, keyed by `residentKey(capability, modelKey)`. */
  private readonly resident = new Map<string, ResidentEntry>();

  private readonly listeners = new Set<ArbiterEventListener>();
  private pressureUnsubscribe: (() => void) | null = null;
  private currentPressure: MemoryPressureLevel = "nominal";

  /**
   * One serialized in-flight load per (capability, modelKey) so concurrent
   * `requestX` calls share a single load promise instead of triggering
   * duplicate weights into RAM.
   */
  private readonly inFlightLoads = new Map<string, Promise<ResidentEntry>>();

  /**
   * Per-capability run queue. The arbiter does NOT serialize across
   * capabilities; what it serializes is the *swap*: when a request needs
   * to evict another resident role first, the ongoing run on that role is
   * allowed to finish, then the swap proceeds. Queued requests of one
   * capability are run one at a time, in arrival order.
   */
  private readonly queues = new Map<
    ArbiterCapability,
    QueueEntry<unknown, unknown>[]
  >();
  /** Whether a drain loop is currently active for a capability. */
  private readonly running = new Map<ArbiterCapability, boolean>();

  private shuttingDown = false;

  constructor(opts: MemoryArbiterOptions) {
    this.registry = opts.registry;
    this.pressureSource = opts.pressureSource ?? null;
    this.visionCache = opts.visionCache ?? new VisionEmbeddingCache();
    this.log = opts.logger;
    this.now = opts.now ?? (() => Date.now());
    this.budgetMb = opts.budgetMb ?? (() => null);
    this.externalFootprintMb = opts.externalFootprintMb ?? (() => 0);
  }

  /**
   * Begin observing memory pressure. Idempotent.
   *
   * @throws Error when called after `shutdown()`.
   */
  start(): void {
    if (this.shuttingDown) {
      throw new Error("[memory-arbiter] cannot start after shutdown");
    }
    if (this.pressureUnsubscribe) return;
    const source = this.pressureSource;
    if (!source) return;
    source.start();
    this.pressureUnsubscribe = source.subscribe((event) => {
      void this.handlePressure(event).catch((err) => {
        this.log?.warn?.(
          `[memory-arbiter] pressure handler failed: ${err instanceof Error ? err.message : String(err)}`,
        );
      });
    });
    // This arbiter is now the single eviction-decision owner for the shared
    // registry; the simpler MemoryMonitor poll defers to it (#8809 AC#2).
    this.registry.claimEvictionOwnership(MEMORY_ARBITER_EVICTION_OWNER);
  }

  /** Stop observing pressure. Does NOT evict resident handles. */
  stop(): void {
    if (this.pressureUnsubscribe) {
      this.pressureUnsubscribe();
      this.pressureUnsubscribe = null;
    }
    this.pressureSource?.stop();
    this.registry.releaseEvictionOwnership(MEMORY_ARBITER_EVICTION_OWNER);
  }

  /**
   * Tear down: stop pressure observation and unload every resident handle.
   *
   * Loads still in flight are not awaited here; `loadOrReuse` notices the
   * shutdown when such a load completes, unloads the fresh backend and
   * rejects its waiters, so nothing becomes resident after this returns.
   */
  async shutdown(): Promise<void> {
    this.shuttingDown = true;
    this.stop();
    const keys = Array.from(this.resident.keys());
    for (const key of keys) {
      const entry = this.resident.get(key);
      if (!entry) continue;
      await this.evictEntry(entry, "shutdown").catch((err) => {
        this.log?.warn?.(
          `[memory-arbiter] shutdown evict ${key} failed: ${err instanceof Error ? err.message : String(err)}`,
        );
      });
    }
    this.resident.clear();
    this.inFlightLoads.clear();
  }

  /** Subscribe to telemetry events. Returns the unsubscribe fn. */
  onEvent(listener: ArbiterEventListener): () => void {
    this.listeners.add(listener);
    return () => {
      this.listeners.delete(listener);
    };
  }

  /**
   * Deliver an event to every listener. A listener that throws is removed
   * so it cannot break later emits; the failure is logged rather than
   * dropped silently.
   */
  private emit(event: ArbiterEvent): void {
    for (const listener of this.listeners) {
      try {
        listener(event);
      } catch (err) {
        this.listeners.delete(listener);
        this.log?.warn?.(
          `[memory-arbiter] event listener threw on "${event.type}" and was removed: ${err instanceof Error ? err.message : String(err)}`,
        );
      }
    }
  }

  /** Register a capability handler. Throws on duplicate registration. */
  registerCapability<TBackend, TRequest, TResult>(
    registration: CapabilityRegistration<TBackend, TRequest, TResult>,
  ): void {
    if (this.capabilities.has(registration.capability)) {
      throw new Error(
        `[memory-arbiter] capability "${registration.capability}" is already registered`,
      );
    }
    this.capabilities.set(
      registration.capability,
      registration as CapabilityRegistration<unknown, unknown, unknown>,
    );
  }

  /** Whether a capability has been registered. */
  hasCapability(capability: ArbiterCapability): boolean {
    return this.capabilities.has(capability);
  }

  /** Diagnostic snapshot of all resident handles (copies, not live entries). */
  residentSnapshot(): ReadonlyArray<{
    capability: ArbiterCapability;
    modelKey: string;
    residentRole: ResidentModelRole;
    estimatedMb: number;
    refCount: number;
    loadedAtMs: number;
    lastUsedAt: number;
  }> {
    return Array.from(this.resident.values()).map((e) => ({
      capability: e.capability,
      modelKey: e.modelKey,
      residentRole: e.residentRole,
      estimatedMb: e.estimatedMb,
      refCount: e.refCount,
      loadedAtMs: e.loadedAtMs,
      lastUsedAt: e.lastUsedAt,
    }));
  }

  /** Level carried by the most recent pressure event (`"nominal"` initially). */
  currentPressureLevel(): MemoryPressureLevel {
    return this.currentPressure;
  }

  /**
   * Predictive warm-load. Unlike `acquire`, preload never creates pressure to
   * make itself happen: it only loads when the system is nominal and the
   * configured budget proves the incoming resident footprint fits without
   * evicting anything. Returns `false` when the guard declines the preload.
   *
   * @returns `true` when the model is resident afterwards (already loaded or
   *   loaded by this call), `false` when the preload was declined.
   * @throws Error when `capability` has no registration, or when the load
   *   itself fails.
   */
  async preload(
    capability: ArbiterCapability,
    modelKey: string,
  ): Promise<boolean> {
    const registration = this.capabilities.get(capability);
    if (!registration) {
      throw new Error(
        `[memory-arbiter] no capability registered for "${capability}"`,
      );
    }
    if (this.shuttingDown || this.currentPressure !== "nominal") {
      return false;
    }
    const key = this.residentKey(capability, modelKey);
    const existing = this.resident.get(key);
    if (existing) {
      existing.lastUsedAt = this.now();
      return true;
    }
    const incomingMb = registration.estimatedMb ?? 0;
    const budget = this.budgetMb();
    // Without a budget or a size estimate there is no proof the model fits.
    if (budget === null || budget <= 0 || incomingMb <= 0) {
      return false;
    }
    if (this.residentFootprintMb() + incomingMb > budget) {
      return false;
    }
    const entry = await this.loadOrReuse(registration, modelKey);
    entry.lastUsedAt = this.now();
    return true;
  }

  /**
   * Acquire a handle for `(capability, modelKey)`. If the model is already
   * resident the refcount is bumped and we return immediately; otherwise we
   * load it (sharing the in-flight promise across concurrent acquirers).
   *
   * Critical pressure causes acquire to throw for non-text capabilities so
   * we don't load on top of a system the OS has already flagged as in
   * trouble. Text always loads — without text the agent is a brick.
   *
   * @throws Error when the capability is unregistered, the arbiter is
   *   shutting down, pressure is critical (non-text), or the load fails.
   */
  async acquire<TBackend>(
    capability: ArbiterCapability,
    modelKey: string,
  ): Promise<ArbiterHandle<TBackend>> {
    const registration = this.capabilities.get(capability);
    if (!registration) {
      throw new Error(
        `[memory-arbiter] no capability registered for "${capability}"`,
      );
    }
    if (this.shuttingDown) {
      throw new Error(
        `[memory-arbiter] arbiter is shutting down; cannot acquire ${capability}`,
      );
    }
    if (this.currentPressure === "critical" && capability !== "text") {
      throw new Error(
        `[memory-arbiter] memory pressure is critical; refusing to load capability "${capability}". Free RAM and retry.`,
      );
    }
    const entry = await this.loadOrReuse(registration, modelKey);
    entry.refCount++;
    entry.lastUsedAt = this.now();
    return this.handleFor<TBackend>(entry);
  }

  /**
   * Wrap a resident entry in a caller-facing handle.
   *
   * The handle tracks how many holds it owns on the entry: one from the
   * `acquire` that created it plus one per `retain()`. Each `release()`
   * returns exactly one hold, so a retained handle needs a matching number
   * of releases before the entry becomes evictable. Releases beyond that are
   * no-ops, and `retain()` after the last release throws.
   */
  private handleFor<TBackend>(entry: ResidentEntry): ArbiterHandle<TBackend> {
    const log = this.log;
    let holds = 1;
    return {
      capability: entry.capability,
      modelKey: entry.modelKey,
      backend: entry.backend as TBackend,
      retain(): void {
        if (holds === 0) {
          throw new Error(
            `[memory-arbiter] cannot retain ${entry.capability}/${entry.modelKey} after release`,
          );
        }
        holds++;
        entry.refCount++;
      },
      async release(): Promise<void> {
        if (holds === 0) return;
        holds--;
        entry.refCount = Math.max(0, entry.refCount - 1);
        // We don't unload at refcount=0; the role becomes evictable, and
        // the pressure / idle path is what reclaims it. Keeps warm-paths
        // fast.
        log?.debug?.(
          `[memory-arbiter] release ${entry.capability}/${entry.modelKey} refcount=${entry.refCount}`,
        );
      },
    };
  }

  /** Key used by the `resident` and `inFlightLoads` maps. */
  private residentKey(capability: ArbiterCapability, modelKey: string): string {
    return `${capability}::${modelKey}`;
  }

  /**
   * Return the resident entry for `(capability, modelKey)`, loading it if
   * needed. Concurrent callers for the same key share one load promise.
   * Does not touch the refcount — callers do that.
   */
  private async loadOrReuse(
    registration: CapabilityRegistration<unknown, unknown, unknown>,
    modelKey: string,
  ): Promise<ResidentEntry> {
    const key = this.residentKey(registration.capability, modelKey);
    const existing = this.resident.get(key);
    if (existing) return existing;
    const inFlight = this.inFlightLoads.get(key);
    if (inFlight) return inFlight;

    // Before loading, decide whether the new role conflicts with what's
    // currently resident. The conservative policy: if the same
    // `residentRole` is held by a different modelKey, we evict the
    // existing one first (one model per role). Different roles can co-
    // exist; the pressure path is what rebalances them.
    const role =
      registration.residentRole ?? CAPABILITY_ROLE[registration.capability];
    const conflicts = this.findConflictingRole(
      role,
      registration.capability,
      modelKey,
    );

    const promise = (async (): Promise<ResidentEntry> => {
      for (const conflict of conflicts) {
        if (conflict.refCount > 0) {
          // A different consumer is actively using the conflicting model.
          // Wait for it to drain rather than yanking the rug out — the
          // arbiter does NOT cancel in-flight work for a swap.
          await this.waitForRefcountZero(conflict);
        }
        await this.evictEntry(conflict, "swap");
      }
      // Proactively make room for the incoming weights: evict the
      // least-recently-used evictable models until this one fits the
      // usable RAM budget. No-op when no budget is configured or the
      // incoming footprint is unknown.
      await this.evictToFit(registration.estimatedMb ?? 0);
      const startMs = this.now();
      const backend = await registration.load(modelKey);
      if (this.shuttingDown) {
        // `shutdown()` ran while the weights were loading and has already
        // swept the resident map. Registering this backend now would leak
        // it on a dead arbiter, so tear it down and fail the waiters.
        try {
          await registration.unload(backend);
        } catch (err) {
          this.log?.warn?.(
            `[memory-arbiter] unload failed for ${registration.capability}/${modelKey}: ${err instanceof Error ? err.message : String(err)}`,
          );
        }
        throw new Error(
          `[memory-arbiter] arbiter shut down while loading ${registration.capability}/${modelKey}`,
        );
      }
      const loadedAtMs = this.now();
      const entry: ResidentEntry = {
        capability: registration.capability,
        modelKey,
        backend,
        residentRole: role,
        estimatedMb: registration.estimatedMb ?? 0,
        refCount: 0,
        loadedAtMs,
        lastUsedAt: loadedAtMs,
        roleId: `arbiter:${registration.capability}:${modelKey}`,
      };
      const evictable = this.makeEvictable(entry, registration);
      this.registry.acquire(evictable);
      this.resident.set(key, entry);
      this.emit({
        type: "model_load",
        capability: registration.capability,
        modelKey,
        loadMs: loadedAtMs - startMs,
        atMs: loadedAtMs,
      });
      this.log?.info?.(
        `[memory-arbiter] loaded ${registration.capability}/${modelKey} in ${loadedAtMs - startMs}ms`,
      );
      return entry;
    })().finally(() => {
      this.inFlightLoads.delete(key);
    });
    this.inFlightLoads.set(key, promise);
    return promise;
  }

  /**
   * Resident entries that occupy `role` but are not the exact
   * `(capability, modelKey)` being requested.
   */
  private findConflictingRole(
    role: ResidentModelRole,
    capability: ArbiterCapability,
    modelKey: string,
  ): ResidentEntry[] {
    const out: ResidentEntry[] = [];
    for (const entry of this.resident.values()) {
      if (entry.residentRole !== role) continue;
      if (entry.capability === capability && entry.modelKey === modelKey)
        continue;
      out.push(entry);
    }
    return out;
  }

  /**
   * Wait until nobody holds `entry`.
   *
   * @throws Error when the refcount has not reached zero after 10 s as
   *   measured by the arbiter's `now` clock.
   */
  private async waitForRefcountZero(entry: ResidentEntry): Promise<void> {
    // Cooperative wait — the arbiter doesn't have a per-entry condvar, so
    // we poll with zero-delay timers (one timer task per check, which lets
    // other pending work run in between). Refcount drops happen
    // synchronously inside `release()`, so this returns on the first check
    // after the last holder has released.
    const start = this.now();
    while (entry.refCount > 0) {
      await new Promise<void>((resolve) => setTimeout(resolve, 0));
      if (this.now() - start > 10_000) {
        throw new Error(
          `[memory-arbiter] timeout waiting for ${entry.capability}/${entry.modelKey} to drain (refcount=${entry.refCount}); refusing to swap mid-flight`,
        );
      }
    }
  }

  /** Build the evictable-role descriptor registered with the shared registry. */
  private makeEvictable(
    entry: ResidentEntry,
    registration: CapabilityRegistration<unknown, unknown, unknown>,
  ): EvictableModelRole {
    return createEvictableModelRole({
      id: entry.roleId,
      role: entry.residentRole,
      evictionPriority: RESIDENT_ROLE_PRIORITY[entry.residentRole],
      estimatedMb: entry.estimatedMb,
      isResident: () =>
        this.resident.has(this.residentKey(entry.capability, entry.modelKey)),
      evict: async () => {
        // The shared registry's monitor calls this. We must be careful not
        // to evict a handle that's actively in use; refcount > 0 means
        // "someone is holding it" and we leave it alone — the registry
        // will pick the next-priority role.
        if (entry.refCount > 0) return;
        await this.evictEntry(entry, "pressure", registration);
      },
    });
  }

  /**
   * Drop `entry`: remove it from the resident map, release its registry
   * role, unload the backend and emit telemetry. Registry and unload
   * failures are logged, not thrown. No-op when the entry is no longer
   * resident. Does not check the refcount — callers decide.
   */
  private async evictEntry(
    entry: ResidentEntry,
    reason: "release" | "swap" | "pressure" | "shutdown" | "fit",
    registration?: CapabilityRegistration<unknown, unknown, unknown>,
  ): Promise<void> {
    const key = this.residentKey(entry.capability, entry.modelKey);
    if (!this.resident.has(key)) return;
    // Delete first so re-entrant calls for the same entry bail out above.
    this.resident.delete(key);
    try {
      await this.registry.release(entry.roleId);
    } catch (err) {
      this.log?.warn?.(
        `[memory-arbiter] registry release failed for ${entry.roleId}: ${err instanceof Error ? err.message : String(err)}`,
      );
    }
    const reg = registration ?? this.capabilities.get(entry.capability);
    try {
      await reg?.unload(entry.backend);
    } catch (err) {
      this.log?.warn?.(
        `[memory-arbiter] unload failed for ${entry.capability}/${entry.modelKey}: ${err instanceof Error ? err.message : String(err)}`,
      );
    }
    this.emit({
      type: "model_unload",
      capability: entry.capability,
      modelKey: entry.modelKey,
      reason,
      atMs: this.now(),
    });
    if (reason === "pressure" || reason === "swap" || reason === "fit") {
      this.emit({
        type: "eviction",
        capability: entry.capability,
        modelKey: entry.modelKey,
        reason,
        estimatedMb: entry.estimatedMb,
        atMs: this.now(),
      });
    }
    this.log?.info?.(
      `[memory-arbiter] evicted ${entry.capability}/${entry.modelKey} reason=${reason}`,
    );
  }

  /**
   * Proactive fit-to-budget eviction. Before loading a model needing
   * `incomingMb`, evict the least-recently-used evictable residents until
   * the projected resident footprint fits `budgetMb()`.
   *
   * Policy:
   *  - Disabled when no budget is configured (`budgetMb()` → null/≤0) or
   *    the incoming footprint is unknown (`incomingMb` ≤ 0): we never guess.
   *  - Pins: the text target is never evicted (losing it bricks the agent),
   *    and any entry with a live refcount is left alone (in active use).
   *  - Ordering is pure LRU (oldest `lastUsedAt` first); ties break toward
   *    the lower-priority role, then the older load.
   *  - Best-effort: if the pins can't be freed enough, the load still
   *    proceeds — the OS-pressure path and the `active-model` admission gate
   *    are the backstops; this path only avoids predictable overcommit.
   */
  private async evictToFit(incomingMb: number): Promise<void> {
    const budget = this.budgetMb();
    if (budget === null || budget <= 0) return;
    if (incomingMb <= 0) return;

    while (this.residentFootprintMb() + incomingMb > budget) {
      const candidate = this.lruEvictionCandidate();
      if (!candidate) break;
      await this.evictEntry(candidate, "fit");
    }
  }

  /**
   * Total resident footprint counted against the budget: the arbiter's own
   * resident handles PLUS the engine-owned text-target + embedding weights
   * reported via `externalFootprintMb` (#8809 M10b). The external term is what
   * makes `evictToFit` actually fire — without it the two dominant resident
   * consumers (text, embedding) are invisible and the fit path is dead.
   * Non-finite or non-positive external readings are ignored.
   */
  private residentFootprintMb(): number {
    let sum = 0;
    for (const e of this.resident.values()) sum += e.estimatedMb;
    const external = this.externalFootprintMb();
    if (Number.isFinite(external) && external > 0) sum += external;
    return sum;
  }

  /**
   * The next entry the fit path should drop: least-recently-used among
   * evictable residents (refcount 0, not the text target). Returns null when
   * nothing is evictable.
   */
  private lruEvictionCandidate(): ResidentEntry | null {
    let best: ResidentEntry | null = null;
    for (const entry of this.resident.values()) {
      if (entry.refCount > 0) continue;
      if (entry.residentRole === "text-target") continue;
      if (best === null) {
        best = entry;
        continue;
      }
      // Primary key: older `lastUsedAt` wins.
      if (entry.lastUsedAt !== best.lastUsedAt) {
        if (entry.lastUsedAt < best.lastUsedAt) best = entry;
        continue;
      }
      // Tie-break 1: smaller role-priority number wins.
      const pa = RESIDENT_ROLE_PRIORITY[entry.residentRole];
      const pb = RESIDENT_ROLE_PRIORITY[best.residentRole];
      if (pa !== pb) {
        if (pa < pb) best = entry;
        continue;
      }
      // Tie-break 2: older load wins.
      if (entry.loadedAtMs < best.loadedAtMs) best = entry;
    }
    return best;
  }

  /**
   * React to one pressure notification: record the level, emit telemetry,
   * and reclaim memory according to severity.
   */
  private async handlePressure(event: MemoryPressureEvent): Promise<void> {
    this.currentPressure = event.level;
    this.emit({
      type: "memory_pressure",
      level: event.level,
      source: event.source,
      ...(event.freeMb !== undefined ? { freeMb: event.freeMb } : {}),
      atMs: event.atMs,
    });
    if (event.level === "nominal") {
      return;
    }
    // Cheap reclaim first: drop any expired vision-embedding cache entries.
    const purged = this.visionCache.purgeExpired(this.now());
    if (purged > 0) {
      this.log?.debug?.(
        `[memory-arbiter] purged ${purged} expired vision-embedding entries on pressure`,
      );
    }
    // Then ask the SharedResourceRegistry for the cheapest evictable role.
    //   `low`: evict one role per pressure tick (gentle).
    //   `critical`: evict every non-text role we own.
    if (event.level === "low") {
      await this.registry.evictLowestPriorityRole();
      return;
    }
    // Critical: walk our resident handles in priority order and evict
    // everything that's not the text-target. We do not evict text — losing
    // it bricks the agent and won't actually rescue an OOM that's already
    // past the critical line.
    const entries = Array.from(this.resident.values())
      .filter((e) => e.residentRole !== "text-target")
      .sort(
        (a, b) =>
          RESIDENT_ROLE_PRIORITY[a.residentRole] -
          RESIDENT_ROLE_PRIORITY[b.residentRole],
      );
    for (const entry of entries) {
      // Re-checked per entry: a holder may have appeared while we awaited.
      if (entry.refCount > 0) continue;
      await this.evictEntry(entry, "pressure");
    }
  }

  // ---------------------------------------------------------------------
  // Capability-specific request fns. Thin wrappers around the queue —
  // each one calls `enqueueRequest` with its capability tag and the
  // caller's request payload. Plugins call these instead of `acquire`
  // directly when they don't need to keep a long-lived handle.
  // ---------------------------------------------------------------------

  /** Queue one request on the `"text"` capability. */
  requestText<TRequest, TResult>(req: {
    modelKey: string;
    payload: TRequest;
  }): Promise<TResult> {
    return this.enqueueRequest("text", req.modelKey, req.payload);
  }

  /** Queue one request on the `"embedding"` capability. */
  requestEmbedding<TRequest, TResult>(req: {
    modelKey: string;
    payload: TRequest;
  }): Promise<TResult> {
    return this.enqueueRequest("embedding", req.modelKey, req.payload);
  }

  /** Queue one request on the `"vision-describe"` capability. */
  requestVisionDescribe<TRequest, TResult>(req: {
    modelKey: string;
    payload: TRequest;
  }): Promise<TResult> {
    return this.enqueueRequest("vision-describe", req.modelKey, req.payload);
  }

  /** Queue one request on the `"image-gen"` capability. */
  requestImageGen<TRequest, TResult>(req: {
    modelKey: string;
    payload: TRequest;
  }): Promise<TResult> {
    return this.enqueueRequest("image-gen", req.modelKey, req.payload);
  }

  /** Queue one request on the `"transcribe"` capability. */
  requestTranscribe<TRequest, TResult>(req: {
    modelKey: string;
    payload: TRequest;
  }): Promise<TResult> {
    return this.enqueueRequest("transcribe", req.modelKey, req.payload);
  }

  /**
   * Append a request to its capability queue and kick the drain loop.
   * The returned promise settles with the run result or with the
   * acquire/run failure.
   *
   * @throws Error (as a rejection) when the capability is unregistered.
   */
  private async enqueueRequest<TRequest, TResult>(
    capability: ArbiterCapability,
    modelKey: string,
    payload: TRequest,
  ): Promise<TResult> {
    const reg = this.capabilities.get(capability);
    if (!reg) {
      throw new Error(
        `[memory-arbiter] no capability registered for "${capability}"`,
      );
    }
    return new Promise<TResult>((resolve, reject) => {
      const queue = this.queues.get(capability) ?? [];
      queue.push({
        capability,
        modelKey,
        request: payload,
        resolve: resolve as (value: unknown) => void,
        reject,
      });
      this.queues.set(capability, queue);
      void this.drainQueue(capability).catch((err) => {
        this.log?.warn?.(
          `[memory-arbiter] queue drain failed for ${capability}: ${err instanceof Error ? err.message : String(err)}`,
        );
      });
    });
  }

  /**
   * Run queued requests for `capability` one at a time until the queue is
   * empty. Returns immediately when a drain loop is already active for the
   * capability; that loop will pick up newly queued entries.
   */
  private async drainQueue(capability: ArbiterCapability): Promise<void> {
    if (this.running.get(capability)) return;
    this.running.set(capability, true);
    try {
      const queue = this.queues.get(capability);
      while (queue && queue.length > 0) {
        const next = queue.shift();
        if (!next) break;
        const reg = this.capabilities.get(capability);
        if (!reg) {
          next.reject(
            new Error(
              `[memory-arbiter] capability "${capability}" was deregistered mid-queue`,
            ),
          );
          continue;
        }
        try {
          const handle = await this.acquire(capability, next.modelKey);
          const startMs = this.now();
          try {
            const result = await reg.run(handle.backend, next.request);
            const runMs = this.now() - startMs;
            this.emit({
              type: "capability_run",
              capability,
              modelKey: next.modelKey,
              runMs,
              atMs: this.now(),
            });
            next.resolve(result);
          } finally {
            // Always give the hold back, even when the run throws.
            await handle.release();
          }
        } catch (err) {
          next.reject(err);
        }
      }
    } finally {
      this.running.set(capability, false);
    }
  }

  // ---------------------------------------------------------------------
  // Vision-embedding cache passthroughs.
  // ---------------------------------------------------------------------

  /** Look up a cached vision embedding by content hash. */
  getCachedVisionEmbedding(hash: string): VisionEmbeddingEntry | null {
    return this.visionCache.get(hash);
  }

  /** Store a vision embedding under `hash`, with an optional TTL. */
  setCachedVisionEmbedding(
    hash: string,
    entry: { tokens: Float32Array; tokenCount: number; hiddenSize: number },
    ttlMs?: number,
  ): void {
    this.visionCache.set(hash, entry, ttlMs);
  }
}
966
+
967
/**
 * Slot holding the process-wide arbiter. The plugin's `index.ts` fills it
 * through `setMemoryArbiter` once at boot; consumers read it through
 * `getMemoryArbiter`, which throws while the slot is empty, or through
 * `tryGetMemoryArbiter`, which does not.
 */
let globalArbiter: MemoryArbiter | null = null;

/** Install the process-wide arbiter, or clear the slot by passing `null`. */
export function setMemoryArbiter(arbiter: MemoryArbiter | null): void {
  globalArbiter = arbiter;
}

/**
 * Return the configured arbiter.
 *
 * @throws Error when no arbiter has been installed.
 */
export function getMemoryArbiter(): MemoryArbiter {
  const configured = globalArbiter;
  if (configured) return configured;
  throw new Error(
    "[memory-arbiter] no arbiter configured; call setMemoryArbiter() at plugin init",
  );
}

/** Test/diagnostic — returns the singleton (or `null`) without throwing. */
export function tryGetMemoryArbiter(): MemoryArbiter | null {
  return globalArbiter;
}
991
+ }