@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (701)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +82 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/actions/transcription-control.d.ts +29 -0
  11. package/src/actions/transcription-control.d.ts.map +1 -0
  12. package/src/actions/transcription-control.test.ts +100 -0
  13. package/src/actions/transcription-control.ts +127 -0
  14. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  15. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  16. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  17. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  18. package/src/adapters/capacitor-llama/environment.ts +71 -0
  19. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  20. package/src/adapters/capacitor-llama/index.ts +807 -0
  21. package/src/adapters/capacitor-llama/loader.ts +109 -0
  22. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  23. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  24. package/src/adapters/capacitor-llama/types.ts +374 -0
  25. package/src/backends/apple-foundation.ts +127 -0
  26. package/src/index.d.ts +8 -0
  27. package/src/index.d.ts.map +1 -0
  28. package/src/index.ts +62 -0
  29. package/src/local-inference-routes.d.ts +38 -0
  30. package/src/local-inference-routes.d.ts.map +1 -0
  31. package/src/local-inference-routes.test.ts +344 -0
  32. package/src/local-inference-routes.ts +1543 -0
  33. package/src/provider.d.ts +21 -0
  34. package/src/provider.d.ts.map +1 -0
  35. package/src/provider.ts +1082 -0
  36. package/src/routes/compat-helpers.d.ts +18 -0
  37. package/src/routes/compat-helpers.d.ts.map +1 -0
  38. package/src/routes/compat-helpers.ts +274 -0
  39. package/src/routes/family-member-route.d.ts +62 -0
  40. package/src/routes/family-member-route.d.ts.map +1 -0
  41. package/src/routes/family-member-route.ts +353 -0
  42. package/src/routes/index.d.ts +19 -0
  43. package/src/routes/index.d.ts.map +1 -0
  44. package/src/routes/index.ts +60 -0
  45. package/src/routes/live-diarization-route.d.ts +26 -0
  46. package/src/routes/live-diarization-route.d.ts.map +1 -0
  47. package/src/routes/live-diarization-route.test.ts +213 -0
  48. package/src/routes/live-diarization-route.ts +122 -0
  49. package/src/routes/local-inference-asr-route.d.ts +4 -0
  50. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  51. package/src/routes/local-inference-asr-route.test.ts +205 -0
  52. package/src/routes/local-inference-asr-route.ts +163 -0
  53. package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
  54. package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  55. package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
  56. package/src/routes/local-inference-asr-transcribe.ts +97 -0
  57. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  58. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  59. package/src/routes/local-inference-compat-routes.test.ts +485 -0
  60. package/src/routes/local-inference-compat-routes.ts +808 -0
  61. package/src/routes/local-inference-tts-route.d.ts +7 -0
  62. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  63. package/src/routes/local-inference-tts-route.test.ts +179 -0
  64. package/src/routes/local-inference-tts-route.ts +230 -0
  65. package/src/routes/transcript-audio-store.d.ts +15 -0
  66. package/src/routes/transcript-audio-store.d.ts.map +1 -0
  67. package/src/routes/transcript-audio-store.ts +27 -0
  68. package/src/routes/transcripts-routes.d.ts +36 -0
  69. package/src/routes/transcripts-routes.d.ts.map +1 -0
  70. package/src/routes/transcripts-routes.test.ts +144 -0
  71. package/src/routes/transcripts-routes.ts +159 -0
  72. package/src/routes/voice-first-run-routes.d.ts +62 -0
  73. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  74. package/src/routes/voice-first-run-routes.ts +524 -0
  75. package/src/routes/voice-models-routes.d.ts +62 -0
  76. package/src/routes/voice-models-routes.d.ts.map +1 -0
  77. package/src/routes/voice-models-routes.ts +554 -0
  78. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  79. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  80. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  81. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  82. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  83. package/src/routes/voice-profiles-management-routes.ts +476 -0
  84. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  85. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  86. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  87. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  88. package/src/runtime/capacitor-llama.d.ts +25 -0
  89. package/src/runtime/embedding-manager-support.d.ts +77 -0
  90. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  91. package/src/runtime/embedding-manager-support.ts +497 -0
  92. package/src/runtime/embedding-presets.d.ts +16 -0
  93. package/src/runtime/embedding-presets.d.ts.map +1 -0
  94. package/src/runtime/embedding-presets.ts +81 -0
  95. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  96. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  97. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  98. package/src/runtime/embedding-warmup-policy.ts +48 -0
  99. package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
  100. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  101. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  102. package/src/runtime/ensure-local-inference-handler.ts +1448 -0
  103. package/src/runtime/index.d.ts +15 -0
  104. package/src/runtime/index.d.ts.map +1 -0
  105. package/src/runtime/index.ts +33 -0
  106. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  107. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  108. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  109. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  110. package/src/runtime/voice-entity-binding.d.ts +103 -0
  111. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  112. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  113. package/src/runtime/voice-entity-binding.ts +328 -0
  114. package/src/services/README.md +71 -0
  115. package/src/services/__tests__/backend-selector.test.ts +101 -0
  116. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  117. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  118. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  119. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  120. package/src/services/__tests__/runtime-target.test.ts +176 -0
  121. package/src/services/active-model-switch-rollback.test.ts +183 -0
  122. package/src/services/active-model.d.ts +282 -0
  123. package/src/services/active-model.d.ts.map +1 -0
  124. package/src/services/active-model.ts +1213 -0
  125. package/src/services/assignments.d.ts +71 -0
  126. package/src/services/assignments.d.ts.map +1 -0
  127. package/src/services/assignments.test.ts +80 -0
  128. package/src/services/assignments.ts +230 -0
  129. package/src/services/backend-selector.ts +95 -0
  130. package/src/services/backend.d.ts +346 -0
  131. package/src/services/backend.d.ts.map +1 -0
  132. package/src/services/backend.ts +612 -0
  133. package/src/services/bionic-host-loader.d.ts +46 -0
  134. package/src/services/bionic-host-loader.d.ts.map +1 -0
  135. package/src/services/bionic-host-loader.test.ts +133 -0
  136. package/src/services/bionic-host-loader.ts +180 -0
  137. package/src/services/bundled-models.d.ts +34 -0
  138. package/src/services/bundled-models.d.ts.map +1 -0
  139. package/src/services/bundled-models.ts +129 -0
  140. package/src/services/cache-bridge.d.ts +206 -0
  141. package/src/services/cache-bridge.d.ts.map +1 -0
  142. package/src/services/cache-bridge.test.ts +516 -0
  143. package/src/services/cache-bridge.ts +423 -0
  144. package/src/services/catalog.d.ts +10 -0
  145. package/src/services/catalog.d.ts.map +1 -0
  146. package/src/services/catalog.test.ts +238 -0
  147. package/src/services/catalog.ts +27 -0
  148. package/src/services/checkpoint-client.d.ts +109 -0
  149. package/src/services/checkpoint-client.d.ts.map +1 -0
  150. package/src/services/checkpoint-client.ts +258 -0
  151. package/src/services/checkpoint-manager.ts +474 -0
  152. package/src/services/cloud-fallback.d.ts +102 -0
  153. package/src/services/cloud-fallback.d.ts.map +1 -0
  154. package/src/services/cloud-fallback.ts +230 -0
  155. package/src/services/conversation-registry.d.ts +142 -0
  156. package/src/services/conversation-registry.d.ts.map +1 -0
  157. package/src/services/conversation-registry.test.ts +235 -0
  158. package/src/services/conversation-registry.ts +264 -0
  159. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
  160. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  161. package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
  162. package/src/services/device-bridge.d.ts +188 -0
  163. package/src/services/device-bridge.d.ts.map +1 -0
  164. package/src/services/device-bridge.ts +1237 -0
  165. package/src/services/device-resource-metrics.d.ts +149 -0
  166. package/src/services/device-resource-metrics.d.ts.map +1 -0
  167. package/src/services/device-resource-metrics.test.ts +98 -0
  168. package/src/services/device-resource-metrics.ts +346 -0
  169. package/src/services/device-tier.d.ts +115 -0
  170. package/src/services/device-tier.d.ts.map +1 -0
  171. package/src/services/device-tier.test.ts +371 -0
  172. package/src/services/device-tier.ts +410 -0
  173. package/src/services/downloader.d.ts +82 -0
  174. package/src/services/downloader.d.ts.map +1 -0
  175. package/src/services/downloader.test.ts +747 -0
  176. package/src/services/downloader.ts +925 -0
  177. package/src/services/engine-direct-bundle.test.ts +58 -0
  178. package/src/services/engine-streaming.test.ts +80 -0
  179. package/src/services/engine.d.ts +540 -0
  180. package/src/services/engine.d.ts.map +1 -0
  181. package/src/services/engine.ts +1909 -0
  182. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  183. package/src/services/ensure-local-artifacts.test.ts +368 -0
  184. package/src/services/ensure-local-artifacts.ts +351 -0
  185. package/src/services/external-scanner.d.ts +17 -0
  186. package/src/services/external-scanner.d.ts.map +1 -0
  187. package/src/services/external-scanner.ts +312 -0
  188. package/src/services/ffi-llm-mock.ts +354 -0
  189. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  190. package/src/services/ffi-streaming-backend.d.ts +180 -0
  191. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  192. package/src/services/ffi-streaming-backend.ts +382 -0
  193. package/src/services/ffi-streaming-runner.d.ts +122 -0
  194. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  195. package/src/services/ffi-streaming-runner.test.ts +60 -0
  196. package/src/services/ffi-streaming-runner.ts +354 -0
  197. package/src/services/ffi-unload-ordering.test.ts +162 -0
  198. package/src/services/gpu-autotune.ts +534 -0
  199. package/src/services/gpu-detect.d.ts +56 -0
  200. package/src/services/gpu-detect.d.ts.map +1 -0
  201. package/src/services/gpu-detect.ts +139 -0
  202. package/src/services/handler-registry.d.ts +72 -0
  203. package/src/services/handler-registry.d.ts.map +1 -0
  204. package/src/services/handler-registry.ts +240 -0
  205. package/src/services/hardware.d.ts +63 -0
  206. package/src/services/hardware.d.ts.map +1 -0
  207. package/src/services/hardware.test.ts +231 -0
  208. package/src/services/hardware.ts +410 -0
  209. package/src/services/hf-search.d.ts +26 -0
  210. package/src/services/hf-search.d.ts.map +1 -0
  211. package/src/services/hf-search.test.ts +69 -0
  212. package/src/services/hf-search.ts +420 -0
  213. package/src/services/image-description-runtime.d.ts +14 -0
  214. package/src/services/image-description-runtime.d.ts.map +1 -0
  215. package/src/services/image-description-runtime.test.ts +61 -0
  216. package/src/services/image-description-runtime.ts +118 -0
  217. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  218. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  219. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  220. package/src/services/imagegen/backend-selector.d.ts +118 -0
  221. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  222. package/src/services/imagegen/backend-selector.ts +277 -0
  223. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  224. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  225. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  226. package/src/services/imagegen/errors.d.ts +16 -0
  227. package/src/services/imagegen/errors.d.ts.map +1 -0
  228. package/src/services/imagegen/errors.ts +40 -0
  229. package/src/services/imagegen/index.d.ts +58 -0
  230. package/src/services/imagegen/index.d.ts.map +1 -0
  231. package/src/services/imagegen/index.ts +144 -0
  232. package/src/services/imagegen/mflux.d.ts +74 -0
  233. package/src/services/imagegen/mflux.d.ts.map +1 -0
  234. package/src/services/imagegen/mflux.ts +313 -0
  235. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  236. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  237. package/src/services/imagegen/sd-cpp.ts +718 -0
  238. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  239. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  240. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  241. package/src/services/imagegen/types.d.ts +181 -0
  242. package/src/services/imagegen/types.d.ts.map +1 -0
  243. package/src/services/imagegen/types.ts +193 -0
  244. package/src/services/index.d.ts +29 -0
  245. package/src/services/index.d.ts.map +1 -0
  246. package/src/services/index.ts +211 -0
  247. package/src/services/inference-capabilities.d.ts +132 -0
  248. package/src/services/inference-capabilities.d.ts.map +1 -0
  249. package/src/services/inference-capabilities.test.ts +75 -0
  250. package/src/services/inference-capabilities.ts +204 -0
  251. package/src/services/inference-telemetry.d.ts +59 -0
  252. package/src/services/inference-telemetry.d.ts.map +1 -0
  253. package/src/services/inference-telemetry.ts +143 -0
  254. package/src/services/ios-llama-streaming.ts +248 -0
  255. package/src/services/kv-spill.d.ts +189 -0
  256. package/src/services/kv-spill.d.ts.map +1 -0
  257. package/src/services/kv-spill.test.ts +222 -0
  258. package/src/services/kv-spill.ts +356 -0
  259. package/src/services/latency-trace.d.ts +346 -0
  260. package/src/services/latency-trace.d.ts.map +1 -0
  261. package/src/services/latency-trace.test.ts +266 -0
  262. package/src/services/latency-trace.ts +844 -0
  263. package/src/services/llama-server-metrics.ts +304 -0
  264. package/src/services/llm-streaming-binding.d.ts +96 -0
  265. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  266. package/src/services/llm-streaming-binding.ts +136 -0
  267. package/src/services/load-args.d.ts +82 -0
  268. package/src/services/load-args.d.ts.map +1 -0
  269. package/src/services/load-args.ts +81 -0
  270. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  271. package/src/services/manifest/index.d.ts +4 -0
  272. package/src/services/manifest/index.d.ts.map +1 -0
  273. package/src/services/manifest/index.ts +66 -0
  274. package/src/services/manifest/manifest.test.ts +689 -0
  275. package/src/services/manifest/schema.d.ts +713 -0
  276. package/src/services/manifest/schema.d.ts.map +1 -0
  277. package/src/services/manifest/schema.ts +653 -0
  278. package/src/services/manifest/types.d.ts +30 -0
  279. package/src/services/manifest/types.d.ts.map +1 -0
  280. package/src/services/manifest/types.ts +55 -0
  281. package/src/services/manifest/validator.d.ts +66 -0
  282. package/src/services/manifest/validator.d.ts.map +1 -0
  283. package/src/services/manifest/validator.ts +567 -0
  284. package/src/services/memory-arbiter.d.ts +318 -0
  285. package/src/services/memory-arbiter.d.ts.map +1 -0
  286. package/src/services/memory-arbiter.test.ts +419 -0
  287. package/src/services/memory-arbiter.ts +925 -0
  288. package/src/services/memory-monitor.d.ts +122 -0
  289. package/src/services/memory-monitor.d.ts.map +1 -0
  290. package/src/services/memory-monitor.test.ts +208 -0
  291. package/src/services/memory-monitor.ts +297 -0
  292. package/src/services/memory-pressure.d.ts +130 -0
  293. package/src/services/memory-pressure.d.ts.map +1 -0
  294. package/src/services/memory-pressure.ts +414 -0
  295. package/src/services/mtp-doctor.d.ts +13 -0
  296. package/src/services/mtp-doctor.d.ts.map +1 -0
  297. package/src/services/mtp-doctor.ts +78 -0
  298. package/src/services/network-policy.d.ts +127 -0
  299. package/src/services/network-policy.d.ts.map +1 -0
  300. package/src/services/network-policy.ts +346 -0
  301. package/src/services/paths.d.ts +6 -0
  302. package/src/services/paths.d.ts.map +1 -0
  303. package/src/services/paths.ts +25 -0
  304. package/src/services/planner-skeleton.d.ts +124 -0
  305. package/src/services/planner-skeleton.d.ts.map +1 -0
  306. package/src/services/planner-skeleton.ts +175 -0
  307. package/src/services/providers.d.ts +38 -0
  308. package/src/services/providers.d.ts.map +1 -0
  309. package/src/services/providers.ts +507 -0
  310. package/src/services/ram-budget-cache.test.ts +163 -0
  311. package/src/services/ram-budget.d.ts +110 -0
  312. package/src/services/ram-budget.d.ts.map +1 -0
  313. package/src/services/ram-budget.ts +0 -0
  314. package/src/services/readiness.d.ts +9 -0
  315. package/src/services/readiness.d.ts.map +1 -0
  316. package/src/services/readiness.test.ts +87 -0
  317. package/src/services/readiness.ts +238 -0
  318. package/src/services/recommendation.d.ts +111 -0
  319. package/src/services/recommendation.d.ts.map +1 -0
  320. package/src/services/recommendation.ts +671 -0
  321. package/src/services/registry.d.ts +35 -0
  322. package/src/services/registry.d.ts.map +1 -0
  323. package/src/services/registry.ts +151 -0
  324. package/src/services/router-handler.d.ts +92 -0
  325. package/src/services/router-handler.d.ts.map +1 -0
  326. package/src/services/router-handler.test.ts +45 -0
  327. package/src/services/router-handler.ts +407 -0
  328. package/src/services/routing-policy.d.ts +69 -0
  329. package/src/services/routing-policy.d.ts.map +1 -0
  330. package/src/services/routing-policy.test.ts +164 -0
  331. package/src/services/routing-policy.ts +297 -0
  332. package/src/services/routing-preferences.d.ts +8 -0
  333. package/src/services/routing-preferences.d.ts.map +1 -0
  334. package/src/services/routing-preferences.ts +17 -0
  335. package/src/services/runtime-target.d.ts +98 -0
  336. package/src/services/runtime-target.d.ts.map +1 -0
  337. package/src/services/runtime-target.ts +154 -0
  338. package/src/services/service.d.ts +128 -0
  339. package/src/services/service.d.ts.map +1 -0
  340. package/src/services/service.test.ts +223 -0
  341. package/src/services/service.ts +735 -0
  342. package/src/services/session-pool.d.ts +72 -0
  343. package/src/services/session-pool.d.ts.map +1 -0
  344. package/src/services/session-pool.ts +153 -0
  345. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  346. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  347. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  348. package/src/services/structured-output/deterministic-repair.ts +443 -0
  349. package/src/services/structured-output/index.ts +4 -0
  350. package/src/services/structured-output.d.ts +311 -0
  351. package/src/services/structured-output.d.ts.map +1 -0
  352. package/src/services/structured-output.test.ts +483 -0
  353. package/src/services/structured-output.ts +712 -0
  354. package/src/services/system-memory.d.ts +33 -0
  355. package/src/services/system-memory.d.ts.map +1 -0
  356. package/src/services/system-memory.test.ts +47 -0
  357. package/src/services/system-memory.ts +67 -0
  358. package/src/services/transcription-priority.test.ts +211 -0
  359. package/src/services/types.d.ts +19 -0
  360. package/src/services/types.d.ts.map +1 -0
  361. package/src/services/types.ts +55 -0
  362. package/src/services/verify-on-device.d.ts +34 -0
  363. package/src/services/verify-on-device.d.ts.map +1 -0
  364. package/src/services/verify-on-device.test.ts +87 -0
  365. package/src/services/verify-on-device.ts +127 -0
  366. package/src/services/verify.d.ts +8 -0
  367. package/src/services/verify.d.ts.map +1 -0
  368. package/src/services/verify.ts +13 -0
  369. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  370. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  371. package/src/services/vision/aosp-unavailable.ts +163 -0
  372. package/src/services/vision/capacitor-llama.d.ts +99 -0
  373. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  374. package/src/services/vision/capacitor-llama.ts +255 -0
  375. package/src/services/vision/cloud-fallback.d.ts +47 -0
  376. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  377. package/src/services/vision/cloud-fallback.test.ts +243 -0
  378. package/src/services/vision/cloud-fallback.ts +268 -0
  379. package/src/services/vision/fallback-chain.test.ts +86 -0
  380. package/src/services/vision/hash.d.ts +71 -0
  381. package/src/services/vision/hash.d.ts.map +1 -0
  382. package/src/services/vision/hash.ts +157 -0
  383. package/src/services/vision/index.d.ts +95 -0
  384. package/src/services/vision/index.d.ts.map +1 -0
  385. package/src/services/vision/index.ts +251 -0
  386. package/src/services/vision/llama-server.d.ts +73 -0
  387. package/src/services/vision/llama-server.d.ts.map +1 -0
  388. package/src/services/vision/llama-server.ts +177 -0
  389. package/src/services/vision/types.d.ts +153 -0
  390. package/src/services/vision/types.d.ts.map +1 -0
  391. package/src/services/vision/types.ts +154 -0
  392. package/src/services/vision/vast-fallback.d.ts +18 -0
  393. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  394. package/src/services/vision/vast-fallback.ts +127 -0
  395. package/src/services/vision-embedding-cache.d.ts +98 -0
  396. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  397. package/src/services/vision-embedding-cache.ts +189 -0
  398. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  399. package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
  400. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  401. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  402. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  403. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  404. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  405. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  406. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  407. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  408. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  409. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  410. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  411. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
  412. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  413. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  414. package/src/services/voice/asr-timed.real.test.ts +141 -0
  415. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  416. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  417. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  418. package/src/services/voice/audio-frame-consumer.ts +491 -0
  419. package/src/services/voice/barge-in.d.ts +112 -0
  420. package/src/services/voice/barge-in.d.ts.map +1 -0
  421. package/src/services/voice/barge-in.test.ts +244 -0
  422. package/src/services/voice/barge-in.ts +336 -0
  423. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  424. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  425. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  426. package/src/services/voice/cancellation-coordinator.ts +269 -0
  427. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  428. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  429. package/src/services/voice/checkpoint-manager.ts +401 -0
  430. package/src/services/voice/checkpoint-policy.ts +336 -0
  431. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  432. package/src/services/voice/e2e-harness.test.ts +182 -0
  433. package/src/services/voice/e2e-harness.ts +743 -0
  434. package/src/services/voice/eager-context-builder.d.ts +170 -0
  435. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  436. package/src/services/voice/eager-context-builder.ts +262 -0
  437. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  438. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  439. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  440. package/src/services/voice/embedding-server.ts +200 -0
  441. package/src/services/voice/embedding.d.ts +133 -0
  442. package/src/services/voice/embedding.d.ts.map +1 -0
  443. package/src/services/voice/embedding.test.ts +131 -0
  444. package/src/services/voice/embedding.ts +243 -0
  445. package/src/services/voice/emotion-attribution.d.ts +68 -0
  446. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  447. package/src/services/voice/emotion-attribution.test.ts +129 -0
  448. package/src/services/voice/emotion-attribution.ts +361 -0
  449. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  450. package/src/services/voice/engine-bridge.d.ts +759 -0
  451. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  452. package/src/services/voice/engine-bridge.test.ts +384 -0
  453. package/src/services/voice/engine-bridge.ts +2302 -0
  454. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  455. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  456. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  457. package/src/services/voice/eot-classifier.d.ts +214 -0
  458. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  459. package/src/services/voice/eot-classifier.ts +533 -0
  460. package/src/services/voice/errors.d.ts +20 -0
  461. package/src/services/voice/errors.d.ts.map +1 -0
  462. package/src/services/voice/errors.ts +32 -0
  463. package/src/services/voice/expressive-tags.d.ts +158 -0
  464. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  465. package/src/services/voice/expressive-tags.ts +405 -0
  466. package/src/services/voice/ffi-bindings.d.ts +674 -0
  467. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  468. package/src/services/voice/ffi-bindings.test.ts +728 -0
  469. package/src/services/voice/ffi-bindings.ts +3225 -0
  470. package/src/services/voice/first-line-cache.d.ts +181 -0
  471. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  472. package/src/services/voice/first-line-cache.ts +725 -0
  473. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  474. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  475. package/src/services/voice/fused-eot-scorer.ts +135 -0
  476. package/src/services/voice/index.d.ts +91 -0
  477. package/src/services/voice/index.d.ts.map +1 -0
  478. package/src/services/voice/index.ts +481 -0
  479. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  480. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  481. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  482. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  483. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  484. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  485. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  486. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  487. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  488. package/src/services/voice/kokoro/index.ts +79 -0
  489. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  490. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  491. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  492. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  493. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  494. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  495. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  496. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  497. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  498. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  499. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  501. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  502. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  503. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  504. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  505. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  506. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  507. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  508. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  509. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  510. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  511. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  512. package/src/services/voice/kokoro/types.d.ts +82 -0
  513. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  514. package/src/services/voice/kokoro/types.ts +95 -0
  515. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  516. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  517. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  518. package/src/services/voice/kokoro/voices.d.ts +30 -0
  519. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  520. package/src/services/voice/kokoro/voices.ts +64 -0
  521. package/src/services/voice/lifecycle.d.ts +135 -0
  522. package/src/services/voice/lifecycle.d.ts.map +1 -0
  523. package/src/services/voice/lifecycle.test.ts +315 -0
  524. package/src/services/voice/lifecycle.ts +301 -0
  525. package/src/services/voice/live-diarization-session.d.ts +96 -0
  526. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  527. package/src/services/voice/live-diarization-session.ts +289 -0
  528. package/src/services/voice/mic-source.d.ts +136 -0
  529. package/src/services/voice/mic-source.d.ts.map +1 -0
  530. package/src/services/voice/mic-source.test.ts +210 -0
  531. package/src/services/voice/mic-source.ts +503 -0
  532. package/src/services/voice/optimistic-policy.d.ts +109 -0
  533. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  534. package/src/services/voice/optimistic-policy.test.ts +101 -0
  535. package/src/services/voice/optimistic-policy.ts +192 -0
  536. package/src/services/voice/optimistic-rollback.ts +343 -0
  537. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  538. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  539. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  540. package/src/services/voice/partial-stabilizer.ts +140 -0
  541. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  542. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  543. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  544. package/src/services/voice/phrase-cache.d.ts +76 -0
  545. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  546. package/src/services/voice/phrase-cache.test.ts +242 -0
  547. package/src/services/voice/phrase-cache.ts +186 -0
  548. package/src/services/voice/phrase-chunker.d.ts +62 -0
  549. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  550. package/src/services/voice/phrase-chunker.test.ts +239 -0
  551. package/src/services/voice/phrase-chunker.ts +281 -0
  552. package/src/services/voice/pipeline-impls.d.ts +151 -0
  553. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  554. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  555. package/src/services/voice/pipeline-impls.test.ts +292 -0
  556. package/src/services/voice/pipeline-impls.ts +315 -0
  557. package/src/services/voice/pipeline.d.ts +216 -0
  558. package/src/services/voice/pipeline.d.ts.map +1 -0
  559. package/src/services/voice/pipeline.ts +505 -0
  560. package/src/services/voice/prefill-client.d.ts +123 -0
  561. package/src/services/voice/prefill-client.d.ts.map +1 -0
  562. package/src/services/voice/prefill-client.ts +316 -0
  563. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  564. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  565. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  566. package/src/services/voice/profile-store.d.ts +248 -0
  567. package/src/services/voice/profile-store.d.ts.map +1 -0
  568. package/src/services/voice/profile-store.ts +887 -0
  569. package/src/services/voice/real-audio-decode.test.ts +148 -0
  570. package/src/services/voice/ring-buffer.d.ts +40 -0
  571. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  572. package/src/services/voice/ring-buffer.test.ts +129 -0
  573. package/src/services/voice/ring-buffer.ts +123 -0
  574. package/src/services/voice/rollback-queue.d.ts +24 -0
  575. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  576. package/src/services/voice/rollback-queue.ts +74 -0
  577. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  578. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  579. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  580. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  581. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  582. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  583. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  584. package/src/services/voice/scheduler.d.ts +146 -0
  585. package/src/services/voice/scheduler.d.ts.map +1 -0
  586. package/src/services/voice/scheduler.t2.test.ts +141 -0
  587. package/src/services/voice/scheduler.ts +927 -0
  588. package/src/services/voice/shared-resources.d.ts +190 -0
  589. package/src/services/voice/shared-resources.d.ts.map +1 -0
  590. package/src/services/voice/shared-resources.ts +320 -0
  591. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  592. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  593. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  594. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  595. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  596. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  597. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  598. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  599. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  600. package/src/services/voice/speaker/diarizer.ts +218 -0
  601. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  602. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  603. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  604. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  605. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  606. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  607. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  608. package/src/services/voice/speaker/encoder.d.ts +37 -0
  609. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  610. package/src/services/voice/speaker/encoder.ts +105 -0
  611. package/src/services/voice/speaker-imprint.d.ts +83 -0
  612. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  613. package/src/services/voice/speaker-imprint.test.ts +185 -0
  614. package/src/services/voice/speaker-imprint.ts +312 -0
  615. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  616. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  617. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  618. package/src/services/voice/speaker-preset-cache.ts +195 -0
  619. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  620. package/src/services/voice/system-audio-sink.d.ts +73 -0
  621. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  622. package/src/services/voice/system-audio-sink.test.ts +29 -0
  623. package/src/services/voice/system-audio-sink.ts +366 -0
  624. package/src/services/voice/transcriber.d.ts +244 -0
  625. package/src/services/voice/transcriber.d.ts.map +1 -0
  626. package/src/services/voice/transcriber.test.ts +392 -0
  627. package/src/services/voice/transcriber.ts +704 -0
  628. package/src/services/voice/transcript-knowledge.d.ts +37 -0
  629. package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
  630. package/src/services/voice/transcript-knowledge.test.ts +68 -0
  631. package/src/services/voice/transcript-knowledge.ts +75 -0
  632. package/src/services/voice/transcript-service.d.ts +41 -0
  633. package/src/services/voice/transcript-service.d.ts.map +1 -0
  634. package/src/services/voice/transcript-service.test.ts +137 -0
  635. package/src/services/voice/transcript-service.ts +141 -0
  636. package/src/services/voice/transcript-store.d.ts +53 -0
  637. package/src/services/voice/transcript-store.d.ts.map +1 -0
  638. package/src/services/voice/transcript-store.test.ts +153 -0
  639. package/src/services/voice/transcript-store.ts +132 -0
  640. package/src/services/voice/turn-controller.d.ts +183 -0
  641. package/src/services/voice/turn-controller.d.ts.map +1 -0
  642. package/src/services/voice/turn-controller.test.ts +575 -0
  643. package/src/services/voice/turn-controller.ts +596 -0
  644. package/src/services/voice/types.d.ts +643 -0
  645. package/src/services/voice/types.d.ts.map +1 -0
  646. package/src/services/voice/types.ts +699 -0
  647. package/src/services/voice/vad.d.ts +282 -0
  648. package/src/services/voice/vad.d.ts.map +1 -0
  649. package/src/services/voice/vad.test.ts +480 -0
  650. package/src/services/voice/vad.ts +827 -0
  651. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  652. package/src/services/voice/voice-budget.d.ts +241 -0
  653. package/src/services/voice/voice-budget.d.ts.map +1 -0
  654. package/src/services/voice/voice-budget.test.ts +418 -0
  655. package/src/services/voice/voice-budget.ts +635 -0
  656. package/src/services/voice/voice-duet.test.ts +375 -0
  657. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  658. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  659. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  660. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  661. package/src/services/voice/voice-preset-format.d.ts +158 -0
  662. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  663. package/src/services/voice/voice-preset-format.ts +700 -0
  664. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  665. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  666. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  667. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  668. package/src/services/voice/voice-profile-artifact.ts +518 -0
  669. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  670. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  671. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  672. package/src/services/voice/voice-profile-routes.ts +425 -0
  673. package/src/services/voice/voice-scenario.ts +154 -0
  674. package/src/services/voice/voice-settings.d.ts +82 -0
  675. package/src/services/voice/voice-settings.d.ts.map +1 -0
  676. package/src/services/voice/voice-settings.ts +172 -0
  677. package/src/services/voice/voice-state-machine.d.ts +364 -0
  678. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  679. package/src/services/voice/voice-state-machine.ts +727 -0
  680. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  681. package/src/services/voice/voice-workbench-report.ts +326 -0
  682. package/src/services/voice/voice-workbench.test.ts +158 -0
  683. package/src/services/voice/voice.test.ts +1070 -0
  684. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  685. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  686. package/src/services/voice/wake-word-ggml.ts +320 -0
  687. package/src/services/voice/wake-word.d.ts +255 -0
  688. package/src/services/voice/wake-word.d.ts.map +1 -0
  689. package/src/services/voice/wake-word.test.ts +298 -0
  690. package/src/services/voice/wake-word.ts +554 -0
  691. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  692. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  693. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  694. package/src/services/voice-model-updater.d.ts +240 -0
  695. package/src/services/voice-model-updater.d.ts.map +1 -0
  696. package/src/services/voice-model-updater.ts +724 -0
  697. package/src/services/voice-prewarm.d.ts +3 -0
  698. package/src/services/voice-prewarm.d.ts.map +1 -0
  699. package/src/services/voice-prewarm.ts +51 -0
  700. package/dist/index.d.ts +0 -37
  701. package/dist/index.js +0 -1098
@@ -0,0 +1,635 @@
1
+ /**
2
+ * Voice-budget allocator — single arbiter of the co-resident memory budget
3
+ * for the whole voice + text bundle (text LM, drafter, ASR, TTS, embedding,
4
+ * VAD, wake-word, turn-detector, emotion classifier, speaker encoder).
5
+ *
6
+ * Today's `ram-budget.ts` is per-tier: it decides whether ONE text bundle
7
+ * fits a host. `voice-budget.ts` is the cross-model layer the brief
8
+ * mandated in `.swarm/VOICE_WAVE_2.md` §H4 and R9 §4 — every model loader
9
+ * calls `reserve()` before it loads weights, releases on unload, and
10
+ * `reserve()` walks the residents under contention by eviction priority
11
+ * (cold → warm → hot) until the requested amount fits.
12
+ *
13
+ * Priorities (from R9 §4.1, mapped to `ResidentModelRole`):
14
+ *
15
+ * - **hot** (priority ≥ 40): `text-target`, `tts`, `asr` — never load
16
+ * on demand, never evicted before pressure-of-last-resort.
17
+ * - **warm** (priority 25–35): `vad`, `embedding` — may be evicted but
18
+ * reload is expensive.
19
+ * - **cold** (priority ≤ 20): `speaker-id` (18), `emotion` (15),
20
+ * `vision` (20), `drafter` (10) — load-on-demand; first to evict.
21
+ *
22
+ * Eviction policy: walk ascending priority (cheapest first) until enough
23
+ * memory has been reclaimed. The text target evicts only when it is
24
+ * literally the only resident role and pressure persists (matches
25
+ * `SharedResourceRegistry.evictLowestPriorityRole` semantics).
26
+ *
27
+ * The allocator is **memory-only** — it does not load weights. The caller
28
+ * (TTS engine, ASR loader, etc.) holds the typed reservation and runs
29
+ * `release()` on unload.
30
+ *
31
+ * Wire-up plan (handed to follow-up commits, NOT done by I9):
32
+ * - `ffi-streaming-backend.ts` → `reserve(role="text-target")` + `reserve(role="drafter")` at spawn.
33
+ * - `voice/pipeline.ts` → `reserve(role="tts", bytes=transientPeakMb*MB)` per synth.
34
+ * - `voice/wake-word.ts`, `vad.ts`, `eot-classifier.ts` → reserve at session arm.
35
+ * - I2/I3 add `emotion` + `speaker-id` reservations when those models register.
36
+ *
37
+ * NOTE: the wire-up is intentionally separate from the allocator
38
+ * implementation because the in-flight I-agents (I1/I2/I3/I5) own those
39
+ * loader files and we must not race their edits. The allocator + the
40
+ * `evictionPriority` hooks are in place; the loaders adopt it as they
41
+ * land.
42
+ */
43
+
44
+ import {
45
+ classifyDeviceTier,
46
+ type DeviceTier,
47
+ type DeviceTierAssessment,
48
+ effectiveModelMemoryGb,
49
+ } from "../device-tier";
50
+ import type { HardwareProbe } from "../types";
51
+ import {
52
+ RESIDENT_ROLE_PRIORITY,
53
+ type ResidentModelRole,
54
+ } from "./shared-resources";
55
+
56
+ const BYTES_PER_MB = 1024 * 1024;
57
+ const BYTES_PER_GB = 1024 ** 3;
58
+
59
+ /** Coarse priority class consumed by `reserve()`. Internally we map this
60
+ * back to the per-role priority number in `RESIDENT_ROLE_PRIORITY`. */
61
+ export type AllocationPriority = "hot" | "warm" | "cold";
62
+
63
+ export function priorityClassForRole(
64
+ role: ResidentModelRole,
65
+ ): AllocationPriority {
66
+ const p = RESIDENT_ROLE_PRIORITY[role];
67
+ if (p >= 40) return "hot";
68
+ if (p >= 25) return "warm";
69
+ return "cold";
70
+ }
71
+
72
+ export interface BudgetReservation {
73
+ readonly id: string;
74
+ readonly role: ResidentModelRole;
75
+ readonly bytes: number;
76
+ readonly priority: AllocationPriority;
77
+ /** Per-role priority number (R9 §4.1 / `RESIDENT_ROLE_PRIORITY`). */
78
+ readonly priorityRank: number;
79
+ /** Idempotent. Multi-release is a no-op (release happens from teardown
80
+ * paths that may race). */
81
+ release(): void;
82
+ }
83
+
84
+ /** Diagnostic snapshot row for `VoiceBudget.snapshot()`. */
85
+ export interface ReservationSnapshot {
86
+ id: string;
87
+ role: ResidentModelRole;
88
+ bytes: number;
89
+ priority: AllocationPriority;
90
+ priorityRank: number;
91
+ }
92
+
93
+ export class BudgetExhaustedError extends Error {
94
+ readonly code = "voice-budget-exhausted";
95
+ readonly details: {
96
+ requestedBytes: number;
97
+ freeBytes: number;
98
+ totalBytes: number;
99
+ role: ResidentModelRole;
100
+ priority: AllocationPriority;
101
+ evictedRoles: ReadonlyArray<ResidentModelRole>;
102
+ evictionCandidate: ResidentModelRole | null;
103
+ };
104
+ constructor(details: BudgetExhaustedError["details"]) {
105
+ super(
106
+ `[voice-budget] Cannot fit ${(details.requestedBytes / BYTES_PER_MB).toFixed(0)} MB ` +
107
+ `reservation for role "${details.role}" (priority ${details.priority}). ` +
108
+ `Free: ${(details.freeBytes / BYTES_PER_MB).toFixed(0)} MB / ` +
109
+ `total: ${(details.totalBytes / BYTES_PER_MB).toFixed(0)} MB. ` +
110
+ `Evicted: [${details.evictedRoles.join(", ")}]. ` +
111
+ `Next candidate: ${details.evictionCandidate ?? "none (only hot reservations remain)"}.`,
112
+ );
113
+ this.name = "BudgetExhaustedError";
114
+ this.details = details;
115
+ }
116
+ }
117
+
118
+ export interface VoiceBudget {
119
+ /**
120
+ * Reserve `bytes` for `modelId` with `priority`. Returns a handle the
121
+ * caller MUST `.release()` to give the memory back. Throws
122
+ * `BudgetExhaustedError` when the requested amount cannot fit even after
123
+ * evicting every available lower-priority reservation.
124
+ *
125
+ * `evictHook` is optional: when present, the allocator will call it for
126
+ * each role that needs to be evicted (one at a time, ascending priority)
127
+ * before recording the new reservation. When omitted, the allocator just
128
+ * walks its own internal table — the caller is expected to drive the
129
+ * actual weight unload (the loader/eviction path lives in the model's
130
+ * own service, not here).
131
+ */
132
+ reserve(args: {
133
+ modelId: string;
134
+ role: ResidentModelRole;
135
+ bytes: number;
136
+ /** Optional; defaults to `priorityClassForRole(role)`. */
137
+ priority?: AllocationPriority;
138
+ /** Optional eviction callback. When provided, called once per evicted
139
+ * role in ascending-priority order before the new reservation is
140
+ * recorded. The callback should drop the weights and return the
141
+ * bytes actually reclaimed (must be >= the reservation's recorded
142
+ * bytes). When omitted, the allocator only drops the internal
143
+ * reservation entry (eviction-by-accounting). */
144
+ evictHook?: (role: ResidentModelRole, id: string) => Promise<number>;
145
+ }): Promise<BudgetReservation>;
146
+
147
+ /** Best-effort current free budget, in bytes. */
148
+ freeBytes(): number;
149
+ /** Total budget on this device, in bytes. */
150
+ totalBytes(): number;
151
+ /** All current reservations, ordered by priority ascending. */
152
+ snapshot(): ReadonlyArray<ReservationSnapshot>;
153
+ /** The tier this budget was sized to. */
154
+ tier(): DeviceTier;
155
+ /** The original assessment. */
156
+ assessment(): DeviceTierAssessment;
157
+ }
158
+
159
+ /**
160
+ * Per-tier total budget table (in bytes). Sized to the §2.3 co-resident
161
+ * roll-up in R9: MAX/GOOD/OKAY/POOR keep the relevant subset of weights +
162
+ * KV + TTS transient peak resident with an OS reserve.
163
+ *
164
+ * - MAX: ~24 GB free RAM (enough to keep 9b + drafter + omnivoice-Q8 +
165
+ * ASR + embed + warm/cold path co-resident).
166
+ * - GOOD: ~12 GB (2b/4b co-resident + transient).
167
+ * - OKAY: ~6 GB (2b entry-tier LM only resident; ASR/TTS swap).
168
+ * - POOR: ~3 GB (turn + VAD + wake only, no LM/TTS local).
169
+ *
170
+ * The `maxRamMB` user override (R9 §5.3) can cap this lower. The default
171
+ * picks the tier's natural total but never exceeds the device's effective
172
+ * model memory.
173
+ */
174
+ function defaultTierBudgetBytes(
175
+ probe: HardwareProbe,
176
+ tier: DeviceTier,
177
+ ): number {
178
+ const effectiveGb = effectiveModelMemoryGb(probe);
179
+ switch (tier) {
180
+ case "MAX":
181
+ return Math.min(24, effectiveGb) * BYTES_PER_GB;
182
+ case "GOOD":
183
+ return Math.min(12, effectiveGb) * BYTES_PER_GB;
184
+ case "OKAY":
185
+ return Math.min(6, effectiveGb) * BYTES_PER_GB;
186
+ case "POOR":
187
+ return Math.min(3, Math.max(1, effectiveGb)) * BYTES_PER_GB;
188
+ }
189
+ }
190
+
191
+ /**
192
+ * Co-resident voice-ensemble RSS estimate in MB. Sourced from R9 §2.3,
193
+ * keyed off the LM-tier slot (the text model that anchors the bundle).
194
+ *
195
+ * Each row is the steady-state weights + KV at default context for the
196
+ * whole voice + text bundle running at once:
197
+ *
198
+ * LM + LM KV + drafter + TTS (omnivoice base + tokenizer or kokoro-q8) +
199
+ * ASR + ASR mmproj + embedding + VAD + wake-word + turn-detector +
200
+ * emotion classifier + speaker encoder.
201
+ *
202
+ * The `transientTtsBufferMb` field is the OmniVoice MaskGIT decode peak
203
+ * (~1.17 GB measured on Metal). Backends that don't run OmniVoice locally
204
+ * (kokoro-only, cloud TTS) have a much smaller transient — kept at 100 MB
205
+ * to leave room for kokoro's ONNX compute path. Mobile defaults to no
206
+ * local TTS, so transient = 0.
207
+ *
208
+ * The figures are MEASURED on-disk (Q4_K_M GGUFs in
209
+ * `<stateDir>/local-inference/models/eliza-1-2b.bundle/`) plus
210
+ * model-card sizes for VAD, wake-word, turn-detector, emotion, speaker-id.
211
+ * See R9 §2.1 + §2.2 + §2.3 for the per-component breakdown.
212
+ */
213
+ export interface VoiceEnsembleBudget {
214
+ readonly tierSlot: VoiceTierSlot;
215
+ readonly lmMb: number;
216
+ readonly lmKvMb: number;
217
+ readonly drafterMb: number;
218
+ readonly ttsMb: number;
219
+ readonly asrMb: number;
220
+ readonly asrMmprojMb: number;
221
+ readonly embeddingMb: number;
222
+ readonly vadMb: number;
223
+ readonly wakeWordMb: number;
224
+ readonly turnDetectorMb: number;
225
+ readonly emotionMb: number;
226
+ readonly speakerEncoderMb: number;
227
+ readonly transientTtsBufferMb: number;
228
+ /** Sum of weights + KV (steady-state). Excludes transient TTS buffer. */
229
+ readonly steadyStateMb: number;
230
+ /** Sum of steady-state + transient TTS peak. */
231
+ readonly peakMb: number;
232
+ }
233
+
234
+ /**
235
+ * The voice ensemble's LM tier slot. We key the table off the LM size +
236
+ * the surrounding voice profile (mobile-cloud vs desktop-omnivoice) since
237
+ * the largest co-resident knob is the LM itself.
238
+ */
239
+ export type VoiceTierSlot =
240
+ | "mobile-2b" // mobile profile: kokoro-q8 + turnsense + ASR-0.6B + LM-2B (entry tier), no dedicated embedding
241
+ | "desktop-2b" // 2b LM (entry tier) + full voice stack + embedding
242
+ | "desktop-4b" // 4b LM + full voice stack + embedding
243
+ | "workstation-9b" // 9b LM + omnivoice-Q8 + ASR-0.6B + embedding
244
+ | "workstation-27b"; // 27b LM + omnivoice-Q8 + ASR-1.7B + embedding
245
+
246
+ const _MB = 1; // alias for readability inside the table
247
+ const _GB = 1024;
248
+
249
+ /** R9 §2.3 — measured co-resident bundle for every supported tier slot. */
250
+ export const VOICE_ENSEMBLE_BUDGETS: Readonly<
251
+ Record<VoiceTierSlot, VoiceEnsembleBudget>
252
+ > = {
253
+ "mobile-2b": buildEnsemble({
254
+ tierSlot: "mobile-2b",
255
+ lmMb: 1.4 * _GB, // eliza-1-2b (entry tier) Q4-ish
256
+ lmKvMb: 0.075 * _GB,
257
+ drafterMb: 0.5 * _GB,
258
+ ttsMb: 0.08 * _GB, // kokoro-q8 ONNX
259
+ asrMb: 0.4 * _GB, // qwen3-asr-0.6B documented Q4-equiv
260
+ asrMmprojMb: 0.2 * _GB,
261
+ embeddingMb: 0, // pools from the text backbone on the 2b entry tier
262
+ vadMb: 2 * _MB, // silero-vad documented baseline
263
+ wakeWordMb: 4 * _MB,
264
+ turnDetectorMb: 60 * _MB, // turnsense 135M int8 mobile
265
+ emotionMb: 40 * _MB, // wav2small int8 acoustic
266
+ speakerEncoderMb: 10 * _MB, // wespeaker / x-vector int8
267
+ transientTtsBufferMb: 0, // mobile defaults to cloud TTS or kokoro burst
268
+ }),
269
+ "desktop-2b": buildEnsemble({
270
+ tierSlot: "desktop-2b",
271
+ lmMb: 1.4 * _GB,
272
+ lmKvMb: 0.075 * _GB,
273
+ drafterMb: 0.5 * _GB,
274
+ ttsMb: 0.65 * _GB,
275
+ asrMb: 0.4 * _GB,
276
+ asrMmprojMb: 0.2 * _GB,
277
+ embeddingMb: 0.4 * _GB, // eliza-1-embedding.gguf 0.6B Q4-ish
278
+ vadMb: 2 * _MB,
279
+ wakeWordMb: 4 * _MB,
280
+ turnDetectorMb: 100 * _MB,
281
+ emotionMb: 40 * _MB,
282
+ speakerEncoderMb: 10 * _MB,
283
+ transientTtsBufferMb: 1.17 * _GB,
284
+ }),
285
+ "desktop-4b": buildEnsemble({
286
+ tierSlot: "desktop-4b",
287
+ lmMb: 2.6 * _GB,
288
+ lmKvMb: 0.3 * _GB,
289
+ drafterMb: 0.7 * _GB,
290
+ ttsMb: 0.65 * _GB,
291
+ asrMb: 0.4 * _GB,
292
+ asrMmprojMb: 0.2 * _GB,
293
+ embeddingMb: 0.4 * _GB,
294
+ vadMb: 2 * _MB,
295
+ wakeWordMb: 4 * _MB,
296
+ turnDetectorMb: 400 * _MB, // livekit/turn-detector v0.4.1-intl Qwen2.5-0.5B
297
+ emotionMb: 40 * _MB,
298
+ speakerEncoderMb: 10 * _MB,
299
+ transientTtsBufferMb: 1.17 * _GB,
300
+ }),
301
+ "workstation-9b": buildEnsemble({
302
+ tierSlot: "workstation-9b",
303
+ lmMb: 5.4 * _GB,
304
+ lmKvMb: 0.56 * _GB,
305
+ drafterMb: 1.4 * _GB,
306
+ ttsMb: 1.28 * _GB, // omnivoice Q8_0 on 9B+ tiers per voiceQuantForTier()
307
+ asrMb: 0.4 * _GB,
308
+ asrMmprojMb: 0.2 * _GB,
309
+ embeddingMb: 0.4 * _GB,
310
+ vadMb: 2 * _MB,
311
+ wakeWordMb: 4 * _MB,
312
+ turnDetectorMb: 400 * _MB,
313
+ emotionMb: 40 * _MB,
314
+ speakerEncoderMb: 10 * _MB,
315
+ transientTtsBufferMb: 1.17 * _GB,
316
+ }),
317
+ "workstation-27b": buildEnsemble({
318
+ tierSlot: "workstation-27b",
319
+ lmMb: 16.8 * _GB,
320
+ lmKvMb: 2.75 * _GB,
321
+ drafterMb: 2.6 * _GB,
322
+ ttsMb: 1.28 * _GB,
323
+ asrMb: 1.1 * _GB, // qwen3-asr-1.7B on the 27B tier
324
+ asrMmprojMb: 0.3 * _GB,
325
+ embeddingMb: 0.4 * _GB,
326
+ vadMb: 2 * _MB,
327
+ wakeWordMb: 4 * _MB,
328
+ turnDetectorMb: 400 * _MB,
329
+ emotionMb: 40 * _MB,
330
+ speakerEncoderMb: 10 * _MB,
331
+ transientTtsBufferMb: 1.17 * _GB,
332
+ }),
333
+ };
334
+
335
+ function buildEnsemble(
336
+ rows: Omit<VoiceEnsembleBudget, "steadyStateMb" | "peakMb">,
337
+ ): VoiceEnsembleBudget {
338
+ const steadyStateMb =
339
+ rows.lmMb +
340
+ rows.lmKvMb +
341
+ rows.drafterMb +
342
+ rows.ttsMb +
343
+ rows.asrMb +
344
+ rows.asrMmprojMb +
345
+ rows.embeddingMb +
346
+ rows.vadMb +
347
+ rows.wakeWordMb +
348
+ rows.turnDetectorMb +
349
+ rows.emotionMb +
350
+ rows.speakerEncoderMb;
351
+ return {
352
+ ...rows,
353
+ steadyStateMb,
354
+ peakMb: steadyStateMb + rows.transientTtsBufferMb,
355
+ };
356
+ }
357
+
358
+ /**
359
+ * Estimate the full voice ensemble's peak resident MB for a tier slot.
360
+ * `assertVoiceBundleFitsHost` consults this against the device's host RAM.
361
+ */
362
+ export function voiceEnsemblePeakMb(slot: VoiceTierSlot): number {
363
+ return VOICE_ENSEMBLE_BUDGETS[slot].peakMb;
364
+ }
365
+
366
+ /** Sum of weights + KV (steady-state, excludes transient TTS buffer). */
367
+ export function voiceEnsembleSteadyStateMb(slot: VoiceTierSlot): number {
368
+ return VOICE_ENSEMBLE_BUDGETS[slot].steadyStateMb;
369
+ }
370
+
371
+ /**
372
+ * Pick the canonical voice-tier slot for an installed text model + device
373
+ * tier. The LM size anchors the slot (`eliza-1-2b` → `2b` (entry tier),
374
+ * `4b` → `4b`, …). The `mobile` flag selects the `mobile-` slot; otherwise the
375
+ * `desktop-`/`workstation-` prefix follows the LM size found in `textModelId` (`deviceTier` is accepted but not consulted here). Mobile always pulls the
376
+ * `mobile-2b` slot because the brief defaults mobile to cloud TTS+ASR; only
377
+ * the 2B entry-tier local LM stays available there.
378
+ */
379
+ export function pickVoiceTierSlot(args: {
380
+ textModelId: string;
381
+ deviceTier: DeviceTier;
382
+ mobile?: boolean;
383
+ }): VoiceTierSlot {
384
+ if (args.mobile) return "mobile-2b";
385
+ const id = args.textModelId.toLowerCase();
386
+ if (id.includes("27b")) return "workstation-27b";
387
+ if (id.includes("9b")) return "workstation-9b";
388
+ if (id.includes("4b")) return "desktop-4b";
389
+ // 2b is the entry/floor tier; any smaller/unknown id resolves to it.
390
+ return "desktop-2b";
391
+ }
392
+
393
+ /**
394
+ * Decision returned by `assertVoiceBundleFitsHost`. Mirrors the shape of
395
+ * `RamFitDecision` in `ram-budget.ts` but at the bundle level.
396
+ */
397
+ export interface VoiceBundleFitDecision {
398
+ tierSlot: VoiceTierSlot;
399
+ deviceTier: DeviceTier;
400
+ /** Steady-state weights + KV, MB. */
401
+ steadyStateMb: number;
402
+ /** Steady-state + transient TTS peak, MB. */
403
+ peakMb: number;
404
+ /** RAM available to the bundle (host MB - OS reserve). */
405
+ usableMb: number;
406
+ /** True iff `steadyStateMb <= usableMb`, i.e. `level` is "fits" or "tight" (the transient TTS peak may still not fit). */
407
+ fits: boolean;
408
+ /** "fits" when peak fits, "tight" when only steady-state fits, "wontfit"
409
+ * when not even steady-state fits. */
410
+ level: "fits" | "tight" | "wontfit";
411
+ }
412
+
413
+ /** Default OS reserve subtracted from the host before the bundle check. */
414
+ export const DEFAULT_VOICE_BUNDLE_RESERVE_MB = 1536;
415
+
416
+ /**
417
+ * Decide whether the whole voice ensemble fits a host. Used by the runtime
418
+ * at voice-session-start to refuse local-voice entry rather than start it
419
+ * and watch `MemoryMonitor` evict the loaders mid-session.
420
+ *
421
+ * `assertVoiceBundleFitsHost` (in `active-model.ts`) wraps this with a
422
+ * typed error. This function returns the raw decision so callers that want
423
+ * to degrade silently can do so. R9 §1.4 spec.
424
+ */
425
+ export function assessVoiceBundleFits(args: {
426
+ tierSlot: VoiceTierSlot;
427
+ deviceTier: DeviceTier;
428
+ hostRamMb: number;
429
+ reserveMb?: number;
430
+ }): VoiceBundleFitDecision {
431
+ const reserveMb = args.reserveMb ?? DEFAULT_VOICE_BUNDLE_RESERVE_MB;
432
+ const usableMb = Math.max(0, args.hostRamMb - reserveMb);
433
+ const ensemble = VOICE_ENSEMBLE_BUDGETS[args.tierSlot];
434
+ const steadyStateMb = ensemble.steadyStateMb;
435
+ const peakMb = ensemble.peakMb;
436
+ let level: VoiceBundleFitDecision["level"];
437
+ if (usableMb >= peakMb) level = "fits";
438
+ else if (usableMb >= steadyStateMb) level = "tight";
439
+ else level = "wontfit";
440
+ return {
441
+ tierSlot: args.tierSlot,
442
+ deviceTier: args.deviceTier,
443
+ steadyStateMb,
444
+ peakMb,
445
+ usableMb,
446
+ fits: level !== "wontfit",
447
+ level,
448
+ };
449
+ }
450
+
451
+ interface InternalReservation {
452
+ id: string;
453
+ role: ResidentModelRole;
454
+ bytes: number;
455
+ priority: AllocationPriority;
456
+ priorityRank: number;
457
+ released: boolean;
458
+ }
459
+
460
/**
 * Byte-accounting ledger behind the `VoiceBudget` interface.
 *
 * The budget has a fixed total and tracks live reservations in a map keyed by
 * reservation id. It performs no loading or unloading itself: when a request
 * does not fit, it evicts lower-ranked reservations from its own books and,
 * if the caller supplied an `evictHook`, asks the caller to do the real work.
 */
class VoiceBudgetImpl implements VoiceBudget {
  private readonly _totalBytes: number;
  private readonly _assessment: DeviceTierAssessment;
  private readonly _reservations = new Map<string, InternalReservation>();
  private _usedBytes = 0;

  constructor(args: { totalBytes: number; assessment: DeviceTierAssessment }) {
    this._totalBytes = args.totalBytes;
    this._assessment = args.assessment;
  }

  /** Bytes still available for reservation; never negative. */
  freeBytes(): number {
    return Math.max(0, this._totalBytes - this._usedBytes);
  }

  /** The fixed budget cap this instance was constructed with. */
  totalBytes(): number {
    return this._totalBytes;
  }

  /** Device tier taken from the stored assessment. */
  tier(): DeviceTier {
    return this._assessment.tier;
  }

  /** The assessment this budget was constructed with. */
  assessment(): DeviceTierAssessment {
    return this._assessment;
  }

  /**
   * Copies of all live reservations, ordered by ascending `priorityRank`
   * (i.e. the next eviction candidate comes first). The internal `released`
   * flag is not exposed.
   */
  snapshot(): ReadonlyArray<ReservationSnapshot> {
    return Array.from(this._reservations.values())
      .filter((r) => !r.released)
      .sort((a, b) => a.priorityRank - b.priorityRank)
      .map(({ id, role, bytes, priority, priorityRank }) => ({
        id,
        role,
        bytes,
        priority,
        priorityRank,
      }));
  }

  /**
   * Reserve `bytes` for a model, evicting lower-ranked reservations if the
   * request does not fit in the free space.
   *
   * @param args.modelId   Used only to build the reservation id.
   * @param args.role      Determines the eviction rank via
   *                       `RESIDENT_ROLE_PRIORITY`, and the default priority
   *                       class via `priorityClassForRole`.
   * @param args.bytes     Requested size. Floored to an integer; negative and
   *                       NaN values are treated as 0.
   * @param args.priority  Optional priority-class label override. It is
   *                       recorded on the reservation but does not change the
   *                       rank used for eviction decisions.
   * @param args.evictHook Optional callback awaited once per evicted
   *                       reservation so the caller can actually free the
   *                       memory. Its return value is not used for accounting.
   * @returns A handle describing the reservation, with an idempotent
   *          `release()`.
   * @throws BudgetExhaustedError when the request exceeds the total budget,
   *         or when no strictly-lower-ranked reservation remains to evict.
   *         Reservations evicted before the failure stay evicted.
   */
  async reserve(args: {
    modelId: string;
    role: ResidentModelRole;
    bytes: number;
    priority?: AllocationPriority;
    evictHook?: (role: ResidentModelRole, id: string) => Promise<number>;
  }): Promise<BudgetReservation> {
    const priority = args.priority ?? priorityClassForRole(args.role);
    const priorityRank = RESIDENT_ROLE_PRIORITY[args.role];

    // NaN must not reach the ledger: it slips past every `<` / `>` check below
    // and would turn `_usedBytes` (and therefore `freeBytes()`) into NaN for
    // the lifetime of the budget. Treat it like a negative request.
    const flooredBytes = Math.floor(args.bytes);
    const requestedBytes = Number.isNaN(flooredBytes)
      ? 0
      : Math.max(0, flooredBytes);

    if (requestedBytes > this._totalBytes) {
      throw new BudgetExhaustedError({
        requestedBytes,
        freeBytes: this.freeBytes(),
        totalBytes: this._totalBytes,
        role: args.role,
        priority,
        evictedRoles: [],
        evictionCandidate: null,
      });
    }

    const evictedRoles: ResidentModelRole[] = [];

    // Walk evictable reservations in ascending priority (cheapest first)
    // until enough memory fits. We only evict reservations with a STRICTLY
    // LOWER priority rank than the request; equal or higher priority
    // reservations stay put.
    while (this.freeBytes() < requestedBytes) {
      const candidate = this.lowestPriorityEvictableReservation(priorityRank);
      if (!candidate) {
        throw new BudgetExhaustedError({
          requestedBytes,
          freeBytes: this.freeBytes(),
          totalBytes: this._totalBytes,
          role: args.role,
          priority,
          evictedRoles,
          evictionCandidate: null,
        });
      }
      if (args.evictHook) {
        // Let the caller actually unload the weights. The hook returns the
        // bytes it reclaimed; we still drop the accounting entry by the
        // recorded `bytes` field — partial reclamation is treated as
        // success (the loader, not the allocator, owns the side effect).
        await args.evictHook(candidate.role, candidate.id);
      }
      // The candidate may have been released while the hook was pending
      // (e.g. the unload path calls the reservation's own `release()`, or a
      // concurrent `reserve()` evicted it). `dropReservation` is idempotent,
      // so its bytes are only ever subtracted once.
      this.dropReservation(candidate);
      evictedRoles.push(candidate.role);
    }

    // No `await` between the final free-space check and the insert below, so
    // the check-and-commit cannot interleave with another `reserve()` call.
    const id = `${args.modelId}#${args.role}#${Date.now().toString(36)}-${Math.random()
      .toString(36)
      .slice(2, 8)}`;
    const entry: InternalReservation = {
      id,
      role: args.role,
      bytes: requestedBytes,
      priority,
      priorityRank,
      released: false,
    };
    this._reservations.set(id, entry);
    this._usedBytes += requestedBytes;

    const release = (): void => {
      this.dropReservation(entry);
    };

    return {
      id,
      role: entry.role,
      bytes: entry.bytes,
      priority: entry.priority,
      priorityRank: entry.priorityRank,
      release,
    };
  }

  /**
   * Remove a reservation from the ledger exactly once. Later calls for the
   * same entry are no-ops, which keeps `_usedBytes` consistent when the
   * owner's `release()` and an eviction target the same reservation.
   */
  private dropReservation(entry: InternalReservation): void {
    if (entry.released) return;
    entry.released = true;
    this._reservations.delete(entry.id);
    this._usedBytes = Math.max(0, this._usedBytes - entry.bytes);
  }

  /**
   * Find the live reservation with the lowest `priorityRank` that is strictly
   * below `requesterRank`, or `null` if none qualifies. Among equal ranks the
   * earliest-inserted reservation wins (Map iteration order).
   */
  private lowestPriorityEvictableReservation(
    requesterRank: number,
  ): InternalReservation | null {
    let cheapest: InternalReservation | null = null;
    for (const entry of this._reservations.values()) {
      if (entry.released) continue;
      if (entry.priorityRank >= requesterRank) continue;
      if (!cheapest || entry.priorityRank < cheapest.priorityRank) {
        cheapest = entry;
      }
    }
    return cheapest;
  }
}
606
+
607
/**
 * Public factory: builds a `VoiceBudget` sized for the probed device.
 *
 * The cap starts at the tier's natural total (`defaultTierBudgetBytes`) and
 * can only be lowered, never raised, by a positive `maxRamMb` override.
 */
export function createVoiceBudget(args: {
  probe: HardwareProbe;
  /** Optional user override for the budget cap, in MB. Default: tier
   *  natural total. Values above the natural total are clamped down to it;
   *  non-positive or non-numeric values are ignored. */
  maxRamMb?: number;
  /** Optional pre-computed assessment (avoid double classification). */
  assessment?: DeviceTierAssessment;
}): VoiceBudget {
  const { probe, maxRamMb } = args;
  const assessment = args.assessment ?? classifyDeviceTier(probe);
  const naturalBytes = defaultTierBudgetBytes(probe, assessment.tier);

  const totalBytes =
    typeof maxRamMb === "number" && maxRamMb > 0
      ? Math.min(naturalBytes, Math.floor(maxRamMb * BYTES_PER_MB))
      : naturalBytes;

  return new VoiceBudgetImpl({ totalBytes, assessment });
}
625
+
626
/**
 * Test seam — builds a budget directly from an explicit byte total and
 * assessment, without consulting a hardware probe.
 */
export function createVoiceBudgetForTest(args: {
  totalBytes: number;
  assessment: DeviceTierAssessment;
}): VoiceBudget {
  const { totalBytes, assessment } = args;
  return new VoiceBudgetImpl({ totalBytes, assessment });
}