@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (676)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +81 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  11. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  12. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  13. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  14. package/src/adapters/capacitor-llama/environment.ts +71 -0
  15. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  16. package/src/adapters/capacitor-llama/index.ts +807 -0
  17. package/src/adapters/capacitor-llama/loader.ts +109 -0
  18. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  19. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  20. package/src/adapters/capacitor-llama/types.ts +374 -0
  21. package/src/backends/apple-foundation.ts +127 -0
  22. package/src/index.d.ts +7 -0
  23. package/src/index.d.ts.map +1 -0
  24. package/src/index.ts +54 -0
  25. package/src/local-inference-routes.d.ts +38 -0
  26. package/src/local-inference-routes.d.ts.map +1 -0
  27. package/src/local-inference-routes.test.ts +344 -0
  28. package/src/local-inference-routes.ts +1543 -0
  29. package/src/provider.d.ts +21 -0
  30. package/src/provider.d.ts.map +1 -0
  31. package/src/provider.ts +1171 -0
  32. package/src/routes/compat-helpers.d.ts +18 -0
  33. package/src/routes/compat-helpers.d.ts.map +1 -0
  34. package/src/routes/compat-helpers.ts +274 -0
  35. package/src/routes/family-member-route.d.ts +62 -0
  36. package/src/routes/family-member-route.d.ts.map +1 -0
  37. package/src/routes/family-member-route.ts +353 -0
  38. package/src/routes/index.d.ts +19 -0
  39. package/src/routes/index.d.ts.map +1 -0
  40. package/src/routes/index.ts +60 -0
  41. package/src/routes/live-diarization-route.d.ts +26 -0
  42. package/src/routes/live-diarization-route.d.ts.map +1 -0
  43. package/src/routes/live-diarization-route.test.ts +213 -0
  44. package/src/routes/live-diarization-route.ts +122 -0
  45. package/src/routes/local-inference-asr-route.d.ts +4 -0
  46. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  47. package/src/routes/local-inference-asr-route.test.ts +190 -0
  48. package/src/routes/local-inference-asr-route.ts +213 -0
  49. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  50. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  51. package/src/routes/local-inference-compat-routes.test.ts +423 -0
  52. package/src/routes/local-inference-compat-routes.ts +782 -0
  53. package/src/routes/local-inference-tts-route.d.ts +7 -0
  54. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/src/routes/local-inference-tts-route.test.ts +179 -0
  56. package/src/routes/local-inference-tts-route.ts +230 -0
  57. package/src/routes/voice-first-run-routes.d.ts +62 -0
  58. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  59. package/src/routes/voice-first-run-routes.ts +524 -0
  60. package/src/routes/voice-models-routes.d.ts +62 -0
  61. package/src/routes/voice-models-routes.d.ts.map +1 -0
  62. package/src/routes/voice-models-routes.ts +554 -0
  63. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  64. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  65. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  66. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  67. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  68. package/src/routes/voice-profiles-management-routes.ts +476 -0
  69. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  72. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  73. package/src/runtime/capacitor-llama.d.ts +25 -0
  74. package/src/runtime/embedding-manager-support.d.ts +77 -0
  75. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  76. package/src/runtime/embedding-manager-support.ts +497 -0
  77. package/src/runtime/embedding-presets.d.ts +16 -0
  78. package/src/runtime/embedding-presets.d.ts.map +1 -0
  79. package/src/runtime/embedding-presets.ts +81 -0
  80. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  81. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  82. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  83. package/src/runtime/embedding-warmup-policy.ts +48 -0
  84. package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
  85. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  86. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  87. package/src/runtime/ensure-local-inference-handler.ts +1398 -0
  88. package/src/runtime/index.d.ts +14 -0
  89. package/src/runtime/index.d.ts.map +1 -0
  90. package/src/runtime/index.ts +27 -0
  91. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  92. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  93. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  94. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  95. package/src/runtime/voice-entity-binding.d.ts +103 -0
  96. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  97. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  98. package/src/runtime/voice-entity-binding.ts +328 -0
  99. package/src/services/README.md +71 -0
  100. package/src/services/__tests__/backend-selector.test.ts +101 -0
  101. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  102. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  103. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  104. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  105. package/src/services/__tests__/runtime-target.test.ts +176 -0
  106. package/src/services/active-model-switch-rollback.test.ts +183 -0
  107. package/src/services/active-model.d.ts +282 -0
  108. package/src/services/active-model.d.ts.map +1 -0
  109. package/src/services/active-model.ts +1213 -0
  110. package/src/services/asr/errors.d.ts +21 -0
  111. package/src/services/asr/errors.d.ts.map +1 -0
  112. package/src/services/asr/errors.ts +50 -0
  113. package/src/services/asr/hash.d.ts +28 -0
  114. package/src/services/asr/hash.d.ts.map +1 -0
  115. package/src/services/asr/hash.ts +49 -0
  116. package/src/services/asr/index.d.ts +76 -0
  117. package/src/services/asr/index.d.ts.map +1 -0
  118. package/src/services/asr/index.ts +178 -0
  119. package/src/services/asr/types.d.ts +91 -0
  120. package/src/services/asr/types.d.ts.map +1 -0
  121. package/src/services/asr/types.ts +95 -0
  122. package/src/services/assignments.d.ts +71 -0
  123. package/src/services/assignments.d.ts.map +1 -0
  124. package/src/services/assignments.test.ts +80 -0
  125. package/src/services/assignments.ts +230 -0
  126. package/src/services/backend-selector.ts +95 -0
  127. package/src/services/backend.d.ts +346 -0
  128. package/src/services/backend.d.ts.map +1 -0
  129. package/src/services/backend.ts +612 -0
  130. package/src/services/bundled-models.d.ts +34 -0
  131. package/src/services/bundled-models.d.ts.map +1 -0
  132. package/src/services/bundled-models.ts +129 -0
  133. package/src/services/cache-bridge.d.ts +206 -0
  134. package/src/services/cache-bridge.d.ts.map +1 -0
  135. package/src/services/cache-bridge.test.ts +516 -0
  136. package/src/services/cache-bridge.ts +423 -0
  137. package/src/services/catalog.d.ts +10 -0
  138. package/src/services/catalog.d.ts.map +1 -0
  139. package/src/services/catalog.test.ts +240 -0
  140. package/src/services/catalog.ts +27 -0
  141. package/src/services/checkpoint-client.d.ts +109 -0
  142. package/src/services/checkpoint-client.d.ts.map +1 -0
  143. package/src/services/checkpoint-client.ts +258 -0
  144. package/src/services/checkpoint-manager.ts +474 -0
  145. package/src/services/cloud-fallback.d.ts +102 -0
  146. package/src/services/cloud-fallback.d.ts.map +1 -0
  147. package/src/services/cloud-fallback.ts +230 -0
  148. package/src/services/conversation-registry.d.ts +142 -0
  149. package/src/services/conversation-registry.d.ts.map +1 -0
  150. package/src/services/conversation-registry.test.ts +235 -0
  151. package/src/services/conversation-registry.ts +264 -0
  152. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
  153. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  154. package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
  155. package/src/services/device-bridge.d.ts +188 -0
  156. package/src/services/device-bridge.d.ts.map +1 -0
  157. package/src/services/device-bridge.ts +1237 -0
  158. package/src/services/device-resource-metrics.d.ts +149 -0
  159. package/src/services/device-resource-metrics.d.ts.map +1 -0
  160. package/src/services/device-resource-metrics.test.ts +98 -0
  161. package/src/services/device-resource-metrics.ts +346 -0
  162. package/src/services/device-tier.d.ts +115 -0
  163. package/src/services/device-tier.d.ts.map +1 -0
  164. package/src/services/device-tier.test.ts +371 -0
  165. package/src/services/device-tier.ts +410 -0
  166. package/src/services/downloader.d.ts +82 -0
  167. package/src/services/downloader.d.ts.map +1 -0
  168. package/src/services/downloader.test.ts +724 -0
  169. package/src/services/downloader.ts +899 -0
  170. package/src/services/engine-direct-bundle.test.ts +58 -0
  171. package/src/services/engine-streaming.test.ts +80 -0
  172. package/src/services/engine.d.ts +534 -0
  173. package/src/services/engine.d.ts.map +1 -0
  174. package/src/services/engine.ts +1891 -0
  175. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  176. package/src/services/ensure-local-artifacts.test.ts +368 -0
  177. package/src/services/ensure-local-artifacts.ts +351 -0
  178. package/src/services/external-scanner.d.ts +17 -0
  179. package/src/services/external-scanner.d.ts.map +1 -0
  180. package/src/services/external-scanner.ts +312 -0
  181. package/src/services/ffi-llm-mock.ts +354 -0
  182. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  183. package/src/services/ffi-streaming-backend.d.ts +180 -0
  184. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  185. package/src/services/ffi-streaming-backend.ts +382 -0
  186. package/src/services/ffi-streaming-runner.d.ts +122 -0
  187. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  188. package/src/services/ffi-streaming-runner.test.ts +60 -0
  189. package/src/services/ffi-streaming-runner.ts +354 -0
  190. package/src/services/ffi-unload-ordering.test.ts +162 -0
  191. package/src/services/gpu-autotune.ts +534 -0
  192. package/src/services/gpu-detect.ts +139 -0
  193. package/src/services/handler-registry.d.ts +72 -0
  194. package/src/services/handler-registry.d.ts.map +1 -0
  195. package/src/services/handler-registry.ts +240 -0
  196. package/src/services/hardware.d.ts +63 -0
  197. package/src/services/hardware.d.ts.map +1 -0
  198. package/src/services/hardware.test.ts +183 -0
  199. package/src/services/hardware.ts +404 -0
  200. package/src/services/hf-search.d.ts +26 -0
  201. package/src/services/hf-search.d.ts.map +1 -0
  202. package/src/services/hf-search.test.ts +69 -0
  203. package/src/services/hf-search.ts +420 -0
  204. package/src/services/image-description-runtime.d.ts +14 -0
  205. package/src/services/image-description-runtime.d.ts.map +1 -0
  206. package/src/services/image-description-runtime.test.ts +61 -0
  207. package/src/services/image-description-runtime.ts +118 -0
  208. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  209. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  210. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  211. package/src/services/imagegen/backend-selector.d.ts +118 -0
  212. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  213. package/src/services/imagegen/backend-selector.ts +281 -0
  214. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  215. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  216. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  217. package/src/services/imagegen/errors.d.ts +16 -0
  218. package/src/services/imagegen/errors.d.ts.map +1 -0
  219. package/src/services/imagegen/errors.ts +40 -0
  220. package/src/services/imagegen/index.d.ts +58 -0
  221. package/src/services/imagegen/index.d.ts.map +1 -0
  222. package/src/services/imagegen/index.ts +144 -0
  223. package/src/services/imagegen/mflux.d.ts +74 -0
  224. package/src/services/imagegen/mflux.d.ts.map +1 -0
  225. package/src/services/imagegen/mflux.ts +313 -0
  226. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  227. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  228. package/src/services/imagegen/sd-cpp.ts +718 -0
  229. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  230. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  231. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  232. package/src/services/imagegen/types.d.ts +181 -0
  233. package/src/services/imagegen/types.d.ts.map +1 -0
  234. package/src/services/imagegen/types.ts +193 -0
  235. package/src/services/index.d.ts +30 -0
  236. package/src/services/index.d.ts.map +1 -0
  237. package/src/services/index.ts +225 -0
  238. package/src/services/inference-capabilities.d.ts +132 -0
  239. package/src/services/inference-capabilities.d.ts.map +1 -0
  240. package/src/services/inference-capabilities.test.ts +75 -0
  241. package/src/services/inference-capabilities.ts +204 -0
  242. package/src/services/inference-telemetry.d.ts +59 -0
  243. package/src/services/inference-telemetry.d.ts.map +1 -0
  244. package/src/services/inference-telemetry.ts +143 -0
  245. package/src/services/ios-llama-streaming.ts +248 -0
  246. package/src/services/kv-spill.d.ts +189 -0
  247. package/src/services/kv-spill.d.ts.map +1 -0
  248. package/src/services/kv-spill.test.ts +222 -0
  249. package/src/services/kv-spill.ts +356 -0
  250. package/src/services/latency-trace.d.ts +346 -0
  251. package/src/services/latency-trace.d.ts.map +1 -0
  252. package/src/services/latency-trace.test.ts +266 -0
  253. package/src/services/latency-trace.ts +844 -0
  254. package/src/services/llama-server-metrics.ts +304 -0
  255. package/src/services/llm-streaming-binding.d.ts +96 -0
  256. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  257. package/src/services/llm-streaming-binding.ts +136 -0
  258. package/src/services/load-args.d.ts +82 -0
  259. package/src/services/load-args.d.ts.map +1 -0
  260. package/src/services/load-args.ts +81 -0
  261. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  262. package/src/services/manifest/index.d.ts +4 -0
  263. package/src/services/manifest/index.d.ts.map +1 -0
  264. package/src/services/manifest/index.ts +66 -0
  265. package/src/services/manifest/manifest.test.ts +693 -0
  266. package/src/services/manifest/schema.d.ts +715 -0
  267. package/src/services/manifest/schema.d.ts.map +1 -0
  268. package/src/services/manifest/schema.ts +655 -0
  269. package/src/services/manifest/types.d.ts +30 -0
  270. package/src/services/manifest/types.d.ts.map +1 -0
  271. package/src/services/manifest/types.ts +55 -0
  272. package/src/services/manifest/validator.d.ts +66 -0
  273. package/src/services/manifest/validator.d.ts.map +1 -0
  274. package/src/services/manifest/validator.ts +569 -0
  275. package/src/services/memory-arbiter.d.ts +343 -0
  276. package/src/services/memory-arbiter.d.ts.map +1 -0
  277. package/src/services/memory-arbiter.test.ts +419 -0
  278. package/src/services/memory-arbiter.ts +1000 -0
  279. package/src/services/memory-monitor.d.ts +119 -0
  280. package/src/services/memory-monitor.d.ts.map +1 -0
  281. package/src/services/memory-monitor.test.ts +208 -0
  282. package/src/services/memory-monitor.ts +296 -0
  283. package/src/services/memory-pressure.d.ts +127 -0
  284. package/src/services/memory-pressure.d.ts.map +1 -0
  285. package/src/services/memory-pressure.ts +413 -0
  286. package/src/services/mtp-doctor.d.ts +13 -0
  287. package/src/services/mtp-doctor.d.ts.map +1 -0
  288. package/src/services/mtp-doctor.ts +78 -0
  289. package/src/services/network-policy.d.ts +127 -0
  290. package/src/services/network-policy.d.ts.map +1 -0
  291. package/src/services/network-policy.ts +346 -0
  292. package/src/services/paths.d.ts +6 -0
  293. package/src/services/paths.d.ts.map +1 -0
  294. package/src/services/paths.ts +25 -0
  295. package/src/services/planner-skeleton.d.ts +124 -0
  296. package/src/services/planner-skeleton.d.ts.map +1 -0
  297. package/src/services/planner-skeleton.ts +175 -0
  298. package/src/services/providers.d.ts +38 -0
  299. package/src/services/providers.d.ts.map +1 -0
  300. package/src/services/providers.ts +507 -0
  301. package/src/services/ram-budget-cache.test.ts +163 -0
  302. package/src/services/ram-budget.d.ts +110 -0
  303. package/src/services/ram-budget.d.ts.map +1 -0
  304. package/src/services/ram-budget.ts +0 -0
  305. package/src/services/readiness.d.ts +9 -0
  306. package/src/services/readiness.d.ts.map +1 -0
  307. package/src/services/readiness.test.ts +87 -0
  308. package/src/services/readiness.ts +238 -0
  309. package/src/services/recommendation.d.ts +111 -0
  310. package/src/services/recommendation.d.ts.map +1 -0
  311. package/src/services/recommendation.ts +672 -0
  312. package/src/services/registry.d.ts +35 -0
  313. package/src/services/registry.d.ts.map +1 -0
  314. package/src/services/registry.ts +151 -0
  315. package/src/services/router-handler.d.ts +92 -0
  316. package/src/services/router-handler.d.ts.map +1 -0
  317. package/src/services/router-handler.test.ts +45 -0
  318. package/src/services/router-handler.ts +376 -0
  319. package/src/services/routing-policy.d.ts +55 -0
  320. package/src/services/routing-policy.d.ts.map +1 -0
  321. package/src/services/routing-policy.ts +228 -0
  322. package/src/services/routing-preferences.d.ts +8 -0
  323. package/src/services/routing-preferences.d.ts.map +1 -0
  324. package/src/services/routing-preferences.ts +15 -0
  325. package/src/services/runtime-target.d.ts +98 -0
  326. package/src/services/runtime-target.d.ts.map +1 -0
  327. package/src/services/runtime-target.ts +154 -0
  328. package/src/services/service.d.ts +128 -0
  329. package/src/services/service.d.ts.map +1 -0
  330. package/src/services/service.test.ts +223 -0
  331. package/src/services/service.ts +735 -0
  332. package/src/services/session-pool.d.ts +72 -0
  333. package/src/services/session-pool.d.ts.map +1 -0
  334. package/src/services/session-pool.ts +153 -0
  335. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  336. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  337. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  338. package/src/services/structured-output/deterministic-repair.ts +443 -0
  339. package/src/services/structured-output/index.ts +4 -0
  340. package/src/services/structured-output.d.ts +311 -0
  341. package/src/services/structured-output.d.ts.map +1 -0
  342. package/src/services/structured-output.test.ts +483 -0
  343. package/src/services/structured-output.ts +712 -0
  344. package/src/services/transcription-priority.test.ts +211 -0
  345. package/src/services/tts/errors.ts +46 -0
  346. package/src/services/tts/index.ts +214 -0
  347. package/src/services/tts/tts-audio-cache.ts +235 -0
  348. package/src/services/tts/types.ts +157 -0
  349. package/src/services/types.d.ts +19 -0
  350. package/src/services/types.d.ts.map +1 -0
  351. package/src/services/types.ts +55 -0
  352. package/src/services/verify-on-device.d.ts +34 -0
  353. package/src/services/verify-on-device.d.ts.map +1 -0
  354. package/src/services/verify-on-device.test.ts +87 -0
  355. package/src/services/verify-on-device.ts +127 -0
  356. package/src/services/verify.d.ts +8 -0
  357. package/src/services/verify.d.ts.map +1 -0
  358. package/src/services/verify.ts +13 -0
  359. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  360. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  361. package/src/services/vision/aosp-unavailable.ts +163 -0
  362. package/src/services/vision/capacitor-llama.d.ts +99 -0
  363. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  364. package/src/services/vision/capacitor-llama.ts +255 -0
  365. package/src/services/vision/cloud-fallback.d.ts +47 -0
  366. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  367. package/src/services/vision/cloud-fallback.test.ts +243 -0
  368. package/src/services/vision/cloud-fallback.ts +268 -0
  369. package/src/services/vision/fallback-chain.test.ts +86 -0
  370. package/src/services/vision/hash.d.ts +71 -0
  371. package/src/services/vision/hash.d.ts.map +1 -0
  372. package/src/services/vision/hash.ts +157 -0
  373. package/src/services/vision/index.d.ts +95 -0
  374. package/src/services/vision/index.d.ts.map +1 -0
  375. package/src/services/vision/index.ts +251 -0
  376. package/src/services/vision/llama-server.d.ts +73 -0
  377. package/src/services/vision/llama-server.d.ts.map +1 -0
  378. package/src/services/vision/llama-server.ts +177 -0
  379. package/src/services/vision/types.d.ts +153 -0
  380. package/src/services/vision/types.d.ts.map +1 -0
  381. package/src/services/vision/types.ts +154 -0
  382. package/src/services/vision/vast-fallback.d.ts +18 -0
  383. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  384. package/src/services/vision/vast-fallback.ts +127 -0
  385. package/src/services/vision-embedding-cache.d.ts +98 -0
  386. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  387. package/src/services/vision-embedding-cache.ts +189 -0
  388. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  389. package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
  390. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  391. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  392. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  393. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  394. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  395. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  396. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  397. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  398. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  399. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  400. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  401. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
  402. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  403. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  404. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  405. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  406. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  407. package/src/services/voice/audio-frame-consumer.ts +491 -0
  408. package/src/services/voice/barge-in.d.ts +112 -0
  409. package/src/services/voice/barge-in.d.ts.map +1 -0
  410. package/src/services/voice/barge-in.test.ts +244 -0
  411. package/src/services/voice/barge-in.ts +336 -0
  412. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  413. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  414. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  415. package/src/services/voice/cancellation-coordinator.ts +269 -0
  416. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  417. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  418. package/src/services/voice/checkpoint-manager.ts +401 -0
  419. package/src/services/voice/checkpoint-policy.ts +336 -0
  420. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  421. package/src/services/voice/e2e-harness.test.ts +182 -0
  422. package/src/services/voice/e2e-harness.ts +743 -0
  423. package/src/services/voice/eager-context-builder.d.ts +170 -0
  424. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  425. package/src/services/voice/eager-context-builder.ts +262 -0
  426. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  427. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  428. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  429. package/src/services/voice/embedding-server.ts +200 -0
  430. package/src/services/voice/embedding.d.ts +133 -0
  431. package/src/services/voice/embedding.d.ts.map +1 -0
  432. package/src/services/voice/embedding.test.ts +148 -0
  433. package/src/services/voice/embedding.ts +244 -0
  434. package/src/services/voice/emotion-attribution.d.ts +68 -0
  435. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  436. package/src/services/voice/emotion-attribution.test.ts +129 -0
  437. package/src/services/voice/emotion-attribution.ts +361 -0
  438. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  439. package/src/services/voice/engine-bridge.d.ts +746 -0
  440. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  441. package/src/services/voice/engine-bridge.test.ts +384 -0
  442. package/src/services/voice/engine-bridge.ts +2226 -0
  443. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  444. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  445. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  446. package/src/services/voice/eot-classifier.d.ts +214 -0
  447. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  448. package/src/services/voice/eot-classifier.ts +533 -0
  449. package/src/services/voice/errors.d.ts +20 -0
  450. package/src/services/voice/errors.d.ts.map +1 -0
  451. package/src/services/voice/errors.ts +32 -0
  452. package/src/services/voice/expressive-tags.d.ts +158 -0
  453. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  454. package/src/services/voice/expressive-tags.ts +405 -0
  455. package/src/services/voice/ffi-bindings.d.ts +636 -0
  456. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  457. package/src/services/voice/ffi-bindings.test.ts +671 -0
  458. package/src/services/voice/ffi-bindings.ts +3050 -0
  459. package/src/services/voice/first-line-cache.d.ts +181 -0
  460. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  461. package/src/services/voice/first-line-cache.ts +725 -0
  462. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  463. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  464. package/src/services/voice/fused-eot-scorer.ts +135 -0
  465. package/src/services/voice/index.d.ts +91 -0
  466. package/src/services/voice/index.d.ts.map +1 -0
  467. package/src/services/voice/index.ts +481 -0
  468. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  469. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  470. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  471. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  472. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  473. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  474. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  475. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  476. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  477. package/src/services/voice/kokoro/index.ts +79 -0
  478. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  479. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  480. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  481. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  482. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  483. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  484. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  485. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  486. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  487. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  488. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  489. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  490. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  491. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  492. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  493. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  494. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  495. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  496. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  497. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  498. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  499. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  501. package/src/services/voice/kokoro/types.d.ts +82 -0
  502. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  503. package/src/services/voice/kokoro/types.ts +95 -0
  504. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  505. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  506. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  507. package/src/services/voice/kokoro/voices.d.ts +30 -0
  508. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  509. package/src/services/voice/kokoro/voices.ts +64 -0
  510. package/src/services/voice/lifecycle.d.ts +135 -0
  511. package/src/services/voice/lifecycle.d.ts.map +1 -0
  512. package/src/services/voice/lifecycle.test.ts +315 -0
  513. package/src/services/voice/lifecycle.ts +301 -0
  514. package/src/services/voice/live-diarization-session.d.ts +96 -0
  515. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  516. package/src/services/voice/live-diarization-session.ts +289 -0
  517. package/src/services/voice/mic-source.d.ts +136 -0
  518. package/src/services/voice/mic-source.d.ts.map +1 -0
  519. package/src/services/voice/mic-source.test.ts +210 -0
  520. package/src/services/voice/mic-source.ts +503 -0
  521. package/src/services/voice/optimistic-policy.d.ts +109 -0
  522. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  523. package/src/services/voice/optimistic-policy.test.ts +101 -0
  524. package/src/services/voice/optimistic-policy.ts +192 -0
  525. package/src/services/voice/optimistic-rollback.ts +343 -0
  526. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  527. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  528. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  529. package/src/services/voice/partial-stabilizer.ts +140 -0
  530. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  531. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  532. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  533. package/src/services/voice/phrase-cache.d.ts +76 -0
  534. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  535. package/src/services/voice/phrase-cache.test.ts +242 -0
  536. package/src/services/voice/phrase-cache.ts +186 -0
  537. package/src/services/voice/phrase-chunker.d.ts +62 -0
  538. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  539. package/src/services/voice/phrase-chunker.test.ts +239 -0
  540. package/src/services/voice/phrase-chunker.ts +281 -0
  541. package/src/services/voice/pipeline-impls.d.ts +151 -0
  542. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  543. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  544. package/src/services/voice/pipeline-impls.test.ts +292 -0
  545. package/src/services/voice/pipeline-impls.ts +315 -0
  546. package/src/services/voice/pipeline.d.ts +216 -0
  547. package/src/services/voice/pipeline.d.ts.map +1 -0
  548. package/src/services/voice/pipeline.ts +505 -0
  549. package/src/services/voice/prefill-client.d.ts +123 -0
  550. package/src/services/voice/prefill-client.d.ts.map +1 -0
  551. package/src/services/voice/prefill-client.ts +316 -0
  552. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  553. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  554. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  555. package/src/services/voice/profile-store.d.ts +248 -0
  556. package/src/services/voice/profile-store.d.ts.map +1 -0
  557. package/src/services/voice/profile-store.ts +887 -0
  558. package/src/services/voice/ring-buffer.d.ts +40 -0
  559. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  560. package/src/services/voice/ring-buffer.ts +105 -0
  561. package/src/services/voice/rollback-queue.d.ts +24 -0
  562. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  563. package/src/services/voice/rollback-queue.ts +74 -0
  564. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  565. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  566. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  567. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  568. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  569. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  570. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  571. package/src/services/voice/scheduler.d.ts +146 -0
  572. package/src/services/voice/scheduler.d.ts.map +1 -0
  573. package/src/services/voice/scheduler.t2.test.ts +141 -0
  574. package/src/services/voice/scheduler.ts +927 -0
  575. package/src/services/voice/shared-resources.d.ts +190 -0
  576. package/src/services/voice/shared-resources.d.ts.map +1 -0
  577. package/src/services/voice/shared-resources.ts +320 -0
  578. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  579. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  580. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  581. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  582. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  583. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  584. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  585. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  586. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  587. package/src/services/voice/speaker/diarizer.ts +218 -0
  588. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  589. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  590. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  591. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  592. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  593. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  594. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  595. package/src/services/voice/speaker/encoder.d.ts +37 -0
  596. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  597. package/src/services/voice/speaker/encoder.ts +105 -0
  598. package/src/services/voice/speaker-imprint.d.ts +83 -0
  599. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  600. package/src/services/voice/speaker-imprint.test.ts +185 -0
  601. package/src/services/voice/speaker-imprint.ts +312 -0
  602. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  603. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  604. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  605. package/src/services/voice/speaker-preset-cache.ts +195 -0
  606. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  607. package/src/services/voice/system-audio-sink.d.ts +73 -0
  608. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  609. package/src/services/voice/system-audio-sink.test.ts +29 -0
  610. package/src/services/voice/system-audio-sink.ts +366 -0
  611. package/src/services/voice/transcriber.d.ts +244 -0
  612. package/src/services/voice/transcriber.d.ts.map +1 -0
  613. package/src/services/voice/transcriber.test.ts +392 -0
  614. package/src/services/voice/transcriber.ts +704 -0
  615. package/src/services/voice/turn-controller.d.ts +183 -0
  616. package/src/services/voice/turn-controller.d.ts.map +1 -0
  617. package/src/services/voice/turn-controller.test.ts +575 -0
  618. package/src/services/voice/turn-controller.ts +596 -0
  619. package/src/services/voice/types.d.ts +643 -0
  620. package/src/services/voice/types.d.ts.map +1 -0
  621. package/src/services/voice/types.ts +699 -0
  622. package/src/services/voice/vad.d.ts +282 -0
  623. package/src/services/voice/vad.d.ts.map +1 -0
  624. package/src/services/voice/vad.test.ts +480 -0
  625. package/src/services/voice/vad.ts +827 -0
  626. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  627. package/src/services/voice/voice-budget.d.ts +241 -0
  628. package/src/services/voice/voice-budget.d.ts.map +1 -0
  629. package/src/services/voice/voice-budget.test.ts +420 -0
  630. package/src/services/voice/voice-budget.ts +656 -0
  631. package/src/services/voice/voice-duet.test.ts +375 -0
  632. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  633. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  634. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  635. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  636. package/src/services/voice/voice-preset-format.d.ts +158 -0
  637. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  638. package/src/services/voice/voice-preset-format.ts +700 -0
  639. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  640. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  641. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  642. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  643. package/src/services/voice/voice-profile-artifact.ts +518 -0
  644. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  645. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  646. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  647. package/src/services/voice/voice-profile-routes.ts +425 -0
  648. package/src/services/voice/voice-scenario.ts +154 -0
  649. package/src/services/voice/voice-settings.d.ts +82 -0
  650. package/src/services/voice/voice-settings.d.ts.map +1 -0
  651. package/src/services/voice/voice-settings.ts +172 -0
  652. package/src/services/voice/voice-state-machine.d.ts +364 -0
  653. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  654. package/src/services/voice/voice-state-machine.ts +727 -0
  655. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  656. package/src/services/voice/voice-workbench-report.ts +326 -0
  657. package/src/services/voice/voice-workbench.test.ts +158 -0
  658. package/src/services/voice/voice.test.ts +1070 -0
  659. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  660. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  661. package/src/services/voice/wake-word-ggml.ts +320 -0
  662. package/src/services/voice/wake-word.d.ts +255 -0
  663. package/src/services/voice/wake-word.d.ts.map +1 -0
  664. package/src/services/voice/wake-word.test.ts +298 -0
  665. package/src/services/voice/wake-word.ts +554 -0
  666. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  667. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  668. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  669. package/src/services/voice-model-updater.d.ts +240 -0
  670. package/src/services/voice-model-updater.d.ts.map +1 -0
  671. package/src/services/voice-model-updater.ts +724 -0
  672. package/src/services/voice-prewarm.d.ts +3 -0
  673. package/src/services/voice-prewarm.d.ts.map +1 -0
  674. package/src/services/voice-prewarm.ts +51 -0
  675. package/dist/index.d.ts +0 -37
  676. package/dist/index.js +0 -1098
@@ -0,0 +1,230 @@
1
+ /**
2
+ * Soft cloud-fallback wrapper for local-inference TEXT_LARGE / TEXT_SMALL.
3
+ *
4
+ * Why this exists: on mobile (AOSP / iOS) the local llama backend has very
5
+ * different failure modes from a desktop process. The model GGUF may not be
6
+ * staged yet, the FFI dlopen may have failed, the device may be in low-power
7
+ * mode and refuse to prefill, or the user may have explicitly disabled the
8
+ * local engine. We do not want any of those states to surface as a
9
+ * "No handler found for delegate type: TEXT_LARGE" runtime error — when an
10
+ * Anthropic / OpenAI / Eliza Cloud handler is also registered, the runtime
11
+ * should transparently fall through to cloud.
12
+ *
13
+ * Design constraints (per AGENTS.md):
14
+ * - No silent try/catch. The wrapper distinguishes "ran successfully" from
15
+ * "ran and decided to fallback" via an EXPLICIT typed return:
16
+ * { kind: "ok"; text: string }
17
+ * | { kind: "fallback"; reason: FallbackReason }
18
+ * Callers branch on `kind`. The wrapper does NOT swallow errors —
19
+ * any unhandled throw bubbles up to the runtime.
20
+ * - Local errors are CLASSIFIED. Unrecoverable bugs (programming errors,
21
+ * out-of-memory, OS kill signals) propagate, as do thrown aborts.
22
+ * Recoverable conditions (model not staged, backend overloaded,
23
+ * downstream provider transient) trigger fallback.
24
+ * - Cloud forwarding is registry-driven. We look up the next-highest
25
+ * priority handler from the runtime's model registry rather than
26
+ * hardcoding "anthropic" or "openai". That keeps the wrapper neutral
27
+ * to which cloud is paired.
28
+ */
29
+
30
+ import type {
31
+ GenerateTextParams,
32
+ IAgentRuntime,
33
+ JsonValue,
34
+ ModelTypeName,
35
+ } from "@elizaos/core";
36
+
37
+ export type FallbackReason = // machine-readable reason attached to a fallback outcome or classification
38
+ /** Local backend reported it can't serve this request at all (no model, FFI dlopen failed, etc). */
39
+ | "local-unavailable"
40
+ /** Local backend was busy, queued past a deadline, or refused (thermal, low-power). */
41
+ | "local-overloaded"
42
+ /** Local backend errored during prefill or decode. Also the reason `classifyLocalError` reports for errors that are NOT fallback-eligible. */
43
+ | "local-error"
44
+ /** Caller cancelled before local could finish; cloud may still serve. Note: `classifyLocalError` pairs this reason with `fallback: false` for a thrown AbortError. */
45
+ | "local-aborted-pre-completion"
46
+ /** Local handler isn't registered on this runtime build. */
47
+ | "local-not-registered";
48
+
49
+ export type LocalGenerateOutcome = // explicit result of a local generate attempt; callers branch on `kind`
50
+ | { kind: "ok"; text: string } // local produced text; the wrapper returns it unchanged
51
+ | { kind: "fallback"; reason: FallbackReason; cause?: Error }; // delegate to cloud; `cause` is attached to the thrown error when no cloud handler exists
52
+
53
+ /**
54
+ * Classify a thrown error as fallback-eligible (`fallback: true`) or as a hard failure that should propagate (`fallback: false`).
55
+ * Matching uses `err.name` and case-insensitive substrings of `err.message`. The split is conservative: only well-known recoverable
56
+ * shapes flip to fallback; anything else (non-Error throws, AbortError, KvSpillUnsupportedError, unrecognised messages) is reported with
57
+ * `fallback: false` so the operator sees the real failure instead of a silent rotation to cloud. A `reason` is always returned, even when `fallback` is false.
58
+ */
59
+ export function classifyLocalError(err: unknown): {
60
+ fallback: boolean;
61
+ reason: FallbackReason;
62
+ } {
63
+ if (err instanceof Error) {
64
+ const name = err.name;
65
+ const msg = err.message.toLowerCase(); // lower-cased once; every needle below must therefore be lower-case
66
+ if (name === "AbortError") {
67
+ return { fallback: false, reason: "local-aborted-pre-completion" }; // aborts are not fallback-eligible here
68
+ }
69
+ // KV-cache spill cannot meet the latency budget on this device — this is
70
+ // a deliberate hard-fail (packages/inference/AGENTS.md §3): the engine
71
+ // surfaces it to the UI as a structured error. There is no silent
72
+ // rotation to cloud and no "load anyway, slowly".
73
+ if (name === "KvSpillUnsupportedError") {
74
+ return { fallback: false, reason: "local-error" };
75
+ }
76
+ if ( // backend or model is not present / could not be loaded
77
+ msg.includes("no bundled") ||
78
+ msg.includes("not installed in this build") ||
79
+ msg.includes("node-llama-cpp is not installed") ||
80
+ msg.includes("no local model is active") ||
81
+ msg.includes("dlopen") ||
82
+ msg.includes("missing libllama")
83
+ ) {
84
+ return { fallback: true, reason: "local-unavailable" };
85
+ }
86
+ if ( // checked before the generic llama_* needles so a message matching both is reported as overloaded
87
+ msg.includes("decode: failed to find a memory slot") ||
88
+ msg.includes("thermal") ||
89
+ msg.includes("low-power")
90
+ ) {
91
+ return { fallback: true, reason: "local-overloaded" };
92
+ }
93
+ if ( // error text that names a llama/ggml routine
94
+ msg.includes("llama_decode") ||
95
+ msg.includes("llama_tokenize") ||
96
+ msg.includes("llama_sampler") ||
97
+ msg.includes("ggml_assert")
98
+ ) {
99
+ return { fallback: true, reason: "local-error" };
100
+ }
101
+ }
102
+ return { fallback: false, reason: "local-error" }; // non-Error throw or unrecognised shape: not fallback-eligible
103
+ }
104
+
105
+ /**
106
+ * Runtime shape used to locate a cloud TEXT_* handler in the runtime's model registry that is NOT
107
+ * the supplied `localProvider`: `models` maps a model type to its registered handler entries. The runtime stores handlers per-modelType
108
+ * sorted by priority; `findCloudCandidate` walks the list and skips our own provider so we
109
+ * delegate to cloud instead of recursing into local.
110
+ */
111
+ export type RuntimeWithModelLookup = IAgentRuntime & {
112
+ models: Map<
113
+ string,
114
+ Array<{
115
+ provider: string;
116
+ priority: number;
117
+ handler: (
118
+ runtime: IAgentRuntime,
119
+ params: Record<string, JsonValue | object>,
120
+ ) => Promise<JsonValue | object>;
121
+ }>
122
+ >;
123
+ };
124
+
125
+ export interface CloudCandidate { // the registry entry selected by `findCloudCandidate`
126
+ provider: string; // provider id copied from the registry entry
127
+ priority: number; // priority copied from the registry entry
128
+ handler: ( // the entry's handler; called with the original runtime and params
129
+ runtime: IAgentRuntime,
130
+ params: Record<string, JsonValue | object>,
131
+ ) => Promise<JsonValue | object>;
132
+ }
133
+
134
+ export function findCloudCandidate( // returns the first registered entry for `modelType` whose provider is not `excludeProvider`, or null
135
+ runtime: IAgentRuntime,
136
+ modelType: ModelTypeName | string,
137
+ excludeProvider: string,
138
+ ): CloudCandidate | null {
139
+ const r = runtime as RuntimeWithModelLookup; // unchecked cast: assumes the runtime exposes a `models` Map — TODO confirm
140
+ const entries = r.models.get(String(modelType));
141
+ if (!entries || entries.length === 0) return null; // nothing registered for this model type
142
+ // Entries are used as-is (not re-sorted here); they are expected to be sorted highest priority first by the runtime's registration. We want
143
+ // the FIRST entry whose provider differs from `excludeProvider`; that's our cloud candidate.
144
+ for (const entry of entries) {
145
+ if (entry.provider !== excludeProvider) {
146
+ return {
147
+ provider: entry.provider,
148
+ priority: entry.priority,
149
+ handler: entry.handler,
150
+ };
151
+ }
152
+ }
153
+ return null; // only the excluded provider is registered
154
+ }
155
+
156
+ export interface CloudFallbackOptions {
157
+ /** Provider id of the local handler being wrapped (e.g. "eliza-aosp-llama"). */
158
+ localProvider: string;
159
+ /** Model type this wrapper services (TEXT_LARGE, TEXT_SMALL, etc). */
160
+ modelType: ModelTypeName | string;
161
+ /**
162
+ * The local handler we wrap. Returns `{ kind: "ok" }` on success;
163
+ * `{ kind: "fallback", reason }` to delegate to cloud. Anything it throws is not caught by the wrapper.
164
+ */
165
+ localGenerate: (
166
+ runtime: IAgentRuntime,
167
+ params: GenerateTextParams,
168
+ ) => Promise<LocalGenerateOutcome>;
169
+ /** Optional logger; defaults to a no-op so we stay framework-free. */
170
+ log?: (message: string, detail?: Record<string, unknown>) => void;
171
+ }
172
+
173
+ /**
174
+ * Build a registered-handler-shape function that:
175
+ * 1. Calls `localGenerate` (anything it throws propagates; there is no try/catch here).
176
+ * 2. If `localGenerate` returns `{ kind: "ok" }`, returns that text.
177
+ * 3. If it returns `{ kind: "fallback" }`, looks up the next-best cloud
178
+ * handler for the same modelType and forwards the original params to it. If no cloud
179
+ * handler exists, throws an `Error` whose message names the fallback reason (with `cause` attached when the outcome carried one).
180
+ * 4. Throws if the cloud handler resolves to a non-string value.
181
+ * The returned function is suitable for `runtime.registerModel`.
182
+ */
183
+ export function makeCloudFallbackHandler(
184
+ opts: CloudFallbackOptions,
185
+ ): (
186
+ runtime: IAgentRuntime,
187
+ params: Record<string, JsonValue | object>,
188
+ ) => Promise<string> {
189
+ const log = opts.log ?? (() => undefined); // default logger is a no-op
190
+ return async (runtime, params) => {
191
+ const generateParams = params as unknown as GenerateTextParams; // unchecked cast: params are assumed to be GenerateTextParams-shaped
192
+ const local = await opts.localGenerate(runtime, generateParams);
193
+ if (local.kind === "ok") {
194
+ return local.text;
195
+ }
196
+ log(
197
+ `[cloud-fallback] local handler returned fallback (reason=${local.reason})`,
198
+ { modelType: String(opts.modelType), reason: local.reason },
199
+ );
200
+ const candidate = findCloudCandidate(
201
+ runtime,
202
+ opts.modelType,
203
+ opts.localProvider,
204
+ );
205
+ if (!candidate) {
206
+ const err = new Error(
207
+ `[cloud-fallback] Local inference reported ${local.reason} and no cloud handler is registered for ${String(opts.modelType)}. Pair Eliza Cloud or install a provider plugin (anthropic/openai) to enable fallback.`,
208
+ );
209
+ if (local.cause) {
210
+ (err as Error & { cause?: unknown }).cause = local.cause; // keep the local failure reachable from the thrown error
211
+ }
212
+ throw err;
213
+ }
214
+ log(
215
+ `[cloud-fallback] forwarding to ${candidate.provider} @ priority ${candidate.priority}`,
216
+ {
217
+ modelType: String(opts.modelType),
218
+ provider: candidate.provider,
219
+ reason: local.reason,
220
+ },
221
+ );
222
+ const result = await candidate.handler(runtime, params); // the untyped original params are forwarded, not `generateParams`
223
+ if (typeof result !== "string") {
224
+ throw new Error(
225
+ `[cloud-fallback] Cloud handler ${candidate.provider} returned non-string result for ${String(opts.modelType)}.`,
226
+ );
227
+ }
228
+ return result;
229
+ };
230
+ }
@@ -0,0 +1,142 @@
1
+ /**
2
+ * Conversation registry for the local-inference path.
3
+ *
4
+ * Today's slot allocation is purely a hash function: `deriveSlotId` maps a
5
+ * `promptCacheKey` (or any stable string) to `slot_id` in `[0, parallel)`.
6
+ * That works for one-shot calls but breaks for long agentic loops:
7
+ *
8
+ * - Two distinct conversations whose cache keys hash to the same slot
9
+ * evict each other's KV every turn (slot thrashing).
10
+ * - The current high-water mark of concurrent conversations is invisible,
11
+ * so `--parallel N` cannot be tuned to fit.
12
+ * - There is no notion of an explicit "I am still using this slot" lease,
13
+ * so eviction is purely best-effort.
14
+ *
15
+ * This registry keeps a per-conversation reservation. `openConversation`
16
+ * picks the lowest-loaded slot and pins the conversation to it; subsequent
17
+ * `generateInConversation` calls always land on the same slot. When the
18
+ * pool is full, slot reuse falls back to the same-as-before hash policy
19
+ * (two leases on the same slot still serialise correctly via the dispatcher's
20
+ * generation queue).
21
+ *
22
+ * The registry tracks the high-water mark of concurrently-open conversations
23
+ * so the engine can warn, or later restart llama-server with a higher
24
+ * --parallel, when the load outgrows the configured slot count.
25
+ */
26
+ /**
27
+ * Opaque handle returned by `openConversation`. Callers MUST treat this as
28
+ * opaque — the registry owns the slot id and lifetime.
29
+ */
30
+ export interface ConversationHandle {
31
+ readonly conversationId: string;
32
+ readonly modelId: string;
33
+ /**
34
+ * Pinned slot id in `[0, parallel)`, or `-1` when slot pinning is disabled
35
+ * (parallel <= 0). Used by both backends as the cache key:
36
+ * - llama-server: forwarded as `slot_id` in the request payload.
37
+ * - node-llama-cpp: combined with the conversation id to derive the
38
+ * session-pool key so identical conversations share a session.
39
+ */
40
+ readonly slotId: number;
41
+ /** Wall-clock ms when the handle was opened. */
42
+ readonly openedAtMs: number;
43
+ /** Wall-clock ms when the handle was last touched (open or generate). */
44
+ lastUsedMs: number;
45
+ /** TTL after which the registry MAY auto-close on the next sweep. */
46
+ readonly ttlMs: number;
47
+ /** True when `closeConversation` has been called; further use is rejected. */
48
+ closed: boolean;
49
+ }
50
+ export interface OpenConversationArgs {
51
+ conversationId: string;
52
+ modelId: string;
53
+ /** Slot count from the running server (`--parallel N`). Defaults to 1. */
54
+ parallel?: number;
55
+ /**
56
+ * TTL after which the handle is considered idle and may be auto-closed
57
+ * by `evictIdle`. Defaults to 60 minutes — long enough for an LLM call
58
+ * to finish even on a slow drafter, short enough to recover from forgotten
59
+ * close calls within the long-cache window.
60
+ */
61
+ ttlMs?: number;
62
+ }
63
+ /**
64
+ * In-memory registry of open conversation handles. A single instance is
65
+ * shared by the engine; each backend reads from it on every generate to
66
+ * decide which slot to pin to.
67
+ */
68
+ export declare class ConversationRegistry {
69
+ private readonly handles;
70
+ /** Per-slot reference count; lowest-loaded slot wins on next open. */
71
+ private readonly slotLoad;
72
+ /** Largest concurrent open count seen; the engine reads this for parallel auto-tune. */
73
+ private highWaterMark;
74
+ /**
75
+ * Lookup / open a conversation handle. Idempotent for the same
76
+ * conversation id + model id; callers can call this on every turn
77
+ * without leaking handles. When the call is reusing an existing handle,
78
+ * `lastUsedMs` is bumped for LRU-style eviction tracking.
79
+ */
80
+ open(args: OpenConversationArgs): ConversationHandle;
81
+ /**
82
+ * Look up an open handle by conversation+model. Returns null when the
83
+ * conversation has not been opened or has already been closed. Bumps
84
+ * `lastUsedMs` so an LRU sweep treats reads as activity.
85
+ */
86
+ get(conversationId: string, modelId: string): ConversationHandle | null;
87
+ /**
88
+ * Close + drop a handle. Idempotent — closing an unknown / already-closed
89
+ * handle has no additional effect, so callers can call this from cleanup paths
90
+ * unconditionally.
91
+ */
92
+ close(conversationId: string, modelId: string): void;
93
+ /**
94
+ * Sweep handles whose `lastUsedMs` is older than their TTL. Returns the
95
+ * conversation ids dropped so callers can persist final KV state to
96
+ * disk, etc. Safe to call on a timer.
97
+ */
98
+ evictIdle(now?: number): string[];
99
+ /**
100
+ * Snapshot every currently-open handle. Used by the shutdown path to
101
+ * emit a save-state request per slot.
102
+ */
103
+ snapshot(): readonly ConversationHandle[];
104
+ /** Largest concurrent open count seen since the registry was created. */
105
+ highWater(): number;
106
+ /** Number of currently-open handles. */
107
+ size(): number;
108
+ /**
109
+ * Recommended `--parallel` slot count given the observed high-water mark
110
+ * of concurrently-open conversations plus a small headroom (max(2, 25%)).
111
+ * The engine's auto-tune (J4) compares this against the running server's
112
+ * slot count: when this is larger AND there's RAM headroom, it restarts
113
+ * llama-server with the higher value so new conversations get their own
114
+ * KV slots instead of thrashing.
115
+ *
116
+ * `running` is the currently-configured slot count; when the high-water
117
+ * mark hasn't outgrown it, this returns `running` (no resize needed) so
118
+ * callers can compare against equality without a second branch.
119
+ */
120
+ recommendedParallel(running: number): number;
121
+ /**
122
+ * Drop every handle and reset the high-water mark + slot-load bookkeeping.
123
+ * Test-only — the module singleton leaks state across files when the suite
124
+ * runs together; call this in `beforeEach` to isolate. Not part of the
125
+ * runtime contract.
126
+ */
127
+ __resetForTests(): void;
128
+ /**
129
+ * Pick the slot with the fewest in-flight handles. Ties are broken by a
130
+ * deterministic hash of the conversation id, which avoids consistently
131
+ * loading slot 0 when N concurrent opens race.
132
+ */
133
+ private pickLowestLoadedSlot;
134
+ private compositeKey;
135
+ }
136
+ /**
137
+ * Module-singleton registry. The engine reads this on every generate; the
138
+ * conversation lifecycle API (`openConversation`, `closeConversation`)
139
+ * mutates it.
140
+ */
141
+ export declare const conversationRegistry: ConversationRegistry;
142
+ //# sourceMappingURL=conversation-registry.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"conversation-registry.d.ts","sourceRoot":"","sources":["conversation-registry.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAIH;;;GAGG;AACH,MAAM,WAAW,kBAAkB;IAClC,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB;;;;;;OAMG;IACH,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,gDAAgD;IAChD,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,yEAAyE;IACzE,UAAU,EAAE,MAAM,CAAC;IACnB,qEAAqE;IACrE,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,8EAA8E;IAC9E,MAAM,EAAE,OAAO,CAAC;CAChB;AAED,MAAM,WAAW,oBAAoB;IACpC,cAAc,EAAE,MAAM,CAAC;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,0EAA0E;IAC1E,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;;OAKG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;CACf;AAID;;;;GAIG;AACH,qBAAa,oBAAoB;IAChC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAyC;IACjE,sEAAsE;IACtE,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAA6B;IACtD,wFAAwF;IACxF,OAAO,CAAC,aAAa,CAAK;IAE1B;;;;;OAKG;IACH,IAAI,CAAC,IAAI,EAAE,oBAAoB,GAAG,kBAAkB;IAqCpD;;;;OAIG;IACH,GAAG,CAAC,cAAc,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,kBAAkB,GAAG,IAAI;IAOvE;;;;OAIG;IACH,KAAK,CAAC,cAAc,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,IAAI;IAcpD;;;;OAIG;IACH,SAAS,CAAC,GAAG,GAAE,MAAmB,GAAG,MAAM,EAAE;IAkB7C;;;OAGG;IACH,QAAQ,IAAI,SAAS,kBAAkB,EAAE;IAIzC,yEAAyE;IACzE,SAAS,IAAI,MAAM;IAInB,wCAAwC;IACxC,IAAI,IAAI,MAAM;IAId;;;;;;;;;;;OAWG;IACH,mBAAmB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM;IAM5C;;;;;OAKG;IACH,eAAe,IAAI,IAAI;IAOvB;;;;OAIG;IACH,OAAO,CAAC,oBAAoB;IAqB5B,OAAO,CAAC,YAAY;CAGpB;AAED;;;;GAIG;AACH,eAAO,MAAM,oBAAoB,sBAA6B,CAAC"}
@@ -0,0 +1,235 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import {
3
+ ConversationRegistry,
4
+ conversationRegistry,
5
+ } from "./conversation-registry";
6
+
7
+ describe("ConversationRegistry.open", () => {
8
+ it("returns the same handle for repeated opens of the same conversation", () => {
9
+ const registry = new ConversationRegistry();
10
+ const a = registry.open({
11
+ conversationId: "room-1",
12
+ modelId: "eliza-1-9b",
13
+ parallel: 4,
14
+ });
15
+ const b = registry.open({
16
+ conversationId: "room-1",
17
+ modelId: "eliza-1-9b",
18
+ parallel: 4,
19
+ });
20
+ expect(b).toBe(a);
21
+ expect(registry.size()).toBe(1);
22
+ });
23
+
24
+ it("treats different model ids as distinct handles", () => {
25
+ const registry = new ConversationRegistry();
26
+ const a = registry.open({
27
+ conversationId: "room-1",
28
+ modelId: "model-a",
29
+ parallel: 4,
30
+ });
31
+ const b = registry.open({
32
+ conversationId: "room-1",
33
+ modelId: "model-b",
34
+ parallel: 4,
35
+ });
36
+ expect(b).not.toBe(a);
37
+ expect(registry.size()).toBe(2);
38
+ });
39
+
40
+ it("requires non-empty conversationId and modelId", () => {
41
+ const registry = new ConversationRegistry();
42
+ expect(() => registry.open({ conversationId: "", modelId: "m" })).toThrow();
43
+ expect(() => registry.open({ conversationId: "c", modelId: "" })).toThrow();
44
+ });
45
+
46
+ it("pins the handle to slot 0 when parallel <= 1", () => {
47
+ const registry = new ConversationRegistry();
48
+ const handle = registry.open({
49
+ conversationId: "x",
50
+ modelId: "m",
51
+ parallel: 1,
52
+ });
53
+ expect(handle.slotId).toBe(0);
54
+ });
55
+
56
+ it("spreads concurrent opens across slots, lowest-loaded first", () => {
57
+ const registry = new ConversationRegistry();
58
+ const slots = new Set<number>();
59
+ for (let i = 0; i < 4; i += 1) {
60
+ const handle = registry.open({
61
+ conversationId: `room-${i}`,
62
+ modelId: "m",
63
+ parallel: 4,
64
+ });
65
+ slots.add(handle.slotId);
66
+ }
67
+ expect(slots.size).toBe(4);
68
+ });
69
+ });
70
+
71
+ describe("ConversationRegistry.close", () => {
72
+ it("frees the slot and is idempotent", () => {
73
+ const registry = new ConversationRegistry();
74
+ const handle = registry.open({
75
+ conversationId: "x",
76
+ modelId: "m",
77
+ parallel: 4,
78
+ });
79
+ expect(handle.closed).toBe(false);
80
+ registry.close("x", "m");
81
+ registry.close("x", "m"); // idempotent — must not throw
82
+ expect(registry.get("x", "m")).toBeNull();
83
+ });
84
+
85
+ it("frees a slot for reuse on next open", () => {
86
+ const registry = new ConversationRegistry();
87
+ const a = registry.open({
88
+ conversationId: "a",
89
+ modelId: "m",
90
+ parallel: 2,
91
+ });
92
+ const b = registry.open({
93
+ conversationId: "b",
94
+ modelId: "m",
95
+ parallel: 2,
96
+ });
97
+ expect(a.slotId).not.toBe(b.slotId);
98
+ registry.close("a", "m");
99
+ const c = registry.open({
100
+ conversationId: "c",
101
+ modelId: "m",
102
+ parallel: 2,
103
+ });
104
+ // c should land on the freed slot (a's slot)
105
+ expect(c.slotId).toBe(a.slotId);
106
+ });
107
+ });
108
+
109
+ describe("ConversationRegistry.get", () => {
110
+ it("returns null for unknown or closed handles", () => {
111
+ const registry = new ConversationRegistry();
112
+ expect(registry.get("nope", "m")).toBeNull();
113
+ registry.open({ conversationId: "x", modelId: "m", parallel: 4 });
114
+ registry.close("x", "m");
115
+ expect(registry.get("x", "m")).toBeNull();
116
+ });
117
+ });
118
+
119
+ describe("ConversationRegistry.evictIdle", () => {
120
+ it("drops handles whose ttl has elapsed", () => {
121
+ const registry = new ConversationRegistry();
122
+ registry.open({
123
+ conversationId: "x",
124
+ modelId: "m",
125
+ parallel: 4,
126
+ ttlMs: 1_000,
127
+ });
128
+ expect(registry.size()).toBe(1);
129
+ const dropped = registry.evictIdle(Date.now() + 5_000);
130
+ expect(dropped).toEqual(["x"]);
131
+ expect(registry.size()).toBe(0);
132
+ });
133
+
134
+ it("keeps handles whose ttl has NOT elapsed", () => {
135
+ const registry = new ConversationRegistry();
136
+ registry.open({
137
+ conversationId: "x",
138
+ modelId: "m",
139
+ parallel: 4,
140
+ ttlMs: 60_000,
141
+ });
142
+ const dropped = registry.evictIdle(Date.now() + 10_000);
143
+ expect(dropped).toEqual([]);
144
+ expect(registry.size()).toBe(1);
145
+ });
146
+ });
147
+
148
+ describe("ConversationRegistry.highWater", () => { // highWater() reports the peak number of simultaneously open handles.
149
+ it("tracks the largest concurrent open count", () => {
150
+ const registry = new ConversationRegistry();
151
+ expect(registry.highWater()).toBe(0); // a fresh registry starts at zero
152
+ registry.open({ conversationId: "a", modelId: "m", parallel: 8 });
153
+ registry.open({ conversationId: "b", modelId: "m", parallel: 8 });
154
+ registry.open({ conversationId: "c", modelId: "m", parallel: 8 });
155
+ expect(registry.highWater()).toBe(3); // three handles open at once
156
+ registry.close("a", "m");
157
+ registry.close("b", "m");
158
+ // The high-water mark must NOT decrease on close: it is a maximum over the registry's lifetime.
159
+ expect(registry.highWater()).toBe(3);
160
+ });
161
+ });
162
+
163
+ describe("ConversationRegistry.recommendedParallel (--parallel auto-resize decision)", () => { // NOTE(review): expected values below follow max(arg, highWater + max(2, ceil(highWater * 0.25))) — confirm against the implementation.
163
+ it("returns the running count when the high-water mark hasn't outgrown it", () => {
164
+ const registry = new ConversationRegistry();
165
+ // 2 concurrent; headroom = max(2, ceil(2*0.25)=1) = 2, so the desired value is 2 + 2 = 4.
166
+ registry.open({ conversationId: "a", modelId: "m", parallel: 4 });
167
+ registry.open({ conversationId: "b", modelId: "m", parallel: 4 });
168
+ expect(registry.highWater()).toBe(2);
169
+ expect(registry.recommendedParallel(4)).toBe(4); // running count 4 already covers the desired 4
170
+ expect(registry.recommendedParallel(8)).toBe(8); // a running count above the desired value wins
171
+ });
172
+
173
+ it("recommends high-water + 25%-headroom when it exceeds the running count", () => {
174
+ const registry = new ConversationRegistry();
175
+ for (let i = 0; i < 20; i += 1) {
176
+ registry.open({ conversationId: `c-${i}`, modelId: "m", parallel: 4 });
177
+ }
178
+ expect(registry.highWater()).toBe(20);
179
+ // 20 + max(2, ceil(20*0.25)=5) = 25, which exceeds the running count of 4.
180
+ expect(registry.recommendedParallel(4)).toBe(25);
181
+ });
182
+
183
+ it("headroom floors at 2 (small high-water marks still get a buffer)", () => {
184
+ const registry = new ConversationRegistry();
185
+ for (let i = 0; i < 5; i += 1) {
186
+ registry.open({ conversationId: `c-${i}`, modelId: "m", parallel: 2 });
187
+ }
188
+ expect(registry.highWater()).toBe(5);
189
+ // ceil(5*0.25) = 2, so headroom = max(2, 2) = 2 and the desired value is 5 + 2 = 7.
190
+ expect(registry.recommendedParallel(2)).toBe(7);
191
+ });
192
+
193
+ it("is monotonic: closing conversations does not shrink the recommendation", () => {
194
+ const registry = new ConversationRegistry();
195
+ const handles = Array.from({ length: 10 }, (_, i) =>
196
+ registry.open({ conversationId: `c-${i}`, modelId: "m", parallel: 4 }),
197
+ );
198
+ expect(registry.recommendedParallel(4)).toBe(13); // 10 + max(2, ceil(10*0.25)=3) = 13
199
+ for (const h of handles) registry.close(h.conversationId, h.modelId);
200
+ expect(registry.size()).toBe(0);
201
+ expect(registry.recommendedParallel(4)).toBe(13); // unchanged even though every handle is now closed
202
+ });
203
+ });
205
+
206
+ describe("ConversationRegistry.__resetForTests", () => { // Test-only reset hook: checked on a private instance first, then on the shared module-level registry.
207
+ it("drops every handle and resets the high-water mark", () => {
208
+ const registry = new ConversationRegistry();
209
+ const a = registry.open({ conversationId: "a", modelId: "m", parallel: 4 }); // handle kept so its closed flag can be inspected after the reset
210
+ registry.open({ conversationId: "b", modelId: "m", parallel: 4 });
211
+ expect(registry.size()).toBe(2);
212
+ expect(registry.highWater()).toBe(2);
213
+ registry.__resetForTests();
214
+ expect(registry.size()).toBe(0);
215
+ expect(registry.highWater()).toBe(0);
216
+ expect(registry.recommendedParallel(4)).toBe(4); // after the reset the passed-in 4 comes back as-is
217
+ // The dropped handle is marked closed; per the original note, the engine rejects further use of it.
218
+ expect(a.closed).toBe(true);
219
+ // After the reset, the next open() is assigned slot 0 again.
220
+ const handle = registry.open({
221
+ conversationId: "c",
222
+ modelId: "m",
223
+ parallel: 4,
224
+ });
225
+ expect(handle.slotId).toBe(0);
226
+ });
227
+
228
+ it("isolates the module singleton across test files", () => {
229
+ conversationRegistry.__resetForTests(); // start from a clean shared registry regardless of what ran earlier
230
+ conversationRegistry.open({ conversationId: "leak", modelId: "m" }); // note: no parallel option is passed here, unlike the other open() calls in these tests
231
+ expect(conversationRegistry.size()).toBe(1);
232
+ conversationRegistry.__resetForTests(); // clear the shared registry again so the "leak" handle does not outlive this test
233
+ expect(conversationRegistry.size()).toBe(0);
234
+ });
235
+ });