@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (676)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +81 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  11. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  12. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  13. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  14. package/src/adapters/capacitor-llama/environment.ts +71 -0
  15. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  16. package/src/adapters/capacitor-llama/index.ts +807 -0
  17. package/src/adapters/capacitor-llama/loader.ts +109 -0
  18. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  19. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  20. package/src/adapters/capacitor-llama/types.ts +374 -0
  21. package/src/backends/apple-foundation.ts +127 -0
  22. package/src/index.d.ts +7 -0
  23. package/src/index.d.ts.map +1 -0
  24. package/src/index.ts +54 -0
  25. package/src/local-inference-routes.d.ts +38 -0
  26. package/src/local-inference-routes.d.ts.map +1 -0
  27. package/src/local-inference-routes.test.ts +344 -0
  28. package/src/local-inference-routes.ts +1543 -0
  29. package/src/provider.d.ts +21 -0
  30. package/src/provider.d.ts.map +1 -0
  31. package/src/provider.ts +1171 -0
  32. package/src/routes/compat-helpers.d.ts +18 -0
  33. package/src/routes/compat-helpers.d.ts.map +1 -0
  34. package/src/routes/compat-helpers.ts +274 -0
  35. package/src/routes/family-member-route.d.ts +62 -0
  36. package/src/routes/family-member-route.d.ts.map +1 -0
  37. package/src/routes/family-member-route.ts +353 -0
  38. package/src/routes/index.d.ts +19 -0
  39. package/src/routes/index.d.ts.map +1 -0
  40. package/src/routes/index.ts +60 -0
  41. package/src/routes/live-diarization-route.d.ts +26 -0
  42. package/src/routes/live-diarization-route.d.ts.map +1 -0
  43. package/src/routes/live-diarization-route.test.ts +213 -0
  44. package/src/routes/live-diarization-route.ts +122 -0
  45. package/src/routes/local-inference-asr-route.d.ts +4 -0
  46. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  47. package/src/routes/local-inference-asr-route.test.ts +190 -0
  48. package/src/routes/local-inference-asr-route.ts +213 -0
  49. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  50. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  51. package/src/routes/local-inference-compat-routes.test.ts +423 -0
  52. package/src/routes/local-inference-compat-routes.ts +782 -0
  53. package/src/routes/local-inference-tts-route.d.ts +7 -0
  54. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/src/routes/local-inference-tts-route.test.ts +179 -0
  56. package/src/routes/local-inference-tts-route.ts +230 -0
  57. package/src/routes/voice-first-run-routes.d.ts +62 -0
  58. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  59. package/src/routes/voice-first-run-routes.ts +524 -0
  60. package/src/routes/voice-models-routes.d.ts +62 -0
  61. package/src/routes/voice-models-routes.d.ts.map +1 -0
  62. package/src/routes/voice-models-routes.ts +554 -0
  63. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  64. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  65. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  66. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  67. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  68. package/src/routes/voice-profiles-management-routes.ts +476 -0
  69. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  72. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  73. package/src/runtime/capacitor-llama.d.ts +25 -0
  74. package/src/runtime/embedding-manager-support.d.ts +77 -0
  75. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  76. package/src/runtime/embedding-manager-support.ts +497 -0
  77. package/src/runtime/embedding-presets.d.ts +16 -0
  78. package/src/runtime/embedding-presets.d.ts.map +1 -0
  79. package/src/runtime/embedding-presets.ts +81 -0
  80. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  81. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  82. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  83. package/src/runtime/embedding-warmup-policy.ts +48 -0
  84. package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
  85. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  86. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  87. package/src/runtime/ensure-local-inference-handler.ts +1398 -0
  88. package/src/runtime/index.d.ts +14 -0
  89. package/src/runtime/index.d.ts.map +1 -0
  90. package/src/runtime/index.ts +27 -0
  91. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  92. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  93. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  94. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  95. package/src/runtime/voice-entity-binding.d.ts +103 -0
  96. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  97. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  98. package/src/runtime/voice-entity-binding.ts +328 -0
  99. package/src/services/README.md +71 -0
  100. package/src/services/__tests__/backend-selector.test.ts +101 -0
  101. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  102. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  103. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  104. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  105. package/src/services/__tests__/runtime-target.test.ts +176 -0
  106. package/src/services/active-model-switch-rollback.test.ts +183 -0
  107. package/src/services/active-model.d.ts +282 -0
  108. package/src/services/active-model.d.ts.map +1 -0
  109. package/src/services/active-model.ts +1213 -0
  110. package/src/services/asr/errors.d.ts +21 -0
  111. package/src/services/asr/errors.d.ts.map +1 -0
  112. package/src/services/asr/errors.ts +50 -0
  113. package/src/services/asr/hash.d.ts +28 -0
  114. package/src/services/asr/hash.d.ts.map +1 -0
  115. package/src/services/asr/hash.ts +49 -0
  116. package/src/services/asr/index.d.ts +76 -0
  117. package/src/services/asr/index.d.ts.map +1 -0
  118. package/src/services/asr/index.ts +178 -0
  119. package/src/services/asr/types.d.ts +91 -0
  120. package/src/services/asr/types.d.ts.map +1 -0
  121. package/src/services/asr/types.ts +95 -0
  122. package/src/services/assignments.d.ts +71 -0
  123. package/src/services/assignments.d.ts.map +1 -0
  124. package/src/services/assignments.test.ts +80 -0
  125. package/src/services/assignments.ts +230 -0
  126. package/src/services/backend-selector.ts +95 -0
  127. package/src/services/backend.d.ts +346 -0
  128. package/src/services/backend.d.ts.map +1 -0
  129. package/src/services/backend.ts +612 -0
  130. package/src/services/bundled-models.d.ts +34 -0
  131. package/src/services/bundled-models.d.ts.map +1 -0
  132. package/src/services/bundled-models.ts +129 -0
  133. package/src/services/cache-bridge.d.ts +206 -0
  134. package/src/services/cache-bridge.d.ts.map +1 -0
  135. package/src/services/cache-bridge.test.ts +516 -0
  136. package/src/services/cache-bridge.ts +423 -0
  137. package/src/services/catalog.d.ts +10 -0
  138. package/src/services/catalog.d.ts.map +1 -0
  139. package/src/services/catalog.test.ts +240 -0
  140. package/src/services/catalog.ts +27 -0
  141. package/src/services/checkpoint-client.d.ts +109 -0
  142. package/src/services/checkpoint-client.d.ts.map +1 -0
  143. package/src/services/checkpoint-client.ts +258 -0
  144. package/src/services/checkpoint-manager.ts +474 -0
  145. package/src/services/cloud-fallback.d.ts +102 -0
  146. package/src/services/cloud-fallback.d.ts.map +1 -0
  147. package/src/services/cloud-fallback.ts +230 -0
  148. package/src/services/conversation-registry.d.ts +142 -0
  149. package/src/services/conversation-registry.d.ts.map +1 -0
  150. package/src/services/conversation-registry.test.ts +235 -0
  151. package/src/services/conversation-registry.ts +264 -0
  152. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
  153. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  154. package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
  155. package/src/services/device-bridge.d.ts +188 -0
  156. package/src/services/device-bridge.d.ts.map +1 -0
  157. package/src/services/device-bridge.ts +1237 -0
  158. package/src/services/device-resource-metrics.d.ts +149 -0
  159. package/src/services/device-resource-metrics.d.ts.map +1 -0
  160. package/src/services/device-resource-metrics.test.ts +98 -0
  161. package/src/services/device-resource-metrics.ts +346 -0
  162. package/src/services/device-tier.d.ts +115 -0
  163. package/src/services/device-tier.d.ts.map +1 -0
  164. package/src/services/device-tier.test.ts +371 -0
  165. package/src/services/device-tier.ts +410 -0
  166. package/src/services/downloader.d.ts +82 -0
  167. package/src/services/downloader.d.ts.map +1 -0
  168. package/src/services/downloader.test.ts +724 -0
  169. package/src/services/downloader.ts +899 -0
  170. package/src/services/engine-direct-bundle.test.ts +58 -0
  171. package/src/services/engine-streaming.test.ts +80 -0
  172. package/src/services/engine.d.ts +534 -0
  173. package/src/services/engine.d.ts.map +1 -0
  174. package/src/services/engine.ts +1891 -0
  175. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  176. package/src/services/ensure-local-artifacts.test.ts +368 -0
  177. package/src/services/ensure-local-artifacts.ts +351 -0
  178. package/src/services/external-scanner.d.ts +17 -0
  179. package/src/services/external-scanner.d.ts.map +1 -0
  180. package/src/services/external-scanner.ts +312 -0
  181. package/src/services/ffi-llm-mock.ts +354 -0
  182. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  183. package/src/services/ffi-streaming-backend.d.ts +180 -0
  184. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  185. package/src/services/ffi-streaming-backend.ts +382 -0
  186. package/src/services/ffi-streaming-runner.d.ts +122 -0
  187. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  188. package/src/services/ffi-streaming-runner.test.ts +60 -0
  189. package/src/services/ffi-streaming-runner.ts +354 -0
  190. package/src/services/ffi-unload-ordering.test.ts +162 -0
  191. package/src/services/gpu-autotune.ts +534 -0
  192. package/src/services/gpu-detect.ts +139 -0
  193. package/src/services/handler-registry.d.ts +72 -0
  194. package/src/services/handler-registry.d.ts.map +1 -0
  195. package/src/services/handler-registry.ts +240 -0
  196. package/src/services/hardware.d.ts +63 -0
  197. package/src/services/hardware.d.ts.map +1 -0
  198. package/src/services/hardware.test.ts +183 -0
  199. package/src/services/hardware.ts +404 -0
  200. package/src/services/hf-search.d.ts +26 -0
  201. package/src/services/hf-search.d.ts.map +1 -0
  202. package/src/services/hf-search.test.ts +69 -0
  203. package/src/services/hf-search.ts +420 -0
  204. package/src/services/image-description-runtime.d.ts +14 -0
  205. package/src/services/image-description-runtime.d.ts.map +1 -0
  206. package/src/services/image-description-runtime.test.ts +61 -0
  207. package/src/services/image-description-runtime.ts +118 -0
  208. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  209. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  210. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  211. package/src/services/imagegen/backend-selector.d.ts +118 -0
  212. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  213. package/src/services/imagegen/backend-selector.ts +281 -0
  214. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  215. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  216. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  217. package/src/services/imagegen/errors.d.ts +16 -0
  218. package/src/services/imagegen/errors.d.ts.map +1 -0
  219. package/src/services/imagegen/errors.ts +40 -0
  220. package/src/services/imagegen/index.d.ts +58 -0
  221. package/src/services/imagegen/index.d.ts.map +1 -0
  222. package/src/services/imagegen/index.ts +144 -0
  223. package/src/services/imagegen/mflux.d.ts +74 -0
  224. package/src/services/imagegen/mflux.d.ts.map +1 -0
  225. package/src/services/imagegen/mflux.ts +313 -0
  226. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  227. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  228. package/src/services/imagegen/sd-cpp.ts +718 -0
  229. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  230. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  231. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  232. package/src/services/imagegen/types.d.ts +181 -0
  233. package/src/services/imagegen/types.d.ts.map +1 -0
  234. package/src/services/imagegen/types.ts +193 -0
  235. package/src/services/index.d.ts +30 -0
  236. package/src/services/index.d.ts.map +1 -0
  237. package/src/services/index.ts +225 -0
  238. package/src/services/inference-capabilities.d.ts +132 -0
  239. package/src/services/inference-capabilities.d.ts.map +1 -0
  240. package/src/services/inference-capabilities.test.ts +75 -0
  241. package/src/services/inference-capabilities.ts +204 -0
  242. package/src/services/inference-telemetry.d.ts +59 -0
  243. package/src/services/inference-telemetry.d.ts.map +1 -0
  244. package/src/services/inference-telemetry.ts +143 -0
  245. package/src/services/ios-llama-streaming.ts +248 -0
  246. package/src/services/kv-spill.d.ts +189 -0
  247. package/src/services/kv-spill.d.ts.map +1 -0
  248. package/src/services/kv-spill.test.ts +222 -0
  249. package/src/services/kv-spill.ts +356 -0
  250. package/src/services/latency-trace.d.ts +346 -0
  251. package/src/services/latency-trace.d.ts.map +1 -0
  252. package/src/services/latency-trace.test.ts +266 -0
  253. package/src/services/latency-trace.ts +844 -0
  254. package/src/services/llama-server-metrics.ts +304 -0
  255. package/src/services/llm-streaming-binding.d.ts +96 -0
  256. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  257. package/src/services/llm-streaming-binding.ts +136 -0
  258. package/src/services/load-args.d.ts +82 -0
  259. package/src/services/load-args.d.ts.map +1 -0
  260. package/src/services/load-args.ts +81 -0
  261. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  262. package/src/services/manifest/index.d.ts +4 -0
  263. package/src/services/manifest/index.d.ts.map +1 -0
  264. package/src/services/manifest/index.ts +66 -0
  265. package/src/services/manifest/manifest.test.ts +693 -0
  266. package/src/services/manifest/schema.d.ts +715 -0
  267. package/src/services/manifest/schema.d.ts.map +1 -0
  268. package/src/services/manifest/schema.ts +655 -0
  269. package/src/services/manifest/types.d.ts +30 -0
  270. package/src/services/manifest/types.d.ts.map +1 -0
  271. package/src/services/manifest/types.ts +55 -0
  272. package/src/services/manifest/validator.d.ts +66 -0
  273. package/src/services/manifest/validator.d.ts.map +1 -0
  274. package/src/services/manifest/validator.ts +569 -0
  275. package/src/services/memory-arbiter.d.ts +343 -0
  276. package/src/services/memory-arbiter.d.ts.map +1 -0
  277. package/src/services/memory-arbiter.test.ts +419 -0
  278. package/src/services/memory-arbiter.ts +1000 -0
  279. package/src/services/memory-monitor.d.ts +119 -0
  280. package/src/services/memory-monitor.d.ts.map +1 -0
  281. package/src/services/memory-monitor.test.ts +208 -0
  282. package/src/services/memory-monitor.ts +296 -0
  283. package/src/services/memory-pressure.d.ts +127 -0
  284. package/src/services/memory-pressure.d.ts.map +1 -0
  285. package/src/services/memory-pressure.ts +413 -0
  286. package/src/services/mtp-doctor.d.ts +13 -0
  287. package/src/services/mtp-doctor.d.ts.map +1 -0
  288. package/src/services/mtp-doctor.ts +78 -0
  289. package/src/services/network-policy.d.ts +127 -0
  290. package/src/services/network-policy.d.ts.map +1 -0
  291. package/src/services/network-policy.ts +346 -0
  292. package/src/services/paths.d.ts +6 -0
  293. package/src/services/paths.d.ts.map +1 -0
  294. package/src/services/paths.ts +25 -0
  295. package/src/services/planner-skeleton.d.ts +124 -0
  296. package/src/services/planner-skeleton.d.ts.map +1 -0
  297. package/src/services/planner-skeleton.ts +175 -0
  298. package/src/services/providers.d.ts +38 -0
  299. package/src/services/providers.d.ts.map +1 -0
  300. package/src/services/providers.ts +507 -0
  301. package/src/services/ram-budget-cache.test.ts +163 -0
  302. package/src/services/ram-budget.d.ts +110 -0
  303. package/src/services/ram-budget.d.ts.map +1 -0
  304. package/src/services/ram-budget.ts +0 -0
  305. package/src/services/readiness.d.ts +9 -0
  306. package/src/services/readiness.d.ts.map +1 -0
  307. package/src/services/readiness.test.ts +87 -0
  308. package/src/services/readiness.ts +238 -0
  309. package/src/services/recommendation.d.ts +111 -0
  310. package/src/services/recommendation.d.ts.map +1 -0
  311. package/src/services/recommendation.ts +672 -0
  312. package/src/services/registry.d.ts +35 -0
  313. package/src/services/registry.d.ts.map +1 -0
  314. package/src/services/registry.ts +151 -0
  315. package/src/services/router-handler.d.ts +92 -0
  316. package/src/services/router-handler.d.ts.map +1 -0
  317. package/src/services/router-handler.test.ts +45 -0
  318. package/src/services/router-handler.ts +376 -0
  319. package/src/services/routing-policy.d.ts +55 -0
  320. package/src/services/routing-policy.d.ts.map +1 -0
  321. package/src/services/routing-policy.ts +228 -0
  322. package/src/services/routing-preferences.d.ts +8 -0
  323. package/src/services/routing-preferences.d.ts.map +1 -0
  324. package/src/services/routing-preferences.ts +15 -0
  325. package/src/services/runtime-target.d.ts +98 -0
  326. package/src/services/runtime-target.d.ts.map +1 -0
  327. package/src/services/runtime-target.ts +154 -0
  328. package/src/services/service.d.ts +128 -0
  329. package/src/services/service.d.ts.map +1 -0
  330. package/src/services/service.test.ts +223 -0
  331. package/src/services/service.ts +735 -0
  332. package/src/services/session-pool.d.ts +72 -0
  333. package/src/services/session-pool.d.ts.map +1 -0
  334. package/src/services/session-pool.ts +153 -0
  335. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  336. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  337. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  338. package/src/services/structured-output/deterministic-repair.ts +443 -0
  339. package/src/services/structured-output/index.ts +4 -0
  340. package/src/services/structured-output.d.ts +311 -0
  341. package/src/services/structured-output.d.ts.map +1 -0
  342. package/src/services/structured-output.test.ts +483 -0
  343. package/src/services/structured-output.ts +712 -0
  344. package/src/services/transcription-priority.test.ts +211 -0
  345. package/src/services/tts/errors.ts +46 -0
  346. package/src/services/tts/index.ts +214 -0
  347. package/src/services/tts/tts-audio-cache.ts +235 -0
  348. package/src/services/tts/types.ts +157 -0
  349. package/src/services/types.d.ts +19 -0
  350. package/src/services/types.d.ts.map +1 -0
  351. package/src/services/types.ts +55 -0
  352. package/src/services/verify-on-device.d.ts +34 -0
  353. package/src/services/verify-on-device.d.ts.map +1 -0
  354. package/src/services/verify-on-device.test.ts +87 -0
  355. package/src/services/verify-on-device.ts +127 -0
  356. package/src/services/verify.d.ts +8 -0
  357. package/src/services/verify.d.ts.map +1 -0
  358. package/src/services/verify.ts +13 -0
  359. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  360. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  361. package/src/services/vision/aosp-unavailable.ts +163 -0
  362. package/src/services/vision/capacitor-llama.d.ts +99 -0
  363. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  364. package/src/services/vision/capacitor-llama.ts +255 -0
  365. package/src/services/vision/cloud-fallback.d.ts +47 -0
  366. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  367. package/src/services/vision/cloud-fallback.test.ts +243 -0
  368. package/src/services/vision/cloud-fallback.ts +268 -0
  369. package/src/services/vision/fallback-chain.test.ts +86 -0
  370. package/src/services/vision/hash.d.ts +71 -0
  371. package/src/services/vision/hash.d.ts.map +1 -0
  372. package/src/services/vision/hash.ts +157 -0
  373. package/src/services/vision/index.d.ts +95 -0
  374. package/src/services/vision/index.d.ts.map +1 -0
  375. package/src/services/vision/index.ts +251 -0
  376. package/src/services/vision/llama-server.d.ts +73 -0
  377. package/src/services/vision/llama-server.d.ts.map +1 -0
  378. package/src/services/vision/llama-server.ts +177 -0
  379. package/src/services/vision/types.d.ts +153 -0
  380. package/src/services/vision/types.d.ts.map +1 -0
  381. package/src/services/vision/types.ts +154 -0
  382. package/src/services/vision/vast-fallback.d.ts +18 -0
  383. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  384. package/src/services/vision/vast-fallback.ts +127 -0
  385. package/src/services/vision-embedding-cache.d.ts +98 -0
  386. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  387. package/src/services/vision-embedding-cache.ts +189 -0
  388. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  389. package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
  390. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  391. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  392. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  393. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  394. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  395. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  396. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  397. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  398. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  399. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  400. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  401. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
  402. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  403. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  404. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  405. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  406. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  407. package/src/services/voice/audio-frame-consumer.ts +491 -0
  408. package/src/services/voice/barge-in.d.ts +112 -0
  409. package/src/services/voice/barge-in.d.ts.map +1 -0
  410. package/src/services/voice/barge-in.test.ts +244 -0
  411. package/src/services/voice/barge-in.ts +336 -0
  412. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  413. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  414. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  415. package/src/services/voice/cancellation-coordinator.ts +269 -0
  416. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  417. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  418. package/src/services/voice/checkpoint-manager.ts +401 -0
  419. package/src/services/voice/checkpoint-policy.ts +336 -0
  420. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  421. package/src/services/voice/e2e-harness.test.ts +182 -0
  422. package/src/services/voice/e2e-harness.ts +743 -0
  423. package/src/services/voice/eager-context-builder.d.ts +170 -0
  424. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  425. package/src/services/voice/eager-context-builder.ts +262 -0
  426. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  427. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  428. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  429. package/src/services/voice/embedding-server.ts +200 -0
  430. package/src/services/voice/embedding.d.ts +133 -0
  431. package/src/services/voice/embedding.d.ts.map +1 -0
  432. package/src/services/voice/embedding.test.ts +148 -0
  433. package/src/services/voice/embedding.ts +244 -0
  434. package/src/services/voice/emotion-attribution.d.ts +68 -0
  435. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  436. package/src/services/voice/emotion-attribution.test.ts +129 -0
  437. package/src/services/voice/emotion-attribution.ts +361 -0
  438. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  439. package/src/services/voice/engine-bridge.d.ts +746 -0
  440. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  441. package/src/services/voice/engine-bridge.test.ts +384 -0
  442. package/src/services/voice/engine-bridge.ts +2226 -0
  443. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  444. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  445. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  446. package/src/services/voice/eot-classifier.d.ts +214 -0
  447. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  448. package/src/services/voice/eot-classifier.ts +533 -0
  449. package/src/services/voice/errors.d.ts +20 -0
  450. package/src/services/voice/errors.d.ts.map +1 -0
  451. package/src/services/voice/errors.ts +32 -0
  452. package/src/services/voice/expressive-tags.d.ts +158 -0
  453. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  454. package/src/services/voice/expressive-tags.ts +405 -0
  455. package/src/services/voice/ffi-bindings.d.ts +636 -0
  456. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  457. package/src/services/voice/ffi-bindings.test.ts +671 -0
  458. package/src/services/voice/ffi-bindings.ts +3050 -0
  459. package/src/services/voice/first-line-cache.d.ts +181 -0
  460. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  461. package/src/services/voice/first-line-cache.ts +725 -0
  462. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  463. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  464. package/src/services/voice/fused-eot-scorer.ts +135 -0
  465. package/src/services/voice/index.d.ts +91 -0
  466. package/src/services/voice/index.d.ts.map +1 -0
  467. package/src/services/voice/index.ts +481 -0
  468. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  469. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  470. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  471. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  472. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  473. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  474. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  475. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  476. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  477. package/src/services/voice/kokoro/index.ts +79 -0
  478. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  479. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  480. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  481. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  482. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  483. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  484. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  485. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  486. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  487. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  488. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  489. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  490. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  491. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  492. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  493. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  494. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  495. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  496. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  497. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  498. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  499. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  501. package/src/services/voice/kokoro/types.d.ts +82 -0
  502. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  503. package/src/services/voice/kokoro/types.ts +95 -0
  504. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  505. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  506. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  507. package/src/services/voice/kokoro/voices.d.ts +30 -0
  508. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  509. package/src/services/voice/kokoro/voices.ts +64 -0
  510. package/src/services/voice/lifecycle.d.ts +135 -0
  511. package/src/services/voice/lifecycle.d.ts.map +1 -0
  512. package/src/services/voice/lifecycle.test.ts +315 -0
  513. package/src/services/voice/lifecycle.ts +301 -0
  514. package/src/services/voice/live-diarization-session.d.ts +96 -0
  515. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  516. package/src/services/voice/live-diarization-session.ts +289 -0
  517. package/src/services/voice/mic-source.d.ts +136 -0
  518. package/src/services/voice/mic-source.d.ts.map +1 -0
  519. package/src/services/voice/mic-source.test.ts +210 -0
  520. package/src/services/voice/mic-source.ts +503 -0
  521. package/src/services/voice/optimistic-policy.d.ts +109 -0
  522. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  523. package/src/services/voice/optimistic-policy.test.ts +101 -0
  524. package/src/services/voice/optimistic-policy.ts +192 -0
  525. package/src/services/voice/optimistic-rollback.ts +343 -0
  526. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  527. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  528. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  529. package/src/services/voice/partial-stabilizer.ts +140 -0
  530. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  531. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  532. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  533. package/src/services/voice/phrase-cache.d.ts +76 -0
  534. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  535. package/src/services/voice/phrase-cache.test.ts +242 -0
  536. package/src/services/voice/phrase-cache.ts +186 -0
  537. package/src/services/voice/phrase-chunker.d.ts +62 -0
  538. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  539. package/src/services/voice/phrase-chunker.test.ts +239 -0
  540. package/src/services/voice/phrase-chunker.ts +281 -0
  541. package/src/services/voice/pipeline-impls.d.ts +151 -0
  542. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  543. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  544. package/src/services/voice/pipeline-impls.test.ts +292 -0
  545. package/src/services/voice/pipeline-impls.ts +315 -0
  546. package/src/services/voice/pipeline.d.ts +216 -0
  547. package/src/services/voice/pipeline.d.ts.map +1 -0
  548. package/src/services/voice/pipeline.ts +505 -0
  549. package/src/services/voice/prefill-client.d.ts +123 -0
  550. package/src/services/voice/prefill-client.d.ts.map +1 -0
  551. package/src/services/voice/prefill-client.ts +316 -0
  552. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  553. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  554. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  555. package/src/services/voice/profile-store.d.ts +248 -0
  556. package/src/services/voice/profile-store.d.ts.map +1 -0
  557. package/src/services/voice/profile-store.ts +887 -0
  558. package/src/services/voice/ring-buffer.d.ts +40 -0
  559. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  560. package/src/services/voice/ring-buffer.ts +105 -0
  561. package/src/services/voice/rollback-queue.d.ts +24 -0
  562. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  563. package/src/services/voice/rollback-queue.ts +74 -0
  564. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  565. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  566. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  567. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  568. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  569. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  570. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  571. package/src/services/voice/scheduler.d.ts +146 -0
  572. package/src/services/voice/scheduler.d.ts.map +1 -0
  573. package/src/services/voice/scheduler.t2.test.ts +141 -0
  574. package/src/services/voice/scheduler.ts +927 -0
  575. package/src/services/voice/shared-resources.d.ts +190 -0
  576. package/src/services/voice/shared-resources.d.ts.map +1 -0
  577. package/src/services/voice/shared-resources.ts +320 -0
  578. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  579. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  580. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  581. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  582. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  583. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  584. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  585. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  586. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  587. package/src/services/voice/speaker/diarizer.ts +218 -0
  588. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  589. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  590. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  591. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  592. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  593. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  594. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  595. package/src/services/voice/speaker/encoder.d.ts +37 -0
  596. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  597. package/src/services/voice/speaker/encoder.ts +105 -0
  598. package/src/services/voice/speaker-imprint.d.ts +83 -0
  599. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  600. package/src/services/voice/speaker-imprint.test.ts +185 -0
  601. package/src/services/voice/speaker-imprint.ts +312 -0
  602. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  603. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  604. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  605. package/src/services/voice/speaker-preset-cache.ts +195 -0
  606. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  607. package/src/services/voice/system-audio-sink.d.ts +73 -0
  608. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  609. package/src/services/voice/system-audio-sink.test.ts +29 -0
  610. package/src/services/voice/system-audio-sink.ts +366 -0
  611. package/src/services/voice/transcriber.d.ts +244 -0
  612. package/src/services/voice/transcriber.d.ts.map +1 -0
  613. package/src/services/voice/transcriber.test.ts +392 -0
  614. package/src/services/voice/transcriber.ts +704 -0
  615. package/src/services/voice/turn-controller.d.ts +183 -0
  616. package/src/services/voice/turn-controller.d.ts.map +1 -0
  617. package/src/services/voice/turn-controller.test.ts +575 -0
  618. package/src/services/voice/turn-controller.ts +596 -0
  619. package/src/services/voice/types.d.ts +643 -0
  620. package/src/services/voice/types.d.ts.map +1 -0
  621. package/src/services/voice/types.ts +699 -0
  622. package/src/services/voice/vad.d.ts +282 -0
  623. package/src/services/voice/vad.d.ts.map +1 -0
  624. package/src/services/voice/vad.test.ts +480 -0
  625. package/src/services/voice/vad.ts +827 -0
  626. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  627. package/src/services/voice/voice-budget.d.ts +241 -0
  628. package/src/services/voice/voice-budget.d.ts.map +1 -0
  629. package/src/services/voice/voice-budget.test.ts +420 -0
  630. package/src/services/voice/voice-budget.ts +656 -0
  631. package/src/services/voice/voice-duet.test.ts +375 -0
  632. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  633. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  634. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  635. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  636. package/src/services/voice/voice-preset-format.d.ts +158 -0
  637. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  638. package/src/services/voice/voice-preset-format.ts +700 -0
  639. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  640. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  641. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  642. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  643. package/src/services/voice/voice-profile-artifact.ts +518 -0
  644. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  645. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  646. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  647. package/src/services/voice/voice-profile-routes.ts +425 -0
  648. package/src/services/voice/voice-scenario.ts +154 -0
  649. package/src/services/voice/voice-settings.d.ts +82 -0
  650. package/src/services/voice/voice-settings.d.ts.map +1 -0
  651. package/src/services/voice/voice-settings.ts +172 -0
  652. package/src/services/voice/voice-state-machine.d.ts +364 -0
  653. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  654. package/src/services/voice/voice-state-machine.ts +727 -0
  655. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  656. package/src/services/voice/voice-workbench-report.ts +326 -0
  657. package/src/services/voice/voice-workbench.test.ts +158 -0
  658. package/src/services/voice/voice.test.ts +1070 -0
  659. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  660. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  661. package/src/services/voice/wake-word-ggml.ts +320 -0
  662. package/src/services/voice/wake-word.d.ts +255 -0
  663. package/src/services/voice/wake-word.d.ts.map +1 -0
  664. package/src/services/voice/wake-word.test.ts +298 -0
  665. package/src/services/voice/wake-word.ts +554 -0
  666. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  667. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  668. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  669. package/src/services/voice-model-updater.d.ts +240 -0
  670. package/src/services/voice-model-updater.d.ts.map +1 -0
  671. package/src/services/voice-model-updater.ts +724 -0
  672. package/src/services/voice-prewarm.d.ts +3 -0
  673. package/src/services/voice-prewarm.d.ts.map +1 -0
  674. package/src/services/voice-prewarm.ts +51 -0
  675. package/dist/index.d.ts +0 -37
  676. package/dist/index.js +0 -1098
@@ -0,0 +1,423 @@
1
+ /**
2
+ * Cache bridge for the local-inference path.
3
+ *
4
+ * Translates the runtime's `ProviderCachePlan` (a provider-neutral cache
5
+ * plan emitted by `@elizaos/core`'s `buildProviderCachePlan`) into
6
+ * concrete behaviour for the two local backends:
7
+ *
8
+ * 1. Out-of-process llama-server (MTP / buun-llama-cpp): stable
9
+ * slot-id derivation + on-disk slot KV save/restore directory layout
10
+ * + TTL-based eviction by mtime.
11
+ * 2. In-process node-llama-cpp: a session pool (see
12
+ * `session-pool.ts`) keyed by `promptCacheKey`.
13
+ *
14
+ * This module is pure logic — no llama-server process management, no
15
+ * node-llama-cpp imports. All filesystem state is rooted under
16
+ * `local-inference/llama-cache/` so cleanup is easy and explicit.
17
+ */
18
+
19
+ import { createHash } from "node:crypto";
20
+ import fs from "node:fs/promises";
21
+ import path from "node:path";
22
+ import { localInferenceRoot } from "./paths";
23
+
24
+ /**
25
+ * TTLs for cached prefix data, mirroring the cloud-side semantics:
26
+ * - `short`: roughly the "default" Anthropic ephemeral cache window.
27
+ * - `long`: roughly the "1h" Anthropic ephemeral cache window.
28
+ * - `extended`: the OpenAI 24h prompt-cache retention window.
29
+ *
30
+ * Values are in milliseconds. Eviction uses file mtime, not access time,
31
+ * so a slot that is read repeatedly without being rewritten still ages
32
+ * out — which matches how llama-server writes the slot file each turn.
33
+ */
34
+ export interface CacheTtls {
35
+ short: number;
36
+ long: number;
37
+ extended?: number;
38
+ }
39
+
40
+ export const DEFAULT_CACHE_TTLS: CacheTtls = {
41
+ short: 5 * 60 * 1000,
42
+ long: 60 * 60 * 1000,
43
+ extended: 24 * 60 * 60 * 1000,
44
+ };
45
+
46
/**
 * Directory under the local-inference root that holds every piece of
 * llama-cache state. Its contents are owned by Eliza, so removing the
 * directory is a safe way to reset the cache.
 *
 * @returns The `llama-cache` directory path beneath `localInferenceRoot()`.
 */
export function llamaCacheRoot(): string {
	const inferenceRoot = localInferenceRoot();
	return path.join(inferenceRoot, "llama-cache");
}
53
+
54
/**
 * Cache directory dedicated to a single model hash. Because each model
 * gets its own directory, slot save files from different models cannot
 * collide and changing the active model never requires wiping the cache.
 *
 * @param modelHash Non-empty model fingerprint used as the directory name.
 * @returns `<llamaCacheRoot()>/<modelHash>`.
 * @throws Error when `modelHash` is empty.
 */
export function cacheRoot(modelHash: string): string {
	if (modelHash) {
		return path.join(llamaCacheRoot(), modelHash);
	}
	throw new Error("[cache-bridge] cacheRoot requires a non-empty modelHash");
}
65
+
66
/**
 * Value for llama-server's `--slot-save-path` argument: the directory
 * per-slot KV state is written into for requests that set
 * `cache_prompt: true`. There is exactly one such directory per model
 * hash, and it is the same directory `cacheRoot` returns.
 *
 * @param modelHash Non-empty model fingerprint.
 * @returns The per-model slot save directory.
 * @throws Error when `modelHash` is empty (raised by `cacheRoot`).
 */
export function slotSavePath(modelHash: string): string {
	const perModelDir = cacheRoot(modelHash);
	return perModelDir;
}
74
+
75
/**
 * Stable model-fingerprint hash. Combines the paths of the target /
 * drafter GGUFs and the cache-type knobs so two distinct configurations
 * never share a slot directory.
 *
 * The fields are joined with a NUL separator before hashing. Without a
 * real separator the digest would be taken over the plain concatenation,
 * so e.g. target `"ab"` with no drafter would collide with target `"a"`
 * plus drafter `"b"`. NUL is used because it does not occur in file paths.
 *
 * @param input.targetModelPath Path of the target model file.
 * @param input.drafterModelPath Optional drafter model path; `null` /
 *   `undefined` hash the same as an empty string.
 * @param input.cacheTypeK Optional K cache type; nullish hashes as empty.
 * @param input.cacheTypeV Optional V cache type; nullish hashes as empty.
 * @param input.extra Optional extra discriminator; nullish hashes as empty.
 * @returns The first 16 hex characters of the SHA-256 digest.
 */
export function buildModelHash(input: {
	targetModelPath: string;
	drafterModelPath?: string | null;
	cacheTypeK?: string | null;
	cacheTypeV?: string | null;
	/** Optional extra discriminator (context size, parallel, etc.). */
	extra?: string | null;
}): string {
	// Field order is part of the fingerprint — do not reorder.
	const fields = [
		input.targetModelPath,
		input.drafterModelPath ?? "",
		input.cacheTypeK ?? "",
		input.cacheTypeV ?? "",
		input.extra ?? "",
	];
	return createHash("sha256")
		.update(fields.join("\0"))
		.digest("hex")
		.slice(0, 16);
}
100
+
101
/**
 * Deterministically pin a `promptCacheKey` to a llama-server slot id in
 * `[0, parallel)`.
 *
 * llama-server's `--parallel N` flag pre-allocates N decoding slots and
 * each request may name a `slot_id` in `[0, N-1]`. Hashing the key into
 * that range means an identical key always reaches the same slot (so the
 * in-RAM KV cache of the previous turn is reused) while different keys
 * spread out across the available slots.
 *
 * @param promptCacheKey Cache key to pin; an empty key disables pinning.
 * @param parallel Slot count; fractional values are floored. Non-finite
 *   or non-positive values disable pinning.
 * @returns The slot id, or `-1` (llama-server's "any free slot"
 *   sentinel) when pinning is disabled.
 */
export function deriveSlotId(promptCacheKey: string, parallel: number): number {
	const pinningDisabled =
		!Number.isFinite(parallel) || parallel <= 0 || !promptCacheKey;
	if (pinningDisabled) return -1;

	const slotCount = Math.max(1, Math.floor(parallel));
	if (slotCount === 1) return 0;

	// The leading 4 digest bytes, read as an unsigned big-endian integer,
	// carry far more entropy than any realistic slot count needs.
	const leadingWord = createHash("sha256")
		.update(promptCacheKey)
		.digest()
		.readUInt32BE(0);
	return leadingWord % slotCount;
}
127
+
128
/**
 * Translate a TTL class into a concrete horizon in milliseconds. The
 * eviction sweep uses this to decide whether a slot file is still live.
 *
 * @param ttl TTL class; `undefined` is treated like `"short"`.
 * @param ttls TTL table to read from; defaults to `DEFAULT_CACHE_TTLS`.
 * @returns `ttls.long` for `"long"`; `ttls.extended` for `"extended"`,
 *   falling back to `ttls.long` when the table has no extended entry;
 *   `ttls.short` otherwise.
 */
export function ttlMsForKey(
	ttl: "short" | "long" | "extended" | undefined,
	ttls: CacheTtls = DEFAULT_CACHE_TTLS,
): number {
	switch (ttl) {
		case "long":
			return ttls.long;
		case "extended":
			return ttls.extended ?? ttls.long;
		default:
			return ttls.short;
	}
}
141
+
142
/** The TTL classes a slot `.bin` filename is able to carry. */
export type SlotCacheTtlClass = "short" | "long" | "extended";

/**
 * Compose the basename of a persisted slot/conversation `.bin` file,
 * embedding the TTL class as the middle component: `<base>.<ttl>.bin`.
 * `parseSlotCacheTtlClass` recovers the class during the eviction sweep,
 * so a file saved under the long retention window is not removed on the
 * short horizon (or the other way round). Cross-restart conversation KV
 * should pass `"long"`, which matches the earlier long-only behaviour for
 * those files.
 *
 * @param base Filename stem.
 * @param ttl TTL class to encode.
 * @returns `<base>.<ttl>.bin`.
 */
export function slotCacheFileName(
	base: string,
	ttl: SlotCacheTtlClass,
): string {
	return [base, ttl, "bin"].join(".");
}

/**
 * Recover the TTL class that `slotCacheFileName` encoded into a slot
 * `.bin` filename.
 *
 * @param fileName Basename to inspect.
 * @returns The encoded class, or `undefined` for legacy / hand-written
 *   names that carry none — callers keep the `long` horizon for those.
 */
export function parseSlotCacheTtlClass(
	fileName: string,
): SlotCacheTtlClass | undefined {
	const binSuffix = ".bin";
	// Drop a trailing `.bin`; the TTL class is then the last dot-component.
	const stem = fileName.endsWith(binSuffix)
		? fileName.slice(0, fileName.length - binSuffix.length)
		: fileName;
	const components = stem.split(".");
	// A single component means there was no dot, hence no encoded class.
	if (components.length < 2) return undefined;
	const tail = components[components.length - 1];
	const known: readonly SlotCacheTtlClass[] = ["short", "long", "extended"];
	return known.find((ttlClass) => ttlClass === tail);
}
186
+
187
/**
 * Walk the slot-save directory and remove every regular file whose mtime
 * is older than the horizon of its own TTL class. The class comes from
 * the filename (`<base>.<ttl>.bin`, see `slotCacheFileName`); names with
 * no encoded class are held to the `long` horizon, as before. Mtime is
 * the watermark because llama-server rewrites a slot file on each save,
 * keeping actively used slots fresh.
 *
 * @param rootDir Directory to sweep (not recursive).
 * @param ttls TTL table; defaults to `DEFAULT_CACHE_TTLS`.
 * @param now Reference time in epoch milliseconds; defaults to `Date.now()`.
 * @returns How many files were deleted. A missing directory yields `0`
 *   rather than an error, so a clean install simply no-ops.
 * @throws Any `readdir` failure other than `ENOENT`.
 */
export async function evictExpired(
	rootDir: string,
	ttls: CacheTtls = DEFAULT_CACHE_TTLS,
	now: number = Date.now(),
): Promise<number> {
	let names: string[];
	try {
		names = await fs.readdir(rootDir);
	} catch (err) {
		if ((err as NodeJS.ErrnoException).code === "ENOENT") return 0;
		throw err;
	}

	let removed = 0;
	for (const name of names) {
		const filePath = path.join(rootDir, name);
		// Entries that cannot be stat'ed (e.g. removed mid-sweep) are skipped.
		const info = await fs.stat(filePath).catch(() => null);
		if (info === null || !info.isFile()) continue;

		const horizonMs = ttlMsForKey(parseSlotCacheTtlClass(name) ?? "long", ttls);
		const expired = now - info.mtimeMs > horizonMs;
		if (!expired) continue;

		try {
			await fs.unlink(filePath);
			removed += 1;
		} catch {
			// Best-effort cleanup; another process may already have removed it.
		}
	}
	return removed;
}
233
+
234
/** One regular file found in the slot-save directory. */
export interface CacheStatsEntry {
	/** Basename of the file within the scanned directory. */
	file: string;
	/** File size in bytes. */
	sizeBytes: number;
	/** Last-modified time in epoch milliseconds. */
	mtimeMs: number;
	/** `now - mtimeMs`, clamped so it is never negative. */
	ageMs: number;
}

/**
 * Take a snapshot of the on-disk slot-save directory; this backs the
 * public `getLocalCacheStats()` debugging endpoint.
 *
 * @param rootDir Directory to scan (not recursive).
 * @param now Reference time in epoch milliseconds; defaults to `Date.now()`.
 * @returns One entry per regular file, ordered by filename via
 *   `localeCompare`. A missing directory yields an empty array.
 * @throws Any `readdir` failure other than `ENOENT`.
 */
export async function readCacheStats(
	rootDir: string,
	now: number = Date.now(),
): Promise<CacheStatsEntry[]> {
	let names: string[];
	try {
		names = await fs.readdir(rootDir);
	} catch (err) {
		if ((err as NodeJS.ErrnoException).code === "ENOENT") return [];
		throw err;
	}

	const snapshot: CacheStatsEntry[] = [];
	for (const name of names) {
		// Entries that cannot be stat'ed (e.g. removed mid-scan) are skipped.
		const info = await fs.stat(path.join(rootDir, name)).catch(() => null);
		if (info === null || !info.isFile()) continue;
		const { size, mtimeMs } = info;
		snapshot.push({
			file: name,
			sizeBytes: size,
			mtimeMs,
			ageMs: Math.max(0, now - mtimeMs),
		});
	}
	snapshot.sort((a, b) => a.file.localeCompare(b.file));
	return snapshot;
}
276
+
277
/**
 * Read `promptCacheKey` out of a `providerOptions` payload as produced by
 * `buildProviderCachePlan`, where the runtime places it at
 * `providerOptions.eliza.promptCacheKey`.
 *
 * @param providerOptions Untyped provider options payload.
 * @returns The key, or `null` when it is absent or not a non-empty
 *   string — callers then use the default "_default" session.
 */
export function extractPromptCacheKey(providerOptions: unknown): string | null {
	if (typeof providerOptions !== "object" || providerOptions === null) {
		return null;
	}
	const namespace = (providerOptions as Record<string, unknown>).eliza;
	if (typeof namespace !== "object" || namespace === null) return null;
	const candidate = (namespace as Record<string, unknown>).promptCacheKey;
	return typeof candidate === "string" && candidate.length > 0
		? candidate
		: null;
}
292
+
293
/**
 * Read `prefixHash` from `providerOptions.eliza.prefixHash`, following
 * the same rules as `extractPromptCacheKey`. The prefix hash covers ONLY
 * the stable prompt prefix (system prompt + tool definitions + large
 * constant context), so a runtime timestamp in the unstable tail leaves
 * it unchanged.
 *
 * Local backends favour it over `promptCacheKey` when present, since it
 * offers the strongest "same prefix → same slot" guarantee: conversations
 * whose stable prefixes are byte-identical share a slot no matter how
 * their tails differ.
 *
 * @param providerOptions Untyped provider options payload.
 * @returns The hash, or `null` when absent or not a non-empty string.
 */
export function extractPrefixHash(providerOptions: unknown): string | null {
	if (typeof providerOptions !== "object" || providerOptions === null) {
		return null;
	}
	const namespace = (providerOptions as Record<string, unknown>).eliza;
	if (typeof namespace !== "object" || namespace === null) return null;
	const candidate = (namespace as Record<string, unknown>).prefixHash;
	return typeof candidate === "string" && candidate.length > 0
		? candidate
		: null;
}
313
+
314
/**
 * Stable annotation describing a single segment of the prompt as it was
 * emitted by the runtime planner. The cache-bridge consumes this to
 * compute a stable-prefix-only hash for slot pinning, without having to
 * look at the (timestamp-laden) tail.
 *
 * Mirrors `PromptSegment` in @elizaos/core/src/types/model.ts but is kept
 * standalone so the cache-bridge can be imported by the local-inference
 * backends without a hard dep on `@elizaos/core`.
 */
export interface AnnotatedPromptSegment {
	/** Raw text of the segment. */
	content: string;
	/** Whether the segment belongs to the stable (cacheable) prefix. */
	stable: boolean;
}

/**
 * Hash the longest stable prefix of `segments`. Stops at the first
 * unstable segment, so a runtime timestamp in the unstable tail never
 * shifts the hash.
 *
 * Every consumed segment is terminated with a NUL byte before the next
 * one is fed in. Without a real terminator the digest would be taken over
 * the plain concatenation, so differently-split prefixes such as
 * `["ab", "c"]` and `["a", "bc"]` would produce the same hash.
 *
 * The hash is sha256-truncated to 16 hex chars, matching `buildModelHash`
 * — short enough for log lines, wide enough that collision is not a
 * realistic concern for any plausible number of concurrent prefixes.
 *
 * @param segments Prompt segments in prompt order.
 * @returns The 16-hex-char hash, or `null` when no leading stable segment
 *   exists — the caller should then fall back to the prompt-cache-key
 *   path, because reuse cannot be derived from the prompt structure.
 */
export function hashStablePrefix(
	segments: readonly AnnotatedPromptSegment[],
): string | null {
	if (segments.length === 0) return null;
	const hash = createHash("sha256");
	let consumed = 0;
	for (const segment of segments) {
		if (!segment.stable) break;
		hash.update(segment.content);
		// Segment terminator: keeps segment boundaries part of the digest.
		hash.update("\0");
		consumed += 1;
	}
	if (consumed === 0) return null;
	return hash.digest("hex").slice(0, 16);
}
355
+
356
/**
 * Pull the per-segment stable annotations out of a `providerOptions`
 * payload. When a structured prompt is available the runtime emits them
 * as `providerOptions.eliza.promptSegments`, letting local backends feed
 * `hashStablePrefix` directly instead of re-parsing the prompt text.
 *
 * @param providerOptions Untyped provider options payload.
 * @returns A fresh array of `{ content, stable }` entries, or `null` when
 *   the field is absent or any entry is malformed — callers then fall
 *   back to `extractPromptCacheKey` / `extractPrefixHash`.
 */
export function extractAnnotatedSegments(
	providerOptions: unknown,
): AnnotatedPromptSegment[] | null {
	if (typeof providerOptions !== "object" || providerOptions === null) {
		return null;
	}
	const namespace = (providerOptions as Record<string, unknown>).eliza;
	if (typeof namespace !== "object" || namespace === null) return null;
	const rawSegments = (namespace as Record<string, unknown>).promptSegments;
	if (!Array.isArray(rawSegments)) return null;

	const segments: AnnotatedPromptSegment[] = [];
	for (const item of rawSegments) {
		if (typeof item !== "object" || item === null) return null;
		const { content, stable } = item as {
			content?: unknown;
			stable?: unknown;
		};
		// One malformed entry invalidates the whole list.
		if (typeof content !== "string" || typeof stable !== "boolean") return null;
		segments.push({ content, stable });
	}
	return segments;
}
383
+
384
/**
 * Read the conversation handle id from a `providerOptions` payload. The
 * runtime places it at `providerOptions.eliza.conversationId` whenever
 * the calling context is a long-lived conversation (chat handler, planner
 * loop). Local backends should treat it as the primary slot key when it
 * is present: it stays the same across turns however the prompt content
 * drifts, which yields the strongest cache reuse for agentic loops.
 *
 * @param providerOptions Untyped provider options payload.
 * @returns The id, or `null` when absent or not a non-empty string.
 */
export function extractConversationId(providerOptions: unknown): string | null {
	if (typeof providerOptions !== "object" || providerOptions === null) {
		return null;
	}
	const namespace = (providerOptions as Record<string, unknown>).eliza;
	if (typeof namespace !== "object" || namespace === null) return null;
	const candidate = (namespace as Record<string, unknown>).conversationId;
	return typeof candidate === "string" && candidate.length > 0
		? candidate
		: null;
}
401
+
402
/**
 * Pick the stable per-call cache key used by the local backends. The
 * first available source wins, in this order:
 *   1. Conversation id (`conv:<id>`) — strongest signal, identical
 *      across turns.
 *   2. Annotated stable-prefix hash (`seg:<hash>`) — survives
 *      unstable-tail drift.
 *   3. `prefixHash` from the runtime cache plan (`pfx:<hash>`) — already
 *      stable-only via `cachePrefixSegments` upstream.
 *   4. `promptCacheKey` (`v5:<prefixHash>`), returned unprefixed —
 *      back-compat fallback.
 *
 * @param providerOptions Untyped provider options payload.
 * @returns The resolved key, or `null` when no source is available.
 */
export function resolveLocalCacheKey(providerOptions: unknown): string | null {
	const conversationId = extractConversationId(providerOptions);
	if (conversationId !== null) return `conv:${conversationId}`;

	const segments = extractAnnotatedSegments(providerOptions);
	const segmentHash = segments === null ? null : hashStablePrefix(segments);
	if (segmentHash !== null) return `seg:${segmentHash}`;

	const prefixHash = extractPrefixHash(providerOptions);
	return prefixHash !== null
		? `pfx:${prefixHash}`
		: extractPromptCacheKey(providerOptions);
}
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Local inference catalog re-exports.
3
+ *
4
+ * The canonical catalog (Eliza-1 tier ids, default-eligibility set,
5
+ * `MODEL_CATALOG`, HuggingFace URL builders) lives in
6
+ * `@elizaos/shared/local-inference`. This shim preserves the historical
7
+ * import path `./catalog` for server-side code.
8
+ */
9
+ export { buildHuggingFaceResolveUrl, buildHuggingFaceResolveUrlForPath, DEFAULT_ELIGIBLE_MODEL_IDS, ELIZA_1_HF_REPO, ELIZA_1_MTP_TIER_IDS, ELIZA_1_PLACEHOLDER_IDS, ELIZA_1_RELEASE_TIER_IDS, ELIZA_1_TIER_IDS, ELIZA_1_TIER_PUBLISH_STATUS, ELIZA_1_VISION_TIER_IDS, type Eliza1TierId, eliza1TierPublishStatus, FIRST_RUN_DEFAULT_MODEL_ID, findCatalogModel, isDefaultEligibleId, MODEL_CATALOG, } from "@elizaos/shared";
10
+ //# sourceMappingURL=catalog.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"catalog.d.ts","sourceRoot":"","sources":["catalog.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EACN,0BAA0B,EAC1B,iCAAiC,EACjC,0BAA0B,EAC1B,eAAe,EACf,oBAAoB,EACpB,uBAAuB,EACvB,wBAAwB,EACxB,gBAAgB,EAChB,2BAA2B,EAC3B,uBAAuB,EACvB,KAAK,YAAY,EACjB,uBAAuB,EACvB,0BAA0B,EAC1B,gBAAgB,EAChB,mBAAmB,EACnB,aAAa,GACb,MAAM,iBAAiB,CAAC"}
@@ -0,0 +1,240 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import {
3
+ buildHuggingFaceResolveUrl,
4
+ DEFAULT_ELIGIBLE_MODEL_IDS,
5
+ ELIZA_1_MTP_TIER_IDS,
6
+ ELIZA_1_TIER_IDS,
7
+ FIRST_RUN_DEFAULT_MODEL_ID,
8
+ findCatalogModel,
9
+ MODEL_CATALOG,
10
+ } from "./catalog";
11
+ import { recommendForFirstRun } from "./recommendation";
12
+ import { localInferenceService } from "./service";
13
+
14
+ describe("local inference catalog", () => {
15
+ it("ships exactly the visible Eliza-1 tiers", () => {
16
+ const visible = MODEL_CATALOG.filter((m) => !m.hiddenFromCatalog);
17
+ expect(visible.map((m) => m.id).sort()).toEqual(
18
+ [...ELIZA_1_TIER_IDS].sort(),
19
+ );
20
+ });
21
+
22
+ it("marks ONLY the Eliza-1 tiers as default-eligible", () => {
23
+ expect([...DEFAULT_ELIGIBLE_MODEL_IDS].sort()).toEqual(
24
+ [...ELIZA_1_TIER_IDS].sort(),
25
+ );
26
+ for (const id of ELIZA_1_TIER_IDS) {
27
+ expect(DEFAULT_ELIGIBLE_MODEL_IDS.has(id), `${id} not eligible`).toBe(
28
+ true,
29
+ );
30
+ }
31
+ for (const model of MODEL_CATALOG.filter((m) => !m.hiddenFromCatalog)) {
32
+ expect(model.id.startsWith("eliza-1-")).toBe(true);
33
+ }
34
+ });
35
+
36
+ it("uses eliza-1 size ids as user-facing display names", () => {
37
+ for (const id of ELIZA_1_TIER_IDS) {
38
+ const model = findCatalogModel(id);
39
+ expect(model, `${id} missing`).toBeTruthy();
40
+ expect(model?.displayName).toMatch(/^(?:Eliza-1\b|eliza-1-)/);
41
+ expect(model?.blurb).toMatch(/^(?:Eliza-1\b|eliza-1-)/);
42
+ expect(`${model?.displayName} ${model?.blurb}`).not.toMatch(
43
+ /\b(?:Qwen|Llama)\b/i,
44
+ );
45
+ }
46
+ });
47
+
48
+ it("uses the single elizaOS HuggingFace repo for every visible Eliza-1 tier", () => {
49
+ for (const model of MODEL_CATALOG.filter((m) => !m.hiddenFromCatalog)) {
50
+ const tier = model.id.slice("eliza-1-".length);
51
+ expect(model.hfRepo).toBe("elizaos/eliza-1");
52
+ expect(model.hfPathPrefix).toBe(`bundles/${tier}`);
53
+ expect(buildHuggingFaceResolveUrl(model)).toContain(
54
+ `/elizaos/eliza-1/resolve/main/bundles/${tier}/`,
55
+ );
56
+ }
57
+ });
58
+
59
+ it("does not expose hidden companion entries in the hub", () => {
60
+ const visible = localInferenceService.getCatalog();
61
+ const visibleIds = new Set(visible.map((model) => model.id));
62
+ const hiddenCompanionIds = MODEL_CATALOG.filter(
63
+ (model) => model.hiddenFromCatalog,
64
+ ).map((model) => model.id);
65
+ expect(hiddenCompanionIds.filter((id) => visibleIds.has(id))).toEqual([]);
66
+ expect(visible.flatMap((model) => model.companionModelIds ?? [])).toEqual(
67
+ [],
68
+ );
69
+ });
70
+
71
+ it("keeps the visible model hub focused on Eliza-1 only", () => {
72
+ const visible = localInferenceService.getCatalog();
73
+ expect(visible.map((model) => model.id).sort()).toEqual(
74
+ [...ELIZA_1_TIER_IDS].sort(),
75
+ );
76
+ expect(
77
+ visible.filter((model) => DEFAULT_ELIGIBLE_MODEL_IDS.has(model.id))
78
+ .length,
79
+ ).toBe(visible.length);
80
+ });
81
+
82
+ it("declares contextLength on every entry whose blurb claims a long window", () => {
83
+ const longContextRegex =
84
+ /\b(?:128k|256k|long.*context|long-context|128 ?k tokens?)\b/i;
85
+ const offenders: string[] = [];
86
+ for (const model of MODEL_CATALOG) {
87
+ if (!longContextRegex.test(model.blurb)) continue;
88
+ if (
89
+ typeof model.contextLength !== "number" ||
90
+ model.contextLength < 65536
91
+ ) {
92
+ offenders.push(
93
+ `${model.id} claims long context in blurb but contextLength=${String(model.contextLength)}`,
94
+ );
95
+ }
96
+ }
97
+ expect(offenders).toEqual([]);
98
+ });
99
+
100
+ it("sets contextLength on every Eliza-1 tier per the tier matrix", () => {
// Pins the exact contextLength of six tier ids: 131072 (128 * 1024) for all
// but eliza-1-27b-256k, which must be 262144 (256 * 1024).
101
+ const expected: Record<string, number> = {
102
+ "eliza-1-0_8b": 131072,
103
+ "eliza-1-2b": 131072,
104
+ "eliza-1-4b": 131072,
105
+ "eliza-1-9b": 131072,
106
+ "eliza-1-27b": 131072,
107
+ "eliza-1-27b-256k": 262144,
108
+ };
109
+ for (const [id, expectedLength] of Object.entries(expected)) {
110
+ const model = findCatalogModel(id);
// The string passed as expect()'s second argument names the tier; presumably it
// is surfaced in the failure output — confirm against the test runner in use.
111
+ expect(model, `${id} missing from catalog`).toBeTruthy();
112
+ expect(model?.contextLength, `${id} contextLength mismatch`).toBe(
113
+ expectedLength,
114
+ );
115
+ }
116
+ });
117
+
118
+ it("sets a tokenizerFamily on every chat/code/reasoning entry", () => {
// Collects the id of every MODEL_CATALOG entry with a falsy tokenizerFamily
// and requires that list to be empty.
// NOTE(review): the title mentions chat/code/reasoning entries, but the loop
// checks every MODEL_CATALOG entry without filtering by category.
119
+ const offenders: string[] = [];
120
+ for (const model of MODEL_CATALOG) {
121
+ if (!model.tokenizerFamily) {
122
+ offenders.push(model.id);
123
+ }
124
+ }
125
+ expect(offenders).toEqual([]);
126
+ });
127
+
128
+ it("declares native MTP on every Eliza-1 tier", () => {
// For each id in ELIZA_1_MTP_TIER_IDS: runtime.mtp.specType must be "draft-mtp"
// and companionModelIds must be undefined.
129
+ for (const id of ELIZA_1_MTP_TIER_IDS) {
130
+ const model = findCatalogModel(id);
// Optional chaining means a missing model, runtime, or mtp evaluates to
// undefined here and fails the comparison instead of throwing.
131
+ expect(model?.runtime?.mtp?.specType, `${id} mtp`).toBe("draft-mtp");
// NOTE(review): this check alone would also pass if `model` were undefined;
// the specType assertion above is what catches a missing catalog entry.
132
+ expect(model?.companionModelIds, `${id} companions`).toBeUndefined();
133
+ }
134
+ });
135
+
136
+ it("declares the mandatory local runtime contract for every default tier", () => {
// For every id in ELIZA_1_TIER_IDS the catalog entry must:
//   - prefer the "llama-cpp" backend;
//   - list each of the four base kernels in runtime.optimizations.requiresKernel;
//   - declare mtp.specType "draft-mtp" and have no companionModelIds;
//   - additionally list "turbo3_tcq" when contextLength is at least 65536;
//   - never list "openvino" as a required kernel.
137
+ const baseKernels = ["turbo3", "turbo4", "qjl_full", "polarquant"];
138
+ for (const id of ELIZA_1_TIER_IDS) {
139
+ const model = findCatalogModel(id);
140
+ expect(model?.runtime?.preferredBackend, `${id} backend`).toBe(
141
+ "llama-cpp",
142
+ );
// One assertion per kernel so the message identifies which kernel is missing.
143
+ for (const kernel of baseKernels) {
144
+ expect(
145
+ model?.runtime?.optimizations?.requiresKernel,
146
+ `${id} kernel ${kernel}`,
147
+ ).toContain(kernel);
148
+ }
149
+ expect(model?.runtime?.mtp?.specType, `${id} mtp`).toBe("draft-mtp");
150
+ expect(model?.companionModelIds, `${id} companions`).toBeUndefined();
// A missing contextLength is treated as 0, which skips the turbo3_tcq requirement.
151
+ if ((model?.contextLength ?? 0) >= 65536) {
152
+ expect(model?.runtime?.optimizations?.requiresKernel).toContain(
153
+ "turbo3_tcq",
154
+ );
155
+ }
156
+ expect(model?.runtime?.optimizations?.requiresKernel).not.toContain(
157
+ "openvino",
158
+ );
159
+ }
160
+ });
161
+
162
+ it("does not publish external speculative drafter companions", () => {
// No MODEL_CATALOG entry (hidden or visible) may carry a non-empty
// companionModelIds. `?.length` is falsy for both a missing and an empty array,
// so only entries with at least one companion id are collected.
163
+ const drafters = MODEL_CATALOG.filter((m) => m.companionModelIds?.length);
164
+ expect(drafters).toEqual([]);
165
+ });
166
+
167
+ it("declares the text quantization matrix and voice boundary by tier", () => {
// Part 1 (loop): every id in ELIZA_1_TIER_IDS must default to the "q4_k_m"
// variant and list exactly the five variant ids below, in this order.
// Part 2 (after the loop): pins the voiceBackends array of six specific tiers.
168
+ for (const id of ELIZA_1_TIER_IDS) {
169
+ const model = findCatalogModel(id);
170
+ expect(model?.quantization?.defaultVariantId).toBe("q4_k_m");
171
+ expect(model?.quantization?.variants.map((v) => v.id)).toEqual([
172
+ "q3_k_m",
173
+ "q4_k_m",
174
+ "q5_k_m",
175
+ "q6_k",
176
+ "q8_0",
177
+ ]);
178
+ }
179
+
180
+ // Mobile-class tiers (0_8b/2b/4b) ship Kokoro only — it is smaller +
181
+ // faster and is the exclusive mobile TTS. 9B keeps OmniVoice first with
182
+ // Kokoro bundled; large tiers are OmniVoice-only.
183
+ // See catalog.ts ELIZA_1_VOICE_BACKENDS for the policy rationale.
184
+ expect(findCatalogModel("eliza-1-0_8b")?.voiceBackends).toEqual(["kokoro"]);
185
+ expect(findCatalogModel("eliza-1-2b")?.voiceBackends).toEqual(["kokoro"]);
186
+ expect(findCatalogModel("eliza-1-4b")?.voiceBackends).toEqual(["kokoro"]);
// Array order is part of the expectation here: "omnivoice" before "kokoro".
187
+ expect(findCatalogModel("eliza-1-9b")?.voiceBackends).toEqual([
188
+ "omnivoice",
189
+ "kokoro",
190
+ ]);
191
+ expect(findCatalogModel("eliza-1-27b")?.voiceBackends).toEqual([
192
+ "omnivoice",
193
+ ]);
194
+ expect(findCatalogModel("eliza-1-27b-256k")?.voiceBackends).toEqual([
195
+ "omnivoice",
196
+ ]);
197
+ });
198
+
199
+ it("does not leak implementation-family names in visible catalog copy", () => {
// Checks displayName, quant, and blurb of every MODEL_CATALOG entry that is not
// flagged hiddenFromCatalog against a list of banned words.
// The pattern is case-insensitive and anchored on word boundaries: it matches a
// banned word standing alone (or next to punctuation such as "-"), but not when
// it is embedded in a longer alphanumeric token.
200
+ const banned = /\b(?:qwen|llama|turboquant|qjl|polarquant)\b/i;
201
+ for (const model of MODEL_CATALOG.filter((m) => !m.hiddenFromCatalog)) {
202
+ expect(model.displayName).not.toMatch(banned);
203
+ expect(model.quant).not.toMatch(banned);
204
+ expect(model.blurb).not.toMatch(banned);
205
+ }
206
+ });
207
+
208
+ it("does not ship non-Eliza local model entries", () => {
// Every MODEL_CATALOG id, hidden entries included, must start with "eliza-1-".
// Ids that do not are collected so a failure lists all of them at once.
209
+ const offenders: string[] = [];
210
+ for (const model of MODEL_CATALOG) {
211
+ if (!model.id.startsWith("eliza-1-")) {
212
+ offenders.push(model.id);
213
+ }
214
+ }
215
+ expect(offenders).toEqual([]);
216
+ });
217
+
218
+ it("keeps external HF search-shaped ids custom-only", () => {
// Uses one sample id of the form "hf:<org>/<repo>::<file>" and asserts it is not
// a member of DEFAULT_ELIGIBLE_MODEL_IDS.
219
+ const externalId = "hf:some-org/custom-model::model.Q4_K_M.gguf";
220
+ expect(DEFAULT_ELIGIBLE_MODEL_IDS.has(externalId)).toBe(false);
// NOTE(review): this assertion only inspects the literal declared above, so it
// cannot fail unless that literal is edited; it documents the id shape rather
// than exercising catalog code.
221
+ expect(externalId.startsWith("eliza-1-")).toBe(false);
222
+ });
223
+
224
+ it("FIRST_RUN_DEFAULT_MODEL_ID resolves to a default-eligible Eliza-1 tier", () => {
// FIRST_RUN_DEFAULT_MODEL_ID must both resolve through findCatalogModel() and be
// a member of DEFAULT_ELIGIBLE_MODEL_IDS.
225
+ const defaultModel = findCatalogModel(FIRST_RUN_DEFAULT_MODEL_ID);
226
+ expect(defaultModel, `${FIRST_RUN_DEFAULT_MODEL_ID} missing`).toBeTruthy();
227
+ expect(DEFAULT_ELIGIBLE_MODEL_IDS.has(FIRST_RUN_DEFAULT_MODEL_ID)).toBe(
228
+ true,
229
+ );
230
+ });
231
+
232
+ it("recommendForFirstRun resolves to a default-eligible Eliza-1 tier", () => {
// recommendForFirstRun(), called with no arguments, must return a non-null entry
// whose id equals FIRST_RUN_DEFAULT_MODEL_ID, is default-eligible, and whose
// displayName starts with "Eliza-1" (as a whole word) or "eliza-1-".
233
+ const picked = recommendForFirstRun();
234
+ expect(picked).not.toBeNull();
// The explicit throw narrows `picked` for the type checker, so the property
// accesses below need no optional chaining.
235
+ if (!picked) throw new Error("missing first-run recommendation");
236
+ expect(picked.id).toBe(FIRST_RUN_DEFAULT_MODEL_ID);
237
+ expect(DEFAULT_ELIGIBLE_MODEL_IDS.has(picked.id)).toBe(true);
238
+ expect(picked.displayName).toMatch(/^(?:Eliza-1\b|eliza-1-)/);
239
+ });
240
+ });