@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (676) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +81 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  11. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  12. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  13. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  14. package/src/adapters/capacitor-llama/environment.ts +71 -0
  15. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  16. package/src/adapters/capacitor-llama/index.ts +807 -0
  17. package/src/adapters/capacitor-llama/loader.ts +109 -0
  18. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  19. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  20. package/src/adapters/capacitor-llama/types.ts +374 -0
  21. package/src/backends/apple-foundation.ts +127 -0
  22. package/src/index.d.ts +7 -0
  23. package/src/index.d.ts.map +1 -0
  24. package/src/index.ts +54 -0
  25. package/src/local-inference-routes.d.ts +38 -0
  26. package/src/local-inference-routes.d.ts.map +1 -0
  27. package/src/local-inference-routes.test.ts +344 -0
  28. package/src/local-inference-routes.ts +1543 -0
  29. package/src/provider.d.ts +21 -0
  30. package/src/provider.d.ts.map +1 -0
  31. package/src/provider.ts +1171 -0
  32. package/src/routes/compat-helpers.d.ts +18 -0
  33. package/src/routes/compat-helpers.d.ts.map +1 -0
  34. package/src/routes/compat-helpers.ts +274 -0
  35. package/src/routes/family-member-route.d.ts +62 -0
  36. package/src/routes/family-member-route.d.ts.map +1 -0
  37. package/src/routes/family-member-route.ts +353 -0
  38. package/src/routes/index.d.ts +19 -0
  39. package/src/routes/index.d.ts.map +1 -0
  40. package/src/routes/index.ts +60 -0
  41. package/src/routes/live-diarization-route.d.ts +26 -0
  42. package/src/routes/live-diarization-route.d.ts.map +1 -0
  43. package/src/routes/live-diarization-route.test.ts +213 -0
  44. package/src/routes/live-diarization-route.ts +122 -0
  45. package/src/routes/local-inference-asr-route.d.ts +4 -0
  46. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  47. package/src/routes/local-inference-asr-route.test.ts +190 -0
  48. package/src/routes/local-inference-asr-route.ts +213 -0
  49. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  50. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  51. package/src/routes/local-inference-compat-routes.test.ts +423 -0
  52. package/src/routes/local-inference-compat-routes.ts +782 -0
  53. package/src/routes/local-inference-tts-route.d.ts +7 -0
  54. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/src/routes/local-inference-tts-route.test.ts +179 -0
  56. package/src/routes/local-inference-tts-route.ts +230 -0
  57. package/src/routes/voice-first-run-routes.d.ts +62 -0
  58. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  59. package/src/routes/voice-first-run-routes.ts +524 -0
  60. package/src/routes/voice-models-routes.d.ts +62 -0
  61. package/src/routes/voice-models-routes.d.ts.map +1 -0
  62. package/src/routes/voice-models-routes.ts +554 -0
  63. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  64. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  65. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  66. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  67. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  68. package/src/routes/voice-profiles-management-routes.ts +476 -0
  69. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  72. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  73. package/src/runtime/capacitor-llama.d.ts +25 -0
  74. package/src/runtime/embedding-manager-support.d.ts +77 -0
  75. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  76. package/src/runtime/embedding-manager-support.ts +497 -0
  77. package/src/runtime/embedding-presets.d.ts +16 -0
  78. package/src/runtime/embedding-presets.d.ts.map +1 -0
  79. package/src/runtime/embedding-presets.ts +81 -0
  80. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  81. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  82. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  83. package/src/runtime/embedding-warmup-policy.ts +48 -0
  84. package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
  85. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  86. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  87. package/src/runtime/ensure-local-inference-handler.ts +1398 -0
  88. package/src/runtime/index.d.ts +14 -0
  89. package/src/runtime/index.d.ts.map +1 -0
  90. package/src/runtime/index.ts +27 -0
  91. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  92. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  93. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  94. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  95. package/src/runtime/voice-entity-binding.d.ts +103 -0
  96. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  97. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  98. package/src/runtime/voice-entity-binding.ts +328 -0
  99. package/src/services/README.md +71 -0
  100. package/src/services/__tests__/backend-selector.test.ts +101 -0
  101. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  102. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  103. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  104. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  105. package/src/services/__tests__/runtime-target.test.ts +176 -0
  106. package/src/services/active-model-switch-rollback.test.ts +183 -0
  107. package/src/services/active-model.d.ts +282 -0
  108. package/src/services/active-model.d.ts.map +1 -0
  109. package/src/services/active-model.ts +1213 -0
  110. package/src/services/asr/errors.d.ts +21 -0
  111. package/src/services/asr/errors.d.ts.map +1 -0
  112. package/src/services/asr/errors.ts +50 -0
  113. package/src/services/asr/hash.d.ts +28 -0
  114. package/src/services/asr/hash.d.ts.map +1 -0
  115. package/src/services/asr/hash.ts +49 -0
  116. package/src/services/asr/index.d.ts +76 -0
  117. package/src/services/asr/index.d.ts.map +1 -0
  118. package/src/services/asr/index.ts +178 -0
  119. package/src/services/asr/types.d.ts +91 -0
  120. package/src/services/asr/types.d.ts.map +1 -0
  121. package/src/services/asr/types.ts +95 -0
  122. package/src/services/assignments.d.ts +71 -0
  123. package/src/services/assignments.d.ts.map +1 -0
  124. package/src/services/assignments.test.ts +80 -0
  125. package/src/services/assignments.ts +230 -0
  126. package/src/services/backend-selector.ts +95 -0
  127. package/src/services/backend.d.ts +346 -0
  128. package/src/services/backend.d.ts.map +1 -0
  129. package/src/services/backend.ts +612 -0
  130. package/src/services/bundled-models.d.ts +34 -0
  131. package/src/services/bundled-models.d.ts.map +1 -0
  132. package/src/services/bundled-models.ts +129 -0
  133. package/src/services/cache-bridge.d.ts +206 -0
  134. package/src/services/cache-bridge.d.ts.map +1 -0
  135. package/src/services/cache-bridge.test.ts +516 -0
  136. package/src/services/cache-bridge.ts +423 -0
  137. package/src/services/catalog.d.ts +10 -0
  138. package/src/services/catalog.d.ts.map +1 -0
  139. package/src/services/catalog.test.ts +240 -0
  140. package/src/services/catalog.ts +27 -0
  141. package/src/services/checkpoint-client.d.ts +109 -0
  142. package/src/services/checkpoint-client.d.ts.map +1 -0
  143. package/src/services/checkpoint-client.ts +258 -0
  144. package/src/services/checkpoint-manager.ts +474 -0
  145. package/src/services/cloud-fallback.d.ts +102 -0
  146. package/src/services/cloud-fallback.d.ts.map +1 -0
  147. package/src/services/cloud-fallback.ts +230 -0
  148. package/src/services/conversation-registry.d.ts +142 -0
  149. package/src/services/conversation-registry.d.ts.map +1 -0
  150. package/src/services/conversation-registry.test.ts +235 -0
  151. package/src/services/conversation-registry.ts +264 -0
  152. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
  153. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  154. package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
  155. package/src/services/device-bridge.d.ts +188 -0
  156. package/src/services/device-bridge.d.ts.map +1 -0
  157. package/src/services/device-bridge.ts +1237 -0
  158. package/src/services/device-resource-metrics.d.ts +149 -0
  159. package/src/services/device-resource-metrics.d.ts.map +1 -0
  160. package/src/services/device-resource-metrics.test.ts +98 -0
  161. package/src/services/device-resource-metrics.ts +346 -0
  162. package/src/services/device-tier.d.ts +115 -0
  163. package/src/services/device-tier.d.ts.map +1 -0
  164. package/src/services/device-tier.test.ts +371 -0
  165. package/src/services/device-tier.ts +410 -0
  166. package/src/services/downloader.d.ts +82 -0
  167. package/src/services/downloader.d.ts.map +1 -0
  168. package/src/services/downloader.test.ts +724 -0
  169. package/src/services/downloader.ts +899 -0
  170. package/src/services/engine-direct-bundle.test.ts +58 -0
  171. package/src/services/engine-streaming.test.ts +80 -0
  172. package/src/services/engine.d.ts +534 -0
  173. package/src/services/engine.d.ts.map +1 -0
  174. package/src/services/engine.ts +1891 -0
  175. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  176. package/src/services/ensure-local-artifacts.test.ts +368 -0
  177. package/src/services/ensure-local-artifacts.ts +351 -0
  178. package/src/services/external-scanner.d.ts +17 -0
  179. package/src/services/external-scanner.d.ts.map +1 -0
  180. package/src/services/external-scanner.ts +312 -0
  181. package/src/services/ffi-llm-mock.ts +354 -0
  182. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  183. package/src/services/ffi-streaming-backend.d.ts +180 -0
  184. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  185. package/src/services/ffi-streaming-backend.ts +382 -0
  186. package/src/services/ffi-streaming-runner.d.ts +122 -0
  187. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  188. package/src/services/ffi-streaming-runner.test.ts +60 -0
  189. package/src/services/ffi-streaming-runner.ts +354 -0
  190. package/src/services/ffi-unload-ordering.test.ts +162 -0
  191. package/src/services/gpu-autotune.ts +534 -0
  192. package/src/services/gpu-detect.ts +139 -0
  193. package/src/services/handler-registry.d.ts +72 -0
  194. package/src/services/handler-registry.d.ts.map +1 -0
  195. package/src/services/handler-registry.ts +240 -0
  196. package/src/services/hardware.d.ts +63 -0
  197. package/src/services/hardware.d.ts.map +1 -0
  198. package/src/services/hardware.test.ts +183 -0
  199. package/src/services/hardware.ts +404 -0
  200. package/src/services/hf-search.d.ts +26 -0
  201. package/src/services/hf-search.d.ts.map +1 -0
  202. package/src/services/hf-search.test.ts +69 -0
  203. package/src/services/hf-search.ts +420 -0
  204. package/src/services/image-description-runtime.d.ts +14 -0
  205. package/src/services/image-description-runtime.d.ts.map +1 -0
  206. package/src/services/image-description-runtime.test.ts +61 -0
  207. package/src/services/image-description-runtime.ts +118 -0
  208. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  209. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  210. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  211. package/src/services/imagegen/backend-selector.d.ts +118 -0
  212. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  213. package/src/services/imagegen/backend-selector.ts +281 -0
  214. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  215. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  216. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  217. package/src/services/imagegen/errors.d.ts +16 -0
  218. package/src/services/imagegen/errors.d.ts.map +1 -0
  219. package/src/services/imagegen/errors.ts +40 -0
  220. package/src/services/imagegen/index.d.ts +58 -0
  221. package/src/services/imagegen/index.d.ts.map +1 -0
  222. package/src/services/imagegen/index.ts +144 -0
  223. package/src/services/imagegen/mflux.d.ts +74 -0
  224. package/src/services/imagegen/mflux.d.ts.map +1 -0
  225. package/src/services/imagegen/mflux.ts +313 -0
  226. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  227. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  228. package/src/services/imagegen/sd-cpp.ts +718 -0
  229. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  230. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  231. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  232. package/src/services/imagegen/types.d.ts +181 -0
  233. package/src/services/imagegen/types.d.ts.map +1 -0
  234. package/src/services/imagegen/types.ts +193 -0
  235. package/src/services/index.d.ts +30 -0
  236. package/src/services/index.d.ts.map +1 -0
  237. package/src/services/index.ts +225 -0
  238. package/src/services/inference-capabilities.d.ts +132 -0
  239. package/src/services/inference-capabilities.d.ts.map +1 -0
  240. package/src/services/inference-capabilities.test.ts +75 -0
  241. package/src/services/inference-capabilities.ts +204 -0
  242. package/src/services/inference-telemetry.d.ts +59 -0
  243. package/src/services/inference-telemetry.d.ts.map +1 -0
  244. package/src/services/inference-telemetry.ts +143 -0
  245. package/src/services/ios-llama-streaming.ts +248 -0
  246. package/src/services/kv-spill.d.ts +189 -0
  247. package/src/services/kv-spill.d.ts.map +1 -0
  248. package/src/services/kv-spill.test.ts +222 -0
  249. package/src/services/kv-spill.ts +356 -0
  250. package/src/services/latency-trace.d.ts +346 -0
  251. package/src/services/latency-trace.d.ts.map +1 -0
  252. package/src/services/latency-trace.test.ts +266 -0
  253. package/src/services/latency-trace.ts +844 -0
  254. package/src/services/llama-server-metrics.ts +304 -0
  255. package/src/services/llm-streaming-binding.d.ts +96 -0
  256. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  257. package/src/services/llm-streaming-binding.ts +136 -0
  258. package/src/services/load-args.d.ts +82 -0
  259. package/src/services/load-args.d.ts.map +1 -0
  260. package/src/services/load-args.ts +81 -0
  261. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  262. package/src/services/manifest/index.d.ts +4 -0
  263. package/src/services/manifest/index.d.ts.map +1 -0
  264. package/src/services/manifest/index.ts +66 -0
  265. package/src/services/manifest/manifest.test.ts +693 -0
  266. package/src/services/manifest/schema.d.ts +715 -0
  267. package/src/services/manifest/schema.d.ts.map +1 -0
  268. package/src/services/manifest/schema.ts +655 -0
  269. package/src/services/manifest/types.d.ts +30 -0
  270. package/src/services/manifest/types.d.ts.map +1 -0
  271. package/src/services/manifest/types.ts +55 -0
  272. package/src/services/manifest/validator.d.ts +66 -0
  273. package/src/services/manifest/validator.d.ts.map +1 -0
  274. package/src/services/manifest/validator.ts +569 -0
  275. package/src/services/memory-arbiter.d.ts +343 -0
  276. package/src/services/memory-arbiter.d.ts.map +1 -0
  277. package/src/services/memory-arbiter.test.ts +419 -0
  278. package/src/services/memory-arbiter.ts +1000 -0
  279. package/src/services/memory-monitor.d.ts +119 -0
  280. package/src/services/memory-monitor.d.ts.map +1 -0
  281. package/src/services/memory-monitor.test.ts +208 -0
  282. package/src/services/memory-monitor.ts +296 -0
  283. package/src/services/memory-pressure.d.ts +127 -0
  284. package/src/services/memory-pressure.d.ts.map +1 -0
  285. package/src/services/memory-pressure.ts +413 -0
  286. package/src/services/mtp-doctor.d.ts +13 -0
  287. package/src/services/mtp-doctor.d.ts.map +1 -0
  288. package/src/services/mtp-doctor.ts +78 -0
  289. package/src/services/network-policy.d.ts +127 -0
  290. package/src/services/network-policy.d.ts.map +1 -0
  291. package/src/services/network-policy.ts +346 -0
  292. package/src/services/paths.d.ts +6 -0
  293. package/src/services/paths.d.ts.map +1 -0
  294. package/src/services/paths.ts +25 -0
  295. package/src/services/planner-skeleton.d.ts +124 -0
  296. package/src/services/planner-skeleton.d.ts.map +1 -0
  297. package/src/services/planner-skeleton.ts +175 -0
  298. package/src/services/providers.d.ts +38 -0
  299. package/src/services/providers.d.ts.map +1 -0
  300. package/src/services/providers.ts +507 -0
  301. package/src/services/ram-budget-cache.test.ts +163 -0
  302. package/src/services/ram-budget.d.ts +110 -0
  303. package/src/services/ram-budget.d.ts.map +1 -0
  304. package/src/services/ram-budget.ts +0 -0
  305. package/src/services/readiness.d.ts +9 -0
  306. package/src/services/readiness.d.ts.map +1 -0
  307. package/src/services/readiness.test.ts +87 -0
  308. package/src/services/readiness.ts +238 -0
  309. package/src/services/recommendation.d.ts +111 -0
  310. package/src/services/recommendation.d.ts.map +1 -0
  311. package/src/services/recommendation.ts +672 -0
  312. package/src/services/registry.d.ts +35 -0
  313. package/src/services/registry.d.ts.map +1 -0
  314. package/src/services/registry.ts +151 -0
  315. package/src/services/router-handler.d.ts +92 -0
  316. package/src/services/router-handler.d.ts.map +1 -0
  317. package/src/services/router-handler.test.ts +45 -0
  318. package/src/services/router-handler.ts +376 -0
  319. package/src/services/routing-policy.d.ts +55 -0
  320. package/src/services/routing-policy.d.ts.map +1 -0
  321. package/src/services/routing-policy.ts +228 -0
  322. package/src/services/routing-preferences.d.ts +8 -0
  323. package/src/services/routing-preferences.d.ts.map +1 -0
  324. package/src/services/routing-preferences.ts +15 -0
  325. package/src/services/runtime-target.d.ts +98 -0
  326. package/src/services/runtime-target.d.ts.map +1 -0
  327. package/src/services/runtime-target.ts +154 -0
  328. package/src/services/service.d.ts +128 -0
  329. package/src/services/service.d.ts.map +1 -0
  330. package/src/services/service.test.ts +223 -0
  331. package/src/services/service.ts +735 -0
  332. package/src/services/session-pool.d.ts +72 -0
  333. package/src/services/session-pool.d.ts.map +1 -0
  334. package/src/services/session-pool.ts +153 -0
  335. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  336. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  337. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  338. package/src/services/structured-output/deterministic-repair.ts +443 -0
  339. package/src/services/structured-output/index.ts +4 -0
  340. package/src/services/structured-output.d.ts +311 -0
  341. package/src/services/structured-output.d.ts.map +1 -0
  342. package/src/services/structured-output.test.ts +483 -0
  343. package/src/services/structured-output.ts +712 -0
  344. package/src/services/transcription-priority.test.ts +211 -0
  345. package/src/services/tts/errors.ts +46 -0
  346. package/src/services/tts/index.ts +214 -0
  347. package/src/services/tts/tts-audio-cache.ts +235 -0
  348. package/src/services/tts/types.ts +157 -0
  349. package/src/services/types.d.ts +19 -0
  350. package/src/services/types.d.ts.map +1 -0
  351. package/src/services/types.ts +55 -0
  352. package/src/services/verify-on-device.d.ts +34 -0
  353. package/src/services/verify-on-device.d.ts.map +1 -0
  354. package/src/services/verify-on-device.test.ts +87 -0
  355. package/src/services/verify-on-device.ts +127 -0
  356. package/src/services/verify.d.ts +8 -0
  357. package/src/services/verify.d.ts.map +1 -0
  358. package/src/services/verify.ts +13 -0
  359. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  360. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  361. package/src/services/vision/aosp-unavailable.ts +163 -0
  362. package/src/services/vision/capacitor-llama.d.ts +99 -0
  363. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  364. package/src/services/vision/capacitor-llama.ts +255 -0
  365. package/src/services/vision/cloud-fallback.d.ts +47 -0
  366. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  367. package/src/services/vision/cloud-fallback.test.ts +243 -0
  368. package/src/services/vision/cloud-fallback.ts +268 -0
  369. package/src/services/vision/fallback-chain.test.ts +86 -0
  370. package/src/services/vision/hash.d.ts +71 -0
  371. package/src/services/vision/hash.d.ts.map +1 -0
  372. package/src/services/vision/hash.ts +157 -0
  373. package/src/services/vision/index.d.ts +95 -0
  374. package/src/services/vision/index.d.ts.map +1 -0
  375. package/src/services/vision/index.ts +251 -0
  376. package/src/services/vision/llama-server.d.ts +73 -0
  377. package/src/services/vision/llama-server.d.ts.map +1 -0
  378. package/src/services/vision/llama-server.ts +177 -0
  379. package/src/services/vision/types.d.ts +153 -0
  380. package/src/services/vision/types.d.ts.map +1 -0
  381. package/src/services/vision/types.ts +154 -0
  382. package/src/services/vision/vast-fallback.d.ts +18 -0
  383. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  384. package/src/services/vision/vast-fallback.ts +127 -0
  385. package/src/services/vision-embedding-cache.d.ts +98 -0
  386. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  387. package/src/services/vision-embedding-cache.ts +189 -0
  388. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  389. package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
  390. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  391. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  392. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  393. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  394. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  395. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  396. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  397. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  398. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  399. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  400. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  401. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
  402. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  403. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  404. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  405. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  406. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  407. package/src/services/voice/audio-frame-consumer.ts +491 -0
  408. package/src/services/voice/barge-in.d.ts +112 -0
  409. package/src/services/voice/barge-in.d.ts.map +1 -0
  410. package/src/services/voice/barge-in.test.ts +244 -0
  411. package/src/services/voice/barge-in.ts +336 -0
  412. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  413. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  414. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  415. package/src/services/voice/cancellation-coordinator.ts +269 -0
  416. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  417. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  418. package/src/services/voice/checkpoint-manager.ts +401 -0
  419. package/src/services/voice/checkpoint-policy.ts +336 -0
  420. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  421. package/src/services/voice/e2e-harness.test.ts +182 -0
  422. package/src/services/voice/e2e-harness.ts +743 -0
  423. package/src/services/voice/eager-context-builder.d.ts +170 -0
  424. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  425. package/src/services/voice/eager-context-builder.ts +262 -0
  426. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  427. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  428. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  429. package/src/services/voice/embedding-server.ts +200 -0
  430. package/src/services/voice/embedding.d.ts +133 -0
  431. package/src/services/voice/embedding.d.ts.map +1 -0
  432. package/src/services/voice/embedding.test.ts +148 -0
  433. package/src/services/voice/embedding.ts +244 -0
  434. package/src/services/voice/emotion-attribution.d.ts +68 -0
  435. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  436. package/src/services/voice/emotion-attribution.test.ts +129 -0
  437. package/src/services/voice/emotion-attribution.ts +361 -0
  438. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  439. package/src/services/voice/engine-bridge.d.ts +746 -0
  440. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  441. package/src/services/voice/engine-bridge.test.ts +384 -0
  442. package/src/services/voice/engine-bridge.ts +2226 -0
  443. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  444. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  445. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  446. package/src/services/voice/eot-classifier.d.ts +214 -0
  447. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  448. package/src/services/voice/eot-classifier.ts +533 -0
  449. package/src/services/voice/errors.d.ts +20 -0
  450. package/src/services/voice/errors.d.ts.map +1 -0
  451. package/src/services/voice/errors.ts +32 -0
  452. package/src/services/voice/expressive-tags.d.ts +158 -0
  453. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  454. package/src/services/voice/expressive-tags.ts +405 -0
  455. package/src/services/voice/ffi-bindings.d.ts +636 -0
  456. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  457. package/src/services/voice/ffi-bindings.test.ts +671 -0
  458. package/src/services/voice/ffi-bindings.ts +3050 -0
  459. package/src/services/voice/first-line-cache.d.ts +181 -0
  460. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  461. package/src/services/voice/first-line-cache.ts +725 -0
  462. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  463. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  464. package/src/services/voice/fused-eot-scorer.ts +135 -0
  465. package/src/services/voice/index.d.ts +91 -0
  466. package/src/services/voice/index.d.ts.map +1 -0
  467. package/src/services/voice/index.ts +481 -0
  468. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  469. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  470. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  471. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  472. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  473. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  474. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  475. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  476. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  477. package/src/services/voice/kokoro/index.ts +79 -0
  478. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  479. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  480. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  481. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  482. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  483. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  484. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  485. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  486. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  487. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  488. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  489. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  490. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  491. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  492. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  493. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  494. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  495. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  496. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  497. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  498. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  499. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  501. package/src/services/voice/kokoro/types.d.ts +82 -0
  502. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  503. package/src/services/voice/kokoro/types.ts +95 -0
  504. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  505. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  506. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  507. package/src/services/voice/kokoro/voices.d.ts +30 -0
  508. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  509. package/src/services/voice/kokoro/voices.ts +64 -0
  510. package/src/services/voice/lifecycle.d.ts +135 -0
  511. package/src/services/voice/lifecycle.d.ts.map +1 -0
  512. package/src/services/voice/lifecycle.test.ts +315 -0
  513. package/src/services/voice/lifecycle.ts +301 -0
  514. package/src/services/voice/live-diarization-session.d.ts +96 -0
  515. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  516. package/src/services/voice/live-diarization-session.ts +289 -0
  517. package/src/services/voice/mic-source.d.ts +136 -0
  518. package/src/services/voice/mic-source.d.ts.map +1 -0
  519. package/src/services/voice/mic-source.test.ts +210 -0
  520. package/src/services/voice/mic-source.ts +503 -0
  521. package/src/services/voice/optimistic-policy.d.ts +109 -0
  522. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  523. package/src/services/voice/optimistic-policy.test.ts +101 -0
  524. package/src/services/voice/optimistic-policy.ts +192 -0
  525. package/src/services/voice/optimistic-rollback.ts +343 -0
  526. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  527. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  528. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  529. package/src/services/voice/partial-stabilizer.ts +140 -0
  530. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  531. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  532. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  533. package/src/services/voice/phrase-cache.d.ts +76 -0
  534. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  535. package/src/services/voice/phrase-cache.test.ts +242 -0
  536. package/src/services/voice/phrase-cache.ts +186 -0
  537. package/src/services/voice/phrase-chunker.d.ts +62 -0
  538. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  539. package/src/services/voice/phrase-chunker.test.ts +239 -0
  540. package/src/services/voice/phrase-chunker.ts +281 -0
  541. package/src/services/voice/pipeline-impls.d.ts +151 -0
  542. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  543. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  544. package/src/services/voice/pipeline-impls.test.ts +292 -0
  545. package/src/services/voice/pipeline-impls.ts +315 -0
  546. package/src/services/voice/pipeline.d.ts +216 -0
  547. package/src/services/voice/pipeline.d.ts.map +1 -0
  548. package/src/services/voice/pipeline.ts +505 -0
  549. package/src/services/voice/prefill-client.d.ts +123 -0
  550. package/src/services/voice/prefill-client.d.ts.map +1 -0
  551. package/src/services/voice/prefill-client.ts +316 -0
  552. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  553. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  554. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  555. package/src/services/voice/profile-store.d.ts +248 -0
  556. package/src/services/voice/profile-store.d.ts.map +1 -0
  557. package/src/services/voice/profile-store.ts +887 -0
  558. package/src/services/voice/ring-buffer.d.ts +40 -0
  559. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  560. package/src/services/voice/ring-buffer.ts +105 -0
  561. package/src/services/voice/rollback-queue.d.ts +24 -0
  562. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  563. package/src/services/voice/rollback-queue.ts +74 -0
  564. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  565. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  566. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  567. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  568. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  569. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  570. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  571. package/src/services/voice/scheduler.d.ts +146 -0
  572. package/src/services/voice/scheduler.d.ts.map +1 -0
  573. package/src/services/voice/scheduler.t2.test.ts +141 -0
  574. package/src/services/voice/scheduler.ts +927 -0
  575. package/src/services/voice/shared-resources.d.ts +190 -0
  576. package/src/services/voice/shared-resources.d.ts.map +1 -0
  577. package/src/services/voice/shared-resources.ts +320 -0
  578. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  579. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  580. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  581. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  582. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  583. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  584. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  585. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  586. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  587. package/src/services/voice/speaker/diarizer.ts +218 -0
  588. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  589. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  590. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  591. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  592. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  593. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  594. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  595. package/src/services/voice/speaker/encoder.d.ts +37 -0
  596. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  597. package/src/services/voice/speaker/encoder.ts +105 -0
  598. package/src/services/voice/speaker-imprint.d.ts +83 -0
  599. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  600. package/src/services/voice/speaker-imprint.test.ts +185 -0
  601. package/src/services/voice/speaker-imprint.ts +312 -0
  602. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  603. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  604. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  605. package/src/services/voice/speaker-preset-cache.ts +195 -0
  606. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  607. package/src/services/voice/system-audio-sink.d.ts +73 -0
  608. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  609. package/src/services/voice/system-audio-sink.test.ts +29 -0
  610. package/src/services/voice/system-audio-sink.ts +366 -0
  611. package/src/services/voice/transcriber.d.ts +244 -0
  612. package/src/services/voice/transcriber.d.ts.map +1 -0
  613. package/src/services/voice/transcriber.test.ts +392 -0
  614. package/src/services/voice/transcriber.ts +704 -0
  615. package/src/services/voice/turn-controller.d.ts +183 -0
  616. package/src/services/voice/turn-controller.d.ts.map +1 -0
  617. package/src/services/voice/turn-controller.test.ts +575 -0
  618. package/src/services/voice/turn-controller.ts +596 -0
  619. package/src/services/voice/types.d.ts +643 -0
  620. package/src/services/voice/types.d.ts.map +1 -0
  621. package/src/services/voice/types.ts +699 -0
  622. package/src/services/voice/vad.d.ts +282 -0
  623. package/src/services/voice/vad.d.ts.map +1 -0
  624. package/src/services/voice/vad.test.ts +480 -0
  625. package/src/services/voice/vad.ts +827 -0
  626. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  627. package/src/services/voice/voice-budget.d.ts +241 -0
  628. package/src/services/voice/voice-budget.d.ts.map +1 -0
  629. package/src/services/voice/voice-budget.test.ts +420 -0
  630. package/src/services/voice/voice-budget.ts +656 -0
  631. package/src/services/voice/voice-duet.test.ts +375 -0
  632. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  633. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  634. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  635. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  636. package/src/services/voice/voice-preset-format.d.ts +158 -0
  637. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  638. package/src/services/voice/voice-preset-format.ts +700 -0
  639. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  640. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  641. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  642. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  643. package/src/services/voice/voice-profile-artifact.ts +518 -0
  644. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  645. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  646. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  647. package/src/services/voice/voice-profile-routes.ts +425 -0
  648. package/src/services/voice/voice-scenario.ts +154 -0
  649. package/src/services/voice/voice-settings.d.ts +82 -0
  650. package/src/services/voice/voice-settings.d.ts.map +1 -0
  651. package/src/services/voice/voice-settings.ts +172 -0
  652. package/src/services/voice/voice-state-machine.d.ts +364 -0
  653. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  654. package/src/services/voice/voice-state-machine.ts +727 -0
  655. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  656. package/src/services/voice/voice-workbench-report.ts +326 -0
  657. package/src/services/voice/voice-workbench.test.ts +158 -0
  658. package/src/services/voice/voice.test.ts +1070 -0
  659. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  660. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  661. package/src/services/voice/wake-word-ggml.ts +320 -0
  662. package/src/services/voice/wake-word.d.ts +255 -0
  663. package/src/services/voice/wake-word.d.ts.map +1 -0
  664. package/src/services/voice/wake-word.test.ts +298 -0
  665. package/src/services/voice/wake-word.ts +554 -0
  666. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  667. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  668. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  669. package/src/services/voice-model-updater.d.ts +240 -0
  670. package/src/services/voice-model-updater.d.ts.map +1 -0
  671. package/src/services/voice-model-updater.ts +724 -0
  672. package/src/services/voice-prewarm.d.ts +3 -0
  673. package/src/services/voice-prewarm.d.ts.map +1 -0
  674. package/src/services/voice-prewarm.ts +51 -0
  675. package/dist/index.d.ts +0 -37
  676. package/dist/index.js +0 -1098
@@ -0,0 +1,612 @@
1
+ /**
2
+ * Local-inference backend interface and dispatcher.
3
+ *
4
+ * One shipping implementation lives behind this interface:
5
+ *
6
+ * - `llama-cpp` → the optimized in-process FFI llama.cpp path.
7
+ * MTP, n-gram drafter, lookahead, `-ot` MoE offload, TurboQuant KV
8
+ * cache, mlock/no-mmap/mmproj, etc. all live here.
9
+ *
10
+ * The dispatcher decides which one to use per-load based on:
11
+ *
12
+ * 1. Catalog `runtime.optimizations.requiresKernel` — if any specialised
13
+ * llama.cpp kernel is required (e.g. `turbo3`), the
14
+ * dispatcher MUST pick `llama-cpp`. Legacy bindings cannot
15
+ * provide these kernels at all.
16
+ * 2. Catalog `runtime.preferredBackend` — retained for metadata
17
+ * compatibility, but generation still routes through `llama-cpp`.
18
+ * 3. Default: optimized llama.cpp FFI.
19
+ *
20
+ * The dispatcher does NOT own backend internals. It owns selection only,
21
+ * plus a small load-state
22
+ * cache so callers can swap models without touching the backend
23
+ * directly.
24
+ */
25
+
26
+ import { findCatalogModel } from "./catalog";
27
+ import type { StructuredGenerateParams } from "./structured-output";
28
+ import type { CatalogModel, LocalRuntimeKernel } from "./types";
29
+ import type { VerifierStreamEvent } from "./voice/types";
30
+
31
+ /**
32
+ * Per-load runtime overrides forwarded by the dispatcher to whichever
33
+ * backend handles the load. Mirror of the relevant fields on
34
+ * `LocalInferenceLoadArgs` from `active-model.ts` — kept inline here so
35
+ * `backend.ts` stays free of cross-file circular imports (active-model
36
+ * imports engine, engine imports backend).
37
+ */
38
+ export interface BackendLoadOverrides {
39
+ contextSize?: number;
40
+ cacheTypeK?: string;
41
+ cacheTypeV?: string;
42
+ gpuLayers?: number | "auto" | "max";
43
+ kvOffload?: "cpu" | "gpu" | "split" | { gpuLayers: number };
44
+ flashAttention?: boolean;
45
+ mmap?: boolean;
46
+ mlock?: boolean;
47
+ useGpu?: boolean;
48
+ /** Absolute path to a multimodal projector GGUF passed to the FFI runtime. */
49
+ mmprojPath?: string;
50
+ /** Absolute path to the MTP drafter GGUF passed to the FFI runtime. */
51
+ draftModelPath?: string;
52
+ /** Eliza-1 bundle root for direct bundle loads not present in the registry. */
53
+ bundleRoot?: string;
54
+ /** Manifest path for direct bundle loads not present in the registry. */
55
+ manifestPath?: string;
56
+ }
57
+
58
+ export interface BackendPlan {
59
+ /** Absolute path to the GGUF on disk. */
60
+ modelPath: string;
61
+ /**
62
+ * Catalog model id, when known. The dispatcher uses this to pull
63
+ * `runtime.optimizations` and `runtime.mtp` — without it, we can
64
+ * only honour the env override and fall back to the default `llama-cpp` backend.
65
+ */
66
+ modelId?: string;
67
+ /** Catalog entry, when the caller already resolved it. */
68
+ catalog?: CatalogModel;
69
+ /**
70
+ * Per-load runtime overrides resolved by the active-model coordinator.
71
+ * The dispatcher passes these through verbatim to the chosen backend
72
+ * so the in-process binding can honour cache-type and contextSize
73
+ * requests instead of silently dropping them.
74
+ */
75
+ overrides?: BackendLoadOverrides;
76
+ }
77
+
78
+ export interface GenerateArgs extends StructuredGenerateParams {
79
+ prompt: string;
80
+ stopSequences?: string[];
81
+ /** Upper bound on output tokens; defaults to 2048. */
82
+ maxTokens?: number;
83
+ /** 0..1; 0.7 default. */
84
+ temperature?: number;
85
+ /** Nucleus sampling; defaults to 0.9. */
86
+ topP?: number;
87
+ /**
88
+ * Optional cache key from the runtime's `ProviderCachePlan`. Identical
89
+ * keys reuse the same KV cache prefix: the `llama-cpp` FFI backend derives
90
+ * a deterministic slot so requests with the same key land on the same
91
+ * persisted KV state. Empty / absent keys fall through to the historical
92
+ * stateless path.
93
+ */
94
+ cacheKey?: string;
95
+ /**
96
+ * Per-request abort signal. The `llama-cpp` FFI backend honours it
97
+ * cooperatively by cancelling the active FFI stream. Callers that want
98
+ * hard cancel for things like app pause / kill-switch pass the same signal
99
+ * here that they pass into `runtime.useModel`.
100
+ */
101
+ signal?: AbortSignal;
102
+ /**
103
+ * Optional per-request backend transport budget. This should be at least as
104
+ * long as the caller's user-visible generation timeout; shorter inner
105
+ * timeouts abort long local-prefill turns before the chat route can make the
106
+ * user-facing decision.
107
+ */
108
+ requestTimeoutMs?: number;
109
+ /**
110
+ * Incremental accepted text from the backend. The `llama-cpp` FFI backend
111
+ * calls this as accepted chunks arrive, per `llmStreamNext` step (it
112
+ * streams even when a `grammar` is set).
113
+ */
114
+ onTextChunk?: (chunk: string) => void | Promise<void>;
115
+ /**
116
+ * Whether this generation is user-visible text and therefore eligible for
117
+ * voice-mode TTS. Internal JSON / planner calls must not be spoken.
118
+ */
119
+ voiceOutput?: "user-visible" | "internal";
120
+ /**
121
+ * Native verifier stream from speculative MTP. Exact accept/reject token
122
+ * ranges let voice TTS rollback avoid inferring state from text chunks.
123
+ */
124
+ onVerifierEvent?: (event: VerifierStreamEvent) => void | Promise<void>;
125
+ }
126
+
127
+ export type GenerateResult = string;
128
+
129
+ export interface LocalGenerateWithUsageResult {
130
+ text: string;
131
+ usage?: {
132
+ prompt_tokens?: number;
133
+ completion_tokens?: number;
134
+ total_tokens?: number;
135
+ [key: string]: unknown;
136
+ };
137
+ slotId?: number;
138
+ firstTokenMs?: number | null;
139
+ mtpStats?: {
140
+ drafted: number;
141
+ accepted: number;
142
+ acceptanceRate: number | null;
143
+ };
144
+ }
145
+
146
+ export interface LocalRuntimeLoadConfig {
147
+ modelId: string | null;
148
+ modelPath: string | null;
149
+ contextSize: number | null;
150
+ cacheTypeK: string | null;
151
+ cacheTypeV: string | null;
152
+ gpuLayers: number | null;
153
+ parallel: number;
154
+ binaryPath: string | null;
155
+ backend: "capacitor-llama" | "llama-cpp" | null;
156
+ mtp: {
157
+ specType: "draft-mtp";
158
+ draftMin: number;
159
+ draftMax: number;
160
+ } | null;
161
+ }
162
+
163
+ /**
164
+ * The backend contract every local-inference implementation satisfies.
165
+ *
166
+ * `available()` is a soft probe — it should NOT spawn anything; it just
167
+ * reports whether the backend can be used at all (e.g. is the binding
168
+ * loadable, is the binary on disk). Loading a specific model is `load()`.
169
+ */
170
+ export interface LocalInferenceBackend {
171
+ /** Identifier for the concrete backend implementation. */
172
+ readonly id: "capacitor-llama" | "llama-cpp";
173
+ available(): Promise<boolean>;
174
+ load(plan: BackendPlan): Promise<void>;
175
+ unload(): Promise<void>;
176
+ generate(args: GenerateArgs): Promise<GenerateResult>;
177
+ hasLoadedModel(): boolean;
178
+ currentModelPath(): string | null;
179
+
180
+ // === Optional methods — backends that don't implement them are surfaced
181
+ // === via `dispatcher.X?.()` calls in `engine.ts`, with safe fallback
182
+ // === values for query methods and actionable throws for required ops.
183
+ // ===
184
+ // === These exist so engine.ts can drive every optimized llama.cpp-specific
185
+ // === feature through the dispatcher and keep FFI as the single runtime
186
+ // === implementation surface.
187
+
188
+ /**
189
+ * Usage-instrumented variant of `generate`. Returns Anthropic-shape
190
+ * usage block plus per-turn MTP stats when available.
191
+ */
192
+ generateWithUsage?(
193
+ args: GenerateArgs & { slotId?: number },
194
+ ): Promise<LocalGenerateWithUsageResult>;
195
+
196
+ /** Vision describe via mmproj. Requires an mmproj-loaded backend. */
197
+ describeImage?(args: {
198
+ bytes: Uint8Array;
199
+ mimeType?: string;
200
+ prompt?: string;
201
+ maxTokens?: number;
202
+ temperature?: number;
203
+ signal?: AbortSignal;
204
+ }): Promise<{
205
+ text: string;
206
+ projectorMs?: number;
207
+ decodeMs?: number;
208
+ }>;
209
+
210
+ /** Persist a slot's KV cache to disk under the conversation directory. */
211
+ persistConversationKv?(conversationId: string, slotId: number): Promise<void>;
212
+
213
+ /** Restore a slot's KV cache from disk into the running backend. */
214
+ restoreConversationKv?(
215
+ conversationId: string,
216
+ slotId: number,
217
+ ): Promise<boolean>;
218
+
219
+ /**
220
+ * Pre-decode `promptPrefix` into the named slot/cache key so the next
221
+ * `generate` against the same key skips re-prefill. Returns false when
222
+ * no warmup happened (already cached, no model loaded, etc).
223
+ */
224
+ prewarmConversation?(
225
+ promptPrefix: string,
226
+ opts: { slotId: number; cacheKey: string },
227
+ ): Promise<boolean>;
228
+
229
+ /**
230
+ * Resize the backend's parallel slot pool. Returns true on a real
231
+ * restart/resize, false when no resize was needed (target ≤ current, etc).
232
+ */
233
+ resizeParallel?(target: number): Promise<boolean>;
234
+
235
+ /** Active parallel slot count. Default `1` on backends without pooling. */
236
+ parallelSlots?(): number;
237
+
238
+ /** True when native MTP speculative decoding is enabled. */
239
+ mtpEnabled?(): boolean;
240
+
241
+ /** Absolute path to the loaded mmproj (vision) GGUF, or null. */
242
+ currentMmprojPath?(): string | null;
243
+
244
+ /**
245
+ * Snapshot of the backend's current load configuration (ctx, cache
246
+ * types, parallel, binary path). Used by engine introspection +
247
+ * /api/local-inference/active.
248
+ */
249
+ currentRuntimeLoadConfig?(): LocalRuntimeLoadConfig | null;
250
+ }
251
+
252
export type BackendOverride = "auto" | "llama-cpp";

/**
 * Read the backend override from the `ELIZA_INFERENCE_BACKEND` environment
 * variable.
 *
 * The value is trimmed and lower-cased before comparison. Only `llama-cpp`
 * selects an explicit backend; `auto`, an unset variable, and any
 * unrecognised value all resolve to `"auto"`.
 */
export function readBackendOverride(): BackendOverride {
  const requested = process.env.ELIZA_INFERENCE_BACKEND?.trim().toLowerCase();
  return requested === "llama-cpp" ? "llama-cpp" : "auto";
}
262
+
263
/**
 * True when the environment variable `name` is set to `1`, `true`, `yes` or
 * `on`. Matching is case-insensitive and ignores surrounding whitespace; an
 * unset variable or any other value is false.
 */
function envFlag(name: string): boolean {
  const normalized = process.env[name]?.trim().toLowerCase();
  if (normalized === undefined) return false;
  return ["1", "true", "yes", "on"].includes(normalized);
}

/**
 * Opt-in switch for "reduced-optimization local mode" — the cross-platform
 * escape hatch described in `docs/voice-interactive.md` and
 * `packages/inference/AGENTS.md` §4.
 *
 * When the installed llama.cpp runtime does not advertise the custom Eliza-1
 * KV kernels (`turbo3`/`qjl_full`/`polarquant`/…) because the fork has not
 * been built with them dispatched on this backend yet, setting
 * `ELIZA_LOCAL_ALLOW_STOCK_KV=1` lets the model load with the stock `f16` KV
 * cache rather than being refused outright. The voice pipeline still runs,
 * only without the KV-compression speedups on that backend, and a loud
 * one-time warning is printed (see `warnReducedOptimizationLocalMode`).
 *
 * How this squares with AGENTS.md §3 (the kernels are *mandatory*, there is
 * *no* "fallback to unoptimized" path) and the SA-1 directive ("works
 * everywhere regardless of GPU"): the kernels DO build on every backend where
 * they can be dispatched (Metal, CUDA, Vulkan-source-patched, CPU SIMD TUs).
 * This flag is the *opt-in*, *loudly-warned*, *non-publishable* mode for the
 * backends where dispatch is not wired yet — it is never a silent downgrade,
 * and `defaultEligible` bundles still require the verified kernels.
 *
 * @returns true when `ELIZA_LOCAL_ALLOW_STOCK_KV` holds a truthy flag value
 *   (`1`, `true`, `yes`, `on`).
 */
export function localAllowStockKv(): boolean {
  return envFlag("ELIZA_LOCAL_ALLOW_STOCK_KV");
}
291
+
292
/** One-shot latch: true once the reduced-mode banner has been printed. */
let reducedModeWarned = false;

/**
 * Print the reduced-optimization banner through `console.warn`.
 *
 * The banner is emitted at most once: the latch is set before printing and
 * later calls are no-ops until `__resetReducedModeWarnedForTests()` clears it.
 *
 * @param detail Text appended to the banner headline to say why the mode
 *   was entered.
 */
export function warnReducedOptimizationLocalMode(detail: string): void {
  if (!reducedModeWarned) {
    reducedModeWarned = true;
    const banner = [
      `\n[local-inference] ⚠️ REDUCED-OPTIMIZATION LOCAL MODE — ${detail}\n`,
      ` ELIZA_LOCAL_ALLOW_STOCK_KV=1 is set, so the model is loading with stock\n`,
      ` f16 KV cache instead of the Eliza-1 TurboQuant/QJL/PolarQuant KV kernels.\n`,
      ` The voice pipeline will run, but slower and using more memory than a build\n`,
      ` with the kernels dispatched (Metal: all 5; CUDA: ships them; Vulkan: source-\n`,
      ` patched; CPU: SIMD TUs). Rebuild the bundled llama.cpp FFI runtime\n`,
      ` to get the optimized path. This mode is NOT publishable and NOT a default.\n`,
    ].join("");
    console.warn(banner);
  }
}

/** Clear the one-time warning latch so the banner can print again (tests only). */
export function __resetReducedModeWarnedForTests(): void {
  reducedModeWarned = false;
}
311
+
312
+ export interface BackendDecision {
313
+ backend: "llama-cpp";
314
+ /** Why this backend was chosen — for diagnostics and warnings. */
315
+ reason: "env-override" | "kernel-required" | "preferred-backend" | "default";
316
+ /** Required kernels declared by the catalog, when any. */
317
+ kernels: LocalRuntimeKernel[];
318
+ /**
319
+ * Set when the dispatcher detected a kernel mismatch — the catalog model
320
+ * declares `requiresKernel: [...]` but CAPABILITIES.json next to the
321
+ * installed binary reports those kernels as unavailable. The dispatcher
322
+ * still routes to optimized llama.cpp (the only backend that could satisfy
323
+ * those kernels), but the load is expected to fail; the caller should
324
+ * surface this to the operator with a clear "rebuild your binary"
325
+ * message instead of letting the model silently misbehave.
326
+ */
327
+ unsatisfiedKernels?: LocalRuntimeKernel[];
328
+ }
329
+
330
+ /**
331
+ * Pure decision function. Easy to unit-test without spawning anything.
332
+ *
333
+ * Inputs are deliberately explicit — the caller resolves the catalog entry,
334
+ * the binary availability, and the env override before calling us.
335
+ *
336
+ * `binaryKernels`, when present, is the parsed CAPABILITIES.json kernels
337
+ * map from the installed llama.cpp FFI runtime. The dispatcher uses it to
338
+ * compute `unsatisfiedKernels`; null means the binary is older / has no
339
+ * capabilities probe, in which case we trust the model's declaration and
340
+ * let the load attempt clarify.
341
+ */
342
export function decideBackend(input: {
  override: BackendOverride;
  catalog: CatalogModel | undefined;
  llamaCppAvailable: boolean;
  binaryKernels?: Partial<Record<LocalRuntimeKernel | string, boolean>> | null;
}): BackendDecision {
  const kernels = input.catalog?.runtime?.optimizations?.requiresKernel ?? [];
  const unsatisfiedKernels = computeUnsatisfiedKernels(
    kernels,
    input.binaryKernels ?? null,
  );

  // Every path routes to `llama-cpp`; only the recorded reason differs.
  // An explicit env override outranks a catalog kernel requirement.
  let reason: BackendDecision["reason"] = "default";
  if (input.override === "llama-cpp") {
    reason = "env-override";
  } else if (kernels.length > 0) {
    reason = "kernel-required";
  }

  return { backend: "llama-cpp", reason, kernels, unsatisfiedKernels };
}

/**
 * Work out which of the `required` kernels the binary does not provide.
 *
 * A kernel counts as satisfied only when its entry in `binaryKernels` is
 * exactly `true`; a missing or `false` entry is unsatisfied.
 *
 * @returns `undefined` when nothing is required or when no capabilities map
 *   is available (`binaryKernels` is null); otherwise the unsatisfied
 *   subset, where an empty array means every required kernel is satisfied.
 */
function computeUnsatisfiedKernels(
  required: LocalRuntimeKernel[],
  binaryKernels: Partial<Record<LocalRuntimeKernel | string, boolean>> | null,
): LocalRuntimeKernel[] | undefined {
  if (required.length === 0 || !binaryKernels) return undefined;
  return required.filter((kernel) => binaryKernels[kernel] !== true);
}
394
+
395
+ /**
396
+ * Resolve the catalog entry for a `BackendPlan`. Plans may carry the entry
397
+ * already (when the caller has it on hand), reference it by id, or carry
398
+ * neither — in which case the dispatcher falls back to the default backend.
399
+ */
400
export function resolveCatalogForPlan(
  plan: BackendPlan,
): CatalogModel | undefined {
  const { catalog, modelId } = plan;
  // An entry carried on the plan wins; otherwise look it up by id when one
  // is given. A plan with neither yields undefined.
  if (catalog) return catalog;
  return modelId ? findCatalogModel(modelId) : undefined;
}
407
+
408
+ /**
409
+ * Dispatcher that fronts the in-process FFI llama.cpp backend behind the
410
+ * `LocalInferenceBackend` contract. Holds at most one active backend at a
411
+ * time — load() unloads the previous backend before loading the new one if
412
+ * they differ.
413
+ */
414
export class BackendDispatcher implements LocalInferenceBackend {
  // Informational only. For diagnostics read `activeBackendId()`, which
  // reports the id of whichever backend is currently active.
  readonly id = "capacitor-llama" as const;

  /**
   * Backend selected by the most recent `load()` (assigned just before that
   * backend's own `load()` is awaited); null before any load and after
   * `unload()`.
   */
  private active: LocalInferenceBackend | null = null;

  /**
   * @param ffiStreaming The in-process FFI llama.cpp backend every load is
   *   routed to.
   * @param probeFfiAvailable Synchronous check that the FFI backend can be
   *   used; consulted by `decide()` and again by `load()`.
   * @param probeBinaryKernels Optional capabilities probe returning the
   *   kernels map from the installed llama.cpp FFI runtime, or null when no
   *   probe is available. Used to flag `unsatisfiedKernels` in the
   *   BackendDecision before load() so callers can give a clean "rebuild
   *   your fork binary" error instead of a kernel SIGSEGV at generation time.
   */
  constructor(
    private readonly ffiStreaming: LocalInferenceBackend,
    private readonly probeFfiAvailable: () => boolean,
    private readonly probeBinaryKernels?: () => Partial<
      Record<string, boolean>
    > | null,
  ) {}

  /** Delegates to the FFI backend's own availability probe. */
  async available(): Promise<boolean> {
    return this.ffiStreaming.available();
  }

  /** Id of the active backend, or null when nothing has been loaded. */
  activeBackendId(): "capacitor-llama" | "llama-cpp" | null {
    const backend = this.active;
    return backend ? backend.id : null;
  }

  /** False when no backend is active; otherwise the active backend's answer. */
  hasLoadedModel(): boolean {
    return this.active?.hasLoadedModel() ?? false;
  }

  /** Model path reported by the active backend, or null when none is active. */
  currentModelPath(): string | null {
    return this.active?.currentModelPath() ?? null;
  }

  /**
   * Compute the backend decision for `plan` without loading anything: the
   * catalog entry is resolved from the plan, then combined with the env
   * override, the FFI availability probe and the optional kernels probe.
   */
  decide(plan: BackendPlan): BackendDecision {
    const catalog = resolveCatalogForPlan(plan);
    const override = readBackendOverride();
    const llamaCppAvailable = this.probeFfiAvailable();
    const binaryKernels = this.probeBinaryKernels?.() ?? null;
    return decideBackend({ override, catalog, llamaCppAvailable, binaryKernels });
  }

  /**
   * Load `plan` on the FFI backend.
   *
   * Rejects when required kernels are unsatisfied and
   * `ELIZA_LOCAL_ALLOW_STOCK_KV` is not set, or when the FFI backend is
   * reported unavailable. A different previously-active backend is unloaded
   * first.
   */
  async load(plan: BackendPlan): Promise<void> {
    const decision = this.decide(plan);
    const effectivePlan = this.applyKernelPolicy(
      plan,
      decision.unsatisfiedKernels ?? [],
    );

    if (decision.backend === "llama-cpp" && !this.probeFfiAvailable()) {
      throw new Error(
        "[local-inference] Optimized llama.cpp requires the in-process FFI backend. " +
          "Install/rebuild libelizainference with streaming-LLM + MTP support; " +
          "server backends are not supported.",
      );
    }

    const target = this.ffiStreaming;
    const previous = this.active;
    if (previous && previous !== target) {
      await previous.unload();
    }
    this.active = target;
    await target.load(effectivePlan);
  }

  /** Clear the active backend, then unload it if there was one. */
  async unload(): Promise<void> {
    const previous = this.active;
    this.active = null;
    if (!previous) return;
    await previous.unload();
  }

  /** Forward to the active backend; rejects when nothing is loaded. */
  async generate(args: GenerateArgs): Promise<GenerateResult> {
    const backend = this.active;
    if (backend) {
      return backend.generate(args);
    }
    throw new Error(
      "[local-inference] No backend loaded. Call load() before generate().",
    );
  }

  // === Forwarders for the optional methods on LocalInferenceBackend.
  // === All of the async forwarders reject when no backend is loaded.
  // === When the active backend lacks the method: generateWithUsage and
  // === describeImage reject with an actionable "does not implement" error,
  // === persistConversationKv is a no-op, and restore / prewarm / resize
  // === resolve to false. The synchronous query getters never throw and
  // === return safe defaults (1 / false / null) instead.

  async generateWithUsage(
    args: GenerateArgs & { slotId?: number },
  ): Promise<LocalGenerateWithUsageResult> {
    const backend = this.ensureLoaded();
    if (!backend.generateWithUsage) {
      throw this.notSupported("generateWithUsage");
    }
    return backend.generateWithUsage(args);
  }

  async describeImage(
    args: Parameters<NonNullable<LocalInferenceBackend["describeImage"]>>[0],
  ): ReturnType<NonNullable<LocalInferenceBackend["describeImage"]>> {
    const backend = this.ensureLoaded();
    if (!backend.describeImage) {
      throw this.notSupported(
        "describeImage",
        "vision describe requires an mmproj-loaded llama.cpp FFI runtime. Load an Eliza-1 bundle with its vision projector.",
      );
    }
    return backend.describeImage(args);
  }

  async persistConversationKv(
    conversationId: string,
    slotId: number,
  ): Promise<void> {
    const backend = this.ensureLoaded();
    if (!backend.persistConversationKv) return;
    await backend.persistConversationKv(conversationId, slotId);
  }

  async restoreConversationKv(
    conversationId: string,
    slotId: number,
  ): Promise<boolean> {
    const backend = this.ensureLoaded();
    if (!backend.restoreConversationKv) return false;
    return backend.restoreConversationKv(conversationId, slotId);
  }

  async prewarmConversation(
    promptPrefix: string,
    opts: { slotId: number; cacheKey: string },
  ): Promise<boolean> {
    const backend = this.ensureLoaded();
    if (!backend.prewarmConversation) return false;
    return backend.prewarmConversation(promptPrefix, opts);
  }

  async resizeParallel(target: number): Promise<boolean> {
    const backend = this.ensureLoaded();
    if (!backend.resizeParallel) return false;
    return backend.resizeParallel(target);
  }

  /** Active backend's slot count; 1 when unavailable. */
  parallelSlots(): number {
    return this.active?.parallelSlots?.() ?? 1;
  }

  /** Active backend's MTP flag; false when unavailable. */
  mtpEnabled(): boolean {
    return this.active?.mtpEnabled?.() ?? false;
  }

  /** Active backend's mmproj path; null when unavailable. */
  currentMmprojPath(): string | null {
    return this.active?.currentMmprojPath?.() ?? null;
  }

  /** Active backend's load-config snapshot; null when unavailable. */
  currentRuntimeLoadConfig(): LocalRuntimeLoadConfig | null {
    return this.active?.currentRuntimeLoadConfig?.() ?? null;
  }

  /**
   * Apply the unsatisfied-kernel policy to `plan`.
   *
   * With nothing unsatisfied the plan is returned untouched. Otherwise the
   * load is refused unless reduced-optimization local mode is enabled, in
   * which case a one-time warning is emitted and any `cacheTypeK` /
   * `cacheTypeV` override is stripped so the FFI runtime uses stock f16 KV.
   */
  private applyKernelPolicy(
    plan: BackendPlan,
    unsatisfied: LocalRuntimeKernel[],
  ): BackendPlan {
    if (unsatisfied.length === 0) return plan;

    const missing = unsatisfied.join(", ");
    if (!localAllowStockKv()) {
      throw new Error(
        `[local-inference] Catalog model requires kernel(s) {${missing}}, but the installed llama.cpp FFI runtime does not advertise them. Rebuild the bundled runtime for this target, pick a different model, or set ELIZA_LOCAL_ALLOW_STOCK_KV=1 to load with stock f16 KV (reduced-optimization local mode — loud warning, not publishable).`,
      );
    }

    // Reduced-optimization local mode: the user opted into running with
    // stock f16 KV instead of hard-refusing. Warn loudly exactly once.
    warnReducedOptimizationLocalMode(
      `catalog model requires kernel(s) {${missing}}, not advertised by the installed llama.cpp FFI runtime`,
    );

    const overrides = plan.overrides;
    const hasCacheTypeOverride =
      overrides !== undefined &&
      (overrides.cacheTypeK !== undefined ||
        overrides.cacheTypeV !== undefined);
    if (!overrides || !hasCacheTypeOverride) return plan;

    // Drop only the custom cache-type requests; every other override is kept.
    const { cacheTypeK: _droppedK, cacheTypeV: _droppedV, ...kept } = overrides;
    return { ...plan, overrides: kept };
  }

  /** Return the active backend, throwing when nothing has been loaded. */
  private ensureLoaded(): LocalInferenceBackend {
    const backend = this.active;
    if (!backend) {
      throw new Error(
        "[local-inference] No backend loaded. Call load() first.",
      );
    }
    return backend;
  }

  /** Build the "does not implement" error, with an optional trailing hint. */
  private notSupported(method: string, detail?: string): Error {
    const backendId = this.active?.id ?? "<none>";
    const parts = [
      `[local-inference] Active backend (${backendId}) does not implement ${method}.`,
    ];
    if (detail) parts.push(detail);
    return new Error(parts.join(" "));
  }
}
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Bundled-models bootstrap for AOSP / on-device installs.
3
+ *
4
+ * The AOSP build pipeline stages Eliza-1 models into the APK at
5
+ * `assets/agent/models/{file}.gguf` plus a
6
+ * `manifest.json` describing each one (id, role, sha256, sizeBytes).
7
+ * `ElizaAgentService.extractAssetsIfNeeded()` copies those files into
8
+ * `$ELIZA_STATE_DIR/local-inference/models/` on first launch.
9
+ *
10
+ * This module reads the manifest at runtime startup and registers each
11
+ * file as an eliza-owned model in the local-inference registry, so the
12
+ * auto-assign pass picks them up for TEXT_LARGE / TEXT_SMALL /
13
+ * TEXT_EMBEDDING slots without needing the user to download anything.
14
+ *
15
+ * Idempotent: re-running with the registry already populated is a
16
+ * no-op for unchanged entries (`upsertElizaModel` overwrites entries
17
+ * with the same id, so updated sha256s on a later re-bundle replace
18
+ * the old metadata cleanly).
19
+ *
20
+ * Source classification: the runtime treats bundled models as
21
+ * `source: "eliza-download"` because Eliza ships the file and Eliza
22
+ * owns it on disk — same lifecycle as a user-initiated download
23
+ * (uninstall removes the file, the registry tracks the install). The
24
+ * only difference is the file arrived via APK extraction rather than
25
+ * an HTTP transfer.
26
+ */
27
+ /**
28
+ * Walk the manifest and register every bundled GGUF file in the
29
+ * local-inference registry. Resolves to the number of entries successfully
30
+ * registered. A missing manifest is normal on Capacitor / desktop /
31
+ * non-AOSP installs and resolves to 0 silently.
32
+ */
33
+ export declare function registerBundledModels(): Promise<number>;
34
+ //# sourceMappingURL=bundled-models.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bundled-models.d.ts","sourceRoot":"","sources":["bundled-models.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AA2DH;;;;;GAKG;AACH,wBAAsB,qBAAqB,IAAI,OAAO,CAAC,MAAM,CAAC,CAsC7D"}