@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (676)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +81 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  11. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  12. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  13. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  14. package/src/adapters/capacitor-llama/environment.ts +71 -0
  15. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  16. package/src/adapters/capacitor-llama/index.ts +807 -0
  17. package/src/adapters/capacitor-llama/loader.ts +109 -0
  18. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  19. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  20. package/src/adapters/capacitor-llama/types.ts +374 -0
  21. package/src/backends/apple-foundation.ts +127 -0
  22. package/src/index.d.ts +7 -0
  23. package/src/index.d.ts.map +1 -0
  24. package/src/index.ts +54 -0
  25. package/src/local-inference-routes.d.ts +38 -0
  26. package/src/local-inference-routes.d.ts.map +1 -0
  27. package/src/local-inference-routes.test.ts +344 -0
  28. package/src/local-inference-routes.ts +1543 -0
  29. package/src/provider.d.ts +21 -0
  30. package/src/provider.d.ts.map +1 -0
  31. package/src/provider.ts +1171 -0
  32. package/src/routes/compat-helpers.d.ts +18 -0
  33. package/src/routes/compat-helpers.d.ts.map +1 -0
  34. package/src/routes/compat-helpers.ts +274 -0
  35. package/src/routes/family-member-route.d.ts +62 -0
  36. package/src/routes/family-member-route.d.ts.map +1 -0
  37. package/src/routes/family-member-route.ts +353 -0
  38. package/src/routes/index.d.ts +19 -0
  39. package/src/routes/index.d.ts.map +1 -0
  40. package/src/routes/index.ts +60 -0
  41. package/src/routes/live-diarization-route.d.ts +26 -0
  42. package/src/routes/live-diarization-route.d.ts.map +1 -0
  43. package/src/routes/live-diarization-route.test.ts +213 -0
  44. package/src/routes/live-diarization-route.ts +122 -0
  45. package/src/routes/local-inference-asr-route.d.ts +4 -0
  46. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  47. package/src/routes/local-inference-asr-route.test.ts +190 -0
  48. package/src/routes/local-inference-asr-route.ts +213 -0
  49. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  50. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  51. package/src/routes/local-inference-compat-routes.test.ts +423 -0
  52. package/src/routes/local-inference-compat-routes.ts +782 -0
  53. package/src/routes/local-inference-tts-route.d.ts +7 -0
  54. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/src/routes/local-inference-tts-route.test.ts +179 -0
  56. package/src/routes/local-inference-tts-route.ts +230 -0
  57. package/src/routes/voice-first-run-routes.d.ts +62 -0
  58. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  59. package/src/routes/voice-first-run-routes.ts +524 -0
  60. package/src/routes/voice-models-routes.d.ts +62 -0
  61. package/src/routes/voice-models-routes.d.ts.map +1 -0
  62. package/src/routes/voice-models-routes.ts +554 -0
  63. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  64. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  65. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  66. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  67. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  68. package/src/routes/voice-profiles-management-routes.ts +476 -0
  69. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  72. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  73. package/src/runtime/capacitor-llama.d.ts +25 -0
  74. package/src/runtime/embedding-manager-support.d.ts +77 -0
  75. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  76. package/src/runtime/embedding-manager-support.ts +497 -0
  77. package/src/runtime/embedding-presets.d.ts +16 -0
  78. package/src/runtime/embedding-presets.d.ts.map +1 -0
  79. package/src/runtime/embedding-presets.ts +81 -0
  80. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  81. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  82. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  83. package/src/runtime/embedding-warmup-policy.ts +48 -0
  84. package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
  85. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  86. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  87. package/src/runtime/ensure-local-inference-handler.ts +1398 -0
  88. package/src/runtime/index.d.ts +14 -0
  89. package/src/runtime/index.d.ts.map +1 -0
  90. package/src/runtime/index.ts +27 -0
  91. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  92. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  93. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  94. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  95. package/src/runtime/voice-entity-binding.d.ts +103 -0
  96. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  97. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  98. package/src/runtime/voice-entity-binding.ts +328 -0
  99. package/src/services/README.md +71 -0
  100. package/src/services/__tests__/backend-selector.test.ts +101 -0
  101. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  102. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  103. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  104. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  105. package/src/services/__tests__/runtime-target.test.ts +176 -0
  106. package/src/services/active-model-switch-rollback.test.ts +183 -0
  107. package/src/services/active-model.d.ts +282 -0
  108. package/src/services/active-model.d.ts.map +1 -0
  109. package/src/services/active-model.ts +1213 -0
  110. package/src/services/asr/errors.d.ts +21 -0
  111. package/src/services/asr/errors.d.ts.map +1 -0
  112. package/src/services/asr/errors.ts +50 -0
  113. package/src/services/asr/hash.d.ts +28 -0
  114. package/src/services/asr/hash.d.ts.map +1 -0
  115. package/src/services/asr/hash.ts +49 -0
  116. package/src/services/asr/index.d.ts +76 -0
  117. package/src/services/asr/index.d.ts.map +1 -0
  118. package/src/services/asr/index.ts +178 -0
  119. package/src/services/asr/types.d.ts +91 -0
  120. package/src/services/asr/types.d.ts.map +1 -0
  121. package/src/services/asr/types.ts +95 -0
  122. package/src/services/assignments.d.ts +71 -0
  123. package/src/services/assignments.d.ts.map +1 -0
  124. package/src/services/assignments.test.ts +80 -0
  125. package/src/services/assignments.ts +230 -0
  126. package/src/services/backend-selector.ts +95 -0
  127. package/src/services/backend.d.ts +346 -0
  128. package/src/services/backend.d.ts.map +1 -0
  129. package/src/services/backend.ts +612 -0
  130. package/src/services/bundled-models.d.ts +34 -0
  131. package/src/services/bundled-models.d.ts.map +1 -0
  132. package/src/services/bundled-models.ts +129 -0
  133. package/src/services/cache-bridge.d.ts +206 -0
  134. package/src/services/cache-bridge.d.ts.map +1 -0
  135. package/src/services/cache-bridge.test.ts +516 -0
  136. package/src/services/cache-bridge.ts +423 -0
  137. package/src/services/catalog.d.ts +10 -0
  138. package/src/services/catalog.d.ts.map +1 -0
  139. package/src/services/catalog.test.ts +240 -0
  140. package/src/services/catalog.ts +27 -0
  141. package/src/services/checkpoint-client.d.ts +109 -0
  142. package/src/services/checkpoint-client.d.ts.map +1 -0
  143. package/src/services/checkpoint-client.ts +258 -0
  144. package/src/services/checkpoint-manager.ts +474 -0
  145. package/src/services/cloud-fallback.d.ts +102 -0
  146. package/src/services/cloud-fallback.d.ts.map +1 -0
  147. package/src/services/cloud-fallback.ts +230 -0
  148. package/src/services/conversation-registry.d.ts +142 -0
  149. package/src/services/conversation-registry.d.ts.map +1 -0
  150. package/src/services/conversation-registry.test.ts +235 -0
  151. package/src/services/conversation-registry.ts +264 -0
  152. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
  153. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  154. package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
  155. package/src/services/device-bridge.d.ts +188 -0
  156. package/src/services/device-bridge.d.ts.map +1 -0
  157. package/src/services/device-bridge.ts +1237 -0
  158. package/src/services/device-resource-metrics.d.ts +149 -0
  159. package/src/services/device-resource-metrics.d.ts.map +1 -0
  160. package/src/services/device-resource-metrics.test.ts +98 -0
  161. package/src/services/device-resource-metrics.ts +346 -0
  162. package/src/services/device-tier.d.ts +115 -0
  163. package/src/services/device-tier.d.ts.map +1 -0
  164. package/src/services/device-tier.test.ts +371 -0
  165. package/src/services/device-tier.ts +410 -0
  166. package/src/services/downloader.d.ts +82 -0
  167. package/src/services/downloader.d.ts.map +1 -0
  168. package/src/services/downloader.test.ts +724 -0
  169. package/src/services/downloader.ts +899 -0
  170. package/src/services/engine-direct-bundle.test.ts +58 -0
  171. package/src/services/engine-streaming.test.ts +80 -0
  172. package/src/services/engine.d.ts +534 -0
  173. package/src/services/engine.d.ts.map +1 -0
  174. package/src/services/engine.ts +1891 -0
  175. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  176. package/src/services/ensure-local-artifacts.test.ts +368 -0
  177. package/src/services/ensure-local-artifacts.ts +351 -0
  178. package/src/services/external-scanner.d.ts +17 -0
  179. package/src/services/external-scanner.d.ts.map +1 -0
  180. package/src/services/external-scanner.ts +312 -0
  181. package/src/services/ffi-llm-mock.ts +354 -0
  182. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  183. package/src/services/ffi-streaming-backend.d.ts +180 -0
  184. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  185. package/src/services/ffi-streaming-backend.ts +382 -0
  186. package/src/services/ffi-streaming-runner.d.ts +122 -0
  187. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  188. package/src/services/ffi-streaming-runner.test.ts +60 -0
  189. package/src/services/ffi-streaming-runner.ts +354 -0
  190. package/src/services/ffi-unload-ordering.test.ts +162 -0
  191. package/src/services/gpu-autotune.ts +534 -0
  192. package/src/services/gpu-detect.ts +139 -0
  193. package/src/services/handler-registry.d.ts +72 -0
  194. package/src/services/handler-registry.d.ts.map +1 -0
  195. package/src/services/handler-registry.ts +240 -0
  196. package/src/services/hardware.d.ts +63 -0
  197. package/src/services/hardware.d.ts.map +1 -0
  198. package/src/services/hardware.test.ts +183 -0
  199. package/src/services/hardware.ts +404 -0
  200. package/src/services/hf-search.d.ts +26 -0
  201. package/src/services/hf-search.d.ts.map +1 -0
  202. package/src/services/hf-search.test.ts +69 -0
  203. package/src/services/hf-search.ts +420 -0
  204. package/src/services/image-description-runtime.d.ts +14 -0
  205. package/src/services/image-description-runtime.d.ts.map +1 -0
  206. package/src/services/image-description-runtime.test.ts +61 -0
  207. package/src/services/image-description-runtime.ts +118 -0
  208. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  209. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  210. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  211. package/src/services/imagegen/backend-selector.d.ts +118 -0
  212. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  213. package/src/services/imagegen/backend-selector.ts +281 -0
  214. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  215. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  216. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  217. package/src/services/imagegen/errors.d.ts +16 -0
  218. package/src/services/imagegen/errors.d.ts.map +1 -0
  219. package/src/services/imagegen/errors.ts +40 -0
  220. package/src/services/imagegen/index.d.ts +58 -0
  221. package/src/services/imagegen/index.d.ts.map +1 -0
  222. package/src/services/imagegen/index.ts +144 -0
  223. package/src/services/imagegen/mflux.d.ts +74 -0
  224. package/src/services/imagegen/mflux.d.ts.map +1 -0
  225. package/src/services/imagegen/mflux.ts +313 -0
  226. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  227. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  228. package/src/services/imagegen/sd-cpp.ts +718 -0
  229. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  230. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  231. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  232. package/src/services/imagegen/types.d.ts +181 -0
  233. package/src/services/imagegen/types.d.ts.map +1 -0
  234. package/src/services/imagegen/types.ts +193 -0
  235. package/src/services/index.d.ts +30 -0
  236. package/src/services/index.d.ts.map +1 -0
  237. package/src/services/index.ts +225 -0
  238. package/src/services/inference-capabilities.d.ts +132 -0
  239. package/src/services/inference-capabilities.d.ts.map +1 -0
  240. package/src/services/inference-capabilities.test.ts +75 -0
  241. package/src/services/inference-capabilities.ts +204 -0
  242. package/src/services/inference-telemetry.d.ts +59 -0
  243. package/src/services/inference-telemetry.d.ts.map +1 -0
  244. package/src/services/inference-telemetry.ts +143 -0
  245. package/src/services/ios-llama-streaming.ts +248 -0
  246. package/src/services/kv-spill.d.ts +189 -0
  247. package/src/services/kv-spill.d.ts.map +1 -0
  248. package/src/services/kv-spill.test.ts +222 -0
  249. package/src/services/kv-spill.ts +356 -0
  250. package/src/services/latency-trace.d.ts +346 -0
  251. package/src/services/latency-trace.d.ts.map +1 -0
  252. package/src/services/latency-trace.test.ts +266 -0
  253. package/src/services/latency-trace.ts +844 -0
  254. package/src/services/llama-server-metrics.ts +304 -0
  255. package/src/services/llm-streaming-binding.d.ts +96 -0
  256. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  257. package/src/services/llm-streaming-binding.ts +136 -0
  258. package/src/services/load-args.d.ts +82 -0
  259. package/src/services/load-args.d.ts.map +1 -0
  260. package/src/services/load-args.ts +81 -0
  261. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  262. package/src/services/manifest/index.d.ts +4 -0
  263. package/src/services/manifest/index.d.ts.map +1 -0
  264. package/src/services/manifest/index.ts +66 -0
  265. package/src/services/manifest/manifest.test.ts +693 -0
  266. package/src/services/manifest/schema.d.ts +715 -0
  267. package/src/services/manifest/schema.d.ts.map +1 -0
  268. package/src/services/manifest/schema.ts +655 -0
  269. package/src/services/manifest/types.d.ts +30 -0
  270. package/src/services/manifest/types.d.ts.map +1 -0
  271. package/src/services/manifest/types.ts +55 -0
  272. package/src/services/manifest/validator.d.ts +66 -0
  273. package/src/services/manifest/validator.d.ts.map +1 -0
  274. package/src/services/manifest/validator.ts +569 -0
  275. package/src/services/memory-arbiter.d.ts +343 -0
  276. package/src/services/memory-arbiter.d.ts.map +1 -0
  277. package/src/services/memory-arbiter.test.ts +419 -0
  278. package/src/services/memory-arbiter.ts +1000 -0
  279. package/src/services/memory-monitor.d.ts +119 -0
  280. package/src/services/memory-monitor.d.ts.map +1 -0
  281. package/src/services/memory-monitor.test.ts +208 -0
  282. package/src/services/memory-monitor.ts +296 -0
  283. package/src/services/memory-pressure.d.ts +127 -0
  284. package/src/services/memory-pressure.d.ts.map +1 -0
  285. package/src/services/memory-pressure.ts +413 -0
  286. package/src/services/mtp-doctor.d.ts +13 -0
  287. package/src/services/mtp-doctor.d.ts.map +1 -0
  288. package/src/services/mtp-doctor.ts +78 -0
  289. package/src/services/network-policy.d.ts +127 -0
  290. package/src/services/network-policy.d.ts.map +1 -0
  291. package/src/services/network-policy.ts +346 -0
  292. package/src/services/paths.d.ts +6 -0
  293. package/src/services/paths.d.ts.map +1 -0
  294. package/src/services/paths.ts +25 -0
  295. package/src/services/planner-skeleton.d.ts +124 -0
  296. package/src/services/planner-skeleton.d.ts.map +1 -0
  297. package/src/services/planner-skeleton.ts +175 -0
  298. package/src/services/providers.d.ts +38 -0
  299. package/src/services/providers.d.ts.map +1 -0
  300. package/src/services/providers.ts +507 -0
  301. package/src/services/ram-budget-cache.test.ts +163 -0
  302. package/src/services/ram-budget.d.ts +110 -0
  303. package/src/services/ram-budget.d.ts.map +1 -0
  304. package/src/services/ram-budget.ts +0 -0
  305. package/src/services/readiness.d.ts +9 -0
  306. package/src/services/readiness.d.ts.map +1 -0
  307. package/src/services/readiness.test.ts +87 -0
  308. package/src/services/readiness.ts +238 -0
  309. package/src/services/recommendation.d.ts +111 -0
  310. package/src/services/recommendation.d.ts.map +1 -0
  311. package/src/services/recommendation.ts +672 -0
  312. package/src/services/registry.d.ts +35 -0
  313. package/src/services/registry.d.ts.map +1 -0
  314. package/src/services/registry.ts +151 -0
  315. package/src/services/router-handler.d.ts +92 -0
  316. package/src/services/router-handler.d.ts.map +1 -0
  317. package/src/services/router-handler.test.ts +45 -0
  318. package/src/services/router-handler.ts +376 -0
  319. package/src/services/routing-policy.d.ts +55 -0
  320. package/src/services/routing-policy.d.ts.map +1 -0
  321. package/src/services/routing-policy.ts +228 -0
  322. package/src/services/routing-preferences.d.ts +8 -0
  323. package/src/services/routing-preferences.d.ts.map +1 -0
  324. package/src/services/routing-preferences.ts +15 -0
  325. package/src/services/runtime-target.d.ts +98 -0
  326. package/src/services/runtime-target.d.ts.map +1 -0
  327. package/src/services/runtime-target.ts +154 -0
  328. package/src/services/service.d.ts +128 -0
  329. package/src/services/service.d.ts.map +1 -0
  330. package/src/services/service.test.ts +223 -0
  331. package/src/services/service.ts +735 -0
  332. package/src/services/session-pool.d.ts +72 -0
  333. package/src/services/session-pool.d.ts.map +1 -0
  334. package/src/services/session-pool.ts +153 -0
  335. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  336. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  337. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  338. package/src/services/structured-output/deterministic-repair.ts +443 -0
  339. package/src/services/structured-output/index.ts +4 -0
  340. package/src/services/structured-output.d.ts +311 -0
  341. package/src/services/structured-output.d.ts.map +1 -0
  342. package/src/services/structured-output.test.ts +483 -0
  343. package/src/services/structured-output.ts +712 -0
  344. package/src/services/transcription-priority.test.ts +211 -0
  345. package/src/services/tts/errors.ts +46 -0
  346. package/src/services/tts/index.ts +214 -0
  347. package/src/services/tts/tts-audio-cache.ts +235 -0
  348. package/src/services/tts/types.ts +157 -0
  349. package/src/services/types.d.ts +19 -0
  350. package/src/services/types.d.ts.map +1 -0
  351. package/src/services/types.ts +55 -0
  352. package/src/services/verify-on-device.d.ts +34 -0
  353. package/src/services/verify-on-device.d.ts.map +1 -0
  354. package/src/services/verify-on-device.test.ts +87 -0
  355. package/src/services/verify-on-device.ts +127 -0
  356. package/src/services/verify.d.ts +8 -0
  357. package/src/services/verify.d.ts.map +1 -0
  358. package/src/services/verify.ts +13 -0
  359. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  360. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  361. package/src/services/vision/aosp-unavailable.ts +163 -0
  362. package/src/services/vision/capacitor-llama.d.ts +99 -0
  363. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  364. package/src/services/vision/capacitor-llama.ts +255 -0
  365. package/src/services/vision/cloud-fallback.d.ts +47 -0
  366. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  367. package/src/services/vision/cloud-fallback.test.ts +243 -0
  368. package/src/services/vision/cloud-fallback.ts +268 -0
  369. package/src/services/vision/fallback-chain.test.ts +86 -0
  370. package/src/services/vision/hash.d.ts +71 -0
  371. package/src/services/vision/hash.d.ts.map +1 -0
  372. package/src/services/vision/hash.ts +157 -0
  373. package/src/services/vision/index.d.ts +95 -0
  374. package/src/services/vision/index.d.ts.map +1 -0
  375. package/src/services/vision/index.ts +251 -0
  376. package/src/services/vision/llama-server.d.ts +73 -0
  377. package/src/services/vision/llama-server.d.ts.map +1 -0
  378. package/src/services/vision/llama-server.ts +177 -0
  379. package/src/services/vision/types.d.ts +153 -0
  380. package/src/services/vision/types.d.ts.map +1 -0
  381. package/src/services/vision/types.ts +154 -0
  382. package/src/services/vision/vast-fallback.d.ts +18 -0
  383. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  384. package/src/services/vision/vast-fallback.ts +127 -0
  385. package/src/services/vision-embedding-cache.d.ts +98 -0
  386. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  387. package/src/services/vision-embedding-cache.ts +189 -0
  388. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  389. package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
  390. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  391. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  392. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  393. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  394. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  395. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  396. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  397. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  398. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  399. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  400. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  401. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
  402. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  403. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  404. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  405. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  406. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  407. package/src/services/voice/audio-frame-consumer.ts +491 -0
  408. package/src/services/voice/barge-in.d.ts +112 -0
  409. package/src/services/voice/barge-in.d.ts.map +1 -0
  410. package/src/services/voice/barge-in.test.ts +244 -0
  411. package/src/services/voice/barge-in.ts +336 -0
  412. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  413. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  414. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  415. package/src/services/voice/cancellation-coordinator.ts +269 -0
  416. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  417. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  418. package/src/services/voice/checkpoint-manager.ts +401 -0
  419. package/src/services/voice/checkpoint-policy.ts +336 -0
  420. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  421. package/src/services/voice/e2e-harness.test.ts +182 -0
  422. package/src/services/voice/e2e-harness.ts +743 -0
  423. package/src/services/voice/eager-context-builder.d.ts +170 -0
  424. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  425. package/src/services/voice/eager-context-builder.ts +262 -0
  426. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  427. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  428. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  429. package/src/services/voice/embedding-server.ts +200 -0
  430. package/src/services/voice/embedding.d.ts +133 -0
  431. package/src/services/voice/embedding.d.ts.map +1 -0
  432. package/src/services/voice/embedding.test.ts +148 -0
  433. package/src/services/voice/embedding.ts +244 -0
  434. package/src/services/voice/emotion-attribution.d.ts +68 -0
  435. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  436. package/src/services/voice/emotion-attribution.test.ts +129 -0
  437. package/src/services/voice/emotion-attribution.ts +361 -0
  438. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  439. package/src/services/voice/engine-bridge.d.ts +746 -0
  440. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  441. package/src/services/voice/engine-bridge.test.ts +384 -0
  442. package/src/services/voice/engine-bridge.ts +2226 -0
  443. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  444. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  445. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  446. package/src/services/voice/eot-classifier.d.ts +214 -0
  447. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  448. package/src/services/voice/eot-classifier.ts +533 -0
  449. package/src/services/voice/errors.d.ts +20 -0
  450. package/src/services/voice/errors.d.ts.map +1 -0
  451. package/src/services/voice/errors.ts +32 -0
  452. package/src/services/voice/expressive-tags.d.ts +158 -0
  453. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  454. package/src/services/voice/expressive-tags.ts +405 -0
  455. package/src/services/voice/ffi-bindings.d.ts +636 -0
  456. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  457. package/src/services/voice/ffi-bindings.test.ts +671 -0
  458. package/src/services/voice/ffi-bindings.ts +3050 -0
  459. package/src/services/voice/first-line-cache.d.ts +181 -0
  460. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  461. package/src/services/voice/first-line-cache.ts +725 -0
  462. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  463. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  464. package/src/services/voice/fused-eot-scorer.ts +135 -0
  465. package/src/services/voice/index.d.ts +91 -0
  466. package/src/services/voice/index.d.ts.map +1 -0
  467. package/src/services/voice/index.ts +481 -0
  468. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  469. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  470. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  471. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  472. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  473. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  474. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  475. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  476. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  477. package/src/services/voice/kokoro/index.ts +79 -0
  478. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  479. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  480. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  481. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  482. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  483. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  484. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  485. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  486. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  487. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  488. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  489. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  490. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  491. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  492. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  493. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  494. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  495. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  496. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  497. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  498. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  499. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  501. package/src/services/voice/kokoro/types.d.ts +82 -0
  502. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  503. package/src/services/voice/kokoro/types.ts +95 -0
  504. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  505. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  506. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  507. package/src/services/voice/kokoro/voices.d.ts +30 -0
  508. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  509. package/src/services/voice/kokoro/voices.ts +64 -0
  510. package/src/services/voice/lifecycle.d.ts +135 -0
  511. package/src/services/voice/lifecycle.d.ts.map +1 -0
  512. package/src/services/voice/lifecycle.test.ts +315 -0
  513. package/src/services/voice/lifecycle.ts +301 -0
  514. package/src/services/voice/live-diarization-session.d.ts +96 -0
  515. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  516. package/src/services/voice/live-diarization-session.ts +289 -0
  517. package/src/services/voice/mic-source.d.ts +136 -0
  518. package/src/services/voice/mic-source.d.ts.map +1 -0
  519. package/src/services/voice/mic-source.test.ts +210 -0
  520. package/src/services/voice/mic-source.ts +503 -0
  521. package/src/services/voice/optimistic-policy.d.ts +109 -0
  522. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  523. package/src/services/voice/optimistic-policy.test.ts +101 -0
  524. package/src/services/voice/optimistic-policy.ts +192 -0
  525. package/src/services/voice/optimistic-rollback.ts +343 -0
  526. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  527. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  528. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  529. package/src/services/voice/partial-stabilizer.ts +140 -0
  530. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  531. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  532. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  533. package/src/services/voice/phrase-cache.d.ts +76 -0
  534. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  535. package/src/services/voice/phrase-cache.test.ts +242 -0
  536. package/src/services/voice/phrase-cache.ts +186 -0
  537. package/src/services/voice/phrase-chunker.d.ts +62 -0
  538. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  539. package/src/services/voice/phrase-chunker.test.ts +239 -0
  540. package/src/services/voice/phrase-chunker.ts +281 -0
  541. package/src/services/voice/pipeline-impls.d.ts +151 -0
  542. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  543. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  544. package/src/services/voice/pipeline-impls.test.ts +292 -0
  545. package/src/services/voice/pipeline-impls.ts +315 -0
  546. package/src/services/voice/pipeline.d.ts +216 -0
  547. package/src/services/voice/pipeline.d.ts.map +1 -0
  548. package/src/services/voice/pipeline.ts +505 -0
  549. package/src/services/voice/prefill-client.d.ts +123 -0
  550. package/src/services/voice/prefill-client.d.ts.map +1 -0
  551. package/src/services/voice/prefill-client.ts +316 -0
  552. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  553. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  554. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  555. package/src/services/voice/profile-store.d.ts +248 -0
  556. package/src/services/voice/profile-store.d.ts.map +1 -0
  557. package/src/services/voice/profile-store.ts +887 -0
  558. package/src/services/voice/ring-buffer.d.ts +40 -0
  559. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  560. package/src/services/voice/ring-buffer.ts +105 -0
  561. package/src/services/voice/rollback-queue.d.ts +24 -0
  562. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  563. package/src/services/voice/rollback-queue.ts +74 -0
  564. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  565. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  566. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  567. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  568. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  569. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  570. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  571. package/src/services/voice/scheduler.d.ts +146 -0
  572. package/src/services/voice/scheduler.d.ts.map +1 -0
  573. package/src/services/voice/scheduler.t2.test.ts +141 -0
  574. package/src/services/voice/scheduler.ts +927 -0
  575. package/src/services/voice/shared-resources.d.ts +190 -0
  576. package/src/services/voice/shared-resources.d.ts.map +1 -0
  577. package/src/services/voice/shared-resources.ts +320 -0
  578. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  579. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  580. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  581. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  582. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  583. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  584. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  585. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  586. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  587. package/src/services/voice/speaker/diarizer.ts +218 -0
  588. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  589. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  590. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  591. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  592. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  593. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  594. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  595. package/src/services/voice/speaker/encoder.d.ts +37 -0
  596. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  597. package/src/services/voice/speaker/encoder.ts +105 -0
  598. package/src/services/voice/speaker-imprint.d.ts +83 -0
  599. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  600. package/src/services/voice/speaker-imprint.test.ts +185 -0
  601. package/src/services/voice/speaker-imprint.ts +312 -0
  602. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  603. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  604. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  605. package/src/services/voice/speaker-preset-cache.ts +195 -0
  606. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  607. package/src/services/voice/system-audio-sink.d.ts +73 -0
  608. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  609. package/src/services/voice/system-audio-sink.test.ts +29 -0
  610. package/src/services/voice/system-audio-sink.ts +366 -0
  611. package/src/services/voice/transcriber.d.ts +244 -0
  612. package/src/services/voice/transcriber.d.ts.map +1 -0
  613. package/src/services/voice/transcriber.test.ts +392 -0
  614. package/src/services/voice/transcriber.ts +704 -0
  615. package/src/services/voice/turn-controller.d.ts +183 -0
  616. package/src/services/voice/turn-controller.d.ts.map +1 -0
  617. package/src/services/voice/turn-controller.test.ts +575 -0
  618. package/src/services/voice/turn-controller.ts +596 -0
  619. package/src/services/voice/types.d.ts +643 -0
  620. package/src/services/voice/types.d.ts.map +1 -0
  621. package/src/services/voice/types.ts +699 -0
  622. package/src/services/voice/vad.d.ts +282 -0
  623. package/src/services/voice/vad.d.ts.map +1 -0
  624. package/src/services/voice/vad.test.ts +480 -0
  625. package/src/services/voice/vad.ts +827 -0
  626. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  627. package/src/services/voice/voice-budget.d.ts +241 -0
  628. package/src/services/voice/voice-budget.d.ts.map +1 -0
  629. package/src/services/voice/voice-budget.test.ts +420 -0
  630. package/src/services/voice/voice-budget.ts +656 -0
  631. package/src/services/voice/voice-duet.test.ts +375 -0
  632. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  633. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  634. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  635. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  636. package/src/services/voice/voice-preset-format.d.ts +158 -0
  637. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  638. package/src/services/voice/voice-preset-format.ts +700 -0
  639. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  640. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  641. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  642. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  643. package/src/services/voice/voice-profile-artifact.ts +518 -0
  644. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  645. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  646. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  647. package/src/services/voice/voice-profile-routes.ts +425 -0
  648. package/src/services/voice/voice-scenario.ts +154 -0
  649. package/src/services/voice/voice-settings.d.ts +82 -0
  650. package/src/services/voice/voice-settings.d.ts.map +1 -0
  651. package/src/services/voice/voice-settings.ts +172 -0
  652. package/src/services/voice/voice-state-machine.d.ts +364 -0
  653. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  654. package/src/services/voice/voice-state-machine.ts +727 -0
  655. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  656. package/src/services/voice/voice-workbench-report.ts +326 -0
  657. package/src/services/voice/voice-workbench.test.ts +158 -0
  658. package/src/services/voice/voice.test.ts +1070 -0
  659. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  660. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  661. package/src/services/voice/wake-word-ggml.ts +320 -0
  662. package/src/services/voice/wake-word.d.ts +255 -0
  663. package/src/services/voice/wake-word.d.ts.map +1 -0
  664. package/src/services/voice/wake-word.test.ts +298 -0
  665. package/src/services/voice/wake-word.ts +554 -0
  666. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  667. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  668. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  669. package/src/services/voice-model-updater.d.ts +240 -0
  670. package/src/services/voice-model-updater.d.ts.map +1 -0
  671. package/src/services/voice-model-updater.ts +724 -0
  672. package/src/services/voice-prewarm.d.ts +3 -0
  673. package/src/services/voice-prewarm.d.ts.map +1 -0
  674. package/src/services/voice-prewarm.ts +51 -0
  675. package/dist/index.d.ts +0 -37
  676. package/dist/index.js +0 -1098
@@ -0,0 +1,304 @@
1
+ /**
2
+ * Scraper for llama-server's `/metrics` (Prometheus exposition format)
3
+ * endpoint. Translates the running counters into the
4
+ * Anthropic-SDK-shaped `usage` block that callers already know how to
5
+ * consume from the cloud Anthropic plugin.
6
+ *
7
+ * llama-server publishes the following counters (per-process, monotonic):
8
+ *
9
+ * llamacpp:n_decode_total — context tokens decoded (prefill + gen)
10
+ * llamacpp:n_tokens_predicted_total — output tokens
11
+ * llamacpp:prompt_tokens_total — total input tokens accepted
12
+ * llamacpp:n_past_max — high-water mark of cached past-tokens
13
+ * llamacpp:n_prompt_tokens_processed_total — fresh tokens prefilled
14
+ * (i.e. cache MISS), excludes cache hits
15
+ * llamacpp:kv_cache_tokens — current size of KV cache (gauge)
16
+ * llamacpp:kv_cache_used_cells — slots with active KV (gauge)
17
+ *
18
+ * For MTP speculative decoding, the fork additionally publishes:
19
+ *
20
+ * llamacpp:n_drafted_total — drafter-emitted tokens
21
+ * llamacpp:n_drafted_accepted_total — accepted speculative tokens
22
+ *
23
+ * The mapping into Anthropic shape:
24
+ *
25
+ * prompt_tokens_total → input_tokens
26
+ * n_tokens_predicted_total → output_tokens
27
+ * n_prompt_tokens_processed_total → cache_creation_input_tokens
28
+ * prompt_tokens_total - n_prompt_tokens_processed_total → cache_read_input_tokens
29
+ * n_drafted_total / n_drafted_accepted_total → MTP extension fields
30
+ *
31
+ * Counters are taken as deltas across two snapshots: take one before
32
+ * `generate`, one after, and subtract. Losing a few samples to process
33
+ * restart is acceptable — the deltas are useful for the call's own
34
+ * usage accounting, not for global monitoring.
35
+ */
36
+
/**
 * One scrape of llama-server's `/metrics` endpoint, reduced to the fields
 * this module consumes. Counter fields hold the cumulative totals reported
 * by the server; callers difference two snapshots to get per-call usage.
 */
export interface LlamaServerMetricSnapshot {
	/** Wall-clock ms when the snapshot was taken; useful for diagnostics. */
	takenAtMs: number;
	/** True when `/metrics` was fetched and parsed. False means scrape failure. */
	scrapeOk?: boolean;
	/** True when the scrape included at least one generation/speculation counter. */
	hasGenerationCounters?: boolean;
	promptTokensTotal: number;
	predictedTokensTotal: number;
	/** Tokens that had to be freshly prefilled — i.e. cache MISS this turn. */
	promptTokensProcessedTotal: number;
	draftedTotal: number;
	acceptedTotal: number;
	/** Current size of the KV cache (gauge). */
	kvCacheTokens: number;
	/** Number of slots currently holding active KV (gauge). */
	kvCacheUsedCells: number;
}

/** Snapshot fields that carry a numeric metric value (everything but the flags). */
type MetricNumericField = Exclude<
	keyof LlamaServerMetricSnapshot,
	"scrapeOk" | "hasGenerationCounters"
>;

/**
 * Maps every recognised metric name (including alias spellings) onto the
 * snapshot field it feeds. Declaration order matters: when several aliases
 * of the same field are present in one scrape, the earlier entry wins.
 */
const METRIC_KEYS: Record<string, MetricNumericField> = {
	"llamacpp:prompt_tokens_total": "promptTokensTotal",
	"llamacpp:n_tokens_predicted_total": "predictedTokensTotal",
	"llamacpp:n_prompt_tokens_processed_total": "promptTokensProcessedTotal",
	"llamacpp:n_drafted_total": "draftedTotal",
	"llamacpp:n_drafted": "draftedTotal",
	"llamacpp:n_drafted_accepted_total": "acceptedTotal",
	"llamacpp:n_drafted_accepted": "acceptedTotal",
	"llamacpp:n_accepted_total": "acceptedTotal",
	"llamacpp:n_accepted": "acceptedTotal",
	"llamacpp:kv_cache_tokens": "kvCacheTokens",
	"llamacpp:kv_cache_used_cells": "kvCacheUsedCells",
};

const DEFAULT_METRICS_SCRAPE_TIMEOUT_MS = 2_000;

/** Fields whose presence sets `hasGenerationCounters` on the snapshot. */
const GENERATION_COUNTER_FIELDS: ReadonlySet<MetricNumericField> =
	new Set<MetricNumericField>([
		"promptTokensTotal",
		"predictedTokensTotal",
		"promptTokensProcessedTotal",
		"draftedTotal",
		"acceptedTotal",
	]);

/**
 * Prometheus sample line: `name{labels?} value [timestamp]`. The value must
 * be followed by whitespace or end-of-line so that tokens such as `12abc`
 * are rejected instead of being truncated to `12`.
 */
const SAMPLE_LINE_PATTERN =
	/^([a-zA-Z_:][\w:]*)(\{.*\})?\s+([+-]?(?:\d+\.?\d*|\.\d+)(?:e[+-]?\d+)?)(?=\s|$)/i;

/** Samples collected for one metric name within a single scrape. */
interface MetricSampleBucket {
	/** Last unlabelled sample seen, or null when there was none. */
	unlabeled: number | null;
	/** Sum of all labelled (e.g. per-slot) samples. */
	labeledSum: number;
}

/**
 * Parse a Prometheus exposition-format payload into a metric snapshot.
 * Unknown or malformed lines are silently skipped — counters we don't
 * recognise are not interesting and metric exporters add new ones over
 * time.
 *
 * llama-server usually exposes one sample per metric (no labels), e.g.
 *   `llamacpp:prompt_tokens_total 1234`
 * Some MTP forks expose per-slot labelled samples, e.g.
 *   `llamacpp:n_drafted_accepted_total{slot_id="0"} 12`
 * Labelled samples are summed unless an unlabelled total exists for the same
 * canonical field, in which case the unlabelled total wins. Samples are
 * bucketed per metric *name*, so a server that publishes the same counter
 * under two alias names is counted once (first alias in `METRIC_KEYS` order),
 * not twice.
 *
 * @param body Raw text of the `/metrics` response.
 * @param takenAtMs Timestamp recorded on the snapshot; defaults to now.
 * @returns A snapshot with `scrapeOk: true`; fields without a recognised
 *   sample stay at 0.
 */
export function parsePrometheusMetrics(
	body: string,
	takenAtMs: number = Date.now(),
): LlamaServerMetricSnapshot {
	const snapshot: LlamaServerMetricSnapshot = {
		takenAtMs,
		scrapeOk: true,
		hasGenerationCounters: false,
		promptTokensTotal: 0,
		predictedTokensTotal: 0,
		promptTokensProcessedTotal: 0,
		draftedTotal: 0,
		acceptedTotal: 0,
		kvCacheTokens: 0,
		kvCacheUsedCells: 0,
	};
	const bucketsByName = new Map<string, MetricSampleBucket>();
	let hasGenerationCounters = false;

	for (const rawLine of body.split(/\r?\n/)) {
		const line = rawLine.trim();
		if (!line || line.startsWith("#")) continue;
		const match = SAMPLE_LINE_PATTERN.exec(line);
		if (!match) continue;
		const [, name, labels, rawValue] = match;
		if (name === undefined) continue;
		const value = Number(rawValue);
		if (!Number.isFinite(value)) continue;
		// Own-property lookup only: a metric literally named `constructor` or
		// `toString` must not resolve to something inherited from Object.
		const field = Object.prototype.hasOwnProperty.call(METRIC_KEYS, name)
			? METRIC_KEYS[name]
			: undefined;
		if (field === undefined) continue;
		if (GENERATION_COUNTER_FIELDS.has(field)) hasGenerationCounters = true;
		const bucket = bucketsByName.get(name) ?? {
			unlabeled: null,
			labeledSum: 0,
		};
		if (labels) bucket.labeledSum += value;
		else bucket.unlabeled = value;
		bucketsByName.set(name, bucket);
	}

	// Resolve each field from its aliases in declaration order. An unlabelled
	// total from any alias beats a labelled sum; otherwise the first alias
	// that produced samples is used.
	const fieldsWithSample = new Set<MetricNumericField>();
	const fieldsWithUnlabeled = new Set<MetricNumericField>();
	for (const [name, field] of Object.entries(METRIC_KEYS)) {
		const bucket = bucketsByName.get(name);
		if (bucket === undefined) continue;
		if (bucket.unlabeled !== null) {
			if (fieldsWithUnlabeled.has(field)) continue;
			snapshot[field] = bucket.unlabeled;
			fieldsWithUnlabeled.add(field);
			fieldsWithSample.add(field);
		} else if (!fieldsWithSample.has(field)) {
			snapshot[field] = bucket.labeledSum;
			fieldsWithSample.add(field);
		}
	}

	snapshot.hasGenerationCounters = hasGenerationCounters;

	return snapshot;
}
149
+
/**
 * Anthropic-SDK-shaped usage block, optionally extended with MTP
 * speculative-decoding metrics. The cloud plugin (plugin-anthropic)
 * emits the first three fields verbatim; local inference adds the
 * `mtp_*` fields when speculative decoding is active. Callers that
 * already handle the cloud `usage` shape need no change.
 */
export interface LocalUsageBlock {
	[key: string]: unknown;
	input_tokens: number;
	output_tokens: number;
	cache_creation_input_tokens: number;
	cache_read_input_tokens: number;
	mtp_drafted_tokens?: number;
	mtp_accepted_tokens?: number;
	/** 0..1 — proportion of drafted tokens that were accepted. */
	mtp_acceptance_rate?: number;
	/** 0..1 — proportion of input tokens that hit a warm slot (cache reuse). */
	cache_hit_rate?: number;
}

/**
 * Compute the Anthropic-shape usage block for a single generation by
 * differencing two snapshots. `before` is taken just before the request,
 * `after` just after the response was received. Negative deltas (caused
 * by a metric reset between snapshots, e.g. server restart) are clamped
 * to 0 — losing the sample is preferable to surfacing nonsense to the
 * caller.
 *
 * If either snapshot is marked `scrapeOk: false` its zeros are placeholders,
 * not measurements, so every metric delta is treated as 0. Without this, a
 * failed `before` scrape paired with a good `after` scrape would report the
 * server's lifetime totals as this call's usage.
 *
 * Pass `responseUsage` to override input/output counts when the response
 * payload itself reports per-call counters that are more accurate than
 * the metric delta — llama-server's chat completion response includes
 * `usage.{prompt,completion}_tokens` per request, which is exact while
 * the metric delta is "everything that happened during the wall-clock
 * window of the request." Non-finite or negative override values are
 * ignored in favour of the metric delta.
 *
 * @param before Snapshot taken just before the request.
 * @param after Snapshot taken just after the response.
 * @param responseUsage Optional per-call counts from the response payload.
 * @returns The usage block; `cache_hit_rate` is present only when there were
 *   input tokens, and the `mtp_*` fields only when tokens were drafted.
 */
export function diffSnapshots(
	before: LlamaServerMetricSnapshot,
	after: LlamaServerMetricSnapshot,
	responseUsage?: { prompt_tokens?: number; completion_tokens?: number },
): LocalUsageBlock {
	const metricsUsable = before.scrapeOk !== false && after.scrapeOk !== false;
	const counterDelta = (afterValue: number, beforeValue: number): number =>
		metricsUsable ? clampNonNegative(afterValue - beforeValue) : 0;
	// Accept a response-reported count only when it is a real token count.
	const reportedCount = (value: number | undefined): number | undefined =>
		typeof value === "number" && Number.isFinite(value) && value >= 0
			? value
			: undefined;

	const promptDelta = counterDelta(
		after.promptTokensTotal,
		before.promptTokensTotal,
	);
	const predictedDelta = counterDelta(
		after.predictedTokensTotal,
		before.predictedTokensTotal,
	);
	const processedDelta = counterDelta(
		after.promptTokensProcessedTotal,
		before.promptTokensProcessedTotal,
	);
	const draftedDelta = counterDelta(after.draftedTotal, before.draftedTotal);
	const acceptedDelta = counterDelta(after.acceptedTotal, before.acceptedTotal);

	const inputTokens = reportedCount(responseUsage?.prompt_tokens) ?? promptDelta;
	const outputTokens =
		reportedCount(responseUsage?.completion_tokens) ?? predictedDelta;
	// Tokens that had to be freshly prefilled this call. Bounded above by
	// the per-call input count — a metric-delta wider than the call's own
	// input is a sampling artifact.
	const cacheCreation = Math.min(processedDelta, inputTokens);
	const cacheRead = Math.max(0, inputTokens - cacheCreation);

	const block: LocalUsageBlock = {
		input_tokens: inputTokens,
		output_tokens: outputTokens,
		cache_creation_input_tokens: cacheCreation,
		cache_read_input_tokens: cacheRead,
	};
	if (inputTokens > 0) {
		block.cache_hit_rate = cacheRead / inputTokens;
	}
	if (draftedDelta > 0) {
		block.mtp_drafted_tokens = draftedDelta;
		block.mtp_accepted_tokens = acceptedDelta;
		// Snapshot timing can make the accepted delta exceed the drafted delta;
		// keep the rate inside its documented 0..1 range.
		block.mtp_acceptance_rate = Math.min(1, acceptedDelta / draftedDelta);
	}
	return block;
}

/** Map negative and non-finite values to 0; pass everything else through. */
function clampNonNegative(value: number): number {
	if (!Number.isFinite(value)) return 0;
	return value < 0 ? 0 : value;
}
239
+
/**
 * GET `/metrics` from a running llama-server and parse it. Errors fall
 * back to a zero-valued snapshot rather than throwing — observability
 * MUST NOT break generation. `scrapeOk=false` tells callers that the
 * zeros are not evidence of absent MTP/KV activity.
 *
 * @param baseUrl Server base URL; trailing slashes are ignored.
 * @param signal Optional caller abort signal; aborting it aborts the scrape.
 * @param timeoutMs Scrape deadline in ms. Clamped to the range a timer can
 *   represent (1 .. 2^31-1), so `Infinity` means "as long as possible" instead
 *   of collapsing to a 1 ms timer; `NaN` falls back to the default.
 * @returns The parsed snapshot, or a zero-valued one with `scrapeOk: false`
 *   on a non-2xx status, network error, abort, or timeout.
 */
export async function fetchMetricsSnapshot(
	baseUrl: string,
	signal?: AbortSignal,
	timeoutMs = DEFAULT_METRICS_SCRAPE_TIMEOUT_MS,
): Promise<LlamaServerMetricSnapshot> {
	// setTimeout silently replaces delays above this (and NaN) with 1 ms.
	const maxTimerDelayMs = 2_147_483_647;
	const requestedTimeoutMs = Number.isNaN(timeoutMs)
		? DEFAULT_METRICS_SCRAPE_TIMEOUT_MS
		: timeoutMs;
	const effectiveTimeoutMs = Math.min(
		maxTimerDelayMs,
		Math.max(1, Math.floor(requestedTimeoutMs)),
	);

	const takenAtMs = Date.now();
	const empty: LlamaServerMetricSnapshot = {
		takenAtMs,
		scrapeOk: false,
		hasGenerationCounters: false,
		promptTokensTotal: 0,
		predictedTokensTotal: 0,
		promptTokensProcessedTotal: 0,
		draftedTotal: 0,
		acceptedTotal: 0,
		kvCacheTokens: 0,
		kvCacheUsedCells: 0,
	};

	// One controller drives the fetch; both the caller's signal and the
	// timeout funnel into it.
	const controller = new AbortController();
	const abortFromCaller = () => controller.abort(signal?.reason);
	if (signal?.aborted) {
		abortFromCaller();
	} else {
		signal?.addEventListener("abort", abortFromCaller, { once: true });
	}
	const timer = setTimeout(
		() =>
			controller.abort(
				new DOMException(
					`llama-server metrics scrape timed out after ${effectiveTimeoutMs}ms`,
					"TimeoutError",
				),
			),
		effectiveTimeoutMs,
	);

	let res: Response | null = null;
	let bodySettled = false;
	try {
		res = await fetch(`${baseUrl.replace(/\/+$/, "")}/metrics`, {
			method: "GET",
			signal: controller.signal,
		});
		if (!res.ok) return empty;
		const body = await res.text();
		bodySettled = true;
		return parsePrometheusMetrics(body, takenAtMs);
	} catch {
		// Best effort: a metrics scrape failure must not abort the response
		// path. Returning an empty snapshot causes diffSnapshots to surface
		// zero deltas; the caller still sees the response payload usage.
		return empty;
	} finally {
		clearTimeout(timer);
		signal?.removeEventListener("abort", abortFromCaller);
		// Release the response stream if the body was not fully read (non-2xx
		// status, read failure) or the request was aborted.
		if (res?.body && (!bodySettled || controller.signal.aborted)) {
			await res.body.cancel(controller.signal.reason).catch(() => undefined);
		}
	}
}
@@ -0,0 +1,96 @@
1
+ /**
2
+ * Narrow streaming-LLM binding.
3
+ *
4
+ * `FfiStreamingRunner` (`services/ffi-streaming-runner.ts`) used to require
5
+ * the full `ElizaInferenceFfi` surface (TTS + ASR + VAD + mmap regions +
6
+ * the entire fused libelizainference) just to run text generation. That
7
+ * surface implies a *bundle-anchored* runtime — libelizainference owns a
8
+ * context built from a bundle root, not a single GGUF — and ~25 methods
9
+ * that have nothing to do with LLM streaming.
10
+ *
11
+ * This file extracts the actual contract the runner depends on: the six
12
+ * `llmStream*` methods plus the (optional) two slot save/restore methods.
13
+ * Both libelizainference (via a tiny adapter) and the desktop
14
+ * libllama + eliza-llama-shim path (built by `build-llama-cpp-desktop-dylib.mjs`,
15
+ * mirroring the AOSP adapter pattern) can implement this narrow contract
16
+ * without dragging in TTS/ASR.
17
+ *
18
+ * See `plugins/plugin-local-inference/FFI_BACKEND_WIREUP_PLAN.md` Step B
19
+ * for the desktop adapter follow-up that implements this interface against
20
+ * the libllama symbols.
21
+ */
22
+ import type { ElizaInferenceContextHandle, ElizaInferenceFfi, LlmStreamConfig, LlmStreamHandle, LlmStreamStep } from "./voice/ffi-bindings";
23
+ /**
24
+ * Opaque per-context handle. For libelizainference this is the
25
+ * `ElizaInferenceContextHandle` (a bigint pointer to the bundle context).
26
+ * For the desktop libllama path it's a bigint pointer to a per-model
27
+ * llama_context. The runner only needs to pass it through to
28
+ * `llmStreamOpen`.
29
+ */
30
+ export type LlmCtxHandle = ElizaInferenceContextHandle;
31
+ /**
32
+ * The streaming-LLM contract `FfiStreamingRunner` consumes. Methods
33
+ * mirror the C ABI declared in `tools/omnivoice/include/eliza-inference-ffi.h`
34
+ * (the `eliza_inference_llm_stream_*` surface), but the binding doesn't have
35
+ * to come from libelizainference — any implementation that satisfies this
36
+ * interface works.
37
+ *
38
+ * Slot save/restore are optional because the desktop libllama path
39
+ * does not expose `llama_state_seq_save_file` / `_load_file` through
40
+ * the shim yet. The runner already guards both methods via
41
+ * `if (this.ffi.llmStreamSaveSlot === undefined) throw ...`.
42
+ */
43
+ export interface LlmStreamingBinding {
44
+ /** Probe — must return `true` for the binding to be usable by the runner. */
45
+ llmStreamSupported(): boolean;
46
+ /**
47
+ * Open a streaming-LLM session against `ctx`. Failure throws an
48
+ * implementation-specific error (`VoiceLifecycleError` for
49
+ * libelizainference). Close exactly once via `llmStreamClose`.
50
+ */
51
+ llmStreamOpen(args: {
52
+ ctx: LlmCtxHandle;
53
+ config: LlmStreamConfig;
54
+ }): LlmStreamHandle;
55
+ /** Feed a batch of pre-tokenized prompt tokens before the first `next`. */
56
+ llmStreamPrefill(args: {
57
+ stream: LlmStreamHandle;
58
+ tokens: Int32Array;
59
+ }): void;
60
+ /**
61
+ * Pull the next streaming step. `step.done === true` is the final step.
62
+ * Implementations may bound the step by `maxTokensPerStep` /
63
+ * `maxTextBytes`; defaults are runner-side.
64
+ */
65
+ llmStreamNext(args: {
66
+ stream: LlmStreamHandle;
67
+ maxTokensPerStep?: number;
68
+ maxTextBytes?: number;
69
+ }): LlmStreamStep;
70
+ /** Cancel in-flight generation; the next `_next` returns CANCELLED. */
71
+ llmStreamCancel(stream: LlmStreamHandle): void;
72
+ /** Close + free a streaming-LLM session. Idempotent on already-closed handles. */
73
+ llmStreamClose(stream: LlmStreamHandle): void;
74
+ /** Optional — persist the session's slot KV state to disk. */
75
+ llmStreamSaveSlot?(args: {
76
+ stream: LlmStreamHandle;
77
+ filename: string;
78
+ }): void;
79
+ /** Optional — restore a previously-saved slot KV file. */
80
+ llmStreamRestoreSlot?(args: {
81
+ stream: LlmStreamHandle;
82
+ filename: string;
83
+ }): void;
84
+ }
85
+ /**
86
+ * Wrap a full `ElizaInferenceFfi` as a narrow `LlmStreamingBinding`.
87
+ * The libelizainference path already implements the `llmStream*` methods
88
+ * as optional properties; this adapter promotes them to required and
89
+ * throws if the loaded library is too old to expose them.
90
+ *
91
+ * Usage:
92
+ * const binding = wrapElizaInferenceFfi(ffi);
93
+ * const runner = new FfiStreamingRunner(binding, ctxHandle);
94
+ */
95
+ export declare function wrapElizaInferenceFfi(ffi: ElizaInferenceFfi): LlmStreamingBinding;
96
+ //# sourceMappingURL=llm-streaming-binding.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"llm-streaming-binding.d.ts","sourceRoot":"","sources":["llm-streaming-binding.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,KAAK,EACX,2BAA2B,EAC3B,iBAAiB,EACjB,eAAe,EACf,eAAe,EACf,aAAa,EACb,MAAM,sBAAsB,CAAC;AAE9B;;;;;;GAMG;AACH,MAAM,MAAM,YAAY,GAAG,2BAA2B,CAAC;AAEvD;;;;;;;;;;;GAWG;AACH,MAAM,WAAW,mBAAmB;IACnC,6EAA6E;IAC7E,kBAAkB,IAAI,OAAO,CAAC;IAC9B;;;;OAIG;IACH,aAAa,CAAC,IAAI,EAAE;QACnB,GAAG,EAAE,YAAY,CAAC;QAClB,MAAM,EAAE,eAAe,CAAC;KACxB,GAAG,eAAe,CAAC;IACpB,2EAA2E;IAC3E,gBAAgB,CAAC,IAAI,EAAE;QAAE,MAAM,EAAE,eAAe,CAAC;QAAC,MAAM,EAAE,UAAU,CAAA;KAAE,GAAG,IAAI,CAAC;IAC9E;;;;OAIG;IACH,aAAa,CAAC,IAAI,EAAE;QACnB,MAAM,EAAE,eAAe,CAAC;QACxB,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,YAAY,CAAC,EAAE,MAAM,CAAC;KACtB,GAAG,aAAa,CAAC;IAClB,uEAAuE;IACvE,eAAe,CAAC,MAAM,EAAE,eAAe,GAAG,IAAI,CAAC;IAC/C,kFAAkF;IAClF,cAAc,CAAC,MAAM,EAAE,eAAe,GAAG,IAAI,CAAC;IAC9C,8DAA8D;IAC9D,iBAAiB,CAAC,CAAC,IAAI,EAAE;QAAE,MAAM,EAAE,eAAe,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;IAC9E,0DAA0D;IAC1D,oBAAoB,CAAC,CAAC,IAAI,EAAE;QAC3B,MAAM,EAAE,eAAe,CAAC;QACxB,QAAQ,EAAE,MAAM,CAAC;KACjB,GAAG,IAAI,CAAC;CACT;AAED;;;;;;;;;GASG;AACH,wBAAgB,qBAAqB,CACpC,GAAG,EAAE,iBAAiB,GACpB,mBAAmB,CAmCrB"}
@@ -0,0 +1,136 @@
1
+ /**
2
+ * Narrow streaming-LLM binding.
3
+ *
4
+ * `FfiStreamingRunner` (`services/ffi-streaming-runner.ts`) used to require
5
+ * the full `ElizaInferenceFfi` surface (TTS + ASR + VAD + mmap regions +
6
+ * the entire fused libelizainference) just to run text generation. That
7
+ * surface implies a *bundle-anchored* runtime — libelizainference owns a
8
+ * context built from a bundle root, not a single GGUF — and ~25 methods
9
+ * that have nothing to do with LLM streaming.
10
+ *
11
+ * This file extracts the actual contract the runner depends on: the six
12
+ * `llmStream*` methods plus the (optional) two slot save/restore methods.
13
+ * Both libelizainference (via a tiny adapter) and the desktop
14
+ * libllama + eliza-llama-shim path (built by `build-llama-cpp-desktop-dylib.mjs`,
15
+ * mirroring the AOSP adapter pattern) can implement this narrow contract
16
+ * without dragging in TTS/ASR.
17
+ *
18
+ * See `plugins/plugin-local-inference/FFI_BACKEND_WIREUP_PLAN.md` Step B
19
+ * for the desktop adapter follow-up that implements this interface against
20
+ * the libllama symbols.
21
+ */
22
+
23
+ import type {
24
+ ElizaInferenceContextHandle,
25
+ ElizaInferenceFfi,
26
+ LlmStreamConfig,
27
+ LlmStreamHandle,
28
+ LlmStreamStep,
29
+ } from "./voice/ffi-bindings";
30
+
31
+ /**
32
+ * Opaque per-context handle. For libelizainference this is the
33
+ * `ElizaInferenceContextHandle` (a bigint pointer to the bundle context).
34
+ * For the desktop libllama path it's a bigint pointer to a per-model
35
+ * llama_context. The runner only needs to pass it through to
36
+ * `llmStreamOpen`.
37
+ */
38
+ export type LlmCtxHandle = ElizaInferenceContextHandle;
39
+
/**
 * The streaming-LLM contract `FfiStreamingRunner` consumes. Methods
 * mirror the C ABI declared in `tools/omnivoice/include/eliza-inference-ffi.h`
 * (the `eliza_inference_llm_stream_*` surface), but the binding doesn't have
 * to come from libelizainference — any implementation that satisfies this
 * interface works.
 *
 * Slot save/restore are optional because the desktop libllama path
 * does not expose `llama_state_seq_save_file` / `_load_file` through
 * the shim yet. The runner already guards both methods via
 * `if (this.ffi.llmStreamSaveSlot === undefined) throw ...`.
 */
export interface LlmStreamingBinding {
	/** Probe — must return `true` for the binding to be usable by the runner. */
	llmStreamSupported(): boolean;
	/**
	 * Open a streaming-LLM session against `ctx`. Failure throws an
	 * implementation-specific error (`VoiceLifecycleError` for
	 * libelizainference). Close exactly once via `llmStreamClose`.
	 */
	llmStreamOpen(args: {
		ctx: LlmCtxHandle;
		config: LlmStreamConfig;
	}): LlmStreamHandle;
	/** Feed a batch of pre-tokenized prompt tokens before the first `next`. */
	llmStreamPrefill(args: { stream: LlmStreamHandle; tokens: Int32Array }): void;
	/**
	 * Pull the next streaming step. `step.done === true` is the final step.
	 * Implementations may bound the step by `maxTokensPerStep` /
	 * `maxTextBytes`; defaults are runner-side.
	 */
	llmStreamNext(args: {
		stream: LlmStreamHandle;
		maxTokensPerStep?: number;
		maxTextBytes?: number;
	}): LlmStreamStep;
	/** Cancel in-flight generation; the next `_next` returns CANCELLED. */
	llmStreamCancel(stream: LlmStreamHandle): void;
	/** Close + free a streaming-LLM session. Idempotent on already-closed handles. */
	llmStreamClose(stream: LlmStreamHandle): void;
	/** Optional — persist the session's slot KV state to disk. */
	llmStreamSaveSlot?(args: { stream: LlmStreamHandle; filename: string }): void;
	/** Optional — restore a previously-saved slot KV file. */
	llmStreamRestoreSlot?(args: {
		stream: LlmStreamHandle;
		filename: string;
	}): void;
}

/**
 * Wrap a full `ElizaInferenceFfi` as a narrow `LlmStreamingBinding`.
 * The libelizainference path already implements the `llmStream*` methods
 * as optional properties; this adapter promotes them to required and
 * throws if the loaded library is too old to expose them.
 *
 * Every forwarded method is bound to `ffi`, so an implementation whose
 * methods read `this` (e.g. a class instance) keeps working when invoked
 * through the returned binding. The optional slot methods stay `undefined`
 * when `ffi` does not provide them, which is what callers test for.
 *
 * Usage:
 *   const binding = wrapElizaInferenceFfi(ffi);
 *   const runner = new FfiStreamingRunner(binding, ctxHandle);
 *
 * @param ffi The full inference FFI surface to narrow.
 * @returns A binding exposing only the streaming-LLM methods.
 * @throws Error when a required `llmStream*` method is missing or the
 *   `llmStreamSupported()` probe returns false.
 */
export function wrapElizaInferenceFfi(
	ffi: ElizaInferenceFfi,
): LlmStreamingBinding {
	// Local references let the type checker narrow the optional members to
	// functions once the guard below has passed.
	const supported = ffi.llmStreamSupported;
	const open = ffi.llmStreamOpen;
	const prefill = ffi.llmStreamPrefill;
	const next = ffi.llmStreamNext;
	const cancel = ffi.llmStreamCancel;
	const close = ffi.llmStreamClose;
	const saveSlot = ffi.llmStreamSaveSlot;
	const restoreSlot = ffi.llmStreamRestoreSlot;
	if (
		typeof supported !== "function" ||
		!supported.call(ffi) ||
		typeof open !== "function" ||
		typeof prefill !== "function" ||
		typeof next !== "function" ||
		typeof cancel !== "function" ||
		typeof close !== "function"
	) {
		throw new Error(
			"[llm-streaming-binding] The loaded libelizainference does not expose " +
				"the streaming-LLM symbol set (llmStreamSupported/Open/Prefill/Next/" +
				"Cancel/Close). Rebuild the omnivoice fuse against the current " +
				"eliza-inference-ffi.h (verify-fused-symbols requires this set).",
		);
	}
	return {
		// The probe was already evaluated above; a wrapped binding is usable.
		llmStreamSupported: () => true,
		llmStreamOpen: open.bind(ffi),
		llmStreamPrefill: prefill.bind(ffi),
		llmStreamNext: next.bind(ffi),
		llmStreamCancel: cancel.bind(ffi),
		llmStreamClose: close.bind(ffi),
		llmStreamSaveSlot:
			typeof saveSlot === "function" ? saveSlot.bind(ffi) : undefined,
		llmStreamRestoreSlot:
			typeof restoreSlot === "function" ? restoreSlot.bind(ffi) : undefined,
	};
}
@@ -0,0 +1,82 @@
1
+ /**
2
+ * Per-load override types for the local inference engine.
3
+ *
4
+ * Extracted from active-model.ts to break the active-model ↔ engine
5
+ * circular dependency. Both modules import from here; neither imports
6
+ * from the other for these definitions.
7
+ *
8
+ * @module services/load-args
9
+ */
10
/**
 * Placement strategy for the KV cache.
 *
 * `capacitor-llama` currently has no KV-cache placement control separate
 * from the model-level `gpuLayers` setting: the KV cache lives wherever the
 * layer that owns it lives. The type is still declared here so that the
 * API/UI surface and the upstream out-of-process `llama-server` backend can
 * plumb a real choice through. The in-process binding maps any non-default
 * value to a `gpuLayers` override, or warns loudly when the value cannot be
 * honoured.
 *
 * NOTE(review): the `kvOffload` field documentation on
 * `LocalInferenceLoadArgs` names `node-llama-cpp` and describes a throw
 * rather than a warning; confirm which wording is current.
 */
export type KvOffloadMode =
  | "cpu"
  | "gpu"
  | "split"
  | { gpuLayers: number };
22
/**
 * Overrides that apply to a single model load.
 *
 * Accepted by `localInferenceLoader.loadModel(...)` and by
 * `POST /api/local-inference/active`. Catalog defaults are merged in by
 * `resolveLocalInferenceLoadArgs`; a value supplied by the caller for a
 * given call takes priority over both catalog metadata and env-var
 * fallbacks.
 */
export interface LocalInferenceLoadArgs {
  modelPath: string;

  /**
   * Catalog id, used for direct bundle loads in which `modelPath` points at
   * a GGUF inside an Eliza-1 bundle that is not yet present in the
   * installed-model registry.
   */
  modelId?: string;

  contextSize?: number;
  useGpu?: boolean;
  maxThreads?: number;
  draftModelPath?: string;
  draftContextSize?: number;
  draftMin?: number;
  draftMax?: number;
  speculativeSamples?: number;
  mobileSpeculative?: boolean;
  cacheTypeK?: string;
  cacheTypeV?: string;
  disableThinking?: boolean;

  /**
   * How many model layers to offload to the GPU.
   *
   * Only a number is accepted here. `"auto"` and `"max"` are resolved by the
   * backend's own probing, and keeping the field numeric lets the API
   * surface take the most common `gpuLayers: 32` shape without an extra
   * string branch.
   */
  gpuLayers?: number;

  /**
   * KV cache placement; see `KvOffloadMode`.
   *
   * node-llama-cpp does not expose this separately from `gpuLayers`, so the
   * backend translates the request into a `gpuLayers` override, or throws
   * when the value cannot be honoured.
   *
   * NOTE(review): the `KvOffloadMode` documentation names `capacitor-llama`
   * and describes a loud warning rather than a throw; confirm which backend
   * name and failure mode are current.
   */
  kvOffload?: KvOffloadMode;

  flashAttention?: boolean;
  mmap?: boolean;
  mlock?: boolean;

  /**
   * Path to the multi-modal projector GGUF (mmproj-<tier>.gguf).
   *
   * Set when the loaded tier supports vision, meaning
   * `catalog.sourceModel.components.vision` is present AND the file exists
   * on disk. WS2 (vision-describe) resolves it from the installed bundle
   * root in `resolveLocalInferenceLoadArgs`. Backends that support vision
   * use the path verbatim:
   *   - llama-server: `--mmproj <path>` flag on spawn.
   *   - node-llama-cpp: `mtmd_init_from_file(<path>)` (planned in fork).
   *   - AOSP libllama shim: `eliza_llama_mtmd_init_from_file(<path>)`.
   *
   * Left undefined when the tier does not ship vision or the file is not on
   * disk yet (e.g. a downloaded text-only bundle). The text load is NOT
   * gated on mmproj presence: text+drafter still load, and vision is marked
   * unavailable for that session.
   */
  mmprojPath?: string;
}
82
+ //# sourceMappingURL=load-args.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"load-args.d.ts","sourceRoot":"","sources":["load-args.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH;;;;;;;;GAQG;AACH,MAAM,MAAM,aAAa,GAAG,KAAK,GAAG,KAAK,GAAG,OAAO,GAAG;IAAE,SAAS,EAAE,MAAM,CAAA;CAAE,CAAC;AAE5E;;;;;GAKG;AACH,MAAM,WAAW,sBAAsB;IACtC,SAAS,EAAE,MAAM,CAAC;IAClB;;;;OAIG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,iBAAiB,CAAC,EAAE,OAAO,CAAC;IAC5B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B;;;;;OAKG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;;OAKG;IACH,SAAS,CAAC,EAAE,aAAa,CAAC;IAC1B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB;;;;;;;;;;;;;;OAcG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;CACpB"}