@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2

This diff compares the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (701)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +82 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/actions/transcription-control.d.ts +29 -0
  11. package/src/actions/transcription-control.d.ts.map +1 -0
  12. package/src/actions/transcription-control.test.ts +100 -0
  13. package/src/actions/transcription-control.ts +127 -0
  14. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  15. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  16. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  17. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  18. package/src/adapters/capacitor-llama/environment.ts +71 -0
  19. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  20. package/src/adapters/capacitor-llama/index.ts +807 -0
  21. package/src/adapters/capacitor-llama/loader.ts +109 -0
  22. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  23. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  24. package/src/adapters/capacitor-llama/types.ts +374 -0
  25. package/src/backends/apple-foundation.ts +127 -0
  26. package/src/index.d.ts +8 -0
  27. package/src/index.d.ts.map +1 -0
  28. package/src/index.ts +62 -0
  29. package/src/local-inference-routes.d.ts +38 -0
  30. package/src/local-inference-routes.d.ts.map +1 -0
  31. package/src/local-inference-routes.test.ts +344 -0
  32. package/src/local-inference-routes.ts +1543 -0
  33. package/src/provider.d.ts +21 -0
  34. package/src/provider.d.ts.map +1 -0
  35. package/src/provider.ts +1082 -0
  36. package/src/routes/compat-helpers.d.ts +18 -0
  37. package/src/routes/compat-helpers.d.ts.map +1 -0
  38. package/src/routes/compat-helpers.ts +274 -0
  39. package/src/routes/family-member-route.d.ts +62 -0
  40. package/src/routes/family-member-route.d.ts.map +1 -0
  41. package/src/routes/family-member-route.ts +353 -0
  42. package/src/routes/index.d.ts +19 -0
  43. package/src/routes/index.d.ts.map +1 -0
  44. package/src/routes/index.ts +60 -0
  45. package/src/routes/live-diarization-route.d.ts +26 -0
  46. package/src/routes/live-diarization-route.d.ts.map +1 -0
  47. package/src/routes/live-diarization-route.test.ts +213 -0
  48. package/src/routes/live-diarization-route.ts +122 -0
  49. package/src/routes/local-inference-asr-route.d.ts +4 -0
  50. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  51. package/src/routes/local-inference-asr-route.test.ts +205 -0
  52. package/src/routes/local-inference-asr-route.ts +163 -0
  53. package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
  54. package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  55. package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
  56. package/src/routes/local-inference-asr-transcribe.ts +97 -0
  57. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  58. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  59. package/src/routes/local-inference-compat-routes.test.ts +485 -0
  60. package/src/routes/local-inference-compat-routes.ts +808 -0
  61. package/src/routes/local-inference-tts-route.d.ts +7 -0
  62. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  63. package/src/routes/local-inference-tts-route.test.ts +179 -0
  64. package/src/routes/local-inference-tts-route.ts +230 -0
  65. package/src/routes/transcript-audio-store.d.ts +15 -0
  66. package/src/routes/transcript-audio-store.d.ts.map +1 -0
  67. package/src/routes/transcript-audio-store.ts +27 -0
  68. package/src/routes/transcripts-routes.d.ts +36 -0
  69. package/src/routes/transcripts-routes.d.ts.map +1 -0
  70. package/src/routes/transcripts-routes.test.ts +144 -0
  71. package/src/routes/transcripts-routes.ts +159 -0
  72. package/src/routes/voice-first-run-routes.d.ts +62 -0
  73. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  74. package/src/routes/voice-first-run-routes.ts +524 -0
  75. package/src/routes/voice-models-routes.d.ts +62 -0
  76. package/src/routes/voice-models-routes.d.ts.map +1 -0
  77. package/src/routes/voice-models-routes.ts +554 -0
  78. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  79. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  80. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  81. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  82. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  83. package/src/routes/voice-profiles-management-routes.ts +476 -0
  84. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  85. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  86. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  87. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  88. package/src/runtime/capacitor-llama.d.ts +25 -0
  89. package/src/runtime/embedding-manager-support.d.ts +77 -0
  90. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  91. package/src/runtime/embedding-manager-support.ts +497 -0
  92. package/src/runtime/embedding-presets.d.ts +16 -0
  93. package/src/runtime/embedding-presets.d.ts.map +1 -0
  94. package/src/runtime/embedding-presets.ts +81 -0
  95. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  96. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  97. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  98. package/src/runtime/embedding-warmup-policy.ts +48 -0
  99. package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
  100. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  101. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  102. package/src/runtime/ensure-local-inference-handler.ts +1448 -0
  103. package/src/runtime/index.d.ts +15 -0
  104. package/src/runtime/index.d.ts.map +1 -0
  105. package/src/runtime/index.ts +33 -0
  106. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  107. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  108. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  109. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  110. package/src/runtime/voice-entity-binding.d.ts +103 -0
  111. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  112. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  113. package/src/runtime/voice-entity-binding.ts +328 -0
  114. package/src/services/README.md +71 -0
  115. package/src/services/__tests__/backend-selector.test.ts +101 -0
  116. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  117. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  118. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  119. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  120. package/src/services/__tests__/runtime-target.test.ts +176 -0
  121. package/src/services/active-model-switch-rollback.test.ts +183 -0
  122. package/src/services/active-model.d.ts +282 -0
  123. package/src/services/active-model.d.ts.map +1 -0
  124. package/src/services/active-model.ts +1213 -0
  125. package/src/services/assignments.d.ts +71 -0
  126. package/src/services/assignments.d.ts.map +1 -0
  127. package/src/services/assignments.test.ts +80 -0
  128. package/src/services/assignments.ts +230 -0
  129. package/src/services/backend-selector.ts +95 -0
  130. package/src/services/backend.d.ts +346 -0
  131. package/src/services/backend.d.ts.map +1 -0
  132. package/src/services/backend.ts +612 -0
  133. package/src/services/bionic-host-loader.d.ts +46 -0
  134. package/src/services/bionic-host-loader.d.ts.map +1 -0
  135. package/src/services/bionic-host-loader.test.ts +133 -0
  136. package/src/services/bionic-host-loader.ts +180 -0
  137. package/src/services/bundled-models.d.ts +34 -0
  138. package/src/services/bundled-models.d.ts.map +1 -0
  139. package/src/services/bundled-models.ts +129 -0
  140. package/src/services/cache-bridge.d.ts +206 -0
  141. package/src/services/cache-bridge.d.ts.map +1 -0
  142. package/src/services/cache-bridge.test.ts +516 -0
  143. package/src/services/cache-bridge.ts +423 -0
  144. package/src/services/catalog.d.ts +10 -0
  145. package/src/services/catalog.d.ts.map +1 -0
  146. package/src/services/catalog.test.ts +238 -0
  147. package/src/services/catalog.ts +27 -0
  148. package/src/services/checkpoint-client.d.ts +109 -0
  149. package/src/services/checkpoint-client.d.ts.map +1 -0
  150. package/src/services/checkpoint-client.ts +258 -0
  151. package/src/services/checkpoint-manager.ts +474 -0
  152. package/src/services/cloud-fallback.d.ts +102 -0
  153. package/src/services/cloud-fallback.d.ts.map +1 -0
  154. package/src/services/cloud-fallback.ts +230 -0
  155. package/src/services/conversation-registry.d.ts +142 -0
  156. package/src/services/conversation-registry.d.ts.map +1 -0
  157. package/src/services/conversation-registry.test.ts +235 -0
  158. package/src/services/conversation-registry.ts +264 -0
  159. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
  160. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  161. package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
  162. package/src/services/device-bridge.d.ts +188 -0
  163. package/src/services/device-bridge.d.ts.map +1 -0
  164. package/src/services/device-bridge.ts +1237 -0
  165. package/src/services/device-resource-metrics.d.ts +149 -0
  166. package/src/services/device-resource-metrics.d.ts.map +1 -0
  167. package/src/services/device-resource-metrics.test.ts +98 -0
  168. package/src/services/device-resource-metrics.ts +346 -0
  169. package/src/services/device-tier.d.ts +115 -0
  170. package/src/services/device-tier.d.ts.map +1 -0
  171. package/src/services/device-tier.test.ts +371 -0
  172. package/src/services/device-tier.ts +410 -0
  173. package/src/services/downloader.d.ts +82 -0
  174. package/src/services/downloader.d.ts.map +1 -0
  175. package/src/services/downloader.test.ts +747 -0
  176. package/src/services/downloader.ts +925 -0
  177. package/src/services/engine-direct-bundle.test.ts +58 -0
  178. package/src/services/engine-streaming.test.ts +80 -0
  179. package/src/services/engine.d.ts +540 -0
  180. package/src/services/engine.d.ts.map +1 -0
  181. package/src/services/engine.ts +1909 -0
  182. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  183. package/src/services/ensure-local-artifacts.test.ts +368 -0
  184. package/src/services/ensure-local-artifacts.ts +351 -0
  185. package/src/services/external-scanner.d.ts +17 -0
  186. package/src/services/external-scanner.d.ts.map +1 -0
  187. package/src/services/external-scanner.ts +312 -0
  188. package/src/services/ffi-llm-mock.ts +354 -0
  189. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  190. package/src/services/ffi-streaming-backend.d.ts +180 -0
  191. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  192. package/src/services/ffi-streaming-backend.ts +382 -0
  193. package/src/services/ffi-streaming-runner.d.ts +122 -0
  194. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  195. package/src/services/ffi-streaming-runner.test.ts +60 -0
  196. package/src/services/ffi-streaming-runner.ts +354 -0
  197. package/src/services/ffi-unload-ordering.test.ts +162 -0
  198. package/src/services/gpu-autotune.ts +534 -0
  199. package/src/services/gpu-detect.d.ts +56 -0
  200. package/src/services/gpu-detect.d.ts.map +1 -0
  201. package/src/services/gpu-detect.ts +139 -0
  202. package/src/services/handler-registry.d.ts +72 -0
  203. package/src/services/handler-registry.d.ts.map +1 -0
  204. package/src/services/handler-registry.ts +240 -0
  205. package/src/services/hardware.d.ts +63 -0
  206. package/src/services/hardware.d.ts.map +1 -0
  207. package/src/services/hardware.test.ts +231 -0
  208. package/src/services/hardware.ts +410 -0
  209. package/src/services/hf-search.d.ts +26 -0
  210. package/src/services/hf-search.d.ts.map +1 -0
  211. package/src/services/hf-search.test.ts +69 -0
  212. package/src/services/hf-search.ts +420 -0
  213. package/src/services/image-description-runtime.d.ts +14 -0
  214. package/src/services/image-description-runtime.d.ts.map +1 -0
  215. package/src/services/image-description-runtime.test.ts +61 -0
  216. package/src/services/image-description-runtime.ts +118 -0
  217. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  218. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  219. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  220. package/src/services/imagegen/backend-selector.d.ts +118 -0
  221. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  222. package/src/services/imagegen/backend-selector.ts +277 -0
  223. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  224. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  225. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  226. package/src/services/imagegen/errors.d.ts +16 -0
  227. package/src/services/imagegen/errors.d.ts.map +1 -0
  228. package/src/services/imagegen/errors.ts +40 -0
  229. package/src/services/imagegen/index.d.ts +58 -0
  230. package/src/services/imagegen/index.d.ts.map +1 -0
  231. package/src/services/imagegen/index.ts +144 -0
  232. package/src/services/imagegen/mflux.d.ts +74 -0
  233. package/src/services/imagegen/mflux.d.ts.map +1 -0
  234. package/src/services/imagegen/mflux.ts +313 -0
  235. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  236. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  237. package/src/services/imagegen/sd-cpp.ts +718 -0
  238. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  239. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  240. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  241. package/src/services/imagegen/types.d.ts +181 -0
  242. package/src/services/imagegen/types.d.ts.map +1 -0
  243. package/src/services/imagegen/types.ts +193 -0
  244. package/src/services/index.d.ts +29 -0
  245. package/src/services/index.d.ts.map +1 -0
  246. package/src/services/index.ts +211 -0
  247. package/src/services/inference-capabilities.d.ts +132 -0
  248. package/src/services/inference-capabilities.d.ts.map +1 -0
  249. package/src/services/inference-capabilities.test.ts +75 -0
  250. package/src/services/inference-capabilities.ts +204 -0
  251. package/src/services/inference-telemetry.d.ts +59 -0
  252. package/src/services/inference-telemetry.d.ts.map +1 -0
  253. package/src/services/inference-telemetry.ts +143 -0
  254. package/src/services/ios-llama-streaming.ts +248 -0
  255. package/src/services/kv-spill.d.ts +189 -0
  256. package/src/services/kv-spill.d.ts.map +1 -0
  257. package/src/services/kv-spill.test.ts +222 -0
  258. package/src/services/kv-spill.ts +356 -0
  259. package/src/services/latency-trace.d.ts +346 -0
  260. package/src/services/latency-trace.d.ts.map +1 -0
  261. package/src/services/latency-trace.test.ts +266 -0
  262. package/src/services/latency-trace.ts +844 -0
  263. package/src/services/llama-server-metrics.ts +304 -0
  264. package/src/services/llm-streaming-binding.d.ts +96 -0
  265. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  266. package/src/services/llm-streaming-binding.ts +136 -0
  267. package/src/services/load-args.d.ts +82 -0
  268. package/src/services/load-args.d.ts.map +1 -0
  269. package/src/services/load-args.ts +81 -0
  270. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  271. package/src/services/manifest/index.d.ts +4 -0
  272. package/src/services/manifest/index.d.ts.map +1 -0
  273. package/src/services/manifest/index.ts +66 -0
  274. package/src/services/manifest/manifest.test.ts +689 -0
  275. package/src/services/manifest/schema.d.ts +713 -0
  276. package/src/services/manifest/schema.d.ts.map +1 -0
  277. package/src/services/manifest/schema.ts +653 -0
  278. package/src/services/manifest/types.d.ts +30 -0
  279. package/src/services/manifest/types.d.ts.map +1 -0
  280. package/src/services/manifest/types.ts +55 -0
  281. package/src/services/manifest/validator.d.ts +66 -0
  282. package/src/services/manifest/validator.d.ts.map +1 -0
  283. package/src/services/manifest/validator.ts +567 -0
  284. package/src/services/memory-arbiter.d.ts +318 -0
  285. package/src/services/memory-arbiter.d.ts.map +1 -0
  286. package/src/services/memory-arbiter.test.ts +419 -0
  287. package/src/services/memory-arbiter.ts +925 -0
  288. package/src/services/memory-monitor.d.ts +122 -0
  289. package/src/services/memory-monitor.d.ts.map +1 -0
  290. package/src/services/memory-monitor.test.ts +208 -0
  291. package/src/services/memory-monitor.ts +297 -0
  292. package/src/services/memory-pressure.d.ts +130 -0
  293. package/src/services/memory-pressure.d.ts.map +1 -0
  294. package/src/services/memory-pressure.ts +414 -0
  295. package/src/services/mtp-doctor.d.ts +13 -0
  296. package/src/services/mtp-doctor.d.ts.map +1 -0
  297. package/src/services/mtp-doctor.ts +78 -0
  298. package/src/services/network-policy.d.ts +127 -0
  299. package/src/services/network-policy.d.ts.map +1 -0
  300. package/src/services/network-policy.ts +346 -0
  301. package/src/services/paths.d.ts +6 -0
  302. package/src/services/paths.d.ts.map +1 -0
  303. package/src/services/paths.ts +25 -0
  304. package/src/services/planner-skeleton.d.ts +124 -0
  305. package/src/services/planner-skeleton.d.ts.map +1 -0
  306. package/src/services/planner-skeleton.ts +175 -0
  307. package/src/services/providers.d.ts +38 -0
  308. package/src/services/providers.d.ts.map +1 -0
  309. package/src/services/providers.ts +507 -0
  310. package/src/services/ram-budget-cache.test.ts +163 -0
  311. package/src/services/ram-budget.d.ts +110 -0
  312. package/src/services/ram-budget.d.ts.map +1 -0
  313. package/src/services/ram-budget.ts +0 -0
  314. package/src/services/readiness.d.ts +9 -0
  315. package/src/services/readiness.d.ts.map +1 -0
  316. package/src/services/readiness.test.ts +87 -0
  317. package/src/services/readiness.ts +238 -0
  318. package/src/services/recommendation.d.ts +111 -0
  319. package/src/services/recommendation.d.ts.map +1 -0
  320. package/src/services/recommendation.ts +671 -0
  321. package/src/services/registry.d.ts +35 -0
  322. package/src/services/registry.d.ts.map +1 -0
  323. package/src/services/registry.ts +151 -0
  324. package/src/services/router-handler.d.ts +92 -0
  325. package/src/services/router-handler.d.ts.map +1 -0
  326. package/src/services/router-handler.test.ts +45 -0
  327. package/src/services/router-handler.ts +407 -0
  328. package/src/services/routing-policy.d.ts +69 -0
  329. package/src/services/routing-policy.d.ts.map +1 -0
  330. package/src/services/routing-policy.test.ts +164 -0
  331. package/src/services/routing-policy.ts +297 -0
  332. package/src/services/routing-preferences.d.ts +8 -0
  333. package/src/services/routing-preferences.d.ts.map +1 -0
  334. package/src/services/routing-preferences.ts +17 -0
  335. package/src/services/runtime-target.d.ts +98 -0
  336. package/src/services/runtime-target.d.ts.map +1 -0
  337. package/src/services/runtime-target.ts +154 -0
  338. package/src/services/service.d.ts +128 -0
  339. package/src/services/service.d.ts.map +1 -0
  340. package/src/services/service.test.ts +223 -0
  341. package/src/services/service.ts +735 -0
  342. package/src/services/session-pool.d.ts +72 -0
  343. package/src/services/session-pool.d.ts.map +1 -0
  344. package/src/services/session-pool.ts +153 -0
  345. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  346. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  347. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  348. package/src/services/structured-output/deterministic-repair.ts +443 -0
  349. package/src/services/structured-output/index.ts +4 -0
  350. package/src/services/structured-output.d.ts +311 -0
  351. package/src/services/structured-output.d.ts.map +1 -0
  352. package/src/services/structured-output.test.ts +483 -0
  353. package/src/services/structured-output.ts +712 -0
  354. package/src/services/system-memory.d.ts +33 -0
  355. package/src/services/system-memory.d.ts.map +1 -0
  356. package/src/services/system-memory.test.ts +47 -0
  357. package/src/services/system-memory.ts +67 -0
  358. package/src/services/transcription-priority.test.ts +211 -0
  359. package/src/services/types.d.ts +19 -0
  360. package/src/services/types.d.ts.map +1 -0
  361. package/src/services/types.ts +55 -0
  362. package/src/services/verify-on-device.d.ts +34 -0
  363. package/src/services/verify-on-device.d.ts.map +1 -0
  364. package/src/services/verify-on-device.test.ts +87 -0
  365. package/src/services/verify-on-device.ts +127 -0
  366. package/src/services/verify.d.ts +8 -0
  367. package/src/services/verify.d.ts.map +1 -0
  368. package/src/services/verify.ts +13 -0
  369. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  370. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  371. package/src/services/vision/aosp-unavailable.ts +163 -0
  372. package/src/services/vision/capacitor-llama.d.ts +99 -0
  373. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  374. package/src/services/vision/capacitor-llama.ts +255 -0
  375. package/src/services/vision/cloud-fallback.d.ts +47 -0
  376. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  377. package/src/services/vision/cloud-fallback.test.ts +243 -0
  378. package/src/services/vision/cloud-fallback.ts +268 -0
  379. package/src/services/vision/fallback-chain.test.ts +86 -0
  380. package/src/services/vision/hash.d.ts +71 -0
  381. package/src/services/vision/hash.d.ts.map +1 -0
  382. package/src/services/vision/hash.ts +157 -0
  383. package/src/services/vision/index.d.ts +95 -0
  384. package/src/services/vision/index.d.ts.map +1 -0
  385. package/src/services/vision/index.ts +251 -0
  386. package/src/services/vision/llama-server.d.ts +73 -0
  387. package/src/services/vision/llama-server.d.ts.map +1 -0
  388. package/src/services/vision/llama-server.ts +177 -0
  389. package/src/services/vision/types.d.ts +153 -0
  390. package/src/services/vision/types.d.ts.map +1 -0
  391. package/src/services/vision/types.ts +154 -0
  392. package/src/services/vision/vast-fallback.d.ts +18 -0
  393. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  394. package/src/services/vision/vast-fallback.ts +127 -0
  395. package/src/services/vision-embedding-cache.d.ts +98 -0
  396. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  397. package/src/services/vision-embedding-cache.ts +189 -0
  398. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  399. package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
  400. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  401. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  402. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  403. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  404. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  405. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  406. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  407. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  408. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  409. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  410. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  411. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
  412. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  413. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  414. package/src/services/voice/asr-timed.real.test.ts +141 -0
  415. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  416. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  417. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  418. package/src/services/voice/audio-frame-consumer.ts +491 -0
  419. package/src/services/voice/barge-in.d.ts +112 -0
  420. package/src/services/voice/barge-in.d.ts.map +1 -0
  421. package/src/services/voice/barge-in.test.ts +244 -0
  422. package/src/services/voice/barge-in.ts +336 -0
  423. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  424. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  425. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  426. package/src/services/voice/cancellation-coordinator.ts +269 -0
  427. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  428. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  429. package/src/services/voice/checkpoint-manager.ts +401 -0
  430. package/src/services/voice/checkpoint-policy.ts +336 -0
  431. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  432. package/src/services/voice/e2e-harness.test.ts +182 -0
  433. package/src/services/voice/e2e-harness.ts +743 -0
  434. package/src/services/voice/eager-context-builder.d.ts +170 -0
  435. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  436. package/src/services/voice/eager-context-builder.ts +262 -0
  437. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  438. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  439. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  440. package/src/services/voice/embedding-server.ts +200 -0
  441. package/src/services/voice/embedding.d.ts +133 -0
  442. package/src/services/voice/embedding.d.ts.map +1 -0
  443. package/src/services/voice/embedding.test.ts +131 -0
  444. package/src/services/voice/embedding.ts +243 -0
  445. package/src/services/voice/emotion-attribution.d.ts +68 -0
  446. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  447. package/src/services/voice/emotion-attribution.test.ts +129 -0
  448. package/src/services/voice/emotion-attribution.ts +361 -0
  449. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  450. package/src/services/voice/engine-bridge.d.ts +759 -0
  451. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  452. package/src/services/voice/engine-bridge.test.ts +384 -0
  453. package/src/services/voice/engine-bridge.ts +2302 -0
  454. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  455. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  456. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  457. package/src/services/voice/eot-classifier.d.ts +214 -0
  458. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  459. package/src/services/voice/eot-classifier.ts +533 -0
  460. package/src/services/voice/errors.d.ts +20 -0
  461. package/src/services/voice/errors.d.ts.map +1 -0
  462. package/src/services/voice/errors.ts +32 -0
  463. package/src/services/voice/expressive-tags.d.ts +158 -0
  464. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  465. package/src/services/voice/expressive-tags.ts +405 -0
  466. package/src/services/voice/ffi-bindings.d.ts +674 -0
  467. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  468. package/src/services/voice/ffi-bindings.test.ts +728 -0
  469. package/src/services/voice/ffi-bindings.ts +3225 -0
  470. package/src/services/voice/first-line-cache.d.ts +181 -0
  471. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  472. package/src/services/voice/first-line-cache.ts +725 -0
  473. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  474. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  475. package/src/services/voice/fused-eot-scorer.ts +135 -0
  476. package/src/services/voice/index.d.ts +91 -0
  477. package/src/services/voice/index.d.ts.map +1 -0
  478. package/src/services/voice/index.ts +481 -0
  479. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  480. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  481. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  482. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  483. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  484. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  485. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  486. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  487. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  488. package/src/services/voice/kokoro/index.ts +79 -0
  489. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  490. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  491. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  492. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  493. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  494. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  495. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  496. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  497. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  498. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  499. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  501. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  502. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  503. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  504. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  505. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  506. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  507. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  508. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  509. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  510. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  511. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  512. package/src/services/voice/kokoro/types.d.ts +82 -0
  513. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  514. package/src/services/voice/kokoro/types.ts +95 -0
  515. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  516. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  517. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  518. package/src/services/voice/kokoro/voices.d.ts +30 -0
  519. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  520. package/src/services/voice/kokoro/voices.ts +64 -0
  521. package/src/services/voice/lifecycle.d.ts +135 -0
  522. package/src/services/voice/lifecycle.d.ts.map +1 -0
  523. package/src/services/voice/lifecycle.test.ts +315 -0
  524. package/src/services/voice/lifecycle.ts +301 -0
  525. package/src/services/voice/live-diarization-session.d.ts +96 -0
  526. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  527. package/src/services/voice/live-diarization-session.ts +289 -0
  528. package/src/services/voice/mic-source.d.ts +136 -0
  529. package/src/services/voice/mic-source.d.ts.map +1 -0
  530. package/src/services/voice/mic-source.test.ts +210 -0
  531. package/src/services/voice/mic-source.ts +503 -0
  532. package/src/services/voice/optimistic-policy.d.ts +109 -0
  533. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  534. package/src/services/voice/optimistic-policy.test.ts +101 -0
  535. package/src/services/voice/optimistic-policy.ts +192 -0
  536. package/src/services/voice/optimistic-rollback.ts +343 -0
  537. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  538. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  539. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  540. package/src/services/voice/partial-stabilizer.ts +140 -0
  541. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  542. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  543. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  544. package/src/services/voice/phrase-cache.d.ts +76 -0
  545. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  546. package/src/services/voice/phrase-cache.test.ts +242 -0
  547. package/src/services/voice/phrase-cache.ts +186 -0
  548. package/src/services/voice/phrase-chunker.d.ts +62 -0
  549. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  550. package/src/services/voice/phrase-chunker.test.ts +239 -0
  551. package/src/services/voice/phrase-chunker.ts +281 -0
  552. package/src/services/voice/pipeline-impls.d.ts +151 -0
  553. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  554. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  555. package/src/services/voice/pipeline-impls.test.ts +292 -0
  556. package/src/services/voice/pipeline-impls.ts +315 -0
  557. package/src/services/voice/pipeline.d.ts +216 -0
  558. package/src/services/voice/pipeline.d.ts.map +1 -0
  559. package/src/services/voice/pipeline.ts +505 -0
  560. package/src/services/voice/prefill-client.d.ts +123 -0
  561. package/src/services/voice/prefill-client.d.ts.map +1 -0
  562. package/src/services/voice/prefill-client.ts +316 -0
  563. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  564. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  565. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  566. package/src/services/voice/profile-store.d.ts +248 -0
  567. package/src/services/voice/profile-store.d.ts.map +1 -0
  568. package/src/services/voice/profile-store.ts +887 -0
  569. package/src/services/voice/real-audio-decode.test.ts +148 -0
  570. package/src/services/voice/ring-buffer.d.ts +40 -0
  571. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  572. package/src/services/voice/ring-buffer.test.ts +129 -0
  573. package/src/services/voice/ring-buffer.ts +123 -0
  574. package/src/services/voice/rollback-queue.d.ts +24 -0
  575. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  576. package/src/services/voice/rollback-queue.ts +74 -0
  577. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  578. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  579. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  580. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  581. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  582. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  583. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  584. package/src/services/voice/scheduler.d.ts +146 -0
  585. package/src/services/voice/scheduler.d.ts.map +1 -0
  586. package/src/services/voice/scheduler.t2.test.ts +141 -0
  587. package/src/services/voice/scheduler.ts +927 -0
  588. package/src/services/voice/shared-resources.d.ts +190 -0
  589. package/src/services/voice/shared-resources.d.ts.map +1 -0
  590. package/src/services/voice/shared-resources.ts +320 -0
  591. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  592. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  593. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  594. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  595. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  596. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  597. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  598. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  599. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  600. package/src/services/voice/speaker/diarizer.ts +218 -0
  601. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  602. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  603. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  604. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  605. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  606. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  607. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  608. package/src/services/voice/speaker/encoder.d.ts +37 -0
  609. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  610. package/src/services/voice/speaker/encoder.ts +105 -0
  611. package/src/services/voice/speaker-imprint.d.ts +83 -0
  612. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  613. package/src/services/voice/speaker-imprint.test.ts +185 -0
  614. package/src/services/voice/speaker-imprint.ts +312 -0
  615. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  616. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  617. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  618. package/src/services/voice/speaker-preset-cache.ts +195 -0
  619. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  620. package/src/services/voice/system-audio-sink.d.ts +73 -0
  621. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  622. package/src/services/voice/system-audio-sink.test.ts +29 -0
  623. package/src/services/voice/system-audio-sink.ts +366 -0
  624. package/src/services/voice/transcriber.d.ts +244 -0
  625. package/src/services/voice/transcriber.d.ts.map +1 -0
  626. package/src/services/voice/transcriber.test.ts +392 -0
  627. package/src/services/voice/transcriber.ts +704 -0
  628. package/src/services/voice/transcript-knowledge.d.ts +37 -0
  629. package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
  630. package/src/services/voice/transcript-knowledge.test.ts +68 -0
  631. package/src/services/voice/transcript-knowledge.ts +75 -0
  632. package/src/services/voice/transcript-service.d.ts +41 -0
  633. package/src/services/voice/transcript-service.d.ts.map +1 -0
  634. package/src/services/voice/transcript-service.test.ts +137 -0
  635. package/src/services/voice/transcript-service.ts +141 -0
  636. package/src/services/voice/transcript-store.d.ts +53 -0
  637. package/src/services/voice/transcript-store.d.ts.map +1 -0
  638. package/src/services/voice/transcript-store.test.ts +153 -0
  639. package/src/services/voice/transcript-store.ts +132 -0
  640. package/src/services/voice/turn-controller.d.ts +183 -0
  641. package/src/services/voice/turn-controller.d.ts.map +1 -0
  642. package/src/services/voice/turn-controller.test.ts +575 -0
  643. package/src/services/voice/turn-controller.ts +596 -0
  644. package/src/services/voice/types.d.ts +643 -0
  645. package/src/services/voice/types.d.ts.map +1 -0
  646. package/src/services/voice/types.ts +699 -0
  647. package/src/services/voice/vad.d.ts +282 -0
  648. package/src/services/voice/vad.d.ts.map +1 -0
  649. package/src/services/voice/vad.test.ts +480 -0
  650. package/src/services/voice/vad.ts +827 -0
  651. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  652. package/src/services/voice/voice-budget.d.ts +241 -0
  653. package/src/services/voice/voice-budget.d.ts.map +1 -0
  654. package/src/services/voice/voice-budget.test.ts +418 -0
  655. package/src/services/voice/voice-budget.ts +635 -0
  656. package/src/services/voice/voice-duet.test.ts +375 -0
  657. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  658. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  659. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  660. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  661. package/src/services/voice/voice-preset-format.d.ts +158 -0
  662. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  663. package/src/services/voice/voice-preset-format.ts +700 -0
  664. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  665. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  666. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  667. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  668. package/src/services/voice/voice-profile-artifact.ts +518 -0
  669. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  670. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  671. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  672. package/src/services/voice/voice-profile-routes.ts +425 -0
  673. package/src/services/voice/voice-scenario.ts +154 -0
  674. package/src/services/voice/voice-settings.d.ts +82 -0
  675. package/src/services/voice/voice-settings.d.ts.map +1 -0
  676. package/src/services/voice/voice-settings.ts +172 -0
  677. package/src/services/voice/voice-state-machine.d.ts +364 -0
  678. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  679. package/src/services/voice/voice-state-machine.ts +727 -0
  680. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  681. package/src/services/voice/voice-workbench-report.ts +326 -0
  682. package/src/services/voice/voice-workbench.test.ts +158 -0
  683. package/src/services/voice/voice.test.ts +1070 -0
  684. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  685. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  686. package/src/services/voice/wake-word-ggml.ts +320 -0
  687. package/src/services/voice/wake-word.d.ts +255 -0
  688. package/src/services/voice/wake-word.d.ts.map +1 -0
  689. package/src/services/voice/wake-word.test.ts +298 -0
  690. package/src/services/voice/wake-word.ts +554 -0
  691. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  692. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  693. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  694. package/src/services/voice-model-updater.d.ts +240 -0
  695. package/src/services/voice-model-updater.d.ts.map +1 -0
  696. package/src/services/voice-model-updater.ts +724 -0
  697. package/src/services/voice-prewarm.d.ts +3 -0
  698. package/src/services/voice-prewarm.d.ts.map +1 -0
  699. package/src/services/voice-prewarm.ts +51 -0
  700. package/dist/index.d.ts +0 -37
  701. package/dist/index.js +0 -1098
@@ -0,0 +1,58 @@
1
+ import fs from "node:fs";
2
+ import os from "node:os";
3
+ import path from "node:path";
4
+ import { afterEach, describe, expect, it } from "vitest";
5
+
6
+ import type { BackendPlan } from "./backend";
7
+ import { LocalInferenceEngine } from "./engine";
8
+
9
+ const ORIGINAL_ENV = { ...process.env };
10
+
11
+ afterEach(() => {
12
+ process.env = { ...ORIGINAL_ENV };
13
+ });
14
+
15
+ describe("LocalInferenceEngine direct Eliza-1 bundle loads", () => {
16
+ it("projects modelId into catalog and bundle overrides before registry install", async () => {
17
+ const root = fs.mkdtempSync(path.join(os.tmpdir(), "eliza-engine-test-"));
18
+ process.env.ELIZA_STATE_DIR = root;
19
+ const engine = new LocalInferenceEngine();
20
+ const internals = engine as unknown as {
21
+ dispatcher: {
22
+ load(plan: BackendPlan): Promise<void>;
23
+ };
24
+ };
25
+ let captured: BackendPlan | undefined;
26
+ internals.dispatcher.load = async (plan) => {
27
+ captured = plan;
28
+ };
29
+
30
+ const bundleRoot = path.join(root, "eliza-1-0_8b.bundle");
31
+ const modelPath = path.join(bundleRoot, "text", "eliza-1-0_8b-128k.gguf");
32
+ await engine.load(modelPath, {
33
+ modelPath,
34
+ modelId: "eliza-1-0_8b",
35
+ });
36
+
37
+ expect(captured).toBeDefined();
38
+ expect(captured?.modelPath).toBe(modelPath);
39
+ expect(captured?.modelId).toBe("eliza-1-0_8b");
40
+ expect(captured?.catalog?.id).toBe("eliza-1-0_8b");
41
+ expect(captured?.overrides?.bundleRoot).toBe(bundleRoot);
42
+ expect(captured?.overrides?.manifestPath).toBe(
43
+ path.join(bundleRoot, "eliza-1.manifest.json"),
44
+ );
45
+ expect(
46
+ (
47
+ engine as unknown as {
48
+ activeEliza1Bundle: { root?: string; tierId?: string } | null;
49
+ }
50
+ ).activeEliza1Bundle,
51
+ ).toEqual(
52
+ expect.objectContaining({
53
+ root: bundleRoot,
54
+ tierId: "eliza-1-0_8b",
55
+ }),
56
+ );
57
+ });
58
+ });
@@ -0,0 +1,80 @@
1
+ import { describe, expect, it } from "vitest";
2
+
3
+ import type { GenerateArgs } from "./backend";
4
+ import { LocalInferenceEngine } from "./engine";
5
+
6
+ /**
7
+ * Regression guard for local chat token streaming through the engine facade.
8
+ *
9
+ * The chat reply path forces a per-turn grammar (the Stage-1 HANDLE_RESPONSE
10
+ * envelope) and asks for `streamStructured`. The runtime wires
11
+ * `params.onStreamChunk` down to the engine's `onTextChunk`, and the
12
+ * `ResponseSkeletonStreamExtractor` slices the `replyText` field out of the
13
+ * streamed JSON. For that to surface incremental deltas, the per-token
14
+ * callback MUST fire once per chunk all the way through the dispatcher — not
15
+ * collapse into a single final chunk.
16
+ */
17
+
18
+ const REPLY_TOKENS = [
19
+ '{"shouldRespond":"RESPOND",',
20
+ '"contexts":["simple"],',
21
+ '"replyText":"On ',
22
+ "it ",
23
+ 'now.","facts":[]}',
24
+ ];
25
+
26
+ // A minimal GBNF source — only its presence matters. This mirrors the Stage-1
27
+ // reply path always carrying a grammar.
28
+ const FORCED_GRAMMAR = 'root ::= "{" [^}]* "}"';
29
+
30
+ describe("LocalInferenceEngine.generateInConversation streaming (chat path)", () => {
31
+ it("forwards onTextChunk per token through the dispatcher when voice is off", async () => {
32
+ // The production chat reply has a conversationId, so the local handler
33
+ // routes through `generateInConversation` (NOT `engine.generate`). With no
34
+ // voice bridge active, `voiceStreamingArgs` is a passthrough, so the
35
+ // dispatcher must receive — and the backend must fire — `onTextChunk`
36
+ // per token. This is the junction the FFI-backed unit tests don't cover.
37
+ const engine = new LocalInferenceEngine();
38
+ const seenChunks: string[] = [];
39
+
40
+ const internals = engine as unknown as {
41
+ dispatcher: {
42
+ generate: (args: GenerateArgs) => Promise<string>;
43
+ activeBackendId: () => string | null;
44
+ };
45
+ currentModelPath: () => string | null;
46
+ };
47
+ // Drive the non-"llama-cpp" branch of generateInConversation (the
48
+ // usage-block-synthesizing forward path) by reporting no active FFI
49
+ // backend while still stubbing dispatcher.generate.
50
+ internals.dispatcher.activeBackendId = () => null;
51
+ internals.currentModelPath = () => "fake-model";
52
+ internals.dispatcher.generate = async (args: GenerateArgs) => {
53
+ // Simulate the backend firing the per-token callback.
54
+ for (const token of REPLY_TOKENS) {
55
+ await args.onTextChunk?.(token);
56
+ }
57
+ return REPLY_TOKENS.join("");
58
+ };
59
+
60
+ const handle = engine.openConversation({
61
+ conversationId: "conv-stream-test",
62
+ modelId: "fake-model",
63
+ });
64
+
65
+ const result = await engine.generateInConversation(handle, {
66
+ prompt: "say hi",
67
+ grammar: FORCED_GRAMMAR,
68
+ streamStructured: true,
69
+ onTextChunk: (chunk) => {
70
+ seenChunks.push(chunk);
71
+ },
72
+ });
73
+
74
+ expect(seenChunks).toEqual(REPLY_TOKENS);
75
+ expect(seenChunks.length).toBeGreaterThan(1);
76
+ expect(result.text).toBe(REPLY_TOKENS.join(""));
77
+
78
+ await engine.closeConversation(handle);
79
+ });
80
+ });
@@ -0,0 +1,540 @@
1
+ /**
2
+ * Standalone llama.cpp engine.
3
+ *
4
+ * Fronts the in-process FFI backend (fused `libelizainference`, or the
5
+ * libllama + eliza-llama-shim fallback) via the `BackendDispatcher`. At most
6
+ * one model is loaded at a time — model swap is unload-then-load so we never
7
+ * double-allocate VRAM.
8
+ *
9
+ * Two consumption paths:
10
+ * 1. The Model Hub UI calls `load()` / `unload()` to make "Activate" work.
11
+ * 2. The agent runtime calls `generate()` via the registered
12
+ * `ModelType.TEXT_SMALL` / `TEXT_LARGE` handlers (see
13
+ * `ensure-local-inference-handler.ts`).
14
+ */
15
+ import type { LocalInferenceLoadArgs } from "./active-model";
16
+ import type { GenerateArgs as BackendGenerateArgs, LocalRuntimeLoadConfig } from "./backend";
17
+ import { type ConversationHandle } from "./conversation-registry";
18
+ import { MemoryMonitor } from "./memory-monitor";
19
+ import type { CoordinatorRuntime } from "./voice/cancellation-coordinator";
20
+ import { EngineVoiceBridge, type EngineVoiceBridgeOptions } from "./voice/engine-bridge";
21
+ import type { AsrWordTiming } from "./voice/ffi-bindings";
22
+ import type { VoicePipelineEvents } from "./voice/pipeline";
23
+ import { type MtpTextRunner } from "./voice/pipeline-impls";
24
+ import { SharedResourceRegistry } from "./voice/shared-resources";
25
+ import type { TextToken, TranscriptionAudio, VerifierStreamEvent } from "./voice/types";
26
+ export interface LocalUsageBlock {
27
+ [key: string]: unknown;
28
+ input_tokens: number;
29
+ output_tokens: number;
30
+ cache_creation_input_tokens: number;
31
+ cache_read_input_tokens: number;
32
+ mtp_drafted_tokens?: number;
33
+ mtp_accepted_tokens?: number;
34
+ mtp_acceptance_rate?: number;
35
+ cache_hit_rate?: number;
36
+ }
37
+ export declare function resolveIdleUnloadMs(): number;
38
+ /**
39
+ * Cap on how many speculative voice responses the turn-controller (W9) may
40
+ * have in flight at once — derived from the running server's slot count
41
+ * (each speculative response needs a slot's KV) but never more than half of
42
+ * them (the other half stays available for confirmed turns + tool calls).
43
+ * Floors at 1. Override via `ELIZA_LOCAL_MAX_SPECULATIVE_RESPONSES`.
44
+ */
45
+ export declare function resolveMaxConcurrentSpeculativeResponses(parallelSlots: number): number;
46
+ export type GenerateArgs = BackendGenerateArgs;
47
+ /**
48
+ * Public engine facade.
49
+ *
50
+ * Pre-existing API: `load(modelPath)`, `unload()`, `generate(args)`,
51
+ * plus the activity probes used by router/handler/active-model code. The
52
+ * implementation now sits behind the backend dispatcher; the
53
+ * shape is preserved so callers (active-model, router-handler, the agent
54
+ * runtime handler) keep working unchanged.
55
+ *
56
+ * MTP now lives in the normal optimized llama.cpp backend path. The
57
+ * dispatcher's decision tree picks `llama-cpp` when a kernel is required
58
+ * or when the catalog prefers optimized llama.cpp.
59
+ */
60
+ export declare class LocalInferenceEngine {
61
+ /**
62
+ * In-process FFI backend — the sole text runtime, served by the FUSED
63
+ * `libelizainference` (`desktop-fused-ffi-backend-runtime.ts`). Text gen,
64
+ * same-file MTP speculative decoding, KV-cache quant, native tokenization,
65
+ * and vision-describe all run through the one fused lib the voice subsystem
66
+ * already loads (ABI v9). libllama has been retired: a fused lib that is
67
+ * absent or lacks the v9 capabilities is a loud `LocalInferenceUnavailable`
68
+ * error, never a silent fallback. There is no server fallback for Eliza-1.
69
+ */
70
+ private readonly ffiBackend;
71
+ private readonly dispatcher;
72
+ /**
73
+ * Active voice-streaming bridge (`EngineVoiceBridge`). Only set when an
74
+ * Eliza-1 bundle has been activated AND `startVoice()` has succeeded —
75
+ * see `packages/inference/AGENTS.md` §3 + §4. The engine never lazily
76
+ * stands up a voice session: callers either start it explicitly or get
77
+ * a hard error.
78
+ */
79
+ private voiceBridge;
80
+ private voiceReadyPromise;
81
+ /**
82
+ * The general onload/offload coordinator (W10 / J5). One registry per
83
+ * engine: text + voice both ref-count their shared resources against it,
84
+ * and every resident model role registers an `EvictableModelRole` here so
85
+ * the `MemoryMonitor` can walk them ascending-priority under RAM pressure.
86
+ * The voice bridge gets this passed in (see `startVoice`) so it doesn't
87
+ * spin up a private one.
88
+ */
89
+ private readonly sharedResources;
90
+ /**
91
+ * RAM-pressure monitor (J2). Started when a model loads, stopped when the
92
+ * engine unloads. Evicts the lowest-priority resident role when free RAM
93
+ * crosses the low-water line.
94
+ */
95
+ private readonly memoryMonitor;
96
+ /** Wall-clock ms of the last `useModel`-style activity. */
97
+ private lastActivityMs;
98
+ /** Idle-unload timer (J3); null when disabled or no model loaded. */
99
+ private idleUnloadTimer;
100
+ /** Evictable text-target role id registered on `sharedResources`, or null. */
101
+ private textTargetRoleId;
102
+ /* NOTE(review): orphaned comment — no drafter role id field is declared after it. Original text: Evictable drafter role id registered on `sharedResources`, or null. */
103
+ /**
104
+ * The active Eliza-1 bundle (root dir + tier id), resolved at `load()`
105
+ * from the InstalledModel path/id. `null` when the loaded model is not an
106
+ * Eliza-1 bundle (a user-installed custom). Drives bundle-relative voice
107
+ * resolution — the Kokoro TTS root and the per-tier EOT turn-detector
108
+ * revision.
109
+ */
110
+ private activeEliza1Bundle;
111
+ /**
112
+ * The general onload/offload coordinator for this engine. Exposed so the
113
+ * voice lifecycle, the embedding route, and any other resident model role
114
+ * can register an `EvictableModelRole` against the same registry the
115
+ * `MemoryMonitor` walks under pressure.
116
+ */
117
+ getSharedResources(): SharedResourceRegistry;
118
+ /** The RAM-pressure monitor. Exposed for diagnostics / tests. */
119
+ getMemoryMonitor(): MemoryMonitor;
120
+ /** Record `useModel`-style activity so the idle-unload timer stays armed. */
121
+ private markActivity;
122
+ /**
123
+ * Once a model is resident: register the text target as an evictable role,
124
+ * start the memory monitor, and arm the idle-unload timer. Idempotent.
125
+ */
126
+ private startBackgroundManagement;
127
+ /** Stop the memory monitor + idle timer and deregister evictable roles. */
128
+ private stopBackgroundManagement;
129
+ private registerResidentRoles;
130
+ private deregisterResidentRoles;
131
+ private armIdleUnloadTimer;
132
+ /**
133
+ * Cap on concurrent speculative voice responses (W9 / J4): derived from
134
+ * the running server's slot count (each speculative response needs a KV
135
+ * slot), never more than half of them, floored at 1. The voice
136
+ * turn-controller reads this before kicking a speculative response.
137
+ */
138
+ maxConcurrentSpeculativeResponses(): number;
139
+ /**
140
+ * Auto-tune the running server's `--parallel` (J4): when the conversation
141
+ * high-water mark has outgrown the configured slot count AND there's RAM
142
+ * headroom for the extra KV slots, resize/restart llama.cpp with the larger
143
+ * value so new conversations get their own slot instead of thrashing.
144
+ * Returns `true` when a resize was performed. No-op when the FFI backend
145
+ * isn't loaded. Best-effort: a failed restart leaves the old `--parallel`
146
+ * in place and logs.
147
+ */
148
+ maybeAutoResizeParallel(): Promise<boolean>;
149
+ available(): Promise<boolean>;
150
+ currentModelPath(): string | null;
151
+ hasLoadedModel(): boolean;
152
+ activeBackendId(): "capacitor-llama" | "llama-cpp" | null;
153
+ currentRuntimeLoadConfig(): LocalRuntimeLoadConfig | null;
154
+ unload(): Promise<void>;
155
+ load(modelPath: string, resolved?: LocalInferenceLoadArgs): Promise<void>;
156
+ generate(args: GenerateArgs): Promise<string>;
157
+ /**
158
+ * Vision describe via the running llama.cpp mtmd path. Requires the FFI
159
+ * backend with an mmproj-loaded bundle. The mmproj GGUF must have been
160
+ * declared by the active catalog tier and present on disk under the
161
+ * bundle root; if not, the active backend throws.
162
+ *
163
+ * No fallback: Florence-2 / Transformers.js was the previous fallback
164
+ * and has been removed (see VISION_MIGRATION.md).
165
+ */
166
+ describeImage(args: {
167
+ bytes: Uint8Array;
168
+ mimeType?: string;
169
+ prompt?: string;
170
+ maxTokens?: number;
171
+ temperature?: number;
172
+ signal?: AbortSignal;
173
+ }): Promise<{
174
+ text: string;
175
+ projectorMs?: number;
176
+ decodeMs?: number;
177
+ }>;
178
+ /** True when the active server can serve vision describe (mmproj loaded). */
179
+ canDescribeImages(): boolean;
180
+ /**
181
+ * Diagnostic snapshot of an in-process JS session pool. Always null on the
182
+ * FFI runtime — its KV slots live in the native backend (C), not in a JS
183
+ * session pool. Retained so the API cache-stats panel keeps a stable shape.
184
+ */
185
+ describeSessionPool(): {
186
+ size: number;
187
+ maxSize: number;
188
+ keys: string[];
189
+ } | null;
190
+ /**
191
+ * Reserve a slot for a long-lived conversation. Subsequent
192
+ * `generateInConversation` calls reuse the same slot, so the prefix
193
+ * KV survives across turns regardless of hash collisions with other
194
+ * concurrent conversations.
195
+ *
196
+ * Idempotent for the same (conversationId, modelId): repeated open
197
+ * calls return the same handle. The runtime side should call this
198
+ * lazily on the first turn of a conversation and `closeConversation`
199
+ * when the chat session ends.
200
+ */
201
+ openConversation(args: {
202
+ conversationId: string;
203
+ modelId: string;
204
+ ttlMs?: number;
205
+ }): ConversationHandle;
206
+ /**
207
+ * Run one generation pinned to a previously-opened conversation
208
+ * handle. Cache key, slot id, and (for optimized llama.cpp) kv-restore are
209
+ * all owned by the registry — callers don't need to thread them.
210
+ *
211
+ * Returns the Anthropic-shape `LocalUsageBlock` alongside the text so
212
+ * agentic callers can surface cache-hit telemetry without re-scraping
213
+ * `/metrics` themselves.
214
+ */
215
+ generateInConversation(handle: ConversationHandle, args: Omit<GenerateArgs, "cacheKey">): Promise<{
216
+ text: string;
217
+ usage: LocalUsageBlock;
218
+ slotId: number;
219
+ }>;
220
+ /**
221
+ * KV-prefill a conversation's pinned slot with a known prompt prefix
222
+ * (system prompt + provider context + tool/action schema block + the
223
+ * assistant-turn start), before the real request lands. This is item I1 /
224
+ * C1 of the voice swarm — fire it the moment a message arrives / STT
225
+ * starts so the response-handler prompt is already in the slot's KV when
226
+ * the user's tokens are appended.
227
+ *
228
+ * `conversationOrId` may be a `ConversationHandle` (preferred — pins to
229
+ * the handle's slot) or a raw conversation id (a handle is opened on the
230
+ * fly so the slot derivation matches the real request). Idempotent /
231
+ * cheap to call repeatedly: `cache_prompt: true` reuses the prefix so a
232
+ * second call is a no-op forward pass. Only meaningful once the FFI
233
+ * backend is loaded — returns false otherwise. Returns true when a
234
+ * pre-warm request was issued.
235
+ */
236
+ prewarmConversation(conversationOrId: ConversationHandle | string, promptPrefix: string, opts?: {
237
+ modelId?: string;
238
+ }): Promise<boolean>;
239
+ /**
240
+ * Close + drop a conversation handle. Persists the final KV state to
241
+ * disk so a later open with the same id can lazy-restore. Idempotent;
242
+ * closing an unknown id is a no-op.
243
+ */
244
+ closeConversation(handle: ConversationHandle): Promise<void>;
245
+ /**
246
+ * Read-side accessor for the conversation registry. The runtime handler
247
+ * uses this to look up an existing handle before opening a new one,
248
+ * avoiding the need to thread a handle through every layer.
249
+ */
250
+ conversation(conversationId: string, modelId: string): ConversationHandle | null;
251
+ /**
252
+ * Largest concurrent open-conversation count seen this process lifetime.
253
+ * The auto-tune-parallel path consults this and warns when it exceeds
254
+ * the running server's slot count.
255
+ */
256
+ conversationHighWaterMark(): number;
257
+ /**
258
+ * Recommended `--parallel` value given the current conversation
259
+ * high-water mark plus a small headroom (max(2, 25%)), never below the
260
+ * running slot count. Delegates to `ConversationRegistry.recommendedParallel`
261
+ * so the math lives in one place. When this exceeds `parallelSlots()` the
262
+ * engine can grow the running server (`maybeAutoResizeParallel`).
263
+ */
264
+ recommendedParallel(): number;
265
+ /**
266
+ * Emit a one-line warning when the running `--parallel` slot count is
267
+ * below the recommended value (high-water mark + headroom). Returns true
268
+ * when a warning was emitted. No-op when the FFI backend isn't loaded.
269
+ * The actual resize is `maybeAutoResizeParallel()`
270
+ * — kept separate from this hot-path check so a `useModel` call never
271
+ * blocks on (or is interrupted by) a server restart; the auto-resize is
272
+ * opted into via `ELIZA_LOCAL_AUTO_RESIZE_PARALLEL=1`, in which case this
273
+ * also kicks one off fire-and-forget.
274
+ */
275
+ warnIfParallelTooLow(logger?: {
276
+ warn: (msg: string) => void;
277
+ }): boolean;
278
+ /**
279
+ * Start the voice-streaming pipeline against an already-activated
280
+ * Eliza-1 bundle. Per AGENTS.md §3, voice is mandatory for Eliza-1
281
+ * tiers — every required artifact (speaker preset, fused FFI when
282
+ * `useFfiBackend`, bundle root) is checked up front and missing
283
+ * pieces surface as `VoiceStartupError`. There is no silent fallback
284
+ * to text-only, no log-and-continue.
285
+ *
286
+ * Double-start guard: calling this again without an intervening
287
+ * `stopVoice()` surfaces a hard error, so callers cannot double-allocate
288
+ * the scheduler.
289
+ */
290
+ startVoice(opts: EngineVoiceBridgeOptions): EngineVoiceBridge;
291
+ /**
292
+ * True when a voice session is currently active on the engine. Callers
293
+ * use this to decide whether to lazy-start one (e.g. the TTS model
294
+ * handler in `ensure-local-inference-handler.ts`, which auto-starts a
295
+ * Kokoro-only bridge on the first TEXT_TO_SPEECH invocation when the
296
+ * Kokoro artifacts are on disk and no Eliza-1 bundle has activated).
297
+ */
298
+ hasActiveVoiceBridge(): boolean;
299
+ /**
300
+ * Arm the voice lifecycle on the active bridge — lazily loads the TTS
301
+ * mmap region, optional ASR region when present, voice caches, and
302
+ * voice scheduler nodes via the shared resource registry. Throws
303
+ * `VoiceLifecycleError` if any
304
+ * required artifact is unavailable (RAM pressure, mmap fail, kernel
305
+ * missing) — see `voice/lifecycle.ts` for the structured codes.
306
+ *
307
+ * Required before sustained voice use; `startVoice()` only stands up
308
+ * the cold scheduler and bridge. Splitting setup from arming lets
309
+ * the engine keep the voice surface in voice-off (no heavy weights
310
+ * mapped) until the user actually toggles voice on.
311
+ */
312
+ armVoice(): Promise<void>;
313
+ /**
314
+ * Lazily start + arm voice for the active Eliza-1 bundle. Runtime model
315
+ * handlers use this when visible chat text needs local speech output; direct
316
+ * engine callers still use `startVoice()` / `armVoice()` explicitly when they
317
+ * need custom sinks or test backends.
318
+ */
319
+ ensureActiveBundleVoiceReady(): Promise<EngineVoiceBridge>;
320
+ private ensureActiveBundleVoiceReadyOnce;
321
+ /**
322
+ * Assemble + run the full live voice loop on top of `startVoice()` /
323
+ * `armVoice()`: mic → (`pipeMicToRingBuffer` + `VadDetector.pushFrame`)
324
+ * per frame → `StreamingTranscriber.feed` (VAD-gated) → `VoiceTurnController`
325
+ * (speculative-on-pause, abort-on-resume, finalize/promote, barge-in) →
326
+ * `VoiceScheduler` → TTS → audio sink.
327
+ *
328
+ * Gated behind a complete real backend chain (AGENTS.md §3 — voice must
329
+ * not silently run on a stub or missing backend):
330
+ * - a `MicSource` (caller-supplied, or `DesktopMicSource` under Electrobun),
331
+ * - a Silero v5 GGML VAD (caller-supplied detector, or `createSileroVadDetector()` — runs through libelizainference's native VAD ABI),
332
+ * - a working ASR (the bridge's `createStreamingTranscriber` throws
333
+ * `AsrUnavailableError` when the fused decoder is unavailable — the
334
+ * fused build is the sole on-device ASR runtime),
335
+ * - a real OmniVoice TTS backend on the bridge (the `StubOmniVoiceBackend`
336
+ * is rejected — it emits zeros).
337
+ * Any missing piece fails loudly with the specific component named.
338
+ *
339
+ * `prewarm` defaults to `this.prewarmConversation(roomId, "")` (best-effort
340
+ * KV-prefill); a caller with the response-handler stable prefix (W6) should
341
+ * pass its own. `generate` is required — it builds the message and runs the
342
+ * runtime turn (streaming `replyText` into TTS via this engine's
343
+ * `generate({ onTextChunk })`, which routes through the voice scheduler).
344
+ */
345
+ startVoiceSession(opts: {
346
+ roomId: string;
347
+ /** Mic source. Defaults to a `DesktopMicSource` (Electrobun). */
348
+ micSource?: import("./voice/types").MicSource;
349
+ /** VAD detector. Defaults to `createSileroVadDetector()`. */
350
+ vad?: import("./voice/vad").VadDetector;
351
+ /** Run one turn: build the message + stream `replyText` into TTS. Required. */
352
+ generate: (request: import("./voice/turn-controller").VoiceGenerateRequest) => Promise<import("./voice/turn-controller").VoiceTurnOutcome>;
353
+ /**
354
+ * Semantic turn detector layered with VAD/STT. Defaults to the local
355
+ * LiveKit ONNX model when installed, otherwise the deterministic heuristic.
356
+ * Pass `false` only for tests/manual troubleshooting.
357
+ */
358
+ turnDetector?: import("./voice/eot-classifier").EotClassifier | false;
359
+ /** Optional local LiveKit turn-detector directory override. */
360
+ turnDetectorModelDir?: string;
361
+ /**
362
+ * Use the already-loaded eliza-1 text model as the EOT classifier — see
363
+ * `voice/eliza1-eot-scorer.ts`. When set, the runtime skips the
364
+ * separate LiveKit/Turnsense ONNX and reads P(`<|im_end|>`) directly
365
+ * off the live model.
366
+ *
367
+ * `"auto"` (default): use eliza-1 EOT when `ELIZA_VOICE_EOT_BACKEND=eliza-1`
368
+ * or when no bundled LiveKit ONNX is resolvable; otherwise fall
369
+ * through to the existing LiveKit path. `true` forces eliza-1 EOT
370
+ * (throws if the active backend is not in-process). `false` forces
371
+ * the historical LiveKit path.
372
+ */
373
+ useEliza1Eot?: boolean | "auto";
374
+ /**
375
+ * Optional path to a fine-tuned EOT LoRA adapter to layer on top of
376
+ * the drafter at scoring time. The training recipe lives in
377
+ * `packages/training/scripts/turn_detector/`.
378
+ */
379
+ eliza1EotLoraPath?: string;
380
+ /** KV-prefill / response-handler-prefix prewarm. Defaults to `prewarmConversation`. */
381
+ prewarm?: (roomId: string) => void | Promise<void>;
382
+ speculatePauseMs?: number;
383
+ events?: import("./voice/turn-controller").VoiceTurnControllerEvents;
384
+ /**
385
+ * Opt-in openWakeWord hotword gate (local mode only — the
386
+ * local-inference engine never runs in cloud mode, and the connector
387
+ * UI hides this surface there per AGENTS.md §5 hide-not-disable).
388
+ * Disabled by default: voice mode works push-to-talk / VAD-gated
389
+ * without it. When `enabled` and the bundle ships the openWakeWord
390
+ * graphs, mic frames are also fanned into an `OpenWakeWordDetector`;
391
+ * each fresh detection prewarms the conversation and calls `onWake`
392
+ * (the same place a push-to-talk press would arm a listening window).
393
+ * Silently inert when the bundle has no wake-word model.
394
+ */
395
+ wakeWord?: {
396
+ enabled: boolean;
397
+ /** Wake phrase head name (defaults to the bundle's `hey-eliza`). */
398
+ head?: string;
399
+ /** P(wake) firing threshold (openWakeWord default ~0.5). */
400
+ threshold?: number;
401
+ /** Called once per detected utterance (refractory-debounced). */
402
+ onWake?: () => void;
403
+ };
404
+ /**
405
+ * Runtime reference for cancellation coordination (W3-9 F1).
406
+ *
407
+ * @deprecated G5.d: pass `runtime` to `startVoice()` (the
408
+ * `EngineVoiceBridgeOptions`) instead. The bridge is the canonical
409
+ * owner of `VoiceCancellationCoordinator` + `OptimisticGenerationPolicy`,
410
+ * and `startVoiceSession()` now delegates to the bridge's coordinator.
411
+ * When this field is supplied here without a matching bridge-level
412
+ * runtime, `startVoiceSession()` logs once and ignores it — the
413
+ * canonical wiring lives on the bridge.
414
+ */
415
+ runtime?: CoordinatorRuntime;
416
+ }): Promise<import("./voice/turn-controller").VoiceTurnController>;
417
+ /**
418
+ * Disarm the voice lifecycle — drains the ring buffer, settles the
419
+ * scheduler, and drops TTS/ASR weights from RAM via `evictPages()`
420
+ * (madvise / VirtualUnlock equivalent — see voice/engine-bridge.ts).
421
+ * No-op when not armed.
422
+ */
423
+ disarmVoice(): Promise<void>;
424
+ /**
425
+ * Tear down the active voice bridge. Idempotent; calling when no
426
+ * voice session is active is a no-op. Disarms the lifecycle first
427
+ * (drops voice weights via `evictPages`), then settles any in-flight
428
+ * TTS so audio committed to the ring buffer surfaces to the sink
429
+ * before the bridge is dropped.
430
+ */
431
+ stopVoice(): Promise<void>;
432
+ synthesizeSpeech(text: string, signal?: AbortSignal): Promise<Uint8Array>;
433
+ prewarmVoicePhrases(texts: ReadonlyArray<string>, opts?: {
434
+ concurrency?: number;
435
+ }): Promise<{
436
+ warmed: number;
437
+ cached: number;
438
+ }>;
439
+ /**
440
+ * Idle-time auto-prewarm: synthesize the canonical common-phrase seed so
441
+ * the phrase cache is warm before the next turn. No-op unless a real TTS
442
+ * backend is present and voice is armed. Callers (the voice bridge /
443
+ * connector) invoke this when the loop is idle.
444
+ */
445
+ prewarmIdleVoicePhrases(opts?: {
446
+ concurrency?: number;
447
+ }): Promise<{
448
+ warmed: number;
449
+ cached: number;
450
+ }>;
451
+ /**
452
+ * Play the first-audio filler (a short cached acknowledgement) — the seam
453
+ * W9's turn controller calls the instant VAD fires `speech-start` to mask
454
+ * first-token latency. Returns the played filler text, or `null` if none
455
+ * was played. No-op without a real TTS backend / armed voice.
456
+ */
457
+ playFirstAudioFiller(): string | null;
458
+ transcribePcm(args: TranscriptionAudio, signal?: AbortSignal): Promise<string>;
459
+ /** Transcribe + per-word timings (fused ASR v12) through the voice bridge. */
460
+ transcribePcmTimed(args: TranscriptionAudio, signal?: AbortSignal): Promise<{
461
+ text: string;
462
+ words: AsrWordTiming[];
463
+ }>;
464
+ /**
465
+ * Run one fused mic→speech voice turn through the overlapped
466
+ * `VoicePipeline`: ASR → {MTP drafts ∥ target verifies} → phrase
467
+ * chunker → OmniVoice → PCM ring buffer, with rollback-on-reject and
468
+ * barge-in cancel. Requires `startVoice()` + `armVoice()` first.
469
+ *
470
+ * `opts.textRunner` lets a host that runs its own text engine in-process
471
+ * (the iOS/Android FFI path or the desktop FFI runtime) supply its own
472
+ * {@link MtpTextRunner}. When omitted, the active local dispatcher is
473
+ * used.
474
+ *
475
+ * Resolves with the turn's exit reason (`done` / `token-cap` /
476
+ * `cancelled`). A missing ASR region in voice mode surfaces as a
477
+ * `VoiceStartupError` — no silent cloud fallback (AGENTS.md §3).
478
+ */
479
+ runVoiceTurn(audio: TranscriptionAudio, opts?: {
480
+ maxDraftTokens?: number;
481
+ maxGeneratedTokens?: number;
482
+ events?: VoicePipelineEvents;
483
+ /**
484
+ * In-process text runner for the mobile FFI path. Must implement the
485
+ * same `MtpTextRunner` contract (`hasDrafter()` +
486
+ * `generateWithVerifierEvents()`); the AOSP/Capacitor bridge wraps
487
+ * its libllama-context-backed speculative loop in one.
488
+ */
489
+ textRunner?: MtpTextRunner;
490
+ }): Promise<"done" | "token-cap" | "cancelled">;
491
+ /**
492
+ * Active voice bridge, or null when voice mode is not running.
493
+ * Callers (router, UI, agent runtime) read this to decide whether to
494
+ * forward verifier events. Voice is mandatory for Eliza-1 tiers but
495
+ * the bridge is still created lazily — `voice()` returns null until
496
+ * `startVoice()` has been called.
497
+ */
498
+ voice(): EngineVoiceBridge | null;
499
+ private requireVoiceBridge;
500
+ private voiceStreamingArgs;
501
+ /**
502
+ * Forward a verifier-stream event into the voice scheduler. Accepted
503
+ * tokens flow into the phrase chunker; rejected ranges trigger the
504
+ * rollback queue. No-op when voice is not active, so callers can fan
505
+ * out events unconditionally.
506
+ *
507
+ * When MTP produces an accepted text token, the phrase chunker MUST hand
508
+ * the chunk to TTS within the same scheduler tick.
509
+ */
510
+ pushVerifierEvent(event: VerifierStreamEvent): Promise<void>;
511
+ /**
512
+ * Mic VAD → barge-in. Per AGENTS.md §4, the PCM ring buffer MUST
513
+ * drain immediately and any in-flight TTS forward pass MUST be
514
+ * cancelled at the next kernel boundary. The scheduler enforces both
515
+ * — this is a thin pass-through.
516
+ */
517
+ triggerBargeIn(): void;
518
+ /**
519
+ * Test surface: fan an accepted-token list into the bridge in one
520
+ * call. Production callers should prefer `pushVerifierEvent` so the
521
+ * accept/reject discriminator stays explicit; this exists so the
522
+ * voice integration test can drive the scheduler without
523
+ * reconstructing `VerifierStreamEvent` boilerplate.
524
+ */
525
+ pushAcceptedTokens(tokens: ReadonlyArray<TextToken>): Promise<void>;
526
+ /**
527
+ * Active llama.cpp parallel slot count from the running FFI backend, or
528
+ * the configured default pool size when no model is loaded yet.
529
+ */
530
+ private activeParallel;
531
+ /**
532
+ * The in-process `Eliza1EotClassifier` required a node-bound `LlamaModel`
533
+ * forward pass, which the FFI runtime does not expose. Always null now —
534
+ * callers fall through to the GGUF (FFI) turn-detector and then the
535
+ * heuristic chain.
536
+ */
537
+ private tryBuildEliza1EotClassifier;
538
+ }
539
+ export declare const localInferenceEngine: LocalInferenceEngine;
540
+ //# sourceMappingURL=engine.d.ts.map