@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (701)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +82 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/actions/transcription-control.d.ts +29 -0
  11. package/src/actions/transcription-control.d.ts.map +1 -0
  12. package/src/actions/transcription-control.test.ts +100 -0
  13. package/src/actions/transcription-control.ts +127 -0
  14. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  15. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  16. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  17. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  18. package/src/adapters/capacitor-llama/environment.ts +71 -0
  19. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  20. package/src/adapters/capacitor-llama/index.ts +807 -0
  21. package/src/adapters/capacitor-llama/loader.ts +109 -0
  22. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  23. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  24. package/src/adapters/capacitor-llama/types.ts +374 -0
  25. package/src/backends/apple-foundation.ts +127 -0
  26. package/src/index.d.ts +8 -0
  27. package/src/index.d.ts.map +1 -0
  28. package/src/index.ts +62 -0
  29. package/src/local-inference-routes.d.ts +38 -0
  30. package/src/local-inference-routes.d.ts.map +1 -0
  31. package/src/local-inference-routes.test.ts +344 -0
  32. package/src/local-inference-routes.ts +1543 -0
  33. package/src/provider.d.ts +21 -0
  34. package/src/provider.d.ts.map +1 -0
  35. package/src/provider.ts +1082 -0
  36. package/src/routes/compat-helpers.d.ts +18 -0
  37. package/src/routes/compat-helpers.d.ts.map +1 -0
  38. package/src/routes/compat-helpers.ts +274 -0
  39. package/src/routes/family-member-route.d.ts +62 -0
  40. package/src/routes/family-member-route.d.ts.map +1 -0
  41. package/src/routes/family-member-route.ts +353 -0
  42. package/src/routes/index.d.ts +19 -0
  43. package/src/routes/index.d.ts.map +1 -0
  44. package/src/routes/index.ts +60 -0
  45. package/src/routes/live-diarization-route.d.ts +26 -0
  46. package/src/routes/live-diarization-route.d.ts.map +1 -0
  47. package/src/routes/live-diarization-route.test.ts +213 -0
  48. package/src/routes/live-diarization-route.ts +122 -0
  49. package/src/routes/local-inference-asr-route.d.ts +4 -0
  50. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  51. package/src/routes/local-inference-asr-route.test.ts +205 -0
  52. package/src/routes/local-inference-asr-route.ts +163 -0
  53. package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
  54. package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  55. package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
  56. package/src/routes/local-inference-asr-transcribe.ts +97 -0
  57. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  58. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  59. package/src/routes/local-inference-compat-routes.test.ts +485 -0
  60. package/src/routes/local-inference-compat-routes.ts +808 -0
  61. package/src/routes/local-inference-tts-route.d.ts +7 -0
  62. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  63. package/src/routes/local-inference-tts-route.test.ts +179 -0
  64. package/src/routes/local-inference-tts-route.ts +230 -0
  65. package/src/routes/transcript-audio-store.d.ts +15 -0
  66. package/src/routes/transcript-audio-store.d.ts.map +1 -0
  67. package/src/routes/transcript-audio-store.ts +27 -0
  68. package/src/routes/transcripts-routes.d.ts +36 -0
  69. package/src/routes/transcripts-routes.d.ts.map +1 -0
  70. package/src/routes/transcripts-routes.test.ts +144 -0
  71. package/src/routes/transcripts-routes.ts +159 -0
  72. package/src/routes/voice-first-run-routes.d.ts +62 -0
  73. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  74. package/src/routes/voice-first-run-routes.ts +524 -0
  75. package/src/routes/voice-models-routes.d.ts +62 -0
  76. package/src/routes/voice-models-routes.d.ts.map +1 -0
  77. package/src/routes/voice-models-routes.ts +554 -0
  78. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  79. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  80. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  81. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  82. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  83. package/src/routes/voice-profiles-management-routes.ts +476 -0
  84. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  85. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  86. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  87. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  88. package/src/runtime/capacitor-llama.d.ts +25 -0
  89. package/src/runtime/embedding-manager-support.d.ts +77 -0
  90. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  91. package/src/runtime/embedding-manager-support.ts +497 -0
  92. package/src/runtime/embedding-presets.d.ts +16 -0
  93. package/src/runtime/embedding-presets.d.ts.map +1 -0
  94. package/src/runtime/embedding-presets.ts +81 -0
  95. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  96. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  97. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  98. package/src/runtime/embedding-warmup-policy.ts +48 -0
  99. package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
  100. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  101. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  102. package/src/runtime/ensure-local-inference-handler.ts +1448 -0
  103. package/src/runtime/index.d.ts +15 -0
  104. package/src/runtime/index.d.ts.map +1 -0
  105. package/src/runtime/index.ts +33 -0
  106. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  107. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  108. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  109. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  110. package/src/runtime/voice-entity-binding.d.ts +103 -0
  111. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  112. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  113. package/src/runtime/voice-entity-binding.ts +328 -0
  114. package/src/services/README.md +71 -0
  115. package/src/services/__tests__/backend-selector.test.ts +101 -0
  116. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  117. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  118. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  119. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  120. package/src/services/__tests__/runtime-target.test.ts +176 -0
  121. package/src/services/active-model-switch-rollback.test.ts +183 -0
  122. package/src/services/active-model.d.ts +282 -0
  123. package/src/services/active-model.d.ts.map +1 -0
  124. package/src/services/active-model.ts +1213 -0
  125. package/src/services/assignments.d.ts +71 -0
  126. package/src/services/assignments.d.ts.map +1 -0
  127. package/src/services/assignments.test.ts +80 -0
  128. package/src/services/assignments.ts +230 -0
  129. package/src/services/backend-selector.ts +95 -0
  130. package/src/services/backend.d.ts +346 -0
  131. package/src/services/backend.d.ts.map +1 -0
  132. package/src/services/backend.ts +612 -0
  133. package/src/services/bionic-host-loader.d.ts +46 -0
  134. package/src/services/bionic-host-loader.d.ts.map +1 -0
  135. package/src/services/bionic-host-loader.test.ts +133 -0
  136. package/src/services/bionic-host-loader.ts +180 -0
  137. package/src/services/bundled-models.d.ts +34 -0
  138. package/src/services/bundled-models.d.ts.map +1 -0
  139. package/src/services/bundled-models.ts +129 -0
  140. package/src/services/cache-bridge.d.ts +206 -0
  141. package/src/services/cache-bridge.d.ts.map +1 -0
  142. package/src/services/cache-bridge.test.ts +516 -0
  143. package/src/services/cache-bridge.ts +423 -0
  144. package/src/services/catalog.d.ts +10 -0
  145. package/src/services/catalog.d.ts.map +1 -0
  146. package/src/services/catalog.test.ts +238 -0
  147. package/src/services/catalog.ts +27 -0
  148. package/src/services/checkpoint-client.d.ts +109 -0
  149. package/src/services/checkpoint-client.d.ts.map +1 -0
  150. package/src/services/checkpoint-client.ts +258 -0
  151. package/src/services/checkpoint-manager.ts +474 -0
  152. package/src/services/cloud-fallback.d.ts +102 -0
  153. package/src/services/cloud-fallback.d.ts.map +1 -0
  154. package/src/services/cloud-fallback.ts +230 -0
  155. package/src/services/conversation-registry.d.ts +142 -0
  156. package/src/services/conversation-registry.d.ts.map +1 -0
  157. package/src/services/conversation-registry.test.ts +235 -0
  158. package/src/services/conversation-registry.ts +264 -0
  159. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
  160. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  161. package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
  162. package/src/services/device-bridge.d.ts +188 -0
  163. package/src/services/device-bridge.d.ts.map +1 -0
  164. package/src/services/device-bridge.ts +1237 -0
  165. package/src/services/device-resource-metrics.d.ts +149 -0
  166. package/src/services/device-resource-metrics.d.ts.map +1 -0
  167. package/src/services/device-resource-metrics.test.ts +98 -0
  168. package/src/services/device-resource-metrics.ts +346 -0
  169. package/src/services/device-tier.d.ts +115 -0
  170. package/src/services/device-tier.d.ts.map +1 -0
  171. package/src/services/device-tier.test.ts +371 -0
  172. package/src/services/device-tier.ts +410 -0
  173. package/src/services/downloader.d.ts +82 -0
  174. package/src/services/downloader.d.ts.map +1 -0
  175. package/src/services/downloader.test.ts +747 -0
  176. package/src/services/downloader.ts +925 -0
  177. package/src/services/engine-direct-bundle.test.ts +58 -0
  178. package/src/services/engine-streaming.test.ts +80 -0
  179. package/src/services/engine.d.ts +540 -0
  180. package/src/services/engine.d.ts.map +1 -0
  181. package/src/services/engine.ts +1909 -0
  182. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  183. package/src/services/ensure-local-artifacts.test.ts +368 -0
  184. package/src/services/ensure-local-artifacts.ts +351 -0
  185. package/src/services/external-scanner.d.ts +17 -0
  186. package/src/services/external-scanner.d.ts.map +1 -0
  187. package/src/services/external-scanner.ts +312 -0
  188. package/src/services/ffi-llm-mock.ts +354 -0
  189. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  190. package/src/services/ffi-streaming-backend.d.ts +180 -0
  191. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  192. package/src/services/ffi-streaming-backend.ts +382 -0
  193. package/src/services/ffi-streaming-runner.d.ts +122 -0
  194. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  195. package/src/services/ffi-streaming-runner.test.ts +60 -0
  196. package/src/services/ffi-streaming-runner.ts +354 -0
  197. package/src/services/ffi-unload-ordering.test.ts +162 -0
  198. package/src/services/gpu-autotune.ts +534 -0
  199. package/src/services/gpu-detect.d.ts +56 -0
  200. package/src/services/gpu-detect.d.ts.map +1 -0
  201. package/src/services/gpu-detect.ts +139 -0
  202. package/src/services/handler-registry.d.ts +72 -0
  203. package/src/services/handler-registry.d.ts.map +1 -0
  204. package/src/services/handler-registry.ts +240 -0
  205. package/src/services/hardware.d.ts +63 -0
  206. package/src/services/hardware.d.ts.map +1 -0
  207. package/src/services/hardware.test.ts +231 -0
  208. package/src/services/hardware.ts +410 -0
  209. package/src/services/hf-search.d.ts +26 -0
  210. package/src/services/hf-search.d.ts.map +1 -0
  211. package/src/services/hf-search.test.ts +69 -0
  212. package/src/services/hf-search.ts +420 -0
  213. package/src/services/image-description-runtime.d.ts +14 -0
  214. package/src/services/image-description-runtime.d.ts.map +1 -0
  215. package/src/services/image-description-runtime.test.ts +61 -0
  216. package/src/services/image-description-runtime.ts +118 -0
  217. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  218. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  219. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  220. package/src/services/imagegen/backend-selector.d.ts +118 -0
  221. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  222. package/src/services/imagegen/backend-selector.ts +277 -0
  223. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  224. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  225. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  226. package/src/services/imagegen/errors.d.ts +16 -0
  227. package/src/services/imagegen/errors.d.ts.map +1 -0
  228. package/src/services/imagegen/errors.ts +40 -0
  229. package/src/services/imagegen/index.d.ts +58 -0
  230. package/src/services/imagegen/index.d.ts.map +1 -0
  231. package/src/services/imagegen/index.ts +144 -0
  232. package/src/services/imagegen/mflux.d.ts +74 -0
  233. package/src/services/imagegen/mflux.d.ts.map +1 -0
  234. package/src/services/imagegen/mflux.ts +313 -0
  235. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  236. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  237. package/src/services/imagegen/sd-cpp.ts +718 -0
  238. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  239. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  240. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  241. package/src/services/imagegen/types.d.ts +181 -0
  242. package/src/services/imagegen/types.d.ts.map +1 -0
  243. package/src/services/imagegen/types.ts +193 -0
  244. package/src/services/index.d.ts +29 -0
  245. package/src/services/index.d.ts.map +1 -0
  246. package/src/services/index.ts +211 -0
  247. package/src/services/inference-capabilities.d.ts +132 -0
  248. package/src/services/inference-capabilities.d.ts.map +1 -0
  249. package/src/services/inference-capabilities.test.ts +75 -0
  250. package/src/services/inference-capabilities.ts +204 -0
  251. package/src/services/inference-telemetry.d.ts +59 -0
  252. package/src/services/inference-telemetry.d.ts.map +1 -0
  253. package/src/services/inference-telemetry.ts +143 -0
  254. package/src/services/ios-llama-streaming.ts +248 -0
  255. package/src/services/kv-spill.d.ts +189 -0
  256. package/src/services/kv-spill.d.ts.map +1 -0
  257. package/src/services/kv-spill.test.ts +222 -0
  258. package/src/services/kv-spill.ts +356 -0
  259. package/src/services/latency-trace.d.ts +346 -0
  260. package/src/services/latency-trace.d.ts.map +1 -0
  261. package/src/services/latency-trace.test.ts +266 -0
  262. package/src/services/latency-trace.ts +844 -0
  263. package/src/services/llama-server-metrics.ts +304 -0
  264. package/src/services/llm-streaming-binding.d.ts +96 -0
  265. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  266. package/src/services/llm-streaming-binding.ts +136 -0
  267. package/src/services/load-args.d.ts +82 -0
  268. package/src/services/load-args.d.ts.map +1 -0
  269. package/src/services/load-args.ts +81 -0
  270. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  271. package/src/services/manifest/index.d.ts +4 -0
  272. package/src/services/manifest/index.d.ts.map +1 -0
  273. package/src/services/manifest/index.ts +66 -0
  274. package/src/services/manifest/manifest.test.ts +689 -0
  275. package/src/services/manifest/schema.d.ts +713 -0
  276. package/src/services/manifest/schema.d.ts.map +1 -0
  277. package/src/services/manifest/schema.ts +653 -0
  278. package/src/services/manifest/types.d.ts +30 -0
  279. package/src/services/manifest/types.d.ts.map +1 -0
  280. package/src/services/manifest/types.ts +55 -0
  281. package/src/services/manifest/validator.d.ts +66 -0
  282. package/src/services/manifest/validator.d.ts.map +1 -0
  283. package/src/services/manifest/validator.ts +567 -0
  284. package/src/services/memory-arbiter.d.ts +318 -0
  285. package/src/services/memory-arbiter.d.ts.map +1 -0
  286. package/src/services/memory-arbiter.test.ts +419 -0
  287. package/src/services/memory-arbiter.ts +925 -0
  288. package/src/services/memory-monitor.d.ts +122 -0
  289. package/src/services/memory-monitor.d.ts.map +1 -0
  290. package/src/services/memory-monitor.test.ts +208 -0
  291. package/src/services/memory-monitor.ts +297 -0
  292. package/src/services/memory-pressure.d.ts +130 -0
  293. package/src/services/memory-pressure.d.ts.map +1 -0
  294. package/src/services/memory-pressure.ts +414 -0
  295. package/src/services/mtp-doctor.d.ts +13 -0
  296. package/src/services/mtp-doctor.d.ts.map +1 -0
  297. package/src/services/mtp-doctor.ts +78 -0
  298. package/src/services/network-policy.d.ts +127 -0
  299. package/src/services/network-policy.d.ts.map +1 -0
  300. package/src/services/network-policy.ts +346 -0
  301. package/src/services/paths.d.ts +6 -0
  302. package/src/services/paths.d.ts.map +1 -0
  303. package/src/services/paths.ts +25 -0
  304. package/src/services/planner-skeleton.d.ts +124 -0
  305. package/src/services/planner-skeleton.d.ts.map +1 -0
  306. package/src/services/planner-skeleton.ts +175 -0
  307. package/src/services/providers.d.ts +38 -0
  308. package/src/services/providers.d.ts.map +1 -0
  309. package/src/services/providers.ts +507 -0
  310. package/src/services/ram-budget-cache.test.ts +163 -0
  311. package/src/services/ram-budget.d.ts +110 -0
  312. package/src/services/ram-budget.d.ts.map +1 -0
  313. package/src/services/ram-budget.ts +0 -0
  314. package/src/services/readiness.d.ts +9 -0
  315. package/src/services/readiness.d.ts.map +1 -0
  316. package/src/services/readiness.test.ts +87 -0
  317. package/src/services/readiness.ts +238 -0
  318. package/src/services/recommendation.d.ts +111 -0
  319. package/src/services/recommendation.d.ts.map +1 -0
  320. package/src/services/recommendation.ts +671 -0
  321. package/src/services/registry.d.ts +35 -0
  322. package/src/services/registry.d.ts.map +1 -0
  323. package/src/services/registry.ts +151 -0
  324. package/src/services/router-handler.d.ts +92 -0
  325. package/src/services/router-handler.d.ts.map +1 -0
  326. package/src/services/router-handler.test.ts +45 -0
  327. package/src/services/router-handler.ts +407 -0
  328. package/src/services/routing-policy.d.ts +69 -0
  329. package/src/services/routing-policy.d.ts.map +1 -0
  330. package/src/services/routing-policy.test.ts +164 -0
  331. package/src/services/routing-policy.ts +297 -0
  332. package/src/services/routing-preferences.d.ts +8 -0
  333. package/src/services/routing-preferences.d.ts.map +1 -0
  334. package/src/services/routing-preferences.ts +17 -0
  335. package/src/services/runtime-target.d.ts +98 -0
  336. package/src/services/runtime-target.d.ts.map +1 -0
  337. package/src/services/runtime-target.ts +154 -0
  338. package/src/services/service.d.ts +128 -0
  339. package/src/services/service.d.ts.map +1 -0
  340. package/src/services/service.test.ts +223 -0
  341. package/src/services/service.ts +735 -0
  342. package/src/services/session-pool.d.ts +72 -0
  343. package/src/services/session-pool.d.ts.map +1 -0
  344. package/src/services/session-pool.ts +153 -0
  345. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  346. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  347. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  348. package/src/services/structured-output/deterministic-repair.ts +443 -0
  349. package/src/services/structured-output/index.ts +4 -0
  350. package/src/services/structured-output.d.ts +311 -0
  351. package/src/services/structured-output.d.ts.map +1 -0
  352. package/src/services/structured-output.test.ts +483 -0
  353. package/src/services/structured-output.ts +712 -0
  354. package/src/services/system-memory.d.ts +33 -0
  355. package/src/services/system-memory.d.ts.map +1 -0
  356. package/src/services/system-memory.test.ts +47 -0
  357. package/src/services/system-memory.ts +67 -0
  358. package/src/services/transcription-priority.test.ts +211 -0
  359. package/src/services/types.d.ts +19 -0
  360. package/src/services/types.d.ts.map +1 -0
  361. package/src/services/types.ts +55 -0
  362. package/src/services/verify-on-device.d.ts +34 -0
  363. package/src/services/verify-on-device.d.ts.map +1 -0
  364. package/src/services/verify-on-device.test.ts +87 -0
  365. package/src/services/verify-on-device.ts +127 -0
  366. package/src/services/verify.d.ts +8 -0
  367. package/src/services/verify.d.ts.map +1 -0
  368. package/src/services/verify.ts +13 -0
  369. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  370. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  371. package/src/services/vision/aosp-unavailable.ts +163 -0
  372. package/src/services/vision/capacitor-llama.d.ts +99 -0
  373. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  374. package/src/services/vision/capacitor-llama.ts +255 -0
  375. package/src/services/vision/cloud-fallback.d.ts +47 -0
  376. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  377. package/src/services/vision/cloud-fallback.test.ts +243 -0
  378. package/src/services/vision/cloud-fallback.ts +268 -0
  379. package/src/services/vision/fallback-chain.test.ts +86 -0
  380. package/src/services/vision/hash.d.ts +71 -0
  381. package/src/services/vision/hash.d.ts.map +1 -0
  382. package/src/services/vision/hash.ts +157 -0
  383. package/src/services/vision/index.d.ts +95 -0
  384. package/src/services/vision/index.d.ts.map +1 -0
  385. package/src/services/vision/index.ts +251 -0
  386. package/src/services/vision/llama-server.d.ts +73 -0
  387. package/src/services/vision/llama-server.d.ts.map +1 -0
  388. package/src/services/vision/llama-server.ts +177 -0
  389. package/src/services/vision/types.d.ts +153 -0
  390. package/src/services/vision/types.d.ts.map +1 -0
  391. package/src/services/vision/types.ts +154 -0
  392. package/src/services/vision/vast-fallback.d.ts +18 -0
  393. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  394. package/src/services/vision/vast-fallback.ts +127 -0
  395. package/src/services/vision-embedding-cache.d.ts +98 -0
  396. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  397. package/src/services/vision-embedding-cache.ts +189 -0
  398. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  399. package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
  400. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  401. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  402. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  403. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  404. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  405. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  406. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  407. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  408. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  409. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  410. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  411. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
  412. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  413. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  414. package/src/services/voice/asr-timed.real.test.ts +141 -0
  415. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  416. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  417. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  418. package/src/services/voice/audio-frame-consumer.ts +491 -0
  419. package/src/services/voice/barge-in.d.ts +112 -0
  420. package/src/services/voice/barge-in.d.ts.map +1 -0
  421. package/src/services/voice/barge-in.test.ts +244 -0
  422. package/src/services/voice/barge-in.ts +336 -0
  423. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  424. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  425. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  426. package/src/services/voice/cancellation-coordinator.ts +269 -0
  427. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  428. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  429. package/src/services/voice/checkpoint-manager.ts +401 -0
  430. package/src/services/voice/checkpoint-policy.ts +336 -0
  431. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  432. package/src/services/voice/e2e-harness.test.ts +182 -0
  433. package/src/services/voice/e2e-harness.ts +743 -0
  434. package/src/services/voice/eager-context-builder.d.ts +170 -0
  435. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  436. package/src/services/voice/eager-context-builder.ts +262 -0
  437. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  438. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  439. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  440. package/src/services/voice/embedding-server.ts +200 -0
  441. package/src/services/voice/embedding.d.ts +133 -0
  442. package/src/services/voice/embedding.d.ts.map +1 -0
  443. package/src/services/voice/embedding.test.ts +131 -0
  444. package/src/services/voice/embedding.ts +243 -0
  445. package/src/services/voice/emotion-attribution.d.ts +68 -0
  446. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  447. package/src/services/voice/emotion-attribution.test.ts +129 -0
  448. package/src/services/voice/emotion-attribution.ts +361 -0
  449. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  450. package/src/services/voice/engine-bridge.d.ts +759 -0
  451. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  452. package/src/services/voice/engine-bridge.test.ts +384 -0
  453. package/src/services/voice/engine-bridge.ts +2302 -0
  454. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  455. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  456. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  457. package/src/services/voice/eot-classifier.d.ts +214 -0
  458. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  459. package/src/services/voice/eot-classifier.ts +533 -0
  460. package/src/services/voice/errors.d.ts +20 -0
  461. package/src/services/voice/errors.d.ts.map +1 -0
  462. package/src/services/voice/errors.ts +32 -0
  463. package/src/services/voice/expressive-tags.d.ts +158 -0
  464. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  465. package/src/services/voice/expressive-tags.ts +405 -0
  466. package/src/services/voice/ffi-bindings.d.ts +674 -0
  467. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  468. package/src/services/voice/ffi-bindings.test.ts +728 -0
  469. package/src/services/voice/ffi-bindings.ts +3225 -0
  470. package/src/services/voice/first-line-cache.d.ts +181 -0
  471. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  472. package/src/services/voice/first-line-cache.ts +725 -0
  473. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  474. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  475. package/src/services/voice/fused-eot-scorer.ts +135 -0
  476. package/src/services/voice/index.d.ts +91 -0
  477. package/src/services/voice/index.d.ts.map +1 -0
  478. package/src/services/voice/index.ts +481 -0
  479. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  480. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  481. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  482. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  483. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  484. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  485. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  486. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  487. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  488. package/src/services/voice/kokoro/index.ts +79 -0
  489. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  490. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  491. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  492. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  493. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  494. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  495. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  496. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  497. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  498. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  499. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  501. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  502. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  503. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  504. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  505. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  506. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  507. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  508. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  509. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  510. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  511. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  512. package/src/services/voice/kokoro/types.d.ts +82 -0
  513. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  514. package/src/services/voice/kokoro/types.ts +95 -0
  515. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  516. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  517. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  518. package/src/services/voice/kokoro/voices.d.ts +30 -0
  519. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  520. package/src/services/voice/kokoro/voices.ts +64 -0
  521. package/src/services/voice/lifecycle.d.ts +135 -0
  522. package/src/services/voice/lifecycle.d.ts.map +1 -0
  523. package/src/services/voice/lifecycle.test.ts +315 -0
  524. package/src/services/voice/lifecycle.ts +301 -0
  525. package/src/services/voice/live-diarization-session.d.ts +96 -0
  526. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  527. package/src/services/voice/live-diarization-session.ts +289 -0
  528. package/src/services/voice/mic-source.d.ts +136 -0
  529. package/src/services/voice/mic-source.d.ts.map +1 -0
  530. package/src/services/voice/mic-source.test.ts +210 -0
  531. package/src/services/voice/mic-source.ts +503 -0
  532. package/src/services/voice/optimistic-policy.d.ts +109 -0
  533. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  534. package/src/services/voice/optimistic-policy.test.ts +101 -0
  535. package/src/services/voice/optimistic-policy.ts +192 -0
  536. package/src/services/voice/optimistic-rollback.ts +343 -0
  537. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  538. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  539. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  540. package/src/services/voice/partial-stabilizer.ts +140 -0
  541. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  542. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  543. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  544. package/src/services/voice/phrase-cache.d.ts +76 -0
  545. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  546. package/src/services/voice/phrase-cache.test.ts +242 -0
  547. package/src/services/voice/phrase-cache.ts +186 -0
  548. package/src/services/voice/phrase-chunker.d.ts +62 -0
  549. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  550. package/src/services/voice/phrase-chunker.test.ts +239 -0
  551. package/src/services/voice/phrase-chunker.ts +281 -0
  552. package/src/services/voice/pipeline-impls.d.ts +151 -0
  553. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  554. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  555. package/src/services/voice/pipeline-impls.test.ts +292 -0
  556. package/src/services/voice/pipeline-impls.ts +315 -0
  557. package/src/services/voice/pipeline.d.ts +216 -0
  558. package/src/services/voice/pipeline.d.ts.map +1 -0
  559. package/src/services/voice/pipeline.ts +505 -0
  560. package/src/services/voice/prefill-client.d.ts +123 -0
  561. package/src/services/voice/prefill-client.d.ts.map +1 -0
  562. package/src/services/voice/prefill-client.ts +316 -0
  563. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  564. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  565. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  566. package/src/services/voice/profile-store.d.ts +248 -0
  567. package/src/services/voice/profile-store.d.ts.map +1 -0
  568. package/src/services/voice/profile-store.ts +887 -0
  569. package/src/services/voice/real-audio-decode.test.ts +148 -0
  570. package/src/services/voice/ring-buffer.d.ts +40 -0
  571. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  572. package/src/services/voice/ring-buffer.test.ts +129 -0
  573. package/src/services/voice/ring-buffer.ts +123 -0
  574. package/src/services/voice/rollback-queue.d.ts +24 -0
  575. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  576. package/src/services/voice/rollback-queue.ts +74 -0
  577. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  578. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  579. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  580. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  581. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  582. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  583. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  584. package/src/services/voice/scheduler.d.ts +146 -0
  585. package/src/services/voice/scheduler.d.ts.map +1 -0
  586. package/src/services/voice/scheduler.t2.test.ts +141 -0
  587. package/src/services/voice/scheduler.ts +927 -0
  588. package/src/services/voice/shared-resources.d.ts +190 -0
  589. package/src/services/voice/shared-resources.d.ts.map +1 -0
  590. package/src/services/voice/shared-resources.ts +320 -0
  591. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  592. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  593. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  594. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  595. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  596. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  597. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  598. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  599. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  600. package/src/services/voice/speaker/diarizer.ts +218 -0
  601. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  602. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  603. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  604. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  605. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  606. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  607. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  608. package/src/services/voice/speaker/encoder.d.ts +37 -0
  609. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  610. package/src/services/voice/speaker/encoder.ts +105 -0
  611. package/src/services/voice/speaker-imprint.d.ts +83 -0
  612. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  613. package/src/services/voice/speaker-imprint.test.ts +185 -0
  614. package/src/services/voice/speaker-imprint.ts +312 -0
  615. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  616. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  617. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  618. package/src/services/voice/speaker-preset-cache.ts +195 -0
  619. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  620. package/src/services/voice/system-audio-sink.d.ts +73 -0
  621. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  622. package/src/services/voice/system-audio-sink.test.ts +29 -0
  623. package/src/services/voice/system-audio-sink.ts +366 -0
  624. package/src/services/voice/transcriber.d.ts +244 -0
  625. package/src/services/voice/transcriber.d.ts.map +1 -0
  626. package/src/services/voice/transcriber.test.ts +392 -0
  627. package/src/services/voice/transcriber.ts +704 -0
  628. package/src/services/voice/transcript-knowledge.d.ts +37 -0
  629. package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
  630. package/src/services/voice/transcript-knowledge.test.ts +68 -0
  631. package/src/services/voice/transcript-knowledge.ts +75 -0
  632. package/src/services/voice/transcript-service.d.ts +41 -0
  633. package/src/services/voice/transcript-service.d.ts.map +1 -0
  634. package/src/services/voice/transcript-service.test.ts +137 -0
  635. package/src/services/voice/transcript-service.ts +141 -0
  636. package/src/services/voice/transcript-store.d.ts +53 -0
  637. package/src/services/voice/transcript-store.d.ts.map +1 -0
  638. package/src/services/voice/transcript-store.test.ts +153 -0
  639. package/src/services/voice/transcript-store.ts +132 -0
  640. package/src/services/voice/turn-controller.d.ts +183 -0
  641. package/src/services/voice/turn-controller.d.ts.map +1 -0
  642. package/src/services/voice/turn-controller.test.ts +575 -0
  643. package/src/services/voice/turn-controller.ts +596 -0
  644. package/src/services/voice/types.d.ts +643 -0
  645. package/src/services/voice/types.d.ts.map +1 -0
  646. package/src/services/voice/types.ts +699 -0
  647. package/src/services/voice/vad.d.ts +282 -0
  648. package/src/services/voice/vad.d.ts.map +1 -0
  649. package/src/services/voice/vad.test.ts +480 -0
  650. package/src/services/voice/vad.ts +827 -0
  651. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  652. package/src/services/voice/voice-budget.d.ts +241 -0
  653. package/src/services/voice/voice-budget.d.ts.map +1 -0
  654. package/src/services/voice/voice-budget.test.ts +418 -0
  655. package/src/services/voice/voice-budget.ts +635 -0
  656. package/src/services/voice/voice-duet.test.ts +375 -0
  657. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  658. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  659. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  660. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  661. package/src/services/voice/voice-preset-format.d.ts +158 -0
  662. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  663. package/src/services/voice/voice-preset-format.ts +700 -0
  664. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  665. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  666. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  667. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  668. package/src/services/voice/voice-profile-artifact.ts +518 -0
  669. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  670. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  671. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  672. package/src/services/voice/voice-profile-routes.ts +425 -0
  673. package/src/services/voice/voice-scenario.ts +154 -0
  674. package/src/services/voice/voice-settings.d.ts +82 -0
  675. package/src/services/voice/voice-settings.d.ts.map +1 -0
  676. package/src/services/voice/voice-settings.ts +172 -0
  677. package/src/services/voice/voice-state-machine.d.ts +364 -0
  678. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  679. package/src/services/voice/voice-state-machine.ts +727 -0
  680. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  681. package/src/services/voice/voice-workbench-report.ts +326 -0
  682. package/src/services/voice/voice-workbench.test.ts +158 -0
  683. package/src/services/voice/voice.test.ts +1070 -0
  684. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  685. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  686. package/src/services/voice/wake-word-ggml.ts +320 -0
  687. package/src/services/voice/wake-word.d.ts +255 -0
  688. package/src/services/voice/wake-word.d.ts.map +1 -0
  689. package/src/services/voice/wake-word.test.ts +298 -0
  690. package/src/services/voice/wake-word.ts +554 -0
  691. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  692. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  693. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  694. package/src/services/voice-model-updater.d.ts +240 -0
  695. package/src/services/voice-model-updater.d.ts.map +1 -0
  696. package/src/services/voice-model-updater.ts +724 -0
  697. package/src/services/voice-prewarm.d.ts +3 -0
  698. package/src/services/voice-prewarm.d.ts.map +1 -0
  699. package/src/services/voice-prewarm.ts +51 -0
  700. package/dist/index.d.ts +0 -37
  701. package/dist/index.js +0 -1098
@@ -0,0 +1,712 @@
1
+ /**
2
+ * Structured-output / forced-span / prefill plumbing for the local-inference
3
+ * engine path.
4
+ *
5
+ * The canonical contract lives in `@elizaos/core` `GenerateTextParams`
6
+ * (`prefill`, `responseSkeleton`, `grammar`, `streamStructured`) and is
7
+ * threaded through `useModel` → router. This module is the
8
+ * local-inference-layer mirror of the relevant subset plus the GBNF
9
+ * compilation that turns a `ResponseSkeleton` into a *lazy* grammar so the
10
+ * model only ever samples the free positions of the response envelope
11
+ * (single-value enums collapse to literals — no tokens spent on the scaffold).
12
+ *
13
+ * Nothing here is local-model-specific in shape; cloud adapters never read
14
+ * these fields. There is no fallback path — adapters that can't honour
15
+ * `grammar` / `prefill` / `responseSkeleton` ignore them, full stop.
16
+ */
17
+
18
+ import type {
19
+ JSONSchema,
20
+ ResponseSkeleton,
21
+ ResponseSkeletonSpan,
22
+ SpanSamplerPlan,
23
+ } from "@elizaos/core";
24
+
25
+ export {
26
+ repairStructuredOutput,
27
+ type StructuredOutputRepairOptions,
28
+ type StructuredOutputRepairResult,
29
+ type StructuredOutputRepairStatus,
30
+ StructuredOutputRepairStream,
31
+ } from "./structured-output/deterministic-repair";
32
+ export type { ResponseSkeleton, ResponseSkeletonSpan, SpanSamplerPlan };
33
+
34
/**
 * A GBNF grammar in the shape a llama-server request body expects.
 *
 * A `lazy` grammar stays dormant until one of its trigger words/sequences
 * shows up in the stream (llama.cpp's `grammar_lazy` + `grammar_triggers`).
 * That lets the model free-run the prose `replyText` and only constrains the
 * structured scaffold once the envelope boundary is reached.
 */
export interface GbnfGrammar {
  /** The grammar text, in GBNF syntax. */
  source: string;
  /** Ask the server to apply the grammar lazily (`grammar_lazy: true`). */
  lazy?: boolean;
  /** Words whose appearance activates a lazy grammar (`grammar_triggers`). */
  triggers?: readonly string[];
}
49
+
50
/**
 * The structured-output extensions of `GenerateTextParams`, mirrored for the
 * local-inference layer. They travel `useModel` → router → local handler →
 * engine → FFI runtime.
 */
export interface StructuredGenerateParams {
  /**
   * Partial assistant message the model must *continue* instead of starting
   * a fresh turn. llama-server receives it as a trailing assistant message
   * with `continue_final_message` / the `assistant` chat-template prefix; the
   * capacitor-llama path seeds the prompt text with it and re-prepends it to
   * the result.
   */
  prefill?: string;
  /**
   * Forced shape of the response. The engine compiles it to a lazy GBNF
   * (single-value enums become literals), so only the free positions of the
   * envelope are sampled.
   */
  responseSkeleton?: ResponseSkeleton;
  /** Whole-response JSON schema carried over from `GenerateTextParams`. */
  responseSchema?: JSONSchema;
  /**
   * Hand-written GBNF grammar. Takes precedence over `responseSkeleton` when
   * both are supplied.
   */
  grammar?: string;
  /**
   * Stream per-token chunks through `onTextChunk` (plus structured-field
   * events) rather than returning the complete string at the end.
   */
  streamStructured?: boolean;
  /**
   * The eliza harness schema for this call: a compact descriptor bundling the
   * response skeleton, an optional pre-built grammar, the derived
   * deterministic-token {@link ElizaPrefillPlan}, and the short/long name
   * maps.
   *
   * Presence switches guided structured decode *on*: the engine sends both
   * the grammar and the prefill plan and seeds the leading literal run as an
   * assistant-turn prefill. When absent, guided decode is off — a bare
   * `grammar` / `responseSkeleton` may still be honoured, but no prefill plan
   * is ever emitted. This is the off-by-default switch for the deterministic
   * short-circuit.
   */
  elizaSchema?: ElizaHarnessSchema;
  /**
   * Sampler overrides per span of the {@link responseSkeleton}. When set, the
   * engine puts `eliza_span_samplers` on the llama-server request body so the
   * fork-side server switches to argmax (`llama_sampler_init_greedy()`) at the
   * indicated enum / number / boolean positions. Stock llama-server ignores
   * the field: the grammar still constrains the same tokens, only the argmax
   * determinism guarantee is lost on that path.
   *
   * Producer: `@elizaos/core` `buildSpanSamplerPlan(skeleton)`.
   */
  spanSamplerPlan?: SpanSamplerPlan;
  /**
   * Per-request chat-template thinking control for reasoning-capable local
   * models. `off` maps to `chat_template_kwargs.enable_thinking=false`, for
   * response-handler / direct-reply calls that must emit user-visible text.
   * Planner / action calls can leave it unset and take the catalog/server
   * default.
   */
  thinking?: "auto" | "on" | "off";
}
114
+
115
/**
 * Reports whether the model actually samples this span: free strings, free
 * JSON, numbers, booleans, and enums that offer two or more values. Every
 * other span (including an enum without an `enumValues` array) is not free.
 */
function isFreeSpan(span: ResponseSkeletonSpan): boolean {
  switch (span.kind) {
    case "free-string":
    case "free-json":
    case "number":
    case "boolean":
      return true;
    case "enum":
      return Array.isArray(span.enumValues) && span.enumValues.length > 1;
    default:
      return false;
  }
}
127
+
128
/**
 * Make `text` safe to embed between the double quotes of a GBNF string
 * literal, using C-style escapes: backslash, double quote, newline, carriage
 * return and tab get their named escape; any other character below U+0020 is
 * written as `\xHH`; everything else passes through untouched.
 */
function gbnfEscapeLiteral(text: string): string {
  const namedEscapes = new Map<string, string>([
    ["\\", "\\\\"],
    ['"', '\\"'],
    ["\n", "\\n"],
    ["\r", "\\r"],
    ["\t", "\\t"],
  ]);
  // Array.from walks the string by code point, so astral characters are
  // handled as a single unit.
  const pieces = Array.from(text, (ch) => {
    const named = namedEscapes.get(ch);
    if (named !== undefined) return named;
    const code = ch.codePointAt(0) ?? 0;
    return code < 0x20 ? `\\x${code.toString(16).padStart(2, "0")}` : ch;
  });
  return pieces.join("");
}
145
+
146
/**
 * Lower degenerate enums: every `enum` span whose `enumValues` array holds one
 * value (or none) is replaced by a `literal` span carrying that value (C4).
 * With an empty array the span's own `value` is used, falling back to `""`.
 * All other spans are passed through by reference.
 *
 * Adjacent literals are not merged here — they stay separate spans and the
 * grammar compiler emits them as consecutive literals in the root rule.
 *
 * NOTE(review): the collapsed literal carries the enum value as-is, without
 * surrounding JSON quotes, whereas the single-value fallback paths in
 * `compileSkeletonToGbnf` / `compilePrefillPlan` quote it — confirm which form
 * producers expect.
 */
export function collapseSkeleton(skeleton: ResponseSkeleton): ResponseSkeleton {
  const spans = skeleton.spans.map((span): ResponseSkeletonSpan => {
    if (
      span.kind !== "enum" ||
      !Array.isArray(span.enumValues) ||
      span.enumValues.length > 1
    ) {
      return span;
    }
    return {
      kind: "literal",
      key: span.key,
      value: span.enumValues[0] ?? span.value ?? "",
    };
  });
  return { spans, id: skeleton.id };
}
167
+
168
/**
 * GBNF rule body for a quoted JSON string value.
 *
 * Follows the JSON string grammar: unescaped characters exclude `"`, `\` and
 * the control range U+0000–U+001F, and a backslash may only introduce one of
 * the JSON escapes (`\"`, `\\`, `\/`, `\b`, `\f`, `\n`, `\r`, `\t`, `\uXXXX`).
 * A looser rule would let the model emit raw newlines or unknown escapes that
 * a JSON parser rejects. The four hex digits are spelled out rather than
 * written with a `{4}` repetition so the rule does not depend on repetition
 * syntax support in the grammar parser.
 */
const GBNF_JSON_STRING = String.raw`"\"" ( [^"\\\x00-\x1F] | "\\" ( ["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] ) )* "\""`;
/**
 * GBNF rules for a JSON value (object/array/string/number/bool/null) — the
 * recursive `jsonvalue` grammar, inlined so a `free-json` span is
 * self-contained without a shared `json` import. One rule per line; `ws` is
 * optional whitespace between structural tokens.
 */
const GBNF_JSON_VALUE = [
  'jsonvalue ::= jsonobject | jsonarray | jsonstring | jsonnumber | "true" | "false" | "null"',
  'jsonobject ::= "{" ws ( jsonstring ws ":" ws jsonvalue ( ws "," ws jsonstring ws ":" ws jsonvalue )* )? ws "}"',
  'jsonarray ::= "[" ws ( jsonvalue ( ws "," ws jsonvalue )* )? ws "]"',
  `jsonstring ::= ${GBNF_JSON_STRING}`,
  'jsonnumber ::= "-"? ( [0-9] | [1-9] [0-9]* ) ( "." [0-9]+ )? ( [eE] [-+]? [0-9]+ )?',
  "ws ::= [ \\t\\n\\r]*",
].join("\n");
185
+
186
/**
 * Compile a `ResponseSkeleton` to a GBNF grammar. The grammar's `root` rule is
 * the concatenation of every span, in order:
 *  - `literal` spans → GBNF string literals (the JSON key/glue scaffold),
 *  - `enum` spans with ≥2 values → a named rule alternating the JSON-quoted
 *    values,
 *  - `enum` spans that still resolve to a single value → an inline
 *    JSON-quoted literal (defensive; `collapseSkeleton` normally lowers them),
 *  - `free-string` spans → a quoted JSON string rule,
 *  - `number` / `boolean` spans → small local leaf rules,
 *  - any other span (`free-json`) → the recursive JSON-value rule.
 *
 * A span's `rule`, when set, is used as the rule name; otherwise a name is
 * generated from a shared counter.
 *
 * The grammar is *lazy* when the skeleton opens with a non-empty literal: that
 * literal becomes the trigger word, generation free-runs until it is seen, and
 * the grammar then pins the rest of the envelope. Otherwise the grammar is
 * returned with `lazy: false`.
 *
 * @param skeletonInput The skeleton to compile (collapsed internally).
 * @returns The grammar, or `null` when the skeleton has no free spans (nothing
 *   for the model to sample — the caller should just emit the literal text and
 *   skip generation).
 */
export function compileSkeletonToGbnf(
  skeletonInput: ResponseSkeleton,
): GbnfGrammar | null {
  const skeleton = collapseSkeleton(skeletonInput);
  if (!skeleton.spans.some(isFreeSpan)) return null;

  // An enum value is emitted as a JSON string. JSON.stringify escapes quotes,
  // backslashes and control characters inside the value so the forced bytes
  // are always valid JSON; for plain values the result is byte-identical to
  // wrapping the value in double quotes.
  const enumLiteral = (value: unknown): string =>
    `"${gbnfEscapeLiteral(JSON.stringify(String(value)))}"`;
  const numberRuleBody =
    '"-"? ( [0-9] | [1-9] [0-9]* ) ( "." [0-9]+ )? ( [eE] [-+]? [0-9]+ )?';

  const rules = new Map<string, string>();
  const rootParts: string[] = [];
  let freeIdx = 0;
  let needsJsonValue = false;
  let triggerWord: string | null = null;

  for (const [index, span] of skeleton.spans.entries()) {
    if (span.kind === "literal") {
      const text = span.value ?? "";
      // Only the very first span can act as the lazy-grammar trigger.
      if (index === 0 && text.length > 0) triggerWord = text;
      rootParts.push(`"${gbnfEscapeLiteral(text)}"`);
      continue;
    }
    if (span.kind === "enum") {
      const values =
        Array.isArray(span.enumValues) && span.enumValues.length > 0
          ? span.enumValues
          : [span.value ?? ""];
      if (values.length === 1) {
        // collapseSkeleton already lowered single-value enums; this is a
        // defensive fallback for a producer that didn't.
        rootParts.push(enumLiteral(values[0]));
        continue;
      }
      const ruleName = span.rule ?? `enum${freeIdx++}`;
      rules.set(ruleName, values.map(enumLiteral).join(" | "));
      rootParts.push(ruleName);
      continue;
    }
    if (span.kind === "free-string") {
      const ruleName = span.rule ?? `freestr${freeIdx++}`;
      if (!rules.has(ruleName)) rules.set(ruleName, GBNF_JSON_STRING);
      rootParts.push(ruleName);
      continue;
    }
    if (span.kind === "number") {
      // jsonnumber lives inside GBNF_JSON_VALUE; pulling that whole block in
      // is overkill for a leaf number span — emit a local rule instead.
      const ruleName = span.rule ?? `jsonnum${freeIdx++}`;
      if (!rules.has(ruleName)) rules.set(ruleName, numberRuleBody);
      rootParts.push(ruleName);
      continue;
    }
    if (span.kind === "boolean") {
      const ruleName = span.rule ?? `jsonbool${freeIdx++}`;
      if (!rules.has(ruleName)) rules.set(ruleName, '"true" | "false"');
      rootParts.push(ruleName);
      continue;
    }
    // free-json
    const ruleName = span.rule ?? "jsonvalue";
    if (ruleName === "jsonvalue") {
      needsJsonValue = true;
    } else if (!rules.has(ruleName)) {
      // A producer-named rule with no inline body falls back to a JSON value.
      rules.set(ruleName, "jsonvalue");
      needsJsonValue = true;
    }
    rootParts.push(ruleName);
  }

  const lines = [`root ::= ${rootParts.join(" ")}`];
  for (const [name, body] of rules) lines.push(`${name} ::= ${body}`);
  // The shared JSON-value rules are appended once, after the per-span rules.
  if (needsJsonValue) lines.push(GBNF_JSON_VALUE);
  const source = lines.join("\n");
  if (triggerWord) return { source, lazy: true, triggers: [triggerWord] };
  return { source, lazy: false };
}
284
+
285
/**
 * Pick the GBNF grammar for a generation call. A non-blank explicit `grammar`
 * string wins and is returned as a non-lazy grammar; failing that, the
 * `responseSkeleton` (if any) is compiled. With neither — or with no params at
 * all — the result is `null`.
 */
export function resolveGrammarForParams(
  params: StructuredGenerateParams | undefined,
): GbnfGrammar | null {
  const explicit = params?.grammar;
  if (typeof explicit === "string" && explicit.trim().length > 0) {
    return { source: explicit, lazy: false };
  }
  const skeleton = params?.responseSkeleton;
  return skeleton ? compileSkeletonToGbnf(skeleton) : null;
}
302
+
303
/**
 * Read one GBNF string literal from the start of `body` and decode it.
 *
 * Only the escapes `\\`, `\"`, `\n`, `\r`, `\t` and `\xHH` are understood. The
 * literal must be followed by a space or the end of the string.
 *
 * @returns The decoded text plus the number of characters the literal
 *   occupies in `body`, or `null` when `body` does not start with such a
 *   literal.
 */
function readLeadingGbnfLiteral(
  body: string,
): { text: string; length: number } | null {
  if (!body.startsWith('"')) return null;
  let text = "";
  let i = 1;
  while (i < body.length) {
    const ch = body[i];
    if (ch === '"') {
      const end = i + 1;
      // A literal glued to an operator (e.g. `"a"*`) is not a plain
      // concatenation element, so it cannot simply be dropped.
      if (end < body.length && body[end] !== " ") return null;
      return { text, length: end };
    }
    if (ch !== "\\") {
      text += ch;
      i += 1;
      continue;
    }
    const escape = body[i + 1];
    if (escape === "\\" || escape === '"') {
      text += escape;
      i += 2;
    } else if (escape === "n") {
      text += "\n";
      i += 2;
    } else if (escape === "r") {
      text += "\r";
      i += 2;
    } else if (escape === "t") {
      text += "\t";
      i += 2;
    } else if (escape === "x") {
      const hex = body.slice(i + 2, i + 4);
      if (!/^[0-9a-fA-F]{2}$/.test(hex)) return null;
      text += String.fromCharCode(Number.parseInt(hex, 16));
      i += 4;
    } else {
      return null;
    }
  }
  // Ran off the end without a closing quote.
  return null;
}

/**
 * Remove an already-prefilled literal prefix from the front of a grammar's
 * `root` rule, so the grammar only describes what is still to be generated.
 *
 * The first line of `grammar.source` must be the `root ::= …` rule. Leading
 * string literals of that rule are consumed one after another for as long as
 * they spell out `prefix`; the prefix may therefore span several adjacent
 * literals, but it must end exactly on a literal boundary. All other rule
 * lines are kept unchanged.
 *
 * @param grammar The grammar to trim.
 * @param prefix The text that has already been emitted as a prefill.
 * @returns `grammar` itself when `prefix` is empty; a new non-lazy grammar
 *   with the prefix removed (root becomes `""` when nothing is left); or
 *   `null` when the root rule is missing or does not start with `prefix`.
 */
function stripPrefilledPrefixFromGrammar(
  grammar: GbnfGrammar,
  prefix: string,
): GbnfGrammar | null {
  if (!prefix) return grammar;
  const [rootLine = "", ...otherRules] = grammar.source.split("\n");
  const rootPrefix = "root ::= ";
  if (!rootLine.startsWith(rootPrefix)) return null;

  let body = rootLine.slice(rootPrefix.length);
  let remaining = prefix;
  while (remaining.length > 0) {
    const literal = readLeadingGbnfLiteral(body);
    if (literal === null || !remaining.startsWith(literal.text)) return null;
    remaining = remaining.slice(literal.text.length);
    body = body.slice(literal.length).trimStart();
  }

  return {
    // An empty root body is not a valid rule; use the empty literal instead.
    source: [`${rootPrefix}${body.length > 0 ? body : '""'}`, ...otherRules].join(
      "\n",
    ),
    lazy: false,
  };
}
331
+
332
/**
 * Turn a grammar into the request-body fields an OpenAI-/llama-server
 * compatible endpoint expects. `grammar` is always present; a lazy grammar
 * additionally sets `grammar_lazy` and, when it has any triggers,
 * `grammar_triggers` (one `{ type: "word", value }` entry per trigger).
 * Recent llama.cpp accepts these on both `/v1/chat/completions` and
 * `/completion`.
 */
export function grammarRequestFields(
  grammar: GbnfGrammar,
): Record<string, unknown> {
  const fields: Record<string, unknown> = { grammar: grammar.source };
  if (!grammar.lazy) return fields;

  fields.grammar_lazy = true;
  const triggers = grammar.triggers ?? [];
  if (triggers.length > 0) {
    fields.grammar_triggers = triggers.map((value) => ({
      type: "word",
      value,
    }));
  }
  return fields;
}
353
+
354
/**
 * Cut a skeleton at its first non-literal span. The text of the leading
 * `literal` spans is concatenated into `prefixLiteral` (an assistant-turn
 * prefill candidate) and every span from the first non-literal onwards is
 * returned as `rest`. Used by the multi-call infill fallback: emit the prefix
 * as a prefill, generate the first free span, then loop.
 */
export function splitSkeletonAtFirstFree(skeleton: ResponseSkeleton): {
  prefixLiteral: string;
  rest: ResponseSkeletonSpan[];
} {
  const { spans } = skeleton;
  const firstNonLiteral = spans.findIndex((span) => span.kind !== "literal");
  // No non-literal span at all: the whole skeleton is prefix.
  const cut = firstNonLiteral === -1 ? spans.length : firstNonLiteral;
  const prefixLiteral = spans
    .slice(0, cut)
    .map((span) => span.value ?? "")
    .join("");
  return { prefixLiteral, rest: spans.slice(cut) };
}
375
+
376
+ // ---------------------------------------------------------------------------
377
+ // Deterministic-token prefill plan
378
+ // ---------------------------------------------------------------------------
379
+ //
380
+ // The grammar bounds the *search* but the model still spends one forward pass
381
+ // per sampled token, including on the scaffold positions that the grammar
382
+ // forces (the JSON braces, the fixed key names, the `": "` glue). A
383
+ // constrained-decode server that understands the schema can do better: when a
384
+ // run of bytes is *deterministically implied* by the schema given the branch
385
+ // chosen so far, it can write those token ids straight into the sequence and
386
+ // advance the decoder to the next free parameter without a forward pass. The
387
+ // {@link ElizaPrefillPlan} is the compact metadata the engine sends so the
388
+ // server can do exactly that.
389
+ //
390
+ // The plan is purely a *speedup hint*. A server that ignores it still produces
391
+ // the identical output (the grammar already forces the same bytes); a server
392
+ // that honours it produces the identical output faster. Off by default — the
393
+ // engine only emits it when an `ElizaHarnessSchema` (or a `prefillPlan`) is
394
+ // present on the request, never for unguided generation.
395
+
396
/**
 * A single deterministically-forced byte run inside an
 * {@link ElizaPrefillPlan}. Runs alternate with the free (sampled) spans, and
 * because the length of a sampled span is unknown at plan time a run is
 * anchored by its *position* in that alternation, not by a byte offset:
 *
 *   run[0] free[0] run[1] free[1] … run[n]     (n = number of free spans)
 *
 * `afterFreeSpan` is `-1` for the leading run (the one before any free span,
 * i.e. the assistant-turn prefill) and `0, 1, 2, …` for the run that follows
 * free span 0, 1, 2, … . After writing a run the server resumes sampling;
 * once the matching free span has been sampled it writes the next run's token
 * ids without a forward pass and moves the decoder on to the next free span.
 */
export interface PrefillRun {
  /**
   * Which free span this run comes *after*: `-1` for the leading (prefill)
   * run, `k >= 0` for the run following free span `k`. The final run is the
   * tail scaffold (closing braces) after the last free span.
   */
  afterFreeSpan: number;
  /** The bytes that are forced at this position. */
  text: string;
  /**
   * Token IDs for `text`, present when a tokenizer callback was supplied at
   * compile time. They let the FFI runtime skip re-tokenizing the run, which
   * improves latency.
   */
  tokenIds?: number[];
}
426
+
427
/**
 * Compact description of the deterministic structure of a constrained decode:
 * the ordered fixed byte runs (whose token ids the server can prefill,
 * skipping the forward passes), the number of free positions between them,
 * and the leading literal run to seed as an assistant-turn prefill
 * (`prefix`). It is sent on the request as `eliza_prefill_plan`.
 *
 * The plan is only a speedup hint: a server that ignores it yields the same
 * output, because the lazy GBNF already forces the same bytes.
 */
export interface ElizaPrefillPlan {
  /**
   * The leading deterministic run, emitted as an assistant-turn prefill so the
   * model never has to sample it. Empty when the skeleton opens with a free
   * span.
   */
  prefix: string;
  /**
   * The deterministic byte runs in output order, alternating with the free
   * spans (see {@link PrefillRun}). Includes the prefix run when it is
   * non-empty.
   */
  runs: PrefillRun[];
  /**
   * How many free (sampled) spans the skeleton has. There are at most
   * `freeCount + 1` runs; positions with no literal text between two free
   * spans (or before the first / after the last one) contribute no run.
   */
  freeCount: number;
  /**
   * Opaque cache key mirroring the skeleton's `id`, so the server can reuse
   * the tokenised runs across turns while the structure is unchanged.
   */
  id?: string;
}
457
+
458
/**
 * Compute the {@link ElizaPrefillPlan} for a response skeleton.
 *
 * The spans are walked in order. Deterministic spans — `literal` spans and
 * `enum` spans that resolve to at most one value — are accumulated into byte
 * runs, with adjacent ones merging into a single run. Every other span counts
 * as a free span and closes the run accumulated so far.
 *
 * Invariant the consumer relies on: concatenating the runs interleaved with
 * the (eventually sampled) free-span values, in order, reproduces a
 * byte-identical JSON document to what the lazy GBNF from
 * {@link compileSkeletonToGbnf} would have produced.
 *
 * @param skeletonInput The skeleton to plan for (collapsed internally).
 * @param tokenize Optional tokenizer; when given, each run also carries the
 *   token ids of its text.
 * @returns The plan, or `null` when the skeleton has no deterministic runs at
 *   all (nothing to prefill).
 */
export function compilePrefillPlan(
  skeletonInput: ResponseSkeleton,
  tokenize?: (text: string) => number[],
): ElizaPrefillPlan | null {
  const skeleton = collapseSkeleton(skeletonInput);
  const runs: PrefillRun[] = [];
  let freeCount = 0;
  let pending = "";

  // Close the run accumulated so far; empty runs are never emitted.
  const flushPending = (afterFreeSpan: number): void => {
    if (pending.length === 0) return;
    const run: PrefillRun = { afterFreeSpan, text: pending };
    if (tokenize) {
      run.tokenIds = tokenize(pending);
    }
    runs.push(run);
    pending = "";
  };

  for (const span of skeleton.spans) {
    if (span.kind === "literal") {
      pending += span.value ?? "";
      continue;
    }
    if (span.kind === "enum") {
      const values = Array.isArray(span.enumValues) ? span.enumValues : [];
      if (values.length <= 1) {
        // Defensive: an enum that offers no real choice is forced text, not a
        // sampled position. This covers a producer that did not collapse a
        // single-value enum as well as an enum without an `enumValues` array,
        // which the grammar compiler emits as a quoted literal of
        // `span.value`. Counting it as free would shift every later
        // `afterFreeSpan` index and drop its bytes from the runs.
        pending += JSON.stringify(String(values[0] ?? span.value ?? ""));
        continue;
      }
    }
    // A free position (enum with ≥2 values, free-string, free-json, number,
    // boolean). The deterministic run accumulated so far follows free span
    // `freeCount - 1` (or is the leading prefill run when `freeCount === 0`).
    flushPending(freeCount - 1);
    freeCount += 1;
  }
  // Tail scaffold after the last free span.
  flushPending(freeCount - 1);

  const [firstRun] = runs;
  if (firstRun === undefined) return null;
  // Only a run that precedes every free span qualifies as the prefill prefix.
  const prefix = firstRun.afterFreeSpan === -1 ? firstRun.text : "";
  return { prefix, runs, freeCount, id: skeleton.id };
}
516
+
517
/**
 * Build the request-body fragment carrying the prefill plan. The server reads
 * `eliza_prefill_plan` (a tolerant extension — old binaries ignore it and the
 * grammar still forces the same bytes).
 *
 * Wire schema (snake_case):
 *   {
 *     prefix: string,
 *     runs: [{ after_free_span: number, text: string, token_ids?: number[] }],
 *     free_count: number,
 *     id?: string
 *   }
 *
 * Optional members (`token_ids`, `id`) are only emitted when they are actually
 * set, so the fragment never carries explicit `undefined` values.
 *
 * @param plan - Plan to serialise, or `null` when there is nothing to prefill.
 * @returns `{ eliza_prefill_plan: … }`, or `{}` when there is no plan.
 */
export function prefillPlanRequestFields(
  plan: ElizaPrefillPlan | null,
): Record<string, unknown> {
  if (!plan) return {};
  const wirePlan: Record<string, unknown> = {
    prefix: plan.prefix,
    runs: plan.runs.map((r) => {
      const run: Record<string, unknown> = {
        after_free_span: r.afterFreeSpan,
        text: r.text,
      };
      if (r.tokenIds !== undefined) {
        run.token_ids = r.tokenIds;
      }
      return run;
    }),
    free_count: plan.freeCount,
  };
  // Same rule as `token_ids` above: leave the key out instead of sending
  // `id: undefined`.
  if (plan.id !== undefined) {
    wirePlan.id = plan.id;
  }
  return { eliza_prefill_plan: wirePlan };
}
544
+
545
/**
 * Serialise per-span sampler overrides into the `eliza_span_samplers`
 * request-body fragment read by the fork-side llama-server. It is a tolerant
 * extension: old binaries ignore it and the grammar still constrains the same
 * tokens; only the per-span argmax determinism guarantee is lost on that
 * legacy path.
 *
 * Wire schema (snake_case, following OpenAI body conventions):
 *   {
 *     overrides: [
 *       { span_index: number, temperature: number, top_k?: number, top_p?: number }
 *     ],
 *     strict?: boolean
 *   }
 *
 * `top_k` / `top_p` are written only when the override holds a number for
 * them, and `strict` only when the plan sets it to exactly `true`.
 *
 * @param plan - Sampler plan, or `null` / `undefined` when there is none.
 * @returns `{ eliza_span_samplers: … }`, or `{}` when there is no plan or it
 *   has no overrides — a stock server then never sees an empty fork extension.
 */
export function spanSamplerPlanRequestFields(
  plan: SpanSamplerPlan | undefined | null,
): Record<string, unknown> {
  if (!plan) return {};
  if (plan.overrides.length === 0) return {};

  const overrides = plan.overrides.map(
    (o): Record<string, unknown> => ({
      span_index: o.spanIndex,
      temperature: o.temperature,
      ...(typeof o.topK === "number" ? { top_k: o.topK } : {}),
      ...(typeof o.topP === "number" ? { top_p: o.topP } : {}),
    }),
  );

  const body: Record<string, unknown> =
    plan.strict === true ? { overrides, strict: true } : { overrides };
  return { eliza_span_samplers: body };
}
579
+
580
+ // ---------------------------------------------------------------------------
581
+ // Eliza harness schema — the compact descriptor the agent loop hands the engine
582
+ // ---------------------------------------------------------------------------
583
+
584
/**
 * Compact, engine-facing descriptor for a structured output the agent loop
 * wants forced. It bundles three things:
 *
 *  1. a {@link ResponseSkeleton}, which compiles to a lazy GBNF for the
 *     constrained-decode path;
 *  2. the {@link ElizaPrefillPlan} derived from that skeleton (the
 *     deterministic-token short-circuit);
 *  3. a short-id → long-name map, so the on-wire / decoded form can stay on
 *     canonical short action ids / enum values while callers can still obtain
 *     the display label.
 *
 * Producers: `@elizaos/core` `buildPlannerActionGrammar` /
 * `buildResponseGrammar`, wrapped by {@link elizaHarnessSchemaFromSkeleton}.
 * Consumer: the local engine (`ffi-streaming-backend.ts` / `engine.ts`).
 */
export interface ElizaHarnessSchema {
  /** Structure-forcing description; compiles to a lazy GBNF. */
  skeleton: ResponseSkeleton;

  /** Pre-built GBNF from the producer; when present it wins over compiling the skeleton. */
  grammar?: string;

  /** Deterministic-token short-circuit derived from the skeleton; `null` when there is nothing to prefill. */
  prefillPlan: ElizaPrefillPlan | null;

  /**
   * Canonical short id → human-facing long name (display label) for any closed
   * enum the descriptor pins (action ids, known enum values). The wire form is
   * the short id; callers that want the label look it up here. Empty when
   * nothing needs expanding.
   */
  longNames: Record<string, string>;

  /** Cache key (the skeleton's id). */
  id?: string;
}
613
+
614
/**
 * Assemble an {@link ElizaHarnessSchema} from a {@link ResponseSkeleton} plus
 * an optional pre-built grammar and short-id → long-name map. The prefill plan
 * is computed here via {@link compilePrefillPlan}, so producers share a single
 * derivation instead of each reimplementing it.
 *
 * @param input - `skeleton` is required. `grammar` is passed through
 *   unchanged, `longNames` defaults to an empty map, and `tokenize` is
 *   forwarded to {@link compilePrefillPlan}.
 * @returns The descriptor; its `id` is copied from the skeleton.
 */
export function elizaHarnessSchemaFromSkeleton(input: {
  skeleton: ResponseSkeleton;
  grammar?: string;
  longNames?: Record<string, string>;
  tokenize?: (text: string) => number[];
}): ElizaHarnessSchema {
  const { skeleton, grammar, longNames, tokenize } = input;
  const prefillPlan = compilePrefillPlan(skeleton, tokenize);
  return {
    skeleton,
    grammar,
    prefillPlan,
    longNames: longNames ?? {},
    id: skeleton.id,
  };
}
634
+
635
/**
 * Expand a canonical short id decoded out of a constrained generation back to
 * its human-facing long name (display label), using the descriptor's
 * {@link ElizaHarnessSchema.longNames} map.
 *
 * Only the map's own entries are consulted. A plain property read would also
 * resolve names inherited from `Object.prototype` (`constructor`, `toString`,
 * `__proto__`, …) and hand back a function or object where a string is
 * promised, so such ids are treated as unmapped.
 *
 * @param schema - Descriptor holding the name map; `undefined` means no map.
 * @param shortId - Short id to expand.
 * @returns The registered long name, or `shortId` unchanged when there is no
 *   schema or no own mapping for it.
 */
export function expandShortName(
  schema: ElizaHarnessSchema | undefined,
  shortId: string,
): string {
  if (!schema) return shortId;
  const { longNames } = schema;
  if (!Object.hasOwn(longNames, shortId)) return shortId;
  return longNames[shortId] ?? shortId;
}
651
+
652
/**
 * Inverse of {@link expandShortName}: map a (possibly long) name supplied by
 * the caller to the canonical short id the wire form expects.
 *
 * A name that is itself an own key of the map is taken to be a short id
 * already and returned as is. Otherwise the first entry, in the map's
 * enumeration order, whose long name equals `name` provides the short id.
 *
 * @param schema - Descriptor holding the name map; `undefined` means no map.
 * @param name - Short id or long name to canonicalise.
 * @returns The canonical short id, or `name` unchanged when there is no schema
 *   or nothing matches.
 */
export function canonicalizeShortName(
  schema: ElizaHarnessSchema | undefined,
  name: string,
): string {
  if (!schema) return name;
  const { longNames } = schema;
  // Already a short id.
  if (Object.hasOwn(longNames, name)) return name;
  const hit = Object.entries(longNames).find(([, label]) => label === name);
  return hit ? hit[0] : name;
}
668
+
669
/**
 * Work out the GBNF, the prefill plan and the assistant-turn prefill to apply
 * to a generation call.
 *
 * With a harness schema (`params.elizaSchema`):
 *  - grammar: the schema's own `grammar` when it is a non-blank string (used
 *    as a non-lazy grammar), otherwise the result of compiling the schema's
 *    skeleton with {@link compileSkeletonToGbnf};
 *  - prefill plan: the schema's `prefillPlan`, recomputed from the skeleton
 *    when that is `null` / missing;
 *  - prefill: a non-empty `params.prefill` wins, else the plan's non-empty
 *    prefix, else `null`;
 *  - when the chosen prefill equals the plan's prefix, the grammar is passed
 *    through `stripPrefilledPrefixFromGrammar`, falling back to the unstripped
 *    grammar when that returns nothing.
 *
 * Without a harness schema the grammar comes from `resolveGrammarForParams`,
 * there is no prefill plan (it is off by default), and the prefill is a
 * non-empty `params.prefill` or `null`.
 *
 * NOTE(review): on the harness-schema path no params-level explicit grammar is
 * consulted; if `StructuredGenerateParams` carries one that is meant to take
 * precedence over the schema, confirm this is intended.
 *
 * @param params - Structured generation params, or `undefined`.
 * @returns The resolved triple; every member is `null` when `params` is
 *   missing.
 */
export function resolveGuidedDecodeForParams(
  params: StructuredGenerateParams | undefined,
): {
  grammar: GbnfGrammar | null;
  prefillPlan: ElizaPrefillPlan | null;
  prefill: string | null;
} {
  if (!params) return { grammar: null, prefillPlan: null, prefill: null };

  const harness = params.elizaSchema;
  if (!harness) {
    const grammar = resolveGrammarForParams(params);
    const hasCallerPrefill =
      typeof params.prefill === "string" && params.prefill.length > 0;
    return {
      grammar,
      prefillPlan: null,
      prefill: hasCallerPrefill ? params.prefill : null,
    };
  }

  let baseGrammar: GbnfGrammar | null;
  if (typeof harness.grammar === "string" && harness.grammar.trim().length > 0) {
    baseGrammar = { source: harness.grammar, lazy: false };
  } else {
    baseGrammar = compileSkeletonToGbnf(harness.skeleton);
  }

  const prefillPlan =
    harness.prefillPlan ?? compilePrefillPlan(harness.skeleton);

  // The plan's prefix is only a fallback: a prefill supplied by the caller wins.
  let prefill: string | null = null;
  if (typeof params.prefill === "string" && params.prefill.length > 0) {
    prefill = params.prefill;
  } else if (prefillPlan && prefillPlan.prefix.length > 0) {
    prefill = prefillPlan.prefix;
  }

  let grammar = baseGrammar;
  if (baseGrammar && prefill && prefillPlan?.prefix === prefill) {
    grammar = stripPrefilledPrefixFromGrammar(baseGrammar, prefill) ?? baseGrammar;
  }
  return { grammar, prefillPlan, prefill };
}