@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (701)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +82 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/actions/transcription-control.d.ts +29 -0
  11. package/src/actions/transcription-control.d.ts.map +1 -0
  12. package/src/actions/transcription-control.test.ts +100 -0
  13. package/src/actions/transcription-control.ts +127 -0
  14. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  15. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  16. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  17. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  18. package/src/adapters/capacitor-llama/environment.ts +71 -0
  19. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  20. package/src/adapters/capacitor-llama/index.ts +807 -0
  21. package/src/adapters/capacitor-llama/loader.ts +109 -0
  22. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  23. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  24. package/src/adapters/capacitor-llama/types.ts +374 -0
  25. package/src/backends/apple-foundation.ts +127 -0
  26. package/src/index.d.ts +8 -0
  27. package/src/index.d.ts.map +1 -0
  28. package/src/index.ts +62 -0
  29. package/src/local-inference-routes.d.ts +38 -0
  30. package/src/local-inference-routes.d.ts.map +1 -0
  31. package/src/local-inference-routes.test.ts +344 -0
  32. package/src/local-inference-routes.ts +1543 -0
  33. package/src/provider.d.ts +21 -0
  34. package/src/provider.d.ts.map +1 -0
  35. package/src/provider.ts +1082 -0
  36. package/src/routes/compat-helpers.d.ts +18 -0
  37. package/src/routes/compat-helpers.d.ts.map +1 -0
  38. package/src/routes/compat-helpers.ts +274 -0
  39. package/src/routes/family-member-route.d.ts +62 -0
  40. package/src/routes/family-member-route.d.ts.map +1 -0
  41. package/src/routes/family-member-route.ts +353 -0
  42. package/src/routes/index.d.ts +19 -0
  43. package/src/routes/index.d.ts.map +1 -0
  44. package/src/routes/index.ts +60 -0
  45. package/src/routes/live-diarization-route.d.ts +26 -0
  46. package/src/routes/live-diarization-route.d.ts.map +1 -0
  47. package/src/routes/live-diarization-route.test.ts +213 -0
  48. package/src/routes/live-diarization-route.ts +122 -0
  49. package/src/routes/local-inference-asr-route.d.ts +4 -0
  50. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  51. package/src/routes/local-inference-asr-route.test.ts +205 -0
  52. package/src/routes/local-inference-asr-route.ts +163 -0
  53. package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
  54. package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  55. package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
  56. package/src/routes/local-inference-asr-transcribe.ts +97 -0
  57. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  58. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  59. package/src/routes/local-inference-compat-routes.test.ts +485 -0
  60. package/src/routes/local-inference-compat-routes.ts +808 -0
  61. package/src/routes/local-inference-tts-route.d.ts +7 -0
  62. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  63. package/src/routes/local-inference-tts-route.test.ts +179 -0
  64. package/src/routes/local-inference-tts-route.ts +230 -0
  65. package/src/routes/transcript-audio-store.d.ts +15 -0
  66. package/src/routes/transcript-audio-store.d.ts.map +1 -0
  67. package/src/routes/transcript-audio-store.ts +27 -0
  68. package/src/routes/transcripts-routes.d.ts +36 -0
  69. package/src/routes/transcripts-routes.d.ts.map +1 -0
  70. package/src/routes/transcripts-routes.test.ts +144 -0
  71. package/src/routes/transcripts-routes.ts +159 -0
  72. package/src/routes/voice-first-run-routes.d.ts +62 -0
  73. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  74. package/src/routes/voice-first-run-routes.ts +524 -0
  75. package/src/routes/voice-models-routes.d.ts +62 -0
  76. package/src/routes/voice-models-routes.d.ts.map +1 -0
  77. package/src/routes/voice-models-routes.ts +554 -0
  78. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  79. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  80. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  81. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  82. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  83. package/src/routes/voice-profiles-management-routes.ts +476 -0
  84. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  85. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  86. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  87. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  88. package/src/runtime/capacitor-llama.d.ts +25 -0
  89. package/src/runtime/embedding-manager-support.d.ts +77 -0
  90. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  91. package/src/runtime/embedding-manager-support.ts +497 -0
  92. package/src/runtime/embedding-presets.d.ts +16 -0
  93. package/src/runtime/embedding-presets.d.ts.map +1 -0
  94. package/src/runtime/embedding-presets.ts +81 -0
  95. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  96. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  97. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  98. package/src/runtime/embedding-warmup-policy.ts +48 -0
  99. package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
  100. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  101. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  102. package/src/runtime/ensure-local-inference-handler.ts +1448 -0
  103. package/src/runtime/index.d.ts +15 -0
  104. package/src/runtime/index.d.ts.map +1 -0
  105. package/src/runtime/index.ts +33 -0
  106. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  107. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  108. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  109. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  110. package/src/runtime/voice-entity-binding.d.ts +103 -0
  111. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  112. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  113. package/src/runtime/voice-entity-binding.ts +328 -0
  114. package/src/services/README.md +71 -0
  115. package/src/services/__tests__/backend-selector.test.ts +101 -0
  116. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  117. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  118. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  119. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  120. package/src/services/__tests__/runtime-target.test.ts +176 -0
  121. package/src/services/active-model-switch-rollback.test.ts +183 -0
  122. package/src/services/active-model.d.ts +282 -0
  123. package/src/services/active-model.d.ts.map +1 -0
  124. package/src/services/active-model.ts +1213 -0
  125. package/src/services/assignments.d.ts +71 -0
  126. package/src/services/assignments.d.ts.map +1 -0
  127. package/src/services/assignments.test.ts +80 -0
  128. package/src/services/assignments.ts +230 -0
  129. package/src/services/backend-selector.ts +95 -0
  130. package/src/services/backend.d.ts +346 -0
  131. package/src/services/backend.d.ts.map +1 -0
  132. package/src/services/backend.ts +612 -0
  133. package/src/services/bionic-host-loader.d.ts +46 -0
  134. package/src/services/bionic-host-loader.d.ts.map +1 -0
  135. package/src/services/bionic-host-loader.test.ts +133 -0
  136. package/src/services/bionic-host-loader.ts +180 -0
  137. package/src/services/bundled-models.d.ts +34 -0
  138. package/src/services/bundled-models.d.ts.map +1 -0
  139. package/src/services/bundled-models.ts +129 -0
  140. package/src/services/cache-bridge.d.ts +206 -0
  141. package/src/services/cache-bridge.d.ts.map +1 -0
  142. package/src/services/cache-bridge.test.ts +516 -0
  143. package/src/services/cache-bridge.ts +423 -0
  144. package/src/services/catalog.d.ts +10 -0
  145. package/src/services/catalog.d.ts.map +1 -0
  146. package/src/services/catalog.test.ts +238 -0
  147. package/src/services/catalog.ts +27 -0
  148. package/src/services/checkpoint-client.d.ts +109 -0
  149. package/src/services/checkpoint-client.d.ts.map +1 -0
  150. package/src/services/checkpoint-client.ts +258 -0
  151. package/src/services/checkpoint-manager.ts +474 -0
  152. package/src/services/cloud-fallback.d.ts +102 -0
  153. package/src/services/cloud-fallback.d.ts.map +1 -0
  154. package/src/services/cloud-fallback.ts +230 -0
  155. package/src/services/conversation-registry.d.ts +142 -0
  156. package/src/services/conversation-registry.d.ts.map +1 -0
  157. package/src/services/conversation-registry.test.ts +235 -0
  158. package/src/services/conversation-registry.ts +264 -0
  159. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
  160. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  161. package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
  162. package/src/services/device-bridge.d.ts +188 -0
  163. package/src/services/device-bridge.d.ts.map +1 -0
  164. package/src/services/device-bridge.ts +1237 -0
  165. package/src/services/device-resource-metrics.d.ts +149 -0
  166. package/src/services/device-resource-metrics.d.ts.map +1 -0
  167. package/src/services/device-resource-metrics.test.ts +98 -0
  168. package/src/services/device-resource-metrics.ts +346 -0
  169. package/src/services/device-tier.d.ts +115 -0
  170. package/src/services/device-tier.d.ts.map +1 -0
  171. package/src/services/device-tier.test.ts +371 -0
  172. package/src/services/device-tier.ts +410 -0
  173. package/src/services/downloader.d.ts +82 -0
  174. package/src/services/downloader.d.ts.map +1 -0
  175. package/src/services/downloader.test.ts +747 -0
  176. package/src/services/downloader.ts +925 -0
  177. package/src/services/engine-direct-bundle.test.ts +58 -0
  178. package/src/services/engine-streaming.test.ts +80 -0
  179. package/src/services/engine.d.ts +540 -0
  180. package/src/services/engine.d.ts.map +1 -0
  181. package/src/services/engine.ts +1909 -0
  182. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  183. package/src/services/ensure-local-artifacts.test.ts +368 -0
  184. package/src/services/ensure-local-artifacts.ts +351 -0
  185. package/src/services/external-scanner.d.ts +17 -0
  186. package/src/services/external-scanner.d.ts.map +1 -0
  187. package/src/services/external-scanner.ts +312 -0
  188. package/src/services/ffi-llm-mock.ts +354 -0
  189. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  190. package/src/services/ffi-streaming-backend.d.ts +180 -0
  191. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  192. package/src/services/ffi-streaming-backend.ts +382 -0
  193. package/src/services/ffi-streaming-runner.d.ts +122 -0
  194. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  195. package/src/services/ffi-streaming-runner.test.ts +60 -0
  196. package/src/services/ffi-streaming-runner.ts +354 -0
  197. package/src/services/ffi-unload-ordering.test.ts +162 -0
  198. package/src/services/gpu-autotune.ts +534 -0
  199. package/src/services/gpu-detect.d.ts +56 -0
  200. package/src/services/gpu-detect.d.ts.map +1 -0
  201. package/src/services/gpu-detect.ts +139 -0
  202. package/src/services/handler-registry.d.ts +72 -0
  203. package/src/services/handler-registry.d.ts.map +1 -0
  204. package/src/services/handler-registry.ts +240 -0
  205. package/src/services/hardware.d.ts +63 -0
  206. package/src/services/hardware.d.ts.map +1 -0
  207. package/src/services/hardware.test.ts +231 -0
  208. package/src/services/hardware.ts +410 -0
  209. package/src/services/hf-search.d.ts +26 -0
  210. package/src/services/hf-search.d.ts.map +1 -0
  211. package/src/services/hf-search.test.ts +69 -0
  212. package/src/services/hf-search.ts +420 -0
  213. package/src/services/image-description-runtime.d.ts +14 -0
  214. package/src/services/image-description-runtime.d.ts.map +1 -0
  215. package/src/services/image-description-runtime.test.ts +61 -0
  216. package/src/services/image-description-runtime.ts +118 -0
  217. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  218. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  219. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  220. package/src/services/imagegen/backend-selector.d.ts +118 -0
  221. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  222. package/src/services/imagegen/backend-selector.ts +277 -0
  223. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  224. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  225. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  226. package/src/services/imagegen/errors.d.ts +16 -0
  227. package/src/services/imagegen/errors.d.ts.map +1 -0
  228. package/src/services/imagegen/errors.ts +40 -0
  229. package/src/services/imagegen/index.d.ts +58 -0
  230. package/src/services/imagegen/index.d.ts.map +1 -0
  231. package/src/services/imagegen/index.ts +144 -0
  232. package/src/services/imagegen/mflux.d.ts +74 -0
  233. package/src/services/imagegen/mflux.d.ts.map +1 -0
  234. package/src/services/imagegen/mflux.ts +313 -0
  235. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  236. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  237. package/src/services/imagegen/sd-cpp.ts +718 -0
  238. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  239. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  240. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  241. package/src/services/imagegen/types.d.ts +181 -0
  242. package/src/services/imagegen/types.d.ts.map +1 -0
  243. package/src/services/imagegen/types.ts +193 -0
  244. package/src/services/index.d.ts +29 -0
  245. package/src/services/index.d.ts.map +1 -0
  246. package/src/services/index.ts +211 -0
  247. package/src/services/inference-capabilities.d.ts +132 -0
  248. package/src/services/inference-capabilities.d.ts.map +1 -0
  249. package/src/services/inference-capabilities.test.ts +75 -0
  250. package/src/services/inference-capabilities.ts +204 -0
  251. package/src/services/inference-telemetry.d.ts +59 -0
  252. package/src/services/inference-telemetry.d.ts.map +1 -0
  253. package/src/services/inference-telemetry.ts +143 -0
  254. package/src/services/ios-llama-streaming.ts +248 -0
  255. package/src/services/kv-spill.d.ts +189 -0
  256. package/src/services/kv-spill.d.ts.map +1 -0
  257. package/src/services/kv-spill.test.ts +222 -0
  258. package/src/services/kv-spill.ts +356 -0
  259. package/src/services/latency-trace.d.ts +346 -0
  260. package/src/services/latency-trace.d.ts.map +1 -0
  261. package/src/services/latency-trace.test.ts +266 -0
  262. package/src/services/latency-trace.ts +844 -0
  263. package/src/services/llama-server-metrics.ts +304 -0
  264. package/src/services/llm-streaming-binding.d.ts +96 -0
  265. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  266. package/src/services/llm-streaming-binding.ts +136 -0
  267. package/src/services/load-args.d.ts +82 -0
  268. package/src/services/load-args.d.ts.map +1 -0
  269. package/src/services/load-args.ts +81 -0
  270. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  271. package/src/services/manifest/index.d.ts +4 -0
  272. package/src/services/manifest/index.d.ts.map +1 -0
  273. package/src/services/manifest/index.ts +66 -0
  274. package/src/services/manifest/manifest.test.ts +689 -0
  275. package/src/services/manifest/schema.d.ts +713 -0
  276. package/src/services/manifest/schema.d.ts.map +1 -0
  277. package/src/services/manifest/schema.ts +653 -0
  278. package/src/services/manifest/types.d.ts +30 -0
  279. package/src/services/manifest/types.d.ts.map +1 -0
  280. package/src/services/manifest/types.ts +55 -0
  281. package/src/services/manifest/validator.d.ts +66 -0
  282. package/src/services/manifest/validator.d.ts.map +1 -0
  283. package/src/services/manifest/validator.ts +567 -0
  284. package/src/services/memory-arbiter.d.ts +318 -0
  285. package/src/services/memory-arbiter.d.ts.map +1 -0
  286. package/src/services/memory-arbiter.test.ts +419 -0
  287. package/src/services/memory-arbiter.ts +925 -0
  288. package/src/services/memory-monitor.d.ts +122 -0
  289. package/src/services/memory-monitor.d.ts.map +1 -0
  290. package/src/services/memory-monitor.test.ts +208 -0
  291. package/src/services/memory-monitor.ts +297 -0
  292. package/src/services/memory-pressure.d.ts +130 -0
  293. package/src/services/memory-pressure.d.ts.map +1 -0
  294. package/src/services/memory-pressure.ts +414 -0
  295. package/src/services/mtp-doctor.d.ts +13 -0
  296. package/src/services/mtp-doctor.d.ts.map +1 -0
  297. package/src/services/mtp-doctor.ts +78 -0
  298. package/src/services/network-policy.d.ts +127 -0
  299. package/src/services/network-policy.d.ts.map +1 -0
  300. package/src/services/network-policy.ts +346 -0
  301. package/src/services/paths.d.ts +6 -0
  302. package/src/services/paths.d.ts.map +1 -0
  303. package/src/services/paths.ts +25 -0
  304. package/src/services/planner-skeleton.d.ts +124 -0
  305. package/src/services/planner-skeleton.d.ts.map +1 -0
  306. package/src/services/planner-skeleton.ts +175 -0
  307. package/src/services/providers.d.ts +38 -0
  308. package/src/services/providers.d.ts.map +1 -0
  309. package/src/services/providers.ts +507 -0
  310. package/src/services/ram-budget-cache.test.ts +163 -0
  311. package/src/services/ram-budget.d.ts +110 -0
  312. package/src/services/ram-budget.d.ts.map +1 -0
  313. package/src/services/ram-budget.ts +0 -0
  314. package/src/services/readiness.d.ts +9 -0
  315. package/src/services/readiness.d.ts.map +1 -0
  316. package/src/services/readiness.test.ts +87 -0
  317. package/src/services/readiness.ts +238 -0
  318. package/src/services/recommendation.d.ts +111 -0
  319. package/src/services/recommendation.d.ts.map +1 -0
  320. package/src/services/recommendation.ts +671 -0
  321. package/src/services/registry.d.ts +35 -0
  322. package/src/services/registry.d.ts.map +1 -0
  323. package/src/services/registry.ts +151 -0
  324. package/src/services/router-handler.d.ts +92 -0
  325. package/src/services/router-handler.d.ts.map +1 -0
  326. package/src/services/router-handler.test.ts +45 -0
  327. package/src/services/router-handler.ts +407 -0
  328. package/src/services/routing-policy.d.ts +69 -0
  329. package/src/services/routing-policy.d.ts.map +1 -0
  330. package/src/services/routing-policy.test.ts +164 -0
  331. package/src/services/routing-policy.ts +297 -0
  332. package/src/services/routing-preferences.d.ts +8 -0
  333. package/src/services/routing-preferences.d.ts.map +1 -0
  334. package/src/services/routing-preferences.ts +17 -0
  335. package/src/services/runtime-target.d.ts +98 -0
  336. package/src/services/runtime-target.d.ts.map +1 -0
  337. package/src/services/runtime-target.ts +154 -0
  338. package/src/services/service.d.ts +128 -0
  339. package/src/services/service.d.ts.map +1 -0
  340. package/src/services/service.test.ts +223 -0
  341. package/src/services/service.ts +735 -0
  342. package/src/services/session-pool.d.ts +72 -0
  343. package/src/services/session-pool.d.ts.map +1 -0
  344. package/src/services/session-pool.ts +153 -0
  345. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  346. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  347. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  348. package/src/services/structured-output/deterministic-repair.ts +443 -0
  349. package/src/services/structured-output/index.ts +4 -0
  350. package/src/services/structured-output.d.ts +311 -0
  351. package/src/services/structured-output.d.ts.map +1 -0
  352. package/src/services/structured-output.test.ts +483 -0
  353. package/src/services/structured-output.ts +712 -0
  354. package/src/services/system-memory.d.ts +33 -0
  355. package/src/services/system-memory.d.ts.map +1 -0
  356. package/src/services/system-memory.test.ts +47 -0
  357. package/src/services/system-memory.ts +67 -0
  358. package/src/services/transcription-priority.test.ts +211 -0
  359. package/src/services/types.d.ts +19 -0
  360. package/src/services/types.d.ts.map +1 -0
  361. package/src/services/types.ts +55 -0
  362. package/src/services/verify-on-device.d.ts +34 -0
  363. package/src/services/verify-on-device.d.ts.map +1 -0
  364. package/src/services/verify-on-device.test.ts +87 -0
  365. package/src/services/verify-on-device.ts +127 -0
  366. package/src/services/verify.d.ts +8 -0
  367. package/src/services/verify.d.ts.map +1 -0
  368. package/src/services/verify.ts +13 -0
  369. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  370. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  371. package/src/services/vision/aosp-unavailable.ts +163 -0
  372. package/src/services/vision/capacitor-llama.d.ts +99 -0
  373. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  374. package/src/services/vision/capacitor-llama.ts +255 -0
  375. package/src/services/vision/cloud-fallback.d.ts +47 -0
  376. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  377. package/src/services/vision/cloud-fallback.test.ts +243 -0
  378. package/src/services/vision/cloud-fallback.ts +268 -0
  379. package/src/services/vision/fallback-chain.test.ts +86 -0
  380. package/src/services/vision/hash.d.ts +71 -0
  381. package/src/services/vision/hash.d.ts.map +1 -0
  382. package/src/services/vision/hash.ts +157 -0
  383. package/src/services/vision/index.d.ts +95 -0
  384. package/src/services/vision/index.d.ts.map +1 -0
  385. package/src/services/vision/index.ts +251 -0
  386. package/src/services/vision/llama-server.d.ts +73 -0
  387. package/src/services/vision/llama-server.d.ts.map +1 -0
  388. package/src/services/vision/llama-server.ts +177 -0
  389. package/src/services/vision/types.d.ts +153 -0
  390. package/src/services/vision/types.d.ts.map +1 -0
  391. package/src/services/vision/types.ts +154 -0
  392. package/src/services/vision/vast-fallback.d.ts +18 -0
  393. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  394. package/src/services/vision/vast-fallback.ts +127 -0
  395. package/src/services/vision-embedding-cache.d.ts +98 -0
  396. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  397. package/src/services/vision-embedding-cache.ts +189 -0
  398. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  399. package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
  400. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  401. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  402. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  403. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  404. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  405. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  406. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  407. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  408. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  409. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  410. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  411. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
  412. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  413. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  414. package/src/services/voice/asr-timed.real.test.ts +141 -0
  415. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  416. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  417. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  418. package/src/services/voice/audio-frame-consumer.ts +491 -0
  419. package/src/services/voice/barge-in.d.ts +112 -0
  420. package/src/services/voice/barge-in.d.ts.map +1 -0
  421. package/src/services/voice/barge-in.test.ts +244 -0
  422. package/src/services/voice/barge-in.ts +336 -0
  423. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  424. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  425. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  426. package/src/services/voice/cancellation-coordinator.ts +269 -0
  427. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  428. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  429. package/src/services/voice/checkpoint-manager.ts +401 -0
  430. package/src/services/voice/checkpoint-policy.ts +336 -0
  431. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  432. package/src/services/voice/e2e-harness.test.ts +182 -0
  433. package/src/services/voice/e2e-harness.ts +743 -0
  434. package/src/services/voice/eager-context-builder.d.ts +170 -0
  435. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  436. package/src/services/voice/eager-context-builder.ts +262 -0
  437. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  438. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  439. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  440. package/src/services/voice/embedding-server.ts +200 -0
  441. package/src/services/voice/embedding.d.ts +133 -0
  442. package/src/services/voice/embedding.d.ts.map +1 -0
  443. package/src/services/voice/embedding.test.ts +131 -0
  444. package/src/services/voice/embedding.ts +243 -0
  445. package/src/services/voice/emotion-attribution.d.ts +68 -0
  446. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  447. package/src/services/voice/emotion-attribution.test.ts +129 -0
  448. package/src/services/voice/emotion-attribution.ts +361 -0
  449. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  450. package/src/services/voice/engine-bridge.d.ts +759 -0
  451. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  452. package/src/services/voice/engine-bridge.test.ts +384 -0
  453. package/src/services/voice/engine-bridge.ts +2302 -0
  454. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  455. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  456. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  457. package/src/services/voice/eot-classifier.d.ts +214 -0
  458. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  459. package/src/services/voice/eot-classifier.ts +533 -0
  460. package/src/services/voice/errors.d.ts +20 -0
  461. package/src/services/voice/errors.d.ts.map +1 -0
  462. package/src/services/voice/errors.ts +32 -0
  463. package/src/services/voice/expressive-tags.d.ts +158 -0
  464. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  465. package/src/services/voice/expressive-tags.ts +405 -0
  466. package/src/services/voice/ffi-bindings.d.ts +674 -0
  467. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  468. package/src/services/voice/ffi-bindings.test.ts +728 -0
  469. package/src/services/voice/ffi-bindings.ts +3225 -0
  470. package/src/services/voice/first-line-cache.d.ts +181 -0
  471. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  472. package/src/services/voice/first-line-cache.ts +725 -0
  473. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  474. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  475. package/src/services/voice/fused-eot-scorer.ts +135 -0
  476. package/src/services/voice/index.d.ts +91 -0
  477. package/src/services/voice/index.d.ts.map +1 -0
  478. package/src/services/voice/index.ts +481 -0
  479. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  480. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  481. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  482. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  483. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  484. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  485. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  486. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  487. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  488. package/src/services/voice/kokoro/index.ts +79 -0
  489. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  490. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  491. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  492. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  493. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  494. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  495. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  496. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  497. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  498. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  499. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  501. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  502. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  503. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  504. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  505. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  506. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  507. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  508. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  509. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  510. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  511. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  512. package/src/services/voice/kokoro/types.d.ts +82 -0
  513. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  514. package/src/services/voice/kokoro/types.ts +95 -0
  515. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  516. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  517. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  518. package/src/services/voice/kokoro/voices.d.ts +30 -0
  519. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  520. package/src/services/voice/kokoro/voices.ts +64 -0
  521. package/src/services/voice/lifecycle.d.ts +135 -0
  522. package/src/services/voice/lifecycle.d.ts.map +1 -0
  523. package/src/services/voice/lifecycle.test.ts +315 -0
  524. package/src/services/voice/lifecycle.ts +301 -0
  525. package/src/services/voice/live-diarization-session.d.ts +96 -0
  526. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  527. package/src/services/voice/live-diarization-session.ts +289 -0
  528. package/src/services/voice/mic-source.d.ts +136 -0
  529. package/src/services/voice/mic-source.d.ts.map +1 -0
  530. package/src/services/voice/mic-source.test.ts +210 -0
  531. package/src/services/voice/mic-source.ts +503 -0
  532. package/src/services/voice/optimistic-policy.d.ts +109 -0
  533. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  534. package/src/services/voice/optimistic-policy.test.ts +101 -0
  535. package/src/services/voice/optimistic-policy.ts +192 -0
  536. package/src/services/voice/optimistic-rollback.ts +343 -0
  537. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  538. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  539. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  540. package/src/services/voice/partial-stabilizer.ts +140 -0
  541. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  542. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  543. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  544. package/src/services/voice/phrase-cache.d.ts +76 -0
  545. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  546. package/src/services/voice/phrase-cache.test.ts +242 -0
  547. package/src/services/voice/phrase-cache.ts +186 -0
  548. package/src/services/voice/phrase-chunker.d.ts +62 -0
  549. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  550. package/src/services/voice/phrase-chunker.test.ts +239 -0
  551. package/src/services/voice/phrase-chunker.ts +281 -0
  552. package/src/services/voice/pipeline-impls.d.ts +151 -0
  553. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  554. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  555. package/src/services/voice/pipeline-impls.test.ts +292 -0
  556. package/src/services/voice/pipeline-impls.ts +315 -0
  557. package/src/services/voice/pipeline.d.ts +216 -0
  558. package/src/services/voice/pipeline.d.ts.map +1 -0
  559. package/src/services/voice/pipeline.ts +505 -0
  560. package/src/services/voice/prefill-client.d.ts +123 -0
  561. package/src/services/voice/prefill-client.d.ts.map +1 -0
  562. package/src/services/voice/prefill-client.ts +316 -0
  563. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  564. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  565. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  566. package/src/services/voice/profile-store.d.ts +248 -0
  567. package/src/services/voice/profile-store.d.ts.map +1 -0
  568. package/src/services/voice/profile-store.ts +887 -0
  569. package/src/services/voice/real-audio-decode.test.ts +148 -0
  570. package/src/services/voice/ring-buffer.d.ts +40 -0
  571. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  572. package/src/services/voice/ring-buffer.test.ts +129 -0
  573. package/src/services/voice/ring-buffer.ts +123 -0
  574. package/src/services/voice/rollback-queue.d.ts +24 -0
  575. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  576. package/src/services/voice/rollback-queue.ts +74 -0
  577. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  578. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  579. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  580. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  581. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  582. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  583. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  584. package/src/services/voice/scheduler.d.ts +146 -0
  585. package/src/services/voice/scheduler.d.ts.map +1 -0
  586. package/src/services/voice/scheduler.t2.test.ts +141 -0
  587. package/src/services/voice/scheduler.ts +927 -0
  588. package/src/services/voice/shared-resources.d.ts +190 -0
  589. package/src/services/voice/shared-resources.d.ts.map +1 -0
  590. package/src/services/voice/shared-resources.ts +320 -0
  591. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  592. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  593. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  594. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  595. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  596. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  597. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  598. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  599. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  600. package/src/services/voice/speaker/diarizer.ts +218 -0
  601. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  602. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  603. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  604. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  605. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  606. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  607. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  608. package/src/services/voice/speaker/encoder.d.ts +37 -0
  609. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  610. package/src/services/voice/speaker/encoder.ts +105 -0
  611. package/src/services/voice/speaker-imprint.d.ts +83 -0
  612. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  613. package/src/services/voice/speaker-imprint.test.ts +185 -0
  614. package/src/services/voice/speaker-imprint.ts +312 -0
  615. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  616. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  617. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  618. package/src/services/voice/speaker-preset-cache.ts +195 -0
  619. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  620. package/src/services/voice/system-audio-sink.d.ts +73 -0
  621. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  622. package/src/services/voice/system-audio-sink.test.ts +29 -0
  623. package/src/services/voice/system-audio-sink.ts +366 -0
  624. package/src/services/voice/transcriber.d.ts +244 -0
  625. package/src/services/voice/transcriber.d.ts.map +1 -0
  626. package/src/services/voice/transcriber.test.ts +392 -0
  627. package/src/services/voice/transcriber.ts +704 -0
  628. package/src/services/voice/transcript-knowledge.d.ts +37 -0
  629. package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
  630. package/src/services/voice/transcript-knowledge.test.ts +68 -0
  631. package/src/services/voice/transcript-knowledge.ts +75 -0
  632. package/src/services/voice/transcript-service.d.ts +41 -0
  633. package/src/services/voice/transcript-service.d.ts.map +1 -0
  634. package/src/services/voice/transcript-service.test.ts +137 -0
  635. package/src/services/voice/transcript-service.ts +141 -0
  636. package/src/services/voice/transcript-store.d.ts +53 -0
  637. package/src/services/voice/transcript-store.d.ts.map +1 -0
  638. package/src/services/voice/transcript-store.test.ts +153 -0
  639. package/src/services/voice/transcript-store.ts +132 -0
  640. package/src/services/voice/turn-controller.d.ts +183 -0
  641. package/src/services/voice/turn-controller.d.ts.map +1 -0
  642. package/src/services/voice/turn-controller.test.ts +575 -0
  643. package/src/services/voice/turn-controller.ts +596 -0
  644. package/src/services/voice/types.d.ts +643 -0
  645. package/src/services/voice/types.d.ts.map +1 -0
  646. package/src/services/voice/types.ts +699 -0
  647. package/src/services/voice/vad.d.ts +282 -0
  648. package/src/services/voice/vad.d.ts.map +1 -0
  649. package/src/services/voice/vad.test.ts +480 -0
  650. package/src/services/voice/vad.ts +827 -0
  651. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  652. package/src/services/voice/voice-budget.d.ts +241 -0
  653. package/src/services/voice/voice-budget.d.ts.map +1 -0
  654. package/src/services/voice/voice-budget.test.ts +418 -0
  655. package/src/services/voice/voice-budget.ts +635 -0
  656. package/src/services/voice/voice-duet.test.ts +375 -0
  657. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  658. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  659. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  660. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  661. package/src/services/voice/voice-preset-format.d.ts +158 -0
  662. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  663. package/src/services/voice/voice-preset-format.ts +700 -0
  664. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  665. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  666. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  667. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  668. package/src/services/voice/voice-profile-artifact.ts +518 -0
  669. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  670. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  671. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  672. package/src/services/voice/voice-profile-routes.ts +425 -0
  673. package/src/services/voice/voice-scenario.ts +154 -0
  674. package/src/services/voice/voice-settings.d.ts +82 -0
  675. package/src/services/voice/voice-settings.d.ts.map +1 -0
  676. package/src/services/voice/voice-settings.ts +172 -0
  677. package/src/services/voice/voice-state-machine.d.ts +364 -0
  678. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  679. package/src/services/voice/voice-state-machine.ts +727 -0
  680. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  681. package/src/services/voice/voice-workbench-report.ts +326 -0
  682. package/src/services/voice/voice-workbench.test.ts +158 -0
  683. package/src/services/voice/voice.test.ts +1070 -0
  684. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  685. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  686. package/src/services/voice/wake-word-ggml.ts +320 -0
  687. package/src/services/voice/wake-word.d.ts +255 -0
  688. package/src/services/voice/wake-word.d.ts.map +1 -0
  689. package/src/services/voice/wake-word.test.ts +298 -0
  690. package/src/services/voice/wake-word.ts +554 -0
  691. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  692. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  693. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  694. package/src/services/voice-model-updater.d.ts +240 -0
  695. package/src/services/voice-model-updater.d.ts.map +1 -0
  696. package/src/services/voice-model-updater.ts +724 -0
  697. package/src/services/voice-prewarm.d.ts +3 -0
  698. package/src/services/voice-prewarm.d.ts.map +1 -0
  699. package/src/services/voice-prewarm.ts +51 -0
  700. package/dist/index.d.ts +0 -37
  701. package/dist/index.js +0 -1098
@@ -0,0 +1,808 @@
1
+ /**
2
+ * HTTP routes for the local-inference / model management feature.
3
+ *
4
+ * Route shape and auth follow the established `*-compat-routes.ts` pattern:
5
+ * - `handleLocalInferenceCompatRoutes` returns `true` when it handles a
6
+ * request and `false` to pass through to the next handler.
7
+ * - Regular reads use `ensureRouteAuthorized`.
8
+ * - Mutating routes (download start/cancel, active switch, uninstall)
9
+ * use `ensureCompatSensitiveRouteAuthorized`.
10
+ * - SSE allows `?token=...` as an alternative to the auth header, via
11
+ * `isStreamAuthorized`.
12
+ */
13
+
14
+ import type http from "node:http";
15
+ import {
16
+ CandidateModelActivationError,
17
+ type KvOffloadMode,
18
+ type LocalInferenceLoadOverrides,
19
+ validateLocalInferenceLoadArgs,
20
+ } from "../services/active-model";
21
+ import { deviceBridge } from "../services/device-bridge";
22
+ import { classifyDeviceTier } from "../services/device-tier";
23
+ import {
24
+ handlerRegistry,
25
+ toPublicRegistration,
26
+ } from "../services/handler-registry";
27
+ import { tryGetMemoryArbiter } from "../services/memory-arbiter";
28
+ import { snapshotProviders } from "../services/providers";
29
+ import {
30
+ isRoutingPolicy,
31
+ ROUTING_POLICIES,
32
+ readRoutingPreferences,
33
+ setPolicy,
34
+ setPreferredProvider,
35
+ } from "../services/routing-preferences";
36
+ import { localInferenceService } from "../services/service";
37
+ import { readSystemMemory } from "../services/system-memory";
38
+ import type { AgentModelSlot, CatalogModel } from "../services/types";
39
+ import { AGENT_MODEL_SLOTS } from "../services/types";
40
+ import {
41
+ type CompatRuntimeState,
42
+ ensureCompatSensitiveRouteAuthorized,
43
+ ensureRouteAuthorized,
44
+ getCompatApiToken,
45
+ getProvidedApiToken,
46
+ readCompatJsonBody,
47
+ sendJsonError as sendJsonErrorResponse,
48
+ sendJson as sendJsonResponse,
49
+ tokenMatches,
50
+ } from "./compat-helpers";
51
+
52
/**
 * Runtime type guard for a `CatalogModel` supplied as a value of unknown
 * shape.
 *
 * Only non-null, non-array objects pass, and only when every field listed
 * below has the expected primitive type. Properties outside the list are
 * not inspected.
 */
function isCatalogModel(value: unknown): value is CatalogModel {
  if (typeof value !== "object" || value === null || Array.isArray(value)) {
    return false;
  }
  const candidate = value as Record<string, unknown>;
  // Each field name is paired with the `typeof` result it must produce.
  const expectedTypes: ReadonlyArray<readonly [string, "string" | "number"]> = [
    ["id", "string"],
    ["displayName", "string"],
    ["hfRepo", "string"],
    ["ggufFile", "string"],
    ["params", "string"],
    ["quant", "string"],
    ["sizeGb", "number"],
    ["minRamGb", "number"],
    ["category", "string"],
    ["bucket", "string"],
    ["blurb", "string"],
  ];
  return expectedTypes.every(
    ([field, expected]) => typeof candidate[field] === expected,
  );
}
71
+
72
/**
 * Authorize a streaming (SSE) request.
 *
 * The token returned by `getProvidedApiToken(req)` and the trimmed
 * `?token=` query parameter are both accepted; either one matching the
 * configured compat API token is sufficient. When no compat token is
 * configured, every request is allowed.
 *
 * On rejection this function writes a 401 JSON response and ends it, so
 * the caller only needs to stop handling the request.
 *
 * @returns `true` when the request may proceed, `false` once the 401 has
 *   been sent.
 */
function isStreamAuthorized(
  req: http.IncomingMessage,
  res: http.ServerResponse,
  url: URL,
): boolean {
  const expected = getCompatApiToken();
  if (!expected) {
    return true;
  }

  const fromHeader = getProvidedApiToken(req);
  const fromQuery = url.searchParams.get("token")?.trim();
  // Header token is tried first, then the query-string fallback.
  for (const supplied of [fromHeader, fromQuery]) {
    if (supplied && tokenMatches(expected, supplied)) {
      return true;
    }
  }

  res.writeHead(401, { "Content-Type": "application/json" });
  res.end(JSON.stringify({ error: "Unauthorized" }));
  return false;
}
93
+
94
/**
 * Write one Server-Sent Events message to the response.
 *
 * The payload is JSON-serialized into a single `data:` field and followed
 * by the blank line that terminates an event.
 */
function writeSseEvent(
  res: http.ServerResponse,
  payload: Record<string, unknown>,
): void {
  const serialized = JSON.stringify(payload);
  res.write(`data: ${serialized}\n\n`);
}
100
+
101
/**
 * Read a string field from a parsed JSON body.
 *
 * @param body - Parsed request body, or `null` when none is available.
 * @param key - Property to read.
 * @returns The trimmed value, or `null` when the body is missing, the
 *   property is not a string, or it is empty after trimming.
 */
function stringBody(
  body: Record<string, unknown> | null,
  key: string,
): string | null {
  if (!body) {
    return null;
  }
  const candidate = body[key];
  if (typeof candidate !== "string") {
    return null;
  }
  const trimmed = candidate.trim();
  return trimmed.length > 0 ? trimmed : null;
}
109
+
110
/**
 * Validate the untrusted per-load `overrides` JSON sent with
 * `POST /api/local-inference/active` and convert it into typed load args.
 *
 * Fields are checked in a fixed order (`contextSize`, `cacheTypeK`,
 * `cacheTypeV`, `gpuLayers`, `kvOffload`, then the boolean flags
 * `flashAttention`, `mmap`, `mlock`) and the first violation is reported.
 * Fields that are `undefined` are skipped. Once every field has passed,
 * the collected values go through `validateLocalInferenceLoadArgs`; an
 * exception thrown there is reported as the error too.
 *
 * @param raw - Parsed JSON value of unknown shape.
 * @returns `{ overrides, error: null }` on success, otherwise
 *   `{ overrides: null, error }` carrying a human-readable message.
 */
function parseLocalInferenceLoadOverrides(raw: unknown):
  | { overrides: LocalInferenceLoadOverrides; error: null }
  | {
      overrides: null;
      error: string;
    } {
  const KV_OFFLOAD_ERROR =
    'overrides.kvOffload must be "cpu", "gpu", "split", or { gpuLayers: number }';
  // Build the failure variant of the result.
  const reject = (error: string): { overrides: null; error: string } => ({
    overrides: null,
    error,
  });
  const isNonNegativeInteger = (candidate: unknown): candidate is number =>
    typeof candidate === "number" &&
    Number.isInteger(candidate) &&
    candidate >= 0;

  if (typeof raw !== "object" || raw === null || Array.isArray(raw)) {
    return reject("overrides must be an object");
  }
  const input = raw as Record<string, unknown>;
  const parsed: LocalInferenceLoadOverrides = {};

  const contextSize = input.contextSize;
  if (contextSize !== undefined) {
    if (
      !(
        typeof contextSize === "number" &&
        Number.isInteger(contextSize) &&
        contextSize >= 256
      )
    ) {
      return reject("overrides.contextSize must be an integer >= 256");
    }
    parsed.contextSize = contextSize;
  }

  // Cache type names are normalized to trimmed lower case.
  for (const field of ["cacheTypeK", "cacheTypeV"] as const) {
    const cacheType = input[field];
    if (cacheType === undefined) {
      continue;
    }
    const normalized = typeof cacheType === "string" ? cacheType.trim() : "";
    if (normalized.length === 0) {
      return reject(`overrides.${field} must be a non-empty string`);
    }
    parsed[field] = normalized.toLowerCase();
  }

  const gpuLayers = input.gpuLayers;
  if (gpuLayers !== undefined) {
    if (!isNonNegativeInteger(gpuLayers)) {
      return reject("overrides.gpuLayers must be a non-negative integer");
    }
    parsed.gpuLayers = gpuLayers;
  }

  const kvOffload = input.kvOffload;
  if (kvOffload !== undefined) {
    // Object form: `{ gpuLayers: <non-negative integer> }`.
    const nestedLayers =
      typeof kvOffload === "object" && kvOffload !== null
        ? (kvOffload as { gpuLayers?: unknown }).gpuLayers
        : undefined;
    if (kvOffload === "cpu" || kvOffload === "gpu" || kvOffload === "split") {
      parsed.kvOffload = kvOffload as KvOffloadMode;
    } else if (isNonNegativeInteger(nestedLayers)) {
      parsed.kvOffload = { gpuLayers: nestedLayers };
    } else {
      return reject(KV_OFFLOAD_ERROR);
    }
  }

  for (const flag of ["flashAttention", "mmap", "mlock"] as const) {
    const setting = input[flag];
    if (setting === undefined) {
      continue;
    }
    if (typeof setting !== "boolean") {
      return reject(`overrides.${flag} must be a boolean`);
    }
    parsed[flag] = setting;
  }

  // Apply the same validation `resolveLocalInferenceLoadArgs` will run.
  // The optimized desktop FFI runtime can honor the elizaOS fork's
  // KV-cache types; unsupported runtimes fail later at the backend
  // capability gate instead of silently loading fp16.
  try {
    validateLocalInferenceLoadArgs(parsed, { allowFork: true });
  } catch (err) {
    return reject(err instanceof Error ? err.message : "invalid overrides");
  }
  return { overrides: parsed, error: null };
}
224
+
225
/**
 * Extract the `:id` segment from `/api/local-inference/installed/:id`.
 *
 * The path must match exactly: a single non-empty segment after
 * `installed/` with no further slashes. The segment is returned as it
 * appears in the pathname (no trimming or URL-decoding is applied).
 *
 * @returns The captured id, or `null` when the pathname does not match.
 */
function matchInstalledId(pathname: string): string | null {
  const found = pathname.match(/^\/api\/local-inference\/installed\/([^/]+)$/);
  if (!found) {
    return null;
  }
  return found[1] ?? null;
}
233
+
234
+ export async function handleLocalInferenceCompatRoutes(
235
+ req: http.IncomingMessage,
236
+ res: http.ServerResponse,
237
+ state: CompatRuntimeState,
238
+ ): Promise<boolean> {
239
+ const method = (req.method ?? "GET").toUpperCase();
240
+ const url = new URL(req.url ?? "/", "http://localhost");
241
+ const pathname = url.pathname;
242
+
243
+ if (!pathname.startsWith("/api/local-inference/")) return false;
244
+
245
+ // ── SSE: download progress stream ───────────────────────────────────
246
+ if (
247
+ method === "GET" &&
248
+ pathname === "/api/local-inference/downloads/stream"
249
+ ) {
250
+ if (!isStreamAuthorized(req, res, url)) return true;
251
+
252
+ res.writeHead(200, {
253
+ "Content-Type": "text/event-stream",
254
+ "Cache-Control": "no-cache, no-transform",
255
+ Connection: "keep-alive",
256
+ "X-Accel-Buffering": "no",
257
+ });
258
+
259
+ // Send initial snapshot so a freshly-opened stream immediately reflects
260
+ // whatever is in flight.
261
+ writeSseEvent(res, {
262
+ type: "snapshot",
263
+ downloads: localInferenceService.getDownloads(),
264
+ active: localInferenceService.getActive(),
265
+ });
266
+
267
+ const unsubscribeDownloads = localInferenceService.subscribeDownloads(
268
+ (event) => {
269
+ writeSseEvent(res, {
270
+ type: event.type,
271
+ job: event.job,
272
+ });
273
+ },
274
+ );
275
+ const unsubscribeActive = localInferenceService.subscribeActive(
276
+ (active) => {
277
+ writeSseEvent(res, {
278
+ type: "active",
279
+ active,
280
+ });
281
+ },
282
+ );
283
+
284
+ const heartbeat = setInterval(() => {
285
+ res.write(": heartbeat\n\n");
286
+ }, 15_000);
287
+ if (typeof heartbeat === "object" && "unref" in heartbeat) {
288
+ heartbeat.unref();
289
+ }
290
+
291
+ const cleanup = () => {
292
+ clearInterval(heartbeat);
293
+ unsubscribeDownloads();
294
+ unsubscribeActive();
295
+ };
296
+ req.on("close", cleanup);
297
+ req.on("aborted", cleanup);
298
+ return true;
299
+ }
300
+
301
+ // ── GET: full hub snapshot (catalog + installed + hardware + state) ─
302
+ if (method === "GET" && pathname === "/api/local-inference/hub") {
303
+ if (!(await ensureRouteAuthorized(req, res, state))) return true;
304
+ try {
305
+ const snapshot = await localInferenceService.snapshot();
306
+ sendJsonResponse(res, 200, snapshot);
307
+ } catch (err) {
308
+ sendJsonErrorResponse(
309
+ res,
310
+ 500,
311
+ err instanceof Error ? err.message : "Failed to load hub",
312
+ );
313
+ }
314
+ return true;
315
+ }
316
+
317
+ // ── GET: hardware probe only ────────────────────────────────────────
318
+ if (method === "GET" && pathname === "/api/local-inference/hardware") {
319
+ if (!(await ensureRouteAuthorized(req, res, state))) return true;
320
+ try {
321
+ const probe = await localInferenceService.getHardware();
322
+ sendJsonResponse(res, 200, probe);
323
+ } catch (err) {
324
+ sendJsonErrorResponse(
325
+ res,
326
+ 500,
327
+ err instanceof Error ? err.message : "Failed to probe hardware",
328
+ );
329
+ }
330
+ return true;
331
+ }
332
+
333
+ // ── GET: device tier + live memory budget + resident model state ────
334
+ // The single read clients use to decide local-vs-cloud and to render what
335
+ // the memory arbiter currently holds. Memory is the kernel's allocatable
336
+ // estimate (MemAvailable on Linux/Android), not MemFree.
337
+ if (method === "GET" && pathname === "/api/local-inference/device-tier") {
338
+ if (!(await ensureRouteAuthorized(req, res, state))) return true;
339
+ try {
340
+ const probe = await localInferenceService.getHardware();
341
+ const tier = classifyDeviceTier(probe);
342
+ const sysmem = readSystemMemory();
343
+ const arbiter = tryGetMemoryArbiter();
344
+ const resident = arbiter
345
+ ? {
346
+ pressure: arbiter.currentPressureLevel(),
347
+ models: arbiter.residentSnapshot(),
348
+ }
349
+ : null;
350
+ sendJsonResponse(res, 200, {
351
+ tier,
352
+ memory: {
353
+ availableBytes: sysmem.freeBytes,
354
+ totalBytes: sysmem.totalBytes,
355
+ },
356
+ resident,
357
+ });
358
+ } catch (err) {
359
+ sendJsonErrorResponse(
360
+ res,
361
+ 500,
362
+ err instanceof Error ? err.message : "Failed to classify device tier",
363
+ );
364
+ }
365
+ return true;
366
+ }
367
+
368
+ // ── GET: curated catalog ────────────────────────────────────────────
369
+ if (method === "GET" && pathname === "/api/local-inference/catalog") {
370
+ if (!(await ensureRouteAuthorized(req, res, state))) return true;
371
+ sendJsonResponse(res, 200, {
372
+ models: localInferenceService.getCatalog(),
373
+ });
374
+ return true;
375
+ }
376
+
377
+ // ── GET: installed models ───────────────────────────────────────────
378
+ if (method === "GET" && pathname === "/api/local-inference/installed") {
379
+ if (!(await ensureRouteAuthorized(req, res, state))) return true;
380
+ try {
381
+ const models = await localInferenceService.getInstalled();
382
+ sendJsonResponse(res, 200, { models });
383
+ } catch (err) {
384
+ sendJsonErrorResponse(
385
+ res,
386
+ 500,
387
+ err instanceof Error ? err.message : "Failed to list installed models",
388
+ );
389
+ }
390
+ return true;
391
+ }
392
+
393
+ // ── POST: start download ────────────────────────────────────────────
394
+ // Body: either `{ modelId }` for a curated entry, or
395
+ // `{ spec: CatalogModel }` for a HuggingFace-search result.
396
+ if (method === "POST" && pathname === "/api/local-inference/downloads") {
397
+ if (!ensureCompatSensitiveRouteAuthorized(req, res)) return true;
398
+ const body = await readCompatJsonBody(req, res);
399
+ if (!body) return true;
400
+ const modelId = stringBody(body, "modelId");
401
+ const rawSpec = body.spec;
402
+ try {
403
+ let job: Awaited<ReturnType<typeof localInferenceService.startDownload>>;
404
+ if (rawSpec) {
405
+ if (!isCatalogModel(rawSpec)) {
406
+ sendJsonErrorResponse(res, 400, "Invalid model spec");
407
+ return true;
408
+ }
409
+ job = await localInferenceService.startDownload(rawSpec);
410
+ } else if (modelId) {
411
+ job = await localInferenceService.startDownload(modelId);
412
+ } else {
413
+ sendJsonErrorResponse(res, 400, "modelId or spec is required");
414
+ return true;
415
+ }
416
+ sendJsonResponse(res, 202, { job });
417
+ } catch (err) {
418
+ sendJsonErrorResponse(
419
+ res,
420
+ 400,
421
+ err instanceof Error ? err.message : "Failed to start download",
422
+ );
423
+ }
424
+ return true;
425
+ }
426
+
427
+ // ── GET: provider status snapshot ──────────────────────────────────
428
+ if (method === "GET" && pathname === "/api/local-inference/providers") {
429
+ if (!(await ensureRouteAuthorized(req, res, state))) return true;
430
+ try {
431
+ const providers = await snapshotProviders();
432
+ sendJsonResponse(res, 200, { providers });
433
+ } catch (err) {
434
+ sendJsonErrorResponse(
435
+ res,
436
+ 500,
437
+ err instanceof Error ? err.message : "Failed to read providers",
438
+ );
439
+ }
440
+ return true;
441
+ }
442
+
443
+ // ── GET: registered model handlers across all providers ────────────
444
+ if (method === "GET" && pathname === "/api/local-inference/routing") {
445
+ if (!(await ensureRouteAuthorized(req, res, state))) return true;
446
+ try {
447
+ const [prefs, registrations] = await Promise.all([
448
+ readRoutingPreferences(),
449
+ Promise.resolve(handlerRegistry.getAll().map(toPublicRegistration)),
450
+ ]);
451
+ sendJsonResponse(res, 200, {
452
+ registrations,
453
+ preferences: prefs,
454
+ });
455
+ } catch (err) {
456
+ sendJsonErrorResponse(
457
+ res,
458
+ 500,
459
+ err instanceof Error ? err.message : "Failed to read routing state",
460
+ );
461
+ }
462
+ return true;
463
+ }
464
+
465
+ // ── POST: set preferred provider for a slot (manual override) ──────
466
+ if (
467
+ method === "POST" &&
468
+ pathname === "/api/local-inference/routing/preferred"
469
+ ) {
470
+ if (!ensureCompatSensitiveRouteAuthorized(req, res)) return true;
471
+ const body = await readCompatJsonBody(req, res);
472
+ if (!body) return true;
473
+ const slot = stringBody(body, "slot") as AgentModelSlot | null;
474
+ if (!slot || !AGENT_MODEL_SLOTS.includes(slot)) {
475
+ sendJsonErrorResponse(
476
+ res,
477
+ 400,
478
+ "slot is required and must be a valid AgentModelSlot",
479
+ );
480
+ return true;
481
+ }
482
+ const raw = body.provider;
483
+ const provider =
484
+ raw === null
485
+ ? null
486
+ : typeof raw === "string" && raw.trim().length > 0
487
+ ? raw.trim()
488
+ : null;
489
+ try {
490
+ const prefs = await setPreferredProvider(slot, provider);
491
+ sendJsonResponse(res, 200, { preferences: prefs });
492
+ } catch (err) {
493
+ sendJsonErrorResponse(
494
+ res,
495
+ 500,
496
+ err instanceof Error
497
+ ? err.message
498
+ : "Failed to write preferred provider",
499
+ );
500
+ }
501
+ return true;
502
+ }
503
+
504
+ // ── POST: set routing policy for a slot ─────────────────────────────
505
+ if (method === "POST" && pathname === "/api/local-inference/routing/policy") {
506
+ if (!ensureCompatSensitiveRouteAuthorized(req, res)) return true;
507
+ const body = await readCompatJsonBody(req, res);
508
+ if (!body) return true;
509
+ const slot = stringBody(body, "slot") as AgentModelSlot | null;
510
+ if (!slot || !AGENT_MODEL_SLOTS.includes(slot)) {
511
+ sendJsonErrorResponse(
512
+ res,
513
+ 400,
514
+ "slot is required and must be a valid AgentModelSlot",
515
+ );
516
+ return true;
517
+ }
518
+ const raw = body.policy;
519
+ const policy = raw === null ? null : isRoutingPolicy(raw) ? raw : null;
520
+ if (raw !== null && policy === null) {
521
+ sendJsonErrorResponse(
522
+ res,
523
+ 400,
524
+ `policy must be one of ${ROUTING_POLICIES.join(", ")} or null`,
525
+ );
526
+ return true;
527
+ }
528
+ try {
529
+ const prefs = await setPolicy(slot, policy);
530
+ sendJsonResponse(res, 200, { preferences: prefs });
531
+ } catch (err) {
532
+ sendJsonErrorResponse(
533
+ res,
534
+ 500,
535
+ err instanceof Error ? err.message : "Failed to write routing policy",
536
+ );
537
+ }
538
+ return true;
539
+ }
540
+
541
+ // ── GET: model-type assignments ─────────────────────────────────────
542
+ if (method === "GET" && pathname === "/api/local-inference/assignments") {
543
+ if (!(await ensureRouteAuthorized(req, res, state))) return true;
544
+ try {
545
+ const assignments = await localInferenceService.getAssignments();
546
+ sendJsonResponse(res, 200, { assignments });
547
+ } catch (err) {
548
+ sendJsonErrorResponse(
549
+ res,
550
+ 500,
551
+ err instanceof Error ? err.message : "Failed to read assignments",
552
+ );
553
+ }
554
+ return true;
555
+ }
556
+
557
+ // ── POST: set / clear a model-type assignment ───────────────────────
558
+ if (method === "POST" && pathname === "/api/local-inference/assignments") {
559
+ if (!ensureCompatSensitiveRouteAuthorized(req, res)) return true;
560
+ const body = await readCompatJsonBody(req, res);
561
+ if (!body) return true;
562
+ const slot = stringBody(body, "slot") as AgentModelSlot | null;
563
+ if (!slot || !AGENT_MODEL_SLOTS.includes(slot)) {
564
+ sendJsonErrorResponse(
565
+ res,
566
+ 400,
567
+ `slot must be one of ${AGENT_MODEL_SLOTS.join(", ")}`,
568
+ );
569
+ return true;
570
+ }
571
+ // modelId can be null to clear the slot
572
+ const rawModelId = body.modelId;
573
+ const modelId =
574
+ rawModelId === null
575
+ ? null
576
+ : typeof rawModelId === "string" && rawModelId.trim().length > 0
577
+ ? rawModelId.trim()
578
+ : null;
579
+ try {
580
+ const assignments = await localInferenceService.setSlotAssignment(
581
+ slot,
582
+ modelId,
583
+ );
584
+ sendJsonResponse(res, 200, { assignments });
585
+ } catch (err) {
586
+ sendJsonErrorResponse(
587
+ res,
588
+ 500,
589
+ err instanceof Error ? err.message : "Failed to write assignment",
590
+ );
591
+ }
592
+ return true;
593
+ }
594
+
595
+ // ── GET: device bridge status (paired mobile device connectivity) ───
596
+ if (method === "GET" && pathname === "/api/local-inference/device") {
597
+ if (!(await ensureRouteAuthorized(req, res, state))) return true;
598
+ sendJsonResponse(res, 200, deviceBridge.status());
599
+ return true;
600
+ }
601
+
602
+ // ── SSE: device bridge status stream ────────────────────────────────
603
+ if (method === "GET" && pathname === "/api/local-inference/device/stream") {
604
+ if (!isStreamAuthorized(req, res, url)) return true;
605
+
606
+ res.writeHead(200, {
607
+ "Content-Type": "text/event-stream",
608
+ "Cache-Control": "no-cache, no-transform",
609
+ Connection: "keep-alive",
610
+ "X-Accel-Buffering": "no",
611
+ });
612
+
613
+ writeSseEvent(res, { type: "status", status: deviceBridge.status() });
614
+ const unsubscribe = deviceBridge.subscribeStatus((status) => {
615
+ writeSseEvent(res, { type: "status", status });
616
+ });
617
+ const heartbeat = setInterval(() => {
618
+ res.write(": heartbeat\n\n");
619
+ }, 15_000);
620
+ if (typeof heartbeat === "object" && "unref" in heartbeat) {
621
+ heartbeat.unref();
622
+ }
623
+ const cleanup = () => {
624
+ clearInterval(heartbeat);
625
+ unsubscribe();
626
+ };
627
+ req.on("close", cleanup);
628
+ req.on("aborted", cleanup);
629
+ return true;
630
+ }
631
+
632
+ // ── GET: explicit custom model search (Hugging Face / ModelScope) ───
633
+ if (method === "GET" && pathname === "/api/local-inference/hf-search") {
634
+ if (!(await ensureRouteAuthorized(req, res, state))) return true;
635
+ const q = url.searchParams.get("q")?.trim() ?? "";
636
+ if (q.length === 0) {
637
+ sendJsonResponse(res, 200, { models: [] });
638
+ return true;
639
+ }
640
+ const limitRaw = url.searchParams.get("limit");
641
+ const limit = limitRaw
642
+ ? Math.max(1, Math.min(50, Number.parseInt(limitRaw, 10) || 12))
643
+ : 12;
644
+ const hub =
645
+ url.searchParams.get("hub")?.trim().toLowerCase() === "modelscope"
646
+ ? "modelscope"
647
+ : "huggingface";
648
+ try {
649
+ const models =
650
+ typeof localInferenceService.searchModelHub === "function"
651
+ ? await localInferenceService.searchModelHub(q, hub, limit)
652
+ : await localInferenceService.searchHuggingFace(q, limit);
653
+ sendJsonResponse(res, 200, { models });
654
+ } catch (err) {
655
+ sendJsonErrorResponse(
656
+ res,
657
+ 502,
658
+ err instanceof Error ? err.message : "Model search failed",
659
+ );
660
+ }
661
+ return true;
662
+ }
663
+
664
+ // ── DELETE: cancel download ─────────────────────────────────────────
665
+ {
666
+ const match = /^\/api\/local-inference\/downloads\/([^/]+)$/.exec(pathname);
667
+ if (method === "DELETE" && match) {
668
+ if (!ensureCompatSensitiveRouteAuthorized(req, res)) return true;
669
+ const cancelled = localInferenceService.cancelDownload(match[1] ?? "");
670
+ sendJsonResponse(res, cancelled ? 200 : 404, { cancelled });
671
+ return true;
672
+ }
673
+ }
674
+
675
+ // ── GET: active model ───────────────────────────────────────────────
676
+ if (method === "GET" && pathname === "/api/local-inference/active") {
677
+ if (!(await ensureRouteAuthorized(req, res, state))) return true;
678
+ sendJsonResponse(res, 200, localInferenceService.getActive());
679
+ return true;
680
+ }
681
+
682
+ // ── POST: switch active model ───────────────────────────────────────
683
+ // Accepts either:
684
+ // { "modelId": "..." } — legacy shape
685
+ // { "modelId": "...", "overrides": { ... } } — per-load overrides
686
+ // Overrides honour: contextSize, cacheTypeK, cacheTypeV, gpuLayers,
687
+ // kvOffload, flashAttention, mmap, mlock. Validation is delegated to
688
+ // `validateLocalInferenceLoadArgs` (desktop-only acceptance set by
689
+ // default; AOSP / paired-device callers route through their own
690
+ // adapter and bypass this path).
691
+ if (method === "POST" && pathname === "/api/local-inference/active") {
692
+ if (!ensureCompatSensitiveRouteAuthorized(req, res)) return true;
693
+ const body = await readCompatJsonBody(req, res);
694
+ if (!body) return true;
695
+ const modelId = stringBody(body, "modelId");
696
+ if (!modelId) {
697
+ sendJsonErrorResponse(res, 400, "modelId is required");
698
+ return true;
699
+ }
700
+ let overrides: LocalInferenceLoadOverrides | undefined;
701
+ if (body.overrides !== undefined && body.overrides !== null) {
702
+ const parsed = parseLocalInferenceLoadOverrides(body.overrides);
703
+ if (parsed.error !== null) {
704
+ sendJsonErrorResponse(res, 400, parsed.error);
705
+ return true;
706
+ }
707
+ overrides = parsed.overrides;
708
+ }
709
+ try {
710
+ const active = await localInferenceService.setActive(
711
+ state.current,
712
+ modelId,
713
+ overrides,
714
+ );
715
+ sendJsonResponse(res, 200, active);
716
+ } catch (err) {
717
+ // #7679: refuse to activate a candidate-only / weights-staged
718
+ // bundle whose manifest reports `evals.textEval.passed=false`.
719
+ // Surface the structured payload (modelId, manifestVersion,
720
+ // failedEvals) verbatim so the UI can render an actionable
721
+ // "this tier isn't ready" message instead of `[unused]` tokens
722
+ // downstream.
723
+ if (err instanceof CandidateModelActivationError) {
724
+ sendJsonResponse(res, 422, {
725
+ error: err.message,
726
+ modelId: err.modelId,
727
+ manifestVersion: err.manifestVersion,
728
+ failedEvals: err.failedEvals,
729
+ });
730
+ return true;
731
+ }
732
+ sendJsonErrorResponse(
733
+ res,
734
+ 400,
735
+ err instanceof Error ? err.message : "Failed to set active model",
736
+ );
737
+ }
738
+ return true;
739
+ }
740
+
741
+ // ── DELETE: clear active model ──────────────────────────────────────
742
+ if (method === "DELETE" && pathname === "/api/local-inference/active") {
743
+ if (!ensureCompatSensitiveRouteAuthorized(req, res)) return true;
744
+ try {
745
+ const active = await localInferenceService.clearActive(state.current);
746
+ sendJsonResponse(res, 200, active);
747
+ } catch (err) {
748
+ sendJsonErrorResponse(
749
+ res,
750
+ 500,
751
+ err instanceof Error ? err.message : "Failed to unload model",
752
+ );
753
+ }
754
+ return true;
755
+ }
756
+
757
+ // ── POST: verify installed model ────────────────────────────────────
758
+ {
759
+ const match = /^\/api\/local-inference\/installed\/([^/]+)\/verify$/.exec(
760
+ pathname,
761
+ );
762
+ if (method === "POST" && match) {
763
+ if (!(await ensureRouteAuthorized(req, res, state))) return true;
764
+ try {
765
+ const result = await localInferenceService.verifyModel(match[1] ?? "");
766
+ sendJsonResponse(res, 200, result);
767
+ } catch (err) {
768
+ sendJsonErrorResponse(
769
+ res,
770
+ 404,
771
+ err instanceof Error ? err.message : "Failed to verify model",
772
+ );
773
+ }
774
+ return true;
775
+ }
776
+ }
777
+
778
+ // ── DELETE: uninstall model ─────────────────────────────────────────
779
+ {
780
+ const id = matchInstalledId(pathname);
781
+ if (method === "DELETE" && id) {
782
+ if (!ensureCompatSensitiveRouteAuthorized(req, res)) return true;
783
+ try {
784
+ const result = await localInferenceService.uninstall(id);
785
+ if (result.removed) {
786
+ sendJsonResponse(res, 200, { removed: true });
787
+ } else if (result.reason === "external") {
788
+ sendJsonErrorResponse(
789
+ res,
790
+ 409,
791
+ "Model was discovered from another tool; Eliza will not delete files it does not own",
792
+ );
793
+ } else {
794
+ sendJsonErrorResponse(res, 404, "Model not installed");
795
+ }
796
+ } catch (err) {
797
+ sendJsonErrorResponse(
798
+ res,
799
+ 500,
800
+ err instanceof Error ? err.message : "Failed to uninstall model",
801
+ );
802
+ }
803
+ return true;
804
+ }
805
+ }
806
+
807
+ return false;
808
+ }