@elizaos/plugin-elizacloud 2.0.0-alpha.8 → 2.0.11-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (452) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +196 -0
  3. package/auto-enable.ts +22 -0
  4. package/dist/browser/index.browser.js +2 -21
  5. package/dist/browser/index.browser.js.map +5 -38
  6. package/dist/cjs/index.d.ts +2 -2
  7. package/dist/cjs/index.node.cjs +9112 -2265
  8. package/dist/cjs/index.node.js.map +68 -30
  9. package/dist/cloud/auth-service-types.d.ts +8 -0
  10. package/dist/cloud/auth-service-types.d.ts.map +1 -0
  11. package/dist/cloud/auth-service-types.js +36 -0
  12. package/dist/cloud/auth-service-types.js.map +10 -0
  13. package/dist/cloud/auth.d.ts +19 -0
  14. package/dist/cloud/auth.d.ts.map +1 -0
  15. package/dist/cloud/auth.js +283 -0
  16. package/dist/cloud/auth.js.map +12 -0
  17. package/dist/cloud/backup.d.ts +18 -0
  18. package/dist/cloud/backup.d.ts.map +1 -0
  19. package/dist/cloud/backup.js +63 -0
  20. package/dist/cloud/backup.js.map +10 -0
  21. package/dist/cloud/base-url.d.ts +7 -0
  22. package/dist/cloud/base-url.d.ts.map +1 -0
  23. package/dist/cloud/base-url.js +29 -0
  24. package/dist/cloud/base-url.js.map +10 -0
  25. package/dist/cloud/bridge-client.d.ts +126 -0
  26. package/dist/cloud/bridge-client.d.ts.map +1 -0
  27. package/dist/cloud/bridge-client.js +384 -0
  28. package/dist/cloud/bridge-client.js.map +11 -0
  29. package/dist/cloud/clack-observer.d.ts +35 -0
  30. package/dist/cloud/clack-observer.d.ts.map +1 -0
  31. package/dist/cloud/clack-observer.js +143 -0
  32. package/dist/cloud/clack-observer.js.map +10 -0
  33. package/dist/cloud/cloud-api-key.d.ts +26 -0
  34. package/dist/cloud/cloud-api-key.d.ts.map +1 -0
  35. package/dist/cloud/cloud-api-key.js +60 -0
  36. package/dist/cloud/cloud-api-key.js.map +10 -0
  37. package/dist/cloud/cloud-manager.d.ts +33 -0
  38. package/dist/cloud/cloud-manager.d.ts.map +1 -0
  39. package/dist/cloud/cloud-manager.js +806 -0
  40. package/dist/cloud/cloud-manager.js.map +16 -0
  41. package/dist/cloud/cloud-proxy.d.ts +20 -0
  42. package/dist/cloud/cloud-proxy.d.ts.map +1 -0
  43. package/dist/cloud/cloud-proxy.js +54 -0
  44. package/dist/cloud/cloud-proxy.js.map +10 -0
  45. package/dist/cloud/cloud-wallet.d.ts +94 -0
  46. package/dist/cloud/cloud-wallet.d.ts.map +1 -0
  47. package/dist/cloud/cloud-wallet.js +362 -0
  48. package/dist/cloud/cloud-wallet.js.map +13 -0
  49. package/dist/cloud/duffel-client.d.ts +181 -0
  50. package/dist/cloud/duffel-client.d.ts.map +1 -0
  51. package/dist/cloud/duffel-client.js +506 -0
  52. package/dist/cloud/duffel-client.js.map +11 -0
  53. package/dist/cloud/index.d.ts +15 -0
  54. package/dist/cloud/index.d.ts.map +1 -0
  55. package/dist/cloud/index.js +1811 -0
  56. package/dist/cloud/index.js.map +24 -0
  57. package/dist/cloud/lifeops-schedule-sync-client.d.ts +43 -0
  58. package/dist/cloud/lifeops-schedule-sync-client.d.ts.map +1 -0
  59. package/dist/cloud/lifeops-schedule-sync-client.js +180 -0
  60. package/dist/cloud/lifeops-schedule-sync-client.js.map +11 -0
  61. package/dist/cloud/lifeops-schedule-sync-contracts.d.ts +89 -0
  62. package/dist/cloud/lifeops-schedule-sync-contracts.d.ts.map +1 -0
  63. package/dist/cloud/lifeops-schedule-sync-contracts.js +39 -0
  64. package/dist/cloud/lifeops-schedule-sync-contracts.js.map +10 -0
  65. package/dist/cloud/managed-payment-clients.d.ts +166 -0
  66. package/dist/cloud/managed-payment-clients.d.ts.map +1 -0
  67. package/dist/cloud/managed-payment-clients.js +238 -0
  68. package/dist/cloud/managed-payment-clients.js.map +11 -0
  69. package/dist/cloud/null-observer.d.ts +35 -0
  70. package/dist/cloud/null-observer.d.ts.map +1 -0
  71. package/dist/cloud/null-observer.js +45 -0
  72. package/dist/cloud/null-observer.js.map +10 -0
  73. package/dist/cloud/reconnect.d.ts +26 -0
  74. package/dist/cloud/reconnect.d.ts.map +1 -0
  75. package/dist/cloud/reconnect.js +104 -0
  76. package/dist/cloud/reconnect.js.map +10 -0
  77. package/dist/cloud/setup-observer.d.ts +98 -0
  78. package/dist/cloud/setup-observer.d.ts.map +1 -0
  79. package/dist/cloud/setup-observer.js +2 -0
  80. package/dist/cloud/setup-observer.js.map +9 -0
  81. package/dist/cloud/validate-url.d.ts +2 -0
  82. package/dist/cloud/validate-url.d.ts.map +1 -0
  83. package/dist/cloud/validate-url.js +175 -0
  84. package/dist/cloud/validate-url.js.map +10 -0
  85. package/dist/cloud/x402-payment-handler.d.ts +85 -0
  86. package/dist/cloud/x402-payment-handler.d.ts.map +1 -0
  87. package/dist/cloud/x402-payment-handler.js +119 -0
  88. package/dist/cloud/x402-payment-handler.js.map +10 -0
  89. package/dist/cloud-providers/cloud-status.d.ts.map +1 -1
  90. package/dist/cloud-providers/cloud-status.js +78 -0
  91. package/dist/cloud-providers/cloud-status.js.map +10 -0
  92. package/dist/cloud-providers/container-health.d.ts.map +1 -1
  93. package/dist/cloud-providers/container-health.js +74 -0
  94. package/dist/cloud-providers/container-health.js.map +10 -0
  95. package/dist/cloud-providers/credit-balance.d.ts.map +1 -1
  96. package/dist/cloud-providers/credit-balance.js +85 -0
  97. package/dist/cloud-providers/credit-balance.js.map +10 -0
  98. package/dist/cloud-providers/index.d.ts.map +1 -1
  99. package/dist/cloud-providers/index.js +24 -0
  100. package/dist/cloud-providers/index.js.map +9 -0
  101. package/dist/cloud-providers/model-registry.d.ts.map +1 -1
  102. package/dist/cloud-providers/model-registry.js +71 -0
  103. package/dist/cloud-providers/model-registry.js.map +10 -0
  104. package/dist/cloud-setup.d.ts +36 -0
  105. package/dist/cloud-setup.d.ts.map +1 -0
  106. package/dist/cloud-setup.js +883 -0
  107. package/dist/cloud-setup.js.map +14 -0
  108. package/dist/cloud-voice-catalog.d.ts +65 -0
  109. package/dist/cloud-voice-catalog.d.ts.map +1 -0
  110. package/dist/cloud-voice-catalog.js +278 -0
  111. package/dist/cloud-voice-catalog.js.map +12 -0
  112. package/dist/index.browser.d.ts +15 -3
  113. package/dist/index.browser.d.ts.map +1 -1
  114. package/dist/index.d.ts +24 -0
  115. package/dist/index.d.ts.map +1 -1
  116. package/dist/index.js +9862 -0
  117. package/dist/index.js.map +77 -0
  118. package/dist/index.node.d.ts +21 -2
  119. package/dist/index.node.d.ts.map +1 -1
  120. package/dist/init.d.ts.map +1 -1
  121. package/dist/init.js +182 -0
  122. package/dist/init.js.map +12 -0
  123. package/dist/lib/cloud-connection.d.ts +77 -0
  124. package/dist/lib/cloud-connection.d.ts.map +1 -0
  125. package/dist/lib/cloud-connection.js +654 -0
  126. package/dist/lib/cloud-connection.js.map +14 -0
  127. package/dist/lib/cloud-secrets.d.ts +10 -0
  128. package/dist/lib/cloud-secrets.d.ts.map +1 -0
  129. package/dist/lib/cloud-secrets.js +36 -0
  130. package/dist/lib/cloud-secrets.js.map +10 -0
  131. package/dist/lib/config-env.d.ts +5 -0
  132. package/dist/lib/config-env.d.ts.map +1 -0
  133. package/dist/lib/config-env.js +191 -0
  134. package/dist/lib/config-env.js.map +11 -0
  135. package/dist/lib/config-like.d.ts +40 -0
  136. package/dist/lib/config-like.d.ts.map +1 -0
  137. package/dist/lib/config-like.js +103 -0
  138. package/dist/lib/config-like.js.map +10 -0
  139. package/dist/lib/credential-type-map.d.ts +53 -0
  140. package/dist/lib/credential-type-map.d.ts.map +1 -0
  141. package/dist/lib/credential-type-map.js +88 -0
  142. package/dist/lib/credential-type-map.js.map +10 -0
  143. package/dist/lib/feature-flags.d.ts +2 -0
  144. package/dist/lib/feature-flags.d.ts.map +1 -0
  145. package/dist/lib/feature-flags.js +40 -0
  146. package/dist/lib/feature-flags.js.map +10 -0
  147. package/dist/lib/http.d.ts +11 -0
  148. package/dist/lib/http.d.ts.map +1 -0
  149. package/dist/lib/http.js +107 -0
  150. package/dist/lib/http.js.map +10 -0
  151. package/dist/lib/server-cloud-tts.d.ts +21 -0
  152. package/dist/lib/server-cloud-tts.d.ts.map +1 -0
  153. package/dist/lib/server-cloud-tts.js +251 -0
  154. package/dist/lib/server-cloud-tts.js.map +10 -0
  155. package/dist/lib/state-paths.d.ts +4 -0
  156. package/dist/lib/state-paths.d.ts.map +1 -0
  157. package/dist/lib/state-paths.js +52 -0
  158. package/dist/lib/state-paths.js.map +10 -0
  159. package/dist/lib/tts-debug.d.ts +6 -0
  160. package/dist/lib/tts-debug.d.ts.map +1 -0
  161. package/dist/lib/tts-debug.js +24 -0
  162. package/dist/lib/tts-debug.js.map +9 -0
  163. package/dist/models/embeddings.d.ts.map +1 -1
  164. package/dist/models/embeddings.js +329 -0
  165. package/dist/models/embeddings.js.map +13 -0
  166. package/dist/models/image.d.ts.map +1 -1
  167. package/dist/models/image.js +401 -0
  168. package/dist/models/image.js.map +14 -0
  169. package/dist/models/index.d.ts +1 -2
  170. package/dist/models/index.d.ts.map +1 -1
  171. package/dist/models/index.js +1896 -0
  172. package/dist/models/index.js.map +19 -0
  173. package/dist/models/research.d.ts.map +1 -1
  174. package/dist/models/research.js +341 -0
  175. package/dist/models/research.js.map +13 -0
  176. package/dist/models/speech.d.ts +61 -3
  177. package/dist/models/speech.d.ts.map +1 -1
  178. package/dist/models/speech.js +429 -0
  179. package/dist/models/speech.js.map +13 -0
  180. package/dist/models/text.d.ts +111 -3
  181. package/dist/models/text.d.ts.map +1 -1
  182. package/dist/models/text.js +1173 -0
  183. package/dist/models/text.js.map +14 -0
  184. package/dist/models/tokenization.d.ts.map +1 -1
  185. package/dist/models/tokenization.js +65 -0
  186. package/dist/models/tokenization.js.map +10 -0
  187. package/dist/models/transcription.d.ts.map +1 -1
  188. package/dist/models/transcription.js +297 -0
  189. package/dist/models/transcription.js.map +13 -0
  190. package/dist/node/index.d.ts +2 -2
  191. package/dist/node/index.node.js +9189 -2295
  192. package/dist/node/index.node.js.map +68 -30
  193. package/dist/plugin.d.ts +20 -0
  194. package/dist/plugin.d.ts.map +1 -0
  195. package/dist/plugin.js +2937 -0
  196. package/dist/plugin.js.map +28 -0
  197. package/dist/providers/openai.d.ts.map +1 -1
  198. package/dist/providers/openai.js +136 -0
  199. package/dist/providers/openai.js.map +11 -0
  200. package/dist/register-routes.d.ts +2 -0
  201. package/dist/register-routes.d.ts.map +1 -0
  202. package/dist/register-routes.js +2938 -0
  203. package/dist/register-routes.js.map +29 -0
  204. package/dist/routes/cloud-billing-routes.d.ts +9 -0
  205. package/dist/routes/cloud-billing-routes.d.ts.map +1 -0
  206. package/dist/routes/cloud-billing-routes.js +764 -0
  207. package/dist/routes/cloud-billing-routes.js.map +15 -0
  208. package/dist/routes/cloud-coding-container-routes.d.ts +8 -0
  209. package/dist/routes/cloud-coding-container-routes.d.ts.map +1 -0
  210. package/dist/routes/cloud-coding-container-routes.js +214 -0
  211. package/dist/routes/cloud-coding-container-routes.js.map +11 -0
  212. package/dist/routes/cloud-compat-routes.d.ts +10 -0
  213. package/dist/routes/cloud-compat-routes.d.ts.map +1 -0
  214. package/dist/routes/cloud-compat-routes.js +495 -0
  215. package/dist/routes/cloud-compat-routes.js.map +15 -0
  216. package/dist/routes/cloud-features-routes.d.ts +9 -0
  217. package/dist/routes/cloud-features-routes.d.ts.map +1 -0
  218. package/dist/routes/cloud-features-routes.js +124 -0
  219. package/dist/routes/cloud-features-routes.js.map +11 -0
  220. package/dist/routes/cloud-provisioning.d.ts +14 -0
  221. package/dist/routes/cloud-provisioning.d.ts.map +1 -0
  222. package/dist/routes/cloud-provisioning.js +37 -0
  223. package/dist/routes/cloud-provisioning.js.map +10 -0
  224. package/dist/routes/cloud-relay-routes.d.ts +23 -0
  225. package/dist/routes/cloud-relay-routes.d.ts.map +1 -0
  226. package/dist/routes/cloud-relay-routes.js +142 -0
  227. package/dist/routes/cloud-relay-routes.js.map +11 -0
  228. package/dist/routes/cloud-routes-autonomous.d.ts +82 -0
  229. package/dist/routes/cloud-routes-autonomous.d.ts.map +1 -0
  230. package/dist/routes/cloud-routes-autonomous.js +1252 -0
  231. package/dist/routes/cloud-routes-autonomous.js.map +18 -0
  232. package/dist/routes/cloud-routes.d.ts +35 -0
  233. package/dist/routes/cloud-routes.d.ts.map +1 -0
  234. package/dist/routes/cloud-routes.js +2173 -0
  235. package/dist/routes/cloud-routes.js.map +23 -0
  236. package/dist/routes/cloud-status-routes-autonomous.d.ts +14 -0
  237. package/dist/routes/cloud-status-routes-autonomous.d.ts.map +1 -0
  238. package/dist/routes/cloud-status-routes-autonomous.js +349 -0
  239. package/dist/routes/cloud-status-routes-autonomous.js.map +13 -0
  240. package/dist/routes/cloud-status-routes.d.ts +4 -0
  241. package/dist/routes/cloud-status-routes.d.ts.map +1 -0
  242. package/dist/routes/cloud-status-routes.js +695 -0
  243. package/dist/routes/cloud-status-routes.js.map +15 -0
  244. package/dist/routes/home-remote-runner-access-url.d.ts +16 -0
  245. package/dist/routes/home-remote-runner-access-url.d.ts.map +1 -0
  246. package/dist/routes/home-remote-runner-access-url.js +91 -0
  247. package/dist/routes/home-remote-runner-access-url.js.map +10 -0
  248. package/dist/routes/travel-provider-relay-routes.d.ts +9 -0
  249. package/dist/routes/travel-provider-relay-routes.d.ts.map +1 -0
  250. package/dist/routes/travel-provider-relay-routes.js +358 -0
  251. package/dist/routes/travel-provider-relay-routes.js.map +14 -0
  252. package/dist/services/cloud-auth.d.ts +140 -5
  253. package/dist/services/cloud-auth.d.ts.map +1 -1
  254. package/dist/services/cloud-auth.js +368 -0
  255. package/dist/services/cloud-auth.js.map +12 -0
  256. package/dist/services/cloud-backup.d.ts.map +1 -1
  257. package/dist/services/cloud-backup.js +176 -0
  258. package/dist/services/cloud-backup.js.map +11 -0
  259. package/dist/services/cloud-bootstrap.d.ts +38 -0
  260. package/dist/services/cloud-bootstrap.d.ts.map +1 -0
  261. package/dist/services/cloud-bootstrap.js +84 -0
  262. package/dist/services/cloud-bootstrap.js.map +10 -0
  263. package/dist/services/cloud-bridge.d.ts +1 -1
  264. package/dist/services/cloud-bridge.d.ts.map +1 -1
  265. package/dist/services/cloud-bridge.js +308 -0
  266. package/dist/services/cloud-bridge.js.map +11 -0
  267. package/dist/services/cloud-container.d.ts +5 -1
  268. package/dist/services/cloud-container.d.ts.map +1 -1
  269. package/dist/services/cloud-container.js +292 -0
  270. package/dist/services/cloud-container.js.map +11 -0
  271. package/dist/services/cloud-credential-provider.d.ts +55 -0
  272. package/dist/services/cloud-credential-provider.d.ts.map +1 -0
  273. package/dist/services/cloud-credential-provider.js +190 -0
  274. package/dist/services/cloud-credential-provider.js.map +11 -0
  275. package/dist/services/cloud-managed-gateway-relay.d.ts +38 -0
  276. package/dist/services/cloud-managed-gateway-relay.d.ts.map +1 -0
  277. package/dist/services/cloud-managed-gateway-relay.js +479 -0
  278. package/dist/services/cloud-managed-gateway-relay.js.map +10 -0
  279. package/dist/services/cloud-model-registry.d.ts.map +1 -1
  280. package/dist/services/cloud-model-registry.js +175 -0
  281. package/dist/services/cloud-model-registry.js.map +10 -0
  282. package/dist/services/index.d.ts +3 -1
  283. package/dist/services/index.d.ts.map +1 -1
  284. package/dist/services/index.js +29 -0
  285. package/dist/services/index.js.map +9 -0
  286. package/dist/types/cloud.d.ts +42 -19
  287. package/dist/types/cloud.d.ts.map +1 -1
  288. package/dist/types/cloud.js +52 -0
  289. package/dist/types/cloud.js.map +10 -0
  290. package/dist/types/index.d.ts +1 -1
  291. package/dist/types/index.d.ts.map +1 -1
  292. package/dist/types/index.js +24 -0
  293. package/dist/types/index.js.map +9 -0
  294. package/dist/utils/cloud-api.d.ts +2 -27
  295. package/dist/utils/cloud-api.d.ts.map +1 -1
  296. package/dist/utils/cloud-api.js +33 -0
  297. package/dist/utils/cloud-api.js.map +10 -0
  298. package/dist/utils/cloud-sdk/client.d.ts +133 -0
  299. package/dist/utils/cloud-sdk/client.d.ts.map +1 -0
  300. package/dist/utils/cloud-sdk/client.js +3693 -0
  301. package/dist/utils/cloud-sdk/client.js.map +13 -0
  302. package/dist/utils/cloud-sdk/http.d.ts +37 -0
  303. package/dist/utils/cloud-sdk/http.d.ts.map +1 -0
  304. package/dist/utils/cloud-sdk/http.js +237 -0
  305. package/dist/utils/cloud-sdk/http.js.map +11 -0
  306. package/dist/utils/cloud-sdk/index.d.ts +6 -0
  307. package/dist/utils/cloud-sdk/index.d.ts.map +1 -0
  308. package/dist/utils/cloud-sdk/index.js +29 -0
  309. package/dist/utils/cloud-sdk/index.js.map +9 -0
  310. package/dist/utils/cloud-sdk/public-routes.d.ts +5563 -0
  311. package/dist/utils/cloud-sdk/public-routes.d.ts.map +1 -0
  312. package/dist/utils/cloud-sdk/public-routes.js +3048 -0
  313. package/dist/utils/cloud-sdk/public-routes.js.map +10 -0
  314. package/dist/utils/cloud-sdk/types.cloud-api.d.ts +101 -0
  315. package/dist/utils/cloud-sdk/types.cloud-api.d.ts.map +1 -0
  316. package/dist/utils/cloud-sdk/types.cloud-api.js +2 -0
  317. package/dist/utils/cloud-sdk/types.cloud-api.js.map +9 -0
  318. package/dist/utils/cloud-sdk/types.d.ts +653 -0
  319. package/dist/utils/cloud-sdk/types.d.ts.map +1 -0
  320. package/dist/utils/cloud-sdk/types.js +29 -0
  321. package/dist/utils/cloud-sdk/types.js.map +10 -0
  322. package/dist/utils/config.d.ts +16 -3
  323. package/dist/utils/config.d.ts.map +1 -1
  324. package/dist/utils/config.js +147 -0
  325. package/dist/utils/config.js.map +10 -0
  326. package/dist/utils/events.d.ts +23 -2
  327. package/dist/utils/events.d.ts.map +1 -1
  328. package/dist/utils/events.js +45 -0
  329. package/dist/utils/events.js.map +10 -0
  330. package/dist/utils/helpers.d.ts.map +1 -1
  331. package/dist/utils/helpers.js +103 -0
  332. package/dist/utils/helpers.js.map +10 -0
  333. package/dist/utils/responses-output.d.ts +13 -0
  334. package/dist/utils/responses-output.d.ts.map +1 -0
  335. package/dist/utils/responses-output.js +102 -0
  336. package/dist/utils/responses-output.js.map +10 -0
  337. package/dist/utils/sdk-client.d.ts +5 -0
  338. package/dist/utils/sdk-client.d.ts.map +1 -0
  339. package/dist/utils/sdk-client.js +157 -0
  340. package/dist/utils/sdk-client.js.map +11 -0
  341. package/dist/utils/waifu-metering.d.ts +108 -0
  342. package/dist/utils/waifu-metering.d.ts.map +1 -0
  343. package/dist/utils/waifu-metering.js +166 -0
  344. package/dist/utils/waifu-metering.js.map +10 -0
  345. package/package.json +139 -21
  346. package/src/cloud/auth-service-types.ts +24 -0
  347. package/src/cloud/auth.ts +175 -0
  348. package/src/cloud/backup.ts +46 -0
  349. package/src/cloud/base-url.ts +6 -0
  350. package/src/cloud/bridge-client.ts +602 -0
  351. package/src/cloud/clack-observer.ts +189 -0
  352. package/src/cloud/cloud-api-key.ts +80 -0
  353. package/src/cloud/cloud-manager.ts +163 -0
  354. package/src/cloud/cloud-proxy.ts +52 -0
  355. package/src/cloud/cloud-wallet.ts +341 -0
  356. package/src/cloud/duffel-client.ts +847 -0
  357. package/src/cloud/index.ts +38 -0
  358. package/src/cloud/lifeops-schedule-sync-client.ts +245 -0
  359. package/src/cloud/lifeops-schedule-sync-contracts.ts +124 -0
  360. package/src/cloud/managed-payment-clients.ts +374 -0
  361. package/src/cloud/null-observer.ts +45 -0
  362. package/src/cloud/reconnect.ts +111 -0
  363. package/src/cloud/setup-observer.ts +125 -0
  364. package/src/cloud/validate-url.ts +187 -0
  365. package/src/cloud/x402-payment-handler.ts +215 -0
  366. package/src/cloud-providers/cloud-status.ts +75 -0
  367. package/src/cloud-providers/container-health.ts +68 -0
  368. package/src/cloud-providers/credit-balance.ts +70 -0
  369. package/src/cloud-providers/index.ts +3 -0
  370. package/src/cloud-providers/model-registry.ts +74 -0
  371. package/src/cloud-setup.ts +531 -0
  372. package/src/cloud-voice-catalog.test.ts +254 -0
  373. package/src/cloud-voice-catalog.ts +246 -0
  374. package/src/index.browser.ts +39 -0
  375. package/src/index.node.ts +69 -0
  376. package/src/index.ts +419 -0
  377. package/src/init.ts +39 -0
  378. package/src/lib/cloud-connection.ts +661 -0
  379. package/src/lib/cloud-secrets.ts +14 -0
  380. package/src/lib/config-env.ts +168 -0
  381. package/src/lib/config-like.ts +149 -0
  382. package/src/lib/credential-type-map.ts +130 -0
  383. package/src/lib/feature-flags.ts +26 -0
  384. package/src/lib/http.ts +122 -0
  385. package/src/lib/server-cloud-tts.ts +301 -0
  386. package/src/lib/state-paths.ts +28 -0
  387. package/src/lib/tts-debug.ts +5 -0
  388. package/src/models/embeddings.ts +298 -0
  389. package/src/models/image.ts +234 -0
  390. package/src/models/index.ts +16 -0
  391. package/src/models/research.ts +275 -0
  392. package/src/models/speech.ts +324 -0
  393. package/src/models/text.ts +1493 -0
  394. package/src/models/tokenization.ts +67 -0
  395. package/src/models/transcription.ts +101 -0
  396. package/src/plugin.ts +281 -0
  397. package/src/providers/openai.ts +16 -0
  398. package/src/register-routes.ts +6 -0
  399. package/src/routes/cloud-billing-routes.ts +744 -0
  400. package/src/routes/cloud-coding-container-routes.ts +198 -0
  401. package/src/routes/cloud-compat-routes.ts +304 -0
  402. package/src/routes/cloud-features-routes.ts +57 -0
  403. package/src/routes/cloud-provisioning.ts +37 -0
  404. package/src/routes/cloud-relay-routes.ts +135 -0
  405. package/src/routes/cloud-routes-autonomous.ts +993 -0
  406. package/src/routes/cloud-routes.ts +637 -0
  407. package/src/routes/cloud-status-routes-autonomous.ts +238 -0
  408. package/src/routes/cloud-status-routes.ts +73 -0
  409. package/src/routes/home-remote-runner-access-url.ts +83 -0
  410. package/src/routes/travel-provider-relay-routes.ts +193 -0
  411. package/src/services/cloud-auth.ts +574 -0
  412. package/src/services/cloud-backup.ts +208 -0
  413. package/src/services/cloud-bootstrap.ts +106 -0
  414. package/src/services/cloud-bridge.ts +386 -0
  415. package/src/services/cloud-container.ts +390 -0
  416. package/src/services/cloud-credential-provider.ts +210 -0
  417. package/src/services/cloud-managed-gateway-relay.ts +663 -0
  418. package/src/services/cloud-model-registry.ts +202 -0
  419. package/src/services/index.ts +17 -0
  420. package/{types → src/types}/cloud.ts +74 -29
  421. package/{types → src/types}/index.ts +25 -0
  422. package/src/utils/cloud-api.ts +10 -0
  423. package/src/utils/cloud-sdk/client.ts +774 -0
  424. package/src/utils/cloud-sdk/http.ts +291 -0
  425. package/src/utils/cloud-sdk/index.ts +23 -0
  426. package/src/utils/cloud-sdk/public-routes.ts +5238 -0
  427. package/src/utils/cloud-sdk/types.cloud-api.ts +120 -0
  428. package/src/utils/cloud-sdk/types.ts +760 -0
  429. package/src/utils/config.ts +193 -0
  430. package/src/utils/events.ts +65 -0
  431. package/src/utils/helpers.ts +107 -0
  432. package/src/utils/responses-output.ts +115 -0
  433. package/src/utils/sdk-client.ts +41 -0
  434. package/src/utils/waifu-metering.ts +302 -0
  435. package/dist/actions/check-credits.d.ts +0 -6
  436. package/dist/actions/check-credits.d.ts.map +0 -1
  437. package/dist/actions/freeze-agent.d.ts +0 -9
  438. package/dist/actions/freeze-agent.d.ts.map +0 -1
  439. package/dist/actions/index.d.ts +0 -5
  440. package/dist/actions/index.d.ts.map +0 -1
  441. package/dist/actions/provision-agent.d.ts +0 -8
  442. package/dist/actions/provision-agent.d.ts.map +0 -1
  443. package/dist/actions/resume-agent.d.ts +0 -9
  444. package/dist/actions/resume-agent.d.ts.map +0 -1
  445. package/dist/build.d.ts +0 -3
  446. package/dist/build.d.ts.map +0 -1
  447. package/dist/generated/specs/specs.d.ts +0 -55
  448. package/dist/generated/specs/specs.d.ts.map +0 -1
  449. package/dist/models/object.d.ts +0 -4
  450. package/dist/models/object.d.ts.map +0 -1
  451. package/dist/utils/forwarded-settings.d.ts +0 -8
  452. package/dist/utils/forwarded-settings.d.ts.map +0 -1
@@ -0,0 +1,1493 @@
1
+ import type {
2
+ GenerateTextParams,
3
+ IAgentRuntime,
4
+ ModelTypeName,
5
+ TextStreamResult,
6
+ TokenUsage,
7
+ } from "@elizaos/core";
8
+ import {
9
+ buildCanonicalSystemPrompt,
10
+ logger,
11
+ ModelType,
12
+ recordInferenceSpan,
13
+ renderChatMessagesForPrompt,
14
+ resolveEffectiveSystemPrompt,
15
+ Semaphore,
16
+ timeInferenceSpan,
17
+ } from "@elizaos/core";
18
+ import {
19
+ getActionPlannerModel,
20
+ getLargeModel,
21
+ getMediumModel,
22
+ getMegaModel,
23
+ getNanoModel,
24
+ getResponseHandlerModel,
25
+ getSmallModel,
26
+ } from "../utils/config";
27
+ import { emitModelUsageEvent } from "../utils/events";
28
+ import { extractResponsesOutputText } from "../utils/responses-output";
29
+ import { createCloudApiClient } from "../utils/sdk-client";
30
+
31
// Model-type identifiers, listed from the smallest text tier to the largest
// and followed by the two role-specific types. Members read through `??` fall
// back to their literal name when `ModelType` does not define them.
const TEXT_NANO_MODEL_TYPE = (ModelType.TEXT_NANO ?? "TEXT_NANO") as ModelTypeName;
const TEXT_SMALL_MODEL_TYPE = ModelType.TEXT_SMALL;
const TEXT_MEDIUM_MODEL_TYPE = (ModelType.TEXT_MEDIUM ?? "TEXT_MEDIUM") as ModelTypeName;
const TEXT_LARGE_MODEL_TYPE = ModelType.TEXT_LARGE;
const TEXT_MEGA_MODEL_TYPE = (ModelType.TEXT_MEGA ?? "TEXT_MEGA") as ModelTypeName;
const RESPONSE_HANDLER_MODEL_TYPE = (ModelType.RESPONSE_HANDLER ??
  "RESPONSE_HANDLER") as ModelTypeName;
const ACTION_PLANNER_MODEL_TYPE = (ModelType.ACTION_PLANNER ??
  "ACTION_PLANNER") as ModelTypeName;
39
+
40
+ /**
41
+ * Per-process cap on CONCURRENT native cloud text calls.
42
+ *
43
+ * Covers BOTH native cloud text routes that share the one cerebras key:
44
+ * the `/chat/completions` round-trip (native-transport callers) AND the
45
+ * `/responses` round-trip (bare-`{ prompt }` callers, incl. the primary reply
46
+ * action). Same model name -> same shared key -> same concurrency budget, so
47
+ * both routes must funnel through this one semaphore or a bare-prompt call can
48
+ * still push the key over its limit.
49
+ *
50
+ * The per-turn burst that triggers the 429 comes from the prompt BATCHER
51
+ * (`dynamicPromptExecFromState`, which always sets providerOptions -> native
52
+ * `/chat/completions`) and the merged evaluator call — NOT from composeState
53
+ * providers (no provider calls `useModel` during composeState). Firing those
54
+ * at once overruns the ONE shared cerebras key's concurrent-request limit
55
+ * -> 429 -> 3 retries x backoff -> 30-63s of latency. Capping in-flight calls
56
+ * through a small semaphore keeps each call ~3s with no 429, without needing
57
+ * more keys or backend changes.
58
+ *
59
+ * Default is a SAFETY CEILING, not full serialization: the paid cerebras key
60
+ * (1000 req/min) and leaner per-turn call counts make the 429 risk small, so
61
+ * the default of 8 leaves the typical 1-3 concurrent calls/turn untouched while
62
+ * still bounding a pathological burst. The limiter is process-global and keys
63
+ * on native transport, not the model, so it also bounds non-cerebras native
64
+ * calls (e.g. zai-glm-4.7) — a high default avoids serializing those. Set
65
+ * `ELIZAOS_CLOUD_NATIVE_CONCURRENCY` (positive integer) to tighten it (1 = fully
66
+ * serialize) on a cerebras-bottlenecked single-key deployment, or raise it for
67
+ * more parallelism. Embeddings use a SEPARATE `/embeddings` route
68
+ * (embeddings.ts) and are intentionally NOT gated here.
69
+ */
70
+ const NATIVE_CONCURRENCY_ENV = "ELIZAOS_CLOUD_NATIVE_CONCURRENCY";
71
+ const DEFAULT_NATIVE_CONCURRENCY = 8;
72
+
73
/**
 * Client-side timeout for cloud text round-trips. Without this the handler
 * passes no `timeoutMs`/`signal` to `requestRaw`, so a hung/slow gateway holds
 * the concurrency permit AND stalls the whole turn until fetch's own (very
 * long) default. `ELIZAOS_CLOUD_TEXT_TIMEOUT_MS` overrides; `0`/negative opts
 * out (no client-side timeout).
 */
const TEXT_TIMEOUT_ENV = "ELIZAOS_CLOUD_TEXT_TIMEOUT_MS";
const DEFAULT_TEXT_TIMEOUT_MS = 120_000;

/**
 * Resolve the client-side timeout (in milliseconds) for cloud text calls.
 *
 * Reads `ELIZAOS_CLOUD_TEXT_TIMEOUT_MS` from `process.env` when `process`
 * exists.
 *
 * @returns
 * - `DEFAULT_TEXT_TIMEOUT_MS` when the variable is unset, blank, or not a
 *   plain base-10 integer;
 * - `undefined` (no client-side timeout) when the value is `0` or negative;
 * - the parsed value otherwise.
 */
export function resolveTextTimeoutMs(): number | undefined {
  const raw =
    typeof process !== "undefined" ? process.env[TEXT_TIMEOUT_ENV] : undefined;
  const text = raw?.trim() ?? "";
  if (text === "") return DEFAULT_TEXT_TIMEOUT_MS;

  // Accept only an optionally signed run of digits. `Number.parseInt` would
  // quietly turn "1e5" into 1, "30s" into 30 and "0.5" into 0 (which disables
  // the timeout entirely); a malformed value should fall back to the default
  // instead of producing a surprising timeout.
  if (!/^[+-]?\d+$/.test(text)) return DEFAULT_TEXT_TIMEOUT_MS;

  const parsed = Number(text);
  if (!Number.isFinite(parsed)) return DEFAULT_TEXT_TIMEOUT_MS;
  return parsed <= 0 ? undefined : parsed;
}
91
+
92
/**
 * Token-by-token streaming of the native `/chat/completions` round-trip. On by
 * default so the user-visible reply renders from the first token instead of
 * waiting for the whole generation. `ELIZAOS_CLOUD_STREAMING=0`/`false`/`off`
 * forces the buffered path (kill-switch). Streaming only engages when the
 * runtime actually requests it (`params.stream`), so non-streaming callers
 * (connectors with no UI stream) are unaffected.
 */
const STREAMING_ENV = "ELIZAOS_CLOUD_STREAMING";

/**
 * Report whether streaming is allowed by the environment.
 *
 * @returns `false` only when `ELIZAOS_CLOUD_STREAMING` is set to `0`, `false`
 *   or `off` (compared after trimming and lower-casing); `true` when the
 *   variable is unset, holds any other value, or `process` is unavailable.
 */
export function resolveStreamingEnabled(): boolean {
  if (typeof process === "undefined") return true;

  const configured = process.env[STREAMING_ENV];
  if (configured === undefined) return true;

  switch (configured.trim().toLowerCase()) {
    case "0":
    case "false":
    case "off":
      return false;
    default:
      return true;
  }
}
108
+
109
/**
 * Produce the single abort signal handed to `requestRaw` for a stream.
 *
 * A stream is long-lived and must stop on EITHER a caller cancel OR the
 * client-side timeout, but `requestRaw` honors only one signal, so the two are
 * merged here.
 *
 * @param abortSignal - The runtime's cancel signal, if any.
 * @param timeoutMs - Timeout in milliseconds; ignored unless it is a number
 *   greater than zero.
 * @returns The merged signal when both inputs apply, whichever one applies
 *   when only one does, or `undefined` when neither does.
 */
function buildStreamAbortSignal(
  abortSignal: AbortSignal | undefined,
  timeoutMs: number | undefined
): AbortSignal | undefined {
  // `!(x > 0)` rather than `x <= 0` so that NaN is treated as "no timeout".
  if (typeof timeoutMs !== "number" || !(timeoutMs > 0)) {
    return abortSignal ?? undefined;
  }

  const deadline = AbortSignal.timeout(timeoutMs);
  return abortSignal ? AbortSignal.any([abortSignal, deadline]) : deadline;
}
126
+
127
/** Lazily created process-wide limiter; `null` until first use or after a reset. */
let nativeChatLimiter: Semaphore | null = null;

/**
 * Resolve the concurrency cap for native cloud text calls.
 *
 * Reads `NATIVE_CONCURRENCY_ENV` from `process.env` when `process` exists.
 *
 * @returns The configured value when it is a plain base-10 integer greater
 *   than zero; `DEFAULT_NATIVE_CONCURRENCY` otherwise (unset, blank, malformed,
 *   zero, or negative).
 */
function resolveNativeConcurrency(): number {
  const raw =
    typeof process !== "undefined" ? process.env[NATIVE_CONCURRENCY_ENV] : undefined;
  const text = raw?.trim() ?? "";

  // Accept only an optionally signed run of digits. `Number.parseInt` would
  // quietly turn "1e3" into 1 (full serialization) and "2.5" into 2; a
  // malformed value should fall back to the default instead.
  if (!/^[+-]?\d+$/.test(text)) return DEFAULT_NATIVE_CONCURRENCY;

  const parsed = Number(text);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_NATIVE_CONCURRENCY;
}

/**
 * Return the shared limiter, creating it on first call with the cap from
 * `resolveNativeConcurrency()`. Later calls reuse the cached instance, so the
 * env knob is not re-read until the cache is cleared.
 */
function getNativeChatLimiter(): Semaphore {
  if (nativeChatLimiter === null) {
    nativeChatLimiter = new Semaphore(resolveNativeConcurrency());
  }
  return nativeChatLimiter;
}
142
+
143
/**
 * Run a single cerebras-bound network round-trip under the shared per-process
 * concurrency cap. Hold the permit only across `fn` (the `requestRaw` call);
 * release the instant the server responds so response-body parsing runs
 * unguarded. `finally` frees the permit even on throw so a failed call never
 * starves the queue. Used by BOTH native text routes (`/chat/completions` and
 * `/responses`) so every cerebras text call shares one budget.
 *
 * Exported for unit tests that drive the shared cap directly.
 *
 * `label` (e.g. `responses` / `chat/completions`) tags the latency spans this
 * records on the active per-turn inference timer: `cloud.semaphore-wait` (time
 * spent queued for a permit — non-zero means the cap is serializing) and
 * `cloud.http:<label>` (the network round-trip). Both are no-ops when no turn
 * timer is active.
 *
 * @param fn - The round-trip to run while holding a permit.
 * @param label - Route tag attached to both spans; defaults to `"native"`.
 * @returns Whatever `fn` resolves to; rejections from `fn` propagate.
 */
export async function withNativeChatLimit<T>(
  fn: () => Promise<T>,
  label = "native"
): Promise<T> {
  const limiter = getNativeChatLimiter();
  const waitStartedAt = Date.now();
  await limiter.acquire();
  try {
    // Recorded inside the `try` on purpose: the permit is already held here,
    // so a throw from span recording must still reach `finally` and release it.
    recordInferenceSpan("cloud.semaphore-wait", Date.now() - waitStartedAt, {
      route: label,
    });
    return await timeInferenceSpan(`cloud.http:${label}`, fn, { route: label });
  } finally {
    limiter.release();
  }
}
175
+
176
/**
 * Test-only: discard the cached limiter so the next call to
 * `getNativeChatLimiter()` builds a fresh one and therefore re-reads the env
 * knob. Production code never needs this — the knob is read once per process.
 */
export function __resetNativeChatLimiterForTests(): void {
  nativeChatLimiter = null;
}
183
+
184
/**
 * Loosely-typed JSON body of a `/responses` reply. Only the fields this file
 * reads are spelled out; everything else stays `unknown`.
 */
type ResponsesApiResponse = Record<string, unknown> & {
  // Error envelope; its `message` is used for the thrown Error on non-OK replies.
  error?: {
    message?: string;
  };
  // Token accounting forwarded to the model-usage event.
  usage?: {
    input_tokens?: number;
    output_tokens?: number;
    total_tokens?: number;
  } & Record<string, unknown>;
};
194
+
195
/**
 * Models that are known to be reasoning-class and don't support temperature.
 * These are models that use chain-of-thought internally and reject the
 * `temperature` parameter, so the request builders in this file leave it out
 * for them.
 *
 * Entries are matched by `isReasoningModel` as case-insensitive SUBSTRINGS of
 * the model name, not as exact ids.
 */
const REASONING_MODEL_PATTERNS = [
  "o1",
  "o3",
  "o4",
  "deepseek-r1",
  "deepseek-reasoner",
  "claude-opus-4.7",
  "claude-opus-4-7",
  "gpt-5",
] as const;
209
/** One attachment on a text request: the payload, its media type, and an optional filename. */
type ChatAttachment = {
  data: string | Uint8Array | URL;
  mediaType: string;
  filename?: string;
};

/** `GenerateTextParams` extended with an optional attachment list. */
type GenerateTextParamsWithAttachments = GenerateTextParams & {
  attachments?: ChatAttachment[];
};

/**
 * Text params plus the loosely-typed "native" options this file forwards to
 * `/chat/completions`. The values are `unknown` here and are normalized by the
 * `normalizeNative*` / `buildNative*` helpers before they go on the wire.
 */
type GenerateTextParamsWithNativeOptions = GenerateTextParamsWithAttachments & {
  messages?: unknown[];
  tools?: unknown;
  toolChoice?: unknown;
  responseSchema?: unknown;
  providerOptions?: Record<string, unknown>;
};
226
+
227
/**
 * Normalized token accounting produced by `convertNativeUsage`.
 * `cachedPromptTokens` and `cacheReadInputTokens` are filled from the same
 * value there.
 */
type NativeTokenUsage = {
  promptTokens: number;
  completionTokens: number;
  totalTokens: number;
  cachedPromptTokens?: number;
  cacheReadInputTokens?: number;
  cacheCreationInputTokens?: number;
};

/** Structured result of a buffered `/chat/completions` call. */
type NativeGenerateTextResult = {
  text: string;
  toolCalls: unknown[];
  finishReason?: string;
  usage?: NativeTokenUsage;
  providerMetadata?: unknown;
};

// Intersection with `string`, used only as a cast target where
// generateTextWithModel hands back the structured native result through its
// string-typed return.
type NativeGenerateTextModelResult = NativeGenerateTextResult & string;

/** One tool invocation extracted from a chat-completions reply. */
type NativeToolCall = {
  type: "tool-call";
  toolCallId: string;
  toolName: string;
  // Parsed JSON when the provider sent a JSON string; otherwise the raw value.
  input: unknown;
};
252
+
253
/**
 * Loosely-typed JSON body of a `/chat/completions` reply. Only the fields this
 * file reads are declared; `usage` is kept as an open record because
 * `convertNativeUsage` probes several alternative key spellings.
 */
type ChatCompletionsResponse = Record<string, unknown> & {
  // Error envelope; its `message` is used for the thrown Error on non-OK replies.
  error?: {
    message?: string;
  };
  // Only the first choice is read by the extractors in this file.
  choices?: Array<{
    text?: string;
    finish_reason?: string;
    message?: {
      content?: unknown;
      tool_calls?: unknown[];
    };
  }>;
  usage?: Record<string, unknown>;
};
267
+
268
/**
 * Model-id prefixes for Eliza-Cloud-hosted `eliza-1` deployments (a fork of
 * llama-server, or vLLM with the eliza1 parsers) that honor the
 * `x-eliza-span-samplers` header. Other upstreams (OpenAI / Anthropic / generic
 * OpenRouter) strip unknown headers safely, but the per-span sampler plan is
 * attached only for models on this list to keep the wire surface narrow.
 *
 * The list is deliberately conservative — extend it when a new fork-built
 * deployment lands. Anything not listed simply does not get the header, which
 * preserves existing behavior on every other provider.
 */
const SPAN_SAMPLER_HONORING_MODEL_PREFIXES = [
  "vast/eliza-1-",
  "elizaos/eliza-1-",
  "eliza-1-",
] as const;

/**
 * Report whether `modelName` starts (case-insensitively) with one of the
 * span-sampler-honoring prefixes.
 */
function isSpanSamplerHonoringModel(modelName: string): boolean {
  const normalized = modelName.toLowerCase();
  for (const prefix of SPAN_SAMPLER_HONORING_MODEL_PREFIXES) {
    if (normalized.startsWith(prefix)) {
      return true;
    }
  }
  return false;
}
291
+
292
/**
 * Serialize a span sampler plan into the `x-eliza-span-samplers` header value.
 *
 * Wire schema (snake_case):
 *   { overrides: [{ span_index, temperature, top_k?, top_p? }, ...], strict?: boolean }
 *
 * @param plan - The optional plan from the text params.
 * @returns The JSON string, or `undefined` when there is no plan or it has no
 *   overrides (so no header is sent at all).
 */
function buildSpanSamplerHeader(
  plan: GenerateTextParams["spanSamplerPlan"],
): string | undefined {
  if (!plan) return undefined;
  if (plan.overrides.length === 0) return undefined;

  const overrides: Array<Record<string, unknown>> = [];
  for (const override of plan.overrides) {
    const entry: Record<string, unknown> = {
      span_index: override.spanIndex,
      temperature: override.temperature,
    };
    // top_k / top_p are optional on the wire; emit them only when numeric.
    if (typeof override.topK === "number") entry.top_k = override.topK;
    if (typeof override.topP === "number") entry.top_p = override.topP;
    overrides.push(entry);
  }

  const payload: Record<string, unknown> =
    plan.strict === true ? { overrides, strict: true } : { overrides };
  return JSON.stringify(payload);
}
317
+
318
/**
 * Find the USD cost reported for a request, if any.
 *
 * Lookup order: `usage.cost_usd`, `usage.costUsd`, `usage.cost` from the
 * response body, then the `X-Eliza-Cost-Usd` response header. A reported cost
 * is preferred over any client-side token estimate.
 *
 * @param usage - The raw `usage` value from the response body.
 * @param response - Anything exposing `headers.get(name)`; optional.
 * @returns A finite number, or `undefined` when neither source yields one so
 *   consumers can fall back to a token-based estimate.
 */
function extractCostUsd(
  usage: unknown,
  response?: { headers?: { get?: (name: string) => string | null } }
): number | undefined {
  const usageRecord = asRecord(usage);
  const bodyCost = firstNumber(usageRecord.cost_usd, usageRecord.costUsd, usageRecord.cost);
  if (typeof bodyCost === "number" && Number.isFinite(bodyCost)) {
    return bodyCost;
  }

  const headerValue = response?.headers?.get?.("X-Eliza-Cost-Usd");
  if (!headerValue) {
    return undefined;
  }
  const headerCost = Number(headerValue);
  return Number.isFinite(headerCost) ? headerCost : undefined;
}
345
+
346
/**
 * Report whether the lower-cased model name contains any entry of
 * `REASONING_MODEL_PATTERNS` as a substring.
 */
function isReasoningModel(modelName: string): boolean {
  const normalized = modelName.toLowerCase();
  for (const pattern of REASONING_MODEL_PATTERNS) {
    if (normalized.includes(pattern)) {
      return true;
    }
  }
  return false;
}
350
+
351
/** Type guard: true for non-null, non-array objects. */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return !Array.isArray(value);
}
354
+
355
/** Return `value` when it is a plain (non-array) object, otherwise a fresh empty object. */
function asRecord(value: unknown): Record<string, unknown> {
  if (isRecord(value)) {
    return value;
  }
  return {};
}
358
+
359
/** Read `value[key]` as a record, yielding an empty object when it is missing or not an object. */
function recordAt(value: Record<string, unknown>, key: string): Record<string, unknown> {
  const nested = value[key];
  return asRecord(nested);
}
362
+
363
/**
 * Return the first argument that is a string with non-whitespace content,
 * trimmed; `undefined` when no argument qualifies.
 */
function firstString(...values: unknown[]): string | undefined {
  const match = values.find(
    (candidate): candidate is string =>
      typeof candidate === "string" && candidate.trim().length > 0
  );
  return match?.trim();
}
371
+
372
/**
 * Return the first argument that is a finite number, or a non-blank string
 * that `Number()` converts to a finite number; `undefined` when none qualifies.
 */
function firstNumber(...values: unknown[]): number | undefined {
  for (const candidate of values) {
    let numeric: number;
    if (typeof candidate === "number") {
      numeric = candidate;
    } else if (typeof candidate === "string" && candidate.trim().length > 0) {
      numeric = Number(candidate);
    } else {
      continue;
    }
    if (Number.isFinite(numeric)) {
      return numeric;
    }
  }
  return undefined;
}
386
+
387
/**
 * Best-effort JSON decode. Strings are parsed, falling back to the original
 * string when parsing fails. Non-strings pass through unchanged, except that
 * `null`/`undefined` become an empty object.
 */
function parseJsonIfPossible(value: unknown): unknown {
  if (typeof value === "string") {
    try {
      return JSON.parse(value) as unknown;
    } catch {
      return value;
    }
  }
  return value ?? {};
}
397
+
398
/**
 * Coerce arbitrary message content to text: strings as-is, `null`/`undefined`
 * to the empty string, objects via `JSON.stringify`, everything else via
 * `String()`.
 */
function stringifyMessageContent(content: unknown): string {
  if (content == null) {
    return "";
  }
  switch (typeof content) {
    case "string":
      return content;
    case "object":
      return JSON.stringify(content);
    default:
      return String(content);
  }
}
407
+
408
/**
 * True when any native-only option (`messages`, `tools`, `toolChoice`,
 * `responseSchema`, `providerOptions`) is truthy. `generateTextWithModel` uses
 * this to choose the `/chat/completions` path over the bare `/responses` path.
 */
function hasNativeTransportOptions(params: GenerateTextParamsWithNativeOptions): boolean {
  if (params.messages) return true;
  if (params.tools) return true;
  if (params.toolChoice) return true;
  if (params.responseSchema) return true;
  return Boolean(params.providerOptions);
}
417
+
418
/**
 * True when the caller supplied `messages`, `tools`, `toolChoice`, or
 * `responseSchema` — i.e. when the structured native result should be returned
 * instead of only its text. `providerOptions` alone does not qualify.
 */
function shouldReturnNativeResult(params: GenerateTextParamsWithNativeOptions): boolean {
  if (params.messages) return true;
  if (params.tools) return true;
  if (params.toolChoice) return true;
  return Boolean(params.responseSchema);
}
421
+
422
/**
 * Build the `messages` array for a `/chat/completions` request.
 *
 * With caller-supplied messages: object entries are shallow-copied, anything
 * else is wrapped as a `user` message with stringified content, and the system
 * prompt is prepended unless the first message already has role `system`.
 * Without supplied messages: an optional system message followed by one `user`
 * message carrying `promptText`.
 */
function buildNativeMessages(
  params: GenerateTextParamsWithNativeOptions,
  promptText: string,
  systemPrompt?: string
): Array<Record<string, unknown>> {
  const supplied = params.messages;

  if (!Array.isArray(supplied) || supplied.length === 0) {
    const fallback: Array<Record<string, unknown>> = systemPrompt
      ? [{ role: "system", content: systemPrompt }]
      : [];
    fallback.push({ role: "user", content: promptText });
    return fallback;
  }

  const normalized = supplied.map((message) => {
    if (isRecord(message)) {
      return { ...message };
    }
    return { role: "user", content: stringifyMessageContent(message) };
  });

  const leadingRole = asRecord(normalized[0]).role;
  if (systemPrompt && leadingRole !== "system") {
    return [{ role: "system", content: systemPrompt }, ...normalized];
  }
  return normalized;
}
447
+
448
/**
 * Unwrap a schema wrapper: return its `schema` property, else its `jsonSchema`
 * property, else the value itself when neither is set.
 */
function unwrapJsonSchema(value: unknown): unknown {
  const { schema, jsonSchema } = asRecord(value);
  if (schema !== undefined && schema !== null) {
    return schema;
  }
  if (jsonSchema !== undefined && jsonSchema !== null) {
    return jsonSchema;
  }
  return value;
}
452
+
453
// Convert one tool definition to the OpenAI `{ type, function }` wire shape.
// Two input layouts are accepted: the already-nested one (`{ type: "function",
// function: { name, parameters } }`) and core's FLAT `ToolDefinition` envelope
// (`{ name, type: "function", parameters }`, e.g. createHandleResponseTool /
// the action planner). Forwarding the flat layout untouched made the cloud
// gateway read `tool.function.name` on an undefined `function` → "Cannot read
// properties of undefined (reading 'name')". An entry whose name cannot be
// resolved yields undefined so the caller can drop it instead of crashing
// downstream.
function normalizeNativeToolEntry(
  rawTool: unknown,
  fallbackName?: string
): Record<string, unknown> | undefined {
  const flat = asRecord(rawTool);
  const nestedFn = asRecord(flat.function);

  const name = firstString(nestedFn.name, flat.name, fallbackName);
  if (!name) {
    return undefined;
  }

  // Nested parameters win, then the flat spellings; default to an open object schema.
  const rawSchema =
    nestedFn.parameters ?? flat.inputSchema ?? flat.parameters ?? flat.schema ?? { type: "object" };

  const fnShape: Record<string, unknown> = { name };
  const description = firstString(nestedFn.description, flat.description);
  if (description) {
    fnShape.description = description;
  }
  fnShape.parameters = unwrapJsonSchema(rawSchema);

  return { type: "function", function: fnShape };
}
487
+
488
/**
 * Normalize a tool collection to an array of OpenAI-style function tools.
 *
 * @param tools - Either an array of tool definitions, or an object keyed by
 *   tool name (the key is the fallback name for its entry).
 * @returns The normalized entries, or `undefined` when `tools` is falsy or no
 *   entry has a resolvable name.
 */
export function normalizeNativeTools(tools: unknown): unknown[] | undefined {
  if (!tools) {
    return undefined;
  }

  const entries: Array<Record<string, unknown> | undefined> = Array.isArray(tools)
    ? tools.map((tool) => normalizeNativeToolEntry(tool))
    : Object.entries(asRecord(tools)).map(([name, rawTool]) =>
        normalizeNativeToolEntry(rawTool, name)
      );

  const normalized = entries.filter(
    (entry): entry is Record<string, unknown> => entry !== undefined
  );
  return normalized.length > 0 ? normalized : undefined;
}
511
+
512
/**
 * Normalize a tool-choice value for the wire.
 *
 * - falsy → `undefined`
 * - `"auto"` / `"none"` / `"required"` → returned as-is
 * - `{ type: "function", ... }` → returned as-is
 * - otherwise, when a tool name can be found (`toolName`, `name`, and — except
 *   for `type: "tool"` — `function.name`), → `{ type: "function", function: { name } }`
 * - anything else → the input unchanged
 */
function normalizeNativeToolChoice(toolChoice: unknown): unknown {
  if (!toolChoice) {
    return undefined;
  }

  if (typeof toolChoice === "string" && ["auto", "none", "required"].includes(toolChoice)) {
    return toolChoice;
  }

  const choice = asRecord(toolChoice);
  if (choice.type === "function") {
    return toolChoice;
  }

  const nameCandidates: unknown[] = [choice.toolName, choice.name];
  if (choice.type !== "tool") {
    nameCandidates.push(asRecord(choice.function).name);
  }

  const toolName = firstString(...nameCandidates);
  if (!toolName) {
    return toolChoice;
  }
  return { type: "function", function: { name: toolName } };
}
537
+
538
/**
 * Build the `response_format` value for a structured-output request.
 *
 * A truthy `responseFormat` property on the input is returned verbatim. An
 * input with a `schema` key is treated as `{ schema, name?, description? }`.
 * Any other input is used as the schema itself. The name defaults to
 * `"structured_response"`.
 *
 * @returns `undefined` for a falsy input, otherwise a `json_schema` format object
 *   (or the caller-provided `responseFormat`).
 */
function buildNativeResponseFormat(responseSchema: unknown): unknown {
  if (!responseSchema) {
    return undefined;
  }

  const record = asRecord(responseSchema);
  if (record.responseFormat) {
    return record.responseFormat;
  }

  const wrapped = "schema" in record;
  const name = (wrapped ? firstString(record.name) : undefined) ?? "structured_response";
  const description = wrapped ? firstString(record.description) : undefined;

  const jsonSchema: Record<string, unknown> = { name };
  if (description) {
    jsonSchema.description = description;
  }
  jsonSchema.schema = wrapped ? record.schema : responseSchema;

  return { type: "json_schema", json_schema: jsonSchema };
}
566
+
567
/**
 * Find the prompt cache key in provider options.
 *
 * Scopes are searched in order — top level, `eliza`, `openrouter`, `openai`,
 * `cerebras` — and within each scope `promptCacheKey` is tried before
 * `prompt_cache_key`. The first non-blank string wins (trimmed).
 */
function resolvePromptCacheKey(providerOptions: Record<string, unknown>): string | undefined {
  const scopes: Array<Record<string, unknown>> = [
    providerOptions,
    recordAt(providerOptions, "eliza"),
    recordAt(providerOptions, "openrouter"),
    recordAt(providerOptions, "openai"),
    recordAt(providerOptions, "cerebras"),
  ];
  const candidates = scopes.flatMap((scope) => [scope.promptCacheKey, scope.prompt_cache_key]);
  return firstString(...candidates);
}
586
+
587
/**
 * Prepare `providerOptions` for the wire.
 *
 * The `agentName` and `eliza` keys are dropped. When a prompt cache key is
 * found anywhere in the original options, it is written into the `openai` and
 * `openrouter` sections (both spellings) and into the `cerebras` section
 * (`prompt_cache_key` only), on top of whatever those sections already held.
 *
 * @returns The prepared options, or `undefined` when the input is empty or
 *   nothing is left to send.
 */
function resolveNativeProviderOptions(
  params: GenerateTextParamsWithNativeOptions
): Record<string, unknown> | undefined {
  const raw = asRecord(params.providerOptions);
  if (Object.keys(raw).length === 0) {
    return undefined;
  }

  // The rest-object is already a fresh copy, so it is safe to mutate below.
  const { agentName: _agentName, eliza: _eliza, ...forwarded } = raw;
  const cacheKey = resolvePromptCacheKey(raw);

  if (cacheKey) {
    for (const provider of ["openai", "openrouter"] as const) {
      forwarded[provider] = {
        ...recordAt(forwarded, provider),
        promptCacheKey: cacheKey,
        prompt_cache_key: cacheKey,
      };
    }
    forwarded.cerebras = {
      ...recordAt(forwarded, "cerebras"),
      prompt_cache_key: cacheKey,
    };
  }

  if (Object.keys(forwarded).length === 0) {
    return undefined;
  }
  return forwarded;
}
618
+
619
/**
 * Copy selected provider options onto the top level of the request body.
 *
 * From `providerOptions.openrouter`: `provider`, `models`, `route`,
 * `transforms`, `reasoning` (each only when defined). From the top level of
 * `providerOptions`: `gateway` (when defined). Mutates `requestBody` in place.
 */
function applyOpenRouterPassthroughFields(
  requestBody: Record<string, unknown>,
  providerOptions: Record<string, unknown> | undefined
): void {
  if (!providerOptions) {
    return;
  }

  const openrouter = recordAt(providerOptions, "openrouter");
  const passthroughKeys = ["provider", "models", "route", "transforms", "reasoning"] as const;
  for (const key of passthroughKeys) {
    const value = openrouter[key];
    if (value !== undefined) {
      requestBody[key] = value;
    }
  }

  const gateway = providerOptions.gateway;
  if (gateway !== undefined) {
    requestBody.gateway = gateway;
  }
}
645
+
646
/**
 * Assemble the JSON body for a `/chat/completions` request.
 *
 * Always sets `model`, `messages`, and `max_tokens` (default 8192). Optional
 * fields are added only when present: `temperature` (numeric and not a
 * reasoning model), `tools`, `tool_choice`, `response_format`, provider options
 * (under both `providerOptions` and `provider_options`), and the prompt cache
 * key (under both `promptCacheKey` and `prompt_cache_key`). OpenRouter/gateway
 * passthrough fields are applied last.
 */
function buildNativeRequestBody(
  params: GenerateTextParamsWithNativeOptions,
  modelName: string,
  promptText: string,
  systemPrompt?: string
): Record<string, unknown> {
  const providerOptions = resolveNativeProviderOptions(params);
  const promptCacheKey = providerOptions ? resolvePromptCacheKey(providerOptions) : undefined;
  const tools = normalizeNativeTools(params.tools);
  const toolChoice = normalizeNativeToolChoice(params.toolChoice);
  const responseFormat = buildNativeResponseFormat(params.responseSchema);
  const sendTemperature = typeof params.temperature === "number" && !isReasoningModel(modelName);

  const requestBody: Record<string, unknown> = {
    model: modelName,
    messages: buildNativeMessages(params, promptText, systemPrompt),
    max_tokens: params.maxTokens ?? 8192,
    ...(sendTemperature ? { temperature: params.temperature } : {}),
    ...(tools ? { tools } : {}),
    ...(toolChoice ? { tool_choice: toolChoice } : {}),
    ...(responseFormat ? { response_format: responseFormat } : {}),
    ...(providerOptions ? { providerOptions, provider_options: providerOptions } : {}),
    ...(promptCacheKey ? { promptCacheKey, prompt_cache_key: promptCacheKey } : {}),
  };

  applyOpenRouterPassthroughFields(requestBody, providerOptions);
  return requestBody;
}
687
+
688
/**
 * Flatten message content to text. A string is returned as-is; an array is
 * concatenated part by part (string parts verbatim, object parts via their
 * first non-blank `text` / `output_text` / `content` string); anything else
 * yields the empty string.
 */
function extractTextFromContent(content: unknown): string {
  if (typeof content === "string") {
    return content;
  }
  if (!Array.isArray(content)) {
    return "";
  }

  let text = "";
  for (const part of content) {
    if (typeof part === "string") {
      text += part;
      continue;
    }
    const record = asRecord(part);
    text += firstString(record.text, record.output_text, record.content) ?? "";
  }
  return text;
}
703
+
704
/**
 * Pull the reply text from the first choice: its `text` field if non-blank,
 * else the flattened `message.content`. The result is trimmed; an absent
 * choice or blank text yields the empty string.
 */
function extractChatCompletionText(data: ChatCompletionsResponse): string {
  const choice = data.choices?.[0];
  if (!choice) {
    return "";
  }
  const messageText = extractTextFromContent(choice.message?.content);
  const resolved = firstString(choice.text, messageText);
  return resolved ?? "";
}
711
+
712
/**
 * Extract tool calls from the first choice of a chat-completions reply.
 *
 * Each raw call may use either the OpenAI nesting (`function.name` /
 * `function.arguments`) or flat fields (`name` / `toolName`, `input` /
 * `arguments`). Calls without a resolvable tool name are dropped. Arguments
 * that arrive as a JSON string are parsed; unparseable strings are kept as-is.
 *
 * When the provider omits the call id, a fallback id is generated. The first
 * fallback for a tool is `call_<toolName>`; later id-less calls get a numeric
 * suffix (`call_<toolName>_1`, …) so that ids within one response stay unique
 * and tool results can be matched back to the right call.
 *
 * @returns The normalized calls in response order; empty when there are none
 *   or `tool_calls` is not an array.
 */
function extractNativeToolCalls(data: ChatCompletionsResponse): NativeToolCall[] {
  const rawCalls = data.choices?.[0]?.message?.tool_calls ?? [];
  if (!Array.isArray(rawCalls)) {
    return [];
  }

  const usedIds = new Set<string>();
  const calls: NativeToolCall[] = [];

  for (const rawCall of rawCalls) {
    const call = asRecord(rawCall);
    const fn = recordAt(call, "function");
    const toolName = firstString(call.name, call.toolName, fn.name);
    if (!toolName) {
      continue;
    }

    // Provider-supplied ids are kept verbatim; only generated ids are de-duplicated.
    let toolCallId = firstString(call.id, call.toolCallId);
    if (!toolCallId) {
      const base = `call_${toolName}`;
      toolCallId = base;
      for (let suffix = 1; usedIds.has(toolCallId); suffix += 1) {
        toolCallId = `${base}_${suffix}`;
      }
    }
    usedIds.add(toolCallId);

    calls.push({
      type: "tool-call",
      toolCallId,
      toolName,
      input: parseJsonIfPossible(call.input ?? call.arguments ?? fn.arguments ?? {}),
    });
  }

  return calls;
}
735
+
736
/**
 * Normalize a provider `usage` object into `NativeTokenUsage`.
 *
 * Each figure is read from the first numeric value among several alternative
 * spellings (camelCase and snake_case, top-level and nested detail objects),
 * in the order listed below. Prompt/completion counts default to 0; the total
 * defaults to their sum. Cache figures stay `undefined` when not reported.
 *
 * @returns `undefined` when `usage` is not an object or has no keys.
 */
function convertNativeUsage(usage: unknown): NativeTokenUsage | undefined {
  const root = asRecord(usage);
  if (Object.keys(root).length === 0) {
    return undefined;
  }

  const camelDetails = recordAt(root, "inputTokenDetails");
  const promptDetails = recordAt(root, "prompt_tokens_details");
  const snakeDetails = recordAt(root, "input_tokens_details");

  const promptCandidates = [
    root.inputTokens,
    root.input_tokens,
    root.promptTokens,
    root.prompt_tokens,
  ];
  const completionCandidates = [
    root.outputTokens,
    root.output_tokens,
    root.completionTokens,
    root.completion_tokens,
  ];
  const cacheReadCandidates = [
    root.cacheReadInputTokens,
    root.cache_read_input_tokens,
    root.cachedInputTokens,
    root.cached_input_tokens,
    root.cachedTokens,
    root.cached_tokens,
    camelDetails.cacheReadTokens,
    camelDetails.cachedInputTokens,
    camelDetails.cachedTokens,
    promptDetails.cached_tokens,
    snakeDetails.cache_read_input_tokens,
    snakeDetails.cached_tokens,
  ];
  const cacheWriteCandidates = [
    root.cacheCreationInputTokens,
    root.cache_creation_input_tokens,
    root.cacheWriteInputTokens,
    root.cache_write_input_tokens,
    camelDetails.cacheCreationInputTokens,
    camelDetails.cacheCreationTokens,
    camelDetails.cacheWriteTokens,
    snakeDetails.cache_creation_input_tokens,
  ];

  const promptTokens = firstNumber(...promptCandidates) ?? 0;
  const completionTokens = firstNumber(...completionCandidates) ?? 0;
  const cacheReadInputTokens = firstNumber(...cacheReadCandidates);
  const cacheCreationInputTokens = firstNumber(...cacheWriteCandidates);
  const reportedTotal = firstNumber(root.totalTokens, root.total_tokens);

  return {
    promptTokens,
    completionTokens,
    totalTokens: reportedTotal ?? promptTokens + completionTokens,
    cachedPromptTokens: cacheReadInputTokens,
    cacheReadInputTokens,
    cacheCreationInputTokens,
  };
}
789
+
790
/**
 * Union of the model-type constants accepted by the text-generation helpers in
 * this file (model-name lookup, purpose header, and the two request paths).
 */
type TextModelType =
  | typeof TEXT_NANO_MODEL_TYPE
  | typeof TEXT_MEDIUM_MODEL_TYPE
  | typeof TEXT_SMALL_MODEL_TYPE
  | typeof TEXT_LARGE_MODEL_TYPE
  | typeof TEXT_MEGA_MODEL_TYPE
  | typeof RESPONSE_HANDLER_MODEL_TYPE
  | typeof ACTION_PLANNER_MODEL_TYPE;
798
+
799
/**
 * Map a model type to the value sent in the `X-Eliza-Llm-Purpose` header:
 * `"should_respond"` for the response handler, `"action_planner"` for the
 * action planner, and `"response"` for every other type.
 */
function getPurposeForModelType(modelType: TextModelType): string {
  if (modelType === RESPONSE_HANDLER_MODEL_TYPE) {
    return "should_respond";
  }
  if (modelType === ACTION_PLANNER_MODEL_TYPE) {
    return "action_planner";
  }
  return "response";
}
809
+
810
/**
 * Resolve the configured model name for a model type by delegating to the
 * matching `get*Model(runtime)` accessor. Unrecognized types fall back to the
 * large model.
 */
function getModelNameForType(runtime: IAgentRuntime, modelType: TextModelType): string {
  if (modelType === TEXT_NANO_MODEL_TYPE) {
    return getNanoModel(runtime);
  }
  if (modelType === TEXT_MEDIUM_MODEL_TYPE) {
    return getMediumModel(runtime);
  }
  if (modelType === TEXT_SMALL_MODEL_TYPE) {
    return getSmallModel(runtime);
  }
  if (modelType === TEXT_LARGE_MODEL_TYPE) {
    return getLargeModel(runtime);
  }
  if (modelType === TEXT_MEGA_MODEL_TYPE) {
    return getMegaModel(runtime);
  }
  if (modelType === RESPONSE_HANDLER_MODEL_TYPE) {
    return getResponseHandlerModel(runtime);
  }
  if (modelType === ACTION_PLANNER_MODEL_TYPE) {
    return getActionPlannerModel(runtime);
  }
  return getLargeModel(runtime);
}
830
+
831
/**
 * Resolve the three inputs every cloud text call needs: the model name, the
 * effective system prompt, and the rendered prompt text.
 *
 * The system prompt comes from `resolveEffectiveSystemPrompt`, with the
 * character's canonical system prompt as the fallback. The prompt text is the
 * rendering of `params.messages` (asked to omit a duplicate of that system
 * prompt); when that rendering is nullish, `params.prompt` is used, or the
 * empty string.
 *
 * Historical note: this helper once also constructed a Vercel AI-SDK
 * `LanguageModel` (`openai.chat()`) and a full `generateParams` object. The
 * handlers call the cloud HTTP API directly (`requestRaw` → `/responses` /
 * `/chat/completions`), so those objects were built and thrown away on every
 * call. They were removed as pure overhead and a misleading code path.
 */
function buildGenerateParams(
  runtime: IAgentRuntime,
  modelType: TextModelType,
  params: GenerateTextParams
) {
  const modelName = getModelNameForType(runtime, modelType);
  const fallback = buildCanonicalSystemPrompt({ character: runtime.character });
  const systemPrompt = resolveEffectiveSystemPrompt({ params, fallback });
  const rendered = renderChatMessagesForPrompt(params.messages, {
    omitDuplicateSystem: systemPrompt,
  });

  return {
    modelName,
    modelType,
    prompt: rendered ?? params.prompt ?? "",
    systemPrompt,
  };
}
860
+
861
/**
 * Generate text for `modelType` through the cloud HTTP API.
 *
 * Routing:
 * - If any native transport option is present (see `hasNativeTransportOptions`)
 *   the call goes to `/chat/completions` — streamed when streaming is requested
 *   for the structured reply path, buffered otherwise. The buffered path returns
 *   the structured result when `shouldReturnNativeResult` says so, else its text.
 * - Otherwise the bare prompt is sent to `/responses` and the output text is
 *   returned.
 *
 * @throws Error carrying `status` (and `error`, when the body had one) on a
 *   non-OK `/responses` reply.
 * @throws Error "elizaOS Cloud returned no text response" when the reply has
 *   no usable text (including an unparseable or non-object JSON body).
 */
async function generateTextWithModel(
  runtime: IAgentRuntime,
  modelType: TextModelType,
  params: GenerateTextParams
): Promise<string | TextStreamResult> {
  const { modelName, prompt, systemPrompt } = buildGenerateParams(runtime, modelType, params);
  const paramsWithNative = params as GenerateTextParamsWithNativeOptions;

  logger.debug(`[ELIZAOS_CLOUD] Generating text with ${modelType} model: ${modelName}`);

  // Stream the user-visible reply token-by-token. Gated to the structured
  // reply path (`streamStructured`, set only by the RESPONSE_HANDLER stage-1
  // call): that call carries a responseSkeleton, so the runtime's field
  // extractor surfaces `replyText` incrementally to the UI. Planner/other
  // native calls (no responseSkeleton) stay buffered — streaming their raw
  // envelope would leak internals to the UI stream. The bare `/responses`
  // route stays buffered too (different SSE schema, not on the reply path).
  const paramsStreaming = params as {
    stream?: boolean;
    streamStructured?: boolean;
  };
  const wantsStream =
    Boolean(paramsStreaming.stream) &&
    paramsStreaming.streamStructured === true &&
    resolveStreamingEnabled();

  logger.log(`[ELIZAOS_CLOUD] Using ${modelType} model: ${modelName}`);
  logger.log(prompt);

  if (hasNativeTransportOptions(paramsWithNative)) {
    if (wantsStream) {
      return streamNativeChatCompletion(runtime, modelType, paramsWithNative, {
        modelName,
        prompt,
        systemPrompt,
      });
    }
    const nativeResult = await generateNativeChatCompletion(runtime, modelType, paramsWithNative, {
      modelName,
      prompt,
      systemPrompt,
    });
    return shouldReturnNativeResult(paramsWithNative)
      ? (nativeResult as NativeGenerateTextModelResult)
      : nativeResult.text;
  }

  const reasoning = isReasoningModel(modelName);
  const input: Array<{
    role: "system" | "user";
    content: Array<{ type: "input_text"; text: string }>;
  }> = [];
  if (systemPrompt) {
    input.push({
      role: "system",
      content: [{ type: "input_text", text: systemPrompt }],
    });
  }
  input.push({
    role: "user",
    content: [{ type: "input_text", text: prompt }],
  });

  const requestBody: Record<string, unknown> = {
    model: modelName,
    input,
    max_output_tokens: params.maxTokens ?? 8192,
  };
  if (!reasoning && typeof params.temperature === "number") {
    requestBody.temperature = params.temperature;
  }

  const responsesHeaders: Record<string, string> = {
    "X-Eliza-Llm-Purpose": getPurposeForModelType(modelType),
    "X-Eliza-Model-Type": modelType,
  };
  if (isSpanSamplerHonoringModel(modelName)) {
    const samplerHeader = buildSpanSamplerHeader(params.spanSamplerPlan);
    if (samplerHeader) {
      responsesHeaders["x-eliza-span-samplers"] = samplerHeader;
    }
  }
  // Same shared cerebras key as the /chat/completions route, so gate this
  // bare-prompt round-trip through the SAME limiter (parsing stays unguarded).
  const response = await withNativeChatLimit(
    () =>
      createCloudApiClient(runtime).requestRaw("POST", "/responses", {
        headers: responsesHeaders,
        json: requestBody,
        timeoutMs: resolveTextTimeoutMs(),
      }),
    "responses"
  );
  const responseText = await response.text();
  let data: ResponsesApiResponse = {};
  if (responseText) {
    try {
      const parsed: unknown = JSON.parse(responseText);
      // Valid JSON is not necessarily an object: a body of `null` (or a bare
      // primitive) would make the property reads below throw a raw TypeError.
      // Treat such bodies like an empty response instead.
      if (typeof parsed === "object" && parsed !== null) {
        data = parsed as ResponsesApiResponse;
      } else {
        logger.error("[ELIZAOS_CLOUD] Responses JSON body was not an object");
      }
    } catch (parseErr) {
      logger.error(
        `[ELIZAOS_CLOUD] Failed to parse responses JSON: ${
          parseErr instanceof Error ? parseErr.message : String(parseErr)
        }`
      );
    }
  }

  if (!response.ok) {
    const errorBody = data.error;
    const errorMessage =
      typeof errorBody?.message === "string" && errorBody.message.trim()
        ? errorBody.message.trim()
        : `elizaOS Cloud error ${response.status}`;
    const requestError = new Error(errorMessage) as Error & {
      status?: number;
      error?: unknown;
    };
    requestError.status = response.status;
    if (errorBody) {
      requestError.error = errorBody;
    }
    throw requestError;
  }

  if (data.usage) {
    const inputTokens = data.usage.input_tokens ?? 0;
    const outputTokens = data.usage.output_tokens ?? 0;
    const costUsd = extractCostUsd(data.usage, response);
    emitModelUsageEvent(
      runtime,
      modelType,
      prompt,
      {
        inputTokens,
        outputTokens,
        // Fall back to the sum (as convertNativeUsage does) rather than
        // reporting a total of 0 next to non-zero input/output counts.
        totalTokens: data.usage.total_tokens ?? inputTokens + outputTokens,
      },
      {
        // Reuse the name resolved for the request itself.
        modelName,
        ...(typeof costUsd === "number" ? { costUsd } : {}),
      }
    );
  }

  const text = extractResponsesOutputText(data);
  if (!text.trim()) {
    throw new Error("elizaOS Cloud returned no text response");
  }

  return text;
}
1012
+
1013
/**
 * Run one buffered `/chat/completions` request and normalize the reply.
 *
 * Exported for unit tests (the concurrency limiter wrapper). Not part of the
 * plugin's public model-handler surface.
 *
 * @param runtime - Runtime used to build the cloud API client and emit usage.
 * @param modelType - Sent in the purpose / model-type headers and usage event.
 * @param params - Text params including the native options.
 * @param context - Pre-resolved model name, prompt text, and system prompt.
 * @returns Text, tool calls, finish reason, normalized usage, and raw provider
 *   metadata.
 * @throws Error carrying `status` (and `error`, when the body had one) on a
 *   non-OK reply.
 * @throws Error "elizaOS Cloud returned no text or tool calls" when the reply
 *   has neither (including an unparseable or non-object JSON body).
 */
export async function generateNativeChatCompletion(
  runtime: IAgentRuntime,
  modelType: TextModelType,
  params: GenerateTextParamsWithNativeOptions,
  context: {
    modelName: string;
    prompt: string;
    systemPrompt?: string;
  }
): Promise<NativeGenerateTextResult> {
  const requestBody = buildNativeRequestBody(
    params,
    context.modelName,
    context.prompt,
    context.systemPrompt
  );
  const headers: Record<string, string> = {
    "X-Eliza-Llm-Purpose": getPurposeForModelType(modelType),
    "X-Eliza-Model-Type": modelType,
  };
  // Per-span sampler overrides only ride along when the resolved model is a
  // fork-built eliza-1 deployment that knows how to honor the header. Other
  // upstreams (OpenAI / Anthropic / generic OpenRouter) strip unknown headers
  // safely, but we keep the wire surface narrow until the cloud honor path
  // lands in Wave 3.
  if (isSpanSamplerHonoringModel(context.modelName)) {
    const samplerHeader = buildSpanSamplerHeader(params.spanSamplerPlan);
    if (samplerHeader) {
      headers["x-eliza-span-samplers"] = samplerHeader;
    }
  }
  // Serialize the per-turn batcher/evaluator burst through the SAME shared
  // semaphore the /responses route uses, so N simultaneous native cloud text
  // calls don't overrun the one shared cerebras key's concurrent limit (-> 429
  // -> retries -> 30-63s). The permit is held only across the network
  // round-trip; the text()/JSON parse below runs unguarded.
  const response = await withNativeChatLimit(
    () =>
      createCloudApiClient(runtime).requestRaw("POST", "/chat/completions", {
        headers,
        json: requestBody,
        timeoutMs: resolveTextTimeoutMs(),
      }),
    "chat/completions"
  );
  const responseText = await response.text();
  let data: ChatCompletionsResponse = {};
  if (responseText) {
    try {
      const parsed: unknown = JSON.parse(responseText);
      // Valid JSON is not necessarily an object: a body of `null` (or a bare
      // primitive) would make the property reads below throw a raw TypeError.
      // Treat such bodies like an empty response instead.
      if (typeof parsed === "object" && parsed !== null) {
        data = parsed as ChatCompletionsResponse;
      } else {
        logger.error("[ELIZAOS_CLOUD] Chat completions JSON body was not an object");
      }
    } catch (parseErr) {
      logger.error(
        `[ELIZAOS_CLOUD] Failed to parse chat completions JSON: ${
          parseErr instanceof Error ? parseErr.message : String(parseErr)
        }`
      );
    }
  }

  if (!response.ok) {
    const errorBody = data.error;
    const errorMessage =
      typeof errorBody?.message === "string" && errorBody.message.trim()
        ? errorBody.message.trim()
        : `elizaOS Cloud error ${response.status}`;
    const requestError = new Error(errorMessage) as Error & {
      status?: number;
      error?: unknown;
    };
    requestError.status = response.status;
    if (errorBody) {
      requestError.error = errorBody;
    }
    throw requestError;
  }

  const usage = convertNativeUsage(data.usage);
  if (usage) {
    const costUsd = extractCostUsd(data.usage, response);
    emitModelUsageEvent(runtime, modelType, context.prompt, usage, {
      modelName: context.modelName,
      ...(typeof costUsd === "number" ? { costUsd } : {}),
    });
  }

  const text = extractChatCompletionText(data);
  const toolCalls = extractNativeToolCalls(data);
  if (!text.trim() && toolCalls.length === 0) {
    throw new Error("elizaOS Cloud returned no text or tool calls");
  }

  return {
    text,
    toolCalls,
    finishReason: data.choices?.[0]?.finish_reason,
    usage,
    providerMetadata: {
      modelName: context.modelName,
      usage: data.usage,
    },
  };
}
1119
+
1120
+ // ---------------------------------------------------------------------------
1121
+ // Streaming native /chat/completions (token-by-token, OpenAI-compatible SSE)
1122
+ // ---------------------------------------------------------------------------
1123
+
1124
/** A promise bundled with the callback that fulfils it from outside. */
interface Deferred<T> {
  /** Stays pending until `resolve` is invoked. */
  promise: Promise<T>;
  /** Fulfils `promise` with the given value; no reject counterpart exists. */
  resolve: (value: T) => void;
}

/**
 * Create a pending promise and hand back its resolver alongside it.
 *
 * @returns The `{ promise, resolve }` pair.
 */
function deferred<T>(): Deferred<T> {
  // The Promise executor runs synchronously, so `settle` is assigned before
  // the return statement reads it.
  let settle!: Deferred<T>["resolve"];
  const pending = new Promise<T>((fulfil) => {
    settle = fulfil;
  });
  return { promise: pending, resolve: settle };
}
1136
+
1137
/**
 * Parse an OpenAI-compatible SSE byte stream into the decoded JSON frame of
 * each `data:` line. Yields one object per frame; stops at `data: [DONE]`.
 * Tolerates partial reads (buffers across chunk boundaries) and ignores
 * non-`data:` lines (comments, blank separators), unparseable payloads, and
 * payloads that are valid JSON but not a plain object (scalars, arrays,
 * `null`). Exported for unit tests.
 *
 * @param body - The response body to read; it is locked for the duration of
 *   the iteration and cancelled + unlocked when the generator finishes or is
 *   closed early.
 * @returns An async generator of decoded frame objects.
 */
export async function* parseOpenAiSseStream(
  body: ReadableStream<Uint8Array>
): AsyncGenerator<Record<string, unknown>> {
  const reader = body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";

  // Classify one line: a frame object, the "DONE" terminator, or null (skip).
  const handle = (line: string): Record<string, unknown> | "DONE" | null => {
    const trimmed = line.trimStart();
    if (!trimmed.startsWith("data:")) return null;
    // trim() also strips the "\r" left behind by CRLF line endings.
    const payload = trimmed.slice(5).trim();
    if (payload === "") return null;
    if (payload === "[DONE]") return "DONE";
    let parsed: unknown;
    try {
      parsed = JSON.parse(payload);
    } catch {
      return null;
    }
    // Only plain objects are frames. Without this check a payload that is the
    // JSON string "DONE" would be mistaken for the terminator, and scalars or
    // arrays would be yielded under a type they do not have.
    return typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)
      ? (parsed as Record<string, unknown>)
      : null;
  };

  try {
    for (;;) {
      const { value, done } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      let nl = buffer.indexOf("\n");
      while (nl >= 0) {
        const frame = handle(buffer.slice(0, nl));
        buffer = buffer.slice(nl + 1);
        if (frame === "DONE") return;
        if (frame) yield frame;
        nl = buffer.indexOf("\n");
      }
    }
    // Flush any bytes the streaming decoder is still holding, then treat the
    // unterminated remainder as a final line.
    buffer += decoder.decode();
    const tail = handle(buffer);
    if (tail && tail !== "DONE") yield tail;
  } finally {
    // cancel() (not just releaseLock()) tears down the underlying connection,
    // so an EARLY consumer break (runtime abort / turn-supersede / a downstream
    // throw closes this generator via .return()) stops the upstream generation
    // instead of letting it run to its natural end and bill tokens nobody reads.
    // On natural completion the stream is already done, so this is a no-op.
    // Not threading the abort signal into the fetch on purpose — cancel() gets
    // the teardown without rejecting an in-flight read with AbortError and
    // changing the runtime's quiet-stop semantics.
    try {
      await reader.cancel();
    } catch {
      // Stream already errored/cancelled by an upstream abort — nothing to do.
    }
    // cancel() does not unlock the stream; release the reader explicitly so
    // `body.locked` goes back to false.
    try {
      reader.releaseLock();
    } catch {
      // Best-effort: some runtimes throw if the reader is in an odd state.
    }
  }
}
1193
+
1194
/** Partial state of one tool call being reassembled from streamed deltas. */
interface StreamingToolCallAcc {
  /** Tool-call id, once a delta has supplied one. */
  id?: string;
  /** Function name, once a delta has supplied one. */
  name?: string;
  /** Concatenation of every `function.arguments` fragment seen so far. */
  args: string;
}
1199
+
1200
/**
 * Merge one streamed `delta.tool_calls` array into the accumulator.
 *
 * Each entry is filed under its numeric `index` (slot 0 when it has none).
 * A later id or name replaces the stored one, while `function.arguments`
 * fragments are appended in arrival order.
 *
 * @param acc - Per-index accumulator, mutated in place.
 * @param deltas - The raw `tool_calls` value; anything but an array is ignored.
 */
export function accumulateToolCallDeltas(
  acc: Map<number, StreamingToolCallAcc>,
  deltas: unknown
): void {
  if (!Array.isArray(deltas)) {
    return;
  }
  for (let i = 0; i < deltas.length; i += 1) {
    const fragment = asRecord(deltas[i]);
    const slot = typeof fragment.index === "number" ? fragment.index : 0;
    const pending: StreamingToolCallAcc = acc.get(slot) ?? { args: "" };

    const callId = firstString(fragment.id);
    if (callId) {
      pending.id = callId;
    }

    const fnPart = recordAt(fragment, "function");
    const fnName = firstString(fnPart.name);
    if (fnName) {
      pending.name = fnName;
    }

    const argChunk = fnPart.arguments;
    if (typeof argChunk === "string") {
      pending.args += argChunk;
    }

    acc.set(slot, pending);
  }
}
1219
+
1220
/**
 * Turn the accumulated tool-call fragments into finished tool calls, ordered
 * by ascending stream index.
 *
 * Entries that never received a name are dropped. A missing id is replaced
 * by `call_<name>_<index>`, and blank arguments are treated as `{}`.
 *
 * @param acc - Accumulator filled by `accumulateToolCallDeltas`.
 * @returns The tool calls in the same shape the buffered path produces.
 */
export function finalizeStreamedToolCalls(
  acc: Map<number, StreamingToolCallAcc>
): NativeToolCall[] {
  const ordered = Array.from(acc.entries()).sort(([left], [right]) => left - right);
  return ordered.flatMap(([slot, entry]): NativeToolCall[] => {
    const toolName = entry.name;
    if (!toolName) {
      return [];
    }
    const rawArgs = entry.args.trim() === "" ? "{}" : entry.args;
    return [
      {
        type: "tool-call",
        toolCallId: entry.id ?? `call_${toolName}_${slot}`,
        toolName,
        input: parseJsonIfPossible(rawArgs),
      },
    ];
  });
}
1236
+
1237
/**
 * Streaming variant of {@link generateNativeChatCompletion}: returns a
 * {@link TextStreamResult} whose `textStream` yields `delta.content` as it
 * arrives, so `useModel`'s for-await loop streams it to the UI from the first
 * token. Falls back to a single-chunk buffered result if the gateway answers
 * non-SSE (self-healing). The shared concurrency permit is held for the whole
 * stream lifetime (released in the generator's `finally`), not just until
 * headers arrive — otherwise the cap would under-count in-flight requests.
 *
 * NOTE: on the SSE path the permit release and the settlement of `text`,
 * `usage`, `finishReason` and `toolCalls` all happen in the generator's
 * `finally`, which only runs once `textStream` has started iterating. Callers
 * must consume (or start and then close) `textStream`; awaiting only the
 * promises would never settle and would keep the permit.
 *
 * @param runtime - Agent runtime used to build the cloud API client and to
 *   emit the model-usage event.
 * @param modelType - Text model type; sent in the request headers and the
 *   usage event.
 * @param params - Generation parameters; an optional `signal` on it is
 *   combined with the text timeout into the request's abort signal.
 * @param context - Resolved model name and prompt(s) for this call.
 * @returns The stream result described above.
 * @throws Error carrying `status` (and `error` when the body had one) when
 *   the gateway answers with a non-2xx status.
 * @throws Error when the buffered fallback contains neither text nor tool
 *   calls.
 */
export async function streamNativeChatCompletion(
  runtime: IAgentRuntime,
  modelType: TextModelType,
  params: GenerateTextParamsWithNativeOptions,
  context: { modelName: string; prompt: string; systemPrompt?: string }
): Promise<TextStreamResult> {
  const requestBody = buildNativeRequestBody(
    params,
    context.modelName,
    context.prompt,
    context.systemPrompt
  );
  requestBody.stream = true;
  // OpenAI-compatible: ask the server to include a final usage-only frame so we
  // can meter the streamed call accurately.
  requestBody.stream_options = { include_usage: true };

  const headers: Record<string, string> = {
    "X-Eliza-Llm-Purpose": getPurposeForModelType(modelType),
    "X-Eliza-Model-Type": modelType,
  };
  if (isSpanSamplerHonoringModel(context.modelName)) {
    const samplerHeader = buildSpanSamplerHeader(params.spanSamplerPlan);
    if (samplerHeader) {
      headers["x-eliza-span-samplers"] = samplerHeader;
    }
  }

  const abortSignal = (params as { signal?: AbortSignal }).signal;
  const signal = buildStreamAbortSignal(abortSignal, resolveTextTimeoutMs());

  const limiter = getNativeChatLimiter();
  const waitStartedAt = Date.now();
  await limiter.acquire();
  recordInferenceSpan("cloud.semaphore-wait", Date.now() - waitStartedAt, {
    route: "chat/completions:stream",
  });
  // Idempotent release: several exit paths below may reach it, but the permit
  // must go back to the limiter exactly once.
  let permitReleased = false;
  const releasePermit = (): void => {
    if (!permitReleased) {
      permitReleased = true;
      limiter.release();
    }
  };

  let response: Response;
  try {
    response = await createCloudApiClient(runtime).requestRaw("POST", "/chat/completions", {
      headers,
      json: requestBody,
      ...(signal ? { signal } : {}),
    });
  } catch (err) {
    releasePermit();
    throw err;
  }

  if (!response.ok) {
    let errorBody: { message?: string } | undefined;
    try {
      const errText = await response.text();
      if (errText) {
        errorBody = (JSON.parse(errText) as ChatCompletionsResponse).error;
      }
    } catch {
      // Non-JSON error body — fall through to the status-coded message.
    }
    releasePermit();
    const message =
      typeof errorBody?.message === "string" && errorBody.message.trim()
        ? errorBody.message.trim()
        : `elizaOS Cloud error ${response.status}`;
    const requestError = new Error(message) as Error & {
      status?: number;
      error?: unknown;
    };
    requestError.status = response.status;
    if (errorBody) requestError.error = errorBody;
    throw requestError;
  }

  // Media types are case-insensitive, so normalise before matching.
  const contentType = (response.headers.get("content-type") ?? "").toLowerCase();
  const isSse = contentType.includes("text/event-stream") && response.body !== null;

  // Self-healing fallback: gateway answered with a buffered JSON body despite
  // the stream request. Yield it as a single chunk so the streaming contract
  // (and the structured-field extractor downstream) still works.
  if (!isSse) {
    let bufferedText: string;
    try {
      bufferedText = await response.text();
    } finally {
      // Reading the body can reject (abort, timeout, dropped connection); the
      // permit must be returned either way or the shared limiter loses a slot.
      releasePermit();
    }
    let data: ChatCompletionsResponse = {};
    if (bufferedText) {
      try {
        data = JSON.parse(bufferedText) as ChatCompletionsResponse;
      } catch (parseErr) {
        logger.error(
          `[ELIZAOS_CLOUD] Failed to parse buffered chat completions JSON: ${
            parseErr instanceof Error ? parseErr.message : String(parseErr)
          }`
        );
      }
    }
    const text = extractChatCompletionText(data);
    const toolCalls = extractNativeToolCalls(data);
    const usage = convertNativeUsage(data.usage);
    if (usage) {
      emitModelUsageEvent(runtime, modelType, context.prompt, usage, {
        modelName: context.modelName,
        ...(() => {
          const costUsd = extractCostUsd(data.usage, response);
          return typeof costUsd === "number" ? { costUsd } : {};
        })(),
      });
    }
    if (!text.trim() && toolCalls.length === 0) {
      throw new Error("elizaOS Cloud returned no text or tool calls");
    }
    async function* single(): AsyncGenerator<string> {
      if (text) yield text;
    }
    return {
      textStream: single(),
      text: Promise.resolve(text),
      usage: Promise.resolve(usage),
      finishReason: Promise.resolve(data.choices?.[0]?.finish_reason),
      toolCalls: Promise.resolve(toolCalls),
      providerMetadata: { modelName: context.modelName, usage: data.usage },
    };
  }

  const body = response.body as ReadableStream<Uint8Array>;
  const toolAcc = new Map<number, StreamingToolCallAcc>();
  let accumulated = "";
  let nativeUsage: NativeTokenUsage | undefined;
  let rawUsage: unknown;
  let finishReason: string | undefined;

  // Settled from the generator's `finally` with whatever was collected by the
  // time the stream ended, failed, or was closed early.
  const textD = deferred<string>();
  const usageD = deferred<TokenUsage | undefined>();
  const finishD = deferred<string | undefined>();
  const toolCallsD = deferred<NativeToolCall[]>();

  async function* generate(): AsyncGenerator<string> {
    try {
      for await (const frame of parseOpenAiSseStream(body)) {
        // An in-band error frame aborts the stream with its message.
        if (frame.error) {
          const message = asRecord(frame.error).message;
          throw new Error(
            typeof message === "string" && message.trim()
              ? message.trim()
              : "elizaOS Cloud stream error"
          );
        }
        const choices = Array.isArray(frame.choices) ? frame.choices : [];
        const choice = asRecord(choices[0]);
        const delta = recordAt(choice, "delta");
        // Raw (un-trimmed) content — inter-token whitespace is significant.
        if (typeof delta.content === "string" && delta.content.length > 0) {
          accumulated += delta.content;
          yield delta.content;
        }
        if (delta.tool_calls) {
          accumulateToolCallDeltas(toolAcc, delta.tool_calls);
        }
        const fr = firstString(choice.finish_reason);
        if (fr) finishReason = fr;
        if (frame.usage) {
          rawUsage = frame.usage;
          nativeUsage = convertNativeUsage(frame.usage);
        }
      }
    } finally {
      releasePermit();
      const toolCalls = finalizeStreamedToolCalls(toolAcc);
      textD.resolve(accumulated);
      usageD.resolve(nativeUsage);
      finishD.resolve(finishReason);
      toolCallsD.resolve(toolCalls);
      if (nativeUsage) {
        emitModelUsageEvent(runtime, modelType, context.prompt, nativeUsage, {
          modelName: context.modelName,
          ...(() => {
            const costUsd = extractCostUsd(rawUsage, response);
            return typeof costUsd === "number" ? { costUsd } : {};
          })(),
        });
      }
    }
  }

  return {
    textStream: generate(),
    text: textD.promise,
    usage: usageD.promise,
    finishReason: finishD.promise,
    toolCalls: toolCallsD.promise,
    providerMetadata: { modelName: context.modelName },
  };
}
1445
+
1446
/**
 * Model handler for `TEXT_SMALL_MODEL_TYPE`.
 *
 * Forwards `runtime` and `params` unchanged to `generateTextWithModel` and
 * returns whatever it produces.
 */
export async function handleTextSmall(
  runtime: IAgentRuntime,
  params: GenerateTextParams
): Promise<string | TextStreamResult> {
  const modelType = TEXT_SMALL_MODEL_TYPE;
  return generateTextWithModel(runtime, modelType, params);
}
1452
+
1453
/**
 * Model handler for `TEXT_NANO_MODEL_TYPE`.
 *
 * Forwards `runtime` and `params` unchanged to `generateTextWithModel` and
 * returns whatever it produces.
 */
export async function handleTextNano(
  runtime: IAgentRuntime,
  params: GenerateTextParams
): Promise<string | TextStreamResult> {
  const modelType = TEXT_NANO_MODEL_TYPE;
  return generateTextWithModel(runtime, modelType, params);
}
1459
+
1460
/**
 * Model handler for `TEXT_MEDIUM_MODEL_TYPE`.
 *
 * Forwards `runtime` and `params` unchanged to `generateTextWithModel` and
 * returns whatever it produces.
 */
export async function handleTextMedium(
  runtime: IAgentRuntime,
  params: GenerateTextParams
): Promise<string | TextStreamResult> {
  const modelType = TEXT_MEDIUM_MODEL_TYPE;
  return generateTextWithModel(runtime, modelType, params);
}
1466
+
1467
/**
 * Model handler for `TEXT_LARGE_MODEL_TYPE`.
 *
 * Forwards `runtime` and `params` unchanged to `generateTextWithModel` and
 * returns whatever it produces.
 */
export async function handleTextLarge(
  runtime: IAgentRuntime,
  params: GenerateTextParams
): Promise<string | TextStreamResult> {
  const modelType = TEXT_LARGE_MODEL_TYPE;
  return generateTextWithModel(runtime, modelType, params);
}
1473
+
1474
/**
 * Model handler for `TEXT_MEGA_MODEL_TYPE`.
 *
 * Forwards `runtime` and `params` unchanged to `generateTextWithModel` and
 * returns whatever it produces.
 */
export async function handleTextMega(
  runtime: IAgentRuntime,
  params: GenerateTextParams
): Promise<string | TextStreamResult> {
  const modelType = TEXT_MEGA_MODEL_TYPE;
  return generateTextWithModel(runtime, modelType, params);
}
1480
+
1481
/**
 * Model handler for `RESPONSE_HANDLER_MODEL_TYPE`.
 *
 * Forwards `runtime` and `params` unchanged to `generateTextWithModel` and
 * returns whatever it produces.
 */
export async function handleResponseHandler(
  runtime: IAgentRuntime,
  params: GenerateTextParams
): Promise<string | TextStreamResult> {
  const modelType = RESPONSE_HANDLER_MODEL_TYPE;
  return generateTextWithModel(runtime, modelType, params);
}
1487
+
1488
/**
 * Model handler for `ACTION_PLANNER_MODEL_TYPE`.
 *
 * Forwards `runtime` and `params` unchanged to `generateTextWithModel` and
 * returns whatever it produces.
 */
export async function handleActionPlanner(
  runtime: IAgentRuntime,
  params: GenerateTextParams
): Promise<string | TextStreamResult> {
  const modelType = ACTION_PLANNER_MODEL_TYPE;
  return generateTextWithModel(runtime, modelType, params);
}