keboola-cli 0.63.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. keboola_agent_cli/__init__.py +34 -0
  2. keboola_agent_cli/__main__.py +5 -0
  3. keboola_agent_cli/_ui_dist/assets/arc-DhFYIddx.js +2 -0
  4. keboola_agent_cli/_ui_dist/assets/arc-DhFYIddx.js.map +1 -0
  5. keboola_agent_cli/_ui_dist/assets/architecture-7EHR7CIX-hNCijx_H.js +1 -0
  6. keboola_agent_cli/_ui_dist/assets/architectureDiagram-3BPJPVTR-C6hUlprM.js +37 -0
  7. keboola_agent_cli/_ui_dist/assets/architectureDiagram-3BPJPVTR-C6hUlprM.js.map +1 -0
  8. keboola_agent_cli/_ui_dist/assets/array-BifhSqXX.js +2 -0
  9. keboola_agent_cli/_ui_dist/assets/array-BifhSqXX.js.map +1 -0
  10. keboola_agent_cli/_ui_dist/assets/blockDiagram-GPEHLZMM-DC7qY9i4.js +133 -0
  11. keboola_agent_cli/_ui_dist/assets/blockDiagram-GPEHLZMM-DC7qY9i4.js.map +1 -0
  12. keboola_agent_cli/_ui_dist/assets/c4Diagram-AAUBKEIU-5Lh44evt.js +11 -0
  13. keboola_agent_cli/_ui_dist/assets/c4Diagram-AAUBKEIU-5Lh44evt.js.map +1 -0
  14. keboola_agent_cli/_ui_dist/assets/channel-DBMrXlxx.js +2 -0
  15. keboola_agent_cli/_ui_dist/assets/channel-DBMrXlxx.js.map +1 -0
  16. keboola_agent_cli/_ui_dist/assets/chunk-2J33WTMH-Coy82EBh.js +2 -0
  17. keboola_agent_cli/_ui_dist/assets/chunk-2J33WTMH-Coy82EBh.js.map +1 -0
  18. keboola_agent_cli/_ui_dist/assets/chunk-3OPIFGDE-BQC5CRHI.js +63 -0
  19. keboola_agent_cli/_ui_dist/assets/chunk-3OPIFGDE-BQC5CRHI.js.map +1 -0
  20. keboola_agent_cli/_ui_dist/assets/chunk-4BX2VUAB-DUuEt70o.js +2 -0
  21. keboola_agent_cli/_ui_dist/assets/chunk-4BX2VUAB-DUuEt70o.js.map +1 -0
  22. keboola_agent_cli/_ui_dist/assets/chunk-55IACEB6-BvR-6chF.js +2 -0
  23. keboola_agent_cli/_ui_dist/assets/chunk-55IACEB6-BvR-6chF.js.map +1 -0
  24. keboola_agent_cli/_ui_dist/assets/chunk-5ZQYHXKU-BjcTN7ul.js +3 -0
  25. keboola_agent_cli/_ui_dist/assets/chunk-5ZQYHXKU-BjcTN7ul.js.map +1 -0
  26. keboola_agent_cli/_ui_dist/assets/chunk-727SXJPM-C0zxqqRN.js +207 -0
  27. keboola_agent_cli/_ui_dist/assets/chunk-727SXJPM-C0zxqqRN.js.map +1 -0
  28. keboola_agent_cli/_ui_dist/assets/chunk-AQP2D5EJ-CXf7rIlZ.js +232 -0
  29. keboola_agent_cli/_ui_dist/assets/chunk-AQP2D5EJ-CXf7rIlZ.js.map +1 -0
  30. keboola_agent_cli/_ui_dist/assets/chunk-BSJP7CBP-Oj_FO9Q7.js +2 -0
  31. keboola_agent_cli/_ui_dist/assets/chunk-BSJP7CBP-Oj_FO9Q7.js.map +1 -0
  32. keboola_agent_cli/_ui_dist/assets/chunk-CSCIHK7Q-CcTsLrFc.js +124 -0
  33. keboola_agent_cli/_ui_dist/assets/chunk-CSCIHK7Q-CcTsLrFc.js.map +1 -0
  34. keboola_agent_cli/_ui_dist/assets/chunk-FMBD7UC4-FH-zLkkW.js +16 -0
  35. keboola_agent_cli/_ui_dist/assets/chunk-FMBD7UC4-FH-zLkkW.js.map +1 -0
  36. keboola_agent_cli/_ui_dist/assets/chunk-L5ZTLDWV-B1Ky_e7O.js +2 -0
  37. keboola_agent_cli/_ui_dist/assets/chunk-L5ZTLDWV-B1Ky_e7O.js.map +1 -0
  38. keboola_agent_cli/_ui_dist/assets/chunk-ND2GUHAM-BHz1rpbm.js +2 -0
  39. keboola_agent_cli/_ui_dist/assets/chunk-ND2GUHAM-BHz1rpbm.js.map +1 -0
  40. keboola_agent_cli/_ui_dist/assets/chunk-NNHCCRGN-DlpIbxXb.js +160 -0
  41. keboola_agent_cli/_ui_dist/assets/chunk-NNHCCRGN-DlpIbxXb.js.map +1 -0
  42. keboola_agent_cli/_ui_dist/assets/chunk-NZK2D7GU-tnrSoegS.js +2 -0
  43. keboola_agent_cli/_ui_dist/assets/chunk-NZK2D7GU-tnrSoegS.js.map +1 -0
  44. keboola_agent_cli/_ui_dist/assets/chunk-O5CBEL6O-DxxqDH0l.js +71 -0
  45. keboola_agent_cli/_ui_dist/assets/chunk-O5CBEL6O-DxxqDH0l.js.map +1 -0
  46. keboola_agent_cli/_ui_dist/assets/chunk-QZHKN3VN-CSjc2gjj.js +2 -0
  47. keboola_agent_cli/_ui_dist/assets/chunk-QZHKN3VN-CSjc2gjj.js.map +1 -0
  48. keboola_agent_cli/_ui_dist/assets/classDiagram-4FO5ZUOK-BuZcZu85.js +2 -0
  49. keboola_agent_cli/_ui_dist/assets/classDiagram-4FO5ZUOK-BuZcZu85.js.map +1 -0
  50. keboola_agent_cli/_ui_dist/assets/classDiagram-v2-Q7XG4LA2-BuZcZu85.js +2 -0
  51. keboola_agent_cli/_ui_dist/assets/classDiagram-v2-Q7XG4LA2-BuZcZu85.js.map +1 -0
  52. keboola_agent_cli/_ui_dist/assets/cose-bilkent-S5V4N54A-Y0L8LDMa.js +2 -0
  53. keboola_agent_cli/_ui_dist/assets/cose-bilkent-S5V4N54A-Y0L8LDMa.js.map +1 -0
  54. keboola_agent_cli/_ui_dist/assets/cytoscape.esm-C8YCVR3_.js +322 -0
  55. keboola_agent_cli/_ui_dist/assets/cytoscape.esm-C8YCVR3_.js.map +1 -0
  56. keboola_agent_cli/_ui_dist/assets/dagre-BM42HDAG-UZ-9BTqF.js +5 -0
  57. keboola_agent_cli/_ui_dist/assets/dagre-BM42HDAG-UZ-9BTqF.js.map +1 -0
  58. keboola_agent_cli/_ui_dist/assets/dagre-Bx709z4p.js +2 -0
  59. keboola_agent_cli/_ui_dist/assets/dagre-Bx709z4p.js.map +1 -0
  60. keboola_agent_cli/_ui_dist/assets/defaultLocale-C8Fc0cco.js +2 -0
  61. keboola_agent_cli/_ui_dist/assets/defaultLocale-C8Fc0cco.js.map +1 -0
  62. keboola_agent_cli/_ui_dist/assets/diagram-2AECGRRQ-DoDQ60wi.js +44 -0
  63. keboola_agent_cli/_ui_dist/assets/diagram-2AECGRRQ-DoDQ60wi.js.map +1 -0
  64. keboola_agent_cli/_ui_dist/assets/diagram-5GNKFQAL-CMGFxpUs.js +11 -0
  65. keboola_agent_cli/_ui_dist/assets/diagram-5GNKFQAL-CMGFxpUs.js.map +1 -0
  66. keboola_agent_cli/_ui_dist/assets/diagram-KO2AKTUF-1uGDa-Iu.js +4 -0
  67. keboola_agent_cli/_ui_dist/assets/diagram-KO2AKTUF-1uGDa-Iu.js.map +1 -0
  68. keboola_agent_cli/_ui_dist/assets/diagram-LMA3HP47-XtFH7B51.js +25 -0
  69. keboola_agent_cli/_ui_dist/assets/diagram-LMA3HP47-XtFH7B51.js.map +1 -0
  70. keboola_agent_cli/_ui_dist/assets/diagram-OG6HWLK6-B4_Te1T5.js +25 -0
  71. keboola_agent_cli/_ui_dist/assets/diagram-OG6HWLK6-B4_Te1T5.js.map +1 -0
  72. keboola_agent_cli/_ui_dist/assets/dist-Di6zmlv0.js +2 -0
  73. keboola_agent_cli/_ui_dist/assets/dist-Di6zmlv0.js.map +1 -0
  74. keboola_agent_cli/_ui_dist/assets/erDiagram-TEJ5UH35-NjQkrdFt.js +86 -0
  75. keboola_agent_cli/_ui_dist/assets/erDiagram-TEJ5UH35-NjQkrdFt.js.map +1 -0
  76. keboola_agent_cli/_ui_dist/assets/eventmodeling-FCH6USID-BrJMIks8.js +1 -0
  77. keboola_agent_cli/_ui_dist/assets/flowDiagram-I6XJVG4X-CIr8DWl7.js +163 -0
  78. keboola_agent_cli/_ui_dist/assets/flowDiagram-I6XJVG4X-CIr8DWl7.js.map +1 -0
  79. keboola_agent_cli/_ui_dist/assets/ganttDiagram-6RSMTGT7-C1VY_xbQ.js +293 -0
  80. keboola_agent_cli/_ui_dist/assets/ganttDiagram-6RSMTGT7-C1VY_xbQ.js.map +1 -0
  81. keboola_agent_cli/_ui_dist/assets/gitGraph-WXDBUCRP-COacYjo-.js +1 -0
  82. keboola_agent_cli/_ui_dist/assets/gitGraphDiagram-PVQCEYII-DQT8-kg2.js +107 -0
  83. keboola_agent_cli/_ui_dist/assets/gitGraphDiagram-PVQCEYII-DQT8-kg2.js.map +1 -0
  84. keboola_agent_cli/_ui_dist/assets/graphlib-B8gBHxth.js +2 -0
  85. keboola_agent_cli/_ui_dist/assets/graphlib-B8gBHxth.js.map +1 -0
  86. keboola_agent_cli/_ui_dist/assets/index-CMq50kkV.css +1 -0
  87. keboola_agent_cli/_ui_dist/assets/index-D8W97DAz.js +118 -0
  88. keboola_agent_cli/_ui_dist/assets/index-D8W97DAz.js.map +1 -0
  89. keboola_agent_cli/_ui_dist/assets/info-J43DQDTF-DdCTRIzU.js +1 -0
  90. keboola_agent_cli/_ui_dist/assets/infoDiagram-5YYISTIA-C77rsoTp.js +3 -0
  91. keboola_agent_cli/_ui_dist/assets/infoDiagram-5YYISTIA-C77rsoTp.js.map +1 -0
  92. keboola_agent_cli/_ui_dist/assets/init-D6jRqBbL.js +2 -0
  93. keboola_agent_cli/_ui_dist/assets/init-D6jRqBbL.js.map +1 -0
  94. keboola_agent_cli/_ui_dist/assets/ishikawaDiagram-YF4QCWOH-BcTbXaLy.js +71 -0
  95. keboola_agent_cli/_ui_dist/assets/ishikawaDiagram-YF4QCWOH-BcTbXaLy.js.map +1 -0
  96. keboola_agent_cli/_ui_dist/assets/journeyDiagram-JHISSGLW-BejeAJQ_.js +140 -0
  97. keboola_agent_cli/_ui_dist/assets/journeyDiagram-JHISSGLW-BejeAJQ_.js.map +1 -0
  98. keboola_agent_cli/_ui_dist/assets/kanban-definition-UN3LZRKU-BRNz_UrH.js +90 -0
  99. keboola_agent_cli/_ui_dist/assets/kanban-definition-UN3LZRKU-BRNz_UrH.js.map +1 -0
  100. keboola_agent_cli/_ui_dist/assets/katex-C4eR7coU.js +258 -0
  101. keboola_agent_cli/_ui_dist/assets/katex-C4eR7coU.js.map +1 -0
  102. keboola_agent_cli/_ui_dist/assets/line-CzAQKFbJ.js +2 -0
  103. keboola_agent_cli/_ui_dist/assets/line-CzAQKFbJ.js.map +1 -0
  104. keboola_agent_cli/_ui_dist/assets/linear-DUNFFdck.js +2 -0
  105. keboola_agent_cli/_ui_dist/assets/linear-DUNFFdck.js.map +1 -0
  106. keboola_agent_cli/_ui_dist/assets/mermaid-parser.core-CpuBOkFa.js +5 -0
  107. keboola_agent_cli/_ui_dist/assets/mermaid-parser.core-CpuBOkFa.js.map +1 -0
  108. keboola_agent_cli/_ui_dist/assets/mindmap-definition-RKZ34NQL-9EJQNjH0.js +97 -0
  109. keboola_agent_cli/_ui_dist/assets/mindmap-definition-RKZ34NQL-9EJQNjH0.js.map +1 -0
  110. keboola_agent_cli/_ui_dist/assets/ordinal-hYBb2elL.js +2 -0
  111. keboola_agent_cli/_ui_dist/assets/ordinal-hYBb2elL.js.map +1 -0
  112. keboola_agent_cli/_ui_dist/assets/packet-YPE3B663-DLiiw_B2.js +1 -0
  113. keboola_agent_cli/_ui_dist/assets/path-BWPyau1x.js +2 -0
  114. keboola_agent_cli/_ui_dist/assets/path-BWPyau1x.js.map +1 -0
  115. keboola_agent_cli/_ui_dist/assets/pie-LRSECV5Y-CRoO8G1g.js +1 -0
  116. keboola_agent_cli/_ui_dist/assets/pieDiagram-4H26LBE5-XH4cy6Cb.js +31 -0
  117. keboola_agent_cli/_ui_dist/assets/pieDiagram-4H26LBE5-XH4cy6Cb.js.map +1 -0
  118. keboola_agent_cli/_ui_dist/assets/quadrantDiagram-W4KKPZXB-fdhc93U8.js +8 -0
  119. keboola_agent_cli/_ui_dist/assets/quadrantDiagram-W4KKPZXB-fdhc93U8.js.map +1 -0
  120. keboola_agent_cli/_ui_dist/assets/radar-GUYGQ44K-DAlLVJHm.js +1 -0
  121. keboola_agent_cli/_ui_dist/assets/requirementDiagram-4Y6WPE33-a94eP3R9.js +85 -0
  122. keboola_agent_cli/_ui_dist/assets/requirementDiagram-4Y6WPE33-a94eP3R9.js.map +1 -0
  123. keboola_agent_cli/_ui_dist/assets/rough.esm-CSKSodPl.js +2 -0
  124. keboola_agent_cli/_ui_dist/assets/rough.esm-CSKSodPl.js.map +1 -0
  125. keboola_agent_cli/_ui_dist/assets/sankeyDiagram-5OEKKPKP-jcBa02sp.js +41 -0
  126. keboola_agent_cli/_ui_dist/assets/sankeyDiagram-5OEKKPKP-jcBa02sp.js.map +1 -0
  127. keboola_agent_cli/_ui_dist/assets/sequenceDiagram-3UESZ5HK-A5-GGM-e.js +163 -0
  128. keboola_agent_cli/_ui_dist/assets/sequenceDiagram-3UESZ5HK-A5-GGM-e.js.map +1 -0
  129. keboola_agent_cli/_ui_dist/assets/src-ZI-V_AF0.js +2 -0
  130. keboola_agent_cli/_ui_dist/assets/src-ZI-V_AF0.js.map +1 -0
  131. keboola_agent_cli/_ui_dist/assets/stateDiagram-AJRCARHV-BKAA5rqE.js +2 -0
  132. keboola_agent_cli/_ui_dist/assets/stateDiagram-AJRCARHV-BKAA5rqE.js.map +1 -0
  133. keboola_agent_cli/_ui_dist/assets/stateDiagram-v2-BHNVJYJU-DnJwJBsE.js +2 -0
  134. keboola_agent_cli/_ui_dist/assets/stateDiagram-v2-BHNVJYJU-DnJwJBsE.js.map +1 -0
  135. keboola_agent_cli/_ui_dist/assets/timeline-definition-PNZ67QCA-Cy39jp8b.js +121 -0
  136. keboola_agent_cli/_ui_dist/assets/timeline-definition-PNZ67QCA-Cy39jp8b.js.map +1 -0
  137. keboola_agent_cli/_ui_dist/assets/treeView-BLDUP644-DbLYl23-.js +1 -0
  138. keboola_agent_cli/_ui_dist/assets/treemap-LRROVOQU-Bp0eGlOt.js +1 -0
  139. keboola_agent_cli/_ui_dist/assets/vennDiagram-CIIHVFJN-BGECKubd.js +35 -0
  140. keboola_agent_cli/_ui_dist/assets/vennDiagram-CIIHVFJN-BGECKubd.js.map +1 -0
  141. keboola_agent_cli/_ui_dist/assets/wardley-L42UT6IY-D4yH4jqS.js +1 -0
  142. keboola_agent_cli/_ui_dist/assets/wardleyDiagram-YWT4CUSO-D6XRG3cZ.js +79 -0
  143. keboola_agent_cli/_ui_dist/assets/wardleyDiagram-YWT4CUSO-D6XRG3cZ.js.map +1 -0
  144. keboola_agent_cli/_ui_dist/assets/xychartDiagram-2RQKCTM6-DRre-pfZ.js +8 -0
  145. keboola_agent_cli/_ui_dist/assets/xychartDiagram-2RQKCTM6-DRre-pfZ.js.map +1 -0
  146. keboola_agent_cli/_ui_dist/index.html +50 -0
  147. keboola_agent_cli/ai_client.py +83 -0
  148. keboola_agent_cli/auto_update.py +550 -0
  149. keboola_agent_cli/changelog.py +1198 -0
  150. keboola_agent_cli/cli.py +448 -0
  151. keboola_agent_cli/client.py +3422 -0
  152. keboola_agent_cli/commands/__init__.py +0 -0
  153. keboola_agent_cli/commands/_data_app_git.py +343 -0
  154. keboola_agent_cli/commands/_helpers.py +377 -0
  155. keboola_agent_cli/commands/_metadata_input.py +49 -0
  156. keboola_agent_cli/commands/_semantic_layer_crud.py +632 -0
  157. keboola_agent_cli/commands/_semantic_layer_helpers.py +44 -0
  158. keboola_agent_cli/commands/_semantic_layer_reference_data.py +247 -0
  159. keboola_agent_cli/commands/agent.py +968 -0
  160. keboola_agent_cli/commands/branch.py +423 -0
  161. keboola_agent_cli/commands/changelog.py +168 -0
  162. keboola_agent_cli/commands/component.py +216 -0
  163. keboola_agent_cli/commands/config.py +2442 -0
  164. keboola_agent_cli/commands/context.py +1481 -0
  165. keboola_agent_cli/commands/data_app.py +1279 -0
  166. keboola_agent_cli/commands/dev_portal.py +584 -0
  167. keboola_agent_cli/commands/doctor.py +37 -0
  168. keboola_agent_cli/commands/encrypt.py +145 -0
  169. keboola_agent_cli/commands/feature.py +311 -0
  170. keboola_agent_cli/commands/flow.py +948 -0
  171. keboola_agent_cli/commands/http_client.py +157 -0
  172. keboola_agent_cli/commands/init.py +279 -0
  173. keboola_agent_cli/commands/job.py +661 -0
  174. keboola_agent_cli/commands/kai.py +301 -0
  175. keboola_agent_cli/commands/lineage.py +1464 -0
  176. keboola_agent_cli/commands/org.py +292 -0
  177. keboola_agent_cli/commands/permissions.py +360 -0
  178. keboola_agent_cli/commands/project.py +1192 -0
  179. keboola_agent_cli/commands/repl.py +243 -0
  180. keboola_agent_cli/commands/schedule.py +340 -0
  181. keboola_agent_cli/commands/search.py +178 -0
  182. keboola_agent_cli/commands/semantic_layer.py +939 -0
  183. keboola_agent_cli/commands/serve.py +272 -0
  184. keboola_agent_cli/commands/sharing.py +340 -0
  185. keboola_agent_cli/commands/storage.py +2630 -0
  186. keboola_agent_cli/commands/stream.py +266 -0
  187. keboola_agent_cli/commands/sync.py +1277 -0
  188. keboola_agent_cli/commands/tool.py +206 -0
  189. keboola_agent_cli/commands/version.py +186 -0
  190. keboola_agent_cli/commands/workspace.py +635 -0
  191. keboola_agent_cli/config_store.py +582 -0
  192. keboola_agent_cli/constants.py +528 -0
  193. keboola_agent_cli/data_science_client.py +342 -0
  194. keboola_agent_cli/dev_portal_client.py +323 -0
  195. keboola_agent_cli/errors.py +248 -0
  196. keboola_agent_cli/http_base.py +315 -0
  197. keboola_agent_cli/json_utils.py +126 -0
  198. keboola_agent_cli/lib.py +536 -0
  199. keboola_agent_cli/manage_client.py +324 -0
  200. keboola_agent_cli/metastore_client.py +214 -0
  201. keboola_agent_cli/models.py +427 -0
  202. keboola_agent_cli/output.py +1084 -0
  203. keboola_agent_cli/permissions.py +469 -0
  204. keboola_agent_cli/py.typed +3 -0
  205. keboola_agent_cli/result_models.py +271 -0
  206. keboola_agent_cli/server/__init__.py +34 -0
  207. keboola_agent_cli/server/agent_runner.py +1289 -0
  208. keboola_agent_cli/server/agents_store.py +325 -0
  209. keboola_agent_cli/server/app.py +764 -0
  210. keboola_agent_cli/server/auth.py +117 -0
  211. keboola_agent_cli/server/dependencies.py +149 -0
  212. keboola_agent_cli/server/pricing.py +303 -0
  213. keboola_agent_cli/server/routers/__init__.py +1 -0
  214. keboola_agent_cli/server/routers/agents.py +616 -0
  215. keboola_agent_cli/server/routers/ai_chat.py +129 -0
  216. keboola_agent_cli/server/routers/branches.py +133 -0
  217. keboola_agent_cli/server/routers/components.py +48 -0
  218. keboola_agent_cli/server/routers/configs.py +507 -0
  219. keboola_agent_cli/server/routers/data_apps.py +384 -0
  220. keboola_agent_cli/server/routers/dev_portal.py +67 -0
  221. keboola_agent_cli/server/routers/encrypt.py +35 -0
  222. keboola_agent_cli/server/routers/feature.py +179 -0
  223. keboola_agent_cli/server/routers/flows.py +204 -0
  224. keboola_agent_cli/server/routers/health.py +53 -0
  225. keboola_agent_cli/server/routers/jobs.py +175 -0
  226. keboola_agent_cli/server/routers/kai.py +80 -0
  227. keboola_agent_cli/server/routers/lineage.py +226 -0
  228. keboola_agent_cli/server/routers/mcp.py +70 -0
  229. keboola_agent_cli/server/routers/members.py +170 -0
  230. keboola_agent_cli/server/routers/org.py +96 -0
  231. keboola_agent_cli/server/routers/projects.py +106 -0
  232. keboola_agent_cli/server/routers/schedules.py +54 -0
  233. keboola_agent_cli/server/routers/search.py +30 -0
  234. keboola_agent_cli/server/routers/semantic_layer.py +650 -0
  235. keboola_agent_cli/server/routers/sharing.py +86 -0
  236. keboola_agent_cli/server/routers/storage.py +574 -0
  237. keboola_agent_cli/server/routers/stream.py +100 -0
  238. keboola_agent_cli/server/routers/workspaces.py +302 -0
  239. keboola_agent_cli/server/run_broadcaster.py +329 -0
  240. keboola_agent_cli/server/sse.py +25 -0
  241. keboola_agent_cli/services/__init__.py +0 -0
  242. keboola_agent_cli/services/_encryption.py +217 -0
  243. keboola_agent_cli/services/_semantic_layer_cascade.py +147 -0
  244. keboola_agent_cli/services/_semantic_layer_crud.py +382 -0
  245. keboola_agent_cli/services/_semantic_layer_internals.py +1078 -0
  246. keboola_agent_cli/services/_semantic_layer_lookup.py +181 -0
  247. keboola_agent_cli/services/_semantic_layer_reference_data.py +217 -0
  248. keboola_agent_cli/services/_sync_bindings.py +456 -0
  249. keboola_agent_cli/services/_sync_branch.py +191 -0
  250. keboola_agent_cli/services/_sync_bulk.py +228 -0
  251. keboola_agent_cli/services/_sync_clone.py +163 -0
  252. keboola_agent_cli/services/_sync_models.py +97 -0
  253. keboola_agent_cli/services/_sync_push_ops.py +369 -0
  254. keboola_agent_cli/services/_sync_storage.py +376 -0
  255. keboola_agent_cli/services/_sync_writeback.py +167 -0
  256. keboola_agent_cli/services/agent_service.py +458 -0
  257. keboola_agent_cli/services/base.py +175 -0
  258. keboola_agent_cli/services/branch_service.py +588 -0
  259. keboola_agent_cli/services/component_service.py +694 -0
  260. keboola_agent_cli/services/config_service.py +2099 -0
  261. keboola_agent_cli/services/data_app_git_service.py +224 -0
  262. keboola_agent_cli/services/data_app_service.py +2082 -0
  263. keboola_agent_cli/services/deep_lineage_service.py +1322 -0
  264. keboola_agent_cli/services/dev_portal_service.py +345 -0
  265. keboola_agent_cli/services/doctor_service.py +445 -0
  266. keboola_agent_cli/services/encrypt_service.py +87 -0
  267. keboola_agent_cli/services/feature_service.py +268 -0
  268. keboola_agent_cli/services/flow_service.py +769 -0
  269. keboola_agent_cli/services/flow_validation.py +188 -0
  270. keboola_agent_cli/services/http_forwarder_service.py +236 -0
  271. keboola_agent_cli/services/job_idempotency_store.py +285 -0
  272. keboola_agent_cli/services/job_service.py +797 -0
  273. keboola_agent_cli/services/kai_service.py +367 -0
  274. keboola_agent_cli/services/lineage_service.py +274 -0
  275. keboola_agent_cli/services/mcp_service.py +1498 -0
  276. keboola_agent_cli/services/mcp_transport.py +259 -0
  277. keboola_agent_cli/services/member_service.py +593 -0
  278. keboola_agent_cli/services/org_service.py +619 -0
  279. keboola_agent_cli/services/project_service.py +947 -0
  280. keboola_agent_cli/services/repo_validate_service.py +767 -0
  281. keboola_agent_cli/services/schedule_service.py +731 -0
  282. keboola_agent_cli/services/search_service.py +331 -0
  283. keboola_agent_cli/services/semantic_layer_service.py +1497 -0
  284. keboola_agent_cli/services/sharing_service.py +307 -0
  285. keboola_agent_cli/services/storage_service.py +2524 -0
  286. keboola_agent_cli/services/stream_service.py +395 -0
  287. keboola_agent_cli/services/sync_service.py +2244 -0
  288. keboola_agent_cli/services/variables_service.py +447 -0
  289. keboola_agent_cli/services/version_service.py +1038 -0
  290. keboola_agent_cli/services/workspace_service.py +1103 -0
  291. keboola_agent_cli/stream_client.py +217 -0
  292. keboola_agent_cli/sync/__init__.py +1 -0
  293. keboola_agent_cli/sync/branch_mapping.py +174 -0
  294. keboola_agent_cli/sync/clone.py +211 -0
  295. keboola_agent_cli/sync/code_extraction.py +655 -0
  296. keboola_agent_cli/sync/config_format.py +290 -0
  297. keboola_agent_cli/sync/diff_engine.py +566 -0
  298. keboola_agent_cli/sync/git_utils.py +93 -0
  299. keboola_agent_cli/sync/manifest.py +162 -0
  300. keboola_agent_cli/sync/naming.py +90 -0
  301. keboola_agent_cli/sync/secrets.py +62 -0
  302. keboola_agent_cli/sync/sql_split.py +134 -0
  303. keboola_cli-0.63.4.dist-info/METADATA +308 -0
  304. keboola_cli-0.63.4.dist-info/RECORD +306 -0
  305. keboola_cli-0.63.4.dist-info/WHEEL +4 -0
  306. keboola_cli-0.63.4.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,1289 @@
1
+ """Execution backend for scheduled agent tasks + the cron scheduler loop.
2
+
3
+ Two action types are supported today:
4
+
5
+ - ``mcp_tool``: call a keboola-mcp-server tool via :class:`McpService`.
6
+ - ``cli_command``: spawn ``kbagent <argv>`` as a subprocess and capture stdout.
7
+
8
+ The scheduler is a single asyncio loop attached to the FastAPI lifespan
9
+ (``serve.create_app()``); it ticks once a minute, checks every enabled
10
+ task's cron expression against ``datetime.now(UTC)`` (truncated to the
11
+ minute), and dispatches due tasks via :func:`run_task_once`.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import asyncio
17
+ import contextlib
18
+ import json
19
+ import logging
20
+ import os
21
+ import sys
22
+ import time
23
+ from collections.abc import AsyncIterator
24
+ from datetime import UTC, datetime
25
+ from typing import Any
26
+
27
+ from croniter import croniter
28
+
29
+ from ..constants import (
30
+ ENV_ALLOW_AI_EXTRA_ARGS,
31
+ ENV_CONFIG_DIR,
32
+ ENV_KBAGENT_SERVE_TOKEN,
33
+ ENV_KBAGENT_SERVE_URL,
34
+ ENV_KBAGENT_UPSTREAM_RUN_ID,
35
+ ENV_KBAGENT_UPSTREAM_STATUS,
36
+ ENV_KBAGENT_UPSTREAM_TASK_ID,
37
+ )
38
+ from .agents_store import AgentRun, AgentStore, AgentTask
39
+
40
+ logger = logging.getLogger(__name__)
41
+
42
+
43
def _build_subprocess_env(
    registry: Any,
    *,
    upstream_run: AgentRun | None = None,
    upstream_task: AgentTask | None = None,
    strip_admin_tokens: bool = False,
) -> dict[str, str]:
    """Build the environment mapping for a scheduler-spawned AI / CLI child.

    The mapping starts as a copy of ``os.environ`` and is then overlaid so
    the child points back at *this* serve rather than the global
    ``~/.config/keboola-agent-cli/config.json`` (which almost always holds
    a different set of project tokens than the running serve):

    - ``KBAGENT_CONFIG_DIR`` -- taken from ``registry.config_store.config_dir``
      when the registry exposes a ``config_store``. Any spawned
      ``kbagent <cmd>`` then reads the serve's config (same projects, storage
      tokens and active branches); consumed by ``resolve_config_dir`` in
      ``config_store.py``.
    - ``KBAGENT_SERVE_URL`` / ``KBAGENT_SERVE_TOKEN`` -- taken from
      ``registry.serve_url`` / ``registry.serve_token`` when those are
      truthy. They let the child run ``kbagent http get/post/patch/delete``
      against the live HTTP API without touching local config.

    When *both* ``upstream_task`` and ``upstream_run`` are supplied (the task
    was fired by another task's ``trigger`` chain) three more keys expose the
    upstream context:

    - ``KBAGENT_UPSTREAM_TASK_ID``
    - ``KBAGENT_UPSTREAM_RUN_ID``
    - ``KBAGENT_UPSTREAM_STATUS`` (``ok`` or ``error``)

    ``strip_admin_tokens`` (used by the AI-agent paths) drops every inherited
    variable whose name starts with ``KBC_MANAGE_`` or ``KBC_MASTER_``. An
    autonomous AI CLI (claude/codex/gemini) never legitimately needs the
    manage (super-admin) or master tokens -- it reaches Keboola through
    ``kbagent http`` or by forking ``kbagent`` against the serve's on-disk
    config -- and handing a prompt-injectable child those credentials is the
    leak fixed under GHSA-wm54-r2hh-cxm9. This mirrors the MCP-child
    isolation in ``mcp_transport._build_minimal_env``. The per-project
    storage token (``KBC_TOKEN``) does not match either prefix and is
    deliberately kept so headless ``--project __env__`` reads still work.
    ``cli_command`` children keep every token: they are ``kbagent`` itself.

    The returned dict is always a fresh object, so callers may mutate it
    without affecting the parent process environment.
    """
    if strip_admin_tokens:
        admin_prefixes = ("KBC_MANAGE_", "KBC_MASTER_")
        child_env = {
            name: value
            for name, value in os.environ.items()
            if not name.startswith(admin_prefixes)
        }
    else:
        child_env = dict(os.environ)

    if (store := getattr(registry, "config_store", None)) is not None:
        child_env[ENV_CONFIG_DIR] = str(store.config_dir)

    # Empty / missing URL or token is skipped rather than exported as "".
    if url := getattr(registry, "serve_url", None):
        child_env[ENV_KBAGENT_SERVE_URL] = url
    if token := getattr(registry, "serve_token", None):
        child_env[ENV_KBAGENT_SERVE_TOKEN] = token

    # Upstream context is all-or-nothing: both objects must be present.
    has_upstream = upstream_task is not None and upstream_run is not None
    if has_upstream:
        child_env.update(
            {
                ENV_KBAGENT_UPSTREAM_TASK_ID: upstream_task.id,
                ENV_KBAGENT_UPSTREAM_RUN_ID: upstream_run.run_id,
                ENV_KBAGENT_UPSTREAM_STATUS: upstream_run.status,
            }
        )
    return child_env
106
+
107
+
108
def _upstream_prompt_prefix(upstream_run: AgentRun | None, upstream_task: AgentTask | None) -> str:
    """Return the prompt preamble describing the upstream task that triggered this run.

    The preamble names the upstream task, its id and final status, shows the
    ``kbagent http get`` call that fetches the full upstream output, and
    reminds the agent that the same ids are available as env vars.

    Yields ``""`` unless both the upstream run and the upstream task are
    given, so cron-driven runs keep the regular prefix as the dominant
    signal.
    """
    if upstream_run is not None and upstream_task is not None:
        header = "[Upstream chain context]\n"
        trigger_line = (
            f"You were triggered after the upstream task '{upstream_task.name}' "
            f"(id={upstream_task.id}) completed with status '{upstream_run.status}'.\n"
        )
        fetch_line = (
            f"Read the full upstream output: "
            f"`kbagent http get /agents/{upstream_task.id}/runs/{upstream_run.run_id}`\n"
        )
        env_line = (
            "Env vars KBAGENT_UPSTREAM_TASK_ID + KBAGENT_UPSTREAM_RUN_ID carry the same values.\n\n"
        )
        return header + trigger_line + fetch_line + env_line
    return ""
124
+
125
+
126
# Instruction injected at the head of every ai_agent prompt. Tells the AI
# CLI it is running inside ``kbagent serve`` and how to call the live API
# instead of forking a stale ``kbagent`` CLI subprocess. Kept short so the
# user's actual prompt remains the dominant signal.
#
# Layout notes: the backslash right after the opening quotes suppresses the
# leading newline, and the literal ends with a blank line so the text that
# follows "The user's task follows:" starts on its own paragraph.
_AI_AGENT_PROMPT_PREFIX = """\
[kbagent serve runtime context]
You are running inside a `kbagent serve` instance. Two ways to query Keboola:

1) Preferred: HTTP API of *this* serve.
- URL in env var KBAGENT_SERVE_URL
- Bearer in env var KBAGENT_SERVE_TOKEN
- Browse the OpenAPI: `kbagent http get /openapi.json`
- Example: `kbagent http get /projects`, `kbagent http get /configs?project=padak`

2) Fallback: local CLI. KBAGENT_CONFIG_DIR is set so any `kbagent <cmd>` you
run reads the SAME config the serve uses (no stale tokens). Refresh /
manage-token operations still need a human at a terminal -- do not try
to obtain manage tokens yourself.

End of runtime context. The user's task follows:

"""
148
+
149
+
150
def build_prompt_helper_meta_prompt(
    *,
    goal: str,
    draft: str = "",
    project: str | None = None,
) -> str:
    """Build the meta-prompt the prompt-helper sends to the AI CLI.

    The helper turns a user's plain-English goal (plus an optional rough
    draft) into a polished prompt that a different, scheduled AI agent will
    later execute. Because the AI's answer is used verbatim, the meta-prompt
    insists on ONLY the final prompt body -- no preamble, no code fences, no
    commentary.

    It also spells out concrete kbagent CLI commands so the AI recommends
    commands the scheduled agent can really run rather than inventing API
    calls.

    ``goal`` and ``draft`` are whitespace-stripped before being embedded; a
    draft that is empty after stripping is reported as empty. A truthy
    ``project`` is named explicitly in the project-context line.
    """
    goal_text = goal.strip()
    draft_text = draft.strip()

    if project:
        project_hint = (
            f"The user has pinned project '{project}'. Reference it explicitly in the prompt."
        )
    else:
        project_hint = (
            "No project is pinned in this serve; if the goal needs one, the prompt should ask for it."
        )

    if draft_text:
        draft_block = (
            f"USER'S CURRENT DRAFT (preserve any concrete details from here):\n{draft_text}"
        )
    else:
        draft_block = "USER'S CURRENT DRAFT: (empty -- write the prompt from scratch.)"

    return f"""\
You are a senior prompt engineer. Rewrite the user's request into a polished
single-shot prompt for an AI agent that will run unattended on a CRON schedule
inside `kbagent serve`. The scheduled agent has access to the kbagent CLI and
the `kbagent http` family of commands; it can query Keboola Connection
projects via the serve's REST API (env vars KBAGENT_SERVE_URL +
KBAGENT_SERVE_TOKEN) or fall back to local CLI calls (KBAGENT_CONFIG_DIR is
pre-set).

USER'S GOAL (plain English):
{goal_text}

{draft_block}

PROJECT CONTEXT: {project_hint}

REQUIREMENTS for the rewritten prompt:
- Imperative voice, second person ("Use ...", "Then summarize ...").
- Concrete: name the kbagent commands the agent should run. Examples of
real commands: `kbagent job list --project NAME --status error --limit 10`,
`kbagent config search --query 'snowflake'`, `kbagent doctor`,
`kbagent http get /projects`, `kbagent storage tables --project NAME`.
- Bound the scope: time window, project alias, max results, expected output
format (markdown table, JSON, top-3 list, ...).
- Be self-contained: the agent has no chat history. Restate the goal in
the prompt body.
- 80 to 250 words. No preamble, no headings, no code fences.

OUTPUT CONTRACT (critical):
- Output ONLY the rewritten prompt body. Plain text.
- Do not say "Here is the prompt:" or wrap the result in ``` fences.
- Do not include any text before or after the prompt body.
"""
211
+
212
+
213
# Markdown artifacts the AI sometimes emits despite the OUTPUT CONTRACT.
# Stripped post-hoc so the textarea is filled with a clean prompt body.
# Every entry is lowercase -- presumably matched against a lowercased
# response; confirm at the call site. The bare "prompt:" entry comes last,
# after the longer phrases that end with the same text.
_PROMPT_RESPONSE_PREAMBLES = (
    "here is the prompt:",
    "here's the prompt:",
    "here is a prompt:",
    "here's a prompt:",
    "rewritten prompt:",
    "prompt:",
)
223
+
224
# Lead-in phrases to strip from SQL-helper responses: the AI is told to emit
# only SQL but routinely starts with "Here's the SQL:" or wraps the body in
# ```sql fences. Every entry is lowercase -- presumably matched against a
# lowercased response; confirm at the call site. The bare "sql:" / "query:"
# entries come last, after the longer phrases that end with the same text.
_SQL_RESPONSE_PREAMBLES = (
    "here is the sql:",
    "here's the sql:",
    "here is a sql:",
    "here's a sql:",
    "here is the query:",
    "here's the query:",
    "sql:",
    "query:",
)
236
+
237
+
238
def build_sql_helper_meta_prompt(
    *,
    goal: str,
    project: str,
    backend: str,
    schema: str,
    draft_sql: str = "",
    bucket_ids: list[str] | None = None,
    serve_url: str | None = None,
    failed_error: str | None = None,
) -> str:
    """Build the meta-prompt the workspace SQL helper sends to the AI CLI.

    The returned text asks the AI for one SELECT statement (or a small CTE
    batch) for the given project / backend / schema, and spells out the
    discovery workflow (``bucket-detail`` first, then column discovery) plus
    a strict raw-SQL-only output contract.

    Args:
        goal: Plain-English description of what the query should return.
        project: Project alias interpolated into the example commands.
        backend: Workspace backend name; selects the backend hint section.
        schema: Workspace default schema / dataset.
        draft_sql: The user's current SQL, if any.
        bucket_ids: Bucket ids to list as a starting catalog. Only the first
            50 are printed; the remainder is summarised as a count.
        serve_url: URL of the running serve, when known.
        failed_error: Warehouse error text. A non-blank value switches the
            draft section to "fix mode": ``draft_sql`` is presented as the
            failed query and the error is included verbatim.

    Returns:
        The complete prompt text.
    """
    trimmed_goal = goal.strip()
    trimmed_draft = draft_sql.strip()
    trimmed_error = failed_error.strip() if failed_error else ""

    # Draft section: fix mode > refine existing draft > write from scratch.
    if trimmed_error:
        failed_body = (
            trimmed_draft or "(query body empty — recover from the error message alone.)"
        )
        draft_section = (
            "FAILED QUERY (this is the SQL the user just ran -- it broke):\n"
            f"{failed_body}\n\n"
            "WAREHOUSE ERROR:\n"
            f"{trimmed_error}"
        )
    elif trimmed_draft:
        draft_section = (
            f"USER'S CURRENT DRAFT (refine this, don't throw it away):\n{trimmed_draft}"
        )
    else:
        draft_section = "USER'S CURRENT DRAFT: (empty -- write the query from scratch.)"

    # Bucket section: cap the listing at 50 entries and report the overflow.
    if bucket_ids:
        bucket_lines = ["VISIBLE BUCKETS (already loaded in the editor sidebar):"]
        bucket_lines.extend(f" - {b}" for b in bucket_ids[:50])
        bucket_section = "\n".join(bucket_lines)
    else:
        bucket_section = "VISIBLE BUCKETS: (none preloaded -- discover via INFORMATION_SCHEMA.)"
    overflow = len(bucket_ids or []) - 50
    if overflow > 0:
        bucket_section += f"\n ... and {overflow} more (truncated)"

    backend_section = _sql_helper_backend_hint(backend, schema)

    if serve_url:
        serve_section = (
            f"SERVE CONTEXT: kbagent serve is reachable at {serve_url}; the AI agent\n"
            "shell has KBAGENT_SERVE_URL + KBAGENT_SERVE_TOKEN env vars pre-set, so\n"
            "`kbagent http get /...` is the fastest discovery path."
        )
    else:
        serve_section = "SERVE CONTEXT: assume `kbagent` CLI is available on PATH."

    return f"""\
You are a senior data engineer writing SQL for a Keboola workspace. Your
output will be pasted into the workspace SQL editor verbatim and executed
through the Keboola Query Service against project '{project}'. The Query
Service runs SELECT only -- it rejects SHOW / DESCRIBE / DDL / DML.

WORKSPACE CONTEXT:
- Project alias: {project}
- Backend: {backend}
- Default schema: {schema}

USER'S GOAL (plain English):
{trimmed_goal}

{draft_section}

{bucket_section}

{backend_section}

MANDATORY FIRST STEP — resolve every bucket FQN (cross-project hazard):
- A workspace mounts ONLY the project's own database / dataset. Linked
buckets (shared from another project) live in a DIFFERENT Snowflake
database OR a different GCP project on BigQuery. Writing the obvious
`"in.c-foo"."table"` against a linked bucket WILL fail with
"Schema 'KBC_USE4_<workspace_project>.\"in.c-foo\"' does not exist".
- BEFORE you write a single line of SQL, for EVERY bucket in the VISIBLE
BUCKETS list above that you intend to reference, run:
kbagent storage bucket-detail --project {project} --bucket-id <id>
Read the `sql_path` field on the table you need and use it VERBATIM.
The path is already correctly quoted for the bucket's backend, e.g.
Snowflake linked → `"KBC_USE4_340"."out.c-out_bamboohr"."employee_snapshot"`.
- This step is NOT optional even if you "already know the columns" from
`table-detail` — `table-detail` gives you column names, NOT the
correct database. Skipping `bucket-detail` is the single most common
cause of broken queries in this helper. Always run it. It is cheap
and idempotent.
- Workflow order, no exceptions:
1. `bucket-detail` for each bucket you reference → record `sql_path`.
2. `table-detail` (or an INFORMATION_SCHEMA query) for column names.
3. ONLY THEN write the SQL, using the recorded `sql_path` values.

COLUMN DISCOVERY (after step 1 above):
- `kbagent workspace query --project {project} --workspace-id <id> --sql '...'`
with an INFORMATION_SCHEMA query confirms table + column names exist.
- Alternative: `kbagent storage table-detail --project {project} --table-id <id>`
returns the full column list for a Storage table without a query roundtrip.
{serve_section}

REQUIREMENTS for the returned SQL:
- Match the user's goal precisely; do not invent columns.
- Be a single SELECT statement (or a tiny CTE batch) -- nothing destructive.
- Qualify tables explicitly when joining across buckets so the result is
unambiguous after the workspace is reused.
- Add a brief 1-line `-- comment` at the top describing what the query
returns (purpose + key filters), but no other prose.

OUTPUT CONTRACT (critical):
- Output ONLY the SQL. Plain text.
- Do NOT wrap the SQL in ```sql fences.
- Do NOT prefix with "Here's the SQL:" / "Rewritten query:" / similar.
- Do NOT append commentary after the SQL.
- Do NOT emit "★ Insight ───" blocks, decorative separators, reasoning
bullets, or any explanatory commentary BEFORE the SQL. The user's
workspace editor pastes your entire response verbatim — any prose
before the SELECT statement breaks the parse. If a user has set the
`explanatory` Claude output style globally, you must override it here
and emit raw SQL only.
"""
370
+
371
+
372
+ def _sql_helper_backend_hint(backend: str, schema: str) -> str:
373
+ """Emit backend-specific INFORMATION_SCHEMA recipes for the meta-prompt.
374
+
375
+ Keboola Workspaces run on three backends; each has different table-catalog
376
+ surface area, so the meta-prompt embeds the exact INFORMATION_SCHEMA query
377
+ the AI should run for discovery. Without this hint claude routinely
378
+ invents Snowflake-style queries when the workspace is BigQuery.
379
+ """
380
+ backend_lc = (backend or "").lower()
381
+ if backend_lc == "bigquery":
382
+ return (
383
+ "BACKEND HINT (BigQuery):\n"
384
+ f"- Workspace schema is the dataset `{schema}`.\n"
385
+ f"- Backtick-quote dataset + table names: `\\`{schema}\\`.\\`<table>\\``.\n"
386
+ f"- Discovery: SELECT table_name FROM `{schema}.INFORMATION_SCHEMA.TABLES`;\n"
387
+ f"- Columns: SELECT column_name, data_type FROM "
388
+ f"`{schema}.INFORMATION_SCHEMA.COLUMNS` WHERE table_name='<table>';"
389
+ )
390
+ if backend_lc == "snowflake":
391
+ return (
392
+ "BACKEND HINT (Snowflake):\n"
393
+ f"- Workspace default schema is `{schema}`. Identifiers are case-sensitive\n"
394
+ f' when quoted; Keboola Storage tables are quoted ("my-table").\n'
395
+ "- CRITICAL QUOTING RULE: Snowflake uppercases ANY unquoted identifier.\n"
396
+ " Keboola column / table / alias names are lowercase, so EVERY\n"
397
+ " identifier you emit MUST be wrapped in double quotes — including\n"
398
+ ' column aliases (`AS "month"` not `AS month`), table aliases\n'
399
+ ' (`AS "s"` not `AS s`), and CTE names. Otherwise the result CSV\n'
400
+ " comes back with MONTH / EMPLOYEE / EMPLOYEE_COUNT column headers\n"
401
+ " instead of the lowercase names the user (and downstream tools)\n"
402
+ " expect.\n"
403
+ "- Discovery: SELECT TABLE_NAME, ROW_COUNT FROM INFORMATION_SCHEMA.TABLES\n"
404
+ " WHERE TABLE_SCHEMA = CURRENT_SCHEMA();\n"
405
+ "- Columns: SELECT COLUMN_NAME, DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS\n"
406
+ " WHERE TABLE_SCHEMA = CURRENT_SCHEMA() AND TABLE_NAME = '<table>';"
407
+ )
408
+ # Unknown / future backend: stay generic so the AI still has a starting point.
409
+ return (
410
+ f"BACKEND HINT ({backend or 'unknown'}):\n"
411
+ f"- Workspace default schema is `{schema}`.\n"
412
+ "- Discovery: query INFORMATION_SCHEMA.TABLES / COLUMNS following the\n"
413
+ " backend's conventions (Snowflake = CURRENT_SCHEMA(), BigQuery = dataset\n"
414
+ " path, Postgres = current_schema())."
415
+ )
416
+
417
+
418
def clean_sql_helper_response(text: str) -> str:
    """Reduce a raw AI reply to the SQL it contains.

    Cleanups, applied in order:

    1. Collapse an exact "AA" duplication (the reply is two equal halves).
    2. Strip one leading code-fence line (``` or ```sql) and one trailing
       ``` marker.
    3. Strip a first-line preamble from ``_SQL_RESPONSE_PREAMBLES``. Only the
       preamble itself is removed: anything following it on the same line is
       kept, so ``"Here's the SQL: SELECT 1"`` yields ``"SELECT 1"`` rather
       than an empty string.
    4. Hand the remainder to ``_strip_pre_sql_chatter`` to drop prose that
       precedes the first SQL line.

    Args:
        text: Raw response text from the AI CLI.

    Returns:
        The cleaned text, stripped of surrounding whitespace.
    """
    text = text.strip()
    # Step 1: collapse "AB" where A == B (claude jsonl duplication).
    if text and len(text) % 2 == 0:
        half = len(text) // 2
        if text[:half] == text[half:]:
            text = text[:half].rstrip()
    # Step 2: strip a single set of leading/trailing code fences. Accept
    # ```sql or ``` -- the AI uses both interchangeably.
    if text.startswith("```"):
        nl = text.find("\n")
        if nl != -1:
            text = text[nl + 1 :]
    if text.endswith("```"):
        text = text[:-3]
    text = text.strip()
    # Step 3: strip a preamble like "Here's the SQL:" from the first line,
    # keeping whatever shares that line with it. If the kept text is prose
    # rather than SQL, step 4 removes it.
    first_line, _, remainder = text.partition("\n")
    lead = first_line.strip()
    for preamble in _SQL_RESPONSE_PREAMBLES:
        # Compare a same-length slice so the cut below lines up exactly.
        if lead[: len(preamble)].lower() == preamble:
            same_line = lead[len(preamble) :].strip()
            text = "\n".join(part for part in (same_line, remainder.strip()) if part)
            break

    # Step 4: drop commentary blocks, reasoning paragraphs and loose
    # "Note: " style lines that appear before the SQL despite the output
    # contract.
    text = _strip_pre_sql_chatter(text)
    return text.strip()
461
+
462
+
463
+ # SQL statement keywords that can legally start a Keboola Query Service
464
+ # submission (it's read-only, so DML / DDL would fail anyway -- listing
465
+ # them keeps the extractor permissive enough not to false-negative if a
466
+ # user explicitly asks for one).
467
+ _SQL_KEYWORDS = (
468
+ "select",
469
+ "with",
470
+ "show",
471
+ "describe",
472
+ "desc ",
473
+ "explain",
474
+ "insert",
475
+ "update",
476
+ "delete",
477
+ "merge",
478
+ "create",
479
+ "alter",
480
+ "drop",
481
+ "truncate",
482
+ "use ",
483
+ )
484
+
485
+
486
+ def _strip_pre_sql_chatter(text: str) -> str:
487
+ """Drop prose before the first real SQL line.
488
+
489
+ Strategy: scan top-down for the first non-empty line that is either an
490
+ SQL keyword or a SQL comment immediately followed by SQL. Everything
491
+ before is preamble (Insight blocks, commentary, decorative separators).
492
+
493
+ Returns the original text untouched when no SQL keyword is detectable
494
+ in the first ~50 non-empty lines -- the helper would rather pass through
495
+ garbage than silently truncate a valid response that happens to look
496
+ unusual. Callers display the cleaned text to the user, who will spot
497
+ the issue.
498
+ """
499
+ lines = text.split("\n")
500
+
501
+ def is_sql_keyword(line: str) -> bool:
502
+ return any(line.lstrip().lower().startswith(kw) for kw in _SQL_KEYWORDS)
503
+
504
+ # Single-pass: walk non-empty lines, watching for either a direct SQL
505
+ # keyword OR a `-- comment` that is followed (within 20 lines) by SQL.
506
+ for i, line in enumerate(lines):
507
+ stripped = line.strip()
508
+ if not stripped:
509
+ continue
510
+ if is_sql_keyword(stripped):
511
+ return "\n".join(lines[i:]).strip()
512
+ if stripped.startswith("--") and not stripped.startswith("---"):
513
+ # Header SQL comment? Peek ahead for SQL within 20 lines.
514
+ window = lines[i + 1 : i + 21]
515
+ if any(is_sql_keyword(later.strip()) for later in window if later.strip()):
516
+ return "\n".join(lines[i:]).strip()
517
+ # Standalone `-- comment` with no SQL after -- it's prose; keep
518
+ # scanning past it instead of clipping here.
519
+ continue
520
+ # Not a SQL line and not a SQL header comment -- preamble. Keep
521
+ # scanning; the SQL is likely below.
522
+ continue
523
+
524
+ # Fallback: no SQL keyword found anywhere. Return the text as-is so
525
+ # the user can at least see what the AI sent.
526
+ return text
527
+
528
+
529
def clean_prompt_helper_response(text: str) -> str:
    """Trim code fences and preambles from a prompt-helper reply, and dedup it.

    Cleanups, applied in order:

    1. **Deduplication.** If the string is exactly two equal halves (the
       same body accumulated twice), collapse it to one.
    2. **Code-fence strip.** Remove one leading fence line (``` / ```text /
       ```md) and one trailing ``` marker.
    3. **Preamble strip.** Remove a first-line lead-in listed in
       ``_PROMPT_RESPONSE_PREAMBLES``. Only the lead-in is removed: text
       sharing its line is kept, so ``"Prompt: do X"`` yields ``"do X"``
       instead of an empty string.
    4. **Second fence strip**, only when a preamble was removed. This handles
       the common shape ``Here is the prompt:`` followed by a fenced body,
       where step 2 could only see the closing fence.

    Args:
        text: Raw response text from the AI CLI.

    Returns:
        The cleaned prompt body, stripped of surrounding whitespace.
    """

    def _unfence(body: str) -> str:
        # Drop the opening fence line (with its optional language tag) and a
        # closing fence; each is removed independently of the other.
        if body.startswith("```"):
            nl = body.find("\n")
            if nl != -1:
                body = body[nl + 1 :]
        if body.endswith("```"):
            body = body[:-3]
        return body.strip()

    text = text.strip()
    # Step 1: collapse "AB" where A == B (claude jsonl duplication).
    if text and len(text) % 2 == 0:
        half = len(text) // 2
        if text[:half] == text[half:]:
            text = text[:half].rstrip()
    # Step 2: strip a single set of leading/trailing code fences.
    text = _unfence(text)
    # Step 3: strip a preamble like "Here is the prompt:" from the first line.
    first_line, _, remainder = text.partition("\n")
    lead = first_line.strip()
    for preamble in _PROMPT_RESPONSE_PREAMBLES:
        # Compare a same-length slice so the cut below lines up exactly.
        if lead[: len(preamble)].lower() == preamble:
            same_line = lead[len(preamble) :].strip()
            text = "\n".join(part for part in (same_line, remainder.strip()) if part)
            # Step 4: the body behind the preamble may itself be fenced.
            text = _unfence(text)
            break
    return text.strip()
564
+
565
+
566
+ def build_local_ai_meta_prompt(
567
+ *,
568
+ message: str,
569
+ project: str | None = None,
570
+ branch_id: int | None = None,
571
+ serve_url: str | None = None,
572
+ ) -> str:
573
+ """Compose the meta-prompt for the dashboard Local AI chat (#300).
574
+
575
+ This is the most generic of the three helper meta-prompts in this
576
+ module: it does NOT pin an output shape (unlike SQL helper which
577
+ must emit raw SQL) and does NOT pin a task shape (unlike the agent
578
+ prompt helper which rewrites a draft). It simply tells the AI:
579
+ "you are running inside kbagent serve, you have the kbagent CLI on
580
+ PATH, here is what the user wants — answer it."
581
+
582
+ The user's local Claude / codex / gemini install handles markdown
583
+ rendering on the UI side, so the prompt encourages markdown output
584
+ rather than the spartan output contract that the SQL / prompt
585
+ helpers enforce.
586
+
587
+ The kbagent-skill content (workflow knowledge, gotchas, command
588
+ reference) is NOT inlined verbatim — it is ~70 KB of documentation
589
+ that would balloon every chat round trip. Instead the AI is told
590
+ to run ``kbagent context`` to load the full documentation on demand,
591
+ mirroring how Claude Code's plugin loader bootstraps the skill.
592
+ """
593
+ message_clean = message.strip()
594
+ project_block = (
595
+ f"- Active project: {project!r} (use `--project {project}` on `kbagent` "
596
+ "commands; multi-project commands also accept multiple `--project` flags)"
597
+ if project
598
+ else "- Active project: (none — multi-project mode. Ask the user to "
599
+ "pick one if a single-project answer is required, or use explicit "
600
+ "`--project NAME` flags / `kbagent project list` to discover)"
601
+ )
602
+ branch_block = (
603
+ f"- Active branch: #{branch_id} (use `--branch {branch_id}` where supported)"
604
+ if branch_id
605
+ else "- Active branch: main (production)"
606
+ )
607
+ serve_block = (
608
+ f"- `kbagent http get|post /...` reaches the running serve at {serve_url}. "
609
+ "Env vars `KBAGENT_SERVE_URL` + `KBAGENT_SERVE_TOKEN` are pre-set, so "
610
+ "this is the fastest path for read queries against the live API."
611
+ if serve_url
612
+ else "- `kbagent http get|post /...` reaches the running serve when "
613
+ "`KBAGENT_SERVE_URL` + `KBAGENT_SERVE_TOKEN` are set (which they are "
614
+ "inside this subprocess)."
615
+ )
616
+ return f"""\
617
+ You are a Keboola data engineer's AI co-pilot, running inside
618
+ `kbagent serve`. The user types questions in a chat box on the dashboard
619
+ and you answer them by running real `kbagent` commands and summarising
620
+ the results — no guessing, no fabrication.
621
+
622
+ TOOLS AVAILABLE:
623
+ - `kbagent` CLI is on PATH and pre-configured for the user's workspace
624
+ (same `config.json` the serve uses; same Keboola projects).
625
+ - Run `kbagent context` FIRST when you need to discover the full command
626
+ inventory or workflow knowledge. It dumps the kbagent skill (commands,
627
+ gotchas, workflows) into your context on demand — designed for AI
628
+ consumption.
629
+ - Add `--json` to ANY command for machine-parseable output (every
630
+ `kbagent` command supports it).
631
+ {serve_block}
632
+
633
+ USER CONTEXT:
634
+ {project_block}
635
+ {branch_block}
636
+
637
+ USER'S MESSAGE:
638
+ {message_clean}
639
+
640
+ HOW TO ANSWER:
641
+ - If the question is concrete ("list failed jobs", "show config X"),
642
+ run the relevant `kbagent` command, parse the result, and answer.
643
+ - If the question is open-ended ("what should I clean up?"), discover
644
+ first (run a relevant `--json` command, scan the result), then
645
+ summarise with specific findings.
646
+ - Cross-project work is a first-class flag: most commands accept
647
+ multiple `--project NAME` flags. Don't artificially constrain to a
648
+ single project unless the question is single-project.
649
+
650
+ OUTPUT FORMAT:
651
+ - Markdown. Use code blocks for SQL / commands you ran or recommend.
652
+ - Tables when comparing multiple projects / configs / rows.
653
+ - Be concrete: cite specific IDs, project aliases, timestamps. Avoid
654
+ vague "you might want to..." — say what to run and what to expect.
655
+ - If you cannot answer (Kai-required feature, missing token, blocked
656
+ by permissions), say so explicitly and name the missing piece.
657
+ """
658
+
659
+
660
+ def _now_utc() -> datetime:
661
+ return datetime.now(UTC).replace(microsecond=0)
662
+
663
+
664
def compute_next_run(cron: str, after: datetime | None = None) -> str | None:
    """Return the ISO-8601 timestamp of the next firing of ``cron``.

    Args:
        cron: Cron expression handed to ``croniter``.
        after: Reference instant; the current UTC time is used when falsy.

    Returns:
        The next firing as an ISO string (stamped UTC when ``croniter``
        returns a naive datetime), or ``None`` when any step fails -- the
        failure is logged as a warning rather than raised.
    """
    try:
        reference = after or _now_utc()
        upcoming = croniter(cron, reference).get_next(datetime)
        # Naive results are stamped as UTC so the ISO string carries an offset.
        aware = upcoming if upcoming.tzinfo is not None else upcoming.replace(tzinfo=UTC)
        return aware.isoformat()
    except Exception as exc:
        logger.warning("Invalid cron %r: %s", cron, exc)
        return None
675
+
676
+
677
def is_due(cron: str, last_run: datetime | None, now: datetime) -> bool:
    """Report whether a cron firing has occurred since ``last_run``.

    The most recent firing before ``now`` is computed with ``croniter``; the
    task is due iff that firing is strictly later than ``last_run``.

    Args:
        cron: Cron expression handed to ``croniter``.
        last_run: Time of the last run, or ``None`` if there is none.
        now: Reference instant for locating the previous firing.

    Returns:
        ``False`` when the previous firing cannot be computed (e.g. invalid
        cron); ``True`` when ``last_run`` is ``None``; otherwise whether the
        previous firing is after ``last_run``.
    """
    try:
        previous_firing = croniter(cron, now).get_prev(datetime)
        if previous_firing.tzinfo is None:
            previous_firing = previous_firing.replace(tzinfo=UTC)
    except Exception:
        return False
    return last_run is None or previous_firing > last_run
693
+
694
+
695
+ async def _run_mcp_tool(registry: Any, params: dict[str, Any]) -> dict[str, Any]:
696
+ """Dispatch an mcp_tool action via the McpService."""
697
+ tool = params.get("tool")
698
+ if not tool:
699
+ raise ValueError("mcp_tool action requires a 'tool' name in params")
700
+ project = params.get("project")
701
+ branch_id = params.get("branch_id")
702
+ tool_input = params.get("input") or {}
703
+ return await asyncio.to_thread(
704
+ registry.mcp.validate_and_call_tool,
705
+ tool_name=str(tool),
706
+ tool_input=tool_input,
707
+ alias=project,
708
+ branch_id=branch_id,
709
+ )
710
+
711
+
712
async def _run_cli(
    registry: Any,
    params: dict[str, Any],
    *,
    upstream_run: AgentRun | None = None,
    upstream_task: AgentTask | None = None,
) -> dict[str, Any]:
    """Dispatch a cli_command action via subprocess.

    ``params["argv"]`` must be a non-empty list; entries are stringified and
    ``kbagent`` is prepended unless it is already the first element. The
    command runs with the environment from ``_build_subprocess_env`` and is
    bounded by ``params["timeout"]`` seconds (default 300).

    Returns:
        A dict with ``argv``, ``exit_code``, and the decoded ``stdout`` /
        ``stderr`` (undecodable bytes are replaced).

    Raises:
        ValueError: If ``argv`` is missing, empty or not a list, or if
            ``timeout`` is not convertible to ``float`` (``TypeError`` for
            unsupported types such as ``None``).
        RuntimeError: If the command does not finish within the timeout.
    """
    argv_param = params.get("argv")
    if not isinstance(argv_param, list) or not argv_param:
        raise ValueError("cli_command action requires non-empty 'argv' list")
    argv = [str(a) for a in argv_param]
    if argv[0] != "kbagent":
        argv = ["kbagent", *argv]
    # Parse the timeout BEFORE spawning: a malformed value must fail fast
    # instead of raising after the child exists and leaving it orphaned.
    timeout = float(params.get("timeout", 300.0))
    proc = await asyncio.create_subprocess_exec(
        *argv,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
        env=_build_subprocess_env(registry, upstream_run=upstream_run, upstream_task=upstream_task),
    )
    try:
        stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout)
    except TimeoutError:
        # The process may have exited right at the deadline; killing a
        # finished process would raise and mask the timeout error.
        if proc.returncode is None:
            proc.kill()
        # Reap the child (bounded wait) so it is not left unwaited-for.
        with contextlib.suppress(TimeoutError):
            await asyncio.wait_for(proc.wait(), timeout=2.0)
        raise RuntimeError(f"CLI command timed out after {timeout}s") from None
    return {
        "argv": argv,
        "exit_code": proc.returncode,
        "stdout": stdout.decode("utf-8", errors="replace"),
        "stderr": stderr.decode("utf-8", errors="replace"),
    }
744
+
745
+
746
def _resolve_ai_extra_args(params: dict[str, Any]) -> list[str]:
    """Return the ``extra_args`` an ai_agent task may pass to the AI CLI.

    ``extra_args`` reach the underlying AI CLI verbatim and can therefore
    carry flags that disable its safety rails (GHSA-777j-6p95-qv3m). They are
    honoured only when the environment variable named by
    ``ENV_ALLOW_AI_EXTRA_ARGS`` is set to ``1`` / ``true`` / ``yes`` / ``on``
    (case-insensitive). Otherwise supplied args are dropped and a warning is
    logged so the drop stays visible to the operator.

    Returns:
        The stringified args when opted in; an empty list when none were
        supplied or the opt-in is absent.

    Raises:
        ValueError: If ``extra_args`` is truthy but not a list.
    """
    supplied = params.get("extra_args") or []
    if not isinstance(supplied, list):
        raise ValueError("ai_agent.extra_args must be a list of strings")
    requested = [str(item) for item in supplied]
    if not requested:
        return []
    opt_in = os.environ.get(ENV_ALLOW_AI_EXTRA_ARGS, "").strip().lower()
    if opt_in in ("1", "true", "yes", "on"):
        return requested
    logger.warning(
        "Ignoring %d ai_agent extra_args because %s is not set in the kbagent "
        "environment (serve, or this local `agent` run). extra_args are passed "
        "verbatim to the AI CLI and can disable its safety rails; set %s=1 to "
        "honor them.",
        len(requested),
        ENV_ALLOW_AI_EXTRA_ARGS,
        ENV_ALLOW_AI_EXTRA_ARGS,
    )
    return []
777
+
778
+
779
+ # Per-CLI launcher recipes for "single prompt, no interaction" mode.
780
+ _AI_CLI_RECIPES: dict[str, Any] = {
781
+ # Anthropic Claude Code: -p PROMPT runs in headless / non-interactive mode.
782
+ "claude": lambda prompt, extra: ["claude", "-p", prompt, *extra],
783
+ # OpenAI Codex CLI: `codex exec PROMPT` runs once and exits.
784
+ # `--skip-git-repo-check` is mandatory for headless invocation: codex 0.131+
785
+ # refuses to run in any directory it has not been interactively "trusted"
786
+ # via the first-run dialog, which a subprocess never sees. Without the flag
787
+ # the CLI exits 1 with "Not inside a trusted directory" before reading the
788
+ # prompt -- that is the failure path that surfaced as "AI chat failed" in
789
+ # the Local AI dashboard tile.
790
+ "codex": lambda prompt, extra: ["codex", "exec", "--skip-git-repo-check", *extra, prompt],
791
+ # Google Gemini CLI: `gemini -p PROMPT` for non-interactive single prompt.
792
+ "gemini": lambda prompt, extra: ["gemini", "-p", prompt, *extra],
793
+ }
794
+
795
+
796
async def _run_ai_agent(
    registry: Any,
    params: dict[str, Any],
    *,
    upstream_run: AgentRun | None = None,
    upstream_task: AgentTask | None = None,
) -> dict[str, Any]:
    """Dispatch an ai_agent action via an AI CLI (claude / codex / gemini).

    Spawns the CLI named by ``params["cli"]`` once with the prompt, captures
    its output, and returns. The user's prompt is prefixed with the upstream
    prefix (from ``_upstream_prompt_prefix``) and ``_AI_AGENT_PROMPT_PREFIX``
    before being handed to the CLI. The subprocess gets no stdin and an
    environment built by ``_build_subprocess_env`` with
    ``strip_admin_tokens=True``. ``extra_args`` are subject to the opt-in
    gate in ``_resolve_ai_extra_args``.

    Returns:
        A dict with ``cli``, ``argv``, ``prompt_preview`` (first 200
        characters of the unwrapped prompt), ``exit_code``, ``response``
        (decoded stdout) and ``stderr``.

    Raises:
        ValueError: If ``cli`` is not a known recipe, or ``prompt`` is not a
            non-blank string.
        RuntimeError: If the CLI does not finish within ``params["timeout"]``
            seconds (default 600).
    """
    cli_name = str(params.get("cli", "")).lower()
    if cli_name not in _AI_CLI_RECIPES:
        raise ValueError(f"ai_agent.cli must be one of {sorted(_AI_CLI_RECIPES)}, got {cli_name!r}")
    prompt = params.get("prompt")
    if not isinstance(prompt, str) or not prompt.strip():
        raise ValueError("ai_agent action requires a non-empty 'prompt'")
    extra_args = _resolve_ai_extra_args(params)
    timeout = float(params.get("timeout", 600.0))

    wrapped_prompt = (
        _upstream_prompt_prefix(upstream_run, upstream_task) + _AI_AGENT_PROMPT_PREFIX + prompt
    )
    argv = _AI_CLI_RECIPES[cli_name](wrapped_prompt, extra_args)
    proc = await asyncio.create_subprocess_exec(
        *argv,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
        stdin=asyncio.subprocess.DEVNULL,
        env=_build_subprocess_env(
            registry,
            upstream_run=upstream_run,
            upstream_task=upstream_task,
            strip_admin_tokens=True,
        ),
    )
    try:
        stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout)
    except TimeoutError:
        # The process may have exited right at the deadline; killing a
        # finished process would raise and mask the timeout error.
        if proc.returncode is None:
            proc.kill()
        # Reap the child (bounded wait) so it is not left unwaited-for.
        with contextlib.suppress(TimeoutError):
            await asyncio.wait_for(proc.wait(), timeout=2.0)
        raise RuntimeError(f"AI CLI '{cli_name}' timed out after {timeout}s") from None
    return {
        "cli": cli_name,
        "argv": argv,
        "prompt_preview": prompt[:200],
        "exit_code": proc.returncode,
        "response": stdout.decode("utf-8", errors="replace"),
        "stderr": stderr.decode("utf-8", errors="replace"),
    }
854
+
855
+
856
+ # Per-CLI streaming recipes. Only claude supports a structured JSONL stream
857
+ # today (`--output-format=stream-json --verbose`); codex and gemini fall back
858
+ # to the unstructured plain-text path. The recipe builder returns argv, and
859
+ # whether the resulting subprocess emits JSONL (so the consumer knows whether
860
+ # to JSON-parse each line or treat it as raw text).
861
+ _AI_CLI_STREAM_RECIPES: dict[str, Any] = {
862
+ # `--verbose` is REQUIRED by claude for `--output-format=stream-json`;
863
+ # without it claude refuses and prints an error to stderr. Note: stream-json
864
+ # is line-buffered by claude itself, so we don't need to disable Python
865
+ # / Node stdout buffering on our side.
866
+ "claude": lambda prompt, extra: (
867
+ [
868
+ "claude",
869
+ "-p",
870
+ prompt,
871
+ "--output-format=stream-json",
872
+ "--verbose",
873
+ *extra,
874
+ ],
875
+ True, # jsonl
876
+ ),
877
+ # See _AI_CLI_RECIPES above for the `--skip-git-repo-check` rationale.
878
+ "codex": lambda prompt, extra: (
879
+ ["codex", "exec", "--skip-git-repo-check", *extra, prompt],
880
+ False,
881
+ ),
882
+ "gemini": lambda prompt, extra: (["gemini", "-p", prompt, *extra], False),
883
+ }
884
+
885
+
886
+ async def stream_ai_agent_events(
887
+ registry: Any,
888
+ params: dict[str, Any],
889
+ *,
890
+ upstream_run: AgentRun | None = None,
891
+ upstream_task: AgentTask | None = None,
892
+ ) -> AsyncIterator[dict[str, Any]]:
893
+ """Spawn an AI CLI and yield events as they are emitted, live.
894
+
895
+ Each yielded value is a dict shaped like ``{"event": <name>, "data": ...}``,
896
+ drop-in for an SSE serializer:
897
+
898
+ - ``init`` -- one-shot at the start. Contains ``cli``, ``argv``,
899
+ ``jsonl`` flag, ``started_at``, ``prompt_preview``.
900
+ - ``stdout`` -- one per line of stdout. For ``cli == "claude"`` the
901
+ ``data`` is the parsed JSONL object (raw line preserved under
902
+ ``data.raw``). For codex/gemini ``data`` is just the raw line.
903
+ - ``stderr`` -- one per line of stderr (each AI CLI writes its
904
+ progress notes there too; rarely empty).
905
+ - ``done`` -- one-shot at the end. Contains ``exit_code``,
906
+ ``elapsed_seconds``, ``ended_at``, ``status``, plus the accumulated
907
+ ``response_text`` (text content joined across all ``assistant``
908
+ turn events -- so callers don't have to re-walk the stream).
909
+
910
+ Cancellation: the caller can stop iterating; the subprocess is killed
911
+ via try/finally. Timeout-induced kills emit a final ``done`` with
912
+ ``status="error"`` and ``error="timeout"``.
913
+ """
914
+ cli_name = str(params.get("cli", "")).lower()
915
+ if cli_name not in _AI_CLI_STREAM_RECIPES:
916
+ raise ValueError(
917
+ f"ai_agent.cli must be one of {sorted(_AI_CLI_STREAM_RECIPES)}, got {cli_name!r}"
918
+ )
919
+ prompt = params.get("prompt")
920
+ if not isinstance(prompt, str) or not prompt.strip():
921
+ raise ValueError("ai_agent action requires a non-empty 'prompt'")
922
+ extra_args = _resolve_ai_extra_args(params)
923
+ timeout = float(params.get("timeout", 600.0))
924
+
925
+ wrapped_prompt = (
926
+ _upstream_prompt_prefix(upstream_run, upstream_task) + _AI_AGENT_PROMPT_PREFIX + prompt
927
+ )
928
+ argv, jsonl = _AI_CLI_STREAM_RECIPES[cli_name](wrapped_prompt, extra_args)
929
+ started_monotonic = time.monotonic()
930
+ started_at = _now_utc().isoformat()
931
+
932
+ yield {
933
+ "event": "init",
934
+ "data": {
935
+ "cli": cli_name,
936
+ "argv": argv,
937
+ "jsonl": jsonl,
938
+ "started_at": started_at,
939
+ "prompt_preview": prompt[:200],
940
+ },
941
+ }
942
+
943
+ proc = await asyncio.create_subprocess_exec(
944
+ *argv,
945
+ stdout=asyncio.subprocess.PIPE,
946
+ stderr=asyncio.subprocess.PIPE,
947
+ stdin=asyncio.subprocess.DEVNULL,
948
+ env=_build_subprocess_env(
949
+ registry,
950
+ upstream_run=upstream_run,
951
+ upstream_task=upstream_task,
952
+ strip_admin_tokens=True,
953
+ ),
954
+ )
955
+
956
+ # Walk stdout + stderr concurrently. Each side gets its own consumer
957
+ # coroutine that pushes items into a shared queue; the generator
958
+ # awaits the queue and yields events as they arrive. This is the
959
+ # idiomatic asyncio fan-in -- avoids the trap of `async for line in
960
+ # proc.stdout` blocking stderr until EOF (which would defeat the
961
+ # whole point of "show me what's happening live").
962
+ queue: asyncio.Queue[tuple[str, dict[str, Any]] | None] = asyncio.Queue()
963
+ response_chunks: list[str] = []
964
+ stderr_chunks: list[str] = []
965
+
966
+ async def _consume(stream: asyncio.StreamReader, kind: str) -> None:
967
+ while True:
968
+ raw = await stream.readline()
969
+ if not raw:
970
+ break
971
+ line = raw.decode("utf-8", errors="replace").rstrip("\n")
972
+ if kind == "stdout" and jsonl:
973
+ try:
974
+ parsed = json.loads(line)
975
+ except json.JSONDecodeError:
976
+ await queue.put(("stdout", {"raw": line}))
977
+ continue
978
+ # Best-effort extract of assistant text so the final
979
+ # `done` event carries a `response_text` field. claude's
980
+ # stream-json shapes assistant turns as
981
+ # ``{"type":"assistant","message":{"content":[{"type":"text","text":"..."}]}}``
982
+ # and the final result as ``{"type":"result","result":"..."}``.
983
+ if isinstance(parsed, dict):
984
+ if parsed.get("type") == "assistant":
985
+ content = parsed.get("message", {}).get("content", [])
986
+ for block in content if isinstance(content, list) else []:
987
+ if (
988
+ isinstance(block, dict)
989
+ and block.get("type") == "text"
990
+ and isinstance(block.get("text"), str)
991
+ ):
992
+ response_chunks.append(block["text"])
993
+ elif parsed.get("type") == "result" and isinstance(parsed.get("result"), str):
994
+ response_chunks.append(parsed["result"])
995
+ await queue.put(
996
+ ("stdout", parsed if isinstance(parsed, dict) else {"value": parsed})
997
+ )
998
+ else:
999
+ if kind == "stderr":
1000
+ stderr_chunks.append(line)
1001
+ await queue.put((kind, {"raw": line}))
1002
+
1003
+ assert proc.stdout is not None
1004
+ assert proc.stderr is not None
1005
+ stdout_task = asyncio.create_task(_consume(proc.stdout, "stdout"))
1006
+ stderr_task = asyncio.create_task(_consume(proc.stderr, "stderr"))
1007
+
1008
+ async def _wait_and_signal() -> None:
1009
+ await asyncio.gather(stdout_task, stderr_task)
1010
+ await proc.wait()
1011
+ await queue.put(None)
1012
+
1013
+ wait_task = asyncio.create_task(_wait_and_signal())
1014
+
1015
+ deadline = started_monotonic + timeout
1016
+ timed_out = False
1017
+ try:
1018
+ while True:
1019
+ remaining = deadline - time.monotonic()
1020
+ if remaining <= 0:
1021
+ timed_out = True
1022
+ break
1023
+ try:
1024
+ item = await asyncio.wait_for(queue.get(), timeout=remaining)
1025
+ except TimeoutError:
1026
+ timed_out = True
1027
+ break
1028
+ if item is None:
1029
+ break
1030
+ kind, payload = item
1031
+ yield {"event": kind, "data": payload}
1032
+ finally:
1033
+ if timed_out and proc.returncode is None:
1034
+ proc.kill()
1035
+ with contextlib.suppress(TimeoutError):
1036
+ await asyncio.wait_for(proc.wait(), timeout=2.0)
1037
+ for t in (stdout_task, stderr_task, wait_task):
1038
+ if not t.done():
1039
+ t.cancel()
1040
+ # Drain any remaining queue items already produced before kill.
1041
+ while not queue.empty():
1042
+ item = queue.get_nowait()
1043
+ if item is None:
1044
+ continue
1045
+ kind, payload = item
1046
+ yield {"event": kind, "data": payload}
1047
+
1048
+ elapsed = round(time.monotonic() - started_monotonic, 2)
1049
+ status = "error" if (timed_out or proc.returncode not in (0, None)) else "ok"
1050
+ final: dict[str, Any] = {
1051
+ "cli": cli_name,
1052
+ "argv": argv,
1053
+ "exit_code": proc.returncode,
1054
+ "elapsed_seconds": elapsed,
1055
+ "ended_at": _now_utc().isoformat(),
1056
+ "status": status,
1057
+ "response": "".join(response_chunks),
1058
+ "stderr": "\n".join(stderr_chunks),
1059
+ }
1060
+ if timed_out:
1061
+ final["error"] = f"AI CLI '{cli_name}' timed out after {timeout}s"
1062
+ elif status == "error":
1063
+ # Non-timeout failure (CLI exited with non-zero rc). Surface a
1064
+ # human-readable error so the UI does not fall back to a generic
1065
+ # "AI chat failed" placeholder -- the stderr tail almost always
1066
+ # explains the failure (e.g. codex "Not inside a trusted directory",
1067
+ # claude auth errors, network blips). Cap tail length so a chatty
1068
+ # CLI cannot blow up the SSE frame.
1069
+ tail_lines = [s for s in stderr_chunks if s.strip()][-8:]
1070
+ tail = "\n".join(tail_lines)[-800:].strip()
1071
+ final["error"] = f"AI CLI '{cli_name}' exited with code {proc.returncode}." + (
1072
+ f"\nstderr (tail):\n{tail}" if tail else ""
1073
+ )
1074
+ yield {"event": "done", "data": final}
1075
+
1076
+
1077
+ def _trigger_should_fire(trigger_on: str, run_status: str) -> bool:
1078
+ """Match a Trigger.on filter against the upstream run status.
1079
+
1080
+ Pulled out so tests can assert the policy in isolation; also makes
1081
+ the fan-out site at the bottom of ``run_task_once`` legible.
1082
+ """
1083
+ if trigger_on == "always":
1084
+ return True
1085
+ if trigger_on == "success" and run_status == "ok":
1086
+ return True
1087
+ return trigger_on == "error" and run_status == "error"
1088
+
1089
+
1090
async def run_task_once(
    task: AgentTask,
    registry: Any,
    store: AgentStore,
    *,
    upstream_run: AgentRun | None = None,
    upstream_task: AgentTask | None = None,
) -> AgentRun:
    """Execute one task and append a run record.

    For ai_agent runs we drive the streaming generator and capture
    every emitted event so the persisted run carries:
      - the full timeline (saved to ``agent_runs/<task_id>/<run_id>.jsonl``)
      - a precomputed summary (model, tokens, cost, tool calls)

    This unifies cron-driven runs with UI-driven runs (RunBroadcaster):
    both produce the same persisted shape, and the detail drawer can
    replay either using the same /events endpoint. cli_command and
    mcp_tool runs still use the one-shot path; their structured output
    fits in the ``output`` field directly.

    When ``upstream_run`` + ``upstream_task`` are supplied (the run was
    triggered as a chained downstream), the subprocess receives extra
    ``KBAGENT_UPSTREAM_*`` env vars and the ai_agent prompt is prefixed
    with a hint explaining where to fetch the upstream output.

    After persist, if ``task.trigger`` is set and its ``on`` filter
    matches this run's status, the downstream task runs synchronously
    with this run threaded through as its upstream context. The chain
    is awaited because each downstream's persist depends on the
    upstream's persist already being on disk.

    Args:
        task: The task to execute; ``task.action.type`` selects the path
            (``mcp_tool``, ``cli_command`` or ``ai_agent``).
        registry: Passed through unchanged to the action runners.
        store: Receives the run record, the event timeline and the
            refreshed ``last_run_at`` / ``next_run_at`` of the task.
        upstream_run: Run that triggered this one, when chained.
        upstream_task: Task that owns ``upstream_run``, when chained.

    Returns:
        The run record of ``task`` itself (never the downstream's).
        Action failures are captured as ``status="error"`` on the record
        rather than raised.
    """
    started = _now_utc()
    run = AgentRun(task_id=task.id, started_at=started.isoformat())
    captured_events: list[dict[str, Any]] = []
    try:
        if task.action.type == "mcp_tool":
            # mcp_tool runs in-process via McpService, no env vars to
            # propagate. The upstream payload, when relevant, can still
            # be read from store by a follow-up ai_agent task.
            output = await _run_mcp_tool(registry, task.action.params)
            run.status = "ok"
            run.output = output if isinstance(output, dict) else {"value": output}
        elif task.action.type == "cli_command":
            output = await _run_cli(
                registry,
                task.action.params,
                upstream_run=upstream_run,
                upstream_task=upstream_task,
            )
            run.status = "ok"
            run.output = output if isinstance(output, dict) else {"value": output}
        elif task.action.type == "ai_agent":
            # Stream events so we can persist the full timeline. The final
            # ``done`` event carries the same payload the legacy
            # _run_ai_agent built, so callers reading ``run.output`` see
            # an identical shape.
            done_payload: dict[str, Any] | None = None
            async for evt in stream_ai_agent_events(
                registry,
                task.action.params,
                upstream_run=upstream_run,
                upstream_task=upstream_task,
            ):
                captured_events.append(evt)
                if evt["event"] == "done":
                    done_payload = evt["data"]
            if done_payload is None:
                # Stream ended without a done frame -- treat as error so
                # the UI flags it; the captured events still get persisted
                # so an operator can see what claude was up to.
                run.status = "error"
                run.error = "ai_agent stream ended without a done event"
            else:
                run.status = done_payload.get("status", "ok")
                run.output = done_payload
                if done_payload.get("error"):
                    run.error = done_payload["error"]
        else:
            raise ValueError(f"Unknown action type: {task.action.type}")
    except Exception as exc:
        logger.exception("Agent task %s failed", task.id)
        run.status = "error"
        run.error = str(exc)
    finally:
        run.ended_at = _now_utc().isoformat()
        # Persist the timeline + compute summary BEFORE appending the run
        # row so events_path/summary land on the same JSONL line.
        try:
            if task.action.type == "ai_agent" and captured_events:
                # Imported lazily *inside* the guarded block: if the import
                # itself fails we still fall through to append_run() below
                # instead of losing the whole run record.
                from .pricing import build_run_summary

                run.summary = build_run_summary(captured_events)
                run.events_path = store.append_events(task.id, run.run_id, captured_events)
        except Exception:
            logger.exception("Failed to persist event timeline for %s/%s", task.id, run.run_id)
        store.append_run(run)
        # Update last_run / next_run on the PERSISTED task. We refetch from
        # the store rather than mutating the in-memory `task` because
        # callers (router /run with runtime_input) may pass a model-copy
        # of the original with merged action params — upserting that ghost
        # would clobber the saved action on disk. Refetch is cheap (single
        # JSON file read) and keeps the persisted record clean.
        persisted = store.get_task(task.id)
        if persisted is not None:
            persisted.last_run_at = run.started_at
            persisted.next_run_at = None if persisted.manual else compute_next_run(persisted.cron)
            store.upsert_task(persisted)

    # Fan-out a chained downstream AFTER persist, so any HTTP-read the
    # downstream does (`kbagent http get /agents/<id>/runs/<run_id>`)
    # sees the upstream output already on disk. Disabled downstreams
    # are skipped silently — disabling is the operator's "off switch"
    # for the chain.
    # NOTE(review): nothing in this function stops a trigger cycle
    # (A -> B -> A, or A -> A); confirm task validation rejects those.
    if task.trigger and _trigger_should_fire(task.trigger.on, run.status):
        downstream = store.get_task(task.trigger.task_id)
        if downstream is None:
            logger.warning(
                "Chain target task %s (from %s) not found; skipping fan-out",
                task.trigger.task_id,
                task.id,
            )
        elif not downstream.enabled:
            logger.info(
                "Chain target %s disabled; skipping fan-out from %s",
                downstream.id,
                task.id,
            )
        else:
            logger.info(
                "Chain: %s -> %s (on=%s, status=%s)",
                task.name,
                downstream.name,
                task.trigger.on,
                run.status,
            )
            try:
                await run_task_once(
                    downstream,
                    registry,
                    store,
                    upstream_run=run,
                    upstream_task=task,
                )
            except Exception:
                # Swallow downstream errors so the upstream's run record
                # stays "ok". The downstream's own run record captures
                # its failure; we don't want a bad downstream to retro-
                # flip the upstream's status.
                logger.exception(
                    "Chain downstream %s failed (upstream %s already persisted)",
                    downstream.id,
                    task.id,
                )

    return run
1246
+
1247
+
1248
async def scheduler_loop(store: AgentStore, registry: Any, *, tick_seconds: int = 60) -> None:
    """Run forever: every tick, dispatch due tasks.

    Each tick reloads the tasks from ``store``, skips disabled and manual
    ones, and starts a background ``asyncio`` task (via ``_safe_run``) for
    every task that ``is_due`` reports as due. The loop then sleeps for
    ``tick_seconds`` and returns cleanly when cancelled during that sleep.

    Args:
        store: Source of the task list; also handed to each run.
        registry: Passed through unchanged to each run.
        tick_seconds: Pause between two scheduling passes.
    """
    logger.info("Agent scheduler started (tick=%ss)", tick_seconds)
    # Hold strong references to in-flight task coroutines so they don't get
    # GC'd mid-flight (RUF006). We discard them via a done-callback.
    in_flight: set[asyncio.Task[None]] = set()
    while True:
        try:
            now = _now_utc()
            for task in store.load_tasks():
                if not task.enabled:
                    continue
                if task.manual:
                    # Manual tasks only run via POST /agents/{id}/run or as a
                    # chained downstream — cron is preserved on the record
                    # but the scheduler ignores it.
                    continue
                # Evaluate the schedule per task: one record with a broken
                # last_run_at / cron must not abort the pass and starve
                # every task that comes after it in the list.
                try:
                    last = datetime.fromisoformat(task.last_run_at) if task.last_run_at else None
                    due = is_due(task.cron, last, now)
                except Exception:
                    logger.exception(
                        "Cannot evaluate schedule for agent task %s; skipping this tick",
                        task.id,
                    )
                    continue
                if not due:
                    continue
                # NOTE(review): last_run_at is only written once a run has
                # finished (see run_task_once), so a run that outlives a
                # tick may still look due here — confirm is_due() cannot
                # cause overlapping dispatches of the same task.
                logger.info("Dispatching agent task: %s (%s)", task.name, task.id)
                fut = asyncio.create_task(_safe_run(task, registry, store))
                in_flight.add(fut)
                fut.add_done_callback(in_flight.discard)
        except Exception as exc:
            # Anything else (e.g. loading the task list) fails the whole
            # tick but never the scheduler itself.
            logger.exception("Scheduler tick error: %s", exc)
        try:
            await asyncio.sleep(tick_seconds)
        except asyncio.CancelledError:
            logger.info("Agent scheduler stopping")
            return
1279
+
1280
+
1281
async def _safe_run(task: AgentTask, registry: Any, store: AgentStore) -> None:
    """Run ``task`` once via ``run_task_once``, logging instead of raising.

    ``scheduler_loop`` wraps this coroutine in a background ``asyncio``
    task. Any ``Exception`` escaping ``run_task_once`` is logged with its
    traceback and swallowed here.
    """
    try:
        await run_task_once(task, registry, store)
    except Exception:
        logger.exception("Background task execution failed for %s", task.id)
1286
+
1287
+
1288
def stdin_isatty() -> bool:
    """Return True when ``sys.stdin`` is attached to a terminal.

    Returns False instead of raising when stdin is missing (``None``),
    has no usable ``isatty`` method, or has already been closed.
    """
    probe = getattr(sys.stdin, "isatty", None)
    if probe is None:
        return False
    try:
        return bool(probe())
    except ValueError:
        # File objects raise ValueError from isatty() once they are closed.
        return False