keboola-cli 0.63.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. keboola_agent_cli/__init__.py +34 -0
  2. keboola_agent_cli/__main__.py +5 -0
  3. keboola_agent_cli/_ui_dist/assets/arc-DhFYIddx.js +2 -0
  4. keboola_agent_cli/_ui_dist/assets/arc-DhFYIddx.js.map +1 -0
  5. keboola_agent_cli/_ui_dist/assets/architecture-7EHR7CIX-hNCijx_H.js +1 -0
  6. keboola_agent_cli/_ui_dist/assets/architectureDiagram-3BPJPVTR-C6hUlprM.js +37 -0
  7. keboola_agent_cli/_ui_dist/assets/architectureDiagram-3BPJPVTR-C6hUlprM.js.map +1 -0
  8. keboola_agent_cli/_ui_dist/assets/array-BifhSqXX.js +2 -0
  9. keboola_agent_cli/_ui_dist/assets/array-BifhSqXX.js.map +1 -0
  10. keboola_agent_cli/_ui_dist/assets/blockDiagram-GPEHLZMM-DC7qY9i4.js +133 -0
  11. keboola_agent_cli/_ui_dist/assets/blockDiagram-GPEHLZMM-DC7qY9i4.js.map +1 -0
  12. keboola_agent_cli/_ui_dist/assets/c4Diagram-AAUBKEIU-5Lh44evt.js +11 -0
  13. keboola_agent_cli/_ui_dist/assets/c4Diagram-AAUBKEIU-5Lh44evt.js.map +1 -0
  14. keboola_agent_cli/_ui_dist/assets/channel-DBMrXlxx.js +2 -0
  15. keboola_agent_cli/_ui_dist/assets/channel-DBMrXlxx.js.map +1 -0
  16. keboola_agent_cli/_ui_dist/assets/chunk-2J33WTMH-Coy82EBh.js +2 -0
  17. keboola_agent_cli/_ui_dist/assets/chunk-2J33WTMH-Coy82EBh.js.map +1 -0
  18. keboola_agent_cli/_ui_dist/assets/chunk-3OPIFGDE-BQC5CRHI.js +63 -0
  19. keboola_agent_cli/_ui_dist/assets/chunk-3OPIFGDE-BQC5CRHI.js.map +1 -0
  20. keboola_agent_cli/_ui_dist/assets/chunk-4BX2VUAB-DUuEt70o.js +2 -0
  21. keboola_agent_cli/_ui_dist/assets/chunk-4BX2VUAB-DUuEt70o.js.map +1 -0
  22. keboola_agent_cli/_ui_dist/assets/chunk-55IACEB6-BvR-6chF.js +2 -0
  23. keboola_agent_cli/_ui_dist/assets/chunk-55IACEB6-BvR-6chF.js.map +1 -0
  24. keboola_agent_cli/_ui_dist/assets/chunk-5ZQYHXKU-BjcTN7ul.js +3 -0
  25. keboola_agent_cli/_ui_dist/assets/chunk-5ZQYHXKU-BjcTN7ul.js.map +1 -0
  26. keboola_agent_cli/_ui_dist/assets/chunk-727SXJPM-C0zxqqRN.js +207 -0
  27. keboola_agent_cli/_ui_dist/assets/chunk-727SXJPM-C0zxqqRN.js.map +1 -0
  28. keboola_agent_cli/_ui_dist/assets/chunk-AQP2D5EJ-CXf7rIlZ.js +232 -0
  29. keboola_agent_cli/_ui_dist/assets/chunk-AQP2D5EJ-CXf7rIlZ.js.map +1 -0
  30. keboola_agent_cli/_ui_dist/assets/chunk-BSJP7CBP-Oj_FO9Q7.js +2 -0
  31. keboola_agent_cli/_ui_dist/assets/chunk-BSJP7CBP-Oj_FO9Q7.js.map +1 -0
  32. keboola_agent_cli/_ui_dist/assets/chunk-CSCIHK7Q-CcTsLrFc.js +124 -0
  33. keboola_agent_cli/_ui_dist/assets/chunk-CSCIHK7Q-CcTsLrFc.js.map +1 -0
  34. keboola_agent_cli/_ui_dist/assets/chunk-FMBD7UC4-FH-zLkkW.js +16 -0
  35. keboola_agent_cli/_ui_dist/assets/chunk-FMBD7UC4-FH-zLkkW.js.map +1 -0
  36. keboola_agent_cli/_ui_dist/assets/chunk-L5ZTLDWV-B1Ky_e7O.js +2 -0
  37. keboola_agent_cli/_ui_dist/assets/chunk-L5ZTLDWV-B1Ky_e7O.js.map +1 -0
  38. keboola_agent_cli/_ui_dist/assets/chunk-ND2GUHAM-BHz1rpbm.js +2 -0
  39. keboola_agent_cli/_ui_dist/assets/chunk-ND2GUHAM-BHz1rpbm.js.map +1 -0
  40. keboola_agent_cli/_ui_dist/assets/chunk-NNHCCRGN-DlpIbxXb.js +160 -0
  41. keboola_agent_cli/_ui_dist/assets/chunk-NNHCCRGN-DlpIbxXb.js.map +1 -0
  42. keboola_agent_cli/_ui_dist/assets/chunk-NZK2D7GU-tnrSoegS.js +2 -0
  43. keboola_agent_cli/_ui_dist/assets/chunk-NZK2D7GU-tnrSoegS.js.map +1 -0
  44. keboola_agent_cli/_ui_dist/assets/chunk-O5CBEL6O-DxxqDH0l.js +71 -0
  45. keboola_agent_cli/_ui_dist/assets/chunk-O5CBEL6O-DxxqDH0l.js.map +1 -0
  46. keboola_agent_cli/_ui_dist/assets/chunk-QZHKN3VN-CSjc2gjj.js +2 -0
  47. keboola_agent_cli/_ui_dist/assets/chunk-QZHKN3VN-CSjc2gjj.js.map +1 -0
  48. keboola_agent_cli/_ui_dist/assets/classDiagram-4FO5ZUOK-BuZcZu85.js +2 -0
  49. keboola_agent_cli/_ui_dist/assets/classDiagram-4FO5ZUOK-BuZcZu85.js.map +1 -0
  50. keboola_agent_cli/_ui_dist/assets/classDiagram-v2-Q7XG4LA2-BuZcZu85.js +2 -0
  51. keboola_agent_cli/_ui_dist/assets/classDiagram-v2-Q7XG4LA2-BuZcZu85.js.map +1 -0
  52. keboola_agent_cli/_ui_dist/assets/cose-bilkent-S5V4N54A-Y0L8LDMa.js +2 -0
  53. keboola_agent_cli/_ui_dist/assets/cose-bilkent-S5V4N54A-Y0L8LDMa.js.map +1 -0
  54. keboola_agent_cli/_ui_dist/assets/cytoscape.esm-C8YCVR3_.js +322 -0
  55. keboola_agent_cli/_ui_dist/assets/cytoscape.esm-C8YCVR3_.js.map +1 -0
  56. keboola_agent_cli/_ui_dist/assets/dagre-BM42HDAG-UZ-9BTqF.js +5 -0
  57. keboola_agent_cli/_ui_dist/assets/dagre-BM42HDAG-UZ-9BTqF.js.map +1 -0
  58. keboola_agent_cli/_ui_dist/assets/dagre-Bx709z4p.js +2 -0
  59. keboola_agent_cli/_ui_dist/assets/dagre-Bx709z4p.js.map +1 -0
  60. keboola_agent_cli/_ui_dist/assets/defaultLocale-C8Fc0cco.js +2 -0
  61. keboola_agent_cli/_ui_dist/assets/defaultLocale-C8Fc0cco.js.map +1 -0
  62. keboola_agent_cli/_ui_dist/assets/diagram-2AECGRRQ-DoDQ60wi.js +44 -0
  63. keboola_agent_cli/_ui_dist/assets/diagram-2AECGRRQ-DoDQ60wi.js.map +1 -0
  64. keboola_agent_cli/_ui_dist/assets/diagram-5GNKFQAL-CMGFxpUs.js +11 -0
  65. keboola_agent_cli/_ui_dist/assets/diagram-5GNKFQAL-CMGFxpUs.js.map +1 -0
  66. keboola_agent_cli/_ui_dist/assets/diagram-KO2AKTUF-1uGDa-Iu.js +4 -0
  67. keboola_agent_cli/_ui_dist/assets/diagram-KO2AKTUF-1uGDa-Iu.js.map +1 -0
  68. keboola_agent_cli/_ui_dist/assets/diagram-LMA3HP47-XtFH7B51.js +25 -0
  69. keboola_agent_cli/_ui_dist/assets/diagram-LMA3HP47-XtFH7B51.js.map +1 -0
  70. keboola_agent_cli/_ui_dist/assets/diagram-OG6HWLK6-B4_Te1T5.js +25 -0
  71. keboola_agent_cli/_ui_dist/assets/diagram-OG6HWLK6-B4_Te1T5.js.map +1 -0
  72. keboola_agent_cli/_ui_dist/assets/dist-Di6zmlv0.js +2 -0
  73. keboola_agent_cli/_ui_dist/assets/dist-Di6zmlv0.js.map +1 -0
  74. keboola_agent_cli/_ui_dist/assets/erDiagram-TEJ5UH35-NjQkrdFt.js +86 -0
  75. keboola_agent_cli/_ui_dist/assets/erDiagram-TEJ5UH35-NjQkrdFt.js.map +1 -0
  76. keboola_agent_cli/_ui_dist/assets/eventmodeling-FCH6USID-BrJMIks8.js +1 -0
  77. keboola_agent_cli/_ui_dist/assets/flowDiagram-I6XJVG4X-CIr8DWl7.js +163 -0
  78. keboola_agent_cli/_ui_dist/assets/flowDiagram-I6XJVG4X-CIr8DWl7.js.map +1 -0
  79. keboola_agent_cli/_ui_dist/assets/ganttDiagram-6RSMTGT7-C1VY_xbQ.js +293 -0
  80. keboola_agent_cli/_ui_dist/assets/ganttDiagram-6RSMTGT7-C1VY_xbQ.js.map +1 -0
  81. keboola_agent_cli/_ui_dist/assets/gitGraph-WXDBUCRP-COacYjo-.js +1 -0
  82. keboola_agent_cli/_ui_dist/assets/gitGraphDiagram-PVQCEYII-DQT8-kg2.js +107 -0
  83. keboola_agent_cli/_ui_dist/assets/gitGraphDiagram-PVQCEYII-DQT8-kg2.js.map +1 -0
  84. keboola_agent_cli/_ui_dist/assets/graphlib-B8gBHxth.js +2 -0
  85. keboola_agent_cli/_ui_dist/assets/graphlib-B8gBHxth.js.map +1 -0
  86. keboola_agent_cli/_ui_dist/assets/index-CMq50kkV.css +1 -0
  87. keboola_agent_cli/_ui_dist/assets/index-D8W97DAz.js +118 -0
  88. keboola_agent_cli/_ui_dist/assets/index-D8W97DAz.js.map +1 -0
  89. keboola_agent_cli/_ui_dist/assets/info-J43DQDTF-DdCTRIzU.js +1 -0
  90. keboola_agent_cli/_ui_dist/assets/infoDiagram-5YYISTIA-C77rsoTp.js +3 -0
  91. keboola_agent_cli/_ui_dist/assets/infoDiagram-5YYISTIA-C77rsoTp.js.map +1 -0
  92. keboola_agent_cli/_ui_dist/assets/init-D6jRqBbL.js +2 -0
  93. keboola_agent_cli/_ui_dist/assets/init-D6jRqBbL.js.map +1 -0
  94. keboola_agent_cli/_ui_dist/assets/ishikawaDiagram-YF4QCWOH-BcTbXaLy.js +71 -0
  95. keboola_agent_cli/_ui_dist/assets/ishikawaDiagram-YF4QCWOH-BcTbXaLy.js.map +1 -0
  96. keboola_agent_cli/_ui_dist/assets/journeyDiagram-JHISSGLW-BejeAJQ_.js +140 -0
  97. keboola_agent_cli/_ui_dist/assets/journeyDiagram-JHISSGLW-BejeAJQ_.js.map +1 -0
  98. keboola_agent_cli/_ui_dist/assets/kanban-definition-UN3LZRKU-BRNz_UrH.js +90 -0
  99. keboola_agent_cli/_ui_dist/assets/kanban-definition-UN3LZRKU-BRNz_UrH.js.map +1 -0
  100. keboola_agent_cli/_ui_dist/assets/katex-C4eR7coU.js +258 -0
  101. keboola_agent_cli/_ui_dist/assets/katex-C4eR7coU.js.map +1 -0
  102. keboola_agent_cli/_ui_dist/assets/line-CzAQKFbJ.js +2 -0
  103. keboola_agent_cli/_ui_dist/assets/line-CzAQKFbJ.js.map +1 -0
  104. keboola_agent_cli/_ui_dist/assets/linear-DUNFFdck.js +2 -0
  105. keboola_agent_cli/_ui_dist/assets/linear-DUNFFdck.js.map +1 -0
  106. keboola_agent_cli/_ui_dist/assets/mermaid-parser.core-CpuBOkFa.js +5 -0
  107. keboola_agent_cli/_ui_dist/assets/mermaid-parser.core-CpuBOkFa.js.map +1 -0
  108. keboola_agent_cli/_ui_dist/assets/mindmap-definition-RKZ34NQL-9EJQNjH0.js +97 -0
  109. keboola_agent_cli/_ui_dist/assets/mindmap-definition-RKZ34NQL-9EJQNjH0.js.map +1 -0
  110. keboola_agent_cli/_ui_dist/assets/ordinal-hYBb2elL.js +2 -0
  111. keboola_agent_cli/_ui_dist/assets/ordinal-hYBb2elL.js.map +1 -0
  112. keboola_agent_cli/_ui_dist/assets/packet-YPE3B663-DLiiw_B2.js +1 -0
  113. keboola_agent_cli/_ui_dist/assets/path-BWPyau1x.js +2 -0
  114. keboola_agent_cli/_ui_dist/assets/path-BWPyau1x.js.map +1 -0
  115. keboola_agent_cli/_ui_dist/assets/pie-LRSECV5Y-CRoO8G1g.js +1 -0
  116. keboola_agent_cli/_ui_dist/assets/pieDiagram-4H26LBE5-XH4cy6Cb.js +31 -0
  117. keboola_agent_cli/_ui_dist/assets/pieDiagram-4H26LBE5-XH4cy6Cb.js.map +1 -0
  118. keboola_agent_cli/_ui_dist/assets/quadrantDiagram-W4KKPZXB-fdhc93U8.js +8 -0
  119. keboola_agent_cli/_ui_dist/assets/quadrantDiagram-W4KKPZXB-fdhc93U8.js.map +1 -0
  120. keboola_agent_cli/_ui_dist/assets/radar-GUYGQ44K-DAlLVJHm.js +1 -0
  121. keboola_agent_cli/_ui_dist/assets/requirementDiagram-4Y6WPE33-a94eP3R9.js +85 -0
  122. keboola_agent_cli/_ui_dist/assets/requirementDiagram-4Y6WPE33-a94eP3R9.js.map +1 -0
  123. keboola_agent_cli/_ui_dist/assets/rough.esm-CSKSodPl.js +2 -0
  124. keboola_agent_cli/_ui_dist/assets/rough.esm-CSKSodPl.js.map +1 -0
  125. keboola_agent_cli/_ui_dist/assets/sankeyDiagram-5OEKKPKP-jcBa02sp.js +41 -0
  126. keboola_agent_cli/_ui_dist/assets/sankeyDiagram-5OEKKPKP-jcBa02sp.js.map +1 -0
  127. keboola_agent_cli/_ui_dist/assets/sequenceDiagram-3UESZ5HK-A5-GGM-e.js +163 -0
  128. keboola_agent_cli/_ui_dist/assets/sequenceDiagram-3UESZ5HK-A5-GGM-e.js.map +1 -0
  129. keboola_agent_cli/_ui_dist/assets/src-ZI-V_AF0.js +2 -0
  130. keboola_agent_cli/_ui_dist/assets/src-ZI-V_AF0.js.map +1 -0
  131. keboola_agent_cli/_ui_dist/assets/stateDiagram-AJRCARHV-BKAA5rqE.js +2 -0
  132. keboola_agent_cli/_ui_dist/assets/stateDiagram-AJRCARHV-BKAA5rqE.js.map +1 -0
  133. keboola_agent_cli/_ui_dist/assets/stateDiagram-v2-BHNVJYJU-DnJwJBsE.js +2 -0
  134. keboola_agent_cli/_ui_dist/assets/stateDiagram-v2-BHNVJYJU-DnJwJBsE.js.map +1 -0
  135. keboola_agent_cli/_ui_dist/assets/timeline-definition-PNZ67QCA-Cy39jp8b.js +121 -0
  136. keboola_agent_cli/_ui_dist/assets/timeline-definition-PNZ67QCA-Cy39jp8b.js.map +1 -0
  137. keboola_agent_cli/_ui_dist/assets/treeView-BLDUP644-DbLYl23-.js +1 -0
  138. keboola_agent_cli/_ui_dist/assets/treemap-LRROVOQU-Bp0eGlOt.js +1 -0
  139. keboola_agent_cli/_ui_dist/assets/vennDiagram-CIIHVFJN-BGECKubd.js +35 -0
  140. keboola_agent_cli/_ui_dist/assets/vennDiagram-CIIHVFJN-BGECKubd.js.map +1 -0
  141. keboola_agent_cli/_ui_dist/assets/wardley-L42UT6IY-D4yH4jqS.js +1 -0
  142. keboola_agent_cli/_ui_dist/assets/wardleyDiagram-YWT4CUSO-D6XRG3cZ.js +79 -0
  143. keboola_agent_cli/_ui_dist/assets/wardleyDiagram-YWT4CUSO-D6XRG3cZ.js.map +1 -0
  144. keboola_agent_cli/_ui_dist/assets/xychartDiagram-2RQKCTM6-DRre-pfZ.js +8 -0
  145. keboola_agent_cli/_ui_dist/assets/xychartDiagram-2RQKCTM6-DRre-pfZ.js.map +1 -0
  146. keboola_agent_cli/_ui_dist/index.html +50 -0
  147. keboola_agent_cli/ai_client.py +83 -0
  148. keboola_agent_cli/auto_update.py +550 -0
  149. keboola_agent_cli/changelog.py +1198 -0
  150. keboola_agent_cli/cli.py +448 -0
  151. keboola_agent_cli/client.py +3422 -0
  152. keboola_agent_cli/commands/__init__.py +0 -0
  153. keboola_agent_cli/commands/_data_app_git.py +343 -0
  154. keboola_agent_cli/commands/_helpers.py +377 -0
  155. keboola_agent_cli/commands/_metadata_input.py +49 -0
  156. keboola_agent_cli/commands/_semantic_layer_crud.py +632 -0
  157. keboola_agent_cli/commands/_semantic_layer_helpers.py +44 -0
  158. keboola_agent_cli/commands/_semantic_layer_reference_data.py +247 -0
  159. keboola_agent_cli/commands/agent.py +968 -0
  160. keboola_agent_cli/commands/branch.py +423 -0
  161. keboola_agent_cli/commands/changelog.py +168 -0
  162. keboola_agent_cli/commands/component.py +216 -0
  163. keboola_agent_cli/commands/config.py +2442 -0
  164. keboola_agent_cli/commands/context.py +1481 -0
  165. keboola_agent_cli/commands/data_app.py +1279 -0
  166. keboola_agent_cli/commands/dev_portal.py +584 -0
  167. keboola_agent_cli/commands/doctor.py +37 -0
  168. keboola_agent_cli/commands/encrypt.py +145 -0
  169. keboola_agent_cli/commands/feature.py +311 -0
  170. keboola_agent_cli/commands/flow.py +948 -0
  171. keboola_agent_cli/commands/http_client.py +157 -0
  172. keboola_agent_cli/commands/init.py +279 -0
  173. keboola_agent_cli/commands/job.py +661 -0
  174. keboola_agent_cli/commands/kai.py +301 -0
  175. keboola_agent_cli/commands/lineage.py +1464 -0
  176. keboola_agent_cli/commands/org.py +292 -0
  177. keboola_agent_cli/commands/permissions.py +360 -0
  178. keboola_agent_cli/commands/project.py +1192 -0
  179. keboola_agent_cli/commands/repl.py +243 -0
  180. keboola_agent_cli/commands/schedule.py +340 -0
  181. keboola_agent_cli/commands/search.py +178 -0
  182. keboola_agent_cli/commands/semantic_layer.py +939 -0
  183. keboola_agent_cli/commands/serve.py +272 -0
  184. keboola_agent_cli/commands/sharing.py +340 -0
  185. keboola_agent_cli/commands/storage.py +2630 -0
  186. keboola_agent_cli/commands/stream.py +266 -0
  187. keboola_agent_cli/commands/sync.py +1277 -0
  188. keboola_agent_cli/commands/tool.py +206 -0
  189. keboola_agent_cli/commands/version.py +186 -0
  190. keboola_agent_cli/commands/workspace.py +635 -0
  191. keboola_agent_cli/config_store.py +582 -0
  192. keboola_agent_cli/constants.py +528 -0
  193. keboola_agent_cli/data_science_client.py +342 -0
  194. keboola_agent_cli/dev_portal_client.py +323 -0
  195. keboola_agent_cli/errors.py +248 -0
  196. keboola_agent_cli/http_base.py +315 -0
  197. keboola_agent_cli/json_utils.py +126 -0
  198. keboola_agent_cli/lib.py +536 -0
  199. keboola_agent_cli/manage_client.py +324 -0
  200. keboola_agent_cli/metastore_client.py +214 -0
  201. keboola_agent_cli/models.py +427 -0
  202. keboola_agent_cli/output.py +1084 -0
  203. keboola_agent_cli/permissions.py +469 -0
  204. keboola_agent_cli/py.typed +3 -0
  205. keboola_agent_cli/result_models.py +271 -0
  206. keboola_agent_cli/server/__init__.py +34 -0
  207. keboola_agent_cli/server/agent_runner.py +1289 -0
  208. keboola_agent_cli/server/agents_store.py +325 -0
  209. keboola_agent_cli/server/app.py +764 -0
  210. keboola_agent_cli/server/auth.py +117 -0
  211. keboola_agent_cli/server/dependencies.py +149 -0
  212. keboola_agent_cli/server/pricing.py +303 -0
  213. keboola_agent_cli/server/routers/__init__.py +1 -0
  214. keboola_agent_cli/server/routers/agents.py +616 -0
  215. keboola_agent_cli/server/routers/ai_chat.py +129 -0
  216. keboola_agent_cli/server/routers/branches.py +133 -0
  217. keboola_agent_cli/server/routers/components.py +48 -0
  218. keboola_agent_cli/server/routers/configs.py +507 -0
  219. keboola_agent_cli/server/routers/data_apps.py +384 -0
  220. keboola_agent_cli/server/routers/dev_portal.py +67 -0
  221. keboola_agent_cli/server/routers/encrypt.py +35 -0
  222. keboola_agent_cli/server/routers/feature.py +179 -0
  223. keboola_agent_cli/server/routers/flows.py +204 -0
  224. keboola_agent_cli/server/routers/health.py +53 -0
  225. keboola_agent_cli/server/routers/jobs.py +175 -0
  226. keboola_agent_cli/server/routers/kai.py +80 -0
  227. keboola_agent_cli/server/routers/lineage.py +226 -0
  228. keboola_agent_cli/server/routers/mcp.py +70 -0
  229. keboola_agent_cli/server/routers/members.py +170 -0
  230. keboola_agent_cli/server/routers/org.py +96 -0
  231. keboola_agent_cli/server/routers/projects.py +106 -0
  232. keboola_agent_cli/server/routers/schedules.py +54 -0
  233. keboola_agent_cli/server/routers/search.py +30 -0
  234. keboola_agent_cli/server/routers/semantic_layer.py +650 -0
  235. keboola_agent_cli/server/routers/sharing.py +86 -0
  236. keboola_agent_cli/server/routers/storage.py +574 -0
  237. keboola_agent_cli/server/routers/stream.py +100 -0
  238. keboola_agent_cli/server/routers/workspaces.py +302 -0
  239. keboola_agent_cli/server/run_broadcaster.py +329 -0
  240. keboola_agent_cli/server/sse.py +25 -0
  241. keboola_agent_cli/services/__init__.py +0 -0
  242. keboola_agent_cli/services/_encryption.py +217 -0
  243. keboola_agent_cli/services/_semantic_layer_cascade.py +147 -0
  244. keboola_agent_cli/services/_semantic_layer_crud.py +382 -0
  245. keboola_agent_cli/services/_semantic_layer_internals.py +1078 -0
  246. keboola_agent_cli/services/_semantic_layer_lookup.py +181 -0
  247. keboola_agent_cli/services/_semantic_layer_reference_data.py +217 -0
  248. keboola_agent_cli/services/_sync_bindings.py +456 -0
  249. keboola_agent_cli/services/_sync_branch.py +191 -0
  250. keboola_agent_cli/services/_sync_bulk.py +228 -0
  251. keboola_agent_cli/services/_sync_clone.py +163 -0
  252. keboola_agent_cli/services/_sync_models.py +97 -0
  253. keboola_agent_cli/services/_sync_push_ops.py +369 -0
  254. keboola_agent_cli/services/_sync_storage.py +376 -0
  255. keboola_agent_cli/services/_sync_writeback.py +167 -0
  256. keboola_agent_cli/services/agent_service.py +458 -0
  257. keboola_agent_cli/services/base.py +175 -0
  258. keboola_agent_cli/services/branch_service.py +588 -0
  259. keboola_agent_cli/services/component_service.py +694 -0
  260. keboola_agent_cli/services/config_service.py +2099 -0
  261. keboola_agent_cli/services/data_app_git_service.py +224 -0
  262. keboola_agent_cli/services/data_app_service.py +2082 -0
  263. keboola_agent_cli/services/deep_lineage_service.py +1322 -0
  264. keboola_agent_cli/services/dev_portal_service.py +345 -0
  265. keboola_agent_cli/services/doctor_service.py +445 -0
  266. keboola_agent_cli/services/encrypt_service.py +87 -0
  267. keboola_agent_cli/services/feature_service.py +268 -0
  268. keboola_agent_cli/services/flow_service.py +769 -0
  269. keboola_agent_cli/services/flow_validation.py +188 -0
  270. keboola_agent_cli/services/http_forwarder_service.py +236 -0
  271. keboola_agent_cli/services/job_idempotency_store.py +285 -0
  272. keboola_agent_cli/services/job_service.py +797 -0
  273. keboola_agent_cli/services/kai_service.py +367 -0
  274. keboola_agent_cli/services/lineage_service.py +274 -0
  275. keboola_agent_cli/services/mcp_service.py +1498 -0
  276. keboola_agent_cli/services/mcp_transport.py +259 -0
  277. keboola_agent_cli/services/member_service.py +593 -0
  278. keboola_agent_cli/services/org_service.py +619 -0
  279. keboola_agent_cli/services/project_service.py +947 -0
  280. keboola_agent_cli/services/repo_validate_service.py +767 -0
  281. keboola_agent_cli/services/schedule_service.py +731 -0
  282. keboola_agent_cli/services/search_service.py +331 -0
  283. keboola_agent_cli/services/semantic_layer_service.py +1497 -0
  284. keboola_agent_cli/services/sharing_service.py +307 -0
  285. keboola_agent_cli/services/storage_service.py +2524 -0
  286. keboola_agent_cli/services/stream_service.py +395 -0
  287. keboola_agent_cli/services/sync_service.py +2244 -0
  288. keboola_agent_cli/services/variables_service.py +447 -0
  289. keboola_agent_cli/services/version_service.py +1038 -0
  290. keboola_agent_cli/services/workspace_service.py +1103 -0
  291. keboola_agent_cli/stream_client.py +217 -0
  292. keboola_agent_cli/sync/__init__.py +1 -0
  293. keboola_agent_cli/sync/branch_mapping.py +174 -0
  294. keboola_agent_cli/sync/clone.py +211 -0
  295. keboola_agent_cli/sync/code_extraction.py +655 -0
  296. keboola_agent_cli/sync/config_format.py +290 -0
  297. keboola_agent_cli/sync/diff_engine.py +566 -0
  298. keboola_agent_cli/sync/git_utils.py +93 -0
  299. keboola_agent_cli/sync/manifest.py +162 -0
  300. keboola_agent_cli/sync/naming.py +90 -0
  301. keboola_agent_cli/sync/secrets.py +62 -0
  302. keboola_agent_cli/sync/sql_split.py +134 -0
  303. keboola_cli-0.63.4.dist-info/METADATA +308 -0
  304. keboola_cli-0.63.4.dist-info/RECORD +306 -0
  305. keboola_cli-0.63.4.dist-info/WHEEL +4 -0
  306. keboola_cli-0.63.4.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,3422 @@
1
+ """Keboola API client with retry, timeouts, and token masking.
2
+
3
+ This is the only module that communicates with the Keboola Storage API
4
+ and the Keboola Queue API. All HTTP details, endpoint URLs, and error
5
+ mapping are encapsulated here.
6
+
7
+ Inherits shared retry/error logic from BaseHttpClient.
8
+ """
9
+
10
+ import json
11
+ import logging
12
+ import re
13
+ import time
14
+ from collections.abc import Iterator
15
+ from dataclasses import dataclass
16
+ from pathlib import Path
17
+ from typing import Any
18
+ from urllib.parse import quote
19
+
20
+ import httpx
21
+
22
+ from .constants import (
23
+ DEFAULT_GROUPED_JOBS_LIMIT,
24
+ DEFAULT_JOB_LIMIT,
25
+ DEFAULT_JOBS_PER_CONFIG,
26
+ DEFAULT_POLL_STRATEGY,
27
+ DEFAULT_TIMEOUT,
28
+ EXPORT_JOB_MAX_WAIT,
29
+ FILE_DOWNLOAD_CHUNK_SIZE,
30
+ FILE_DOWNLOAD_TIMEOUT,
31
+ FILE_UPLOAD_TIMEOUT,
32
+ IMPORT_JOB_MAX_WAIT,
33
+ JOB_POLL_CURVE,
34
+ METADATA_NOT_FOUND,
35
+ OAUTH_HOST,
36
+ OAUTH_PATH,
37
+ QUERY_JOB_MAX_WAIT,
38
+ QUERY_JOB_POLL_INTERVAL,
39
+ QUERY_RESULTS_PAGE_SIZE,
40
+ STORAGE_JOB_MAX_WAIT,
41
+ STORAGE_JOB_POLL_INTERVAL,
42
+ VALID_POLL_STRATEGIES,
43
+ )
44
+ from .errors import ErrorCode, KeboolaApiError
45
+ from .http_base import BaseHttpClient
46
+ from .models import TokenVerifyResponse
47
+
48
+ logger = logging.getLogger(__name__)
49
+
50
+
51
@dataclass(frozen=True)
class InlineQueryResult:
    """One statement's result fetched via the fast inline ``/results`` path.

    Instances are immutable (``frozen=True``); they are built by
    ``_collect_inline_results`` once paging has finished.
    """

    # Column descriptors: [{"name", "type", "nullable"}]
    columns: list[dict[str, Any]]
    # Row values, row-major; capped at the requested limit
    rows: list[list[Any]]
    # numberOfRows reported by the warehouse (full count); None if not reported
    total_rows: int | None
    # True when the warehouse has more rows than we fetched
    truncated: bool
59
+
60
+
61
def _collect_inline_results(
    client: "KeboolaClient",
    query_job_id: str,
    statement_id: str,
    limit: int,
) -> InlineQueryResult:
    """Fetch up to ``limit`` rows of one statement by paging ``GET .../results``.

    Every request uses the fixed ``QUERY_RESULTS_PAGE_SIZE`` because the
    endpoint enforces ``100 <= pageSize <= 100000``; deriving ``pageSize``
    from a small ``limit`` (e.g. 5) would be rejected with a 400. The cap is
    therefore applied locally, and limits larger than one page are served by
    advancing ``offset``. Paging ends when the limit is reached or when the
    warehouse has no more rows.

    This helper sits in the client layer (not a service) because it is plain
    Query Service pagination over :meth:`KeboolaClient.get_query_results` --
    no config, no business logic -- so ``WorkspaceService`` and the public
    library facade (:mod:`keboola_agent_cli.lib`) can both reuse it.

    Args:
        client: Client used to call ``get_query_results``.
        query_job_id: Query job identifier passed through to the endpoint.
        statement_id: Statement identifier passed through to the endpoint.
        limit: Maximum number of rows to return.

    Returns:
        InlineQueryResult with the columns, the capped rows, the reported
        total (if any) and the truncation flag.
    """
    rows_so_far: list[list[Any]] = []
    column_meta: list[dict[str, Any]] = []
    reported_total: int | None = None
    next_offset = 0
    ran_dry = False

    while len(rows_so_far) < limit:
        page = client.get_query_results(
            query_job_id,
            statement_id,
            offset=next_offset,
            page_size=QUERY_RESULTS_PAGE_SIZE,
        )
        # Column metadata and the total are taken from the first page that has them.
        if not column_meta:
            column_meta = page.get("columns", []) or []
        if reported_total is None:
            reported_total = page.get("numberOfRows")

        batch = page.get("data", []) or []
        rows_so_far.extend(batch)
        next_offset += len(batch)

        # A short page is the last page. Hitting the reported total exactly on
        # a page boundary also ends paging, which saves a round-trip for the
        # empty next page (e.g. total is a multiple of the page size and the
        # limit is larger than the total).
        short_page = len(batch) < QUERY_RESULTS_PAGE_SIZE
        hit_total = reported_total is not None and next_offset >= reported_total
        if short_page or hit_total:
            ran_dry = True
            break

    kept = rows_so_far[:limit]
    if reported_total is None:
        # No count from the Query Service: decide from how the loop ended.
        # Stopping at the limit without seeing a final page means more rows
        # may exist, so err on the side of warning.
        more_pending = len(rows_so_far) >= limit and not ran_dry
    else:
        more_pending = reported_total > len(kept)

    return InlineQueryResult(
        columns=column_meta,
        rows=kept,
        total_rows=reported_total,
        truncated=more_pending,
    )
123
+
124
+
125
def _iter_poll_intervals(strategy: str) -> Iterator[float]:
    """Generate the sleep intervals (seconds) used while polling a Queue job.

    Strategies:

    - ``"fixed"``: ``STORAGE_JOB_POLL_INTERVAL`` is yielded forever (legacy
      behavior, kept for opt-out via ``--poll-strategy fixed``).
    - anything else (``"exponential"``): ``JOB_POLL_CURVE`` is walked segment
      by segment. Each ``(interval, count)`` segment yields ``interval``
      ``count`` times; a segment whose ``count`` is ``0`` or less yields
      ``interval`` forever, so it only makes sense as the last segment.

    Iteration is expected to be stopped by the deadline check in
    ``wait_for_queue_job``.
    """
    while strategy == "fixed":
        yield STORAGE_JOB_POLL_INTERVAL

    for delay, repeats in JOB_POLL_CURVE:
        if repeats > 0:
            for _ in range(repeats):
                yield delay
            continue
        # Non-positive count: open-ended segment, never advances to the next one.
        while True:
            yield delay
148
+
149
+
150
+ class KeboolaClient(BaseHttpClient):
151
+ """HTTP client for the Keboola Storage API and Queue API.
152
+
153
+ Provides methods to interact with Keboola endpoints with built-in
154
+ retry logic (exponential backoff for 429/5xx), timeouts, and
155
+ automatic token masking in error messages.
156
+
157
+ Inherits _do_request() and _raise_api_error() from BaseHttpClient.
158
+ """
159
+
160
def __init__(self, stack_url: str, token: str) -> None:
    """Set up the Storage API client and placeholders for the lazy sub-clients.

    Args:
        stack_url: Stack base URL; trailing slashes are removed.
        token: Storage API token, sent as the ``X-StorageApi-Token`` header.
    """
    normalized_url = stack_url.rstrip("/")
    self._stack_url = normalized_url
    super().__init__(
        base_url=normalized_url,
        token=token,
        headers={"X-StorageApi-Token": token},
        timeout=DEFAULT_TIMEOUT,
    )
    # Sub-clients for the other services are created on first use.
    self._queue_client: httpx.Client | None = None
    self._query_client: httpx.Client | None = None
    self._encrypt_client: httpx.Client | None = None
    # Project feature flags, filled lazily on the first has_feature() /
    # get_project_features() call. This avoids an extra verify_token
    # round-trip on every kbagent invocation; the cost is paid only when
    # business logic actually branches on a feature (e.g. legacy
    # fake-branch storage detection).
    self._features_cache: frozenset[str] | None = None
180
+
181
@property
def _queue_base_url(self) -> str:
    """Base URL of the Queue API, derived from the stack URL via ``_derive_service_url``."""
    return self._derive_service_url(self._stack_url, "queue")
184
+
185
@property
def _query_base_url(self) -> str:
    """Base URL of the Query Service, derived from the stack URL via ``_derive_service_url``."""
    return self._derive_service_url(self._stack_url, "query")
188
+
189
@property
def _encrypt_base_url(self) -> str:
    """Base URL of the Encryption API, derived from the stack URL via ``_derive_service_url``."""
    return self._derive_service_url(self._stack_url, "encryption")
192
+
193
def close(self) -> None:
    """Close the main HTTP client and every sub-client that was created."""
    super().close()
    # Same order as creation-by-service: queue, query, encryption.
    for sub_attr in ("_queue_client", "_query_client", "_encrypt_client"):
        sub_client = getattr(self, sub_attr)
        if sub_client is not None:
            sub_client.close()
202
+
203
def __enter__(self) -> "KeboolaClient":
    """Enter a ``with`` block; returns this client unchanged."""
    return self
205
+
206
def __exit__(self, *args: Any) -> None:
    """Leave a ``with`` block: close all HTTP clients; exceptions are not suppressed."""
    self.close()
208
+
209
def _request(self, method: str, path: str, **kwargs: Any) -> httpx.Response:
    """Execute a Storage API request with retry.

    Thin pass-through to the inherited ``_do_request``; ``kwargs`` are
    forwarded unchanged.
    """
    return self._do_request(method, path, **kwargs)
212
+
213
def _get_or_create_sub_client(
    self,
    attr: str,
    base_url: str,
    headers: dict[str, str] | None = None,
) -> httpx.Client:
    """Return the sub-client stored under ``attr``, creating it on first use.

    Args:
        attr: Instance attribute name holding the sub-client
            (e.g. "_queue_client").
        base_url: Base URL for a newly created sub-client.
        headers: Custom headers for a newly created sub-client; when omitted,
            a copy of the main client's headers is used.

    Returns:
        The cached or newly created ``httpx.Client``.
    """
    existing = getattr(self, attr)
    if existing is not None:
        return existing

    created = httpx.Client(
        base_url=base_url,
        timeout=DEFAULT_TIMEOUT,
        # Use httpx's public ``headers`` property instead of the private
        # ``_headers`` attribute so this does not depend on httpx internals.
        headers=self._client.headers.copy() if headers is None else headers,
    )
    setattr(self, attr, created)
    return created
235
+
236
def _queue_request(self, method: str, path: str, **kwargs: Any) -> httpx.Response:
    """Send a request to the Queue API through its lazily created sub-client, with retry."""
    queue_http = self._get_or_create_sub_client("_queue_client", self._queue_base_url)
    return self._do_request(
        method,
        path,
        client=queue_http,
        base_url=self._queue_base_url,
        **kwargs,
    )
242
+
243
def _query_request(self, method: str, path: str, **kwargs: Any) -> httpx.Response:
    """Send a request to the Query Service through its lazily created sub-client, with retry."""
    query_http = self._get_or_create_sub_client("_query_client", self._query_base_url)
    return self._do_request(
        method,
        path,
        client=query_http,
        base_url=self._query_base_url,
        **kwargs,
    )
249
+
250
def _encrypt_request(self, method: str, path: str, **kwargs: Any) -> httpx.Response:
    """Send a request to the Encryption API, with retry.

    The sub-client is created with only a JSON ``Content-Type`` header
    rather than a copy of the main client's headers.
    """
    encrypt_http = self._get_or_create_sub_client(
        "_encrypt_client",
        self._encrypt_base_url,
        headers={"Content-Type": "application/json"},
    )
    return self._do_request(
        method,
        path,
        client=encrypt_http,
        base_url=self._encrypt_base_url,
        **kwargs,
    )
258
+
259
def encrypt_values(
    self,
    project_id: int,
    component_id: str,
    data: dict[str, str],
) -> dict[str, str]:
    """Encrypt secret values through the Keboola Encryption API.

    Posts a ``{key: plaintext}`` mapping to ``/encrypt`` and returns the
    ``{key: encrypted}`` mapping from the response. Keys must start with
    '#'. Encrypted values start with 'KBC::ProjectSecure::'.

    Args:
        project_id: Keboola project numeric ID.
        component_id: Component identifier (e.g. 'keboola.ex-db-snowflake').
        data: Dict of secret keys to encrypt (e.g. {'#password': 'my-secret'}).

    Returns:
        Dict of {key: encrypted_value}.
    """
    scope = {"projectId": project_id, "componentId": component_id}
    encrypted = self._encrypt_request("POST", "/encrypt", params=scope, json=data)
    return encrypted.json()
285
+
286
def verify_token(self) -> TokenVerifyResponse:
    """Verify the storage API token and retrieve project information.

    Calls ``GET /v2/storage/tokens/verify``, maps a subset of the payload
    into a ``TokenVerifyResponse`` and refreshes the cached feature flags.

    Returns:
        TokenVerifyResponse with project name, ID, and token description.

    Raises:
        KeboolaApiError: If token is invalid (401) or other API error.
    """
    data = self._request("GET", "/v2/storage/tokens/verify").json()

    # ``or {}`` (rather than a ``.get`` default) also covers an explicit JSON
    # null, matching how ``organization`` is read below.
    owner = data.get("owner") or {}
    # /v2/storage/tokens/verify carries `organization` at the TOP level, not
    # nested under `owner`. The payload is minimal -- only `{"id": "73"}` on
    # the GCP us-east4 stack -- so the org name has to come from the Manage
    # API path.
    org = data.get("organization") or {}
    org_id_raw = org.get("id")
    # Storage API serializes the org id as a string ("73"); normalise to int
    # so callers and the persisted ProjectConfig.org_id keep an int type
    # without each consumer doing the cast.
    org_id: int | None
    try:
        org_id = int(org_id_raw) if org_id_raw is not None else None
    except (TypeError, ValueError):
        org_id = None

    verified = TokenVerifyResponse(
        token_id=str(data.get("id", "")),
        token_description=data.get("description", ""),
        project_id=owner.get("id"),
        project_name=owner.get("name", ""),
        owner_name=owner.get("name", ""),
        default_backend=owner.get("defaultBackend", "snowflake"),
        # A null `features` must not reach frozenset() below.
        features=owner.get("features") or [],
        org_id=org_id,
        # The top-level `organization` block does NOT carry a name; that
        # field is Manage-API-only. Leave None and let the UI show the id
        # (e.g. "#73") as a fallback until `org setup` fills in the
        # human-readable name.
        org_name=None,
    )
    # Refresh the features cache on every successful verify so explicit
    # callers stay consistent with the cached view used by has_feature().
    self._features_cache = frozenset(verified.features)
    return verified
333
+
334
def get_project_info(self) -> dict[str, Any]:
    """Fetch the unparsed token/project payload.

    Performs ``GET /v2/storage/tokens/verify`` and hands back the decoded
    JSON body untouched, in contrast to ``verify_token()``, which keeps
    only a subset of fields.

    Returns:
        Full JSON response dict from /v2/storage/tokens/verify.

    Raises:
        KeboolaApiError: If token is invalid (401) or other API error.
    """
    return self._request("GET", "/v2/storage/tokens/verify").json()
349
+
350
def create_short_lived_token(
    self,
    description: str,
    component_access: list[str],
    expires_in: int = 3600,
) -> dict[str, Any]:
    """Mint an expiring Storage API token limited to given components.

    POST /v2/storage/tokens

    Args:
        description: Human-readable token description.
        component_access: Component IDs the new token may access.
        expires_in: Lifetime in seconds (default: 3600 = 1 hour). Sent to
            the API as a string.

    Returns:
        Token dict from the API, including the 'token' field.
    """
    form = {
        "description": description,
        "expiresIn": str(expires_in),
        "componentAccess[]": component_access,
    }
    created = self._request("POST", "/v2/storage/tokens", data=form)
    return created.json()
378
+
379
+ def global_search(
380
+ self,
381
+ query: str,
382
+ project_id: int,
383
+ types: list[str] | None = None,
384
+ branch_type: str = "production",
385
+ branch_id: int | None = None,
386
+ limit: int = 50,
387
+ offset: int = 0,
388
+ ) -> dict[str, Any]:
389
+ """Search for items by name across the project using the Storage API global-search endpoint.
390
+
391
+ Calls GET /v2/storage/global-search with the given query and optional type filters.
392
+ This performs textual (name-based) search only — it does not scan configuration bodies.
393
+ Results are scoped to the single project identified by ``project_id``.
394
+
395
+ Args:
396
+ query: Search string to match against item names.
397
+ project_id: Numeric Keboola project ID (required by the API).
398
+ types: Optional list of item types to filter results. Supported values:
399
+ ``bucket``, ``table``, ``flow``, ``transformation``, ``configuration``,
400
+ ``configuration-row``, ``workspace``, ``shared-code``.
401
+ If None or empty, all types are returned.
402
+ branch_type: ``"production"`` (default) or ``"development"``.
403
+ branch_id: Required when ``branch_type="development"``; ignored otherwise.
404
+ limit: Maximum number of results to return (default 50, max 100).
405
+ offset: Pagination offset (default 0).
406
+
407
+ Returns:
408
+ Raw API response dict with keys ``"all"`` (total count) and
409
+ ``"items"`` (list of matching item dicts).
410
+
411
+ Raises:
412
+ KeboolaApiError: On API errors (auth, network, rate limits).
413
+ """
414
+ params: dict[str, Any] = {
415
+ "query": query,
416
+ "projectIds[]": project_id,
417
+ "limit": limit,
418
+ "offset": offset,
419
+ }
420
+ if types:
421
+ params["types[]"] = types
422
+ if branch_type == "development" and branch_id is not None:
423
+ params["branchTypes[]"] = "development"
424
+ params["branchIds[]"] = branch_id
425
+ else:
426
+ params["branchTypes[]"] = "production"
427
+
428
+ response = self._request("GET", "/v2/storage/global-search", params=params)
429
+ return response.json()
430
+
431
def get_oauth_url(
    self,
    component_id: str,
    config_id: str,
    redirect_url: str | None = None,
) -> str:
    """Build the OAuth authorization URL for one component configuration.

    A one-hour, component-scoped Storage API token is created first and
    embedded in the URL's query string together with the stack URL; the
    component and configuration are carried in the URL fragment.

    Args:
        component_id: The component ID (e.g. 'keboola.ex-google-drive').
        config_id: The configuration ID to authorize.
        redirect_url: Optional URL added as the ``returnUrl`` query
            parameter; omitted from the URL when falsy.

    Returns:
        The full OAuth authorization URL as a string.
    """
    from urllib.parse import urlencode, urlunsplit

    short_lived = self.create_short_lived_token(
        description=f"Short-lived token for OAuth URL - {component_id}/{config_id}",
        component_access=[component_id],
        expires_in=3600,
    )

    query_items: dict[str, str] = {
        "token": short_lived["token"],
        "sapiUrl": self._stack_url,
    }
    if redirect_url:
        query_items["returnUrl"] = redirect_url

    return urlunsplit(
        (
            "https",
            OAUTH_HOST,
            OAUTH_PATH,
            urlencode(query_items),
            f"/{component_id}/{config_id}",
        )
    )
467
+
468
def get_project_features(self) -> frozenset[str]:
    """Return the cached set of project feature flags, loading it on demand.

    When ``_features_cache`` is still ``None``, ``verify_token()`` is
    called once to populate it; later calls are served from the cache
    without another request.
    """
    cached = self._features_cache
    if cached is None:
        # verify_token() either fills the cache or raises, so a None here
        # afterwards would be a programming error.
        self.verify_token()
        cached = self._features_cache
    assert cached is not None
    return cached
482
+
483
def has_feature(self, feature: str) -> bool:
    """Report whether ``feature`` is among the project's feature flags.

    Thin membership test on top of ``get_project_features()``, meant for
    code that branches on one flag (e.g. ``"storage-branches"``).
    """
    enabled_features = self.get_project_features()
    return feature in enabled_features
490
+
491
+ def list_components(
492
+ self,
493
+ component_type: str | None = None,
494
+ branch_id: int | None = None,
495
+ ) -> list[dict[str, Any]]:
496
+ """List components with their configurations.
497
+
498
+ Args:
499
+ component_type: Optional filter (extractor, writer, transformation, application).
500
+ branch_id: If set, list components from a specific dev branch.
501
+
502
+ Returns:
503
+ List of component dicts from the API.
504
+ """
505
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
506
+ params: dict[str, str] = {"include": "configuration"}
507
+ if component_type:
508
+ params["componentType"] = component_type
509
+
510
+ response = self._request("GET", f"{prefix}/components", params=params)
511
+ return response.json()
512
+
513
+ def list_components_with_configs(
514
+ self,
515
+ branch_id: int | None = None,
516
+ component_type: str | None = None,
517
+ include_state: bool = False,
518
+ ) -> list[dict[str, Any]]:
519
+ """List all components with full configuration bodies and rows.
520
+
521
+ Makes a single API call to fetch everything needed for sync pull and
522
+ for deep search (row-level configuration). Uses the
523
+ include=configuration,rows parameter to get full config bodies and
524
+ config rows in one request. When ``include_state`` is True, the
525
+ response also embeds each configuration's runtime ``state`` dict
526
+ (same data as ``get_config_state``) so bulk-state retrieval stays a
527
+ single request instead of N+1. Also used by the bulk-detail caller
528
+ in ``ConfigService`` when ``--with-state`` is set on
529
+ ``config detail`` without a specific ``--config-id``.
530
+
531
+ Args:
532
+ branch_id: If set, target a specific dev branch.
533
+ component_type: Optional filter (extractor, writer, transformation,
534
+ application). Passed to the API as ``componentType``.
535
+ include_state: When True, adds ``state`` to the ``include``
536
+ resource list so each returned configuration carries its
537
+ runtime state dict.
538
+
539
+ Returns:
540
+ List of component dicts, each containing a 'configurations' list
541
+ with full config bodies and nested 'rows'.
542
+ """
543
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
544
+ include_parts = ["configuration", "rows"]
545
+ if include_state:
546
+ include_parts.append("state")
547
+ params: dict[str, str] = {"include": ",".join(include_parts)}
548
+ if component_type:
549
+ params["componentType"] = component_type
550
+ resp = self._request(
551
+ "GET",
552
+ f"{prefix}/components",
553
+ params=params,
554
+ )
555
+ return resp.json()
556
+
557
+ def list_component_configs(
558
+ self,
559
+ component_id: str,
560
+ branch_id: int | None = None,
561
+ ) -> list[dict[str, Any]]:
562
+ """List all configurations for a specific component.
563
+
564
+ Args:
565
+ component_id: Component identifier (e.g. 'keboola.sandboxes').
566
+ branch_id: If set, target a specific dev branch.
567
+
568
+ Returns:
569
+ List of configuration dicts (id, name, description, etc.).
570
+ """
571
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
572
+ resp = self._request(
573
+ "GET",
574
+ f"{prefix}/components/{quote(component_id, safe='')}/configs",
575
+ )
576
+ return resp.json()
577
+
578
+ def list_config_rows(
579
+ self,
580
+ component_id: str,
581
+ config_id: str,
582
+ branch_id: int | None = None,
583
+ ) -> list[dict[str, Any]]:
584
+ """List all rows for a specific configuration.
585
+
586
+ Args:
587
+ component_id: Component identifier (e.g. 'keboola.ex-http').
588
+ config_id: Configuration ID.
589
+ branch_id: If set, target a specific dev branch.
590
+
591
+ Returns:
592
+ List of config row dicts.
593
+ """
594
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
595
+ resp = self._request(
596
+ "GET",
597
+ f"{prefix}/components/{quote(component_id)}/configs/{quote(config_id)}/rows",
598
+ )
599
+ return resp.json()
600
+
601
+ def get_config_row(
602
+ self,
603
+ component_id: str,
604
+ config_id: str,
605
+ row_id: str,
606
+ branch_id: int | None = None,
607
+ ) -> dict[str, Any]:
608
+ """Get a single configuration row by ID.
609
+
610
+ Args:
611
+ component_id: Component identifier.
612
+ config_id: Configuration ID.
613
+ row_id: Row ID.
614
+ branch_id: If set, target a specific dev branch.
615
+
616
+ Returns:
617
+ Row detail dict from the API.
618
+ """
619
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
620
+ resp = self._request(
621
+ "GET",
622
+ f"{prefix}/components/{quote(component_id)}/configs/{quote(config_id)}/rows/{quote(row_id)}",
623
+ )
624
+ return resp.json()
625
+
626
+ def get_config_detail(
627
+ self,
628
+ component_id: str,
629
+ config_id: str,
630
+ branch_id: int | None = None,
631
+ ) -> dict[str, Any]:
632
+ """Get detailed information about a specific configuration.
633
+
634
+ Args:
635
+ component_id: The component ID (e.g. keboola.ex-db-snowflake).
636
+ config_id: The configuration ID.
637
+ branch_id: If set, get detail from a specific dev branch.
638
+
639
+ Returns:
640
+ Configuration detail dict from the API.
641
+ """
642
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
643
+ safe_component_id = quote(component_id, safe="")
644
+ safe_config_id = quote(config_id, safe="")
645
+ response = self._request(
646
+ "GET",
647
+ f"{prefix}/components/{safe_component_id}/configs/{safe_config_id}",
648
+ )
649
+ return response.json()
650
+
651
+ def get_config_state(
652
+ self,
653
+ component_id: str,
654
+ config_id: str,
655
+ branch_id: int | None = None,
656
+ ) -> dict[str, Any]:
657
+ """Get the runtime state dict of a specific configuration.
658
+
659
+ Convenience wrapper over
660
+ ``get_config_detail(...).get("state", {})``: Storage API does not
661
+ expose a standalone ``GET .../state`` resource (production returns
662
+ 404, branch-scoped returns 501 Not Implemented), so the state is
663
+ only served inline as a field inside the configuration detail
664
+ response. This wrapper is retained for API discoverability, but
665
+ callers that already have a detail response should read ``state``
666
+ from it directly instead of issuing this second identical request
667
+ -- the service layer's single-mode ``--with-state`` does exactly
668
+ that (see ``ConfigService.get_config_detail``).
669
+
670
+ For bulk state retrieval across many configs, prefer the
671
+ ``include=state`` query param on
672
+ ``list_components_with_configs(include="configuration,rows,state")``
673
+ -- one request serves every config's state instead of N requests.
674
+
675
+ Args:
676
+ component_id: The component ID (e.g. keboola.ex-db-snowflake).
677
+ config_id: The configuration ID.
678
+ branch_id: If set, fetch state from a specific dev branch.
679
+
680
+ Returns:
681
+ The state dict (empty ``{}`` when the config has no saved state).
682
+ """
683
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
684
+ safe_component_id = quote(component_id, safe="")
685
+ safe_config_id = quote(config_id, safe="")
686
+ response = self._request(
687
+ "GET",
688
+ f"{prefix}/components/{safe_component_id}/configs/{safe_config_id}",
689
+ )
690
+ body = response.json()
691
+ state = body.get("state")
692
+ return state if isinstance(state, dict) else {}
693
+
694
def list_config_folder_metadata(self, branch_id: int) -> dict[str, str]:
    """Fetch folder names for all configurations via metadata search.

    Uses the search/component-configurations endpoint to find configs
    with ``KBC.configuration.folderName`` metadata.

    Note: This endpoint requires a branch ID (branch-only route).

    Items whose ``metadata`` is missing or null, and metadata entries that
    lack a ``key`` or ``value`` field, are skipped instead of raising.

    Args:
        branch_id: Branch ID (required — use default branch for production).

    Returns:
        Dict mapping ``"{component_id}/{config_id}"`` to folder name.
    """
    folder_key = "KBC.configuration.folderName"
    prefix = f"/v2/storage/branch/{branch_id}"
    resp = self._request(
        "GET",
        f"{prefix}/search/component-configurations",
        params={
            "metadataKeys[]": folder_key,
            "include": "filteredMetadata",
        },
    )
    folder_map: dict[str, str] = {}
    for item in resp.json():
        comp_id = item.get("idComponent", "")
        config_id = str(item.get("configurationId", ""))
        # `or []` covers both an absent key and an explicit JSON null.
        entries = item.get("metadata") or []
        meta = next((m for m in entries if m.get("key") == folder_key), None)
        if meta is None or "value" not in meta:
            continue
        folder_map[f"{comp_id}/{config_id}"] = meta["value"]
    return folder_map
728
+
729
+ def list_config_metadata(
730
+ self,
731
+ component_id: str,
732
+ config_id: str,
733
+ branch_id: int | None = None,
734
+ ) -> list[dict[str, Any]]:
735
+ """List metadata entries on a configuration.
736
+
737
+ GET /v2/storage/[branch/{b}/]components/{c}/configs/{id}/metadata
738
+ """
739
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
740
+ response = self._request(
741
+ "GET",
742
+ f"{prefix}/components/{quote(component_id, safe='')}/configs/{quote(config_id, safe='')}/metadata",
743
+ )
744
+ return response.json()
745
+
746
+ def set_config_metadata(
747
+ self,
748
+ component_id: str,
749
+ config_id: str,
750
+ entries: list[tuple[str, str]],
751
+ branch_id: int | None = None,
752
+ ) -> list[dict[str, Any]]:
753
+ """Bulk-set metadata key/value pairs on a configuration.
754
+
755
+ POST /v2/storage/[branch/{b}/]components/{c}/configs/{id}/metadata
756
+ Same PHP-style indexed form as set_branch_metadata.
757
+ """
758
+ form: dict[str, str] = {}
759
+ for i, (key, value) in enumerate(entries):
760
+ form[f"metadata[{i}][key]"] = key
761
+ form[f"metadata[{i}][value]"] = value
762
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
763
+ response = self._request(
764
+ "POST",
765
+ f"{prefix}/components/{quote(component_id, safe='')}/configs/{quote(config_id, safe='')}/metadata",
766
+ data=form,
767
+ )
768
+ return response.json()
769
+
770
+ def delete_config_metadata(
771
+ self,
772
+ component_id: str,
773
+ config_id: str,
774
+ metadata_id: int | str,
775
+ branch_id: int | None = None,
776
+ ) -> None:
777
+ """Delete a single metadata entry on a configuration by its numeric ID.
778
+
779
+ DELETE /v2/storage/[branch/{b}/]components/{c}/configs/{id}/metadata/{mid}
780
+ """
781
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
782
+ self._request(
783
+ "DELETE",
784
+ f"{prefix}/components/{quote(component_id, safe='')}/configs/{quote(config_id, safe='')}/metadata/{metadata_id}",
785
+ )
786
+
787
+ def create_config(
788
+ self,
789
+ component_id: str,
790
+ name: str,
791
+ configuration: dict[str, Any],
792
+ description: str = "",
793
+ branch_id: int | None = None,
794
+ ) -> dict[str, Any]:
795
+ """Create a new configuration for a component.
796
+
797
+ POST /v2/storage/[branch/{id}/]components/{comp_id}/configs
798
+
799
+ Args:
800
+ component_id: Component identifier.
801
+ name: Configuration name.
802
+ configuration: Configuration body (parameters, storage, etc.).
803
+ description: Optional description.
804
+ branch_id: If set, target a specific dev branch.
805
+
806
+ Returns:
807
+ Created configuration dict including the assigned 'id'.
808
+ """
809
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
810
+ resp = self._request(
811
+ "POST",
812
+ f"{prefix}/components/{quote(component_id)}/configs",
813
+ data={
814
+ "name": name,
815
+ "description": description,
816
+ "configuration": json.dumps(configuration),
817
+ },
818
+ )
819
+ return resp.json()
820
+
821
+ def update_config(
822
+ self,
823
+ component_id: str,
824
+ config_id: str,
825
+ name: str | None = None,
826
+ configuration: dict[str, Any] | None = None,
827
+ description: str | None = None,
828
+ change_description: str = "",
829
+ branch_id: int | None = None,
830
+ ) -> dict[str, Any]:
831
+ """Update an existing configuration.
832
+
833
+ PUT /v2/storage/[branch/{id}/]components/{comp_id}/configs/{config_id}
834
+
835
+ Only provided (non-None) fields are sent in the request.
836
+
837
+ Returns:
838
+ Updated configuration dict.
839
+ """
840
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
841
+ data: dict[str, Any] = {}
842
+ if name is not None:
843
+ data["name"] = name
844
+ if description is not None:
845
+ data["description"] = description
846
+ if configuration is not None:
847
+ data["configuration"] = json.dumps(configuration)
848
+ if change_description:
849
+ data["changeDescription"] = change_description
850
+ resp = self._request(
851
+ "PUT",
852
+ f"{prefix}/components/{quote(component_id)}/configs/{quote(config_id)}",
853
+ data=data,
854
+ )
855
+ return resp.json()
856
+
857
+ def create_config_row(
858
+ self,
859
+ component_id: str,
860
+ config_id: str,
861
+ name: str,
862
+ configuration: dict[str, Any],
863
+ description: str = "",
864
+ is_disabled: bool = False,
865
+ branch_id: int | None = None,
866
+ ) -> dict[str, Any]:
867
+ """Create a new configuration row.
868
+
869
+ POST /v2/storage/[branch/{id}/]components/{comp_id}/configs/{config_id}/rows
870
+
871
+ Args:
872
+ component_id: The component ID.
873
+ config_id: The parent configuration ID.
874
+ name: Row name.
875
+ configuration: Row-level configuration dict.
876
+ description: Optional row description.
877
+ is_disabled: When True, the row is created in disabled state and
878
+ excluded from job runs until re-enabled.
879
+ branch_id: Optional dev branch ID.
880
+
881
+ Returns:
882
+ Created row dict including the assigned 'id'.
883
+ """
884
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
885
+ data: dict[str, Any] = {
886
+ "name": name,
887
+ "description": description,
888
+ "configuration": json.dumps(configuration),
889
+ }
890
+ if is_disabled:
891
+ data["isDisabled"] = "1"
892
+ resp = self._request(
893
+ "POST",
894
+ f"{prefix}/components/{quote(component_id)}/configs/{quote(config_id)}/rows",
895
+ data=data,
896
+ )
897
+ return resp.json()
898
+
899
+ def update_config_row(
900
+ self,
901
+ component_id: str,
902
+ config_id: str,
903
+ row_id: str,
904
+ name: str | None = None,
905
+ configuration: dict[str, Any] | None = None,
906
+ description: str | None = None,
907
+ is_disabled: bool | None = None,
908
+ change_description: str = "",
909
+ branch_id: int | None = None,
910
+ ) -> dict[str, Any]:
911
+ """Update an existing configuration row.
912
+
913
+ PUT /v2/storage/[branch/{id}/]components/{comp_id}/configs/{config_id}/rows/{row_id}
914
+
915
+ Args:
916
+ is_disabled: When True, disable the row; when False, enable it;
917
+ when None, leave the current state unchanged.
918
+ """
919
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
920
+ data: dict[str, Any] = {}
921
+ if name is not None:
922
+ data["name"] = name
923
+ if description is not None:
924
+ data["description"] = description
925
+ if configuration is not None:
926
+ data["configuration"] = json.dumps(configuration)
927
+ if is_disabled is not None:
928
+ data["isDisabled"] = "1" if is_disabled else "0"
929
+ if change_description:
930
+ data["changeDescription"] = change_description
931
+ resp = self._request(
932
+ "PUT",
933
+ f"{prefix}/components/{quote(component_id)}/configs/{quote(config_id)}/rows/{quote(row_id)}",
934
+ data=data,
935
+ )
936
+ return resp.json()
937
+
938
+ def delete_config_row(
939
+ self,
940
+ component_id: str,
941
+ config_id: str,
942
+ row_id: str,
943
+ branch_id: int | None = None,
944
+ ) -> None:
945
+ """Delete a configuration row.
946
+
947
+ DELETE /v2/storage/[branch/{id}/]components/{comp_id}/configs/{config_id}/rows/{row_id}
948
+ """
949
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
950
+ self._request(
951
+ "DELETE",
952
+ f"{prefix}/components/{quote(component_id)}/configs/{quote(config_id)}/rows/{quote(row_id)}",
953
+ )
954
+
955
def _wait_for_storage_job(
    self,
    job: dict[str, Any],
    max_wait: float = STORAGE_JOB_MAX_WAIT,
) -> dict[str, Any]:
    """Poll a Storage API job until it reaches a terminal state.

    The job passed in is inspected first, so a job that is already
    finished costs no extra request. A job in ``error`` state raises
    whether that state was present in the initial response or observed
    while polling.

    Args:
        job: Initial job response from POST/DELETE.
        max_wait: Maximum seconds to wait (default: STORAGE_JOB_MAX_WAIT).

    Returns:
        Completed job dict (with results on success).

    Raises:
        KeboolaApiError: If the job fails or times out.
    """
    job_id = job.get("id")
    deadline = time.monotonic() + max_wait
    while True:
        status = job.get("status")
        if status == "success":
            return job
        if status == "error":
            # `error` may be absent or null; fall back to a generic message.
            error = job.get("error")
            error_msg = error.get("message") if isinstance(error, dict) else None
            raise KeboolaApiError(
                message=error_msg or "Storage job failed",
                status_code=500,
                error_code=ErrorCode.STORAGE_JOB_FAILED,
                retryable=False,
            )
        # The deadline is checked before each sleep, so the final poll's
        # result is always evaluated above before giving up.
        if time.monotonic() >= deadline:
            break
        time.sleep(STORAGE_JOB_POLL_INTERVAL)
        job = self._request("GET", f"/v2/storage/jobs/{job_id}").json()

    raise KeboolaApiError(
        message=f"Storage job {job_id} did not complete within {max_wait}s",
        status_code=504,
        error_code=ErrorCode.STORAGE_JOB_TIMEOUT,
        retryable=True,
    )
998
+
999
def create_dev_branch(self, name: str, description: str = "") -> dict[str, Any]:
    """Create a development branch and wait for the creation job to finish.

    ``POST /v2/storage/dev-branches`` answers with an async job; the job
    is handed to ``_wait_for_storage_job`` and the branch data is taken
    from the finished job's ``results``.

    Args:
        name: Branch name.
        description: Optional branch description; left out of the request
            body when empty.

    Returns:
        Branch dict with id, name, description, created, etc. (``{}`` when
        the finished job carries no ``results``).

    Raises:
        KeboolaApiError: If the API call or job fails.
    """
    payload: dict[str, str] = (
        {"name": name, "description": description} if description else {"name": name}
    )
    started = self._request("POST", "/v2/storage/dev-branches", json=payload)
    finished = self._wait_for_storage_job(started.json())
    return finished.get("results", {})
1021
+
1022
def delete_dev_branch(self, branch_id: int) -> None:
    """Delete a development branch and wait for the deletion job to finish.

    Sends ``DELETE /v2/storage/dev-branches/{branch_id}`` and passes the
    returned job to ``_wait_for_storage_job``.

    Args:
        branch_id: The branch ID to delete.

    Raises:
        KeboolaApiError: If the API call or job fails.
    """
    deletion_job = self._request("DELETE", f"/v2/storage/dev-branches/{branch_id}").json()
    self._wait_for_storage_job(deletion_job)
1033
+
1034
def list_dev_branches(self) -> list[dict[str, Any]]:
    """Fetch the project's development branches.

    GET /v2/storage/dev-branches

    Returns:
        List of branch dicts from the API.
    """
    return self._request("GET", "/v2/storage/dev-branches").json()
1042
+
1043
+ def list_branch_metadata(self, branch_id: int | str = "default") -> list[dict[str, Any]]:
1044
+ """List metadata entries on a branch.
1045
+
1046
+ GET /v2/storage/branch/{id}/metadata
1047
+
1048
+ Args:
1049
+ branch_id: Branch ID or the literal "default" for the main branch.
1050
+
1051
+ Returns:
1052
+ List of metadata dicts with keys: id, key, value, provider, timestamp.
1053
+ """
1054
+ response = self._request("GET", f"/v2/storage/branch/{branch_id}/metadata")
1055
+ return response.json()
1056
+
1057
+ def set_branch_metadata(
1058
+ self,
1059
+ entries: list[tuple[str, str]],
1060
+ branch_id: int | str = "default",
1061
+ ) -> list[dict[str, Any]]:
1062
+ """Bulk-set metadata key/value pairs on a branch.
1063
+
1064
+ POST /v2/storage/branch/{id}/metadata
1065
+
1066
+ Keboola's endpoint expects PHP-style array indices in the
1067
+ form-urlencoded body, e.g.::
1068
+
1069
+ metadata[0][key]=KBC.projectDescription
1070
+ metadata[0][value]=My project
1071
+
1072
+ httpx's ``data=`` accepts a mapping of str -> str and URL-encodes it.
1073
+ Since each ``metadata[i][...]`` key is unique per index, a plain dict
1074
+ preserves both ordering (Python 3.7+) and Keboola's expected shape.
1075
+
1076
+ Args:
1077
+ entries: Ordered list of ``(key, value)`` metadata tuples.
1078
+ branch_id: Branch ID or the literal "default" for the main branch.
1079
+
1080
+ Returns:
1081
+ List of metadata dicts created/updated by the API.
1082
+ """
1083
+ form: dict[str, str] = {}
1084
+ for i, (key, value) in enumerate(entries):
1085
+ form[f"metadata[{i}][key]"] = key
1086
+ form[f"metadata[{i}][value]"] = value
1087
+ response = self._request(
1088
+ "POST",
1089
+ f"/v2/storage/branch/{branch_id}/metadata",
1090
+ data=form,
1091
+ )
1092
+ return response.json()
1093
+
1094
+ def delete_branch_metadata(
1095
+ self,
1096
+ metadata_id: int | str,
1097
+ branch_id: int | str = "default",
1098
+ ) -> None:
1099
+ """Delete a single metadata entry on a branch by its numeric ID.
1100
+
1101
+ DELETE /v2/storage/branch/{id}/metadata/{metadataId}
1102
+
1103
+ Args:
1104
+ metadata_id: ID of the metadata entry (from ``list_branch_metadata``).
1105
+ branch_id: Branch ID or the literal "default" for the main branch.
1106
+ """
1107
+ self._request(
1108
+ "DELETE",
1109
+ f"/v2/storage/branch/{branch_id}/metadata/{metadata_id}",
1110
+ )
1111
+
1112
+ def get_branch_metadata_value(
1113
+ self,
1114
+ key: str,
1115
+ branch_id: int | str = "default",
1116
+ ) -> str | None | object:
1117
+ """Return the value for a single metadata key on a branch, or None if absent.
1118
+
1119
+ Convenience wrapper around ``list_branch_metadata`` that filters by key.
1120
+
1121
+ Args:
1122
+ key: Metadata key to look up (e.g. "KBC.projectDescription").
1123
+ branch_id: Branch ID or the literal "default" for the main branch.
1124
+
1125
+ Returns:
1126
+ The string value if the key exists (may be None if the API stored null),
1127
+ or ``METADATA_NOT_FOUND`` sentinel if the key is not present.
1128
+ """
1129
+ for entry in self.list_branch_metadata(branch_id=branch_id):
1130
+ if entry.get("key") == key:
1131
+ return entry.get("value")
1132
+ return METADATA_NOT_FOUND
1133
+
1134
+ def list_buckets(
1135
+ self, include: str | None = None, branch_id: int | None = None
1136
+ ) -> list[dict[str, Any]]:
1137
+ """List storage buckets with optional extended information.
1138
+
1139
+ Args:
1140
+ include: Optional include parameter (e.g. "linkedBuckets" for sharing info).
1141
+ branch_id: If set, list buckets from a specific dev branch.
1142
+
1143
+ Returns:
1144
+ List of bucket dicts from the API.
1145
+ """
1146
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
1147
+ params: dict[str, str] = {}
1148
+ if include:
1149
+ params["include"] = include
1150
+ response = self._request("GET", f"{prefix}/buckets", params=params)
1151
+ return response.json()
1152
+
1153
def list_buckets_with_metadata(self) -> list[dict[str, Any]]:
    """Fetch storage buckets with their metadata attached.

    Shorthand for ``list_buckets(include="metadata")``.

    Returns:
        List of bucket dicts with metadata fields.
    """
    buckets = self.list_buckets(include="metadata")
    return buckets
1160
+
1161
+ def list_bucket_metadata(
1162
+ self,
1163
+ bucket_id: str,
1164
+ branch_id: int | None = None,
1165
+ ) -> list[dict[str, Any]]:
1166
+ """List metadata entries on a single storage bucket.
1167
+
1168
+ GET /v2/storage/[branch/{b}/]buckets/{id}/metadata
1169
+
1170
+ Args:
1171
+ bucket_id: Bucket ID (e.g. 'in.c-db').
1172
+ branch_id: If set, target a specific dev branch.
1173
+
1174
+ Returns:
1175
+ List of metadata dicts (id/key/value/provider/timestamp).
1176
+ """
1177
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
1178
+ safe_id = quote(bucket_id, safe="")
1179
+ response = self._request("GET", f"{prefix}/buckets/{safe_id}/metadata")
1180
+ return response.json()
1181
+
1182
+ def set_bucket_metadata(
1183
+ self,
1184
+ bucket_id: str,
1185
+ entries: list[tuple[str, str]],
1186
+ branch_id: int | None = None,
1187
+ provider: str = "user",
1188
+ ) -> list[dict[str, Any]]:
1189
+ """Upsert metadata key/value pairs on a storage bucket.
1190
+
1191
+ POST /v2/storage/buckets/{id}/metadata
1192
+
1193
+ Uses the same PHP-style array form encoding as ``set_branch_metadata``.
1194
+
1195
+ Args:
1196
+ bucket_id: Bucket ID (e.g. 'in.c-db').
1197
+ entries: Ordered list of ``(key, value)`` metadata tuples.
1198
+ branch_id: If set, target a specific dev branch.
1199
+ provider: Metadata provider. Defaults to ``"user"`` for
1200
+ CLI-originated descriptions; pass ``"system"`` for reserved
1201
+ ``KBC.*`` keys (e.g. ``KBC.createdBy.branch.id``) -- the API
1202
+ rejects user-provider writes on that namespace.
1203
+
1204
+ Returns:
1205
+ Full metadata list for the bucket after the upsert.
1206
+ """
1207
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
1208
+ safe_id = quote(bucket_id, safe="")
1209
+ form: dict[str, str] = {"provider": provider}
1210
+ for i, (key, value) in enumerate(entries):
1211
+ form[f"metadata[{i}][key]"] = key
1212
+ form[f"metadata[{i}][value]"] = value
1213
+ response = self._request("POST", f"{prefix}/buckets/{safe_id}/metadata", data=form)
1214
+ return response.json()
1215
+
1216
+ def set_table_metadata(
1217
+ self,
1218
+ table_id: str,
1219
+ entries: list[tuple[str, str]],
1220
+ branch_id: int | None = None,
1221
+ ) -> list[dict[str, Any]]:
1222
+ """Upsert metadata key/value pairs on a storage table.
1223
+
1224
+ POST /v2/storage/tables/{id}/metadata
1225
+
1226
+ Provider is always ``"user"`` for CLI-originated descriptions.
1227
+ Column-level descriptions use the namespaced key convention
1228
+ ``KBC.column.{colname}.description`` stored at table-metadata level
1229
+ (Keboola Storage API does not expose a user-writable column-metadata
1230
+ endpoint; ``columnMetadata`` is populated exclusively by components).
1231
+
1232
+ Args:
1233
+ table_id: Full table ID (e.g. "in.c-bucket.table").
1234
+ entries: Ordered list of ``(key, value)`` metadata tuples.
1235
+ branch_id: If set, target a specific dev branch.
1236
+
1237
+ Returns:
1238
+ Full metadata list for the table after the upsert.
1239
+ """
1240
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
1241
+ safe_id = quote(table_id, safe="")
1242
+ form: dict[str, str] = {"provider": "user"}
1243
+ for i, (key, value) in enumerate(entries):
1244
+ form[f"metadata[{i}][key]"] = key
1245
+ form[f"metadata[{i}][value]"] = value
1246
+ response = self._request("POST", f"{prefix}/tables/{safe_id}/metadata", data=form)
1247
+ return response.json()
1248
+
1249
def get_bucket_detail(
    self,
    bucket_id: str,
    branch_id: int | None = None,
) -> dict[str, Any]:
    """Retrieve the detail record of one storage bucket.

    The response carries the full bucket metadata, including sharing/linked
    information (sourceBucket, sourceTable with project references).

    Args:
        bucket_id: Bucket ID (e.g. 'in.c-db'); percent-encoded for the path.
        branch_id: When truthy, target that dev branch.

    Returns:
        Bucket detail dict as decoded from the API response.
    """
    root = "/v2/storage"
    if branch_id:
        root = f"{root}/branch/{branch_id}"
    encoded_bucket = quote(bucket_id, safe="")
    return self._request("GET", f"{root}/buckets/{encoded_bucket}").json()
1270
+
1271
def get_table_detail(
    self,
    table_id: str,
    branch_id: int | None = None,
) -> dict[str, Any]:
    """Retrieve the detail record of one storage table.

    Args:
        table_id: Full table ID (e.g. "in.c-bucket.table"); percent-encoded
            for the path.
        branch_id: When truthy, target that dev branch.

    Returns:
        Table detail dict including columns, metadata, bucket info.
    """
    root = "/v2/storage"
    if branch_id:
        root = f"{root}/branch/{branch_id}"
    encoded_table = quote(table_id, safe="")
    return self._request("GET", f"{root}/tables/{encoded_table}").json()
1289
+
1290
+ def list_tables(
1291
+ self,
1292
+ bucket_id: str | None = None,
1293
+ branch_id: int | None = None,
1294
+ include: str | None = None,
1295
+ ) -> list[dict[str, Any]]:
1296
+ """List storage tables, optionally filtered by bucket.
1297
+
1298
+ Args:
1299
+ bucket_id: If set, list tables only from this bucket.
1300
+ branch_id: If set, target a specific dev branch.
1301
+ include: Optional include parameter (e.g. 'columns').
1302
+
1303
+ Returns:
1304
+ List of table dicts from the API.
1305
+ """
1306
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
1307
+ params: dict[str, str] = {}
1308
+ if include:
1309
+ params["include"] = include
1310
+ if bucket_id:
1311
+ safe_id = quote(bucket_id, safe="")
1312
+ response = self._request("GET", f"{prefix}/buckets/{safe_id}/tables", params=params)
1313
+ else:
1314
+ response = self._request("GET", f"{prefix}/tables", params=params)
1315
+ return response.json()
1316
+
1317
+ # ------------------------------------------------------------------
1318
+ # Bucket sharing & linking
1319
+ # ------------------------------------------------------------------
1320
+
1321
def list_shared_buckets(self, include: str | None = None) -> list[dict[str, Any]]:
    """List buckets shared into the current project's organization.

    Issues ``GET /v2/storage/shared-buckets``.

    Args:
        include: Optional ``include`` query value (e.g. "metadata"); omitted
            from the query when falsy.

    Returns:
        List of shared bucket dicts.
    """
    query: dict[str, str] = {"include": include} if include else {}
    reply = self._request("GET", "/v2/storage/shared-buckets", params=query)
    return reply.json()
1337
+
1338
def share_bucket(
    self,
    bucket_id: str,
    sharing_type: str,
    target_project_ids: list[int] | None = None,
    target_users: list[str] | None = None,
) -> dict[str, Any]:
    """Turn on sharing for a bucket and wait for the storage job to finish.

    Each sharing type maps onto its own POST endpoint under
    ``/v2/storage/buckets/{id}/``; the request is sent with ``async=true``
    and the returned job is polled via ``_wait_for_storage_job``.

    Args:
        bucket_id: Bucket ID to share (e.g. "out.c-data").
        sharing_type: One of "organization", "organization-project",
            "selected-projects", "selected-users".
        target_project_ids: Required for "selected-projects"; sent as a list
            of stringified IDs under ``targetProjectIds``. Ignored for other
            types.
        target_users: Required for "selected-users" (email addresses); sent
            under ``targetUsers``. Ignored for other types.

    Returns:
        Completed storage job dict.

    Raises:
        KeboolaApiError: For an unknown ``sharing_type`` (raised locally with
            status 400, before any request), or if the share operation fails
            (e.g. 403 for non-master token).
    """
    # Final URL path segment per sharing type; insertion order also drives
    # the order of the "Valid types" list in the error message.
    route_suffix = {
        "organization": "share-organization",
        "organization-project": "share-organization-project",
        "selected-projects": "share-to-projects",
        "selected-users": "share-to-users",
    }

    if sharing_type not in route_suffix:
        raise KeboolaApiError(
            message=f"Invalid sharing type: '{sharing_type}'. "
            f"Valid types: {', '.join(route_suffix)}",
            status_code=400,
            error_code=ErrorCode.INVALID_SHARING_TYPE,
            retryable=False,
        )

    encoded_bucket = quote(bucket_id, safe="")
    endpoint = f"/v2/storage/buckets/{encoded_bucket}/{route_suffix[sharing_type]}"

    payload: dict[str, Any] = {}
    if sharing_type == "selected-projects":
        if target_project_ids:
            payload["targetProjectIds"] = list(map(str, target_project_ids))
    elif sharing_type == "selected-users":
        if target_users:
            payload["targetUsers"] = target_users

    reply = self._request("POST", endpoint, params={"async": "true"}, data=payload)
    return self._wait_for_storage_job(reply.json())
1387
+
1388
def change_sharing_type(
    self,
    bucket_id: str,
    sharing_type: str,
) -> dict[str, Any]:
    """Switch the sharing type of a bucket that is already shared.

    Issues ``PUT /v2/storage/buckets/{bucket_id}/share`` with ``async=true``
    and a JSON body ``{"sharing": <sharing_type>}``, then polls the returned
    storage job to completion.

    Args:
        bucket_id: Bucket ID.
        sharing_type: "organization" or "organization-project".

    Returns:
        Completed storage job dict.
    """
    encoded_bucket = quote(bucket_id, safe="")
    endpoint = f"/v2/storage/buckets/{encoded_bucket}/share"
    reply = self._request(
        "PUT",
        endpoint,
        json={"sharing": sharing_type},
        params={"async": "true"},
    )
    queued_job = reply.json()
    return self._wait_for_storage_job(queued_job)
1412
+
1413
def unshare_bucket(self, bucket_id: str) -> dict[str, Any]:
    """Turn off sharing for a bucket and wait for the storage job to finish.

    Issues ``DELETE /v2/storage/buckets/{bucket_id}/share`` with
    ``async=true``.

    Prerequisite: no linked buckets exist in other projects.

    Args:
        bucket_id: Bucket ID whose sharing is disabled.

    Returns:
        Completed storage job dict.
    """
    encoded_bucket = quote(bucket_id, safe="")
    endpoint = f"/v2/storage/buckets/{encoded_bucket}/share"
    reply = self._request("DELETE", endpoint, params={"async": "true"})
    queued_job = reply.json()
    return self._wait_for_storage_job(queued_job)
1430
+
1431
def link_bucket(
    self,
    source_project_id: int,
    source_bucket_id: str,
    name: str,
    stage: str = "in",
) -> dict[str, Any]:
    """Link a bucket shared by another project into this one.

    Issues ``POST /v2/storage/buckets`` with ``async=true`` and a form body
    that carries ``sourceProjectId`` + ``sourceBucketId``; the returned
    storage job is polled to completion.

    Args:
        source_project_id: Project ID that owns the shared bucket.
        source_bucket_id: Bucket ID in the source project.
        name: Used for both ``name`` and ``displayName`` of the linked bucket
            in this project.
        stage: Bucket stage ("in" or "out"). Defaults to "in".

    Returns:
        Completed storage job dict with linked bucket info in results.
    """
    payload = {
        "stage": stage,
        "name": name,
        "displayName": name,
        "sourceProjectId": source_project_id,
        "sourceBucketId": source_bucket_id,
    }
    reply = self._request(
        "POST",
        "/v2/storage/buckets",
        params={"async": "true"},
        data=payload,
    )
    queued_job = reply.json()
    return self._wait_for_storage_job(queued_job)
1464
+
1465
def delete_bucket(
    self, bucket_id: str, force: bool = False, branch_id: int | None = None
) -> dict[str, Any]:
    """Delete a bucket and wait for the storage job to finish.

    Serves both for unlinking shared buckets and for deleting regular ones.

    Args:
        bucket_id: Bucket ID to delete.
        force: If True, ``force=true`` is added to the query so the bucket is
            deleted even if it contains tables.
        branch_id: When truthy, target that dev branch.

    Returns:
        Completed storage job dict.
    """
    root = "/v2/storage"
    if branch_id:
        root = f"{root}/branch/{branch_id}"
    encoded_bucket = quote(bucket_id, safe="")

    if force:
        query: dict[str, str] = {"async": "true", "force": "true"}
    else:
        query = {"async": "true"}

    reply = self._request("DELETE", f"{root}/buckets/{encoded_bucket}", params=query)
    return self._wait_for_storage_job(reply.json())
1487
+
1488
+ def create_bucket(
1489
+ self,
1490
+ stage: str,
1491
+ name: str,
1492
+ description: str | None = None,
1493
+ backend: str | None = None,
1494
+ branch_id: int | None = None,
1495
+ ) -> dict[str, Any]:
1496
+ """Create a new storage bucket (sync).
1497
+
1498
+ Args:
1499
+ stage: Bucket stage — "in" or "out".
1500
+ name: Bucket name slug (e.g. "my-bucket").
1501
+ description: Optional description.
1502
+ backend: Optional backend type (e.g. "snowflake", "bigquery").
1503
+ branch_id: If set, create bucket in a specific dev branch.
1504
+
1505
+ Returns:
1506
+ New bucket dict from the API.
1507
+ """
1508
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
1509
+ body: dict[str, str] = {"stage": stage, "name": name}
1510
+ if description is not None:
1511
+ body["description"] = description
1512
+ if backend is not None:
1513
+ body["backend"] = backend
1514
+ response = self._request("POST", f"{prefix}/buckets", json=body)
1515
+ return response.json()
1516
+
1517
def create_table(
    self,
    bucket_id: str,
    name: str,
    columns: list[dict[str, Any]],
    primary_key: list[str] | None = None,
    branch_id: int | None = None,
) -> dict[str, Any]:
    """Create a table with typed columns and wait for the storage job.

    Posts a JSON definition to ``.../buckets/{id}/tables-definition`` and
    polls the returned job via ``_wait_for_storage_job``.

    Args:
        bucket_id: Target bucket ID (e.g. "in.c-my-bucket").
        name: Table name.
        columns: List of column dicts with "name" and "definition.type" keys,
            e.g. [{"name": "id", "definition": {"type": "INTEGER"}}].
        primary_key: Optional list of column names for the primary key; a
            falsy value is sent as an empty list.
        branch_id: When truthy, create the table in that dev branch.

    Returns:
        The ``results`` entry of the completed storage job, or ``{}`` when
        the job dict has no such key.
    """
    root = "/v2/storage"
    if branch_id:
        root = f"{root}/branch/{branch_id}"
    encoded_bucket = quote(bucket_id, safe="")

    definition: dict[str, Any] = {
        "name": name,
        "primaryKeysNames": primary_key or [],
        "columns": columns,
    }
    reply = self._request(
        "POST", f"{root}/buckets/{encoded_bucket}/tables-definition", json=definition
    )
    finished_job = self._wait_for_storage_job(reply.json())
    return finished_job.get("results", {})
1548
+
1549
+ def prepare_file_upload(
1550
+ self,
1551
+ name: str,
1552
+ size_bytes: int,
1553
+ tags: list[str] | None = None,
1554
+ is_permanent: bool = False,
1555
+ notify: bool = False,
1556
+ ) -> dict[str, Any]:
1557
+ """Register a file with the Storage API and get a presigned upload URL.
1558
+
1559
+ Step 1 of the async table upload flow.
1560
+
1561
+ Args:
1562
+ name: Filename (e.g. "data.csv").
1563
+ size_bytes: File size in bytes.
1564
+ tags: Optional list of tags to assign to the file.
1565
+ is_permanent: If True, file is not auto-deleted after 15 days.
1566
+ notify: If True, send notification on upload completion.
1567
+
1568
+ Returns:
1569
+ File resource dict including 'id' (fileId), 'url', 'uploadParams',
1570
+ and 'gcsUploadParams' (present on GCP stacks; contains bearer token
1571
+ and GCS bucket/key for direct PUT upload).
1572
+ """
1573
+ # federationToken=1 is required on newer stacks (AWS, Azure) to get
1574
+ # cloud-native credentials instead of deprecated presigned POST fields.
1575
+ body: dict[str, Any] = {"name": name, "sizeBytes": size_bytes, "federationToken": "1"}
1576
+ if is_permanent:
1577
+ body["isPermanent"] = "1"
1578
+ if notify:
1579
+ body["notify"] = "1"
1580
+ if tags:
1581
+ for i, tag in enumerate(tags):
1582
+ body[f"tags[{i}]"] = tag
1583
+ response = self._request("POST", "/v2/storage/files/prepare", data=body)
1584
+ return response.json()
1585
+
1586
def _upload_to_cloud(
    self,
    upload_info: dict[str, Any],
    file_path: str,
) -> None:
    """Upload a file to cloud storage using credentials from files/prepare.

    Five upload paths, checked in this order, based on what the API returns:

    GCP stack (``gcsUploadParams`` present):
        PUT to ``https://storage.googleapis.com/{bucket}/{key}`` with an
        OAuth2 ``Authorization: Bearer`` header. Success: HTTP 200.

    Azure stack (``absUploadParams`` present):
        PUT to the URL produced by ``_build_abs_upload_url`` with an
        ``x-ms-blob-type: BlockBlob`` header. Success: HTTP 200 or 201.

    AWS stack with federation (``uploadParams.credentials`` present):
        PUT to ``https://{bucket}.s3.{region}.amazonaws.com/{key}``
        with headers produced by ``_s3_signed_headers``. Success: HTTP 200.

    Legacy S3 presigned POST (``uploadParams`` without credentials):
        Multipart form POST to ``upload_info["url"]`` — deprecated on newer
        stacks. Success: HTTP 200 or 204.

    Fallback (none of the above present):
        Plain PUT of the file to ``upload_info["url"]`` with no extra
        headers. Success: HTTP 200 or 201.

    Args:
        upload_info: Full response dict from prepare_file_upload().
        file_path: Local path to the file.

    Raises:
        KeboolaApiError: If the response status is not one of the success
            codes of the chosen path (``ErrorCode.UPLOAD_FAILED``, marked
            non-retryable).
    """
    p = Path(file_path)

    gcs_params = upload_info.get("gcsUploadParams")
    abs_params = upload_info.get("absUploadParams")
    # A missing or null uploadParams is normalized to {} so the .get() below is safe.
    upload_params = upload_info.get("uploadParams") or {}

    if gcs_params:
        # GCP: PUT to storage.googleapis.com with short-lived OAuth2 bearer token
        bucket = gcs_params["bucket"]
        key = gcs_params["key"]
        access_token = gcs_params["access_token"]
        upload_url = f"https://storage.googleapis.com/{bucket}/{key}"
        # The open file handle is passed as the request content (not read here).
        with p.open("rb") as fh, httpx.Client(timeout=FILE_UPLOAD_TIMEOUT) as http:
            response = http.put(
                upload_url,
                content=fh,
                headers={"Authorization": f"Bearer {access_token}"},
            )
        success_codes = (200,)
    elif abs_params:
        # Azure Blob Storage: PUT with write-capable SAS from absUploadParams
        upload_url = _build_abs_upload_url(abs_params)
        with p.open("rb") as fh, httpx.Client(timeout=FILE_UPLOAD_TIMEOUT) as http:
            response = http.put(
                upload_url,
                content=fh,
                headers={"x-ms-blob-type": "BlockBlob"},
            )
        success_codes = (200, 201)
    elif upload_params.get("credentials"):
        # AWS S3 with federation token: PUT with SigV4 signed headers
        creds = upload_params["credentials"]
        bucket = upload_params["bucket"]
        key = upload_params["key"]
        # Region comes from the top-level response, not uploadParams.
        region = upload_info.get("region", "us-east-1")
        upload_url = f"https://{bucket}.s3.{region}.amazonaws.com/{key}"
        # NOTE(review): the whole file is read into memory on this path because
        # the bytes are handed to _s3_signed_headers (presumably for the payload
        # hash) -- large uploads here are bounded by available RAM.
        with p.open("rb") as fh:
            file_bytes = fh.read()
        headers = _s3_signed_headers(
            upload_url, creds, region, method="PUT", payload=file_bytes
        )
        with httpx.Client(timeout=FILE_UPLOAD_TIMEOUT) as http:
            response = http.put(upload_url, content=file_bytes, headers=headers)
        success_codes = (200,)
    elif upload_params:
        # Legacy S3 presigned POST: multipart form — uploadParams first, file last
        url = upload_info["url"]
        with httpx.Client(timeout=FILE_UPLOAD_TIMEOUT) as http:
            form_fields: list[tuple[str, Any]] = [
                (k, (None, str(v))) for k, v in upload_params.items()
            ]
            # The POST must happen while the file handle is still open.
            with p.open("rb") as fh:
                form_fields.append(("file", (p.name, fh, "application/octet-stream")))
                response = http.post(url, files=form_fields)
        success_codes = (200, 204)
    else:
        # Fallback: signed URL PUT (no extra auth needed)
        url = upload_info["url"]
        with p.open("rb") as fh, httpx.Client(timeout=FILE_UPLOAD_TIMEOUT) as http:
            response = http.put(url, content=fh)
        success_codes = (200, 201)

    # Each path accepts a different set of success statuses; anything else fails.
    if response.status_code not in success_codes:
        raise KeboolaApiError(
            message=f"Cloud storage upload failed (HTTP {response.status_code})",
            status_code=response.status_code,
            error_code=ErrorCode.UPLOAD_FAILED,
            retryable=False,
        )
1683
+
1684
def import_table_async(
    self,
    table_id: str,
    file_id: int,
    incremental: bool = False,
    delimiter: str = ",",
    enclosure: str = '"',
    branch_id: int | None = None,
) -> dict[str, Any]:
    """Start the import of an already-uploaded file into a table (step 3).

    Posts a form to ``.../tables/{id}/import-async`` and polls the returned
    job until it completes (up to IMPORT_JOB_MAX_WAIT seconds).

    Args:
        table_id: Target table ID (e.g. "in.c-my-bucket.my-table").
        file_id: File ID returned by prepare_file_upload(); sent as a string
            under ``dataFileId``.
        incremental: If True, append rows (``"1"``); if False, full load
            (``"0"``).
        delimiter: CSV column delimiter.
        enclosure: CSV value enclosure character.
        branch_id: When truthy, target that dev branch.

    Returns:
        Completed import job dict.
    """
    root = "/v2/storage"
    if branch_id:
        root = f"{root}/branch/{branch_id}"
    encoded_table = quote(table_id, safe="")

    incremental_flag = "1" if incremental else "0"
    payload: dict[str, str] = {
        "dataFileId": str(file_id),
        "incremental": incremental_flag,
        "delimiter": delimiter,
        "enclosure": enclosure,
    }
    reply = self._request("POST", f"{root}/tables/{encoded_table}/import-async", data=payload)
    queued_job = reply.json()
    return self._wait_for_storage_job(queued_job, max_wait=IMPORT_JOB_MAX_WAIT)
1718
+
1719
def upload_table(
    self,
    table_id: str,
    file_path: str,
    incremental: bool = False,
    delimiter: str = ",",
    enclosure: str = '"',
    branch_id: int | None = None,
) -> dict[str, Any]:
    """Load a local CSV file into an existing table and wait for completion.

    Uses the file-first async flow (intended to support files up to 5 GB):
        1. ``prepare_file_upload`` registers the file and returns the cloud
           upload descriptor.
        2. ``_upload_to_cloud`` sends the bytes straight to cloud storage,
           choosing the transport from that descriptor.
        3. ``import_table_async`` triggers the import job and polls it.

    Args:
        table_id: Target table ID (e.g. "in.c-my-bucket.my-table").
        file_path: Local path to the CSV file.
        incremental: If True, append rows; if False (default), full load.
        delimiter: CSV column delimiter (default ",").
        enclosure: CSV value enclosure character (default '"').
        branch_id: When truthy, target that dev branch (applies to the
            import step only).

    Returns:
        The ``results`` entry of the completed import job (importedRowsCount,
        warnings, etc.), or ``{}`` when the job dict has no such key.
    """
    local_file = Path(file_path)

    # Step 1: register the file; its ID is read before any bytes are sent.
    descriptor = self.prepare_file_upload(
        name=local_file.name,
        size_bytes=local_file.stat().st_size,
    )
    registered_id = descriptor["id"]

    # Step 2: push the bytes to cloud storage.
    self._upload_to_cloud(descriptor, file_path)

    # Step 3: import into the table and wait.
    finished_job = self.import_table_async(
        table_id=table_id,
        file_id=registered_id,
        incremental=incremental,
        delimiter=delimiter,
        enclosure=enclosure,
        branch_id=branch_id,
    )
    return finished_job.get("results", {})
1760
+
1761
def delete_table(
    self,
    table_id: str,
    branch_id: int | None = None,
    force: bool = False,
) -> dict[str, Any]:
    """Delete a storage table and wait for the storage job to finish.

    Args:
        table_id: Full table ID (e.g. "in.c-bucket.table").
        branch_id: When truthy, target that dev branch.
        force: If True, ``force=true`` is added to the query to
            cascade-delete the table and all its aliases.

    Returns:
        Completed storage job dict.
    """
    root = "/v2/storage"
    if branch_id:
        root = f"{root}/branch/{branch_id}"
    encoded_table = quote(table_id, safe="")

    if force:
        query: dict[str, str] = {"async": "true", "force": "true"}
    else:
        query = {"async": "true"}

    reply = self._request("DELETE", f"{root}/tables/{encoded_table}", params=query)
    return self._wait_for_storage_job(reply.json())
1784
+
1785
def truncate_table(
    self,
    table_id: str,
    branch_id: int | None = None,
) -> dict[str, Any]:
    """Remove every row of a storage table while keeping its schema.

    The request is ``DELETE .../tables/{id}/rows`` with ``allowTruncate=1``,
    the safety opt-in the Storage API requires when no filter clauses are
    sent. The endpoint is inherently asynchronous on every branch: it always
    answers ``HTTP 202`` with a queued storage job
    (``operationName: tableRowsDelete``) that ``_wait_for_storage_job`` polls
    to completion.

    ``async=true`` is deliberately NOT sent: the API rejects it as an unknown
    field. This differs from ``delete_table``'s contract -- see the
    truncate-table gotcha in plugins/.../gotchas.md for the live-API
    evidence.

    Only rows are removed; the table definition (columns, types, primary
    key, descriptions, sharing edges, and dependents) is preserved.

    Args:
        table_id: Full table ID (e.g. "in.c-bucket.table").
        branch_id: When truthy, target that dev branch.

    Returns:
        Completed storage job dict.
    """
    root = "/v2/storage"
    if branch_id:
        root = f"{root}/branch/{branch_id}"
    encoded_table = quote(table_id, safe="")
    reply = self._request(
        "DELETE",
        f"{root}/tables/{encoded_table}/rows",
        params={"allowTruncate": "1"},
    )
    queued_job = reply.json()
    return self._wait_for_storage_job(queued_job)
1819
+
1820
def delete_column(
    self,
    table_id: str,
    column_name: str,
    branch_id: int | None = None,
    force: bool = False,
) -> dict[str, Any]:
    """Drop one column from a storage table and wait for the storage job.

    Args:
        table_id: Full table ID (e.g. "in.c-bucket.table").
        column_name: Name of the column to delete; percent-encoded for the
            URL path like the table ID.
        branch_id: When truthy, target that dev branch.
        force: If True, ``force=true`` is added to the query so the column is
            also deleted from aliased tables.

    Returns:
        Completed storage job dict.
    """
    root = "/v2/storage"
    if branch_id:
        root = f"{root}/branch/{branch_id}"
    encoded_table = quote(table_id, safe="")
    encoded_column = quote(column_name, safe="")

    if force:
        query: dict[str, str] = {"async": "true", "force": "true"}
    else:
        query = {"async": "true"}

    endpoint = f"{root}/tables/{encoded_table}/columns/{encoded_column}"
    reply = self._request("DELETE", endpoint, params=query)
    return self._wait_for_storage_job(reply.json())
1850
+
1851
def swap_tables(
    self,
    table_id: str,
    target_table_id: str,
    branch_id: int,
) -> dict[str, Any]:
    """Swap two storage tables within a branch and wait for the job.

    The two tables exchange physical positions. Aliases keep pointing at the
    same physical position, so after the swap they expose the OTHER table's
    data. ``branch_id`` is mandatory because the swap is always
    branch-scoped, but ANY branch is accepted -- including the
    default/production branch. Swapping on the default branch is the
    supported way to retype a prod table, since dev-branch merge does not
    propagate storage schema.

    The API answers with a queued storage job (``operationName: tableSwap``)
    which is polled to completion before returning, mirroring
    ``delete_table`` semantics. (The PHP reference client returns the raw
    initial response, but the operation is asynchronous on every backend
    tested -- callers expect a finished swap on return.)

    Args:
        table_id: Full ID of the first table (e.g. "in.c-bucket.table").
        target_table_id: Full ID of the second table to swap with; sent in
            the JSON body as ``targetTableId``.
        branch_id: Development branch ID. Required by the API.

    Returns:
        Completed storage job dict.
    """
    encoded_table = quote(table_id, safe="")
    endpoint = f"/v2/storage/branch/{branch_id}/tables/{encoded_table}/swap"
    reply = self._request("POST", endpoint, json={"targetTableId": target_table_id})
    queued_job = reply.json()
    return self._wait_for_storage_job(queued_job)
1885
+
1886
def pull_table(self, table_id: str, branch_id: int) -> dict[str, Any]:
    """Materialize a default-branch table inside a dev branch.

    On ``storage-branches`` projects a dev branch reads production tables
    transparently (copy-on-write) until its first write. Operations that
    mutate a table in the branch -- ``swap_tables`` or a column drop, for
    example -- need a branch-local copy of the table first; without one the
    Storage API reports the bucket as "not found" in the branch. This call
    creates that copy by pulling the table from the default (production)
    branch into the branch's isolated storage. It is the same call the
    platform issues on a branch's first write to a production table.

    The pull is one-way (default -> branch). The API answers with a queued
    storage job, which is polled to completion before returning, mirroring
    ``swap_tables`` semantics.

    Args:
        table_id: Full ID of the table to pull (e.g. "in.c-bucket.table").
        branch_id: Target development branch ID. The source is always the
            default/production branch.

    Returns:
        Completed storage job dict.
    """
    encoded_table = quote(table_id, safe="")
    endpoint = f"/v2/storage/branch/{branch_id}/tables/{encoded_table}/pull"
    queued_job = self._request("POST", endpoint).json()
    return self._wait_for_storage_job(queued_job)
1915
+
1916
def list_tables_with_metadata(self) -> list[dict[str, Any]]:
    """List all storage tables together with columns, metadata and buckets.

    Thin convenience wrapper: delegates to ``list_tables`` with a fixed
    ``include`` value and no bucket or branch filter.

    Returns:
        List of table dicts with columns, metadata, and bucket info.
    """
    everything = "columns,metadata,buckets"
    return self.list_tables(include=everything)
1923
+
1924
+ @staticmethod
1925
+ def _apply_table_filters(
1926
+ params: dict[str, Any],
1927
+ *,
1928
+ where_column: str | None = None,
1929
+ where_operator: str = "eq",
1930
+ where_values: list[str] | None = None,
1931
+ changed_since: str | None = None,
1932
+ changed_until: str | None = None,
1933
+ ) -> None:
1934
+ """Mutate ``params`` with Storage table export/preview filter clauses.
1935
+
1936
+ Shared by :meth:`get_table_data_preview` and :meth:`export_table_async`
1937
+ so the ``whereColumn`` / ``whereOperator`` / ``whereValues[]`` and
1938
+ ``changedSince`` / ``changedUntil`` contract is identical across the
1939
+ sync-preview and async-export endpoints.
1940
+
1941
+ Args:
1942
+ where_column: Column to filter on. Must be paired with ``where_values``.
1943
+ where_operator: ``"eq"`` or ``"neq"`` (only meaningful with a filter).
1944
+ where_values: Values the column is matched against (OR within the set).
1945
+ changed_since: Lower bound on import time -- a unix timestamp or a
1946
+ strtotime string like ``"-2 days"``.
1947
+ changed_until: Upper bound on import time (same formats).
1948
+
1949
+ Raises:
1950
+ ValueError: On an invalid ``where_operator`` or a half-specified
1951
+ where-clause (a column without values, or values without a column).
1952
+ """
1953
+ if (where_column is None) != (where_values is None):
1954
+ raise ValueError(
1955
+ "where_column and where_values must be given together "
1956
+ "(the column to match and the values to match it against)."
1957
+ )
1958
+ if where_column is not None:
1959
+ if where_operator not in ("eq", "neq"):
1960
+ raise ValueError(f"where_operator must be 'eq' or 'neq', got {where_operator!r}.")
1961
+ params["whereColumn"] = where_column
1962
+ params["whereOperator"] = where_operator
1963
+ params["whereValues[]"] = where_values
1964
+ if changed_since is not None:
1965
+ params["changedSince"] = changed_since
1966
+ if changed_until is not None:
1967
+ params["changedUntil"] = changed_until
1968
+
1969
+ def get_table_data_preview(
1970
+ self,
1971
+ table_id: str,
1972
+ limit: int = 100,
1973
+ columns: list[str] | None = None,
1974
+ *,
1975
+ where_column: str | None = None,
1976
+ where_operator: str = "eq",
1977
+ where_values: list[str] | None = None,
1978
+ changed_since: str | None = None,
1979
+ changed_until: str | None = None,
1980
+ ) -> str:
1981
+ """Get a CSV preview of table data.
1982
+
1983
+ Args:
1984
+ table_id: Full table ID (e.g. "in.c-bucket.table").
1985
+ limit: Max number of rows to return.
1986
+ columns: Optional list of column names to export.
1987
+ Storage API limits sync export to 30 columns max.
1988
+ where_column: Filter to rows where this column matches ``where_values``.
1989
+ where_operator: ``"eq"`` (default) or ``"neq"``.
1990
+ where_values: Values for the ``where_column`` filter.
1991
+ changed_since: Only rows imported since this time (unix ts / strtotime).
1992
+ changed_until: Only rows imported up to this time.
1993
+
1994
+ Returns:
1995
+ CSV string with table data preview.
1996
+ """
1997
+ safe_id = quote(table_id, safe="")
1998
+ params: dict[str, Any] = {"limit": limit}
1999
+ if columns:
2000
+ params["columns"] = ",".join(columns)
2001
+ self._apply_table_filters(
2002
+ params,
2003
+ where_column=where_column,
2004
+ where_operator=where_operator,
2005
+ where_values=where_values,
2006
+ changed_since=changed_since,
2007
+ changed_until=changed_until,
2008
+ )
2009
+ response = self._request(
2010
+ "GET",
2011
+ f"/v2/storage/tables/{safe_id}/data-preview",
2012
+ params=params,
2013
+ )
2014
+ return response.text
2015
+
2016
def export_table_async(
    self,
    table_id: str,
    columns: list[str] | None = None,
    limit: int | None = None,
    branch_id: int | None = None,
    file_type: str = "csv",
    *,
    where_column: str | None = None,
    where_operator: str = "eq",
    where_values: list[str] | None = None,
    changed_since: str | None = None,
    changed_until: str | None = None,
) -> dict[str, Any]:
    """Kick off an async table export and block until the job is done.

    Posts a form to ``.../tables/{id}/export-async`` and polls the returned
    job with ``max_wait=EXPORT_JOB_MAX_WAIT``.

    Args:
        table_id: Full table ID (e.g. "in.c-bucket.table").
        columns: Optional list of column names to export (sent comma-joined).
        limit: Optional max number of rows to export (sent as a string).
        branch_id: When truthy, target that dev branch.
        file_type: Output format, either "csv" (default) or "parquet".
            Parquet exports are always sliced and Snappy-compressed inside
            the parquet format (not gzipped at the slice level).
        where_column: Filter to rows where this column matches ``where_values``.
        where_operator: ``"eq"`` (default) or ``"neq"``.
        where_values: Values for the ``where_column`` filter.
        changed_since: Only rows imported since this time (unix ts / strtotime).
        changed_until: Only rows imported up to this time.

    Returns:
        Completed export job dict (results contain file info).

    Raises:
        ValueError: For an unsupported ``file_type``, or propagated from
            ``_apply_table_filters`` for an invalid where-clause.
    """
    supported_formats = ("csv", "parquet")
    if file_type not in supported_formats:
        raise ValueError(f"file_type must be 'csv' or 'parquet', got {file_type!r}")

    root = "/v2/storage"
    if branch_id:
        root = f"{root}/branch/{branch_id}"
    encoded_table = quote(table_id, safe="")

    form: dict[str, Any] = {"fileType": file_type}
    if columns:
        form["columns"] = ",".join(columns)
    if limit is not None:
        form["limit"] = str(limit)
    self._apply_table_filters(
        form,
        where_column=where_column,
        where_operator=where_operator,
        where_values=where_values,
        changed_since=changed_since,
        changed_until=changed_until,
    )

    reply = self._request("POST", f"{root}/tables/{encoded_table}/export-async", data=form)
    queued_job = reply.json()
    return self._wait_for_storage_job(queued_job, max_wait=EXPORT_JOB_MAX_WAIT)
2072
+
2073
+ def add_column(
2074
+ self,
2075
+ table_id: str,
2076
+ name: str,
2077
+ definition: dict[str, Any] | None = None,
2078
+ branch_id: int | None = None,
2079
+ ) -> dict[str, Any]:
2080
+ """Add a single column to an existing table (synchronous).
2081
+
2082
+ Unlike ``delete_column`` (async storage job), the Storage API
2083
+ ``POST /tables/{id}/columns`` endpoint is synchronous and returns the
2084
+ updated table resource directly -- there is no job to poll.
2085
+
2086
+ Args:
2087
+ table_id: Full table ID (e.g. "in.c-bucket.table").
2088
+ name: Name of the new column.
2089
+ definition: Optional typed-column definition for a typed table, e.g.
2090
+ ``{"type": "NUMBER", "length": "18,2", "nullable": False,
2091
+ "default": "0"}``. Omit for an untyped column.
2092
+ branch_id: If set, target a specific dev branch.
2093
+
2094
+ Returns:
2095
+ The updated table resource dict from the API.
2096
+ """
2097
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
2098
+ safe_id = quote(table_id, safe="")
2099
+ body: dict[str, Any] = {"name": name}
2100
+ if definition:
2101
+ body["definition"] = definition
2102
+ response = self._request("POST", f"{prefix}/tables/{safe_id}/columns", json=body)
2103
+ return response.json()
2104
+
2105
+ def get_file_info(self, file_id: int, branch_id: int | None = None) -> dict[str, Any]:
2106
+ """Get file metadata including download URL.
2107
+
2108
+ Args:
2109
+ file_id: Storage file ID (from export job results).
2110
+ branch_id: If set, query file from a specific dev branch scope.
2111
+
2112
+ Returns:
2113
+ File resource dict with 'url', 'isSliced', 'sizeBytes', etc.
2114
+ """
2115
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
2116
+ response = self._request(
2117
+ "GET",
2118
+ f"{prefix}/files/{file_id}",
2119
+ params={"federationToken": "1"},
2120
+ )
2121
+ return response.json()
2122
+
2123
+ def list_files(
2124
+ self,
2125
+ limit: int = 20,
2126
+ offset: int = 0,
2127
+ tags: list[str] | None = None,
2128
+ since_id: int | None = None,
2129
+ query: str | None = None,
2130
+ branch_id: int | None = None,
2131
+ ) -> list[dict[str, Any]]:
2132
+ """List Storage Files with optional filtering.
2133
+
2134
+ Args:
2135
+ limit: Max number of files to return.
2136
+ offset: Pagination offset.
2137
+ tags: Filter by tags (AND logic — all tags must match).
2138
+ since_id: Return only files with ID greater than this.
2139
+ query: Full-text search query on file name.
2140
+ branch_id: If set, list files from a specific dev branch.
2141
+
2142
+ Returns:
2143
+ List of file resource dicts.
2144
+ """
2145
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
2146
+ params: dict[str, Any] = {"limit": limit, "offset": offset}
2147
+ if tags:
2148
+ for i, tag in enumerate(tags):
2149
+ params[f"tags[{i}]"] = tag
2150
+ if since_id is not None:
2151
+ params["sinceId"] = since_id
2152
+ if query:
2153
+ params["q"] = query
2154
+ response = self._request("GET", f"{prefix}/files", params=params)
2155
+ return response.json()
2156
+
2157
+ def upload_file(
2158
+ self,
2159
+ file_path: str,
2160
+ name: str | None = None,
2161
+ tags: list[str] | None = None,
2162
+ is_permanent: bool = False,
2163
+ notify: bool = False,
2164
+ branch_id: int | None = None,
2165
+ ) -> dict[str, Any]:
2166
+ """Upload a local file to Storage Files.
2167
+
2168
+ Wraps prepare_file_upload + _upload_to_cloud into a single call.
2169
+
2170
+ Args:
2171
+ file_path: Local path to the file to upload.
2172
+ name: Custom filename (defaults to local file basename).
2173
+ tags: Optional list of tags to assign.
2174
+ is_permanent: If True, file is not auto-deleted after 15 days.
2175
+ notify: If True, send notification on upload completion.
2176
+ branch_id: If set, upload to a specific dev branch.
2177
+
2178
+ Returns:
2179
+ File resource dict with id, name, sizeBytes, tags, url.
2180
+ """
2181
+ p = Path(file_path)
2182
+ size_bytes = p.stat().st_size
2183
+ file_name = name or p.name
2184
+ upload_info = self.prepare_file_upload(
2185
+ name=file_name,
2186
+ size_bytes=size_bytes,
2187
+ tags=tags,
2188
+ is_permanent=is_permanent,
2189
+ notify=notify,
2190
+ )
2191
+ self._upload_to_cloud(upload_info, file_path)
2192
+ # Return file info (prepare response has the file metadata)
2193
+ return {
2194
+ "id": upload_info["id"],
2195
+ "name": upload_info.get("name", file_name),
2196
+ "sizeBytes": size_bytes,
2197
+ "tags": upload_info.get("tags", tags or []),
2198
+ "isPermanent": upload_info.get("isPermanent", is_permanent),
2199
+ "created": upload_info.get("created"),
2200
+ }
2201
+
2202
+ def delete_file(self, file_id: int, branch_id: int | None = None) -> None:
2203
+ """Delete a Storage File.
2204
+
2205
+ Args:
2206
+ file_id: Storage file ID.
2207
+ branch_id: If set, target a file in a specific dev branch scope.
2208
+ """
2209
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
2210
+ self._request("DELETE", f"{prefix}/files/{file_id}")
2211
+
2212
+ def tag_file(self, file_id: int, tag: str, branch_id: int | None = None) -> None:
2213
+ """Add a tag to a Storage File.
2214
+
2215
+ Args:
2216
+ file_id: Storage file ID.
2217
+ tag: Tag string to add.
2218
+ branch_id: If set, target a file in a specific dev branch scope.
2219
+ """
2220
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
2221
+ self._request("POST", f"{prefix}/files/{file_id}/tags", data={"tag": tag})
2222
+
2223
+ def untag_file(self, file_id: int, tag: str, branch_id: int | None = None) -> None:
2224
+ """Remove a tag from a Storage File.
2225
+
2226
+ Args:
2227
+ file_id: Storage file ID.
2228
+ tag: Tag string to remove.
2229
+ branch_id: If set, target a file in a specific dev branch scope.
2230
+ """
2231
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
2232
+ safe_tag = quote(tag, safe="")
2233
+ self._request("DELETE", f"{prefix}/files/{file_id}/tags/{safe_tag}")
2234
+
2235
def download_sliced_file(self, file_detail: dict[str, Any], output_path: str) -> int:
    """Download a sliced file by fetching manifest and concatenating slices.

    Handles S3 (SigV4 auth) and GCS (bearer token) providers.
    Decompresses gzipped slices transparently.

    Streams each slice chunk-by-chunk into a temp file and concatenates
    into ``output_path``. Peak RAM is O(chunk size), not O(slice size) —
    required for multi-GB tables on memory-constrained hosts (issue #187).

    The manifest `url` from file info is already a presigned URL (download
    directly). Manifest entries have cloud-native URLs (s3://, gs://) that
    need auth — we build HTTPS URLs from the s3Path/gcsPath credentials.

    Args:
        file_detail: Full file info dict from get_file_info()
            (must include provider credentials from federationToken=1).
        output_path: Local file path to write to.

    Returns:
        Number of bytes written.
    """
    import os
    import shutil
    import tempfile

    entries, base_url, downloader, _manifest_data = self._prepare_sliced_download(file_detail)

    # Stream each slice into a temp file, then copy-append into output.
    # Keeping per-slice temp files on disk (not in RAM) is the whole point.
    total = 0
    out_path = Path(output_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("wb") as out_fh:
        for entry in entries:
            entry_url = entry.get("url", "")
            slice_url = downloader.resolve_slice_url(base_url, entry_url, file_detail)
            is_gz = entry_url.split("?")[0].endswith(".gz")

            # The downloader writes the slice by *path*. Handing it the
            # name of a still-open NamedTemporaryFile is not portable (the
            # file cannot be reopened by name on Windows) and would read
            # stale data through the old handle if the writer replaces the
            # file. So: reserve a closed scratch file, let the downloader
            # fill it, reopen it for reading, and always remove it.
            fd, tmp_name = tempfile.mkstemp(dir=out_path.parent, prefix=".slice-")
            os.close(fd)
            tmp_path = Path(tmp_name)
            try:
                downloader.stream_to_file(slice_url, tmp_name, decompress_gzip=is_gz)
                with tmp_path.open("rb") as tmp_fh:
                    shutil.copyfileobj(tmp_fh, out_fh, length=FILE_DOWNLOAD_CHUNK_SIZE)
                total += tmp_path.stat().st_size
            finally:
                tmp_path.unlink(missing_ok=True)

    return total
2281
+
2282
def _prepare_sliced_download(
    self, file_detail: dict[str, Any]
) -> tuple[list[dict[str, Any]], str, "_CloudDownloader", bytes]:
    """Download and decode the slice manifest plus the download context.

    The manifest is small JSON (few KB even for TB tables), so reading it
    in one go is fine. Its entries are the per-slice URLs that callers
    iterate over.

    Args:
        file_detail: File info dict; its ``url`` is fetched as the manifest.

    Returns:
        A 4-tuple ``(entries, base_url, downloader, raw_manifest_bytes)``.
        The raw manifest is handy for callers that persist it next to the
        slices.

    Raises:
        KeboolaApiError: If the manifest contains no entries.
    """
    import json as json_mod

    # Built before any network traffic, as in the original call order.
    cloud = _CloudDownloader.create(file_detail)

    with httpx.Client(timeout=FILE_DOWNLOAD_TIMEOUT) as session:
        manifest_response = session.get(file_detail["url"])
        manifest_response.raise_for_status()
        raw_manifest = manifest_response.content

    slice_entries = json_mod.loads(raw_manifest).get("entries", [])
    if not slice_entries:
        raise KeboolaApiError(
            message="Sliced file manifest has no entries",
            status_code=500,
            error_code=ErrorCode.EXPORT_EMPTY_MANIFEST,
            retryable=False,
        )

    logger.info(
        "Downloading %d slices (provider=%s)",
        len(slice_entries),
        file_detail.get("provider", ""),
    )
    return slice_entries, cloud.resolve_base_url(file_detail), cloud, raw_manifest
2316
+
2317
def download_sliced_file_to_dir(
    self, file_detail: dict[str, Any], output_dir: str
) -> dict[str, Any]:
    """Download a sliced file, keeping every slice as its own local file.

    ``download_sliced_file()`` binary-concatenates slices; this method
    instead writes each manifest entry to a separate file in
    ``output_dir``. That is required for formats like Parquet, where each
    slice is a self-contained file with its own footer and cannot be
    safely concatenated.

    The original manifest is stored as ``output_dir/_manifest.json`` so
    the slice set stays self-describing. The leading underscore follows
    the Hive/Spark/pyarrow convention that makes Parquet readers skip the
    file when scanning the directory as a dataset.

    Gzip-compressed slices (typical for CSV) are decompressed
    transparently and lose their ``.gz`` suffix on disk. Parquet slices
    are written as-is (Snappy compression lives inside the format).

    Args:
        file_detail: Full file info dict from get_file_info() with
            federationToken=1 provider credentials.
        output_dir: Directory to write slices into. Created if missing.

    Returns:
        Dict with ``output_dir``, ``slice_count``, ``total_bytes``, and
        ``slices`` (list of ``{path, size_bytes}``).
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    entries, base_url, downloader, manifest_data = self._prepare_sliced_download(file_detail)

    # Keep the manifest next to the slices for traceability.
    manifest_path = target_dir / "_manifest.json"
    manifest_path.write_bytes(manifest_data)

    written_slices: list[dict[str, Any]] = []
    byte_total = 0
    for position, entry in enumerate(entries):
        entry_url = entry.get("url", "")
        slice_url = downloader.resolve_slice_url(base_url, entry_url, file_detail)

        # Derive the local filename from the URL path (query string cut).
        url_path = entry_url.split("?")[0]
        compressed = url_path.endswith(".gz")
        file_name = url_path.rsplit("/", 1)[-1]
        if compressed:
            file_name = file_name.removesuffix(".gz")
        # Nothing usable left in the URL: fall back to a positional name.
        file_name = file_name or f"part-{position:05d}"

        destination = target_dir / file_name
        size = downloader.stream_to_file(slice_url, destination, decompress_gzip=compressed)
        written_slices.append({"path": str(destination.resolve()), "size_bytes": size})
        byte_total += size

    return {
        "output_dir": str(target_dir.resolve()),
        "slice_count": len(written_slices),
        "total_bytes": byte_total,
        "slices": written_slices,
    }
2379
+
2380
def download_file(self, url: str, output_path: str) -> int:
    """Fetch a non-sliced file from a presigned URL onto local disk.

    The body is streamed chunk-by-chunk and gzip is decompressed on the
    fly, so peak RAM stays at O(chunk size) even for multi-GB payloads
    (issue #187).

    Args:
        url: Presigned download URL from file info.
        output_path: Local file path to write to.

    Returns:
        Number of bytes written (post-decompression if the URL is gzipped).
    """
    import gzip
    import shutil

    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)

    # Gzip is detected from the URL path only; the query string is ignored.
    url_without_query = url.rstrip("?").split("?")[0]
    is_gzipped = url_without_query.endswith(".gz")

    with httpx.Client(timeout=FILE_DOWNLOAD_TIMEOUT) as http:
        with http.stream("GET", url) as response:
            response.raise_for_status()
            raw_body: Any = _IterBytesReader(response.iter_bytes(FILE_DOWNLOAD_CHUNK_SIZE))
            if is_gzipped:
                reader: Any = gzip.GzipFile(fileobj=raw_body, mode="rb")
            else:
                reader = raw_body
            with destination.open("wb") as sink:
                shutil.copyfileobj(reader, sink, length=FILE_DOWNLOAD_CHUNK_SIZE)

    return destination.stat().st_size
2412
+
2413
def list_jobs(
    self,
    component_id: str | None = None,
    config_id: str | None = None,
    status: str | None = None,
    limit: int = DEFAULT_JOB_LIMIT,
    offset: int = 0,
) -> list[dict[str, Any]]:
    """Search jobs through the Queue API.

    Args:
        component_id: Optional filter by component ID.
        config_id: Optional filter by config ID (requires component_id).
        status: Optional filter by job status.
        limit: Max number of jobs to return (1-500).
        offset: Offset for pagination.

    Returns:
        List of job dicts from the Queue API.
    """
    search: dict[str, str | int] = {"limit": limit, "offset": offset}
    # Only filters with a truthy value are sent to the API.
    optional_filters = (
        ("component", component_id),
        ("config", config_id),
        ("status", status),
    )
    for key, value in optional_filters:
        if value:
            search[key] = value

    return self._queue_request("GET", "/search/jobs", params=search).json()
2443
+
2444
def list_jobs_grouped(
    self,
    jobs_per_group: int = DEFAULT_JOBS_PER_CONFIG,
    limit: int = DEFAULT_GROUPED_JOBS_LIMIT,
    sort_by: str = "startTime",
    sort_order: str = "desc",
    created_time_from: str | None = None,
) -> list[dict[str, Any]]:
    """Fetch Queue API jobs grouped by component+config.

    Calls GET /search/grouped-jobs, which returns the latest N jobs for
    each unique component+config combination in a single API call.

    Args:
        jobs_per_group: Max jobs per component+config group (1-500).
        limit: Max number of groups to return (1-500).
        sort_by: Sort field for jobs within each group.
        sort_order: Sort direction ("asc" or "desc").
        created_time_from: Optional ISO datetime filter
            (e.g. "2026-03-20T00:00:00Z").

    Returns:
        List of group dicts:
        [{"group": {"componentId": ..., "configId": ...}, "jobs": [...]}]
    """
    # A list of pairs (not a dict) because "groupBy[]" is repeated.
    grouping = [("groupBy[]", "componentId"), ("groupBy[]", "configId")]
    paging = [("jobsPerGroup", str(jobs_per_group)), ("limit", str(limit))]
    ordering = [("sortBy", sort_by), ("sortOrder", sort_order)]
    query_pairs: list[tuple[str, str]] = [*grouping, *paging, *ordering]
    if created_time_from:
        query_pairs.append(("filters[createdTimeFrom]", created_time_from))

    return self._queue_request("GET", "/search/grouped-jobs", params=query_pairs).json()
2480
+
2481
def get_job_detail(self, job_id: str) -> dict[str, Any]:
    """Fetch the full record of one Queue API job.

    Args:
        job_id: The job ID.

    Returns:
        Job detail dict from the Queue API.
    """
    # The ID is a URL path segment, so it is fully percent-encoded.
    endpoint = f"/jobs/{quote(job_id, safe='')}"
    return self._queue_request("GET", endpoint).json()
2493
+
2494
+ # --- Queue Job Creation ---
2495
+
2496
+ def create_job(
2497
+ self,
2498
+ component_id: str,
2499
+ config_id: str,
2500
+ config_data: dict[str, Any] | None = None,
2501
+ config_row_ids: list[str] | None = None,
2502
+ mode: str = "run",
2503
+ branch_id: int | None = None,
2504
+ variable_values_id: str | None = None,
2505
+ ) -> dict[str, Any]:
2506
+ """Create and run a Queue API job.
2507
+
2508
+ Args:
2509
+ component_id: Component ID (e.g. keboola.sandboxes).
2510
+ config_id: Configuration ID.
2511
+ config_data: Optional runtime config data override.
2512
+ config_row_ids: Optional list of config row IDs to run
2513
+ (omit to run entire config).
2514
+ mode: Job mode (default: run).
2515
+ branch_id: Optional dev branch ID. When set, the job runs
2516
+ on that branch instead of the default (production) branch.
2517
+ variable_values_id: Optional id of a row in the linked
2518
+ ``keboola.variables`` config. When set, the Queue API binds
2519
+ the row's values to the job's `{{ variable }}` placeholders.
2520
+ Omit for configurations that have no linked variables.
2521
+
2522
+ Returns:
2523
+ Job dict from the Queue API.
2524
+ """
2525
+ body: dict[str, Any] = {
2526
+ "component": component_id,
2527
+ "config": config_id,
2528
+ "mode": mode,
2529
+ }
2530
+ if branch_id is not None:
2531
+ body["branchId"] = str(branch_id)
2532
+ if config_data:
2533
+ body["configData"] = config_data
2534
+ if config_row_ids:
2535
+ body["configRowIds"] = config_row_ids
2536
+ if variable_values_id:
2537
+ body["variableValuesId"] = variable_values_id
2538
+ response = self._queue_request("POST", "/jobs", json=body)
2539
+ return response.json()
2540
+
2541
def kill_job(self, job_id: str) -> dict[str, Any]:
    """Ask the Queue API to terminate a running job.

    The call sets the job's desiredStatus to "terminating"; the executor
    moves the actual status asynchronously (waiting -> cancelled,
    processing -> terminating -> terminated). Poll get_job_detail until
    isFinished=True to observe the terminal state.

    Killable states per Queue API: created, waiting, processing. Killing a
    job in any other state yields HTTP 400 with a "not in one of killable
    states" message; callers that want idempotent behavior (e.g. bulk
    terminate after list_jobs under race conditions) should translate that
    into a no-op success at the service layer.

    Args:
        job_id: The Queue job ID.

    Returns:
        The JSON body of the kill response.
    """
    endpoint = f"/jobs/{quote(job_id, safe='')}/kill"
    return self._queue_request("POST", endpoint).json()
2558
+
2559
+ def fetch_job_events(self, run_id: str, limit: int | None = None) -> list[dict[str, Any]]:
2560
+ """Fetch events emitted during a job's run.
2561
+
2562
+ Wraps the Storage API's ``GET /v2/storage/events?runId={runId}``
2563
+ endpoint -- NOT a Queue API path. Queue jobs (Queue API v2) expose a
2564
+ ``runId`` on the job dict (typically equal to the job ``id``); the
2565
+ Storage Events API is the canonical event feed for the job. Returns
2566
+ the list in Storage API order (newest -> oldest; callers that want
2567
+ a chronological "tail" should reverse the slice).
2568
+
2569
+ Args:
2570
+ run_id: The job's ``runId`` (``job["runId"]``; falls back to
2571
+ ``job["id"]`` on legacy records where they match).
2572
+ limit: Optional server-side event cap. Storage API default is
2573
+ about 100; pass an explicit value to cover long runs.
2574
+
2575
+ Returns:
2576
+ List of event dicts. Each event typically has ``uuid``,
2577
+ ``event``, ``component``, ``message``, ``type``, ``created``,
2578
+ ``runId``, ``configurationId`` keys. Empty when the run emitted
2579
+ no events yet.
2580
+ """
2581
+ params: dict[str, Any] = {"runId": run_id}
2582
+ if limit is not None and limit > 0:
2583
+ params["limit"] = limit
2584
+ response = self._request("GET", "/v2/storage/events", params=params)
2585
+ payload = response.json()
2586
+ # Storage events returns a bare list. Tolerate a dict-wrapped
2587
+ # future shape defensively.
2588
+ if isinstance(payload, list):
2589
+ return payload
2590
+ if isinstance(payload, dict) and isinstance(payload.get("events"), list):
2591
+ return payload["events"]
2592
+ return []
2593
+
2594
def wait_for_queue_job(
    self,
    job_id: str,
    max_wait: float = STORAGE_JOB_MAX_WAIT,
    poll_strategy: str = DEFAULT_POLL_STRATEGY,
) -> dict[str, Any]:
    """Poll a Queue API job until it finishes or the deadline passes.

    The ``"exponential"`` strategy follows the piecewise ``JOB_POLL_CURVE``
    from constants (2s x 30 -> 5s x 48 -> 15s forever); ``"fixed"`` uses
    the legacy ``STORAGE_JOB_POLL_INTERVAL``. The curve matches the
    cadence used by FIIA and the official ``keboola-as-code`` Go CLI.

    Args:
        job_id: The Queue job ID.
        max_wait: Maximum seconds to wait (default: STORAGE_JOB_MAX_WAIT).
        poll_strategy: "exponential" (default) or "fixed". Any other
            value raises ValueError before the first network call.

    Returns:
        Completed job dict.

    Raises:
        ValueError: If poll_strategy is not one of VALID_POLL_STRATEGIES.
        KeboolaApiError: If the job fails (QUEUE_JOB_FAILED) or the
            deadline elapses before the job finishes (QUEUE_JOB_TIMEOUT).
    """
    if poll_strategy not in VALID_POLL_STRATEGIES:
        # A ValueError rather than KeboolaApiError: an invalid literal is
        # a programming error, not a bad API response. JobService
        # validates before reaching this layer, so hitting this path from
        # the CLI would be a bug in kbagent.
        raise ValueError(
            f"Invalid poll_strategy {poll_strategy!r}. "
            f"Expected one of: {sorted(VALID_POLL_STRATEGIES)}."
        )

    give_up_at = time.monotonic() + max_wait
    for pause in _iter_poll_intervals(poll_strategy):
        job = self.get_job_detail(job_id)

        if not job.get("isFinished"):
            # Never sleep past the deadline: trim the pause to the time
            # that is left, and stop polling once none remains.
            time_left = give_up_at - time.monotonic()
            if time_left <= 0:
                break
            time.sleep(min(pause, time_left))
            continue

        if job.get("status") != "error":
            return job

        # Finished with an error: surface the API's message when the
        # result is a dict, otherwise use the generic text.
        result = job.get("result", {})
        if isinstance(result, dict):
            error_msg = result.get("message", "Queue job failed")
        else:
            error_msg = "Queue job failed"
        raise KeboolaApiError(
            message=f"Queue job {job_id} failed: {error_msg}",
            status_code=500,
            error_code=ErrorCode.QUEUE_JOB_FAILED,
            retryable=False,
        )

    raise KeboolaApiError(
        message=f"Queue job {job_id} did not complete within {max_wait}s",
        status_code=504,
        error_code=ErrorCode.QUEUE_JOB_TIMEOUT,
        retryable=True,
    )
2664
+
2665
+ # --- Workspace CRUD ---
2666
+
2667
+ def list_workspaces(self, branch_id: int | None = None) -> list[dict[str, Any]]:
2668
+ """List all workspaces in the project."""
2669
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
2670
+ response = self._request("GET", f"{prefix}/workspaces")
2671
+ return response.json()
2672
+
2673
+ def get_workspace(self, workspace_id: int, branch_id: int | None = None) -> dict[str, Any]:
2674
+ """Get workspace details (note: password is NOT included)."""
2675
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
2676
+ response = self._request("GET", f"{prefix}/workspaces/{workspace_id}")
2677
+ return response.json()
2678
+
2679
+ def delete_workspace(self, workspace_id: int, branch_id: int | None = None) -> None:
2680
+ """Delete a workspace (synchronous)."""
2681
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
2682
+ self._request("DELETE", f"{prefix}/workspaces/{workspace_id}")
2683
+
2684
+ def reset_workspace_password(
2685
+ self, workspace_id: int, branch_id: int | None = None
2686
+ ) -> dict[str, Any]:
2687
+ """Reset workspace password. Returns new password."""
2688
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
2689
+ response = self._request("POST", f"{prefix}/workspaces/{workspace_id}/password")
2690
+ return response.json()
2691
+
2692
+ def create_sandbox_config(
2693
+ self,
2694
+ name: str,
2695
+ description: str = "",
2696
+ backend_size: str = "small",
2697
+ branch_id: int | None = None,
2698
+ ) -> dict[str, Any]:
2699
+ """Create a keboola.sandboxes configuration.
2700
+
2701
+ This is needed to make workspaces visible in the Keboola UI.
2702
+ The UI only shows workspaces tied to a sandboxes config.
2703
+
2704
+ Args:
2705
+ name: Human-readable name for the workspace.
2706
+ description: Optional description.
2707
+ backend_size: Backend size (small, medium, large).
2708
+ branch_id: Branch ID. If provided, creates config in that branch.
2709
+
2710
+ Returns:
2711
+ Configuration dict with id, name, etc.
2712
+ """
2713
+ config = {
2714
+ "parameters": {
2715
+ "runtime": {"shared": False},
2716
+ "storage": {"input": {"tables": []}, "output": {"tables": []}},
2717
+ "parameters": {"id": "", "blocks": []},
2718
+ "backendSize": backend_size,
2719
+ },
2720
+ "runtime": {"shared": False},
2721
+ }
2722
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
2723
+ response = self._request(
2724
+ "POST",
2725
+ f"{prefix}/components/keboola.sandboxes/configs",
2726
+ data={
2727
+ "name": name,
2728
+ "description": description,
2729
+ "configuration": json.dumps(config),
2730
+ },
2731
+ )
2732
+ return response.json()
2733
+
2734
+ def delete_config(
2735
+ self, component_id: str, config_id: str, branch_id: int | None = None
2736
+ ) -> None:
2737
+ """Delete a component configuration.
2738
+
2739
+ Args:
2740
+ component_id: Component ID.
2741
+ config_id: Configuration ID.
2742
+ branch_id: Branch ID. If provided, deletes config in that branch.
2743
+ """
2744
+ safe_component = quote(component_id, safe="")
2745
+ safe_config = quote(config_id, safe="")
2746
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
2747
+ self._request(
2748
+ "DELETE",
2749
+ f"{prefix}/components/{safe_component}/configs/{safe_config}",
2750
+ )
2751
+
2752
+ def create_config_workspace(
2753
+ self,
2754
+ branch_id: int,
2755
+ component_id: str,
2756
+ config_id: str,
2757
+ backend: str = "snowflake",
2758
+ login_type: str | None = None,
2759
+ public_key: str | None = None,
2760
+ ) -> dict[str, Any]:
2761
+ """Create a workspace tied to a specific configuration.
2762
+
2763
+ Args:
2764
+ branch_id: Branch ID (use main branch ID for production).
2765
+ component_id: Component ID (e.g. keboola.snowflake-transformation).
2766
+ config_id: Configuration ID.
2767
+ backend: Workspace backend.
2768
+ login_type: Optional Storage API loginType. Omitted when None.
2769
+ public_key: Optional public key for key-pair workspaces. Omitted when None.
2770
+
2771
+ Returns:
2772
+ Workspace dict including connection credentials.
2773
+ """
2774
+ safe_component = quote(component_id, safe="")
2775
+ safe_config = quote(config_id, safe="")
2776
+ payload: dict[str, Any] = {"backend": backend}
2777
+ if login_type is not None:
2778
+ payload["loginType"] = login_type
2779
+ if public_key is not None:
2780
+ payload["publicKey"] = public_key
2781
+
2782
+ response = self._request(
2783
+ "POST",
2784
+ f"/v2/storage/branch/{branch_id}/components/{safe_component}/configs/{safe_config}/workspaces",
2785
+ json=payload,
2786
+ )
2787
+ return response.json()
2788
+
2789
def list_config_workspaces(
    self,
    branch_id: int,
    component_id: str,
    config_id: str,
) -> list[dict[str, Any]]:
    """Return the workspaces bound to one specific configuration.

    Args:
        branch_id: Branch ID the configuration lives in.
        component_id: Component ID.
        config_id: Configuration ID.
    """
    component_part = quote(component_id, safe="")
    config_part = quote(config_id, safe="")
    endpoint = (
        f"/v2/storage/branch/{branch_id}"
        f"/components/{component_part}/configs/{config_part}/workspaces"
    )
    return self._request("GET", endpoint).json()
2803
+
2804
+ def load_workspace_tables(
2805
+ self,
2806
+ workspace_id: int,
2807
+ tables: list[dict[str, Any]],
2808
+ branch_id: int | None = None,
2809
+ preserve: bool = False,
2810
+ ) -> dict[str, Any]:
2811
+ """Load tables into a workspace (async operation).
2812
+
2813
+ Args:
2814
+ workspace_id: Target workspace ID.
2815
+ tables: List of table load definitions, each with at minimum:
2816
+ - source: table ID (e.g. "in.c-bucket.table")
2817
+ - destination: target table name in workspace
2818
+ branch_id: Branch ID. Required for workspaces on dev branches.
2819
+ preserve: If True, keep existing tables in the workspace. Default is False
2820
+ (workspace is cleared before loading).
2821
+
2822
+ Returns:
2823
+ Completed storage job dict (polls until done).
2824
+
2825
+ Raises:
2826
+ KeboolaApiError: If the load job fails or times out.
2827
+ """
2828
+ prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
2829
+ body: dict[str, Any] = {"input": tables, "preserve": preserve}
2830
+ response = self._request(
2831
+ "POST",
2832
+ f"{prefix}/workspaces/{workspace_id}/load",
2833
+ json=body,
2834
+ )
2835
+ return self._wait_for_storage_job(response.json())
2836
+
2837
+ # --- Query Service ---
2838
+
2839
def submit_query(
    self,
    branch_id: int,
    workspace_id: int,
    statements: list[str],
    transactional: bool = False,
) -> dict[str, Any]:
    """Send SQL statements to the Query Service for execution.

    Args:
        branch_id: Branch ID.
        workspace_id: Workspace ID.
        statements: List of SQL statements to execute.
        transactional: Whether to wrap in a transaction.

    Returns:
        Query job dict with id and status.
    """
    endpoint = f"/api/v1/branches/{branch_id}/workspaces/{workspace_id}/queries"
    submission: dict[str, Any] = {"statements": statements, "transactional": transactional}
    return self._query_request("POST", endpoint, json=submission).json()
2867
+
2868
def get_query_job(self, query_job_id: str) -> dict[str, Any]:
    """Get the status of a Query Service job.

    Args:
        query_job_id: The query job ID.

    Returns:
        The JSON body of the Query Service response for this job.
    """
    # Percent-encode the ID like the other ID-in-path methods of this
    # client (get_job_detail, kill_job) so reserved characters such as
    # "/" or "?" cannot alter the request path.
    safe_job_id = quote(query_job_id, safe="")
    response = self._query_request("GET", f"/api/v1/queries/{safe_job_id}")
    return response.json()
2872
+
2873
def export_query_results(
    self,
    query_job_id: str,
    statement_id: str,
    file_type: str = "csv",
) -> str:
    """Download the exported result of one statement as text.

    Args:
        query_job_id: The query job ID.
        statement_id: The statement ID within the job.
        file_type: Export format, sent as the ``fileType`` query parameter.

    Returns:
        The response body as text (raw CSV for the default file type).
    """
    export_path = f"/api/v1/queries/{query_job_id}/{statement_id}/export"
    query_params = {"fileType": file_type}
    reply = self._query_request("GET", export_path, params=query_params)
    return reply.text
2890
+
2891
def get_query_results(
    self,
    query_job_id: str,
    statement_id: str,
    offset: int = 0,
    page_size: int = QUERY_RESULTS_PAGE_SIZE,
) -> dict[str, Any]:
    """Read one page of a statement's inline results from the Query Service.

    This is the JSON counterpart of :meth:`export_query_results`: rather than
    producing a CSV export (the slow warehouse UNLOAD path), it reads the
    already-computed result set inline, which is much faster for interactive
    queries. The endpoint is paginated, so callers advance ``offset`` by
    ``page_size`` to walk the full result set.

    Args:
        query_job_id: The query job ID.
        statement_id: The statement ID within the job.
        offset: Row offset to start from (for pagination).
        page_size: Maximum rows to return in this page.

    Returns:
        Raw QueryResult dict, e.g.::

            {
                "status": "completed",
                "columns": [{"name": "id", "type": "INTEGER", "nullable": false}],
                "data": [[1, "a"], [2, "b"]],
                "numberOfRows": 2,
            }
    """
    results_path = f"/api/v1/queries/{query_job_id}/{statement_id}/results"
    paging = {"offset": offset, "pageSize": page_size}
    reply = self._query_request("GET", results_path, params=paging)
    return reply.json()
2927
+
2928
def get_query_history(
    self,
    branch_id: int,
    workspace_id: int,
) -> dict[str, Any]:
    """Fetch the query history of a workspace.

    Args:
        branch_id: Branch ID used in the request path.
        workspace_id: Workspace ID used in the request path.

    Returns:
        Decoded JSON body of the workspace's ``queries`` listing.
    """
    history_path = f"/api/v1/branches/{branch_id}/workspaces/{workspace_id}/queries"
    reply = self._query_request("GET", history_path)
    return reply.json()
2939
+
2940
def wait_for_query_job(self, query_job_id: str) -> dict[str, Any]:
    """Block until a Query Service job finishes, fails, or the wait budget runs out.

    The job is re-fetched with :meth:`get_query_job` every
    ``QUERY_JOB_POLL_INTERVAL`` seconds. The deadline is measured on the
    monotonic clock and is only checked before each fetch, so the final sleep
    may run slightly past it.

    Args:
        query_job_id: The query job ID.

    Returns:
        The job dict as returned by the fetch that reported ``"completed"``.

    Raises:
        KeboolaApiError: With ``QUERY_JOB_FAILED`` when the job reports
            ``"error"`` or ``"failed"``, or with ``QUERY_JOB_TIMEOUT`` when
            ``QUERY_JOB_MAX_WAIT`` seconds elapse first.
    """
    give_up_at = time.monotonic() + QUERY_JOB_MAX_WAIT

    while True:
        if time.monotonic() >= give_up_at:
            raise KeboolaApiError(
                message=f"Query job {query_job_id} did not complete within {QUERY_JOB_MAX_WAIT}s",
                status_code=504,
                error_code=ErrorCode.QUERY_JOB_TIMEOUT,
                retryable=True,
            )

        snapshot = self.get_query_job(query_job_id)
        state = snapshot.get("status", "")

        if state in ("error", "failed"):
            raise KeboolaApiError(
                message=f"Query job failed: {_extract_query_job_error(snapshot)}",
                status_code=500,
                error_code=ErrorCode.QUERY_JOB_FAILED,
                retryable=False,
            )
        if state == "completed":
            return snapshot

        # Any other status is treated as "still running".
        time.sleep(QUERY_JOB_POLL_INTERVAL)
2973
+
2974
+
2975
+ # The Query Service surfaces BigQuery errors as a serialized object string, e.g.
2976
+ # {Location: "query"; Message: "Syntax error: Unexpected identifier ..."; Reason: "invalidQuery"}
2977
+ # Pull out the human-readable `Message: "..."` part so a BigQuery failure reads
2978
+ # like Snowflake's plain text instead of leaking the wrapper into the user's red
2979
+ # error box. Mirrors keboola-mcp-server's `_BigQueryWorkspace._format_error_message`.
2980
+ _BQ_ERROR_MESSAGE_RE = re.compile(r'Message:\s*"((?:[^"\\]|\\.)*)"')
2981
+
2982
+
2983
+ def _unwrap_bigquery_error(message: str) -> str:
2984
+ """Extract the inner message from a serialized BigQuery Query-Service error.
2985
+
2986
+ Snowflake errors are plain strings with no ``Message: "..."`` wrapper, so
2987
+ they pass through unchanged. Only the BigQuery object shape is rewritten.
2988
+ """
2989
+ if message and (match := _BQ_ERROR_MESSAGE_RE.search(message)):
2990
+ return match.group(1).replace('\\"', '"')
2991
+ return message
2992
+
2993
+
2994
def _extract_query_job_error(job: dict[str, Any]) -> str:
    """Build the most informative error text available for a failed query job.

    For a failed job, the Query Service `/api/v1/queries/{id}` response puts
    the real Snowflake / BigQuery error in ``statements[i].error``; the
    top-level ``error`` field is usually absent. Reading only the top-level
    field produced the unhelpful "Query job failed: Query execution failed"
    message users saw in the SQL editor's red error box (#287).

    Lookup order:
        1. Every statement whose ``status`` is ``"error"`` or ``"failed"`` and
           whose ``error`` yields non-empty text. In multi-statement jobs each
           entry is prefixed with ``Statement N: `` (1-based); entries are
           joined with newlines.
        2. The top-level ``error`` field, normalized the same way.
        3. A fixed generic sentence, so the caller never gets an empty string.

    Error values may be strings, dicts (the first non-blank string found under
    ``message``, ``error`` or ``detail`` is used), or any other object (its
    ``str()`` is used). BigQuery's serialized wrapper is removed via
    ``_unwrap_bigquery_error``.

    The result is intended to be embedded in
    ``KeboolaApiError(message=f"Query job failed: ...")``.
    """

    def _normalize(err: Any) -> str:
        if isinstance(err, str):
            candidate = err.strip()
        elif isinstance(err, dict):
            candidate = next(
                (
                    value.strip()
                    for value in map(err.get, ("message", "error", "detail"))
                    if isinstance(value, str) and value.strip()
                ),
                "",
            )
        else:
            candidate = "" if err is None else str(err).strip()
        # BigQuery nests the real text in a serialized object; plain
        # Snowflake text is left as it is.
        return _unwrap_bigquery_error(candidate)

    statements = job.get("statements") or []
    # A lone statement gets no "Statement 1:" label: in the common case it
    # would only add noise to the editor's red box.
    label_each = len(statements) != 1

    collected: list[str] = []
    for position, stmt in enumerate(statements, start=1):
        if not isinstance(stmt, dict) or stmt.get("status") not in ("error", "failed"):
            continue
        text = _normalize(stmt.get("error"))
        if text:
            collected.append(f"Statement {position}: {text}" if label_each else text)

    if collected:
        return "\n".join(collected)

    fallback = _normalize(job.get("error"))
    return fallback or "Query execution failed (no error details from Query Service)"
3059
+
3060
+
3061
+ # ---------------------------------------------------------------------------
3062
+ # Cloud storage upload helpers
3063
+ # ---------------------------------------------------------------------------
3064
+
3065
+
3066
+ def _build_abs_upload_url(abs_params: dict[str, Any]) -> str:
3067
+ """Build Azure Blob Storage upload URL from absUploadParams.
3068
+
3069
+ Parses SASConnectionString to extract BlobEndpoint and SharedAccessSignature,
3070
+ then constructs: ``{BlobEndpoint}/{container}/{blobName}?{SAS}``.
3071
+
3072
+ The ``url`` field in the API response is read-only (``sp=rl``).
3073
+ The write-capable SAS (``sp=rwl``) is only in ``absUploadParams``.
3074
+
3075
+ Args:
3076
+ abs_params: The absUploadParams dict from files/prepare response.
3077
+
3078
+ Returns:
3079
+ Full HTTPS URL with write-capable SAS token.
3080
+ """
3081
+ blob_name = abs_params["blobName"]
3082
+ container = abs_params["container"]
3083
+ sas_string = abs_params["absCredentials"]["SASConnectionString"]
3084
+
3085
+ # Format: "BlobEndpoint=https://...;SharedAccessSignature=sv=2017-11-09&..."
3086
+ # partition("=") splits on first "=" only, preserving "=" in SAS values.
3087
+ parts: dict[str, str] = {}
3088
+ for segment in sas_string.split(";"):
3089
+ key, sep, value = segment.partition("=")
3090
+ if sep:
3091
+ parts[key] = value
3092
+
3093
+ blob_endpoint = parts.get("BlobEndpoint", "").rstrip("/")
3094
+ sas = parts.get("SharedAccessSignature", "")
3095
+
3096
+ return f"{blob_endpoint}/{container}/{blob_name}?{sas}"
3097
+
3098
+
3099
+ # ---------------------------------------------------------------------------
3100
+ # Cloud storage download helpers (S3 SigV4, GCS bearer, ABS signed URL)
3101
+ # ---------------------------------------------------------------------------
3102
+
3103
+
3104
+ class _IterBytesReader:
3105
+ """Adapt an httpx iter_bytes() iterator to a .read(n) file-like interface.
3106
+
3107
+ shutil.copyfileobj and gzip.GzipFile both need a binary stream with
3108
+ read(size). httpx exposes an iterator instead, so we buffer the current
3109
+ chunk and hand out at most ``size`` bytes per read, refilling from the
3110
+ iterator as needed. The buffer holds at most one iterator chunk at a time
3111
+ (~1 MiB), so total memory stays bounded regardless of response size.
3112
+ """
3113
+
3114
+ def __init__(self, chunks: Any) -> None:
3115
+ self._chunks = iter(chunks)
3116
+ self._buf = b""
3117
+
3118
+ def read(self, size: int = -1) -> bytes:
3119
+ if size is None or size < 0:
3120
+ pieces = [self._buf]
3121
+ self._buf = b""
3122
+ pieces.extend(self._chunks)
3123
+ return b"".join(pieces)
3124
+ while len(self._buf) < size:
3125
+ try:
3126
+ self._buf += next(self._chunks)
3127
+ except StopIteration:
3128
+ break
3129
+ out = self._buf[:size]
3130
+ self._buf = self._buf[size:]
3131
+ return out
3132
+
3133
+
3134
+ class _CloudDownloader:
3135
+ """Abstraction for downloading from cloud storage using Keboola file credentials.
3136
+
3137
+ Supports three cloud backends:
3138
+ - AWS S3: Uses SigV4 signing with temporary credentials
3139
+ - GCP GCS: Uses OAuth2 bearer token
3140
+ - Azure ABS: Uses presigned/SAS URLs
3141
+ """
3142
+
3143
+ def __init__(self, provider: str, auth_fn: Any) -> None:
3144
+ self._provider = provider
3145
+ self._auth_fn = auth_fn
3146
+
3147
+ @staticmethod
3148
+ def create(file_detail: dict[str, Any]) -> "_CloudDownloader":
3149
+ """Create a downloader from file detail response.
3150
+
3151
+ Args:
3152
+ file_detail: Response from GET /v2/storage/files/{id}?federationToken=1.
3153
+ """
3154
+ provider = file_detail.get("provider", "")
3155
+
3156
+ if provider == "aws":
3157
+ creds = file_detail.get("credentials", {})
3158
+ region = file_detail.get("region", "us-east-1")
3159
+ return _CloudDownloader(
3160
+ provider="aws",
3161
+ auth_fn=lambda url: _s3_signed_headers(url, creds, region),
3162
+ )
3163
+ elif provider == "gcp":
3164
+ gcs_creds = file_detail.get("gcsCredentials", {})
3165
+ token = gcs_creds.get("access_token", "")
3166
+ token_type = gcs_creds.get("token_type", "Bearer")
3167
+ return _CloudDownloader(
3168
+ provider="gcp",
3169
+ auth_fn=lambda _url: {"Authorization": f"{token_type} {token}"},
3170
+ )
3171
+ elif provider == "azure":
3172
+ # Azure: SAS token from absCredentials for authenticating slice downloads
3173
+ abs_creds = file_detail.get("absCredentials", {})
3174
+ sas_string = abs_creds.get("SASConnectionString", "")
3175
+ # Parse "BlobEndpoint=https://...;SharedAccessSignature=sv=..."
3176
+ sas_parts: dict[str, str] = {}
3177
+ for segment in sas_string.split(";"):
3178
+ key, sep, value = segment.partition("=")
3179
+ if sep:
3180
+ sas_parts[key] = value
3181
+ blob_endpoint = sas_parts.get("BlobEndpoint", "").rstrip("/")
3182
+ sas = sas_parts.get("SharedAccessSignature", "")
3183
+ return _CloudDownloader(
3184
+ provider="azure",
3185
+ auth_fn=lambda _url, _be=blob_endpoint, _sas=sas: {
3186
+ "_blob_endpoint": _be,
3187
+ "_sas": _sas,
3188
+ },
3189
+ )
3190
+ else:
3191
+ # Other: presigned URLs, no extra auth needed
3192
+ return _CloudDownloader(provider=provider, auth_fn=lambda _url: {})
3193
+
3194
+ def resolve_base_url(self, file_detail: dict[str, Any]) -> str:
3195
+ """Build the HTTPS base URL for downloading slices.
3196
+
3197
+ Returns:
3198
+ Base HTTPS URL (e.g. "https://bucket.s3.region.amazonaws.com/key/prefix/").
3199
+ """
3200
+ if self._provider == "aws":
3201
+ s3_path = file_detail.get("s3Path", {})
3202
+ bucket = s3_path.get("bucket", "")
3203
+ key = s3_path.get("key", "")
3204
+ region = file_detail.get("region", "us-east-1")
3205
+ return f"https://{bucket}.s3.{region}.amazonaws.com/{key}"
3206
+ elif self._provider == "gcp":
3207
+ gcs_path = file_detail.get("gcsPath", {})
3208
+ bucket = gcs_path.get("bucket", "")
3209
+ key = gcs_path.get("key", "")
3210
+ return f"https://storage.googleapis.com/{bucket}/{key}"
3211
+ elif self._provider == "azure":
3212
+ # Azure: base URL from absCredentials endpoint + container
3213
+ auth_info = self._auth_fn("")
3214
+ blob_endpoint = auth_info.get("_blob_endpoint", "")
3215
+ abs_path = file_detail.get("absPath", {})
3216
+ container = abs_path.get("container", "")
3217
+ return f"{blob_endpoint}/{container}/"
3218
+ else:
3219
+ # Other: entries should be full URLs
3220
+ return ""
3221
+
3222
+ def resolve_slice_url(
3223
+ self,
3224
+ base_url: str,
3225
+ entry_url: str,
3226
+ file_detail: dict[str, Any],
3227
+ ) -> str:
3228
+ """Convert a manifest entry URL to a downloadable HTTPS URL.
3229
+
3230
+ Manifest entries use cloud-native URLs (s3://bucket/key/slice.gz,
3231
+ azure://container/blob). This strips the cloud prefix and builds
3232
+ an HTTPS URL for download.
3233
+
3234
+ Args:
3235
+ base_url: HTTPS base URL from resolve_base_url().
3236
+ entry_url: Raw entry URL from manifest (e.g. "s3://bucket/key/slice.gz").
3237
+ file_detail: Full file detail dict.
3238
+
3239
+ Returns:
3240
+ Full HTTPS URL for the slice.
3241
+ """
3242
+ if self._provider == "aws":
3243
+ # entry_url: "s3://bucket/key/prefix/slice.csv.gz"
3244
+ # base_url: "https://bucket.s3.region.amazonaws.com/key/prefix/"
3245
+ s3_path = file_detail.get("s3Path", {})
3246
+ bucket = s3_path.get("bucket", "")
3247
+ key = s3_path.get("key", "")
3248
+ prefix = f"s3://{bucket}/{key}"
3249
+ relative = entry_url.removeprefix(prefix) if entry_url.startswith(prefix) else entry_url
3250
+ return base_url + relative
3251
+ elif self._provider == "gcp":
3252
+ # entry_url: "gs://bucket/key/prefix/slice.csv.gz"
3253
+ gcs_path = file_detail.get("gcsPath", {})
3254
+ bucket = gcs_path.get("bucket", "")
3255
+ key = gcs_path.get("key", "")
3256
+ prefix = f"gs://{bucket}/{key}"
3257
+ relative = entry_url.removeprefix(prefix) if entry_url.startswith(prefix) else entry_url
3258
+ return base_url + relative
3259
+ elif self._provider == "azure":
3260
+ # entry_url: "azure://account.blob.core.windows.net/container/blob.gz"
3261
+ # Replace azure:// with https:// and append SAS token
3262
+ auth_info = self._auth_fn("")
3263
+ sas = auth_info.get("_sas", "")
3264
+ if entry_url.startswith("azure://"):
3265
+ https_url = "https://" + entry_url[len("azure://") :]
3266
+ return f"{https_url}?{sas}"
3267
+ return entry_url
3268
+ else:
3269
+ # Other: entry URLs should be full HTTPS URLs
3270
+ return entry_url
3271
+
3272
+ def _request_headers(self, url: str) -> dict[str, str]:
3273
+ """Resolve auth headers for a cloud URL.
3274
+
3275
+ Azure stores metadata (endpoint, SAS) in the auth_fn result (keys
3276
+ prefixed with "_"); those are filtered out here. The SAS token itself
3277
+ is embedded into the URL by resolve_slice_url().
3278
+ """
3279
+ auth_result = self._auth_fn(url)
3280
+ return {k: v for k, v in auth_result.items() if not k.startswith("_")}
3281
+
3282
+ def stream_to_file(self, url: str, dest: "Path | str", decompress_gzip: bool) -> int:
3283
+ """Stream a cloud URL directly to a local file in bounded-memory chunks.
3284
+
3285
+ Used for slice downloads where the payload can be hundreds of MB per
3286
+ slice. Peak RAM is O(chunk size), not O(slice size), which is what
3287
+ makes multi-GB table exports survive on small VMs (see issue #187).
3288
+
3289
+ Args:
3290
+ url: Full HTTPS URL (with auth baked in for Azure).
3291
+ dest: Local file path to write to.
3292
+ decompress_gzip: If True, wrap the response stream in gzip.GzipFile
3293
+ so the decompressed bytes are what lands on disk. Streaming
3294
+ gzip keeps both compressed and decompressed state bounded.
3295
+
3296
+ Returns:
3297
+ Number of bytes written to ``dest`` (post-decompression if applicable).
3298
+ """
3299
+ import gzip
3300
+ import shutil
3301
+
3302
+ headers = self._request_headers(url)
3303
+ dest_path = Path(dest)
3304
+ with (
3305
+ httpx.Client(timeout=FILE_DOWNLOAD_TIMEOUT) as http,
3306
+ http.stream("GET", url, headers=headers) as response,
3307
+ ):
3308
+ response.raise_for_status()
3309
+ source: Any = _IterBytesReader(response.iter_bytes(FILE_DOWNLOAD_CHUNK_SIZE))
3310
+ if decompress_gzip:
3311
+ source = gzip.GzipFile(fileobj=source, mode="rb")
3312
+ with dest_path.open("wb") as fh:
3313
+ shutil.copyfileobj(source, fh, length=FILE_DOWNLOAD_CHUNK_SIZE)
3314
+
3315
+ return dest_path.stat().st_size
3316
+
3317
+
3318
+ def _s3_signed_headers(
3319
+ url: str,
3320
+ creds: dict[str, str],
3321
+ region: str,
3322
+ method: str = "GET",
3323
+ payload: bytes = b"",
3324
+ ) -> dict[str, str]:
3325
+ """Generate AWS SigV4 signed headers for an S3 request.
3326
+
3327
+ Implements minimal AWS Signature Version 4 signing using only stdlib
3328
+ (hmac, hashlib, urllib.parse). No boto3/botocore dependency required.
3329
+
3330
+ Args:
3331
+ url: Full S3 URL (https://bucket.s3.region.amazonaws.com/key).
3332
+ creds: Dict with AccessKeyId, SecretAccessKey, SessionToken.
3333
+ region: AWS region (e.g. "us-east-1").
3334
+ method: HTTP method (GET or PUT).
3335
+ payload: Request body bytes (empty for GET).
3336
+
3337
+ Returns:
3338
+ Dict of headers to include in the request.
3339
+ """
3340
+ import datetime
3341
+ import hashlib
3342
+ import hmac
3343
+ from urllib.parse import unquote, urlparse
3344
+
3345
+ access_key = creds["AccessKeyId"]
3346
+ secret_key = creds["SecretAccessKey"]
3347
+ session_token = creds.get("SessionToken", "")
3348
+
3349
+ parsed = urlparse(url)
3350
+ host = parsed.hostname or ""
3351
+ path = parsed.path or "/"
3352
+ query = parsed.query or ""
3353
+
3354
+ now = datetime.datetime.now(datetime.UTC)
3355
+ date_stamp = now.strftime("%Y%m%d")
3356
+ amz_date = now.strftime("%Y%m%dT%H%M%SZ")
3357
+
3358
+ service = "s3"
3359
+ scope = f"{date_stamp}/{region}/{service}/aws4_request"
3360
+
3361
+ # Canonical request
3362
+ canonical_uri = quote(unquote(path), safe="/~")
3363
+ if query:
3364
+ params_list = sorted(query.split("&"))
3365
+ canonical_querystring = "&".join(params_list)
3366
+ else:
3367
+ canonical_querystring = ""
3368
+
3369
+ headers_to_sign: dict[str, str] = {"host": host, "x-amz-date": amz_date}
3370
+ if session_token:
3371
+ headers_to_sign["x-amz-security-token"] = session_token
3372
+
3373
+ signed_headers = ";".join(sorted(headers_to_sign.keys()))
3374
+ canonical_headers = "".join(f"{k}:{v}\n" for k, v in sorted(headers_to_sign.items()))
3375
+
3376
+ payload_hash = hashlib.sha256(payload).hexdigest()
3377
+
3378
+ canonical_request = "\n".join(
3379
+ [
3380
+ method,
3381
+ canonical_uri,
3382
+ canonical_querystring,
3383
+ canonical_headers,
3384
+ signed_headers,
3385
+ payload_hash,
3386
+ ]
3387
+ )
3388
+
3389
+ # String to sign
3390
+ string_to_sign = "\n".join(
3391
+ [
3392
+ "AWS4-HMAC-SHA256",
3393
+ amz_date,
3394
+ scope,
3395
+ hashlib.sha256(canonical_request.encode("utf-8")).hexdigest(),
3396
+ ]
3397
+ )
3398
+
3399
+ # Signing key
3400
+ def _hmac_sha256(key: bytes, msg: str) -> bytes:
3401
+ return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()
3402
+
3403
+ k_date = _hmac_sha256(f"AWS4{secret_key}".encode(), date_stamp)
3404
+ k_region = _hmac_sha256(k_date, region)
3405
+ k_service = _hmac_sha256(k_region, service)
3406
+ k_signing = _hmac_sha256(k_service, "aws4_request")
3407
+
3408
+ signature = hmac.new(k_signing, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
3409
+
3410
+ authorization = (
3411
+ f"AWS4-HMAC-SHA256 Credential={access_key}/{scope}, "
3412
+ f"SignedHeaders={signed_headers}, Signature={signature}"
3413
+ )
3414
+
3415
+ result: dict[str, str] = {
3416
+ "Authorization": authorization,
3417
+ "x-amz-date": amz_date,
3418
+ "x-amz-content-sha256": payload_hash,
3419
+ }
3420
+ if session_token:
3421
+ result["x-amz-security-token"] = session_token
3422
+ return result