keboola-cli 0.63.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keboola_agent_cli/__init__.py +34 -0
- keboola_agent_cli/__main__.py +5 -0
- keboola_agent_cli/_ui_dist/assets/arc-DhFYIddx.js +2 -0
- keboola_agent_cli/_ui_dist/assets/arc-DhFYIddx.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/architecture-7EHR7CIX-hNCijx_H.js +1 -0
- keboola_agent_cli/_ui_dist/assets/architectureDiagram-3BPJPVTR-C6hUlprM.js +37 -0
- keboola_agent_cli/_ui_dist/assets/architectureDiagram-3BPJPVTR-C6hUlprM.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/array-BifhSqXX.js +2 -0
- keboola_agent_cli/_ui_dist/assets/array-BifhSqXX.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/blockDiagram-GPEHLZMM-DC7qY9i4.js +133 -0
- keboola_agent_cli/_ui_dist/assets/blockDiagram-GPEHLZMM-DC7qY9i4.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/c4Diagram-AAUBKEIU-5Lh44evt.js +11 -0
- keboola_agent_cli/_ui_dist/assets/c4Diagram-AAUBKEIU-5Lh44evt.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/channel-DBMrXlxx.js +2 -0
- keboola_agent_cli/_ui_dist/assets/channel-DBMrXlxx.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/chunk-2J33WTMH-Coy82EBh.js +2 -0
- keboola_agent_cli/_ui_dist/assets/chunk-2J33WTMH-Coy82EBh.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/chunk-3OPIFGDE-BQC5CRHI.js +63 -0
- keboola_agent_cli/_ui_dist/assets/chunk-3OPIFGDE-BQC5CRHI.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/chunk-4BX2VUAB-DUuEt70o.js +2 -0
- keboola_agent_cli/_ui_dist/assets/chunk-4BX2VUAB-DUuEt70o.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/chunk-55IACEB6-BvR-6chF.js +2 -0
- keboola_agent_cli/_ui_dist/assets/chunk-55IACEB6-BvR-6chF.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/chunk-5ZQYHXKU-BjcTN7ul.js +3 -0
- keboola_agent_cli/_ui_dist/assets/chunk-5ZQYHXKU-BjcTN7ul.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/chunk-727SXJPM-C0zxqqRN.js +207 -0
- keboola_agent_cli/_ui_dist/assets/chunk-727SXJPM-C0zxqqRN.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/chunk-AQP2D5EJ-CXf7rIlZ.js +232 -0
- keboola_agent_cli/_ui_dist/assets/chunk-AQP2D5EJ-CXf7rIlZ.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/chunk-BSJP7CBP-Oj_FO9Q7.js +2 -0
- keboola_agent_cli/_ui_dist/assets/chunk-BSJP7CBP-Oj_FO9Q7.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/chunk-CSCIHK7Q-CcTsLrFc.js +124 -0
- keboola_agent_cli/_ui_dist/assets/chunk-CSCIHK7Q-CcTsLrFc.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/chunk-FMBD7UC4-FH-zLkkW.js +16 -0
- keboola_agent_cli/_ui_dist/assets/chunk-FMBD7UC4-FH-zLkkW.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/chunk-L5ZTLDWV-B1Ky_e7O.js +2 -0
- keboola_agent_cli/_ui_dist/assets/chunk-L5ZTLDWV-B1Ky_e7O.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/chunk-ND2GUHAM-BHz1rpbm.js +2 -0
- keboola_agent_cli/_ui_dist/assets/chunk-ND2GUHAM-BHz1rpbm.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/chunk-NNHCCRGN-DlpIbxXb.js +160 -0
- keboola_agent_cli/_ui_dist/assets/chunk-NNHCCRGN-DlpIbxXb.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/chunk-NZK2D7GU-tnrSoegS.js +2 -0
- keboola_agent_cli/_ui_dist/assets/chunk-NZK2D7GU-tnrSoegS.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/chunk-O5CBEL6O-DxxqDH0l.js +71 -0
- keboola_agent_cli/_ui_dist/assets/chunk-O5CBEL6O-DxxqDH0l.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/chunk-QZHKN3VN-CSjc2gjj.js +2 -0
- keboola_agent_cli/_ui_dist/assets/chunk-QZHKN3VN-CSjc2gjj.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/classDiagram-4FO5ZUOK-BuZcZu85.js +2 -0
- keboola_agent_cli/_ui_dist/assets/classDiagram-4FO5ZUOK-BuZcZu85.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/classDiagram-v2-Q7XG4LA2-BuZcZu85.js +2 -0
- keboola_agent_cli/_ui_dist/assets/classDiagram-v2-Q7XG4LA2-BuZcZu85.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/cose-bilkent-S5V4N54A-Y0L8LDMa.js +2 -0
- keboola_agent_cli/_ui_dist/assets/cose-bilkent-S5V4N54A-Y0L8LDMa.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/cytoscape.esm-C8YCVR3_.js +322 -0
- keboola_agent_cli/_ui_dist/assets/cytoscape.esm-C8YCVR3_.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/dagre-BM42HDAG-UZ-9BTqF.js +5 -0
- keboola_agent_cli/_ui_dist/assets/dagre-BM42HDAG-UZ-9BTqF.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/dagre-Bx709z4p.js +2 -0
- keboola_agent_cli/_ui_dist/assets/dagre-Bx709z4p.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/defaultLocale-C8Fc0cco.js +2 -0
- keboola_agent_cli/_ui_dist/assets/defaultLocale-C8Fc0cco.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/diagram-2AECGRRQ-DoDQ60wi.js +44 -0
- keboola_agent_cli/_ui_dist/assets/diagram-2AECGRRQ-DoDQ60wi.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/diagram-5GNKFQAL-CMGFxpUs.js +11 -0
- keboola_agent_cli/_ui_dist/assets/diagram-5GNKFQAL-CMGFxpUs.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/diagram-KO2AKTUF-1uGDa-Iu.js +4 -0
- keboola_agent_cli/_ui_dist/assets/diagram-KO2AKTUF-1uGDa-Iu.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/diagram-LMA3HP47-XtFH7B51.js +25 -0
- keboola_agent_cli/_ui_dist/assets/diagram-LMA3HP47-XtFH7B51.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/diagram-OG6HWLK6-B4_Te1T5.js +25 -0
- keboola_agent_cli/_ui_dist/assets/diagram-OG6HWLK6-B4_Te1T5.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/dist-Di6zmlv0.js +2 -0
- keboola_agent_cli/_ui_dist/assets/dist-Di6zmlv0.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/erDiagram-TEJ5UH35-NjQkrdFt.js +86 -0
- keboola_agent_cli/_ui_dist/assets/erDiagram-TEJ5UH35-NjQkrdFt.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/eventmodeling-FCH6USID-BrJMIks8.js +1 -0
- keboola_agent_cli/_ui_dist/assets/flowDiagram-I6XJVG4X-CIr8DWl7.js +163 -0
- keboola_agent_cli/_ui_dist/assets/flowDiagram-I6XJVG4X-CIr8DWl7.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/ganttDiagram-6RSMTGT7-C1VY_xbQ.js +293 -0
- keboola_agent_cli/_ui_dist/assets/ganttDiagram-6RSMTGT7-C1VY_xbQ.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/gitGraph-WXDBUCRP-COacYjo-.js +1 -0
- keboola_agent_cli/_ui_dist/assets/gitGraphDiagram-PVQCEYII-DQT8-kg2.js +107 -0
- keboola_agent_cli/_ui_dist/assets/gitGraphDiagram-PVQCEYII-DQT8-kg2.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/graphlib-B8gBHxth.js +2 -0
- keboola_agent_cli/_ui_dist/assets/graphlib-B8gBHxth.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/index-CMq50kkV.css +1 -0
- keboola_agent_cli/_ui_dist/assets/index-D8W97DAz.js +118 -0
- keboola_agent_cli/_ui_dist/assets/index-D8W97DAz.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/info-J43DQDTF-DdCTRIzU.js +1 -0
- keboola_agent_cli/_ui_dist/assets/infoDiagram-5YYISTIA-C77rsoTp.js +3 -0
- keboola_agent_cli/_ui_dist/assets/infoDiagram-5YYISTIA-C77rsoTp.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/init-D6jRqBbL.js +2 -0
- keboola_agent_cli/_ui_dist/assets/init-D6jRqBbL.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/ishikawaDiagram-YF4QCWOH-BcTbXaLy.js +71 -0
- keboola_agent_cli/_ui_dist/assets/ishikawaDiagram-YF4QCWOH-BcTbXaLy.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/journeyDiagram-JHISSGLW-BejeAJQ_.js +140 -0
- keboola_agent_cli/_ui_dist/assets/journeyDiagram-JHISSGLW-BejeAJQ_.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/kanban-definition-UN3LZRKU-BRNz_UrH.js +90 -0
- keboola_agent_cli/_ui_dist/assets/kanban-definition-UN3LZRKU-BRNz_UrH.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/katex-C4eR7coU.js +258 -0
- keboola_agent_cli/_ui_dist/assets/katex-C4eR7coU.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/line-CzAQKFbJ.js +2 -0
- keboola_agent_cli/_ui_dist/assets/line-CzAQKFbJ.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/linear-DUNFFdck.js +2 -0
- keboola_agent_cli/_ui_dist/assets/linear-DUNFFdck.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/mermaid-parser.core-CpuBOkFa.js +5 -0
- keboola_agent_cli/_ui_dist/assets/mermaid-parser.core-CpuBOkFa.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/mindmap-definition-RKZ34NQL-9EJQNjH0.js +97 -0
- keboola_agent_cli/_ui_dist/assets/mindmap-definition-RKZ34NQL-9EJQNjH0.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/ordinal-hYBb2elL.js +2 -0
- keboola_agent_cli/_ui_dist/assets/ordinal-hYBb2elL.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/packet-YPE3B663-DLiiw_B2.js +1 -0
- keboola_agent_cli/_ui_dist/assets/path-BWPyau1x.js +2 -0
- keboola_agent_cli/_ui_dist/assets/path-BWPyau1x.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/pie-LRSECV5Y-CRoO8G1g.js +1 -0
- keboola_agent_cli/_ui_dist/assets/pieDiagram-4H26LBE5-XH4cy6Cb.js +31 -0
- keboola_agent_cli/_ui_dist/assets/pieDiagram-4H26LBE5-XH4cy6Cb.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/quadrantDiagram-W4KKPZXB-fdhc93U8.js +8 -0
- keboola_agent_cli/_ui_dist/assets/quadrantDiagram-W4KKPZXB-fdhc93U8.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/radar-GUYGQ44K-DAlLVJHm.js +1 -0
- keboola_agent_cli/_ui_dist/assets/requirementDiagram-4Y6WPE33-a94eP3R9.js +85 -0
- keboola_agent_cli/_ui_dist/assets/requirementDiagram-4Y6WPE33-a94eP3R9.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/rough.esm-CSKSodPl.js +2 -0
- keboola_agent_cli/_ui_dist/assets/rough.esm-CSKSodPl.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/sankeyDiagram-5OEKKPKP-jcBa02sp.js +41 -0
- keboola_agent_cli/_ui_dist/assets/sankeyDiagram-5OEKKPKP-jcBa02sp.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/sequenceDiagram-3UESZ5HK-A5-GGM-e.js +163 -0
- keboola_agent_cli/_ui_dist/assets/sequenceDiagram-3UESZ5HK-A5-GGM-e.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/src-ZI-V_AF0.js +2 -0
- keboola_agent_cli/_ui_dist/assets/src-ZI-V_AF0.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/stateDiagram-AJRCARHV-BKAA5rqE.js +2 -0
- keboola_agent_cli/_ui_dist/assets/stateDiagram-AJRCARHV-BKAA5rqE.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/stateDiagram-v2-BHNVJYJU-DnJwJBsE.js +2 -0
- keboola_agent_cli/_ui_dist/assets/stateDiagram-v2-BHNVJYJU-DnJwJBsE.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/timeline-definition-PNZ67QCA-Cy39jp8b.js +121 -0
- keboola_agent_cli/_ui_dist/assets/timeline-definition-PNZ67QCA-Cy39jp8b.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/treeView-BLDUP644-DbLYl23-.js +1 -0
- keboola_agent_cli/_ui_dist/assets/treemap-LRROVOQU-Bp0eGlOt.js +1 -0
- keboola_agent_cli/_ui_dist/assets/vennDiagram-CIIHVFJN-BGECKubd.js +35 -0
- keboola_agent_cli/_ui_dist/assets/vennDiagram-CIIHVFJN-BGECKubd.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/wardley-L42UT6IY-D4yH4jqS.js +1 -0
- keboola_agent_cli/_ui_dist/assets/wardleyDiagram-YWT4CUSO-D6XRG3cZ.js +79 -0
- keboola_agent_cli/_ui_dist/assets/wardleyDiagram-YWT4CUSO-D6XRG3cZ.js.map +1 -0
- keboola_agent_cli/_ui_dist/assets/xychartDiagram-2RQKCTM6-DRre-pfZ.js +8 -0
- keboola_agent_cli/_ui_dist/assets/xychartDiagram-2RQKCTM6-DRre-pfZ.js.map +1 -0
- keboola_agent_cli/_ui_dist/index.html +50 -0
- keboola_agent_cli/ai_client.py +83 -0
- keboola_agent_cli/auto_update.py +550 -0
- keboola_agent_cli/changelog.py +1198 -0
- keboola_agent_cli/cli.py +448 -0
- keboola_agent_cli/client.py +3422 -0
- keboola_agent_cli/commands/__init__.py +0 -0
- keboola_agent_cli/commands/_data_app_git.py +343 -0
- keboola_agent_cli/commands/_helpers.py +377 -0
- keboola_agent_cli/commands/_metadata_input.py +49 -0
- keboola_agent_cli/commands/_semantic_layer_crud.py +632 -0
- keboola_agent_cli/commands/_semantic_layer_helpers.py +44 -0
- keboola_agent_cli/commands/_semantic_layer_reference_data.py +247 -0
- keboola_agent_cli/commands/agent.py +968 -0
- keboola_agent_cli/commands/branch.py +423 -0
- keboola_agent_cli/commands/changelog.py +168 -0
- keboola_agent_cli/commands/component.py +216 -0
- keboola_agent_cli/commands/config.py +2442 -0
- keboola_agent_cli/commands/context.py +1481 -0
- keboola_agent_cli/commands/data_app.py +1279 -0
- keboola_agent_cli/commands/dev_portal.py +584 -0
- keboola_agent_cli/commands/doctor.py +37 -0
- keboola_agent_cli/commands/encrypt.py +145 -0
- keboola_agent_cli/commands/feature.py +311 -0
- keboola_agent_cli/commands/flow.py +948 -0
- keboola_agent_cli/commands/http_client.py +157 -0
- keboola_agent_cli/commands/init.py +279 -0
- keboola_agent_cli/commands/job.py +661 -0
- keboola_agent_cli/commands/kai.py +301 -0
- keboola_agent_cli/commands/lineage.py +1464 -0
- keboola_agent_cli/commands/org.py +292 -0
- keboola_agent_cli/commands/permissions.py +360 -0
- keboola_agent_cli/commands/project.py +1192 -0
- keboola_agent_cli/commands/repl.py +243 -0
- keboola_agent_cli/commands/schedule.py +340 -0
- keboola_agent_cli/commands/search.py +178 -0
- keboola_agent_cli/commands/semantic_layer.py +939 -0
- keboola_agent_cli/commands/serve.py +272 -0
- keboola_agent_cli/commands/sharing.py +340 -0
- keboola_agent_cli/commands/storage.py +2630 -0
- keboola_agent_cli/commands/stream.py +266 -0
- keboola_agent_cli/commands/sync.py +1277 -0
- keboola_agent_cli/commands/tool.py +206 -0
- keboola_agent_cli/commands/version.py +186 -0
- keboola_agent_cli/commands/workspace.py +635 -0
- keboola_agent_cli/config_store.py +582 -0
- keboola_agent_cli/constants.py +528 -0
- keboola_agent_cli/data_science_client.py +342 -0
- keboola_agent_cli/dev_portal_client.py +323 -0
- keboola_agent_cli/errors.py +248 -0
- keboola_agent_cli/http_base.py +315 -0
- keboola_agent_cli/json_utils.py +126 -0
- keboola_agent_cli/lib.py +536 -0
- keboola_agent_cli/manage_client.py +324 -0
- keboola_agent_cli/metastore_client.py +214 -0
- keboola_agent_cli/models.py +427 -0
- keboola_agent_cli/output.py +1084 -0
- keboola_agent_cli/permissions.py +469 -0
- keboola_agent_cli/py.typed +3 -0
- keboola_agent_cli/result_models.py +271 -0
- keboola_agent_cli/server/__init__.py +34 -0
- keboola_agent_cli/server/agent_runner.py +1289 -0
- keboola_agent_cli/server/agents_store.py +325 -0
- keboola_agent_cli/server/app.py +764 -0
- keboola_agent_cli/server/auth.py +117 -0
- keboola_agent_cli/server/dependencies.py +149 -0
- keboola_agent_cli/server/pricing.py +303 -0
- keboola_agent_cli/server/routers/__init__.py +1 -0
- keboola_agent_cli/server/routers/agents.py +616 -0
- keboola_agent_cli/server/routers/ai_chat.py +129 -0
- keboola_agent_cli/server/routers/branches.py +133 -0
- keboola_agent_cli/server/routers/components.py +48 -0
- keboola_agent_cli/server/routers/configs.py +507 -0
- keboola_agent_cli/server/routers/data_apps.py +384 -0
- keboola_agent_cli/server/routers/dev_portal.py +67 -0
- keboola_agent_cli/server/routers/encrypt.py +35 -0
- keboola_agent_cli/server/routers/feature.py +179 -0
- keboola_agent_cli/server/routers/flows.py +204 -0
- keboola_agent_cli/server/routers/health.py +53 -0
- keboola_agent_cli/server/routers/jobs.py +175 -0
- keboola_agent_cli/server/routers/kai.py +80 -0
- keboola_agent_cli/server/routers/lineage.py +226 -0
- keboola_agent_cli/server/routers/mcp.py +70 -0
- keboola_agent_cli/server/routers/members.py +170 -0
- keboola_agent_cli/server/routers/org.py +96 -0
- keboola_agent_cli/server/routers/projects.py +106 -0
- keboola_agent_cli/server/routers/schedules.py +54 -0
- keboola_agent_cli/server/routers/search.py +30 -0
- keboola_agent_cli/server/routers/semantic_layer.py +650 -0
- keboola_agent_cli/server/routers/sharing.py +86 -0
- keboola_agent_cli/server/routers/storage.py +574 -0
- keboola_agent_cli/server/routers/stream.py +100 -0
- keboola_agent_cli/server/routers/workspaces.py +302 -0
- keboola_agent_cli/server/run_broadcaster.py +329 -0
- keboola_agent_cli/server/sse.py +25 -0
- keboola_agent_cli/services/__init__.py +0 -0
- keboola_agent_cli/services/_encryption.py +217 -0
- keboola_agent_cli/services/_semantic_layer_cascade.py +147 -0
- keboola_agent_cli/services/_semantic_layer_crud.py +382 -0
- keboola_agent_cli/services/_semantic_layer_internals.py +1078 -0
- keboola_agent_cli/services/_semantic_layer_lookup.py +181 -0
- keboola_agent_cli/services/_semantic_layer_reference_data.py +217 -0
- keboola_agent_cli/services/_sync_bindings.py +456 -0
- keboola_agent_cli/services/_sync_branch.py +191 -0
- keboola_agent_cli/services/_sync_bulk.py +228 -0
- keboola_agent_cli/services/_sync_clone.py +163 -0
- keboola_agent_cli/services/_sync_models.py +97 -0
- keboola_agent_cli/services/_sync_push_ops.py +369 -0
- keboola_agent_cli/services/_sync_storage.py +376 -0
- keboola_agent_cli/services/_sync_writeback.py +167 -0
- keboola_agent_cli/services/agent_service.py +458 -0
- keboola_agent_cli/services/base.py +175 -0
- keboola_agent_cli/services/branch_service.py +588 -0
- keboola_agent_cli/services/component_service.py +694 -0
- keboola_agent_cli/services/config_service.py +2099 -0
- keboola_agent_cli/services/data_app_git_service.py +224 -0
- keboola_agent_cli/services/data_app_service.py +2082 -0
- keboola_agent_cli/services/deep_lineage_service.py +1322 -0
- keboola_agent_cli/services/dev_portal_service.py +345 -0
- keboola_agent_cli/services/doctor_service.py +445 -0
- keboola_agent_cli/services/encrypt_service.py +87 -0
- keboola_agent_cli/services/feature_service.py +268 -0
- keboola_agent_cli/services/flow_service.py +769 -0
- keboola_agent_cli/services/flow_validation.py +188 -0
- keboola_agent_cli/services/http_forwarder_service.py +236 -0
- keboola_agent_cli/services/job_idempotency_store.py +285 -0
- keboola_agent_cli/services/job_service.py +797 -0
- keboola_agent_cli/services/kai_service.py +367 -0
- keboola_agent_cli/services/lineage_service.py +274 -0
- keboola_agent_cli/services/mcp_service.py +1498 -0
- keboola_agent_cli/services/mcp_transport.py +259 -0
- keboola_agent_cli/services/member_service.py +593 -0
- keboola_agent_cli/services/org_service.py +619 -0
- keboola_agent_cli/services/project_service.py +947 -0
- keboola_agent_cli/services/repo_validate_service.py +767 -0
- keboola_agent_cli/services/schedule_service.py +731 -0
- keboola_agent_cli/services/search_service.py +331 -0
- keboola_agent_cli/services/semantic_layer_service.py +1497 -0
- keboola_agent_cli/services/sharing_service.py +307 -0
- keboola_agent_cli/services/storage_service.py +2524 -0
- keboola_agent_cli/services/stream_service.py +395 -0
- keboola_agent_cli/services/sync_service.py +2244 -0
- keboola_agent_cli/services/variables_service.py +447 -0
- keboola_agent_cli/services/version_service.py +1038 -0
- keboola_agent_cli/services/workspace_service.py +1103 -0
- keboola_agent_cli/stream_client.py +217 -0
- keboola_agent_cli/sync/__init__.py +1 -0
- keboola_agent_cli/sync/branch_mapping.py +174 -0
- keboola_agent_cli/sync/clone.py +211 -0
- keboola_agent_cli/sync/code_extraction.py +655 -0
- keboola_agent_cli/sync/config_format.py +290 -0
- keboola_agent_cli/sync/diff_engine.py +566 -0
- keboola_agent_cli/sync/git_utils.py +93 -0
- keboola_agent_cli/sync/manifest.py +162 -0
- keboola_agent_cli/sync/naming.py +90 -0
- keboola_agent_cli/sync/secrets.py +62 -0
- keboola_agent_cli/sync/sql_split.py +134 -0
- keboola_cli-0.63.4.dist-info/METADATA +308 -0
- keboola_cli-0.63.4.dist-info/RECORD +306 -0
- keboola_cli-0.63.4.dist-info/WHEEL +4 -0
- keboola_cli-0.63.4.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,3422 @@
|
|
|
1
|
+
"""Keboola API client with retry, timeouts, and token masking.
|
|
2
|
+
|
|
3
|
+
This is the only module that communicates with the Keboola Storage API
|
|
4
|
+
and the Keboola Queue API. All HTTP details, endpoint URLs, and error
|
|
5
|
+
mapping are encapsulated here.
|
|
6
|
+
|
|
7
|
+
Inherits shared retry/error logic from BaseHttpClient.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
import re
|
|
13
|
+
import time
|
|
14
|
+
from collections.abc import Iterator
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Any
|
|
18
|
+
from urllib.parse import quote
|
|
19
|
+
|
|
20
|
+
import httpx
|
|
21
|
+
|
|
22
|
+
from .constants import (
|
|
23
|
+
DEFAULT_GROUPED_JOBS_LIMIT,
|
|
24
|
+
DEFAULT_JOB_LIMIT,
|
|
25
|
+
DEFAULT_JOBS_PER_CONFIG,
|
|
26
|
+
DEFAULT_POLL_STRATEGY,
|
|
27
|
+
DEFAULT_TIMEOUT,
|
|
28
|
+
EXPORT_JOB_MAX_WAIT,
|
|
29
|
+
FILE_DOWNLOAD_CHUNK_SIZE,
|
|
30
|
+
FILE_DOWNLOAD_TIMEOUT,
|
|
31
|
+
FILE_UPLOAD_TIMEOUT,
|
|
32
|
+
IMPORT_JOB_MAX_WAIT,
|
|
33
|
+
JOB_POLL_CURVE,
|
|
34
|
+
METADATA_NOT_FOUND,
|
|
35
|
+
OAUTH_HOST,
|
|
36
|
+
OAUTH_PATH,
|
|
37
|
+
QUERY_JOB_MAX_WAIT,
|
|
38
|
+
QUERY_JOB_POLL_INTERVAL,
|
|
39
|
+
QUERY_RESULTS_PAGE_SIZE,
|
|
40
|
+
STORAGE_JOB_MAX_WAIT,
|
|
41
|
+
STORAGE_JOB_POLL_INTERVAL,
|
|
42
|
+
VALID_POLL_STRATEGIES,
|
|
43
|
+
)
|
|
44
|
+
from .errors import ErrorCode, KeboolaApiError
|
|
45
|
+
from .http_base import BaseHttpClient
|
|
46
|
+
from .models import TokenVerifyResponse
|
|
47
|
+
|
|
48
|
+
logger = logging.getLogger(__name__)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass(frozen=True)
|
|
52
|
+
class InlineQueryResult:
|
|
53
|
+
"""One statement's result fetched via the fast inline ``/results`` path."""
|
|
54
|
+
|
|
55
|
+
columns: list[dict[str, Any]] # [{"name", "type", "nullable"}]
|
|
56
|
+
rows: list[list[Any]] # row values, row-major; capped at the requested limit
|
|
57
|
+
total_rows: int | None # numberOfRows reported by the warehouse (full count)
|
|
58
|
+
truncated: bool # True when the warehouse has more rows than we fetched
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _collect_inline_results(
    client: "KeboolaClient",
    query_job_id: str,
    statement_id: str,
    limit: int,
) -> InlineQueryResult:
    """Fetch up to ``limit`` rows of a statement by paging ``GET .../results``.

    Every request uses the fixed ``QUERY_RESULTS_PAGE_SIZE`` rather than a
    page size derived from ``limit``: the endpoint enforces
    ``100 <= pageSize <= 100000``, so a small ``limit`` (e.g. 5) would be
    rejected with a 400. The cap is applied locally instead, and a ``limit``
    larger than one page is served by advancing ``offset`` page by page.

    Paging stops when enough rows have been gathered, when a page comes back
    shorter than a full page, or when the running offset reaches the row count
    reported by the service.

    This helper sits next to the client (not in a service) because it is plain
    pagination over :meth:`KeboolaClient.get_query_results` with no config or
    business logic, which lets ``WorkspaceService`` and the public library
    facade (:mod:`keboola_agent_cli.lib`) share it.

    Args:
        client: Client used to fetch each results page.
        query_job_id: Query job identifier passed through to the client.
        statement_id: Statement identifier passed through to the client.
        limit: Maximum number of rows to return.

    Returns:
        An ``InlineQueryResult`` holding the column metadata, at most ``limit``
        rows, the reported total (if any) and the truncation flag.
    """
    gathered: list[list[Any]] = []
    column_meta: list[dict[str, Any]] = []
    reported_total: int | None = None
    next_offset = 0
    ran_dry = False

    while not ran_dry and len(gathered) < limit:
        page = client.get_query_results(
            query_job_id,
            statement_id,
            offset=next_offset,
            page_size=QUERY_RESULTS_PAGE_SIZE,
        )
        # Column metadata and the total are taken from the first page that
        # provides them; later pages are only consulted while still missing.
        if not column_meta:
            column_meta = page.get("columns", []) or []
        if reported_total is None:
            reported_total = page.get("numberOfRows")

        batch = page.get("data", []) or []
        gathered.extend(batch)
        fetched = len(batch)

        if fetched < QUERY_RESULTS_PAGE_SIZE:
            # A short page means the warehouse has nothing further to return.
            ran_dry = True
        else:
            next_offset += fetched
            # Landing exactly on the reported total at a page boundary ends
            # the walk without a round-trip for the empty page that follows
            # (e.g. total is a multiple of the page size and limit > total).
            ran_dry = reported_total is not None and next_offset >= reported_total

    rows = gathered[:limit]
    if reported_total is None:
        # Without a count from the Query Service, decide from how the loop
        # ended: hitting the cap before the data ran out suggests more rows
        # may exist. This deliberately errs on the side of over-warning.
        truncated = len(gathered) >= limit and not ran_dry
    else:
        truncated = reported_total > len(rows)

    return InlineQueryResult(
        columns=column_meta,
        rows=rows,
        total_rows=reported_total,
        truncated=truncated,
    )
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _iter_poll_intervals(strategy: str) -> Iterator[float]:
    """Generate the sleep intervals (in seconds) used while polling a Queue job.

    Supported strategies:

    - ``"fixed"``: ``STORAGE_JOB_POLL_INTERVAL`` is yielded without end
      (legacy behavior, kept as an opt-out via ``--poll-strategy fixed``).
    - ``"exponential"``: ``JOB_POLL_CURVE`` is walked segment by segment.
      Each ``(interval, count)`` segment yields ``interval`` ``count`` times;
      a segment whose ``count`` is zero or negative yields ``interval``
      without end, so it only makes sense as the final segment.

    Any value other than ``"fixed"`` takes the curve path. If the curve has no
    open-ended segment, the generator finishes after its last segment.
    Termination is otherwise left to the caller: the deadline check in
    ``wait_for_queue_job`` stops iteration.

    Args:
        strategy: Name of the polling strategy.

    Yields:
        The next sleep interval in seconds.
    """
    if strategy != "fixed":
        for delay, repeats in JOB_POLL_CURVE:
            emitted = 0
            # A non-positive repeat count marks an open-ended segment.
            while repeats <= 0 or emitted < repeats:
                yield delay
                emitted += 1
        return

    while True:
        yield STORAGE_JOB_POLL_INTERVAL
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
class KeboolaClient(BaseHttpClient):
|
|
151
|
+
"""HTTP client for the Keboola Storage API and Queue API.
|
|
152
|
+
|
|
153
|
+
Provides methods to interact with Keboola endpoints with built-in
|
|
154
|
+
retry logic (exponential backoff for 429/5xx), timeouts, and
|
|
155
|
+
automatic token masking in error messages.
|
|
156
|
+
|
|
157
|
+
Inherits _do_request() and _raise_api_error() from BaseHttpClient.
|
|
158
|
+
"""
|
|
159
|
+
|
|
160
|
+
def __init__(self, stack_url: str, token: str) -> None:
    """Set up the Storage API client for one stack and token.

    Args:
        stack_url: Base URL of the Keboola stack; trailing slashes are
            stripped before it is stored and passed to the base class.
        token: Storage API token, sent as the ``X-StorageApi-Token`` header
            and also handed to the base class.
    """
    normalized_url = stack_url.rstrip("/")
    self._stack_url = normalized_url
    super().__init__(
        base_url=normalized_url,
        token=token,
        headers={"X-StorageApi-Token": token},
        timeout=DEFAULT_TIMEOUT,
    )
    # Per-service HTTP clients start out unset and are created on demand.
    self._queue_client: httpx.Client | None = None
    self._query_client: httpx.Client | None = None
    self._encrypt_client: httpx.Client | None = None
    # Project feature flags, filled in lazily by the first has_feature() /
    # get_project_features() call. Deferring it avoids an extra verify_token
    # round-trip on every kbagent invocation; the cost is paid only when
    # business logic really has to branch on a feature (e.g. legacy
    # fake-branch storage detection).
    self._features_cache: frozenset[str] | None = None
|
|
180
|
+
|
|
181
|
+
@property
def _queue_base_url(self) -> str:
    """Queue service base URL, derived from the stored stack URL."""
    service_name = "queue"
    return self._derive_service_url(self._stack_url, service_name)
|
|
184
|
+
|
|
185
|
+
@property
def _query_base_url(self) -> str:
    """Query service base URL, derived from the stored stack URL."""
    service_name = "query"
    return self._derive_service_url(self._stack_url, service_name)
|
|
188
|
+
|
|
189
|
+
@property
def _encrypt_base_url(self) -> str:
    """Encryption service base URL, derived from the stored stack URL."""
    service_name = "encryption"
    return self._derive_service_url(self._stack_url, service_name)
|
|
192
|
+
|
|
193
|
+
def close(self) -> None:
    """Close the main HTTP client and every sub-client that was created.

    The clients are closed in the order main, queue, query, encryption.
    Each close is attempted even if an earlier one raises, so a failure
    while closing one client cannot leave the others open. If any close
    raises, an exception still propagates to the caller after all of them
    have been attempted.
    """
    from contextlib import ExitStack

    with ExitStack() as cleanup:
        # ExitStack runs callbacks in reverse registration order, so they
        # are registered back to front to keep queue -> query -> encryption.
        for sub_client in (self._encrypt_client, self._query_client, self._queue_client):
            if sub_client is not None:
                cleanup.callback(sub_client.close)
        # The main client is closed first; the callbacks run afterwards,
        # whether or not this call raises.
        super().close()
|
|
202
|
+
|
|
203
|
+
def __enter__(self) -> "KeboolaClient":
    """Enter a ``with`` block; the client itself is the managed object."""
    managed = self
    return managed
|
|
205
|
+
|
|
206
|
+
def __exit__(self, *args: Any) -> None:
    """Leave a ``with`` block by closing the client.

    The exception details passed in ``args`` are ignored, and nothing is
    returned, so an exception raised inside the block is not suppressed.
    """
    self.close()
    return None
|
|
208
|
+
|
|
209
|
+
def _request(self, method: str, path: str, **kwargs: Any) -> httpx.Response:
    """Send a Storage API request through the inherited ``_do_request``.

    Args:
        method: HTTP method name.
        path: Request path.
        **kwargs: Extra arguments forwarded unchanged to ``_do_request``.

    Returns:
        The response returned by ``_do_request``.
    """
    response = self._do_request(method, path, **kwargs)
    return response
|
|
212
|
+
|
|
213
|
+
def _get_or_create_sub_client(
    self,
    attr: str,
    base_url: str,
    headers: dict[str, str] | None = None,
) -> httpx.Client:
    """Return the sub-client stored under ``attr``, creating it on first use.

    Args:
        attr: Instance attribute name holding the sub-client
            (e.g. "_queue_client").
        base_url: Base URL used when a new sub-client has to be created.
        headers: Headers for a newly created sub-client. When omitted, a
            copy of the main client's headers is used.

    Returns:
        The already-stored client if there is one; otherwise a new
        ``httpx.Client`` built with ``DEFAULT_TIMEOUT``, which is stored on
        ``attr`` before being returned.
    """
    existing = getattr(self, attr)
    if existing is not None:
        return existing

    if headers is None:
        # Go through httpx's public ``headers`` property instead of the
        # private ``_headers`` attribute, which is an implementation detail.
        # NOTE(review): assumes self._client is the httpx.Client set up by
        # BaseHttpClient -- confirm.
        effective_headers = self._client.headers.copy()
    else:
        effective_headers = headers

    created = httpx.Client(
        base_url=base_url,
        timeout=DEFAULT_TIMEOUT,
        headers=effective_headers,
    )
    setattr(self, attr, created)
    return created
|
|
235
|
+
|
|
236
|
+
def _queue_request(self, method: str, path: str, **kwargs: Any) -> httpx.Response:
    """Send a Queue API request through the inherited ``_do_request``.

    The Queue sub-client is created on first use and reused afterwards.

    Args:
        method: HTTP method name.
        path: Request path.
        **kwargs: Extra arguments forwarded unchanged to ``_do_request``.

    Returns:
        The response returned by ``_do_request``.
    """
    queue_client = self._get_or_create_sub_client("_queue_client", self._queue_base_url)
    queue_url = self._queue_base_url
    return self._do_request(method, path, client=queue_client, base_url=queue_url, **kwargs)
|
|
242
|
+
|
|
243
|
+
def _query_request(self, method: str, path: str, **kwargs: Any) -> httpx.Response:
    """Send a Query Service request through the inherited ``_do_request``.

    The Query sub-client is created on first use and reused afterwards.

    Args:
        method: HTTP method name.
        path: Request path.
        **kwargs: Extra arguments forwarded unchanged to ``_do_request``.

    Returns:
        The response returned by ``_do_request``.
    """
    query_client = self._get_or_create_sub_client("_query_client", self._query_base_url)
    query_url = self._query_base_url
    return self._do_request(method, path, client=query_client, base_url=query_url, **kwargs)
|
|
249
|
+
|
|
250
|
+
def _encrypt_request(self, method: str, path: str, **kwargs: Any) -> httpx.Response:
    """Send an Encryption API request through the inherited ``_do_request``.

    The Encryption sub-client is created on first use with only a JSON
    ``Content-Type`` header; the main client's headers are not copied to it.

    Args:
        method: HTTP method name.
        path: Request path.
        **kwargs: Extra arguments forwarded unchanged to ``_do_request``.

    Returns:
        The response returned by ``_do_request``.
    """
    json_headers = {"Content-Type": "application/json"}
    encrypt_client = self._get_or_create_sub_client(
        "_encrypt_client",
        self._encrypt_base_url,
        headers=json_headers,
    )
    encrypt_url = self._encrypt_base_url
    return self._do_request(method, path, client=encrypt_client, base_url=encrypt_url, **kwargs)
|
|
258
|
+
|
|
259
|
+
def encrypt_values(
    self,
    project_id: int,
    component_id: str,
    data: dict[str, str],
) -> dict[str, str]:
    """Encrypt secret values through the Keboola Encryption API.

    Posts a ``{key: plaintext}`` mapping to ``/encrypt`` and returns the
    decoded JSON reply, a ``{key: encrypted}`` mapping. Keys must start with
    '#'. Encrypted values start with 'KBC::ProjectSecure::'.

    Args:
        project_id: Keboola project numeric ID.
        component_id: Component identifier (e.g. 'keboola.ex-db-snowflake').
        data: Secret keys and plaintext values to encrypt
            (e.g. {'#password': 'my-secret'}).

    Returns:
        Dict of {key: encrypted_value}.
    """
    # Project and component are sent as query parameters; the secrets
    # themselves travel in the JSON body.
    scope = {"projectId": project_id, "componentId": component_id}
    reply = self._encrypt_request("POST", "/encrypt", params=scope, json=data)
    encrypted = reply.json()
    return encrypted
|
|
285
|
+
|
|
286
|
+
def verify_token(self) -> TokenVerifyResponse:
    """Verify the storage API token and retrieve project information.

    Issues ``GET /v2/storage/tokens/verify`` and maps the payload onto a
    ``TokenVerifyResponse``. On success ``self._features_cache`` is
    refreshed from the parsed response so explicit callers stay consistent
    with the cached view used by ``has_feature()``.

    Returns:
        TokenVerifyResponse with project name, ID, token description,
        default backend, feature list and organization ID. ``org_name``
        is always ``None`` here.

    Raises:
        KeboolaApiError: If token is invalid (401) or other API error.
    """
    payload = self._request("GET", "/v2/storage/tokens/verify").json()

    # A missing block and an explicit JSON null are both treated as empty,
    # matching how `organization` is handled below.
    owner = payload.get("owner") or {}

    # /v2/storage/tokens/verify carries `organization` at the TOP level of
    # the payload, not nested under `owner`. The block is minimal -- only
    # `{"id": "73"}` on the GCP us-east4 stack -- so the organization name
    # has to come from the Manage API path.
    org = payload.get("organization") or {}
    org_id_raw = org.get("id")

    # Storage API serializes the org id as a string ("73"); normalise to
    # int so callers and persisted ProjectConfig.org_id can keep the int
    # type without each consumer doing the cast. Unparseable ids become None.
    org_id: int | None
    try:
        org_id = int(org_id_raw) if org_id_raw is not None else None
    except (TypeError, ValueError):
        org_id = None

    verified = TokenVerifyResponse(
        token_id=str(payload.get("id", "")),
        token_description=payload.get("description", ""),
        project_id=owner.get("id"),
        project_name=owner.get("name", ""),
        owner_name=owner.get("name", ""),
        default_backend=owner.get("defaultBackend", "snowflake"),
        # A null feature list is treated as "no features" so the cache
        # refresh below cannot fail on frozenset(None).
        features=owner.get("features") or [],
        org_id=org_id,
        # The top-level `organization` block does NOT carry a name; that
        # field is Manage-API-only. Leave None and let the UI show the id
        # (e.g. "#73") as a fallback until `org setup` fills in the
        # human-readable name.
        org_name=None,
    )
    # Refresh the features cache on every successful verify so explicit
    # callers stay consistent with the cached view used by has_feature().
    self._features_cache = frozenset(verified.features)
    return verified
|
|
333
|
+
|
|
334
|
+
def get_project_info(self) -> dict[str, Any]:
    """Fetch the raw payload of /v2/storage/tokens/verify.

    Where ``verify_token()`` parses only a subset of fields into a
    ``TokenVerifyResponse``, this method hands back the decoded JSON
    untouched so callers can access all fields (features, limits,
    metrics, etc.).

    Returns:
        Full JSON response dict from /v2/storage/tokens/verify.

    Raises:
        KeboolaApiError: If token is invalid (401) or other API error.
    """
    verify_reply = self._request("GET", "/v2/storage/tokens/verify")
    payload = verify_reply.json()
    return payload
|
|
349
|
+
|
|
350
|
+
def create_short_lived_token(
    self,
    description: str,
    component_access: list[str],
    expires_in: int = 3600,
) -> dict[str, Any]:
    """Create a short-lived Storage API token restricted to a component.

    POST /v2/storage/tokens

    The request is sent as form data; ``expires_in`` is stringified and the
    component list is sent under the ``componentAccess[]`` field.

    Args:
        description: Human-readable token description.
        component_access: List of component IDs this token may access.
        expires_in: Token lifetime in seconds (default: 3600 = 1 hour).

    Returns:
        Token dict from the API, including the 'token' field.
    """
    form = {
        "description": description,
        "expiresIn": str(expires_in),
        "componentAccess[]": component_access,
    }
    created = self._request("POST", "/v2/storage/tokens", data=form)
    return created.json()
|
|
378
|
+
|
|
379
|
+
def global_search(
    self,
    query: str,
    project_id: int,
    types: list[str] | None = None,
    branch_type: str = "production",
    branch_id: int | None = None,
    limit: int = 50,
    offset: int = 0,
) -> dict[str, Any]:
    """Search items by name in one project via the Storage API global-search endpoint.

    Calls GET /v2/storage/global-search with the given query and optional type filters.
    This performs textual (name-based) search only — it does not scan configuration bodies.
    Results are scoped to the single project identified by ``project_id``.

    A development search is only issued when ``branch_type`` is
    ``"development"`` *and* ``branch_id`` is given; every other combination
    searches production.

    Args:
        query: Search string to match against item names.
        project_id: Numeric Keboola project ID (required by the API).
        types: Optional list of item types to filter results. Supported values:
            ``bucket``, ``table``, ``flow``, ``transformation``, ``configuration``,
            ``configuration-row``, ``workspace``, ``shared-code``.
            If None or empty, all types are returned.
        branch_type: ``"production"`` (default) or ``"development"``.
        branch_id: Required when ``branch_type="development"``; ignored otherwise.
        limit: Maximum number of results to return (default 50, max 100).
        offset: Pagination offset (default 0).

    Returns:
        Raw API response dict with keys ``"all"`` (total count) and
        ``"items"`` (list of matching item dicts).

    Raises:
        KeboolaApiError: On API errors (auth, network, rate limits).
    """
    search_params: dict[str, Any] = {
        "query": query,
        "projectIds[]": project_id,
        "limit": limit,
        "offset": offset,
    }
    if types:
        search_params["types[]"] = types

    on_dev_branch = branch_id is not None and branch_type == "development"
    search_params["branchTypes[]"] = "development" if on_dev_branch else "production"
    if on_dev_branch:
        search_params["branchIds[]"] = branch_id

    found = self._request("GET", "/v2/storage/global-search", params=search_params)
    return found.json()
|
|
430
|
+
|
|
431
|
+
def get_oauth_url(
    self,
    component_id: str,
    config_id: str,
    redirect_url: str | None = None,
) -> str:
    """Build the OAuth authorization URL for a component configuration.

    A one-hour Storage API token limited to ``component_id`` is created
    first; it is then embedded, together with the stack URL, in the query
    string of the URL the user must open to grant OAuth access. The
    component and configuration IDs travel in the URL fragment.

    Args:
        component_id: The component ID (e.g. 'keboola.ex-google-drive').
        config_id: The configuration ID to authorize.
        redirect_url: Optional URL the OAuth wizard returns to after the
            flow completes (passed as the ``returnUrl`` query param).

    Returns:
        The full OAuth authorization URL as a string.
    """
    from urllib.parse import urlencode, urlunsplit

    short_lived = self.create_short_lived_token(
        description=f"Short-lived token for OAuth URL - {component_id}/{config_id}",
        component_access=[component_id],
        expires_in=3600,
    )

    url_query: dict[str, str] = {
        "token": short_lived["token"],
        "sapiUrl": self._stack_url,
    }
    if redirect_url:
        url_query["returnUrl"] = redirect_url

    return urlunsplit(
        (
            "https",
            OAUTH_HOST,
            OAUTH_PATH,
            urlencode(url_query),
            f"/{component_id}/{config_id}",
        )
    )
|
|
467
|
+
|
|
468
|
+
def get_project_features(self) -> frozenset[str]:
    """Return the project's feature flags, hitting the API at most once.

    When ``self._features_cache`` is still ``None``, ``verify_token()`` is
    called to populate it; otherwise the cached set is returned without
    any HTTP traffic. The cache lives as long as this ``KeboolaClient``
    instance, i.e. one CLI invocation -- short enough that staleness
    across feature toggles is not a practical risk.
    """
    cached = self._features_cache
    if cached is None:
        self.verify_token()
        cached = self._features_cache
    # verify_token() either sets the cache or raises (auth/network failure
    # propagates to the caller), so the cache cannot be None at this point.
    assert cached is not None
    return cached
|
|
482
|
+
|
|
483
|
+
def has_feature(self, feature: str) -> bool:
    """Report whether ``feature`` is among the project's feature flags.

    Thin membership test on top of ``get_project_features()``, meant for
    code paths that branch on a single flag (e.g. ``"storage-branches"``).
    """
    enabled = self.get_project_features()
    return feature in enabled
|
|
490
|
+
|
|
491
|
+
def list_components(
    self,
    component_type: str | None = None,
    branch_id: int | None = None,
) -> list[dict[str, Any]]:
    """List components together with their configurations.

    Requests ``.../components`` with ``include=configuration``; the path is
    branch-scoped when a truthy ``branch_id`` is given.

    Args:
        component_type: Optional filter (extractor, writer, transformation, application).
        branch_id: If set, list components from a specific dev branch.

    Returns:
        List of component dicts from the API.
    """
    if branch_id:
        base = f"/v2/storage/branch/{branch_id}"
    else:
        base = "/v2/storage"

    query: dict[str, str] = {"include": "configuration"}
    if component_type:
        query["componentType"] = component_type

    return self._request("GET", f"{base}/components", params=query).json()
|
|
512
|
+
|
|
513
|
+
def list_components_with_configs(
    self,
    branch_id: int | None = None,
    component_type: str | None = None,
    include_state: bool = False,
) -> list[dict[str, Any]]:
    """List all components with full configuration bodies and rows.

    A single request with ``include=configuration,rows`` returns everything
    needed for sync pull and for deep (row-level) search. With
    ``include_state=True`` the ``include`` list becomes
    ``configuration,rows,state`` so each configuration also carries its
    runtime ``state`` dict (same data as ``get_config_state``), keeping
    bulk-state retrieval to one request instead of N+1. The bulk-detail
    caller in ``ConfigService`` relies on this when ``--with-state`` is set
    on ``config detail`` without a specific ``--config-id``.

    Args:
        branch_id: If set, target a specific dev branch.
        component_type: Optional filter (extractor, writer, transformation,
            application). Passed to the API as ``componentType``.
        include_state: When True, adds ``state`` to the ``include``
            resource list so each returned configuration carries its
            runtime state dict.

    Returns:
        List of component dicts, each containing a 'configurations' list
        with full config bodies and nested 'rows'.
    """
    base = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"

    include = "configuration,rows,state" if include_state else "configuration,rows"
    query: dict[str, str] = {"include": include}
    if component_type:
        query["componentType"] = component_type

    listing = self._request("GET", f"{base}/components", params=query)
    return listing.json()
|
|
556
|
+
|
|
557
|
+
def list_component_configs(
    self,
    component_id: str,
    branch_id: int | None = None,
) -> list[dict[str, Any]]:
    """List every configuration of one component.

    The component ID is fully percent-encoded (no safe characters) before
    it is placed in the request path.

    Args:
        component_id: Component identifier (e.g. 'keboola.sandboxes').
        branch_id: If set, target a specific dev branch.

    Returns:
        List of configuration dicts (id, name, description, etc.).
    """
    base = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
    encoded_component = quote(component_id, safe="")
    configs = self._request("GET", f"{base}/components/{encoded_component}/configs")
    return configs.json()
|
|
577
|
+
|
|
578
|
+
def list_config_rows(
    self,
    component_id: str,
    config_id: str,
    branch_id: int | None = None,
) -> list[dict[str, Any]]:
    """List all rows for a specific configuration.

    GET /v2/storage/[branch/{id}/]components/{comp_id}/configs/{config_id}/rows

    Args:
        component_id: Component identifier (e.g. 'keboola.ex-http').
        config_id: Configuration ID.
        branch_id: If set, target a specific dev branch.

    Returns:
        List of config row dicts.
    """
    prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
    # safe="" so a "/" inside an ID is escaped instead of silently adding a
    # path segment; this matches get_config_detail and the metadata methods.
    safe_component_id = quote(component_id, safe="")
    safe_config_id = quote(config_id, safe="")
    resp = self._request(
        "GET",
        f"{prefix}/components/{safe_component_id}/configs/{safe_config_id}/rows",
    )
    return resp.json()
|
|
600
|
+
|
|
601
|
+
def get_config_row(
    self,
    component_id: str,
    config_id: str,
    row_id: str,
    branch_id: int | None = None,
) -> dict[str, Any]:
    """Get a single configuration row by ID.

    GET /v2/storage/[branch/{id}/]components/{comp_id}/configs/{config_id}/rows/{row_id}

    Args:
        component_id: Component identifier.
        config_id: Configuration ID.
        row_id: Row ID.
        branch_id: If set, target a specific dev branch.

    Returns:
        Row detail dict from the API.
    """
    prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
    # safe="" so a "/" inside an ID is escaped instead of silently adding a
    # path segment; this matches get_config_detail and the metadata methods.
    safe_component_id = quote(component_id, safe="")
    safe_config_id = quote(config_id, safe="")
    safe_row_id = quote(row_id, safe="")
    resp = self._request(
        "GET",
        f"{prefix}/components/{safe_component_id}/configs/{safe_config_id}/rows/{safe_row_id}",
    )
    return resp.json()
|
|
625
|
+
|
|
626
|
+
def get_config_detail(
    self,
    component_id: str,
    config_id: str,
    branch_id: int | None = None,
) -> dict[str, Any]:
    """Fetch the detail of one configuration.

    Both IDs are fully percent-encoded (no safe characters) before they
    are placed in the request path.

    Args:
        component_id: The component ID (e.g. keboola.ex-db-snowflake).
        config_id: The configuration ID.
        branch_id: If set, get detail from a specific dev branch.

    Returns:
        Configuration detail dict from the API.
    """
    if branch_id:
        base = f"/v2/storage/branch/{branch_id}"
    else:
        base = "/v2/storage"
    endpoint = (
        f"{base}/components/{quote(component_id, safe='')}"
        f"/configs/{quote(config_id, safe='')}"
    )
    return self._request("GET", endpoint).json()
|
|
650
|
+
|
|
651
|
+
def get_config_state(
    self,
    component_id: str,
    config_id: str,
    branch_id: int | None = None,
) -> dict[str, Any]:
    """Get the runtime state dict of a specific configuration.

    Issues the same request as ``get_config_detail`` and returns only the
    ``state`` field: Storage API does not expose a standalone
    ``GET .../state`` resource (production returns 404, branch-scoped
    returns 501 Not Implemented), so the state is only served inline as a
    field inside the configuration detail response. This wrapper is
    retained for API discoverability, but callers that already have a
    detail response should read ``state`` from it directly instead of
    issuing this second identical request -- the service layer's
    single-mode ``--with-state`` does exactly that (see
    ``ConfigService.get_config_detail``).

    For bulk state retrieval across many configs, prefer
    ``list_components_with_configs(include_state=True)``, which requests
    ``include=configuration,rows,state`` -- one request serves every
    config's state instead of N requests.

    Args:
        component_id: The component ID (e.g. keboola.ex-db-snowflake).
        config_id: The configuration ID.
        branch_id: If set, fetch state from a specific dev branch.

    Returns:
        The state dict (empty ``{}`` when the config has no saved state,
        or when the response or its ``state`` field is not a JSON object).
    """
    prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
    safe_component_id = quote(component_id, safe="")
    safe_config_id = quote(config_id, safe="")
    response = self._request(
        "GET",
        f"{prefix}/components/{safe_component_id}/configs/{safe_config_id}",
    )
    body = response.json()
    # Guard against a non-object body so the documented "{} when there is no
    # state" contract holds instead of failing with AttributeError.
    state = body.get("state") if isinstance(body, dict) else None
    return state if isinstance(state, dict) else {}
|
|
693
|
+
|
|
694
|
+
def list_config_folder_metadata(self, branch_id: int) -> dict[str, str]:
    """Fetch folder names for all configurations via metadata search.

    Uses the search/component-configurations endpoint to find configs
    with ``KBC.configuration.folderName`` metadata.

    Note: This endpoint requires a branch ID (branch-only route).

    Items whose ``metadata`` list is missing or null, and metadata entries
    without a ``key`` or with a missing/null ``value``, are skipped rather
    than aborting the whole listing.

    Args:
        branch_id: Branch ID (required — use default branch for production).

    Returns:
        Dict mapping ``"{component_id}/{config_id}"`` to folder name.
    """
    folder_key = "KBC.configuration.folderName"
    prefix = f"/v2/storage/branch/{branch_id}"
    resp = self._request(
        "GET",
        f"{prefix}/search/component-configurations",
        params={
            "metadataKeys[]": folder_key,
            "include": "filteredMetadata",
        },
    )
    folder_map: dict[str, str] = {}
    for item in resp.json():
        # `or []` also covers an explicit JSON null, which .get's default
        # alone would let through.
        entries = item.get("metadata") or []
        # First entry carrying the folder-name key wins.
        meta = next((m for m in entries if m.get("key") == folder_key), None)
        if not meta:
            continue
        folder_name = meta.get("value")
        if folder_name is None:
            continue
        comp_id = item.get("idComponent", "")
        config_id = str(item.get("configurationId", ""))
        folder_map[f"{comp_id}/{config_id}"] = folder_name
    return folder_map
|
|
728
|
+
|
|
729
|
+
def list_config_metadata(
    self,
    component_id: str,
    config_id: str,
    branch_id: int | None = None,
) -> list[dict[str, Any]]:
    """Return the metadata entries attached to a configuration.

    GET /v2/storage/[branch/{b}/]components/{c}/configs/{id}/metadata

    Both IDs are fully percent-encoded before being placed in the path;
    the decoded JSON reply is returned as-is.
    """
    base = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
    encoded_component = quote(component_id, safe="")
    encoded_config = quote(config_id, safe="")
    listing = self._request(
        "GET",
        f"{base}/components/{encoded_component}/configs/{encoded_config}/metadata",
    )
    return listing.json()
|
|
745
|
+
|
|
746
|
+
def set_config_metadata(
    self,
    component_id: str,
    config_id: str,
    entries: list[tuple[str, str]],
    branch_id: int | None = None,
) -> list[dict[str, Any]]:
    """Set several metadata key/value pairs on a configuration at once.

    POST /v2/storage/[branch/{b}/]components/{c}/configs/{id}/metadata

    The pairs are sent as form data using PHP-style indexed field names
    (``metadata[0][key]``, ``metadata[0][value]``, ...), the same shape
    ``set_branch_metadata`` uses.
    """
    base = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"

    form: dict[str, str] = {}
    for index, (meta_key, meta_value) in enumerate(entries):
        form.update(
            {
                f"metadata[{index}][key]": meta_key,
                f"metadata[{index}][value]": meta_value,
            }
        )

    endpoint = (
        f"{base}/components/{quote(component_id, safe='')}"
        f"/configs/{quote(config_id, safe='')}/metadata"
    )
    return self._request("POST", endpoint, data=form).json()
|
|
769
|
+
|
|
770
|
+
def delete_config_metadata(
    self,
    component_id: str,
    config_id: str,
    metadata_id: int | str,
    branch_id: int | None = None,
) -> None:
    """Delete a single metadata entry on a configuration by its numeric ID.

    DELETE /v2/storage/[branch/{b}/]components/{c}/configs/{id}/metadata/{mid}

    Args:
        component_id: Component identifier.
        config_id: Configuration ID.
        metadata_id: ID of the metadata entry to delete.
        branch_id: If set, target a specific dev branch.
    """
    prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
    safe_component_id = quote(component_id, safe="")
    safe_config_id = quote(config_id, safe="")
    # metadata_id may arrive as a string; escape it like the other path
    # segments. Numeric IDs are unaffected by the encoding.
    safe_metadata_id = quote(str(metadata_id), safe="")
    self._request(
        "DELETE",
        f"{prefix}/components/{safe_component_id}/configs/{safe_config_id}/metadata/{safe_metadata_id}",
    )
|
|
786
|
+
|
|
787
|
+
def create_config(
    self,
    component_id: str,
    name: str,
    configuration: dict[str, Any],
    description: str = "",
    branch_id: int | None = None,
) -> dict[str, Any]:
    """Create a new configuration for a component.

    POST /v2/storage/[branch/{id}/]components/{comp_id}/configs

    The request is sent as form data with the configuration body
    JSON-encoded into the ``configuration`` field.

    Args:
        component_id: Component identifier.
        name: Configuration name.
        configuration: Configuration body (parameters, storage, etc.).
        description: Optional description.
        branch_id: If set, target a specific dev branch.

    Returns:
        Created configuration dict including the assigned 'id'.
    """
    prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
    # safe="" so a "/" inside the ID is escaped instead of silently adding a
    # path segment; this matches get_config_detail and the metadata methods.
    safe_component_id = quote(component_id, safe="")
    resp = self._request(
        "POST",
        f"{prefix}/components/{safe_component_id}/configs",
        data={
            "name": name,
            "description": description,
            "configuration": json.dumps(configuration),
        },
    )
    return resp.json()
|
|
820
|
+
|
|
821
|
+
def update_config(
    self,
    component_id: str,
    config_id: str,
    name: str | None = None,
    configuration: dict[str, Any] | None = None,
    description: str | None = None,
    change_description: str = "",
    branch_id: int | None = None,
) -> dict[str, Any]:
    """Update an existing configuration.

    PUT /v2/storage/[branch/{id}/]components/{comp_id}/configs/{config_id}

    Only provided (non-None) fields are sent in the request;
    ``change_description`` is sent only when non-empty.

    Args:
        component_id: Component identifier.
        config_id: ID of the configuration to update.
        name: New name, or None to leave it out of the request.
        configuration: New configuration body (sent JSON-encoded), or None
            to leave it out of the request.
        description: New description, or None to leave it out of the request.
        change_description: Optional change note sent as ``changeDescription``.
        branch_id: If set, target a specific dev branch.

    Returns:
        Updated configuration dict.
    """
    prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
    data: dict[str, Any] = {}
    if name is not None:
        data["name"] = name
    if description is not None:
        data["description"] = description
    if configuration is not None:
        data["configuration"] = json.dumps(configuration)
    if change_description:
        data["changeDescription"] = change_description
    # safe="" so a "/" inside an ID is escaped instead of silently adding a
    # path segment; this matches get_config_detail and the metadata methods.
    safe_component_id = quote(component_id, safe="")
    safe_config_id = quote(config_id, safe="")
    resp = self._request(
        "PUT",
        f"{prefix}/components/{safe_component_id}/configs/{safe_config_id}",
        data=data,
    )
    return resp.json()
|
|
856
|
+
|
|
857
|
+
def create_config_row(
    self,
    component_id: str,
    config_id: str,
    name: str,
    configuration: dict[str, Any],
    description: str = "",
    is_disabled: bool = False,
    branch_id: int | None = None,
) -> dict[str, Any]:
    """Create a new configuration row.

    POST /v2/storage/[branch/{id}/]components/{comp_id}/configs/{config_id}/rows

    The request is sent as form data with the row configuration
    JSON-encoded; ``isDisabled`` is only included (as ``"1"``) when
    ``is_disabled`` is true.

    Args:
        component_id: The component ID.
        config_id: The parent configuration ID.
        name: Row name.
        configuration: Row-level configuration dict.
        description: Optional row description.
        is_disabled: When True, the row is created in disabled state and
            excluded from job runs until re-enabled.
        branch_id: Optional dev branch ID.

    Returns:
        Created row dict including the assigned 'id'.
    """
    prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
    data: dict[str, Any] = {
        "name": name,
        "description": description,
        "configuration": json.dumps(configuration),
    }
    if is_disabled:
        data["isDisabled"] = "1"
    # safe="" so a "/" inside an ID is escaped instead of silently adding a
    # path segment; this matches get_config_detail and the metadata methods.
    safe_component_id = quote(component_id, safe="")
    safe_config_id = quote(config_id, safe="")
    resp = self._request(
        "POST",
        f"{prefix}/components/{safe_component_id}/configs/{safe_config_id}/rows",
        data=data,
    )
    return resp.json()
|
|
898
|
+
|
|
899
|
+
def update_config_row(
    self,
    component_id: str,
    config_id: str,
    row_id: str,
    name: str | None = None,
    configuration: dict[str, Any] | None = None,
    description: str | None = None,
    is_disabled: bool | None = None,
    change_description: str = "",
    branch_id: int | None = None,
) -> dict[str, Any]:
    """Update an existing configuration row.

    PUT /v2/storage/[branch/{id}/]components/{comp_id}/configs/{config_id}/rows/{row_id}

    Only non-None fields are sent; ``change_description`` is sent only when
    non-empty.

    Args:
        component_id: The component ID.
        config_id: The parent configuration ID.
        row_id: ID of the row to update.
        name: New row name, or None to leave it out of the request.
        configuration: New row configuration (sent JSON-encoded), or None
            to leave it out of the request.
        description: New description, or None to leave it out of the request.
        is_disabled: When True, disable the row; when False, enable it;
            when None, leave the current state unchanged.
        change_description: Optional change note sent as ``changeDescription``.
        branch_id: Optional dev branch ID.

    Returns:
        Decoded JSON response for the updated row.
    """
    prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
    data: dict[str, Any] = {}
    if name is not None:
        data["name"] = name
    if description is not None:
        data["description"] = description
    if configuration is not None:
        data["configuration"] = json.dumps(configuration)
    if is_disabled is not None:
        data["isDisabled"] = "1" if is_disabled else "0"
    if change_description:
        data["changeDescription"] = change_description
    # safe="" so a "/" inside an ID is escaped instead of silently adding a
    # path segment; this matches get_config_detail and the metadata methods.
    safe_component_id = quote(component_id, safe="")
    safe_config_id = quote(config_id, safe="")
    safe_row_id = quote(row_id, safe="")
    resp = self._request(
        "PUT",
        f"{prefix}/components/{safe_component_id}/configs/{safe_config_id}/rows/{safe_row_id}",
        data=data,
    )
    return resp.json()
|
|
937
|
+
|
|
938
|
+
def delete_config_row(
    self,
    component_id: str,
    config_id: str,
    row_id: str,
    branch_id: int | None = None,
) -> None:
    """Delete a configuration row.

    DELETE /v2/storage/[branch/{id}/]components/{comp_id}/configs/{config_id}/rows/{row_id}

    Args:
        component_id: The component ID.
        config_id: The parent configuration ID.
        row_id: ID of the row to delete.
        branch_id: Optional dev branch ID.
    """
    prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
    # safe="" so a "/" inside an ID is escaped instead of silently adding a
    # path segment; this matches get_config_detail and the metadata methods.
    safe_component_id = quote(component_id, safe="")
    safe_config_id = quote(config_id, safe="")
    safe_row_id = quote(row_id, safe="")
    self._request(
        "DELETE",
        f"{prefix}/components/{safe_component_id}/configs/{safe_config_id}/rows/{safe_row_id}",
    )
|
|
954
|
+
|
|
955
|
+
def _wait_for_storage_job(
    self,
    job: dict[str, Any],
    max_wait: float = STORAGE_JOB_MAX_WAIT,
) -> dict[str, Any]:
    """Poll a Storage API job until it reaches a terminal state.

    If the job passed in is already ``success`` or ``error`` it is returned
    as-is without polling. Otherwise the job is re-fetched from
    ``/v2/storage/jobs/{id}`` every ``STORAGE_JOB_POLL_INTERVAL`` seconds
    until it succeeds, fails, or ``max_wait`` seconds have elapsed.

    NOTE(review): a job that is *already* in ``error`` when passed in is
    returned rather than raised, unlike an error observed while polling --
    confirm callers expect that.

    Args:
        job: Initial job response from POST/DELETE.
        max_wait: Maximum seconds to wait (default: STORAGE_JOB_MAX_WAIT).

    Returns:
        Completed job dict (with results on success).

    Raises:
        KeboolaApiError: If the job fails while polling
            (``STORAGE_JOB_FAILED``) or does not finish within
            ``max_wait`` (``STORAGE_JOB_TIMEOUT``).
    """
    job_id = job.get("id")
    if job.get("status") in ("success", "error"):
        return job

    deadline = time.monotonic() + max_wait
    while time.monotonic() < deadline:
        time.sleep(STORAGE_JOB_POLL_INTERVAL)
        response = self._request("GET", f"/v2/storage/jobs/{job_id}")
        job = response.json()
        status = job.get("status")
        if status == "success":
            return job
        if status == "error":
            # `error` may be an object, a bare string, null or absent;
            # never let message extraction itself fail and mask the
            # actual job failure.
            error = job.get("error")
            error_msg = "Storage job failed"
            if isinstance(error, dict):
                error_msg = error.get("message") or error_msg
            elif isinstance(error, str) and error:
                error_msg = error
            raise KeboolaApiError(
                message=error_msg,
                status_code=500,
                error_code=ErrorCode.STORAGE_JOB_FAILED,
                retryable=False,
            )
    raise KeboolaApiError(
        message=f"Storage job {job_id} did not complete within {max_wait}s",
        status_code=504,
        error_code=ErrorCode.STORAGE_JOB_TIMEOUT,
        retryable=True,
    )
|
|
998
|
+
|
|
999
|
+
def create_dev_branch(self, name: str, description: str = "") -> dict[str, Any]:
    """Create a development branch and wait for the async job to finish.

    The Storage API answers the POST with an async job; this method hands
    that job to ``_wait_for_storage_job`` and returns the ``results`` field
    of the finished job (``{}`` when the field is absent).

    Args:
        name: Branch name.
        description: Optional branch description; omitted from the request
            when empty.

    Returns:
        Branch dict with id, name, description, created, etc.

    Raises:
        KeboolaApiError: If the API call or job fails.
    """
    payload: dict[str, str] = (
        {"name": name, "description": description} if description else {"name": name}
    )
    created = self._request("POST", "/v2/storage/dev-branches", json=payload)
    finished_job = self._wait_for_storage_job(created.json())
    return finished_job.get("results", {})
|
|
1021
|
+
|
|
1022
|
+
def delete_dev_branch(self, branch_id: int) -> None:
    """Delete a development branch and wait for the async job to finish.

    The job returned by the DELETE call is handed to
    ``_wait_for_storage_job``; its result is discarded.

    Args:
        branch_id: The branch ID to delete.

    Raises:
        KeboolaApiError: If the API call or job fails.
    """
    deletion = self._request("DELETE", f"/v2/storage/dev-branches/{branch_id}")
    pending_job = deletion.json()
    self._wait_for_storage_job(pending_job)
|
|
1033
|
+
|
|
1034
|
+
def list_dev_branches(self) -> list[dict[str, Any]]:
    """Return the project's development branches.

    GET /v2/storage/dev-branches

    Returns:
        List of branch dicts from the API.
    """
    branches = self._request("GET", "/v2/storage/dev-branches")
    return branches.json()
|
|
1042
|
+
|
|
1043
|
+
def list_branch_metadata(self, branch_id: int | str = "default") -> list[dict[str, Any]]:
    """Return the metadata entries attached to a branch.

    GET /v2/storage/branch/{id}/metadata

    Args:
        branch_id: Branch ID or the literal "default" for the main branch.

    Returns:
        List of metadata dicts with keys: id, key, value, provider, timestamp.
    """
    endpoint = f"/v2/storage/branch/{branch_id}/metadata"
    return self._request("GET", endpoint).json()
|
|
1056
|
+
|
|
1057
|
+
def set_branch_metadata(
|
|
1058
|
+
self,
|
|
1059
|
+
entries: list[tuple[str, str]],
|
|
1060
|
+
branch_id: int | str = "default",
|
|
1061
|
+
) -> list[dict[str, Any]]:
|
|
1062
|
+
"""Bulk-set metadata key/value pairs on a branch.
|
|
1063
|
+
|
|
1064
|
+
POST /v2/storage/branch/{id}/metadata
|
|
1065
|
+
|
|
1066
|
+
Keboola's endpoint expects PHP-style array indices in the
|
|
1067
|
+
form-urlencoded body, e.g.::
|
|
1068
|
+
|
|
1069
|
+
metadata[0][key]=KBC.projectDescription
|
|
1070
|
+
metadata[0][value]=My project
|
|
1071
|
+
|
|
1072
|
+
httpx's ``data=`` accepts a mapping of str -> str and URL-encodes it.
|
|
1073
|
+
Since each ``metadata[i][...]`` key is unique per index, a plain dict
|
|
1074
|
+
preserves both ordering (Python 3.7+) and Keboola's expected shape.
|
|
1075
|
+
|
|
1076
|
+
Args:
|
|
1077
|
+
entries: Ordered list of ``(key, value)`` metadata tuples.
|
|
1078
|
+
branch_id: Branch ID or the literal "default" for the main branch.
|
|
1079
|
+
|
|
1080
|
+
Returns:
|
|
1081
|
+
List of metadata dicts created/updated by the API.
|
|
1082
|
+
"""
|
|
1083
|
+
form: dict[str, str] = {}
|
|
1084
|
+
for i, (key, value) in enumerate(entries):
|
|
1085
|
+
form[f"metadata[{i}][key]"] = key
|
|
1086
|
+
form[f"metadata[{i}][value]"] = value
|
|
1087
|
+
response = self._request(
|
|
1088
|
+
"POST",
|
|
1089
|
+
f"/v2/storage/branch/{branch_id}/metadata",
|
|
1090
|
+
data=form,
|
|
1091
|
+
)
|
|
1092
|
+
return response.json()
|
|
1093
|
+
|
|
1094
|
+
def delete_branch_metadata(
|
|
1095
|
+
self,
|
|
1096
|
+
metadata_id: int | str,
|
|
1097
|
+
branch_id: int | str = "default",
|
|
1098
|
+
) -> None:
|
|
1099
|
+
"""Delete a single metadata entry on a branch by its numeric ID.
|
|
1100
|
+
|
|
1101
|
+
DELETE /v2/storage/branch/{id}/metadata/{metadataId}
|
|
1102
|
+
|
|
1103
|
+
Args:
|
|
1104
|
+
metadata_id: ID of the metadata entry (from ``list_branch_metadata``).
|
|
1105
|
+
branch_id: Branch ID or the literal "default" for the main branch.
|
|
1106
|
+
"""
|
|
1107
|
+
self._request(
|
|
1108
|
+
"DELETE",
|
|
1109
|
+
f"/v2/storage/branch/{branch_id}/metadata/{metadata_id}",
|
|
1110
|
+
)
|
|
1111
|
+
|
|
1112
|
+
def get_branch_metadata_value(
|
|
1113
|
+
self,
|
|
1114
|
+
key: str,
|
|
1115
|
+
branch_id: int | str = "default",
|
|
1116
|
+
) -> str | None | object:
|
|
1117
|
+
"""Return the value for a single metadata key on a branch, or None if absent.
|
|
1118
|
+
|
|
1119
|
+
Convenience wrapper around ``list_branch_metadata`` that filters by key.
|
|
1120
|
+
|
|
1121
|
+
Args:
|
|
1122
|
+
key: Metadata key to look up (e.g. "KBC.projectDescription").
|
|
1123
|
+
branch_id: Branch ID or the literal "default" for the main branch.
|
|
1124
|
+
|
|
1125
|
+
Returns:
|
|
1126
|
+
The string value if the key exists (may be None if the API stored null),
|
|
1127
|
+
or ``METADATA_NOT_FOUND`` sentinel if the key is not present.
|
|
1128
|
+
"""
|
|
1129
|
+
for entry in self.list_branch_metadata(branch_id=branch_id):
|
|
1130
|
+
if entry.get("key") == key:
|
|
1131
|
+
return entry.get("value")
|
|
1132
|
+
return METADATA_NOT_FOUND
|
|
1133
|
+
|
|
1134
|
+
def list_buckets(
|
|
1135
|
+
self, include: str | None = None, branch_id: int | None = None
|
|
1136
|
+
) -> list[dict[str, Any]]:
|
|
1137
|
+
"""List storage buckets with optional extended information.
|
|
1138
|
+
|
|
1139
|
+
Args:
|
|
1140
|
+
include: Optional include parameter (e.g. "linkedBuckets" for sharing info).
|
|
1141
|
+
branch_id: If set, list buckets from a specific dev branch.
|
|
1142
|
+
|
|
1143
|
+
Returns:
|
|
1144
|
+
List of bucket dicts from the API.
|
|
1145
|
+
"""
|
|
1146
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
1147
|
+
params: dict[str, str] = {}
|
|
1148
|
+
if include:
|
|
1149
|
+
params["include"] = include
|
|
1150
|
+
response = self._request("GET", f"{prefix}/buckets", params=params)
|
|
1151
|
+
return response.json()
|
|
1152
|
+
|
|
1153
|
+
def list_buckets_with_metadata(self) -> list[dict[str, Any]]:
    """Fetch the storage buckets with their metadata attached.

    Shorthand for ``list_buckets(include="metadata")``.

    Returns:
        Whatever ``list_buckets`` returns for that include value.
    """
    buckets = self.list_buckets(include="metadata")
    return buckets
|
|
1160
|
+
|
|
1161
|
+
def list_bucket_metadata(
|
|
1162
|
+
self,
|
|
1163
|
+
bucket_id: str,
|
|
1164
|
+
branch_id: int | None = None,
|
|
1165
|
+
) -> list[dict[str, Any]]:
|
|
1166
|
+
"""List metadata entries on a single storage bucket.
|
|
1167
|
+
|
|
1168
|
+
GET /v2/storage/[branch/{b}/]buckets/{id}/metadata
|
|
1169
|
+
|
|
1170
|
+
Args:
|
|
1171
|
+
bucket_id: Bucket ID (e.g. 'in.c-db').
|
|
1172
|
+
branch_id: If set, target a specific dev branch.
|
|
1173
|
+
|
|
1174
|
+
Returns:
|
|
1175
|
+
List of metadata dicts (id/key/value/provider/timestamp).
|
|
1176
|
+
"""
|
|
1177
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
1178
|
+
safe_id = quote(bucket_id, safe="")
|
|
1179
|
+
response = self._request("GET", f"{prefix}/buckets/{safe_id}/metadata")
|
|
1180
|
+
return response.json()
|
|
1181
|
+
|
|
1182
|
+
def set_bucket_metadata(
|
|
1183
|
+
self,
|
|
1184
|
+
bucket_id: str,
|
|
1185
|
+
entries: list[tuple[str, str]],
|
|
1186
|
+
branch_id: int | None = None,
|
|
1187
|
+
provider: str = "user",
|
|
1188
|
+
) -> list[dict[str, Any]]:
|
|
1189
|
+
"""Upsert metadata key/value pairs on a storage bucket.
|
|
1190
|
+
|
|
1191
|
+
POST /v2/storage/buckets/{id}/metadata
|
|
1192
|
+
|
|
1193
|
+
Uses the same PHP-style array form encoding as ``set_branch_metadata``.
|
|
1194
|
+
|
|
1195
|
+
Args:
|
|
1196
|
+
bucket_id: Bucket ID (e.g. 'in.c-db').
|
|
1197
|
+
entries: Ordered list of ``(key, value)`` metadata tuples.
|
|
1198
|
+
branch_id: If set, target a specific dev branch.
|
|
1199
|
+
provider: Metadata provider. Defaults to ``"user"`` for
|
|
1200
|
+
CLI-originated descriptions; pass ``"system"`` for reserved
|
|
1201
|
+
``KBC.*`` keys (e.g. ``KBC.createdBy.branch.id``) -- the API
|
|
1202
|
+
rejects user-provider writes on that namespace.
|
|
1203
|
+
|
|
1204
|
+
Returns:
|
|
1205
|
+
Full metadata list for the bucket after the upsert.
|
|
1206
|
+
"""
|
|
1207
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
1208
|
+
safe_id = quote(bucket_id, safe="")
|
|
1209
|
+
form: dict[str, str] = {"provider": provider}
|
|
1210
|
+
for i, (key, value) in enumerate(entries):
|
|
1211
|
+
form[f"metadata[{i}][key]"] = key
|
|
1212
|
+
form[f"metadata[{i}][value]"] = value
|
|
1213
|
+
response = self._request("POST", f"{prefix}/buckets/{safe_id}/metadata", data=form)
|
|
1214
|
+
return response.json()
|
|
1215
|
+
|
|
1216
|
+
def set_table_metadata(
|
|
1217
|
+
self,
|
|
1218
|
+
table_id: str,
|
|
1219
|
+
entries: list[tuple[str, str]],
|
|
1220
|
+
branch_id: int | None = None,
|
|
1221
|
+
) -> list[dict[str, Any]]:
|
|
1222
|
+
"""Upsert metadata key/value pairs on a storage table.
|
|
1223
|
+
|
|
1224
|
+
POST /v2/storage/tables/{id}/metadata
|
|
1225
|
+
|
|
1226
|
+
Provider is always ``"user"`` for CLI-originated descriptions.
|
|
1227
|
+
Column-level descriptions use the namespaced key convention
|
|
1228
|
+
``KBC.column.{colname}.description`` stored at table-metadata level
|
|
1229
|
+
(Keboola Storage API does not expose a user-writable column-metadata
|
|
1230
|
+
endpoint; ``columnMetadata`` is populated exclusively by components).
|
|
1231
|
+
|
|
1232
|
+
Args:
|
|
1233
|
+
table_id: Full table ID (e.g. "in.c-bucket.table").
|
|
1234
|
+
entries: Ordered list of ``(key, value)`` metadata tuples.
|
|
1235
|
+
branch_id: If set, target a specific dev branch.
|
|
1236
|
+
|
|
1237
|
+
Returns:
|
|
1238
|
+
Full metadata list for the table after the upsert.
|
|
1239
|
+
"""
|
|
1240
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
1241
|
+
safe_id = quote(table_id, safe="")
|
|
1242
|
+
form: dict[str, str] = {"provider": "user"}
|
|
1243
|
+
for i, (key, value) in enumerate(entries):
|
|
1244
|
+
form[f"metadata[{i}][key]"] = key
|
|
1245
|
+
form[f"metadata[{i}][value]"] = value
|
|
1246
|
+
response = self._request("POST", f"{prefix}/tables/{safe_id}/metadata", data=form)
|
|
1247
|
+
return response.json()
|
|
1248
|
+
|
|
1249
|
+
def get_bucket_detail(
|
|
1250
|
+
self,
|
|
1251
|
+
bucket_id: str,
|
|
1252
|
+
branch_id: int | None = None,
|
|
1253
|
+
) -> dict[str, Any]:
|
|
1254
|
+
"""Get detailed information about a storage bucket.
|
|
1255
|
+
|
|
1256
|
+
Returns full bucket metadata including sharing/linked info
|
|
1257
|
+
(sourceBucket, sourceTable with project references).
|
|
1258
|
+
|
|
1259
|
+
Args:
|
|
1260
|
+
bucket_id: Bucket ID (e.g. 'in.c-db').
|
|
1261
|
+
branch_id: If set, target a specific dev branch.
|
|
1262
|
+
|
|
1263
|
+
Returns:
|
|
1264
|
+
Bucket detail dict from the API.
|
|
1265
|
+
"""
|
|
1266
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
1267
|
+
safe_id = quote(bucket_id, safe="")
|
|
1268
|
+
response = self._request("GET", f"{prefix}/buckets/{safe_id}")
|
|
1269
|
+
return response.json()
|
|
1270
|
+
|
|
1271
|
+
def get_table_detail(
|
|
1272
|
+
self,
|
|
1273
|
+
table_id: str,
|
|
1274
|
+
branch_id: int | None = None,
|
|
1275
|
+
) -> dict[str, Any]:
|
|
1276
|
+
"""Get detailed information about a storage table.
|
|
1277
|
+
|
|
1278
|
+
Args:
|
|
1279
|
+
table_id: Full table ID (e.g. "in.c-bucket.table").
|
|
1280
|
+
branch_id: If set, target a specific dev branch.
|
|
1281
|
+
|
|
1282
|
+
Returns:
|
|
1283
|
+
Table detail dict including columns, metadata, bucket info.
|
|
1284
|
+
"""
|
|
1285
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
1286
|
+
safe_id = quote(table_id, safe="")
|
|
1287
|
+
response = self._request("GET", f"{prefix}/tables/{safe_id}")
|
|
1288
|
+
return response.json()
|
|
1289
|
+
|
|
1290
|
+
def list_tables(
|
|
1291
|
+
self,
|
|
1292
|
+
bucket_id: str | None = None,
|
|
1293
|
+
branch_id: int | None = None,
|
|
1294
|
+
include: str | None = None,
|
|
1295
|
+
) -> list[dict[str, Any]]:
|
|
1296
|
+
"""List storage tables, optionally filtered by bucket.
|
|
1297
|
+
|
|
1298
|
+
Args:
|
|
1299
|
+
bucket_id: If set, list tables only from this bucket.
|
|
1300
|
+
branch_id: If set, target a specific dev branch.
|
|
1301
|
+
include: Optional include parameter (e.g. 'columns').
|
|
1302
|
+
|
|
1303
|
+
Returns:
|
|
1304
|
+
List of table dicts from the API.
|
|
1305
|
+
"""
|
|
1306
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
1307
|
+
params: dict[str, str] = {}
|
|
1308
|
+
if include:
|
|
1309
|
+
params["include"] = include
|
|
1310
|
+
if bucket_id:
|
|
1311
|
+
safe_id = quote(bucket_id, safe="")
|
|
1312
|
+
response = self._request("GET", f"{prefix}/buckets/{safe_id}/tables", params=params)
|
|
1313
|
+
else:
|
|
1314
|
+
response = self._request("GET", f"{prefix}/tables", params=params)
|
|
1315
|
+
return response.json()
|
|
1316
|
+
|
|
1317
|
+
# ------------------------------------------------------------------
|
|
1318
|
+
# Bucket sharing & linking
|
|
1319
|
+
# ------------------------------------------------------------------
|
|
1320
|
+
|
|
1321
|
+
def list_shared_buckets(self, include: str | None = None) -> list[dict[str, Any]]:
|
|
1322
|
+
"""List buckets shared into the current project's organization.
|
|
1323
|
+
|
|
1324
|
+
GET /v2/storage/shared-buckets
|
|
1325
|
+
|
|
1326
|
+
Args:
|
|
1327
|
+
include: Optional include parameter (e.g. "metadata").
|
|
1328
|
+
|
|
1329
|
+
Returns:
|
|
1330
|
+
List of shared bucket dicts.
|
|
1331
|
+
"""
|
|
1332
|
+
params: dict[str, str] = {}
|
|
1333
|
+
if include:
|
|
1334
|
+
params["include"] = include
|
|
1335
|
+
response = self._request("GET", "/v2/storage/shared-buckets", params=params)
|
|
1336
|
+
return response.json()
|
|
1337
|
+
|
|
1338
|
+
def share_bucket(
|
|
1339
|
+
self,
|
|
1340
|
+
bucket_id: str,
|
|
1341
|
+
sharing_type: str,
|
|
1342
|
+
target_project_ids: list[int] | None = None,
|
|
1343
|
+
target_users: list[str] | None = None,
|
|
1344
|
+
) -> dict[str, Any]:
|
|
1345
|
+
"""Enable sharing on a bucket (async, waits for completion).
|
|
1346
|
+
|
|
1347
|
+
Args:
|
|
1348
|
+
bucket_id: Bucket ID to share (e.g. "out.c-data").
|
|
1349
|
+
sharing_type: One of "organization", "organization-project",
|
|
1350
|
+
"selected-projects", "selected-users".
|
|
1351
|
+
target_project_ids: Required for "selected-projects" type.
|
|
1352
|
+
target_users: Required for "selected-users" type (email addresses).
|
|
1353
|
+
|
|
1354
|
+
Returns:
|
|
1355
|
+
Completed storage job dict.
|
|
1356
|
+
|
|
1357
|
+
Raises:
|
|
1358
|
+
KeboolaApiError: If the share operation fails (e.g. 403 for non-master token).
|
|
1359
|
+
"""
|
|
1360
|
+
safe_id = quote(bucket_id, safe="")
|
|
1361
|
+
|
|
1362
|
+
endpoint_map = {
|
|
1363
|
+
"organization": f"/v2/storage/buckets/{safe_id}/share-organization",
|
|
1364
|
+
"organization-project": f"/v2/storage/buckets/{safe_id}/share-organization-project",
|
|
1365
|
+
"selected-projects": f"/v2/storage/buckets/{safe_id}/share-to-projects",
|
|
1366
|
+
"selected-users": f"/v2/storage/buckets/{safe_id}/share-to-users",
|
|
1367
|
+
}
|
|
1368
|
+
|
|
1369
|
+
endpoint = endpoint_map.get(sharing_type)
|
|
1370
|
+
if not endpoint:
|
|
1371
|
+
raise KeboolaApiError(
|
|
1372
|
+
message=f"Invalid sharing type: '{sharing_type}'. "
|
|
1373
|
+
f"Valid types: {', '.join(endpoint_map.keys())}",
|
|
1374
|
+
status_code=400,
|
|
1375
|
+
error_code=ErrorCode.INVALID_SHARING_TYPE,
|
|
1376
|
+
retryable=False,
|
|
1377
|
+
)
|
|
1378
|
+
|
|
1379
|
+
data: dict[str, Any] = {}
|
|
1380
|
+
if sharing_type == "selected-projects" and target_project_ids:
|
|
1381
|
+
data["targetProjectIds"] = [str(pid) for pid in target_project_ids]
|
|
1382
|
+
elif sharing_type == "selected-users" and target_users:
|
|
1383
|
+
data["targetUsers"] = target_users
|
|
1384
|
+
|
|
1385
|
+
response = self._request("POST", endpoint, params={"async": "true"}, data=data)
|
|
1386
|
+
return self._wait_for_storage_job(response.json())
|
|
1387
|
+
|
|
1388
|
+
def change_sharing_type(
    self,
    bucket_id: str,
    sharing_type: str,
) -> dict[str, Any]:
    """Switch the sharing type of a bucket that is already shared.

    PUT /v2/storage/buckets/{bucket_id}/share

    Sent with ``async=true``; the response is passed to
    ``_wait_for_storage_job``.

    Args:
        bucket_id: Bucket ID.
        sharing_type: "organization" or "organization-project".

    Returns:
        Whatever ``_wait_for_storage_job`` returns (the storage job dict).
    """
    endpoint = f"/v2/storage/buckets/{quote(bucket_id, safe='')}/share"
    job_payload = self._request(
        "PUT",
        endpoint,
        json={"sharing": sharing_type},
        params={"async": "true"},
    ).json()
    return self._wait_for_storage_job(job_payload)
|
|
1412
|
+
|
|
1413
|
+
def unshare_bucket(self, bucket_id: str) -> dict[str, Any]:
    """Turn off sharing for a bucket and wait on the resulting storage job.

    DELETE /v2/storage/buckets/{bucket_id}/share

    Prerequisite: no linked buckets exist in other projects.

    Returns:
        Whatever ``_wait_for_storage_job`` returns (the storage job dict).
    """
    endpoint = f"/v2/storage/buckets/{quote(bucket_id, safe='')}/share"
    job_payload = self._request("DELETE", endpoint, params={"async": "true"}).json()
    return self._wait_for_storage_job(job_payload)
|
|
1430
|
+
|
|
1431
|
+
def link_bucket(
    self,
    source_project_id: int,
    source_bucket_id: str,
    name: str,
    stage: str = "in",
) -> dict[str, Any]:
    """Link a bucket shared by another project into this one.

    POST /v2/storage/buckets (with sourceProjectId + sourceBucketId)

    Sent as form data with ``async=true``; the response is passed to
    ``_wait_for_storage_job``. ``name`` is used for both the ``name``
    and ``displayName`` fields.

    Args:
        source_project_id: Project ID that owns the shared bucket.
        source_bucket_id: Bucket ID in the source project.
        name: Name (and display name) of the linked bucket in this project.
        stage: Bucket stage ("in" or "out"). Defaults to "in".

    Returns:
        Whatever ``_wait_for_storage_job`` returns (the storage job
        dict, with linked bucket info in its results).
    """
    form = {
        "stage": stage,
        "name": name,
        "displayName": name,
        "sourceProjectId": source_project_id,
        "sourceBucketId": source_bucket_id,
    }
    job_payload = self._request(
        "POST", "/v2/storage/buckets", params={"async": "true"}, data=form
    ).json()
    return self._wait_for_storage_job(job_payload)
|
|
1464
|
+
|
|
1465
|
+
def delete_bucket(
|
|
1466
|
+
self, bucket_id: str, force: bool = False, branch_id: int | None = None
|
|
1467
|
+
) -> dict[str, Any]:
|
|
1468
|
+
"""Delete a bucket (async, waits for completion).
|
|
1469
|
+
|
|
1470
|
+
Used for unlinking shared buckets or deleting regular buckets.
|
|
1471
|
+
|
|
1472
|
+
Args:
|
|
1473
|
+
bucket_id: Bucket ID to delete.
|
|
1474
|
+
force: If True, delete even if bucket contains tables.
|
|
1475
|
+
branch_id: If set, target a specific dev branch.
|
|
1476
|
+
|
|
1477
|
+
Returns:
|
|
1478
|
+
Completed storage job dict.
|
|
1479
|
+
"""
|
|
1480
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
1481
|
+
safe_id = quote(bucket_id, safe="")
|
|
1482
|
+
params: dict[str, str] = {"async": "true"}
|
|
1483
|
+
if force:
|
|
1484
|
+
params["force"] = "true"
|
|
1485
|
+
response = self._request("DELETE", f"{prefix}/buckets/{safe_id}", params=params)
|
|
1486
|
+
return self._wait_for_storage_job(response.json())
|
|
1487
|
+
|
|
1488
|
+
def create_bucket(
|
|
1489
|
+
self,
|
|
1490
|
+
stage: str,
|
|
1491
|
+
name: str,
|
|
1492
|
+
description: str | None = None,
|
|
1493
|
+
backend: str | None = None,
|
|
1494
|
+
branch_id: int | None = None,
|
|
1495
|
+
) -> dict[str, Any]:
|
|
1496
|
+
"""Create a new storage bucket (sync).
|
|
1497
|
+
|
|
1498
|
+
Args:
|
|
1499
|
+
stage: Bucket stage — "in" or "out".
|
|
1500
|
+
name: Bucket name slug (e.g. "my-bucket").
|
|
1501
|
+
description: Optional description.
|
|
1502
|
+
backend: Optional backend type (e.g. "snowflake", "bigquery").
|
|
1503
|
+
branch_id: If set, create bucket in a specific dev branch.
|
|
1504
|
+
|
|
1505
|
+
Returns:
|
|
1506
|
+
New bucket dict from the API.
|
|
1507
|
+
"""
|
|
1508
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
1509
|
+
body: dict[str, str] = {"stage": stage, "name": name}
|
|
1510
|
+
if description is not None:
|
|
1511
|
+
body["description"] = description
|
|
1512
|
+
if backend is not None:
|
|
1513
|
+
body["backend"] = backend
|
|
1514
|
+
response = self._request("POST", f"{prefix}/buckets", json=body)
|
|
1515
|
+
return response.json()
|
|
1516
|
+
|
|
1517
|
+
def create_table(
|
|
1518
|
+
self,
|
|
1519
|
+
bucket_id: str,
|
|
1520
|
+
name: str,
|
|
1521
|
+
columns: list[dict[str, Any]],
|
|
1522
|
+
primary_key: list[str] | None = None,
|
|
1523
|
+
branch_id: int | None = None,
|
|
1524
|
+
) -> dict[str, Any]:
|
|
1525
|
+
"""Create a new table with typed columns (async, waits for completion).
|
|
1526
|
+
|
|
1527
|
+
Args:
|
|
1528
|
+
bucket_id: Target bucket ID (e.g. "in.c-my-bucket").
|
|
1529
|
+
name: Table name.
|
|
1530
|
+
columns: List of column dicts with "name" and "definition.type" keys,
|
|
1531
|
+
e.g. [{"name": "id", "definition": {"type": "INTEGER"}}].
|
|
1532
|
+
primary_key: Optional list of column names for the primary key.
|
|
1533
|
+
branch_id: If set, create table in a specific dev branch.
|
|
1534
|
+
|
|
1535
|
+
Returns:
|
|
1536
|
+
Completed storage job results dict.
|
|
1537
|
+
"""
|
|
1538
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
1539
|
+
safe_id = quote(bucket_id, safe="")
|
|
1540
|
+
body: dict[str, Any] = {
|
|
1541
|
+
"name": name,
|
|
1542
|
+
"primaryKeysNames": primary_key or [],
|
|
1543
|
+
"columns": columns,
|
|
1544
|
+
}
|
|
1545
|
+
response = self._request("POST", f"{prefix}/buckets/{safe_id}/tables-definition", json=body)
|
|
1546
|
+
job = self._wait_for_storage_job(response.json())
|
|
1547
|
+
return job.get("results", {})
|
|
1548
|
+
|
|
1549
|
+
def prepare_file_upload(
|
|
1550
|
+
self,
|
|
1551
|
+
name: str,
|
|
1552
|
+
size_bytes: int,
|
|
1553
|
+
tags: list[str] | None = None,
|
|
1554
|
+
is_permanent: bool = False,
|
|
1555
|
+
notify: bool = False,
|
|
1556
|
+
) -> dict[str, Any]:
|
|
1557
|
+
"""Register a file with the Storage API and get a presigned upload URL.
|
|
1558
|
+
|
|
1559
|
+
Step 1 of the async table upload flow.
|
|
1560
|
+
|
|
1561
|
+
Args:
|
|
1562
|
+
name: Filename (e.g. "data.csv").
|
|
1563
|
+
size_bytes: File size in bytes.
|
|
1564
|
+
tags: Optional list of tags to assign to the file.
|
|
1565
|
+
is_permanent: If True, file is not auto-deleted after 15 days.
|
|
1566
|
+
notify: If True, send notification on upload completion.
|
|
1567
|
+
|
|
1568
|
+
Returns:
|
|
1569
|
+
File resource dict including 'id' (fileId), 'url', 'uploadParams',
|
|
1570
|
+
and 'gcsUploadParams' (present on GCP stacks; contains bearer token
|
|
1571
|
+
and GCS bucket/key for direct PUT upload).
|
|
1572
|
+
"""
|
|
1573
|
+
# federationToken=1 is required on newer stacks (AWS, Azure) to get
|
|
1574
|
+
# cloud-native credentials instead of deprecated presigned POST fields.
|
|
1575
|
+
body: dict[str, Any] = {"name": name, "sizeBytes": size_bytes, "federationToken": "1"}
|
|
1576
|
+
if is_permanent:
|
|
1577
|
+
body["isPermanent"] = "1"
|
|
1578
|
+
if notify:
|
|
1579
|
+
body["notify"] = "1"
|
|
1580
|
+
if tags:
|
|
1581
|
+
for i, tag in enumerate(tags):
|
|
1582
|
+
body[f"tags[{i}]"] = tag
|
|
1583
|
+
response = self._request("POST", "/v2/storage/files/prepare", data=body)
|
|
1584
|
+
return response.json()
|
|
1585
|
+
|
|
1586
|
+
def _upload_to_cloud(
    self,
    upload_info: dict[str, Any],
    file_path: str,
) -> None:
    """Send a local file to cloud storage using the files/prepare response.

    The upload path is picked from what ``upload_info`` contains, checked
    in this order:

    1. ``gcsUploadParams`` (GCP stack): PUT to
       ``https://storage.googleapis.com/{bucket}/{key}`` with an
       ``Authorization: Bearer`` header. Success: HTTP 200.
    2. ``absUploadParams`` (Azure stack): PUT to the URL produced by
       ``_build_abs_upload_url`` with ``x-ms-blob-type: BlockBlob``.
       Success: HTTP 200 or 201.
    3. ``uploadParams.credentials`` (AWS stack with federation): PUT to
       ``https://{bucket}.s3.{region}.amazonaws.com/{key}`` with headers
       from ``_s3_signed_headers``. The whole file is read into memory
       because the signer is given the payload. Success: HTTP 200.
    4. ``uploadParams`` without credentials (legacy S3 presigned POST,
       deprecated on newer stacks): multipart form POST to
       ``upload_info["url"]``. Success: HTTP 200 or 204.
    5. Otherwise: plain PUT to ``upload_info["url"]``.
       Success: HTTP 200 or 201.

    Args:
        upload_info: Full response dict from prepare_file_upload().
        file_path: Local path to the file.

    Raises:
        KeboolaApiError: If the storage service answers with a status
            code outside the accepted set for the chosen path
            (non-retryable, ``ErrorCode.UPLOAD_FAILED``).
    """
    source = Path(file_path)

    gcs = upload_info.get("gcsUploadParams")
    azure = upload_info.get("absUploadParams")
    s3 = upload_info.get("uploadParams") or {}

    if gcs:
        # GCP: PUT via GCS JSON API with short-lived OAuth2 bearer token
        bucket = gcs["bucket"]
        key = gcs["key"]
        access_token = gcs["access_token"]
        target = f"https://storage.googleapis.com/{bucket}/{key}"
        auth_headers = {"Authorization": f"Bearer {access_token}"}
        with source.open("rb") as stream, httpx.Client(timeout=FILE_UPLOAD_TIMEOUT) as http:
            result = http.put(target, content=stream, headers=auth_headers)
        accepted = (200,)
    elif azure:
        # Azure Blob Storage: PUT with write-capable SAS from absUploadParams
        target = _build_abs_upload_url(azure)
        blob_headers = {"x-ms-blob-type": "BlockBlob"}
        with source.open("rb") as stream, httpx.Client(timeout=FILE_UPLOAD_TIMEOUT) as http:
            result = http.put(target, content=stream, headers=blob_headers)
        accepted = (200, 201)
    elif s3.get("credentials"):
        # AWS S3 with federation token: PUT with SigV4 signed headers
        creds = s3["credentials"]
        bucket = s3["bucket"]
        key = s3["key"]
        region = upload_info.get("region", "us-east-1")
        target = f"https://{bucket}.s3.{region}.amazonaws.com/{key}"
        payload = source.read_bytes()
        signed_headers = _s3_signed_headers(target, creds, region, method="PUT", payload=payload)
        with httpx.Client(timeout=FILE_UPLOAD_TIMEOUT) as http:
            result = http.put(target, content=payload, headers=signed_headers)
        accepted = (200,)
    elif s3:
        # Legacy S3 presigned POST: multipart form — uploadParams first, file last
        target = upload_info["url"]
        with httpx.Client(timeout=FILE_UPLOAD_TIMEOUT) as http:
            parts: list[tuple[str, Any]] = [
                (field, (None, str(text))) for field, text in s3.items()
            ]
            with source.open("rb") as stream:
                parts.append(("file", (source.name, stream, "application/octet-stream")))
                result = http.post(target, files=parts)
        accepted = (200, 204)
    else:
        # Fallback: signed URL PUT (no extra auth needed)
        target = upload_info["url"]
        with source.open("rb") as stream, httpx.Client(timeout=FILE_UPLOAD_TIMEOUT) as http:
            result = http.put(target, content=stream)
        accepted = (200, 201)

    if result.status_code in accepted:
        return
    raise KeboolaApiError(
        message=f"Cloud storage upload failed (HTTP {result.status_code})",
        status_code=result.status_code,
        error_code=ErrorCode.UPLOAD_FAILED,
        retryable=False,
    )
|
|
1683
|
+
|
|
1684
|
+
def import_table_async(
    self,
    table_id: str,
    file_id: int,
    incremental: bool = False,
    delimiter: str = ",",
    enclosure: str = '"',
    branch_id: int | None = None,
) -> dict[str, Any]:
    """Start the import of an already-uploaded file into a table (step 3).

    Posts a form-encoded request to ``.../tables/{id}/import-async`` and
    passes the response to ``_wait_for_storage_job`` with
    ``max_wait=IMPORT_JOB_MAX_WAIT``.

    Args:
        table_id: Target table ID (e.g. "in.c-my-bucket.my-table").
        file_id: File ID returned by prepare_file_upload().
        incremental: If True, append rows; if False, full load.
            Sent as "1" / "0".
        delimiter: CSV column delimiter.
        enclosure: CSV value enclosure character.
        branch_id: If set, target a specific dev branch.

    Returns:
        Whatever ``_wait_for_storage_job`` returns (the import job dict).
    """
    base = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
    endpoint = f"{base}/tables/{quote(table_id, safe='')}/import-async"
    form: dict[str, str] = {
        "dataFileId": str(file_id),
        "incremental": "1" if incremental else "0",
        "delimiter": delimiter,
        "enclosure": enclosure,
    }
    job_payload = self._request("POST", endpoint, data=form).json()
    return self._wait_for_storage_job(job_payload, max_wait=IMPORT_JOB_MAX_WAIT)
|
|
1718
|
+
|
|
1719
|
+
def upload_table(
|
|
1720
|
+
self,
|
|
1721
|
+
table_id: str,
|
|
1722
|
+
file_path: str,
|
|
1723
|
+
incremental: bool = False,
|
|
1724
|
+
delimiter: str = ",",
|
|
1725
|
+
enclosure: str = '"',
|
|
1726
|
+
branch_id: int | None = None,
|
|
1727
|
+
) -> dict[str, Any]:
|
|
1728
|
+
"""Upload a CSV file into an existing table (async, waits for completion).
|
|
1729
|
+
|
|
1730
|
+
Uses the file-first async flow to support files up to 5 GB:
|
|
1731
|
+
1. Register file with Storage API → get presigned cloud upload URL
|
|
1732
|
+
2. Upload file bytes directly to cloud storage (GCP bearer token, S3 presigned POST, or signed URL PUT)
|
|
1733
|
+
3. Trigger import-async job → poll until complete
|
|
1734
|
+
|
|
1735
|
+
Args:
|
|
1736
|
+
table_id: Target table ID (e.g. "in.c-my-bucket.my-table").
|
|
1737
|
+
file_path: Local path to the CSV file.
|
|
1738
|
+
incremental: If True, append rows; if False (default), full load.
|
|
1739
|
+
delimiter: CSV column delimiter (default ",").
|
|
1740
|
+
enclosure: CSV value enclosure character (default '"').
|
|
1741
|
+
branch_id: If set, target a specific dev branch.
|
|
1742
|
+
|
|
1743
|
+
Returns:
|
|
1744
|
+
Import results dict with importedRowsCount, warnings, etc.
|
|
1745
|
+
"""
|
|
1746
|
+
p = Path(file_path)
|
|
1747
|
+
size_bytes = p.stat().st_size
|
|
1748
|
+
upload_info = self.prepare_file_upload(name=p.name, size_bytes=size_bytes)
|
|
1749
|
+
file_id = upload_info["id"]
|
|
1750
|
+
self._upload_to_cloud(upload_info, file_path)
|
|
1751
|
+
job = self.import_table_async(
|
|
1752
|
+
table_id=table_id,
|
|
1753
|
+
file_id=file_id,
|
|
1754
|
+
incremental=incremental,
|
|
1755
|
+
delimiter=delimiter,
|
|
1756
|
+
enclosure=enclosure,
|
|
1757
|
+
branch_id=branch_id,
|
|
1758
|
+
)
|
|
1759
|
+
return job.get("results", {})
|
|
1760
|
+
|
|
1761
|
+
def delete_table(
|
|
1762
|
+
self,
|
|
1763
|
+
table_id: str,
|
|
1764
|
+
branch_id: int | None = None,
|
|
1765
|
+
force: bool = False,
|
|
1766
|
+
) -> dict[str, Any]:
|
|
1767
|
+
"""Delete a storage table (async, waits for completion).
|
|
1768
|
+
|
|
1769
|
+
Args:
|
|
1770
|
+
table_id: Full table ID (e.g. "in.c-bucket.table").
|
|
1771
|
+
branch_id: If set, target a specific dev branch.
|
|
1772
|
+
force: If True, cascade-delete the table and all its aliases.
|
|
1773
|
+
|
|
1774
|
+
Returns:
|
|
1775
|
+
Completed storage job dict.
|
|
1776
|
+
"""
|
|
1777
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
1778
|
+
safe_id = quote(table_id, safe="")
|
|
1779
|
+
params: dict[str, str] = {"async": "true"}
|
|
1780
|
+
if force:
|
|
1781
|
+
params["force"] = "true"
|
|
1782
|
+
response = self._request("DELETE", f"{prefix}/tables/{safe_id}", params=params)
|
|
1783
|
+
return self._wait_for_storage_job(response.json())
|
|
1784
|
+
|
|
1785
|
+
def truncate_table(
|
|
1786
|
+
self,
|
|
1787
|
+
table_id: str,
|
|
1788
|
+
branch_id: int | None = None,
|
|
1789
|
+
) -> dict[str, Any]:
|
|
1790
|
+
"""Truncate a storage table (delete all rows; preserve schema).
|
|
1791
|
+
|
|
1792
|
+
The Storage API requires the ``allowTruncate=1`` safety opt-in to
|
|
1793
|
+
confirm the caller intends to remove every row when no filter
|
|
1794
|
+
clauses are sent. The endpoint is inherently asynchronous on
|
|
1795
|
+
every branch -- it always returns ``HTTP 202`` with a queued
|
|
1796
|
+
storage job (``operationName: tableRowsDelete``), which
|
|
1797
|
+
``_wait_for_storage_job`` polls to completion. Passing
|
|
1798
|
+
``async=true`` is rejected by the API as an unknown field, so
|
|
1799
|
+
we do NOT send it (this is a deliberate departure from
|
|
1800
|
+
``delete_table``'s contract -- see the truncate-table gotcha
|
|
1801
|
+
in plugins/.../gotchas.md for the live-API evidence).
|
|
1802
|
+
|
|
1803
|
+
The table definition (columns, types, primary key, descriptions,
|
|
1804
|
+
sharing edges, and dependents) is preserved -- only the rows
|
|
1805
|
+
are removed.
|
|
1806
|
+
|
|
1807
|
+
Args:
|
|
1808
|
+
table_id: Full table ID (e.g. "in.c-bucket.table").
|
|
1809
|
+
branch_id: If set, target a specific dev branch.
|
|
1810
|
+
|
|
1811
|
+
Returns:
|
|
1812
|
+
Completed storage job dict.
|
|
1813
|
+
"""
|
|
1814
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
1815
|
+
safe_id = quote(table_id, safe="")
|
|
1816
|
+
params: dict[str, str] = {"allowTruncate": "1"}
|
|
1817
|
+
response = self._request("DELETE", f"{prefix}/tables/{safe_id}/rows", params=params)
|
|
1818
|
+
return self._wait_for_storage_job(response.json())
|
|
1819
|
+
|
|
1820
|
+
def delete_column(
|
|
1821
|
+
self,
|
|
1822
|
+
table_id: str,
|
|
1823
|
+
column_name: str,
|
|
1824
|
+
branch_id: int | None = None,
|
|
1825
|
+
force: bool = False,
|
|
1826
|
+
) -> dict[str, Any]:
|
|
1827
|
+
"""Delete a column from a storage table (async, waits for completion).
|
|
1828
|
+
|
|
1829
|
+
Args:
|
|
1830
|
+
table_id: Full table ID (e.g. "in.c-bucket.table").
|
|
1831
|
+
column_name: Name of the column to delete.
|
|
1832
|
+
branch_id: If set, target a specific dev branch.
|
|
1833
|
+
force: If True, also delete from aliased tables.
|
|
1834
|
+
|
|
1835
|
+
Returns:
|
|
1836
|
+
Completed storage job dict.
|
|
1837
|
+
"""
|
|
1838
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
1839
|
+
safe_table_id = quote(table_id, safe="")
|
|
1840
|
+
safe_column = quote(column_name, safe="")
|
|
1841
|
+
params: dict[str, str] = {"async": "true"}
|
|
1842
|
+
if force:
|
|
1843
|
+
params["force"] = "true"
|
|
1844
|
+
response = self._request(
|
|
1845
|
+
"DELETE",
|
|
1846
|
+
f"{prefix}/tables/{safe_table_id}/columns/{safe_column}",
|
|
1847
|
+
params=params,
|
|
1848
|
+
)
|
|
1849
|
+
return self._wait_for_storage_job(response.json())
|
|
1850
|
+
|
|
1851
|
+
def swap_tables(
    self,
    table_id: str,
    target_table_id: str,
    branch_id: int,
) -> dict[str, Any]:
    """Exchange two storage tables inside a branch and wait for completion.

    The two tables trade physical positions. Aliases keep pointing at the
    same physical position, so after the swap they expose the OTHER
    table's data. The request is always branch-scoped, which is why
    ``branch_id`` is mandatory; any branch is accepted, including the
    default/production branch. Swapping on the default branch is the
    supported way to retype a production table, because a dev-branch merge
    does not propagate storage schema.

    The API answers with a queued storage job (``operationName:
    tableSwap``). Like ``delete_table``, this method polls that job to
    completion before returning. (The PHP reference client returns the raw
    initial response, but the operation is asynchronous on every backend
    tested and callers expect a finished swap on return.)

    Args:
        table_id: Full ID of the first table (e.g. "in.c-bucket.table").
        target_table_id: Full ID of the table to swap with; sent as
            ``targetTableId`` in the JSON body.
        branch_id: Branch ID used for the ``/v2/storage/branch/{id}`` route.

    Returns:
        Completed storage job dict.
    """
    endpoint = "/v2/storage/branch/{}/tables/{}/swap".format(
        branch_id, quote(table_id, safe="")
    )
    payload = {"targetTableId": target_table_id}
    queued_job = self._request("POST", endpoint, json=payload).json()
    return self._wait_for_storage_job(queued_job)
|
|
1885
|
+
|
|
1886
|
+
def pull_table(self, table_id: str, branch_id: int) -> dict[str, Any]:
    """Materialize a default-branch table inside a dev branch.

    On ``storage-branches`` projects a dev branch reads production tables
    transparently (copy-on-write) until the first write. Operations that
    mutate a table in the branch, such as ``swap_tables`` or a column drop,
    need a branch-local copy first; without one the Storage API reports
    the bucket as "not found" in the branch. This endpoint makes that
    copy, from the default (production) branch into the branch's isolated
    storage. It is the same call the platform issues on a branch's first
    write to a production table.

    The pull is one-way (default -> branch). The queued storage job in the
    response is polled to completion before returning, as in
    ``swap_tables``.

    Args:
        table_id: Full ID of the table to pull (e.g. "in.c-bucket.table").
        branch_id: Target development branch ID. The source is always the
            default/production branch.

    Returns:
        Completed storage job dict.
    """
    endpoint = "/v2/storage/branch/{}/tables/{}/pull".format(
        branch_id, quote(table_id, safe="")
    )
    queued_job = self._request("POST", endpoint).json()
    return self._wait_for_storage_job(queued_job)
|
|
1915
|
+
|
|
1916
|
+
def list_tables_with_metadata(self) -> list[dict[str, Any]]:
    """Fetch every storage table including columns, metadata and bucket info.

    Thin wrapper over ``list_tables`` with a fixed ``include`` selection.

    Returns:
        The list returned by ``list_tables`` for that selection.
    """
    include_spec = "columns,metadata,buckets"
    tables = self.list_tables(include=include_spec)
    return tables
|
|
1923
|
+
|
|
1924
|
+
@staticmethod
|
|
1925
|
+
def _apply_table_filters(
|
|
1926
|
+
params: dict[str, Any],
|
|
1927
|
+
*,
|
|
1928
|
+
where_column: str | None = None,
|
|
1929
|
+
where_operator: str = "eq",
|
|
1930
|
+
where_values: list[str] | None = None,
|
|
1931
|
+
changed_since: str | None = None,
|
|
1932
|
+
changed_until: str | None = None,
|
|
1933
|
+
) -> None:
|
|
1934
|
+
"""Mutate ``params`` with Storage table export/preview filter clauses.
|
|
1935
|
+
|
|
1936
|
+
Shared by :meth:`get_table_data_preview` and :meth:`export_table_async`
|
|
1937
|
+
so the ``whereColumn`` / ``whereOperator`` / ``whereValues[]`` and
|
|
1938
|
+
``changedSince`` / ``changedUntil`` contract is identical across the
|
|
1939
|
+
sync-preview and async-export endpoints.
|
|
1940
|
+
|
|
1941
|
+
Args:
|
|
1942
|
+
where_column: Column to filter on. Must be paired with ``where_values``.
|
|
1943
|
+
where_operator: ``"eq"`` or ``"neq"`` (only meaningful with a filter).
|
|
1944
|
+
where_values: Values the column is matched against (OR within the set).
|
|
1945
|
+
changed_since: Lower bound on import time -- a unix timestamp or a
|
|
1946
|
+
strtotime string like ``"-2 days"``.
|
|
1947
|
+
changed_until: Upper bound on import time (same formats).
|
|
1948
|
+
|
|
1949
|
+
Raises:
|
|
1950
|
+
ValueError: On an invalid ``where_operator`` or a half-specified
|
|
1951
|
+
where-clause (a column without values, or values without a column).
|
|
1952
|
+
"""
|
|
1953
|
+
if (where_column is None) != (where_values is None):
|
|
1954
|
+
raise ValueError(
|
|
1955
|
+
"where_column and where_values must be given together "
|
|
1956
|
+
"(the column to match and the values to match it against)."
|
|
1957
|
+
)
|
|
1958
|
+
if where_column is not None:
|
|
1959
|
+
if where_operator not in ("eq", "neq"):
|
|
1960
|
+
raise ValueError(f"where_operator must be 'eq' or 'neq', got {where_operator!r}.")
|
|
1961
|
+
params["whereColumn"] = where_column
|
|
1962
|
+
params["whereOperator"] = where_operator
|
|
1963
|
+
params["whereValues[]"] = where_values
|
|
1964
|
+
if changed_since is not None:
|
|
1965
|
+
params["changedSince"] = changed_since
|
|
1966
|
+
if changed_until is not None:
|
|
1967
|
+
params["changedUntil"] = changed_until
|
|
1968
|
+
|
|
1969
|
+
def get_table_data_preview(
|
|
1970
|
+
self,
|
|
1971
|
+
table_id: str,
|
|
1972
|
+
limit: int = 100,
|
|
1973
|
+
columns: list[str] | None = None,
|
|
1974
|
+
*,
|
|
1975
|
+
where_column: str | None = None,
|
|
1976
|
+
where_operator: str = "eq",
|
|
1977
|
+
where_values: list[str] | None = None,
|
|
1978
|
+
changed_since: str | None = None,
|
|
1979
|
+
changed_until: str | None = None,
|
|
1980
|
+
) -> str:
|
|
1981
|
+
"""Get a CSV preview of table data.
|
|
1982
|
+
|
|
1983
|
+
Args:
|
|
1984
|
+
table_id: Full table ID (e.g. "in.c-bucket.table").
|
|
1985
|
+
limit: Max number of rows to return.
|
|
1986
|
+
columns: Optional list of column names to export.
|
|
1987
|
+
Storage API limits sync export to 30 columns max.
|
|
1988
|
+
where_column: Filter to rows where this column matches ``where_values``.
|
|
1989
|
+
where_operator: ``"eq"`` (default) or ``"neq"``.
|
|
1990
|
+
where_values: Values for the ``where_column`` filter.
|
|
1991
|
+
changed_since: Only rows imported since this time (unix ts / strtotime).
|
|
1992
|
+
changed_until: Only rows imported up to this time.
|
|
1993
|
+
|
|
1994
|
+
Returns:
|
|
1995
|
+
CSV string with table data preview.
|
|
1996
|
+
"""
|
|
1997
|
+
safe_id = quote(table_id, safe="")
|
|
1998
|
+
params: dict[str, Any] = {"limit": limit}
|
|
1999
|
+
if columns:
|
|
2000
|
+
params["columns"] = ",".join(columns)
|
|
2001
|
+
self._apply_table_filters(
|
|
2002
|
+
params,
|
|
2003
|
+
where_column=where_column,
|
|
2004
|
+
where_operator=where_operator,
|
|
2005
|
+
where_values=where_values,
|
|
2006
|
+
changed_since=changed_since,
|
|
2007
|
+
changed_until=changed_until,
|
|
2008
|
+
)
|
|
2009
|
+
response = self._request(
|
|
2010
|
+
"GET",
|
|
2011
|
+
f"/v2/storage/tables/{safe_id}/data-preview",
|
|
2012
|
+
params=params,
|
|
2013
|
+
)
|
|
2014
|
+
return response.text
|
|
2015
|
+
|
|
2016
|
+
def export_table_async(
|
|
2017
|
+
self,
|
|
2018
|
+
table_id: str,
|
|
2019
|
+
columns: list[str] | None = None,
|
|
2020
|
+
limit: int | None = None,
|
|
2021
|
+
branch_id: int | None = None,
|
|
2022
|
+
file_type: str = "csv",
|
|
2023
|
+
*,
|
|
2024
|
+
where_column: str | None = None,
|
|
2025
|
+
where_operator: str = "eq",
|
|
2026
|
+
where_values: list[str] | None = None,
|
|
2027
|
+
changed_since: str | None = None,
|
|
2028
|
+
changed_until: str | None = None,
|
|
2029
|
+
) -> dict[str, Any]:
|
|
2030
|
+
"""Start an async table export and wait for completion.
|
|
2031
|
+
|
|
2032
|
+
Args:
|
|
2033
|
+
table_id: Full table ID (e.g. "in.c-bucket.table").
|
|
2034
|
+
columns: Optional list of column names to export.
|
|
2035
|
+
limit: Optional max number of rows to export.
|
|
2036
|
+
branch_id: If set, target a specific dev branch.
|
|
2037
|
+
file_type: Output format, either "csv" (default) or "parquet".
|
|
2038
|
+
Parquet exports are always sliced and Snappy-compressed inside
|
|
2039
|
+
the parquet format (not gzipped at the slice level).
|
|
2040
|
+
where_column: Filter to rows where this column matches ``where_values``.
|
|
2041
|
+
where_operator: ``"eq"`` (default) or ``"neq"``.
|
|
2042
|
+
where_values: Values for the ``where_column`` filter.
|
|
2043
|
+
changed_since: Only rows imported since this time (unix ts / strtotime).
|
|
2044
|
+
changed_until: Only rows imported up to this time.
|
|
2045
|
+
|
|
2046
|
+
Returns:
|
|
2047
|
+
Completed export job dict (results contain file info).
|
|
2048
|
+
"""
|
|
2049
|
+
if file_type not in ("csv", "parquet"):
|
|
2050
|
+
raise ValueError(f"file_type must be 'csv' or 'parquet', got {file_type!r}")
|
|
2051
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
2052
|
+
safe_id = quote(table_id, safe="")
|
|
2053
|
+
params: dict[str, Any] = {"fileType": file_type}
|
|
2054
|
+
if columns:
|
|
2055
|
+
params["columns"] = ",".join(columns)
|
|
2056
|
+
if limit is not None:
|
|
2057
|
+
params["limit"] = str(limit)
|
|
2058
|
+
self._apply_table_filters(
|
|
2059
|
+
params,
|
|
2060
|
+
where_column=where_column,
|
|
2061
|
+
where_operator=where_operator,
|
|
2062
|
+
where_values=where_values,
|
|
2063
|
+
changed_since=changed_since,
|
|
2064
|
+
changed_until=changed_until,
|
|
2065
|
+
)
|
|
2066
|
+
response = self._request(
|
|
2067
|
+
"POST",
|
|
2068
|
+
f"{prefix}/tables/{safe_id}/export-async",
|
|
2069
|
+
data=params,
|
|
2070
|
+
)
|
|
2071
|
+
return self._wait_for_storage_job(response.json(), max_wait=EXPORT_JOB_MAX_WAIT)
|
|
2072
|
+
|
|
2073
|
+
def add_column(
|
|
2074
|
+
self,
|
|
2075
|
+
table_id: str,
|
|
2076
|
+
name: str,
|
|
2077
|
+
definition: dict[str, Any] | None = None,
|
|
2078
|
+
branch_id: int | None = None,
|
|
2079
|
+
) -> dict[str, Any]:
|
|
2080
|
+
"""Add a single column to an existing table (synchronous).
|
|
2081
|
+
|
|
2082
|
+
Unlike ``delete_column`` (async storage job), the Storage API
|
|
2083
|
+
``POST /tables/{id}/columns`` endpoint is synchronous and returns the
|
|
2084
|
+
updated table resource directly -- there is no job to poll.
|
|
2085
|
+
|
|
2086
|
+
Args:
|
|
2087
|
+
table_id: Full table ID (e.g. "in.c-bucket.table").
|
|
2088
|
+
name: Name of the new column.
|
|
2089
|
+
definition: Optional typed-column definition for a typed table, e.g.
|
|
2090
|
+
``{"type": "NUMBER", "length": "18,2", "nullable": False,
|
|
2091
|
+
"default": "0"}``. Omit for an untyped column.
|
|
2092
|
+
branch_id: If set, target a specific dev branch.
|
|
2093
|
+
|
|
2094
|
+
Returns:
|
|
2095
|
+
The updated table resource dict from the API.
|
|
2096
|
+
"""
|
|
2097
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
2098
|
+
safe_id = quote(table_id, safe="")
|
|
2099
|
+
body: dict[str, Any] = {"name": name}
|
|
2100
|
+
if definition:
|
|
2101
|
+
body["definition"] = definition
|
|
2102
|
+
response = self._request("POST", f"{prefix}/tables/{safe_id}/columns", json=body)
|
|
2103
|
+
return response.json()
|
|
2104
|
+
|
|
2105
|
+
def get_file_info(self, file_id: int, branch_id: int | None = None) -> dict[str, Any]:
|
|
2106
|
+
"""Get file metadata including download URL.
|
|
2107
|
+
|
|
2108
|
+
Args:
|
|
2109
|
+
file_id: Storage file ID (from export job results).
|
|
2110
|
+
branch_id: If set, query file from a specific dev branch scope.
|
|
2111
|
+
|
|
2112
|
+
Returns:
|
|
2113
|
+
File resource dict with 'url', 'isSliced', 'sizeBytes', etc.
|
|
2114
|
+
"""
|
|
2115
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
2116
|
+
response = self._request(
|
|
2117
|
+
"GET",
|
|
2118
|
+
f"{prefix}/files/{file_id}",
|
|
2119
|
+
params={"federationToken": "1"},
|
|
2120
|
+
)
|
|
2121
|
+
return response.json()
|
|
2122
|
+
|
|
2123
|
+
def list_files(
|
|
2124
|
+
self,
|
|
2125
|
+
limit: int = 20,
|
|
2126
|
+
offset: int = 0,
|
|
2127
|
+
tags: list[str] | None = None,
|
|
2128
|
+
since_id: int | None = None,
|
|
2129
|
+
query: str | None = None,
|
|
2130
|
+
branch_id: int | None = None,
|
|
2131
|
+
) -> list[dict[str, Any]]:
|
|
2132
|
+
"""List Storage Files with optional filtering.
|
|
2133
|
+
|
|
2134
|
+
Args:
|
|
2135
|
+
limit: Max number of files to return.
|
|
2136
|
+
offset: Pagination offset.
|
|
2137
|
+
tags: Filter by tags (AND logic — all tags must match).
|
|
2138
|
+
since_id: Return only files with ID greater than this.
|
|
2139
|
+
query: Full-text search query on file name.
|
|
2140
|
+
branch_id: If set, list files from a specific dev branch.
|
|
2141
|
+
|
|
2142
|
+
Returns:
|
|
2143
|
+
List of file resource dicts.
|
|
2144
|
+
"""
|
|
2145
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
2146
|
+
params: dict[str, Any] = {"limit": limit, "offset": offset}
|
|
2147
|
+
if tags:
|
|
2148
|
+
for i, tag in enumerate(tags):
|
|
2149
|
+
params[f"tags[{i}]"] = tag
|
|
2150
|
+
if since_id is not None:
|
|
2151
|
+
params["sinceId"] = since_id
|
|
2152
|
+
if query:
|
|
2153
|
+
params["q"] = query
|
|
2154
|
+
response = self._request("GET", f"{prefix}/files", params=params)
|
|
2155
|
+
return response.json()
|
|
2156
|
+
|
|
2157
|
+
def upload_file(
|
|
2158
|
+
self,
|
|
2159
|
+
file_path: str,
|
|
2160
|
+
name: str | None = None,
|
|
2161
|
+
tags: list[str] | None = None,
|
|
2162
|
+
is_permanent: bool = False,
|
|
2163
|
+
notify: bool = False,
|
|
2164
|
+
branch_id: int | None = None,
|
|
2165
|
+
) -> dict[str, Any]:
|
|
2166
|
+
"""Upload a local file to Storage Files.
|
|
2167
|
+
|
|
2168
|
+
Wraps prepare_file_upload + _upload_to_cloud into a single call.
|
|
2169
|
+
|
|
2170
|
+
Args:
|
|
2171
|
+
file_path: Local path to the file to upload.
|
|
2172
|
+
name: Custom filename (defaults to local file basename).
|
|
2173
|
+
tags: Optional list of tags to assign.
|
|
2174
|
+
is_permanent: If True, file is not auto-deleted after 15 days.
|
|
2175
|
+
notify: If True, send notification on upload completion.
|
|
2176
|
+
branch_id: If set, upload to a specific dev branch.
|
|
2177
|
+
|
|
2178
|
+
Returns:
|
|
2179
|
+
File resource dict with id, name, sizeBytes, tags, url.
|
|
2180
|
+
"""
|
|
2181
|
+
p = Path(file_path)
|
|
2182
|
+
size_bytes = p.stat().st_size
|
|
2183
|
+
file_name = name or p.name
|
|
2184
|
+
upload_info = self.prepare_file_upload(
|
|
2185
|
+
name=file_name,
|
|
2186
|
+
size_bytes=size_bytes,
|
|
2187
|
+
tags=tags,
|
|
2188
|
+
is_permanent=is_permanent,
|
|
2189
|
+
notify=notify,
|
|
2190
|
+
)
|
|
2191
|
+
self._upload_to_cloud(upload_info, file_path)
|
|
2192
|
+
# Return file info (prepare response has the file metadata)
|
|
2193
|
+
return {
|
|
2194
|
+
"id": upload_info["id"],
|
|
2195
|
+
"name": upload_info.get("name", file_name),
|
|
2196
|
+
"sizeBytes": size_bytes,
|
|
2197
|
+
"tags": upload_info.get("tags", tags or []),
|
|
2198
|
+
"isPermanent": upload_info.get("isPermanent", is_permanent),
|
|
2199
|
+
"created": upload_info.get("created"),
|
|
2200
|
+
}
|
|
2201
|
+
|
|
2202
|
+
def delete_file(self, file_id: int, branch_id: int | None = None) -> None:
|
|
2203
|
+
"""Delete a Storage File.
|
|
2204
|
+
|
|
2205
|
+
Args:
|
|
2206
|
+
file_id: Storage file ID.
|
|
2207
|
+
branch_id: If set, target a file in a specific dev branch scope.
|
|
2208
|
+
"""
|
|
2209
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
2210
|
+
self._request("DELETE", f"{prefix}/files/{file_id}")
|
|
2211
|
+
|
|
2212
|
+
def tag_file(self, file_id: int, tag: str, branch_id: int | None = None) -> None:
|
|
2213
|
+
"""Add a tag to a Storage File.
|
|
2214
|
+
|
|
2215
|
+
Args:
|
|
2216
|
+
file_id: Storage file ID.
|
|
2217
|
+
tag: Tag string to add.
|
|
2218
|
+
branch_id: If set, target a file in a specific dev branch scope.
|
|
2219
|
+
"""
|
|
2220
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
2221
|
+
self._request("POST", f"{prefix}/files/{file_id}/tags", data={"tag": tag})
|
|
2222
|
+
|
|
2223
|
+
def untag_file(self, file_id: int, tag: str, branch_id: int | None = None) -> None:
|
|
2224
|
+
"""Remove a tag from a Storage File.
|
|
2225
|
+
|
|
2226
|
+
Args:
|
|
2227
|
+
file_id: Storage file ID.
|
|
2228
|
+
tag: Tag string to remove.
|
|
2229
|
+
branch_id: If set, target a file in a specific dev branch scope.
|
|
2230
|
+
"""
|
|
2231
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
2232
|
+
safe_tag = quote(tag, safe="")
|
|
2233
|
+
self._request("DELETE", f"{prefix}/files/{file_id}/tags/{safe_tag}")
|
|
2234
|
+
|
|
2235
|
+
def download_sliced_file(self, file_detail: dict[str, Any], output_path: str) -> int:
    """Download a sliced file by fetching manifest and concatenating slices.

    Handles S3 (SigV4 auth) and GCS (bearer token) providers via the
    downloader returned by ``_prepare_sliced_download``.
    Decompresses gzipped slices transparently.

    Streams each slice chunk-by-chunk into a temp file and concatenates
    into ``output_path``. Peak RAM is O(chunk size), not O(slice size) —
    required for multi-GB tables on memory-constrained hosts (issue #187).

    The manifest `url` from file info is already a presigned URL (download
    directly). Manifest entries have cloud-native URLs (s3://, gs://) that
    need auth — we build HTTPS URLs from the s3Path/gcsPath credentials.

    Args:
        file_detail: Full file info dict from get_file_info()
            (must include provider credentials from federationToken=1).
        output_path: Local file path to write to. Parent directories are
            created if missing; an existing file is overwritten.

    Returns:
        Number of bytes written (sum of the on-disk slice sizes after any
        decompression).
    """
    import os
    import shutil
    import tempfile

    entries, base_url, downloader, _manifest_data = self._prepare_sliced_download(file_detail)

    # Stream each slice into a temp file, then copy-append into output.
    # Keeping per-slice temp files on disk (not in RAM) is the whole point.
    total = 0
    out_path = Path(output_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("wb") as out_fh:
        for entry in entries:
            entry_url = entry.get("url", "")
            slice_url = downloader.resolve_slice_url(base_url, entry_url, file_detail)
            is_gz = entry_url.split("?")[0].endswith(".gz")
            # The downloader writes to the temp file *by name*. Reopening a
            # NamedTemporaryFile by name while its handle is still open is
            # not portable (it fails on Windows), so reserve the path with
            # mkstemp, close the descriptor, and clean up explicitly.
            fd, tmp_name = tempfile.mkstemp(dir=out_path.parent, prefix=".slice-")
            os.close(fd)
            tmp_path = Path(tmp_name)
            try:
                downloader.stream_to_file(slice_url, tmp_name, decompress_gzip=is_gz)
                # Open a fresh handle after the write so the copy sees the
                # final content however the downloader produced the file.
                with tmp_path.open("rb") as slice_fh:
                    shutil.copyfileobj(slice_fh, out_fh, length=FILE_DOWNLOAD_CHUNK_SIZE)
                total += tmp_path.stat().st_size
            finally:
                # Never leave ".slice-*" files behind, even on failure.
                tmp_path.unlink(missing_ok=True)

    return total
|
|
2281
|
+
|
|
2282
|
+
def _prepare_sliced_download(
    self, file_detail: dict[str, Any]
) -> tuple[list[dict[str, Any]], str, "_CloudDownloader", bytes]:
    """Download and parse the slice manifest and build the download context.

    The manifest is small JSON (few KB even for TB tables), so it is read
    fully into memory. Its ``entries`` list holds the per-slice URLs that
    callers iterate.

    Args:
        file_detail: File info dict. ``url`` is fetched as the manifest;
            ``provider`` is used only for the log line. The dict is also
            passed to ``_CloudDownloader.create`` and ``resolve_base_url``.

    Returns:
        A 4-tuple ``(entries, base_url, downloader, raw_manifest_bytes)``.
        The raw manifest bytes are returned so callers can persist them
        next to the slices.

    Raises:
        KeboolaApiError: If the manifest contains no entries
            (``ErrorCode.EXPORT_EMPTY_MANIFEST``, not retryable).
    """
    import json as json_mod

    provider_name = file_detail.get("provider", "")
    cloud = _CloudDownloader.create(file_detail)

    with httpx.Client(timeout=FILE_DOWNLOAD_TIMEOUT) as client:
        manifest_reply = client.get(file_detail["url"])
        manifest_reply.raise_for_status()
        raw_manifest = manifest_reply.content

    slice_entries = json_mod.loads(raw_manifest).get("entries", [])
    if not slice_entries:
        raise KeboolaApiError(
            message="Sliced file manifest has no entries",
            status_code=500,
            error_code=ErrorCode.EXPORT_EMPTY_MANIFEST,
            retryable=False,
        )

    logger.info("Downloading %d slices (provider=%s)", len(slice_entries), provider_name)
    return slice_entries, cloud.resolve_base_url(file_detail), cloud, raw_manifest
|
|
2316
|
+
|
|
2317
|
+
def download_sliced_file_to_dir(
    self, file_detail: dict[str, Any], output_dir: str
) -> dict[str, Any]:
    """Download a sliced file, keeping every slice as its own local file.

    ``download_sliced_file`` binary-concatenates slices; this method
    instead writes each manifest entry to a separate file under
    ``output_dir``. That is required for formats like Parquet, where each
    slice is a self-contained file with its own footer and cannot be
    safely concatenated.

    The raw manifest is stored as ``output_dir/_manifest.json`` so the
    slice set stays self-describing. The leading underscore follows the
    Hive/Spark/pyarrow convention that makes Parquet readers skip the file
    when scanning the directory as a dataset.

    A slice whose URL path ends in ``.gz`` (typical for CSV) is
    decompressed and written without the ``.gz`` suffix. Parquet slices
    are written as-is (Snappy compression lives inside the format). An
    entry that yields an empty file name is stored as ``part-<index>``,
    with the index zero-padded to five digits.

    Args:
        file_detail: Full file info dict from get_file_info() with
            federationToken=1 provider credentials.
        output_dir: Directory to write slices into. Created if missing.

    Returns:
        Dict with ``output_dir``, ``slice_count``, ``total_bytes``, and
        ``slices`` (list of ``{path, size_bytes}``).
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    entries, base_url, downloader, manifest_data = self._prepare_sliced_download(file_detail)

    # Keep the manifest next to the slices for traceability.
    manifest_file = target_dir / "_manifest.json"
    manifest_file.write_bytes(manifest_data)

    written_slices: list[dict[str, Any]] = []
    byte_total = 0

    for position, entry in enumerate(entries):
        raw_url = entry.get("url", "")
        resolved_url = downloader.resolve_slice_url(base_url, raw_url, file_detail)

        # Drop the query string, then take the last path segment as the name.
        url_path = raw_url.partition("?")[0]
        gzipped = url_path.endswith(".gz")
        file_name = url_path.rpartition("/")[2]
        if gzipped:
            file_name = file_name.removesuffix(".gz")
        file_name = file_name or f"part-{position:05d}"

        destination = target_dir / file_name
        size = downloader.stream_to_file(resolved_url, destination, decompress_gzip=gzipped)
        written_slices.append({"path": str(destination.resolve()), "size_bytes": size})
        byte_total += size

    return {
        "output_dir": str(target_dir.resolve()),
        "slice_count": len(written_slices),
        "total_bytes": byte_total,
        "slices": written_slices,
    }
|
|
2379
|
+
|
|
2380
|
+
def download_file(self, url: str, output_path: str) -> int:
    """Fetch a non-sliced file from a presigned URL into a local file.

    The response body is streamed in ``FILE_DOWNLOAD_CHUNK_SIZE`` chunks
    and, when the URL path ends in ``.gz``, gunzipped on the fly, so peak
    RAM stays at O(chunk size) even for multi-GB payloads (issue #187).

    Args:
        url: Presigned download URL from file info.
        output_path: Local file path to write to. Parent directories are
            created if missing.

    Returns:
        Size of the written file in bytes (post-decompression if the URL is
        gzipped).
    """
    import gzip
    import shutil

    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)

    # Only the part before the query string decides whether to gunzip.
    url_path = url.rstrip("?").partition("?")[0]
    needs_gunzip = url_path.endswith(".gz")

    with httpx.Client(timeout=FILE_DOWNLOAD_TIMEOUT) as client:
        with client.stream("GET", url) as reply:
            reply.raise_for_status()
            reader: Any = _IterBytesReader(reply.iter_bytes(FILE_DOWNLOAD_CHUNK_SIZE))
            if needs_gunzip:
                reader = gzip.GzipFile(fileobj=reader, mode="rb")
            with destination.open("wb") as sink:
                shutil.copyfileobj(reader, sink, length=FILE_DOWNLOAD_CHUNK_SIZE)

    return destination.stat().st_size
|
|
2412
|
+
|
|
2413
|
+
def list_jobs(
    self,
    component_id: str | None = None,
    config_id: str | None = None,
    status: str | None = None,
    limit: int = DEFAULT_JOB_LIMIT,
    offset: int = 0,
) -> list[dict[str, Any]]:
    """Query the Queue API's job search endpoint.

    ``limit`` and ``offset`` are always sent; each optional filter is sent
    only when its value is truthy.

    Args:
        component_id: Optional filter by component ID (sent as ``component``).
        config_id: Optional filter by config ID (requires component_id);
            sent as ``config``.
        status: Optional filter by job status.
        limit: Max number of jobs to return (1-500).
        offset: Offset for pagination.

    Returns:
        List of job dicts from the Queue API.
    """
    search: dict[str, str | int] = {"limit": limit, "offset": offset}
    optional_filters = (
        ("component", component_id),
        ("config", config_id),
        ("status", status),
    )
    for key, value in optional_filters:
        if value:
            search[key] = value

    reply = self._queue_request("GET", "/search/jobs", params=search)
    return reply.json()
|
|
2443
|
+
|
|
2444
|
+
def list_jobs_grouped(
    self,
    jobs_per_group: int = DEFAULT_JOBS_PER_CONFIG,
    limit: int = DEFAULT_GROUPED_JOBS_LIMIT,
    sort_by: str = "startTime",
    sort_order: str = "desc",
    created_time_from: str | None = None,
) -> list[dict[str, Any]]:
    """Fetch jobs from the Queue API grouped by component and configuration.

    A single ``GET /search/grouped-jobs`` call returns the latest N jobs
    for each unique component+config combination. The parameters are
    built as a list of pairs because ``groupBy[]`` has to appear twice.

    Args:
        jobs_per_group: Max jobs per component+config group (1-500).
        limit: Max number of groups to return (1-500).
        sort_by: Sort field for jobs within each group.
        sort_order: Sort direction ("asc" or "desc").
        created_time_from: Optional ISO datetime filter (e.g. "2026-03-20T00:00:00Z").

    Returns:
        List of group dicts: [{"group": {"componentId": ..., "configId": ...}, "jobs": [...]}]
    """
    pairs: list[tuple[str, str]] = [("groupBy[]", field) for field in ("componentId", "configId")]
    pairs += [
        ("jobsPerGroup", str(jobs_per_group)),
        ("limit", str(limit)),
        ("sortBy", sort_by),
        ("sortOrder", sort_order),
    ]
    if created_time_from:
        pairs.append(("filters[createdTimeFrom]", created_time_from))

    reply = self._queue_request("GET", "/search/grouped-jobs", params=pairs)
    return reply.json()
|
|
2480
|
+
|
|
2481
|
+
def get_job_detail(self, job_id: str) -> dict[str, Any]:
    """Fetch the full record of a single Queue API job.

    Args:
        job_id: The job ID.

    Returns:
        Job detail dict from the Queue API.
    """
    # Encode every reserved character (including "/") so the ID always stays
    # a single path segment.
    path = "/jobs/" + quote(job_id, safe="")
    return self._queue_request("GET", path).json()
|
|
2493
|
+
|
|
2494
|
+
# --- Queue Job Creation ---
|
|
2495
|
+
|
|
2496
|
+
def create_job(
|
|
2497
|
+
self,
|
|
2498
|
+
component_id: str,
|
|
2499
|
+
config_id: str,
|
|
2500
|
+
config_data: dict[str, Any] | None = None,
|
|
2501
|
+
config_row_ids: list[str] | None = None,
|
|
2502
|
+
mode: str = "run",
|
|
2503
|
+
branch_id: int | None = None,
|
|
2504
|
+
variable_values_id: str | None = None,
|
|
2505
|
+
) -> dict[str, Any]:
|
|
2506
|
+
"""Create and run a Queue API job.
|
|
2507
|
+
|
|
2508
|
+
Args:
|
|
2509
|
+
component_id: Component ID (e.g. keboola.sandboxes).
|
|
2510
|
+
config_id: Configuration ID.
|
|
2511
|
+
config_data: Optional runtime config data override.
|
|
2512
|
+
config_row_ids: Optional list of config row IDs to run
|
|
2513
|
+
(omit to run entire config).
|
|
2514
|
+
mode: Job mode (default: run).
|
|
2515
|
+
branch_id: Optional dev branch ID. When set, the job runs
|
|
2516
|
+
on that branch instead of the default (production) branch.
|
|
2517
|
+
variable_values_id: Optional id of a row in the linked
|
|
2518
|
+
``keboola.variables`` config. When set, the Queue API binds
|
|
2519
|
+
the row's values to the job's `{{ variable }}` placeholders.
|
|
2520
|
+
Omit for configurations that have no linked variables.
|
|
2521
|
+
|
|
2522
|
+
Returns:
|
|
2523
|
+
Job dict from the Queue API.
|
|
2524
|
+
"""
|
|
2525
|
+
body: dict[str, Any] = {
|
|
2526
|
+
"component": component_id,
|
|
2527
|
+
"config": config_id,
|
|
2528
|
+
"mode": mode,
|
|
2529
|
+
}
|
|
2530
|
+
if branch_id is not None:
|
|
2531
|
+
body["branchId"] = str(branch_id)
|
|
2532
|
+
if config_data:
|
|
2533
|
+
body["configData"] = config_data
|
|
2534
|
+
if config_row_ids:
|
|
2535
|
+
body["configRowIds"] = config_row_ids
|
|
2536
|
+
if variable_values_id:
|
|
2537
|
+
body["variableValuesId"] = variable_values_id
|
|
2538
|
+
response = self._queue_request("POST", "/jobs", json=body)
|
|
2539
|
+
return response.json()
|
|
2540
|
+
|
|
2541
|
+
def kill_job(self, job_id: str) -> dict[str, Any]:
    """Ask the Queue API to terminate a job (``POST /jobs/{id}/kill``).

    The call sets the job's desiredStatus to "terminating"; the executor
    moves the actual status asynchronously (waiting -> cancelled,
    processing -> terminating -> terminated). Poll get_job_detail until
    isFinished=True to observe the terminal state.

    Killable states per Queue API: created, waiting, processing. A kill on
    any other state returns HTTP 400 with a "not in one of killable states"
    message; callers that want idempotent behavior (e.g. bulk terminate
    after list_jobs under race conditions) should translate that into a
    no-op success at the service layer.

    Args:
        job_id: The job ID.

    Returns:
        The JSON body returned by the Queue API.
    """
    # Fully percent-encode the ID so it cannot alter the request path.
    path = "/jobs/{}/kill".format(quote(job_id, safe=""))
    return self._queue_request("POST", path).json()
|
|
2558
|
+
|
|
2559
|
+
def fetch_job_events(self, run_id: str, limit: int | None = None) -> list[dict[str, Any]]:
|
|
2560
|
+
"""Fetch events emitted during a job's run.
|
|
2561
|
+
|
|
2562
|
+
Wraps the Storage API's ``GET /v2/storage/events?runId={runId}``
|
|
2563
|
+
endpoint -- NOT a Queue API path. Queue jobs (Queue API v2) expose a
|
|
2564
|
+
``runId`` on the job dict (typically equal to the job ``id``); the
|
|
2565
|
+
Storage Events API is the canonical event feed for the job. Returns
|
|
2566
|
+
the list in Storage API order (newest -> oldest; callers that want
|
|
2567
|
+
a chronological "tail" should reverse the slice).
|
|
2568
|
+
|
|
2569
|
+
Args:
|
|
2570
|
+
run_id: The job's ``runId`` (``job["runId"]``; falls back to
|
|
2571
|
+
``job["id"]`` on legacy records where they match).
|
|
2572
|
+
limit: Optional server-side event cap. Storage API default is
|
|
2573
|
+
about 100; pass an explicit value to cover long runs.
|
|
2574
|
+
|
|
2575
|
+
Returns:
|
|
2576
|
+
List of event dicts. Each event typically has ``uuid``,
|
|
2577
|
+
``event``, ``component``, ``message``, ``type``, ``created``,
|
|
2578
|
+
``runId``, ``configurationId`` keys. Empty when the run emitted
|
|
2579
|
+
no events yet.
|
|
2580
|
+
"""
|
|
2581
|
+
params: dict[str, Any] = {"runId": run_id}
|
|
2582
|
+
if limit is not None and limit > 0:
|
|
2583
|
+
params["limit"] = limit
|
|
2584
|
+
response = self._request("GET", "/v2/storage/events", params=params)
|
|
2585
|
+
payload = response.json()
|
|
2586
|
+
# Storage events returns a bare list. Tolerate a dict-wrapped
|
|
2587
|
+
# future shape defensively.
|
|
2588
|
+
if isinstance(payload, list):
|
|
2589
|
+
return payload
|
|
2590
|
+
if isinstance(payload, dict) and isinstance(payload.get("events"), list):
|
|
2591
|
+
return payload["events"]
|
|
2592
|
+
return []
|
|
2593
|
+
|
|
2594
|
+
def wait_for_queue_job(
    self,
    job_id: str,
    max_wait: float = STORAGE_JOB_MAX_WAIT,
    poll_strategy: str = DEFAULT_POLL_STRATEGY,
) -> dict[str, Any]:
    """Block until a Queue API job finishes, fails, or the deadline passes.

    Poll intervals come from ``_iter_poll_intervals(poll_strategy)``: the
    ``"exponential"`` strategy follows the piecewise ``JOB_POLL_CURVE`` from
    constants (2s x 30 -> 5s x 48 -> 15s forever) and ``"fixed"`` uses the
    legacy ``STORAGE_JOB_POLL_INTERVAL``. The curve matches the cadence used
    by FIIA and the official ``keboola-as-code`` Go CLI.

    Args:
        job_id: The Queue job ID.
        max_wait: Maximum seconds to wait (default: STORAGE_JOB_MAX_WAIT).
        poll_strategy: "exponential" (default) or "fixed". Any other value
            raises ValueError before the first network call.

    Returns:
        Completed job dict.

    Raises:
        ValueError: If poll_strategy is not one of VALID_POLL_STRATEGIES.
        KeboolaApiError: If the job fails (QUEUE_JOB_FAILED) or the deadline
            elapses before the job finishes (QUEUE_JOB_TIMEOUT).
    """
    # ValueError rather than KeboolaApiError: an unknown strategy is a
    # programming error in the caller, not a bad API response. JobService
    # validates before reaching this layer, so hitting this from the CLI
    # would be a bug in kbagent.
    if poll_strategy not in VALID_POLL_STRATEGIES:
        raise ValueError(
            f"Invalid poll_strategy {poll_strategy!r}. "
            f"Expected one of: {sorted(VALID_POLL_STRATEGIES)}."
        )

    deadline = time.monotonic() + max_wait
    for interval in _iter_poll_intervals(poll_strategy):
        job = self.get_job_detail(job_id)

        if not job.get("isFinished"):
            # Trim the sleep to the time left so the deadline is never
            # overshot by a whole interval; stop once nothing is left.
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            time.sleep(min(interval, remaining))
            continue

        if job.get("status") != "error":
            return job

        # Prefer the message carried in the job's "result" dict, if any.
        failure = job.get("result", {})
        detail = "Queue job failed"
        if isinstance(failure, dict):
            detail = failure.get("message", "Queue job failed")
        raise KeboolaApiError(
            message=f"Queue job {job_id} failed: {detail}",
            status_code=500,
            error_code=ErrorCode.QUEUE_JOB_FAILED,
            retryable=False,
        )

    raise KeboolaApiError(
        message=f"Queue job {job_id} did not complete within {max_wait}s",
        status_code=504,
        error_code=ErrorCode.QUEUE_JOB_TIMEOUT,
        retryable=True,
    )
|
|
2664
|
+
|
|
2665
|
+
# --- Workspace CRUD ---
|
|
2666
|
+
|
|
2667
|
+
def list_workspaces(self, branch_id: int | None = None) -> list[dict[str, Any]]:
|
|
2668
|
+
"""List all workspaces in the project."""
|
|
2669
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
2670
|
+
response = self._request("GET", f"{prefix}/workspaces")
|
|
2671
|
+
return response.json()
|
|
2672
|
+
|
|
2673
|
+
def get_workspace(self, workspace_id: int, branch_id: int | None = None) -> dict[str, Any]:
|
|
2674
|
+
"""Get workspace details (note: password is NOT included)."""
|
|
2675
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
2676
|
+
response = self._request("GET", f"{prefix}/workspaces/{workspace_id}")
|
|
2677
|
+
return response.json()
|
|
2678
|
+
|
|
2679
|
+
def delete_workspace(self, workspace_id: int, branch_id: int | None = None) -> None:
|
|
2680
|
+
"""Delete a workspace (synchronous)."""
|
|
2681
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
2682
|
+
self._request("DELETE", f"{prefix}/workspaces/{workspace_id}")
|
|
2683
|
+
|
|
2684
|
+
def reset_workspace_password(
|
|
2685
|
+
self, workspace_id: int, branch_id: int | None = None
|
|
2686
|
+
) -> dict[str, Any]:
|
|
2687
|
+
"""Reset workspace password. Returns new password."""
|
|
2688
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
2689
|
+
response = self._request("POST", f"{prefix}/workspaces/{workspace_id}/password")
|
|
2690
|
+
return response.json()
|
|
2691
|
+
|
|
2692
|
+
def create_sandbox_config(
|
|
2693
|
+
self,
|
|
2694
|
+
name: str,
|
|
2695
|
+
description: str = "",
|
|
2696
|
+
backend_size: str = "small",
|
|
2697
|
+
branch_id: int | None = None,
|
|
2698
|
+
) -> dict[str, Any]:
|
|
2699
|
+
"""Create a keboola.sandboxes configuration.
|
|
2700
|
+
|
|
2701
|
+
This is needed to make workspaces visible in the Keboola UI.
|
|
2702
|
+
The UI only shows workspaces tied to a sandboxes config.
|
|
2703
|
+
|
|
2704
|
+
Args:
|
|
2705
|
+
name: Human-readable name for the workspace.
|
|
2706
|
+
description: Optional description.
|
|
2707
|
+
backend_size: Backend size (small, medium, large).
|
|
2708
|
+
branch_id: Branch ID. If provided, creates config in that branch.
|
|
2709
|
+
|
|
2710
|
+
Returns:
|
|
2711
|
+
Configuration dict with id, name, etc.
|
|
2712
|
+
"""
|
|
2713
|
+
config = {
|
|
2714
|
+
"parameters": {
|
|
2715
|
+
"runtime": {"shared": False},
|
|
2716
|
+
"storage": {"input": {"tables": []}, "output": {"tables": []}},
|
|
2717
|
+
"parameters": {"id": "", "blocks": []},
|
|
2718
|
+
"backendSize": backend_size,
|
|
2719
|
+
},
|
|
2720
|
+
"runtime": {"shared": False},
|
|
2721
|
+
}
|
|
2722
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
2723
|
+
response = self._request(
|
|
2724
|
+
"POST",
|
|
2725
|
+
f"{prefix}/components/keboola.sandboxes/configs",
|
|
2726
|
+
data={
|
|
2727
|
+
"name": name,
|
|
2728
|
+
"description": description,
|
|
2729
|
+
"configuration": json.dumps(config),
|
|
2730
|
+
},
|
|
2731
|
+
)
|
|
2732
|
+
return response.json()
|
|
2733
|
+
|
|
2734
|
+
def delete_config(
|
|
2735
|
+
self, component_id: str, config_id: str, branch_id: int | None = None
|
|
2736
|
+
) -> None:
|
|
2737
|
+
"""Delete a component configuration.
|
|
2738
|
+
|
|
2739
|
+
Args:
|
|
2740
|
+
component_id: Component ID.
|
|
2741
|
+
config_id: Configuration ID.
|
|
2742
|
+
branch_id: Branch ID. If provided, deletes config in that branch.
|
|
2743
|
+
"""
|
|
2744
|
+
safe_component = quote(component_id, safe="")
|
|
2745
|
+
safe_config = quote(config_id, safe="")
|
|
2746
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
2747
|
+
self._request(
|
|
2748
|
+
"DELETE",
|
|
2749
|
+
f"{prefix}/components/{safe_component}/configs/{safe_config}",
|
|
2750
|
+
)
|
|
2751
|
+
|
|
2752
|
+
def create_config_workspace(
|
|
2753
|
+
self,
|
|
2754
|
+
branch_id: int,
|
|
2755
|
+
component_id: str,
|
|
2756
|
+
config_id: str,
|
|
2757
|
+
backend: str = "snowflake",
|
|
2758
|
+
login_type: str | None = None,
|
|
2759
|
+
public_key: str | None = None,
|
|
2760
|
+
) -> dict[str, Any]:
|
|
2761
|
+
"""Create a workspace tied to a specific configuration.
|
|
2762
|
+
|
|
2763
|
+
Args:
|
|
2764
|
+
branch_id: Branch ID (use main branch ID for production).
|
|
2765
|
+
component_id: Component ID (e.g. keboola.snowflake-transformation).
|
|
2766
|
+
config_id: Configuration ID.
|
|
2767
|
+
backend: Workspace backend.
|
|
2768
|
+
login_type: Optional Storage API loginType. Omitted when None.
|
|
2769
|
+
public_key: Optional public key for key-pair workspaces. Omitted when None.
|
|
2770
|
+
|
|
2771
|
+
Returns:
|
|
2772
|
+
Workspace dict including connection credentials.
|
|
2773
|
+
"""
|
|
2774
|
+
safe_component = quote(component_id, safe="")
|
|
2775
|
+
safe_config = quote(config_id, safe="")
|
|
2776
|
+
payload: dict[str, Any] = {"backend": backend}
|
|
2777
|
+
if login_type is not None:
|
|
2778
|
+
payload["loginType"] = login_type
|
|
2779
|
+
if public_key is not None:
|
|
2780
|
+
payload["publicKey"] = public_key
|
|
2781
|
+
|
|
2782
|
+
response = self._request(
|
|
2783
|
+
"POST",
|
|
2784
|
+
f"/v2/storage/branch/{branch_id}/components/{safe_component}/configs/{safe_config}/workspaces",
|
|
2785
|
+
json=payload,
|
|
2786
|
+
)
|
|
2787
|
+
return response.json()
|
|
2788
|
+
|
|
2789
|
+
def list_config_workspaces(
    self,
    branch_id: int,
    component_id: str,
    config_id: str,
) -> list[dict[str, Any]]:
    """List the workspaces bound to one specific configuration.

    Args:
        branch_id: Branch ID.
        component_id: Component ID.
        config_id: Configuration ID.

    Returns:
        The JSON body returned by the Storage API.
    """
    component_segment = quote(component_id, safe="")
    config_segment = quote(config_id, safe="")
    path = (
        f"/v2/storage/branch/{branch_id}/components/{component_segment}"
        f"/configs/{config_segment}/workspaces"
    )
    return self._request("GET", path).json()
|
|
2803
|
+
|
|
2804
|
+
def load_workspace_tables(
|
|
2805
|
+
self,
|
|
2806
|
+
workspace_id: int,
|
|
2807
|
+
tables: list[dict[str, Any]],
|
|
2808
|
+
branch_id: int | None = None,
|
|
2809
|
+
preserve: bool = False,
|
|
2810
|
+
) -> dict[str, Any]:
|
|
2811
|
+
"""Load tables into a workspace (async operation).
|
|
2812
|
+
|
|
2813
|
+
Args:
|
|
2814
|
+
workspace_id: Target workspace ID.
|
|
2815
|
+
tables: List of table load definitions, each with at minimum:
|
|
2816
|
+
- source: table ID (e.g. "in.c-bucket.table")
|
|
2817
|
+
- destination: target table name in workspace
|
|
2818
|
+
branch_id: Branch ID. Required for workspaces on dev branches.
|
|
2819
|
+
preserve: If True, keep existing tables in the workspace. Default is False
|
|
2820
|
+
(workspace is cleared before loading).
|
|
2821
|
+
|
|
2822
|
+
Returns:
|
|
2823
|
+
Completed storage job dict (polls until done).
|
|
2824
|
+
|
|
2825
|
+
Raises:
|
|
2826
|
+
KeboolaApiError: If the load job fails or times out.
|
|
2827
|
+
"""
|
|
2828
|
+
prefix = f"/v2/storage/branch/{branch_id}" if branch_id else "/v2/storage"
|
|
2829
|
+
body: dict[str, Any] = {"input": tables, "preserve": preserve}
|
|
2830
|
+
response = self._request(
|
|
2831
|
+
"POST",
|
|
2832
|
+
f"{prefix}/workspaces/{workspace_id}/load",
|
|
2833
|
+
json=body,
|
|
2834
|
+
)
|
|
2835
|
+
return self._wait_for_storage_job(response.json())
|
|
2836
|
+
|
|
2837
|
+
# --- Query Service ---
|
|
2838
|
+
|
|
2839
|
+
def submit_query(
    self,
    branch_id: int,
    workspace_id: int,
    statements: list[str],
    transactional: bool = False,
) -> dict[str, Any]:
    """Send SQL statements to the Query Service for execution.

    Args:
        branch_id: Branch ID.
        workspace_id: Workspace ID.
        statements: List of SQL statements to execute.
        transactional: Whether to wrap in a transaction.

    Returns:
        Query job dict with id and status.
    """
    path = f"/api/v1/branches/{branch_id}/workspaces/{workspace_id}/queries"
    request_body: dict[str, Any] = {
        "statements": statements,
        "transactional": transactional,
    }
    return self._query_request("POST", path, json=request_body).json()
|
|
2867
|
+
|
|
2868
|
+
def get_query_job(self, query_job_id: str) -> dict[str, Any]:
    """Get the current state of a Query Service job.

    Args:
        query_job_id: The query job ID.

    Returns:
        The JSON body returned by ``GET /api/v1/queries/{id}``.
    """
    # Percent-encode the ID (including "/") so a malformed or hostile value
    # cannot change the request path; matches get_job_detail / kill_job.
    safe_query_job_id = quote(query_job_id, safe="")
    response = self._query_request("GET", f"/api/v1/queries/{safe_query_job_id}")
    return response.json()
|
|
2872
|
+
|
|
2873
|
+
def export_query_results(
    self,
    query_job_id: str,
    statement_id: str,
    file_type: str = "csv",
) -> str:
    """Export a statement's results as CSV (or another format).

    Args:
        query_job_id: The query job ID.
        statement_id: The statement ID within the job.
        file_type: Value sent as the ``fileType`` query parameter.

    Returns:
        Raw response text (a CSV string for the default file type).
    """
    # Percent-encode both IDs (including "/") so they stay single path
    # segments; matches the quoting used by the Queue/Storage methods.
    safe_query_job_id = quote(query_job_id, safe="")
    safe_statement_id = quote(statement_id, safe="")
    response = self._query_request(
        "GET",
        f"/api/v1/queries/{safe_query_job_id}/{safe_statement_id}/export",
        params={"fileType": file_type},
    )
    return response.text
|
|
2890
|
+
|
|
2891
|
+
def get_query_results(
    self,
    query_job_id: str,
    statement_id: str,
    offset: int = 0,
    page_size: int = QUERY_RESULTS_PAGE_SIZE,
) -> dict[str, Any]:
    """Fetch a page of inline statement results from the Query Service.

    Unlike :meth:`export_query_results`, which materializes a CSV file via
    the warehouse UNLOAD path (slow), this reads the already-computed result
    set inline as JSON -- much faster for interactive queries. The endpoint
    is paginated; ``offset``/``page_size`` walk the result set.

    Args:
        query_job_id: The query job ID.
        statement_id: The statement ID within the job.
        offset: Row offset to start from (for pagination).
        page_size: Maximum rows to return in this page.

    Returns:
        Raw QueryResult dict, e.g.::

            {
                "status": "completed",
                "columns": [{"name": "id", "type": "INTEGER", "nullable": false}],
                "data": [[1, "a"], [2, "b"]],
                "numberOfRows": 2,
            }
    """
    # Percent-encode both IDs (including "/") so they stay single path
    # segments; matches the quoting used by the Queue/Storage methods.
    safe_query_job_id = quote(query_job_id, safe="")
    safe_statement_id = quote(statement_id, safe="")
    response = self._query_request(
        "GET",
        f"/api/v1/queries/{safe_query_job_id}/{safe_statement_id}/results",
        params={"offset": offset, "pageSize": page_size},
    )
    return response.json()
|
|
2927
|
+
|
|
2928
|
+
def get_query_history(
    self,
    branch_id: int,
    workspace_id: int,
) -> dict[str, Any]:
    """Get the query history of a workspace.

    Args:
        branch_id: Branch ID.
        workspace_id: Workspace ID.

    Returns:
        The JSON body returned by the Query Service.
    """
    path = f"/api/v1/branches/{branch_id}/workspaces/{workspace_id}/queries"
    return self._query_request("GET", path).json()
|
|
2939
|
+
|
|
2940
|
+
def wait_for_query_job(self, query_job_id: str) -> dict[str, Any]:
    """Poll a Query Service job until it reaches a terminal state.

    The job is always checked at least once, and each sleep is trimmed to
    the time left before the deadline. The wait therefore does not overshoot
    ``QUERY_JOB_MAX_WAIT`` by a whole poll interval, and a job that finishes
    during the final sleep is still returned instead of being reported as a
    timeout (same pattern as ``wait_for_queue_job``).

    Args:
        query_job_id: The query job ID.

    Returns:
        Completed query job dict.

    Raises:
        KeboolaApiError: If the query fails (QUERY_JOB_FAILED) or does not
            complete within ``QUERY_JOB_MAX_WAIT`` seconds (QUERY_JOB_TIMEOUT).
    """
    deadline = time.monotonic() + QUERY_JOB_MAX_WAIT
    while True:
        job = self.get_query_job(query_job_id)
        status = job.get("status", "")
        if status == "completed":
            return job
        if status in ("error", "failed"):
            raise KeboolaApiError(
                message=f"Query job failed: {_extract_query_job_error(job)}",
                status_code=500,
                error_code=ErrorCode.QUERY_JOB_FAILED,
                retryable=False,
            )

        # Sleep no longer than the time remaining; stop when none is left.
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(QUERY_JOB_POLL_INTERVAL, remaining))

    raise KeboolaApiError(
        message=f"Query job {query_job_id} did not complete within {QUERY_JOB_MAX_WAIT}s",
        status_code=504,
        error_code=ErrorCode.QUERY_JOB_TIMEOUT,
        retryable=True,
    )
|
|
2973
|
+
|
|
2974
|
+
|
|
2975
|
+
# BigQuery failures arrive from the Query Service as a serialized object, e.g.
#   {Location: "query"; Message: "Syntax error: Unexpected identifier ..."; Reason: "invalidQuery"}
# Only the human-readable `Message: "..."` part is worth showing, so that a
# BigQuery failure reads like Snowflake's plain text instead of leaking the
# wrapper into the user's red error box. Mirrors keboola-mcp-server's
# `_BigQueryWorkspace._format_error_message`.
_BQ_ERROR_MESSAGE_RE = re.compile(r'Message:\s*"((?:[^"\\]|\\.)*)"')


def _unwrap_bigquery_error(message: str) -> str:
    """Return the inner text of a serialized BigQuery Query-Service error.

    Plain-text errors (the Snowflake shape, with no ``Message: "..."``
    wrapper) and empty input are returned unchanged.

    Args:
        message: Error text as reported by the Query Service.

    Returns:
        The contents of the first ``Message: "..."`` field with escaped
        double quotes restored, or ``message`` itself when no such field is
        present.
    """
    if not message:
        return message
    found = _BQ_ERROR_MESSAGE_RE.search(message)
    if found is None:
        return message
    # Only the \" escape is undone; other backslash sequences are left as-is.
    return found.group(1).replace('\\"', '"')
|
|
2992
|
+
|
|
2993
|
+
|
|
2994
|
+
def _extract_query_job_error(job: dict[str, Any]) -> str:
    """Build the most informative error text for a failed Query Service job.

    For a failed job, the ``/api/v1/queries/{id}`` response carries the real
    Snowflake / BigQuery error in ``statements[i].error``; the top-level
    ``error`` field is usually absent. Reading only the top-level field
    produced the unhelpful "Query job failed: Query execution failed" text
    users saw in the SQL editor (#287).

    Lookup order:
        1. Every statement whose ``status`` is "error" or "failed" and that
           has a non-empty error. When the job has more than one statement,
           each line is prefixed with "Statement N: " (1-based); results are
           joined with newlines.
        2. The top-level ``error`` field.
        3. A generic fallback, so the caller never gets an empty message.

    Error values may be strings, dicts (first non-blank string among the
    ``message``, ``error``, ``detail`` keys), or anything else (converted
    with ``str``). All are passed through ``_unwrap_bigquery_error``.

    The result is meant to be embedded in
    ``KeboolaApiError(message=f"Query job failed: ...")`` and shown to the
    user (and to the AI fix-mode helper, which pivots its meta-prompt on the
    warehouse text).

    Args:
        job: Query job dict as returned by the Query Service.

    Returns:
        A non-empty, human-readable error description.
    """

    def _as_text(err: Any) -> str:
        if isinstance(err, str):
            raw = err.strip()
        elif isinstance(err, dict):
            candidates = (err.get(key) for key in ("message", "error", "detail"))
            raw = next(
                (val.strip() for val in candidates if isinstance(val, str) and val.strip()),
                "",
            )
        elif err is None:
            raw = ""
        else:
            raw = str(err).strip()
        # BigQuery wraps the real message in a serialized object; Snowflake
        # plain text passes through untouched.
        return _unwrap_bigquery_error(raw)

    statements = job.get("statements") or []
    # A lone statement gets no "Statement 1:" prefix -- it is only visual
    # noise in the editor's error box for the common single-query case.
    use_prefix = len(statements) != 1

    failures: list[str] = []
    for position, stmt in enumerate(statements, start=1):
        if not isinstance(stmt, dict) or stmt.get("status") not in ("error", "failed"):
            continue
        text = _as_text(stmt.get("error"))
        if text:
            label = f"Statement {position}: " if use_prefix else ""
            failures.append(f"{label}{text}")

    if failures:
        return "\n".join(failures)

    return _as_text(job.get("error")) or "Query execution failed (no error details from Query Service)"
|
|
3059
|
+
|
|
3060
|
+
|
|
3061
|
+
# ---------------------------------------------------------------------------
|
|
3062
|
+
# Cloud storage upload helpers
|
|
3063
|
+
# ---------------------------------------------------------------------------
|
|
3064
|
+
|
|
3065
|
+
|
|
3066
|
+
def _build_abs_upload_url(abs_params: dict[str, Any]) -> str:
    """Assemble the Azure Blob Storage upload URL from ``absUploadParams``.

    The ``SASConnectionString`` is split into its ``BlobEndpoint`` and
    ``SharedAccessSignature`` fields and the result is
    ``{BlobEndpoint}/{container}/{blobName}?{SAS}``.

    The ``url`` field in the API response is read-only (``sp=rl``); the
    write-capable SAS (``sp=rwl``) is only in ``absUploadParams``.

    Args:
        abs_params: The absUploadParams dict from files/prepare response.

    Returns:
        Full HTTPS URL with write-capable SAS token. A field missing from
        the connection string is substituted with an empty string.

    Raises:
        KeyError: If ``blobName``, ``container``, ``absCredentials`` or its
            ``SASConnectionString`` is absent from ``abs_params``.
    """
    blob_name = abs_params["blobName"]
    container = abs_params["container"]
    connection_string = abs_params["absCredentials"]["SASConnectionString"]

    # Shape: "BlobEndpoint=https://...;SharedAccessSignature=sv=2017-11-09&..."
    # partition("=") cuts at the first "=" only, so "=" inside the SAS value
    # survives. Segments without any "=" are ignored.
    fields: dict[str, str] = {
        name: value
        for name, sep, value in (
            segment.partition("=") for segment in connection_string.split(";")
        )
        if sep
    }

    endpoint = fields.get("BlobEndpoint", "").rstrip("/")
    signature = fields.get("SharedAccessSignature", "")
    return f"{endpoint}/{container}/{blob_name}?{signature}"
|
|
3097
|
+
|
|
3098
|
+
|
|
3099
|
+
# ---------------------------------------------------------------------------
|
|
3100
|
+
# Cloud storage download helpers (S3 SigV4, GCS bearer, ABS signed URL)
|
|
3101
|
+
# ---------------------------------------------------------------------------
|
|
3102
|
+
|
|
3103
|
+
|
|
3104
|
+
class _IterBytesReader:
    """File-like ``read(n)`` facade over an iterator of byte chunks.

    shutil.copyfileobj and gzip.GzipFile both need a binary stream with
    read(size), whereas httpx's iter_bytes() yields chunks. This adapter
    pulls chunks on demand, hands out at most ``size`` bytes per call, and
    keeps any unread remainder buffered for the next call.
    """

    def __init__(self, chunks: Any) -> None:
        self._chunks = iter(chunks)
        self._buf = b""

    def read(self, size: int = -1) -> bytes:
        """Return up to ``size`` bytes; ``b""`` signals end of stream.

        A negative or ``None`` size drains everything that is left.
        """
        if size is None or size < 0:
            # Clear the buffer first, then drain the rest of the iterator.
            leftover, self._buf = self._buf, b""
            return b"".join([leftover, *self._chunks])

        exhausted = object()  # private sentinel marking the end of the iterator
        while len(self._buf) < size:
            chunk = next(self._chunks, exhausted)
            if chunk is exhausted:
                break
            self._buf += chunk

        head, self._buf = self._buf[:size], self._buf[size:]
        return head
|
|
3132
|
+
|
|
3133
|
+
|
|
3134
|
+
class _CloudDownloader:
|
|
3135
|
+
"""Abstraction for downloading from cloud storage using Keboola file credentials.
|
|
3136
|
+
|
|
3137
|
+
Supports three cloud backends:
|
|
3138
|
+
- AWS S3: Uses SigV4 signing with temporary credentials
|
|
3139
|
+
- GCP GCS: Uses OAuth2 bearer token
|
|
3140
|
+
- Azure ABS: Uses presigned/SAS URLs
|
|
3141
|
+
"""
|
|
3142
|
+
|
|
3143
|
+
def __init__(self, provider: str, auth_fn: Any) -> None:
    """Store the provider name and the callable that yields auth data.

    Args:
        provider: Cloud provider identifier (e.g. "aws", "gcp", "azure").
        auth_fn: Callable taking a URL and returning a dict of auth info.
    """
    self._provider, self._auth_fn = provider, auth_fn
|
|
3146
|
+
|
|
3147
|
+
@staticmethod
def create(file_detail: dict[str, Any]) -> "_CloudDownloader":
    """Build a downloader matching the provider named in a file detail.

    Provider handling:
        - "aws": auth function signs each URL via ``_s3_signed_headers``
          using the ``credentials`` and ``region`` (default "us-east-1")
          from the response.
        - "gcp": auth function returns an ``Authorization`` header built
          from ``gcsCredentials`` (``token_type`` defaults to "Bearer").
        - "azure": auth function returns the blob endpoint and SAS token
          parsed from ``absCredentials.SASConnectionString`` under the
          ``_blob_endpoint`` / ``_sas`` keys.
        - anything else: presigned URLs are assumed, so no auth data.

    Args:
        file_detail: Response from GET /v2/storage/files/{id}?federationToken=1.

    Returns:
        A configured ``_CloudDownloader``.
    """
    provider = file_detail.get("provider", "")

    if provider == "aws":
        creds = file_detail.get("credentials", {})
        region = file_detail.get("region", "us-east-1")

        def _sign(url):
            return _s3_signed_headers(url, creds, region)

        return _CloudDownloader(provider="aws", auth_fn=_sign)

    if provider == "gcp":
        gcs_creds = file_detail.get("gcsCredentials", {})
        token = gcs_creds.get("access_token", "")
        token_type = gcs_creds.get("token_type", "Bearer")

        def _bearer(_url):
            return {"Authorization": f"{token_type} {token}"}

        return _CloudDownloader(provider="gcp", auth_fn=_bearer)

    if provider == "azure":
        # Azure: the SAS token from absCredentials authenticates slice downloads.
        connection_string = file_detail.get("absCredentials", {}).get("SASConnectionString", "")
        # Shape: "BlobEndpoint=https://...;SharedAccessSignature=sv=..."
        # Split each segment at its first "=" only; skip segments without one.
        sas_fields: dict[str, str] = {
            name: value
            for name, sep, value in (
                segment.partition("=") for segment in connection_string.split(";")
            )
            if sep
        }
        blob_endpoint = sas_fields.get("BlobEndpoint", "").rstrip("/")
        sas = sas_fields.get("SharedAccessSignature", "")

        def _azure_info(_url, _be=blob_endpoint, _sas=sas):
            return {
                "_blob_endpoint": _be,
                "_sas": _sas,
            }

        return _CloudDownloader(provider="azure", auth_fn=_azure_info)

    # Other: presigned URLs, no extra auth needed.
    return _CloudDownloader(provider=provider, auth_fn=lambda _url: {})
|
|
3193
|
+
|
|
3194
|
+
def resolve_base_url(self, file_detail: dict[str, Any]) -> str:
    """Return the HTTPS base URL under which this file's slices live.

    The shape of the result depends on the provider this downloader was
    created for:

    * ``aws``   -> ``https://<bucket>.s3.<region>.amazonaws.com/<key>``
    * ``gcp``   -> ``https://storage.googleapis.com/<bucket>/<key>``
    * ``azure`` -> ``<blob endpoint>/<container>/``
    * other     -> ``""`` (manifest entries are expected to be full URLs)

    Args:
        file_detail: File detail dict; ``s3Path``/``region``, ``gcsPath`` or
            ``absPath`` are read depending on the provider. Missing keys fall
            back to empty strings (``us-east-1`` for the AWS region).

    Returns:
        The base URL string, or an empty string for unknown providers.
    """
    kind = self._provider

    if kind == "aws":
        location = file_detail.get("s3Path", {})
        bucket_name = location.get("bucket", "")
        object_key = location.get("key", "")
        aws_region = file_detail.get("region", "us-east-1")
        return f"https://{bucket_name}.s3.{aws_region}.amazonaws.com/{object_key}"

    if kind == "gcp":
        location = file_detail.get("gcsPath", {})
        bucket_name = location.get("bucket", "")
        object_key = location.get("key", "")
        return f"https://storage.googleapis.com/{bucket_name}/{object_key}"

    if kind == "azure":
        # The Azure auth callable carries the blob endpoint as "_"-prefixed
        # metadata rather than as a real HTTP header.
        endpoint = self._auth_fn("").get("_blob_endpoint", "")
        container_name = file_detail.get("absPath", {}).get("container", "")
        return f"{endpoint}/{container_name}/"

    # Unknown provider: entries should already be full URLs.
    return ""
|
|
3221
|
+
|
|
3222
|
+
def resolve_slice_url(
    self,
    base_url: str,
    entry_url: str,
    file_detail: dict[str, Any],
) -> str:
    """Convert a manifest entry URL to a downloadable HTTPS URL.

    Manifest entries use cloud-native URLs (``s3://bucket/key/slice.gz``,
    ``gs://bucket/key/slice.gz``, ``azure://account/container/blob``). This
    strips the cloud-specific prefix and builds an HTTPS URL for download.

    Args:
        base_url: HTTPS base URL from resolve_base_url().
        entry_url: Raw entry URL from the manifest.
        file_detail: Full file detail dict (``s3Path`` / ``gcsPath`` are read
            to work out which prefix to strip).

    Returns:
        Full HTTPS URL for the slice. For Azure the SAS token is appended as
        the query string; for unknown providers (and Azure entries that do
        not use the ``azure://`` scheme) ``entry_url`` is returned unchanged.
    """
    if self._provider == "aws":
        # entry_url: "s3://bucket/key/prefix/slice.csv.gz"
        # base_url:  "https://bucket.s3.region.amazonaws.com/key/prefix/"
        s3_path = file_detail.get("s3Path", {})
        bucket = s3_path.get("bucket", "")
        key = s3_path.get("key", "")
        # removeprefix() is already a no-op when the prefix does not match,
        # in which case the entry is appended to base_url unchanged.
        return base_url + entry_url.removeprefix(f"s3://{bucket}/{key}")

    if self._provider == "gcp":
        # entry_url: "gs://bucket/key/prefix/slice.csv.gz"
        gcs_path = file_detail.get("gcsPath", {})
        bucket = gcs_path.get("bucket", "")
        key = gcs_path.get("key", "")
        return base_url + entry_url.removeprefix(f"gs://{bucket}/{key}")

    if self._provider == "azure":
        # entry_url: "azure://account.blob.core.windows.net/container/blob.gz"
        if not entry_url.startswith("azure://"):
            return entry_url
        https_url = "https://" + entry_url.removeprefix("azure://")
        # Tolerate a SAS that was stored with its leading "?".
        sas = self._auth_fn("").get("_sas", "").lstrip("?")
        if not sas:
            # No token available: do not leave a dangling "?" on the URL.
            return https_url
        # Extend an existing query string instead of starting a second one.
        separator = "&" if "?" in https_url else "?"
        return f"{https_url}{separator}{sas}"

    # Other: entry URLs should be full HTTPS URLs.
    return entry_url
|
|
3271
|
+
|
|
3272
|
+
def _request_headers(self, url: str) -> dict[str, str]:
|
|
3273
|
+
"""Resolve auth headers for a cloud URL.
|
|
3274
|
+
|
|
3275
|
+
Azure stores metadata (endpoint, SAS) in the auth_fn result (keys
|
|
3276
|
+
prefixed with "_"); those are filtered out here. The SAS token itself
|
|
3277
|
+
is embedded into the URL by resolve_slice_url().
|
|
3278
|
+
"""
|
|
3279
|
+
auth_result = self._auth_fn(url)
|
|
3280
|
+
return {k: v for k, v in auth_result.items() if not k.startswith("_")}
|
|
3281
|
+
|
|
3282
|
+
def stream_to_file(self, url: str, dest: "Path | str", decompress_gzip: bool) -> int:
    """Download ``url`` to ``dest`` chunk by chunk instead of buffering it.

    Intended for slice downloads, where a single slice can be hundreds of
    MB: data is copied in FILE_DOWNLOAD_CHUNK_SIZE pieces so memory use is
    tied to the chunk size rather than the slice size (see issue #187).

    Args:
        url: Full HTTPS URL (with auth baked in for Azure).
        dest: Local file path to write to; opened in "wb" mode, so an
            existing file is overwritten.
        decompress_gzip: If True, the response stream is wrapped in
            gzip.GzipFile so the decompressed bytes are what land on disk.

    Returns:
        Size of ``dest`` in bytes after the copy (post-decompression if
        applicable).

    Raises:
        httpx.HTTPStatusError: via raise_for_status() on a 4xx/5xx response.
    """
    import gzip
    import shutil

    auth_headers = self._request_headers(url)
    target = Path(dest)

    with httpx.Client(timeout=FILE_DOWNLOAD_TIMEOUT) as client:
        with client.stream("GET", url, headers=auth_headers) as resp:
            resp.raise_for_status()

            # Adapt the chunk iterator to a file-like object so it can feed
            # both gzip and shutil.copyfileobj.
            reader: Any = _IterBytesReader(resp.iter_bytes(FILE_DOWNLOAD_CHUNK_SIZE))
            if decompress_gzip:
                reader = gzip.GzipFile(fileobj=reader, mode="rb")

            with target.open("wb") as out:
                shutil.copyfileobj(reader, out, length=FILE_DOWNLOAD_CHUNK_SIZE)

    return target.stat().st_size
|
|
3316
|
+
|
|
3317
|
+
|
|
3318
|
+
def _s3_signed_headers(
|
|
3319
|
+
url: str,
|
|
3320
|
+
creds: dict[str, str],
|
|
3321
|
+
region: str,
|
|
3322
|
+
method: str = "GET",
|
|
3323
|
+
payload: bytes = b"",
|
|
3324
|
+
) -> dict[str, str]:
|
|
3325
|
+
"""Generate AWS SigV4 signed headers for an S3 request.
|
|
3326
|
+
|
|
3327
|
+
Implements minimal AWS Signature Version 4 signing using only stdlib
|
|
3328
|
+
(hmac, hashlib, urllib.parse). No boto3/botocore dependency required.
|
|
3329
|
+
|
|
3330
|
+
Args:
|
|
3331
|
+
url: Full S3 URL (https://bucket.s3.region.amazonaws.com/key).
|
|
3332
|
+
creds: Dict with AccessKeyId, SecretAccessKey, SessionToken.
|
|
3333
|
+
region: AWS region (e.g. "us-east-1").
|
|
3334
|
+
method: HTTP method (GET or PUT).
|
|
3335
|
+
payload: Request body bytes (empty for GET).
|
|
3336
|
+
|
|
3337
|
+
Returns:
|
|
3338
|
+
Dict of headers to include in the request.
|
|
3339
|
+
"""
|
|
3340
|
+
import datetime
|
|
3341
|
+
import hashlib
|
|
3342
|
+
import hmac
|
|
3343
|
+
from urllib.parse import unquote, urlparse
|
|
3344
|
+
|
|
3345
|
+
access_key = creds["AccessKeyId"]
|
|
3346
|
+
secret_key = creds["SecretAccessKey"]
|
|
3347
|
+
session_token = creds.get("SessionToken", "")
|
|
3348
|
+
|
|
3349
|
+
parsed = urlparse(url)
|
|
3350
|
+
host = parsed.hostname or ""
|
|
3351
|
+
path = parsed.path or "/"
|
|
3352
|
+
query = parsed.query or ""
|
|
3353
|
+
|
|
3354
|
+
now = datetime.datetime.now(datetime.UTC)
|
|
3355
|
+
date_stamp = now.strftime("%Y%m%d")
|
|
3356
|
+
amz_date = now.strftime("%Y%m%dT%H%M%SZ")
|
|
3357
|
+
|
|
3358
|
+
service = "s3"
|
|
3359
|
+
scope = f"{date_stamp}/{region}/{service}/aws4_request"
|
|
3360
|
+
|
|
3361
|
+
# Canonical request
|
|
3362
|
+
canonical_uri = quote(unquote(path), safe="/~")
|
|
3363
|
+
if query:
|
|
3364
|
+
params_list = sorted(query.split("&"))
|
|
3365
|
+
canonical_querystring = "&".join(params_list)
|
|
3366
|
+
else:
|
|
3367
|
+
canonical_querystring = ""
|
|
3368
|
+
|
|
3369
|
+
headers_to_sign: dict[str, str] = {"host": host, "x-amz-date": amz_date}
|
|
3370
|
+
if session_token:
|
|
3371
|
+
headers_to_sign["x-amz-security-token"] = session_token
|
|
3372
|
+
|
|
3373
|
+
signed_headers = ";".join(sorted(headers_to_sign.keys()))
|
|
3374
|
+
canonical_headers = "".join(f"{k}:{v}\n" for k, v in sorted(headers_to_sign.items()))
|
|
3375
|
+
|
|
3376
|
+
payload_hash = hashlib.sha256(payload).hexdigest()
|
|
3377
|
+
|
|
3378
|
+
canonical_request = "\n".join(
|
|
3379
|
+
[
|
|
3380
|
+
method,
|
|
3381
|
+
canonical_uri,
|
|
3382
|
+
canonical_querystring,
|
|
3383
|
+
canonical_headers,
|
|
3384
|
+
signed_headers,
|
|
3385
|
+
payload_hash,
|
|
3386
|
+
]
|
|
3387
|
+
)
|
|
3388
|
+
|
|
3389
|
+
# String to sign
|
|
3390
|
+
string_to_sign = "\n".join(
|
|
3391
|
+
[
|
|
3392
|
+
"AWS4-HMAC-SHA256",
|
|
3393
|
+
amz_date,
|
|
3394
|
+
scope,
|
|
3395
|
+
hashlib.sha256(canonical_request.encode("utf-8")).hexdigest(),
|
|
3396
|
+
]
|
|
3397
|
+
)
|
|
3398
|
+
|
|
3399
|
+
# Signing key
|
|
3400
|
+
def _hmac_sha256(key: bytes, msg: str) -> bytes:
|
|
3401
|
+
return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()
|
|
3402
|
+
|
|
3403
|
+
k_date = _hmac_sha256(f"AWS4{secret_key}".encode(), date_stamp)
|
|
3404
|
+
k_region = _hmac_sha256(k_date, region)
|
|
3405
|
+
k_service = _hmac_sha256(k_region, service)
|
|
3406
|
+
k_signing = _hmac_sha256(k_service, "aws4_request")
|
|
3407
|
+
|
|
3408
|
+
signature = hmac.new(k_signing, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
|
|
3409
|
+
|
|
3410
|
+
authorization = (
|
|
3411
|
+
f"AWS4-HMAC-SHA256 Credential={access_key}/{scope}, "
|
|
3412
|
+
f"SignedHeaders={signed_headers}, Signature={signature}"
|
|
3413
|
+
)
|
|
3414
|
+
|
|
3415
|
+
result: dict[str, str] = {
|
|
3416
|
+
"Authorization": authorization,
|
|
3417
|
+
"x-amz-date": amz_date,
|
|
3418
|
+
"x-amz-content-sha256": payload_hash,
|
|
3419
|
+
}
|
|
3420
|
+
if session_token:
|
|
3421
|
+
result["x-amz-security-token"] = session_token
|
|
3422
|
+
return result
|