skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of skypilot-nightly might be problematic.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +25 -7
- sky/adaptors/common.py +24 -1
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +170 -17
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +167 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1299 -380
- sky/backends/cloud_vm_ray_backend.py +1715 -518
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/wheel_utils.py +37 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +89 -48
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +335 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +491 -203
- sky/cli.py +5 -6005
- sky/client/{cli.py → cli/command.py} +2477 -1885
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +320 -0
- sky/client/common.py +70 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1203 -297
- sky/client/sdk_async.py +833 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +73 -0
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +358 -93
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +127 -36
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +563 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +206 -80
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -83
- sky/clouds/seeweb.py +466 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +177 -124
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +349 -139
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
- sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1451 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +132 -2
- sky/execution.py +206 -63
- sky/global_user_state.py +2374 -586
- sky/jobs/__init__.py +5 -0
- sky/jobs/client/sdk.py +242 -65
- sky/jobs/client/sdk_async.py +143 -0
- sky/jobs/constants.py +9 -8
- sky/jobs/controller.py +839 -277
- sky/jobs/file_content_utils.py +80 -0
- sky/jobs/log_gc.py +201 -0
- sky/jobs/recovery_strategy.py +398 -152
- sky/jobs/scheduler.py +315 -189
- sky/jobs/server/core.py +829 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2092 -701
- sky/jobs/utils.py +1242 -160
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +443 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +135 -50
- sky/provision/azure/instance.py +10 -5
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +114 -23
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +93 -14
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +789 -247
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +40 -43
- sky/provision/kubernetes/utils.py +1192 -531
- sky/provision/kubernetes/volume.py +282 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +196 -91
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +110 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +180 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +531 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +807 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/vsphere/common/vim_utils.py +1 -2
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +9 -19
- sky/py.typed +0 -0
- sky/resources.py +844 -118
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +225 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +10 -8
- sky/serve/controller.py +64 -19
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +115 -1
- sky/serve/replica_managers.py +273 -162
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +554 -251
- sky/serve/serve_utils.py +733 -220
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +133 -48
- sky/serve/service_spec.py +135 -16
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +200 -0
- sky/server/common.py +475 -181
- sky/server/config.py +81 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +229 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/requests/executor.py +528 -138
- sky/server/requests/payloads.py +351 -17
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +120 -0
- sky/server/requests/requests.py +817 -224
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +106 -0
- sky/server/rest.py +417 -0
- sky/server/server.py +1290 -284
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +345 -57
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +5 -0
- sky/setup_files/alembic.ini +156 -0
- sky/setup_files/dependencies.py +136 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +102 -5
- sky/skylet/attempt_skylet.py +1 -0
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +27 -20
- sky/skylet/constants.py +171 -19
- sky/skylet/events.py +105 -21
- sky/skylet/job_lib.py +335 -104
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/services.py +564 -0
- sky/skylet/skylet.py +63 -4
- sky/skylet/subprocess_daemon.py +103 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +621 -137
- sky/templates/aws-ray.yml.j2 +10 -3
- sky/templates/azure-ray.yml.j2 +1 -1
- sky/templates/do-ray.yml.j2 +1 -1
- sky/templates/gcp-ray.yml.j2 +57 -0
- sky/templates/hyperbolic-ray.yml.j2 +67 -0
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +607 -51
- sky/templates/lambda-ray.yml.j2 +1 -1
- sky/templates/nebius-ray.yml.j2 +33 -12
- sky/templates/paperspace-ray.yml.j2 +1 -1
- sky/templates/primeintellect-ray.yml.j2 +71 -0
- sky/templates/runpod-ray.yml.j2 +9 -1
- sky/templates/scp-ray.yml.j2 +3 -50
- sky/templates/seeweb-ray.yml.j2 +108 -0
- sky/templates/shadeform-ray.yml.j2 +72 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/websocket_proxy.py +178 -18
- sky/usage/usage_lib.py +18 -11
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +387 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +34 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +16 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +310 -87
- sky/utils/config_utils.py +87 -5
- sky/utils/context.py +402 -0
- sky/utils/context_utils.py +222 -0
- sky/utils/controller_utils.py +264 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +470 -0
- sky/utils/db/migration_utils.py +133 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +13 -27
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +5 -5
- sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +368 -0
- sky/utils/log_utils.py +300 -6
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +213 -37
- sky/utils/schemas.py +905 -147
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +38 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/timeline.py +24 -52
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +86 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +149 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +258 -0
- sky/volumes/server/server.py +122 -0
- sky/volumes/volume.py +212 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
- skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
sky/data/storage.py
CHANGED
@@ -1,4 +1,6 @@
 """Storage and Store Classes for Sky Data."""
+from abc import abstractmethod
+from dataclasses import dataclass
 import enum
 import hashlib
 import os
@@ -7,7 +9,7 @@ import shlex
 import subprocess
 import time
 import typing
-from typing import Any, Dict, List, Optional, Tuple, Type, Union
+from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
 import urllib.parse

 import colorama
@@ -21,6 +23,7 @@ from sky import skypilot_config
 from sky.adaptors import aws
 from sky.adaptors import azure
 from sky.adaptors import cloudflare
+from sky.adaptors import coreweave
 from sky.adaptors import gcp
 from sky.adaptors import ibm
 from sky.adaptors import nebius
@@ -60,6 +63,7 @@ STORE_ENABLED_CLOUDS: List[str] = [
     str(clouds.OCI()),
     str(clouds.Nebius()),
     cloudflare.NAME,
+    coreweave.NAME,
 ]

 # Maximum number of concurrent rsync upload processes
@@ -91,6 +95,12 @@ def get_cached_enabled_storage_cloud_names_or_refresh(
         r2_is_enabled, _ = cloudflare.check_storage_credentials()
         if r2_is_enabled:
             enabled_clouds.append(cloudflare.NAME)
+
+        # Similarly, handle CoreWeave storage credentials
+        coreweave_is_enabled, _ = coreweave.check_storage_credentials()
+        if coreweave_is_enabled:
+            enabled_clouds.append(coreweave.NAME)
+
     if raise_if_no_cloud_access and not enabled_clouds:
         raise exceptions.NoCloudAccessError(
             'No cloud access available for storage. '
@@ -109,7 +119,7 @@ def _is_storage_cloud_enabled(cloud_name: str,
         sky_check.check_capability(
             sky_cloud.CloudCapability.STORAGE,
             quiet=True,
-
+            workspace=skypilot_config.get_active_workspace())
         return _is_storage_cloud_enabled(cloud_name,
                                          try_fix_with_sky_check=False)
     return False
@@ -124,41 +134,71 @@ class StoreType(enum.Enum):
     IBM = 'IBM'
     OCI = 'OCI'
     NEBIUS = 'NEBIUS'
+    COREWEAVE = 'COREWEAVE'
+    VOLUME = 'VOLUME'
+
+    @classmethod
+    def _get_s3_compatible_store_by_cloud(cls,
+                                          cloud_name: str) -> Optional[str]:
+        """Get S3-compatible store type by cloud name."""
+        for store_type, store_class in _S3_COMPATIBLE_STORES.items():
+            config = store_class.get_config()
+            if config.cloud_name.lower() == cloud_name:
+                return store_type
+        return None
+
+    @classmethod
+    def _get_s3_compatible_config(
+            cls, store_type: str) -> Optional['S3CompatibleConfig']:
+        """Get S3-compatible store configuration by store type."""
+        store_class = _S3_COMPATIBLE_STORES.get(store_type)
+        if store_class:
+            return store_class.get_config()
+        return None
+
+    @classmethod
+    def find_s3_compatible_config_by_prefix(
+            cls, source: str) -> Optional['StoreType']:
+        """Get S3-compatible store type by URL prefix."""
+        for store_type, store_class in _S3_COMPATIBLE_STORES.items():
+            config = store_class.get_config()
+            if source.startswith(config.url_prefix):
+                return StoreType(store_type)
+        return None

     @classmethod
     def from_cloud(cls, cloud: str) -> 'StoreType':
-
-
-        elif cloud.lower() == str(clouds.GCP()).lower():
+        cloud_lower = cloud.lower()
+        if cloud_lower == str(clouds.GCP()).lower():
             return StoreType.GCS
-        elif
+        elif cloud_lower == str(clouds.IBM()).lower():
             return StoreType.IBM
-        elif
-            return StoreType.R2
-        elif cloud.lower() == str(clouds.Azure()).lower():
+        elif cloud_lower == str(clouds.Azure()).lower():
             return StoreType.AZURE
-        elif
+        elif cloud_lower == str(clouds.OCI()).lower():
             return StoreType.OCI
-        elif
-            return StoreType.NEBIUS
-        elif cloud.lower() == str(clouds.Lambda()).lower():
+        elif cloud_lower == str(clouds.Lambda()).lower():
            with ux_utils.print_exception_no_traceback():
                raise ValueError('Lambda Cloud does not provide cloud storage.')
-        elif
+        elif cloud_lower == str(clouds.SCP()).lower():
            with ux_utils.print_exception_no_traceback():
                raise ValueError('SCP does not provide cloud storage.')
+        else:
+            s3_store_type = cls._get_s3_compatible_store_by_cloud(cloud_lower)
+            if s3_store_type:
+                return cls(s3_store_type)

         raise ValueError(f'Unsupported cloud for StoreType: {cloud}')

     def to_cloud(self) -> str:
-
-
-
+        config = self._get_s3_compatible_config(self.value)
+        if config:
+            return config.cloud_name
+
+        if self == StoreType.GCS:
             return str(clouds.GCP())
         elif self == StoreType.AZURE:
             return str(clouds.Azure())
-        elif self == StoreType.R2:
-            return cloudflare.NAME
         elif self == StoreType.IBM:
             return str(clouds.IBM())
         elif self == StoreType.OCI:
@@ -168,41 +208,34 @@ class StoreType(enum.Enum):

     @classmethod
     def from_store(cls, store: 'AbstractStore') -> 'StoreType':
-        if isinstance(store,
-            return
-
+        if isinstance(store, S3CompatibleStore):
+            return cls(store.get_store_type())
+
+        if isinstance(store, GcsStore):
             return StoreType.GCS
         elif isinstance(store, AzureBlobStore):
             return StoreType.AZURE
-        elif isinstance(store, R2Store):
-            return StoreType.R2
         elif isinstance(store, IBMCosStore):
             return StoreType.IBM
         elif isinstance(store, OciStore):
             return StoreType.OCI
-        elif isinstance(store, NebiusStore):
-            return StoreType.NEBIUS
         else:
             with ux_utils.print_exception_no_traceback():
                 raise ValueError(f'Unknown store type: {store}')

     def store_prefix(self) -> str:
-
-
-
+        config = self._get_s3_compatible_config(self.value)
+        if config:
+            return config.url_prefix
+
+        if self == StoreType.GCS:
             return 'gs://'
         elif self == StoreType.AZURE:
             return 'https://'
-        # R2 storages use 's3://' as a prefix for various aws cli commands
-        elif self == StoreType.R2:
-            return 'r2://'
         elif self == StoreType.IBM:
             return 'cos://'
         elif self == StoreType.OCI:
             return 'oci://'
-        # Nebius storages use 's3://' as a prefix for various aws cli commands
-        elif self == StoreType.NEBIUS:
-            return 'nebius://'
         else:
             with ux_utils.print_exception_no_traceback():
                 raise ValueError(f'Unknown store type: {self}')
@@ -251,12 +284,20 @@ class StoreType(enum.Enum):
         elif store_type == StoreType.IBM:
             bucket_name, sub_path, region = data_utils.split_cos_path(
                 store_url)
-        elif store_type == StoreType.R2:
-            bucket_name, sub_path = data_utils.split_r2_path(store_url)
         elif store_type == StoreType.GCS:
             bucket_name, sub_path = data_utils.split_gcs_path(store_url)
-
-
+        else:
+            # Check compatible stores
+            for compatible_store_type, store_class in \
+                    _S3_COMPATIBLE_STORES.items():
+                if store_type.value == compatible_store_type:
+                    config = store_class.get_config()
+                    bucket_name, sub_path = config.split_path(store_url)
+                    break
+            else:
+                # If we get here, it's an unknown S3-compatible store
+                raise ValueError(
+                    f'Unknown S3-compatible store type: {store_type}')
         return store_type, bucket_name, \
             sub_path, storage_account_name, region
     raise ValueError(f'Unknown store URL: {store_url}')
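For orientation, the new lookup helpers on `StoreType` all follow one pattern: scan a registry of store classes and match a field of each class's config. A minimal, self-contained sketch of that pattern follows; every name in it (`DemoConfig`, `DemoR2Store`, `_REGISTRY`, `find_store_by_prefix`) is an illustrative stand-in, not an actual SkyPilot definition:

```python
from dataclasses import dataclass
from typing import Dict, Optional, Type


@dataclass
class DemoConfig:
    store_type: str
    url_prefix: str
    cloud_name: str


class DemoStore:
    """Stand-in for the S3-compatible store base class."""

    @classmethod
    def get_config(cls) -> DemoConfig:
        raise NotImplementedError


class DemoR2Store(DemoStore):

    @classmethod
    def get_config(cls) -> DemoConfig:
        return DemoConfig('R2', 'r2://', 'cloudflare')


# Stand-in for the _S3_COMPATIBLE_STORES registry, keyed by store type.
_REGISTRY: Dict[str, Type[DemoStore]] = {'R2': DemoR2Store}


def find_store_by_prefix(source: str) -> Optional[str]:
    """Return the registered store type whose URL prefix matches source."""
    for store_type, store_class in _REGISTRY.items():
        if source.startswith(store_class.get_config().url_prefix):
            return store_type
    return None


assert find_store_by_prefix('r2://my-bucket/data') == 'R2'
assert find_store_by_prefix('gs://my-bucket') is None
```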
@@ -751,20 +792,19 @@ class Storage(object):
         # If source is a pre-existing bucket, connect to the bucket
         # If the bucket does not exist, this will error out
         if isinstance(self.source, str):
-            if self.source.startswith('
-                self.add_store(StoreType.S3)
-            elif self.source.startswith('gs://'):
+            if self.source.startswith('gs://'):
                 self.add_store(StoreType.GCS)
             elif data_utils.is_az_container_endpoint(self.source):
                 self.add_store(StoreType.AZURE)
-            elif self.source.startswith('r2://'):
-                self.add_store(StoreType.R2)
             elif self.source.startswith('cos://'):
                 self.add_store(StoreType.IBM)
             elif self.source.startswith('oci://'):
                 self.add_store(StoreType.OCI)
-
-
+
+            store_type = StoreType.find_s3_compatible_config_by_prefix(
+                self.source)
+            if store_type:
+                self.add_store(store_type)

     def get_bucket_sub_path_prefix(self, blob_path: str) -> str:
         """Adds the bucket sub path prefix to the blob path."""
@@ -852,7 +892,7 @@ class Storage(object):
                     f'{source} in the file_mounts section of your YAML')
             is_local_source = True
         elif split_path.scheme in [
-                's3', 'gs', 'https', 'r2', 'cos', 'oci', 'nebius'
+                's3', 'gs', 'https', 'r2', 'cos', 'oci', 'nebius', 'cw'
         ]:
             is_local_source = False
             # Storage mounting does not support mounting specific files from
@@ -877,7 +917,8 @@ class Storage(object):
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageSourceError(
                     f'Supported paths: local, s3://, gs://, https://, '
-                    f'r2://, cos://, oci://, nebius://.
+                    f'r2://, cos://, oci://, nebius://, cw://. '
+                    f'Got: {source}')
         return source, is_local_source

     def _validate_storage_spec(self, name: Optional[str]) -> None:
@@ -892,7 +933,16 @@ class Storage(object):
         """
         prefix = name.split('://')[0]
         prefix = prefix.lower()
-        if prefix in [
+        if prefix in [
+                's3',
+                'gs',
+                'https',
+                'r2',
+                'cos',
+                'oci',
+                'nebius',
+                'cw',
+        ]:
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageNameError(
                     'Prefix detected: `name` cannot start with '
@@ -980,12 +1030,25 @@ class Storage(object):
             # When initializing from global_user_state, we override the
             # source from the YAML
             try:
-                if s_type
+                if s_type.value in _S3_COMPATIBLE_STORES:
+                    store_class = _S3_COMPATIBLE_STORES[s_type.value]
+                    store = store_class.from_metadata(
+                        s_metadata,
+                        source=self.source,
+                        sync_on_reconstruction=self.sync_on_reconstruction,
+                        _bucket_sub_path=self._bucket_sub_path)
+                elif s_type == StoreType.S3:
                     store = S3Store.from_metadata(
                         s_metadata,
                         source=self.source,
                         sync_on_reconstruction=self.sync_on_reconstruction,
                         _bucket_sub_path=self._bucket_sub_path)
+                elif s_type == StoreType.R2:
+                    store = R2Store.from_metadata(
+                        s_metadata,
+                        source=self.source,
+                        sync_on_reconstruction=self.sync_on_reconstruction,
+                        _bucket_sub_path=self._bucket_sub_path)
                 elif s_type == StoreType.GCS:
                     store = GcsStore.from_metadata(
                         s_metadata,
@@ -1000,12 +1063,6 @@ class Storage(object):
                         source=self.source,
                         sync_on_reconstruction=self.sync_on_reconstruction,
                         _bucket_sub_path=self._bucket_sub_path)
-                elif s_type == StoreType.R2:
-                    store = R2Store.from_metadata(
-                        s_metadata,
-                        source=self.source,
-                        sync_on_reconstruction=self.sync_on_reconstruction,
-                        _bucket_sub_path=self._bucket_sub_path)
                 elif s_type == StoreType.IBM:
                     store = IBMCosStore.from_metadata(
                         s_metadata,
@@ -1024,6 +1081,12 @@ class Storage(object):
                         source=self.source,
                         sync_on_reconstruction=self.sync_on_reconstruction,
                         _bucket_sub_path=self._bucket_sub_path)
+                elif s_type == StoreType.COREWEAVE:
+                    store = CoreWeaveStore.from_metadata(
+                        s_metadata,
+                        source=self.source,
+                        sync_on_reconstruction=self.sync_on_reconstruction,
+                        _bucket_sub_path=self._bucket_sub_path)
                 else:
                     with ux_utils.print_exception_no_traceback():
                         raise ValueError(f'Unknown store type: {s_type}')
@@ -1106,20 +1169,17 @@ class Storage(object):
             return store

         store_cls: Type[AbstractStore]
-        if
+        # First check if it's a registered S3-compatible store
+        if store_type.value in _S3_COMPATIBLE_STORES:
+            store_cls = _S3_COMPATIBLE_STORES[store_type.value]
         elif store_type == StoreType.GCS:
             store_cls = GcsStore
         elif store_type == StoreType.AZURE:
             store_cls = AzureBlobStore
-        elif store_type == StoreType.R2:
-            store_cls = R2Store
         elif store_type == StoreType.IBM:
             store_cls = IBMCosStore
         elif store_type == StoreType.OCI:
             store_cls = OciStore
-        elif store_type == StoreType.NEBIUS:
-            store_cls = NebiusStore
         else:
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageSpecError(
@@ -1266,6 +1326,17 @@ class Storage(object):
         if store.is_sky_managed:
             global_user_state.set_storage_status(self.name, StorageStatus.READY)

+    @classmethod
+    def from_handle(cls, handle: StorageHandle) -> 'Storage':
+        """Create Storage from StorageHandle object.
+        """
+        obj = cls(name=handle.storage_name,
+                  source=handle.source,
+                  sync_on_reconstruction=False)
+        obj.handle = handle
+        obj._add_store_from_metadata(handle.sky_stores)
+        return obj
+
     @classmethod
     def from_yaml_config(cls, config: Dict[str, Any]) -> 'Storage':
         common_utils.validate_schema(config, schemas.get_storage_schema(),
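The `from_metadata` and `add_store` hunks above repeat one refactor: consult the registry keyed by the enum's value before falling back to the remaining hard-coded `elif` branches. A hedged sketch of that dispatch shape, using illustrative names (`Kind`, `_FACTORIES`, `build_store`) rather than SkyPilot's:

```python
import enum
from typing import Any, Callable, Dict


class Kind(enum.Enum):
    S3 = 'S3'
    R2 = 'R2'
    GCS = 'GCS'


# Stand-in for _S3_COMPATIBLE_STORES: keyed by the enum *value* string.
_FACTORIES: Dict[str, Callable[[dict], Any]] = {
    'R2': lambda meta: ('R2Store', meta),
}


def build_store(kind: Kind, metadata: dict) -> Any:
    # Registered S3-compatible kinds dispatch through the dict first,
    # mirroring `if s_type.value in _S3_COMPATIBLE_STORES:` above.
    if kind.value in _FACTORIES:
        return _FACTORIES[kind.value](metadata)
    # Everything else keeps an explicit branch, as in the elif chain.
    if kind is Kind.GCS:
        return ('GcsStore', metadata)
    raise ValueError(f'Unknown store type: {kind}')


assert build_store(Kind.R2, {'bucket': 'b'})[0] == 'R2Store'
assert build_store(Kind.GCS, {})[0] == 'GcsStore'
```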
@@ -1343,101 +1414,262 @@ class Storage(object):
         return config


-
-
-
+# Registry for S3-compatible stores
+_S3_COMPATIBLE_STORES = {}
+
+
+def register_s3_compatible_store(store_class):
+    """Decorator to automatically register S3-compatible stores."""
+    store_type = store_class.get_store_type()
+    _S3_COMPATIBLE_STORES[store_type] = store_class
+    return store_class
+
+
+@dataclass
+class S3CompatibleConfig:
+    """Configuration for S3-compatible storage providers."""
+    # Provider identification
+    store_type: str  # Store type identifier (e.g., "S3", "R2", "MINIO")
+    url_prefix: str  # URL prefix (e.g., "s3://", "r2://", "minio://")
+
+    # Client creation
+    client_factory: Callable[[Optional[str]], Any]
+    resource_factory: Callable[[str], StorageHandle]
+    split_path: Callable[[str], Tuple[str, str]]
+    verify_bucket: Callable[[str], bool]
+
+    # CLI configuration
+    aws_profile: Optional[str] = None
+    get_endpoint_url: Optional[Callable[[], str]] = None
+    credentials_file: Optional[str] = None
+    config_file: Optional[str] = None
+    extra_cli_args: Optional[List[str]] = None
+
+    # Provider-specific settings
+    cloud_name: str = ''
+    default_region: Optional[str] = None
+    access_denied_message: str = 'Access Denied'
+
+    # Mounting
+    mount_cmd_factory: Optional[Callable] = None
+    mount_cached_cmd_factory: Optional[Callable] = None
+
+    def __post_init__(self):
+        if self.extra_cli_args is None:
+            self.extra_cli_args = []
+
+
+class S3CompatibleStore(AbstractStore):
+    """Base class for S3-compatible object storage providers.
+
+    This class provides a unified interface for all S3-compatible storage
+    providers (AWS S3, Cloudflare R2, Nebius, MinIO, CoreWeave, etc.) by
+    leveraging a configuration-driven approach that eliminates code duplication
+
+    ## Adding a New S3-Compatible Store
+
+    To add a new S3-compatible storage provider (e.g., MinIO),
+    follow these steps:
+
+    ### 1. Add Store Type to Enum
+    First, add your store type to the StoreType enum:
+    ```python
+    class StoreType(enum.Enum):
+        # ... existing entries ...
+        MINIO = 'MINIO'
+    ```
+
+    ### 2. Create Store Class
+    Create a new store class that inherits from S3CompatibleStore:
+    ```python
+    @register_s3_compatible_store
+    class MinIOStore(S3CompatibleStore):
+        '''MinIOStore for MinIO object storage.'''
+
+        @classmethod
+        def get_config(cls) -> S3CompatibleConfig:
+            '''Return the configuration for MinIO.'''
+            return S3CompatibleConfig(
+                store_type='MINIO',
+                url_prefix='minio://',
+                client_factory=lambda region:\
+                    data_utils.create_minio_client(region),
+                resource_factory=lambda name:\
+                    minio.resource('s3').Bucket(name),
+                split_path=data_utils.split_minio_path,
+                aws_profile='minio',
+                get_endpoint_url=lambda: minio.get_endpoint_url(),
+                cloud_name='minio',
+                default_region='us-east-1',
+                mount_cmd_factory=mounting_utils.get_minio_mount_cmd,
+            )
+    ```
+
+    ### 3. Implement Required Utilities
+    Create the necessary utility functions:
+
+    #### In `sky/data/data_utils.py`:
+    ```python
+    def create_minio_client(region: Optional[str] = None):
+        '''Create MinIO S3 client.'''
+        return boto3.client('s3',
+                            endpoint_url=minio.get_endpoint_url(),
+                            aws_access_key_id=minio.get_access_key(),
+                            aws_secret_access_key=minio.get_secret_key(),
+                            region_name=region or 'us-east-1')
+
+    def split_minio_path(minio_path: str) -> Tuple[str, str]:
+        '''Split minio://bucket/key into (bucket, key).'''
+        path_parts = minio_path.replace('minio://', '').split('/', 1)
+        bucket = path_parts[0]
+        key = path_parts[1] if len(path_parts) > 1 else ''
+        return bucket, key
+    ```
+
+    #### In `sky/utils/mounting_utils.py`:
+    ```python
+    def get_minio_mount_cmd(profile: str, bucket_name: str, endpoint_url: str,
+                            mount_path: str,
+                            bucket_sub_path: Optional[str]) -> str:
+        '''Generate MinIO mount command using s3fs.'''
+        # Implementation similar to other S3-compatible mount commands
+        pass
+    ```
+
+    ### 4. Create Adapter Module (if needed)
+    Create `sky/adaptors/minio.py` for MinIO-specific configuration:
+    ```python
+    '''MinIO adapter for SkyPilot.'''
+
+    MINIO_PROFILE_NAME = 'minio'
+
+    def get_endpoint_url() -> str:
+        '''Get MinIO endpoint URL from configuration.'''
+        # Read from ~/.minio/config or environment variables
+        pass
+
+    def resource(resource_name: str):
+        '''Get MinIO resource.'''
+        # Implementation for creating MinIO resources
+        pass
+    ```
+
     """

-    _DEFAULT_REGION = 'us-east-1'
     _ACCESS_DENIED_MESSAGE = 'Access Denied'
-    _CUSTOM_ENDPOINT_REGIONS = [
-        'ap-east-1', 'me-south-1', 'af-south-1', 'eu-south-1', 'eu-south-2',
-        'ap-south-2', 'ap-southeast-3', 'ap-southeast-4', 'me-central-1',
-        'il-central-1'
-    ]

     def __init__(self,
                  name: str,
                  source: str,
-                 region: Optional[str] =
+                 region: Optional[str] = None,
                  is_sky_managed: Optional[bool] = None,
                  sync_on_reconstruction: bool = True,
                  _bucket_sub_path: Optional[str] = None):
+        # Initialize configuration first to get defaults
+        self.config = self.__class__.get_config()
+
+        # Use provider's default region if not specified
+        if region is None:
+            region = self.config.default_region
+
+        # Initialize S3CompatibleStore specific attributes
         self.client: 'mypy_boto3_s3.Client'
         self.bucket: 'StorageHandle'
-
-        #
-        # We should eventually make all opt-in regions also work for S3 by
-        # passing the right endpoint flags.
-        if region in self._CUSTOM_ENDPOINT_REGIONS:
-            logger.warning('AWS opt-in regions are not supported for S3. '
-                           f'Falling back to default region '
-                           f'{self._DEFAULT_REGION} for bucket {name!r}.')
-            region = self._DEFAULT_REGION
+
+        # Call parent constructor
         super().__init__(name, source, region, is_sky_managed,
                          sync_on_reconstruction, _bucket_sub_path)

+    @classmethod
+    @abstractmethod
+    def get_config(cls) -> S3CompatibleConfig:
+        """Return the configuration for this S3-compatible provider."""
+        pass
+
+    @classmethod
+    def get_store_type(cls) -> str:
+        """Return the store type identifier from configuration."""
+        return cls.get_config().store_type
+
+    @property
+    def provider_prefixes(self) -> set:
+        """Dynamically get all provider prefixes from registered stores."""
+        prefixes = set()
+
+        # Get prefixes from all registered S3-compatible stores
+        for store_class in _S3_COMPATIBLE_STORES.values():
+            config = store_class.get_config()
+            prefixes.add(config.url_prefix)
+
+        # Add hardcoded prefixes for non-S3-compatible stores
+        prefixes.update({
+            'gs://',  # GCS
+            'https://',  # Azure
+            'cos://',  # IBM COS
+            'oci://',  # OCI
+        })
+
+        return prefixes
+
     def _validate(self):
         if self.source is not None and isinstance(self.source, str):
-            if self.source.startswith(
-
-
-            '
+            if self.source.startswith(self.config.url_prefix):
+                bucket_name, _ = self.config.split_path(self.source)
+                assert self.name == bucket_name, (
+                    f'{self.config.store_type} Bucket is specified as path, '
+                    f'the name should be the same as {self.config.store_type} '
+                    f'bucket.')
+                # Only verify if this is NOT the same store type as the source
+                if self.__class__.get_store_type() != self.config.store_type:
+                    assert self.config.verify_bucket(self.name), (
+                        f'Source specified as {self.source},'
+                        f'a {self.config.store_type} '
+                        f'bucket. {self.config.store_type} Bucket should exist.'
+                    )
             elif self.source.startswith('gs://'):
                 assert self.name == data_utils.split_gcs_path(self.source)[0], (
                     'GCS Bucket is specified as path, the name should be '
                     'the same as GCS bucket.')
-
-
-
+                if not isinstance(self, GcsStore):
+                    assert data_utils.verify_gcs_bucket(self.name), (
+                        f'Source specified as {self.source}, a GCS bucket. ',
+                        'GCS Bucket should exist.')
             elif data_utils.is_az_container_endpoint(self.source):
                 storage_account_name, container_name, _ = (
                     data_utils.split_az_path(self.source))
                 assert self.name == container_name, (
                     'Azure bucket is specified as path, the name should be '
                     'the same as Azure bucket.')
-
-
-
+                if not isinstance(self, AzureBlobStore):
+                    assert data_utils.verify_az_bucket(
+                        storage_account_name, self.name
+                    ), (f'Source specified as {self.source}, an Azure bucket. '
                         'Azure bucket should exist.')
-            elif self.source.startswith('r2://'):
-                assert self.name == data_utils.split_r2_path(self.source)[0], (
-                    'R2 Bucket is specified as path, the name should be '
-                    'the same as R2 bucket.')
-                assert data_utils.verify_r2_bucket(self.name), (
-                    f'Source specified as {self.source}, a R2 bucket. ',
-                    'R2 Bucket should exist.')
-            elif self.source.startswith('nebius://'):
-                assert self.name == data_utils.split_nebius_path(
-                    self.source)[0], (
-                        'Nebius Object Storage is specified as path, the name '
-                        'should be the same as Nebius Object Storage bucket.')
-                assert data_utils.verify_nebius_bucket(self.name), (
-                    f'Source specified as {self.source}, a Nebius Object '
-                    f'Storage bucket. Nebius Object Storage Bucket should'
-                    f' exist.')
             elif self.source.startswith('cos://'):
                 assert self.name == data_utils.split_cos_path(self.source)[0], (
                     'COS Bucket is specified as path, the name should be '
                     'the same as COS bucket.')
-
-
-
+                if not isinstance(self, IBMCosStore):
+                    assert data_utils.verify_ibm_cos_bucket(self.name), (
+                        f'Source specified as {self.source}, a COS bucket. ',
+                        'COS Bucket should exist.')
             elif self.source.startswith('oci://'):
                 raise NotImplementedError(
-                    'Moving data from OCI to
+                    f'Moving data from OCI to {self.source} is ',
+                    'currently not supported.')
+
         # Validate name
         self.name = self.validate_name(self.name)

         # Check if the storage is enabled
-        if not _is_storage_cloud_enabled(
+        if not _is_storage_cloud_enabled(self.config.cloud_name):
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.ResourcesUnavailableError(
-                    'Storage
-                    '
-                    '
-                    '
-                )
+                    f'Storage "store: {self.config.store_type.lower()}" '
+                    f'specified, but '
+                    f'{self.config.cloud_name} access is disabled. '
+                    'To fix, enable '
+                    f'{self.config.cloud_name} by running `sky check`.')

     @classmethod
     def validate_name(cls, name: str) -> str:
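The `@register_s3_compatible_store` decorator introduced in this hunk is a standard class-registry pattern: decorating a class records it under its declared store type and hands the class back untouched. A minimal runnable sketch of the same mechanism, under illustrative names (`register`, `Base`, `MinIOLike` are not SkyPilot identifiers):

```python
from typing import Dict, Type

_REGISTRY: Dict[str, Type] = {}


def register(store_class: Type) -> Type:
    """Record the class under its declared store type; return it unchanged."""
    _REGISTRY[store_class.get_store_type()] = store_class
    return store_class


class Base:
    STORE_TYPE = 'BASE'

    @classmethod
    def get_store_type(cls) -> str:
        return cls.STORE_TYPE


@register
class MinIOLike(Base):
    STORE_TYPE = 'MINIO'


# Merely importing the module populates the registry as a side effect,
# which is why each provider only needs the decorator, not manual wiring.
assert _REGISTRY['MINIO'] is MinIOLike
```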
@@ -1509,7 +1741,7 @@ class S3Store(AbstractStore):
|
|
|
1509
1741
|
StorageBucketGetError: If fetching existing bucket fails
|
|
1510
1742
|
StorageInitError: If general initialization fails.
|
|
1511
1743
|
"""
|
|
1512
|
-
self.client =
|
|
1744
|
+
self.client = self.config.client_factory(self.region)
|
|
1513
1745
|
self.bucket, is_new_bucket = self._get_bucket()
|
|
1514
1746
|
if self.is_sky_managed is None:
|
|
1515
1747
|
# If is_sky_managed is not specified, then this is a new storage
|
|
@@ -1531,16 +1763,10 @@ class S3Store(AbstractStore):
|
|
|
1531
1763
|
if isinstance(self.source, list):
|
|
1532
1764
|
self.batch_aws_rsync(self.source, create_dirs=True)
|
|
1533
1765
|
elif self.source is not None:
|
|
1534
|
-
if self.
|
|
1535
|
-
pass
|
|
1536
|
-
elif self.
|
|
1537
|
-
self.
|
|
1538
|
-
elif self.source.startswith('r2://'):
|
|
1539
|
-
self._transfer_to_s3()
|
|
1540
|
-
elif self.source.startswith('oci://'):
|
|
1541
|
-
self._transfer_to_s3()
|
|
1542
|
-
elif self.source.startswith('nebius://'):
|
|
1543
|
-
self._transfer_to_s3()
|
|
1766
|
+
if self._is_same_provider_source():
|
|
1767
|
+
pass # No transfer needed
|
|
1768
|
+
elif self._needs_cross_provider_transfer():
|
|
1769
|
+
self._transfer_from_other_provider()
|
|
1544
1770
|
else:
|
|
1545
1771
|
self.batch_aws_rsync([self.source])
|
|
1546
1772
|
except exceptions.StorageUploadError:
|
|
@@ -1549,57 +1775,94 @@ class S3Store(AbstractStore):
             raise exceptions.StorageUploadError(
                 f'Upload failed for store {self.name}') from e

+    def _is_same_provider_source(self) -> bool:
+        """Check if source is from the same provider."""
+        return isinstance(self.source, str) and self.source.startswith(
+            self.config.url_prefix)
+
+    def _needs_cross_provider_transfer(self) -> bool:
+        """Check if source needs cross-provider transfer."""
+        if not isinstance(self.source, str):
+            return False
+        return any(
+            self.source.startswith(prefix) for prefix in self.provider_prefixes)
+
+    def _detect_source_type(self) -> str:
+        """Detect the source provider type from URL."""
+        if not isinstance(self.source, str):
+            return 'unknown'
+
+        for provider in self.provider_prefixes:
+            if self.source.startswith(provider):
+                return provider[:-len('://')]
+        return ''
+
+    def _transfer_from_other_provider(self):
+        """Transfer data from another cloud to this S3-compatible store."""
+        source_type = self._detect_source_type()
+        target_type = self.config.store_type.lower()
+
+        if hasattr(data_transfer, f'{source_type}_to_{target_type}'):
+            transfer_func = getattr(data_transfer,
+                                    f'{source_type}_to_{target_type}')
+            transfer_func(self.name, self.name)
+        else:
+            with ux_utils.print_exception_no_traceback():
+                raise NotImplementedError(
+                    f'Transfer from {source_type} to {target_type} '
+                    'is not yet supported.')
+
     def delete(self) -> None:
+        """Delete the bucket or sub-path."""
         if self._bucket_sub_path is not None and not self.is_sky_managed:
             return self._delete_sub_path()

-        deleted_by_skypilot = self.
+        deleted_by_skypilot = self._delete_bucket(self.name)
+        provider = self.config.store_type
         if deleted_by_skypilot:
-            msg_str = f'Deleted
+            msg_str = f'Deleted {provider} bucket {self.name}.'
         else:
-            msg_str = f'
+            msg_str = f'{provider} bucket {self.name} may have been deleted ' \
                       f'externally. Removing from local state.'
-        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
-                    f'{colorama.Style.RESET_ALL}')
-
-    def _delete_sub_path(self) -> None:
-        assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
-        deleted_by_skypilot = self._delete_s3_bucket_sub_path(
-            self.name, self._bucket_sub_path)
-        if deleted_by_skypilot:
-            msg_str = f'Removed objects from S3 bucket ' \
-                      f'{self.name}/{self._bucket_sub_path}.'
-        else:
-            msg_str = f'Failed to remove objects from S3 bucket ' \
-                      f'{self.name}/{self._bucket_sub_path}.'
-        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
-                    f'{colorama.Style.RESET_ALL}')
+        logger.info(f'{colorama.Fore.GREEN}{msg_str}{colorama.Style.RESET_ALL}')

     def get_handle(self) -> StorageHandle:
-
+        """Get storage handle using provider's resource factory."""
+        return self.config.resource_factory(self.name)

-    def
-
-
-        """Invokes aws s3 sync to batch upload a list of local paths to S3
+    def _download_file(self, remote_path: str, local_path: str) -> None:
+        """Download file using S3 API."""
+        self.bucket.download_file(remote_path, local_path)

-
-
-
+    def mount_command(self, mount_path: str) -> str:
+        """Get mount command using provider's mount factory."""
+        if self.config.mount_cmd_factory is None:
+            raise exceptions.NotSupportedError(
+                f'Mounting not supported for {self.config.store_type}')

-
-
+        install_cmd = mounting_utils.get_s3_mount_install_cmd()
+        mount_cmd = self.config.mount_cmd_factory(self.bucket.name, mount_path,
+                                                  self._bucket_sub_path)
+        return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                   mount_cmd)

-
-
-
-
-
-
-
-
-
-
+    def mount_cached_command(self, mount_path: str) -> str:
+        """Get cached mount command. Can be overridden by subclasses."""
+        if self.config.mount_cached_cmd_factory is None:
+            raise exceptions.NotSupportedError(
+                f'Cached mounting not supported for {self.config.store_type}')
+
+        install_cmd = mounting_utils.get_rclone_install_cmd()
+        mount_cmd = self.config.mount_cached_cmd_factory(
+            self.bucket.name, mount_path, self._bucket_sub_path)
+        return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                   mount_cmd)
+
+    def batch_aws_rsync(self,
+                        source_path_list: List[Path],
+                        create_dirs: bool = False) -> None:
+        """Generic S3-compatible rsync using AWS CLI."""
+        sub_path = f'/{self._bucket_sub_path}' if self._bucket_sub_path else ''

         def get_file_sync_command(base_dir_path, file_names):
             includes = ' '.join([
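The upload path above replaces per-provider branches with a naming-convention dispatch: a transfer function named `{source}_to_{target}` is looked up on the `data_transfer` module with `getattr`. A minimal, self-contained sketch of that pattern follows; the `SimpleNamespace` stand-in and the `transfer` helper are illustrative only, with `gcs_to_s3` mirroring a function name that appears elsewhere in this diff.

    # Sketch of convention-based dispatch (illustrative names, not package API).
    from types import SimpleNamespace

    # Stand-in for the real data_transfer module; only the attribute-lookup
    # mechanism matters here.
    data_transfer = SimpleNamespace(
        gcs_to_s3=lambda src_bucket, dst_bucket: print(
            f'copy gs://{src_bucket} -> s3://{dst_bucket}'))

    def transfer(source_type: str, target_type: str, bucket: str) -> None:
        # Function name is derived from the two provider types, e.g. 'gcs_to_s3'.
        func_name = f'{source_type}_to_{target_type}'
        transfer_func = getattr(data_transfer, func_name, None)
        if transfer_func is None:
            raise NotImplementedError(
                f'Transfer from {source_type} to {target_type} '
                'is not yet supported.')
        transfer_func(bucket, bucket)

    transfer('gcs', 's3', 'my-bucket')  # prints: copy gs://my-bucket -> s3://my-bucket

Adding a new pair under this convention means defining one function on the transfer module; no dispatch table needs editing.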
@@ -1607,10 +1870,31 @@ class S3Store(AbstractStore):
                 for file_name in file_names
             ])
             base_dir_path = shlex.quote(base_dir_path)
-
-
-
-
+
+            # Build AWS CLI command with provider-specific configuration
+            cmd_parts = ['aws s3 sync --no-follow-symlinks --exclude="*"']
+            cmd_parts.append(f'{includes} {base_dir_path}')
+            cmd_parts.append(f's3://{self.name}{sub_path}')
+
+            # Add provider-specific arguments
+            if self.config.get_endpoint_url:
+                cmd_parts.append(
+                    f'--endpoint-url {self.config.get_endpoint_url()}')
+            if self.config.aws_profile:
+                cmd_parts.append(f'--profile={self.config.aws_profile}')
+            if self.config.extra_cli_args:
+                cmd_parts.extend(self.config.extra_cli_args)
+
+            # Handle credentials file via environment
+            cmd = ' '.join(cmd_parts)
+            if self.config.credentials_file:
+                cmd = 'AWS_SHARED_CREDENTIALS_FILE=' + \
+                    f'{self.config.credentials_file} {cmd}'
+            if self.config.config_file:
+                cmd = 'AWS_CONFIG_FILE=' + \
+                    f'{self.config.config_file} {cmd}'
+
+            return cmd

         def get_dir_sync_command(src_dir_path, dest_dir_name):
             # we exclude .git directory from the sync
|
|
|
1618
1902
|
excluded_list.append('.git/*')
|
|
1619
1903
|
|
|
1620
1904
|
# Process exclusion patterns to make them work correctly with aws
|
|
1621
|
-
# s3 sync
|
|
1905
|
+
# s3 sync - this logic is from S3Store2 to ensure compatibility
|
|
1622
1906
|
processed_excludes = []
|
|
1623
1907
|
for excluded_path in excluded_list:
|
|
1624
1908
|
# Check if the path is a directory exclusion pattern
|
|
1625
|
-
# For AWS S3 sync, directory patterns need to end with "
|
|
1909
|
+
# For AWS S3 sync, directory patterns need to end with "/*" to
|
|
1626
1910
|
# exclude all contents
|
|
1627
1911
|
if (excluded_path.endswith('/') or os.path.isdir(
|
|
1628
1912
|
os.path.join(src_dir_path, excluded_path.rstrip('/')))):
|
|
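The exclusion handling above normalizes directory patterns to end in `/*`, which `aws s3 sync` requires to skip a directory's contents. A simplified sketch of that normalization, using string logic only (the real code, as the hunk shows, also consults `os.path.isdir`):

    # Sketch: normalize directory exclusions for `aws s3 sync --exclude`.
    def process_excludes(excluded_list):
        processed = []
        for pattern in excluded_list:
            if pattern.endswith('/'):
                # Directory pattern: '.git/' becomes '.git/*' so all of its
                # contents are excluded, not just the directory entry itself.
                processed.append(pattern.rstrip('/') + '/*')
            else:
                processed.append(pattern)
        return processed

    assert process_excludes(['.git/', '*.tmp']) == ['.git/*', '*.tmp']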
@@ -1637,10 +1921,28 @@ class S3Store(AbstractStore):
                 for file_name in processed_excludes
             ])
             src_dir_path = shlex.quote(src_dir_path)
-
-
-
-
+
+            cmd_parts = ['aws s3 sync --no-follow-symlinks']
+            cmd_parts.append(f'{excludes} {src_dir_path}')
+            cmd_parts.append(f's3://{self.name}{sub_path}/{dest_dir_name}')
+
+            if self.config.get_endpoint_url:
+                cmd_parts.append(
+                    f'--endpoint-url {self.config.get_endpoint_url()}')
+            if self.config.aws_profile:
+                cmd_parts.append(f'--profile={self.config.aws_profile}')
+            if self.config.extra_cli_args:
+                cmd_parts.extend(self.config.extra_cli_args)
+
+            cmd = ' '.join(cmd_parts)
+            if self.config.credentials_file:
+                cmd = 'AWS_SHARED_CREDENTIALS_FILE=' + \
+                    f'{self.config.credentials_file} {cmd}'
+            if self.config.config_file:
+                cmd = 'AWS_CONFIG_FILE=' + \
+                    f'{self.config.config_file} {cmd}'
+
+            return cmd

         # Generate message for upload
         if len(source_path_list) > 1:
@@ -1648,9 +1950,12 @@ class S3Store(AbstractStore):
         else:
             source_message = source_path_list[0]

+        provider_prefix = self.config.url_prefix
         log_path = sky_logging.generate_tmp_logging_file_path(
             _STORAGE_LOG_FILE_NAME)
-        sync_path = f'{source_message} ->
+        sync_path = (f'{source_message} -> '
+                     f'{provider_prefix}{self.name}{sub_path}/')
+
         with rich_utils.safe_status(
                 ux_utils.spinner_message(f'Syncing {sync_path}',
                                          log_path=log_path)):
@@ -1660,150 +1965,81 @@ class S3Store(AbstractStore):
                 get_dir_sync_command,
                 log_path,
                 self.name,
-                self.
+                self.config.access_denied_message,
                 create_dirs=create_dirs,
                 max_concurrent_uploads=_MAX_CONCURRENT_UPLOADS)
+
             logger.info(
                 ux_utils.finishing_message(f'Storage synced: {sync_path}',
                                            log_path))

-    def _transfer_to_s3(self) -> None:
-        assert isinstance(self.source, str), self.source
-        if self.source.startswith('gs://'):
-            data_transfer.gcs_to_s3(self.name, self.name)
-        elif self.source.startswith('r2://'):
-            data_transfer.r2_to_s3(self.name, self.name)
-
     def _get_bucket(self) -> Tuple[StorageHandle, bool]:
-        """
-
-        If the bucket exists, this method will return the bucket.
-        If the bucket does not exist, there are three cases:
-        1) Raise an error if the bucket source starts with s3://
-        2) Return None if bucket has been externally deleted and
-           sync_on_reconstruction is False
-        3) Create and return a new bucket otherwise
-
-        Raises:
-            StorageSpecError: If externally created bucket is attempted to be
-                mounted without specifying storage source.
-            StorageBucketCreateError: If creating the bucket fails
-            StorageBucketGetError: If fetching a bucket fails
-            StorageExternalDeletionError: If externally deleted storage is
-                attempted to be fetched while reconstructing the storage for
-                'sky storage delete' or 'sky start'
-        """
-        s3 = aws.resource('s3')
-        bucket = s3.Bucket(self.name)
+        """Get or create bucket using S3 API."""
+        bucket = self.config.resource_factory(self.name)

         try:
             # Try Public bucket case.
-            # This line does not error out if the bucket is an external public
-            # bucket or if it is a user's bucket that is publicly
-            # accessible.
             self.client.head_bucket(Bucket=self.name)
             self._validate_existing_bucket()
             return bucket, False
         except aws.botocore_exceptions().ClientError as e:
             error_code = e.response['Error']['Code']
-            # AccessDenied error for buckets that are private and not owned by
-            # user.
             if error_code == '403':
-                command = f'aws s3 ls {self.name}'
+                command = f'aws s3 ls s3://{self.name}'
+                if self.config.aws_profile:
+                    command += f' --profile={self.config.aws_profile}'
+                if self.config.get_endpoint_url:
+                    command += f' --endpoint-url '\
+                               f'{self.config.get_endpoint_url()}'
+                if self.config.credentials_file:
+                    command = (f'AWS_SHARED_CREDENTIALS_FILE='
+                               f'{self.config.credentials_file} {command}')
+                if self.config.config_file:
+                    command = 'AWS_CONFIG_FILE=' + \
+                        f'{self.config.config_file} {command}'
                 with ux_utils.print_exception_no_traceback():
                     raise exceptions.StorageBucketGetError(
                         _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
                         f' To debug, consider running `{command}`.') from e

-        if isinstance(self.source, str) and self.source.startswith(
+        if isinstance(self.source, str) and self.source.startswith(
+                self.config.url_prefix):
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageBucketGetError(
                     'Attempted to use a non-existent bucket as a source: '
-                    f'{self.source}.
-                    f'{self.source}` to debug.')
+                    f'{self.source}.')

-        # If bucket cannot be found
-        # the bucket is to be created by Sky. However, creation is skipped if
-        # Store object is being reconstructed for deletion or re-mount with
-        # sky start, and error is raised instead.
+        # If bucket cannot be found, create it if needed
         if self.sync_on_reconstruction:
-            bucket = self.
+            bucket = self._create_bucket(self.name)
             return bucket, True
         else:
-            # Raised when Storage object is reconstructed for sky storage
-            # delete or to re-mount Storages with sky start but the storage
-            # is already removed externally.
             raise exceptions.StorageExternalDeletionError(
                 'Attempted to fetch a non-existent bucket: '
                 f'{self.name}')

-    def
-        """
-        using the boto3 API
-
-        Args:
-          remote_path: str; Remote path on S3 bucket
-          local_path: str; Local path on user's device
-        """
-        self.bucket.download_file(remote_path, local_path)
-
-    def mount_command(self, mount_path: str) -> str:
-        """Returns the command to mount the bucket to the mount_path.
-
-        Uses goofys to mount the bucket.
-
-        Args:
-          mount_path: str; Path to mount the bucket to.
-        """
-        install_cmd = mounting_utils.get_s3_mount_install_cmd()
-        mount_cmd = mounting_utils.get_s3_mount_cmd(self.bucket.name,
-                                                    mount_path,
-                                                    self._bucket_sub_path)
-        return mounting_utils.get_mounting_command(mount_path, install_cmd,
-                                                   mount_cmd)
-
-    def mount_cached_command(self, mount_path: str) -> str:
-        install_cmd = mounting_utils.get_rclone_install_cmd()
-        rclone_profile_name = (
-            data_utils.Rclone.RcloneStores.S3.get_profile_name(self.name))
-        rclone_config = data_utils.Rclone.RcloneStores.S3.get_config(
-            rclone_profile_name=rclone_profile_name)
-        mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
-            rclone_config, rclone_profile_name, self.bucket.name, mount_path)
-        return mounting_utils.get_mounting_command(mount_path, install_cmd,
-                                                   mount_cached_cmd)
-
-    def _create_s3_bucket(self,
-                          bucket_name: str,
-                          region=_DEFAULT_REGION) -> StorageHandle:
-        """Creates S3 bucket with specific name in specific region
-
-        Args:
-          bucket_name: str; Name of bucket
-          region: str; Region name, e.g. us-west-1, us-east-2
-        Raises:
-          StorageBucketCreateError: If bucket creation fails.
-        """
-        s3_client = self.client
+    def _create_bucket(self, bucket_name: str) -> StorageHandle:
+        """Create bucket using S3 API."""
         try:
             create_bucket_config: Dict[str, Any] = {'Bucket': bucket_name}
-
-            # the LocationConstraint must not be specified.
-            # Reference: https://stackoverflow.com/a/51912090
-            if region is not None and region != 'us-east-1':
+            if self.region is not None and self.region != 'us-east-1':
                 create_bucket_config['CreateBucketConfiguration'] = {
-                    'LocationConstraint': region
+                    'LocationConstraint': self.region
                 }
-
+            self.client.create_bucket(**create_bucket_config)
             logger.info(
                 f'  {colorama.Style.DIM}Created S3 bucket {bucket_name!r} in '
-                f'{region or "us-east-1"}{colorama.Style.RESET_ALL}')
+                f'{self.region or "us-east-1"}{colorama.Style.RESET_ALL}')

             # Add AWS tags configured in config.yaml to the bucket.
             # This is useful for cost tracking and external cleanup.
-            bucket_tags = skypilot_config.
+            bucket_tags = skypilot_config.get_effective_region_config(
+                cloud=self.config.cloud_name,
+                region=None,
+                keys=('labels',),
+                default_value={})
             if bucket_tags:
-
+                self.client.put_bucket_tagging(
                     Bucket=bucket_name,
                     Tagging={
                         'TagSet': [{
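`_get_bucket` probes for existence with boto3's `head_bucket`, which raises `ClientError` with code '403' for private buckets not owned by the caller and '404' when the bucket is missing. A standalone sketch of that probe; the bucket name and the simplified error handling are illustrative, not the package's method.

    # Sketch: bucket-existence probe via head_bucket (boto3 is a real
    # dependency of this code path; requires valid credentials at runtime).
    import boto3
    import botocore.exceptions

    def bucket_exists_and_accessible(name: str) -> bool:
        client = boto3.client('s3')
        try:
            client.head_bucket(Bucket=name)
            return True
        except botocore.exceptions.ClientError as e:
            code = e.response['Error']['Code']
            if code == '403':
                # Bucket exists but belongs to someone else / is private.
                raise PermissionError(f'Bucket {name} exists but is not accessible.')
            return False  # '404': bucket does not exist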
@@ -1811,22 +2047,46 @@ class S3Store(AbstractStore):
                             'Value': v
                         } for k, v in bucket_tags.items()]
                     })
-
         except aws.botocore_exceptions().ClientError as e:
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageBucketCreateError(
-                    f'Attempted to create
+                    f'Attempted to create S3 bucket {self.name} but failed.'
                 ) from e
-        return
+        return self.config.resource_factory(bucket_name)
+
+    def _delete_bucket(self, bucket_name: str) -> bool:
+        """Delete bucket using AWS CLI."""
+        cmd_parts = [f'aws s3 rb s3://{bucket_name} --force']
+
+        if self.config.aws_profile:
+            cmd_parts.append(f'--profile={self.config.aws_profile}')
+        if self.config.get_endpoint_url:
+            cmd_parts.append(f'--endpoint-url {self.config.get_endpoint_url()}')
+
+        remove_command = ' '.join(cmd_parts)
+
+        if self.config.credentials_file:
+            remove_command = (f'AWS_SHARED_CREDENTIALS_FILE='
+                              f'{self.config.credentials_file} '
+                              f'{remove_command}')
+        if self.config.config_file:
+            remove_command = 'AWS_CONFIG_FILE=' + \
+                f'{self.config.config_file} {remove_command}'
+        return self._execute_remove_command(
+            remove_command, bucket_name,
+            f'Deleting {self.config.store_type} bucket {bucket_name}',
+            (f'Failed to delete {self.config.store_type} bucket '
+             f'{bucket_name}.'))

-    def
-
-
+    def _execute_remove_command(self, command: str, bucket_name: str,
+                                hint_operating: str, hint_failed: str) -> bool:
+        """Execute bucket removal command."""
         try:
             with rich_utils.safe_status(
                     ux_utils.spinner_message(hint_operating)):
-                subprocess.check_output(command
-                                        stderr=subprocess.STDOUT
+                subprocess.check_output(command,
+                                        stderr=subprocess.STDOUT,
+                                        shell=True)
         except subprocess.CalledProcessError as e:
             if 'NoSuchBucket' in e.output.decode('utf-8'):
                 logger.debug(
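Bucket removal shells out to `aws s3 rb --force` and treats 'NoSuchBucket' in the CLI output as an externally deleted bucket rather than a failure, so local state can still be cleaned up. A sketch of that pattern, assuming the command string is prebuilt as in the hunks above:

    # Sketch: run the removal command and distinguish "already gone" from
    # genuine failure by inspecting the CLI's combined output.
    import subprocess

    def remove_bucket(remove_command: str) -> bool:
        try:
            subprocess.check_output(remove_command,
                                    stderr=subprocess.STDOUT,
                                    shell=True)
        except subprocess.CalledProcessError as e:
            if 'NoSuchBucket' in e.output.decode('utf-8'):
                return False  # deleted externally; caller updates local state
            raise  # any other CLI failure is a real error
        return True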
@@ -1840,53 +2100,50 @@ class S3Store(AbstractStore):
                     f'Detailed error: {e.output}')
         return True

-    def
-        """
+    def _delete_sub_path(self) -> None:
+        """Remove objects from the sub path in the bucket."""
+        assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
+        deleted_by_skypilot = self._delete_bucket_sub_path(
+            self.name, self._bucket_sub_path)
+        provider = self.config.store_type
+        if deleted_by_skypilot:
+            msg_str = (f'Removed objects from {provider} bucket '
+                       f'{self.name}/{self._bucket_sub_path}.')
+        else:
+            msg_str = (f'Failed to remove objects from {provider} bucket '
+                       f'{self.name}/{self._bucket_sub_path}.')
+        logger.info(f'{colorama.Fore.GREEN}{msg_str}{colorama.Style.RESET_ALL}')
+
+    def _delete_bucket_sub_path(self, bucket_name: str, sub_path: str) -> bool:
+        """Delete objects in the sub path from the bucket."""
+        cmd_parts = [f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive']
+
+        if self.config.aws_profile:
+            cmd_parts.append(f'--profile={self.config.aws_profile}')
+        if self.config.get_endpoint_url:
+            cmd_parts.append(f'--endpoint-url {self.config.get_endpoint_url()}')
+
+        remove_command = ' '.join(cmd_parts)
+
+        if self.config.credentials_file:
+            remove_command = (f'AWS_SHARED_CREDENTIALS_FILE='
+                              f'{self.config.credentials_file} '
+                              f'{remove_command}')
+        if self.config.config_file:
+            remove_command = 'AWS_CONFIG_FILE=' + \
+                f'{self.config.config_file} {remove_command}'
+        return self._execute_remove_command(
+            remove_command, bucket_name,
+            (f'Removing objects from {self.config.store_type} bucket '
+             f'{bucket_name}/{sub_path}'),
+            (f'Failed to remove objects from {self.config.store_type} '
+             f'bucket {bucket_name}/{sub_path}.'))

-        Args:
-            bucket_name: str; Name of bucket

-
-
-
-
-            StorageBucketDeleteError: If deleting the bucket fails.
-        """
-        # Deleting objects is very slow programatically
-        # (i.e. bucket.objects.all().delete() is slow).
-        # In addition, standard delete operations (i.e. via `aws s3 rm`)
-        # are slow, since AWS puts deletion markers.
-        # https://stackoverflow.com/questions/49239351/why-is-it-so-much-slower-to-delete-objects-in-aws-s3-than-it-is-to-create-them
-        # The fastest way to delete is to run `aws s3 rb --force`,
-        # which removes the bucket by force.
-        remove_command = f'aws s3 rb s3://{bucket_name} --force'
-        success = self._execute_s3_remove_command(
-            remove_command, bucket_name,
-            f'Deleting S3 bucket [green]{bucket_name}[/]',
-            f'Failed to delete S3 bucket {bucket_name}.')
-        if not success:
-            return False
-
-        # Wait until bucket deletion propagates on AWS servers
-        while data_utils.verify_s3_bucket(bucket_name):
-            time.sleep(0.1)
-        return True
-
-    def _delete_s3_bucket_sub_path(self, bucket_name: str,
-                                   sub_path: str) -> bool:
-        """Deletes the sub path from the bucket."""
-        remove_command = f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive'
-        return self._execute_s3_remove_command(
-            remove_command, bucket_name, f'Removing objects from S3 bucket '
-            f'[green]{bucket_name}/{sub_path}[/]',
-            f'Failed to remove objects from S3 bucket {bucket_name}/{sub_path}.'
-        )
-
-
-class GcsStore(AbstractStore):
-    """GcsStore inherits from Storage Object and represents the backend
-    for GCS buckets.
-    """
+class GcsStore(AbstractStore):
+    """GcsStore inherits from Storage Object and represents the backend
+    for GCS buckets.
+    """

     _ACCESS_DENIED_MESSAGE = 'AccessDeniedException'
@@ -1951,6 +2208,10 @@ class GcsStore(AbstractStore):
             elif self.source.startswith('oci://'):
                 raise NotImplementedError(
                     'Moving data from OCI to GCS is currently not supported.')
+            elif self.source.startswith('cw://'):
+                raise NotImplementedError(
+                    'Moving data from CoreWeave Object Storage to GCS is'
+                    ' currently not supported.')
         # Validate name
         self.name = self.validate_name(self.name)
         # Check if the storage is enabled
@@ -2337,7 +2598,7 @@ class GcsStore(AbstractStore):
         except Exception as e:  # pylint: disable=broad-except
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageBucketCreateError(
-                    f'Attempted to create
+                    f'Attempted to create GCS bucket {self.name} but failed.'
                 ) from e
         logger.info(
             f'  {colorama.Style.DIM}Created GCS bucket {new_bucket.name!r} in '
@@ -2494,7 +2755,11 @@ class AzureBlobStore(AbstractStore):
             name=override_args.get('name', metadata.name),
             storage_account_name=override_args.get(
                 'storage_account', metadata.storage_account_name),
-
+            # TODO(cooperc): fix the types for mypy 1.16
+            # Azure store expects a string path; metadata.source may be a Path
+            # or List[Path].
+            source=override_args.get('source',
+                                     metadata.source),  # type: ignore[arg-type]
             region=override_args.get('region', metadata.region),
             is_sky_managed=override_args.get('is_sky_managed',
                                              metadata.is_sky_managed),
@@ -2562,6 +2827,10 @@ class AzureBlobStore(AbstractStore):
             elif self.source.startswith('oci://'):
                 raise NotImplementedError(
                     'Moving data from OCI to AZureBlob is not supported.')
+            elif self.source.startswith('cw://'):
+                raise NotImplementedError(
+                    'Moving data from CoreWeave Object Storage to AzureBlob is'
+                    ' currently not supported.')
         # Validate name
         self.name = self.validate_name(self.name)
@@ -2764,8 +3033,12 @@ class AzureBlobStore(AbstractStore):
             # Creates new resource group and storage account or use the
             # storage_account provided by the user through config.yaml
             else:
-                config_storage_account =
-                (
+                config_storage_account = (
+                    skypilot_config.get_effective_region_config(
+                        cloud='azure',
+                        region=None,
+                        keys=('storage_account',),
+                        default_value=None))
                 if config_storage_account is not None:
                     # using user provided storage account from config.yaml
                     storage_account_name = config_storage_account
@@ -2929,6 +3202,8 @@ class AzureBlobStore(AbstractStore):
                 raise NotImplementedError(error_message.format('OCI'))
             elif self.source.startswith('nebius://'):
                 raise NotImplementedError(error_message.format('NEBIUS'))
+            elif self.source.startswith('cw://'):
+                raise NotImplementedError(error_message.format('CoreWeave'))
             else:
                 self.batch_az_blob_sync([self.source])
         except exceptions.StorageUploadError:
@@ -3256,7 +3531,7 @@ class AzureBlobStore(AbstractStore):
         with rich_utils.safe_status(
                 ux_utils.spinner_message(
                     f'Deleting Azure container {container_name}')):
-            # Check for the
+            # Check for the existence of the container before deletion.
             self.storage_client.blob_containers.get(
                 self.resource_group_name,
                 self.storage_account_name,
@@ -3281,22 +3556,23 @@ class AzureBlobStore(AbstractStore):
         return True


-class
-    """
-    for
+class IBMCosStore(AbstractStore):
+    """IBMCosStore inherits from Storage Object and represents the backend
+    for COS buckets.
     """
-
     _ACCESS_DENIED_MESSAGE = 'Access Denied'

     def __init__(self,
                  name: str,
                  source: str,
-                 region: Optional[str] = '
+                 region: Optional[str] = 'us-east',
                  is_sky_managed: Optional[bool] = None,
-                 sync_on_reconstruction:
+                 sync_on_reconstruction: bool = True,
                  _bucket_sub_path: Optional[str] = None):
-        self.client: '
+        self.client: 'storage.Client'
         self.bucket: 'StorageHandle'
+        self.rclone_profile_name = (
+            data_utils.Rclone.RcloneStores.IBM.get_profile_name(self.name))
         super().__init__(name, source, region, is_sky_managed,
                          sync_on_reconstruction, _bucket_sub_path)
@@ -3330,6 +3606,9 @@ class R2Store(AbstractStore):
             assert self.name == data_utils.split_r2_path(self.source)[0], (
                 'R2 Bucket is specified as path, the name should be '
                 'the same as R2 bucket.')
+            assert data_utils.verify_r2_bucket(self.name), (
+                f'Source specified as {self.source}, a R2 bucket. ',
+                'R2 Bucket should exist.')
         elif self.source.startswith('nebius://'):
             assert self.name == data_utils.split_nebius_path(
                 self.source)[0], (
@@ -3341,29 +3620,63 @@ class R2Store(AbstractStore):
                     f'exist.')
         elif self.source.startswith('cos://'):
             assert self.name == data_utils.split_cos_path(self.source)[0], (
-                '
+                'COS Bucket is specified as path, the name should be '
                 'the same as COS bucket.')
-
-                f'Source specified as {self.source}, a COS bucket. ',
-                'COS Bucket should exist.')
-        elif self.source.startswith('oci://'):
+        elif self.source.startswith('cw://'):
             raise NotImplementedError(
-                'Moving data from
-
+                'Moving data from CoreWeave Object Storage to COS is '
+                'currently not supported.')
         # Validate name
-        self.name =
-
-
+        self.name = IBMCosStore.validate_name(self.name)
+
+    @classmethod
+    def validate_name(cls, name: str) -> str:
+        """Validates the name of a COS bucket.
+
+        Rules source: https://ibm.github.io/ibm-cos-sdk-java/com/ibm/cloud/objectstorage/services/s3/model/Bucket.html  # pylint: disable=line-too-long
+        """
+
+        def _raise_no_traceback_name_error(err_str):
             with ux_utils.print_exception_no_traceback():
-                raise exceptions.
-
-
-
-                )
+                raise exceptions.StorageNameError(err_str)
+
+        if name is not None and isinstance(name, str):
+            if not 3 <= len(name) <= 63:
+                _raise_no_traceback_name_error(
+                    f'Invalid store name: {name} must be between 3 (min) '
+                    'and 63 (max) characters long.')
+
+            # Check for valid characters and start/end with a letter or number
+            pattern = r'^[a-z0-9][-a-z0-9.]*[a-z0-9]$'
+            if not re.match(pattern, name):
+                _raise_no_traceback_name_error(
+                    f'Invalid store name: {name} can consist only of '
+                    'lowercase letters, numbers, dots (.), and dashes (-). '
+                    'It must begin and end with a letter or number.')
+
+            # Check for two adjacent periods or dashes
+            if any(substring in name for substring in ['..', '--']):
+                _raise_no_traceback_name_error(
+                    f'Invalid store name: {name} must not contain '
+                    'two adjacent periods/dashes')
+
+            # Check for IP address format
+            ip_pattern = r'^(?:\d{1,3}\.){3}\d{1,3}$'
+            if re.match(ip_pattern, name):
+                _raise_no_traceback_name_error(
+                    f'Invalid store name: {name} must not be formatted as '
+                    'an IP address (for example, 192.168.5.4).')
+
+            if any(substring in name for substring in ['.-', '-.']):
+                _raise_no_traceback_name_error(
+                    f'Invalid store name: {name} must '
+                    'not allow substrings: ".-", "-." .')
+        else:
+            _raise_no_traceback_name_error('Store name must be specified.')
+        return name

     def initialize(self):
-        """Initializes the
+        """Initializes the cos store object on the cloud.

         Initialization involves fetching bucket if exists, or creating it if
         it does not.
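The COS name rules above can be exercised standalone. This sketch reuses the same regexes and substring checks as a boolean predicate for illustration; it is not the class method itself.

    # Sketch: COS bucket-name rules as a single predicate.
    import re

    def is_valid_cos_name(name: str) -> bool:
        return (3 <= len(name) <= 63 and
                re.match(r'^[a-z0-9][-a-z0-9.]*[a-z0-9]$', name) is not None and
                not any(s in name for s in ['..', '--', '.-', '-.']) and
                re.match(r'^(?:\d{1,3}\.){3}\d{1,3}$', name) is None)

    assert is_valid_cos_name('my-bucket-1')
    assert not is_valid_cos_name('192.168.5.4')  # IP-address form rejected
    assert not is_valid_cos_name('bad--name')    # adjacent dashes rejected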
@@ -3373,7 +3686,8 @@ class R2Store(AbstractStore):
             StorageBucketGetError: If fetching existing bucket fails
             StorageInitError: If general initialization fails.
         """
-        self.client =
+        self.client = ibm.get_cos_client(self.region)
+        self.s3_resource = ibm.get_cos_resource(self.region)
         self.bucket, is_new_bucket = self._get_bucket()
         if self.is_sky_managed is None:
             # If is_sky_managed is not specified, then this is a new storage
@@ -3383,7 +3697,7 @@ class R2Store(AbstractStore):
             self.is_sky_managed = is_new_bucket

     def upload(self):
-        """Uploads
+        """Uploads files from local machine to bucket.

         Upload must be called by the Storage handler - it is not called on
         Store initialization.
@@ -3393,22 +3707,29 @@ class R2Store(AbstractStore):
         """
         try:
             if isinstance(self.source, list):
-                self.
+                self.batch_ibm_rsync(self.source, create_dirs=True)
             elif self.source is not None:
-                if self.source.startswith('
-
-                elif self.source.startswith('gs://'):
-                    self._transfer_to_r2()
-                elif self.source.startswith('r2://'):
+                if self.source.startswith('cos://'):
+                    # cos bucket used as a dest, can't be used as source.
                     pass
-                elif self.source.startswith('
-
+                elif self.source.startswith('s3://'):
+                    raise Exception('IBM COS currently not supporting'
+                                    'data transfers between COS and S3')
                 elif self.source.startswith('nebius://'):
-
+                    raise Exception('IBM COS currently not supporting'
+                                    'data transfers between COS and Nebius')
+                elif self.source.startswith('gs://'):
+                    raise Exception('IBM COS currently not supporting'
+                                    'data transfers between COS and GS')
+                elif self.source.startswith('r2://'):
+                    raise Exception('IBM COS currently not supporting'
+                                    'data transfers between COS and r2')
+                elif self.source.startswith('cw://'):
+                    raise Exception('IBM COS currently not supporting'
+                                    'data transfers between COS and CoreWeave')
                 else:
-                    self.
-
-                    raise
+                    self.batch_ibm_rsync([self.source])
+
         except Exception as e:
             raise exceptions.StorageUploadError(
                 f'Upload failed for store {self.name}') from e
@@ -3417,41 +3738,28 @@ class R2Store(AbstractStore):
         if self._bucket_sub_path is not None and not self.is_sky_managed:
             return self._delete_sub_path()

-
-
-            msg_str = f'Deleted R2 bucket {self.name}.'
-        else:
-            msg_str = f'R2 bucket {self.name} may have been deleted ' \
-                      f'externally. Removing from local state.'
-        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
+        self._delete_cos_bucket()
+        logger.info(f'{colorama.Fore.GREEN}Deleted COS bucket {self.name}.'
                     f'{colorama.Style.RESET_ALL}')

     def _delete_sub_path(self) -> None:
         assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
-
-
-
-
-
-
-            msg_str = f'Failed to remove objects from R2 bucket ' \
-                      f'{self.name}/{self._bucket_sub_path}.'
-        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
-                    f'{colorama.Style.RESET_ALL}')
+        bucket = self.s3_resource.Bucket(self.name)
+        try:
+            self._delete_cos_bucket_objects(bucket, self._bucket_sub_path + '/')
+        except ibm.ibm_botocore.exceptions.ClientError as e:
+            if e.__class__.__name__ == 'NoSuchBucket':
+                logger.debug('bucket already removed')

     def get_handle(self) -> StorageHandle:
-        return
+        return self.s3_resource.Bucket(self.name)

-    def
+    def batch_ibm_rsync(self,
                         source_path_list: List[Path],
                         create_dirs: bool = False) -> None:
-        """Invokes
-
-        AWS Sync by default uses 10 threads to upload files to the bucket. To
-        increase parallelism, modify max_concurrent_requests in your aws config
-        file (Default path: ~/.aws/config).
+        """Invokes rclone copy to batch upload a list of local paths to cos

-        Since
+        Since rclone does not support batch operations, we construct
         multiple commands to be run in parallel.

         Args:
@@ -3465,49 +3773,58 @@ class R2Store(AbstractStore):
         sub_path = (f'/{self._bucket_sub_path}'
                     if self._bucket_sub_path else '')

-        def
+        def get_dir_sync_command(src_dir_path, dest_dir_name) -> str:
+            """returns an rclone command that copies a complete folder
+            from 'src_dir_path' to bucket/'dest_dir_name'.
+
+            `rclone copy` copies files from source path to target.
+            files with identical names at won't be copied over, unless
+            their modification date is more recent.
+            works similarly to `aws sync` (without --delete).
+
+            Args:
+                src_dir_path (str): local source path from which to copy files.
+                dest_dir_name (str): remote target path files are copied to.
+
+            Returns:
+                str: bash command using rclone to sync files. Executed remotely.
+            """
+
+            # .git directory is excluded from the sync
+            # wrapping src_dir_path with "" to support path with spaces
+            src_dir_path = shlex.quote(src_dir_path)
+            sync_command = ('rclone copy --exclude ".git/*" '
+                            f'{src_dir_path} '
+                            f'{self.rclone_profile_name}:{self.name}{sub_path}'
+                            f'/{dest_dir_name}')
+            return sync_command
+
+        def get_file_sync_command(base_dir_path, file_names) -> str:
+            """returns an rclone command that copies files: 'file_names'
+            from base directory: `base_dir_path` to bucket.
+
+            `rclone copy` copies files from source path to target.
+            files with identical names at won't be copied over, unless
+            their modification date is more recent.
+            works similarly to `aws sync` (without --delete).
+
+            Args:
+                base_dir_path (str): local path from which to copy files.
+                file_names (List): specific file names to copy.
+
+            Returns:
+                str: bash command using rclone to sync files
+            """
+
+            # wrapping file_name with "" to support spaces
             includes = ' '.join([
                 f'--include {shlex.quote(file_name)}'
                 for file_name in file_names
             ])
-            endpoint_url = cloudflare.create_endpoint()
             base_dir_path = shlex.quote(base_dir_path)
-            sync_command = (
-
-
-                'aws s3 sync --no-follow-symlinks --exclude="*" '
-                f'{includes} {base_dir_path} '
-                f's3://{self.name}{sub_path} '
-                f'--endpoint {endpoint_url} '
-                # R2 does not support CRC64-NVME
-                # which is the default for aws s3 sync
-                # https://community.cloudflare.com/t/an-error-occurred-internalerror-when-calling-the-putobject-operation/764905/13
-                f'--checksum-algorithm CRC32 '
-                f'--profile={cloudflare.R2_PROFILE_NAME}')
-            return sync_command
-
-        def get_dir_sync_command(src_dir_path, dest_dir_name):
-            # we exclude .git directory from the sync
-            excluded_list = storage_utils.get_excluded_files(src_dir_path)
-            excluded_list.append('.git/*')
-            excludes = ' '.join([
-                f'--exclude {shlex.quote(file_name)}'
-                for file_name in excluded_list
-            ])
-            endpoint_url = cloudflare.create_endpoint()
-            src_dir_path = shlex.quote(src_dir_path)
-            sync_command = (
-                'AWS_SHARED_CREDENTIALS_FILE='
-                f'{cloudflare.R2_CREDENTIALS_PATH} '
-                f'aws s3 sync --no-follow-symlinks {excludes} '
-                f'{src_dir_path} '
-                f's3://{self.name}{sub_path}/{dest_dir_name} '
-                f'--endpoint {endpoint_url} '
-                # R2 does not support CRC64-NVME
-                # which is the default for aws s3 sync
-                # https://community.cloudflare.com/t/an-error-occurred-internalerror-when-calling-the-putobject-operation/764905/13
-                f'--checksum-algorithm CRC32 '
-                f'--profile={cloudflare.R2_PROFILE_NAME}')
+            sync_command = ('rclone copy '
+                            f'{includes} {base_dir_path} '
+                            f'{self.rclone_profile_name}:{self.name}{sub_path}')
             return sync_command

         # Generate message for upload
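COS uploads go through `rclone copy`, which skips destination files whose modification time is not older than the source, giving `aws s3 sync`-style incremental behavior. A sketch of the directory-copy command builder, with placeholder profile and bucket names:

    # Sketch: build an rclone directory-copy command (placeholder names).
    import shlex

    def rclone_dir_copy(profile: str, bucket: str, src_dir: str,
                        dest_dir: str, sub_path: str = '') -> str:
        # Quote the local path so directories with spaces survive the shell.
        src_dir = shlex.quote(src_dir)
        return ('rclone copy --exclude ".git/*" '
                f'{src_dir} {profile}:{bucket}{sub_path}/{dest_dir}')

    print(rclone_dir_copy('sky-ibm-profile', 'my-cos-bucket', '/data', 'data'))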
@@ -3518,7 +3835,8 @@ class R2Store(AbstractStore):

         log_path = sky_logging.generate_tmp_logging_file_path(
             _STORAGE_LOG_FILE_NAME)
-        sync_path =
+        sync_path = (
+            f'{source_message} -> cos://{self.region}/{self.name}{sub_path}/')
         with rich_utils.safe_status(
                 ux_utils.spinner_message(f'Syncing {sync_path}',
                                          log_path=log_path)):
@@ -3535,1236 +3853,306 @@ class R2Store(AbstractStore):
|
|
|
3535
3853
|
ux_utils.finishing_message(f'Storage synced: {sync_path}',
|
|
3536
3854
|
log_path))
|
|
3537
3855
|
|
|
3538
|
-
def _transfer_to_r2(self) -> None:
|
|
3539
|
-
assert isinstance(self.source, str), self.source
|
|
3540
|
-
if self.source.startswith('gs://'):
|
|
3541
|
-
data_transfer.gcs_to_r2(self.name, self.name)
|
|
3542
|
-
elif self.source.startswith('s3://'):
|
|
3543
|
-
data_transfer.s3_to_r2(self.name, self.name)
|
|
3544
|
-
elif self.source.startswith('nebius://'):
|
|
3545
|
-
data_transfer.s3_to_r2(self.name, self.name)
|
|
3546
|
-
|
|
3547
3856
|
def _get_bucket(self) -> Tuple[StorageHandle, bool]:
|
|
3548
|
-
"""
|
|
3857
|
+
"""returns IBM COS bucket object if exists, otherwise creates it.
|
|
3549
3858
|
|
|
3550
|
-
|
|
3551
|
-
|
|
3552
|
-
|
|
3553
|
-
2) Return None if bucket has been externally deleted and
|
|
3554
|
-
sync_on_reconstruction is False
|
|
3555
|
-
3) Create and return a new bucket otherwise
|
|
3859
|
+
Returns:
|
|
3860
|
+
StorageHandle(str): bucket name
|
|
3861
|
+
bool: indicates whether a new bucket was created.
|
|
3556
3862
|
|
|
3557
3863
|
Raises:
|
|
3558
3864
|
StorageSpecError: If externally created bucket is attempted to be
|
|
3559
3865
|
mounted without specifying storage source.
|
|
3560
|
-
StorageBucketCreateError: If
|
|
3866
|
+
StorageBucketCreateError: If bucket creation fails.
|
|
3561
3867
|
StorageBucketGetError: If fetching a bucket fails
|
|
3562
3868
|
StorageExternalDeletionError: If externally deleted storage is
|
|
3563
3869
|
attempted to be fetched while reconstructing the storage for
|
|
3564
3870
|
'sky storage delete' or 'sky start'
|
|
3565
3871
|
"""
|
|
3566
|
-
r2 = cloudflare.resource('s3')
|
|
3567
|
-
bucket = r2.Bucket(self.name)
|
|
3568
|
-
endpoint_url = cloudflare.create_endpoint()
|
|
3569
|
-
try:
|
|
3570
|
-
# Try Public bucket case.
|
|
3571
|
-
# This line does not error out if the bucket is an external public
|
|
3572
|
-
# bucket or if it is a user's bucket that is publicly
|
|
3573
|
-
# accessible.
|
|
3574
|
-
self.client.head_bucket(Bucket=self.name)
|
|
3575
|
-
self._validate_existing_bucket()
|
|
3576
|
-
return bucket, False
|
|
3577
|
-
except aws.botocore_exceptions().ClientError as e:
|
|
3578
|
-
error_code = e.response['Error']['Code']
|
|
3579
|
-
# AccessDenied error for buckets that are private and not owned by
|
|
3580
|
-
# user.
|
|
3581
|
-
if error_code == '403':
|
|
3582
|
-
command = ('AWS_SHARED_CREDENTIALS_FILE='
|
|
3583
|
-
f'{cloudflare.R2_CREDENTIALS_PATH} '
|
|
3584
|
-
f'aws s3 ls s3://{self.name} '
|
|
3585
|
-
f'--endpoint {endpoint_url} '
|
|
3586
|
-
f'--profile={cloudflare.R2_PROFILE_NAME}')
|
|
3587
|
-
with ux_utils.print_exception_no_traceback():
|
|
3588
|
-
raise exceptions.StorageBucketGetError(
|
|
3589
|
-
_BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
|
|
3590
|
-
f' To debug, consider running `{command}`.') from e
|
|
3591
3872
|
|
|
3592
|
-
|
|
3873
|
+
bucket_profile_name = (data_utils.Rclone.RcloneStores.IBM.value +
|
|
3874
|
+
self.name)
|
|
3875
|
+
try:
|
|
3876
|
+
bucket_region = data_utils.get_ibm_cos_bucket_region(self.name)
|
|
3877
|
+
except exceptions.StorageBucketGetError as e:
|
|
3593
3878
|
with ux_utils.print_exception_no_traceback():
|
|
3879
|
+
command = f'rclone lsd {bucket_profile_name}: '
|
|
3594
3880
|
raise exceptions.StorageBucketGetError(
|
|
3595
|
-
|
|
3596
|
-
f'{
|
|
3597
|
-
'`AWS_SHARED_CREDENTIALS_FILE='
|
|
3598
|
-
f'{cloudflare.R2_CREDENTIALS_PATH} aws s3 ls '
|
|
3599
|
-
f's3://{self.name} '
|
|
3600
|
-
f'--endpoint {endpoint_url} '
|
|
3601
|
-
f'--profile={cloudflare.R2_PROFILE_NAME}\' '
|
|
3602
|
-
'to debug.')
|
|
3603
|
-
|
|
3604
|
-
# If bucket cannot be found in both private and public settings,
|
|
3605
|
-
# the bucket is to be created by Sky. However, creation is skipped if
|
|
3606
|
-
# Store object is being reconstructed for deletion or re-mount with
|
|
3607
|
-
# sky start, and error is raised instead.
|
|
3608
|
-
if self.sync_on_reconstruction:
|
|
3609
|
-
bucket = self._create_r2_bucket(self.name)
|
|
3610
|
-
return bucket, True
|
|
3611
|
-
else:
|
|
3612
|
-
# Raised when Storage object is reconstructed for sky storage
|
|
3613
|
-
# delete or to re-mount Storages with sky start but the storage
|
|
3614
|
-
# is already removed externally.
|
|
3615
|
-
raise exceptions.StorageExternalDeletionError(
|
|
3616
|
-
'Attempted to fetch a non-existent bucket: '
|
|
3617
|
-
f'{self.name}')
|
|
3618
|
-
|
|
3619
|
-
def _download_file(self, remote_path: str, local_path: str) -> None:
|
|
3620
|
-
"""Downloads file from remote to local on r2 bucket
|
|
3621
|
-
using the boto3 API
|
|
3881
|
+
_BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
|
|
3882
|
+
f' To debug, consider running `{command}`.') from e
|
|
3622
3883
|
|
|
3623
|
-
Args:
|
|
3624
|
-
remote_path: str; Remote path on R2 bucket
|
|
3625
|
-
local_path: str; Local path on user's device
|
|
3626
|
-
"""
|
|
3627
|
-
self.bucket.download_file(remote_path, local_path)
|
|
3628
|
-
|
|
3629
|
-
def mount_command(self, mount_path: str) -> str:
|
|
3630
|
-
"""Returns the command to mount the bucket to the mount_path.
|
|
3631
|
-
|
|
3632
|
-
Uses goofys to mount the bucket.
|
|
3633
|
-
|
|
3634
|
-
Args:
|
|
3635
|
-
mount_path: str; Path to mount the bucket to.
|
|
3636
|
-
"""
|
|
3637
|
-
install_cmd = mounting_utils.get_s3_mount_install_cmd()
|
|
3638
|
-
endpoint_url = cloudflare.create_endpoint()
|
|
3639
|
-
r2_credential_path = cloudflare.R2_CREDENTIALS_PATH
|
|
3640
|
-
r2_profile_name = cloudflare.R2_PROFILE_NAME
|
|
3641
|
-
mount_cmd = mounting_utils.get_r2_mount_cmd(
|
|
3642
|
-
r2_credential_path, r2_profile_name, endpoint_url, self.bucket.name,
|
|
3643
|
-
mount_path, self._bucket_sub_path)
|
|
3644
|
-
return mounting_utils.get_mounting_command(mount_path, install_cmd,
|
|
3645
|
-
mount_cmd)
|
|
3646
|
-
|
|
3647
|
-
def mount_cached_command(self, mount_path: str) -> str:
|
|
3648
|
-
install_cmd = mounting_utils.get_rclone_install_cmd()
|
|
3649
|
-
rclone_profile_name = (
|
|
3650
|
-
data_utils.Rclone.RcloneStores.R2.get_profile_name(self.name))
|
|
3651
|
-
rclone_config = data_utils.Rclone.RcloneStores.R2.get_config(
|
|
3652
|
-
rclone_profile_name=rclone_profile_name)
|
|
3653
|
-
mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
|
|
3654
|
-
rclone_config, rclone_profile_name, self.bucket.name, mount_path)
|
|
3655
|
-
return mounting_utils.get_mounting_command(mount_path, install_cmd,
|
|
3656
|
-
mount_cached_cmd)
|
|
3657
|
-
|
|
3658
|
-
def _create_r2_bucket(self,
|
|
3659
|
-
bucket_name: str,
|
|
3660
|
-
region='auto') -> StorageHandle:
|
|
3661
|
-
"""Creates R2 bucket with specific name in specific region
|
|
3662
|
-
|
|
3663
|
-
Args:
|
|
3664
|
-
bucket_name: str; Name of bucket
|
|
3665
|
-
region: str; Region name, r2 automatically sets region
|
|
3666
|
-
Raises:
|
|
3667
|
-
StorageBucketCreateError: If bucket creation fails.
|
|
3668
|
-
"""
|
|
3669
|
-
r2_client = self.client
|
|
3670
|
-
try:
|
|
3671
|
-
if region is None:
|
|
3672
|
-
r2_client.create_bucket(Bucket=bucket_name)
|
|
3673
|
-
else:
|
|
3674
|
-
location = {'LocationConstraint': region}
|
|
3675
|
-
r2_client.create_bucket(Bucket=bucket_name,
|
|
3676
|
-
CreateBucketConfiguration=location)
|
|
3677
|
-
logger.info(f' {colorama.Style.DIM}Created R2 bucket '
|
|
3678
|
-
f'{bucket_name!r} in {region}'
|
|
3679
|
-
f'{colorama.Style.RESET_ALL}')
|
|
3680
|
-
except aws.botocore_exceptions().ClientError as e:
|
|
3681
|
-
with ux_utils.print_exception_no_traceback():
|
|
3682
|
-
raise exceptions.StorageBucketCreateError(
|
|
3683
|
-
f'Attempted to create a bucket '
|
|
3684
|
-
f'{self.name} but failed.') from e
|
|
3685
|
-
return cloudflare.resource('s3').Bucket(bucket_name)
|
|
3686
|
-
|
|
3687
|
-
def _execute_r2_remove_command(self, command: str, bucket_name: str,
|
|
3688
|
-
hint_operating: str,
|
|
3689
|
-
hint_failed: str) -> bool:
|
|
3690
|
-
try:
|
|
3691
|
-
with rich_utils.safe_status(
|
|
3692
|
-
ux_utils.spinner_message(hint_operating)):
|
|
3693
|
-
subprocess.check_output(command.split(' '),
|
|
3694
|
-
stderr=subprocess.STDOUT,
|
|
3695
|
-
shell=True)
|
|
3696
|
-
except subprocess.CalledProcessError as e:
|
|
3697
|
-
if 'NoSuchBucket' in e.output.decode('utf-8'):
|
|
3698
|
-
logger.debug(
|
|
3699
|
-
_BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
|
|
3700
|
-
bucket_name=bucket_name))
|
|
3701
|
-
return False
|
|
3702
|
-
else:
|
|
3703
|
-
with ux_utils.print_exception_no_traceback():
|
|
3704
|
-
raise exceptions.StorageBucketDeleteError(
|
|
3705
|
-
f'{hint_failed}'
|
|
3706
|
-
f'Detailed error: {e.output}')
|
|
3707
|
-
return True
|
|
3708
|
-
|
|
3709
|
-
def _delete_r2_bucket_sub_path(self, bucket_name: str,
|
|
3710
|
-
sub_path: str) -> bool:
|
|
3711
|
-
"""Deletes the sub path from the bucket."""
|
|
3712
|
-
endpoint_url = cloudflare.create_endpoint()
|
|
3713
|
-
remove_command = (
|
|
3714
|
-
f'AWS_SHARED_CREDENTIALS_FILE={cloudflare.R2_CREDENTIALS_PATH} '
|
|
3715
|
-
f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive '
|
|
3716
|
-
f'--endpoint {endpoint_url} '
|
|
3717
|
-
f'--profile={cloudflare.R2_PROFILE_NAME}')
|
|
3718
|
-
return self._execute_r2_remove_command(
|
|
3719
|
-
remove_command, bucket_name,
|
|
3720
|
-
f'Removing objects from R2 bucket {bucket_name}/{sub_path}',
|
|
3721
|
-
f'Failed to remove objects from R2 bucket {bucket_name}/{sub_path}.'
|
|
3722
|
-
)
|
|
3723
|
-
|
|
3724
|
-
def _delete_r2_bucket(self, bucket_name: str) -> bool:
|
|
3725
|
-
"""Deletes R2 bucket, including all objects in bucket
|
|
3726
|
-
|
|
3727
|
-
Args:
|
|
3728
|
-
bucket_name: str; Name of bucket
|
|
3729
|
-
|
|
3730
|
-
Returns:
|
|
3731
|
-
bool; True if bucket was deleted, False if it was deleted externally.
|
|
3732
|
-
|
|
3733
|
-
Raises:
|
|
3734
|
-
StorageBucketDeleteError: If deleting the bucket fails.
|
|
3735
|
-
"""
|
|
3736
|
-
# Deleting objects is very slow programatically
|
|
3737
|
-
# (i.e. bucket.objects.all().delete() is slow).
|
|
3738
|
-
# In addition, standard delete operations (i.e. via `aws s3 rm`)
|
|
3739
|
-
# are slow, since AWS puts deletion markers.
|
|
3740
|
-
# https://stackoverflow.com/questions/49239351/why-is-it-so-much-slower-to-delete-objects-in-aws-s3-than-it-is-to-create-them
|
|
3741
|
-
# The fastest way to delete is to run `aws s3 rb --force`,
|
|
3742
|
-
# which removes the bucket by force.
|
|
3743
|
-
endpoint_url = cloudflare.create_endpoint()
|
|
3744
|
-
remove_command = (
|
|
3745
|
-
f'AWS_SHARED_CREDENTIALS_FILE={cloudflare.R2_CREDENTIALS_PATH} '
|
|
3746
|
-
f'aws s3 rb s3://{bucket_name} --force '
|
|
3747
|
-
f'--endpoint {endpoint_url} '
|
|
3748
|
-
f'--profile={cloudflare.R2_PROFILE_NAME}')
|
|
3749
|
-
|
|
3750
|
-
success = self._execute_r2_remove_command(
|
|
3751
|
-
remove_command, bucket_name, f'Deleting R2 bucket {bucket_name}',
|
|
3752
|
-
f'Failed to delete R2 bucket {bucket_name}.')
|
|
3753
|
-
if not success:
|
|
3754
|
-
return False
|
|
3755
|
-
|
|
3756
|
-
# Wait until bucket deletion propagates on AWS servers
|
|
3757
|
-
while data_utils.verify_r2_bucket(bucket_name):
|
|
3758
|
-
time.sleep(0.1)
|
|
3759
|
-
return True
|
|
3760
|
-
|
|
3761
|
-
|
|
-class IBMCosStore(AbstractStore):
-    """IBMCosStore inherits from Storage Object and represents the backend
-    for COS buckets.
-    """
-    _ACCESS_DENIED_MESSAGE = 'Access Denied'
-
-    def __init__(self,
-                 name: str,
-                 source: str,
-                 region: Optional[str] = 'us-east',
-                 is_sky_managed: Optional[bool] = None,
-                 sync_on_reconstruction: bool = True,
-                 _bucket_sub_path: Optional[str] = None):
-        self.client: 'storage.Client'
-        self.bucket: 'StorageHandle'
-        self.rclone_profile_name = (
-            data_utils.Rclone.RcloneStores.IBM.get_profile_name(self.name))
-        super().__init__(name, source, region, is_sky_managed,
-                         sync_on_reconstruction, _bucket_sub_path)
-
-    def _validate(self):
-        if self.source is not None and isinstance(self.source, str):
-            if self.source.startswith('s3://'):
-                assert self.name == data_utils.split_s3_path(self.source)[0], (
-                    'S3 Bucket is specified as path, the name should be the'
-                    ' same as S3 bucket.')
-                assert data_utils.verify_s3_bucket(self.name), (
-                    f'Source specified as {self.source}, a S3 bucket. ',
-                    'S3 Bucket should exist.')
-            elif self.source.startswith('gs://'):
-                assert self.name == data_utils.split_gcs_path(self.source)[0], (
-                    'GCS Bucket is specified as path, the name should be '
-                    'the same as GCS bucket.')
-                assert data_utils.verify_gcs_bucket(self.name), (
-                    f'Source specified as {self.source}, a GCS bucket. ',
-                    'GCS Bucket should exist.')
-            elif data_utils.is_az_container_endpoint(self.source):
-                storage_account_name, container_name, _ = (
-                    data_utils.split_az_path(self.source))
-                assert self.name == container_name, (
-                    'Azure bucket is specified as path, the name should be '
-                    'the same as Azure bucket.')
-                assert data_utils.verify_az_bucket(
-                    storage_account_name, self.name), (
-                        f'Source specified as {self.source}, an Azure bucket. '
-                        'Azure bucket should exist.')
-            elif self.source.startswith('r2://'):
-                assert self.name == data_utils.split_r2_path(self.source)[0], (
-                    'R2 Bucket is specified as path, the name should be '
-                    'the same as R2 bucket.')
-                assert data_utils.verify_r2_bucket(self.name), (
-                    f'Source specified as {self.source}, a R2 bucket. ',
-                    'R2 Bucket should exist.')
-            elif self.source.startswith('nebius://'):
-                assert self.name == data_utils.split_nebius_path(
-                    self.source)[0], (
-                        'Nebius Object Storage is specified as path, the name '
-                        'should be the same as Nebius Object Storage bucket.')
-                assert data_utils.verify_nebius_bucket(self.name), (
-                    f'Source specified as {self.source}, a Nebius Object '
-                    f'Storage bucket. Nebius Object Storage Bucket should '
-                    f'exist.')
-            elif self.source.startswith('cos://'):
-                assert self.name == data_utils.split_cos_path(self.source)[0], (
-                    'COS Bucket is specified as path, the name should be '
-                    'the same as COS bucket.')
-        # Validate name
-        self.name = IBMCosStore.validate_name(self.name)
-
-    @classmethod
-    def validate_name(cls, name: str) -> str:
-        """Validates the name of a COS bucket.
-
-        Rules source: https://ibm.github.io/ibm-cos-sdk-java/com/ibm/cloud/objectstorage/services/s3/model/Bucket.html # pylint: disable=line-too-long
-        """
-
-        def _raise_no_traceback_name_error(err_str):
-            with ux_utils.print_exception_no_traceback():
-                raise exceptions.StorageNameError(err_str)
-
-        if name is not None and isinstance(name, str):
-            if not 3 <= len(name) <= 63:
-                _raise_no_traceback_name_error(
-                    f'Invalid store name: {name} must be between 3 (min) '
-                    'and 63 (max) characters long.')
-
-            # Check for valid characters and start/end with a letter or number
-            pattern = r'^[a-z0-9][-a-z0-9.]*[a-z0-9]$'
-            if not re.match(pattern, name):
-                _raise_no_traceback_name_error(
-                    f'Invalid store name: {name} can consist only of '
-                    'lowercase letters, numbers, dots (.), and dashes (-). '
-                    'It must begin and end with a letter or number.')
-
-            # Check for two adjacent periods or dashes
-            if any(substring in name for substring in ['..', '--']):
-                _raise_no_traceback_name_error(
-                    f'Invalid store name: {name} must not contain '
-                    'two adjacent periods/dashes')
-
-            # Check for IP address format
-            ip_pattern = r'^(?:\d{1,3}\.){3}\d{1,3}$'
-            if re.match(ip_pattern, name):
-                _raise_no_traceback_name_error(
-                    f'Invalid store name: {name} must not be formatted as '
-                    'an IP address (for example, 192.168.5.4).')
-
-            if any(substring in name for substring in ['.-', '-.']):
-                _raise_no_traceback_name_error(
-                    f'Invalid store name: {name} must '
-                    'not allow substrings: ".-", "-." .')
-        else:
-            _raise_no_traceback_name_error('Store name must be specified.')
-        return name
-
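The deleted `validate_name` encodes IBM COS naming rules. A self-contained sketch of the same checks, assuming the rules exactly as written above; `is_valid_cos_bucket_name` is a hypothetical helper that returns a bool instead of raising `StorageNameError`:

```python
import re


def is_valid_cos_bucket_name(name: str) -> bool:
    if not 3 <= len(name) <= 63:
        return False
    # Lowercase letters, digits, dots, dashes; starts/ends alphanumeric.
    if not re.match(r'^[a-z0-9][-a-z0-9.]*[a-z0-9]$', name):
        return False
    # No adjacent or mixed separators.
    if any(s in name for s in ('..', '--', '.-', '-.')):
        return False
    # Must not look like an IPv4 address.
    if re.match(r'^(?:\d{1,3}\.){3}\d{1,3}$', name):
        return False
    return True


assert is_valid_cos_bucket_name('my-data.v2')
assert not is_valid_cos_bucket_name('192.168.5.4')
```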
-    def initialize(self):
-        """Initializes the cos store object on the cloud.
-
-        Initialization involves fetching bucket if exists, or creating it if
-        it does not.
-
-        Raises:
-          StorageBucketCreateError: If bucket creation fails
-          StorageBucketGetError: If fetching existing bucket fails
-          StorageInitError: If general initialization fails.
-        """
-        self.client = ibm.get_cos_client(self.region)
-        self.s3_resource = ibm.get_cos_resource(self.region)
-        self.bucket, is_new_bucket = self._get_bucket()
-        if self.is_sky_managed is None:
-            # If is_sky_managed is not specified, then this is a new storage
-            # object (i.e., did not exist in global_user_state) and we should
-            # set the is_sky_managed property.
-            # If is_sky_managed is specified, then we take no action.
-            self.is_sky_managed = is_new_bucket
-
-    def upload(self):
-        """Uploads files from local machine to bucket.
-
-        Upload must be called by the Storage handler - it is not called on
-        Store initialization.
-
-        Raises:
-            StorageUploadError: if upload fails.
-        """
-        try:
-            if isinstance(self.source, list):
-                self.batch_ibm_rsync(self.source, create_dirs=True)
-            elif self.source is not None:
-                if self.source.startswith('cos://'):
-                    # cos bucket used as a dest, can't be used as source.
-                    pass
-                elif self.source.startswith('s3://'):
-                    raise Exception('IBM COS currently not supporting'
-                                    'data transfers between COS and S3')
-                elif self.source.startswith('nebius://'):
-                    raise Exception('IBM COS currently not supporting'
-                                    'data transfers between COS and Nebius')
-                elif self.source.startswith('gs://'):
-                    raise Exception('IBM COS currently not supporting'
-                                    'data transfers between COS and GS')
-                elif self.source.startswith('r2://'):
-                    raise Exception('IBM COS currently not supporting'
-                                    'data transfers between COS and r2')
-                else:
-                    self.batch_ibm_rsync([self.source])
-
-        except Exception as e:
-            raise exceptions.StorageUploadError(
-                f'Upload failed for store {self.name}') from e
-
-    def delete(self) -> None:
-        if self._bucket_sub_path is not None and not self.is_sky_managed:
-            return self._delete_sub_path()
-
-        self._delete_cos_bucket()
-        logger.info(f'{colorama.Fore.GREEN}Deleted COS bucket {self.name}.'
-                    f'{colorama.Style.RESET_ALL}')
-
-    def _delete_sub_path(self) -> None:
-        assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
-        bucket = self.s3_resource.Bucket(self.name)
-        try:
-            self._delete_cos_bucket_objects(bucket, self._bucket_sub_path + '/')
-        except ibm.ibm_botocore.exceptions.ClientError as e:
-            if e.__class__.__name__ == 'NoSuchBucket':
-                logger.debug('bucket already removed')
-
-    def get_handle(self) -> StorageHandle:
-        return self.s3_resource.Bucket(self.name)
-
-    def batch_ibm_rsync(self,
-                        source_path_list: List[Path],
-                        create_dirs: bool = False) -> None:
-        """Invokes rclone copy to batch upload a list of local paths to cos
-
-        Since rclone does not support batch operations, we construct
-        multiple commands to be run in parallel.
-
-        Args:
-            source_path_list: List of paths to local files or directories
-            create_dirs: If the local_path is a directory and this is set to
-                False, the contents of the directory are directly uploaded to
-                root of the bucket. If the local_path is a directory and this is
-                set to True, the directory is created in the bucket root and
-                contents are uploaded to it.
-        """
-        sub_path = (f'/{self._bucket_sub_path}'
-                    if self._bucket_sub_path else '')
-
-        def get_dir_sync_command(src_dir_path, dest_dir_name) -> str:
-            """returns an rclone command that copies a complete folder
-            from 'src_dir_path' to bucket/'dest_dir_name'.
-
-            `rclone copy` copies files from source path to target.
-            files with identical names at won't be copied over, unless
-            their modification date is more recent.
-            works similarly to `aws sync` (without --delete).
-
-            Args:
-                src_dir_path (str): local source path from which to copy files.
-                dest_dir_name (str): remote target path files are copied to.
-
-            Returns:
-                str: bash command using rclone to sync files. Executed remotely.
-            """
-
-            # .git directory is excluded from the sync
-            # wrapping src_dir_path with "" to support path with spaces
-            src_dir_path = shlex.quote(src_dir_path)
-            sync_command = ('rclone copy --exclude ".git/*" '
-                            f'{src_dir_path} '
-                            f'{self.rclone_profile_name}:{self.name}{sub_path}'
-                            f'/{dest_dir_name}')
-            return sync_command
-
-        def get_file_sync_command(base_dir_path, file_names) -> str:
-            """returns an rclone command that copies files: 'file_names'
-            from base directory: `base_dir_path` to bucket.
-
-            `rclone copy` copies files from source path to target.
-            files with identical names at won't be copied over, unless
-            their modification date is more recent.
-            works similarly to `aws sync` (without --delete).
-
-            Args:
-                base_dir_path (str): local path from which to copy files.
-                file_names (List): specific file names to copy.
-
-            Returns:
-                str: bash command using rclone to sync files
-            """
-
-            # wrapping file_name with "" to support spaces
-            includes = ' '.join([
-                f'--include {shlex.quote(file_name)}'
-                for file_name in file_names
-            ])
-            base_dir_path = shlex.quote(base_dir_path)
-            sync_command = ('rclone copy '
-                            f'{includes} {base_dir_path} '
-                            f'{self.rclone_profile_name}:{self.name}{sub_path}')
-            return sync_command
-
-        # Generate message for upload
-        if len(source_path_list) > 1:
-            source_message = f'{len(source_path_list)} paths'
-        else:
-            source_message = source_path_list[0]
-
-        log_path = sky_logging.generate_tmp_logging_file_path(
-            _STORAGE_LOG_FILE_NAME)
-        sync_path = (
-            f'{source_message} -> cos://{self.region}/{self.name}{sub_path}/')
-        with rich_utils.safe_status(
-                ux_utils.spinner_message(f'Syncing {sync_path}',
-                                         log_path=log_path)):
-            data_utils.parallel_upload(
-                source_path_list,
-                get_file_sync_command,
-                get_dir_sync_command,
-                log_path,
-                self.name,
-                self._ACCESS_DENIED_MESSAGE,
-                create_dirs=create_dirs,
-                max_concurrent_uploads=_MAX_CONCURRENT_UPLOADS)
-        logger.info(
-            ux_utils.finishing_message(f'Storage synced: {sync_path}',
-                                       log_path))
-
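`batch_ibm_rsync` builds one `rclone copy` command per source path and runs them in parallel. A sketch of the command construction under the same quoting rules; `profile` stands in for the rclone remote name and both helpers are illustrative:

```python
import shlex


def dir_sync_command(profile: str, bucket: str, src_dir: str,
                     dest_dir: str, sub_path: str = '') -> str:
    # .git is excluded; shlex.quote keeps paths with spaces intact.
    return ('rclone copy --exclude ".git/*" '
            f'{shlex.quote(src_dir)} '
            f'{profile}:{bucket}{sub_path}/{dest_dir}')


def file_sync_command(profile: str, bucket: str, base_dir: str,
                      file_names: list, sub_path: str = '') -> str:
    includes = ' '.join(
        f'--include {shlex.quote(f)}' for f in file_names)
    return (f'rclone copy {includes} {shlex.quote(base_dir)} '
            f'{profile}:{bucket}{sub_path}')


print(dir_sync_command('ibmcos', 'my-bucket', '/tmp/data dir', 'data'))
```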
-    def _get_bucket(self) -> Tuple[StorageHandle, bool]:
-        """returns IBM COS bucket object if exists, otherwise creates it.
-
-        Returns:
-          StorageHandle(str): bucket name
-          bool: indicates whether a new bucket was created.
-
-        Raises:
-            StorageSpecError: If externally created bucket is attempted to be
-                mounted without specifying storage source.
-            StorageBucketCreateError: If bucket creation fails.
-            StorageBucketGetError: If fetching a bucket fails
-            StorageExternalDeletionError: If externally deleted storage is
-                attempted to be fetched while reconstructing the storage for
-                'sky storage delete' or 'sky start'
-        """
-
-        bucket_profile_name = (data_utils.Rclone.RcloneStores.IBM.value +
-                               self.name)
-        try:
-            bucket_region = data_utils.get_ibm_cos_bucket_region(self.name)
-        except exceptions.StorageBucketGetError as e:
-            with ux_utils.print_exception_no_traceback():
-                command = f'rclone lsd {bucket_profile_name}: '
-                raise exceptions.StorageBucketGetError(
-                    _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
-                    f' To debug, consider running `{command}`.') from e
-
-        try:
-            uri_region = data_utils.split_cos_path(
-                self.source)[2]  # type: ignore
-        except ValueError:
-            # source isn't a cos uri
-            uri_region = ''
-
-        # bucket's region doesn't match specified region in URI
-        if bucket_region and uri_region and uri_region != bucket_region\
-                and self.sync_on_reconstruction:
-            with ux_utils.print_exception_no_traceback():
-                raise exceptions.StorageBucketGetError(
-                    f'Bucket {self.name} exists in '
-                    f'region {bucket_region}, '
-                    f'but URI specified region {uri_region}.')
-
-        if not bucket_region and uri_region:
-            # bucket doesn't exist but source is a bucket URI
-            with ux_utils.print_exception_no_traceback():
-                raise exceptions.StorageBucketGetError(
-                    'Attempted to use a non-existent bucket as a source: '
-                    f'{self.name} by providing URI. Consider using '
-                    '`rclone lsd <remote>` on relevant remotes returned '
-                    'via `rclone listremotes` to debug.')
-
-        data_utils.Rclone.store_rclone_config(
-            self.name,
-            data_utils.Rclone.RcloneStores.IBM,
-            self.region,  # type: ignore
-        )
-
-        if not bucket_region and self.sync_on_reconstruction:
-            # bucket doesn't exist
-            return self._create_cos_bucket(self.name, self.region), True
-        elif not bucket_region and not self.sync_on_reconstruction:
-            # Raised when Storage object is reconstructed for sky storage
-            # delete or to re-mount Storages with sky start but the storage
-            # is already removed externally.
-            raise exceptions.StorageExternalDeletionError(
-                'Attempted to fetch a non-existent bucket: '
-                f'{self.name}')
-        else:
-            # bucket exists
-            bucket = self.s3_resource.Bucket(self.name)
-            self._validate_existing_bucket()
-            return bucket, False
-
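The region checks in `_get_bucket` derive four outcomes from two strings: `bucket_region` is empty when the bucket does not exist, and `uri_region` is empty when the source is not a `cos://` URI. A pure-function sketch of that decision, using plain `ValueError` in place of `StorageBucketGetError`; the helper name is illustrative:

```python
def check_bucket_region(name: str, bucket_region: str, uri_region: str,
                        sync_on_reconstruction: bool) -> None:
    if (bucket_region and uri_region and uri_region != bucket_region
            and sync_on_reconstruction):
        # Existing bucket, but the URI asked for a different region.
        raise ValueError(f'Bucket {name} exists in region {bucket_region}, '
                         f'but URI specified region {uri_region}.')
    if not bucket_region and uri_region:
        # URI points at a bucket that does not exist anywhere.
        raise ValueError('Attempted to use a non-existent bucket as a '
                         f'source: {name}')


# An existing bucket with a matching URI region passes silently.
check_bucket_region('my-bucket', 'us-east', 'us-east', True)
```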
-    def _download_file(self, remote_path: str, local_path: str) -> None:
-        """Downloads file from remote to local on s3 bucket
-        using the boto3 API
-
-        Args:
-          remote_path: str; Remote path on S3 bucket
-          local_path: str; Local path on user's device
-        """
-        self.client.download_file(self.name, local_path, remote_path)
-
-    def mount_command(self, mount_path: str) -> str:
-        """Returns the command to mount the bucket to the mount_path.
-
-        Uses rclone to mount the bucket.
-        Source: https://github.com/rclone/rclone
-
-        Args:
-          mount_path: str; Path to mount the bucket to.
-        """
-        # install rclone if not installed.
-        install_cmd = mounting_utils.get_rclone_install_cmd()
-        rclone_config = data_utils.Rclone.RcloneStores.IBM.get_config(
-            rclone_profile_name=self.rclone_profile_name,
-            region=self.region)  # type: ignore
-        mount_cmd = (
-            mounting_utils.get_cos_mount_cmd(
-                rclone_config,
-                self.rclone_profile_name,
-                self.bucket.name,
-                mount_path,
-                self._bucket_sub_path,  # type: ignore
-            ))
-        return mounting_utils.get_mounting_command(mount_path, install_cmd,
-                                                   mount_cmd)
-
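`mount_command` chains an rclone install step with an rclone mount, delegating the details to `mounting_utils` helpers. A rough sketch of the kind of shell command that results, assuming a generic rclone remote; the flags and install one-liner follow rclone's documented usage, not SkyPilot's exact helpers:

```python
import shlex


def build_mount_command(profile: str, bucket: str, mount_path: str,
                        sub_path: str = '') -> str:
    # Install rclone only when it is missing (mirrors the install step).
    install_cmd = ('which rclone > /dev/null || '
                   'curl https://rclone.org/install.sh | sudo bash')
    remote = f'{profile}:{bucket}'
    if sub_path:
        remote += f'/{sub_path}'
    mount_cmd = (f'mkdir -p {shlex.quote(mount_path)} && '
                 f'rclone mount {remote} {shlex.quote(mount_path)} --daemon')
    # Both steps share one shell so a failed install aborts the mount.
    return f'{install_cmd} && {mount_cmd}'


print(build_mount_command('ibmcos', 'my-bucket', '/mnt/data'))
```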
-    def _create_cos_bucket(self,
-                           bucket_name: str,
-                           region='us-east') -> StorageHandle:
-        """Creates IBM COS bucket with specific name in specific region
-
-        Args:
-          bucket_name: str; Name of bucket
-          region: str; Region name, e.g. us-east, us-south
-        Raises:
-          StorageBucketCreateError: If bucket creation fails.
-        """
-        try:
-            self.client.create_bucket(
-                Bucket=bucket_name,
-                CreateBucketConfiguration={
-                    'LocationConstraint': f'{region}-smart'
-                })
-            logger.info(f'  {colorama.Style.DIM}Created IBM COS bucket '
-                        f'{bucket_name!r} in {region} '
-                        'with storage class smart tier'
-                        f'{colorama.Style.RESET_ALL}')
-            self.bucket = self.s3_resource.Bucket(bucket_name)
-
-        except ibm.ibm_botocore.exceptions.ClientError as e:  # type: ignore[union-attr] # pylint: disable=line-too-long
-            with ux_utils.print_exception_no_traceback():
-                raise exceptions.StorageBucketCreateError(
-                    f'Failed to create bucket: '
-                    f'{bucket_name}') from e
-
-        s3_bucket_exists_waiter = self.client.get_waiter('bucket_exists')
-        s3_bucket_exists_waiter.wait(Bucket=bucket_name)
-
-        return self.bucket
-
-    def _delete_cos_bucket_objects(self,
-                                   bucket: Any,
-                                   prefix: Optional[str] = None) -> None:
-        bucket_versioning = self.s3_resource.BucketVersioning(bucket.name)
-        if bucket_versioning.status == 'Enabled':
-            if prefix is not None:
-                res = list(
-                    bucket.object_versions.filter(Prefix=prefix).delete())
-            else:
-                res = list(bucket.object_versions.delete())
-        else:
-            if prefix is not None:
-                res = list(bucket.objects.filter(Prefix=prefix).delete())
-            else:
-                res = list(bucket.objects.delete())
-        logger.debug(f'Deleted bucket\'s content:\n{res}, prefix: {prefix}')
-
-    def _delete_cos_bucket(self) -> None:
-        bucket = self.s3_resource.Bucket(self.name)
-        try:
-            self._delete_cos_bucket_objects(bucket)
-            bucket.delete()
-            bucket.wait_until_not_exists()
-        except ibm.ibm_botocore.exceptions.ClientError as e:
-            if e.__class__.__name__ == 'NoSuchBucket':
-                logger.debug('bucket already removed')
-        data_utils.Rclone.delete_rclone_bucket_profile(
-            self.name, data_utils.Rclone.RcloneStores.IBM)
-
-
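`_delete_cos_bucket_objects` has to branch on bucket versioning: versioned buckets keep deleted objects around as versions, so it is the versions that must be removed. A boto3-style sketch of that split, where `resource` is any S3-compatible service resource (IBM COS uses an `ibm_boto3` equivalent); the standalone helper is illustrative:

```python
def delete_bucket_objects(resource, bucket_name: str, prefix=None) -> None:
    bucket = resource.Bucket(bucket_name)
    versioned = (
        resource.BucketVersioning(bucket_name).status == 'Enabled')
    # Versioned buckets require deleting object versions; unversioned
    # buckets only need the plain objects removed.
    collection = bucket.object_versions if versioned else bucket.objects
    if prefix is not None:
        responses = list(collection.filter(Prefix=prefix).delete())
    else:
        responses = list(collection.all().delete())
    print(f'Deleted {len(responses)} batch(es), prefix={prefix}')
```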
-class OciStore(AbstractStore):
-    """OciStore inherits from Storage Object and represents the backend
-    for OCI buckets.
-    """
-
-    _ACCESS_DENIED_MESSAGE = 'AccessDeniedException'
-
-    def __init__(self,
-                 name: str,
-                 source: Optional[SourceType],
-                 region: Optional[str] = None,
-                 is_sky_managed: Optional[bool] = None,
-                 sync_on_reconstruction: Optional[bool] = True,
-                 _bucket_sub_path: Optional[str] = None):
-        self.client: Any
-        self.bucket: StorageHandle
-        self.oci_config_file: str
-        self.config_profile: str
-        self.compartment: str
-        self.namespace: str
-
-        # Region is from the specified name in <bucket>@<region> format.
-        # Another case is name can also be set by the source, for example:
-        #   /datasets-storage:
-        #       source: oci://RAGData@us-sanjose-1
-        # The name in above mount will be set to RAGData@us-sanjose-1
-        region_in_name = None
-        if name is not None and '@' in name:
-            self._validate_bucket_expr(name)
-            name, region_in_name = name.split('@')
-
-        # Region is from the specified source in oci://<bucket>@<region> format
-        region_in_source = None
-        if isinstance(source,
-                      str) and source.startswith('oci://') and '@' in source:
-            self._validate_bucket_expr(source)
-            source, region_in_source = source.split('@')
-
-        if region_in_name is not None and region_in_source is not None:
-            # This should never happen because name and source will never be
-            # the remote bucket at the same time.
-            assert region_in_name == region_in_source, (
-                f'Mismatch region specified. Region in name {region_in_name}, '
-                f'but region in source is {region_in_source}')
-
-        if region_in_name is not None:
-            region = region_in_name
-        elif region_in_source is not None:
-            region = region_in_source
-
-        # Default region set to what specified in oci config.
-        if region is None:
-            region = oci.get_oci_config()['region']
-
-        # So far from now on, the name and source are canonical, means there
-        # is no region (@<region> suffix) associated with them anymore.
-
-        super().__init__(name, source, region, is_sky_managed,
-                         sync_on_reconstruction, _bucket_sub_path)
-        # TODO(zpoint): add _bucket_sub_path to the sync/mount/delete commands
-
-    def _validate_bucket_expr(self, bucket_expr: str):
-        pattern = r'^(\w+://)?[A-Za-z0-9-._]+(@\w{2}-\w+-\d{1})$'
-        if not re.match(pattern, bucket_expr):
-            raise ValueError(
-                'The format for the bucket portion is <bucket>@<region> '
-                'when specify a region with a bucket.')
-
-    def _validate(self):
-        if self.source is not None and isinstance(self.source, str):
-            if self.source.startswith('oci://'):
-                assert self.name == data_utils.split_oci_path(self.source)[0], (
-                    'OCI Bucket is specified as path, the name should be '
-                    'the same as OCI bucket.')
-            elif not re.search(r'^\w+://', self.source):
-                # Treat it as local path.
-                pass
-            else:
-                raise NotImplementedError(
-                    f'Moving data from {self.source} to OCI is not supported.')
-
-        # Validate name
-        self.name = self.validate_name(self.name)
-        # Check if the storage is enabled
-        if not _is_storage_cloud_enabled(str(clouds.OCI())):
-            with ux_utils.print_exception_no_traceback():
-                raise exceptions.ResourcesUnavailableError(
-                    'Storage \'store: oci\' specified, but ' \
-                    'OCI access is disabled. To fix, enable '\
-                    'OCI by running `sky check`. '\
-                    'More info: https://skypilot.readthedocs.io/en/latest/getting-started/installation.html.' # pylint: disable=line-too-long
-                )
-
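`OciStore.__init__` accepts a region suffixed onto either the bucket name or the `oci://` source, with precedence: name first, then source, then the OCI config default. A sketch of that parsing under the same regex; `resolve_bucket_and_region` is an illustrative name, and `or`-chaining stands in for the original `is not None` checks:

```python
import re

_BUCKET_EXPR = re.compile(r'^(\w+://)?[A-Za-z0-9-._]+(@\w{2}-\w+-\d{1})$')


def resolve_bucket_and_region(name, source=None, default_region=None):
    region_in_name = region_in_source = None
    if name and '@' in name:
        assert _BUCKET_EXPR.match(name), name
        name, region_in_name = name.split('@')
    if (isinstance(source, str) and source.startswith('oci://')
            and '@' in source):
        assert _BUCKET_EXPR.match(source), source
        source, region_in_source = source.split('@')
    # Name wins over source; source wins over the configured default.
    region = region_in_name or region_in_source or default_region
    return name, source, region


print(resolve_bucket_and_region('RAGData@us-sanjose-1'))
# -> ('RAGData', None, 'us-sanjose-1')
```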
-    @classmethod
-    def validate_name(cls, name) -> str:
-        """Validates the name of the OCI store.
-
-        Source for rules: https://docs.oracle.com/en-us/iaas/Content/Object/Tasks/managingbuckets.htm#Managing_Buckets # pylint: disable=line-too-long
-        """
-
-        def _raise_no_traceback_name_error(err_str):
-            with ux_utils.print_exception_no_traceback():
-                raise exceptions.StorageNameError(err_str)
-
-        if name is not None and isinstance(name, str):
-            # Check for overall length
-            if not 1 <= len(name) <= 256:
-                _raise_no_traceback_name_error(
-                    f'Invalid store name: name {name} must contain 1-256 '
-                    'characters.')
-
-            # Check for valid characters and start/end with a number or letter
-            pattern = r'^[A-Za-z0-9-._]+$'
-            if not re.match(pattern, name):
-                _raise_no_traceback_name_error(
-                    f'Invalid store name: name {name} can only contain '
-                    'upper or lower case letters, numeric characters, hyphens '
-                    '(-), underscores (_), and dots (.). Spaces are not '
-                    'allowed. Names must start and end with a number or '
-                    'letter.')
-        else:
-            _raise_no_traceback_name_error('Store name must be specified.')
-        return name
-
-    def initialize(self):
-        """Initializes the OCI store object on the cloud.
-
-        Initialization involves fetching bucket if exists, or creating it if
-        it does not.
-
-        Raises:
-          StorageBucketCreateError: If bucket creation fails
-          StorageBucketGetError: If fetching existing bucket fails
-          StorageInitError: If general initialization fails.
-        """
-        # pylint: disable=import-outside-toplevel
-        from sky.clouds.utils import oci_utils
-        from sky.provision.oci.query_utils import query_helper
-
-        self.oci_config_file = oci.get_config_file()
-        self.config_profile = oci_utils.oci_config.get_profile()
-
-        ## pylint: disable=line-too-long
-        # What's compartment? See thttps://docs.oracle.com/en/cloud/foundation/cloud_architecture/governance/compartments.html
-        self.compartment = query_helper.find_compartment(self.region)
-        self.client = oci.get_object_storage_client(region=self.region,
-                                                    profile=self.config_profile)
-        self.namespace = self.client.get_namespace(
-            compartment_id=oci.get_oci_config()['tenancy']).data
-
-        self.bucket, is_new_bucket = self._get_bucket()
-        if self.is_sky_managed is None:
-            # If is_sky_managed is not specified, then this is a new storage
-            # object (i.e., did not exist in global_user_state) and we should
-            # set the is_sky_managed property.
-            # If is_sky_managed is specified, then we take no action.
-            self.is_sky_managed = is_new_bucket
-
-    def upload(self):
-        """Uploads source to store bucket.
-
-        Upload must be called by the Storage handler - it is not called on
-        Store initialization.
-
-        Raises:
-            StorageUploadError: if upload fails.
-        """
-        try:
-            if isinstance(self.source, list):
-                self.batch_oci_rsync(self.source, create_dirs=True)
-            elif self.source is not None:
-                if self.source.startswith('oci://'):
-                    pass
-                else:
-                    self.batch_oci_rsync([self.source])
-        except exceptions.StorageUploadError:
-            raise
-        except Exception as e:
-            raise exceptions.StorageUploadError(
-                f'Upload failed for store {self.name}') from e
-
-    def delete(self) -> None:
-        deleted_by_skypilot = self._delete_oci_bucket(self.name)
-        if deleted_by_skypilot:
-            msg_str = f'Deleted OCI bucket {self.name}.'
-        else:
-            msg_str = (f'OCI bucket {self.name} may have been deleted '
-                       f'externally. Removing from local state.')
-        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
-                    f'{colorama.Style.RESET_ALL}')
-
-    def get_handle(self) -> StorageHandle:
-        return self.client.get_bucket(namespace_name=self.namespace,
-                                      bucket_name=self.name).data
-
-    def batch_oci_rsync(self,
-                        source_path_list: List[Path],
-                        create_dirs: bool = False) -> None:
-        """Invokes oci sync to batch upload a list of local paths to Bucket
-
-        Use OCI bulk operation to batch process the file upload
-
-        Args:
-            source_path_list: List of paths to local files or directories
-            create_dirs: If the local_path is a directory and this is set to
-                False, the contents of the directory are directly uploaded to
-                root of the bucket. If the local_path is a directory and this is
-                set to True, the directory is created in the bucket root and
-                contents are uploaded to it.
-        """
-        sub_path = (f'{self._bucket_sub_path}/'
-                    if self._bucket_sub_path else '')
-
-        @oci.with_oci_env
-        def get_file_sync_command(base_dir_path, file_names):
-            includes = ' '.join(
-                [f'--include "{file_name}"' for file_name in file_names])
-            prefix_arg = ''
-            if sub_path:
-                prefix_arg = f'--object-prefix "{sub_path.strip("/")}"'
-            sync_command = (
-                'oci os object bulk-upload --no-follow-symlinks --overwrite '
-                f'--bucket-name {self.name} --namespace-name {self.namespace} '
-                f'--region {self.region} --src-dir "{base_dir_path}" '
-                f'{prefix_arg} '
-                f'{includes}')
-
-            return sync_command
-
-        @oci.with_oci_env
-        def get_dir_sync_command(src_dir_path, dest_dir_name):
-            if dest_dir_name and not str(dest_dir_name).endswith('/'):
-                dest_dir_name = f'{dest_dir_name}/'
-
-            excluded_list = storage_utils.get_excluded_files(src_dir_path)
-            excluded_list.append('.git/*')
-            excludes = ' '.join([
-                f'--exclude {shlex.quote(file_name)}'
-                for file_name in excluded_list
-            ])
-
-            # we exclude .git directory from the sync
-            sync_command = (
-                'oci os object bulk-upload --no-follow-symlinks --overwrite '
-                f'--bucket-name {self.name} --namespace-name {self.namespace} '
-                f'--region {self.region} '
-                f'--object-prefix "{sub_path}{dest_dir_name}" '
-                f'--src-dir "{src_dir_path}" {excludes}')
-
-            return sync_command
-
-        # Generate message for upload
-        if len(source_path_list) > 1:
-            source_message = f'{len(source_path_list)} paths'
-        else:
-            source_message = source_path_list[0]
-
-        log_path = sky_logging.generate_tmp_logging_file_path(
-            _STORAGE_LOG_FILE_NAME)
-        sync_path = f'{source_message} -> oci://{self.name}/{sub_path}'
-        with rich_utils.safe_status(
-                ux_utils.spinner_message(f'Syncing {sync_path}',
-                                         log_path=log_path)):
-            data_utils.parallel_upload(
-                source_path_list=source_path_list,
-                filesync_command_generator=get_file_sync_command,
-                dirsync_command_generator=get_dir_sync_command,
-                log_path=log_path,
-                bucket_name=self.name,
-                access_denied_message=self._ACCESS_DENIED_MESSAGE,
-                create_dirs=create_dirs,
-                max_concurrent_uploads=1)
-
-        logger.info(
-            ux_utils.finishing_message(f'Storage synced: {sync_path}',
-                                       log_path))
-
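`batch_oci_rsync` shells out to `oci os object bulk-upload` rather than using an rclone remote. A sketch of the command assembly with example bucket/namespace/region values; the real code additionally wraps each generator with `@oci.with_oci_env` to set up the CLI environment:

```python
def bulk_upload_command(bucket, namespace, region, src_dir,
                        object_prefix='', includes=()):
    include_args = ' '.join(f'--include "{f}"' for f in includes)
    prefix_arg = (f'--object-prefix "{object_prefix}"'
                  if object_prefix else '')
    # --overwrite replaces existing objects; --no-follow-symlinks skips
    # links so a broken symlink cannot abort the batch.
    return ('oci os object bulk-upload --no-follow-symlinks --overwrite '
            f'--bucket-name {bucket} --namespace-name {namespace} '
            f'--region {region} --src-dir "{src_dir}" '
            f'{prefix_arg} {include_args}').strip()


print(bulk_upload_command('my-bucket', 'mynamespace', 'us-sanjose-1',
                          '/tmp/data', includes=('a.txt', 'b.txt')))
```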
-    def _get_bucket(self) -> Tuple[StorageHandle, bool]:
-        """Obtains the OCI bucket.
-        If the bucket exists, this method will connect to the bucket.
-
-        If the bucket does not exist, there are three cases:
-          1) Raise an error if the bucket source starts with oci://
-          2) Return None if bucket has been externally deleted and
-             sync_on_reconstruction is False
-          3) Create and return a new bucket otherwise
-
-        Return tuple (Bucket, Boolean): The first item is the bucket
-        json payload from the OCI API call, the second item indicates
-        if this is a new created bucket(True) or an existing bucket(False).
-
-        Raises:
-            StorageBucketCreateError: If creating the bucket fails
-            StorageBucketGetError: If fetching a bucket fails
-        """
         try:
-
-
-
-
-
-            if e.status == 404:  # Not Found
-                if isinstance(self.source,
-                              str) and self.source.startswith('oci://'):
-                    with ux_utils.print_exception_no_traceback():
-                        raise exceptions.StorageBucketGetError(
-                            'Attempted to connect to a non-existent bucket: '
-                            f'{self.source}') from e
-                else:
-                    # If bucket cannot be found (i.e., does not exist), it is
-                    # to be created by Sky. However, creation is skipped if
-                    # Store object is being reconstructed for deletion.
-                    if self.sync_on_reconstruction:
-                        bucket = self._create_oci_bucket(self.name)
-                        return bucket, True
-                    else:
-                        return None, False
-            elif e.status == 401:  # Unauthorized
-                # AccessDenied error for buckets that are private and not
-                # owned by user.
-                command = (
-                    f'oci os object list --namespace-name {self.namespace} '
-                    f'--bucket-name {self.name}')
-                with ux_utils.print_exception_no_traceback():
-                    raise exceptions.StorageBucketGetError(
-                        _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
-                        f' To debug, consider running `{command}`.') from e
-            else:
-                # Unknown / unexpected error happened. This might happen when
-                # Object storage service itself functions not normal (e.g.
-                # maintainance event causes internal server error or request
-                # timeout, etc).
-                with ux_utils.print_exception_no_traceback():
-                    raise exceptions.StorageBucketGetError(
-                        f'Failed to connect to OCI bucket {self.name}') from e
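The removed OCI `_get_bucket` dispatches on the HTTP status carried by the service error. A sketch of that classification; `ServiceError` stands in for `oci.exceptions.ServiceError`, and the return strings summarize the branches instead of raising the real exception types:

```python
class ServiceError(Exception):
    """Stand-in for oci.exceptions.ServiceError."""

    def __init__(self, status: int):
        super().__init__(f'status={status}')
        self.status = status


def classify_get_bucket_error(e: ServiceError, source) -> str:
    if e.status == 404:  # Not Found
        if isinstance(source, str) and source.startswith('oci://'):
            # A URI source must already exist; never auto-create it.
            return 'error: source bucket does not exist'
        return 'create bucket (unless reconstructing for deletion)'
    if e.status == 401:  # Unauthorized: private bucket owned by others.
        return 'error: access denied'
    # Anything else: service outage, internal error, request timeout...
    return 'error: failed to connect'


print(classify_get_bucket_error(ServiceError(404), 'oci://data@us-ashburn-1'))
```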
+            uri_region = data_utils.split_cos_path(
+                self.source)[2]  # type: ignore
+        except ValueError:
+            # source isn't a cos uri
+            uri_region = ''

-
-
+        # bucket's region doesn't match specified region in URI
+        if bucket_region and uri_region and uri_region != bucket_region\
+                and self.sync_on_reconstruction:
+            with ux_utils.print_exception_no_traceback():
+                raise exceptions.StorageBucketGetError(
+                    f'Bucket {self.name} exists in '
+                    f'region {bucket_region}, '
+                    f'but URI specified region {uri_region}.')

-
+        if not bucket_region and uri_region:
+            # bucket doesn't exist but source is a bucket URI
+            with ux_utils.print_exception_no_traceback():
+                raise exceptions.StorageBucketGetError(
+                    'Attempted to use a non-existent bucket as a source: '
+                    f'{self.name} by providing URI. Consider using '
+                    '`rclone lsd <remote>` on relevant remotes returned '
+                    'via `rclone listremotes` to debug.')

-
-
-
-
-
-                mount_path=mount_path,
-                store_name=self.name,
-                region=str(self.region),
-                namespace=self.namespace,
-                compartment=self.bucket.compartment_id,
-                config_file=self.oci_config_file,
-                config_profile=self.config_profile)
-        version_check_cmd = mounting_utils.get_rclone_version_check_cmd()
+        data_utils.Rclone.store_rclone_config(
+            self.name,
+            data_utils.Rclone.RcloneStores.IBM,
+            self.region,  # type: ignore
+        )

-
-
+        if not bucket_region and self.sync_on_reconstruction:
+            # bucket doesn't exist
+            return self._create_cos_bucket(self.name, self.region), True
+        elif not bucket_region and not self.sync_on_reconstruction:
+            # Raised when Storage object is reconstructed for sky storage
+            # delete or to re-mount Storages with sky start but the storage
+            # is already removed externally.
+            raise exceptions.StorageExternalDeletionError(
+                'Attempted to fetch a non-existent bucket: '
+                f'{self.name}')
+        else:
+            # bucket exists
+            bucket = self.s3_resource.Bucket(self.name)
+            self._validate_existing_bucket()
+            return bucket, False

     def _download_file(self, remote_path: str, local_path: str) -> None:
-        """Downloads file from remote to local on
+        """Downloads file from remote to local on s3 bucket
+        using the boto3 API

         Args:
-          remote_path: str; Remote path on
+          remote_path: str; Remote path on S3 bucket
           local_path: str; Local path on user's device
         """
-
-        # If the remote path is /bucket_name, we need to
-        # remove the leading /
-        remote_path = remote_path.lstrip('/')
-
-        filename = os.path.basename(remote_path)
-        if not local_path.endswith(filename):
-            local_path = os.path.join(local_path, filename)
-
-        @oci.with_oci_env
-        def get_file_download_command(remote_path, local_path):
-            download_command = (f'oci os object get --bucket-name {self.name} '
-                                f'--namespace-name {self.namespace} '
-                                f'--region {self.region} --name {remote_path} '
-                                f'--file {local_path}')
+        self.client.download_file(self.name, local_path, remote_path)

-
+    def mount_command(self, mount_path: str) -> str:
+        """Returns the command to mount the bucket to the mount_path.

-
+        Uses rclone to mount the bucket.
+        Source: https://github.com/rclone/rclone

-
-
-
-
-
-
-
-
-
-
-
-
-
+        Args:
+          mount_path: str; Path to mount the bucket to.
+        """
+        # install rclone if not installed.
+        install_cmd = mounting_utils.get_rclone_install_cmd()
+        rclone_config = data_utils.Rclone.RcloneStores.IBM.get_config(
+            rclone_profile_name=self.rclone_profile_name,
+            region=self.region)  # type: ignore
+        mount_cmd = (
+            mounting_utils.get_cos_mount_cmd(
+                rclone_config,
+                self.rclone_profile_name,
+                self.bucket.name,
+                mount_path,
+                self._bucket_sub_path,  # type: ignore
+            ))
+        return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                   mount_cmd)

-    def
-
+    def _create_cos_bucket(self,
+                           bucket_name: str,
+                           region='us-east') -> StorageHandle:
+        """Creates IBM COS bucket with specific name in specific region

         Args:
           bucket_name: str; Name of bucket
-          region: str; Region name, e.g. us-
+          region: str; Region name, e.g. us-east, us-south
+        Raises:
+          StorageBucketCreateError: If bucket creation fails.
         """
-        logger.debug(f'_create_oci_bucket: {bucket_name}')
         try:
-
-
-
-
-
-
-
-
-
-
+            self.client.create_bucket(
+                Bucket=bucket_name,
+                CreateBucketConfiguration={
+                    'LocationConstraint': f'{region}-smart'
+                })
+            logger.info(f'  {colorama.Style.DIM}Created IBM COS bucket '
+                        f'{bucket_name!r} in {region} '
+                        'with storage class smart tier'
+                        f'{colorama.Style.RESET_ALL}')
+            self.bucket = self.s3_resource.Bucket(bucket_name)
+
+        except ibm.ibm_botocore.exceptions.ClientError as e:  # type: ignore[union-attr] # pylint: disable=line-too-long
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageBucketCreateError(
-                    f'Failed to create
-
-    def _delete_oci_bucket(self, bucket_name: str) -> bool:
-        """Deletes OCI bucket, including all objects in bucket
-
-        Args:
-          bucket_name: str; Name of bucket
-
-        Returns:
-          bool; True if bucket was deleted, False if it was deleted externally.
-        """
-        logger.debug(f'_delete_oci_bucket: {bucket_name}')
+                    f'Failed to create bucket: '
+                    f'{bucket_name}') from e

-
-
-        remove_command = (f'oci os bucket delete --bucket-name '
-                          f'--region {self.region} '
-                          f'{bucket_name} --empty --force')
+        s3_bucket_exists_waiter = self.client.get_waiter('bucket_exists')
+        s3_bucket_exists_waiter.wait(Bucket=bucket_name)

-
+        return self.bucket

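The re-added `_create_cos_bucket` creates the bucket and then blocks on a `bucket_exists` waiter so that later calls see it. A sketch of that pattern for any botocore-style client passed in by the caller; the `'{region}-smart'` location constraint is IBM COS's smart-tier encoding, as shown in the diff:

```python
def create_bucket_and_wait(client, bucket_name: str, region: str) -> None:
    client.create_bucket(
        Bucket=bucket_name,
        CreateBucketConfiguration={
            # IBM COS encodes the storage class into the location
            # constraint; '<region>-smart' selects the smart tier.
            'LocationConstraint': f'{region}-smart'
        })
    # create_bucket can return before the bucket is visible to
    # subsequent calls; the waiter polls until HeadBucket succeeds.
    client.get_waiter('bucket_exists').wait(Bucket=bucket_name)
```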
-
+    def _delete_cos_bucket_objects(self,
+                                   bucket: Any,
+                                   prefix: Optional[str] = None) -> None:
+        bucket_versioning = self.s3_resource.BucketVersioning(bucket.name)
+        if bucket_versioning.status == 'Enabled':
+            if prefix is not None:
+                res = list(
+                    bucket.object_versions.filter(Prefix=prefix).delete())
+            else:
+                res = list(bucket.object_versions.delete())
+        else:
+            if prefix is not None:
+                res = list(bucket.objects.filter(Prefix=prefix).delete())
+            else:
+                res = list(bucket.objects.delete())
+        logger.debug(f'Deleted bucket\'s content:\n{res}, prefix: {prefix}')

+    def _delete_cos_bucket(self) -> None:
+        bucket = self.s3_resource.Bucket(self.name)
         try:
-
-
-
-
-
-
-
-
-                    bucket_name=bucket_name))
-            return False
-        else:
-            logger.error(e.output)
-            with ux_utils.print_exception_no_traceback():
-                raise exceptions.StorageBucketDeleteError(
-                    f'Failed to delete OCI bucket {bucket_name}.')
-        return True
+            self._delete_cos_bucket_objects(bucket)
+            bucket.delete()
+            bucket.wait_until_not_exists()
+        except ibm.ibm_botocore.exceptions.ClientError as e:
+            if e.__class__.__name__ == 'NoSuchBucket':
+                logger.debug('bucket already removed')
+        data_utils.Rclone.delete_rclone_bucket_profile(
+            self.name, data_utils.Rclone.RcloneStores.IBM)


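The removed `_delete_oci_bucket` shelled out to `oci os bucket delete --empty --force` and reported an externally deleted bucket as `False` rather than an error. A sketch of that flow; the command and flags come from the diff, while the subprocess wrapper and the `BucketNotFound` stderr check are assumptions, not taken from the diff:

```python
import subprocess


def delete_oci_bucket(bucket_name: str, region: str) -> bool:
    # --empty removes all objects first; --force skips confirmation.
    cmd = (f'oci os bucket delete --bucket-name {bucket_name} '
           f'--region {region} --empty --force')
    proc = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    if proc.returncode != 0:
        if 'BucketNotFound' in proc.stderr:  # assumed CLI error string
            # Deleted externally; report False instead of raising.
            return False
        raise RuntimeError(
            f'Failed to delete OCI bucket {bucket_name}: {proc.stderr}')
    return True
```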
-class
-    """
-    for
+class OciStore(AbstractStore):
+    """OciStore inherits from Storage Object and represents the backend
+    for OCI buckets.
     """

-    _ACCESS_DENIED_MESSAGE = '
-    _TIMEOUT_TO_PROPAGATES = 20
+    _ACCESS_DENIED_MESSAGE = 'AccessDeniedException'

     def __init__(self,
                  name: str,
-                 source:
+                 source: Optional[SourceType],
                  region: Optional[str] = None,
                  is_sky_managed: Optional[bool] = None,
-                 sync_on_reconstruction: bool = True,
+                 sync_on_reconstruction: Optional[bool] = True,
                  _bucket_sub_path: Optional[str] = None):
-        self.client:
-        self.bucket:
+        self.client: Any
+        self.bucket: StorageHandle
+        self.oci_config_file: str
+        self.config_profile: str
+        self.compartment: str
+        self.namespace: str
+
+        # Region is from the specified name in <bucket>@<region> format.
+        # Another case is name can also be set by the source, for example:
+        #   /datasets-storage:
+        #       source: oci://RAGData@us-sanjose-1
+        # The name in above mount will be set to RAGData@us-sanjose-1
+        region_in_name = None
+        if name is not None and '@' in name:
+            self._validate_bucket_expr(name)
+            name, region_in_name = name.split('@')
+
+        # Region is from the specified source in oci://<bucket>@<region> format
+        region_in_source = None
+        if isinstance(source,
+                      str) and source.startswith('oci://') and '@' in source:
+            self._validate_bucket_expr(source)
+            source, region_in_source = source.split('@')
+
+        if region_in_name is not None and region_in_source is not None:
+            # This should never happen because name and source will never be
+            # the remote bucket at the same time.
+            assert region_in_name == region_in_source, (
+                f'Mismatch region specified. Region in name {region_in_name}, '
+                f'but region in source is {region_in_source}')
+
+        if region_in_name is not None:
+            region = region_in_name
+        elif region_in_source is not None:
+            region = region_in_source
+
+        # Default region set to what specified in oci config.
+        if region is None:
+            region = oci.get_oci_config()['region']
+
+        # So far from now on, the name and source are canonical, means there
+        # is no region (@<region> suffix) associated with them anymore.
+
         super().__init__(name, source, region, is_sky_managed,
                          sync_on_reconstruction, _bucket_sub_path)
+        # TODO(zpoint): add _bucket_sub_path to the sync/mount/delete commands
+
+    def _validate_bucket_expr(self, bucket_expr: str):
+        pattern = r'^(\w+://)?[A-Za-z0-9-._]+(@\w{2}-\w+-\d{1})$'
+        if not re.match(pattern, bucket_expr):
+            raise ValueError(
+                'The format for the bucket portion is <bucket>@<region> '
+                'when specify a region with a bucket.')

     def _validate(self):
         if self.source is not None and isinstance(self.source, str):
-            if self.source.startswith('
-                assert self.name == data_utils.
-                    '
-                    ' same as
-            elif
-
-
-
-                assert data_utils.verify_gcs_bucket(self.name), (
-                    f'Source specified as {self.source}, a GCS bucket. ',
-                    'GCS Bucket should exist.')
-            elif data_utils.is_az_container_endpoint(self.source):
-                storage_account_name, container_name, _ = (
-                    data_utils.split_az_path(self.source))
-                assert self.name == container_name, (
-                    'Azure bucket is specified as path, the name should be '
-                    'the same as Azure bucket.')
-                assert data_utils.verify_az_bucket(
-                    storage_account_name, self.name), (
-                        f'Source specified as {self.source}, an Azure bucket. '
-                        'Azure bucket should exist.')
-            elif self.source.startswith('r2://'):
-                assert self.name == data_utils.split_r2_path(self.source)[0], (
-                    'R2 Bucket is specified as path, the name should be '
-                    'the same as R2 bucket.')
-                assert data_utils.verify_r2_bucket(self.name), (
-                    f'Source specified as {self.source}, a R2 bucket. ',
-                    'R2 Bucket should exist.')
-            elif self.source.startswith('nebius://'):
-                assert self.name == data_utils.split_nebius_path(
-                    self.source)[0], (
-                        'Nebius Object Storage is specified as path, the name '
-                        'should be the same as Nebius Object Storage bucket.')
-            elif self.source.startswith('cos://'):
-                assert self.name == data_utils.split_cos_path(self.source)[0], (
-                    'COS Bucket is specified as path, the name should be '
-                    'the same as COS bucket.')
-                assert data_utils.verify_ibm_cos_bucket(self.name), (
-                    f'Source specified as {self.source}, a COS bucket. ',
-                    'COS Bucket should exist.')
-            elif self.source.startswith('oci://'):
+            if self.source.startswith('oci://'):
+                assert self.name == data_utils.split_oci_path(self.source)[0], (
+                    'OCI Bucket is specified as path, the name should be '
+                    'the same as OCI bucket.')
+            elif not re.search(r'^\w+://', self.source):
+                # Treat it as local path.
+                pass
+            else:
                 raise NotImplementedError(
-                    'Moving data from
-        # Validate name
-        self.name = S3Store.validate_name(self.name)
+                    f'Moving data from {self.source} to OCI is not supported.')

+        # Validate name
+        self.name = self.validate_name(self.name)
         # Check if the storage is enabled
-        if not _is_storage_cloud_enabled(str(clouds.
+        if not _is_storage_cloud_enabled(str(clouds.OCI())):
             with ux_utils.print_exception_no_traceback():
-                raise exceptions.ResourcesUnavailableError(
-                    'Storage \'store:
-                    '
-                    '
-                    'https://
-
+                raise exceptions.ResourcesUnavailableError(
+                    'Storage \'store: oci\' specified, but ' \
+                    'OCI access is disabled. To fix, enable '\
+                    'OCI by running `sky check`. '\
+                    'More info: https://skypilot.readthedocs.io/en/latest/getting-started/installation.html.' # pylint: disable=line-too-long
+                )
+
+    @classmethod
+    def validate_name(cls, name) -> str:
+        """Validates the name of the OCI store.
+
+        Source for rules: https://docs.oracle.com/en-us/iaas/Content/Object/Tasks/managingbuckets.htm#Managing_Buckets # pylint: disable=line-too-long
+        """
+
+        def _raise_no_traceback_name_error(err_str):
+            with ux_utils.print_exception_no_traceback():
+                raise exceptions.StorageNameError(err_str)
+
+        if name is not None and isinstance(name, str):
+            # Check for overall length
+            if not 1 <= len(name) <= 256:
+                _raise_no_traceback_name_error(
+                    f'Invalid store name: name {name} must contain 1-256 '
+                    'characters.')
+
+            # Check for valid characters and start/end with a number or letter
+            pattern = r'^[A-Za-z0-9-._]+$'
+            if not re.match(pattern, name):
+                _raise_no_traceback_name_error(
+                    f'Invalid store name: name {name} can only contain '
+                    'upper or lower case letters, numeric characters, hyphens '
+                    '(-), underscores (_), and dots (.). Spaces are not '
+                    'allowed. Names must start and end with a number or '
+                    'letter.')
+        else:
+            _raise_no_traceback_name_error('Store name must be specified.')
+        return name

     def initialize(self):
-        """Initializes the
+        """Initializes the OCI store object on the cloud.

         Initialization involves fetching bucket if exists, or creating it if
         it does not.
@@ -4774,7 +4162,21 @@ class NebiusStore(AbstractStore):
           StorageBucketGetError: If fetching existing bucket fails
           StorageInitError: If general initialization fails.
         """
-
+        # pylint: disable=import-outside-toplevel
+        from sky.clouds.utils import oci_utils
+        from sky.provision.oci.query_utils import query_helper
+
+        self.oci_config_file = oci.get_config_file()
+        self.config_profile = oci_utils.oci_config.get_profile()
+
+        ## pylint: disable=line-too-long
+        # What's compartment? See thttps://docs.oracle.com/en/cloud/foundation/cloud_architecture/governance/compartments.html
+        self.compartment = query_helper.find_compartment(self.region)
+        self.client = oci.get_object_storage_client(region=self.region,
+                                                    profile=self.config_profile)
+        self.namespace = self.client.get_namespace(
+            compartment_id=oci.get_oci_config()['tenancy']).data
+
         self.bucket, is_new_bucket = self._get_bucket()
         if self.is_sky_managed is None:
             # If is_sky_managed is not specified, then this is a new storage
@@ -4794,20 +4196,12 @@ class NebiusStore(AbstractStore):
         """
         try:
             if isinstance(self.source, list):
-                self.
+                self.batch_oci_rsync(self.source, create_dirs=True)
             elif self.source is not None:
-                if self.source.startswith('
+                if self.source.startswith('oci://'):
                     pass
-                elif self.source.startswith('s3://'):
-                    self._transfer_to_nebius()
-                elif self.source.startswith('gs://'):
-                    self._transfer_to_nebius()
-                elif self.source.startswith('r2://'):
-                    self._transfer_to_nebius()
-                elif self.source.startswith('oci://'):
-                    self._transfer_to_nebius()
                 else:
-                    self.
+                    self.batch_oci_rsync([self.source])
         except exceptions.StorageUploadError:
             raise
         except Exception as e:
@@ -4815,45 +4209,25 @@ class NebiusStore(AbstractStore):
                 f'Upload failed for store {self.name}') from e

     def delete(self) -> None:
-
-            return self._delete_sub_path()
-
-        deleted_by_skypilot = self._delete_nebius_bucket(self.name)
+        deleted_by_skypilot = self._delete_oci_bucket(self.name)
         if deleted_by_skypilot:
-            msg_str = f'Deleted
+            msg_str = f'Deleted OCI bucket {self.name}.'
         else:
-            msg_str = (f'
+            msg_str = (f'OCI bucket {self.name} may have been deleted '
                        f'externally. Removing from local state.')
         logger.info(f'{colorama.Fore.GREEN}{msg_str}'
                     f'{colorama.Style.RESET_ALL}')

-    def _delete_sub_path(self) -> None:
-        assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
-        deleted_by_skypilot = self._delete_nebius_bucket_sub_path(
-            self.name, self._bucket_sub_path)
-        if deleted_by_skypilot:
-            msg_str = (f'Removed objects from S3 bucket '
-                       f'{self.name}/{self._bucket_sub_path}.')
-        else:
-            msg_str = (f'Failed to remove objects from S3 bucket '
-                       f'{self.name}/{self._bucket_sub_path}.')
-        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
-                    f'{colorama.Style.RESET_ALL}')
-
     def get_handle(self) -> StorageHandle:
-        return
+        return self.client.get_bucket(namespace_name=self.namespace,
+                                      bucket_name=self.name).data

-    def
+    def batch_oci_rsync(self,
                         source_path_list: List[Path],
                         create_dirs: bool = False) -> None:
-        """Invokes
-
-        AWS Sync by default uses 10 threads to upload files to the bucket. To
-        increase parallelism, modify max_concurrent_requests in your aws config
-        file (Default path: ~/.aws/config).
+        """Invokes oci sync to batch upload a list of local paths to Bucket

-
-        multiple commands to be run in parallel.
+        Use OCI bulk operation to batch process the file upload

         Args:
             source_path_list: List of paths to local files or directories
@@ -4863,34 +4237,45 @@ class NebiusStore(AbstractStore):
                 set to True, the directory is created in the bucket root and
                 contents are uploaded to it.
         """
-        sub_path = (f'
+        sub_path = (f'{self._bucket_sub_path}/'
                     if self._bucket_sub_path else '')
 
+        @oci.with_oci_env
         def get_file_sync_command(base_dir_path, file_names):
-            includes = ' '.join(
-                f'--include {
-
-
-
-            sync_command = (
-
-
-
+            includes = ' '.join(
+                [f'--include "{file_name}"' for file_name in file_names])
+            prefix_arg = ''
+            if sub_path:
+                prefix_arg = f'--object-prefix "{sub_path.strip("/")}"'
+            sync_command = (
+                'oci os object bulk-upload --no-follow-symlinks --overwrite '
+                f'--bucket-name {self.name} --namespace-name {self.namespace} '
+                f'--region {self.region} --src-dir "{base_dir_path}" '
+                f'{prefix_arg} '
+                f'{includes}')
+
             return sync_command
 
+        @oci.with_oci_env
        def get_dir_sync_command(src_dir_path, dest_dir_name):
-
+            if dest_dir_name and not str(dest_dir_name).endswith('/'):
+                dest_dir_name = f'{dest_dir_name}/'
+
             excluded_list = storage_utils.get_excluded_files(src_dir_path)
             excluded_list.append('.git/*')
             excludes = ' '.join([
                 f'--exclude {shlex.quote(file_name)}'
                 for file_name in excluded_list
             ])
-
-
-
-
-
+
+            # we exclude .git directory from the sync
+            sync_command = (
+                'oci os object bulk-upload --no-follow-symlinks --overwrite '
+                f'--bucket-name {self.name} --namespace-name {self.namespace} '
+                f'--region {self.region} '
+                f'--object-prefix "{sub_path}{dest_dir_name}" '
+                f'--src-dir "{src_dir_path}" {excludes}')
+
             return sync_command
 
         # Generate message for upload
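Note: both generators above shell out to the OCI CLI rather than the SDK. A sketch of the command string `get_dir_sync_command` produces, with made-up bucket/namespace/region values:

    # All values below are placeholders for illustration only.
    bucket_name = 'my-skypilot-bucket'
    namespace = 'mytenancynamespace'
    region = 'us-ashburn-1'
    sub_path, dest_dir_name = '', 'data/'
    src_dir_path = '/home/user/data'
    excludes = "--exclude '.git/*'"

    sync_command = (
        'oci os object bulk-upload --no-follow-symlinks --overwrite '
        f'--bucket-name {bucket_name} --namespace-name {namespace} '
        f'--region {region} '
        f'--object-prefix "{sub_path}{dest_dir_name}" '
        f'--src-dir "{src_dir_path}" {excludes}')
    print(sync_command)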
@@ -4901,210 +4286,469 @@ class NebiusStore(AbstractStore):
 
         log_path = sky_logging.generate_tmp_logging_file_path(
             _STORAGE_LOG_FILE_NAME)
-        sync_path = f'{source_message} ->
+        sync_path = f'{source_message} -> oci://{self.name}/{sub_path}'
         with rich_utils.safe_status(
                 ux_utils.spinner_message(f'Syncing {sync_path}',
                                          log_path=log_path)):
             data_utils.parallel_upload(
-                source_path_list,
-                get_file_sync_command,
-                get_dir_sync_command,
-                log_path,
-                self.name,
-                self._ACCESS_DENIED_MESSAGE,
+                source_path_list=source_path_list,
+                filesync_command_generator=get_file_sync_command,
+                dirsync_command_generator=get_dir_sync_command,
+                log_path=log_path,
+                bucket_name=self.name,
+                access_denied_message=self._ACCESS_DENIED_MESSAGE,
                 create_dirs=create_dirs,
-                max_concurrent_uploads=
-            logger.info(
-                ux_utils.finishing_message(f'Storage synced: {sync_path}',
-                                           log_path))
+                max_concurrent_uploads=1)
 
-
-
-
-            data_transfer.gcs_to_nebius(self.name, self.name)
-        elif self.source.startswith('r2://'):
-            data_transfer.r2_to_nebius(self.name, self.name)
-        elif self.source.startswith('s3://'):
-            data_transfer.s3_to_nebius(self.name, self.name)
+        logger.info(
+            ux_utils.finishing_message(f'Storage synced: {sync_path}',
+                                       log_path))
 
     def _get_bucket(self) -> Tuple[StorageHandle, bool]:
-        """Obtains the
+        """Obtains the OCI bucket.
+        If the bucket exists, this method will connect to the bucket.
 
-        If the bucket exists, this method will return the bucket.
         If the bucket does not exist, there are three cases:
-          1) Raise an error if the bucket source starts with
+          1) Raise an error if the bucket source starts with oci://
           2) Return None if bucket has been externally deleted and
             sync_on_reconstruction is False
           3) Create and return a new bucket otherwise
 
+        Return tuple (Bucket, Boolean): The first item is the bucket
+        json payload from the OCI API call, the second item indicates
+        if this is a new created bucket(True) or an existing bucket(False).
+
         Raises:
-            StorageSpecError: If externally created bucket is attempted to be
-                mounted without specifying storage source.
             StorageBucketCreateError: If creating the bucket fails
             StorageBucketGetError: If fetching a bucket fails
-            StorageExternalDeletionError: If externally deleted storage is
-                attempted to be fetched while reconstructing the storage for
-                'sky storage delete' or 'sky start'
         """
-        nebius_s = nebius.resource('s3')
-        bucket = nebius_s.Bucket(self.name)
         try:
-
-
-
-            # accessible.
-            self.client.head_bucket(Bucket=self.name)
-            self._validate_existing_bucket()
+            get_bucket_response = self.client.get_bucket(
+                namespace_name=self.namespace, bucket_name=self.name)
+            bucket = get_bucket_response.data
             return bucket, False
-        except
-
-
-
-
-
-
+        except oci.service_exception() as e:
+            if e.status == 404:  # Not Found
+                if isinstance(self.source,
+                              str) and self.source.startswith('oci://'):
+                    with ux_utils.print_exception_no_traceback():
+                        raise exceptions.StorageBucketGetError(
+                            'Attempted to connect to a non-existent bucket: '
+                            f'{self.source}') from e
+                else:
+                    # If bucket cannot be found (i.e., does not exist), it is
+                    # to be created by Sky. However, creation is skipped if
+                    # Store object is being reconstructed for deletion.
+                    if self.sync_on_reconstruction:
+                        bucket = self._create_oci_bucket(self.name)
+                        return bucket, True
+                    else:
+                        return None, False
+            elif e.status == 401:  # Unauthorized
+                # AccessDenied error for buckets that are private and not
+                # owned by user.
+                command = (
+                    f'oci os object list --namespace-name {self.namespace} '
+                    f'--bucket-name {self.name}')
                 with ux_utils.print_exception_no_traceback():
                     raise exceptions.StorageBucketGetError(
                         _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
                         f' To debug, consider running `{command}`.') from e
+            else:
+                # Unknown / unexpected error happened. This might happen when
+                # Object storage service itself functions not normal (e.g.
+                # maintainance event causes internal server error or request
+                # timeout, etc).
+                with ux_utils.print_exception_no_traceback():
+                    raise exceptions.StorageBucketGetError(
+                        f'Failed to connect to OCI bucket {self.name}') from e
 
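Note: `oci.service_exception()` in the adaptor presumably resolves to the SDK's ServiceError, whose `.status` carries the HTTP code dispatched on above. The same 404/401/other split in plain SDK terms (a sketch; the bucket name is a placeholder):

    import oci

    config = oci.config.from_file()
    client = oci.object_storage.ObjectStorageClient(config)
    namespace = client.get_namespace().data
    try:
        bucket = client.get_bucket(namespace_name=namespace,
                                   bucket_name='my-bucket').data
    except oci.exceptions.ServiceError as e:
        if e.status == 404:
            print('Bucket not found; it could be created here.')
        elif e.status == 401:
            print('Credentials cannot read this bucket.')
        else:
            raise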
-
-
-            raise exceptions.StorageBucketGetError(
-                'Attempted to use a non-existent bucket as a source: '
-                f'{self.source}. Consider using `aws s3 ls '
-                f's3://{self.name} '
-                f'--profile={nebius.NEBIUS_PROFILE_NAME}` to debug.')
+    def mount_command(self, mount_path: str) -> str:
+        """Returns the command to mount the bucket to the mount_path.
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        Uses Rclone to mount the bucket.
+
+        Args:
+          mount_path: str; Path to mount the bucket to.
+        """
+        install_cmd = mounting_utils.get_rclone_install_cmd()
+        mount_cmd = mounting_utils.get_oci_mount_cmd(
+            mount_path=mount_path,
+            store_name=self.name,
+            region=str(self.region),
+            namespace=self.namespace,
+            compartment=self.bucket.compartment_id,
+            config_file=self.oci_config_file,
+            config_profile=self.config_profile)
+        version_check_cmd = mounting_utils.get_rclone_version_check_cmd()
+
+        return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                   mount_cmd, version_check_cmd)
 
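Note: `mount_command` only assembles shell strings; the actual FUSE mount is done by rclone, which has a native `oracleobjectstorage` backend. A rough, hypothetical equivalent of the final command (assuming a remote named `oci:` of that type is already configured; flags and values are illustrative, not SkyPilot's exact output):

    # Assemble a plausible rclone mount invocation (placeholders throughout).
    bucket, mount_path = 'my-bucket', '/mnt/my-bucket'
    rclone_cmd = (f'rclone mount oci:{bucket} {mount_path} --daemon '
                  '--oos-config-file ~/.oci/config '
                  '--oos-config-profile DEFAULT')
    print(rclone_cmd)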
     def _download_file(self, remote_path: str, local_path: str) -> None:
-        """Downloads file from remote to local on
-        using the boto3 API
+        """Downloads file from remote to local on OCI bucket
 
         Args:
-          remote_path: str; Remote path on
+          remote_path: str; Remote path on OCI bucket
           local_path: str; Local path on user's device
         """
-        self.
+        if remote_path.startswith(f'/{self.name}'):
+            # If the remote path is /bucket_name, we need to
+            # remove the leading /
+            remote_path = remote_path.lstrip('/')
 
-
-
+        filename = os.path.basename(remote_path)
+        if not local_path.endswith(filename):
+            local_path = os.path.join(local_path, filename)
+
+        @oci.with_oci_env
+        def get_file_download_command(remote_path, local_path):
+            download_command = (f'oci os object get --bucket-name {self.name} '
+                                f'--namespace-name {self.namespace} '
+                                f'--region {self.region} --name {remote_path} '
+                                f'--file {local_path}')
 
-
+            return download_command
 
-
-
-
-
-
-
-
-
-
-
-
-
-
+        download_command = get_file_download_command(remote_path, local_path)
+
+        try:
+            with rich_utils.safe_status(
+                    f'[bold cyan]Downloading: {remote_path} -> {local_path}[/]'
+            ):
+                subprocess.check_output(download_command,
+                                        stderr=subprocess.STDOUT,
+                                        shell=True)
+        except subprocess.CalledProcessError as e:
+            logger.error(f'Download failed: {remote_path} -> {local_path}.\n'
+                         f'Detail errors: {e.output}')
+            with ux_utils.print_exception_no_traceback():
+                raise exceptions.StorageBucketDeleteError(
+                    f'Failed download file {self.name}:{remote_path}.') from e
 
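Note: `_download_file` delegates to the `oci os object get` CLI through the shell. A standalone sketch of the resulting call, with placeholder names:

    import subprocess

    cmd = ('oci os object get --bucket-name my-bucket '
           '--namespace-name mytenancynamespace '
           '--region us-ashburn-1 --name data/train.csv '
           '--file /tmp/train.csv')
    # Raises CalledProcessError on failure, mirroring the handler above.
    subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)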
-    def
-        """Creates
+    def _create_oci_bucket(self, bucket_name: str) -> StorageHandle:
+        """Creates OCI bucket with specific name in specific region
 
         Args:
           bucket_name: str; Name of bucket
-
-            StorageBucketCreateError: If bucket creation fails.
+          region: str; Region name, e.g. us-central1, us-west1
         """
-
+        logger.debug(f'_create_oci_bucket: {bucket_name}')
         try:
-
-
+            create_bucket_response = self.client.create_bucket(
+                namespace_name=self.namespace,
+                create_bucket_details=oci.oci.object_storage.models.
+                CreateBucketDetails(
+                    name=bucket_name,
+                    compartment_id=self.compartment,
+                ))
+            bucket = create_bucket_response.data
+            return bucket
+        except oci.service_exception() as e:
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageBucketCreateError(
-                    f'
-
-
+                    f'Failed to create OCI bucket: {self.name}') from e
+
+    def _delete_oci_bucket(self, bucket_name: str) -> bool:
+        """Deletes OCI bucket, including all objects in bucket
+
+        Args:
+          bucket_name: str; Name of bucket
+
+        Returns:
+          bool; True if bucket was deleted, False if it was deleted externally.
+        """
+        logger.debug(f'_delete_oci_bucket: {bucket_name}')
+
+        @oci.with_oci_env
+        def get_bucket_delete_command(bucket_name):
+            remove_command = (f'oci os bucket delete --bucket-name '
+                              f'--region {self.region} '
+                              f'{bucket_name} --empty --force')
+
+            return remove_command
+
+        remove_command = get_bucket_delete_command(bucket_name)
 
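Note: creation goes through the SDK while deletion shells out to the CLI. The SDK half written directly against the official package (a sketch; the compartment OCID is a placeholder):

    import oci

    config = oci.config.from_file()
    client = oci.object_storage.ObjectStorageClient(config)
    namespace = client.get_namespace().data
    details = oci.object_storage.models.CreateBucketDetails(
        name='my-bucket',
        compartment_id='ocid1.compartment.oc1..exampleuniqueid')
    bucket = client.create_bucket(namespace_name=namespace,
                                  create_bucket_details=details).data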
-    def _execute_nebius_remove_command(self, command: str, bucket_name: str,
-                                       hint_operating: str,
-                                       hint_failed: str) -> bool:
         try:
             with rich_utils.safe_status(
-
-                subprocess.check_output(
+                    f'[bold cyan]Deleting OCI bucket {bucket_name}[/]'):
+                subprocess.check_output(remove_command.split(' '),
                                         stderr=subprocess.STDOUT)
         except subprocess.CalledProcessError as e:
-            if '
+            if 'BucketNotFound' in e.output.decode('utf-8'):
                 logger.debug(
                     _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
                         bucket_name=bucket_name))
                 return False
             else:
+                logger.error(e.output)
                 with ux_utils.print_exception_no_traceback():
                     raise exceptions.StorageBucketDeleteError(
-                        f'{
-                        f'Detailed error: {e.output}')
+                        f'Failed to delete OCI bucket {bucket_name}.')
         return True
 
-    def _delete_nebius_bucket(self, bucket_name: str) -> bool:
-        """Deletes S3 bucket, including all objects in bucket
 
-
-
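Note: the delete path treats a `BucketNotFound` marker in the CLI's captured output as "already deleted externally" rather than an error. The idiom in isolation (a sketch; the command list is whatever the delete-command generator returned):

    import subprocess

    def run_and_detect(cmd: list) -> bool:
        """False when the service reports BucketNotFound, True on success."""
        try:
            subprocess.check_output(cmd, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            if 'BucketNotFound' in e.output.decode('utf-8'):
                return False
            raise
        return True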
+@register_s3_compatible_store
+class S3Store(S3CompatibleStore):
+    """S3Store inherits from S3CompatibleStore and represents the backend
+    for S3 buckets.
+    """
 
-
-
+    _DEFAULT_REGION = 'us-east-1'
+    _CUSTOM_ENDPOINT_REGIONS = [
+        'ap-east-1', 'me-south-1', 'af-south-1', 'eu-south-1', 'eu-south-2',
+        'ap-south-2', 'ap-southeast-3', 'ap-southeast-4', 'me-central-1',
+        'il-central-1'
+    ]
 
-
-
-
-
-
-
-
-    #
-    #
-    #
-
-
-
-
-
-
-
-
-        return False
+    def __init__(self,
+                 name: str,
+                 source: str,
+                 region: Optional[str] = None,
+                 is_sky_managed: Optional[bool] = None,
+                 sync_on_reconstruction: bool = True,
+                 _bucket_sub_path: Optional[str] = None):
+        # TODO(romilb): This is purely a stopgap fix for
+        # https://github.com/skypilot-org/skypilot/issues/3405
+        # We should eventually make all opt-in regions also work for S3 by
+        # passing the right endpoint flags.
+        if region in self._CUSTOM_ENDPOINT_REGIONS:
+            logger.warning('AWS opt-in regions are not supported for S3. '
+                           f'Falling back to default region '
+                           f'{self._DEFAULT_REGION} for bucket {name!r}.')
+            region = self._DEFAULT_REGION
+        super().__init__(name, source, region, is_sky_managed,
+                         sync_on_reconstruction, _bucket_sub_path)
 
-
-
-
-
-
-
-
-
+    @classmethod
+    def get_config(cls) -> S3CompatibleConfig:
+        """Return the configuration for AWS S3."""
+        return S3CompatibleConfig(
+            store_type='S3',
+            url_prefix='s3://',
+            client_factory=data_utils.create_s3_client,
+            resource_factory=lambda name: aws.resource('s3').Bucket(name),
+            split_path=data_utils.split_s3_path,
+            verify_bucket=data_utils.verify_s3_bucket,
+            cloud_name=str(clouds.AWS()),
+            default_region=cls._DEFAULT_REGION,
+            mount_cmd_factory=mounting_utils.get_s3_mount_cmd,
+        )
+
+    def mount_cached_command(self, mount_path: str) -> str:
+        install_cmd = mounting_utils.get_rclone_install_cmd()
+        rclone_profile_name = (
+            data_utils.Rclone.RcloneStores.S3.get_profile_name(self.name))
+        rclone_config = data_utils.Rclone.RcloneStores.S3.get_config(
+            rclone_profile_name=rclone_profile_name)
+        mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
+            rclone_config, rclone_profile_name, self.bucket.name, mount_path)
+        return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                   mount_cached_cmd)
+
+
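Note: `register_s3_compatible_store` is defined elsewhere in this file; from its use here it acts as a class decorator that records each store in a registry so factories can look stores up by type. A hypothetical sketch of that pattern (all names are illustrative, not SkyPilot's actual implementation):

    from typing import Dict, Type

    _S3_COMPATIBLE_STORES: Dict[str, Type] = {}

    def register_s3_compatible_store(cls: Type) -> Type:
        # Record the decorated class under a lookup key.
        _S3_COMPATIBLE_STORES[cls.__name__] = cls
        return cls

    @register_s3_compatible_store
    class DemoStore:
        pass

    assert _S3_COMPATIBLE_STORES['DemoStore'] is DemoStore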
+@register_s3_compatible_store
+class R2Store(S3CompatibleStore):
+    """R2Store inherits from S3CompatibleStore and represents the backend
+    for R2 buckets.
+    """
+
+    def __init__(self,
+                 name: str,
+                 source: str,
+                 region: Optional[str] = 'auto',
+                 is_sky_managed: Optional[bool] = None,
+                 sync_on_reconstruction: bool = True,
+                 _bucket_sub_path: Optional[str] = None):
+        super().__init__(name, source, region, is_sky_managed,
+                         sync_on_reconstruction, _bucket_sub_path)
+
+    @classmethod
+    def get_config(cls) -> S3CompatibleConfig:
+        """Return the configuration for Cloudflare R2."""
+        return S3CompatibleConfig(
+            store_type='R2',
+            url_prefix='r2://',
+            client_factory=lambda region: data_utils.create_r2_client(region or
+                                                                      'auto'),
+            resource_factory=lambda name: cloudflare.resource('s3').Bucket(name
+                                                                          ),
+            split_path=data_utils.split_r2_path,
+            verify_bucket=data_utils.verify_r2_bucket,
+            credentials_file=cloudflare.R2_CREDENTIALS_PATH,
+            aws_profile=cloudflare.R2_PROFILE_NAME,
+            get_endpoint_url=lambda: cloudflare.create_endpoint(),  # pylint: disable=unnecessary-lambda
+            extra_cli_args=['--checksum-algorithm', 'CRC32'],  # R2 specific
+            cloud_name=cloudflare.NAME,
+            default_region='auto',
+            mount_cmd_factory=cls._get_r2_mount_cmd,
+        )
+
+    @classmethod
+    def _get_r2_mount_cmd(cls, bucket_name: str, mount_path: str,
+                          bucket_sub_path: Optional[str]) -> str:
+        """Factory method for R2 mount command."""
+        endpoint_url = cloudflare.create_endpoint()
+        return mounting_utils.get_r2_mount_cmd(cloudflare.R2_CREDENTIALS_PATH,
+                                               cloudflare.R2_PROFILE_NAME,
+                                               endpoint_url, bucket_name,
+                                               mount_path, bucket_sub_path)
+
+    def mount_cached_command(self, mount_path: str) -> str:
+        """R2-specific cached mount implementation using rclone."""
+        install_cmd = mounting_utils.get_rclone_install_cmd()
+        rclone_profile_name = (
+            data_utils.Rclone.RcloneStores.R2.get_profile_name(self.name))
+        rclone_config = data_utils.Rclone.RcloneStores.R2.get_config(
+            rclone_profile_name=rclone_profile_name)
+        mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
+            rclone_config, rclone_profile_name, self.bucket.name, mount_path)
+        return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                   mount_cached_cmd)
+
+
+@register_s3_compatible_store
+class NebiusStore(S3CompatibleStore):
+    """NebiusStore inherits from S3CompatibleStore and represents the backend
+    for Nebius Object Storage buckets.
+    """
+
+    @classmethod
+    def get_config(cls) -> S3CompatibleConfig:
+        """Return the configuration for Nebius Object Storage."""
+        return S3CompatibleConfig(
+            store_type='NEBIUS',
+            url_prefix='nebius://',
+            client_factory=lambda region: data_utils.create_nebius_client(),
+            resource_factory=lambda name: nebius.resource('s3').Bucket(name),
+            split_path=data_utils.split_nebius_path,
+            verify_bucket=data_utils.verify_nebius_bucket,
+            aws_profile=nebius.NEBIUS_PROFILE_NAME,
+            cloud_name=str(clouds.Nebius()),
+            mount_cmd_factory=cls._get_nebius_mount_cmd,
+        )
+
+    @classmethod
+    def _get_nebius_mount_cmd(cls, bucket_name: str, mount_path: str,
+                              bucket_sub_path: Optional[str]) -> str:
+        """Factory method for Nebius mount command."""
+        # We need to get the endpoint URL, but since this is a static method,
+        # we'll need to create a client to get it
+        client = data_utils.create_nebius_client()
+        endpoint_url = client.meta.endpoint_url
+        return mounting_utils.get_nebius_mount_cmd(nebius.NEBIUS_PROFILE_NAME,
+                                                   bucket_name, endpoint_url,
+                                                   mount_path, bucket_sub_path)
+
+    def mount_cached_command(self, mount_path: str) -> str:
+        """Nebius-specific cached mount implementation using rclone."""
+        install_cmd = mounting_utils.get_rclone_install_cmd()
+        rclone_profile_name = (
+            data_utils.Rclone.RcloneStores.NEBIUS.get_profile_name(self.name))
+        rclone_config = data_utils.Rclone.RcloneStores.NEBIUS.get_config(
+            rclone_profile_name=rclone_profile_name)
+        mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
+            rclone_config, rclone_profile_name, self.bucket.name, mount_path)
+        return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                   mount_cached_cmd)
+
+
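Note: each backend supplies a `split_path` helper that separates a URL like `nebius://bucket/key` into bucket and key. A hypothetical illustration of that contract (the real `data_utils.split_nebius_path` may differ in edge cases):

    from urllib.parse import urlparse

    def split_path(url: str):
        parsed = urlparse(url)  # e.g. nebius://my-bucket/data/x.csv
        return parsed.netloc, parsed.path.lstrip('/')

    assert split_path('nebius://my-bucket/data/x.csv') == (
        'my-bucket', 'data/x.csv')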
+@register_s3_compatible_store
+class CoreWeaveStore(S3CompatibleStore):
+    """CoreWeaveStore inherits from S3CompatibleStore and represents the backend
+    for CoreWeave Object Storage buckets.
+    """
+
+    @classmethod
+    def get_config(cls) -> S3CompatibleConfig:
+        """Return the configuration for CoreWeave Object Storage."""
+        return S3CompatibleConfig(
+            store_type='COREWEAVE',
+            url_prefix='cw://',
+            client_factory=lambda region: data_utils.create_coreweave_client(),
+            resource_factory=lambda name: coreweave.resource('s3').Bucket(name),
+            split_path=data_utils.split_coreweave_path,
+            verify_bucket=data_utils.verify_coreweave_bucket,
+            aws_profile=coreweave.COREWEAVE_PROFILE_NAME,
+            get_endpoint_url=coreweave.get_endpoint,
+            credentials_file=coreweave.COREWEAVE_CREDENTIALS_PATH,
+            config_file=coreweave.COREWEAVE_CONFIG_PATH,
+            cloud_name=coreweave.NAME,
+            default_region=coreweave.DEFAULT_REGION,
+            mount_cmd_factory=cls._get_coreweave_mount_cmd,
+        )
+
+    def _get_bucket(self) -> Tuple[StorageHandle, bool]:
+        """Get or create bucket using CoreWeave's S3 API"""
+        bucket = self.config.resource_factory(self.name)
+
+        # Use our custom bucket verification instead of head_bucket
+        if data_utils.verify_coreweave_bucket(self.name):
+            self._validate_existing_bucket()
+            return bucket, False
+
+        # TODO(hailong): Enable the bucket creation for CoreWeave
+        # Disable this to avoid waiting too long until the following
+        # issue is resolved:
+        # https://github.com/skypilot-org/skypilot/issues/7736
+        raise exceptions.StorageBucketGetError(
+            f'Bucket {self.name!r} does not exist. CoreWeave buckets can take'
+            ' a long time to become accessible after creation, so SkyPilot'
+            ' does not create them automatically. Please create the bucket'
+            ' manually in CoreWeave and wait for it to be accessible before'
+            ' using it.')
+
+        # # Check if this is a source with URL prefix (existing bucket case)
+        # if isinstance(self.source, str) and self.source.startswith(
+        #         self.config.url_prefix):
+        #     with ux_utils.print_exception_no_traceback():
+        #         raise exceptions.StorageBucketGetError(
+        #             'Attempted to use a non-existent bucket as a source: '
+        #             f'{self.source}.')
+
+        # # If bucket cannot be found, create it if needed
+        # if self.sync_on_reconstruction:
+        #     bucket = self._create_bucket(self.name)
+        #     return bucket, True
+        # else:
+        #     raise exceptions.StorageExternalDeletionError(
+        #         'Attempted to fetch a non-existent bucket: '
+        #         f'{self.name}')
+
+    @classmethod
+    def _get_coreweave_mount_cmd(cls, bucket_name: str, mount_path: str,
+                                 bucket_sub_path: Optional[str]) -> str:
+        """Factory method for CoreWeave mount command."""
+        endpoint_url = coreweave.get_endpoint()
+        return mounting_utils.get_coreweave_mount_cmd(
+            coreweave.COREWEAVE_CREDENTIALS_PATH,
+            coreweave.COREWEAVE_PROFILE_NAME, bucket_name, endpoint_url,
+            mount_path, bucket_sub_path)
+
+    def mount_cached_command(self, mount_path: str) -> str:
+        """CoreWeave-specific cached mount implementation using rclone."""
+        install_cmd = mounting_utils.get_rclone_install_cmd()
+        rclone_profile_name = (
+            data_utils.Rclone.RcloneStores.COREWEAVE.get_profile_name(
+                self.name))
+        rclone_config = data_utils.Rclone.RcloneStores.COREWEAVE.get_config(
+            rclone_profile_name=rclone_profile_name)
+        mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
+            rclone_config, rclone_profile_name, self.bucket.name, mount_path)
+        return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                   mount_cached_cmd)
 
-    def
-
-
-
-
-
-
-
-
-
-
+    def _create_bucket(self, bucket_name: str) -> StorageHandle:
+        """Create bucket using S3 API with timing handling for CoreWeave."""
+        result = super()._create_bucket(bucket_name)
+        # Ensure bucket is created
+        # The newly created bucket ever takes about 18min to be accessible,
+        # here we just retry for 36 times (5s * 36 = 180s) to avoid waiting
+        # too long
+        # TODO(hailong): Update the logic here when the following
+        # issue is resolved:
+        # https://github.com/skypilot-org/skypilot/issues/7736
+        data_utils.verify_coreweave_bucket(bucket_name, retry=36)
+
+        return result
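Note: the retry budget in `_create_bucket`'s comment is 36 attempts at roughly 5s apart, i.e. a 180s ceiling rather than the ~18min worst case. A sketch of that polling shape (the 5s interval is an assumption; the real wait lives inside `data_utils.verify_coreweave_bucket`):

    import time
    from typing import Callable

    def wait_for_bucket(verify: Callable[[str], bool], bucket: str,
                        retries: int = 36, interval_s: float = 5.0) -> bool:
        # Poll until the bucket is visible or the retry budget is spent.
        for _ in range(retries):
            if verify(bucket):
                return True
            time.sleep(interval_s)
        return False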