skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +81 -16
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +173 -30
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1323 -397
- sky/backends/cloud_vm_ray_backend.py +1749 -1029
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/task_codegen.py +633 -0
- sky/backends/wheel_utils.py +55 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +90 -49
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +338 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +533 -185
- sky/cli.py +5 -5975
- sky/client/{cli.py → cli/command.py} +2591 -1956
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +322 -0
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +78 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1219 -319
- sky/client/sdk_async.py +827 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +564 -164
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +140 -40
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +570 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +210 -81
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -86
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +231 -167
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +375 -173
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1460 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +126 -2
- sky/execution.py +216 -63
- sky/global_user_state.py +2390 -586
- sky/jobs/__init__.py +7 -0
- sky/jobs/client/sdk.py +300 -58
- sky/jobs/client/sdk_async.py +161 -0
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +848 -275
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +402 -152
- sky/jobs/scheduler.py +314 -189
- sky/jobs/server/core.py +836 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2109 -706
- sky/jobs/utils.py +1306 -215
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +453 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +136 -50
- sky/provision/azure/instance.py +11 -6
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +140 -33
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +101 -20
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +919 -280
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +44 -43
- sky/provision/kubernetes/utils.py +1221 -534
- sky/provision/kubernetes/volume.py +343 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +237 -137
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +117 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +214 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +707 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +4 -4
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +17 -20
- sky/py.typed +0 -0
- sky/resources.py +845 -119
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +227 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +12 -9
- sky/serve/controller.py +68 -17
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +116 -2
- sky/serve/replica_managers.py +434 -249
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +569 -257
- sky/serve/serve_utils.py +775 -265
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +192 -89
- sky/serve/service_spec.py +144 -20
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +202 -0
- sky/server/common.py +478 -182
- sky/server/config.py +85 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +295 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/middleware_utils.py +166 -0
- sky/server/requests/executor.py +558 -138
- sky/server/requests/payloads.py +364 -24
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +121 -0
- sky/server/requests/requests.py +822 -226
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +455 -0
- sky/server/server.py +1309 -285
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +327 -61
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +11 -1
- sky/setup_files/alembic.ini +160 -0
- sky/setup_files/dependencies.py +139 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +114 -7
- sky/skylet/attempt_skylet.py +106 -24
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +29 -20
- sky/skylet/constants.py +216 -25
- sky/skylet/events.py +101 -21
- sky/skylet/job_lib.py +345 -164
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +568 -0
- sky/skylet/skylet.py +72 -4
- sky/skylet/subprocess_daemon.py +104 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +685 -163
- sky/templates/aws-ray.yml.j2 +11 -3
- sky/templates/azure-ray.yml.j2 +2 -1
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +2 -1
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +62 -1
- sky/templates/hyperbolic-ray.yml.j2 +68 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +611 -50
- sky/templates/lambda-ray.yml.j2 +2 -1
- sky/templates/nebius-ray.yml.j2 +34 -12
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +2 -1
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +10 -1
- sky/templates/scp-ray.yml.j2 +4 -50
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/vast-ray.yml.j2 +1 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +212 -37
- sky/usage/usage_lib.py +31 -15
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +397 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +35 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +38 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +314 -91
- sky/utils/config_utils.py +74 -5
- sky/utils/context.py +403 -0
- sky/utils/context_utils.py +242 -0
- sky/utils/controller_utils.py +383 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +485 -0
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +137 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +15 -29
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +18 -8
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +416 -0
- sky/utils/log_utils.py +82 -107
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +217 -39
- sky/utils/schemas.py +955 -160
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +29 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +26 -53
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +165 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +150 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +270 -0
- sky/volumes/server/server.py +124 -0
- sky/volumes/volume.py +215 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
- skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
- skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/data/storage.py
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
"""Storage and Store Classes for Sky Data."""
|
|
2
|
+
from abc import abstractmethod
|
|
3
|
+
from dataclasses import dataclass
|
|
2
4
|
import enum
|
|
3
5
|
import hashlib
|
|
4
6
|
import os
|
|
@@ -7,7 +9,7 @@ import shlex
|
|
|
7
9
|
import subprocess
|
|
8
10
|
import time
|
|
9
11
|
import typing
|
|
10
|
-
from typing import Any, Dict, List, Optional, Tuple, Type, Union
|
|
12
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
|
|
11
13
|
import urllib.parse
|
|
12
14
|
|
|
13
15
|
import colorama
|
|
@@ -21,6 +23,7 @@ from sky import skypilot_config
|
|
|
21
23
|
from sky.adaptors import aws
|
|
22
24
|
from sky.adaptors import azure
|
|
23
25
|
from sky.adaptors import cloudflare
|
|
26
|
+
from sky.adaptors import coreweave
|
|
24
27
|
from sky.adaptors import gcp
|
|
25
28
|
from sky.adaptors import ibm
|
|
26
29
|
from sky.adaptors import nebius
|
|
@@ -60,6 +63,7 @@ STORE_ENABLED_CLOUDS: List[str] = [
|
|
|
60
63
|
str(clouds.OCI()),
|
|
61
64
|
str(clouds.Nebius()),
|
|
62
65
|
cloudflare.NAME,
|
|
66
|
+
coreweave.NAME,
|
|
63
67
|
]
|
|
64
68
|
|
|
65
69
|
# Maximum number of concurrent rsync upload processes
|
|
@@ -91,6 +95,12 @@ def get_cached_enabled_storage_cloud_names_or_refresh(
|
|
|
91
95
|
r2_is_enabled, _ = cloudflare.check_storage_credentials()
|
|
92
96
|
if r2_is_enabled:
|
|
93
97
|
enabled_clouds.append(cloudflare.NAME)
|
|
98
|
+
|
|
99
|
+
# Similarly, handle CoreWeave storage credentials
|
|
100
|
+
coreweave_is_enabled, _ = coreweave.check_storage_credentials()
|
|
101
|
+
if coreweave_is_enabled:
|
|
102
|
+
enabled_clouds.append(coreweave.NAME)
|
|
103
|
+
|
|
94
104
|
if raise_if_no_cloud_access and not enabled_clouds:
|
|
95
105
|
raise exceptions.NoCloudAccessError(
|
|
96
106
|
'No cloud access available for storage. '
|
|
@@ -105,11 +115,11 @@ def _is_storage_cloud_enabled(cloud_name: str,
|
|
|
105
115
|
if cloud_name in enabled_storage_cloud_names:
|
|
106
116
|
return True
|
|
107
117
|
if try_fix_with_sky_check:
|
|
108
|
-
# TODO(zhwu): Only check the specified cloud to speed up.
|
|
109
118
|
sky_check.check_capability(
|
|
110
119
|
sky_cloud.CloudCapability.STORAGE,
|
|
111
120
|
quiet=True,
|
|
112
|
-
|
|
121
|
+
clouds=[cloud_name],
|
|
122
|
+
workspace=skypilot_config.get_active_workspace())
|
|
113
123
|
return _is_storage_cloud_enabled(cloud_name,
|
|
114
124
|
try_fix_with_sky_check=False)
|
|
115
125
|
return False
|
|
@@ -124,41 +134,71 @@ class StoreType(enum.Enum):
|
|
|
124
134
|
IBM = 'IBM'
|
|
125
135
|
OCI = 'OCI'
|
|
126
136
|
NEBIUS = 'NEBIUS'
|
|
137
|
+
COREWEAVE = 'COREWEAVE'
|
|
138
|
+
VOLUME = 'VOLUME'
|
|
139
|
+
|
|
140
|
+
@classmethod
|
|
141
|
+
def _get_s3_compatible_store_by_cloud(cls,
|
|
142
|
+
cloud_name: str) -> Optional[str]:
|
|
143
|
+
"""Get S3-compatible store type by cloud name."""
|
|
144
|
+
for store_type, store_class in _S3_COMPATIBLE_STORES.items():
|
|
145
|
+
config = store_class.get_config()
|
|
146
|
+
if config.cloud_name.lower() == cloud_name:
|
|
147
|
+
return store_type
|
|
148
|
+
return None
|
|
149
|
+
|
|
150
|
+
@classmethod
|
|
151
|
+
def _get_s3_compatible_config(
|
|
152
|
+
cls, store_type: str) -> Optional['S3CompatibleConfig']:
|
|
153
|
+
"""Get S3-compatible store configuration by store type."""
|
|
154
|
+
store_class = _S3_COMPATIBLE_STORES.get(store_type)
|
|
155
|
+
if store_class:
|
|
156
|
+
return store_class.get_config()
|
|
157
|
+
return None
|
|
158
|
+
|
|
159
|
+
@classmethod
|
|
160
|
+
def find_s3_compatible_config_by_prefix(
|
|
161
|
+
cls, source: str) -> Optional['StoreType']:
|
|
162
|
+
"""Get S3-compatible store type by URL prefix."""
|
|
163
|
+
for store_type, store_class in _S3_COMPATIBLE_STORES.items():
|
|
164
|
+
config = store_class.get_config()
|
|
165
|
+
if source.startswith(config.url_prefix):
|
|
166
|
+
return StoreType(store_type)
|
|
167
|
+
return None
|
|
127
168
|
|
|
128
169
|
@classmethod
|
|
129
170
|
def from_cloud(cls, cloud: str) -> 'StoreType':
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
elif cloud.lower() == str(clouds.GCP()).lower():
|
|
171
|
+
cloud_lower = cloud.lower()
|
|
172
|
+
if cloud_lower == str(clouds.GCP()).lower():
|
|
133
173
|
return StoreType.GCS
|
|
134
|
-
elif
|
|
174
|
+
elif cloud_lower == str(clouds.IBM()).lower():
|
|
135
175
|
return StoreType.IBM
|
|
136
|
-
elif
|
|
137
|
-
return StoreType.R2
|
|
138
|
-
elif cloud.lower() == str(clouds.Azure()).lower():
|
|
176
|
+
elif cloud_lower == str(clouds.Azure()).lower():
|
|
139
177
|
return StoreType.AZURE
|
|
140
|
-
elif
|
|
178
|
+
elif cloud_lower == str(clouds.OCI()).lower():
|
|
141
179
|
return StoreType.OCI
|
|
142
|
-
elif
|
|
143
|
-
return StoreType.NEBIUS
|
|
144
|
-
elif cloud.lower() == str(clouds.Lambda()).lower():
|
|
180
|
+
elif cloud_lower == str(clouds.Lambda()).lower():
|
|
145
181
|
with ux_utils.print_exception_no_traceback():
|
|
146
182
|
raise ValueError('Lambda Cloud does not provide cloud storage.')
|
|
147
|
-
elif
|
|
183
|
+
elif cloud_lower == str(clouds.SCP()).lower():
|
|
148
184
|
with ux_utils.print_exception_no_traceback():
|
|
149
185
|
raise ValueError('SCP does not provide cloud storage.')
|
|
186
|
+
else:
|
|
187
|
+
s3_store_type = cls._get_s3_compatible_store_by_cloud(cloud_lower)
|
|
188
|
+
if s3_store_type:
|
|
189
|
+
return cls(s3_store_type)
|
|
150
190
|
|
|
151
191
|
raise ValueError(f'Unsupported cloud for StoreType: {cloud}')
|
|
152
192
|
|
|
153
193
|
def to_cloud(self) -> str:
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
194
|
+
config = self._get_s3_compatible_config(self.value)
|
|
195
|
+
if config:
|
|
196
|
+
return config.cloud_name
|
|
197
|
+
|
|
198
|
+
if self == StoreType.GCS:
|
|
157
199
|
return str(clouds.GCP())
|
|
158
200
|
elif self == StoreType.AZURE:
|
|
159
201
|
return str(clouds.Azure())
|
|
160
|
-
elif self == StoreType.R2:
|
|
161
|
-
return cloudflare.NAME
|
|
162
202
|
elif self == StoreType.IBM:
|
|
163
203
|
return str(clouds.IBM())
|
|
164
204
|
elif self == StoreType.OCI:
|
|
@@ -168,41 +208,34 @@ class StoreType(enum.Enum):
|
|
|
168
208
|
|
|
169
209
|
@classmethod
|
|
170
210
|
def from_store(cls, store: 'AbstractStore') -> 'StoreType':
|
|
171
|
-
if isinstance(store,
|
|
172
|
-
return
|
|
173
|
-
|
|
211
|
+
if isinstance(store, S3CompatibleStore):
|
|
212
|
+
return cls(store.get_store_type())
|
|
213
|
+
|
|
214
|
+
if isinstance(store, GcsStore):
|
|
174
215
|
return StoreType.GCS
|
|
175
216
|
elif isinstance(store, AzureBlobStore):
|
|
176
217
|
return StoreType.AZURE
|
|
177
|
-
elif isinstance(store, R2Store):
|
|
178
|
-
return StoreType.R2
|
|
179
218
|
elif isinstance(store, IBMCosStore):
|
|
180
219
|
return StoreType.IBM
|
|
181
220
|
elif isinstance(store, OciStore):
|
|
182
221
|
return StoreType.OCI
|
|
183
|
-
elif isinstance(store, NebiusStore):
|
|
184
|
-
return StoreType.NEBIUS
|
|
185
222
|
else:
|
|
186
223
|
with ux_utils.print_exception_no_traceback():
|
|
187
224
|
raise ValueError(f'Unknown store type: {store}')
|
|
188
225
|
|
|
189
226
|
def store_prefix(self) -> str:
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
227
|
+
config = self._get_s3_compatible_config(self.value)
|
|
228
|
+
if config:
|
|
229
|
+
return config.url_prefix
|
|
230
|
+
|
|
231
|
+
if self == StoreType.GCS:
|
|
193
232
|
return 'gs://'
|
|
194
233
|
elif self == StoreType.AZURE:
|
|
195
234
|
return 'https://'
|
|
196
|
-
# R2 storages use 's3://' as a prefix for various aws cli commands
|
|
197
|
-
elif self == StoreType.R2:
|
|
198
|
-
return 'r2://'
|
|
199
235
|
elif self == StoreType.IBM:
|
|
200
236
|
return 'cos://'
|
|
201
237
|
elif self == StoreType.OCI:
|
|
202
238
|
return 'oci://'
|
|
203
|
-
# Nebius storages use 's3://' as a prefix for various aws cli commands
|
|
204
|
-
elif self == StoreType.NEBIUS:
|
|
205
|
-
return 'nebius://'
|
|
206
239
|
else:
|
|
207
240
|
with ux_utils.print_exception_no_traceback():
|
|
208
241
|
raise ValueError(f'Unknown store type: {self}')
|
|
@@ -251,12 +284,20 @@ class StoreType(enum.Enum):
|
|
|
251
284
|
elif store_type == StoreType.IBM:
|
|
252
285
|
bucket_name, sub_path, region = data_utils.split_cos_path(
|
|
253
286
|
store_url)
|
|
254
|
-
elif store_type == StoreType.R2:
|
|
255
|
-
bucket_name, sub_path = data_utils.split_r2_path(store_url)
|
|
256
287
|
elif store_type == StoreType.GCS:
|
|
257
288
|
bucket_name, sub_path = data_utils.split_gcs_path(store_url)
|
|
258
|
-
|
|
259
|
-
|
|
289
|
+
else:
|
|
290
|
+
# Check compatible stores
|
|
291
|
+
for compatible_store_type, store_class in \
|
|
292
|
+
_S3_COMPATIBLE_STORES.items():
|
|
293
|
+
if store_type.value == compatible_store_type:
|
|
294
|
+
config = store_class.get_config()
|
|
295
|
+
bucket_name, sub_path = config.split_path(store_url)
|
|
296
|
+
break
|
|
297
|
+
else:
|
|
298
|
+
# If we get here, it's an unknown S3-compatible store
|
|
299
|
+
raise ValueError(
|
|
300
|
+
f'Unknown S3-compatible store type: {store_type}')
|
|
260
301
|
return store_type, bucket_name, \
|
|
261
302
|
sub_path, storage_account_name, region
|
|
262
303
|
raise ValueError(f'Unknown store URL: {store_url}')
|
|
@@ -714,6 +755,11 @@ class Storage(object):
|
|
|
714
755
|
previous_store_type = store_type
|
|
715
756
|
else:
|
|
716
757
|
new_store_type = store_type
|
|
758
|
+
if previous_store_type is None or new_store_type is None:
|
|
759
|
+
# This should not happen if the condition above is true,
|
|
760
|
+
# but add check for type safety
|
|
761
|
+
raise exceptions.StorageBucketCreateError(
|
|
762
|
+
f'Bucket {self.name} has inconsistent store types.')
|
|
717
763
|
with ux_utils.print_exception_no_traceback():
|
|
718
764
|
raise exceptions.StorageBucketCreateError(
|
|
719
765
|
f'Bucket {self.name} was previously created for '
|
|
@@ -744,27 +790,27 @@ class Storage(object):
|
|
|
744
790
|
source=self.source,
|
|
745
791
|
mode=self.mode)
|
|
746
792
|
|
|
747
|
-
for
|
|
748
|
-
self.add_store(
|
|
793
|
+
for store_type in input_stores:
|
|
794
|
+
self.add_store(store_type)
|
|
749
795
|
|
|
750
796
|
if self.source is not None:
|
|
751
797
|
# If source is a pre-existing bucket, connect to the bucket
|
|
752
798
|
# If the bucket does not exist, this will error out
|
|
753
799
|
if isinstance(self.source, str):
|
|
754
|
-
if self.source.startswith('
|
|
755
|
-
self.add_store(StoreType.S3)
|
|
756
|
-
elif self.source.startswith('gs://'):
|
|
800
|
+
if self.source.startswith('gs://'):
|
|
757
801
|
self.add_store(StoreType.GCS)
|
|
758
802
|
elif data_utils.is_az_container_endpoint(self.source):
|
|
759
803
|
self.add_store(StoreType.AZURE)
|
|
760
|
-
elif self.source.startswith('r2://'):
|
|
761
|
-
self.add_store(StoreType.R2)
|
|
762
804
|
elif self.source.startswith('cos://'):
|
|
763
805
|
self.add_store(StoreType.IBM)
|
|
764
806
|
elif self.source.startswith('oci://'):
|
|
765
807
|
self.add_store(StoreType.OCI)
|
|
766
|
-
|
|
767
|
-
|
|
808
|
+
|
|
809
|
+
s3_compatible_store_type: Optional[StoreType] = (
|
|
810
|
+
StoreType.find_s3_compatible_config_by_prefix(
|
|
811
|
+
self.source))
|
|
812
|
+
if s3_compatible_store_type:
|
|
813
|
+
self.add_store(s3_compatible_store_type)
|
|
768
814
|
|
|
769
815
|
def get_bucket_sub_path_prefix(self, blob_path: str) -> str:
|
|
770
816
|
"""Adds the bucket sub path prefix to the blob path."""
|
|
@@ -852,7 +898,7 @@ class Storage(object):
|
|
|
852
898
|
f'{source} in the file_mounts section of your YAML')
|
|
853
899
|
is_local_source = True
|
|
854
900
|
elif split_path.scheme in [
|
|
855
|
-
's3', 'gs', 'https', 'r2', 'cos', 'oci', 'nebius'
|
|
901
|
+
's3', 'gs', 'https', 'r2', 'cos', 'oci', 'nebius', 'cw'
|
|
856
902
|
]:
|
|
857
903
|
is_local_source = False
|
|
858
904
|
# Storage mounting does not support mounting specific files from
|
|
@@ -877,7 +923,8 @@ class Storage(object):
|
|
|
877
923
|
with ux_utils.print_exception_no_traceback():
|
|
878
924
|
raise exceptions.StorageSourceError(
|
|
879
925
|
f'Supported paths: local, s3://, gs://, https://, '
|
|
880
|
-
f'r2://, cos://, oci://, nebius://.
|
|
926
|
+
f'r2://, cos://, oci://, nebius://, cw://. '
|
|
927
|
+
f'Got: {source}')
|
|
881
928
|
return source, is_local_source
|
|
882
929
|
|
|
883
930
|
def _validate_storage_spec(self, name: Optional[str]) -> None:
|
|
@@ -892,7 +939,16 @@ class Storage(object):
|
|
|
892
939
|
"""
|
|
893
940
|
prefix = name.split('://')[0]
|
|
894
941
|
prefix = prefix.lower()
|
|
895
|
-
if prefix in [
|
|
942
|
+
if prefix in [
|
|
943
|
+
's3',
|
|
944
|
+
'gs',
|
|
945
|
+
'https',
|
|
946
|
+
'r2',
|
|
947
|
+
'cos',
|
|
948
|
+
'oci',
|
|
949
|
+
'nebius',
|
|
950
|
+
'cw',
|
|
951
|
+
]:
|
|
896
952
|
with ux_utils.print_exception_no_traceback():
|
|
897
953
|
raise exceptions.StorageNameError(
|
|
898
954
|
'Prefix detected: `name` cannot start with '
|
|
@@ -980,12 +1036,25 @@ class Storage(object):
|
|
|
980
1036
|
# When initializing from global_user_state, we override the
|
|
981
1037
|
# source from the YAML
|
|
982
1038
|
try:
|
|
983
|
-
if s_type
|
|
1039
|
+
if s_type.value in _S3_COMPATIBLE_STORES:
|
|
1040
|
+
store_class = _S3_COMPATIBLE_STORES[s_type.value]
|
|
1041
|
+
store = store_class.from_metadata(
|
|
1042
|
+
s_metadata,
|
|
1043
|
+
source=self.source,
|
|
1044
|
+
sync_on_reconstruction=self.sync_on_reconstruction,
|
|
1045
|
+
_bucket_sub_path=self._bucket_sub_path)
|
|
1046
|
+
elif s_type == StoreType.S3:
|
|
984
1047
|
store = S3Store.from_metadata(
|
|
985
1048
|
s_metadata,
|
|
986
1049
|
source=self.source,
|
|
987
1050
|
sync_on_reconstruction=self.sync_on_reconstruction,
|
|
988
1051
|
_bucket_sub_path=self._bucket_sub_path)
|
|
1052
|
+
elif s_type == StoreType.R2:
|
|
1053
|
+
store = R2Store.from_metadata(
|
|
1054
|
+
s_metadata,
|
|
1055
|
+
source=self.source,
|
|
1056
|
+
sync_on_reconstruction=self.sync_on_reconstruction,
|
|
1057
|
+
_bucket_sub_path=self._bucket_sub_path)
|
|
989
1058
|
elif s_type == StoreType.GCS:
|
|
990
1059
|
store = GcsStore.from_metadata(
|
|
991
1060
|
s_metadata,
|
|
@@ -1000,12 +1069,6 @@ class Storage(object):
|
|
|
1000
1069
|
source=self.source,
|
|
1001
1070
|
sync_on_reconstruction=self.sync_on_reconstruction,
|
|
1002
1071
|
_bucket_sub_path=self._bucket_sub_path)
|
|
1003
|
-
elif s_type == StoreType.R2:
|
|
1004
|
-
store = R2Store.from_metadata(
|
|
1005
|
-
s_metadata,
|
|
1006
|
-
source=self.source,
|
|
1007
|
-
sync_on_reconstruction=self.sync_on_reconstruction,
|
|
1008
|
-
_bucket_sub_path=self._bucket_sub_path)
|
|
1009
1072
|
elif s_type == StoreType.IBM:
|
|
1010
1073
|
store = IBMCosStore.from_metadata(
|
|
1011
1074
|
s_metadata,
|
|
@@ -1024,6 +1087,12 @@ class Storage(object):
|
|
|
1024
1087
|
source=self.source,
|
|
1025
1088
|
sync_on_reconstruction=self.sync_on_reconstruction,
|
|
1026
1089
|
_bucket_sub_path=self._bucket_sub_path)
|
|
1090
|
+
elif s_type == StoreType.COREWEAVE:
|
|
1091
|
+
store = CoreWeaveStore.from_metadata(
|
|
1092
|
+
s_metadata,
|
|
1093
|
+
source=self.source,
|
|
1094
|
+
sync_on_reconstruction=self.sync_on_reconstruction,
|
|
1095
|
+
_bucket_sub_path=self._bucket_sub_path)
|
|
1027
1096
|
else:
|
|
1028
1097
|
with ux_utils.print_exception_no_traceback():
|
|
1029
1098
|
raise ValueError(f'Unknown store type: {s_type}')
|
|
@@ -1106,20 +1175,17 @@ class Storage(object):
|
|
|
1106
1175
|
return store
|
|
1107
1176
|
|
|
1108
1177
|
store_cls: Type[AbstractStore]
|
|
1109
|
-
if
|
|
1110
|
-
|
|
1178
|
+
# First check if it's a registered S3-compatible store
|
|
1179
|
+
if store_type.value in _S3_COMPATIBLE_STORES:
|
|
1180
|
+
store_cls = _S3_COMPATIBLE_STORES[store_type.value]
|
|
1111
1181
|
elif store_type == StoreType.GCS:
|
|
1112
1182
|
store_cls = GcsStore
|
|
1113
1183
|
elif store_type == StoreType.AZURE:
|
|
1114
1184
|
store_cls = AzureBlobStore
|
|
1115
|
-
elif store_type == StoreType.R2:
|
|
1116
|
-
store_cls = R2Store
|
|
1117
1185
|
elif store_type == StoreType.IBM:
|
|
1118
1186
|
store_cls = IBMCosStore
|
|
1119
1187
|
elif store_type == StoreType.OCI:
|
|
1120
1188
|
store_cls = OciStore
|
|
1121
|
-
elif store_type == StoreType.NEBIUS:
|
|
1122
|
-
store_cls = NebiusStore
|
|
1123
1189
|
else:
|
|
1124
1190
|
with ux_utils.print_exception_no_traceback():
|
|
1125
1191
|
raise exceptions.StorageSpecError(
|
|
@@ -1266,6 +1332,17 @@ class Storage(object):
|
|
|
1266
1332
|
if store.is_sky_managed:
|
|
1267
1333
|
global_user_state.set_storage_status(self.name, StorageStatus.READY)
|
|
1268
1334
|
|
|
1335
|
+
@classmethod
|
|
1336
|
+
def from_handle(cls, handle: StorageHandle) -> 'Storage':
|
|
1337
|
+
"""Create Storage from StorageHandle object.
|
|
1338
|
+
"""
|
|
1339
|
+
obj = cls(name=handle.storage_name,
|
|
1340
|
+
source=handle.source,
|
|
1341
|
+
sync_on_reconstruction=False)
|
|
1342
|
+
obj.handle = handle
|
|
1343
|
+
obj._add_store_from_metadata(handle.sky_stores)
|
|
1344
|
+
return obj
|
|
1345
|
+
|
|
1269
1346
|
@classmethod
|
|
1270
1347
|
def from_yaml_config(cls, config: Dict[str, Any]) -> 'Storage':
|
|
1271
1348
|
common_utils.validate_schema(config, schemas.get_storage_schema(),
|
|
@@ -1343,101 +1420,262 @@ class Storage(object):
|
|
|
1343
1420
|
return config
|
|
1344
1421
|
|
|
1345
1422
|
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1423
|
+
# Registry for S3-compatible stores
|
|
1424
|
+
_S3_COMPATIBLE_STORES = {}
|
|
1425
|
+
|
|
1426
|
+
|
|
1427
|
+
def register_s3_compatible_store(store_class):
|
|
1428
|
+
"""Decorator to automatically register S3-compatible stores."""
|
|
1429
|
+
store_type = store_class.get_store_type()
|
|
1430
|
+
_S3_COMPATIBLE_STORES[store_type] = store_class
|
|
1431
|
+
return store_class
|
|
1432
|
+
|
|
1433
|
+
|
|
1434
|
+
@dataclass
|
|
1435
|
+
class S3CompatibleConfig:
|
|
1436
|
+
"""Configuration for S3-compatible storage providers."""
|
|
1437
|
+
# Provider identification
|
|
1438
|
+
store_type: str # Store type identifier (e.g., "S3", "R2", "MINIO")
|
|
1439
|
+
url_prefix: str # URL prefix (e.g., "s3://", "r2://", "minio://")
|
|
1440
|
+
|
|
1441
|
+
# Client creation
|
|
1442
|
+
client_factory: Callable[[Optional[str]], Any]
|
|
1443
|
+
resource_factory: Callable[[str], StorageHandle]
|
|
1444
|
+
split_path: Callable[[str], Tuple[str, str]]
|
|
1445
|
+
verify_bucket: Callable[[str], bool]
|
|
1446
|
+
|
|
1447
|
+
# CLI configuration
|
|
1448
|
+
aws_profile: Optional[str] = None
|
|
1449
|
+
get_endpoint_url: Optional[Callable[[], str]] = None
|
|
1450
|
+
credentials_file: Optional[str] = None
|
|
1451
|
+
config_file: Optional[str] = None
|
|
1452
|
+
extra_cli_args: Optional[List[str]] = None
|
|
1453
|
+
|
|
1454
|
+
# Provider-specific settings
|
|
1455
|
+
cloud_name: str = ''
|
|
1456
|
+
default_region: Optional[str] = None
|
|
1457
|
+
access_denied_message: str = 'Access Denied'
|
|
1458
|
+
|
|
1459
|
+
# Mounting
|
|
1460
|
+
mount_cmd_factory: Optional[Callable] = None
|
|
1461
|
+
mount_cached_cmd_factory: Optional[Callable] = None
|
|
1462
|
+
|
|
1463
|
+
def __post_init__(self):
|
|
1464
|
+
if self.extra_cli_args is None:
|
|
1465
|
+
self.extra_cli_args = []
|
|
1466
|
+
|
|
1467
|
+
|
|
1468
|
+
class S3CompatibleStore(AbstractStore):
|
|
1469
|
+
"""Base class for S3-compatible object storage providers.
|
|
1470
|
+
|
|
1471
|
+
This class provides a unified interface for all S3-compatible storage
|
|
1472
|
+
providers (AWS S3, Cloudflare R2, Nebius, MinIO, CoreWeave, etc.) by
|
|
1473
|
+
leveraging a configuration-driven approach that eliminates code duplication
|
|
1474
|
+
|
|
1475
|
+
## Adding a New S3-Compatible Store
|
|
1476
|
+
|
|
1477
|
+
To add a new S3-compatible storage provider (e.g., MinIO),
|
|
1478
|
+
follow these steps:
|
|
1479
|
+
|
|
1480
|
+
### 1. Add Store Type to Enum
|
|
1481
|
+
First, add your store type to the StoreType enum:
|
|
1482
|
+
```python
|
|
1483
|
+
class StoreType(enum.Enum):
|
|
1484
|
+
# ... existing entries ...
|
|
1485
|
+
MINIO = 'MINIO'
|
|
1486
|
+
```
|
|
1487
|
+
|
|
1488
|
+
### 2. Create Store Class
|
|
1489
|
+
Create a new store class that inherits from S3CompatibleStore:
|
|
1490
|
+
```python
|
|
1491
|
+
@register_s3_compatible_store
|
|
1492
|
+
class MinIOStore(S3CompatibleStore):
|
|
1493
|
+
'''MinIOStore for MinIO object storage.'''
|
|
1494
|
+
|
|
1495
|
+
@classmethod
|
|
1496
|
+
def get_config(cls) -> S3CompatibleConfig:
|
|
1497
|
+
'''Return the configuration for MinIO.'''
|
|
1498
|
+
return S3CompatibleConfig(
|
|
1499
|
+
store_type='MINIO',
|
|
1500
|
+
url_prefix='minio://',
|
|
1501
|
+
client_factory=lambda region:\
|
|
1502
|
+
data_utils.create_minio_client(region),
|
|
1503
|
+
resource_factory=lambda name:\
|
|
1504
|
+
minio.resource('s3').Bucket(name),
|
|
1505
|
+
split_path=data_utils.split_minio_path,
|
|
1506
|
+
aws_profile='minio',
|
|
1507
|
+
get_endpoint_url=lambda: minio.get_endpoint_url(),
|
|
1508
|
+
cloud_name='minio',
|
|
1509
|
+
default_region='us-east-1',
|
|
1510
|
+
mount_cmd_factory=mounting_utils.get_minio_mount_cmd,
|
|
1511
|
+
)
|
|
1512
|
+
```
|
|
1513
|
+
|
|
1514
|
+
### 3. Implement Required Utilities
|
|
1515
|
+
Create the necessary utility functions:
|
|
1516
|
+
|
|
1517
|
+
#### In `sky/data/data_utils.py`:
|
|
1518
|
+
```python
|
|
1519
|
+
def create_minio_client(region: Optional[str] = None):
|
|
1520
|
+
'''Create MinIO S3 client.'''
|
|
1521
|
+
return boto3.client('s3',
|
|
1522
|
+
endpoint_url=minio.get_endpoint_url(),
|
|
1523
|
+
aws_access_key_id=minio.get_access_key(),
|
|
1524
|
+
aws_secret_access_key=minio.get_secret_key(),
|
|
1525
|
+
region_name=region or 'us-east-1')
|
|
1526
|
+
|
|
1527
|
+
def split_minio_path(minio_path: str) -> Tuple[str, str]:
|
|
1528
|
+
'''Split minio://bucket/key into (bucket, key).'''
|
|
1529
|
+
path_parts = minio_path.replace('minio://', '').split('/', 1)
|
|
1530
|
+
bucket = path_parts[0]
|
|
1531
|
+
key = path_parts[1] if len(path_parts) > 1 else ''
|
|
1532
|
+
return bucket, key
|
|
1533
|
+
```
|
|
1534
|
+
|
|
1535
|
+
#### In `sky/utils/mounting_utils.py`:
|
|
1536
|
+
```python
|
|
1537
|
+
def get_minio_mount_cmd(profile: str, bucket_name: str, endpoint_url: str,
|
|
1538
|
+
mount_path: str,
|
|
1539
|
+
bucket_sub_path: Optional[str]) -> str:
|
|
1540
|
+
'''Generate MinIO mount command using s3fs.'''
|
|
1541
|
+
# Implementation similar to other S3-compatible mount commands
|
|
1542
|
+
pass
|
|
1543
|
+
```
|
|
1544
|
+
|
|
1545
|
+
### 4. Create Adapter Module (if needed)
|
|
1546
|
+
Create `sky/adaptors/minio.py` for MinIO-specific configuration:
|
|
1547
|
+
```python
|
|
1548
|
+
'''MinIO adapter for SkyPilot.'''
|
|
1549
|
+
|
|
1550
|
+
MINIO_PROFILE_NAME = 'minio'
|
|
1551
|
+
|
|
1552
|
+
def get_endpoint_url() -> str:
|
|
1553
|
+
'''Get MinIO endpoint URL from configuration.'''
|
|
1554
|
+
# Read from ~/.minio/config or environment variables
|
|
1555
|
+
pass
|
|
1556
|
+
|
|
1557
|
+
def resource(resource_name: str):
|
|
1558
|
+
'''Get MinIO resource.'''
|
|
1559
|
+
# Implementation for creating MinIO resources
|
|
1560
|
+
pass
|
|
1561
|
+
```
|
|
1562
|
+
|
|
1349
1563
|
"""
|
|
1350
1564
|
|
|
1351
|
-
_DEFAULT_REGION = 'us-east-1'
|
|
1352
1565
|
_ACCESS_DENIED_MESSAGE = 'Access Denied'
|
|
1353
|
-
_CUSTOM_ENDPOINT_REGIONS = [
|
|
1354
|
-
'ap-east-1', 'me-south-1', 'af-south-1', 'eu-south-1', 'eu-south-2',
|
|
1355
|
-
'ap-south-2', 'ap-southeast-3', 'ap-southeast-4', 'me-central-1',
|
|
1356
|
-
'il-central-1'
|
|
1357
|
-
]
|
|
1358
1566
|
|
|
1359
1567
|
def __init__(self,
|
|
1360
1568
|
name: str,
|
|
1361
1569
|
source: str,
|
|
1362
|
-
region: Optional[str] =
|
|
1570
|
+
region: Optional[str] = None,
|
|
1363
1571
|
is_sky_managed: Optional[bool] = None,
|
|
1364
1572
|
sync_on_reconstruction: bool = True,
|
|
1365
1573
|
_bucket_sub_path: Optional[str] = None):
|
|
1574
|
+
# Initialize configuration first to get defaults
|
|
1575
|
+
self.config = self.__class__.get_config()
|
|
1576
|
+
|
|
1577
|
+
# Use provider's default region if not specified
|
|
1578
|
+
if region is None:
|
|
1579
|
+
region = self.config.default_region
|
|
1580
|
+
|
|
1581
|
+
# Initialize S3CompatibleStore specific attributes
|
|
1366
1582
|
self.client: 'mypy_boto3_s3.Client'
|
|
1367
1583
|
self.bucket: 'StorageHandle'
|
|
1368
|
-
|
|
1369
|
-
#
|
|
1370
|
-
# We should eventually make all opt-in regions also work for S3 by
|
|
1371
|
-
# passing the right endpoint flags.
|
|
1372
|
-
if region in self._CUSTOM_ENDPOINT_REGIONS:
|
|
1373
|
-
logger.warning('AWS opt-in regions are not supported for S3. '
|
|
1374
|
-
f'Falling back to default region '
|
|
1375
|
-
f'{self._DEFAULT_REGION} for bucket {name!r}.')
|
|
1376
|
-
region = self._DEFAULT_REGION
|
|
1584
|
+
|
|
1585
|
+
# Call parent constructor
|
|
1377
1586
|
super().__init__(name, source, region, is_sky_managed,
|
|
1378
1587
|
sync_on_reconstruction, _bucket_sub_path)
|
|
1379
1588
|
|
|
1589
|
+
@classmethod
|
|
1590
|
+
@abstractmethod
|
|
1591
|
+
def get_config(cls) -> S3CompatibleConfig:
|
|
1592
|
+
"""Return the configuration for this S3-compatible provider."""
|
|
1593
|
+
pass
|
|
1594
|
+
|
|
1595
|
+
@classmethod
|
|
1596
|
+
def get_store_type(cls) -> str:
|
|
1597
|
+
"""Return the store type identifier from configuration."""
|
|
1598
|
+
return cls.get_config().store_type
|
|
1599
|
+
|
|
1600
|
+
@property
|
|
1601
|
+
def provider_prefixes(self) -> set:
|
|
1602
|
+
"""Dynamically get all provider prefixes from registered stores."""
|
|
1603
|
+
prefixes = set()
|
|
1604
|
+
|
|
1605
|
+
# Get prefixes from all registered S3-compatible stores
|
|
1606
|
+
for store_class in _S3_COMPATIBLE_STORES.values():
|
|
1607
|
+
config = store_class.get_config()
|
|
1608
|
+
prefixes.add(config.url_prefix)
|
|
1609
|
+
|
|
1610
|
+
# Add hardcoded prefixes for non-S3-compatible stores
|
|
1611
|
+
prefixes.update({
|
|
1612
|
+
'gs://', # GCS
|
|
1613
|
+
'https://', # Azure
|
|
1614
|
+
'cos://', # IBM COS
|
|
1615
|
+
'oci://', # OCI
|
|
1616
|
+
})
|
|
1617
|
+
|
|
1618
|
+
return prefixes
|
|
1619
|
+
|
|
1380
1620
|
def _validate(self):
|
|
1381
1621
|
if self.source is not None and isinstance(self.source, str):
|
|
1382
|
-
if self.source.startswith(
|
|
1383
|
-
|
|
1384
|
-
|
|
1385
|
-
'
|
|
1622
|
+
if self.source.startswith(self.config.url_prefix):
|
|
1623
|
+
bucket_name, _ = self.config.split_path(self.source)
|
|
1624
|
+
assert self.name == bucket_name, (
|
|
1625
|
+
f'{self.config.store_type} Bucket is specified as path, '
|
|
1626
|
+
f'the name should be the same as {self.config.store_type} '
|
|
1627
|
+
f'bucket.')
|
|
1628
|
+
# Only verify if this is NOT the same store type as the source
|
|
1629
|
+
if self.__class__.get_store_type() != self.config.store_type:
|
|
1630
|
+
assert self.config.verify_bucket(self.name), (
|
|
1631
|
+
+                        f'Source specified as {self.source},'
+                        f'a {self.config.store_type} '
+                        f'bucket. {self.config.store_type} Bucket should exist.'
+                    )
             elif self.source.startswith('gs://'):
                 assert self.name == data_utils.split_gcs_path(self.source)[0], (
                     'GCS Bucket is specified as path, the name should be '
                     'the same as GCS bucket.')
-
-
-
+                if not isinstance(self, GcsStore):
+                    assert data_utils.verify_gcs_bucket(self.name), (
+                        f'Source specified as {self.source}, a GCS bucket. ',
+                        'GCS Bucket should exist.')
             elif data_utils.is_az_container_endpoint(self.source):
                 storage_account_name, container_name, _ = (
                     data_utils.split_az_path(self.source))
                 assert self.name == container_name, (
                     'Azure bucket is specified as path, the name should be '
                     'the same as Azure bucket.')
-
-
-
+                if not isinstance(self, AzureBlobStore):
+                    assert data_utils.verify_az_bucket(
+                        storage_account_name, self.name
+                    ), (f'Source specified as {self.source}, an Azure bucket. '
                         'Azure bucket should exist.')
-            elif self.source.startswith('r2://'):
-                assert self.name == data_utils.split_r2_path(self.source)[0], (
-                    'R2 Bucket is specified as path, the name should be '
-                    'the same as R2 bucket.')
-                assert data_utils.verify_r2_bucket(self.name), (
-                    f'Source specified as {self.source}, a R2 bucket. ',
-                    'R2 Bucket should exist.')
-            elif self.source.startswith('nebius://'):
-                assert self.name == data_utils.split_nebius_path(
-                    self.source)[0], (
-                        'Nebius Object Storage is specified as path, the name '
-                        'should be the same as Nebius Object Storage bucket.')
-                assert data_utils.verify_nebius_bucket(self.name), (
-                    f'Source specified as {self.source}, a Nebius Object '
-                    f'Storage bucket. Nebius Object Storage Bucket should'
-                    f' exist.')
             elif self.source.startswith('cos://'):
                 assert self.name == data_utils.split_cos_path(self.source)[0], (
                     'COS Bucket is specified as path, the name should be '
                     'the same as COS bucket.')
-
-
-
+                if not isinstance(self, IBMCosStore):
+                    assert data_utils.verify_ibm_cos_bucket(self.name), (
+                        f'Source specified as {self.source}, a COS bucket. ',
+                        'COS Bucket should exist.')
             elif self.source.startswith('oci://'):
                 raise NotImplementedError(
-                    'Moving data from OCI to
+                    f'Moving data from OCI to {self.source} is ',
+                    'currently not supported.')
+
         # Validate name
         self.name = self.validate_name(self.name)

         # Check if the storage is enabled
-        if not _is_storage_cloud_enabled(
+        if not _is_storage_cloud_enabled(self.config.cloud_name):
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.ResourcesUnavailableError(
-                    'Storage
-                    '
-                    '
-                    '
-                )
+                    f'Storage "store: {self.config.store_type.lower()}" '
+                    f'specified, but '
+                    f'{self.config.cloud_name} access is disabled. '
+                    'To fix, enable '
+                    f'{self.config.cloud_name} by running `sky check`.')

     @classmethod
     def validate_name(cls, name: str) -> str:
@@ -1509,7 +1747,7 @@ class S3Store(AbstractStore):
             StorageBucketGetError: If fetching existing bucket fails
             StorageInitError: If general initialization fails.
         """
-        self.client =
+        self.client = self.config.client_factory(self.region)
         self.bucket, is_new_bucket = self._get_bucket()
         if self.is_sky_managed is None:
             # If is_sky_managed is not specified, then this is a new storage
@@ -1531,16 +1769,10 @@ class S3Store(AbstractStore):
             if isinstance(self.source, list):
                 self.batch_aws_rsync(self.source, create_dirs=True)
             elif self.source is not None:
-                if self.
-                    pass
-                elif self.
-                    self.
-                elif self.source.startswith('r2://'):
-                    self._transfer_to_s3()
-                elif self.source.startswith('oci://'):
-                    self._transfer_to_s3()
-                elif self.source.startswith('nebius://'):
-                    self._transfer_to_s3()
+                if self._is_same_provider_source():
+                    pass  # No transfer needed
+                elif self._needs_cross_provider_transfer():
+                    self._transfer_from_other_provider()
                 else:
                     self.batch_aws_rsync([self.source])
         except exceptions.StorageUploadError:
@@ -1549,57 +1781,94 @@ class S3Store(AbstractStore):
             raise exceptions.StorageUploadError(
                 f'Upload failed for store {self.name}') from e

+    def _is_same_provider_source(self) -> bool:
+        """Check if source is from the same provider."""
+        return isinstance(self.source, str) and self.source.startswith(
+            self.config.url_prefix)
+
+    def _needs_cross_provider_transfer(self) -> bool:
+        """Check if source needs cross-provider transfer."""
+        if not isinstance(self.source, str):
+            return False
+        return any(
+            self.source.startswith(prefix) for prefix in self.provider_prefixes)
+
+    def _detect_source_type(self) -> str:
+        """Detect the source provider type from URL."""
+        if not isinstance(self.source, str):
+            return 'unknown'
+
+        for provider in self.provider_prefixes:
+            if self.source.startswith(provider):
+                return provider[:-len('://')]
+        return ''
+
+    def _transfer_from_other_provider(self):
+        """Transfer data from another cloud to this S3-compatible store."""
+        source_type = self._detect_source_type()
+        target_type = self.config.store_type.lower()
+
+        if hasattr(data_transfer, f'{source_type}_to_{target_type}'):
+            transfer_func = getattr(data_transfer,
+                                    f'{source_type}_to_{target_type}')
+            transfer_func(self.name, self.name)
+        else:
+            with ux_utils.print_exception_no_traceback():
+                raise NotImplementedError(
+                    f'Transfer from {source_type} to {target_type} '
+                    'is not yet supported.')
+
     def delete(self) -> None:
+        """Delete the bucket or sub-path."""
         if self._bucket_sub_path is not None and not self.is_sky_managed:
             return self._delete_sub_path()

-        deleted_by_skypilot = self.
+        deleted_by_skypilot = self._delete_bucket(self.name)
+        provider = self.config.store_type
         if deleted_by_skypilot:
-            msg_str = f'Deleted
+            msg_str = f'Deleted {provider} bucket {self.name}.'
         else:
-            msg_str = f'
+            msg_str = f'{provider} bucket {self.name} may have been deleted ' \
                       f'externally. Removing from local state.'
-        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
-                    f'{colorama.Style.RESET_ALL}')
-
-    def _delete_sub_path(self) -> None:
-        assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
-        deleted_by_skypilot = self._delete_s3_bucket_sub_path(
-            self.name, self._bucket_sub_path)
-        if deleted_by_skypilot:
-            msg_str = f'Removed objects from S3 bucket ' \
-                      f'{self.name}/{self._bucket_sub_path}.'
-        else:
-            msg_str = f'Failed to remove objects from S3 bucket ' \
-                      f'{self.name}/{self._bucket_sub_path}.'
-        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
-                    f'{colorama.Style.RESET_ALL}')
+        logger.info(f'{colorama.Fore.GREEN}{msg_str}{colorama.Style.RESET_ALL}')

     def get_handle(self) -> StorageHandle:
-
+        """Get storage handle using provider's resource factory."""
+        return self.config.resource_factory(self.name)

-    def
-
-
-        """Invokes aws s3 sync to batch upload a list of local paths to S3
+    def _download_file(self, remote_path: str, local_path: str) -> None:
+        """Download file using S3 API."""
+        self.bucket.download_file(remote_path, local_path)

-
-
-
+    def mount_command(self, mount_path: str) -> str:
+        """Get mount command using provider's mount factory."""
+        if self.config.mount_cmd_factory is None:
+            raise exceptions.NotSupportedError(
+                f'Mounting not supported for {self.config.store_type}')

-
-
+        install_cmd = mounting_utils.get_s3_mount_install_cmd()
+        mount_cmd = self.config.mount_cmd_factory(self.bucket.name, mount_path,
+                                                  self._bucket_sub_path)
+        return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                   mount_cmd)

-
-
-
-
-
-
-
-
-
-
+    def mount_cached_command(self, mount_path: str) -> str:
+        """Get cached mount command. Can be overridden by subclasses."""
+        if self.config.mount_cached_cmd_factory is None:
+            raise exceptions.NotSupportedError(
+                f'Cached mounting not supported for {self.config.store_type}')
+
+        install_cmd = mounting_utils.get_rclone_install_cmd()
+        mount_cmd = self.config.mount_cached_cmd_factory(
+            self.bucket.name, mount_path, self._bucket_sub_path)
+        return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                   mount_cmd)
+
+    def batch_aws_rsync(self,
+                        source_path_list: List[Path],
+                        create_dirs: bool = False) -> None:
+        """Generic S3-compatible rsync using AWS CLI."""
+        sub_path = f'/{self._bucket_sub_path}' if self._bucket_sub_path else ''

         def get_file_sync_command(base_dir_path, file_names):
             includes = ' '.join([
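The `_transfer_from_other_provider` helper added in the hunk above looks up a `data_transfer` function named `<source>_to_<target>` with `getattr` and raises `NotImplementedError` when no such function exists. Below is a minimal, self-contained sketch of that dispatch pattern; the registry and the `gcs_to_s3`/`r2_to_s3` entries are illustrative placeholders, not SkyPilot's actual `data_transfer` API.

```python
# Sketch of name-based transfer dispatch (hypothetical registry, not SkyPilot code).
from typing import Callable, Dict

_TRANSFERS: Dict[str, Callable[[str, str], None]] = {
    'gcs_to_s3': lambda src, dst: print(f'copy gs://{src} -> s3://{dst}'),
    'r2_to_s3': lambda src, dst: print(f'copy r2://{src} -> s3://{dst}'),
}


def transfer(source_type: str, target_type: str, bucket: str) -> None:
    # Build the lookup key the same way the diff does: '<source>_to_<target>'.
    key = f'{source_type}_to_{target_type}'
    func = _TRANSFERS.get(key)
    if func is None:
        raise NotImplementedError(f'Transfer {key} is not yet supported.')
    func(bucket, bucket)


transfer('gcs', 's3', 'my-bucket')
```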
@@ -1607,10 +1876,31 @@ class S3Store(AbstractStore):
                 for file_name in file_names
             ])
             base_dir_path = shlex.quote(base_dir_path)
-
-
-
-
+
+            # Build AWS CLI command with provider-specific configuration
+            cmd_parts = ['aws s3 sync --no-follow-symlinks --exclude="*"']
+            cmd_parts.append(f'{includes} {base_dir_path}')
+            cmd_parts.append(f's3://{self.name}{sub_path}')
+
+            # Add provider-specific arguments
+            if self.config.get_endpoint_url:
+                cmd_parts.append(
+                    f'--endpoint-url {self.config.get_endpoint_url()}')
+            if self.config.aws_profile:
+                cmd_parts.append(f'--profile={self.config.aws_profile}')
+            if self.config.extra_cli_args:
+                cmd_parts.extend(self.config.extra_cli_args)
+
+            # Handle credentials file via environment
+            cmd = ' '.join(cmd_parts)
+            if self.config.credentials_file:
+                cmd = 'AWS_SHARED_CREDENTIALS_FILE=' + \
+                    f'{self.config.credentials_file} {cmd}'
+            if self.config.config_file:
+                cmd = 'AWS_CONFIG_FILE=' + \
+                    f'{self.config.config_file} {cmd}'
+
+            return cmd

         def get_dir_sync_command(src_dir_path, dest_dir_name):
             # we exclude .git directory from the sync
@@ -1618,11 +1908,11 @@ class S3Store(AbstractStore):
             excluded_list.append('.git/*')

             # Process exclusion patterns to make them work correctly with aws
-            # s3 sync
+            # s3 sync - this logic is from S3Store2 to ensure compatibility
             processed_excludes = []
             for excluded_path in excluded_list:
                 # Check if the path is a directory exclusion pattern
-                # For AWS S3 sync, directory patterns need to end with "
+                # For AWS S3 sync, directory patterns need to end with "/*" to
                 # exclude all contents
                 if (excluded_path.endswith('/') or os.path.isdir(
                         os.path.join(src_dir_path, excluded_path.rstrip('/')))):
@@ -1637,10 +1927,28 @@ class S3Store(AbstractStore):
                 for file_name in processed_excludes
             ])
             src_dir_path = shlex.quote(src_dir_path)
-
-
-
-
+
+            cmd_parts = ['aws s3 sync --no-follow-symlinks']
+            cmd_parts.append(f'{excludes} {src_dir_path}')
+            cmd_parts.append(f's3://{self.name}{sub_path}/{dest_dir_name}')
+
+            if self.config.get_endpoint_url:
+                cmd_parts.append(
+                    f'--endpoint-url {self.config.get_endpoint_url()}')
+            if self.config.aws_profile:
+                cmd_parts.append(f'--profile={self.config.aws_profile}')
+            if self.config.extra_cli_args:
+                cmd_parts.extend(self.config.extra_cli_args)
+
+            cmd = ' '.join(cmd_parts)
+            if self.config.credentials_file:
+                cmd = 'AWS_SHARED_CREDENTIALS_FILE=' + \
+                    f'{self.config.credentials_file} {cmd}'
+            if self.config.config_file:
+                cmd = 'AWS_CONFIG_FILE=' + \
+                    f'{self.config.config_file} {cmd}'
+
+            return cmd

         # Generate message for upload
         if len(source_path_list) > 1:
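The two sync-command builders added in the hunks above compose an `aws s3 sync` invocation from provider-specific options (endpoint URL, profile, extra CLI args) and prepend credential files as environment variables. The following is a rough standalone sketch of that composition; `_CliConfig` and `build_sync_command` are illustrative stand-ins for the store's real config object and methods, not the library's API.

```python
# Sketch of assembling an AWS CLI sync command from a provider config (assumed names).
import shlex
from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class _CliConfig:  # hypothetical stand-in for the store's config object
    endpoint_url: Optional[str] = None
    aws_profile: Optional[str] = None
    credentials_file: Optional[str] = None
    extra_cli_args: List[str] = field(default_factory=list)


def build_sync_command(src: str, bucket: str, cfg: _CliConfig) -> str:
    parts = ['aws s3 sync --no-follow-symlinks', shlex.quote(src), f's3://{bucket}']
    if cfg.endpoint_url:
        parts.append(f'--endpoint-url {cfg.endpoint_url}')
    if cfg.aws_profile:
        parts.append(f'--profile={cfg.aws_profile}')
    parts.extend(cfg.extra_cli_args)
    cmd = ' '.join(parts)
    if cfg.credentials_file:
        # Credentials are injected via the environment, not CLI flags.
        cmd = f'AWS_SHARED_CREDENTIALS_FILE={cfg.credentials_file} {cmd}'
    return cmd


print(build_sync_command('./data', 'my-bucket',
                         _CliConfig(endpoint_url='https://example.invalid')))
```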
@@ -1648,9 +1956,12 @@ class S3Store(AbstractStore):
         else:
             source_message = source_path_list[0]

+        provider_prefix = self.config.url_prefix
         log_path = sky_logging.generate_tmp_logging_file_path(
             _STORAGE_LOG_FILE_NAME)
-        sync_path = f'{source_message} ->
+        sync_path = (f'{source_message} -> '
+                     f'{provider_prefix}{self.name}{sub_path}/')
+
         with rich_utils.safe_status(
                 ux_utils.spinner_message(f'Syncing {sync_path}',
                                          log_path=log_path)):
@@ -1660,150 +1971,81 @@ class S3Store(AbstractStore):
                 get_dir_sync_command,
                 log_path,
                 self.name,
-                self.
+                self.config.access_denied_message,
                 create_dirs=create_dirs,
                 max_concurrent_uploads=_MAX_CONCURRENT_UPLOADS)
+
         logger.info(
             ux_utils.finishing_message(f'Storage synced: {sync_path}',
                                        log_path))

-    def _transfer_to_s3(self) -> None:
-        assert isinstance(self.source, str), self.source
-        if self.source.startswith('gs://'):
-            data_transfer.gcs_to_s3(self.name, self.name)
-        elif self.source.startswith('r2://'):
-            data_transfer.r2_to_s3(self.name, self.name)
-
     def _get_bucket(self) -> Tuple[StorageHandle, bool]:
-        """
-
-        If the bucket exists, this method will return the bucket.
-        If the bucket does not exist, there are three cases:
-          1) Raise an error if the bucket source starts with s3://
-          2) Return None if bucket has been externally deleted and
-             sync_on_reconstruction is False
-          3) Create and return a new bucket otherwise
-
-        Raises:
-            StorageSpecError: If externally created bucket is attempted to be
-                mounted without specifying storage source.
-            StorageBucketCreateError: If creating the bucket fails
-            StorageBucketGetError: If fetching a bucket fails
-            StorageExternalDeletionError: If externally deleted storage is
-                attempted to be fetched while reconstructing the storage for
-                'sky storage delete' or 'sky start'
-        """
-        s3 = aws.resource('s3')
-        bucket = s3.Bucket(self.name)
+        """Get or create bucket using S3 API."""
+        bucket = self.config.resource_factory(self.name)

         try:
             # Try Public bucket case.
-            # This line does not error out if the bucket is an external public
-            # bucket or if it is a user's bucket that is publicly
-            # accessible.
             self.client.head_bucket(Bucket=self.name)
             self._validate_existing_bucket()
             return bucket, False
         except aws.botocore_exceptions().ClientError as e:
             error_code = e.response['Error']['Code']
-            # AccessDenied error for buckets that are private and not owned by
-            # user.
             if error_code == '403':
-                command = f'aws s3 ls {self.name}'
+                command = f'aws s3 ls s3://{self.name}'
+                if self.config.aws_profile:
+                    command += f' --profile={self.config.aws_profile}'
+                if self.config.get_endpoint_url:
+                    command += f' --endpoint-url '\
+                        f'{self.config.get_endpoint_url()}'
+                if self.config.credentials_file:
+                    command = (f'AWS_SHARED_CREDENTIALS_FILE='
+                               f'{self.config.credentials_file} {command}')
+                if self.config.config_file:
+                    command = 'AWS_CONFIG_FILE=' + \
+                        f'{self.config.config_file} {command}'
                 with ux_utils.print_exception_no_traceback():
                     raise exceptions.StorageBucketGetError(
                         _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
                         f' To debug, consider running `{command}`.') from e

-        if isinstance(self.source, str) and self.source.startswith(
+        if isinstance(self.source, str) and self.source.startswith(
+                self.config.url_prefix):
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageBucketGetError(
                     'Attempted to use a non-existent bucket as a source: '
-                    f'{self.source}.
-                    f'{self.source}` to debug.')
+                    f'{self.source}.')

-        # If bucket cannot be found
-        # the bucket is to be created by Sky. However, creation is skipped if
-        # Store object is being reconstructed for deletion or re-mount with
-        # sky start, and error is raised instead.
+        # If bucket cannot be found, create it if needed
         if self.sync_on_reconstruction:
-            bucket = self.
+            bucket = self._create_bucket(self.name)
            return bucket, True
         else:
-            # Raised when Storage object is reconstructed for sky storage
-            # delete or to re-mount Storages with sky start but the storage
-            # is already removed externally.
             raise exceptions.StorageExternalDeletionError(
                 'Attempted to fetch a non-existent bucket: '
                 f'{self.name}')

-    def
-        """
-        using the boto3 API
-
-        Args:
-          remote_path: str; Remote path on S3 bucket
-          local_path: str; Local path on user's device
-        """
-        self.bucket.download_file(remote_path, local_path)
-
-    def mount_command(self, mount_path: str) -> str:
-        """Returns the command to mount the bucket to the mount_path.
-
-        Uses goofys to mount the bucket.
-
-        Args:
-          mount_path: str; Path to mount the bucket to.
-        """
-        install_cmd = mounting_utils.get_s3_mount_install_cmd()
-        mount_cmd = mounting_utils.get_s3_mount_cmd(self.bucket.name,
-                                                    mount_path,
-                                                    self._bucket_sub_path)
-        return mounting_utils.get_mounting_command(mount_path, install_cmd,
-                                                   mount_cmd)
-
-    def mount_cached_command(self, mount_path: str) -> str:
-        install_cmd = mounting_utils.get_rclone_install_cmd()
-        rclone_profile_name = (
-            data_utils.Rclone.RcloneStores.S3.get_profile_name(self.name))
-        rclone_config = data_utils.Rclone.RcloneStores.S3.get_config(
-            rclone_profile_name=rclone_profile_name)
-        mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
-            rclone_config, rclone_profile_name, self.bucket.name, mount_path)
-        return mounting_utils.get_mounting_command(mount_path, install_cmd,
-                                                   mount_cached_cmd)
-
-    def _create_s3_bucket(self,
-                          bucket_name: str,
-                          region=_DEFAULT_REGION) -> StorageHandle:
-        """Creates S3 bucket with specific name in specific region
-
-        Args:
-          bucket_name: str; Name of bucket
-          region: str; Region name, e.g. us-west-1, us-east-2
-        Raises:
-            StorageBucketCreateError: If bucket creation fails.
-        """
-        s3_client = self.client
+    def _create_bucket(self, bucket_name: str) -> StorageHandle:
+        """Create bucket using S3 API."""
         try:
             create_bucket_config: Dict[str, Any] = {'Bucket': bucket_name}
-
-            # the LocationConstraint must not be specified.
-            # Reference: https://stackoverflow.com/a/51912090
-            if region is not None and region != 'us-east-1':
+            if self.region is not None and self.region != 'us-east-1':
                 create_bucket_config['CreateBucketConfiguration'] = {
-                    'LocationConstraint': region
+                    'LocationConstraint': self.region
                 }
-
+            self.client.create_bucket(**create_bucket_config)
             logger.info(
                 f' {colorama.Style.DIM}Created S3 bucket {bucket_name!r} in '
-                f'{region or "us-east-1"}{colorama.Style.RESET_ALL}')
+                f'{self.region or "us-east-1"}{colorama.Style.RESET_ALL}')

             # Add AWS tags configured in config.yaml to the bucket.
             # This is useful for cost tracking and external cleanup.
-            bucket_tags = skypilot_config.
+            bucket_tags = skypilot_config.get_effective_region_config(
+                cloud=self.config.cloud_name,
+                region=None,
+                keys=('labels',),
+                default_value={})
             if bucket_tags:
-
+                self.client.put_bucket_tagging(
                     Bucket=bucket_name,
                     Tagging={
                         'TagSet': [{
@@ -1811,22 +2053,46 @@ class S3Store(AbstractStore):
                             'Value': v
                         } for k, v in bucket_tags.items()]
                     })
-
         except aws.botocore_exceptions().ClientError as e:
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageBucketCreateError(
-                    f'Attempted to create
+                    f'Attempted to create S3 bucket {self.name} but failed.'
                 ) from e
-        return
+        return self.config.resource_factory(bucket_name)
+
+    def _delete_bucket(self, bucket_name: str) -> bool:
+        """Delete bucket using AWS CLI."""
+        cmd_parts = [f'aws s3 rb s3://{bucket_name} --force']
+
+        if self.config.aws_profile:
+            cmd_parts.append(f'--profile={self.config.aws_profile}')
+        if self.config.get_endpoint_url:
+            cmd_parts.append(f'--endpoint-url {self.config.get_endpoint_url()}')
+
+        remove_command = ' '.join(cmd_parts)
+
+        if self.config.credentials_file:
+            remove_command = (f'AWS_SHARED_CREDENTIALS_FILE='
+                              f'{self.config.credentials_file} '
+                              f'{remove_command}')
+        if self.config.config_file:
+            remove_command = 'AWS_CONFIG_FILE=' + \
+                f'{self.config.config_file} {remove_command}'
+        return self._execute_remove_command(
+            remove_command, bucket_name,
+            f'Deleting {self.config.store_type} bucket {bucket_name}',
+            (f'Failed to delete {self.config.store_type} bucket '
+             f'{bucket_name}.'))

-    def
-
-
+    def _execute_remove_command(self, command: str, bucket_name: str,
+                                hint_operating: str, hint_failed: str) -> bool:
+        """Execute bucket removal command."""
         try:
             with rich_utils.safe_status(
                     ux_utils.spinner_message(hint_operating)):
-                subprocess.check_output(command
-                                        stderr=subprocess.STDOUT
+                subprocess.check_output(command,
+                                        stderr=subprocess.STDOUT,
+                                        shell=True)
         except subprocess.CalledProcessError as e:
             if 'NoSuchBucket' in e.output.decode('utf-8'):
                 logger.debug(
@@ -1840,47 +2106,44 @@ class S3Store(AbstractStore):
                     f'Detailed error: {e.output}')
             return True

-    def
-        """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        f'[green]{bucket_name}/{sub_path}[/]',
-            f'Failed to remove objects from S3 bucket {bucket_name}/{sub_path}.'
-        )
+    def _delete_sub_path(self) -> None:
+        """Remove objects from the sub path in the bucket."""
+        assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
+        deleted_by_skypilot = self._delete_bucket_sub_path(
+            self.name, self._bucket_sub_path)
+        provider = self.config.store_type
+        if deleted_by_skypilot:
+            msg_str = (f'Removed objects from {provider} bucket '
+                       f'{self.name}/{self._bucket_sub_path}.')
+        else:
+            msg_str = (f'Failed to remove objects from {provider} bucket '
+                       f'{self.name}/{self._bucket_sub_path}.')
+        logger.info(f'{colorama.Fore.GREEN}{msg_str}{colorama.Style.RESET_ALL}')
+
+    def _delete_bucket_sub_path(self, bucket_name: str, sub_path: str) -> bool:
+        """Delete objects in the sub path from the bucket."""
+        cmd_parts = [f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive']
+
+        if self.config.aws_profile:
+            cmd_parts.append(f'--profile={self.config.aws_profile}')
+        if self.config.get_endpoint_url:
+            cmd_parts.append(f'--endpoint-url {self.config.get_endpoint_url()}')
+
+        remove_command = ' '.join(cmd_parts)
+
+        if self.config.credentials_file:
+            remove_command = (f'AWS_SHARED_CREDENTIALS_FILE='
+                              f'{self.config.credentials_file} '
+                              f'{remove_command}')
+        if self.config.config_file:
+            remove_command = 'AWS_CONFIG_FILE=' + \
+                f'{self.config.config_file} {remove_command}'
+        return self._execute_remove_command(
+            remove_command, bucket_name,
+            (f'Removing objects from {self.config.store_type} bucket '
+             f'{bucket_name}/{sub_path}'),
+            (f'Failed to remove objects from {self.config.store_type} '
+             f'bucket {bucket_name}/{sub_path}.'))


 class GcsStore(AbstractStore):
@@ -1951,6 +2214,10 @@ class GcsStore(AbstractStore):
             elif self.source.startswith('oci://'):
                 raise NotImplementedError(
                     'Moving data from OCI to GCS is currently not supported.')
+            elif self.source.startswith('cw://'):
+                raise NotImplementedError(
+                    'Moving data from CoreWeave Object Storage to GCS is'
+                    ' currently not supported.')
         # Validate name
         self.name = self.validate_name(self.name)
         # Check if the storage is enabled
@@ -2337,7 +2604,7 @@ class GcsStore(AbstractStore):
         except Exception as e:  # pylint: disable=broad-except
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageBucketCreateError(
-                    f'Attempted to create
+                    f'Attempted to create GCS bucket {self.name} but failed.'
                 ) from e
         logger.info(
             f' {colorama.Style.DIM}Created GCS bucket {new_bucket.name!r} in '
@@ -2494,7 +2761,11 @@ class AzureBlobStore(AbstractStore):
             name=override_args.get('name', metadata.name),
             storage_account_name=override_args.get(
                 'storage_account', metadata.storage_account_name),
-
+            # TODO(cooperc): fix the types for mypy 1.16
+            # Azure store expects a string path; metadata.source may be a Path
+            # or List[Path].
+            source=override_args.get('source',
+                                     metadata.source),  # type: ignore[arg-type]
             region=override_args.get('region', metadata.region),
             is_sky_managed=override_args.get('is_sky_managed',
                                              metadata.is_sky_managed),
@@ -2562,6 +2833,10 @@ class AzureBlobStore(AbstractStore):
             elif self.source.startswith('oci://'):
                 raise NotImplementedError(
                     'Moving data from OCI to AZureBlob is not supported.')
+            elif self.source.startswith('cw://'):
+                raise NotImplementedError(
+                    'Moving data from CoreWeave Object Storage to AzureBlob is'
+                    ' currently not supported.')
         # Validate name
         self.name = self.validate_name(self.name)

@@ -2764,8 +3039,12 @@ class AzureBlobStore(AbstractStore):
             # Creates new resource group and storage account or use the
             # storage_account provided by the user through config.yaml
             else:
-                config_storage_account =
-                (
+                config_storage_account = (
+                    skypilot_config.get_effective_region_config(
+                        cloud='azure',
+                        region=None,
+                        keys=('storage_account',),
+                        default_value=None))
                 if config_storage_account is not None:
                     # using user provided storage account from config.yaml
                     storage_account_name = config_storage_account
@@ -2929,6 +3208,8 @@ class AzureBlobStore(AbstractStore):
                     raise NotImplementedError(error_message.format('OCI'))
                 elif self.source.startswith('nebius://'):
                     raise NotImplementedError(error_message.format('NEBIUS'))
+                elif self.source.startswith('cw://'):
+                    raise NotImplementedError(error_message.format('CoreWeave'))
                 else:
                     self.batch_az_blob_sync([self.source])
         except exceptions.StorageUploadError:
@@ -3256,7 +3537,7 @@ class AzureBlobStore(AbstractStore):
             with rich_utils.safe_status(
                     ux_utils.spinner_message(
                         f'Deleting Azure container {container_name}')):
-                # Check for the
+                # Check for the existence of the container before deletion.
                 self.storage_client.blob_containers.get(
                     self.resource_group_name,
                     self.storage_account_name,
@@ -3281,22 +3562,23 @@ class AzureBlobStore(AbstractStore):
         return True


-class
-"""
-for
+class IBMCosStore(AbstractStore):
+    """IBMCosStore inherits from Storage Object and represents the backend
+    for COS buckets.
     """
-
     _ACCESS_DENIED_MESSAGE = 'Access Denied'

     def __init__(self,
                  name: str,
                  source: str,
-                 region: Optional[str] = '
+                 region: Optional[str] = 'us-east',
                  is_sky_managed: Optional[bool] = None,
-                 sync_on_reconstruction:
+                 sync_on_reconstruction: bool = True,
                  _bucket_sub_path: Optional[str] = None):
-        self.client: '
+        self.client: 'storage.Client'
         self.bucket: 'StorageHandle'
+        self.rclone_profile_name = (
+            data_utils.Rclone.RcloneStores.IBM.get_profile_name(self.name))
         super().__init__(name, source, region, is_sky_managed,
                          sync_on_reconstruction, _bucket_sub_path)

@@ -3330,6 +3612,9 @@ class R2Store(AbstractStore):
                 assert self.name == data_utils.split_r2_path(self.source)[0], (
                     'R2 Bucket is specified as path, the name should be '
                     'the same as R2 bucket.')
+                assert data_utils.verify_r2_bucket(self.name), (
+                    f'Source specified as {self.source}, a R2 bucket. ',
+                    'R2 Bucket should exist.')
             elif self.source.startswith('nebius://'):
                 assert self.name == data_utils.split_nebius_path(
                     self.source)[0], (
@@ -3341,29 +3626,63 @@ class R2Store(AbstractStore):
                         f'exist.')
             elif self.source.startswith('cos://'):
                 assert self.name == data_utils.split_cos_path(self.source)[0], (
-                    '
+                    'COS Bucket is specified as path, the name should be '
                     'the same as COS bucket.')
-
-                    f'Source specified as {self.source}, a COS bucket. ',
-                    'COS Bucket should exist.')
-            elif self.source.startswith('oci://'):
+            elif self.source.startswith('cw://'):
                 raise NotImplementedError(
-                    'Moving data from
-
+                    'Moving data from CoreWeave Object Storage to COS is '
+                    'currently not supported.')
         # Validate name
-        self.name =
-
-
+        self.name = IBMCosStore.validate_name(self.name)
+
+    @classmethod
+    def validate_name(cls, name: str) -> str:
+        """Validates the name of a COS bucket.
+
+        Rules source: https://ibm.github.io/ibm-cos-sdk-java/com/ibm/cloud/objectstorage/services/s3/model/Bucket.html # pylint: disable=line-too-long
+        """
+
+        def _raise_no_traceback_name_error(err_str):
             with ux_utils.print_exception_no_traceback():
-                raise exceptions.
-
-
-
-
-                )
+                raise exceptions.StorageNameError(err_str)
+
+        if name is not None and isinstance(name, str):
+            if not 3 <= len(name) <= 63:
+                _raise_no_traceback_name_error(
+                    f'Invalid store name: {name} must be between 3 (min) '
+                    'and 63 (max) characters long.')
+
+            # Check for valid characters and start/end with a letter or number
+            pattern = r'^[a-z0-9][-a-z0-9.]*[a-z0-9]$'
+            if not re.match(pattern, name):
+                _raise_no_traceback_name_error(
+                    f'Invalid store name: {name} can consist only of '
+                    'lowercase letters, numbers, dots (.), and dashes (-). '
+                    'It must begin and end with a letter or number.')
+
+            # Check for two adjacent periods or dashes
+            if any(substring in name for substring in ['..', '--']):
+                _raise_no_traceback_name_error(
+                    f'Invalid store name: {name} must not contain '
+                    'two adjacent periods/dashes')
+
+            # Check for IP address format
+            ip_pattern = r'^(?:\d{1,3}\.){3}\d{1,3}$'
+            if re.match(ip_pattern, name):
+                _raise_no_traceback_name_error(
+                    f'Invalid store name: {name} must not be formatted as '
+                    'an IP address (for example, 192.168.5.4).')
+
+            if any(substring in name for substring in ['.-', '-.']):
+                _raise_no_traceback_name_error(
+                    f'Invalid store name: {name} must '
+                    'not allow substrings: ".-", "-." .')
+        else:
+            _raise_no_traceback_name_error('Store name must be specified.')
+        return name

     def initialize(self):
-        """Initializes the
+        """Initializes the cos store object on the cloud.

         Initialization involves fetching bucket if exists, or creating it if
         it does not.
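The `validate_name` classmethod added in the hunk above enforces the COS bucket-name rules (length 3-63, lowercase charset, no adjacent dots or dashes, not an IP address). Below is a small standalone sketch of the same checks, using only the regexes visible in the diff; `is_valid_cos_bucket_name` is a hypothetical helper, not part of SkyPilot.

```python
# Sketch of the COS bucket-name checks shown in the diff (hypothetical helper).
import re


def is_valid_cos_bucket_name(name: str) -> bool:
    if not 3 <= len(name) <= 63:
        return False
    # Lowercase letters, digits, dots, dashes; must start/end alphanumeric.
    if not re.match(r'^[a-z0-9][-a-z0-9.]*[a-z0-9]$', name):
        return False
    # No adjacent or mixed separators.
    if any(s in name for s in ('..', '--', '.-', '-.')):
        return False
    # Must not look like an IP address.
    if re.match(r'^(?:\d{1,3}\.){3}\d{1,3}$', name):
        return False
    return True


assert is_valid_cos_bucket_name('my-cos-bucket')
assert not is_valid_cos_bucket_name('192.168.5.4')
```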
@@ -3373,7 +3692,11 @@ class R2Store(AbstractStore):
             StorageBucketGetError: If fetching existing bucket fails
             StorageInitError: If general initialization fails.
         """
-        self.
+        if self.region is None:
+            raise exceptions.StorageInitError(
+                'Region must be specified for IBM COS store.')
+        self.client = ibm.get_cos_client(self.region)
+        self.s3_resource = ibm.get_cos_resource(self.region)
         self.bucket, is_new_bucket = self._get_bucket()
         if self.is_sky_managed is None:
             # If is_sky_managed is not specified, then this is a new storage
@@ -3383,7 +3706,7 @@ class R2Store(AbstractStore):
             self.is_sky_managed = is_new_bucket

     def upload(self):
-        """Uploads
+        """Uploads files from local machine to bucket.

         Upload must be called by the Storage handler - it is not called on
         Store initialization.
@@ -3393,22 +3716,29 @@ class R2Store(AbstractStore):
         """
         try:
             if isinstance(self.source, list):
-                self.
+                self.batch_ibm_rsync(self.source, create_dirs=True)
             elif self.source is not None:
-                if self.source.startswith('
-
-                elif self.source.startswith('gs://'):
-                    self._transfer_to_r2()
-                elif self.source.startswith('r2://'):
+                if self.source.startswith('cos://'):
+                    # cos bucket used as a dest, can't be used as source.
                     pass
-                elif self.source.startswith('
-
+                elif self.source.startswith('s3://'):
+                    raise Exception('IBM COS currently not supporting'
+                                    'data transfers between COS and S3')
                 elif self.source.startswith('nebius://'):
-
+                    raise Exception('IBM COS currently not supporting'
+                                    'data transfers between COS and Nebius')
+                elif self.source.startswith('gs://'):
+                    raise Exception('IBM COS currently not supporting'
+                                    'data transfers between COS and GS')
+                elif self.source.startswith('r2://'):
+                    raise Exception('IBM COS currently not supporting'
+                                    'data transfers between COS and r2')
+                elif self.source.startswith('cw://'):
+                    raise Exception('IBM COS currently not supporting'
+                                    'data transfers between COS and CoreWeave')
                 else:
-                    self.
-
-                    raise
+                    self.batch_ibm_rsync([self.source])
+
         except Exception as e:
             raise exceptions.StorageUploadError(
                 f'Upload failed for store {self.name}') from e
@@ -3417,41 +3747,28 @@ class R2Store(AbstractStore):
         if self._bucket_sub_path is not None and not self.is_sky_managed:
             return self._delete_sub_path()

-
-
-            msg_str = f'Deleted R2 bucket {self.name}.'
-        else:
-            msg_str = f'R2 bucket {self.name} may have been deleted ' \
-                      f'externally. Removing from local state.'
-        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
+        self._delete_cos_bucket()
+        logger.info(f'{colorama.Fore.GREEN}Deleted COS bucket {self.name}.'
                     f'{colorama.Style.RESET_ALL}')

     def _delete_sub_path(self) -> None:
         assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
-
-
-
-
-
-
-            msg_str = f'Failed to remove objects from R2 bucket ' \
-                      f'{self.name}/{self._bucket_sub_path}.'
-        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
-                    f'{colorama.Style.RESET_ALL}')
+        bucket = self.s3_resource.Bucket(self.name)
+        try:
+            self._delete_cos_bucket_objects(bucket, self._bucket_sub_path + '/')
+        except ibm.ibm_botocore.exceptions.ClientError as e:
+            if e.__class__.__name__ == 'NoSuchBucket':
+                logger.debug('bucket already removed')

     def get_handle(self) -> StorageHandle:
-        return
+        return self.s3_resource.Bucket(self.name)

-    def
+    def batch_ibm_rsync(self,
                         source_path_list: List[Path],
                         create_dirs: bool = False) -> None:
-        """Invokes
-
-        AWS Sync by default uses 10 threads to upload files to the bucket. To
-        increase parallelism, modify max_concurrent_requests in your aws config
-        file (Default path: ~/.aws/config).
+        """Invokes rclone copy to batch upload a list of local paths to cos

-        Since
+        Since rclone does not support batch operations, we construct
         multiple commands to be run in parallel.

         Args:
@@ -3465,49 +3782,58 @@ class R2Store(AbstractStore):
         sub_path = (f'/{self._bucket_sub_path}'
                     if self._bucket_sub_path else '')

-        def
+        def get_dir_sync_command(src_dir_path, dest_dir_name) -> str:
+            """returns an rclone command that copies a complete folder
+            from 'src_dir_path' to bucket/'dest_dir_name'.
+
+            `rclone copy` copies files from source path to target.
+            files with identical names at won't be copied over, unless
+            their modification date is more recent.
+            works similarly to `aws sync` (without --delete).
+
+            Args:
+                src_dir_path (str): local source path from which to copy files.
+                dest_dir_name (str): remote target path files are copied to.
+
+            Returns:
+                str: bash command using rclone to sync files. Executed remotely.
+            """
+
+            # .git directory is excluded from the sync
+            # wrapping src_dir_path with "" to support path with spaces
+            src_dir_path = shlex.quote(src_dir_path)
+            sync_command = ('rclone copy --exclude ".git/*" '
+                            f'{src_dir_path} '
+                            f'{self.rclone_profile_name}:{self.name}{sub_path}'
+                            f'/{dest_dir_name}')
+            return sync_command
+
+        def get_file_sync_command(base_dir_path, file_names) -> str:
+            """returns an rclone command that copies files: 'file_names'
+            from base directory: `base_dir_path` to bucket.
+
+            `rclone copy` copies files from source path to target.
+            files with identical names at won't be copied over, unless
+            their modification date is more recent.
+            works similarly to `aws sync` (without --delete).
+
+            Args:
+                base_dir_path (str): local path from which to copy files.
+                file_names (List): specific file names to copy.
+
+            Returns:
+                str: bash command using rclone to sync files
+            """
+
+            # wrapping file_name with "" to support spaces
             includes = ' '.join([
                 f'--include {shlex.quote(file_name)}'
                 for file_name in file_names
             ])
-            endpoint_url = cloudflare.create_endpoint()
             base_dir_path = shlex.quote(base_dir_path)
-            sync_command = (
-
-
-                'aws s3 sync --no-follow-symlinks --exclude="*" '
-                f'{includes} {base_dir_path} '
-                f's3://{self.name}{sub_path} '
-                f'--endpoint {endpoint_url} '
-                # R2 does not support CRC64-NVME
-                # which is the default for aws s3 sync
-                # https://community.cloudflare.com/t/an-error-occurred-internalerror-when-calling-the-putobject-operation/764905/13
-                f'--checksum-algorithm CRC32 '
-                f'--profile={cloudflare.R2_PROFILE_NAME}')
-            return sync_command
-
-        def get_dir_sync_command(src_dir_path, dest_dir_name):
-            # we exclude .git directory from the sync
-            excluded_list = storage_utils.get_excluded_files(src_dir_path)
-            excluded_list.append('.git/*')
-            excludes = ' '.join([
-                f'--exclude {shlex.quote(file_name)}'
-                for file_name in excluded_list
-            ])
-            endpoint_url = cloudflare.create_endpoint()
-            src_dir_path = shlex.quote(src_dir_path)
-            sync_command = (
-                'AWS_SHARED_CREDENTIALS_FILE='
-                f'{cloudflare.R2_CREDENTIALS_PATH} '
-                f'aws s3 sync --no-follow-symlinks {excludes} '
-                f'{src_dir_path} '
-                f's3://{self.name}{sub_path}/{dest_dir_name} '
-                f'--endpoint {endpoint_url} '
-                # R2 does not support CRC64-NVME
-                # which is the default for aws s3 sync
-                # https://community.cloudflare.com/t/an-error-occurred-internalerror-when-calling-the-putobject-operation/764905/13
-                f'--checksum-algorithm CRC32 '
-                f'--profile={cloudflare.R2_PROFILE_NAME}')
+            sync_command = ('rclone copy '
+                            f'{includes} {base_dir_path} '
+                            f'{self.rclone_profile_name}:{self.name}{sub_path}')
             return sync_command

         # Generate message for upload
@@ -3518,7 +3844,8 @@ class R2Store(AbstractStore):

         log_path = sky_logging.generate_tmp_logging_file_path(
             _STORAGE_LOG_FILE_NAME)
-        sync_path =
+        sync_path = (
+            f'{source_message} -> cos://{self.region}/{self.name}{sub_path}/')
         with rich_utils.safe_status(
                 ux_utils.spinner_message(f'Syncing {sync_path}',
                                          log_path=log_path)):
@@ -3535,1236 +3862,306 @@ class R2Store(AbstractStore):
|
|
|
3535
3862
|
ux_utils.finishing_message(f'Storage synced: {sync_path}',
|
|
3536
3863
|
log_path))
|
|
3537
3864
|
|
|
3538
|
-
def _transfer_to_r2(self) -> None:
|
|
3539
|
-
assert isinstance(self.source, str), self.source
|
|
3540
|
-
if self.source.startswith('gs://'):
|
|
3541
|
-
data_transfer.gcs_to_r2(self.name, self.name)
|
|
3542
|
-
elif self.source.startswith('s3://'):
|
|
3543
|
-
data_transfer.s3_to_r2(self.name, self.name)
|
|
3544
|
-
elif self.source.startswith('nebius://'):
|
|
3545
|
-
data_transfer.s3_to_r2(self.name, self.name)
|
|
3546
|
-
|
|
3547
3865
|
def _get_bucket(self) -> Tuple[StorageHandle, bool]:
|
|
3548
|
-
"""
|
|
3866
|
+
"""returns IBM COS bucket object if exists, otherwise creates it.
|
|
3549
3867
|
|
|
3550
|
-
|
|
3551
|
-
|
|
3552
|
-
|
|
3553
|
-
2) Return None if bucket has been externally deleted and
|
|
3554
|
-
sync_on_reconstruction is False
|
|
3555
|
-
3) Create and return a new bucket otherwise
|
|
3868
|
+
Returns:
|
|
3869
|
+
StorageHandle(str): bucket name
|
|
3870
|
+
bool: indicates whether a new bucket was created.
|
|
3556
3871
|
|
|
3557
3872
|
Raises:
|
|
3558
3873
|
StorageSpecError: If externally created bucket is attempted to be
|
|
3559
3874
|
mounted without specifying storage source.
|
|
3560
|
-
StorageBucketCreateError: If
|
|
3875
|
+
StorageBucketCreateError: If bucket creation fails.
|
|
3561
3876
|
StorageBucketGetError: If fetching a bucket fails
|
|
3562
3877
|
StorageExternalDeletionError: If externally deleted storage is
|
|
3563
3878
|
attempted to be fetched while reconstructing the storage for
|
|
3564
3879
|
'sky storage delete' or 'sky start'
|
|
3565
3880
|
"""
|
|
3566
|
-
r2 = cloudflare.resource('s3')
|
|
3567
|
-
bucket = r2.Bucket(self.name)
|
|
3568
|
-
endpoint_url = cloudflare.create_endpoint()
|
|
3569
|
-
try:
|
|
3570
|
-
# Try Public bucket case.
|
|
3571
|
-
# This line does not error out if the bucket is an external public
|
|
3572
|
-
# bucket or if it is a user's bucket that is publicly
|
|
3573
|
-
# accessible.
|
|
3574
|
-
self.client.head_bucket(Bucket=self.name)
|
|
3575
|
-
self._validate_existing_bucket()
|
|
3576
|
-
return bucket, False
|
|
3577
|
-
except aws.botocore_exceptions().ClientError as e:
|
|
3578
|
-
error_code = e.response['Error']['Code']
|
|
3579
|
-
# AccessDenied error for buckets that are private and not owned by
|
|
3580
|
-
# user.
|
|
3581
|
-
if error_code == '403':
|
|
3582
|
-
command = ('AWS_SHARED_CREDENTIALS_FILE='
|
|
3583
|
-
f'{cloudflare.R2_CREDENTIALS_PATH} '
|
|
3584
|
-
f'aws s3 ls s3://{self.name} '
|
|
3585
|
-
f'--endpoint {endpoint_url} '
|
|
3586
|
-
f'--profile={cloudflare.R2_PROFILE_NAME}')
|
|
3587
|
-
with ux_utils.print_exception_no_traceback():
|
|
3588
|
-
raise exceptions.StorageBucketGetError(
|
|
3589
|
-
_BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
|
|
3590
|
-
f' To debug, consider running `{command}`.') from e
|
|
3591
3881
|
|
|
3592
|
-
|
|
3882
|
+
bucket_profile_name = (data_utils.Rclone.RcloneStores.IBM.value +
|
|
3883
|
+
self.name)
|
|
3884
|
+
try:
|
|
3885
|
+
bucket_region = data_utils.get_ibm_cos_bucket_region(self.name)
|
|
3886
|
+
except exceptions.StorageBucketGetError as e:
|
|
3593
3887
|
with ux_utils.print_exception_no_traceback():
|
|
3888
|
+
command = f'rclone lsd {bucket_profile_name}: '
|
|
3594
3889
|
raise exceptions.StorageBucketGetError(
|
|
3595
|
-
|
|
3596
|
-
f'{
|
|
3597
|
-
'`AWS_SHARED_CREDENTIALS_FILE='
|
|
3598
|
-
f'{cloudflare.R2_CREDENTIALS_PATH} aws s3 ls '
|
|
3599
|
-
f's3://{self.name} '
|
|
3600
|
-
f'--endpoint {endpoint_url} '
|
|
3601
|
-
f'--profile={cloudflare.R2_PROFILE_NAME}\' '
|
|
3602
|
-
'to debug.')
|
|
3603
|
-
|
|
3604
|
-
# If bucket cannot be found in both private and public settings,
|
|
3605
|
-
# the bucket is to be created by Sky. However, creation is skipped if
|
|
3606
|
-
# Store object is being reconstructed for deletion or re-mount with
|
|
3607
|
-
# sky start, and error is raised instead.
|
|
3608
|
-
if self.sync_on_reconstruction:
|
|
3609
|
-
bucket = self._create_r2_bucket(self.name)
|
|
3610
|
-
return bucket, True
|
|
3611
|
-
else:
|
|
3612
|
-
# Raised when Storage object is reconstructed for sky storage
|
|
3613
|
-
# delete or to re-mount Storages with sky start but the storage
|
|
3614
|
-
# is already removed externally.
|
|
3615
|
-
raise exceptions.StorageExternalDeletionError(
|
|
3616
|
-
'Attempted to fetch a non-existent bucket: '
|
|
3617
|
-
f'{self.name}')
|
|
3618
|
-
|
|
3619
|
-
def _download_file(self, remote_path: str, local_path: str) -> None:
|
|
3620
|
-
"""Downloads file from remote to local on r2 bucket
|
|
3621
|
-
using the boto3 API
|
|
3890
|
+
_BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
|
|
3891
|
+
f' To debug, consider running `{command}`.') from e
|
|
3622
3892
|
|
|
3623
|
-
|
|
3624
|
-
|
|
3625
|
-
|
|
3626
|
-
|
|
3627
|
-
|
|
3893
|
+
try:
|
|
3894
|
+
uri_region = data_utils.split_cos_path(
|
|
3895
|
+
self.source)[2] # type: ignore
|
|
3896
|
+
except ValueError:
|
|
3897
|
+
# source isn't a cos uri
|
|
3898
|
+
uri_region = ''
|
|
3628
3899
|
|
|
3629
|
-
|
|
3630
|
-
|
|
3631
|
-
|
|
3632
|
-
|
|
3633
|
-
|
|
3634
|
-
|
|
3635
|
-
|
|
3636
|
-
|
|
3637
|
-
install_cmd = mounting_utils.get_s3_mount_install_cmd()
|
|
3638
|
-
endpoint_url = cloudflare.create_endpoint()
|
|
3639
|
-
r2_credential_path = cloudflare.R2_CREDENTIALS_PATH
|
|
3640
|
-
r2_profile_name = cloudflare.R2_PROFILE_NAME
|
|
3641
|
-
mount_cmd = mounting_utils.get_r2_mount_cmd(
|
|
3642
|
-
r2_credential_path, r2_profile_name, endpoint_url, self.bucket.name,
|
|
3643
|
-
mount_path, self._bucket_sub_path)
|
|
3644
|
-
return mounting_utils.get_mounting_command(mount_path, install_cmd,
|
|
3645
|
-
mount_cmd)
|
|
3646
|
-
|
|
3647
|
-
def mount_cached_command(self, mount_path: str) -> str:
|
|
3648
|
-
install_cmd = mounting_utils.get_rclone_install_cmd()
|
|
3649
|
-
rclone_profile_name = (
|
|
3650
|
-
data_utils.Rclone.RcloneStores.R2.get_profile_name(self.name))
|
|
3651
|
-
rclone_config = data_utils.Rclone.RcloneStores.R2.get_config(
|
|
3652
|
-
rclone_profile_name=rclone_profile_name)
|
|
3653
|
-
mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
|
|
3654
|
-
rclone_config, rclone_profile_name, self.bucket.name, mount_path)
|
|
3655
|
-
return mounting_utils.get_mounting_command(mount_path, install_cmd,
|
|
3656
|
-
mount_cached_cmd)
|
|
3657
|
-
|
|
3658
|
-
def _create_r2_bucket(self,
|
|
3659
|
-
bucket_name: str,
|
|
3660
|
-
region='auto') -> StorageHandle:
|
|
3661
|
-
"""Creates R2 bucket with specific name in specific region
|
|
3662
|
-
|
|
3663
|
-
Args:
|
|
3664
|
-
bucket_name: str; Name of bucket
|
|
3665
|
-
region: str; Region name, r2 automatically sets region
|
|
3666
|
-
Raises:
|
|
3667
|
-
StorageBucketCreateError: If bucket creation fails.
|
|
3668
|
-
"""
|
|
3669
|
-
r2_client = self.client
|
|
3670
|
-
try:
|
|
3671
|
-
if region is None:
|
|
3672
|
-
r2_client.create_bucket(Bucket=bucket_name)
|
|
3673
|
-
else:
|
|
3674
|
-
location = {'LocationConstraint': region}
|
|
3675
|
-
r2_client.create_bucket(Bucket=bucket_name,
|
|
3676
|
-
CreateBucketConfiguration=location)
|
|
3677
|
-
logger.info(f' {colorama.Style.DIM}Created R2 bucket '
|
|
3678
|
-
f'{bucket_name!r} in {region}'
|
|
3679
|
-
f'{colorama.Style.RESET_ALL}')
|
|
3680
|
-
except aws.botocore_exceptions().ClientError as e:
|
|
3681
|
-
with ux_utils.print_exception_no_traceback():
|
|
3682
|
-
raise exceptions.StorageBucketCreateError(
|
|
3683
|
-
f'Attempted to create a bucket '
|
|
3684
|
-
f'{self.name} but failed.') from e
|
|
3685
|
-
return cloudflare.resource('s3').Bucket(bucket_name)
|
|
3686
|
-
|
|
3687
|
-
def _execute_r2_remove_command(self, command: str, bucket_name: str,
|
|
3688
|
-
hint_operating: str,
|
|
3689
|
-
hint_failed: str) -> bool:
|
|
3690
|
-
try:
|
|
3691
|
-
with rich_utils.safe_status(
|
|
3692
|
-
ux_utils.spinner_message(hint_operating)):
|
|
3693
|
-
subprocess.check_output(command.split(' '),
|
|
3694
|
-
stderr=subprocess.STDOUT,
|
|
3695
|
-
shell=True)
|
|
3696
|
-
except subprocess.CalledProcessError as e:
|
|
3697
|
-
if 'NoSuchBucket' in e.output.decode('utf-8'):
|
|
3698
|
-
logger.debug(
|
|
3699
|
-
_BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
|
|
3700
|
-
bucket_name=bucket_name))
|
|
3701
|
-
return False
|
|
3702
|
-
else:
|
|
3703
|
-
with ux_utils.print_exception_no_traceback():
|
|
3704
|
-
raise exceptions.StorageBucketDeleteError(
|
|
3705
|
-
f'{hint_failed}'
|
|
3706
|
-
f'Detailed error: {e.output}')
|
|
3707
|
-
return True
|
|
3708
|
-
|
|
3709
|
-
def _delete_r2_bucket_sub_path(self, bucket_name: str,
|
|
3710
|
-
sub_path: str) -> bool:
|
|
3711
|
-
"""Deletes the sub path from the bucket."""
|
|
3712
|
-
endpoint_url = cloudflare.create_endpoint()
|
|
3713
|
-
remove_command = (
|
|
3714
|
-
f'AWS_SHARED_CREDENTIALS_FILE={cloudflare.R2_CREDENTIALS_PATH} '
|
|
3715
|
-
f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive '
|
|
3716
|
-
f'--endpoint {endpoint_url} '
|
|
3717
|
-
f'--profile={cloudflare.R2_PROFILE_NAME}')
|
|
3718
|
-
return self._execute_r2_remove_command(
|
|
3719
|
-
remove_command, bucket_name,
|
|
3720
|
-
f'Removing objects from R2 bucket {bucket_name}/{sub_path}',
|
|
3721
|
-
f'Failed to remove objects from R2 bucket {bucket_name}/{sub_path}.'
|
|
3722
|
-
)
|
|
3723
|
-
|
|
3724
|
-
def _delete_r2_bucket(self, bucket_name: str) -> bool:
|
|
3725
|
-
"""Deletes R2 bucket, including all objects in bucket
|
|
3726
|
-
|
|
3727
|
-
Args:
|
|
3728
|
-
bucket_name: str; Name of bucket
|
|
3729
|
-
|
|
3730
|
-
Returns:
|
|
3731
|
-
bool; True if bucket was deleted, False if it was deleted externally.
|
|
3732
|
-
|
|
3733
|
-
Raises:
|
|
3734
|
-
StorageBucketDeleteError: If deleting the bucket fails.
|
|
3735
|
-
"""
|
|
3736
|
-
# Deleting objects is very slow programatically
|
|
3737
|
-
# (i.e. bucket.objects.all().delete() is slow).
|
|
3738
|
-
# In addition, standard delete operations (i.e. via `aws s3 rm`)
|
|
3739
|
-
# are slow, since AWS puts deletion markers.
|
|
3740
|
-
# https://stackoverflow.com/questions/49239351/why-is-it-so-much-slower-to-delete-objects-in-aws-s3-than-it-is-to-create-them
|
|
3741
|
-
# The fastest way to delete is to run `aws s3 rb --force`,
|
|
3742
|
-
# which removes the bucket by force.
|
|
3743
|
-
endpoint_url = cloudflare.create_endpoint()
|
|
3744
|
-
remove_command = (
|
|
3745
|
-
f'AWS_SHARED_CREDENTIALS_FILE={cloudflare.R2_CREDENTIALS_PATH} '
|
|
3746
|
-
f'aws s3 rb s3://{bucket_name} --force '
|
|
3747
|
-
f'--endpoint {endpoint_url} '
|
|
3748
|
-
f'--profile={cloudflare.R2_PROFILE_NAME}')
|
|
3749
|
-
|
|
3750
|
-
success = self._execute_r2_remove_command(
|
|
3751
|
-
remove_command, bucket_name, f'Deleting R2 bucket {bucket_name}',
|
|
3752
|
-
f'Failed to delete R2 bucket {bucket_name}.')
|
|
3753
|
-
if not success:
|
|
3754
|
-
return False
|
|
3755
|
-
|
|
3756
|
-
# Wait until bucket deletion propagates on AWS servers
|
|
3757
|
-
while data_utils.verify_r2_bucket(bucket_name):
|
|
3758
|
-
time.sleep(0.1)
|
|
3759
|
-
return True
|
|
3760
|
-
|
|
3761
|
-
|
|
3762
|
-
[removed, old lines 3762-3876: class IBMCosStore, __init__, _validate, validate_name]
-class IBMCosStore(AbstractStore):
-    """IBMCosStore inherits from Storage Object and represents the backend
-    for COS buckets.
-    """
-    _ACCESS_DENIED_MESSAGE = 'Access Denied'
-
-    def __init__(self,
-                 name: str,
-                 source: str,
-                 region: Optional[str] = 'us-east',
-                 is_sky_managed: Optional[bool] = None,
-                 sync_on_reconstruction: bool = True,
-                 _bucket_sub_path: Optional[str] = None):
-        self.client: 'storage.Client'
-        self.bucket: 'StorageHandle'
-        self.rclone_profile_name = (
-            data_utils.Rclone.RcloneStores.IBM.get_profile_name(self.name))
-        super().__init__(name, source, region, is_sky_managed,
-                         sync_on_reconstruction, _bucket_sub_path)
-
-    def _validate(self):
[The removed _validate body (old lines 3783-3828) asserts that a bucket-URI source matches self.name and points at an existing bucket, covering s3://, gs://, Azure container endpoints, r2://, nebius:// and cos:// sources, then sets self.name = IBMCosStore.validate_name(self.name).]
-
-    @classmethod
-    def validate_name(cls, name: str) -> str:
-        """Validates the name of a COS bucket.
-
-        Rules source: https://ibm.github.io/ibm-cos-sdk-java/com/ibm/cloud/objectstorage/services/s3/model/Bucket.html # pylint: disable=line-too-long
-        """
[The removed validate_name body (old lines 3838-3875) raises StorageNameError unless the name is 3-63 characters long, matches r'^[a-z0-9][-a-z0-9.]*[a-z0-9]$', contains no adjacent '..' or '--', is not formatted as an IP address, and contains neither '.-' nor '-.'; it returns the validated name.]
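The naming rules summarized above reduce to a few regex checks. The validator below is an illustrative sketch of those rules, not the removed implementation.

```python
import re


def validate_cos_bucket_name(name: str) -> str:
    """Check a bucket name against the COS naming rules summarized above."""
    if not 3 <= len(name) <= 63:
        raise ValueError(f'{name!r} must be 3-63 characters long.')
    if not re.match(r'^[a-z0-9][-a-z0-9.]*[a-z0-9]$', name):
        raise ValueError(f'{name!r} may only use lowercase letters, digits, '
                         'dots and dashes, and must start and end with a '
                         'letter or digit.')
    if any(bad in name for bad in ('..', '--', '.-', '-.')):
        raise ValueError(f'{name!r} must not contain adjacent dots/dashes or '
                         '".-"/"-." sequences.')
    if re.match(r'^(?:\d{1,3}\.){3}\d{1,3}$', name):
        raise ValueError(f'{name!r} must not look like an IP address.')
    return name


# Example: validate_cos_bucket_name('my-training-data') returns the name;
# validate_cos_bucket_name('192.168.5.4') raises ValueError.
```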
[removed, old lines 3877-4051: IBMCosStore.initialize, upload, delete, _delete_sub_path, get_handle, batch_ibm_rsync]
-    def initialize(self):
-        """Initializes the cos store object on the cloud.
-
-        Initialization involves fetching bucket if exists, or creating it if
-        it does not.
-
-        Raises:
-          StorageBucketCreateError: If bucket creation fails
-          StorageBucketGetError: If fetching existing bucket fails
-          StorageInitError: If general initialization fails.
-        """
-        self.client = ibm.get_cos_client(self.region)
-        self.s3_resource = ibm.get_cos_resource(self.region)
-        self.bucket, is_new_bucket = self._get_bucket()
-        if self.is_sky_managed is None:
-            # If is_sky_managed is not specified, then this is a new storage
-            # object (i.e., did not exist in global_user_state) and we should
-            # set the is_sky_managed property.
-            # If is_sky_managed is specified, then we take no action.
-            self.is_sky_managed = is_new_bucket
-
-    def upload(self):
-        """Uploads files from local machine to bucket.
[The removed upload body (old lines 3901-3931) calls batch_ibm_rsync for local sources, ignores a cos:// source, and raises an exception ('IBM COS currently not supporting data transfers between COS and ...') for s3://, nebius://, gs:// and r2:// sources; failures are re-raised as StorageUploadError.]
-
-    def delete(self) -> None:
-        if self._bucket_sub_path is not None and not self.is_sky_managed:
-            return self._delete_sub_path()
-
-        self._delete_cos_bucket()
-        logger.info(f'{colorama.Fore.GREEN}Deleted COS bucket {self.name}.'
-                    f'{colorama.Style.RESET_ALL}')
-
-    def _delete_sub_path(self) -> None:
-        assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
-        bucket = self.s3_resource.Bucket(self.name)
-        try:
-            self._delete_cos_bucket_objects(bucket, self._bucket_sub_path + '/')
-        except ibm.ibm_botocore.exceptions.ClientError as e:
-            if e.__class__.__name__ == 'NoSuchBucket':
-                logger.debug('bucket already removed')
-
-    def get_handle(self) -> StorageHandle:
-        return self.s3_resource.Bucket(self.name)
-
-    def batch_ibm_rsync(self,
-                        source_path_list: List[Path],
-                        create_dirs: bool = False) -> None:
-        """Invokes rclone copy to batch upload a list of local paths to cos
[The removed batch_ibm_rsync body (old lines 3957-4051) builds per-path `rclone copy` commands and runs them through data_utils.parallel_upload with a spinner and a log file. Key removed lines:]
-        sub_path = (f'/{self._bucket_sub_path}'
-                    if self._bucket_sub_path else '')
-            sync_command = ('rclone copy --exclude ".git/*" '
-                            f'{src_dir_path} '
-                            f'{self.rclone_profile_name}:{self.name}{sub_path}'
-                            f'/{dest_dir_name}')
-            sync_command = ('rclone copy '
-                            f'{includes} {base_dir_path} '
-                            f'{self.rclone_profile_name}:{self.name}{sub_path}')
-            data_utils.parallel_upload(
-                source_path_list,
-                get_file_sync_command,
-                get_dir_sync_command,
-                log_path,
-                self.name,
-                self._ACCESS_DENIED_MESSAGE,
-                create_dirs=create_dirs,
-                max_concurrent_uploads=_MAX_CONCURRENT_UPLOADS)
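The `batch_ibm_rsync` code above boils down to building `rclone copy` command strings and running them in parallel. A simplified, sequential sketch of one such command is shown below; it is not the removed implementation, it assumes rclone is installed, and the remote name (`ibmcos`) and paths are placeholders for an already-configured rclone remote.

```python
import shlex
import subprocess


def rclone_copy_dir(src_dir: str, remote: str, bucket: str,
                    dest_dir: str = '') -> None:
    """Copy a local directory to <remote>:<bucket>/<dest_dir>, skipping .git."""
    cmd = ('rclone copy --exclude ".git/*" '
           f'{shlex.quote(src_dir)} '
           f'{remote}:{bucket}/{dest_dir}')
    # `rclone copy` skips files whose destination copy is up to date,
    # similar to `aws s3 sync` without --delete.
    subprocess.run(cmd, shell=True, check=True)


# Example (placeholder remote and bucket):
# rclone_copy_dir('./checkpoints', 'ibmcos', 'my-bucket', 'run-1')
```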
[removed, old lines 4052-4225: IBMCosStore._get_bucket, _download_file, mount_command, _create_cos_bucket, _delete_cos_bucket_objects, _delete_cos_bucket]
-    def _get_bucket(self) -> Tuple[StorageHandle, bool]:
-        """returns IBM COS bucket object if exists, otherwise creates it.
[The removed _get_bucket body (old lines 4054-4125) looks up the bucket region via data_utils.get_ibm_cos_bucket_region, compares it against any region embedded in a cos:// source URI (raising StorageBucketGetError on mismatch or on a non-existent source bucket), registers an rclone profile via data_utils.Rclone.store_rclone_config, and then either creates the bucket, raises StorageExternalDeletionError when reconstructing a deleted store, or returns the existing bucket after _validate_existing_bucket().]
-
-    def _download_file(self, remote_path: str, local_path: str) -> None:
-        """Downloads file from remote to local on s3 bucket
-        using the boto3 API
-
-        Args:
-          remote_path: str; Remote path on S3 bucket
-          local_path: str; Local path on user's device
-        """
-        self.client.download_file(self.name, local_path, remote_path)
-
-    def mount_command(self, mount_path: str) -> str:
-        """Returns the command to mount the bucket to the mount_path.
-
-        Uses rclone to mount the bucket.
-        Source: https://github.com/rclone/rclone
-
-        Args:
-          mount_path: str; Path to mount the bucket to.
-        """
-        # install rclone if not installed.
-        install_cmd = mounting_utils.get_rclone_install_cmd()
-        rclone_config = data_utils.Rclone.RcloneStores.IBM.get_config(
-            rclone_profile_name=self.rclone_profile_name,
-            region=self.region)  # type: ignore
-        mount_cmd = (
-            mounting_utils.get_cos_mount_cmd(
-                rclone_config,
-                self.rclone_profile_name,
-                self.bucket.name,
-                mount_path,
-                self._bucket_sub_path,  # type: ignore
-            ))
-        return mounting_utils.get_mounting_command(mount_path, install_cmd,
-                                                   mount_cmd)
-
-    def _create_cos_bucket(self,
-                           bucket_name: str,
-                           region='us-east') -> StorageHandle:
-        """Creates IBM COS bucket with specific name in specific region
-
-        Args:
-          bucket_name: str; Name of bucket
-          region: str; Region name, e.g. us-east, us-south
-        Raises:
-          StorageBucketCreateError: If bucket creation fails.
-        """
-        try:
-            self.client.create_bucket(
-                Bucket=bucket_name,
-                CreateBucketConfiguration={
-                    'LocationConstraint': f'{region}-smart'
-                })
-            logger.info(f' {colorama.Style.DIM}Created IBM COS bucket '
-                        f'{bucket_name!r} in {region} '
-                        'with storage class smart tier'
-                        f'{colorama.Style.RESET_ALL}')
-            self.bucket = self.s3_resource.Bucket(bucket_name)
-
-        except ibm.ibm_botocore.exceptions.ClientError as e:  # type: ignore[union-attr]  # pylint: disable=line-too-long
-            with ux_utils.print_exception_no_traceback():
-                raise exceptions.StorageBucketCreateError(
-                    f'Failed to create bucket: '
-                    f'{bucket_name}') from e
-
-        s3_bucket_exists_waiter = self.client.get_waiter('bucket_exists')
-        s3_bucket_exists_waiter.wait(Bucket=bucket_name)
-
-        return self.bucket
-
-    def _delete_cos_bucket_objects(self,
-                                   bucket: Any,
-                                   prefix: Optional[str] = None) -> None:
-        bucket_versioning = self.s3_resource.BucketVersioning(bucket.name)
-        if bucket_versioning.status == 'Enabled':
-            if prefix is not None:
-                res = list(
-                    bucket.object_versions.filter(Prefix=prefix).delete())
-            else:
-                res = list(bucket.object_versions.delete())
-        else:
-            if prefix is not None:
-                res = list(bucket.objects.filter(Prefix=prefix).delete())
-            else:
-                res = list(bucket.objects.delete())
-        logger.debug(f'Deleted bucket\'s content:\n{res}, prefix: {prefix}')
-
-    def _delete_cos_bucket(self) -> None:
-        bucket = self.s3_resource.Bucket(self.name)
-        try:
-            self._delete_cos_bucket_objects(bucket)
-            bucket.delete()
-            bucket.wait_until_not_exists()
-        except ibm.ibm_botocore.exceptions.ClientError as e:
-            if e.__class__.__name__ == 'NoSuchBucket':
-                logger.debug('bucket already removed')
-        data_utils.Rclone.delete_rclone_bucket_profile(
-            self.name, data_utils.Rclone.RcloneStores.IBM)
-
-
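The two deletion helpers above branch on whether bucket versioning is enabled, deleting object versions in that case and plain objects otherwise. The sketch below shows the same branching with plain boto3, used here only as a stand-in for the IBM SDK's matching resource API; the bucket name and prefix are placeholders.

```python
import boto3  # Stand-in for ibm_boto3, which exposes the same resource API.


def empty_bucket(bucket_name: str, prefix: str = '') -> None:
    """Delete objects (and versions, if versioning is enabled) under a prefix."""
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucket_name)
    versioning = s3.BucketVersioning(bucket_name)
    # Versioned buckets keep every object version; delete those explicitly.
    collection = (bucket.object_versions
                  if versioning.status == 'Enabled' else bucket.objects)
    if prefix:
        collection = collection.filter(Prefix=prefix)
    else:
        collection = collection.all()
    collection.delete()


# Example (placeholder bucket): empty_bucket('my-bucket', prefix='run-1/')
```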
[removed, old lines 4226-4563: class OciStore, __init__, _validate_bucket_expr, _validate, validate_name, initialize, upload, delete, get_handle, batch_oci_rsync, _get_bucket, mount_command (start)]
-class OciStore(AbstractStore):
-    """OciStore inherits from Storage Object and represents the backend
-    for OCI buckets.
-    """
-
-    _ACCESS_DENIED_MESSAGE = 'AccessDeniedException'
-
-    def __init__(self,
-                 name: str,
-                 source: Optional[SourceType],
-                 region: Optional[str] = None,
-                 is_sky_managed: Optional[bool] = None,
-                 sync_on_reconstruction: Optional[bool] = True,
-                 _bucket_sub_path: Optional[str] = None):
-        self.client: Any
-        self.bucket: StorageHandle
-        self.oci_config_file: str
-        self.config_profile: str
-        self.compartment: str
-        self.namespace: str
[The removed __init__ body (old lines 4247-4285) parses an optional '@<region>' suffix out of the name (e.g. 'RAGData@us-sanjose-1') and out of an 'oci://<bucket>@<region>' source, asserts the two regions agree when both are given, falls back to the region in the local OCI config, and then calls super().__init__(name, source, region, is_sky_managed, sync_on_reconstruction, _bucket_sub_path).]
-
-    def _validate_bucket_expr(self, bucket_expr: str):
-        pattern = r'^(\w+://)?[A-Za-z0-9-._]+(@\w{2}-\w+-\d{1})$'
-        if not re.match(pattern, bucket_expr):
-            raise ValueError(
-                'The format for the bucket portion is <bucket>@<region> '
-                'when specify a region with a bucket.')
-
-    def _validate(self):
[The removed _validate body (old lines 4295-4317) accepts oci:// sources whose bucket matches self.name, treats scheme-less sources as local paths, rejects other schemes with NotImplementedError, validates the name, and raises ResourcesUnavailableError if OCI access is disabled (suggesting `sky check`).]
-
-    @classmethod
-    def validate_name(cls, name) -> str:
-        """Validates the name of the OCI store.
-
-        Source for rules: https://docs.oracle.com/en-us/iaas/Content/Object/Tasks/managingbuckets.htm#Managing_Buckets # pylint: disable=line-too-long
-        """
[The removed validate_name body (old lines 4326-4349) requires a 1-256 character name matching r'^[A-Za-z0-9-._]+$' and raises StorageNameError otherwise.]
-
-    def initialize(self):
-        """Initializes the OCI store object on the cloud.
[The removed initialize body (old lines 4353-4383) loads the OCI config file and profile, resolves the compartment via query_helper.find_compartment, creates the object storage client, fetches the namespace for the tenancy, then fetches or creates the bucket and sets is_sky_managed for newly created buckets.]
-
-    def upload(self):
-        """Uploads source to store bucket.
[The removed upload body (old lines 4387-4405) calls batch_oci_rsync for local sources, ignores an oci:// source, and wraps other failures in StorageUploadError.]
-
-    def delete(self) -> None:
-        deleted_by_skypilot = self._delete_oci_bucket(self.name)
-        if deleted_by_skypilot:
-            msg_str = f'Deleted OCI bucket {self.name}.'
-        else:
-            msg_str = (f'OCI bucket {self.name} may have been deleted '
-                       f'externally. Removing from local state.')
-        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
-                    f'{colorama.Style.RESET_ALL}')
-
-    def get_handle(self) -> StorageHandle:
-        return self.client.get_bucket(namespace_name=self.namespace,
-                                      bucket_name=self.name).data
-
-    def batch_oci_rsync(self,
-                        source_path_list: List[Path],
-                        create_dirs: bool = False) -> None:
-        """Invokes oci sync to batch upload a list of local paths to Bucket
[The removed batch_oci_rsync body (old lines 4426-4501) wraps `oci os object bulk-upload` commands in @oci.with_oci_env and runs them via data_utils.parallel_upload with max_concurrent_uploads=1. Key removed command lines:]
-            sync_command = (
-                'oci os object bulk-upload --no-follow-symlinks --overwrite '
-                f'--bucket-name {self.name} --namespace-name {self.namespace} '
-                f'--region {self.region} --src-dir "{base_dir_path}" '
-                f'{prefix_arg} '
-                f'{includes}')
-            sync_command = (
-                'oci os object bulk-upload --no-follow-symlinks --overwrite '
-                f'--bucket-name {self.name} --namespace-name {self.namespace} '
-                f'--region {self.region} '
-                f'--object-prefix "{sub_path}{dest_dir_name}" '
-                f'--src-dir "{src_dir_path}" {excludes}')
-
-    def _get_bucket(self) -> Tuple[StorageHandle, bool]:
-        """Obtains the OCI bucket.
[The removed _get_bucket body (old lines 4505-4560) returns the existing bucket, creates one when it is missing and sync_on_reconstruction is set, returns (None, False) when reconstructing for deletion, and maps OCI service errors (404 Not Found, 401 Unauthorized, other) to StorageBucketGetError with a debugging hint based on `oci os object list`.]
-
-    def mount_command(self, mount_path: str) -> str:
-        """Returns the command to mount the bucket to the mount_path.
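The removed `__init__` and `_validate_bucket_expr` accept bucket expressions of the form `<bucket>@<region>`, for example `oci://RAGData@us-sanjose-1`. The helper below is a small illustrative sketch of that parsing, reusing the regex quoted in the diff; the function name is made up for this example.

```python
import re
from typing import Optional, Tuple

_BUCKET_WITH_REGION = r'^(\w+://)?[A-Za-z0-9-._]+(@\w{2}-\w+-\d{1})$'


def split_bucket_and_region(expr: str) -> Tuple[str, Optional[str]]:
    """Split 'oci://bucket@us-sanjose-1' (or 'bucket@us-sanjose-1') into parts."""
    if '@' not in expr:
        return expr, None
    if not re.match(_BUCKET_WITH_REGION, expr):
        raise ValueError('Expected <bucket>@<region> when a region is given.')
    bucket, region = expr.split('@')
    return bucket, region


# Example: split_bucket_and_region('oci://RAGData@us-sanjose-1')
#          -> ('oci://RAGData', 'us-sanjose-1')
```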
3900
|
+
# bucket's region doesn't match specified region in URI
|
|
3901
|
+
if bucket_region and uri_region and uri_region != bucket_region\
|
|
3902
|
+
and self.sync_on_reconstruction:
|
|
3903
|
+
with ux_utils.print_exception_no_traceback():
|
|
3904
|
+
raise exceptions.StorageBucketGetError(
|
|
3905
|
+
f'Bucket {self.name} exists in '
|
|
3906
|
+
f'region {bucket_region}, '
|
|
3907
|
+
f'but URI specified region {uri_region}.')
|
|
4564
3908
|
|
|
4565
|
-
|
|
3909
|
+
if not bucket_region and uri_region:
|
|
3910
|
+
# bucket doesn't exist but source is a bucket URI
|
|
3911
|
+
with ux_utils.print_exception_no_traceback():
|
|
3912
|
+
raise exceptions.StorageBucketGetError(
|
|
3913
|
+
'Attempted to use a non-existent bucket as a source: '
|
|
3914
|
+
f'{self.name} by providing URI. Consider using '
|
|
3915
|
+
'`rclone lsd <remote>` on relevant remotes returned '
|
|
3916
|
+
'via `rclone listremotes` to debug.')
|
|
4566
3917
|
|
|
4567
|
-
|
|
4568
|
-
|
|
4569
|
-
|
|
4570
|
-
|
|
4571
|
-
|
|
4572
|
-
mount_path=mount_path,
|
|
4573
|
-
store_name=self.name,
|
|
4574
|
-
region=str(self.region),
|
|
4575
|
-
namespace=self.namespace,
|
|
4576
|
-
compartment=self.bucket.compartment_id,
|
|
4577
|
-
config_file=self.oci_config_file,
|
|
4578
|
-
config_profile=self.config_profile)
|
|
4579
|
-
version_check_cmd = mounting_utils.get_rclone_version_check_cmd()
|
|
3918
|
+
data_utils.Rclone.store_rclone_config(
|
|
3919
|
+
self.name,
|
|
3920
|
+
data_utils.Rclone.RcloneStores.IBM,
|
|
3921
|
+
self.region, # type: ignore
|
|
3922
|
+
)
|
|
4580
3923
|
|
|
4581
|
-
|
|
4582
|
-
|
|
3924
|
+
if not bucket_region and self.sync_on_reconstruction:
|
|
3925
|
+
# bucket doesn't exist
|
|
3926
|
+
return self._create_cos_bucket(self.name, self.region), True
|
|
3927
|
+
elif not bucket_region and not self.sync_on_reconstruction:
|
|
3928
|
+
# Raised when Storage object is reconstructed for sky storage
|
|
3929
|
+
# delete or to re-mount Storages with sky start but the storage
|
|
3930
|
+
# is already removed externally.
|
|
3931
|
+
raise exceptions.StorageExternalDeletionError(
|
|
3932
|
+
'Attempted to fetch a non-existent bucket: '
|
|
3933
|
+
f'{self.name}')
|
|
3934
|
+
else:
|
|
3935
|
+
# bucket exists
|
|
3936
|
+
bucket = self.s3_resource.Bucket(self.name)
|
|
3937
|
+
self._validate_existing_bucket()
|
|
3938
|
+
return bucket, False
|
|
4583
3939
|
|
|
4584
3940
|
def _download_file(self, remote_path: str, local_path: str) -> None:
|
|
4585
|
-
"""Downloads file from remote to local on
|
|
3941
|
+
"""Downloads file from remote to local on s3 bucket
|
|
3942
|
+
using the boto3 API
|
|
4586
3943
|
|
|
4587
3944
|
Args:
|
|
4588
|
-
remote_path: str; Remote path on
|
|
3945
|
+
remote_path: str; Remote path on S3 bucket
|
|
4589
3946
|
local_path: str; Local path on user's device
|
|
4590
3947
|
"""
|
|
4591
|
-
|
|
4592
|
-
# If the remote path is /bucket_name, we need to
|
|
4593
|
-
# remove the leading /
|
|
4594
|
-
remote_path = remote_path.lstrip('/')
|
|
4595
|
-
|
|
4596
|
-
filename = os.path.basename(remote_path)
|
|
4597
|
-
if not local_path.endswith(filename):
|
|
4598
|
-
local_path = os.path.join(local_path, filename)
|
|
4599
|
-
|
|
4600
|
-
@oci.with_oci_env
|
|
4601
|
-
def get_file_download_command(remote_path, local_path):
|
|
4602
|
-
download_command = (f'oci os object get --bucket-name {self.name} '
|
|
4603
|
-
f'--namespace-name {self.namespace} '
|
|
4604
|
-
f'--region {self.region} --name {remote_path} '
|
|
4605
|
-
f'--file {local_path}')
|
|
3948
|
+
self.client.download_file(self.name, local_path, remote_path)
|
|
4606
3949
|
|
|
4607
|
-
|
|
3950
|
+
def mount_command(self, mount_path: str) -> str:
|
|
3951
|
+
"""Returns the command to mount the bucket to the mount_path.
|
|
4608
3952
|
|
|
4609
|
-
|
|
3953
|
+
Uses rclone to mount the bucket.
|
|
3954
|
+
Source: https://github.com/rclone/rclone
|
|
4610
3955
|
|
|
4611
|
-
|
|
4612
|
-
|
|
4613
|
-
|
|
4614
|
-
|
|
4615
|
-
|
|
4616
|
-
|
|
4617
|
-
|
|
4618
|
-
|
|
4619
|
-
|
|
4620
|
-
|
|
4621
|
-
|
|
4622
|
-
|
|
4623
|
-
|
|
3956
|
+
Args:
|
|
3957
|
+
mount_path: str; Path to mount the bucket to.
|
|
3958
|
+
"""
|
|
3959
|
+
# install rclone if not installed.
|
|
3960
|
+
install_cmd = mounting_utils.get_rclone_install_cmd()
|
|
3961
|
+
rclone_config = data_utils.Rclone.RcloneStores.IBM.get_config(
|
|
3962
|
+
rclone_profile_name=self.rclone_profile_name,
|
|
3963
|
+
region=self.region) # type: ignore
|
|
3964
|
+
mount_cmd = (
|
|
3965
|
+
mounting_utils.get_cos_mount_cmd(
|
|
3966
|
+
rclone_config,
|
|
3967
|
+
self.rclone_profile_name,
|
|
3968
|
+
self.bucket.name,
|
|
3969
|
+
mount_path,
|
|
3970
|
+
self._bucket_sub_path, # type: ignore
|
|
3971
|
+
))
|
|
3972
|
+
return mounting_utils.get_mounting_command(mount_path, install_cmd,
|
|
3973
|
+
mount_cmd)
|
|
4624
3974
|
|
|
4625
|
-
def
|
|
4626
|
-
|
|
3975
|
+
def _create_cos_bucket(self,
|
|
3976
|
+
bucket_name: str,
|
|
3977
|
+
region='us-east') -> StorageHandle:
|
|
3978
|
+
"""Creates IBM COS bucket with specific name in specific region
|
|
4627
3979
|
|
|
4628
3980
|
Args:
|
|
4629
3981
|
bucket_name: str; Name of bucket
|
|
4630
|
-
region: str; Region name, e.g. us-
|
|
3982
|
+
region: str; Region name, e.g. us-east, us-south
|
|
3983
|
+
Raises:
|
|
3984
|
+
StorageBucketCreateError: If bucket creation fails.
|
|
4631
3985
|
"""
|
|
4632
|
-
logger.debug(f'_create_oci_bucket: {bucket_name}')
|
|
4633
3986
|
try:
|
|
4634
|
-
|
|
4635
|
-
|
|
4636
|
-
|
|
4637
|
-
|
|
4638
|
-
|
|
4639
|
-
|
|
4640
|
-
|
|
4641
|
-
|
|
4642
|
-
|
|
4643
|
-
|
|
3987
|
+
self.client.create_bucket(
|
|
3988
|
+
Bucket=bucket_name,
|
|
3989
|
+
CreateBucketConfiguration={
|
|
3990
|
+
'LocationConstraint': f'{region}-smart'
|
|
3991
|
+
})
|
|
3992
|
+
logger.info(f' {colorama.Style.DIM}Created IBM COS bucket '
|
|
3993
|
+
f'{bucket_name!r} in {region} '
|
|
3994
|
+
'with storage class smart tier'
|
|
3995
|
+
f'{colorama.Style.RESET_ALL}')
|
|
3996
|
+
self.bucket = self.s3_resource.Bucket(bucket_name)
|
|
3997
|
+
|
|
3998
|
+
except ibm.ibm_botocore.exceptions.ClientError as e: # type: ignore[union-attr] # pylint: disable=line-too-long
|
|
4644
3999
|
with ux_utils.print_exception_no_traceback():
|
|
4645
4000
|
raise exceptions.StorageBucketCreateError(
|
|
4646
|
-
f'Failed to create
|
|
4647
|
-
|
|
4648
|
-
def _delete_oci_bucket(self, bucket_name: str) -> bool:
|
|
4649
|
-
"""Deletes OCI bucket, including all objects in bucket
|
|
4650
|
-
|
|
4651
|
-
Args:
|
|
4652
|
-
bucket_name: str; Name of bucket
|
|
4653
|
-
|
|
4654
|
-
Returns:
|
|
4655
|
-
bool; True if bucket was deleted, False if it was deleted externally.
|
|
4656
|
-
"""
|
|
4657
|
-
logger.debug(f'_delete_oci_bucket: {bucket_name}')
|
|
4001
|
+
f'Failed to create bucket: '
|
|
4002
|
+
f'{bucket_name}') from e
|
|
4658
4003
|
|
|
4659
|
-
|
|
4660
|
-
|
|
4661
|
-
remove_command = (f'oci os bucket delete --bucket-name '
|
|
4662
|
-
f'--region {self.region} '
|
|
4663
|
-
f'{bucket_name} --empty --force')
|
|
4004
|
+
s3_bucket_exists_waiter = self.client.get_waiter('bucket_exists')
|
|
4005
|
+
s3_bucket_exists_waiter.wait(Bucket=bucket_name)
|
|
4664
4006
|
|
|
4665
|
-
|
|
4007
|
+
return self.bucket
|
|
4666
4008
|
|
|
4667
|
-
|
|
4009
|
+
def _delete_cos_bucket_objects(self,
|
|
4010
|
+
bucket: Any,
|
|
4011
|
+
prefix: Optional[str] = None) -> None:
|
|
4012
|
+
bucket_versioning = self.s3_resource.BucketVersioning(bucket.name)
|
|
4013
|
+
if bucket_versioning.status == 'Enabled':
|
|
4014
|
+
if prefix is not None:
|
|
4015
|
+
res = list(
|
|
4016
|
+
bucket.object_versions.filter(Prefix=prefix).delete())
|
|
4017
|
+
else:
|
|
4018
|
+
res = list(bucket.object_versions.delete())
|
|
4019
|
+
else:
|
|
4020
|
+
if prefix is not None:
|
|
4021
|
+
res = list(bucket.objects.filter(Prefix=prefix).delete())
|
|
4022
|
+
else:
|
|
4023
|
+
res = list(bucket.objects.delete())
|
|
4024
|
+
logger.debug(f'Deleted bucket\'s content:\n{res}, prefix: {prefix}')
|
|
4668
4025
|
|
|
4026
|
+
def _delete_cos_bucket(self) -> None:
|
|
4027
|
+
bucket = self.s3_resource.Bucket(self.name)
|
|
4669
4028
|
try:
|
|
4670
|
-
|
|
4671
|
-
|
|
4672
|
-
|
|
4673
|
-
|
|
4674
|
-
|
|
4675
|
-
|
|
4676
|
-
|
|
4677
|
-
|
|
4678
|
-
bucket_name=bucket_name))
|
|
4679
|
-
return False
|
|
4680
|
-
else:
|
|
4681
|
-
logger.error(e.output)
|
|
4682
|
-
with ux_utils.print_exception_no_traceback():
|
|
4683
|
-
raise exceptions.StorageBucketDeleteError(
|
|
4684
|
-
f'Failed to delete OCI bucket {bucket_name}.')
|
|
4685
|
-
return True
|
|
4029
|
+
self._delete_cos_bucket_objects(bucket)
|
|
4030
|
+
bucket.delete()
|
|
4031
|
+
bucket.wait_until_not_exists()
|
|
4032
|
+
except ibm.ibm_botocore.exceptions.ClientError as e:
|
|
4033
|
+
if e.__class__.__name__ == 'NoSuchBucket':
|
|
4034
|
+
logger.debug('bucket already removed')
|
|
4035
|
+
data_utils.Rclone.delete_rclone_bucket_profile(
|
|
4036
|
+
self.name, data_utils.Rclone.RcloneStores.IBM)
|
|
4686
4037
|
|
|
4687
4038
|
|
|
4688
|
-
class
|
|
4689
|
-
"""
|
|
4690
|
-
for
|
|
4039
|
+
class OciStore(AbstractStore):
|
|
4040
|
+
"""OciStore inherits from Storage Object and represents the backend
|
|
4041
|
+
for OCI buckets.
|
|
4691
4042
|
"""
|
|
4692
4043
|
|
|
4693
|
-
_ACCESS_DENIED_MESSAGE = '
|
|
4694
|
-
_TIMEOUT_TO_PROPAGATES = 20
|
|
4044
|
+
_ACCESS_DENIED_MESSAGE = 'AccessDeniedException'
|
|
4695
4045
|
|
|
4696
4046
|
def __init__(self,
|
|
4697
4047
|
name: str,
|
|
4698
|
-
source:
|
|
4048
|
+
source: Optional[SourceType],
|
|
4699
4049
|
region: Optional[str] = None,
|
|
4700
4050
|
is_sky_managed: Optional[bool] = None,
|
|
4701
|
-
sync_on_reconstruction: bool = True,
|
|
4051
|
+
sync_on_reconstruction: Optional[bool] = True,
|
|
4702
4052
|
_bucket_sub_path: Optional[str] = None):
|
|
4703
|
-
self.client:
|
|
4704
|
-
self.bucket:
|
|
4053
|
+
self.client: Any
|
|
4054
|
+
self.bucket: StorageHandle
|
|
4055
|
+
self.oci_config_file: str
|
|
4056
|
+
self.config_profile: str
|
|
4057
|
+
self.compartment: str
|
|
4058
|
+
self.namespace: str
|
|
4059
|
+
|
|
4060
|
+
# Region is from the specified name in <bucket>@<region> format.
|
|
4061
|
+
# Another case is name can also be set by the source, for example:
|
|
4062
|
+
# /datasets-storage:
|
|
4063
|
+
# source: oci://RAGData@us-sanjose-1
|
|
4064
|
+
# The name in above mount will be set to RAGData@us-sanjose-1
|
|
4065
|
+
region_in_name = None
|
|
4066
|
+
if name is not None and '@' in name:
|
|
4067
|
+
self._validate_bucket_expr(name)
|
|
4068
|
+
name, region_in_name = name.split('@')
|
|
4069
|
+
|
|
4070
|
+
# Region is from the specified source in oci://<bucket>@<region> format
|
|
4071
|
+
region_in_source = None
|
|
4072
|
+
if isinstance(source,
|
|
4073
|
+
str) and source.startswith('oci://') and '@' in source:
|
|
4074
|
+
self._validate_bucket_expr(source)
|
|
4075
|
+
source, region_in_source = source.split('@')
|
|
4076
|
+
|
|
4077
|
+
if region_in_name is not None and region_in_source is not None:
|
|
4078
|
+
# This should never happen because name and source will never be
|
|
4079
|
+
# the remote bucket at the same time.
|
|
4080
|
+
assert region_in_name == region_in_source, (
|
|
4081
|
+
f'Mismatch region specified. Region in name {region_in_name}, '
|
|
4082
|
+
f'but region in source is {region_in_source}')
|
|
4083
|
+
|
|
4084
|
+
if region_in_name is not None:
|
|
4085
|
+
region = region_in_name
|
|
4086
|
+
elif region_in_source is not None:
|
|
4087
|
+
region = region_in_source
|
|
4088
|
+
|
|
4089
|
+
# Default region set to what specified in oci config.
|
|
4090
|
+
if region is None:
|
|
4091
|
+
region = oci.get_oci_config()['region']
|
|
4092
|
+
|
|
4093
|
+
# So far from now on, the name and source are canonical, means there
|
|
4094
|
+
# is no region (@<region> suffix) associated with them anymore.
|
|
4095
|
+
|
|
4705
4096
|
super().__init__(name, source, region, is_sky_managed,
|
|
4706
4097
|
sync_on_reconstruction, _bucket_sub_path)
|
|
4098
|
+
# TODO(zpoint): add _bucket_sub_path to the sync/mount/delete commands
|
|
4099
|
+
|
|
4100
|
+
def _validate_bucket_expr(self, bucket_expr: str):
|
|
4101
|
+
pattern = r'^(\w+://)?[A-Za-z0-9-._]+(@\w{2}-\w+-\d{1})$'
|
|
4102
|
+
if not re.match(pattern, bucket_expr):
|
|
4103
|
+
raise ValueError(
|
|
4104
|
+
'The format for the bucket portion is <bucket>@<region> '
|
|
4105
|
+
'when specify a region with a bucket.')
|
|
4707
4106
|
|
|
4708
4107
|
def _validate(self):
|
|
4709
4108
|
if self.source is not None and isinstance(self.source, str):
|
|
4710
|
-
if self.source.startswith('
|
|
4711
|
-
assert self.name == data_utils.
|
|
4712
|
-
'
|
|
4713
|
-
' same as
|
|
4714
|
-
elif
|
|
4715
|
-
|
|
4716
|
-
|
|
4717
|
-
|
|
4718
|
-
assert data_utils.verify_gcs_bucket(self.name), (
|
|
4719
|
-
f'Source specified as {self.source}, a GCS bucket. ',
|
|
4720
|
-
'GCS Bucket should exist.')
|
|
4721
|
-
elif data_utils.is_az_container_endpoint(self.source):
|
|
4722
|
-
storage_account_name, container_name, _ = (
|
|
4723
|
-
data_utils.split_az_path(self.source))
|
|
4724
|
-
assert self.name == container_name, (
|
|
4725
|
-
'Azure bucket is specified as path, the name should be '
|
|
4726
|
-
'the same as Azure bucket.')
|
|
4727
|
-
assert data_utils.verify_az_bucket(
|
|
4728
|
-
storage_account_name, self.name), (
|
|
4729
|
-
f'Source specified as {self.source}, an Azure bucket. '
|
|
4730
|
-
'Azure bucket should exist.')
|
|
4731
|
-
elif self.source.startswith('r2://'):
|
|
4732
|
-
assert self.name == data_utils.split_r2_path(self.source)[0], (
|
|
4733
|
-
'R2 Bucket is specified as path, the name should be '
|
|
4734
|
-
'the same as R2 bucket.')
|
|
4735
|
-
assert data_utils.verify_r2_bucket(self.name), (
|
|
4736
|
-
f'Source specified as {self.source}, a R2 bucket. ',
|
|
4737
|
-
'R2 Bucket should exist.')
|
|
4738
|
-
elif self.source.startswith('nebius://'):
|
|
4739
|
-
assert self.name == data_utils.split_nebius_path(
|
|
4740
|
-
self.source)[0], (
|
|
4741
|
-
'Nebius Object Storage is specified as path, the name '
|
|
4742
|
-
'should be the same as Nebius Object Storage bucket.')
|
|
4743
|
-
elif self.source.startswith('cos://'):
|
|
4744
|
-
assert self.name == data_utils.split_cos_path(self.source)[0], (
|
|
4745
|
-
'COS Bucket is specified as path, the name should be '
|
|
4746
|
-
'the same as COS bucket.')
|
|
4747
|
-
assert data_utils.verify_ibm_cos_bucket(self.name), (
|
|
4748
|
-
f'Source specified as {self.source}, a COS bucket. ',
|
|
4749
|
-
'COS Bucket should exist.')
|
|
4750
|
-
elif self.source.startswith('oci://'):
|
|
4109
|
+
if self.source.startswith('oci://'):
|
|
4110
|
+
assert self.name == data_utils.split_oci_path(self.source)[0], (
|
|
4111
|
+
'OCI Bucket is specified as path, the name should be '
|
|
4112
|
+
'the same as OCI bucket.')
|
|
4113
|
+
elif not re.search(r'^\w+://', self.source):
|
|
4114
|
+
# Treat it as local path.
|
|
4115
|
+
pass
|
|
4116
|
+
else:
|
|
4751
4117
|
raise NotImplementedError(
|
|
4752
|
-
'Moving data from
|
|
4753
|
-
# Validate name
|
|
4754
|
-
self.name = S3Store.validate_name(self.name)
|
|
4118
|
+
f'Moving data from {self.source} to OCI is not supported.')
|
|
4755
4119
|
|
|
4120
|
+
# Validate name
|
|
4121
|
+
self.name = self.validate_name(self.name)
|
|
4756
4122
|
# Check if the storage is enabled
|
|
4757
|
-
if not _is_storage_cloud_enabled(str(clouds.
|
|
4123
|
+
if not _is_storage_cloud_enabled(str(clouds.OCI())):
|
|
4758
4124
|
with ux_utils.print_exception_no_traceback():
|
|
4759
|
-
raise exceptions.ResourcesUnavailableError(
|
|
4760
|
-
'Storage \'store:
|
|
4761
|
-
'
|
|
4762
|
-
'
|
|
4763
|
-
'https://
|
|
4764
|
-
|
|
4125
|
+
raise exceptions.ResourcesUnavailableError(
|
|
4126
|
+
'Storage \'store: oci\' specified, but ' \
|
|
4127
|
+
'OCI access is disabled. To fix, enable '\
|
|
4128
|
+
'OCI by running `sky check`. '\
|
|
4129
|
+
'More info: https://skypilot.readthedocs.io/en/latest/getting-started/installation.html.' # pylint: disable=line-too-long
|
|
4130
|
+
)
|
|
4131
|
+
|
|
4132
|
+
@classmethod
|
|
4133
|
+
def validate_name(cls, name) -> str:
|
|
4134
|
+
"""Validates the name of the OCI store.
|
|
4135
|
+
|
|
4136
|
+
Source for rules: https://docs.oracle.com/en-us/iaas/Content/Object/Tasks/managingbuckets.htm#Managing_Buckets # pylint: disable=line-too-long
|
|
4137
|
+
"""
|
|
4138
|
+
|
|
4139
|
+
def _raise_no_traceback_name_error(err_str):
|
|
4140
|
+
with ux_utils.print_exception_no_traceback():
|
|
4141
|
+
raise exceptions.StorageNameError(err_str)
|
|
4142
|
+
|
|
4143
|
+
if name is not None and isinstance(name, str):
|
|
4144
|
+
# Check for overall length
|
|
4145
|
+
if not 1 <= len(name) <= 256:
|
|
4146
|
+
_raise_no_traceback_name_error(
|
|
4147
|
+
f'Invalid store name: name {name} must contain 1-256 '
|
|
4148
|
+
'characters.')
|
|
4149
|
+
|
|
4150
|
+
# Check for valid characters and start/end with a number or letter
|
|
4151
|
+
pattern = r'^[A-Za-z0-9-._]+$'
|
|
4152
|
+
if not re.match(pattern, name):
|
|
4153
|
+
_raise_no_traceback_name_error(
|
|
4154
|
+
f'Invalid store name: name {name} can only contain '
|
|
4155
|
+
'upper or lower case letters, numeric characters, hyphens '
|
|
4156
|
+
'(-), underscores (_), and dots (.). Spaces are not '
|
|
4157
|
+
'allowed. Names must start and end with a number or '
|
|
4158
|
+
'letter.')
|
|
4159
|
+
else:
|
|
4160
|
+
_raise_no_traceback_name_error('Store name must be specified.')
|
|
4161
|
+
return name
|
|
4765
4162
|
|
|
4766
4163
|
def initialize(self):
|
|
4767
|
-
"""Initializes the
|
|
4164
|
+
"""Initializes the OCI store object on the cloud.
|
|
4768
4165
|
|
|
4769
4166
|
Initialization involves fetching bucket if exists, or creating it if
|
|
4770
4167
|
it does not.
|
|
@@ -4774,7 +4171,21 @@ class NebiusStore(AbstractStore):
|
|
|
4774
4171
|
StorageBucketGetError: If fetching existing bucket fails
|
|
4775
4172
|
StorageInitError: If general initialization fails.
|
|
4776
4173
|
"""
|
|
4777
|
-
|
|
4174
|
+
# pylint: disable=import-outside-toplevel
|
|
4175
|
+
from sky.clouds.utils import oci_utils
|
|
4176
|
+
from sky.provision.oci.query_utils import query_helper
|
|
4177
|
+
|
|
4178
|
+
self.oci_config_file = oci.get_config_file()
|
|
4179
|
+
self.config_profile = oci_utils.oci_config.get_profile()
|
|
4180
|
+
|
|
4181
|
+
## pylint: disable=line-too-long
|
|
4182
|
+
# What's compartment? See thttps://docs.oracle.com/en/cloud/foundation/cloud_architecture/governance/compartments.html
|
|
4183
|
+
self.compartment = query_helper.find_compartment(self.region)
|
|
4184
|
+
self.client = oci.get_object_storage_client(region=self.region,
|
|
4185
|
+
profile=self.config_profile)
|
|
4186
|
+
self.namespace = self.client.get_namespace(
|
|
4187
|
+
compartment_id=oci.get_oci_config()['tenancy']).data
|
|
4188
|
+
|
|
4778
4189
|
self.bucket, is_new_bucket = self._get_bucket()
|
|
4779
4190
|
if self.is_sky_managed is None:
|
|
4780
4191
|
# If is_sky_managed is not specified, then this is a new storage
|
|
@@ -4794,20 +4205,12 @@ class NebiusStore(AbstractStore):
4794 4205           """
4795 4206           try:
4796 4207               if isinstance(self.source, list):
4797        -                 self.
     4208 +                 self.batch_oci_rsync(self.source, create_dirs=True)
4798 4209               elif self.source is not None:
4799        -                 if self.source.startswith('
     4210 +                 if self.source.startswith('oci://'):
4800 4211                       pass
4801        -                 elif self.source.startswith('s3://'):
4802        -                     self._transfer_to_nebius()
4803        -                 elif self.source.startswith('gs://'):
4804        -                     self._transfer_to_nebius()
4805        -                 elif self.source.startswith('r2://'):
4806        -                     self._transfer_to_nebius()
4807        -                 elif self.source.startswith('oci://'):
4808        -                     self._transfer_to_nebius()
4809 4212                   else:
4810        -                     self.
     4213 +                     self.batch_oci_rsync([self.source])
4811 4214           except exceptions.StorageUploadError:
4812 4215               raise
4813 4216           except Exception as e:
@@ -4815,45 +4218,25 @@ class NebiusStore(AbstractStore):
4815 4218                   f'Upload failed for store {self.name}') from e
4816 4219
4817 4220       def delete(self) -> None:
4818        -
4819        -             return self._delete_sub_path()
4820        -
4821        -         deleted_by_skypilot = self._delete_nebius_bucket(self.name)
     4221 +         deleted_by_skypilot = self._delete_oci_bucket(self.name)
4822 4222           if deleted_by_skypilot:
4823        -             msg_str = f'Deleted
     4223 +             msg_str = f'Deleted OCI bucket {self.name}.'
4824 4224           else:
4825        -             msg_str = (f'
     4225 +             msg_str = (f'OCI bucket {self.name} may have been deleted '
4826 4226                          f'externally. Removing from local state.')
4827 4227           logger.info(f'{colorama.Fore.GREEN}{msg_str}'
4828 4228                       f'{colorama.Style.RESET_ALL}')
4829 4229
4830        -     def _delete_sub_path(self) -> None:
4831        -         assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
4832        -         deleted_by_skypilot = self._delete_nebius_bucket_sub_path(
4833        -             self.name, self._bucket_sub_path)
4834        -         if deleted_by_skypilot:
4835        -             msg_str = (f'Removed objects from S3 bucket '
4836        -                        f'{self.name}/{self._bucket_sub_path}.')
4837        -         else:
4838        -             msg_str = (f'Failed to remove objects from S3 bucket '
4839        -                        f'{self.name}/{self._bucket_sub_path}.')
4840        -         logger.info(f'{colorama.Fore.GREEN}{msg_str}'
4841        -                     f'{colorama.Style.RESET_ALL}')
4842        -
4843 4230       def get_handle(self) -> StorageHandle:
4844        -         return
     4231 +         return self.client.get_bucket(namespace_name=self.namespace,
     4232 +                                       bucket_name=self.name).data
4845 4233
4846        -     def
     4234 +     def batch_oci_rsync(self,
4847 4235                           source_path_list: List[Path],
4848 4236                           create_dirs: bool = False) -> None:
4849        -         """Invokes
4850        -
4851        -         AWS Sync by default uses 10 threads to upload files to the bucket. To
4852        -         increase parallelism, modify max_concurrent_requests in your aws config
4853        -         file (Default path: ~/.aws/config).
     4237 +         """Invokes oci sync to batch upload a list of local paths to Bucket
4854 4238
4855        -
4856        -         multiple commands to be run in parallel.
     4239 +         Use OCI bulk operation to batch process the file upload
4857 4240
4858 4241           Args:
4859 4242               source_path_list: List of paths to local files or directories
@@ -4863,34 +4246,45 @@ class NebiusStore(AbstractStore):
4863 4246                   set to True, the directory is created in the bucket root and
4864 4247                   contents are uploaded to it.
4865 4248           """
4866        -         sub_path = (f'
     4249 +         sub_path = (f'{self._bucket_sub_path}/'
     4250                         if self._bucket_sub_path else '')
4868 4251
     4252 +         @oci.with_oci_env
4869 4253           def get_file_sync_command(base_dir_path, file_names):
4870        -             includes = ' '.join(
4871        -                 f'--include {
4872        -
4873        -
4874        -
4875        -             sync_command = (
4876        -
4877        -
4878        -
     4254 +             includes = ' '.join(
     4255 +                 [f'--include "{file_name}"' for file_name in file_names])
     4256 +             prefix_arg = ''
     4257 +             if sub_path:
     4258 +                 prefix_arg = f'--object-prefix "{sub_path.strip("/")}"'
     4259 +             sync_command = (
     4260 +                 'oci os object bulk-upload --no-follow-symlinks --overwrite '
     4261 +                 f'--bucket-name {self.name} --namespace-name {self.namespace} '
     4262 +                 f'--region {self.region} --src-dir "{base_dir_path}" '
     4263 +                 f'{prefix_arg} '
     4264 +                 f'{includes}')
     4265 +
4879 4266               return sync_command
4880 4267
     4268 +         @oci.with_oci_env
4881 4269           def get_dir_sync_command(src_dir_path, dest_dir_name):
4882        -
     4270 +             if dest_dir_name and not str(dest_dir_name).endswith('/'):
     4271 +                 dest_dir_name = f'{dest_dir_name}/'
     4272 +
4883 4273               excluded_list = storage_utils.get_excluded_files(src_dir_path)
4884 4274               excluded_list.append('.git/*')
4885 4275               excludes = ' '.join([
4886 4276                   f'--exclude {shlex.quote(file_name)}'
4887 4277                   for file_name in excluded_list
4888 4278               ])
4889        -
4890        -
4891        -
4892        -
4893        -
     4279 +
     4280 +             # we exclude .git directory from the sync
     4281 +             sync_command = (
     4282 +                 'oci os object bulk-upload --no-follow-symlinks --overwrite '
     4283 +                 f'--bucket-name {self.name} --namespace-name {self.namespace} '
     4284 +                 f'--region {self.region} '
     4285 +                 f'--object-prefix "{sub_path}{dest_dir_name}" '
     4286 +                 f'--src-dir "{src_dir_path}" {excludes}')
     4287 +
4894 4288               return sync_command
4895 4289
4896 4290           # Generate message for upload
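For orientation, each generated command above is a single `oci os object bulk-upload` invocation. A minimal sketch of the string that `get_dir_sync_command` builds, using hypothetical bucket, namespace, region and source-directory values (the real values come from the store instance at runtime):

    # Illustrative only: mirrors the f-strings in get_dir_sync_command above,
    # with hypothetical placeholder values.
    bucket_name = 'my-bucket'        # hypothetical
    namespace = 'my-namespace'       # hypothetical
    region = 'us-ashburn-1'          # hypothetical
    src_dir_path = '/tmp/workdir'    # hypothetical
    sub_path, dest_dir_name = '', 'workdir/'
    excludes = '--exclude ".git/*"'

    sync_command = (
        'oci os object bulk-upload --no-follow-symlinks --overwrite '
        f'--bucket-name {bucket_name} --namespace-name {namespace} '
        f'--region {region} '
        f'--object-prefix "{sub_path}{dest_dir_name}" '
        f'--src-dir "{src_dir_path}" {excludes}')
    print(sync_command)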
@@ -4901,210 +4295,469 @@ class NebiusStore(AbstractStore):
|
|
|
4901
4295
|
|
|
4902
4296
|
log_path = sky_logging.generate_tmp_logging_file_path(
|
|
4903
4297
|
_STORAGE_LOG_FILE_NAME)
|
|
4904
|
-
sync_path = f'{source_message} ->
|
|
4298
|
+
sync_path = f'{source_message} -> oci://{self.name}/{sub_path}'
|
|
4905
4299
|
with rich_utils.safe_status(
|
|
4906
4300
|
ux_utils.spinner_message(f'Syncing {sync_path}',
|
|
4907
4301
|
log_path=log_path)):
|
|
4908
4302
|
data_utils.parallel_upload(
|
|
4909
|
-
source_path_list,
|
|
4910
|
-
get_file_sync_command,
|
|
4911
|
-
get_dir_sync_command,
|
|
4912
|
-
log_path,
|
|
4913
|
-
self.name,
|
|
4914
|
-
self._ACCESS_DENIED_MESSAGE,
|
|
4303
|
+
source_path_list=source_path_list,
|
|
4304
|
+
filesync_command_generator=get_file_sync_command,
|
|
4305
|
+
dirsync_command_generator=get_dir_sync_command,
|
|
4306
|
+
log_path=log_path,
|
|
4307
|
+
bucket_name=self.name,
|
|
4308
|
+
access_denied_message=self._ACCESS_DENIED_MESSAGE,
|
|
4915
4309
|
create_dirs=create_dirs,
|
|
4916
|
-
max_concurrent_uploads=
|
|
4917
|
-
logger.info(
|
|
4918
|
-
ux_utils.finishing_message(f'Storage synced: {sync_path}',
|
|
4919
|
-
log_path))
|
|
4310
|
+
max_concurrent_uploads=1)
|
|
4920
4311
|
|
|
4921
|
-
|
|
4922
|
-
|
|
4923
|
-
|
|
4924
|
-
data_transfer.gcs_to_nebius(self.name, self.name)
|
|
4925
|
-
elif self.source.startswith('r2://'):
|
|
4926
|
-
data_transfer.r2_to_nebius(self.name, self.name)
|
|
4927
|
-
elif self.source.startswith('s3://'):
|
|
4928
|
-
data_transfer.s3_to_nebius(self.name, self.name)
|
|
4312
|
+
logger.info(
|
|
4313
|
+
ux_utils.finishing_message(f'Storage synced: {sync_path}',
|
|
4314
|
+
log_path))
|
|
4929
4315
|
|
|
4930
4316
|
def _get_bucket(self) -> Tuple[StorageHandle, bool]:
|
|
4931
|
-
"""Obtains the
|
|
4317
|
+
"""Obtains the OCI bucket.
|
|
4318
|
+
If the bucket exists, this method will connect to the bucket.
|
|
4932
4319
|
|
|
4933
|
-
If the bucket exists, this method will return the bucket.
|
|
4934
4320
|
If the bucket does not exist, there are three cases:
|
|
4935
|
-
1) Raise an error if the bucket source starts with
|
|
4321
|
+
1) Raise an error if the bucket source starts with oci://
|
|
4936
4322
|
2) Return None if bucket has been externally deleted and
|
|
4937
4323
|
sync_on_reconstruction is False
|
|
4938
4324
|
3) Create and return a new bucket otherwise
|
|
4939
4325
|
|
|
4326
|
+
Return tuple (Bucket, Boolean): The first item is the bucket
|
|
4327
|
+
json payload from the OCI API call, the second item indicates
|
|
4328
|
+
if this is a new created bucket(True) or an existing bucket(False).
|
|
4329
|
+
|
|
4940
4330
|
Raises:
|
|
4941
|
-
StorageSpecError: If externally created bucket is attempted to be
|
|
4942
|
-
mounted without specifying storage source.
|
|
4943
4331
|
StorageBucketCreateError: If creating the bucket fails
|
|
4944
4332
|
StorageBucketGetError: If fetching a bucket fails
|
|
4945
|
-
StorageExternalDeletionError: If externally deleted storage is
|
|
4946
|
-
attempted to be fetched while reconstructing the storage for
|
|
4947
|
-
'sky storage delete' or 'sky start'
|
|
4948
4333
|
"""
|
|
4949
|
-
nebius_s = nebius.resource('s3')
|
|
4950
|
-
bucket = nebius_s.Bucket(self.name)
|
|
4951
4334
|
try:
|
|
4952
|
-
|
|
4953
|
-
|
|
4954
|
-
|
|
4955
|
-
# accessible.
|
|
4956
|
-
self.client.head_bucket(Bucket=self.name)
|
|
4957
|
-
self._validate_existing_bucket()
|
|
4335
|
+
get_bucket_response = self.client.get_bucket(
|
|
4336
|
+
namespace_name=self.namespace, bucket_name=self.name)
|
|
4337
|
+
bucket = get_bucket_response.data
|
|
4958
4338
|
return bucket, False
|
|
4959
|
-
except
|
|
4960
|
-
|
|
4961
|
-
|
|
4962
|
-
|
|
4963
|
-
|
|
4964
|
-
|
|
4965
|
-
|
|
4339
|
+
except oci.service_exception() as e:
|
|
4340
|
+
if e.status == 404: # Not Found
|
|
4341
|
+
if isinstance(self.source,
|
|
4342
|
+
str) and self.source.startswith('oci://'):
|
|
4343
|
+
with ux_utils.print_exception_no_traceback():
|
|
4344
|
+
raise exceptions.StorageBucketGetError(
|
|
4345
|
+
'Attempted to connect to a non-existent bucket: '
|
|
4346
|
+
f'{self.source}') from e
|
|
4347
|
+
else:
|
|
4348
|
+
# If bucket cannot be found (i.e., does not exist), it is
|
|
4349
|
+
# to be created by Sky. However, creation is skipped if
|
|
4350
|
+
# Store object is being reconstructed for deletion.
|
|
4351
|
+
if self.sync_on_reconstruction:
|
|
4352
|
+
bucket = self._create_oci_bucket(self.name)
|
|
4353
|
+
return bucket, True
|
|
4354
|
+
else:
|
|
4355
|
+
return None, False
|
|
4356
|
+
elif e.status == 401: # Unauthorized
|
|
4357
|
+
# AccessDenied error for buckets that are private and not
|
|
4358
|
+
# owned by user.
|
|
4359
|
+
command = (
|
|
4360
|
+
f'oci os object list --namespace-name {self.namespace} '
|
|
4361
|
+
f'--bucket-name {self.name}')
|
|
4966
4362
|
with ux_utils.print_exception_no_traceback():
|
|
4967
4363
|
raise exceptions.StorageBucketGetError(
|
|
4968
4364
|
_BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
|
|
4969
4365
|
f' To debug, consider running `{command}`.') from e
|
|
4366
|
+
else:
|
|
4367
|
+
# Unknown / unexpected error happened. This might happen when
|
|
4368
|
+
# Object storage service itself functions not normal (e.g.
|
|
4369
|
+
# maintainance event causes internal server error or request
|
|
4370
|
+
# timeout, etc).
|
|
4371
|
+
with ux_utils.print_exception_no_traceback():
|
|
4372
|
+
raise exceptions.StorageBucketGetError(
|
|
4373
|
+
f'Failed to connect to OCI bucket {self.name}') from e
|
|
4970
4374
|
|
|
4971
|
-
|
|
4972
|
-
|
|
4973
|
-
raise exceptions.StorageBucketGetError(
|
|
4974
|
-
'Attempted to use a non-existent bucket as a source: '
|
|
4975
|
-
f'{self.source}. Consider using `aws s3 ls '
|
|
4976
|
-
f's3://{self.name} '
|
|
4977
|
-
f'--profile={nebius.NEBIUS_PROFILE_NAME}` to debug.')
|
|
4375
|
+
def mount_command(self, mount_path: str) -> str:
|
|
4376
|
+
"""Returns the command to mount the bucket to the mount_path.
|
|
4978
4377
|
|
|
4979
|
-
|
|
4980
|
-
|
|
4981
|
-
|
|
4982
|
-
|
|
4983
|
-
|
|
4984
|
-
|
|
4985
|
-
|
|
4986
|
-
|
|
4987
|
-
|
|
4988
|
-
|
|
4989
|
-
|
|
4990
|
-
|
|
4991
|
-
|
|
4992
|
-
|
|
4378
|
+
Uses Rclone to mount the bucket.
|
|
4379
|
+
|
|
4380
|
+
Args:
|
|
4381
|
+
mount_path: str; Path to mount the bucket to.
|
|
4382
|
+
"""
|
|
4383
|
+
install_cmd = mounting_utils.get_rclone_install_cmd()
|
|
4384
|
+
mount_cmd = mounting_utils.get_oci_mount_cmd(
|
|
4385
|
+
mount_path=mount_path,
|
|
4386
|
+
store_name=self.name,
|
|
4387
|
+
region=str(self.region),
|
|
4388
|
+
namespace=self.namespace,
|
|
4389
|
+
compartment=self.bucket.compartment_id,
|
|
4390
|
+
config_file=self.oci_config_file,
|
|
4391
|
+
config_profile=self.config_profile)
|
|
4392
|
+
version_check_cmd = mounting_utils.get_rclone_version_check_cmd()
|
|
4393
|
+
|
|
4394
|
+
return mounting_utils.get_mounting_command(mount_path, install_cmd,
|
|
4395
|
+
mount_cmd, version_check_cmd)
|
|
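The mount path here is a thin composition: install rclone if it is missing, check its version, then run the generated mount command. A minimal sketch of that composition, assuming (hypothetically) that the three pieces are plain shell strings chained together; the actual logic lives in `mounting_utils.get_mounting_command` and may differ:

    # Hypothetical sketch of chaining install, version-check and mount steps
    # into one shell command; not the actual mounting_utils implementation.
    def compose_mount_script(install_cmd: str, version_check_cmd: str,
                             mount_cmd: str) -> str:
        # Run the steps in order and stop at the first failure.
        return ' && '.join([install_cmd, version_check_cmd, mount_cmd])

    # Example:
    # compose_mount_script('which rclone || ./install_rclone.sh',
    #                      'rclone version', 'rclone mount ...')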
4993 4396
4994 4397       def _download_file(self, remote_path: str, local_path: str) -> None:
4995        -         """Downloads file from remote to local on
4996        -         using the boto3 API
     4398 +         """Downloads file from remote to local on OCI bucket
4997 4399
4998 4400           Args:
4999        -             remote_path: str; Remote path on
     4401 +             remote_path: str; Remote path on OCI bucket
5000 4402               local_path: str; Local path on user's device
5001 4403           """
5002        -         self.
     4404 +         if remote_path.startswith(f'/{self.name}'):
     4405 +             # If the remote path is /bucket_name, we need to
     4406 +             # remove the leading /
     4407 +             remote_path = remote_path.lstrip('/')
5003 4408
5004        -
5005        -
     4409 +         filename = os.path.basename(remote_path)
     4410 +         if not local_path.endswith(filename):
     4411 +             local_path = os.path.join(local_path, filename)
     4412 +
     4413 +         @oci.with_oci_env
     4414 +         def get_file_download_command(remote_path, local_path):
     4415 +             download_command = (f'oci os object get --bucket-name {self.name} '
     4416 +                                 f'--namespace-name {self.namespace} '
     4417 +                                 f'--region {self.region} --name {remote_path} '
     4418 +                                 f'--file {local_path}')
5006 4419
5007        -
     4420 +             return download_command
5008 4421
5009        -
5010        -
5011        -
5012        -
5013        -
5014        -
5015        -
5016        -
5017        -
5018        -
5019        -
5020        -
5021        -
     4422 +         download_command = get_file_download_command(remote_path, local_path)
     4423 +
     4424 +         try:
     4425 +             with rich_utils.safe_status(
     4426 +                     f'[bold cyan]Downloading: {remote_path} -> {local_path}[/]'
     4427 +             ):
     4428 +                 subprocess.check_output(download_command,
     4429 +                                         stderr=subprocess.STDOUT,
     4430 +                                         shell=True)
     4431 +         except subprocess.CalledProcessError as e:
     4432 +             logger.error(f'Download failed: {remote_path} -> {local_path}.\n'
     4433 +                          f'Detail errors: {e.output}')
     4434 +             with ux_utils.print_exception_no_traceback():
     4435 +                 raise exceptions.StorageBucketDeleteError(
     4436 +                     f'Failed download file {self.name}:{remote_path}.') from e
4437
|
|
|
5023
|
-
def
|
|
5024
|
-
"""Creates
|
|
4438
|
+
def _create_oci_bucket(self, bucket_name: str) -> StorageHandle:
|
|
4439
|
+
"""Creates OCI bucket with specific name in specific region
|
|
5025
4440
|
|
|
5026
4441
|
Args:
|
|
5027
4442
|
bucket_name: str; Name of bucket
|
|
5028
|
-
|
|
5029
|
-
StorageBucketCreateError: If bucket creation fails.
|
|
4443
|
+
region: str; Region name, e.g. us-central1, us-west1
|
|
5030
4444
|
"""
|
|
5031
|
-
|
|
4445
|
+
logger.debug(f'_create_oci_bucket: {bucket_name}')
|
|
5032
4446
|
try:
|
|
5033
|
-
|
|
5034
|
-
|
|
4447
|
+
create_bucket_response = self.client.create_bucket(
|
|
4448
|
+
namespace_name=self.namespace,
|
|
4449
|
+
create_bucket_details=oci.oci.object_storage.models.
|
|
4450
|
+
CreateBucketDetails(
|
|
4451
|
+
name=bucket_name,
|
|
4452
|
+
compartment_id=self.compartment,
|
|
4453
|
+
))
|
|
4454
|
+
bucket = create_bucket_response.data
|
|
4455
|
+
return bucket
|
|
4456
|
+
except oci.service_exception() as e:
|
|
5035
4457
|
with ux_utils.print_exception_no_traceback():
|
|
5036
4458
|
raise exceptions.StorageBucketCreateError(
|
|
5037
|
-
f'
|
|
5038
|
-
|
|
5039
|
-
|
|
4459
|
+
f'Failed to create OCI bucket: {self.name}') from e
|
|
4460
|
+
|
|
4461
|
+
def _delete_oci_bucket(self, bucket_name: str) -> bool:
|
|
4462
|
+
"""Deletes OCI bucket, including all objects in bucket
|
|
4463
|
+
|
|
4464
|
+
Args:
|
|
4465
|
+
bucket_name: str; Name of bucket
|
|
4466
|
+
|
|
4467
|
+
Returns:
|
|
4468
|
+
bool; True if bucket was deleted, False if it was deleted externally.
|
|
4469
|
+
"""
|
|
4470
|
+
logger.debug(f'_delete_oci_bucket: {bucket_name}')
|
|
4471
|
+
|
|
4472
|
+
@oci.with_oci_env
|
|
4473
|
+
def get_bucket_delete_command(bucket_name):
|
|
4474
|
+
remove_command = (f'oci os bucket delete --bucket-name '
|
|
4475
|
+
f'--region {self.region} '
|
|
4476
|
+
f'{bucket_name} --empty --force')
|
|
4477
|
+
|
|
4478
|
+
return remove_command
|
|
4479
|
+
|
|
4480
|
+
remove_command = get_bucket_delete_command(bucket_name)
|
|
5040
4481
|
|
|
5041
|
-
def _execute_nebius_remove_command(self, command: str, bucket_name: str,
|
|
5042
|
-
hint_operating: str,
|
|
5043
|
-
hint_failed: str) -> bool:
|
|
5044
4482
|
try:
|
|
5045
4483
|
with rich_utils.safe_status(
|
|
5046
|
-
|
|
5047
|
-
subprocess.check_output(
|
|
4484
|
+
f'[bold cyan]Deleting OCI bucket {bucket_name}[/]'):
|
|
4485
|
+
subprocess.check_output(remove_command.split(' '),
|
|
5048
4486
|
stderr=subprocess.STDOUT)
|
|
5049
4487
|
except subprocess.CalledProcessError as e:
|
|
5050
|
-
if '
|
|
4488
|
+
if 'BucketNotFound' in e.output.decode('utf-8'):
|
|
5051
4489
|
logger.debug(
|
|
5052
4490
|
_BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
|
|
5053
4491
|
bucket_name=bucket_name))
|
|
5054
4492
|
return False
|
|
5055
4493
|
else:
|
|
4494
|
+
logger.error(e.output)
|
|
5056
4495
|
with ux_utils.print_exception_no_traceback():
|
|
5057
4496
|
raise exceptions.StorageBucketDeleteError(
|
|
5058
|
-
f'{
|
|
5059
|
-
f'Detailed error: {e.output}')
|
|
4497
|
+
f'Failed to delete OCI bucket {bucket_name}.')
|
|
5060
4498
|
return True
|
|
5061
4499
|
|
|
5062
|
-
def _delete_nebius_bucket(self, bucket_name: str) -> bool:
|
|
5063
|
-
"""Deletes S3 bucket, including all objects in bucket
|
|
5064
4500
|
|
|
5065
|
-
|
|
5066
|
-
|
|
     4501 + @register_s3_compatible_store
     4502 + class S3Store(S3CompatibleStore):
     4503 +     """S3Store inherits from S3CompatibleStore and represents the backend
     4504 +     for S3 buckets.
     4505 +     """
5067 4506
5068        -
5069        -
     4507 +     _DEFAULT_REGION = 'us-east-1'
     4508 +     _CUSTOM_ENDPOINT_REGIONS = [
     4509 +         'ap-east-1', 'me-south-1', 'af-south-1', 'eu-south-1', 'eu-south-2',
     4510 +         'ap-south-2', 'ap-southeast-3', 'ap-southeast-4', 'me-central-1',
     4511 +         'il-central-1'
     4512 +     ]
5070 4513
5071        -
5072        -
5073        -
5074        -
5075        -
5076        -
5077        -
5078        -     #
5079        -     #
5080        -     #
5081        -
5082        -
5083        -
5084        -
5085        -
5086        -
5087        -
5088        -
5089        -         return False
     4514 +     def __init__(self,
     4515 +                  name: str,
     4516 +                  source: str,
     4517 +                  region: Optional[str] = None,
     4518 +                  is_sky_managed: Optional[bool] = None,
     4519 +                  sync_on_reconstruction: bool = True,
     4520 +                  _bucket_sub_path: Optional[str] = None):
     4521 +         # TODO(romilb): This is purely a stopgap fix for
     4522 +         # https://github.com/skypilot-org/skypilot/issues/3405
     4523 +         # We should eventually make all opt-in regions also work for S3 by
     4524 +         # passing the right endpoint flags.
     4525 +         if region in self._CUSTOM_ENDPOINT_REGIONS:
     4526 +             logger.warning('AWS opt-in regions are not supported for S3. '
     4527 +                            f'Falling back to default region '
     4528 +                            f'{self._DEFAULT_REGION} for bucket {name!r}.')
     4529 +             region = self._DEFAULT_REGION
     4530 +         super().__init__(name, source, region, is_sky_managed,
     4531 +                          sync_on_reconstruction, _bucket_sub_path)
5090 4532
5091        -
5092        -
5093        -
5094        -
5095        -
5096        -
5097        -
5098        -
     4533 +     @classmethod
     4534 +     def get_config(cls) -> S3CompatibleConfig:
     4535 +         """Return the configuration for AWS S3."""
     4536 +         return S3CompatibleConfig(
     4537 +             store_type='S3',
     4538 +             url_prefix='s3://',
     4539 +             client_factory=data_utils.create_s3_client,
     4540 +             resource_factory=lambda name: aws.resource('s3').Bucket(name),
     4541 +             split_path=data_utils.split_s3_path,
     4542 +             verify_bucket=data_utils.verify_s3_bucket,
     4543 +             cloud_name=str(clouds.AWS()),
     4544 +             default_region=cls._DEFAULT_REGION,
     4545 +             mount_cmd_factory=mounting_utils.get_s3_mount_cmd,
     4546 +         )
     4547 +
     4548 +     def mount_cached_command(self, mount_path: str) -> str:
     4549 +         install_cmd = mounting_utils.get_rclone_install_cmd()
     4550 +         rclone_profile_name = (
     4551 +             data_utils.Rclone.RcloneStores.S3.get_profile_name(self.name))
     4552 +         rclone_config = data_utils.Rclone.RcloneStores.S3.get_config(
     4553 +             rclone_profile_name=rclone_profile_name)
     4554 +         mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
     4555 +             rclone_config, rclone_profile_name, self.bucket.name, mount_path)
     4556 +         return mounting_utils.get_mounting_command(mount_path, install_cmd,
     4557 +                                                    mount_cached_cmd)
     4558 +
     4559 +
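S3Store above and the R2, Nebius and CoreWeave stores below all follow the same extension pattern: a subclass declares its provider-specific wiring in a `get_config()` classmethod and registers itself with the `@register_s3_compatible_store` decorator. A minimal, self-contained sketch of how such a decorator-based registry can work; the names `DemoConfig`, `DemoS3Store` and `_REGISTRY` are hypothetical, and the real decorator and `S3CompatibleConfig` live elsewhere in this module and may differ:

    # Hypothetical sketch of a decorator-based store registry; illustrative only.
    from dataclasses import dataclass
    from typing import Dict


    @dataclass
    class DemoConfig:  # stands in for S3CompatibleConfig
        store_type: str
        url_prefix: str


    _REGISTRY: Dict[str, type] = {}


    def register_s3_compatible_store(cls):
        # Map the store's declared type (e.g. 'S3', 'R2') to its class so a
        # store type or URL prefix can later be resolved to an implementation.
        _REGISTRY[cls.get_config().store_type] = cls
        return cls


    @register_s3_compatible_store
    class DemoS3Store:
        @classmethod
        def get_config(cls) -> DemoConfig:
            return DemoConfig(store_type='S3', url_prefix='s3://')


    assert _REGISTRY['S3'] is DemoS3Store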
     4560 + @register_s3_compatible_store
     4561 + class R2Store(S3CompatibleStore):
     4562 +     """R2Store inherits from S3CompatibleStore and represents the backend
     4563 +     for R2 buckets.
     4564 +     """
     4565 +
     4566 +     def __init__(self,
     4567 +                  name: str,
     4568 +                  source: str,
     4569 +                  region: Optional[str] = 'auto',
     4570 +                  is_sky_managed: Optional[bool] = None,
     4571 +                  sync_on_reconstruction: bool = True,
     4572 +                  _bucket_sub_path: Optional[str] = None):
     4573 +         super().__init__(name, source, region, is_sky_managed,
     4574 +                          sync_on_reconstruction, _bucket_sub_path)
     4575 +
     4576 +     @classmethod
     4577 +     def get_config(cls) -> S3CompatibleConfig:
     4578 +         """Return the configuration for Cloudflare R2."""
     4579 +         return S3CompatibleConfig(
     4580 +             store_type='R2',
     4581 +             url_prefix='r2://',
     4582 +             client_factory=lambda region: data_utils.create_r2_client(region or
     4583 +                                                                       'auto'),
     4584 +             resource_factory=lambda name: cloudflare.resource('s3').Bucket(name
     4585 +                                                                            ),
     4586 +             split_path=data_utils.split_r2_path,
     4587 +             verify_bucket=data_utils.verify_r2_bucket,
     4588 +             credentials_file=cloudflare.R2_CREDENTIALS_PATH,
     4589 +             aws_profile=cloudflare.R2_PROFILE_NAME,
     4590 +             get_endpoint_url=lambda: cloudflare.create_endpoint(), # pylint: disable=unnecessary-lambda
     4591 +             extra_cli_args=['--checksum-algorithm', 'CRC32'], # R2 specific
     4592 +             cloud_name=cloudflare.NAME,
     4593 +             default_region='auto',
     4594 +             mount_cmd_factory=cls._get_r2_mount_cmd,
     4595 +         )
     4596 +
     4597 +     @classmethod
     4598 +     def _get_r2_mount_cmd(cls, bucket_name: str, mount_path: str,
     4599 +                           bucket_sub_path: Optional[str]) -> str:
     4600 +         """Factory method for R2 mount command."""
     4601 +         endpoint_url = cloudflare.create_endpoint()
     4602 +         return mounting_utils.get_r2_mount_cmd(cloudflare.R2_CREDENTIALS_PATH,
     4603 +                                                cloudflare.R2_PROFILE_NAME,
     4604 +                                                endpoint_url, bucket_name,
     4605 +                                                mount_path, bucket_sub_path)
     4606 +
     4607 +     def mount_cached_command(self, mount_path: str) -> str:
     4608 +         """R2-specific cached mount implementation using rclone."""
     4609 +         install_cmd = mounting_utils.get_rclone_install_cmd()
     4610 +         rclone_profile_name = (
     4611 +             data_utils.Rclone.RcloneStores.R2.get_profile_name(self.name))
     4612 +         rclone_config = data_utils.Rclone.RcloneStores.R2.get_config(
     4613 +             rclone_profile_name=rclone_profile_name)
     4614 +         mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
     4615 +             rclone_config, rclone_profile_name, self.bucket.name, mount_path)
     4616 +         return mounting_utils.get_mounting_command(mount_path, install_cmd,
     4617 +                                                    mount_cached_cmd)
     4618 +
     4619 +
     4620 + @register_s3_compatible_store
     4621 + class NebiusStore(S3CompatibleStore):
     4622 +     """NebiusStore inherits from S3CompatibleStore and represents the backend
     4623 +     for Nebius Object Storage buckets.
     4624 +     """
     4625 +
     4626 +     @classmethod
     4627 +     def get_config(cls) -> S3CompatibleConfig:
     4628 +         """Return the configuration for Nebius Object Storage."""
     4629 +         return S3CompatibleConfig(
     4630 +             store_type='NEBIUS',
     4631 +             url_prefix='nebius://',
     4632 +             client_factory=lambda region: data_utils.create_nebius_client(),
     4633 +             resource_factory=lambda name: nebius.resource('s3').Bucket(name),
     4634 +             split_path=data_utils.split_nebius_path,
     4635 +             verify_bucket=data_utils.verify_nebius_bucket,
     4636 +             aws_profile=nebius.NEBIUS_PROFILE_NAME,
     4637 +             cloud_name=str(clouds.Nebius()),
     4638 +             mount_cmd_factory=cls._get_nebius_mount_cmd,
     4639 +         )
     4640 +
     4641 +     @classmethod
     4642 +     def _get_nebius_mount_cmd(cls, bucket_name: str, mount_path: str,
     4643 +                               bucket_sub_path: Optional[str]) -> str:
     4644 +         """Factory method for Nebius mount command."""
     4645 +         # We need to get the endpoint URL, but since this is a static method,
     4646 +         # we'll need to create a client to get it
     4647 +         client = data_utils.create_nebius_client()
     4648 +         endpoint_url = client.meta.endpoint_url
     4649 +         return mounting_utils.get_nebius_mount_cmd(nebius.NEBIUS_PROFILE_NAME,
     4650 +                                                    bucket_name, endpoint_url,
     4651 +                                                    mount_path, bucket_sub_path)
     4652 +
     4653 +     def mount_cached_command(self, mount_path: str) -> str:
     4654 +         """Nebius-specific cached mount implementation using rclone."""
     4655 +         install_cmd = mounting_utils.get_rclone_install_cmd()
     4656 +         rclone_profile_name = (
     4657 +             data_utils.Rclone.RcloneStores.NEBIUS.get_profile_name(self.name))
     4658 +         rclone_config = data_utils.Rclone.RcloneStores.NEBIUS.get_config(
     4659 +             rclone_profile_name=rclone_profile_name)
     4660 +         mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
     4661 +             rclone_config, rclone_profile_name, self.bucket.name, mount_path)
     4662 +         return mounting_utils.get_mounting_command(mount_path, install_cmd,
     4663 +                                                    mount_cached_cmd)
     4664 +
     4665 +
     4666 + @register_s3_compatible_store
     4667 + class CoreWeaveStore(S3CompatibleStore):
     4668 +     """CoreWeaveStore inherits from S3CompatibleStore and represents the backend
     4669 +     for CoreWeave Object Storage buckets.
     4670 +     """
     4671 +
     4672 +     @classmethod
     4673 +     def get_config(cls) -> S3CompatibleConfig:
     4674 +         """Return the configuration for CoreWeave Object Storage."""
     4675 +         return S3CompatibleConfig(
     4676 +             store_type='COREWEAVE',
     4677 +             url_prefix='cw://',
     4678 +             client_factory=lambda region: data_utils.create_coreweave_client(),
     4679 +             resource_factory=lambda name: coreweave.resource('s3').Bucket(name),
     4680 +             split_path=data_utils.split_coreweave_path,
     4681 +             verify_bucket=data_utils.verify_coreweave_bucket,
     4682 +             aws_profile=coreweave.COREWEAVE_PROFILE_NAME,
     4683 +             get_endpoint_url=coreweave.get_endpoint,
     4684 +             credentials_file=coreweave.COREWEAVE_CREDENTIALS_PATH,
     4685 +             config_file=coreweave.COREWEAVE_CONFIG_PATH,
     4686 +             cloud_name=coreweave.NAME,
     4687 +             default_region=coreweave.DEFAULT_REGION,
     4688 +             mount_cmd_factory=cls._get_coreweave_mount_cmd,
     4689 +         )
     4690 +
     4691 +     def _get_bucket(self) -> Tuple[StorageHandle, bool]:
     4692 +         """Get or create bucket using CoreWeave's S3 API"""
     4693 +         bucket = self.config.resource_factory(self.name)
     4694 +
     4695 +         # Use our custom bucket verification instead of head_bucket
     4696 +         if data_utils.verify_coreweave_bucket(self.name):
     4697 +             self._validate_existing_bucket()
     4698 +             return bucket, False
     4699 +
     4700 +         # TODO(hailong): Enable the bucket creation for CoreWeave
     4701 +         # Disable this to avoid waiting too long until the following
     4702 +         # issue is resolved:
     4703 +         # https://github.com/skypilot-org/skypilot/issues/7736
     4704 +         raise exceptions.StorageBucketGetError(
     4705 +             f'Bucket {self.name!r} does not exist. CoreWeave buckets can take'
     4706 +             ' a long time to become accessible after creation, so SkyPilot'
     4707 +             ' does not create them automatically. Please create the bucket'
     4708 +             ' manually in CoreWeave and wait for it to be accessible before'
     4709 +             ' using it.')
     4710 +
     4711 +         # # Check if this is a source with URL prefix (existing bucket case)
     4712 +         # if isinstance(self.source, str) and self.source.startswith(
     4713 +         #         self.config.url_prefix):
     4714 +         #     with ux_utils.print_exception_no_traceback():
     4715 +         #         raise exceptions.StorageBucketGetError(
     4716 +         #             'Attempted to use a non-existent bucket as a source: '
     4717 +         #             f'{self.source}.')
     4718 +
     4719 +         # # If bucket cannot be found, create it if needed
     4720 +         # if self.sync_on_reconstruction:
     4721 +         #     bucket = self._create_bucket(self.name)
     4722 +         #     return bucket, True
     4723 +         # else:
     4724 +         #     raise exceptions.StorageExternalDeletionError(
     4725 +         #         'Attempted to fetch a non-existent bucket: '
     4726 +         #         f'{self.name}')
     4727 +
     4728 +     @classmethod
     4729 +     def _get_coreweave_mount_cmd(cls, bucket_name: str, mount_path: str,
     4730 +                                  bucket_sub_path: Optional[str]) -> str:
     4731 +         """Factory method for CoreWeave mount command."""
     4732 +         endpoint_url = coreweave.get_endpoint()
     4733 +         return mounting_utils.get_coreweave_mount_cmd(
     4734 +             coreweave.COREWEAVE_CREDENTIALS_PATH,
     4735 +             coreweave.COREWEAVE_PROFILE_NAME, bucket_name, endpoint_url,
     4736 +             mount_path, bucket_sub_path)
     4737 +
     4738 +     def mount_cached_command(self, mount_path: str) -> str:
     4739 +         """CoreWeave-specific cached mount implementation using rclone."""
     4740 +         install_cmd = mounting_utils.get_rclone_install_cmd()
     4741 +         rclone_profile_name = (
     4742 +             data_utils.Rclone.RcloneStores.COREWEAVE.get_profile_name(
     4743 +                 self.name))
     4744 +         rclone_config = data_utils.Rclone.RcloneStores.COREWEAVE.get_config(
     4745 +             rclone_profile_name=rclone_profile_name)
     4746 +         mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
     4747 +             rclone_config, rclone_profile_name, self.bucket.name, mount_path)
     4748 +         return mounting_utils.get_mounting_command(mount_path, install_cmd,
     4749 +                                                    mount_cached_cmd)
5099 4750
5100        -     def
5101        -
5102        -
5103        -
5104        -
5105        -
5106        -
5107        -
5108        -
5109        -
5110        -
     4751 +     def _create_bucket(self, bucket_name: str) -> StorageHandle:
     4752 +         """Create bucket using S3 API with timing handling for CoreWeave."""
     4753 +         result = super()._create_bucket(bucket_name)
     4754 +         # Ensure bucket is created
     4755 +         # The newly created bucket ever takes about 18min to be accessible,
     4756 +         # here we just retry for 36 times (5s * 36 = 180s) to avoid waiting
     4757 +         # too long
     4758 +         # TODO(hailong): Update the logic here when the following
     4759 +         # issue is resolved:
     4760 +         # https://github.com/skypilot-org/skypilot/issues/7736
     4761 +         data_utils.verify_coreweave_bucket(bucket_name, retry=36)
     4762 +
     4763 +         return result