skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +81 -16
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +173 -30
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1323 -397
- sky/backends/cloud_vm_ray_backend.py +1749 -1029
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/task_codegen.py +633 -0
- sky/backends/wheel_utils.py +55 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +90 -49
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +338 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +533 -185
- sky/cli.py +5 -5975
- sky/client/{cli.py → cli/command.py} +2591 -1956
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +322 -0
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +78 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1219 -319
- sky/client/sdk_async.py +827 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +564 -164
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +140 -40
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +570 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +210 -81
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -86
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +231 -167
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +375 -173
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1460 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +126 -2
- sky/execution.py +216 -63
- sky/global_user_state.py +2390 -586
- sky/jobs/__init__.py +7 -0
- sky/jobs/client/sdk.py +300 -58
- sky/jobs/client/sdk_async.py +161 -0
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +848 -275
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +402 -152
- sky/jobs/scheduler.py +314 -189
- sky/jobs/server/core.py +836 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2109 -706
- sky/jobs/utils.py +1306 -215
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +453 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +136 -50
- sky/provision/azure/instance.py +11 -6
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +140 -33
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +101 -20
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +919 -280
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +44 -43
- sky/provision/kubernetes/utils.py +1221 -534
- sky/provision/kubernetes/volume.py +343 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +237 -137
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +117 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +214 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +707 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +4 -4
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +17 -20
- sky/py.typed +0 -0
- sky/resources.py +845 -119
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +227 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +12 -9
- sky/serve/controller.py +68 -17
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +116 -2
- sky/serve/replica_managers.py +434 -249
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +569 -257
- sky/serve/serve_utils.py +775 -265
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +192 -89
- sky/serve/service_spec.py +144 -20
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +202 -0
- sky/server/common.py +478 -182
- sky/server/config.py +85 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +295 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/middleware_utils.py +166 -0
- sky/server/requests/executor.py +558 -138
- sky/server/requests/payloads.py +364 -24
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +121 -0
- sky/server/requests/requests.py +822 -226
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +455 -0
- sky/server/server.py +1309 -285
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +327 -61
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +11 -1
- sky/setup_files/alembic.ini +160 -0
- sky/setup_files/dependencies.py +139 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +114 -7
- sky/skylet/attempt_skylet.py +106 -24
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +29 -20
- sky/skylet/constants.py +216 -25
- sky/skylet/events.py +101 -21
- sky/skylet/job_lib.py +345 -164
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +568 -0
- sky/skylet/skylet.py +72 -4
- sky/skylet/subprocess_daemon.py +104 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +685 -163
- sky/templates/aws-ray.yml.j2 +11 -3
- sky/templates/azure-ray.yml.j2 +2 -1
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +2 -1
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +62 -1
- sky/templates/hyperbolic-ray.yml.j2 +68 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +611 -50
- sky/templates/lambda-ray.yml.j2 +2 -1
- sky/templates/nebius-ray.yml.j2 +34 -12
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +2 -1
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +10 -1
- sky/templates/scp-ray.yml.j2 +4 -50
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/vast-ray.yml.j2 +1 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +212 -37
- sky/usage/usage_lib.py +31 -15
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +397 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +35 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +38 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +314 -91
- sky/utils/config_utils.py +74 -5
- sky/utils/context.py +403 -0
- sky/utils/context_utils.py +242 -0
- sky/utils/controller_utils.py +383 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +485 -0
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +137 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +15 -29
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +18 -8
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +416 -0
- sky/utils/log_utils.py +82 -107
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +217 -39
- sky/utils/schemas.py +955 -160
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +29 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +26 -53
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +165 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +150 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +270 -0
- sky/volumes/server/server.py +124 -0
- sky/volumes/volume.py +215 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
- skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
- skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/clouds/runpod.py
CHANGED
|
@@ -1,19 +1,20 @@
|
|
|
1
1
|
""" RunPod Cloud. """
|
|
2
2
|
|
|
3
|
+
from importlib import util as import_lib_util
|
|
4
|
+
import os
|
|
3
5
|
import typing
|
|
4
6
|
from typing import Dict, Iterator, List, Optional, Tuple, Union
|
|
5
7
|
|
|
8
|
+
from sky import catalog
|
|
6
9
|
from sky import clouds
|
|
7
|
-
from sky.clouds import service_catalog
|
|
8
10
|
from sky.utils import registry
|
|
9
11
|
from sky.utils import resources_utils
|
|
10
12
|
|
|
11
13
|
if typing.TYPE_CHECKING:
|
|
12
14
|
from sky import resources as resources_lib
|
|
15
|
+
from sky.utils import volume as volume_lib
|
|
13
16
|
|
|
14
|
-
|
|
15
|
-
'config.toml',
|
|
16
|
-
]
|
|
17
|
+
_CREDENTIAL_FILE = 'config.toml'
|
|
17
18
|
|
|
18
19
|
|
|
19
20
|
@registry.CLOUD_REGISTRY.register
|
|
@@ -30,14 +31,20 @@ class RunPod(clouds.Cloud):
|
|
|
30
31
|
'are non-trivial on RunPod.'),
|
|
31
32
|
clouds.CloudImplementationFeatures.CUSTOM_DISK_TIER:
|
|
32
33
|
('Customizing disk tier is not supported yet on RunPod.'),
|
|
34
|
+
clouds.CloudImplementationFeatures.CUSTOM_NETWORK_TIER:
|
|
35
|
+
('Custom network tier is not supported yet on RunPod.'),
|
|
33
36
|
clouds.CloudImplementationFeatures.STORAGE_MOUNTING:
|
|
34
37
|
('Mounting object stores is not supported on RunPod. To read data '
|
|
35
38
|
'from object stores on RunPod, use `mode: COPY` to copy the data '
|
|
36
39
|
'to local disk.'),
|
|
37
40
|
clouds.CloudImplementationFeatures.HIGH_AVAILABILITY_CONTROLLERS:
|
|
38
41
|
('High availability controllers are not supported on RunPod.'),
|
|
42
|
+
clouds.CloudImplementationFeatures.CUSTOM_MULTI_NETWORK:
|
|
43
|
+
('Customized multiple network interfaces are not supported on '
|
|
44
|
+
'RunPod.'),
|
|
39
45
|
}
|
|
40
46
|
_MAX_CLUSTER_NAME_LEN_LIMIT = 120
|
|
47
|
+
_MAX_VOLUME_NAME_LEN_LIMIT = 30
|
|
41
48
|
_regions: List[clouds.Region] = []
|
|
42
49
|
|
|
43
50
|
PROVISIONER_VERSION = clouds.ProvisionerVersion.SKYPILOT
|
|
@@ -46,7 +53,9 @@ class RunPod(clouds.Cloud):
|
|
|
46
53
|
|
|
47
54
|
@classmethod
|
|
48
55
|
def _unsupported_features_for_resources(
|
|
49
|
-
cls,
|
|
56
|
+
cls,
|
|
57
|
+
resources: 'resources_lib.Resources',
|
|
58
|
+
region: Optional[str] = None,
|
|
50
59
|
) -> Dict[clouds.CloudImplementationFeatures, str]:
|
|
51
60
|
"""The features not supported based on the resources provided.
|
|
52
61
|
|
|
@@ -65,12 +74,17 @@ class RunPod(clouds.Cloud):
|
|
|
65
74
|
return cls._MAX_CLUSTER_NAME_LEN_LIMIT
|
|
66
75
|
|
|
67
76
|
@classmethod
|
|
68
|
-
def regions_with_offering(
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
77
|
+
def regions_with_offering(
|
|
78
|
+
cls,
|
|
79
|
+
instance_type: str,
|
|
80
|
+
accelerators: Optional[Dict[str, int]],
|
|
81
|
+
use_spot: bool,
|
|
82
|
+
region: Optional[str],
|
|
83
|
+
zone: Optional[str],
|
|
84
|
+
resources: Optional['resources_lib.Resources'] = None,
|
|
85
|
+
) -> List[clouds.Region]:
|
|
72
86
|
del accelerators # unused
|
|
73
|
-
regions =
|
|
87
|
+
regions = catalog.get_region_zones_for_instance_type(
|
|
74
88
|
instance_type, use_spot, 'runpod')
|
|
75
89
|
|
|
76
90
|
if region is not None:
|
|
@@ -88,8 +102,8 @@ class RunPod(clouds.Cloud):
|
|
|
88
102
|
cls,
|
|
89
103
|
instance_type: str,
|
|
90
104
|
) -> Tuple[Optional[float], Optional[float]]:
|
|
91
|
-
return
|
|
92
|
-
|
|
105
|
+
return catalog.get_vcpus_mem_from_instance_type(instance_type,
|
|
106
|
+
clouds='runpod')
|
|
93
107
|
|
|
94
108
|
@classmethod
|
|
95
109
|
def zones_provision_loop(
|
|
@@ -116,11 +130,11 @@ class RunPod(clouds.Cloud):
|
|
|
116
130
|
use_spot: bool,
|
|
117
131
|
region: Optional[str] = None,
|
|
118
132
|
zone: Optional[str] = None) -> float:
|
|
119
|
-
return
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
133
|
+
return catalog.get_hourly_cost(instance_type,
|
|
134
|
+
use_spot=use_spot,
|
|
135
|
+
region=region,
|
|
136
|
+
zone=zone,
|
|
137
|
+
clouds='runpod')
|
|
124
138
|
|
|
125
139
|
def accelerators_to_hourly_cost(self,
|
|
126
140
|
accelerators: Dict[str, int],
|
|
@@ -135,56 +149,65 @@ class RunPod(clouds.Cloud):
|
|
|
135
149
|
return 0.0
|
|
136
150
|
|
|
137
151
|
@classmethod
|
|
138
|
-
def get_default_instance_type(
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
152
|
+
def get_default_instance_type(cls,
|
|
153
|
+
cpus: Optional[str] = None,
|
|
154
|
+
memory: Optional[str] = None,
|
|
155
|
+
disk_tier: Optional[
|
|
156
|
+
resources_utils.DiskTier] = None,
|
|
157
|
+
region: Optional[str] = None,
|
|
158
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
144
159
|
"""Returns the default instance type for RunPod."""
|
|
145
|
-
return
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
160
|
+
return catalog.get_default_instance_type(cpus=cpus,
|
|
161
|
+
memory=memory,
|
|
162
|
+
disk_tier=disk_tier,
|
|
163
|
+
region=region,
|
|
164
|
+
zone=zone,
|
|
165
|
+
clouds='runpod')
|
|
149
166
|
|
|
150
167
|
@classmethod
|
|
151
168
|
def get_accelerators_from_instance_type(
|
|
152
169
|
cls, instance_type: str) -> Optional[Dict[str, Union[int, float]]]:
|
|
153
|
-
return
|
|
154
|
-
|
|
170
|
+
return catalog.get_accelerators_from_instance_type(instance_type,
|
|
171
|
+
clouds='runpod')
|
|
155
172
|
|
|
156
173
|
@classmethod
|
|
157
174
|
def get_zone_shell_cmd(cls) -> Optional[str]:
|
|
158
175
|
return None
|
|
159
176
|
|
|
160
177
|
def make_deploy_resources_variables(
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
178
|
+
self,
|
|
179
|
+
resources: 'resources_lib.Resources',
|
|
180
|
+
cluster_name: resources_utils.ClusterName,
|
|
181
|
+
region: 'clouds.Region',
|
|
182
|
+
zones: Optional[List['clouds.Zone']],
|
|
183
|
+
num_nodes: int,
|
|
184
|
+
dryrun: bool = False,
|
|
185
|
+
volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
|
|
186
|
+
) -> Dict[str, Optional[Union[str, bool]]]:
|
|
168
187
|
del dryrun, cluster_name # unused
|
|
169
188
|
assert zones is not None, (region, zones)
|
|
170
189
|
|
|
190
|
+
if volume_mounts and len(volume_mounts) > 1:
|
|
191
|
+
raise ValueError(f'RunPod only supports one network volume mount, '
|
|
192
|
+
f'but {len(volume_mounts)} are specified.')
|
|
193
|
+
|
|
171
194
|
zone_names = [zone.name for zone in zones]
|
|
172
195
|
|
|
173
|
-
|
|
174
|
-
acc_dict = self.get_accelerators_from_instance_type(
|
|
196
|
+
resources = resources.assert_launchable()
|
|
197
|
+
acc_dict = self.get_accelerators_from_instance_type(
|
|
198
|
+
resources.instance_type)
|
|
175
199
|
custom_resources = resources_utils.make_ray_custom_resources_str(
|
|
176
200
|
acc_dict)
|
|
177
201
|
|
|
178
|
-
if
|
|
179
|
-
image_id = 'runpod/base:
|
|
180
|
-
elif
|
|
181
|
-
image_id =
|
|
202
|
+
if resources.image_id is None:
|
|
203
|
+
image_id: Optional[str] = 'runpod/base:1.0.2-ubuntu2204'
|
|
204
|
+
elif resources.extract_docker_image() is not None:
|
|
205
|
+
image_id = resources.extract_docker_image()
|
|
182
206
|
else:
|
|
183
|
-
image_id =
|
|
207
|
+
image_id = resources.image_id[resources.region]
|
|
184
208
|
|
|
185
209
|
instance_type = resources.instance_type
|
|
186
210
|
use_spot = resources.use_spot
|
|
187
|
-
|
|
188
211
|
hourly_cost = self.instance_type_to_hourly_cost(
|
|
189
212
|
instance_type=instance_type, use_spot=use_spot)
|
|
190
213
|
|
|
@@ -232,7 +255,9 @@ class RunPod(clouds.Cloud):
|
|
|
232
255
|
default_instance_type = RunPod.get_default_instance_type(
|
|
233
256
|
cpus=resources.cpus,
|
|
234
257
|
memory=resources.memory,
|
|
235
|
-
disk_tier=resources.disk_tier
|
|
258
|
+
disk_tier=resources.disk_tier,
|
|
259
|
+
region=resources.region,
|
|
260
|
+
zone=resources.zone)
|
|
236
261
|
if default_instance_type is None:
|
|
237
262
|
# TODO: Add hints to all return values in this method to help
|
|
238
263
|
# users understand why the resources are not launchable.
|
|
@@ -243,15 +268,15 @@ class RunPod(clouds.Cloud):
|
|
|
243
268
|
|
|
244
269
|
assert len(accelerators) == 1, resources
|
|
245
270
|
acc, acc_count = list(accelerators.items())[0]
|
|
246
|
-
(instance_list,
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
271
|
+
(instance_list,
|
|
272
|
+
fuzzy_candidate_list) = catalog.get_instance_type_for_accelerator(
|
|
273
|
+
acc,
|
|
274
|
+
acc_count,
|
|
275
|
+
use_spot=resources.use_spot,
|
|
276
|
+
cpus=resources.cpus,
|
|
277
|
+
region=resources.region,
|
|
278
|
+
zone=resources.zone,
|
|
279
|
+
clouds='runpod')
|
|
255
280
|
if instance_list is None:
|
|
256
281
|
return resources_utils.FeasibleResources([], fuzzy_candidate_list,
|
|
257
282
|
None)
|
|
@@ -259,37 +284,92 @@ class RunPod(clouds.Cloud):
|
|
|
259
284
|
fuzzy_candidate_list, None)
|
|
260
285
|
|
|
261
286
|
@classmethod
|
|
262
|
-
def _check_compute_credentials(
|
|
287
|
+
def _check_compute_credentials(
|
|
288
|
+
cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
|
|
263
289
|
"""Checks if the user has access credentials to
|
|
264
290
|
RunPod's compute service."""
|
|
265
291
|
return cls._check_credentials()
|
|
266
292
|
|
|
267
293
|
@classmethod
|
|
268
294
|
def _check_credentials(cls) -> Tuple[bool, Optional[str]]:
|
|
269
|
-
"""
|
|
295
|
+
"""Verify that the user has valid credentials for RunPod. """
|
|
296
|
+
dependency_error_msg = ('Failed to import runpod or TOML parser. '
|
|
297
|
+
'Install: pip install "skypilot[runpod]".')
|
|
270
298
|
try:
|
|
271
|
-
|
|
272
|
-
|
|
299
|
+
runpod_spec = import_lib_util.find_spec('runpod')
|
|
300
|
+
if runpod_spec is None:
|
|
301
|
+
return False, dependency_error_msg
|
|
302
|
+
# Prefer stdlib tomllib (Python 3.11+); fallback to tomli
|
|
303
|
+
tomllib_spec = import_lib_util.find_spec('tomllib')
|
|
304
|
+
tomli_spec = import_lib_util.find_spec('tomli')
|
|
305
|
+
if tomllib_spec is None and tomli_spec is None:
|
|
306
|
+
return False, dependency_error_msg
|
|
307
|
+
except ValueError:
|
|
308
|
+
# docstring of importlib.util.find_spec:
|
|
309
|
+
# First, sys.modules is checked to see if the module was already
|
|
310
|
+
# imported.
|
|
311
|
+
# If so, then sys.modules[name].__spec__ is returned.
|
|
312
|
+
# If that happens to be set to None, then ValueError is raised.
|
|
313
|
+
return False, dependency_error_msg
|
|
314
|
+
|
|
315
|
+
valid, error = cls._check_runpod_credentials()
|
|
316
|
+
if not valid:
|
|
317
|
+
return False, (
|
|
318
|
+
f'{error} \n' # First line is indented by 4 spaces
|
|
319
|
+
' Credentials can be set up by running: \n'
|
|
320
|
+
f' $ pip install runpod \n'
|
|
321
|
+
f' $ runpod config\n'
|
|
322
|
+
' For more information, see https://docs.skypilot.co/en/latest/getting-started/installation.html#runpod' # pylint: disable=line-too-long
|
|
323
|
+
)
|
|
324
|
+
|
|
325
|
+
return True, None
|
|
273
326
|
|
|
274
|
-
|
|
327
|
+
@classmethod
|
|
328
|
+
def _check_runpod_credentials(cls, profile: str = 'default'):
|
|
329
|
+
"""Checks if the credentials file exists and is valid."""
|
|
330
|
+
credential_file = os.path.expanduser(f'~/.runpod/{_CREDENTIAL_FILE}')
|
|
331
|
+
if not os.path.exists(credential_file):
|
|
332
|
+
return False, '~/.runpod/config.toml does not exist.'
|
|
333
|
+
|
|
334
|
+
# We don't need to import TOML parser if config.toml does not exist.
|
|
335
|
+
# When needed, prefer stdlib tomllib (py>=3.11); otherwise use tomli.
|
|
336
|
+
# TODO(andy): remove this fallback after dropping Python 3.10 support.
|
|
337
|
+
try:
|
|
338
|
+
try:
|
|
339
|
+
import tomllib as toml # pylint: disable=import-outside-toplevel
|
|
340
|
+
except ModuleNotFoundError: # py<3.11
|
|
341
|
+
import tomli as toml # pylint: disable=import-outside-toplevel
|
|
342
|
+
except ModuleNotFoundError:
|
|
343
|
+
# Should never happen. We already installed proper dependencies for
|
|
344
|
+
# different Python versions in setup_files/dependencies.py.
|
|
345
|
+
return False, (
|
|
346
|
+
'~/.runpod/config.toml exists but no TOML parser is available. '
|
|
347
|
+
'Install tomli for Python < 3.11: pip install tomli.')
|
|
348
|
+
|
|
349
|
+
# Check for default api_key
|
|
350
|
+
try:
|
|
351
|
+
with open(credential_file, 'rb') as cred_file:
|
|
352
|
+
config = toml.load(cred_file)
|
|
353
|
+
|
|
354
|
+
if profile not in config:
|
|
275
355
|
return False, (
|
|
276
|
-
f'
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
356
|
+
f'~/.runpod/config.toml is missing {profile} profile.')
|
|
357
|
+
|
|
358
|
+
if 'api_key' not in config[profile]:
|
|
359
|
+
return (
|
|
360
|
+
False,
|
|
361
|
+
'~/.runpod/config.toml is missing '
|
|
362
|
+
f'api_key for {profile} profile.',
|
|
281
363
|
)
|
|
282
364
|
|
|
283
|
-
|
|
365
|
+
except (TypeError, ValueError):
|
|
366
|
+
return False, '~/.runpod/config.toml is not a valid TOML file.'
|
|
284
367
|
|
|
285
|
-
|
|
286
|
-
return False, ('Failed to import runpod. '
|
|
287
|
-
'To install, run: pip install skypilot[runpod]')
|
|
368
|
+
return True, None
|
|
288
369
|
|
|
289
370
|
def get_credential_file_mounts(self) -> Dict[str, str]:
|
|
290
371
|
return {
|
|
291
|
-
f'~/.runpod/{
|
|
292
|
-
for filename in _CREDENTIAL_FILES
|
|
372
|
+
f'~/.runpod/{_CREDENTIAL_FILE}': f'~/.runpod/{_CREDENTIAL_FILE}'
|
|
293
373
|
}
|
|
294
374
|
|
|
295
375
|
@classmethod
|
|
@@ -299,15 +379,25 @@ class RunPod(clouds.Cloud):
|
|
|
299
379
|
return None
|
|
300
380
|
|
|
301
381
|
def instance_type_exists(self, instance_type: str) -> bool:
|
|
302
|
-
return
|
|
382
|
+
return catalog.instance_type_exists(instance_type, 'runpod')
|
|
303
383
|
|
|
304
384
|
def validate_region_zone(self, region: Optional[str], zone: Optional[str]):
|
|
305
|
-
return
|
|
306
|
-
zone,
|
|
307
|
-
clouds='runpod')
|
|
385
|
+
return catalog.validate_region_zone(region, zone, clouds='runpod')
|
|
308
386
|
|
|
309
387
|
@classmethod
|
|
310
388
|
def get_image_size(cls, image_id: str, region: Optional[str]) -> float:
|
|
311
389
|
# TODO: use 0.0 for now to allow all images. We should change this to
|
|
312
390
|
# return the docker image size.
|
|
313
391
|
return 0.0
|
|
392
|
+
|
|
393
|
+
@classmethod
|
|
394
|
+
def is_volume_name_valid(cls,
|
|
395
|
+
volume_name: str) -> Tuple[bool, Optional[str]]:
|
|
396
|
+
"""Validates that the volume name is valid for this cloud.
|
|
397
|
+
|
|
398
|
+
- must be <= 30 characters
|
|
399
|
+
"""
|
|
400
|
+
if len(volume_name) > cls._MAX_VOLUME_NAME_LEN_LIMIT:
|
|
401
|
+
return (False, f'Volume name exceeds the maximum length of '
|
|
402
|
+
f'{cls._MAX_VOLUME_NAME_LEN_LIMIT} characters.')
|
|
403
|
+
return True, None
|