skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +81 -16
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +173 -30
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1323 -397
- sky/backends/cloud_vm_ray_backend.py +1749 -1029
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/task_codegen.py +633 -0
- sky/backends/wheel_utils.py +55 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +90 -49
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +338 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +533 -185
- sky/cli.py +5 -5975
- sky/client/{cli.py → cli/command.py} +2591 -1956
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +322 -0
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +78 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1219 -319
- sky/client/sdk_async.py +827 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +564 -164
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +140 -40
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +570 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +210 -81
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -86
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +231 -167
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +375 -173
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1460 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +126 -2
- sky/execution.py +216 -63
- sky/global_user_state.py +2390 -586
- sky/jobs/__init__.py +7 -0
- sky/jobs/client/sdk.py +300 -58
- sky/jobs/client/sdk_async.py +161 -0
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +848 -275
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +402 -152
- sky/jobs/scheduler.py +314 -189
- sky/jobs/server/core.py +836 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2109 -706
- sky/jobs/utils.py +1306 -215
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +453 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +136 -50
- sky/provision/azure/instance.py +11 -6
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +140 -33
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +101 -20
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +919 -280
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +44 -43
- sky/provision/kubernetes/utils.py +1221 -534
- sky/provision/kubernetes/volume.py +343 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +237 -137
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +117 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +214 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +707 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +4 -4
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +17 -20
- sky/py.typed +0 -0
- sky/resources.py +845 -119
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +227 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +12 -9
- sky/serve/controller.py +68 -17
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +116 -2
- sky/serve/replica_managers.py +434 -249
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +569 -257
- sky/serve/serve_utils.py +775 -265
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +192 -89
- sky/serve/service_spec.py +144 -20
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +202 -0
- sky/server/common.py +478 -182
- sky/server/config.py +85 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +295 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/middleware_utils.py +166 -0
- sky/server/requests/executor.py +558 -138
- sky/server/requests/payloads.py +364 -24
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +121 -0
- sky/server/requests/requests.py +822 -226
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +455 -0
- sky/server/server.py +1309 -285
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +327 -61
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +11 -1
- sky/setup_files/alembic.ini +160 -0
- sky/setup_files/dependencies.py +139 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +114 -7
- sky/skylet/attempt_skylet.py +106 -24
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +29 -20
- sky/skylet/constants.py +216 -25
- sky/skylet/events.py +101 -21
- sky/skylet/job_lib.py +345 -164
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +568 -0
- sky/skylet/skylet.py +72 -4
- sky/skylet/subprocess_daemon.py +104 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +685 -163
- sky/templates/aws-ray.yml.j2 +11 -3
- sky/templates/azure-ray.yml.j2 +2 -1
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +2 -1
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +62 -1
- sky/templates/hyperbolic-ray.yml.j2 +68 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +611 -50
- sky/templates/lambda-ray.yml.j2 +2 -1
- sky/templates/nebius-ray.yml.j2 +34 -12
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +2 -1
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +10 -1
- sky/templates/scp-ray.yml.j2 +4 -50
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/vast-ray.yml.j2 +1 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +212 -37
- sky/usage/usage_lib.py +31 -15
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +397 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +35 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +38 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +314 -91
- sky/utils/config_utils.py +74 -5
- sky/utils/context.py +403 -0
- sky/utils/context_utils.py +242 -0
- sky/utils/controller_utils.py +383 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +485 -0
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +137 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +15 -29
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +18 -8
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +416 -0
- sky/utils/log_utils.py +82 -107
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +217 -39
- sky/utils/schemas.py +955 -160
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +29 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +26 -53
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +165 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +150 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +270 -0
- sky/volumes/server/server.py +124 -0
- sky/volumes/volume.py +215 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
- skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
- skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/clouds/hyperbolic.py
ADDED
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
"""Hyperbolic Cloud provider implementation
|
|
2
|
+
for SkyPilot.
|
|
3
|
+
"""
|
|
4
|
+
import os
|
|
5
|
+
import typing
|
|
6
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
7
|
+
|
|
8
|
+
from sky import catalog
|
|
9
|
+
from sky import clouds
|
|
10
|
+
from sky.utils import registry
|
|
11
|
+
from sky.utils import resources_utils
|
|
12
|
+
from sky.utils.resources_utils import DiskTier
|
|
13
|
+
|
|
14
|
+
if typing.TYPE_CHECKING:
|
|
15
|
+
from sky import resources as resources_lib
|
|
16
|
+
from sky.utils import volume as volume_lib
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@registry.CLOUD_REGISTRY.register
|
|
20
|
+
class Hyperbolic(clouds.Cloud):
|
|
21
|
+
"""Hyperbolic Cloud Provider."""
|
|
22
|
+
|
|
23
|
+
_REPR = 'Hyperbolic'
|
|
24
|
+
name = 'hyperbolic'
|
|
25
|
+
_MAX_CLUSTER_NAME_LEN_LIMIT = 120
|
|
26
|
+
API_KEY_PATH = os.path.expanduser('~/.hyperbolic/api_key')
|
|
27
|
+
|
|
28
|
+
# Feature -> human-readable reason string for every capability this cloud
# class declares as unsupported.
_CLOUD_UNSUPPORTED_FEATURES = {
    clouds.CloudImplementationFeatures.STOP: 'Stopping not supported.',
    clouds.CloudImplementationFeatures.MULTI_NODE: 'Multi-node not supported.',
    clouds.CloudImplementationFeatures.CUSTOM_DISK_TIER:
        'Custom disk tiers not supported.',
    clouds.CloudImplementationFeatures.STORAGE_MOUNTING:
        'Storage mounting not supported.',
    clouds.CloudImplementationFeatures.HIGH_AVAILABILITY_CONTROLLERS:
        'High availability controllers not supported.',
    clouds.CloudImplementationFeatures.SPOT_INSTANCE:
        'Spot instances not supported.',
    clouds.CloudImplementationFeatures.CLONE_DISK_FROM_CLUSTER:
        'Disk cloning not supported.',
    clouds.CloudImplementationFeatures.DOCKER_IMAGE:
        'Docker images not supported.',
    clouds.CloudImplementationFeatures.OPEN_PORTS:
        'Opening ports not supported.',
    clouds.CloudImplementationFeatures.IMAGE_ID:
        'Custom image IDs not supported.',
    clouds.CloudImplementationFeatures.CUSTOM_NETWORK_TIER:
        'Custom network tiers not supported.',
    clouds.CloudImplementationFeatures.HOST_CONTROLLERS:
        'Host controllers not supported.',
    clouds.CloudImplementationFeatures.AUTO_TERMINATE:
        'Auto-termination not supported.',
    clouds.CloudImplementationFeatures.AUTOSTOP: 'Auto-stop not supported.',
    clouds.CloudImplementationFeatures.AUTODOWN: 'Auto-down not supported.',
    clouds.CloudImplementationFeatures.CUSTOM_MULTI_NETWORK:
        'Customized multiple network interfaces not supported.',
}
|
|
61
|
+
|
|
62
|
+
PROVISIONER_VERSION = clouds.ProvisionerVersion.SKYPILOT
|
|
63
|
+
STATUS_VERSION = clouds.StatusVersion.SKYPILOT
|
|
64
|
+
OPEN_PORTS_VERSION = clouds.OpenPortsVersion.LAUNCH_ONLY
|
|
65
|
+
|
|
66
|
+
@classmethod
def _unsupported_features_for_resources(
    cls,
    resources: 'resources_lib.Resources',
    region: Optional[str] = None,
) -> Dict[clouds.CloudImplementationFeatures, str]:
    """Return the class-wide table of unsupported features.

    Both ``resources`` and ``region`` are ignored: the same mapping
    (``cls._CLOUD_UNSUPPORTED_FEATURES``) is returned for every request.
    """
    del resources, region  # unused
    unsupported = cls._CLOUD_UNSUPPORTED_FEATURES
    return unsupported
|
|
74
|
+
|
|
75
|
+
@classmethod
def _max_cluster_name_length(cls) -> Optional[int]:
    """Return the cluster-name length limit configured on the class."""
    limit = cls._MAX_CLUSTER_NAME_LEN_LIMIT
    return limit
|
|
78
|
+
|
|
79
|
+
def instance_type_exists(self, instance_type: str) -> bool:
    """Report whether ``instance_type`` is known to the 'hyperbolic' catalog.

    Thin wrapper around ``catalog.instance_type_exists``.
    """
    found = catalog.instance_type_exists(instance_type, 'hyperbolic')
    return found
|
|
81
|
+
|
|
82
|
+
@classmethod
def regions_with_offering(
    cls,
    instance_type: str,
    accelerators: Optional[Dict[str, int]],
    use_spot: bool,
    region: Optional[str],
    zone: Optional[str],
    resources: Optional['resources_lib.Resources'] = None,
) -> List[clouds.Region]:
    """List the catalog regions offering ``instance_type``.

    Args:
        instance_type: Instance type to look up in the 'hyperbolic' catalog.
        accelerators: Ignored.
        use_spot: Forwarded to the catalog lookup.
        region: When given, only regions whose ``name`` equals this value
            are kept.
        zone: Must be ``None``; asserted below.
        resources: Not used by this implementation.

    Returns:
        The regions returned by the catalog, optionally filtered by name.
    """
    assert zone is None, 'Hyperbolic does not support zones.'
    del accelerators, zone  # unused

    candidates = catalog.get_region_zones_for_instance_type(
        instance_type, use_spot, 'hyperbolic')
    if region is None:
        # No filter requested: hand back the catalog result untouched.
        return candidates
    return [
        candidate for candidate in candidates if candidate.name == region
    ]
|
|
100
|
+
|
|
101
|
+
@classmethod
def get_vcpus_mem_from_instance_type(
        cls, instance_type: str) -> Tuple[Optional[float], Optional[float]]:
    """Look up the vCPU/memory pair for ``instance_type``.

    Delegates to ``catalog.get_vcpus_mem_from_instance_type`` for the
    'hyperbolic' cloud and returns its result unchanged.
    """
    vcpus_and_mem = catalog.get_vcpus_mem_from_instance_type(
        instance_type, clouds='hyperbolic')
    return vcpus_and_mem
|
|
106
|
+
|
|
107
|
+
def instance_type_to_hourly_cost(self,
                                 instance_type: str,
                                 use_spot: bool,
                                 region: Optional[str] = None,
                                 zone: Optional[str] = None) -> float:
    """Return the hourly cost of ``instance_type`` from the catalog.

    All arguments are forwarded as-is to ``catalog.get_hourly_cost`` for
    the 'hyperbolic' cloud.
    """
    hourly_cost = catalog.get_hourly_cost(
        instance_type,
        use_spot=use_spot,
        region=region,
        zone=zone,
        clouds='hyperbolic',
    )
    return hourly_cost
|
|
117
|
+
|
|
118
|
+
@classmethod
def get_default_instance_type(cls,
                              cpus: Optional[str] = None,
                              memory: Optional[str] = None,
                              disk_tier: Optional[DiskTier] = None,
                              region: Optional[str] = None,
                              zone: Optional[str] = None) -> Optional[str]:
    """Ask the catalog for a default instance type.

    Every constraint is forwarded unchanged to
    ``catalog.get_default_instance_type`` for the 'hyperbolic' cloud; the
    catalog's answer is returned as-is.
    """
    default_type = catalog.get_default_instance_type(
        cpus=cpus,
        memory=memory,
        disk_tier=disk_tier,
        region=region,
        zone=zone,
        clouds='hyperbolic',
    )
    return default_type
|
|
131
|
+
|
|
132
|
+
@classmethod
def get_accelerators_from_instance_type(
        cls, instance_type: str) -> Optional[Dict[str, Union[int, float]]]:
    """Return the catalog's accelerator mapping for ``instance_type``.

    Delegates to the shared catalog, scoped to the 'hyperbolic' cloud.
    """
    accelerator_map = catalog.get_accelerators_from_instance_type(
        instance_type, clouds='hyperbolic')
    return accelerator_map
|
|
137
|
+
|
|
138
|
+
@classmethod
def _check_credentials(cls) -> Tuple[bool, Optional[str]]:
    """Report whether a file exists at ``cls.API_KEY_PATH``.

    Returns:
        ``(True, None)`` when the path exists, otherwise ``(False, reason)``
        where ``reason`` names the path that was checked. Only existence is
        tested; the file contents are not read.
    """
    key_path = cls.API_KEY_PATH
    if not os.path.exists(key_path):
        return False, f'API key not found at {key_path}'
    return True, None
|
|
143
|
+
|
|
144
|
+
@classmethod
def _check_compute_credentials(cls) -> Tuple[bool, Optional[str]]:
    """Return the outcome of ``cls._check_credentials()`` unchanged."""
    credential_status = cls._check_credentials()
    return credential_status
|
|
147
|
+
|
|
148
|
+
@classmethod
def get_credential_file_mounts(cls) -> Dict[str, str]:
    """Return the credential file mapping, or ``{}`` if no key file exists.

    When a file exists at ``cls.API_KEY_PATH`` the result maps that path to
    ``'~/.hyperbolic/api_key'``.
    """
    # NOTE(review): which side of the mapping is the local path and which is
    # the remote one depends on the caller's convention and on how
    # API_KEY_PATH is defined (not visible here) -- confirm.
    key_path = cls.API_KEY_PATH
    if not os.path.exists(key_path):
        return {}
    return {key_path: '~/.hyperbolic/api_key'}
|
|
153
|
+
|
|
154
|
+
def __repr__(self):
    """Return the value stored in ``self._REPR``."""
    label = self._REPR
    return label
|
|
156
|
+
|
|
157
|
+
def _get_feasible_launchable_resources(
    self, resources: 'resources_lib.Resources'
) -> 'resources_utils.FeasibleResources':
    """Resolve ``resources`` into launchable candidates for this cloud.

    Three cases are handled, in this order:

    1. An explicit ``instance_type``: if the 'hyperbolic' catalog knows it,
       a copy of ``resources`` with accelerators cleared is the only
       candidate; otherwise ``ValueError`` is raised.
    2. Accelerators requested (exactly one entry is asserted): the catalog
       is asked for matching instance types, and one candidate is produced
       per instance type. The catalog's fuzzy candidates are passed along.
    3. Neither: the default instance type for the requested cpus / memory /
       disk tier / region / zone is used, or no candidate if there is none.

    Raises:
        ValueError: ``resources.instance_type`` is set but is not in the
            catalog.
    """
    requested_type = resources.instance_type
    if requested_type is not None:
        if not catalog.instance_type_exists(requested_type, 'hyperbolic'):
            raise ValueError(f'Invalid instance type: {requested_type}')
        # The instance type is authoritative, so the accelerator request
        # is dropped from the launchable copy.
        launchable = resources.copy(accelerators=None)
        return resources_utils.FeasibleResources([launchable], [], None)

    accelerators = resources.accelerators
    if accelerators is not None:
        assert len(accelerators) == 1, resources
        acc, acc_count = list(accelerators.items())[0]
        matching_types, fuzzy_candidates = (
            catalog.get_instance_type_for_accelerator(
                acc,
                acc_count,
                use_spot=resources.use_spot,
                cpus=resources.cpus,
                memory=resources.memory,
                region=resources.region,
                zone=resources.zone,
                clouds='hyperbolic'))
        if matching_types is None:
            return resources_utils.FeasibleResources([], fuzzy_candidates,
                                                     None)

        # cpus / memory / accelerators are cleared on each copy; only the
        # chosen instance type is set.
        candidates = [
            resources.copy(
                cloud=self,
                instance_type=matched_type,
                accelerators=None,
                cpus=None,
                memory=None,
            ) for matched_type in matching_types
        ]
        return resources_utils.FeasibleResources(candidates,
                                                 fuzzy_candidates, None)

    # Nothing specific was requested: fall back to the default instance type.
    fallback_type = self.get_default_instance_type(
        cpus=resources.cpus,
        memory=resources.memory,
        disk_tier=resources.disk_tier,
        region=resources.region,
        zone=resources.zone)
    if fallback_type is None:
        return resources_utils.FeasibleResources([], [], None)

    fallback = resources.copy(
        cloud=self,
        instance_type=fallback_type,
        accelerators=None,
        cpus=None,
        memory=None,
    )
    return resources_utils.FeasibleResources([fallback], [], None)
|
|
226
|
+
|
|
227
|
+
def validate_region_zone(
        self, region: Optional[str],
        zone: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
    """Validate ``region`` against the 'hyperbolic' catalog.

    Raises:
        ValueError: if ``zone`` is not ``None``.

    Returns:
        Whatever the catalog's ``validate_region_zone`` returns for the
        given region (with ``zone`` being ``None``).
    """
    if zone is None:
        return catalog.validate_region_zone(region, zone, 'hyperbolic')
    raise ValueError('Hyperbolic does not support zones.')
|
|
233
|
+
|
|
234
|
+
@classmethod
def regions(cls) -> List[clouds.Region]:
    """Return the regions the catalog lists for the 'hyperbolic' cloud."""
    catalog_regions = catalog.regions('hyperbolic')
    return catalog_regions
|
|
238
|
+
|
|
239
|
+
@classmethod
def zones_provision_loop(cls,
                         *,
                         region: str,
                         num_nodes: int,
                         instance_type: str,
                         accelerators: Optional[Dict[str, int]] = None,
                         use_spot: bool = False):
    """Yield a single ``None``; none of the keyword arguments are read."""
    del region, num_nodes, instance_type, accelerators, use_spot  # unused
    yield None
|
|
248
|
+
|
|
249
|
+
@classmethod
def get_zone_shell_cmd(cls) -> Optional[str]:
    """Always return ``None``; no zone shell command is provided."""
    no_command: Optional[str] = None
    return no_command
|
|
252
|
+
|
|
253
|
+
def get_egress_cost(self, num_gigabytes: float):
    """Return a constant egress cost of ``0.0`` for any ``num_gigabytes``."""
    del num_gigabytes  # the result does not depend on the amount
    return 0.0
|
|
255
|
+
|
|
256
|
+
def accelerators_to_hourly_cost(self, accelerators: Dict[str, int],
                                use_spot: bool, region: Optional[str],
                                zone: Optional[str]) -> float:
    """Return ``0.0`` as the accelerator cost; no argument is consulted."""
    del accelerators, use_spot, region, zone  # unused
    return 0.0
|
|
260
|
+
|
|
261
|
+
def make_deploy_resources_variables(
    self,
    resources: 'resources_lib.Resources',
    cluster_name: resources_utils.ClusterName,
    region: 'clouds.Region',
    zones: Optional[List['clouds.Zone']],
    num_nodes: int,
    dryrun: bool = False,
    volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
) -> Dict[str, Any]:
    """Build the variable dict for the deployment template.

    ``zones`` must be ``None`` (asserted). ``cluster_name``, ``region`` and
    ``dryrun`` are discarded; ``num_nodes`` and ``volume_mounts`` are not
    read.

    Returns:
        A dict with the keys ``'instance_type'``, ``'custom_resources'`` and
        ``'num_nodes'``. ``'num_nodes'`` is always 1.
    """
    del dryrun, region, cluster_name  # unused
    assert zones is None, ('Hyperbolic does not support zones', zones)

    launchable = resources.assert_launchable()
    instance_type = launchable.instance_type
    # The launchable resources carry no accelerators of their own; they are
    # looked up again from the instance type.
    accelerators = self.get_accelerators_from_instance_type(instance_type)

    deploy_vars: Dict[str, Any] = {'instance_type': instance_type}
    deploy_vars['custom_resources'] = (
        resources_utils.make_ray_custom_resources_str(accelerators))
    # Hyperbolic only supports single-node clusters
    deploy_vars['num_nodes'] = 1
    return deploy_vars
|
|
287
|
+
|
|
288
|
+
def cluster_name_in_hint(self, cluster_name_on_cloud: Optional[str],
                         cluster_name: str) -> bool:
    """Tell whether ``cluster_name_on_cloud`` begins with ``cluster_name``.

    Returns ``False`` when ``cluster_name_on_cloud`` is ``None``. The match
    is a plain prefix test, so a longer name sharing the prefix also matches.
    """
    return (cluster_name_on_cloud is not None and
            cluster_name_on_cloud.startswith(cluster_name))
|
sky/clouds/ibm.py
CHANGED
|
@@ -5,11 +5,11 @@ from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
|
|
|
5
5
|
|
|
6
6
|
import colorama
|
|
7
7
|
|
|
8
|
+
from sky import catalog
|
|
8
9
|
from sky import clouds
|
|
9
10
|
from sky import sky_logging
|
|
10
11
|
from sky.adaptors import ibm
|
|
11
12
|
from sky.adaptors.ibm import CREDENTIAL_FILE
|
|
12
|
-
from sky.clouds import service_catalog
|
|
13
13
|
from sky.utils import registry
|
|
14
14
|
from sky.utils import resources_utils
|
|
15
15
|
from sky.utils import status_lib
|
|
@@ -18,6 +18,7 @@ from sky.utils import ux_utils
|
|
|
18
18
|
if typing.TYPE_CHECKING:
|
|
19
19
|
# renaming to avoid shadowing variables
|
|
20
20
|
from sky import resources as resources_lib
|
|
21
|
+
from sky.utils import volume as volume_lib
|
|
21
22
|
|
|
22
23
|
logger = sky_logging.init_logger(__name__)
|
|
23
24
|
|
|
@@ -36,7 +37,9 @@ class IBM(clouds.Cloud):
|
|
|
36
37
|
|
|
37
38
|
@classmethod
|
|
38
39
|
def _unsupported_features_for_resources(
|
|
39
|
-
cls,
|
|
40
|
+
cls,
|
|
41
|
+
resources: 'resources_lib.Resources',
|
|
42
|
+
region: Optional[str] = None,
|
|
40
43
|
) -> Dict[clouds.CloudImplementationFeatures, str]:
|
|
41
44
|
features = {
|
|
42
45
|
clouds.CloudImplementationFeatures.CLONE_DISK_FROM_CLUSTER:
|
|
@@ -52,6 +55,9 @@ class IBM(clouds.Cloud):
|
|
|
52
55
|
(f'Opening ports is currently not supported on {cls._REPR}.'),
|
|
53
56
|
clouds.CloudImplementationFeatures.HIGH_AVAILABILITY_CONTROLLERS:
|
|
54
57
|
('High availability controllers are not supported on IBM.'),
|
|
58
|
+
clouds.CloudImplementationFeatures.CUSTOM_MULTI_NETWORK:
|
|
59
|
+
('Customized multiple network interfaces are not supported on '
|
|
60
|
+
f'{cls._REPR}.'),
|
|
55
61
|
}
|
|
56
62
|
if resources.use_spot:
|
|
57
63
|
features[clouds.CloudImplementationFeatures.STOP] = (
|
|
@@ -64,14 +70,19 @@ class IBM(clouds.Cloud):
|
|
|
64
70
|
return cls._MAX_CLUSTER_NAME_LEN_LIMIT
|
|
65
71
|
|
|
66
72
|
@classmethod
|
|
67
|
-
def regions_with_offering(
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
73
|
+
def regions_with_offering(
|
|
74
|
+
cls,
|
|
75
|
+
instance_type: str,
|
|
76
|
+
accelerators: Optional[Dict[str, int]],
|
|
77
|
+
use_spot: bool,
|
|
78
|
+
region: Optional[str],
|
|
79
|
+
zone: Optional[str],
|
|
80
|
+
resources: Optional['resources_lib.Resources'] = None,
|
|
81
|
+
) -> List[clouds.Region]:
|
|
71
82
|
del accelerators # unused
|
|
72
83
|
if use_spot:
|
|
73
84
|
return []
|
|
74
|
-
regions =
|
|
85
|
+
regions = catalog.get_region_zones_for_instance_type(
|
|
75
86
|
instance_type, use_spot, 'ibm')
|
|
76
87
|
|
|
77
88
|
if region is not None:
|
|
@@ -131,11 +142,11 @@ class IBM(clouds.Cloud):
|
|
|
131
142
|
zone: Optional[str] = None) -> float:
|
|
132
143
|
# Currently doesn't support spot instances, hence use_spot set to False.
|
|
133
144
|
del use_spot
|
|
134
|
-
return
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
145
|
+
return catalog.get_hourly_cost(instance_type,
|
|
146
|
+
use_spot=False,
|
|
147
|
+
region=region,
|
|
148
|
+
zone=zone,
|
|
149
|
+
clouds='ibm')
|
|
139
150
|
|
|
140
151
|
def accelerators_to_hourly_cost(self,
|
|
141
152
|
accelerators: Dict[str, int],
|
|
@@ -175,7 +186,8 @@ class IBM(clouds.Cloud):
|
|
|
175
186
|
zones: Optional[List['clouds.Zone']],
|
|
176
187
|
num_nodes: int,
|
|
177
188
|
dryrun: bool = False,
|
|
178
|
-
|
|
189
|
+
volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
|
|
190
|
+
) -> Dict[str, Any]:
|
|
179
191
|
"""Converts planned sky.Resources to cloud-specific resource variables.
|
|
180
192
|
|
|
181
193
|
These variables are used to fill the node type section (instance type,
|
|
@@ -204,30 +216,32 @@ class IBM(clouds.Cloud):
|
|
|
204
216
|
# clouds implementing 'zones_provision_loop()'
|
|
205
217
|
zone_names = [zone.name for zone in zones] # type: ignore[union-attr]
|
|
206
218
|
|
|
207
|
-
|
|
208
|
-
assert not
|
|
219
|
+
resources = resources.assert_launchable()
|
|
220
|
+
assert not resources.use_spot, \
|
|
209
221
|
'IBM does not currently support spot instances in this framework'
|
|
210
222
|
|
|
211
|
-
acc_dict = self.get_accelerators_from_instance_type(
|
|
223
|
+
acc_dict = self.get_accelerators_from_instance_type(
|
|
224
|
+
resources.instance_type)
|
|
212
225
|
custom_resources = resources_utils.make_ray_custom_resources_str(
|
|
213
226
|
acc_dict)
|
|
214
227
|
|
|
215
|
-
instance_resources = _get_profile_resources(
|
|
228
|
+
instance_resources = _get_profile_resources(resources.instance_type)
|
|
216
229
|
|
|
217
230
|
worker_instance_type = get_cred_file_field('worker_instance_type',
|
|
218
|
-
|
|
231
|
+
resources.instance_type)
|
|
219
232
|
worker_instance_resources = _get_profile_resources(worker_instance_type)
|
|
220
233
|
# r.image_id: {clouds.Region:image_id} - property of Resources class
|
|
221
|
-
image_id =
|
|
222
|
-
region.name] if
|
|
234
|
+
image_id = resources.image_id[
|
|
235
|
+
region.name] if resources.image_id else self.get_default_image(
|
|
236
|
+
region_name)
|
|
223
237
|
|
|
224
238
|
return {
|
|
225
|
-
'instance_type':
|
|
239
|
+
'instance_type': resources.instance_type,
|
|
226
240
|
'instance_resources': instance_resources,
|
|
227
241
|
'worker_instance_type': worker_instance_type,
|
|
228
242
|
'worker_instance_resources': worker_instance_resources,
|
|
229
243
|
'custom_resources': custom_resources,
|
|
230
|
-
'use_spot':
|
|
244
|
+
'use_spot': resources.use_spot,
|
|
231
245
|
'region': region_name,
|
|
232
246
|
'zones': ','.join(zone_names),
|
|
233
247
|
'image_id': image_id,
|
|
@@ -241,8 +255,8 @@ class IBM(clouds.Cloud):
|
|
|
241
255
|
cls,
|
|
242
256
|
instance_type: str,
|
|
243
257
|
) -> Tuple[Optional[float], Optional[float]]:
|
|
244
|
-
return
|
|
245
|
-
|
|
258
|
+
return catalog.get_vcpus_mem_from_instance_type(instance_type,
|
|
259
|
+
clouds='ibm')
|
|
246
260
|
|
|
247
261
|
@classmethod
|
|
248
262
|
def get_accelerators_from_instance_type(
|
|
@@ -250,20 +264,23 @@ class IBM(clouds.Cloud):
|
|
|
250
264
|
instance_type: str,
|
|
251
265
|
) -> Optional[Dict[str, Union[int, float]]]:
|
|
252
266
|
"""Returns {acc: acc_count} held by 'instance_type', if any."""
|
|
253
|
-
return
|
|
254
|
-
|
|
267
|
+
return catalog.get_accelerators_from_instance_type(instance_type,
|
|
268
|
+
clouds='ibm')
|
|
255
269
|
|
|
256
270
|
@classmethod
|
|
257
271
|
def get_default_instance_type(
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
272
|
+
cls,
|
|
273
|
+
cpus: Optional[str] = None,
|
|
274
|
+
memory: Optional[str] = None,
|
|
275
|
+
disk_tier: Optional['resources_utils.DiskTier'] = None,
|
|
276
|
+
region: Optional[str] = None,
|
|
277
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
278
|
+
return catalog.get_default_instance_type(cpus=cpus,
|
|
279
|
+
memory=memory,
|
|
280
|
+
disk_tier=disk_tier,
|
|
281
|
+
region=region,
|
|
282
|
+
zone=zone,
|
|
283
|
+
clouds='ibm')
|
|
267
284
|
|
|
268
285
|
def _get_feasible_launchable_resources(
|
|
269
286
|
self, resources: 'resources_lib.Resources'
|
|
@@ -298,7 +315,9 @@ class IBM(clouds.Cloud):
|
|
|
298
315
|
default_instance_type = IBM.get_default_instance_type(
|
|
299
316
|
cpus=resources.cpus,
|
|
300
317
|
memory=resources.memory,
|
|
301
|
-
disk_tier=resources.disk_tier
|
|
318
|
+
disk_tier=resources.disk_tier,
|
|
319
|
+
region=resources.region,
|
|
320
|
+
zone=resources.zone)
|
|
302
321
|
if default_instance_type is None:
|
|
303
322
|
return resources_utils.FeasibleResources([], [], None)
|
|
304
323
|
else:
|
|
@@ -307,15 +326,15 @@ class IBM(clouds.Cloud):
|
|
|
307
326
|
|
|
308
327
|
assert len(accelerators) == 1, resources
|
|
309
328
|
acc, acc_count = list(accelerators.items())[0]
|
|
310
|
-
(instance_list,
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
329
|
+
(instance_list,
|
|
330
|
+
fuzzy_candidate_list) = catalog.get_instance_type_for_accelerator(
|
|
331
|
+
acc,
|
|
332
|
+
acc_count,
|
|
333
|
+
cpus=resources.cpus,
|
|
334
|
+
memory=resources.memory,
|
|
335
|
+
region=resources.region,
|
|
336
|
+
zone=resources.zone,
|
|
337
|
+
clouds='ibm')
|
|
319
338
|
if instance_list is None:
|
|
320
339
|
return resources_utils.FeasibleResources([], fuzzy_candidate_list,
|
|
321
340
|
None)
|
|
@@ -397,13 +416,15 @@ class IBM(clouds.Cloud):
|
|
|
397
416
|
return image_size
|
|
398
417
|
|
|
399
418
|
@classmethod
|
|
400
|
-
def _check_compute_credentials(
|
|
419
|
+
def _check_compute_credentials(
|
|
420
|
+
cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
|
|
401
421
|
"""Checks if the user has access credentials to
|
|
402
422
|
IBM's compute service."""
|
|
403
423
|
return cls._check_credentials()
|
|
404
424
|
|
|
405
425
|
@classmethod
|
|
406
|
-
def _check_storage_credentials(
|
|
426
|
+
def _check_storage_credentials(
|
|
427
|
+
cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
|
|
407
428
|
"""Checks if the user has access credentials to
|
|
408
429
|
IBM's storage service."""
|
|
409
430
|
# TODO(seungjin): Implement separate check for
|
|
@@ -458,11 +479,11 @@ class IBM(clouds.Cloud):
|
|
|
458
479
|
|
|
459
480
|
def instance_type_exists(self, instance_type):
|
|
460
481
|
"""Returns whether the instance type exists for this cloud."""
|
|
461
|
-
return
|
|
482
|
+
return catalog.instance_type_exists(instance_type, clouds='ibm')
|
|
462
483
|
|
|
463
484
|
def validate_region_zone(self, region: Optional[str], zone: Optional[str]):
|
|
464
485
|
"""Validates the region and zone."""
|
|
465
|
-
return
|
|
486
|
+
return catalog.validate_region_zone(region, zone, clouds='ibm')
|
|
466
487
|
|
|
467
488
|
@classmethod
|
|
468
489
|
def query_status(cls, name: str, tag_filters: Dict[str, str],
|