skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +81 -16
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +173 -30
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1323 -397
- sky/backends/cloud_vm_ray_backend.py +1749 -1029
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/task_codegen.py +633 -0
- sky/backends/wheel_utils.py +55 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +90 -49
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +338 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +533 -185
- sky/cli.py +5 -5975
- sky/client/{cli.py → cli/command.py} +2591 -1956
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +322 -0
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +78 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1219 -319
- sky/client/sdk_async.py +827 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +564 -164
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +140 -40
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +570 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +210 -81
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -86
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +231 -167
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +375 -173
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1460 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +126 -2
- sky/execution.py +216 -63
- sky/global_user_state.py +2390 -586
- sky/jobs/__init__.py +7 -0
- sky/jobs/client/sdk.py +300 -58
- sky/jobs/client/sdk_async.py +161 -0
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +848 -275
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +402 -152
- sky/jobs/scheduler.py +314 -189
- sky/jobs/server/core.py +836 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2109 -706
- sky/jobs/utils.py +1306 -215
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +453 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +136 -50
- sky/provision/azure/instance.py +11 -6
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +140 -33
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +101 -20
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +919 -280
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +44 -43
- sky/provision/kubernetes/utils.py +1221 -534
- sky/provision/kubernetes/volume.py +343 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +237 -137
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +117 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +214 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +707 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +4 -4
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +17 -20
- sky/py.typed +0 -0
- sky/resources.py +845 -119
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +227 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +12 -9
- sky/serve/controller.py +68 -17
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +116 -2
- sky/serve/replica_managers.py +434 -249
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +569 -257
- sky/serve/serve_utils.py +775 -265
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +192 -89
- sky/serve/service_spec.py +144 -20
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +202 -0
- sky/server/common.py +478 -182
- sky/server/config.py +85 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +295 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/middleware_utils.py +166 -0
- sky/server/requests/executor.py +558 -138
- sky/server/requests/payloads.py +364 -24
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +121 -0
- sky/server/requests/requests.py +822 -226
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +455 -0
- sky/server/server.py +1309 -285
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +327 -61
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +11 -1
- sky/setup_files/alembic.ini +160 -0
- sky/setup_files/dependencies.py +139 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +114 -7
- sky/skylet/attempt_skylet.py +106 -24
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +29 -20
- sky/skylet/constants.py +216 -25
- sky/skylet/events.py +101 -21
- sky/skylet/job_lib.py +345 -164
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +568 -0
- sky/skylet/skylet.py +72 -4
- sky/skylet/subprocess_daemon.py +104 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +685 -163
- sky/templates/aws-ray.yml.j2 +11 -3
- sky/templates/azure-ray.yml.j2 +2 -1
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +2 -1
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +62 -1
- sky/templates/hyperbolic-ray.yml.j2 +68 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +611 -50
- sky/templates/lambda-ray.yml.j2 +2 -1
- sky/templates/nebius-ray.yml.j2 +34 -12
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +2 -1
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +10 -1
- sky/templates/scp-ray.yml.j2 +4 -50
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/vast-ray.yml.j2 +1 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +212 -37
- sky/usage/usage_lib.py +31 -15
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +397 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +35 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +38 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +314 -91
- sky/utils/config_utils.py +74 -5
- sky/utils/context.py +403 -0
- sky/utils/context_utils.py +242 -0
- sky/utils/controller_utils.py +383 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +485 -0
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +137 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +15 -29
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +18 -8
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +416 -0
- sky/utils/log_utils.py +82 -107
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +217 -39
- sky/utils/schemas.py +955 -160
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +29 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +26 -53
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +165 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +150 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +270 -0
- sky/volumes/server/server.py +124 -0
- sky/volumes/volume.py +215 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
- skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
- skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
"""Seeweb service catalog.
|
|
2
|
+
|
|
3
|
+
This module loads the service catalog file and can be used to
|
|
4
|
+
query instance types and pricing information for Seeweb.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import typing
|
|
8
|
+
from typing import Dict, List, Optional, Tuple
|
|
9
|
+
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
12
|
+
from sky.catalog import common
|
|
13
|
+
from sky.utils import resources_utils
|
|
14
|
+
from sky.utils import ux_utils
|
|
15
|
+
|
|
16
|
+
if typing.TYPE_CHECKING:
|
|
17
|
+
from sky.clouds import cloud
|
|
18
|
+
|
|
19
|
+
# Passed to common.read_catalog as `pull_frequency_hours` for the Seeweb
# VM catalog (presumably the refresh interval of the cached CSV -- confirm
# against sky.catalog.common).
_PULL_FREQUENCY_HOURS = 8

# Module-level catalog table queried by every function in this module.
_df = common.read_catalog(
    'seeweb/vms.csv',
    pull_frequency_hours=_PULL_FREQUENCY_HOURS,
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def instance_type_exists(instance_type: str) -> bool:
    """Checks whether `instance_type` is present in the Seeweb catalog.

    Thin wrapper that forwards the module-level catalog dataframe to
    `common.instance_type_exists_impl`.
    """
    return common.instance_type_exists_impl(_df, instance_type)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def validate_region_zone(
        region: Optional[str],
        zone: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
    """Validates a (region, zone) pair against the Seeweb catalog.

    Raises:
        ValueError: if any zone is given, since Seeweb has no zones.
    """
    zone_given = zone is not None
    if zone_given:
        # Surface a clean, traceback-free error to the user.
        with ux_utils.print_exception_no_traceback():
            raise ValueError('Seeweb does not support zones.')

    return common.validate_region_zone_impl('Seeweb', _df, region, zone)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def get_hourly_cost(instance_type: str,
                    use_spot: bool = False,
                    region: Optional[str] = None,
                    zone: Optional[str] = None) -> float:
    """Looks up the hourly price of `instance_type` in the Seeweb catalog.

    The lookup itself is delegated to `common.get_hourly_cost_impl`.

    Raises:
        ValueError: if a zone is given, since Seeweb has no zones.
    """
    zone_given = zone is not None
    if zone_given:
        with ux_utils.print_exception_no_traceback():
            raise ValueError('Seeweb does not support zones.')

    return common.get_hourly_cost_impl(_df, instance_type, use_spot, region,
                                       zone)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def get_vcpus_mem_from_instance_type(
        instance_type: str) -> Tuple[Optional[float], Optional[float]]:
    """Returns the (vCPUs, memory) pair recorded for `instance_type`.

    Thin wrapper around `common.get_vcpus_mem_from_instance_type_impl`
    applied to the module-level Seeweb catalog.
    """
    return common.get_vcpus_mem_from_instance_type_impl(_df, instance_type)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def get_default_instance_type(cpus: Optional[str] = None,
                              memory: Optional[str] = None,
                              disk_tier: Optional[
                                  resources_utils.DiskTier] = None,
                              region: Optional[str] = None,
                              zone: Optional[str] = None) -> Optional[str]:
    """Picks an instance type matching the CPU / memory request.

    `disk_tier` is accepted for signature compatibility only and is
    ignored; the selection is delegated to
    `common.get_instance_type_for_cpus_mem_impl`.
    """
    del disk_tier  # unused
    return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory,
                                                      region, zone)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def get_accelerators_from_instance_type(
        instance_type: str) -> Optional[Dict[str, int]]:
    """Returns the accelerators attached to `instance_type`, if any.

    Args:
        instance_type: Value to match against the catalog's
            'InstanceType' column.

    Returns:
        A single-entry dict `{accelerator_name: count}`, or None when the
        instance type is not in the catalog, has no accelerator recorded,
        or its count cannot be parsed as a number. Whole-number counts are
        returned as `int`; any other numeric count is returned as `float`.
    """
    matching_rows = _df[_df['InstanceType'] == instance_type]
    if matching_rows.empty:
        return None

    # All rows of one instance type are expected to carry the same
    # accelerator info, so the first row is representative.
    first_row = matching_rows.iloc[0]
    acc_name = first_row['AcceleratorName']
    acc_count = first_row['AcceleratorCount']

    # A missing (NaN/None) or blank cell means "no accelerator".
    for cell in (acc_name, acc_count):
        if pd.isna(cell) or (isinstance(cell, str) and not cell.strip()):
            return None

    # Parse through float() so that textual counts such as '4' or '8.0'
    # are handled the same way as numeric cells. Calling int() directly
    # on '8.0' would raise and wrongly drop the accelerator.
    try:
        numeric_count = float(acc_count)
    except (ValueError, TypeError):
        return None

    if numeric_count.is_integer():
        return {acc_name: int(numeric_count)}
    return {acc_name: numeric_count}
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def get_instance_type_for_accelerator(
        acc_name: str,
        acc_count: int,
        cpus: Optional[str] = None,
        memory: Optional[str] = None,
        use_spot: bool = False,
        region: Optional[str] = None,
        zone: Optional[str] = None) -> Tuple[Optional[List[str]], List[str]]:
    """Finds instance types offering the requested accelerators.

    The search is delegated to
    `common.get_instance_type_for_accelerator_impl`, whose result is
    returned unchanged.

    Raises:
        ValueError: if a zone is given, since Seeweb has no zones.
    """
    zone_given = zone is not None
    if zone_given:
        with ux_utils.print_exception_no_traceback():
            raise ValueError('Seeweb does not support zones.')

    return common.get_instance_type_for_accelerator_impl(
        df=_df,
        acc_name=acc_name,
        acc_count=acc_count,
        cpus=cpus,
        memory=memory,
        use_spot=use_spot,
        region=region,
        zone=zone)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def regions() -> List['cloud.Region']:
    """Returns the regions in the Seeweb catalog (non-spot lookup)."""
    return common.get_region_zones(_df, use_spot=False)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def get_region_zones_for_instance_type(instance_type: str,
                                       use_spot: bool = False
                                      ) -> List['cloud.Region']:
    """Returns the regions for an instance type, preferred regions first.

    Catalog rows are narrowed to ``instance_type`` and handed to
    ``common.get_region_zones``. The resulting regions are then reordered so
    that the names in the priority list come first (in priority order),
    followed by every other region in its original order. An empty list is
    returned when no catalog row matches the instance type.
    """
    matching_rows = _df[_df['InstanceType'] == instance_type]
    if matching_rows.empty:
        return []

    # Use common.get_region_zones() like all other providers
    available = common.get_region_zones(matching_rows, use_spot)

    # Default region: Frosinone (it-fr2)
    # Other regions: Milano (it-mi2), Lugano (ch-lug1), Bulgaria (bg-sof1)
    priority_regions = ['it-fr2']

    ordered = []
    for preferred_name in priority_regions:
        # Only the first region carrying the preferred name is promoted.
        first_match = next(
            (candidate for candidate in available
             if candidate.name == preferred_name), None)
        if first_match is not None:
            ordered.append(first_match)

    # Everything that is not a priority region keeps its relative order.
    ordered.extend(candidate for candidate in available
                   if candidate.name not in priority_regions)
    return ordered
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def list_accelerators(
        gpus_only: bool,
        name_filter: Optional[str],
        region_filter: Optional[str],
        quantity_filter: Optional[int],
        case_sensitive: bool = True,
        all_regions: bool = False,
        require_price: bool = True) -> Dict[str, List[common.InstanceTypeInfo]]:
    """Lists accelerators offered in Seeweb.

    Rows whose ``Region`` is missing or blank are removed first (the catalog
    uses an empty region to indicate unavailability); the remaining rows are
    passed to ``common.list_accelerators_impl`` together with all filters.
    """
    with_region = _df.dropna(subset=['Region'])
    region_not_blank = with_region['Region'].str.strip() != ''
    return common.list_accelerators_impl('Seeweb',
                                         with_region[region_not_blank],
                                         gpus_only, name_filter, region_filter,
                                         quantity_filter, case_sensitive,
                                         all_regions, require_price)
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
""" Shadeform | Catalog
|
|
2
|
+
|
|
3
|
+
This module loads pricing and instance information from the Shadeform API
|
|
4
|
+
and can be used to query instance types and pricing information for Shadeform.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import typing
|
|
8
|
+
from typing import Dict, List, Optional, Tuple, Union
|
|
9
|
+
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
12
|
+
from sky.catalog import common
|
|
13
|
+
|
|
14
|
+
if typing.TYPE_CHECKING:
|
|
15
|
+
from sky.clouds import cloud
|
|
16
|
+
|
|
17
|
+
# Populated lazily by _get_df(): read from the static catalog CSV when it
# exists, otherwise an empty placeholder (dynamic fetching is not implemented).
|
|
18
|
+
_df = None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _get_df():
    """Returns the cached Shadeform catalog, loading it on first use.

    The catalog is read from ``shadeform/vms.csv`` through
    ``common.read_catalog``. If that raises ``FileNotFoundError``, an empty
    dataframe with the expected columns is cached instead. Otherwise rows
    without an ``InstanceType`` are dropped and, when an ``AcceleratorName``
    column exists, rows without an accelerator name are dropped and the
    names are converted to stripped strings. The result is stored in the
    module-level ``_df`` and reused by later calls.
    """
    global _df
    if _df is not None:
        return _df

    # For now this relies on a static catalog; a full implementation would
    # call the Shadeform API to fetch instance types and pricing dynamically.
    try:
        catalog = common.read_catalog('shadeform/vms.csv')
    except FileNotFoundError:
        # No static catalog available: cache an empty one with the expected
        # schema.
        _df = pd.DataFrame(columns=[
            'InstanceType', 'AcceleratorName', 'AcceleratorCount', 'vCPUs',
            'MemoryGiB', 'Price', 'Region', 'GpuInfo', 'SpotPrice'
        ])
        return _df

    catalog = catalog[catalog['InstanceType'].notna()]
    if 'AcceleratorName' in catalog.columns:
        catalog = catalog[catalog['AcceleratorName'].notna()]
        cleaned_names = catalog['AcceleratorName'].astype(str).str.strip()
        catalog = catalog.assign(AcceleratorName=cleaned_names)
    _df = catalog.reset_index(drop=True)
    return _df
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _is_not_found_error(err: ValueError) -> bool:
|
|
48
|
+
msg = str(err).lower()
|
|
49
|
+
return 'not found' in msg or 'not supported' in msg
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _call_or_default(func, default):
|
|
53
|
+
try:
|
|
54
|
+
return func()
|
|
55
|
+
except ValueError as err:
|
|
56
|
+
if _is_not_found_error(err):
|
|
57
|
+
return default
|
|
58
|
+
raise
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def instance_type_exists(instance_type: str) -> bool:
    """Returns whether ``instance_type`` appears in the Shadeform catalog."""
    catalog = _get_df()
    return common.instance_type_exists_impl(catalog, instance_type)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def validate_region_zone(
        region: Optional[str],
        zone: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
    """Validates a region/zone pair against the Shadeform catalog.

    Delegates to ``common.validate_region_zone_impl`` and returns its result.
    """
    catalog = _get_df()
    return common.validate_region_zone_impl('shadeform', catalog, region, zone)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def get_hourly_cost(instance_type: str,
                    use_spot: bool = False,
                    region: Optional[str] = None,
                    zone: Optional[str] = None) -> float:
    """Returns the hourly cost of an instance type from the catalog.

    The lookup is delegated to ``common.get_hourly_cost_impl``.

    Raises:
        ValueError: If ``use_spot`` is set; Shadeform doesn't support spot
            instances currently.
    """
    if use_spot:
        raise ValueError('Spot instances are not supported on Shadeform')

    catalog = _get_df()
    return common.get_hourly_cost_impl(catalog, instance_type, use_spot,
                                       region, zone)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def get_vcpus_mem_from_instance_type(
        instance_type: str) -> Tuple[Optional[float], Optional[float]]:
    """Returns the (vCPUs, memory) pair recorded for an instance type.

    Falls back to ``(None, None)`` when the catalog lookup raises a
    "not found"-style ``ValueError`` (see ``_call_or_default``).
    """

    def _lookup():
        return common.get_vcpus_mem_from_instance_type_impl(
            _get_df(), instance_type)

    return _call_or_default(_lookup, (None, None))
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def get_default_instance_type(cpus: Optional[str] = None,
                              memory: Optional[str] = None,
                              disk_tier: Optional[str] = None,
                              region: Optional[str] = None,
                              zone: Optional[str] = None) -> Optional[str]:
    """Picks an instance type matching the CPU/memory requirements.

    ``disk_tier`` is accepted for interface compatibility but ignored.
    Returns ``None`` when the catalog lookup raises a "not found"-style
    ``ValueError`` (see ``_call_or_default``).
    """
    del disk_tier  # Shadeform doesn't support custom disk tiers yet

    def _lookup():
        return common.get_instance_type_for_cpus_mem_impl(
            _get_df(), cpus, memory, region, zone)

    return _call_or_default(_lookup, None)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def get_accelerators_from_instance_type(
        instance_type: str) -> Optional[Dict[str, Union[int, float]]]:
    """Returns the accelerators recorded for an instance type.

    Returns ``None`` when the catalog lookup raises a "not found"-style
    ``ValueError`` (see ``_call_or_default``).
    """

    def _lookup():
        return common.get_accelerators_from_instance_type_impl(
            _get_df(), instance_type)

    return _call_or_default(_lookup, None)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def get_instance_type_for_accelerator(
        acc_name: str,
        acc_count: int,
        cpus: Optional[str] = None,
        memory: Optional[str] = None,
        use_spot: bool = False,
        region: Optional[str] = None,
        zone: Optional[str] = None) -> Tuple[Optional[List[str]], List[str]]:
    """Looks up Shadeform instance types that have the given accelerator.

    Spot requests short-circuit to ``None`` plus an explanatory message,
    because spot is not supported. Otherwise the request is forwarded to
    ``common.get_instance_type_for_accelerator_impl``; a "not found"-style
    ``ValueError`` from that lookup yields ``(None, [])``.
    """
    if use_spot:
        # Spot is not supported, so there is nothing to look up.
        return None, ['Spot instances are not supported on Shadeform']

    def _lookup():
        return common.get_instance_type_for_accelerator_impl(
            df=_get_df(),
            acc_name=acc_name,
            acc_count=acc_count,
            cpus=cpus,
            memory=memory,
            use_spot=use_spot,
            region=region,
            zone=zone)

    return _call_or_default(_lookup, (None, []))
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def get_region_zones_for_instance_type(instance_type: str,
                                       use_spot: bool) -> List['cloud.Region']:
    """Returns the regions in which an instance type is listed.

    Spot requests always yield an empty list (no spot support). Otherwise
    the catalog rows for ``instance_type`` are passed to
    ``common.get_region_zones``; a "not found"-style ``ValueError`` from that
    call yields an empty list as well.
    """
    if use_spot:
        return []  # No spot support

    catalog = _get_df()
    matching_rows = catalog[catalog['InstanceType'] == instance_type]

    def _lookup():
        return common.get_region_zones(matching_rows, use_spot)

    return _call_or_default(_lookup, [])
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def list_accelerators(
        gpus_only: bool,
        name_filter: Optional[str],
        region_filter: Optional[str],
        quantity_filter: Optional[int],
        case_sensitive: bool = True,
        all_regions: bool = False,
        require_price: bool = True) -> Dict[str, List[common.InstanceTypeInfo]]:
    """Lists the accelerators found in the Shadeform catalog.

    Delegates to ``common.list_accelerators_impl``. ``require_price`` is
    accepted for interface compatibility but is not forwarded.
    """
    del require_price  # Unused.
    catalog = _get_df()
    return common.list_accelerators_impl('Shadeform', catalog, gpus_only,
                                         name_filter, region_filter,
                                         quantity_filter, case_sensitive,
                                         all_regions)
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"""SSH Catalog.
|
|
2
|
+
|
|
3
|
+
This catalog inherits from the Kubernetes catalog as SSH cloud is a wrapper
|
|
4
|
+
around Kubernetes that uses SSH-specific contexts.
|
|
5
|
+
"""
|
|
6
|
+
import typing
|
|
7
|
+
from typing import Dict, List, Optional, Tuple
|
|
8
|
+
|
|
9
|
+
from sky import sky_logging
|
|
10
|
+
from sky.catalog import CloudFilter
|
|
11
|
+
from sky.catalog import common
|
|
12
|
+
from sky.catalog import kubernetes_catalog
|
|
13
|
+
from sky.clouds import ssh
|
|
14
|
+
|
|
15
|
+
logger = sky_logging.init_logger(__name__)
|
|
16
|
+
|
|
17
|
+
if typing.TYPE_CHECKING:
|
|
18
|
+
import pandas as pd
|
|
19
|
+
else:
|
|
20
|
+
from sky.adaptors import common as adaptors_common
|
|
21
|
+
pd = adaptors_common.LazyImport('pandas')
|
|
22
|
+
|
|
23
|
+
_PULL_FREQUENCY_HOURS = 7
|
|
24
|
+
|
|
25
|
+
# Reuse the Kubernetes images catalog for SSH cloud.
|
|
26
|
+
# We keep pull_frequency_hours so we can remotely update the default image paths
|
|
27
|
+
_image_df = common.read_catalog('kubernetes/images.csv',
|
|
28
|
+
pull_frequency_hours=_PULL_FREQUENCY_HOURS)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def get_image_id_from_tag(tag: str, region: Optional[str]) -> Optional[str]:
    """Resolves an image tag to an image id.

    SSH cloud reuses the Kubernetes catalog, so this simply forwards to
    ``kubernetes_catalog.get_image_id_from_tag``.
    """
    image_id = kubernetes_catalog.get_image_id_from_tag(tag, region)
    return image_id
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def is_image_tag_valid(tag: str, region: Optional[str]) -> bool:
    """Checks whether an image tag is valid.

    SSH cloud reuses the Kubernetes catalog, so this simply forwards to
    ``kubernetes_catalog.is_image_tag_valid``.
    """
    is_valid = kubernetes_catalog.is_image_tag_valid(tag, region)
    return is_valid
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def list_accelerators(
        gpus_only: bool,
        name_filter: Optional[str],
        region_filter: Optional[str],
        quantity_filter: Optional[int],
        case_sensitive: bool = True,
        all_regions: bool = False,
        require_price: bool = True) -> Dict[str, List[common.InstanceTypeInfo]]:
    """Lists accelerators for SSH-based Kubernetes clusters.

    Calls the module's ``_list_accelerators`` helper with ``realtime=False``
    and returns only the first element of its three-part result.
    """
    accelerators, _, _ = _list_accelerators(gpus_only=gpus_only,
                                            name_filter=name_filter,
                                            region_filter=region_filter,
                                            quantity_filter=quantity_filter,
                                            case_sensitive=case_sensitive,
                                            all_regions=all_regions,
                                            require_price=require_price,
                                            realtime=False)
    return accelerators
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def list_accelerators_realtime(
    gpus_only: bool,
    name_filter: Optional[str],
    region_filter: Optional[str],
    quantity_filter: Optional[int],
    case_sensitive: bool = True,
    all_regions: bool = False,
    require_price: bool = True
) -> Tuple[Dict[str, List[common.InstanceTypeInfo]], Dict[str, int], Dict[str,
                                                                          int]]:
    """Lists accelerators in SSH Node Pools with real-time information.

    Calls the module's ``_list_accelerators`` helper with ``realtime=True``
    and returns its three-part result unchanged.
    """
    return _list_accelerators(gpus_only=gpus_only,
                              name_filter=name_filter,
                              region_filter=region_filter,
                              quantity_filter=quantity_filter,
                              case_sensitive=case_sensitive,
                              all_regions=all_regions,
                              require_price=require_price,
                              realtime=True)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _list_accelerators(
    gpus_only: bool,
    name_filter: Optional[str],
    region_filter: Optional[str],
    quantity_filter: Optional[int],
    case_sensitive: bool = True,
    all_regions: bool = False,
    require_price: bool = True,
    realtime: bool = False
) -> Tuple[Dict[str, List[common.InstanceTypeInfo]], Dict[str, int], Dict[str,
                                                                          int]]:
    """Lists accelerators via the Kubernetes catalog, limited to SSH contexts.

    Three empty dicts are returned when ``region_filter`` does not start
    with ``'ssh-'``, when no SSH contexts exist, or when ``region_filter`` is
    not one of them. When no region is requested and ``all_regions`` is
    False, the first SSH context is used as the region filter.

    With ``realtime`` set, the result of
    ``kubernetes_catalog.list_accelerators_realtime`` is returned as-is;
    otherwise the result of ``kubernetes_catalog.list_accelerators`` is
    returned together with two empty dicts.
    """
    # Cheap name check first: avoids looking up contexts for non-SSH regions.
    if region_filter is not None and not region_filter.startswith('ssh-'):
        return {}, {}, {}

    ssh_contexts = ssh.SSH.existing_allowed_contexts()

    # Nothing to list without contexts, or when the requested region is not
    # one of the SSH contexts.
    region_is_unknown = (region_filter is not None and
                         region_filter not in ssh_contexts)
    if not ssh_contexts or region_is_unknown:
        return {}, {}, {}

    # Default to the first SSH context unless all regions were requested.
    # NOTE(review): with all_regions=True and no region filter the call below
    # receives region_filter=None -- confirm the Kubernetes catalog restricts
    # the result to SSH contexts in that case.
    if region_filter is None and not all_regions:
        region_filter = ssh_contexts[0]

    if not realtime:
        catalog_only = kubernetes_catalog.list_accelerators(
            gpus_only, name_filter, region_filter, quantity_filter,
            case_sensitive, all_regions, require_price)
        return catalog_only, {}, {}

    return kubernetes_catalog.list_accelerators_realtime(
        gpus_only, name_filter, region_filter, quantity_filter, case_sensitive,
        all_regions, require_price)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def validate_region_zone(
        region_name: Optional[str],
        zone_name: Optional[str],
        clouds: CloudFilter = None) -> Tuple[Optional[str], Optional[str]]:
    """Validates the region and zone for SSH cloud.

    The pair is first validated by ``kubernetes_catalog.validate_region_zone``.
    If the resulting region is set but is not one of the SSH contexts,
    ``(None, None)`` is returned; otherwise the validated pair is returned.
    """
    region, zone = kubernetes_catalog.validate_region_zone(
        region_name, zone_name, clouds)

    ssh_contexts = ssh.SSH.existing_allowed_contexts()

    # An unset region needs no context check; a set one must be an SSH context.
    if region is None or region in ssh_contexts:
        return region, zone
    return None, None
|
|
@@ -7,7 +7,7 @@ query instance types and pricing information for Vast.ai.
|
|
|
7
7
|
import typing
|
|
8
8
|
from typing import Dict, List, Optional, Tuple, Union
|
|
9
9
|
|
|
10
|
-
from sky.
|
|
10
|
+
from sky.catalog import common
|
|
11
11
|
from sky.utils import ux_utils
|
|
12
12
|
|
|
13
13
|
if typing.TYPE_CHECKING:
|
|
@@ -48,11 +48,14 @@ def get_vcpus_mem_from_instance_type(
|
|
|
48
48
|
|
|
49
49
|
def get_default_instance_type(cpus: Optional[str] = None,
|
|
50
50
|
memory: Optional[str] = None,
|
|
51
|
-
disk_tier: Optional[str] = None
|
|
51
|
+
disk_tier: Optional[str] = None,
|
|
52
|
+
region: Optional[str] = None,
|
|
53
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
52
54
|
del disk_tier
|
|
53
55
|
# NOTE: After expanding catalog to multiple entries, you may
|
|
54
56
|
# want to specify a default instance type or family.
|
|
55
|
-
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory
|
|
57
|
+
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory, region,
|
|
58
|
+
zone)
|
|
56
59
|
|
|
57
60
|
|
|
58
61
|
def get_accelerators_from_instance_type(
|
|
@@ -5,7 +5,7 @@ import typing
|
|
|
5
5
|
from typing import Dict, List, Optional, Tuple, Union
|
|
6
6
|
|
|
7
7
|
from sky.adaptors import common as adaptors_common
|
|
8
|
-
from sky.
|
|
8
|
+
from sky.catalog import common
|
|
9
9
|
|
|
10
10
|
if typing.TYPE_CHECKING:
|
|
11
11
|
import pandas as pd
|
|
@@ -72,6 +72,8 @@ def get_default_instance_type(
|
|
|
72
72
|
cpus: Optional[str] = None,
|
|
73
73
|
memory: Optional[str] = None,
|
|
74
74
|
disk_tier: Optional[str] = None,
|
|
75
|
+
region: Optional[str] = None,
|
|
76
|
+
zone: Optional[str] = None,
|
|
75
77
|
) -> Optional[str]:
|
|
76
78
|
del disk_tier # unused
|
|
77
79
|
if cpus is None and memory is None:
|
|
@@ -81,7 +83,8 @@ def get_default_instance_type(
|
|
|
81
83
|
else:
|
|
82
84
|
memory_gb_or_ratio = memory
|
|
83
85
|
return common.get_instance_type_for_cpus_mem_impl(_get_df(), cpus,
|
|
84
|
-
memory_gb_or_ratio
|
|
86
|
+
memory_gb_or_ratio,
|
|
87
|
+
region, zone)
|
|
85
88
|
|
|
86
89
|
|
|
87
90
|
def get_accelerators_from_instance_type(
|