skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +25 -7
- sky/adaptors/common.py +24 -1
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +170 -17
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +167 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1299 -380
- sky/backends/cloud_vm_ray_backend.py +1715 -518
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/wheel_utils.py +37 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +89 -48
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +335 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +491 -203
- sky/cli.py +5 -6005
- sky/client/{cli.py → cli/command.py} +2477 -1885
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +320 -0
- sky/client/common.py +70 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1203 -297
- sky/client/sdk_async.py +833 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +73 -0
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +358 -93
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +127 -36
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +563 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +206 -80
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -83
- sky/clouds/seeweb.py +466 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +177 -124
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +349 -139
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
- sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1451 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +132 -2
- sky/execution.py +206 -63
- sky/global_user_state.py +2374 -586
- sky/jobs/__init__.py +5 -0
- sky/jobs/client/sdk.py +242 -65
- sky/jobs/client/sdk_async.py +143 -0
- sky/jobs/constants.py +9 -8
- sky/jobs/controller.py +839 -277
- sky/jobs/file_content_utils.py +80 -0
- sky/jobs/log_gc.py +201 -0
- sky/jobs/recovery_strategy.py +398 -152
- sky/jobs/scheduler.py +315 -189
- sky/jobs/server/core.py +829 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2092 -701
- sky/jobs/utils.py +1242 -160
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +443 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +135 -50
- sky/provision/azure/instance.py +10 -5
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +114 -23
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +93 -14
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +789 -247
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +40 -43
- sky/provision/kubernetes/utils.py +1192 -531
- sky/provision/kubernetes/volume.py +282 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +196 -91
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +110 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +180 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +531 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +807 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/vsphere/common/vim_utils.py +1 -2
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +9 -19
- sky/py.typed +0 -0
- sky/resources.py +844 -118
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +225 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +10 -8
- sky/serve/controller.py +64 -19
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +115 -1
- sky/serve/replica_managers.py +273 -162
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +554 -251
- sky/serve/serve_utils.py +733 -220
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +133 -48
- sky/serve/service_spec.py +135 -16
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +200 -0
- sky/server/common.py +475 -181
- sky/server/config.py +81 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +229 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/requests/executor.py +528 -138
- sky/server/requests/payloads.py +351 -17
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +120 -0
- sky/server/requests/requests.py +817 -224
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +106 -0
- sky/server/rest.py +417 -0
- sky/server/server.py +1290 -284
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +345 -57
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +5 -0
- sky/setup_files/alembic.ini +156 -0
- sky/setup_files/dependencies.py +136 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +102 -5
- sky/skylet/attempt_skylet.py +1 -0
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +27 -20
- sky/skylet/constants.py +171 -19
- sky/skylet/events.py +105 -21
- sky/skylet/job_lib.py +335 -104
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/services.py +564 -0
- sky/skylet/skylet.py +63 -4
- sky/skylet/subprocess_daemon.py +103 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +621 -137
- sky/templates/aws-ray.yml.j2 +10 -3
- sky/templates/azure-ray.yml.j2 +1 -1
- sky/templates/do-ray.yml.j2 +1 -1
- sky/templates/gcp-ray.yml.j2 +57 -0
- sky/templates/hyperbolic-ray.yml.j2 +67 -0
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +607 -51
- sky/templates/lambda-ray.yml.j2 +1 -1
- sky/templates/nebius-ray.yml.j2 +33 -12
- sky/templates/paperspace-ray.yml.j2 +1 -1
- sky/templates/primeintellect-ray.yml.j2 +71 -0
- sky/templates/runpod-ray.yml.j2 +9 -1
- sky/templates/scp-ray.yml.j2 +3 -50
- sky/templates/seeweb-ray.yml.j2 +108 -0
- sky/templates/shadeform-ray.yml.j2 +72 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/websocket_proxy.py +178 -18
- sky/usage/usage_lib.py +18 -11
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +387 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +34 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +16 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +310 -87
- sky/utils/config_utils.py +87 -5
- sky/utils/context.py +402 -0
- sky/utils/context_utils.py +222 -0
- sky/utils/controller_utils.py +264 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +470 -0
- sky/utils/db/migration_utils.py +133 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +13 -27
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +5 -5
- sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +368 -0
- sky/utils/log_utils.py +300 -6
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +213 -37
- sky/utils/schemas.py +905 -147
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +38 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/timeline.py +24 -52
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +86 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +149 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +258 -0
- sky/volumes/server/server.py +122 -0
- sky/volumes/volume.py +212 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
- skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
sky/backends/docker_utils.py
CHANGED
|
@@ -168,7 +168,7 @@ def build_dockerimage(task: task_mod.Task,
|
|
|
168
168
|
build_dir=temp_dir)
|
|
169
169
|
|
|
170
170
|
dst = os.path.join(temp_dir, SKY_DOCKER_WORKDIR)
|
|
171
|
-
if task.workdir is not None:
|
|
171
|
+
if task.workdir is not None and isinstance(task.workdir, str):
|
|
172
172
|
# Copy workdir contents to tempdir
|
|
173
173
|
shutil.copytree(os.path.expanduser(task.workdir), dst)
|
|
174
174
|
else:
|
|
@@ -178,7 +178,8 @@ class LocalDockerBackend(backends.Backend['LocalDockerResourceHandle']):
|
|
|
178
178
|
return handle, False
|
|
179
179
|
|
|
180
180
|
def _sync_workdir(self, handle: LocalDockerResourceHandle,
|
|
181
|
-
workdir: Path
|
|
181
|
+
workdir: Union[Path, Dict[str, Any]],
|
|
182
|
+
envs_and_secrets: Dict[str, str]) -> None:
|
|
182
183
|
"""Workdir is sync'd by adding to the docker image.
|
|
183
184
|
|
|
184
185
|
This happens in the execute step.
|
|
@@ -188,6 +189,15 @@ class LocalDockerBackend(backends.Backend['LocalDockerResourceHandle']):
|
|
|
188
189
|
' a NoOp. If you are running sky exec, your workdir has not'
|
|
189
190
|
' been updated.')
|
|
190
191
|
|
|
192
|
+
def _download_file(self, handle: LocalDockerResourceHandle,
|
|
193
|
+
local_file_path: str, remote_file_path: str) -> None:
|
|
194
|
+
"""Syncs file from remote to local."""
|
|
195
|
+
# Copy from docker container to local
|
|
196
|
+
container = self.containers[handle]
|
|
197
|
+
copy_cmd = (
|
|
198
|
+
f'docker cp {container.name}:{remote_file_path} {local_file_path}')
|
|
199
|
+
subprocess.run(copy_cmd, shell=True, check=True)
|
|
200
|
+
|
|
191
201
|
def _sync_file_mounts(
|
|
192
202
|
self,
|
|
193
203
|
handle: LocalDockerResourceHandle,
|
|
@@ -273,13 +283,8 @@ class LocalDockerBackend(backends.Backend['LocalDockerResourceHandle']):
|
|
|
273
283
|
def _execute(self,
|
|
274
284
|
handle: LocalDockerResourceHandle,
|
|
275
285
|
task: 'task_lib.Task',
|
|
276
|
-
detach_run: bool,
|
|
277
286
|
dryrun: bool = False) -> None:
|
|
278
287
|
""" Launches the container."""
|
|
279
|
-
if detach_run:
|
|
280
|
-
raise NotImplementedError('detach_run=True is not supported in '
|
|
281
|
-
'LocalDockerBackend.')
|
|
282
|
-
|
|
283
288
|
if task.num_nodes > 1:
|
|
284
289
|
raise NotImplementedError(
|
|
285
290
|
'Tasks with num_nodes > 1 is currently not supported in '
|
sky/backends/wheel_utils.py
CHANGED
|
@@ -16,6 +16,7 @@ import pathlib
|
|
|
16
16
|
import re
|
|
17
17
|
import shutil
|
|
18
18
|
import subprocess
|
|
19
|
+
import sys
|
|
19
20
|
import tempfile
|
|
20
21
|
from typing import Optional, Tuple
|
|
21
22
|
|
|
@@ -27,13 +28,14 @@ import sky
|
|
|
27
28
|
from sky import sky_logging
|
|
28
29
|
from sky.backends import backend_utils
|
|
29
30
|
from sky.server import common
|
|
31
|
+
from sky.utils import directory_utils
|
|
30
32
|
|
|
31
33
|
logger = sky_logging.init_logger(__name__)
|
|
32
34
|
|
|
33
35
|
# Local wheel path is same as the remote path.
|
|
34
36
|
WHEEL_DIR = pathlib.Path(os.path.expanduser(backend_utils.SKY_REMOTE_PATH))
|
|
35
37
|
_WHEEL_LOCK_PATH = WHEEL_DIR.parent / '.wheels_lock'
|
|
36
|
-
SKY_PACKAGE_PATH = pathlib.Path(
|
|
38
|
+
SKY_PACKAGE_PATH = pathlib.Path(directory_utils.get_sky_dir())
|
|
37
39
|
|
|
38
40
|
# NOTE: keep the same as setup.py's setuptools.setup(name=..., ...).
|
|
39
41
|
_PACKAGE_WHEEL_NAME = 'skypilot'
|
|
@@ -132,19 +134,45 @@ def _build_sky_wheel() -> pathlib.Path:
|
|
|
132
134
|
# It is important to normalize the path, otherwise 'pip wheel' would
|
|
133
135
|
# treat the directory as a file and generate an empty wheel.
|
|
134
136
|
norm_path = str(tmp_dir) + os.sep
|
|
137
|
+
# TODO(#5046): Consider adding native UV support for building wheels.
|
|
138
|
+
# Use `python -m pip` instead of `pip3` for better compatibility across
|
|
139
|
+
# different environments (conda, venv, UV, system Python, etc.)
|
|
135
140
|
try:
|
|
136
|
-
# TODO(suquark): For python>=3.7, 'subprocess.run' supports capture
|
|
137
|
-
# of the output.
|
|
138
141
|
subprocess.run([
|
|
139
|
-
'
|
|
142
|
+
sys.executable, '-m', 'pip', 'wheel', '--no-deps', norm_path,
|
|
143
|
+
'--wheel-dir',
|
|
140
144
|
str(tmp_dir)
|
|
141
145
|
],
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
146
|
+
capture_output=True,
|
|
147
|
+
check=True,
|
|
148
|
+
text=True)
|
|
145
149
|
except subprocess.CalledProcessError as e:
|
|
146
|
-
|
|
147
|
-
|
|
150
|
+
error_msg = e.stderr
|
|
151
|
+
if 'No module named pip' in error_msg:
|
|
152
|
+
# pip module not found - provide helpful suggestions based on
|
|
153
|
+
# the available package managers
|
|
154
|
+
if shutil.which('uv'):
|
|
155
|
+
msg = ('pip module not found. Since you have UV installed, '
|
|
156
|
+
'you can install pip by running:\n'
|
|
157
|
+
' uv pip install pip')
|
|
158
|
+
elif shutil.which('conda'):
|
|
159
|
+
msg = (
|
|
160
|
+
'pip module not found. Since you have conda installed, '
|
|
161
|
+
'you can install pip by running:\n'
|
|
162
|
+
' conda install pip')
|
|
163
|
+
else:
|
|
164
|
+
msg = ('pip module not found. Please install pip for your '
|
|
165
|
+
f'Python environment ({sys.executable}).')
|
|
166
|
+
else:
|
|
167
|
+
# Other pip errors
|
|
168
|
+
msg = f'pip wheel command failed. Error: {error_msg}'
|
|
169
|
+
raise RuntimeError('Failed to build pip wheel for SkyPilot.\n' +
|
|
170
|
+
msg) from e
|
|
171
|
+
except FileNotFoundError as e:
|
|
172
|
+
# Python executable not found (extremely rare)
|
|
173
|
+
raise RuntimeError(
|
|
174
|
+
f'Failed to build pip wheel for SkyPilot. '
|
|
175
|
+
f'Python executable not found: {sys.executable}') from e
|
|
148
176
|
|
|
149
177
|
try:
|
|
150
178
|
wheel_path = next(tmp_dir.glob(_WHEEL_PATTERN))
|
|
@@ -4,24 +4,21 @@ import importlib
|
|
|
4
4
|
import typing
|
|
5
5
|
from typing import Dict, List, Optional, Set, Tuple, Union
|
|
6
6
|
|
|
7
|
-
from sky.
|
|
8
|
-
from sky.
|
|
9
|
-
from sky.clouds.service_catalog.constants import CATALOG_DIR
|
|
10
|
-
from sky.clouds.service_catalog.constants import CATALOG_SCHEMA_VERSION
|
|
11
|
-
from sky.clouds.service_catalog.constants import HOSTED_CATALOG_DIR_URL
|
|
7
|
+
from sky.catalog.config import fallback_to_default_catalog
|
|
8
|
+
from sky.skylet import constants
|
|
12
9
|
from sky.utils import resources_utils
|
|
13
10
|
from sky.utils import subprocess_utils
|
|
14
11
|
|
|
15
12
|
if typing.TYPE_CHECKING:
|
|
13
|
+
from sky.catalog import common
|
|
16
14
|
from sky.clouds import cloud
|
|
17
|
-
from sky.clouds.service_catalog import common
|
|
18
15
|
|
|
19
16
|
CloudFilter = Optional[Union[List[str], str]]
|
|
20
17
|
|
|
21
18
|
|
|
22
19
|
def _map_clouds_catalog(clouds: CloudFilter, method_name: str, *args, **kwargs):
|
|
23
20
|
if clouds is None:
|
|
24
|
-
clouds = list(ALL_CLOUDS)
|
|
21
|
+
clouds = list(constants.ALL_CLOUDS)
|
|
25
22
|
|
|
26
23
|
# TODO(hemil): Remove this once the common service catalog
|
|
27
24
|
# functions are refactored from clouds/kubernetes.py to
|
|
@@ -35,10 +32,10 @@ def _map_clouds_catalog(clouds: CloudFilter, method_name: str, *args, **kwargs):
|
|
|
35
32
|
def _execute_catalog_method(cloud: str):
|
|
36
33
|
try:
|
|
37
34
|
cloud_module = importlib.import_module(
|
|
38
|
-
f'sky.
|
|
35
|
+
f'sky.catalog.{cloud.lower()}_catalog')
|
|
39
36
|
except ModuleNotFoundError:
|
|
40
37
|
raise ValueError(
|
|
41
|
-
'Cannot find module "sky.
|
|
38
|
+
'Cannot find module "sky.catalog'
|
|
42
39
|
f'.{cloud}_catalog" for cloud "{cloud}".') from None
|
|
43
40
|
try:
|
|
44
41
|
method = getattr(cloud_module, method_name)
|
|
@@ -94,7 +91,7 @@ def list_accelerator_counts(
|
|
|
94
91
|
region_filter: Optional[str] = None,
|
|
95
92
|
quantity_filter: Optional[int] = None,
|
|
96
93
|
clouds: CloudFilter = None,
|
|
97
|
-
) -> Dict[str, List[
|
|
94
|
+
) -> Dict[str, List[float]]:
|
|
98
95
|
"""Lists all accelerators offered by Sky and available counts.
|
|
99
96
|
|
|
100
97
|
Returns: A dictionary of canonical accelerator names mapped to a list
|
|
@@ -110,12 +107,12 @@ def list_accelerator_counts(
|
|
|
110
107
|
require_price=False)
|
|
111
108
|
if not isinstance(results, list):
|
|
112
109
|
results = [results]
|
|
113
|
-
accelerator_counts: Dict[str, Set[
|
|
110
|
+
accelerator_counts: Dict[str, Set[float]] = collections.defaultdict(set)
|
|
114
111
|
for result in results:
|
|
115
112
|
for gpu, items in result.items():
|
|
116
113
|
for item in items:
|
|
117
114
|
accelerator_counts[gpu].add(item.accelerator_count)
|
|
118
|
-
ret: Dict[str, List[
|
|
115
|
+
ret: Dict[str, List[float]] = {}
|
|
119
116
|
for gpu, counts in accelerator_counts.items():
|
|
120
117
|
ret[gpu] = sorted(counts)
|
|
121
118
|
return ret
|
|
@@ -224,6 +221,8 @@ def get_default_instance_type(cpus: Optional[str] = None,
|
|
|
224
221
|
memory: Optional[str] = None,
|
|
225
222
|
disk_tier: Optional[
|
|
226
223
|
resources_utils.DiskTier] = None,
|
|
224
|
+
region: Optional[str] = None,
|
|
225
|
+
zone: Optional[str] = None,
|
|
227
226
|
clouds: CloudFilter = None) -> Optional[str]:
|
|
228
227
|
"""Returns the cloud's default instance type for given #vCPUs and memory.
|
|
229
228
|
|
|
@@ -237,7 +236,7 @@ def get_default_instance_type(cpus: Optional[str] = None,
|
|
|
237
236
|
the given CPU and memory requirement.
|
|
238
237
|
"""
|
|
239
238
|
return _map_clouds_catalog(clouds, 'get_default_instance_type', cpus,
|
|
240
|
-
memory, disk_tier)
|
|
239
|
+
memory, disk_tier, region, zone)
|
|
241
240
|
|
|
242
241
|
|
|
243
242
|
def get_accelerators_from_instance_type(
|
|
@@ -248,9 +247,16 @@ def get_accelerators_from_instance_type(
|
|
|
248
247
|
instance_type)
|
|
249
248
|
|
|
250
249
|
|
|
250
|
+
def get_arch_from_instance_type(instance_type: str,
|
|
251
|
+
clouds: CloudFilter = None) -> Optional[str]:
|
|
252
|
+
"""Returns the arch from a instance type."""
|
|
253
|
+
return _map_clouds_catalog(clouds, 'get_arch_from_instance_type',
|
|
254
|
+
instance_type)
|
|
255
|
+
|
|
256
|
+
|
|
251
257
|
def get_instance_type_for_accelerator(
|
|
252
258
|
acc_name: str,
|
|
253
|
-
acc_count: int,
|
|
259
|
+
acc_count: Union[int, float],
|
|
254
260
|
cpus: Optional[str] = None,
|
|
255
261
|
memory: Optional[str] = None,
|
|
256
262
|
use_spot: bool = False,
|
|
@@ -327,6 +333,7 @@ def get_common_gpus() -> List[str]:
|
|
|
327
333
|
'A10G',
|
|
328
334
|
'A100',
|
|
329
335
|
'A100-80GB',
|
|
336
|
+
'B200',
|
|
330
337
|
'H100',
|
|
331
338
|
'H200',
|
|
332
339
|
'L4',
|
|
@@ -380,9 +387,4 @@ __all__ = [
|
|
|
380
387
|
'is_image_tag_valid',
|
|
381
388
|
# Configuration
|
|
382
389
|
'fallback_to_default_catalog',
|
|
383
|
-
# Constants
|
|
384
|
-
'ALL_CLOUDS',
|
|
385
|
-
'HOSTED_CATALOG_DIR_URL',
|
|
386
|
-
'CATALOG_SCHEMA_VERSION',
|
|
387
|
-
'CATALOG_DIR',
|
|
388
390
|
]
|
|
@@ -13,10 +13,10 @@ from typing import Dict, List, Optional, Tuple, Union
|
|
|
13
13
|
from sky import exceptions
|
|
14
14
|
from sky import sky_logging
|
|
15
15
|
from sky.adaptors import common as adaptors_common
|
|
16
|
+
from sky.catalog import common
|
|
17
|
+
from sky.catalog import config
|
|
18
|
+
from sky.catalog.data_fetchers import fetch_aws
|
|
16
19
|
from sky.clouds import aws
|
|
17
|
-
from sky.clouds.service_catalog import common
|
|
18
|
-
from sky.clouds.service_catalog import config
|
|
19
|
-
from sky.clouds.service_catalog.data_fetchers import fetch_aws
|
|
20
20
|
from sky.utils import common_utils
|
|
21
21
|
from sky.utils import resources_utils
|
|
22
22
|
from sky.utils import rich_utils
|
|
@@ -38,14 +38,26 @@ _DEFAULT_INSTANCE_FAMILY = [
|
|
|
38
38
|
# CPU: Intel Ice Lake 8375C.
|
|
39
39
|
# Memory: 4 GiB RAM per 1 vCPU;
|
|
40
40
|
'm6i',
|
|
41
|
+
# This is the latest general-purpose instance family as of Jul 2025.
|
|
42
|
+
# CPU: Intel Sapphire Rapids.
|
|
43
|
+
# Memory: 4 GiB RAM per 1 vCPU;
|
|
44
|
+
'm7i',
|
|
41
45
|
# This is the latest memory-optimized instance family as of Mar 2023.
|
|
42
46
|
# CPU: Intel Ice Lake 8375C
|
|
43
47
|
# Memory: 8 GiB RAM per 1 vCPU;
|
|
44
48
|
'r6i',
|
|
49
|
+
# This is the latest memory-optimized instance family as of Jul 2025.
|
|
50
|
+
# CPU: Intel Sapphire Rapids.
|
|
51
|
+
# Memory: 8 GiB RAM per 1 vCPU;
|
|
52
|
+
'r7i',
|
|
45
53
|
# This is the latest compute-optimized instance family as of Mar 2023.
|
|
46
54
|
# CPU: Intel Ice Lake 8375C
|
|
47
55
|
# Memory: 2 GiB RAM per 1 vCPU;
|
|
48
56
|
'c6i',
|
|
57
|
+
# This is the latest compute-optimized instance family as of Jul 2025.
|
|
58
|
+
# CPU: Intel Sapphire Rapids.
|
|
59
|
+
# Memory: 2 GiB RAM per 1 vCPU;
|
|
60
|
+
'c7i',
|
|
49
61
|
]
|
|
50
62
|
_DEFAULT_NUM_VCPUS = 8
|
|
51
63
|
_DEFAULT_MEMORY_CPU_RATIO = 4
|
|
@@ -230,10 +242,12 @@ def get_vcpus_mem_from_instance_type(
|
|
|
230
242
|
instance_type)
|
|
231
243
|
|
|
232
244
|
|
|
233
|
-
def get_default_instance_type(
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
245
|
+
def get_default_instance_type(cpus: Optional[str] = None,
|
|
246
|
+
memory: Optional[str] = None,
|
|
247
|
+
disk_tier: Optional[
|
|
248
|
+
resources_utils.DiskTier] = None,
|
|
249
|
+
region: Optional[str] = None,
|
|
250
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
237
251
|
del disk_tier # unused
|
|
238
252
|
if cpus is None and memory is None:
|
|
239
253
|
cpus = f'{_DEFAULT_NUM_VCPUS}+'
|
|
@@ -247,7 +261,8 @@ def get_default_instance_type(
|
|
|
247
261
|
df = _get_df()
|
|
248
262
|
df = df[df['InstanceType'].str.startswith(instance_type_prefix)]
|
|
249
263
|
return common.get_instance_type_for_cpus_mem_impl(df, cpus,
|
|
250
|
-
memory_gb_or_ratio
|
|
264
|
+
memory_gb_or_ratio,
|
|
265
|
+
region, zone)
|
|
251
266
|
|
|
252
267
|
|
|
253
268
|
def get_accelerators_from_instance_type(
|
|
@@ -256,6 +271,10 @@ def get_accelerators_from_instance_type(
|
|
|
256
271
|
_get_df(), instance_type)
|
|
257
272
|
|
|
258
273
|
|
|
274
|
+
def get_arch_from_instance_type(instance_type: str) -> Optional[str]:
|
|
275
|
+
return common.get_arch_from_instance_type_impl(_get_df(), instance_type)
|
|
276
|
+
|
|
277
|
+
|
|
259
278
|
def get_instance_type_for_accelerator(
|
|
260
279
|
acc_name: str,
|
|
261
280
|
acc_count: int,
|
|
@@ -8,8 +8,8 @@ from typing import Dict, List, Optional, Tuple, Union
|
|
|
8
8
|
|
|
9
9
|
from sky import clouds as cloud_lib
|
|
10
10
|
from sky import sky_logging
|
|
11
|
+
from sky.catalog import common
|
|
11
12
|
from sky.clouds import Azure
|
|
12
|
-
from sky.clouds.service_catalog import common
|
|
13
13
|
from sky.utils import resources_utils
|
|
14
14
|
from sky.utils import ux_utils
|
|
15
15
|
|
|
@@ -17,7 +17,7 @@ logger = sky_logging.init_logger(__name__)
|
|
|
17
17
|
|
|
18
18
|
# This list should match the list of regions in
|
|
19
19
|
# skypilot image generation Packer script's replication_regions
|
|
20
|
-
# sky/clouds/
|
|
20
|
+
# sky/clouds/catalog/images/skypilot-azure-cpu-ubuntu.pkr.hcl
|
|
21
21
|
COMMUNITY_IMAGE_AVAILABLE_REGIONS = {
|
|
22
22
|
'centralus',
|
|
23
23
|
'eastus',
|
|
@@ -114,10 +114,12 @@ def _get_instance_family(instance_type: str) -> str:
|
|
|
114
114
|
return instance_family
|
|
115
115
|
|
|
116
116
|
|
|
117
|
-
def get_default_instance_type(
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
117
|
+
def get_default_instance_type(cpus: Optional[str] = None,
|
|
118
|
+
memory: Optional[str] = None,
|
|
119
|
+
disk_tier: Optional[
|
|
120
|
+
resources_utils.DiskTier] = None,
|
|
121
|
+
region: Optional[str] = None,
|
|
122
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
121
123
|
if cpus is None and memory is None:
|
|
122
124
|
cpus = f'{_DEFAULT_NUM_VCPUS}+'
|
|
123
125
|
if memory is None:
|
|
@@ -133,7 +135,8 @@ def get_default_instance_type(
|
|
|
133
135
|
|
|
134
136
|
df = df.loc[df['InstanceType'].apply(_filter_disk_type)]
|
|
135
137
|
return common.get_instance_type_for_cpus_mem_impl(df, cpus,
|
|
136
|
-
memory_gb_or_ratio
|
|
138
|
+
memory_gb_or_ratio,
|
|
139
|
+
region, zone)
|
|
137
140
|
|
|
138
141
|
|
|
139
142
|
def get_accelerators_from_instance_type(
|
|
@@ -12,7 +12,8 @@ import filelock
|
|
|
12
12
|
from sky import sky_logging
|
|
13
13
|
from sky.adaptors import common as adaptors_common
|
|
14
14
|
from sky.clouds import cloud as cloud_lib
|
|
15
|
-
from sky.
|
|
15
|
+
from sky.skylet import constants
|
|
16
|
+
from sky.utils import annotations
|
|
16
17
|
from sky.utils import common_utils
|
|
17
18
|
from sky.utils import registry
|
|
18
19
|
from sky.utils import rich_utils
|
|
@@ -50,7 +51,7 @@ class InstanceTypeInfo(NamedTuple):
|
|
|
50
51
|
cloud: str
|
|
51
52
|
instance_type: Optional[str]
|
|
52
53
|
accelerator_name: str
|
|
53
|
-
accelerator_count:
|
|
54
|
+
accelerator_count: float
|
|
54
55
|
cpu_count: Optional[float]
|
|
55
56
|
device_memory: Optional[float]
|
|
56
57
|
memory: Optional[float]
|
|
@@ -125,17 +126,21 @@ class LazyDataFrame:
|
|
|
125
126
|
|
|
126
127
|
We don't need to load the catalog for every SkyPilot call, and this class
|
|
127
128
|
allows us to load the catalog only when needed.
|
|
129
|
+
|
|
130
|
+
Use update_if_stale_func to pass in a function that decides whether to
|
|
131
|
+
update the catalog on disk, updates it if needed, and returns
|
|
132
|
+
a bool indicating whether the update was done.
|
|
128
133
|
"""
|
|
129
134
|
|
|
130
|
-
def __init__(self, filename: str,
|
|
135
|
+
def __init__(self, filename: str, update_if_stale_func: Callable[[], bool]):
|
|
131
136
|
self._filename = filename
|
|
132
137
|
self._df: Optional['pd.DataFrame'] = None
|
|
133
|
-
self.
|
|
138
|
+
self._update_if_stale_func = update_if_stale_func
|
|
134
139
|
|
|
140
|
+
@annotations.lru_cache(scope='request')
|
|
135
141
|
def _load_df(self) -> 'pd.DataFrame':
|
|
136
|
-
if self._df is None:
|
|
142
|
+
if self._update_if_stale_func() or self._df is None:
|
|
137
143
|
try:
|
|
138
|
-
self._update_func()
|
|
139
144
|
self._df = pd.read_csv(self._filename)
|
|
140
145
|
except Exception as e: # pylint: disable=broad-except
|
|
141
146
|
# As users can manually modify the catalog, read_csv can fail.
|
|
@@ -193,48 +198,60 @@ def read_catalog(filename: str,
|
|
|
193
198
|
return last_update + pull_frequency_hours * 3600 < time.time()
|
|
194
199
|
|
|
195
200
|
def _update_catalog():
|
|
201
|
+
# Fast path: Exit early to avoid lock contention.
|
|
202
|
+
if not _need_update():
|
|
203
|
+
return False
|
|
204
|
+
|
|
196
205
|
# Atomic check, to avoid conflicts with other processes.
|
|
197
206
|
with filelock.FileLock(meta_path + '.lock'):
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
207
|
+
# Double check after acquiring the lock.
|
|
208
|
+
if not _need_update():
|
|
209
|
+
return False
|
|
210
|
+
|
|
211
|
+
url = f'{constants.HOSTED_CATALOG_DIR_URL}/{constants.CATALOG_SCHEMA_VERSION}/{filename}' # pylint: disable=line-too-long
|
|
212
|
+
url_fallback = f'{constants.HOSTED_CATALOG_DIR_URL_S3_MIRROR}/{constants.CATALOG_SCHEMA_VERSION}/{filename}' # pylint: disable=line-too-long
|
|
213
|
+
headers = {'User-Agent': 'SkyPilot/0.7'}
|
|
214
|
+
update_frequency_str = ''
|
|
215
|
+
if pull_frequency_hours is not None:
|
|
216
|
+
update_frequency_str = (
|
|
217
|
+
f' (every {pull_frequency_hours} hours)')
|
|
218
|
+
with rich_utils.safe_status(
|
|
219
|
+
ux_utils.spinner_message(
|
|
220
|
+
f'Updating {cloud} catalog: {filename}') +
|
|
221
|
+
f'{update_frequency_str}'):
|
|
222
|
+
try:
|
|
223
|
+
r = requests.get(url=url, headers=headers)
|
|
224
|
+
if r.status_code == 429:
|
|
225
|
+
# fallback to s3 mirror, github introduced rate
|
|
226
|
+
# limit after 2025-05, see
|
|
227
|
+
# https://github.com/skypilot-org/skypilot/issues/5438
|
|
228
|
+
# for more details
|
|
229
|
+
r = requests.get(url=url_fallback, headers=headers)
|
|
230
|
+
r.raise_for_status()
|
|
231
|
+
except requests.exceptions.RequestException as e:
|
|
232
|
+
error_str = (f'Failed to fetch {cloud} catalog '
|
|
233
|
+
f'{filename}. ')
|
|
234
|
+
if os.path.exists(catalog_path):
|
|
235
|
+
logger.warning(
|
|
236
|
+
f'{error_str}Using cached catalog files.')
|
|
237
|
+
# Update catalog file modification time.
|
|
238
|
+
os.utime(catalog_path, None) # Sets to current time
|
|
226
239
|
else:
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
240
|
+
logger.error(f'{error_str}Please check your internet '
|
|
241
|
+
'connection.')
|
|
242
|
+
with ux_utils.print_exception_no_traceback():
|
|
243
|
+
raise e
|
|
244
|
+
else:
|
|
245
|
+
# Download successful, save the catalog to a local file.
|
|
246
|
+
os.makedirs(os.path.dirname(catalog_path), exist_ok=True)
|
|
247
|
+
with open(catalog_path, 'w', encoding='utf-8') as f:
|
|
248
|
+
f.write(r.text)
|
|
249
|
+
with open(meta_path + '.md5', 'w', encoding='utf-8') as f:
|
|
250
|
+
f.write(hashlib.md5(r.text.encode()).hexdigest())
|
|
251
|
+
logger.debug(f'Updated {cloud} catalog {filename}.')
|
|
252
|
+
return True
|
|
236
253
|
|
|
237
|
-
return LazyDataFrame(catalog_path,
|
|
254
|
+
return LazyDataFrame(catalog_path, update_if_stale_func=_update_catalog)
|
|
238
255
|
|
|
239
256
|
|
|
240
257
|
def _get_instance_type(
|
|
@@ -337,7 +354,7 @@ def get_hourly_cost_impl(
|
|
|
337
354
|
) -> float:
|
|
338
355
|
"""Returns the hourly price of a VM instance in the given region and zone.
|
|
339
356
|
|
|
340
|
-
Refer to get_hourly_cost in
|
|
357
|
+
Refer to get_hourly_cost in catalog/__init__.py for the docstring.
|
|
341
358
|
"""
|
|
342
359
|
df = _get_instance_type(df, instance_type, region, zone)
|
|
343
360
|
if df.empty:
|
|
@@ -459,8 +476,11 @@ def _filter_region_zone(df: 'pd.DataFrame', region: Optional[str],
|
|
|
459
476
|
|
|
460
477
|
|
|
461
478
|
def get_instance_type_for_cpus_mem_impl(
|
|
462
|
-
df: 'pd.DataFrame',
|
|
463
|
-
|
|
479
|
+
df: 'pd.DataFrame',
|
|
480
|
+
cpus: Optional[str],
|
|
481
|
+
memory_gb_or_ratio: Optional[str],
|
|
482
|
+
region: Optional[str] = None,
|
|
483
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
464
484
|
"""Returns the cheapest instance type that satisfies the requirements.
|
|
465
485
|
|
|
466
486
|
Args:
|
|
@@ -473,7 +493,10 @@ def get_instance_type_for_cpus_mem_impl(
|
|
|
473
493
|
returned instance type should have at least the given memory size.
|
|
474
494
|
If the string ends with "x", then the returned instance type should
|
|
475
495
|
have at least the given number of vCPUs times the given ratio.
|
|
496
|
+
region: The region to filter by.
|
|
497
|
+
zone: The zone to filter by.
|
|
476
498
|
"""
|
|
499
|
+
df = _filter_region_zone(df, region, zone)
|
|
477
500
|
df = _filter_with_cpus(df, cpus)
|
|
478
501
|
df = _filter_with_mem(df, memory_gb_or_ratio)
|
|
479
502
|
if df.empty:
|
|
@@ -504,6 +527,24 @@ def get_accelerators_from_instance_type_impl(
|
|
|
504
527
|
return {acc_name: _convert(acc_count)}
|
|
505
528
|
|
|
506
529
|
|
|
530
|
+
def get_arch_from_instance_type_impl(
|
|
531
|
+
df: 'pd.DataFrame',
|
|
532
|
+
instance_type: str,
|
|
533
|
+
) -> Optional[str]:
|
|
534
|
+
df = _get_instance_type(df, instance_type, None)
|
|
535
|
+
if df.empty:
|
|
536
|
+
with ux_utils.print_exception_no_traceback():
|
|
537
|
+
raise ValueError(f'No instance type {instance_type} found.')
|
|
538
|
+
row = df.iloc[0]
|
|
539
|
+
if 'Arch' not in row:
|
|
540
|
+
return None
|
|
541
|
+
arch = row['Arch']
|
|
542
|
+
if pd.isnull(arch):
|
|
543
|
+
return None
|
|
544
|
+
|
|
545
|
+
return arch
|
|
546
|
+
|
|
547
|
+
|
|
507
548
|
def get_instance_type_for_accelerator_impl(
|
|
508
549
|
df: 'pd.DataFrame',
|
|
509
550
|
acc_name: str,
|
|
@@ -608,7 +649,7 @@ def list_accelerators_impl(
|
|
|
608
649
|
df = df[df['Region'].str.contains(region_filter,
|
|
609
650
|
case=case_sensitive,
|
|
610
651
|
regex=True)]
|
|
611
|
-
df['AcceleratorCount'] = df['AcceleratorCount'].astype(
|
|
652
|
+
df['AcceleratorCount'] = df['AcceleratorCount'].astype(float)
|
|
612
653
|
if quantity_filter is not None:
|
|
613
654
|
df = df[df['AcceleratorCount'] == quantity_filter]
|
|
614
655
|
grouped = df.groupby('AcceleratorName')
|
|
@@ -3,14 +3,14 @@
|
|
|
3
3
|
import typing
|
|
4
4
|
from typing import Dict, List, Optional, Tuple, Union
|
|
5
5
|
|
|
6
|
-
from sky.
|
|
7
|
-
|
|
6
|
+
from sky.catalog import common
|
|
7
|
+
from sky.provision.cudo import cudo_machine_type as cudo_mt
|
|
8
8
|
from sky.utils import ux_utils
|
|
9
9
|
|
|
10
10
|
if typing.TYPE_CHECKING:
|
|
11
11
|
from sky.clouds import cloud
|
|
12
12
|
|
|
13
|
-
_PULL_FREQUENCY_HOURS =
|
|
13
|
+
_PULL_FREQUENCY_HOURS = 7
|
|
14
14
|
_df = common.read_catalog(cudo_mt.VMS_CSV,
|
|
15
15
|
pull_frequency_hours=_PULL_FREQUENCY_HOURS)
|
|
16
16
|
|
|
@@ -51,7 +51,9 @@ def get_vcpus_mem_from_instance_type(
|
|
|
51
51
|
|
|
52
52
|
def get_default_instance_type(cpus: Optional[str] = None,
|
|
53
53
|
memory: Optional[str] = None,
|
|
54
|
-
disk_tier: Optional[str] = None
|
|
54
|
+
disk_tier: Optional[str] = None,
|
|
55
|
+
region: Optional[str] = None,
|
|
56
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
55
57
|
del disk_tier
|
|
56
58
|
# NOTE: After expanding catalog to multiple entries, you may
|
|
57
59
|
# want to specify a default instance type or family.
|
|
@@ -62,7 +64,8 @@ def get_default_instance_type(cpus: Optional[str] = None,
|
|
|
62
64
|
if memory is None:
|
|
63
65
|
memory_gb_or_ratio = f'{_DEFAULT_MEMORY_CPU_RATIO}x'
|
|
64
66
|
return common.get_instance_type_for_cpus_mem_impl(_df, cpus,
|
|
65
|
-
memory_gb_or_ratio
|
|
67
|
+
memory_gb_or_ratio,
|
|
68
|
+
region, zone)
|
|
66
69
|
|
|
67
70
|
|
|
68
71
|
def get_accelerators_from_instance_type(
|