skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +81 -16
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +173 -30
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1323 -397
- sky/backends/cloud_vm_ray_backend.py +1749 -1029
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/task_codegen.py +633 -0
- sky/backends/wheel_utils.py +55 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +90 -49
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +338 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +533 -185
- sky/cli.py +5 -5975
- sky/client/{cli.py → cli/command.py} +2591 -1956
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +322 -0
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +78 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1219 -319
- sky/client/sdk_async.py +827 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +564 -164
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +140 -40
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +570 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +210 -81
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -86
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +231 -167
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +375 -173
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1460 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +126 -2
- sky/execution.py +216 -63
- sky/global_user_state.py +2390 -586
- sky/jobs/__init__.py +7 -0
- sky/jobs/client/sdk.py +300 -58
- sky/jobs/client/sdk_async.py +161 -0
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +848 -275
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +402 -152
- sky/jobs/scheduler.py +314 -189
- sky/jobs/server/core.py +836 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2109 -706
- sky/jobs/utils.py +1306 -215
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +453 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +136 -50
- sky/provision/azure/instance.py +11 -6
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +140 -33
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +101 -20
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +919 -280
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +44 -43
- sky/provision/kubernetes/utils.py +1221 -534
- sky/provision/kubernetes/volume.py +343 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +237 -137
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +117 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +214 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +707 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +4 -4
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +17 -20
- sky/py.typed +0 -0
- sky/resources.py +845 -119
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +227 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +12 -9
- sky/serve/controller.py +68 -17
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +116 -2
- sky/serve/replica_managers.py +434 -249
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +569 -257
- sky/serve/serve_utils.py +775 -265
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +192 -89
- sky/serve/service_spec.py +144 -20
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +202 -0
- sky/server/common.py +478 -182
- sky/server/config.py +85 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +295 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/middleware_utils.py +166 -0
- sky/server/requests/executor.py +558 -138
- sky/server/requests/payloads.py +364 -24
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +121 -0
- sky/server/requests/requests.py +822 -226
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +455 -0
- sky/server/server.py +1309 -285
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +327 -61
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +11 -1
- sky/setup_files/alembic.ini +160 -0
- sky/setup_files/dependencies.py +139 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +114 -7
- sky/skylet/attempt_skylet.py +106 -24
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +29 -20
- sky/skylet/constants.py +216 -25
- sky/skylet/events.py +101 -21
- sky/skylet/job_lib.py +345 -164
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +568 -0
- sky/skylet/skylet.py +72 -4
- sky/skylet/subprocess_daemon.py +104 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +685 -163
- sky/templates/aws-ray.yml.j2 +11 -3
- sky/templates/azure-ray.yml.j2 +2 -1
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +2 -1
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +62 -1
- sky/templates/hyperbolic-ray.yml.j2 +68 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +611 -50
- sky/templates/lambda-ray.yml.j2 +2 -1
- sky/templates/nebius-ray.yml.j2 +34 -12
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +2 -1
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +10 -1
- sky/templates/scp-ray.yml.j2 +4 -50
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/vast-ray.yml.j2 +1 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +212 -37
- sky/usage/usage_lib.py +31 -15
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +397 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +35 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +38 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +314 -91
- sky/utils/config_utils.py +74 -5
- sky/utils/context.py +403 -0
- sky/utils/context_utils.py +242 -0
- sky/utils/controller_utils.py +383 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +485 -0
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +137 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +15 -29
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +18 -8
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +416 -0
- sky/utils/log_utils.py +82 -107
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +217 -39
- sky/utils/schemas.py +955 -160
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +29 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +26 -53
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +165 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +150 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +270 -0
- sky/volumes/server/server.py +124 -0
- sky/volumes/volume.py +215 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
- skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
- skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
"""A script that generates the Seeweb catalog.
|
|
2
|
+
|
|
3
|
+
Usage:
|
|
4
|
+
python fetch_seeweb.py [-h] [--api-key API_KEY]
|
|
5
|
+
[--api-key-path API_KEY_PATH]
|
|
6
|
+
|
|
7
|
+
If neither --api-key nor --api-key-path is provided, this script will parse
|
|
8
|
+
`~/.seeweb_cloud/seeweb_keys` to look for the Seeweb API key.
|
|
9
|
+
"""
|
|
10
|
+
import argparse
|
|
11
|
+
import configparser
|
|
12
|
+
import csv
|
|
13
|
+
import json
|
|
14
|
+
import os
|
|
15
|
+
from typing import Any, Dict, List, Optional
|
|
16
|
+
|
|
17
|
+
from sky.adaptors.seeweb import ecsapi
|
|
18
|
+
|
|
19
|
+
# Translation table: GPU label as reported by Seeweb -> SkyPilot canonical
# accelerator name. Labels missing from this table are passed through
# unchanged by normalize_gpu_name().
SEEWEB_GPU_NAME_TO_SKYPILOT_GPU_NAME = {
    'H200 141GB': 'H200',
    'RTX A6000 48GB': 'RTXA6000',
    'A100 80GB': 'A100',
    'L4 24GB': 'L4',
    'L40s 48GB': 'L40S',
    'H100 80GB': 'H100',
    'MI300X': 'MI300X',
    'A30': 'A30',
    'RTX 6000 24GB': 'RTX6000',
    'Tenstorrent Grayskull e75': 'GRAYSKULL-E75',
    'Tenstorrent Grayskull e150': 'GRAYSKULL-E150',
}
|
|
33
|
+
|
|
34
|
+
# Per-device GPU memory in MB, keyed by SkyPilot canonical accelerator name.
# The trailing comment on each entry is the nominal marketing size.
VRAM = {
    'RTXA6000': 48384,  # 48GB
    'H200': 144384,  # 141GB
    'A100': 81920,  # 80GB
    'L4': 24576,  # 24GB
    'L40S': 49152,  # 48GB
    'H100': 81920,  # 80GB
    'MI300X': 192000,  # 192GB
    'A30': 24576,  # 24GB
    'RTX6000': 24576,  # 24GB
    'GRAYSKULL-E75': 8192,  # 8GB
    'GRAYSKULL-E150': 8192,  # 8GB
}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def is_tenstorrent_gpu_name(gpu_name: Optional[str]) -> bool:
    """Tell whether ``gpu_name`` denotes a Tenstorrent accelerator.

    The match is a case-insensitive substring test for either the vendor
    name ('TENSTORRENT') or the product family ('GRAYSKULL'), so it works on
    both raw and normalized names. Empty or ``None`` names never match.
    """
    if gpu_name:
        normalized = str(gpu_name).upper()
        return any(
            marker in normalized for marker in ('TENSTORRENT', 'GRAYSKULL'))
    return False
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def is_mi300x_gpu_name(gpu_name: Optional[str]) -> bool:
    """Tell whether ``gpu_name`` denotes an AMD MI300X.

    Case-insensitive substring match on 'MI300X'; empty or ``None`` names
    never match.
    """
    return bool(gpu_name) and 'MI300X' in str(gpu_name).upper()
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def get_api_key(path: Optional[str] = None) -> str:
    """Return the Seeweb API key from the key file or the environment.

    The key file is consulted first. The ``SEEWEB_API_KEY`` environment
    variable is used as a fallback when the file is missing, cannot be
    parsed, or has no non-empty ``api_key`` entry in its ``[DEFAULT]``
    section.

    Args:
        path: Location of the key file; ``~`` is expanded. Defaults to
            ``~/.seeweb_cloud/seeweb_keys``.

    Returns:
        The API key with surrounding whitespace removed.

    Raises:
        ValueError: If neither the file nor the environment variable yields
            a non-empty key.
    """
    path = os.path.expanduser(
        '~/.seeweb_cloud/seeweb_keys' if path is None else path)

    file_error: Optional[Exception] = None
    try:
        parser = configparser.ConfigParser()
        # ConfigParser.read() silently skips files it cannot open, so a
        # missing file shows up as a KeyError on the lookup below rather
        # than as FileNotFoundError.
        parser.read(path)
        file_key = parser['DEFAULT']['api_key'].strip()
        if file_key:
            return file_key
    except (KeyError, OSError, UnicodeDecodeError,
            configparser.Error) as exc:
        # A malformed file (e.g. no section header) must not prevent the
        # environment-variable fallback from being tried.
        file_error = exc

    env_key = os.environ.get('SEEWEB_API_KEY', '').strip()
    if env_key:
        return env_key

    raise ValueError(
        f'API key not found in {path} or ENV variable SEEWEB_API_KEY'
    ) from file_error
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def normalize_gpu_name(gpu_name: str) -> str:
    """Map a Seeweb GPU label to its SkyPilot canonical accelerator name.

    Args:
        gpu_name: GPU label as reported by Seeweb.

    Returns:
        The canonical name from ``SEEWEB_GPU_NAME_TO_SKYPILOT_GPU_NAME`` when
        the label is known; the label itself (after printing a warning) when
        it is not; and ``''`` when the label is empty or ``None``.
    """
    if not gpu_name:
        return ''

    canonical_name = SEEWEB_GPU_NAME_TO_SKYPILOT_GPU_NAME.get(gpu_name)
    if canonical_name:
        return canonical_name

    # Unknown label: keep it so the plan still shows up in the catalog, but
    # make the gap in the mapping visible to whoever runs the fetcher.
    print(f'Warning: GPU name "{gpu_name}" not found in mapping, '
          'using original name')
    return gpu_name
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def parse_plan_info(plan: Any) -> Dict[str, Any]:
    """Extract the catalog-relevant fields from a Seeweb plan object.

    Only attribute-style plan objects are supported. The attributes read are
    ``name``, ``cpu``, ``ram``, ``hourly_price``, ``gpu`` and ``gpu_label``.
    Any numeric attribute that is missing or cannot be converted falls back
    to zero instead of aborting the whole plan.

    Args:
        plan: Plan object returned by the Seeweb API client.

    Returns:
        A dict with keys ``plan_name``, ``vcpus``, ``memory_gb``,
        ``gpu_name``, ``gpu_count``, ``gpu_vram_mb`` and ``price``.
        ``gpu_name`` is ``None`` when the plan has no GPU label and its name
        does not contain 'GPU'.

    Raises:
        ValueError: If ``plan`` has no ``name`` attribute (for example, a
            plain dict).
    """
    if not hasattr(plan, 'name'):
        raise ValueError(f'Unsupported plan format: {type(plan)}')

    plan_name = plan.name

    # Guarded like the other numeric fields so a bad `cpu` value degrades to
    # 0 rather than raising.
    try:
        vcpus = int(getattr(plan, 'cpu', 0))
    except (ValueError, TypeError):
        vcpus = 0

    # `ram` is treated as a size in MB and converted to GB.
    memory_mb = getattr(plan, 'ram', 0)
    try:
        memory_gb = int(memory_mb) / 1024 if memory_mb else 0
    except (ValueError, TypeError):
        memory_gb = 0

    try:
        price = float(getattr(plan, 'hourly_price', 0.0))
    except (ValueError, TypeError):
        price = 0.0

    try:
        gpu_count = int(getattr(plan, 'gpu', 0))
    except (ValueError, TypeError):
        gpu_count = 0

    # Prefer the explicit GPU label; otherwise infer a name from the plan
    # name (e.g. ECS1GPU11 -> GPU11).
    gpu_label = getattr(plan, 'gpu_label', None)
    if gpu_label:
        gpu_name = normalize_gpu_name(gpu_label)
    elif isinstance(plan_name, str) and 'GPU' in plan_name:
        # split() always yields at least two parts here because 'GPU' is
        # known to be present; take the text up to the next 'GPU', if any.
        gpu_name = 'GPU' + plan_name.split('GPU')[1]
    else:
        gpu_name = None

    # VRAM is keyed by the normalized GPU name; unknown names map to 0.
    gpu_vram_mb = VRAM.get(gpu_name, 0) if gpu_name else 0

    return {
        'plan_name': plan_name,
        'vcpus': vcpus,
        'memory_gb': memory_gb,
        'gpu_name': gpu_name,
        'gpu_count': gpu_count,
        'gpu_vram_mb': gpu_vram_mb,
        'price': price,
    }
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def get_gpu_info(gpu_count: int, gpu_name: str, gpu_vram_mb: int = 0) -> str:
    """Generate GPU info JSON string compatible with SkyPilot.

    Args:
        gpu_count: Number of GPUs attached to the plan.
        gpu_name: Accelerator name; written verbatim into the 'Name' field.
        gpu_vram_mb: Memory per GPU, written into 'SizeInMiB'. A falsy value
            yields a total of 0.

    Returns:
        An empty string when there is no GPU (empty name or non-positive
        count); otherwise the JSON dump of the GPU description with every
        double quote replaced by a single quote.
    """
    # A non-positive count cannot describe a real accelerator, so treat it the
    # same way as "no GPU" instead of emitting a negative Count/total.
    if not gpu_name or gpu_count <= 0:
        return ''

    # Determine manufacturer based on GPU name. The substring test also
    # matches the exact name 'MI300X', so no separate equality check is
    # needed.
    gpu_name_upper = str(gpu_name).upper()
    if 'MI300' in gpu_name_upper:
        manufacturer = 'AMD'
    elif 'GRAYSKULL' in gpu_name_upper:
        manufacturer = 'TENSTORRENT'
    else:
        manufacturer = 'NVIDIA'

    gpu_info = {
        'Gpus': [{
            'Name': gpu_name,
            'Manufacturer': manufacturer,
            'Count': float(gpu_count),
            'MemoryInfo': {
                'SizeInMiB': gpu_vram_mb
            },
        }],
        'TotalGpuMemoryInMiB': gpu_vram_mb * gpu_count if gpu_vram_mb else 0
    }

    # The catalog stores this value single-quoted, so swap the JSON quotes.
    return json.dumps(gpu_info).replace('"', '\'')
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def fetch_seeweb_data(api_key: str) -> List[Dict]:
    """Fetch data from Seeweb API.

    Plans are parsed one at a time; Tenstorrent and MI300X plans are skipped
    before the per-plan region lookup, and a plan that fails to parse or
    whose region lookup fails is logged and skipped.

    Args:
        api_key: Token passed to ``ecsapi.Api``.

    Returns:
        One dict per retained plan: the output of ``parse_plan_info`` plus a
        'regions_available' entry holding the result of
        ``client.fetch_regions_available``.

    Raises:
        ImportError: If ``ecsapi`` is not available.
        Exception: Any other failure (including an empty plan list), wrapped
            with an 'Error fetching data from Seeweb API' message.
    """
    if ecsapi is None:
        raise ImportError('ecsapi not available')

    try:
        client = ecsapi.Api(token=api_key)

        print('Fetching plans from Seeweb API...')
        api_plans = client.fetch_plans()

        if not api_plans:
            raise ValueError('No plans returned from API')

        print(f'Successfully fetched {len(api_plans)} plans from API')
        plans = []

        for plan in api_plans:
            # Resolve the label defensively: the error handler below must not
            # raise itself when a plan object has no `name` attribute,
            # otherwise one malformed plan would abort the whole fetch.
            plan_label = getattr(plan, 'name', '<unknown>')
            try:
                # Parse first so we can filter
                # Tenstorrent before extra API calls
                parsed = parse_plan_info(plan)
                gpu_name = parsed.get('gpu_name')

                if is_tenstorrent_gpu_name(gpu_name):
                    print(f'Skipping Tenstorrent plan {plan_label}')
                    continue

                if is_mi300x_gpu_name(gpu_name):
                    print(f'Skipping MI300X plan {plan_label}')
                    continue

                print(f'Fetching regions available for {plan_label}')
                parsed['regions_available'] = client.fetch_regions_available(
                    plan.name)
                plans.append(parsed)
            except Exception as e:  # pylint: disable=broad-except
                print(f'Error parsing plan {plan_label}: {e}')
                continue

        print(f'Successfully parsed {len(plans)} plans')
        return plans

    except Exception as e:  # pylint: disable=broad-except
        raise Exception(f'Error fetching data from Seeweb API: {e}') from e
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def create_catalog(api_key: str, output_path: str) -> None:
    """Create Seeweb catalog by fetching data from API.

    Writes one CSV row per (plan, region) pair. A plan whose
    'regions_available' value is not a non-empty list still gets a single
    row with an empty Region.

    Args:
        api_key: Seeweb API key forwarded to ``fetch_seeweb_data``.
        output_path: Destination path of the CSV catalog.
    """
    plans = fetch_seeweb_data(api_key)

    # Create CSV catalog
    print(f'Writing catalog to {output_path}')
    # newline='' leaves line endings to the csv writer; without it, platforms
    # that translate '\n' on write produce '\r\r\n' row terminators.
    with open(output_path, mode='w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f, delimiter=',', quotechar='"')
        writer.writerow([
            'InstanceType', 'AcceleratorName', 'AcceleratorCount', 'vCPUs',
            'MemoryGiB', 'Price', 'Region', 'GpuInfo', 'SpotPrice'
        ])

        for plan in plans:
            try:
                gpu_name = plan['gpu_name']
                gpu_count = plan['gpu_count']

                gpu_info_str = ''
                if gpu_name and gpu_count > 0:
                    gpu_info_str = get_gpu_info(gpu_count, gpu_name,
                                                plan.get('gpu_vram_mb', 0))

                # Handle regions - create a row for each available region,
                # or a single row with an empty region when none are listed.
                regions_available = plan['regions_available']
                if not (isinstance(regions_available, list) and
                        regions_available):
                    regions_available = ['']

                for region in regions_available:
                    writer.writerow([
                        plan['plan_name'],  # InstanceType
                        gpu_name,  # AcceleratorName (cleaned)
                        gpu_count if gpu_count > 0 else
                        '',  # AcceleratorCount
                        plan['vcpus'],  # vCPUs
                        plan['memory_gb'],  # MemoryGiB
                        plan['price'],  # Price
                        region,  # Region (single region per row)
                        gpu_info_str,  # GpuInfo
                        ''  # SpotPrice (Seeweb doesn't support spot)
                    ])
            except Exception as e:  # pylint: disable=broad-except
                # Use .get() so a plan missing 'plan_name' cannot make the
                # handler itself raise and abort the whole catalog write.
                print(f'Error processing plan {plan.get("plan_name")}: {e}')
                continue

    print(f'Seeweb catalog saved to {output_path}')
    print(f'Created {len(plans)} instance types')
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def main() -> None:
    """Fetch Seeweb plans and prices and write them to seeweb/vms.csv.

    The API key comes from --api-key when given; otherwise it is resolved by
    ``get_api_key`` from --api-key-path.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--api-key', help='Seeweb API key')
    arg_parser.add_argument('--api-key-path',
                            help='Path to file containing Seeweb API key')
    cli = arg_parser.parse_args()

    # An explicit key wins; the key-file lookup only runs without one.
    api_key = cli.api_key or get_api_key(cli.api_key_path)

    os.makedirs('seeweb', exist_ok=True)
    create_catalog(api_key, 'seeweb/vms.csv')
    print('Seeweb Service Catalog saved to seeweb/vms.csv')
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
# Run the fetcher only when this file is executed as a script.
if __name__ == '__main__':
    main()
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""A script that generates the Shadeform catalog.
|
|
2
|
+
|
|
3
|
+
Usage:
|
|
4
|
+
python fetch_shadeform.py [-h] [--api-key API_KEY]
|
|
5
|
+
[--api-key-path API_KEY_PATH]
|
|
6
|
+
|
|
7
|
+
If neither --api-key nor --api-key-path are provided, this script will parse
|
|
8
|
+
`~/.shadeform/api_key` to look for Shadeform API key.
|
|
9
|
+
"""
|
|
10
|
+
import argparse
|
|
11
|
+
import csv
|
|
12
|
+
import json
|
|
13
|
+
import os
|
|
14
|
+
from typing import Dict
|
|
15
|
+
|
|
16
|
+
import requests
|
|
17
|
+
|
|
18
|
+
ENDPOINT = 'https://api.shadeform.ai/v1/instances/types'
|
|
19
|
+
DEFAULT_SHADEFORM_API_KEY_PATH = os.path.expanduser('~/.shadeform/api_key')
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def parse_gpu_info(gpu_type: str, num_gpus: int, ram_per_gpu: int) -> Dict:
    """Parse GPU information for the catalog.

    Args:
        gpu_type: Accelerator name; written verbatim into the 'Name' field.
        num_gpus: Number of GPUs on the instance.
        ram_per_gpu: Memory per GPU; written verbatim into 'SizeInMiB', so
            callers are expected to pass a MiB value.

    Returns:
        A dict with a single-entry 'Gpus' list and a top-level
        'TotalGpuMemoryInMiB' (per-GPU memory times GPU count).
    """
    manufacturer = 'NVIDIA'
    if gpu_type == 'MI300X':
        manufacturer = 'AMD'
    elif gpu_type == 'GAUDI2':
        manufacturer = 'Intel'

    total_memory = ram_per_gpu * num_gpus

    return {
        'Gpus': [{
            'Name': gpu_type,
            'Manufacturer': manufacturer,
            'Count': float(num_gpus),
            'MemoryInfo': {
                'SizeInMiB': ram_per_gpu
            },
            # Retained so any existing reader of the nested key keeps working.
            'TotalGpuMemoryInMiB': total_memory
        }],
        # The total describes the whole instance, so it belongs beside 'Gpus'
        # rather than only inside the per-GPU entry.
        'TotalGpuMemoryInMiB': total_memory
    }
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def create_catalog(api_key: str, output_path: str) -> None:
    """Create Shadeform catalog by fetching from API.

    Queries ENDPOINT for available instance types and writes one CSV row per
    (GPU instance type, available region) pair. Instances without GPUs
    produce no rows.

    Args:
        api_key: Shadeform API key, sent in the 'X-API-KEY' header.
        output_path: Destination path of the CSV catalog.

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    headers = {'X-API-KEY': api_key}
    params = {'available': 'true'}

    response = requests.get(ENDPOINT,
                            headers=headers,
                            params=params,
                            timeout=30)
    response.raise_for_status()

    data = response.json()
    instance_types = data.get('instance_types', [])

    # newline='' leaves line endings to the csv writer; without it, platforms
    # that translate '\n' on write produce '\r\r\n' row terminators.
    with open(output_path, mode='w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f, delimiter=',', quotechar='"')
        writer.writerow([
            'InstanceType', 'AcceleratorName', 'AcceleratorCount', 'vCPUs',
            'MemoryGiB', 'Price', 'Region', 'GpuInfo', 'SpotPrice'
        ])

        for instance in instance_types:
            config = instance['configuration']

            # Only GPU instances are cataloged. Skip the rest up front so
            # GPU-specific fields are never read for CPU-only entries.
            gpu_count = float(config['num_gpus'])
            if gpu_count <= 0:
                continue

            cloud = instance['cloud']
            shade_instance_type = instance['shade_instance_type']
            instance_type = f'{cloud}_{shade_instance_type.replace("_", "-")}'
            gpu_type = config['gpu_type'].replace('_', '-')
            vcpus = float(config['vcpus'])
            memory_gb = int(config['memory_in_gb'])

            # Append "B" to instance_type and gpu_type if they end with "G"
            if instance_type.endswith('G'):
                instance_type += 'B'
            if gpu_type.endswith('G'):
                gpu_type += 'B'

            # Replace "Gx" with "GBx" (case sensitive)
            instance_type = instance_type.replace('Gx', 'GBx')

            # Price is in cents per hour, convert to dollars
            price = float(instance['hourly_price']) / 100

            # Create GPU info. The API field is in GB while the catalog field
            # is 'SizeInMiB', so convert before handing it over.
            vram_per_gpu_mib = int(config['vram_per_gpu_in_gb']) * 1024
            gpuinfo_dict = parse_gpu_info(gpu_type, int(gpu_count),
                                          vram_per_gpu_mib)
            gpuinfo = json.dumps(gpuinfo_dict).replace('"', '\'')

            # Write entry for each available region
            for availability in instance.get('availability', []):
                if not availability['available']:
                    continue
                writer.writerow([
                    instance_type,
                    gpu_type,
                    gpu_count,
                    vcpus,
                    memory_gb,
                    price,
                    availability['region'],
                    gpuinfo,
                    ''  # No spot pricing info available
                ])
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def get_api_key(cmdline_args: argparse.Namespace) -> str:
    """Get Shadeform API key from cmdline or default path.

    Resolution order: ``--api-key``, then the file given by
    ``--api-key-path``, then DEFAULT_SHADEFORM_API_KEY_PATH.

    Args:
        cmdline_args: Parsed arguments exposing ``api_key`` and
            ``api_key_path``.

    Returns:
        The API key; surrounding whitespace is stripped when it is read from
        a file.

    Raises:
        OSError: If the key file cannot be opened.
        ValueError: If the resolved key is empty.
    """
    api_key = cmdline_args.api_key
    if api_key is None:
        key_path = cmdline_args.api_key_path
        if key_path is None:
            # Read from ~/.shadeform/api_key
            key_path = DEFAULT_SHADEFORM_API_KEY_PATH
        with open(key_path, mode='r', encoding='utf-8') as f:
            api_key = f.read().strip()

    # Explicit check instead of `assert`: assertions are stripped under -O,
    # and the former `is not None` test could never fail at this point, so an
    # empty key file went unnoticed.
    if not api_key:
        raise ValueError(
            f'API key not found. Please provide via --api-key or place in '
            f'{DEFAULT_SHADEFORM_API_KEY_PATH}')
    return api_key
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
# Script entry point: resolve the API key, then write the catalog CSV.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--api-key', help='Shadeform API key.')
    parser.add_argument('--api-key-path',
                        help='path of file containing Shadeform API key.')
    args = parser.parse_args()
    # The catalog is written relative to the current working directory.
    os.makedirs('shadeform', exist_ok=True)
    create_catalog(get_api_key(args), 'shadeform/vms.csv')
    print('Shadeform catalog saved to shadeform/vms.csv')
|
|
@@ -6,7 +6,7 @@ import typing
|
|
|
6
6
|
|
|
7
7
|
from sky.adaptors import common as adaptors_common
|
|
8
8
|
from sky.adaptors import vsphere as vsphere_adaptor
|
|
9
|
-
from sky.
|
|
9
|
+
from sky.catalog.common import get_catalog_path
|
|
10
10
|
from sky.provision.vsphere.common.cls_api_client import ClsApiClient
|
|
11
11
|
|
|
12
12
|
if typing.TYPE_CHECKING:
|
|
@@ -7,7 +7,7 @@ query instance types and pricing information for digital ocean.
|
|
|
7
7
|
import typing
|
|
8
8
|
from typing import Dict, List, Optional, Tuple, Union
|
|
9
9
|
|
|
10
|
-
from sky.
|
|
10
|
+
from sky.catalog import common
|
|
11
11
|
from sky.utils import ux_utils
|
|
12
12
|
|
|
13
13
|
if typing.TYPE_CHECKING:
|
|
@@ -52,11 +52,14 @@ def get_default_instance_type(
|
|
|
52
52
|
cpus: Optional[str] = None,
|
|
53
53
|
memory: Optional[str] = None,
|
|
54
54
|
disk_tier: Optional[str] = None,
|
|
55
|
+
region: Optional[str] = None,
|
|
56
|
+
zone: Optional[str] = None,
|
|
55
57
|
) -> Optional[str]:
|
|
56
58
|
# NOTE: After expanding catalog to multiple entries, you may
|
|
57
59
|
# want to specify a default instance type or family.
|
|
58
60
|
del disk_tier # unused
|
|
59
|
-
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory
|
|
61
|
+
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory, region,
|
|
62
|
+
zone)
|
|
60
63
|
|
|
61
64
|
|
|
62
65
|
def get_accelerators_from_instance_type(
|
|
@@ -6,7 +6,7 @@ instance types and pricing information for FluidStack.
|
|
|
6
6
|
import typing
|
|
7
7
|
from typing import Dict, List, Optional, Tuple, Union
|
|
8
8
|
|
|
9
|
-
from sky.
|
|
9
|
+
from sky.catalog import common
|
|
10
10
|
from sky.utils import ux_utils
|
|
11
11
|
|
|
12
12
|
if typing.TYPE_CHECKING:
|
|
@@ -52,7 +52,9 @@ def get_vcpus_mem_from_instance_type(
|
|
|
52
52
|
|
|
53
53
|
def get_default_instance_type(cpus: Optional[str] = None,
|
|
54
54
|
memory: Optional[str] = None,
|
|
55
|
-
disk_tier: Optional[str] = None
|
|
55
|
+
disk_tier: Optional[str] = None,
|
|
56
|
+
region: Optional[str] = None,
|
|
57
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
56
58
|
del disk_tier # unused
|
|
57
59
|
if cpus is None and memory is None:
|
|
58
60
|
cpus = f'{_DEFAULT_NUM_VCPUS}+'
|
|
@@ -61,7 +63,8 @@ def get_default_instance_type(cpus: Optional[str] = None,
|
|
|
61
63
|
else:
|
|
62
64
|
memory_gb_or_ratio = memory
|
|
63
65
|
return common.get_instance_type_for_cpus_mem_impl(_df, cpus,
|
|
64
|
-
memory_gb_or_ratio
|
|
66
|
+
memory_gb_or_ratio,
|
|
67
|
+
region, zone)
|
|
65
68
|
|
|
66
69
|
|
|
67
70
|
def get_accelerators_from_instance_type(
|
|
@@ -9,8 +9,8 @@ from typing import Dict, List, Optional, Tuple
|
|
|
9
9
|
from sky import exceptions
|
|
10
10
|
from sky import sky_logging
|
|
11
11
|
from sky.adaptors import common as adaptors_common
|
|
12
|
+
from sky.catalog import common
|
|
12
13
|
from sky.clouds import GCP
|
|
13
|
-
from sky.clouds.service_catalog import common
|
|
14
14
|
from sky.utils import resources_utils
|
|
15
15
|
from sky.utils import ux_utils
|
|
16
16
|
|
|
@@ -37,20 +37,37 @@ _image_df = common.read_catalog('gcp/images.csv',
|
|
|
37
37
|
_quotas_df = common.read_catalog('gcp/accelerator_quota_mapping.csv',
|
|
38
38
|
pull_frequency_hours=_PULL_FREQUENCY_HOURS)
|
|
39
39
|
|
|
40
|
-
# We will select from the following
|
|
40
|
+
# We will select from the following six CPU instance families:
|
|
41
41
|
_DEFAULT_INSTANCE_FAMILY = [
|
|
42
|
-
# This is
|
|
43
|
-
# CPU: Intel Ice Lake
|
|
42
|
+
# This is a widely used general-purpose instance family as of July 2025.
|
|
43
|
+
# CPU: Primarily Intel Ice Lake (3rd Gen Intel Xeon Scalable Processors)
|
|
44
|
+
# or Cascade Lake (2nd Gen Intel Xeon Scalable Processors).
|
|
44
45
|
# Memory: 4 GiB RAM per 1 vCPU;
|
|
45
46
|
'n2-standard',
|
|
46
|
-
# This is
|
|
47
|
-
# CPU: Intel Ice Lake
|
|
47
|
+
# This is a memory-optimized instance family as of July 2025.
|
|
48
|
+
# CPU: Primarily Intel Ice Lake (3rd Gen Intel Xeon Scalable Processors)
|
|
49
|
+
# or Cascade Lake (2nd Gen Intel Xeon Scalable Processors).
|
|
48
50
|
# Memory: 8 GiB RAM per 1 vCPU;
|
|
49
51
|
'n2-highmem',
|
|
50
|
-
# This is
|
|
51
|
-
# CPU: Intel Ice Lake
|
|
52
|
+
# This is a compute-optimized instance family as of July 2025.
|
|
53
|
+
# CPU: Primarily Intel Ice Lake (3rd Gen Intel Xeon Scalable Processors)
|
|
54
|
+
# or Cascade Lake (2nd Gen Intel Xeon Scalable Processors).
|
|
52
55
|
# Memory: 1 GiB RAM per 1 vCPU;
|
|
53
56
|
'n2-highcpu',
|
|
57
|
+
# This is the latest general-purpose instance family as of July 2025.
|
|
58
|
+
# CPU: Intel 5th Gen Xeon Scalable processor (Emerald Rapids).
|
|
59
|
+
# Memory: 4 GiB RAM per 1 vCPU;
|
|
60
|
+
'n4-standard',
|
|
61
|
+
# This is the latest general-purpose instance family
|
|
62
|
+
# with a higher vCPU to memory ratio as of July 2025.
|
|
63
|
+
# CPU: Intel 5th Gen Xeon Scalable processor (Emerald Rapids).
|
|
64
|
+
# Memory: 2 GiB RAM per 1 vCPU;
|
|
65
|
+
'n4-highcpu',
|
|
66
|
+
# This is the latest general-purpose instance family
|
|
67
|
+
# with a lower vCPU to memory ratio as of July 2025.
|
|
68
|
+
# CPU: Intel 5th Gen Xeon Scalable processor (Emerald Rapids).
|
|
69
|
+
# Memory: 8 GiB RAM per 1 vCPU;
|
|
70
|
+
'n4-highmem',
|
|
54
71
|
]
|
|
55
72
|
# n2 is not allowed for launching GPUs for now.
|
|
56
73
|
_DEFAULT_HOST_VM_FAMILY = (
|
|
@@ -104,7 +121,13 @@ _ACC_INSTANCE_TYPE_DICTS = {
|
|
|
104
121
|
},
|
|
105
122
|
'H100-MEGA': {
|
|
106
123
|
8: ['a3-megagpu-8g'],
|
|
107
|
-
}
|
|
124
|
+
},
|
|
125
|
+
'H200': {
|
|
126
|
+
8: ['a3-ultragpu-8g'],
|
|
127
|
+
},
|
|
128
|
+
'B200': {
|
|
129
|
+
8: ['a4-highgpu-8g'],
|
|
130
|
+
},
|
|
108
131
|
}
|
|
109
132
|
# Enable GPU type inference from instance types
|
|
110
133
|
_INSTANCE_TYPE_TO_ACC = {
|
|
@@ -119,7 +142,7 @@ GCP_ACC_INSTANCE_TYPES = list(_INSTANCE_TYPE_TO_ACC.keys())
|
|
|
119
142
|
|
|
120
143
|
# Number of CPU cores per GPU based on the AWS setting.
|
|
121
144
|
# GCP A100 has its own instance type mapping.
|
|
122
|
-
# Refer to sky/clouds/
|
|
145
|
+
# Refer to sky/catalog/gcp_catalog.py
|
|
123
146
|
_NUM_ACC_TO_NUM_CPU = {
|
|
124
147
|
# Based on p2 on AWS.
|
|
125
148
|
'K80': {
|
|
@@ -256,10 +279,12 @@ def get_vcpus_mem_from_instance_type(
|
|
|
256
279
|
return common.get_vcpus_mem_from_instance_type_impl(_df, instance_type)
|
|
257
280
|
|
|
258
281
|
|
|
259
|
-
def get_default_instance_type(
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
282
|
+
def get_default_instance_type(cpus: Optional[str] = None,
|
|
283
|
+
memory: Optional[str] = None,
|
|
284
|
+
disk_tier: Optional[
|
|
285
|
+
resources_utils.DiskTier] = None,
|
|
286
|
+
region: Optional[str] = None,
|
|
287
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
263
288
|
if cpus is None and memory is None:
|
|
264
289
|
cpus = f'{_DEFAULT_NUM_VCPUS}+'
|
|
265
290
|
if memory is None:
|
|
@@ -277,7 +302,8 @@ def get_default_instance_type(
|
|
|
277
302
|
|
|
278
303
|
df = df.loc[df['InstanceType'].apply(_filter_disk_type)]
|
|
279
304
|
return common.get_instance_type_for_cpus_mem_impl(df, cpus,
|
|
280
|
-
memory_gb_or_ratio
|
|
305
|
+
memory_gb_or_ratio,
|
|
306
|
+
region, zone)
|
|
281
307
|
|
|
282
308
|
|
|
283
309
|
def get_accelerators_from_instance_type(
|