skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +25 -7
- sky/adaptors/common.py +24 -1
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +170 -17
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +167 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1299 -380
- sky/backends/cloud_vm_ray_backend.py +1715 -518
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/wheel_utils.py +37 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +89 -48
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +335 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +491 -203
- sky/cli.py +5 -6005
- sky/client/{cli.py → cli/command.py} +2477 -1885
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +320 -0
- sky/client/common.py +70 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1203 -297
- sky/client/sdk_async.py +833 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +73 -0
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +358 -93
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +127 -36
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +563 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +206 -80
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -83
- sky/clouds/seeweb.py +466 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +177 -124
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +349 -139
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
- sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1451 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +132 -2
- sky/execution.py +206 -63
- sky/global_user_state.py +2374 -586
- sky/jobs/__init__.py +5 -0
- sky/jobs/client/sdk.py +242 -65
- sky/jobs/client/sdk_async.py +143 -0
- sky/jobs/constants.py +9 -8
- sky/jobs/controller.py +839 -277
- sky/jobs/file_content_utils.py +80 -0
- sky/jobs/log_gc.py +201 -0
- sky/jobs/recovery_strategy.py +398 -152
- sky/jobs/scheduler.py +315 -189
- sky/jobs/server/core.py +829 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2092 -701
- sky/jobs/utils.py +1242 -160
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +443 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +135 -50
- sky/provision/azure/instance.py +10 -5
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +114 -23
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +93 -14
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +789 -247
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +40 -43
- sky/provision/kubernetes/utils.py +1192 -531
- sky/provision/kubernetes/volume.py +282 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +196 -91
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +110 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +180 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +531 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +807 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/vsphere/common/vim_utils.py +1 -2
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +9 -19
- sky/py.typed +0 -0
- sky/resources.py +844 -118
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +225 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +10 -8
- sky/serve/controller.py +64 -19
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +115 -1
- sky/serve/replica_managers.py +273 -162
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +554 -251
- sky/serve/serve_utils.py +733 -220
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +133 -48
- sky/serve/service_spec.py +135 -16
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +200 -0
- sky/server/common.py +475 -181
- sky/server/config.py +81 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +229 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/requests/executor.py +528 -138
- sky/server/requests/payloads.py +351 -17
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +120 -0
- sky/server/requests/requests.py +817 -224
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +106 -0
- sky/server/rest.py +417 -0
- sky/server/server.py +1290 -284
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +345 -57
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +5 -0
- sky/setup_files/alembic.ini +156 -0
- sky/setup_files/dependencies.py +136 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +102 -5
- sky/skylet/attempt_skylet.py +1 -0
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +27 -20
- sky/skylet/constants.py +171 -19
- sky/skylet/events.py +105 -21
- sky/skylet/job_lib.py +335 -104
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/services.py +564 -0
- sky/skylet/skylet.py +63 -4
- sky/skylet/subprocess_daemon.py +103 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +621 -137
- sky/templates/aws-ray.yml.j2 +10 -3
- sky/templates/azure-ray.yml.j2 +1 -1
- sky/templates/do-ray.yml.j2 +1 -1
- sky/templates/gcp-ray.yml.j2 +57 -0
- sky/templates/hyperbolic-ray.yml.j2 +67 -0
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +607 -51
- sky/templates/lambda-ray.yml.j2 +1 -1
- sky/templates/nebius-ray.yml.j2 +33 -12
- sky/templates/paperspace-ray.yml.j2 +1 -1
- sky/templates/primeintellect-ray.yml.j2 +71 -0
- sky/templates/runpod-ray.yml.j2 +9 -1
- sky/templates/scp-ray.yml.j2 +3 -50
- sky/templates/seeweb-ray.yml.j2 +108 -0
- sky/templates/shadeform-ray.yml.j2 +72 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/websocket_proxy.py +178 -18
- sky/usage/usage_lib.py +18 -11
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +387 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +34 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +16 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +310 -87
- sky/utils/config_utils.py +87 -5
- sky/utils/context.py +402 -0
- sky/utils/context_utils.py +222 -0
- sky/utils/controller_utils.py +264 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +470 -0
- sky/utils/db/migration_utils.py +133 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +13 -27
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +5 -5
- sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +368 -0
- sky/utils/log_utils.py +300 -6
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +213 -37
- sky/utils/schemas.py +905 -147
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +38 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/timeline.py +24 -52
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +86 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +149 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +258 -0
- sky/volumes/server/server.py +122 -0
- sky/volumes/volume.py +212 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
- skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
sky/clouds/nebius.py
CHANGED
|
@@ -1,24 +1,22 @@
|
|
|
1
1
|
""" Nebius Cloud. """
|
|
2
|
+
import json
|
|
2
3
|
import os
|
|
3
4
|
import typing
|
|
4
|
-
from typing import Dict, Iterator, List, Optional, Tuple, Union
|
|
5
|
+
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
|
|
5
6
|
|
|
7
|
+
from sky import catalog
|
|
6
8
|
from sky import clouds
|
|
9
|
+
from sky import exceptions
|
|
10
|
+
from sky import skypilot_config
|
|
7
11
|
from sky.adaptors import nebius
|
|
8
|
-
from sky.
|
|
12
|
+
from sky.provision.nebius import constants as nebius_constants
|
|
9
13
|
from sky.utils import annotations
|
|
10
14
|
from sky.utils import registry
|
|
11
15
|
from sky.utils import resources_utils
|
|
12
16
|
|
|
13
17
|
if typing.TYPE_CHECKING:
|
|
14
18
|
from sky import resources as resources_lib
|
|
15
|
-
|
|
16
|
-
_CREDENTIAL_FILES = [
|
|
17
|
-
# credential files for Nebius
|
|
18
|
-
nebius.NEBIUS_TENANT_ID_FILENAME,
|
|
19
|
-
nebius.NEBIUS_IAM_TOKEN_FILENAME,
|
|
20
|
-
nebius.NEBIUS_CREDENTIALS_FILENAME
|
|
21
|
-
]
|
|
19
|
+
from sky.utils import volume as volume_lib
|
|
22
20
|
|
|
23
21
|
_INDENT_PREFIX = ' '
|
|
24
22
|
|
|
@@ -54,14 +52,18 @@ class Nebius(clouds.Cloud):
|
|
|
54
52
|
_CLOUD_UNSUPPORTED_FEATURES = {
|
|
55
53
|
clouds.CloudImplementationFeatures.AUTODOWN:
|
|
56
54
|
('Autodown not supported. Can\'t delete OS disk.'),
|
|
57
|
-
clouds.CloudImplementationFeatures.SPOT_INSTANCE:
|
|
58
|
-
('Spot is not supported, as Nebius API does not implement spot.'),
|
|
59
55
|
clouds.CloudImplementationFeatures.CLONE_DISK_FROM_CLUSTER:
|
|
60
56
|
(f'Migrating disk is currently not supported on {_REPR}.'),
|
|
61
57
|
clouds.CloudImplementationFeatures.CUSTOM_DISK_TIER:
|
|
62
58
|
(f'Custom disk tier is currently not supported on {_REPR}.'),
|
|
59
|
+
clouds.CloudImplementationFeatures.CUSTOM_NETWORK_TIER:
|
|
60
|
+
('Custom network tier is currently only supported for '
|
|
61
|
+
'H100:8 and H200:8 on Nebius.'),
|
|
63
62
|
clouds.CloudImplementationFeatures.HIGH_AVAILABILITY_CONTROLLERS:
|
|
64
63
|
('High availability controllers are not supported on Nebius.'),
|
|
64
|
+
clouds.CloudImplementationFeatures.CUSTOM_MULTI_NETWORK:
|
|
65
|
+
('Customized multiple network interfaces are not supported on '
|
|
66
|
+
f'{_REPR}.'),
|
|
65
67
|
}
|
|
66
68
|
# Nebius maximum instance name length defined as <= 63 as a hostname length
|
|
67
69
|
# 63 - 8 - 5 = 50 characters since
|
|
@@ -76,25 +78,43 @@ class Nebius(clouds.Cloud):
|
|
|
76
78
|
|
|
77
79
|
@classmethod
|
|
78
80
|
def _unsupported_features_for_resources(
|
|
79
|
-
cls,
|
|
81
|
+
cls,
|
|
82
|
+
resources: 'resources_lib.Resources',
|
|
83
|
+
region: Optional[str] = None,
|
|
80
84
|
) -> Dict[clouds.CloudImplementationFeatures, str]:
|
|
81
|
-
|
|
82
|
-
|
|
85
|
+
unsupported = cls._CLOUD_UNSUPPORTED_FEATURES.copy()
|
|
86
|
+
|
|
87
|
+
# Check if the accelerators support InfiniBand (H100 or H200) and 8 GPUs
|
|
88
|
+
if resources.accelerators is not None:
|
|
89
|
+
for acc_name, acc_count in resources.accelerators.items():
|
|
90
|
+
if acc_name.lower() in ('h100', 'h200') and acc_count == 8:
|
|
91
|
+
# Remove CUSTOM_NETWORK_TIER from unsupported features for
|
|
92
|
+
# InfiniBand-capable accelerators. Refer to:
|
|
93
|
+
# https://docs.nebius.com/compute/clusters/gpu#fabrics
|
|
94
|
+
unsupported.pop(
|
|
95
|
+
clouds.CloudImplementationFeatures.CUSTOM_NETWORK_TIER,
|
|
96
|
+
None)
|
|
97
|
+
break
|
|
98
|
+
|
|
99
|
+
return unsupported
|
|
83
100
|
|
|
84
101
|
@classmethod
|
|
85
102
|
def _max_cluster_name_length(cls) -> Optional[int]:
|
|
86
103
|
return cls._MAX_CLUSTER_NAME_LEN_LIMIT
|
|
87
104
|
|
|
88
105
|
@classmethod
|
|
89
|
-
def regions_with_offering(
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
106
|
+
def regions_with_offering(
|
|
107
|
+
cls,
|
|
108
|
+
instance_type: str,
|
|
109
|
+
accelerators: Optional[Dict[str, int]],
|
|
110
|
+
use_spot: bool,
|
|
111
|
+
region: Optional[str],
|
|
112
|
+
zone: Optional[str],
|
|
113
|
+
resources: Optional['resources_lib.Resources'] = None,
|
|
114
|
+
) -> List[clouds.Region]:
|
|
93
115
|
assert zone is None, 'Nebius does not support zones.'
|
|
94
116
|
del accelerators, zone # unused
|
|
95
|
-
|
|
96
|
-
return []
|
|
97
|
-
regions = service_catalog.get_region_zones_for_instance_type(
|
|
117
|
+
regions = catalog.get_region_zones_for_instance_type(
|
|
98
118
|
instance_type, use_spot, 'nebius')
|
|
99
119
|
|
|
100
120
|
if region is not None:
|
|
@@ -106,8 +126,8 @@ class Nebius(clouds.Cloud):
|
|
|
106
126
|
cls,
|
|
107
127
|
instance_type: str,
|
|
108
128
|
) -> Tuple[Optional[float], Optional[float]]:
|
|
109
|
-
return
|
|
110
|
-
|
|
129
|
+
return catalog.get_vcpus_mem_from_instance_type(instance_type,
|
|
130
|
+
clouds='nebius')
|
|
111
131
|
|
|
112
132
|
@classmethod
|
|
113
133
|
def zones_provision_loop(
|
|
@@ -134,11 +154,11 @@ class Nebius(clouds.Cloud):
|
|
|
134
154
|
use_spot: bool,
|
|
135
155
|
region: Optional[str] = None,
|
|
136
156
|
zone: Optional[str] = None) -> float:
|
|
137
|
-
return
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
157
|
+
return catalog.get_hourly_cost(instance_type,
|
|
158
|
+
use_spot=use_spot,
|
|
159
|
+
region=region,
|
|
160
|
+
zone=zone,
|
|
161
|
+
clouds='nebius')
|
|
142
162
|
|
|
143
163
|
def accelerators_to_hourly_cost(self,
|
|
144
164
|
accelerators: Dict[str, int],
|
|
@@ -160,69 +180,124 @@ class Nebius(clouds.Cloud):
|
|
|
160
180
|
return isinstance(other, Nebius)
|
|
161
181
|
|
|
162
182
|
@classmethod
|
|
163
|
-
def get_default_instance_type(
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
183
|
+
def get_default_instance_type(cls,
|
|
184
|
+
cpus: Optional[str] = None,
|
|
185
|
+
memory: Optional[str] = None,
|
|
186
|
+
disk_tier: Optional[
|
|
187
|
+
resources_utils.DiskTier] = None,
|
|
188
|
+
region: Optional[str] = None,
|
|
189
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
169
190
|
"""Returns the default instance type for Nebius."""
|
|
170
|
-
return
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
191
|
+
return catalog.get_default_instance_type(cpus=cpus,
|
|
192
|
+
memory=memory,
|
|
193
|
+
disk_tier=disk_tier,
|
|
194
|
+
region=region,
|
|
195
|
+
zone=zone,
|
|
196
|
+
clouds='nebius')
|
|
174
197
|
|
|
175
198
|
@classmethod
|
|
176
199
|
def get_accelerators_from_instance_type(
|
|
177
200
|
cls,
|
|
178
201
|
instance_type: str,
|
|
179
202
|
) -> Optional[Dict[str, Union[int, float]]]:
|
|
180
|
-
return
|
|
181
|
-
|
|
203
|
+
return catalog.get_accelerators_from_instance_type(instance_type,
|
|
204
|
+
clouds='nebius')
|
|
182
205
|
|
|
183
206
|
@classmethod
|
|
184
207
|
def get_zone_shell_cmd(cls) -> Optional[str]:
|
|
185
208
|
return None
|
|
186
209
|
|
|
187
210
|
def make_deploy_resources_variables(
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
211
|
+
self,
|
|
212
|
+
resources: 'resources_lib.Resources',
|
|
213
|
+
cluster_name: resources_utils.ClusterName,
|
|
214
|
+
region: 'clouds.Region',
|
|
215
|
+
zones: Optional[List['clouds.Zone']],
|
|
216
|
+
num_nodes: int,
|
|
217
|
+
dryrun: bool = False,
|
|
218
|
+
volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
|
|
219
|
+
) -> Dict[str, Any]:
|
|
195
220
|
del dryrun, cluster_name
|
|
196
221
|
assert zones is None, ('Nebius does not support zones', zones)
|
|
197
222
|
|
|
198
|
-
|
|
199
|
-
acc_dict = self.get_accelerators_from_instance_type(
|
|
223
|
+
resources = resources.assert_launchable()
|
|
224
|
+
acc_dict = self.get_accelerators_from_instance_type(
|
|
225
|
+
resources.instance_type)
|
|
200
226
|
custom_resources = resources_utils.make_ray_custom_resources_str(
|
|
201
227
|
acc_dict)
|
|
202
228
|
platform, _ = resources.instance_type.split('_')
|
|
203
229
|
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
image_family = '
|
|
230
|
+
# Selecting image_family by platform
|
|
231
|
+
# https://docs.nebius.com/compute/storage/boot-disk-images
|
|
232
|
+
if platform.startswith('cpu'):
|
|
233
|
+
image_family = 'ubuntu24.04-driverless'
|
|
234
|
+
elif platform.startswith('gpu'):
|
|
235
|
+
image_family = 'ubuntu24.04-cuda12'
|
|
208
236
|
else:
|
|
209
237
|
raise RuntimeError('Unsupported instance type for Nebius cloud:'
|
|
210
238
|
f' {resources.instance_type}')
|
|
211
239
|
|
|
212
|
-
|
|
240
|
+
config_fs = skypilot_config.get_effective_region_config(
|
|
241
|
+
cloud='nebius',
|
|
242
|
+
region=region.name,
|
|
243
|
+
keys=('filesystems',),
|
|
244
|
+
default_value=[])
|
|
245
|
+
resources_vars_fs = []
|
|
246
|
+
for i, fs in enumerate(config_fs):
|
|
247
|
+
resources_vars_fs.append({
|
|
248
|
+
'filesystem_id': fs['filesystem_id'],
|
|
249
|
+
'filesystem_attach_mode': fs.get('attach_mode', 'READ_WRITE'),
|
|
250
|
+
'filesystem_mount_path': fs.get(
|
|
251
|
+
'mount_path', f'/mnt/filesystem-skypilot-{i+1}'),
|
|
252
|
+
'filesystem_mount_tag': f'filesystem-skypilot-{i+1}'
|
|
253
|
+
})
|
|
254
|
+
|
|
255
|
+
use_static_ip_address = skypilot_config.get_nested(
|
|
256
|
+
('nebius', 'use_static_ip_address'), default_value=False)
|
|
257
|
+
resources_vars: Dict[str, Any] = {
|
|
213
258
|
'instance_type': resources.instance_type,
|
|
214
259
|
'custom_resources': custom_resources,
|
|
260
|
+
'use_static_ip_address': use_static_ip_address,
|
|
215
261
|
'region': region.name,
|
|
216
262
|
'image_id': image_family,
|
|
217
263
|
# Nebius does not support specific zones.
|
|
218
264
|
'zones': None,
|
|
265
|
+
'use_spot': resources.use_spot,
|
|
266
|
+
'filesystems': resources_vars_fs,
|
|
267
|
+
'network_tier': resources.network_tier
|
|
219
268
|
}
|
|
220
269
|
|
|
270
|
+
docker_run_options = []
|
|
271
|
+
|
|
221
272
|
if acc_dict is not None:
|
|
222
273
|
# Nebius cloud's docker runtime information does not contain
|
|
223
274
|
# 'nvidia-container-runtime', causing no GPU option to be added to
|
|
224
275
|
# the docker run command. We patch this by adding it here.
|
|
225
|
-
|
|
276
|
+
docker_run_options.append('--gpus all')
|
|
277
|
+
|
|
278
|
+
# Check for InfiniBand support with network_tier: best
|
|
279
|
+
is_infiniband_capable = (
|
|
280
|
+
platform in nebius_constants.INFINIBAND_INSTANCE_PLATFORMS)
|
|
281
|
+
if (is_infiniband_capable and
|
|
282
|
+
resources.network_tier == resources_utils.NetworkTier.BEST):
|
|
283
|
+
# For Docker containers, add InfiniBand device access and
|
|
284
|
+
# IPC_LOCK capability
|
|
285
|
+
if resources.extract_docker_image() is not None:
|
|
286
|
+
docker_run_options.extend(
|
|
287
|
+
nebius_constants.INFINIBAND_DOCKER_OPTIONS)
|
|
288
|
+
|
|
289
|
+
# Add InfiniBand environment variables to docker run options
|
|
290
|
+
for env_var, env_value in (
|
|
291
|
+
nebius_constants.INFINIBAND_ENV_VARS.items()):
|
|
292
|
+
docker_run_options.extend(
|
|
293
|
+
['-e', f'{env_var}={env_value}'])
|
|
294
|
+
|
|
295
|
+
# For all InfiniBand-capable instances, add env variables
|
|
296
|
+
resources_vars[
|
|
297
|
+
'env_vars'] = nebius_constants.INFINIBAND_ENV_VARS
|
|
298
|
+
|
|
299
|
+
if docker_run_options:
|
|
300
|
+
resources_vars['docker_run_options'] = docker_run_options
|
|
226
301
|
|
|
227
302
|
return resources_vars
|
|
228
303
|
|
|
@@ -254,7 +329,9 @@ class Nebius(clouds.Cloud):
|
|
|
254
329
|
default_instance_type = Nebius.get_default_instance_type(
|
|
255
330
|
cpus=resources.cpus,
|
|
256
331
|
memory=resources.memory,
|
|
257
|
-
disk_tier=resources.disk_tier
|
|
332
|
+
disk_tier=resources.disk_tier,
|
|
333
|
+
region=resources.region,
|
|
334
|
+
zone=resources.zone)
|
|
258
335
|
if default_instance_type is None:
|
|
259
336
|
# TODO: Add hints to all return values in this method to help
|
|
260
337
|
# users understand why the resources are not launchable.
|
|
@@ -265,15 +342,16 @@ class Nebius(clouds.Cloud):
|
|
|
265
342
|
|
|
266
343
|
assert len(accelerators) == 1, resources
|
|
267
344
|
acc, acc_count = list(accelerators.items())[0]
|
|
268
|
-
(instance_list,
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
345
|
+
(instance_list,
|
|
346
|
+
fuzzy_candidate_list) = catalog.get_instance_type_for_accelerator(
|
|
347
|
+
acc,
|
|
348
|
+
acc_count,
|
|
349
|
+
use_spot=resources.use_spot,
|
|
350
|
+
cpus=resources.cpus,
|
|
351
|
+
memory=resources.memory,
|
|
352
|
+
region=resources.region,
|
|
353
|
+
zone=resources.zone,
|
|
354
|
+
clouds='nebius')
|
|
277
355
|
if instance_list is None:
|
|
278
356
|
return resources_utils.FeasibleResources([], fuzzy_candidate_list,
|
|
279
357
|
None)
|
|
@@ -281,25 +359,25 @@ class Nebius(clouds.Cloud):
|
|
|
281
359
|
fuzzy_candidate_list, None)
|
|
282
360
|
|
|
283
361
|
@classmethod
|
|
284
|
-
|
|
285
|
-
|
|
362
|
+
def _check_compute_credentials(
|
|
363
|
+
cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
|
|
286
364
|
"""Checks if the user has access credentials to
|
|
287
365
|
Nebius's compute service."""
|
|
288
366
|
token_cred_msg = (
|
|
289
367
|
f'{_INDENT_PREFIX}Credentials can be set up by running: \n'
|
|
290
|
-
f'{_INDENT_PREFIX} $ nebius iam get-access-token > {nebius.
|
|
291
|
-
f'{_INDENT_PREFIX} or generate
|
|
368
|
+
f'{_INDENT_PREFIX} $ nebius iam get-access-token > {nebius.iam_token_path()} \n' # pylint: disable=line-too-long
|
|
369
|
+
f'{_INDENT_PREFIX} or generate {nebius.credentials_path()} \n')
|
|
292
370
|
|
|
293
|
-
tenant_msg = (f'{_INDENT_PREFIX} Copy your
|
|
294
|
-
f'{_INDENT_PREFIX} $ echo $NEBIUS_TENANT_ID_PATH > {nebius.
|
|
371
|
+
tenant_msg = (f'{_INDENT_PREFIX} Copy your tenant ID from the web console and save it to file \n' # pylint: disable=line-too-long
|
|
372
|
+
f'{_INDENT_PREFIX} $ echo $NEBIUS_TENANT_ID_PATH > {nebius.tenant_id_path()} \n' # pylint: disable=line-too-long
|
|
295
373
|
f'{_INDENT_PREFIX} Or if you have 1 tenant you can run:\n' # pylint: disable=line-too-long
|
|
296
|
-
f'{_INDENT_PREFIX} $ nebius --format json iam whoami|jq -r \'.user_profile.tenants[0].tenant_id\' > {nebius.
|
|
374
|
+
f'{_INDENT_PREFIX} $ nebius --format json iam whoami|jq -r \'.user_profile.tenants[0].tenant_id\' > {nebius.tenant_id_path()} \n') # pylint: disable=line-too-long
|
|
297
375
|
if not nebius.is_token_or_cred_file_exist():
|
|
298
376
|
return False, f'{token_cred_msg}'
|
|
299
|
-
sdk = nebius.sdk()
|
|
300
377
|
tenant_id = nebius.get_tenant_id()
|
|
301
378
|
if tenant_id is None:
|
|
302
379
|
return False, f'{tenant_msg}'
|
|
380
|
+
sdk = nebius.sdk()
|
|
303
381
|
try:
|
|
304
382
|
service = nebius.iam().ProjectServiceClient(sdk)
|
|
305
383
|
service.list(
|
|
@@ -313,7 +391,8 @@ class Nebius(clouds.Cloud):
|
|
|
313
391
|
|
|
314
392
|
@classmethod
|
|
315
393
|
@annotations.lru_cache(scope='request')
|
|
316
|
-
def _check_storage_credentials(
|
|
394
|
+
def _check_storage_credentials(
|
|
395
|
+
cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
|
|
317
396
|
"""Checks if the user has access credentials to Nebius Object Storage.
|
|
318
397
|
|
|
319
398
|
Returns:
|
|
@@ -340,8 +419,8 @@ class Nebius(clouds.Cloud):
|
|
|
340
419
|
|
|
341
420
|
def get_credential_file_mounts(self) -> Dict[str, str]:
|
|
342
421
|
credential_file_mounts = {
|
|
343
|
-
|
|
344
|
-
for
|
|
422
|
+
filepath: filepath
|
|
423
|
+
for filepath in nebius.get_credential_file_paths()
|
|
345
424
|
}
|
|
346
425
|
if nebius_profile_in_aws_cred_and_config():
|
|
347
426
|
credential_file_mounts['~/.aws/credentials'] = '~/.aws/credentials'
|
|
@@ -355,9 +434,56 @@ class Nebius(clouds.Cloud):
|
|
|
355
434
|
return None
|
|
356
435
|
|
|
357
436
|
def instance_type_exists(self, instance_type: str) -> bool:
|
|
358
|
-
return
|
|
437
|
+
return catalog.instance_type_exists(instance_type, 'nebius')
|
|
359
438
|
|
|
360
439
|
def validate_region_zone(self, region: Optional[str], zone: Optional[str]):
|
|
361
|
-
return
|
|
362
|
-
|
|
363
|
-
|
|
440
|
+
return catalog.validate_region_zone(region, zone, clouds='nebius')
|
|
441
|
+
|
|
442
|
+
@classmethod
|
|
443
|
+
def get_user_identities(cls) -> Optional[List[List[str]]]:
|
|
444
|
+
"""Returns the email address + project id of the active user."""
|
|
445
|
+
nebius_workspace_config = json.dumps(
|
|
446
|
+
skypilot_config.get_workspace_cloud('nebius'), sort_keys=True)
|
|
447
|
+
return cls._get_user_identities(nebius_workspace_config)
|
|
448
|
+
|
|
449
|
+
@classmethod
|
|
450
|
+
@annotations.lru_cache(scope='request', maxsize=5)
|
|
451
|
+
def _get_user_identities(
|
|
452
|
+
cls, workspace_config: Optional[str]) -> Optional[List[List[str]]]:
|
|
453
|
+
# We add workspace_config in args to avoid caching the identity for when
|
|
454
|
+
# different workspace configs are used.
|
|
455
|
+
del workspace_config # Unused
|
|
456
|
+
sdk = nebius.sdk()
|
|
457
|
+
profile_client = nebius.iam().ProfileServiceClient(sdk)
|
|
458
|
+
profile = nebius.sync_call(
|
|
459
|
+
profile_client.get(nebius.iam().GetProfileRequest(),
|
|
460
|
+
timeout=nebius.READ_TIMEOUT))
|
|
461
|
+
if profile.user_profile is not None:
|
|
462
|
+
if profile.user_profile.attributes is None:
|
|
463
|
+
raise exceptions.CloudUserIdentityError(
|
|
464
|
+
'Nebius profile is a UserProfile, but has no attributes: '
|
|
465
|
+
f'{profile.user_profile}')
|
|
466
|
+
if profile.user_profile.attributes.email is None:
|
|
467
|
+
raise exceptions.CloudUserIdentityError(
|
|
468
|
+
'Nebius profile is a UserProfile, but has no email: '
|
|
469
|
+
f'{profile.user_profile}')
|
|
470
|
+
return [[profile.user_profile.attributes.email]]
|
|
471
|
+
if profile.service_account_profile is not None:
|
|
472
|
+
if profile.service_account_profile.info is None:
|
|
473
|
+
raise exceptions.CloudUserIdentityError(
|
|
474
|
+
'Nebius profile is a ServiceAccountProfile, but has no '
|
|
475
|
+
f'info: {profile.service_account_profile}')
|
|
476
|
+
if profile.service_account_profile.info.metadata is None:
|
|
477
|
+
raise exceptions.CloudUserIdentityError(
|
|
478
|
+
'Nebius profile is a ServiceAccountProfile, but has no '
|
|
479
|
+
f'metadata: {profile.service_account_profile}')
|
|
480
|
+
if profile.service_account_profile.info.metadata.name is None:
|
|
481
|
+
raise exceptions.CloudUserIdentityError(
|
|
482
|
+
'Nebius profile is a ServiceAccountProfile, but has no '
|
|
483
|
+
f'name: {profile.service_account_profile}')
|
|
484
|
+
return [[profile.service_account_profile.info.metadata.name]]
|
|
485
|
+
if profile.anonymous_profile is not None:
|
|
486
|
+
return None
|
|
487
|
+
unknown_profile_type = profile.which_field_in_oneof('profile')
|
|
488
|
+
raise exceptions.CloudUserIdentityError(
|
|
489
|
+
f'Nebius profile is of an unknown type - {unknown_profile_type}')
|