skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +10 -2
- sky/adaptors/aws.py +81 -16
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +64 -0
- sky/adaptors/nebius.py +3 -1
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +20 -0
- sky/authentication.py +157 -263
- sky/backends/__init__.py +3 -2
- sky/backends/backend.py +11 -3
- sky/backends/backend_utils.py +588 -184
- sky/backends/cloud_vm_ray_backend.py +1088 -904
- sky/backends/local_docker_backend.py +9 -5
- sky/backends/task_codegen.py +633 -0
- sky/backends/wheel_utils.py +18 -0
- sky/catalog/__init__.py +8 -0
- sky/catalog/aws_catalog.py +4 -0
- sky/catalog/common.py +19 -1
- sky/catalog/data_fetchers/fetch_aws.py +102 -80
- sky/catalog/data_fetchers/fetch_gcp.py +30 -3
- sky/catalog/data_fetchers/fetch_nebius.py +9 -6
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/catalog/kubernetes_catalog.py +24 -28
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/catalog/runpod_catalog.py +5 -1
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/check.py +73 -43
- sky/client/cli/command.py +675 -412
- sky/client/cli/flags.py +4 -2
- sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +12 -2
- sky/client/sdk.py +132 -63
- sky/client/sdk_async.py +34 -33
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +6 -0
- sky/clouds/aws.py +337 -129
- sky/clouds/azure.py +24 -18
- sky/clouds/cloud.py +40 -13
- sky/clouds/cudo.py +16 -13
- sky/clouds/do.py +9 -7
- sky/clouds/fluidstack.py +12 -5
- sky/clouds/gcp.py +14 -7
- sky/clouds/hyperbolic.py +12 -5
- sky/clouds/ibm.py +12 -5
- sky/clouds/kubernetes.py +80 -45
- sky/clouds/lambda_cloud.py +12 -5
- sky/clouds/nebius.py +23 -9
- sky/clouds/oci.py +19 -12
- sky/clouds/paperspace.py +4 -1
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +85 -24
- sky/clouds/scp.py +12 -8
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +4 -2
- sky/clouds/utils/scp_utils.py +61 -50
- sky/clouds/vast.py +33 -27
- sky/clouds/vsphere.py +14 -16
- sky/core.py +174 -165
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-7b45f9fbb6308557.js} +1 -1
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/pages/{_app-ce361c6959bc2001.js → _app-bde01e4a2beec258.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-c0b5935149902e6f.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-aed0ea19df7cf961.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-84a40f8c7c627fe4.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-531b2f8c4bf89f82.js} +1 -1
- sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/data_utils.py +92 -1
- sky/data/mounting_utils.py +162 -29
- sky/data/storage.py +200 -19
- sky/data/storage_utils.py +10 -45
- sky/exceptions.py +18 -7
- sky/execution.py +74 -31
- sky/global_user_state.py +605 -191
- sky/jobs/__init__.py +2 -0
- sky/jobs/client/sdk.py +101 -4
- sky/jobs/client/sdk_async.py +31 -5
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +726 -284
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +250 -100
- sky/jobs/scheduler.py +271 -173
- sky/jobs/server/core.py +367 -114
- sky/jobs/server/server.py +81 -35
- sky/jobs/server/utils.py +89 -35
- sky/jobs/state.py +1498 -620
- sky/jobs/utils.py +771 -306
- sky/logs/agent.py +40 -5
- sky/logs/aws.py +9 -19
- sky/metrics/utils.py +282 -39
- sky/optimizer.py +1 -1
- sky/provision/__init__.py +37 -1
- sky/provision/aws/config.py +34 -13
- sky/provision/aws/instance.py +5 -2
- sky/provision/azure/instance.py +5 -3
- sky/provision/common.py +2 -0
- sky/provision/cudo/instance.py +4 -3
- sky/provision/do/instance.py +4 -3
- sky/provision/docker_utils.py +97 -26
- sky/provision/fluidstack/instance.py +6 -5
- sky/provision/gcp/config.py +6 -1
- sky/provision/gcp/instance.py +4 -2
- sky/provision/hyperbolic/instance.py +4 -2
- sky/provision/instance_setup.py +66 -20
- sky/provision/kubernetes/__init__.py +2 -0
- sky/provision/kubernetes/config.py +7 -44
- sky/provision/kubernetes/constants.py +0 -1
- sky/provision/kubernetes/instance.py +609 -213
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +12 -8
- sky/provision/kubernetes/network_utils.py +8 -25
- sky/provision/kubernetes/utils.py +382 -418
- sky/provision/kubernetes/volume.py +150 -18
- sky/provision/lambda_cloud/instance.py +16 -13
- sky/provision/nebius/instance.py +6 -2
- sky/provision/nebius/utils.py +103 -86
- sky/provision/oci/instance.py +4 -2
- sky/provision/paperspace/instance.py +4 -3
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +30 -9
- sky/provision/runpod/__init__.py +2 -0
- sky/provision/runpod/instance.py +4 -3
- sky/provision/runpod/volume.py +69 -13
- sky/provision/scp/instance.py +307 -130
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/vast/instance.py +5 -3
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +3 -2
- sky/provision/vsphere/instance.py +8 -6
- sky/provision/vsphere/vsphere_utils.py +8 -1
- sky/resources.py +11 -3
- sky/schemas/api/responses.py +107 -6
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +2 -0
- sky/serve/client/impl.py +55 -21
- sky/serve/constants.py +4 -3
- sky/serve/controller.py +17 -11
- sky/serve/load_balancing_policies.py +1 -1
- sky/serve/replica_managers.py +219 -142
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +63 -54
- sky/serve/serve_utils.py +145 -109
- sky/serve/server/core.py +46 -25
- sky/serve/server/impl.py +311 -162
- sky/serve/server/server.py +21 -19
- sky/serve/service.py +84 -68
- sky/serve/service_spec.py +45 -7
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +12 -7
- sky/server/common.py +47 -24
- sky/server/config.py +62 -28
- sky/server/constants.py +9 -1
- sky/server/daemons.py +109 -38
- sky/server/metrics.py +76 -96
- sky/server/middleware_utils.py +166 -0
- sky/server/requests/executor.py +381 -145
- sky/server/requests/payloads.py +71 -18
- sky/server/requests/preconditions.py +15 -13
- sky/server/requests/request_names.py +121 -0
- sky/server/requests/requests.py +507 -157
- sky/server/requests/serializers/decoders.py +48 -17
- sky/server/requests/serializers/encoders.py +85 -20
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +116 -24
- sky/server/server.py +420 -172
- sky/server/stream_utils.py +219 -45
- sky/server/uvicorn.py +30 -19
- sky/setup_files/MANIFEST.in +6 -1
- sky/setup_files/alembic.ini +8 -0
- sky/setup_files/dependencies.py +62 -19
- sky/setup_files/setup.py +44 -44
- sky/sky_logging.py +13 -5
- sky/skylet/attempt_skylet.py +106 -24
- sky/skylet/configs.py +3 -1
- sky/skylet/constants.py +111 -26
- sky/skylet/events.py +64 -10
- sky/skylet/job_lib.py +141 -104
- sky/skylet/log_lib.py +233 -5
- sky/skylet/log_lib.pyi +40 -2
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +524 -0
- sky/skylet/skylet.py +22 -1
- sky/skylet/subprocess_daemon.py +104 -29
- sky/skypilot_config.py +99 -79
- sky/ssh_node_pools/server.py +9 -8
- sky/task.py +221 -104
- sky/templates/aws-ray.yml.j2 +1 -0
- sky/templates/azure-ray.yml.j2 +1 -0
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +1 -0
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +1 -0
- sky/templates/hyperbolic-ray.yml.j2 +1 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +3 -0
- sky/templates/kubernetes-ray.yml.j2 +196 -55
- sky/templates/lambda-ray.yml.j2 +1 -0
- sky/templates/nebius-ray.yml.j2 +3 -0
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +1 -0
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +1 -0
- sky/templates/scp-ray.yml.j2 +1 -0
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/vast-ray.yml.j2 +1 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +188 -43
- sky/usage/usage_lib.py +16 -4
- sky/users/permission.py +60 -43
- sky/utils/accelerator_registry.py +6 -3
- sky/utils/admin_policy_utils.py +18 -5
- sky/utils/annotations.py +22 -0
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/cli_utils/status_utils.py +12 -7
- sky/utils/cluster_utils.py +28 -6
- sky/utils/command_runner.py +88 -27
- sky/utils/command_runner.pyi +36 -3
- sky/utils/common.py +3 -1
- sky/utils/common_utils.py +37 -4
- sky/utils/config_utils.py +1 -14
- sky/utils/context.py +127 -40
- sky/utils/context_utils.py +73 -18
- sky/utils/controller_utils.py +229 -70
- sky/utils/db/db_utils.py +95 -18
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +24 -7
- sky/utils/env_options.py +4 -0
- sky/utils/git.py +559 -1
- sky/utils/kubernetes/create_cluster.sh +15 -30
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/{deploy_remote_cluster.py → deploy_ssh_node_pools.py} +258 -380
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/gpu_labeler.py +13 -3
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +213 -194
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes_enums.py +7 -15
- sky/utils/lock_events.py +4 -4
- sky/utils/locks.py +128 -31
- sky/utils/log_utils.py +0 -319
- sky/utils/resource_checker.py +13 -10
- sky/utils/resources_utils.py +53 -29
- sky/utils/rich_utils.py +8 -4
- sky/utils/schemas.py +107 -52
- sky/utils/subprocess_utils.py +17 -4
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +2 -1
- sky/utils/ux_utils.py +35 -1
- sky/utils/volume.py +88 -4
- sky/utils/yaml_utils.py +9 -0
- sky/volumes/client/sdk.py +48 -10
- sky/volumes/server/core.py +59 -22
- sky/volumes/server/server.py +46 -17
- sky/volumes/volume.py +54 -42
- sky/workspaces/core.py +57 -21
- sky/workspaces/server.py +13 -12
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/METADATA +331 -65
- skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
- sky/client/cli/git.py +0 -549
- sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
- sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
- sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
- sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
- sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
- sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
- sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
- sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
- sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
- sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
- sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
- sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
- sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
- sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
- sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
- sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
- sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
- sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
- sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
- sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
- sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
- sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
- skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
- /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/clouds/lambda_cloud.py
CHANGED
|
@@ -59,7 +59,9 @@ class Lambda(clouds.Cloud):
|
|
|
59
59
|
|
|
60
60
|
@classmethod
|
|
61
61
|
def _unsupported_features_for_resources(
|
|
62
|
-
cls,
|
|
62
|
+
cls,
|
|
63
|
+
resources: 'resources_lib.Resources',
|
|
64
|
+
region: Optional[str] = None,
|
|
63
65
|
) -> Dict[clouds.CloudImplementationFeatures, str]:
|
|
64
66
|
del resources # unused
|
|
65
67
|
return cls._CLOUD_UNSUPPORTED_FEATURES
|
|
@@ -69,10 +71,15 @@ class Lambda(clouds.Cloud):
|
|
|
69
71
|
return cls._MAX_CLUSTER_NAME_LEN_LIMIT
|
|
70
72
|
|
|
71
73
|
@classmethod
|
|
72
|
-
def regions_with_offering(
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
74
|
+
def regions_with_offering(
|
|
75
|
+
cls,
|
|
76
|
+
instance_type: str,
|
|
77
|
+
accelerators: Optional[Dict[str, int]],
|
|
78
|
+
use_spot: bool,
|
|
79
|
+
region: Optional[str],
|
|
80
|
+
zone: Optional[str],
|
|
81
|
+
resources: Optional['resources_lib.Resources'] = None,
|
|
82
|
+
) -> List[clouds.Region]:
|
|
76
83
|
assert zone is None, 'Lambda does not support zones.'
|
|
77
84
|
del accelerators, zone # unused
|
|
78
85
|
if use_spot:
|
sky/clouds/nebius.py
CHANGED
|
@@ -78,7 +78,9 @@ class Nebius(clouds.Cloud):
|
|
|
78
78
|
|
|
79
79
|
@classmethod
|
|
80
80
|
def _unsupported_features_for_resources(
|
|
81
|
-
cls,
|
|
81
|
+
cls,
|
|
82
|
+
resources: 'resources_lib.Resources',
|
|
83
|
+
region: Optional[str] = None,
|
|
82
84
|
) -> Dict[clouds.CloudImplementationFeatures, str]:
|
|
83
85
|
unsupported = cls._CLOUD_UNSUPPORTED_FEATURES.copy()
|
|
84
86
|
|
|
@@ -101,10 +103,15 @@ class Nebius(clouds.Cloud):
|
|
|
101
103
|
return cls._MAX_CLUSTER_NAME_LEN_LIMIT
|
|
102
104
|
|
|
103
105
|
@classmethod
|
|
104
|
-
def regions_with_offering(
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
106
|
+
def regions_with_offering(
|
|
107
|
+
cls,
|
|
108
|
+
instance_type: str,
|
|
109
|
+
accelerators: Optional[Dict[str, int]],
|
|
110
|
+
use_spot: bool,
|
|
111
|
+
region: Optional[str],
|
|
112
|
+
zone: Optional[str],
|
|
113
|
+
resources: Optional['resources_lib.Resources'] = None,
|
|
114
|
+
) -> List[clouds.Region]:
|
|
108
115
|
assert zone is None, 'Nebius does not support zones.'
|
|
109
116
|
del accelerators, zone # unused
|
|
110
117
|
regions = catalog.get_region_zones_for_instance_type(
|
|
@@ -245,9 +252,12 @@ class Nebius(clouds.Cloud):
|
|
|
245
252
|
'filesystem_mount_tag': f'filesystem-skypilot-{i+1}'
|
|
246
253
|
})
|
|
247
254
|
|
|
255
|
+
use_static_ip_address = skypilot_config.get_nested(
|
|
256
|
+
('nebius', 'use_static_ip_address'), default_value=False)
|
|
248
257
|
resources_vars: Dict[str, Any] = {
|
|
249
258
|
'instance_type': resources.instance_type,
|
|
250
259
|
'custom_resources': custom_resources,
|
|
260
|
+
'use_static_ip_address': use_static_ip_address,
|
|
251
261
|
'region': region.name,
|
|
252
262
|
'image_id': image_family,
|
|
253
263
|
# Nebius does not support specific zones.
|
|
@@ -364,10 +374,10 @@ class Nebius(clouds.Cloud):
|
|
|
364
374
|
f'{_INDENT_PREFIX} $ nebius --format json iam whoami|jq -r \'.user_profile.tenants[0].tenant_id\' > {nebius.tenant_id_path()} \n') # pylint: disable=line-too-long
|
|
365
375
|
if not nebius.is_token_or_cred_file_exist():
|
|
366
376
|
return False, f'{token_cred_msg}'
|
|
367
|
-
sdk = nebius.sdk()
|
|
368
377
|
tenant_id = nebius.get_tenant_id()
|
|
369
378
|
if tenant_id is None:
|
|
370
379
|
return False, f'{tenant_msg}'
|
|
380
|
+
sdk = nebius.sdk()
|
|
371
381
|
try:
|
|
372
382
|
service = nebius.iam().ProjectServiceClient(sdk)
|
|
373
383
|
service.list(
|
|
@@ -445,9 +455,13 @@ class Nebius(clouds.Cloud):
|
|
|
445
455
|
del workspace_config # Unused
|
|
446
456
|
sdk = nebius.sdk()
|
|
447
457
|
profile_client = nebius.iam().ProfileServiceClient(sdk)
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
458
|
+
try:
|
|
459
|
+
profile = nebius.sync_call(
|
|
460
|
+
profile_client.get(nebius.iam().GetProfileRequest(),
|
|
461
|
+
timeout=nebius.READ_TIMEOUT))
|
|
462
|
+
except Exception as e:
|
|
463
|
+
raise exceptions.CloudUserIdentityError(
|
|
464
|
+
f'Error getting Nebius profile: {e}')
|
|
451
465
|
if profile.user_profile is not None:
|
|
452
466
|
if profile.user_profile.attributes is None:
|
|
453
467
|
raise exceptions.CloudUserIdentityError(
|
sky/clouds/oci.py
CHANGED
|
@@ -28,6 +28,7 @@ from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
|
|
|
28
28
|
from sky import catalog
|
|
29
29
|
from sky import clouds
|
|
30
30
|
from sky import exceptions
|
|
31
|
+
from sky.adaptors import common
|
|
31
32
|
from sky.adaptors import oci as oci_adaptor
|
|
32
33
|
from sky.clouds.utils import oci_utils
|
|
33
34
|
from sky.provision.oci.query_utils import query_helper
|
|
@@ -68,7 +69,9 @@ class OCI(clouds.Cloud):
|
|
|
68
69
|
|
|
69
70
|
@classmethod
|
|
70
71
|
def _unsupported_features_for_resources(
|
|
71
|
-
cls,
|
|
72
|
+
cls,
|
|
73
|
+
resources: 'resources_lib.Resources',
|
|
74
|
+
region: Optional[str] = None,
|
|
72
75
|
) -> Dict[clouds.CloudImplementationFeatures, str]:
|
|
73
76
|
unsupported_features = {
|
|
74
77
|
clouds.CloudImplementationFeatures.CLONE_DISK_FROM_CLUSTER:
|
|
@@ -95,10 +98,15 @@ class OCI(clouds.Cloud):
|
|
|
95
98
|
return cls._MAX_CLUSTER_NAME_LEN_LIMIT
|
|
96
99
|
|
|
97
100
|
@classmethod
|
|
98
|
-
def regions_with_offering(
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
101
|
+
def regions_with_offering(
|
|
102
|
+
cls,
|
|
103
|
+
instance_type: str,
|
|
104
|
+
accelerators: Optional[Dict[str, int]],
|
|
105
|
+
use_spot: bool,
|
|
106
|
+
region: Optional[str],
|
|
107
|
+
zone: Optional[str],
|
|
108
|
+
resources: Optional['resources_lib.Resources'] = None,
|
|
109
|
+
) -> List[clouds.Region]:
|
|
102
110
|
del accelerators # unused
|
|
103
111
|
|
|
104
112
|
regions = catalog.get_region_zones_for_instance_type(
|
|
@@ -454,13 +462,12 @@ class OCI(clouds.Cloud):
|
|
|
454
462
|
f'{cls._INDENT_PREFIX} region=us-sanjose-1\n'
|
|
455
463
|
f'{cls._INDENT_PREFIX} key_file=~/.oci/oci_api_key.pem')
|
|
456
464
|
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
f'{cls._INDENT_PREFIX}{short_credential_help_str}')
|
|
465
|
+
dependency_error_msg = (
|
|
466
|
+
'`oci` is not installed. Install it with: '
|
|
467
|
+
'pip install oci\n'
|
|
468
|
+
f'{cls._INDENT_PREFIX}{short_credential_help_str}')
|
|
469
|
+
if not common.can_import_modules(['oci']):
|
|
470
|
+
return False, dependency_error_msg
|
|
464
471
|
|
|
465
472
|
conf_file = oci_adaptor.get_config_file()
|
|
466
473
|
|
sky/clouds/paperspace.py
CHANGED
|
@@ -60,7 +60,9 @@ class Paperspace(clouds.Cloud):
|
|
|
60
60
|
|
|
61
61
|
@classmethod
|
|
62
62
|
def _unsupported_features_for_resources(
|
|
63
|
-
cls,
|
|
63
|
+
cls,
|
|
64
|
+
resources: 'resources_lib.Resources',
|
|
65
|
+
region: Optional[str] = None,
|
|
64
66
|
) -> Dict[clouds.CloudImplementationFeatures, str]:
|
|
65
67
|
"""The features not supported based on the resources provided.
|
|
66
68
|
|
|
@@ -86,6 +88,7 @@ class Paperspace(clouds.Cloud):
|
|
|
86
88
|
use_spot: bool,
|
|
87
89
|
region: Optional[str],
|
|
88
90
|
zone: Optional[str],
|
|
91
|
+
resources: Optional['resources_lib.Resources'] = None,
|
|
89
92
|
) -> List[clouds.Region]:
|
|
90
93
|
assert zone is None, 'Paperspace does not support zones.'
|
|
91
94
|
del accelerators, zone # unused
|
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
""" Prime Intellect Cloud. """
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
import typing
|
|
5
|
+
from typing import Dict, Iterator, List, Optional, Tuple, Union
|
|
6
|
+
|
|
7
|
+
from sky import catalog
|
|
8
|
+
from sky import clouds
|
|
9
|
+
from sky.provision.primeintellect import utils
|
|
10
|
+
from sky.utils import registry
|
|
11
|
+
from sky.utils import resources_utils
|
|
12
|
+
|
|
13
|
+
if typing.TYPE_CHECKING:
|
|
14
|
+
from sky import resources as resources_lib
|
|
15
|
+
from sky.utils import volume as volume_lib
|
|
16
|
+
|
|
17
|
+
CredentialCheckResult = Tuple[bool, Optional[Union[str, Dict[str, str]]]]
|
|
18
|
+
|
|
19
|
+
_CREDENTIAL_FILES = [
|
|
20
|
+
'config.json',
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@registry.CLOUD_REGISTRY.register
|
|
25
|
+
class PrimeIntellect(clouds.Cloud):
|
|
26
|
+
"""Prime Intellect GPU Cloud"""
|
|
27
|
+
_REPR = 'PrimeIntellect'
|
|
28
|
+
_CLOUD_UNSUPPORTED_FEATURES = {
|
|
29
|
+
clouds.CloudImplementationFeatures.AUTOSTOP: 'Stopping not supported.',
|
|
30
|
+
clouds.CloudImplementationFeatures.AUTODOWN:
|
|
31
|
+
('Auto down not supported yet.'),
|
|
32
|
+
clouds.CloudImplementationFeatures.STOP: 'Stopping not supported.',
|
|
33
|
+
clouds.CloudImplementationFeatures.MULTI_NODE:
|
|
34
|
+
('Multi-node not supported yet.'),
|
|
35
|
+
clouds.CloudImplementationFeatures.CUSTOM_DISK_TIER:
|
|
36
|
+
('Custom disk tier not supported yet.'),
|
|
37
|
+
clouds.CloudImplementationFeatures.CUSTOM_NETWORK_TIER:
|
|
38
|
+
('Custom network tier not supported yet.'),
|
|
39
|
+
clouds.CloudImplementationFeatures.CUSTOM_MULTI_NETWORK:
|
|
40
|
+
('Customized multiple network interfaces are not supported'),
|
|
41
|
+
clouds.CloudImplementationFeatures.IMAGE_ID:
|
|
42
|
+
('Custom image not supported yet.'),
|
|
43
|
+
clouds.CloudImplementationFeatures.DOCKER_IMAGE:
|
|
44
|
+
('Custom docker image not supported yet.'),
|
|
45
|
+
}
|
|
46
|
+
PROVISIONER_VERSION = clouds.ProvisionerVersion.SKYPILOT
|
|
47
|
+
STATUS_VERSION = clouds.StatusVersion.SKYPILOT
|
|
48
|
+
_MAX_CLUSTER_NAME_LEN_LIMIT = 120
|
|
49
|
+
_regions: List[clouds.Region] = []
|
|
50
|
+
|
|
51
|
+
@classmethod
|
|
52
|
+
def _cloud_unsupported_features(
|
|
53
|
+
cls) -> Dict[clouds.CloudImplementationFeatures, str]:
|
|
54
|
+
return cls._CLOUD_UNSUPPORTED_FEATURES
|
|
55
|
+
|
|
56
|
+
@classmethod
|
|
57
|
+
def _max_cluster_name_length(cls) -> Optional[int]:
|
|
58
|
+
return cls._MAX_CLUSTER_NAME_LEN_LIMIT
|
|
59
|
+
|
|
60
|
+
@classmethod
|
|
61
|
+
def regions_with_offering(
|
|
62
|
+
cls,
|
|
63
|
+
instance_type: str,
|
|
64
|
+
accelerators: Optional[Dict[str, int]],
|
|
65
|
+
use_spot: bool,
|
|
66
|
+
region: Optional[str],
|
|
67
|
+
zone: Optional[str],
|
|
68
|
+
resources: Optional['resources_lib.Resources'] = None,
|
|
69
|
+
) -> List[clouds.Region]:
|
|
70
|
+
"""Returns the regions that offer the specified resources."""
|
|
71
|
+
del accelerators
|
|
72
|
+
regions = catalog.get_region_zones_for_instance_type(
|
|
73
|
+
instance_type, use_spot, 'primeintellect')
|
|
74
|
+
|
|
75
|
+
if region is not None:
|
|
76
|
+
regions = [r for r in regions if r.name == region]
|
|
77
|
+
if zone is not None:
|
|
78
|
+
for r in regions:
|
|
79
|
+
assert r.zones is not None, r
|
|
80
|
+
r.set_zones([z for z in r.zones if z.name == zone])
|
|
81
|
+
regions = [r for r in regions if r.zones]
|
|
82
|
+
return regions
|
|
83
|
+
|
|
84
|
+
@classmethod
|
|
85
|
+
def get_vcpus_mem_from_instance_type(
|
|
86
|
+
cls,
|
|
87
|
+
instance_type: str,
|
|
88
|
+
) -> Tuple[Optional[float], Optional[float]]:
|
|
89
|
+
"""Returns the #vCPUs and memory that the instance type offers."""
|
|
90
|
+
return catalog.get_vcpus_mem_from_instance_type(instance_type,
|
|
91
|
+
clouds='primeintellect')
|
|
92
|
+
|
|
93
|
+
@classmethod
|
|
94
|
+
def zones_provision_loop(
|
|
95
|
+
cls,
|
|
96
|
+
*,
|
|
97
|
+
region: str,
|
|
98
|
+
num_nodes: int,
|
|
99
|
+
instance_type: str,
|
|
100
|
+
accelerators: Optional[Dict[str, int]] = None,
|
|
101
|
+
use_spot: bool = False,
|
|
102
|
+
) -> Iterator[Optional[List['clouds.Zone']]]:
|
|
103
|
+
"""Returns an iterator over zones for provisioning."""
|
|
104
|
+
regions = cls.regions_with_offering(instance_type,
|
|
105
|
+
accelerators,
|
|
106
|
+
use_spot,
|
|
107
|
+
region=region,
|
|
108
|
+
zone=None)
|
|
109
|
+
for r in regions:
|
|
110
|
+
assert r.zones is not None, r
|
|
111
|
+
yield r.zones
|
|
112
|
+
|
|
113
|
+
def instance_type_to_hourly_cost(self,
|
|
114
|
+
instance_type: str,
|
|
115
|
+
use_spot: bool,
|
|
116
|
+
region: Optional[str] = None,
|
|
117
|
+
zone: Optional[str] = None) -> float:
|
|
118
|
+
"""Returns the cost, or the cheapest cost among all zones for spot."""
|
|
119
|
+
return catalog.get_hourly_cost(instance_type,
|
|
120
|
+
use_spot=use_spot,
|
|
121
|
+
region=region,
|
|
122
|
+
zone=zone,
|
|
123
|
+
clouds='primeintellect')
|
|
124
|
+
|
|
125
|
+
def accelerators_to_hourly_cost(self,
|
|
126
|
+
accelerators: Dict[str, int],
|
|
127
|
+
use_spot: bool,
|
|
128
|
+
region: Optional[str] = None,
|
|
129
|
+
zone: Optional[str] = None) -> float:
|
|
130
|
+
"""Returns the cost, or the cheapest cost among all zones for spot."""
|
|
131
|
+
del accelerators, use_spot, region, zone # Unused.
|
|
132
|
+
return 0.0
|
|
133
|
+
|
|
134
|
+
def get_egress_cost(self, num_gigabytes: float) -> float:
    """Returns the egress cost, which is always 0.0 for this cloud."""
    del num_gigabytes  # Unused: the result does not depend on the volume.
    egress_cost = 0.0
    return egress_cost
|
|
136
|
+
|
|
137
|
+
def is_same_cloud(self, other: clouds.Cloud) -> bool:
    """Tells whether `other` is an instance of the PrimeIntellect cloud."""
    same_cloud = isinstance(other, PrimeIntellect)
    return same_cloud
|
|
139
|
+
|
|
140
|
+
@classmethod
def get_default_instance_type(cls,
                              cpus: Optional[str] = None,
                              memory: Optional[str] = None,
                              disk_tier: Optional[
                                  resources_utils.DiskTier] = None,
                              region: Optional[str] = None,
                              zone: Optional[str] = None) -> Optional[str]:
    """Picks the default Prime Intellect instance type from the catalog.

    Every filter is forwarded to `catalog.get_default_instance_type` for
    the 'primeintellect' cloud; its result (possibly None) is returned.
    """
    default_type = catalog.get_default_instance_type(
        cpus=cpus,
        memory=memory,
        disk_tier=disk_tier,
        region=region,
        zone=zone,
        clouds='primeintellect')
    return default_type
|
|
155
|
+
|
|
156
|
+
@classmethod
def get_accelerators_from_instance_type(
        cls, instance_type: str) -> Optional[Dict[str, Union[int, float]]]:
    """Returns the catalog's accelerator mapping for an instance type.

    The value comes straight from the 'primeintellect' catalog and may be
    None.
    """
    accelerator_map = catalog.get_accelerators_from_instance_type(
        instance_type, clouds='primeintellect')
    return accelerator_map
|
|
161
|
+
|
|
162
|
+
@classmethod
|
|
163
|
+
def get_zone_shell_cmd(cls) -> Optional[str]:
|
|
164
|
+
return None
|
|
165
|
+
|
|
166
|
+
def make_deploy_resources_variables(
    self,
    resources: 'resources_lib.Resources',
    cluster_name: resources_utils.ClusterName,
    region: 'clouds.Region',
    zones: Optional[List['clouds.Zone']],
    num_nodes: int,
    dryrun: bool = False,
    volume_mounts: Optional[List['volume_lib.VolumeMount']] = None
) -> Dict[str, Optional[str]]:
    """Builds the deployment variables for the given resources.

    Returns:
        A dict with the keys 'instance_type', 'custom_resources',
        'region', 'zones' and 'availability_zone'. 'custom_resources' is
        the compact JSON encoding of the instance type's accelerators, or
        None when the catalog reports none. Both zone keys hold the name
        of the first entry in `zones`.
    """
    del dryrun, cluster_name, num_nodes, volume_mounts  # Unused.
    assert zones is not None, (region, zones)

    launchable = resources.assert_launchable()
    accelerators = self.get_accelerators_from_instance_type(
        launchable.instance_type)
    custom_resources = (None if accelerators is None else json.dumps(
        accelerators, separators=(',', ':')))

    deploy_vars = {
        'instance_type': launchable.instance_type,
        'custom_resources': custom_resources,
        'region': region.name,
        'zones': zones[0].name,
        'availability_zone': zones[0].name,
    }
    return deploy_vars
|
|
194
|
+
|
|
195
|
+
def _get_feasible_launchable_resources(
    self, resources: 'resources_lib.Resources'
) -> 'resources_utils.FeasibleResources':
    """Computes the feasible launchable resources for a request.

    Three cases are handled:
      * An instance type is already set: the request itself is returned
        (with accelerators cleared) as the only candidate.
      * No accelerators requested: the catalog's default instance type
        for the cpus/memory/disk_tier filters is used, if one exists.
      * Exactly one accelerator requested: the catalog's matching
        instance types are used, together with its fuzzy candidates.
    """
    if resources.instance_type is not None:
        assert resources.is_launchable(), resources
        pinned = resources.copy(accelerators=None)
        return resources_utils.FeasibleResources([pinned], [], None)

    def _make(instance_list):
        # One Resources copy per instance type, bound to this cloud.
        return [
            resources.copy(
                cloud=PrimeIntellect(),
                instance_type=candidate_type,
                accelerators=None,
                cpus=None,
            ) for candidate_type in instance_list
        ]

    # Currently, handle a filter on accelerators only.
    requested_accs = resources.accelerators
    if requested_accs is None:
        default_type = PrimeIntellect.get_default_instance_type(
            cpus=resources.cpus,
            memory=resources.memory,
            disk_tier=resources.disk_tier)
        if default_type is not None:
            return resources_utils.FeasibleResources(
                _make([default_type]), [], None)
        # TODO(pokgak): Add hints to all return values in this method
        # to help users understand why the resources are not
        # launchable.
        return resources_utils.FeasibleResources([], [], None)

    assert len(requested_accs) == 1, resources
    acc_name, acc_count = next(iter(requested_accs.items()))
    matches, fuzzy_matches = catalog.get_instance_type_for_accelerator(
        acc_name,
        acc_count,
        use_spot=resources.use_spot,
        cpus=resources.cpus,
        region=resources.region,
        zone=resources.zone,
        clouds='primeintellect')
    feasible = [] if matches is None else _make(matches)
    return resources_utils.FeasibleResources(feasible, fuzzy_matches, None)
|
|
248
|
+
|
|
249
|
+
@classmethod
|
|
250
|
+
def _check_credentials(cls) -> Tuple[bool, Optional[str]]:
|
|
251
|
+
"""Verify that the user has valid credentials for Prime Intellect."""
|
|
252
|
+
|
|
253
|
+
primeintellect_config_file = '~/.prime/config.json'
|
|
254
|
+
if not os.path.isfile(os.path.expanduser(primeintellect_config_file)):
|
|
255
|
+
return (False, f'{primeintellect_config_file} does not exist.')
|
|
256
|
+
|
|
257
|
+
with open(os.path.expanduser(primeintellect_config_file),
|
|
258
|
+
encoding='UTF-8') as f:
|
|
259
|
+
data = json.load(f)
|
|
260
|
+
api_key = data.get('api_key')
|
|
261
|
+
if not api_key:
|
|
262
|
+
print('API key is missing or empty')
|
|
263
|
+
|
|
264
|
+
client = utils.PrimeIntellectAPIClient()
|
|
265
|
+
try:
|
|
266
|
+
client.list_instances()
|
|
267
|
+
except utils.PrimeintellectAPIError as e:
|
|
268
|
+
if e.status_code == 403:
|
|
269
|
+
return False, (
|
|
270
|
+
'Please check that your API key has the correct '
|
|
271
|
+
'permissions, generate a new one at '
|
|
272
|
+
'https://app.primeintellect.ai/dashboard/tokens, '
|
|
273
|
+
'or run \'prime login\' to configure a new API key.')
|
|
274
|
+
return True, None
|
|
275
|
+
|
|
276
|
+
@classmethod
def _check_compute_credentials(cls) -> CredentialCheckResult:
    """Checks the user's access credentials for Prime Intellect compute.

    This forwards the outcome of `_check_credentials()` unchanged.
    """
    compute_check = cls._check_credentials()
    return compute_check
|
|
281
|
+
|
|
282
|
+
def get_credential_file_mounts(self) -> Dict[str, str]:
    """Maps each credential file under ~/.prime/ to the same path.

    One entry is produced per name in `_CREDENTIAL_FILES`; key and value
    are identical.
    """
    mounts: Dict[str, str] = {}
    for filename in _CREDENTIAL_FILES:
        credential_path = f'~/.prime/{filename}'
        mounts[credential_path] = credential_path
    return mounts
|
|
288
|
+
|
|
289
|
+
@classmethod
|
|
290
|
+
def get_current_user_identity(cls) -> Optional[List[str]]:
|
|
291
|
+
return None
|
|
292
|
+
|
|
293
|
+
def instance_type_exists(self, instance_type: str) -> bool:
    """Asks the 'primeintellect' catalog whether the instance type exists."""
    exists = catalog.instance_type_exists(instance_type, 'primeintellect')
    return exists
|
|
295
|
+
|
|
296
|
+
def validate_region_zone(self, region: Optional[str], zone: Optional[str]):
    """Validates a region/zone pair against the 'primeintellect' catalog.

    Returns whatever `catalog.validate_region_zone` returns.
    """
    validated = catalog.validate_region_zone(region,
                                             zone,
                                             clouds='primeintellect')
    return validated
|
|
300
|
+
|
|
301
|
+
@classmethod
def _unsupported_features_for_resources(
    cls,
    resources: 'resources_lib.Resources',
    region: Optional[str] = None,
) -> Dict[clouds.CloudImplementationFeatures, str]:
    """Lists the features this cloud does not support.

    check_features_are_supported() uses this method to decide whether the
    cloud implementation supports all the requested features. The answer
    here is the class-level table and does not vary with the arguments.

    Returns:
        A dict of {feature: reason} for the features not supported by the
        cloud implementation.
    """
    del resources, region  # Neither argument influences the result.
    unsupported = cls._CLOUD_UNSUPPORTED_FEATURES
    return unsupported
|
sky/clouds/runpod.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
""" RunPod Cloud. """
|
|
2
2
|
|
|
3
|
+
from importlib import util as import_lib_util
|
|
4
|
+
import os
|
|
3
5
|
import typing
|
|
4
6
|
from typing import Dict, Iterator, List, Optional, Tuple, Union
|
|
5
7
|
|
|
@@ -12,9 +14,7 @@ if typing.TYPE_CHECKING:
|
|
|
12
14
|
from sky import resources as resources_lib
|
|
13
15
|
from sky.utils import volume as volume_lib
|
|
14
16
|
|
|
15
|
-
|
|
16
|
-
'config.toml',
|
|
17
|
-
]
|
|
17
|
+
_CREDENTIAL_FILE = 'config.toml'
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
@registry.CLOUD_REGISTRY.register
|
|
@@ -53,7 +53,9 @@ class RunPod(clouds.Cloud):
|
|
|
53
53
|
|
|
54
54
|
@classmethod
|
|
55
55
|
def _unsupported_features_for_resources(
|
|
56
|
-
cls,
|
|
56
|
+
cls,
|
|
57
|
+
resources: 'resources_lib.Resources',
|
|
58
|
+
region: Optional[str] = None,
|
|
57
59
|
) -> Dict[clouds.CloudImplementationFeatures, str]:
|
|
58
60
|
"""The features not supported based on the resources provided.
|
|
59
61
|
|
|
@@ -72,10 +74,15 @@ class RunPod(clouds.Cloud):
|
|
|
72
74
|
return cls._MAX_CLUSTER_NAME_LEN_LIMIT
|
|
73
75
|
|
|
74
76
|
@classmethod
|
|
75
|
-
def regions_with_offering(
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
77
|
+
def regions_with_offering(
|
|
78
|
+
cls,
|
|
79
|
+
instance_type: str,
|
|
80
|
+
accelerators: Optional[Dict[str, int]],
|
|
81
|
+
use_spot: bool,
|
|
82
|
+
region: Optional[str],
|
|
83
|
+
zone: Optional[str],
|
|
84
|
+
resources: Optional['resources_lib.Resources'] = None,
|
|
85
|
+
) -> List[clouds.Region]:
|
|
79
86
|
del accelerators # unused
|
|
80
87
|
regions = catalog.get_region_zones_for_instance_type(
|
|
81
88
|
instance_type, use_spot, 'runpod')
|
|
@@ -193,7 +200,7 @@ class RunPod(clouds.Cloud):
|
|
|
193
200
|
acc_dict)
|
|
194
201
|
|
|
195
202
|
if resources.image_id is None:
|
|
196
|
-
image_id: Optional[str] = 'runpod/base:
|
|
203
|
+
image_id: Optional[str] = 'runpod/base:1.0.2-ubuntu2204'
|
|
197
204
|
elif resources.extract_docker_image() is not None:
|
|
198
205
|
image_id = resources.extract_docker_image()
|
|
199
206
|
else:
|
|
@@ -285,30 +292,84 @@ class RunPod(clouds.Cloud):
|
|
|
285
292
|
|
|
286
293
|
@classmethod
|
|
287
294
|
def _check_credentials(cls) -> Tuple[bool, Optional[str]]:
|
|
288
|
-
"""
|
|
295
|
+
"""Verify that the user has valid credentials for RunPod. """
|
|
296
|
+
dependency_error_msg = ('Failed to import runpod or TOML parser. '
|
|
297
|
+
'Install: pip install "skypilot[runpod]".')
|
|
298
|
+
try:
|
|
299
|
+
runpod_spec = import_lib_util.find_spec('runpod')
|
|
300
|
+
if runpod_spec is None:
|
|
301
|
+
return False, dependency_error_msg
|
|
302
|
+
# Prefer stdlib tomllib (Python 3.11+); fallback to tomli
|
|
303
|
+
tomllib_spec = import_lib_util.find_spec('tomllib')
|
|
304
|
+
tomli_spec = import_lib_util.find_spec('tomli')
|
|
305
|
+
if tomllib_spec is None and tomli_spec is None:
|
|
306
|
+
return False, dependency_error_msg
|
|
307
|
+
except ValueError:
|
|
308
|
+
# docstring of importlib_util.find_spec:
|
|
309
|
+
# First, sys.modules is checked to see if the module was alread
|
|
310
|
+
# imported.
|
|
311
|
+
# If so, then sys.modules[name].__spec__ is returned.
|
|
312
|
+
# If that happens to be set to None, then ValueError is raised.
|
|
313
|
+
return False, dependency_error_msg
|
|
314
|
+
|
|
315
|
+
valid, error = cls._check_runpod_credentials()
|
|
316
|
+
if not valid:
|
|
317
|
+
return False, (
|
|
318
|
+
f'{error} \n' # First line is indented by 4 spaces
|
|
319
|
+
' Credentials can be set up by running: \n'
|
|
320
|
+
f' $ pip install runpod \n'
|
|
321
|
+
f' $ runpod config\n'
|
|
322
|
+
' For more information, see https://docs.skypilot.co/en/latest/getting-started/installation.html#runpod' # pylint: disable=line-too-long
|
|
323
|
+
)
|
|
324
|
+
|
|
325
|
+
return True, None
|
|
326
|
+
|
|
327
|
+
@classmethod
|
|
328
|
+
def _check_runpod_credentials(cls, profile: str = 'default'):
|
|
329
|
+
"""Checks if the credentials file exists and is valid."""
|
|
330
|
+
credential_file = os.path.expanduser(f'~/.runpod/{_CREDENTIAL_FILE}')
|
|
331
|
+
if not os.path.exists(credential_file):
|
|
332
|
+
return False, '~/.runpod/config.toml does not exist.'
|
|
333
|
+
|
|
334
|
+
# We don't need to import TOML parser if config.toml does not exist.
|
|
335
|
+
# When needed, prefer stdlib tomllib (py>=3.11); otherwise use tomli.
|
|
336
|
+
# TODO(andy): remove this fallback after dropping Python 3.10 support.
|
|
289
337
|
try:
|
|
290
|
-
|
|
291
|
-
|
|
338
|
+
try:
|
|
339
|
+
import tomllib as toml # pylint: disable=import-outside-toplevel
|
|
340
|
+
except ModuleNotFoundError: # py<3.11
|
|
341
|
+
import tomli as toml # pylint: disable=import-outside-toplevel
|
|
342
|
+
except ModuleNotFoundError:
|
|
343
|
+
# Should never happen. We already installed proper dependencies for
|
|
344
|
+
# different Python versions in setup_files/dependencies.py.
|
|
345
|
+
return False, (
|
|
346
|
+
'~/.runpod/config.toml exists but no TOML parser is available. '
|
|
347
|
+
'Install tomli for Python < 3.11: pip install tomli.')
|
|
348
|
+
|
|
349
|
+
# Check for default api_key
|
|
350
|
+
try:
|
|
351
|
+
with open(credential_file, 'rb') as cred_file:
|
|
352
|
+
config = toml.load(cred_file)
|
|
292
353
|
|
|
293
|
-
if not
|
|
354
|
+
if profile not in config:
|
|
294
355
|
return False, (
|
|
295
|
-
f'
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
356
|
+
f'~/.runpod/config.toml is missing {profile} profile.')
|
|
357
|
+
|
|
358
|
+
if 'api_key' not in config[profile]:
|
|
359
|
+
return (
|
|
360
|
+
False,
|
|
361
|
+
'~/.runpod/config.toml is missing '
|
|
362
|
+
f'api_key for {profile} profile.',
|
|
300
363
|
)
|
|
301
364
|
|
|
302
|
-
|
|
365
|
+
except (TypeError, ValueError):
|
|
366
|
+
return False, '~/.runpod/config.toml is not a valid TOML file.'
|
|
303
367
|
|
|
304
|
-
|
|
305
|
-
return False, ('Failed to import runpod. '
|
|
306
|
-
'To install, run: pip install skypilot[runpod]')
|
|
368
|
+
return True, None
|
|
307
369
|
|
|
308
370
|
def get_credential_file_mounts(self) -> Dict[str, str]:
    """Maps the RunPod credential file under ~/.runpod/ to the same path."""
    credential_path = f'~/.runpod/{_CREDENTIAL_FILE}'
    mounts = {credential_path: credential_path}
    return mounts
|
|
313
374
|
|
|
314
375
|
@classmethod
|