skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251210__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +12 -2
- sky/adaptors/aws.py +27 -22
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +64 -0
- sky/adaptors/nebius.py +3 -1
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/adaptors/slurm.py +478 -0
- sky/admin_policy.py +20 -0
- sky/authentication.py +157 -263
- sky/backends/__init__.py +3 -2
- sky/backends/backend.py +11 -3
- sky/backends/backend_utils.py +630 -185
- sky/backends/cloud_vm_ray_backend.py +1111 -928
- sky/backends/local_docker_backend.py +9 -5
- sky/backends/task_codegen.py +971 -0
- sky/backends/wheel_utils.py +18 -0
- sky/catalog/__init__.py +8 -3
- sky/catalog/aws_catalog.py +4 -0
- sky/catalog/common.py +19 -1
- sky/catalog/data_fetchers/fetch_aws.py +102 -80
- sky/catalog/data_fetchers/fetch_gcp.py +30 -3
- sky/catalog/data_fetchers/fetch_nebius.py +9 -6
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/catalog/kubernetes_catalog.py +36 -32
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/catalog/runpod_catalog.py +5 -1
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/slurm_catalog.py +243 -0
- sky/check.py +87 -46
- sky/client/cli/command.py +1004 -434
- sky/client/cli/flags.py +4 -2
- sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +12 -2
- sky/client/sdk.py +188 -65
- sky/client/sdk_async.py +34 -33
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +8 -0
- sky/clouds/aws.py +337 -129
- sky/clouds/azure.py +24 -18
- sky/clouds/cloud.py +47 -13
- sky/clouds/cudo.py +16 -13
- sky/clouds/do.py +9 -7
- sky/clouds/fluidstack.py +12 -5
- sky/clouds/gcp.py +14 -7
- sky/clouds/hyperbolic.py +12 -5
- sky/clouds/ibm.py +12 -5
- sky/clouds/kubernetes.py +80 -45
- sky/clouds/lambda_cloud.py +12 -5
- sky/clouds/nebius.py +23 -9
- sky/clouds/oci.py +19 -12
- sky/clouds/paperspace.py +4 -1
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +85 -24
- sky/clouds/scp.py +12 -8
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/slurm.py +578 -0
- sky/clouds/ssh.py +6 -3
- sky/clouds/utils/scp_utils.py +61 -50
- sky/clouds/vast.py +43 -27
- sky/clouds/vsphere.py +14 -16
- sky/core.py +296 -195
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/KYAhEFa3FTfq4JyKVgo-s/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-9c810f01ff4f398a.js +11 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.ddda8c6c6f9f24dc.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-b589397dc09c5b4e.js} +1 -1
- sky/dashboard/out/_next/static/chunks/3850-fd5696f3bbbaddae.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-da20c5fd999f319c.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-09cbf02d3cd518c3.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-8369df1cf105221c.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-68b647e26f9d2793.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33f525539665fdfd.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a7565f586ef86467.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-9e5d47818b9bdadd.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{config-dfb9bf07b13045f4.js → config-718cdc365de82689.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-12c559ec4d81fdbd.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-d187cd0413d72475.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-895847b6cf200b04.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-8d0f4655400b4eb9.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-e5a98f17f8513a96.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/plugins/[...slug]-4f46050ca065d8f8.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-2f7646eb77785a2c.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-ef19d49c6d0e8500.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-96e0f298308da7e2.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-cb4da3abe08ebf19.js} +1 -1
- sky/dashboard/out/_next/static/chunks/webpack-fba3de387ff6bb08.js +1 -0
- sky/dashboard/out/_next/static/css/c5a4cfd2600fc715.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/plugins/[...slug].html +1 -0
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/data_utils.py +92 -1
- sky/data/mounting_utils.py +177 -30
- sky/data/storage.py +200 -19
- sky/data/storage_utils.py +10 -45
- sky/exceptions.py +18 -7
- sky/execution.py +74 -31
- sky/global_user_state.py +605 -191
- sky/jobs/__init__.py +2 -0
- sky/jobs/client/sdk.py +101 -4
- sky/jobs/client/sdk_async.py +31 -5
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +726 -284
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +250 -100
- sky/jobs/scheduler.py +271 -173
- sky/jobs/server/core.py +367 -114
- sky/jobs/server/server.py +81 -35
- sky/jobs/server/utils.py +89 -35
- sky/jobs/state.py +1498 -620
- sky/jobs/utils.py +771 -306
- sky/logs/agent.py +40 -5
- sky/logs/aws.py +9 -19
- sky/metrics/utils.py +282 -39
- sky/models.py +2 -0
- sky/optimizer.py +7 -6
- sky/provision/__init__.py +38 -1
- sky/provision/aws/config.py +34 -13
- sky/provision/aws/instance.py +5 -2
- sky/provision/azure/instance.py +5 -3
- sky/provision/common.py +22 -0
- sky/provision/cudo/instance.py +4 -3
- sky/provision/do/instance.py +4 -3
- sky/provision/docker_utils.py +112 -28
- sky/provision/fluidstack/instance.py +6 -5
- sky/provision/gcp/config.py +6 -1
- sky/provision/gcp/instance.py +4 -2
- sky/provision/hyperbolic/instance.py +4 -2
- sky/provision/instance_setup.py +66 -20
- sky/provision/kubernetes/__init__.py +2 -0
- sky/provision/kubernetes/config.py +7 -44
- sky/provision/kubernetes/constants.py +0 -1
- sky/provision/kubernetes/instance.py +609 -213
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +12 -8
- sky/provision/kubernetes/network_utils.py +8 -25
- sky/provision/kubernetes/utils.py +422 -422
- sky/provision/kubernetes/volume.py +150 -18
- sky/provision/lambda_cloud/instance.py +16 -13
- sky/provision/nebius/instance.py +6 -2
- sky/provision/nebius/utils.py +103 -86
- sky/provision/oci/instance.py +4 -2
- sky/provision/paperspace/instance.py +4 -3
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +45 -15
- sky/provision/runpod/__init__.py +2 -0
- sky/provision/runpod/instance.py +4 -3
- sky/provision/runpod/volume.py +69 -13
- sky/provision/scp/instance.py +307 -130
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/slurm/__init__.py +12 -0
- sky/provision/slurm/config.py +13 -0
- sky/provision/slurm/instance.py +572 -0
- sky/provision/slurm/utils.py +583 -0
- sky/provision/vast/instance.py +9 -4
- sky/provision/vast/utils.py +10 -6
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +3 -2
- sky/provision/vsphere/instance.py +8 -6
- sky/provision/vsphere/vsphere_utils.py +8 -1
- sky/resources.py +11 -3
- sky/schemas/api/responses.py +107 -6
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +2 -0
- sky/serve/client/impl.py +55 -21
- sky/serve/constants.py +4 -3
- sky/serve/controller.py +17 -11
- sky/serve/load_balancing_policies.py +1 -1
- sky/serve/replica_managers.py +219 -142
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +63 -54
- sky/serve/serve_utils.py +145 -109
- sky/serve/server/core.py +46 -25
- sky/serve/server/impl.py +311 -162
- sky/serve/server/server.py +21 -19
- sky/serve/service.py +84 -68
- sky/serve/service_spec.py +45 -7
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +12 -7
- sky/server/common.py +47 -24
- sky/server/config.py +62 -28
- sky/server/constants.py +9 -1
- sky/server/daemons.py +109 -38
- sky/server/metrics.py +76 -96
- sky/server/middleware_utils.py +166 -0
- sky/server/plugins.py +222 -0
- sky/server/requests/executor.py +384 -145
- sky/server/requests/payloads.py +83 -19
- sky/server/requests/preconditions.py +15 -13
- sky/server/requests/request_names.py +123 -0
- sky/server/requests/requests.py +511 -157
- sky/server/requests/serializers/decoders.py +48 -17
- sky/server/requests/serializers/encoders.py +102 -20
- sky/server/requests/serializers/return_value_serializers.py +60 -0
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +116 -24
- sky/server/server.py +497 -179
- sky/server/server_utils.py +30 -0
- sky/server/stream_utils.py +219 -45
- sky/server/uvicorn.py +30 -19
- sky/setup_files/MANIFEST.in +6 -1
- sky/setup_files/alembic.ini +8 -0
- sky/setup_files/dependencies.py +64 -19
- sky/setup_files/setup.py +44 -44
- sky/sky_logging.py +13 -5
- sky/skylet/attempt_skylet.py +116 -24
- sky/skylet/configs.py +3 -1
- sky/skylet/constants.py +139 -29
- sky/skylet/events.py +74 -14
- sky/skylet/executor/__init__.py +1 -0
- sky/skylet/executor/slurm.py +189 -0
- sky/skylet/job_lib.py +143 -105
- sky/skylet/log_lib.py +252 -8
- sky/skylet/log_lib.pyi +47 -7
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +524 -0
- sky/skylet/skylet.py +27 -2
- sky/skylet/subprocess_daemon.py +104 -28
- sky/skypilot_config.py +99 -79
- sky/ssh_node_pools/constants.py +12 -0
- sky/ssh_node_pools/core.py +40 -3
- sky/ssh_node_pools/deploy/__init__.py +4 -0
- sky/ssh_node_pools/deploy/deploy.py +952 -0
- sky/ssh_node_pools/deploy/tunnel_utils.py +199 -0
- sky/ssh_node_pools/deploy/utils.py +173 -0
- sky/ssh_node_pools/server.py +20 -21
- sky/{utils/kubernetes/ssh_utils.py → ssh_node_pools/utils.py} +9 -6
- sky/task.py +221 -104
- sky/templates/aws-ray.yml.j2 +1 -0
- sky/templates/azure-ray.yml.j2 +1 -0
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +1 -0
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +1 -0
- sky/templates/hyperbolic-ray.yml.j2 +1 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +3 -0
- sky/templates/kubernetes-ray.yml.j2 +204 -55
- sky/templates/lambda-ray.yml.j2 +1 -0
- sky/templates/nebius-ray.yml.j2 +3 -0
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +1 -0
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +1 -0
- sky/templates/scp-ray.yml.j2 +1 -0
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/slurm-ray.yml.j2 +85 -0
- sky/templates/vast-ray.yml.j2 +2 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +188 -43
- sky/usage/usage_lib.py +16 -4
- sky/users/model.conf +1 -1
- sky/users/permission.py +84 -44
- sky/users/rbac.py +31 -3
- sky/utils/accelerator_registry.py +6 -3
- sky/utils/admin_policy_utils.py +18 -5
- sky/utils/annotations.py +128 -6
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/cli_utils/status_utils.py +12 -7
- sky/utils/cluster_utils.py +28 -6
- sky/utils/command_runner.py +283 -30
- sky/utils/command_runner.pyi +63 -7
- sky/utils/common.py +3 -1
- sky/utils/common_utils.py +55 -7
- sky/utils/config_utils.py +1 -14
- sky/utils/context.py +127 -40
- sky/utils/context_utils.py +73 -18
- sky/utils/controller_utils.py +229 -70
- sky/utils/db/db_utils.py +95 -18
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +24 -7
- sky/utils/env_options.py +4 -0
- sky/utils/git.py +559 -1
- sky/utils/kubernetes/create_cluster.sh +15 -30
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/gpu_labeler.py +13 -3
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +187 -260
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes/ssh-tunnel.sh +7 -376
- sky/utils/kubernetes_enums.py +7 -15
- sky/utils/lock_events.py +4 -4
- sky/utils/locks.py +128 -31
- sky/utils/log_utils.py +0 -319
- sky/utils/resource_checker.py +13 -10
- sky/utils/resources_utils.py +53 -29
- sky/utils/rich_utils.py +8 -4
- sky/utils/schemas.py +138 -52
- sky/utils/subprocess_utils.py +17 -4
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +2 -1
- sky/utils/ux_utils.py +35 -1
- sky/utils/volume.py +88 -4
- sky/utils/yaml_utils.py +9 -0
- sky/volumes/client/sdk.py +48 -10
- sky/volumes/server/core.py +59 -22
- sky/volumes/server/server.py +46 -17
- sky/volumes/volume.py +54 -42
- sky/workspaces/core.py +57 -21
- sky/workspaces/server.py +13 -12
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/METADATA +343 -65
- skypilot_nightly-1.0.0.dev20251210.dist-info/RECORD +629 -0
- skypilot_nightly-1.0.0.dev20251210.dist-info/top_level.txt +2 -0
- sky/client/cli/git.py +0 -549
- sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
- sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
- sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
- sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
- sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
- sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +0 -1
- sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
- sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
- sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
- sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
- sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
- sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
- sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
- sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
- sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
- sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
- sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
- sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
- sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
- sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/_app-ce361c6959bc2001.js +0 -34
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
- sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
- sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/kubernetes/cleanup-tunnel.sh +0 -62
- sky/utils/kubernetes/deploy_remote_cluster.py +0 -1299
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
- skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
- /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → KYAhEFa3FTfq4JyKVgo-s}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/licenses/LICENSE +0 -0
sky/utils/resources_utils.py
CHANGED
|
@@ -181,57 +181,81 @@ def simplify_ports(ports: List[str]) -> List[str]:
|
|
|
181
181
|
|
|
182
182
|
|
|
183
183
|
def format_resource(resource: 'resources_lib.Resources',
|
|
184
|
-
|
|
184
|
+
simplified_only: bool = False) -> Tuple[str, Optional[str]]:
|
|
185
185
|
resource = resource.assert_launchable()
|
|
186
|
-
|
|
187
|
-
|
|
186
|
+
is_k8s = str(resource.cloud).lower() == 'kubernetes'
|
|
187
|
+
if resource.accelerators is None or is_k8s or not simplified_only:
|
|
188
|
+
vcpu, mem = resource.cloud.get_vcpus_mem_from_instance_type(
|
|
189
|
+
resource.instance_type)
|
|
188
190
|
|
|
189
|
-
|
|
191
|
+
elements_simple = []
|
|
192
|
+
elements_full = []
|
|
190
193
|
|
|
191
194
|
if resource.accelerators is not None:
|
|
192
195
|
acc, count = list(resource.accelerators.items())[0]
|
|
193
|
-
|
|
196
|
+
elements_simple.append(f'gpus={acc}:{count}')
|
|
197
|
+
elements_full.append(f'gpus={acc}:{count}')
|
|
194
198
|
|
|
195
|
-
|
|
196
|
-
|
|
199
|
+
if (resource.accelerators is None or is_k8s):
|
|
200
|
+
if vcpu is not None:
|
|
201
|
+
elements_simple.append(f'cpus={int(vcpu)}')
|
|
202
|
+
elements_full.append(f'cpus={int(vcpu)}')
|
|
203
|
+
if mem is not None:
|
|
204
|
+
elements_simple.append(f'mem={int(mem)}')
|
|
205
|
+
elements_full.append(f'mem={int(mem)}')
|
|
206
|
+
elif not simplified_only:
|
|
197
207
|
if vcpu is not None:
|
|
198
|
-
|
|
208
|
+
elements_full.append(f'cpus={int(vcpu)}')
|
|
199
209
|
if mem is not None:
|
|
200
|
-
|
|
210
|
+
elements_full.append(f'mem={int(mem)}')
|
|
201
211
|
|
|
202
|
-
instance_type = resource.instance_type
|
|
203
|
-
if simplify:
|
|
204
|
-
instance_type = common_utils.truncate_long_string(instance_type, 15)
|
|
205
212
|
if not is_k8s:
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
213
|
+
instance_type_full = resource.instance_type
|
|
214
|
+
instance_type_simple = common_utils.truncate_long_string(
|
|
215
|
+
instance_type_full, 15)
|
|
216
|
+
elements_simple.append(instance_type_simple)
|
|
217
|
+
elements_full.append(instance_type_full)
|
|
218
|
+
elements_simple.append('...')
|
|
219
|
+
if not simplified_only:
|
|
210
220
|
image_id = resource.image_id
|
|
211
221
|
if image_id is not None:
|
|
212
222
|
if None in image_id:
|
|
213
|
-
|
|
223
|
+
elements_full.append(f'image_id={image_id[None]}')
|
|
214
224
|
else:
|
|
215
|
-
|
|
216
|
-
|
|
225
|
+
elements_full.append(f'image_id={image_id}')
|
|
226
|
+
elements_full.append(f'disk={resource.disk_size}')
|
|
217
227
|
disk_tier = resource.disk_tier
|
|
218
228
|
if disk_tier is not None:
|
|
219
|
-
|
|
229
|
+
elements_full.append(f'disk_tier={disk_tier.value}')
|
|
220
230
|
ports = resource.ports
|
|
221
231
|
if ports is not None:
|
|
222
|
-
|
|
232
|
+
elements_full.append(f'ports={ports}')
|
|
223
233
|
|
|
224
234
|
spot = '[spot]' if resource.use_spot else ''
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
235
|
+
resources_str_simple = (
|
|
236
|
+
f'{spot}({"" if not elements_simple else ", ".join(elements_simple)})')
|
|
237
|
+
if simplified_only:
|
|
238
|
+
return resources_str_simple, None
|
|
239
|
+
else:
|
|
240
|
+
resources_str_full = (
|
|
241
|
+
f'{spot}({"" if not elements_full else ", ".join(elements_full)})')
|
|
242
|
+
return resources_str_simple, resources_str_full
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def get_readable_resources_repr(
|
|
246
|
+
handle: 'backends.CloudVmRayResourceHandle',
|
|
247
|
+
simplified_only: bool = False) -> Tuple[str, Optional[str]]:
|
|
248
|
+
resource_str_simple, resource_str_full = format_resource(
|
|
249
|
+
handle.launched_resources, simplified_only)
|
|
250
|
+
if not simplified_only:
|
|
251
|
+
assert resource_str_full is not None
|
|
230
252
|
if (handle.launched_nodes is not None and
|
|
231
253
|
handle.launched_resources is not None):
|
|
232
|
-
return (f'{handle.launched_nodes}x'
|
|
233
|
-
|
|
234
|
-
|
|
254
|
+
return (f'{handle.launched_nodes}x{resource_str_simple}',
|
|
255
|
+
None if simplified_only else
|
|
256
|
+
f'{handle.launched_nodes}x{resource_str_full}')
|
|
257
|
+
return (_DEFAULT_MESSAGE_HANDLE_INITIALIZING,
|
|
258
|
+
_DEFAULT_MESSAGE_HANDLE_INITIALIZING)
|
|
235
259
|
|
|
236
260
|
|
|
237
261
|
def make_ray_custom_resources_str(
|
sky/utils/rich_utils.py
CHANGED
|
@@ -193,7 +193,8 @@ class _RevertibleStatus:
|
|
|
193
193
|
self.get_status_fn().__exit__(exc_type, exc_val, exc_tb)
|
|
194
194
|
self.set_status_fn(None)
|
|
195
195
|
else:
|
|
196
|
-
self.
|
|
196
|
+
if self.previous_message is not None:
|
|
197
|
+
self.get_status_fn().update(self.previous_message)
|
|
197
198
|
|
|
198
199
|
def update(self, *args, **kwargs):
|
|
199
200
|
self.get_status_fn().update(*args, **kwargs)
|
|
@@ -263,11 +264,12 @@ def safe_logger():
|
|
|
263
264
|
client_status_obj = _get_client_status()
|
|
264
265
|
|
|
265
266
|
client_status_live = (client_status_obj is not None and
|
|
267
|
+
hasattr(client_status_obj, '_live') and
|
|
266
268
|
client_status_obj._live.is_started) # pylint: disable=protected-access
|
|
267
|
-
if client_status_live:
|
|
269
|
+
if client_status_live and client_status_obj is not None:
|
|
268
270
|
client_status_obj.stop()
|
|
269
271
|
yield
|
|
270
|
-
if client_status_live:
|
|
272
|
+
if client_status_live and client_status_obj is not None:
|
|
271
273
|
client_status_obj.start()
|
|
272
274
|
|
|
273
275
|
|
|
@@ -421,7 +423,7 @@ async def decode_rich_status_async(
|
|
|
421
423
|
undecoded_buffer = b''
|
|
422
424
|
|
|
423
425
|
# Iterate over the response content in chunks
|
|
424
|
-
async for chunk in response.content.
|
|
426
|
+
async for chunk, _ in response.content.iter_chunks():
|
|
425
427
|
if chunk is None:
|
|
426
428
|
return
|
|
427
429
|
|
|
@@ -481,6 +483,8 @@ async def decode_rich_status_async(
|
|
|
481
483
|
line = line[:-2] + '\n'
|
|
482
484
|
is_payload, line = message_utils.decode_payload(
|
|
483
485
|
line, raise_for_mismatch=False)
|
|
486
|
+
if line is None:
|
|
487
|
+
continue
|
|
484
488
|
control = None
|
|
485
489
|
if is_payload:
|
|
486
490
|
control, encoded_status = Control.decode(line)
|
sky/utils/schemas.py
CHANGED
|
@@ -460,8 +460,8 @@ def get_volume_schema():
|
|
|
460
460
|
'type': 'string',
|
|
461
461
|
'pattern': constants.MEMORY_SIZE_PATTERN,
|
|
462
462
|
},
|
|
463
|
-
'
|
|
464
|
-
'type': '
|
|
463
|
+
'use_existing': {
|
|
464
|
+
'type': 'boolean',
|
|
465
465
|
},
|
|
466
466
|
'config': {
|
|
467
467
|
'type': 'object',
|
|
@@ -574,6 +574,9 @@ def get_volume_mount_schema():
|
|
|
574
574
|
'volume_name': {
|
|
575
575
|
'type': 'string',
|
|
576
576
|
},
|
|
577
|
+
'is_ephemeral': {
|
|
578
|
+
'type': 'boolean',
|
|
579
|
+
},
|
|
577
580
|
'volume_config': {
|
|
578
581
|
'type': 'object',
|
|
579
582
|
'required': [],
|
|
@@ -791,23 +794,6 @@ def _filter_schema(schema: dict, keys_to_keep: List[Tuple[str, ...]]) -> dict:
|
|
|
791
794
|
return new_schema
|
|
792
795
|
|
|
793
796
|
|
|
794
|
-
def _experimental_task_schema() -> dict:
|
|
795
|
-
# TODO: experimental.config_overrides has been deprecated in favor of the
|
|
796
|
-
# top-level `config` field. Remove in v0.11.0.
|
|
797
|
-
config_override_schema = _filter_schema(
|
|
798
|
-
get_config_schema(), constants.OVERRIDEABLE_CONFIG_KEYS_IN_TASK)
|
|
799
|
-
return {
|
|
800
|
-
'experimental': {
|
|
801
|
-
'type': 'object',
|
|
802
|
-
'required': [],
|
|
803
|
-
'additionalProperties': False,
|
|
804
|
-
'properties': {
|
|
805
|
-
'config_overrides': config_override_schema,
|
|
806
|
-
}
|
|
807
|
-
}
|
|
808
|
-
}
|
|
809
|
-
|
|
810
|
-
|
|
811
797
|
def get_task_schema():
|
|
812
798
|
return {
|
|
813
799
|
'$schema': 'https://json-schema.org/draft/2020-12/schema',
|
|
@@ -918,7 +904,6 @@ def get_task_schema():
|
|
|
918
904
|
'_metadata': {
|
|
919
905
|
'type': 'object',
|
|
920
906
|
},
|
|
921
|
-
**_experimental_task_schema(),
|
|
922
907
|
}
|
|
923
908
|
}
|
|
924
909
|
|
|
@@ -1043,11 +1028,21 @@ class RemoteIdentityOptions(enum.Enum):
|
|
|
1043
1028
|
|
|
1044
1029
|
def get_default_remote_identity(cloud: str) -> str:
|
|
1045
1030
|
"""Get the default remote identity for the specified cloud."""
|
|
1046
|
-
if cloud
|
|
1031
|
+
if cloud in ('kubernetes', 'ssh'):
|
|
1047
1032
|
return RemoteIdentityOptions.SERVICE_ACCOUNT.value
|
|
1048
1033
|
return RemoteIdentityOptions.LOCAL_CREDENTIALS.value
|
|
1049
1034
|
|
|
1050
1035
|
|
|
1036
|
+
_CAPABILITIES_SCHEMA = {
|
|
1037
|
+
'capabilities': {
|
|
1038
|
+
'type': 'array',
|
|
1039
|
+
'items': {
|
|
1040
|
+
'type': 'string',
|
|
1041
|
+
'case_insensitive_enum': ['compute', 'storage']
|
|
1042
|
+
},
|
|
1043
|
+
}
|
|
1044
|
+
}
|
|
1045
|
+
|
|
1051
1046
|
_REMOTE_IDENTITY_SCHEMA = {
|
|
1052
1047
|
'remote_identity': {
|
|
1053
1048
|
'type': 'string',
|
|
@@ -1070,25 +1065,16 @@ _REMOTE_IDENTITY_SCHEMA_KUBERNETES = {
|
|
|
1070
1065
|
},
|
|
1071
1066
|
}
|
|
1072
1067
|
|
|
1073
|
-
|
|
1074
|
-
'networking': {
|
|
1075
|
-
'type': 'string',
|
|
1076
|
-
'case_insensitive_enum': [
|
|
1077
|
-
type.value for type in kubernetes_enums.KubernetesNetworkingMode
|
|
1078
|
-
],
|
|
1079
|
-
},
|
|
1080
|
-
'ports': {
|
|
1081
|
-
'type': 'string',
|
|
1082
|
-
'case_insensitive_enum': [
|
|
1083
|
-
type.value for type in kubernetes_enums.KubernetesPortMode
|
|
1084
|
-
],
|
|
1085
|
-
},
|
|
1068
|
+
_CONTEXT_CONFIG_SCHEMA_MINIMAL = {
|
|
1086
1069
|
'pod_config': {
|
|
1087
1070
|
'type': 'object',
|
|
1088
1071
|
'required': [],
|
|
1089
1072
|
# Allow arbitrary keys since validating pod spec is hard
|
|
1090
1073
|
'additionalProperties': True,
|
|
1091
1074
|
},
|
|
1075
|
+
'provision_timeout': {
|
|
1076
|
+
'type': 'integer',
|
|
1077
|
+
},
|
|
1092
1078
|
'custom_metadata': {
|
|
1093
1079
|
'type': 'object',
|
|
1094
1080
|
'required': [],
|
|
@@ -1103,9 +1089,23 @@ _CONTEXT_CONFIG_SCHEMA_KUBERNETES = {
|
|
|
1103
1089
|
}]
|
|
1104
1090
|
},
|
|
1105
1091
|
},
|
|
1106
|
-
|
|
1107
|
-
|
|
1092
|
+
}
|
|
1093
|
+
|
|
1094
|
+
_CONTEXT_CONFIG_SCHEMA_KUBERNETES = {
|
|
1095
|
+
# TODO(kevin): Remove 'networking' in v0.13.0.
|
|
1096
|
+
'networking': {
|
|
1097
|
+
'type': 'string',
|
|
1098
|
+
'case_insensitive_enum': [
|
|
1099
|
+
type.value for type in kubernetes_enums.KubernetesNetworkingMode
|
|
1100
|
+
],
|
|
1101
|
+
},
|
|
1102
|
+
'ports': {
|
|
1103
|
+
'type': 'string',
|
|
1104
|
+
'case_insensitive_enum': [
|
|
1105
|
+
type.value for type in kubernetes_enums.KubernetesPortMode
|
|
1106
|
+
],
|
|
1108
1107
|
},
|
|
1108
|
+
**_CONTEXT_CONFIG_SCHEMA_MINIMAL,
|
|
1109
1109
|
'autoscaler': {
|
|
1110
1110
|
'type': 'string',
|
|
1111
1111
|
'case_insensitive_enum': [
|
|
@@ -1153,6 +1153,12 @@ _CONTEXT_CONFIG_SCHEMA_KUBERNETES = {
|
|
|
1153
1153
|
},
|
|
1154
1154
|
'remote_identity': {
|
|
1155
1155
|
'type': 'string',
|
|
1156
|
+
},
|
|
1157
|
+
'post_provision_runcmd': {
|
|
1158
|
+
'type': 'array',
|
|
1159
|
+
'items': {
|
|
1160
|
+
'type': 'string'
|
|
1161
|
+
},
|
|
1156
1162
|
}
|
|
1157
1163
|
}
|
|
1158
1164
|
|
|
@@ -1189,7 +1195,13 @@ def get_config_schema():
|
|
|
1189
1195
|
'consolidation_mode': {
|
|
1190
1196
|
'type': 'boolean',
|
|
1191
1197
|
'default': False,
|
|
1192
|
-
}
|
|
1198
|
+
},
|
|
1199
|
+
'controller_logs_gc_retention_hours': {
|
|
1200
|
+
'type': 'integer',
|
|
1201
|
+
},
|
|
1202
|
+
'task_logs_gc_retention_hours': {
|
|
1203
|
+
'type': 'integer',
|
|
1204
|
+
},
|
|
1193
1205
|
},
|
|
1194
1206
|
},
|
|
1195
1207
|
'bucket': {
|
|
@@ -1234,6 +1246,9 @@ def get_config_schema():
|
|
|
1234
1246
|
'type': 'null',
|
|
1235
1247
|
}],
|
|
1236
1248
|
},
|
|
1249
|
+
'use_ssm': {
|
|
1250
|
+
'type': 'boolean',
|
|
1251
|
+
},
|
|
1237
1252
|
'post_provision_runcmd': {
|
|
1238
1253
|
'type': 'array',
|
|
1239
1254
|
'items': {
|
|
@@ -1247,6 +1262,7 @@ def get_config_schema():
|
|
|
1247
1262
|
}]
|
|
1248
1263
|
},
|
|
1249
1264
|
},
|
|
1265
|
+
**_CAPABILITIES_SCHEMA,
|
|
1250
1266
|
**_LABELS_SCHEMA,
|
|
1251
1267
|
**_NETWORK_CONFIG_SCHEMA,
|
|
1252
1268
|
},
|
|
@@ -1304,6 +1320,7 @@ def get_config_schema():
|
|
|
1304
1320
|
}
|
|
1305
1321
|
],
|
|
1306
1322
|
},
|
|
1323
|
+
**_CAPABILITIES_SCHEMA,
|
|
1307
1324
|
**_LABELS_SCHEMA,
|
|
1308
1325
|
**_NETWORK_CONFIG_SCHEMA,
|
|
1309
1326
|
},
|
|
@@ -1328,10 +1345,15 @@ def get_config_schema():
|
|
|
1328
1345
|
'additionalProperties': False,
|
|
1329
1346
|
'properties': {
|
|
1330
1347
|
'allowed_contexts': {
|
|
1331
|
-
'
|
|
1332
|
-
|
|
1348
|
+
'oneOf': [{
|
|
1349
|
+
'type': 'array',
|
|
1350
|
+
'items': {
|
|
1351
|
+
'type': 'string',
|
|
1352
|
+
},
|
|
1353
|
+
}, {
|
|
1333
1354
|
'type': 'string',
|
|
1334
|
-
|
|
1355
|
+
'pattern': '^all$'
|
|
1356
|
+
}]
|
|
1335
1357
|
},
|
|
1336
1358
|
'context_configs': {
|
|
1337
1359
|
'type': 'object',
|
|
@@ -1361,11 +1383,42 @@ def get_config_schema():
|
|
|
1361
1383
|
'type': 'string',
|
|
1362
1384
|
},
|
|
1363
1385
|
},
|
|
1364
|
-
'
|
|
1386
|
+
'context_configs': {
|
|
1365
1387
|
'type': 'object',
|
|
1366
1388
|
'required': [],
|
|
1367
|
-
|
|
1368
|
-
|
|
1389
|
+
'properties': {},
|
|
1390
|
+
# Properties are ssh cluster names, which are the
|
|
1391
|
+
# kubernetes context names without `ssh-` prefix.
|
|
1392
|
+
'additionalProperties': {
|
|
1393
|
+
'type': 'object',
|
|
1394
|
+
'required': [],
|
|
1395
|
+
'additionalProperties': False,
|
|
1396
|
+
'properties': {
|
|
1397
|
+
**_CONTEXT_CONFIG_SCHEMA_MINIMAL,
|
|
1398
|
+
},
|
|
1399
|
+
},
|
|
1400
|
+
},
|
|
1401
|
+
**_CONTEXT_CONFIG_SCHEMA_MINIMAL,
|
|
1402
|
+
}
|
|
1403
|
+
},
|
|
1404
|
+
'slurm': {
|
|
1405
|
+
'type': 'object',
|
|
1406
|
+
'required': [],
|
|
1407
|
+
'additionalProperties': False,
|
|
1408
|
+
'properties': {
|
|
1409
|
+
'allowed_clusters': {
|
|
1410
|
+
'oneOf': [{
|
|
1411
|
+
'type': 'array',
|
|
1412
|
+
'items': {
|
|
1413
|
+
'type': 'string',
|
|
1414
|
+
},
|
|
1415
|
+
}, {
|
|
1416
|
+
'type': 'string',
|
|
1417
|
+
'pattern': '^all$'
|
|
1418
|
+
}]
|
|
1419
|
+
},
|
|
1420
|
+
'provision_timeout': {
|
|
1421
|
+
'type': 'integer',
|
|
1369
1422
|
},
|
|
1370
1423
|
}
|
|
1371
1424
|
},
|
|
@@ -1403,11 +1456,24 @@ def get_config_schema():
|
|
|
1403
1456
|
}
|
|
1404
1457
|
},
|
|
1405
1458
|
},
|
|
1459
|
+
'vast': {
|
|
1460
|
+
'type': 'object',
|
|
1461
|
+
'required': [],
|
|
1462
|
+
'additionalProperties': False,
|
|
1463
|
+
'properties': {
|
|
1464
|
+
'secure_only': {
|
|
1465
|
+
'type': 'boolean',
|
|
1466
|
+
},
|
|
1467
|
+
}
|
|
1468
|
+
},
|
|
1406
1469
|
'nebius': {
|
|
1407
1470
|
'type': 'object',
|
|
1408
1471
|
'required': [],
|
|
1409
1472
|
'properties': {
|
|
1410
|
-
**_NETWORK_CONFIG_SCHEMA, '
|
|
1473
|
+
**_NETWORK_CONFIG_SCHEMA, 'use_static_ip_address': {
|
|
1474
|
+
'type': 'boolean',
|
|
1475
|
+
},
|
|
1476
|
+
'tenant_id': {
|
|
1411
1477
|
'type': 'string',
|
|
1412
1478
|
},
|
|
1413
1479
|
'domain': {
|
|
@@ -1520,7 +1586,7 @@ def get_config_schema():
|
|
|
1520
1586
|
}
|
|
1521
1587
|
}
|
|
1522
1588
|
|
|
1523
|
-
daemon_schema = {
|
|
1589
|
+
daemon_schema: Dict[str, Any] = {
|
|
1524
1590
|
'type': 'object',
|
|
1525
1591
|
'required': [],
|
|
1526
1592
|
'additionalProperties': False,
|
|
@@ -1580,10 +1646,10 @@ def get_config_schema():
|
|
|
1580
1646
|
|
|
1581
1647
|
allowed_workspace_cloud_names = list(constants.ALL_CLOUDS) + ['cloudflare']
|
|
1582
1648
|
# Create pattern for not supported clouds, i.e.
|
|
1583
|
-
# all clouds except gcp, kubernetes, ssh
|
|
1649
|
+
# all clouds except aws, gcp, kubernetes, ssh, nebius
|
|
1584
1650
|
not_supported_clouds = [
|
|
1585
1651
|
cloud for cloud in allowed_workspace_cloud_names
|
|
1586
|
-
if cloud.lower() not in ['gcp', 'kubernetes', 'ssh', 'nebius']
|
|
1652
|
+
if cloud.lower() not in ['aws', 'gcp', 'kubernetes', 'ssh', 'nebius']
|
|
1587
1653
|
]
|
|
1588
1654
|
not_supported_cloud_regex = '|'.join(not_supported_clouds)
|
|
1589
1655
|
workspaces_schema = {
|
|
@@ -1594,7 +1660,8 @@ def get_config_schema():
|
|
|
1594
1660
|
'type': 'object',
|
|
1595
1661
|
'additionalProperties': False,
|
|
1596
1662
|
'patternProperties': {
|
|
1597
|
-
# Pattern for
|
|
1663
|
+
# Pattern for clouds with no workspace-specific config -
|
|
1664
|
+
# only allow 'disabled' property.
|
|
1598
1665
|
f'^({not_supported_cloud_regex})$': {
|
|
1599
1666
|
'type': 'object',
|
|
1600
1667
|
'additionalProperties': False,
|
|
@@ -1625,7 +1692,21 @@ def get_config_schema():
|
|
|
1625
1692
|
},
|
|
1626
1693
|
'disabled': {
|
|
1627
1694
|
'type': 'boolean'
|
|
1628
|
-
}
|
|
1695
|
+
},
|
|
1696
|
+
**_CAPABILITIES_SCHEMA,
|
|
1697
|
+
},
|
|
1698
|
+
'additionalProperties': False,
|
|
1699
|
+
},
|
|
1700
|
+
'aws': {
|
|
1701
|
+
'type': 'object',
|
|
1702
|
+
'properties': {
|
|
1703
|
+
'profile': {
|
|
1704
|
+
'type': 'string'
|
|
1705
|
+
},
|
|
1706
|
+
'disabled': {
|
|
1707
|
+
'type': 'boolean'
|
|
1708
|
+
},
|
|
1709
|
+
**_CAPABILITIES_SCHEMA,
|
|
1629
1710
|
},
|
|
1630
1711
|
'additionalProperties': False,
|
|
1631
1712
|
},
|
|
@@ -1650,10 +1731,15 @@ def get_config_schema():
|
|
|
1650
1731
|
'required': [],
|
|
1651
1732
|
'properties': {
|
|
1652
1733
|
'allowed_contexts': {
|
|
1653
|
-
'
|
|
1654
|
-
|
|
1734
|
+
'oneOf': [{
|
|
1735
|
+
'type': 'array',
|
|
1736
|
+
'items': {
|
|
1737
|
+
'type': 'string',
|
|
1738
|
+
},
|
|
1739
|
+
}, {
|
|
1655
1740
|
'type': 'string',
|
|
1656
|
-
|
|
1741
|
+
'pattern': '^all$'
|
|
1742
|
+
}]
|
|
1657
1743
|
},
|
|
1658
1744
|
'disabled': {
|
|
1659
1745
|
'type': 'boolean'
|
sky/utils/subprocess_utils.py
CHANGED
|
@@ -10,7 +10,8 @@ import sys
|
|
|
10
10
|
import threading
|
|
11
11
|
import time
|
|
12
12
|
import typing
|
|
13
|
-
from typing import Any, Callable, Dict, List, Optional, Protocol, Tuple,
|
|
13
|
+
from typing import (Any, Callable, Dict, List, Optional, Protocol, Set, Tuple,
|
|
14
|
+
Union)
|
|
14
15
|
|
|
15
16
|
import colorama
|
|
16
17
|
|
|
@@ -18,6 +19,7 @@ from sky import exceptions
|
|
|
18
19
|
from sky import sky_logging
|
|
19
20
|
from sky.adaptors import common as adaptors_common
|
|
20
21
|
from sky.skylet import log_lib
|
|
22
|
+
from sky.skylet import subprocess_daemon
|
|
21
23
|
from sky.utils import common_utils
|
|
22
24
|
from sky.utils import timeline
|
|
23
25
|
from sky.utils import ux_utils
|
|
@@ -107,7 +109,7 @@ def get_parallel_threads(cloud_str: Optional[str] = None) -> int:
|
|
|
107
109
|
|
|
108
110
|
|
|
109
111
|
def run_in_parallel(func: Callable,
|
|
110
|
-
args: List[Any],
|
|
112
|
+
args: Union[List[Any], Set[Any]],
|
|
111
113
|
num_threads: Optional[int] = None) -> List[Any]:
|
|
112
114
|
"""Run a function in parallel on a list of arguments.
|
|
113
115
|
|
|
@@ -128,7 +130,7 @@ def run_in_parallel(func: Callable,
|
|
|
128
130
|
if len(args) == 0:
|
|
129
131
|
return []
|
|
130
132
|
if len(args) == 1:
|
|
131
|
-
return [func(args[0])]
|
|
133
|
+
return [func(list(args)[0])]
|
|
132
134
|
|
|
133
135
|
processes = (num_threads
|
|
134
136
|
if num_threads is not None else get_parallel_threads())
|
|
@@ -305,11 +307,17 @@ def run_with_retries(
|
|
|
305
307
|
return returncode, stdout, stderr
|
|
306
308
|
|
|
307
309
|
|
|
308
|
-
def kill_process_daemon(process_pid: int) -> None:
|
|
310
|
+
def kill_process_daemon(process_pid: int, use_kill_pg: bool = False) -> None:
|
|
309
311
|
"""Start a daemon as a safety net to kill the process.
|
|
310
312
|
|
|
311
313
|
Args:
|
|
312
314
|
process_pid: The PID of the process to kill.
|
|
315
|
+
use_kill_pg: Whether to use kill process group to kill the process. If
|
|
316
|
+
True, the process will use os.killpg() to kill the target process
|
|
317
|
+
group on UNIX system, which is more efficient than using the daemon
|
|
318
|
+
to refresh the process tree in the daemon. Note that both
|
|
319
|
+
implementations have corner cases where subprocesses might not be
|
|
320
|
+
killed. Refer to subprocess_daemon.py for more details.
|
|
313
321
|
"""
|
|
314
322
|
# Get initial children list
|
|
315
323
|
try:
|
|
@@ -336,6 +344,10 @@ def kill_process_daemon(process_pid: int) -> None:
|
|
|
336
344
|
','.join(map(str, initial_children)),
|
|
337
345
|
]
|
|
338
346
|
|
|
347
|
+
env = os.environ.copy()
|
|
348
|
+
if use_kill_pg:
|
|
349
|
+
env[subprocess_daemon.USE_KILL_PG_ENV_VAR] = '1'
|
|
350
|
+
|
|
339
351
|
# We do not need to set `start_new_session=True` here, as the
|
|
340
352
|
# daemon script will detach itself from the parent process with
|
|
341
353
|
# fork to avoid being killed by parent process. See the reason we
|
|
@@ -347,6 +359,7 @@ def kill_process_daemon(process_pid: int) -> None:
|
|
|
347
359
|
stderr=subprocess.DEVNULL,
|
|
348
360
|
# Disable input
|
|
349
361
|
stdin=subprocess.DEVNULL,
|
|
362
|
+
env=env,
|
|
350
363
|
)
|
|
351
364
|
|
|
352
365
|
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Utility functions for threads."""
|
|
2
|
+
|
|
3
|
+
import threading
|
|
4
|
+
from typing import Any, Dict, Generic, Optional, overload, TypeVar
|
|
5
|
+
|
|
6
|
+
from sky.utils import common_utils
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class SafeThread(threading.Thread):
    """Thread subclass that records an exception raised by its target.

    Instead of letting the exception escape ``run()``, it is stored on the
    instance so that the code that joins the thread can inspect it
    afterwards through ``format_exc``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Exception captured by run(); stays None if the target finished
        # without raising.
        self._exc = None

    def run(self):
        """Run the thread body, capturing anything it raises."""
        try:
            super().run()
        except BaseException as caught:  # pylint: disable=broad-except
            self._exc = caught

    @property
    def format_exc(self) -> Optional[str]:
        """Formatted captured exception, or None if nothing was raised."""
        captured = self._exc
        return (None if captured is None else
                common_utils.format_exception(captured))
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# pylint: disable=invalid-name
KeyType = TypeVar('KeyType')
ValueType = TypeVar('ValueType')


# Google style guide: Do not rely on the atomicity of built-in types.
# Our launch and down process pool will be used by multiple threads,
# therefore we need to use a thread-safe dict.
# see https://google.github.io/styleguide/pyguide.html#218-threading
class ThreadSafeDict(Generic[KeyType, ValueType]):
    """A dict wrapper whose operations are serialized by one lock.

    Every method acquires the same ``threading.Lock`` around the underlying
    dict access. The lock is not reentrant, so methods must not call each
    other while holding it.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Build the underlying dict from the same arguments as ``dict()``."""
        self._dict: Dict[KeyType, ValueType] = dict(*args, **kwargs)
        self._lock = threading.Lock()

    def __getitem__(self, key: KeyType) -> ValueType:
        with self._lock:
            return self._dict.__getitem__(key)

    def __setitem__(self, key: KeyType, value: ValueType) -> None:
        with self._lock:
            return self._dict.__setitem__(key, value)

    def __delitem__(self, key: KeyType) -> None:
        with self._lock:
            return self._dict.__delitem__(key)

    def __len__(self) -> int:
        with self._lock:
            return self._dict.__len__()

    def __contains__(self, key: KeyType) -> bool:
        with self._lock:
            return self._dict.__contains__(key)

    def items(self) -> list:
        """Return a snapshot list of ``(key, value)`` pairs.

        A live ``dict.items()`` view would be iterated by the caller after
        the lock is released, so a concurrent insert or delete could raise
        ``RuntimeError`` mid-iteration. Copying under the lock avoids that.
        """
        with self._lock:
            return list(self._dict.items())

    def values(self) -> list:
        """Return a snapshot list of the values.

        Copied under the lock for the same reason as ``items()``.
        """
        with self._lock:
            return list(self._dict.values())

    @overload
    def get(self, key: KeyType, default: ValueType) -> ValueType:
        ...

    @overload
    def get(self,
            key: KeyType,
            default: Optional[ValueType] = None) -> Optional[ValueType]:
        ...

    def get(self,
            key: KeyType,
            default: Optional[ValueType] = None) -> Optional[ValueType]:
        """Return the value for ``key``, or ``default`` if it is absent."""
        with self._lock:
            return self._dict.get(key, default)

    def pop(self, key: KeyType) -> Optional[ValueType]:
        """Remove ``key`` and return its value, or None if it is absent."""
        with self._lock:
            return self._dict.pop(key, None)
|
sky/utils/timeline.py
CHANGED
|
@@ -58,7 +58,8 @@ class Event:
|
|
|
58
58
|
})
|
|
59
59
|
event_begin['args'] = {'stack': '\n'.join(traceback.format_stack())}
|
|
60
60
|
if self._message is not None:
|
|
61
|
-
event_begin['args'][
|
|
61
|
+
event_begin['args'][
|
|
62
|
+
'message'] = self._message # type: ignore[index]
|
|
62
63
|
_events.append(event_begin)
|
|
63
64
|
|
|
64
65
|
def end(self):
|