skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +81 -16
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +173 -30
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1323 -397
- sky/backends/cloud_vm_ray_backend.py +1749 -1029
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/task_codegen.py +633 -0
- sky/backends/wheel_utils.py +55 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +90 -49
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +338 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +533 -185
- sky/cli.py +5 -5975
- sky/client/{cli.py → cli/command.py} +2591 -1956
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +322 -0
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +78 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1219 -319
- sky/client/sdk_async.py +827 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +564 -164
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +140 -40
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +570 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +210 -81
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -86
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +231 -167
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +375 -173
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1460 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +126 -2
- sky/execution.py +216 -63
- sky/global_user_state.py +2390 -586
- sky/jobs/__init__.py +7 -0
- sky/jobs/client/sdk.py +300 -58
- sky/jobs/client/sdk_async.py +161 -0
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +848 -275
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +402 -152
- sky/jobs/scheduler.py +314 -189
- sky/jobs/server/core.py +836 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2109 -706
- sky/jobs/utils.py +1306 -215
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +453 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +136 -50
- sky/provision/azure/instance.py +11 -6
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +140 -33
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +101 -20
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +919 -280
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +44 -43
- sky/provision/kubernetes/utils.py +1221 -534
- sky/provision/kubernetes/volume.py +343 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +237 -137
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +117 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +214 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +707 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +4 -4
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +17 -20
- sky/py.typed +0 -0
- sky/resources.py +845 -119
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +227 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +12 -9
- sky/serve/controller.py +68 -17
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +116 -2
- sky/serve/replica_managers.py +434 -249
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +569 -257
- sky/serve/serve_utils.py +775 -265
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +192 -89
- sky/serve/service_spec.py +144 -20
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +202 -0
- sky/server/common.py +478 -182
- sky/server/config.py +85 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +295 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/middleware_utils.py +166 -0
- sky/server/requests/executor.py +558 -138
- sky/server/requests/payloads.py +364 -24
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +121 -0
- sky/server/requests/requests.py +822 -226
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +455 -0
- sky/server/server.py +1309 -285
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +327 -61
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +11 -1
- sky/setup_files/alembic.ini +160 -0
- sky/setup_files/dependencies.py +139 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +114 -7
- sky/skylet/attempt_skylet.py +106 -24
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +29 -20
- sky/skylet/constants.py +216 -25
- sky/skylet/events.py +101 -21
- sky/skylet/job_lib.py +345 -164
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +568 -0
- sky/skylet/skylet.py +72 -4
- sky/skylet/subprocess_daemon.py +104 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +685 -163
- sky/templates/aws-ray.yml.j2 +11 -3
- sky/templates/azure-ray.yml.j2 +2 -1
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +2 -1
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +62 -1
- sky/templates/hyperbolic-ray.yml.j2 +68 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +611 -50
- sky/templates/lambda-ray.yml.j2 +2 -1
- sky/templates/nebius-ray.yml.j2 +34 -12
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +2 -1
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +10 -1
- sky/templates/scp-ray.yml.j2 +4 -50
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/vast-ray.yml.j2 +1 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +212 -37
- sky/usage/usage_lib.py +31 -15
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +397 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +35 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +38 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +314 -91
- sky/utils/config_utils.py +74 -5
- sky/utils/context.py +403 -0
- sky/utils/context_utils.py +242 -0
- sky/utils/controller_utils.py +383 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +485 -0
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +137 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +15 -29
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +18 -8
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +416 -0
- sky/utils/log_utils.py +82 -107
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +217 -39
- sky/utils/schemas.py +955 -160
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +29 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +26 -53
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +165 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +150 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +270 -0
- sky/volumes/server/server.py +124 -0
- sky/volumes/volume.py +215 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
- skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
- skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Utilities for handling resource handles."""
|
|
2
|
+
import copy
|
|
3
|
+
import typing
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def prepare_handle_for_backwards_compatibility(
|
|
7
|
+
handle: typing.Any) -> typing.Any:
|
|
8
|
+
"""Prepare a handle for backwards compatibility with older clients."""
|
|
9
|
+
# skylet_ssh_tunnel was causing backwards compatibility issues with older
|
|
10
|
+
# clients: AttributeError: Can't get attribute 'SSHTunnelInfo'
|
|
11
|
+
#
|
|
12
|
+
# But it is not needed on the client side, so we can just remove it.
|
|
13
|
+
if handle is not None and hasattr(handle, 'skylet_ssh_tunnel'):
|
|
14
|
+
handle = copy.deepcopy(handle)
|
|
15
|
+
handle.skylet_ssh_tunnel = None
|
|
16
|
+
return handle
|
sky/utils/status_lib.py
CHANGED
|
@@ -54,3 +54,13 @@ class StorageStatus(enum.Enum):
|
|
|
54
54
|
|
|
55
55
|
# Finished uploading, in terminal state
|
|
56
56
|
READY = 'READY'
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class VolumeStatus(enum.Enum):
|
|
60
|
+
"""Volume status as recorded in table 'volumes'."""
|
|
61
|
+
|
|
62
|
+
# Volume is ready to be used
|
|
63
|
+
READY = 'READY'
|
|
64
|
+
|
|
65
|
+
# Volume is being used
|
|
66
|
+
IN_USE = 'IN_USE'
|
sky/utils/subprocess_utils.py
CHANGED
|
@@ -6,18 +6,20 @@ import random
|
|
|
6
6
|
import resource
|
|
7
7
|
import shlex
|
|
8
8
|
import subprocess
|
|
9
|
+
import sys
|
|
9
10
|
import threading
|
|
10
11
|
import time
|
|
11
12
|
import typing
|
|
12
|
-
from typing import Any, Callable, Dict, List, Optional, Protocol, Tuple,
|
|
13
|
+
from typing import (Any, Callable, Dict, List, Optional, Protocol, Set, Tuple,
|
|
14
|
+
Union)
|
|
13
15
|
|
|
14
16
|
import colorama
|
|
15
17
|
|
|
16
18
|
from sky import exceptions
|
|
17
19
|
from sky import sky_logging
|
|
18
20
|
from sky.adaptors import common as adaptors_common
|
|
19
|
-
from sky.skylet import constants
|
|
20
21
|
from sky.skylet import log_lib
|
|
22
|
+
from sky.skylet import subprocess_daemon
|
|
21
23
|
from sky.utils import common_utils
|
|
22
24
|
from sky.utils import timeline
|
|
23
25
|
from sky.utils import ux_utils
|
|
@@ -107,7 +109,7 @@ def get_parallel_threads(cloud_str: Optional[str] = None) -> int:
|
|
|
107
109
|
|
|
108
110
|
|
|
109
111
|
def run_in_parallel(func: Callable,
|
|
110
|
-
args: List[Any],
|
|
112
|
+
args: Union[List[Any], Set[Any]],
|
|
111
113
|
num_threads: Optional[int] = None) -> List[Any]:
|
|
112
114
|
"""Run a function in parallel on a list of arguments.
|
|
113
115
|
|
|
@@ -128,7 +130,7 @@ def run_in_parallel(func: Callable,
|
|
|
128
130
|
if len(args) == 0:
|
|
129
131
|
return []
|
|
130
132
|
if len(args) == 1:
|
|
131
|
-
return [func(args[0])]
|
|
133
|
+
return [func(list(args)[0])]
|
|
132
134
|
|
|
133
135
|
processes = (num_threads
|
|
134
136
|
if num_threads is not None else get_parallel_threads())
|
|
@@ -208,8 +210,11 @@ def kill_children_processes(parent_pids: Optional[Union[
|
|
|
208
210
|
kill_process_with_grace_period(child, force=force)
|
|
209
211
|
|
|
210
212
|
|
|
211
|
-
|
|
212
|
-
|
|
213
|
+
GenericProcess = Union[multiprocessing.Process, psutil.Process,
|
|
214
|
+
subprocess.Popen]
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def kill_process_with_grace_period(proc: GenericProcess,
|
|
213
218
|
force: bool = False,
|
|
214
219
|
grace_period: int = 10) -> None:
|
|
215
220
|
"""Kill a process with SIGTERM and wait for it to exit.
|
|
@@ -223,6 +228,9 @@ def kill_process_with_grace_period(proc: Union[multiprocessing.Process,
|
|
|
223
228
|
if isinstance(proc, psutil.Process):
|
|
224
229
|
alive = proc.is_running
|
|
225
230
|
wait = proc.wait
|
|
231
|
+
elif isinstance(proc, subprocess.Popen):
|
|
232
|
+
alive = lambda: proc.poll() is None
|
|
233
|
+
wait = proc.wait
|
|
226
234
|
else:
|
|
227
235
|
alive = proc.is_alive
|
|
228
236
|
wait = proc.join
|
|
@@ -240,11 +248,10 @@ def kill_process_with_grace_period(proc: Union[multiprocessing.Process,
|
|
|
240
248
|
# The child process may have already been terminated.
|
|
241
249
|
return
|
|
242
250
|
except psutil.TimeoutExpired:
|
|
243
|
-
# Pass to finally to force kill the process.
|
|
244
|
-
pass
|
|
245
|
-
finally:
|
|
246
251
|
logger.debug(f'Process {proc.pid} did not terminate after '
|
|
247
252
|
f'{grace_period} seconds')
|
|
253
|
+
# Continue to finally to force kill the process.
|
|
254
|
+
finally:
|
|
248
255
|
# Attempt to force kill if the normal termination fails
|
|
249
256
|
if not force:
|
|
250
257
|
logger.debug(f'Force killing process {proc.pid}')
|
|
@@ -300,11 +307,17 @@ def run_with_retries(
|
|
|
300
307
|
return returncode, stdout, stderr
|
|
301
308
|
|
|
302
309
|
|
|
303
|
-
def kill_process_daemon(process_pid: int) -> None:
|
|
310
|
+
def kill_process_daemon(process_pid: int, use_kill_pg: bool = False) -> None:
|
|
304
311
|
"""Start a daemon as a safety net to kill the process.
|
|
305
312
|
|
|
306
313
|
Args:
|
|
307
314
|
process_pid: The PID of the process to kill.
|
|
315
|
+
use_kill_pg: Whether to use kill process group to kill the process. If
|
|
316
|
+
True, the process will use os.killpg() to kill the target process
|
|
317
|
+
group on UNIX system, which is more efficient than using the daemon
|
|
318
|
+
to refresh the process tree in the daemon. Note that both
|
|
319
|
+
implementations have corner cases where subprocesses might not be
|
|
320
|
+
killed. Refer to subprocess_daemon.py for more details.
|
|
308
321
|
"""
|
|
309
322
|
# Get initial children list
|
|
310
323
|
try:
|
|
@@ -317,12 +330,8 @@ def kill_process_daemon(process_pid: int) -> None:
|
|
|
317
330
|
daemon_script = os.path.join(
|
|
318
331
|
os.path.dirname(os.path.abspath(log_lib.__file__)),
|
|
319
332
|
'subprocess_daemon.py')
|
|
320
|
-
python_path = subprocess.check_output(constants.SKY_GET_PYTHON_PATH_CMD,
|
|
321
|
-
shell=True,
|
|
322
|
-
stderr=subprocess.DEVNULL,
|
|
323
|
-
encoding='utf-8').strip()
|
|
324
333
|
daemon_cmd = [
|
|
325
|
-
|
|
334
|
+
sys.executable,
|
|
326
335
|
daemon_script,
|
|
327
336
|
'--parent-pid',
|
|
328
337
|
str(parent_pid),
|
|
@@ -335,6 +344,10 @@ def kill_process_daemon(process_pid: int) -> None:
|
|
|
335
344
|
','.join(map(str, initial_children)),
|
|
336
345
|
]
|
|
337
346
|
|
|
347
|
+
env = os.environ.copy()
|
|
348
|
+
if use_kill_pg:
|
|
349
|
+
env[subprocess_daemon.USE_KILL_PG_ENV_VAR] = '1'
|
|
350
|
+
|
|
338
351
|
# We do not need to set `start_new_session=True` here, as the
|
|
339
352
|
# daemon script will detach itself from the parent process with
|
|
340
353
|
# fork to avoid being killed by parent process. See the reason we
|
|
@@ -346,6 +359,7 @@ def kill_process_daemon(process_pid: int) -> None:
|
|
|
346
359
|
stderr=subprocess.DEVNULL,
|
|
347
360
|
# Disable input
|
|
348
361
|
stdin=subprocess.DEVNULL,
|
|
362
|
+
env=env,
|
|
349
363
|
)
|
|
350
364
|
|
|
351
365
|
|
sky/utils/tempstore.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Temporary storage context manager."""
|
|
2
|
+
|
|
3
|
+
import contextlib
|
|
4
|
+
import contextvars
|
|
5
|
+
import functools
|
|
6
|
+
import os
|
|
7
|
+
import tempfile
|
|
8
|
+
import typing
|
|
9
|
+
from typing import Any, Callable, Iterator, Optional, TypeVar
|
|
10
|
+
|
|
11
|
+
_TEMP_DIR: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar(
|
|
12
|
+
'temp_store_dir', default=None)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@contextlib.contextmanager
def tempdir() -> Iterator[str]:
    """Provide a temporary directory bound to the current context.

    A ``tempfile.TemporaryDirectory`` is created and its path is published
    through the module-level context variable, so nested callees can look
    the directory up instead of receiving it as an argument.

    The manager is nestable: an inner ``tempdir()`` creates its own
    directory, and the previously published directory is restored when the
    inner block exits.

    Yields:
        The path of the newly created temporary directory.
    """
    scratch = tempfile.TemporaryDirectory(prefix='sky-tmp')
    with scratch as temp_dir:
        # Keep the token so the previous value (possibly None) can be
        # restored even if the body raises.
        token = _TEMP_DIR.set(temp_dir)
        try:
            yield temp_dir
        finally:
            _TEMP_DIR.reset(token)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Keep the function signature same as tempfile.mkdtemp.
# pylint: disable=redefined-builtin
def mkdtemp(suffix: Optional[str] = None,
            prefix: Optional[str] = None,
            dir: Optional[str] = None) -> str:
    """Create a temporary directory inside the current context's temp dir.

    When a context temp dir is active:
      * with no ``dir``, the new directory is created directly under it;
      * with a ``dir``, that path is joined onto the context temp dir and
        created if missing, and the new directory is placed inside it.

    When no context temp dir is active, the call is forwarded unchanged to
    ``tempfile.mkdtemp``.

    Args:
        suffix: Forwarded to ``tempfile.mkdtemp``.
        prefix: Forwarded to ``tempfile.mkdtemp``.
        dir: Optional parent directory; see above for how it is resolved.

    Returns:
        The path of the created directory.
    """
    context_temp_dir = _TEMP_DIR.get()

    if context_temp_dir is not None:
        if dir is None:
            dir = context_temp_dir
        else:
            # Nest the requested parent under the context temp dir and make
            # sure it exists before tempfile.mkdtemp uses it.
            dir = os.path.join(context_temp_dir, dir)
            os.makedirs(dir, exist_ok=True)

    return tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=dir)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
F = TypeVar('F', bound=Callable[..., Any])
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def with_tempdir(func: F) -> F:
    """Decorate ``func`` so every call runs inside a ``tempdir()`` context.

    Refer to `tempdir` for more details.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same metadata as ``func`` (via
        ``functools.wraps``), cast back to the original callable type.
    """

    @functools.wraps(func)
    def _run_in_tempdir(*args, **kwargs):
        with tempdir():
            result = func(*args, **kwargs)
        return result

    return typing.cast(F, _run_in_tempdir)
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Utility functions for threads."""
|
|
2
|
+
|
|
3
|
+
import threading
|
|
4
|
+
from typing import Any, Dict, Generic, Optional, overload, TypeVar
|
|
5
|
+
|
|
6
|
+
from sky.utils import common_utils
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class SafeThread(threading.Thread):
    """A thread that captures the exception raised while it runs.

    Anything raised by ``threading.Thread.run`` is stored on the instance
    instead of propagating out of ``run``; it can be inspected afterwards
    through ``format_exc``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Exception captured by run(); stays None if run() did not raise.
        self._exc: Optional[BaseException] = None

    def run(self):
        try:
            super().run()
        except BaseException as exc:  # pylint: disable=broad-except
            self._exc = exc

    @property
    def format_exc(self) -> Optional[str]:
        """The captured exception formatted as text, or None if none."""
        captured = self._exc
        return (None if captured is None else
                common_utils.format_exception(captured))
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# pylint: disable=invalid-name
KeyType = TypeVar('KeyType')
ValueType = TypeVar('ValueType')


# Google style guide: Do not rely on the atomicity of built-in types.
# Our launch and down process pool will be used by multiple threads,
# therefore we need to use a thread-safe dict.
# see https://google.github.io/styleguide/pyguide.html#218-threading
class ThreadSafeDict(Generic[KeyType, ValueType]):
    """A dict wrapper whose operations are serialized by a lock.

    Every method acquires the same ``threading.Lock`` around the underlying
    dict operation. ``items()`` and ``values()`` return snapshots taken
    while the lock is held, so the caller can iterate them after the lock
    is released without being affected by later mutations.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Build the dict from the same arguments accepted by ``dict``."""
        self._dict: Dict[KeyType, ValueType] = dict(*args, **kwargs)
        self._lock = threading.Lock()

    def __getitem__(self, key: KeyType) -> ValueType:
        with self._lock:
            return self._dict[key]

    def __setitem__(self, key: KeyType, value: ValueType) -> None:
        with self._lock:
            self._dict[key] = value

    def __delitem__(self, key: KeyType) -> None:
        with self._lock:
            del self._dict[key]

    def __len__(self) -> int:
        with self._lock:
            return len(self._dict)

    def __contains__(self, key: KeyType) -> bool:
        with self._lock:
            return key in self._dict

    def items(self):
        """Return a snapshot list of ``(key, value)`` pairs.

        A live dict view would be iterated by the caller after the lock is
        released, which raises ``RuntimeError`` if the dict is resized in
        the meantime; copying under the lock avoids that.
        """
        with self._lock:
            return list(self._dict.items())

    def values(self):
        """Return a snapshot list of the values (see ``items``)."""
        with self._lock:
            return list(self._dict.values())

    @overload
    def get(self, key: KeyType, default: ValueType) -> ValueType:
        ...

    @overload
    def get(self,
            key: KeyType,
            default: Optional[ValueType] = None) -> Optional[ValueType]:
        ...

    def get(self,
            key: KeyType,
            default: Optional[ValueType] = None) -> Optional[ValueType]:
        """Return the value for ``key``, or ``default`` if it is absent."""
        with self._lock:
            return self._dict.get(key, default)

    def pop(self, key: KeyType) -> Optional[ValueType]:
        """Remove ``key`` and return its value, or None if it is absent."""
        with self._lock:
            return self._dict.pop(key, None)
|
sky/utils/timeline.py
CHANGED
|
@@ -4,7 +4,6 @@ The timeline follows the trace event format defined here:
|
|
|
4
4
|
https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview
|
|
5
5
|
""" # pylint: disable=line-too-long
|
|
6
6
|
import atexit
|
|
7
|
-
import functools
|
|
8
7
|
import json
|
|
9
8
|
import os
|
|
10
9
|
import threading
|
|
@@ -12,13 +11,15 @@ import time
|
|
|
12
11
|
import traceback
|
|
13
12
|
from typing import Callable, Optional, Union
|
|
14
13
|
|
|
15
|
-
import filelock
|
|
16
|
-
|
|
17
14
|
from sky.utils import common_utils
|
|
18
15
|
|
|
19
16
|
_events = []
|
|
20
17
|
|
|
21
18
|
|
|
19
|
+
def _get_events_file_path():
|
|
20
|
+
return os.environ.get('SKYPILOT_TIMELINE_FILE_PATH')
|
|
21
|
+
|
|
22
|
+
|
|
22
23
|
class Event:
|
|
23
24
|
"""Record an event.
|
|
24
25
|
|
|
@@ -28,6 +29,10 @@ class Event:
|
|
|
28
29
|
"""
|
|
29
30
|
|
|
30
31
|
def __init__(self, name: str, message: Optional[str] = None):
|
|
32
|
+
self._skipped = False
|
|
33
|
+
if not _get_events_file_path():
|
|
34
|
+
self._skipped = True
|
|
35
|
+
return
|
|
31
36
|
self._name = name
|
|
32
37
|
self._message = message
|
|
33
38
|
# See the module doc for the event format.
|
|
@@ -44,6 +49,8 @@ class Event:
|
|
|
44
49
|
self._event['args'] = {'message': self._message}
|
|
45
50
|
|
|
46
51
|
def begin(self):
|
|
52
|
+
if self._skipped:
|
|
53
|
+
return
|
|
47
54
|
event_begin = self._event.copy()
|
|
48
55
|
event_begin.update({
|
|
49
56
|
'ph': 'B',
|
|
@@ -51,10 +58,13 @@ class Event:
|
|
|
51
58
|
})
|
|
52
59
|
event_begin['args'] = {'stack': '\n'.join(traceback.format_stack())}
|
|
53
60
|
if self._message is not None:
|
|
54
|
-
event_begin['args'][
|
|
61
|
+
event_begin['args'][
|
|
62
|
+
'message'] = self._message # type: ignore[index]
|
|
55
63
|
_events.append(event_begin)
|
|
56
64
|
|
|
57
65
|
def end(self):
|
|
66
|
+
if self._skipped:
|
|
67
|
+
return
|
|
58
68
|
event_end = self._event.copy()
|
|
59
69
|
event_end.update({
|
|
60
70
|
'ph': 'E',
|
|
@@ -76,63 +86,26 @@ def event(name_or_fn: Union[str, Callable], message: Optional[str] = None):
|
|
|
76
86
|
return common_utils.make_decorator(Event, name_or_fn, message=message)
|
|
77
87
|
|
|
78
88
|
|
|
79
|
-
class FileLockEvent:
|
|
80
|
-
"""Serve both as a file lock and event for the lock."""
|
|
81
|
-
|
|
82
|
-
def __init__(self, lockfile: Union[str, os.PathLike], timeout: float = -1):
|
|
83
|
-
self._lockfile = lockfile
|
|
84
|
-
os.makedirs(os.path.dirname(os.path.abspath(self._lockfile)),
|
|
85
|
-
exist_ok=True)
|
|
86
|
-
self._lock = filelock.FileLock(self._lockfile, timeout)
|
|
87
|
-
self._hold_lock_event = Event(f'[FileLock.hold]:{self._lockfile}')
|
|
88
|
-
|
|
89
|
-
def acquire(self):
|
|
90
|
-
was_locked = self._lock.is_locked
|
|
91
|
-
with Event(f'[FileLock.acquire]:{self._lockfile}'):
|
|
92
|
-
self._lock.acquire()
|
|
93
|
-
if not was_locked and self._lock.is_locked:
|
|
94
|
-
# start holding the lock after initial acquiring
|
|
95
|
-
self._hold_lock_event.begin()
|
|
96
|
-
|
|
97
|
-
def release(self):
|
|
98
|
-
was_locked = self._lock.is_locked
|
|
99
|
-
self._lock.release()
|
|
100
|
-
if was_locked and not self._lock.is_locked:
|
|
101
|
-
# stop holding the lock after initial releasing
|
|
102
|
-
self._hold_lock_event.end()
|
|
103
|
-
|
|
104
|
-
def __enter__(self):
|
|
105
|
-
self.acquire()
|
|
106
|
-
return self
|
|
107
|
-
|
|
108
|
-
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
109
|
-
self.release()
|
|
110
|
-
|
|
111
|
-
def __call__(self, f):
|
|
112
|
-
# Make this class callable as a decorator.
|
|
113
|
-
@functools.wraps(f)
|
|
114
|
-
def wrapper(*args, **kwargs):
|
|
115
|
-
with self:
|
|
116
|
-
return f(*args, **kwargs)
|
|
117
|
-
|
|
118
|
-
return wrapper
|
|
119
|
-
|
|
120
|
-
|
|
121
89
|
def save_timeline():
|
|
122
|
-
|
|
123
|
-
if not
|
|
90
|
+
events_file_path = _get_events_file_path()
|
|
91
|
+
if not events_file_path:
|
|
124
92
|
return
|
|
93
|
+
global _events
|
|
94
|
+
events_to_write = _events
|
|
95
|
+
_events = []
|
|
125
96
|
json_output = {
|
|
126
|
-
'traceEvents':
|
|
97
|
+
'traceEvents': events_to_write,
|
|
127
98
|
'displayTimeUnit': 'ms',
|
|
128
99
|
'otherData': {
|
|
129
|
-
'log_dir': os.path.dirname(os.path.abspath(
|
|
100
|
+
'log_dir': os.path.dirname(os.path.abspath(events_file_path)),
|
|
130
101
|
}
|
|
131
102
|
}
|
|
132
|
-
os.makedirs(os.path.dirname(os.path.abspath(
|
|
133
|
-
|
|
103
|
+
os.makedirs(os.path.dirname(os.path.abspath(events_file_path)),
|
|
104
|
+
exist_ok=True)
|
|
105
|
+
with open(events_file_path, 'w', encoding='utf-8') as f:
|
|
134
106
|
json.dump(json_output, f)
|
|
107
|
+
del events_to_write
|
|
135
108
|
|
|
136
109
|
|
|
137
|
-
if
|
|
110
|
+
if _get_events_file_path():
|
|
138
111
|
atexit.register(save_timeline)
|
sky/utils/ux_utils.py
CHANGED
|
@@ -1,17 +1,19 @@
|
|
|
1
1
|
"""Utility functions for UX."""
|
|
2
2
|
import contextlib
|
|
3
3
|
import enum
|
|
4
|
+
import fnmatch
|
|
4
5
|
import os
|
|
5
6
|
import sys
|
|
6
7
|
import traceback
|
|
7
8
|
import typing
|
|
8
|
-
from typing import Callable, Optional, Union
|
|
9
|
+
from typing import Callable, Iterable, List, Optional, Union
|
|
9
10
|
|
|
10
11
|
import colorama
|
|
11
12
|
|
|
12
13
|
from sky import sky_logging
|
|
13
14
|
from sky.skylet import constants
|
|
14
15
|
from sky.utils import common_utils
|
|
16
|
+
from sky.utils import env_options
|
|
15
17
|
from sky.utils import rich_console_utils
|
|
16
18
|
|
|
17
19
|
if typing.TYPE_CHECKING:
|
|
@@ -25,9 +27,16 @@ BOLD = '\033[1m'
|
|
|
25
27
|
RESET_BOLD = '\033[0m'
|
|
26
28
|
|
|
27
29
|
# Log path hint in the spinner during launching
|
|
30
|
+
# (old, kept for backward compatibility)
|
|
28
31
|
_LOG_PATH_HINT = (f'{colorama.Style.DIM}View logs: sky api logs -l '
|
|
29
32
|
'{log_path}'
|
|
30
33
|
f'{colorama.Style.RESET_ALL}')
|
|
34
|
+
# Log hint: recommend sky logs --provision <cluster_name>
|
|
35
|
+
_PROVISION_LOG_HINT = (
|
|
36
|
+
f'{colorama.Style.DIM}View logs: '
|
|
37
|
+
f'{BOLD}sky logs --provision {{cluster_name}}{RESET_BOLD}'
|
|
38
|
+
f'{colorama.Style.RESET_ALL}')
|
|
39
|
+
# Legacy path hint retained for local-only cases where we don't have cluster
|
|
31
40
|
_LOG_PATH_HINT_LOCAL = (f'{colorama.Style.DIM}View logs: '
|
|
32
41
|
'{log_path}'
|
|
33
42
|
f'{colorama.Style.RESET_ALL}')
|
|
@@ -57,10 +66,14 @@ def print_exception_no_traceback():
|
|
|
57
66
|
if error():
|
|
58
67
|
raise ValueError('...')
|
|
59
68
|
"""
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
69
|
+
if env_options.Options.SHOW_DEBUG_INFO.get():
|
|
70
|
+
# When SKYPILOT_DEBUG is set, show the full traceback
|
|
71
|
+
yield
|
|
72
|
+
else:
|
|
73
|
+
original_tracelimit = getattr(sys, 'tracebacklimit', 1000)
|
|
74
|
+
sys.tracebacklimit = 0
|
|
75
|
+
yield
|
|
76
|
+
sys.tracebacklimit = original_tracelimit
|
|
64
77
|
|
|
65
78
|
|
|
66
79
|
@contextlib.contextmanager
|
|
@@ -121,7 +134,10 @@ class RedirectOutputForProcess:
|
|
|
121
134
|
|
|
122
135
|
def log_path_hint(log_path: Union[str, 'pathlib.Path'],
|
|
123
136
|
is_local: bool = False) -> str:
|
|
124
|
-
"""Gets the log path hint for the given log path.
|
|
137
|
+
"""Gets the log path hint for the given log path.
|
|
138
|
+
|
|
139
|
+
Kept for backward compatibility when only paths are available.
|
|
140
|
+
"""
|
|
125
141
|
log_path = str(log_path)
|
|
126
142
|
expanded_home = os.path.expanduser('~')
|
|
127
143
|
if log_path.startswith(expanded_home):
|
|
@@ -134,6 +150,12 @@ def log_path_hint(log_path: Union[str, 'pathlib.Path'],
|
|
|
134
150
|
return _LOG_PATH_HINT.format(log_path=log_path)
|
|
135
151
|
|
|
136
152
|
|
|
153
|
+
def provision_hint(cluster_name: Optional[str]) -> Optional[str]:
    """Build the `sky logs --provision <cluster>` hint for a cluster.

    Args:
        cluster_name: Name of the cluster; may be None or empty.

    Returns:
        The formatted hint, or None when no cluster name is given.
    """
    if cluster_name:
        return _PROVISION_LOG_HINT.format(cluster_name=cluster_name)
    return None
|
|
157
|
+
|
|
158
|
+
|
|
137
159
|
def starting_message(message: str) -> str:
|
|
138
160
|
"""Gets the starting message for the given message."""
|
|
139
161
|
# We have to reset the color before the message, because sometimes if a
|
|
@@ -145,7 +167,8 @@ def starting_message(message: str) -> str:
|
|
|
145
167
|
def finishing_message(message: str,
|
|
146
168
|
log_path: Optional[Union[str, 'pathlib.Path']] = None,
|
|
147
169
|
is_local: bool = False,
|
|
148
|
-
follow_up_message: Optional[str] = None
|
|
170
|
+
follow_up_message: Optional[str] = None,
|
|
171
|
+
cluster_name: Optional[str] = None) -> str:
|
|
149
172
|
"""Gets the finishing message for the given message.
|
|
150
173
|
|
|
151
174
|
Args:
|
|
@@ -161,7 +184,11 @@ def finishing_message(message: str,
|
|
|
161
184
|
follow_up_message = follow_up_message if (follow_up_message
|
|
162
185
|
is not None) else ''
|
|
163
186
|
success_prefix = (f'{colorama.Style.RESET_ALL}{colorama.Fore.GREEN}✓ '
|
|
164
|
-
f'{message}{colorama.Style.RESET_ALL}{follow_up_message}'
|
|
187
|
+
f'{message}{colorama.Style.RESET_ALL}{follow_up_message}'
|
|
188
|
+
f'{colorama.Style.RESET_ALL}')
|
|
189
|
+
hint = provision_hint(cluster_name)
|
|
190
|
+
if hint:
|
|
191
|
+
return f'{success_prefix} {hint}'
|
|
165
192
|
if log_path is None:
|
|
166
193
|
return success_prefix
|
|
167
194
|
path_hint = log_path_hint(log_path, is_local)
|
|
@@ -170,13 +197,17 @@ def finishing_message(message: str,
|
|
|
170
197
|
|
|
171
198
|
def error_message(message: str,
|
|
172
199
|
log_path: Optional[Union[str, 'pathlib.Path']] = None,
|
|
173
|
-
is_local: bool = False
|
|
200
|
+
is_local: bool = False,
|
|
201
|
+
cluster_name: Optional[str] = None) -> str:
|
|
174
202
|
"""Gets the error message for the given message."""
|
|
175
203
|
# We have to reset the color before the message, because sometimes if a
|
|
176
204
|
# previous spinner with dimmed color overflows in a narrow terminal, the
|
|
177
205
|
# color might be messed up.
|
|
178
206
|
error_prefix = (f'{colorama.Style.RESET_ALL}{colorama.Fore.RED}⨯'
|
|
179
207
|
f'{colorama.Style.RESET_ALL} {message}')
|
|
208
|
+
hint = provision_hint(cluster_name)
|
|
209
|
+
if hint:
|
|
210
|
+
return f'{error_prefix} {hint}'
|
|
180
211
|
if log_path is None:
|
|
181
212
|
return error_prefix
|
|
182
213
|
path_hint = log_path_hint(log_path, is_local)
|
|
@@ -194,9 +225,16 @@ def retry_message(message: str) -> str:
|
|
|
194
225
|
|
|
195
226
|
def spinner_message(message: str,
|
|
196
227
|
log_path: Optional[Union[str, 'pathlib.Path']] = None,
|
|
197
|
-
is_local: bool = False
|
|
198
|
-
|
|
228
|
+
is_local: bool = False,
|
|
229
|
+
cluster_name: Optional[str] = None) -> str:
|
|
230
|
+
"""Gets the spinner message for the given message and log path.
|
|
231
|
+
|
|
232
|
+
If cluster_name is provided, recommend `sky logs --provision <cluster>`.
|
|
233
|
+
"""
|
|
199
234
|
colored_spinner = f'[bold cyan]{message}[/]'
|
|
235
|
+
hint = provision_hint(cluster_name)
|
|
236
|
+
if hint:
|
|
237
|
+
return f'{colored_spinner} {hint}'
|
|
200
238
|
if log_path is None:
|
|
201
239
|
return colored_spinner
|
|
202
240
|
path_hint = log_path_hint(log_path, is_local)
|
|
@@ -247,9 +285,40 @@ def command_hint_messages(hint_type: CommandHintType,
|
|
|
247
285
|
f'{BOLD}sky jobs logs {job_id}{RESET_BOLD}'
|
|
248
286
|
f'\n{INDENT_SYMBOL}To stream controller logs:\t\t'
|
|
249
287
|
f'{BOLD}sky jobs logs --controller {job_id}{RESET_BOLD}'
|
|
250
|
-
f'\n{
|
|
251
|
-
f'{BOLD}sky jobs queue{RESET_BOLD}'
|
|
252
|
-
f'\n{INDENT_LAST_SYMBOL}To view managed job dashboard:\t\t'
|
|
253
|
-
f'{BOLD}sky jobs dashboard{RESET_BOLD}')
|
|
288
|
+
f'\n{INDENT_LAST_SYMBOL}To view all managed jobs:\t\t'
|
|
289
|
+
f'{BOLD}sky jobs queue{RESET_BOLD}')
|
|
254
290
|
else:
|
|
255
291
|
raise ValueError(f'Invalid hint type: {hint_type}')
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def is_glob_pattern(pattern: str) -> bool:
    """Checks if a string contains common glob pattern wildcards.

    A string counts as a glob pattern when it contains any of ``*``, ``?``,
    ``[`` or ``]``. The globstar ``**`` needs no separate check because it
    already contains ``*``.
    """
    return any(char in '*?[]' for char in pattern)


def get_non_matched_query(query_clusters: Iterable[str],
                          cluster_names: Iterable[str]) -> List[str]:
    """Gets the non-matched query clusters.

    Plain queries must equal an existing cluster name; glob queries (see
    ``is_glob_pattern``) must match at least one name via ``fnmatch``.

    Args:
        query_clusters: Cluster names or glob patterns requested.
        cluster_names: Names of the clusters that exist. May be any
            iterable, including a one-shot iterator.

    Returns:
        The queries that matched nothing: unmatched plain names first, then
        unmatched glob patterns, each group in query order.
    """
    # cluster_names is consulted once per query, so materialize it up front:
    # a one-shot iterator would otherwise be exhausted by the first lookup
    # and every later query would be reported as unmatched.
    known_names = list(cluster_names)
    known_name_set = set(known_names)

    missing_exact: List[str] = []
    missing_globs: List[str] = []
    for query in query_clusters:
        if is_glob_pattern(query):
            if not fnmatch.filter(known_names, query):
                missing_globs.append(query)
        elif query not in known_name_set:
            missing_exact.append(query)
    return missing_exact + missing_globs
|
sky/utils/validator.py
CHANGED
|
@@ -14,9 +14,19 @@ def case_insensitive_enum(validator, enums, instance, schema):
|
|
|
14
14
|
f'{instance!r} is not one of {enums!r}')
|
|
15
15
|
|
|
16
16
|
|
|
17
|
+
def case_sensitive_enum(validator, enums, instance, schema):
    """Validate that ``instance`` is a member of ``enums`` (exact match).

    Args:
        validator: Unused.
        enums: The allowed values.
        instance: The value being validated.
        schema: Unused.

    Yields:
        A ``jsonschema.ValidationError`` when ``instance`` is not in
        ``enums``; nothing otherwise.
    """
    del validator, schema  # Unused.
    if instance in enums:
        return
    yield jsonschema.ValidationError(
        f'{instance!r} is not one of {enums!r}')
|
|
22
|
+
|
|
23
|
+
|
|
17
24
|
# Move this to a function to delay initialization
|
|
18
25
|
def get_schema_validator():
|
|
19
26
|
"""Get the schema validator class, initializing it only when needed."""
|
|
20
27
|
return jsonschema.validators.extend(
|
|
21
28
|
jsonschema.Draft7Validator,
|
|
22
|
-
validators={
|
|
29
|
+
validators={
|
|
30
|
+
'case_insensitive_enum': case_insensitive_enum,
|
|
31
|
+
'case_sensitive_enum': case_sensitive_enum
|
|
32
|
+
})
|