skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +81 -16
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +173 -30
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1323 -397
- sky/backends/cloud_vm_ray_backend.py +1749 -1029
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/task_codegen.py +633 -0
- sky/backends/wheel_utils.py +55 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +90 -49
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +338 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +533 -185
- sky/cli.py +5 -5975
- sky/client/{cli.py → cli/command.py} +2591 -1956
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +322 -0
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +78 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1219 -319
- sky/client/sdk_async.py +827 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +564 -164
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +140 -40
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +570 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +210 -81
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -86
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +231 -167
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +375 -173
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1460 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +126 -2
- sky/execution.py +216 -63
- sky/global_user_state.py +2390 -586
- sky/jobs/__init__.py +7 -0
- sky/jobs/client/sdk.py +300 -58
- sky/jobs/client/sdk_async.py +161 -0
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +848 -275
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +402 -152
- sky/jobs/scheduler.py +314 -189
- sky/jobs/server/core.py +836 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2109 -706
- sky/jobs/utils.py +1306 -215
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +453 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +136 -50
- sky/provision/azure/instance.py +11 -6
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +140 -33
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +101 -20
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +919 -280
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +44 -43
- sky/provision/kubernetes/utils.py +1221 -534
- sky/provision/kubernetes/volume.py +343 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +237 -137
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +117 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +214 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +707 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +4 -4
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +17 -20
- sky/py.typed +0 -0
- sky/resources.py +845 -119
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +227 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +12 -9
- sky/serve/controller.py +68 -17
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +116 -2
- sky/serve/replica_managers.py +434 -249
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +569 -257
- sky/serve/serve_utils.py +775 -265
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +192 -89
- sky/serve/service_spec.py +144 -20
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +202 -0
- sky/server/common.py +478 -182
- sky/server/config.py +85 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +295 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/middleware_utils.py +166 -0
- sky/server/requests/executor.py +558 -138
- sky/server/requests/payloads.py +364 -24
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +121 -0
- sky/server/requests/requests.py +822 -226
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +455 -0
- sky/server/server.py +1309 -285
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +327 -61
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +11 -1
- sky/setup_files/alembic.ini +160 -0
- sky/setup_files/dependencies.py +139 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +114 -7
- sky/skylet/attempt_skylet.py +106 -24
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +29 -20
- sky/skylet/constants.py +216 -25
- sky/skylet/events.py +101 -21
- sky/skylet/job_lib.py +345 -164
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +568 -0
- sky/skylet/skylet.py +72 -4
- sky/skylet/subprocess_daemon.py +104 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +685 -163
- sky/templates/aws-ray.yml.j2 +11 -3
- sky/templates/azure-ray.yml.j2 +2 -1
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +2 -1
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +62 -1
- sky/templates/hyperbolic-ray.yml.j2 +68 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +611 -50
- sky/templates/lambda-ray.yml.j2 +2 -1
- sky/templates/nebius-ray.yml.j2 +34 -12
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +2 -1
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +10 -1
- sky/templates/scp-ray.yml.j2 +4 -50
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/vast-ray.yml.j2 +1 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +212 -37
- sky/usage/usage_lib.py +31 -15
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +397 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +35 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +38 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +314 -91
- sky/utils/config_utils.py +74 -5
- sky/utils/context.py +403 -0
- sky/utils/context_utils.py +242 -0
- sky/utils/controller_utils.py +383 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +485 -0
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +137 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +15 -29
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +18 -8
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +416 -0
- sky/utils/log_utils.py +82 -107
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +217 -39
- sky/utils/schemas.py +955 -160
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +29 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +26 -53
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +165 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +150 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +270 -0
- sky/volumes/server/server.py +124 -0
- sky/volumes/volume.py +215 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
- skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
- skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/utils/context.py
ADDED
|
@@ -0,0 +1,403 @@
|
|
|
1
|
+
"""SkyPilot context for threads and coroutines."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from collections.abc import Mapping
|
|
5
|
+
import contextvars
|
|
6
|
+
import copy
|
|
7
|
+
import functools
|
|
8
|
+
import os
|
|
9
|
+
import pathlib
|
|
10
|
+
import subprocess
|
|
11
|
+
import sys
|
|
12
|
+
from typing import (Any, Callable, Coroutine, Dict, Iterator, MutableMapping,
|
|
13
|
+
Optional, TextIO, TYPE_CHECKING, TypeVar)
|
|
14
|
+
|
|
15
|
+
from typing_extensions import ParamSpec
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from sky.skypilot_config import ConfigContext
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class SkyPilotContext(object):
    """SkyPilot typed context vars for threads and coroutines.

    This is a wrapper around `contextvars.ContextVar` that provides a typed
    interface for the SkyPilot specific context variables that can be accessed
    at any layer of the call stack. ContextVar is coroutine local, an empty
    Context will be initialized for each coroutine when it is created.

    Adding a new context variable for a new feature is as simple as:
    1. Add a new instance variable to the Context class.
    2. (Optional) Add new accessor methods if the variable should be protected.

    To propagate the context to a new thread/coroutine, use
    `contextvars.copy_context()`.

    Example:
        import asyncio
        import contextvars
        import time
        from sky.utils import context

        def sync_task():
            while True:
                if context.get().is_canceled():
                    break
                time.sleep(1)

        async def fastapi_handler():
            # context.initialize() has been called in lifespan
            ctx = contextvars.copy_context()
            # asyncio.to_thread copies current context implicitly
            task = asyncio.to_thread(sync_task)
            # Or explicitly:
            # loop = asyncio.get_running_loop()
            # ctx = contextvars.copy_context()
            # task = loop.run_in_executor(None, ctx.run, sync_task)
            await asyncio.sleep(1)
            context.get().cancel()
            await task
    """

    def __init__(self):
        # Cancellation flag: set by cancel(), read by is_canceled().
        self._canceled = asyncio.Event()
        # Path of the current log redirect target; None when not redirected.
        self._log_file: Optional[pathlib.Path] = None
        # Open append-mode handle for the redirect target; None when output
        # is not redirected (or after cleanup()).
        self._log_file_handle: Optional[TextIO] = None
        # Environment variable overrides recorded through override_envs().
        self.env_overrides: Dict[str, str] = {}
        # Config context attached to this context; deep-copied by copy().
        self.config_context: Optional['ConfigContext'] = None

    def cancel(self) -> None:
        """Cancel the context."""
        self._canceled.set()

    def is_canceled(self) -> bool:
        """Check if the context is canceled."""
        return self._canceled.is_set()

    def redirect_log(
            self, log_file: Optional[pathlib.Path]) -> Optional[pathlib.Path]:
        """Redirect the stdout and stderr of current context to a file.

        Args:
            log_file: The log file to redirect to. If None, the stdout and
                stderr will be restored to the original streams.

        Returns:
            The old log file, or None if the stdout and stderr were not
            redirected.
        """
        previous_file = self._log_file
        previous_handle = self._log_file_handle
        # Open the new target before touching any state, so a failing open()
        # leaves the existing redirection intact.
        if log_file is None:
            self._log_file_handle = None
        else:
            self._log_file_handle = open(log_file, 'a', encoding='utf-8')
        self._log_file = log_file
        # Release the handle of the previous target only after the switch.
        if previous_handle is not None:
            previous_handle.close()
        return previous_file

    def output_stream(self, fallback: TextIO) -> TextIO:
        """Return the stream that output of this context should be written to.

        Args:
            fallback: The stream to use when no log redirection is active.

        Returns:
            The open log file handle if a redirection is active, otherwise
            `fallback`.
        """
        if self._log_file_handle is None:
            return fallback
        return self._log_file_handle

    def override_envs(self, envs: Dict[str, str]) -> None:
        """Record environment variable overrides for this context.

        Args:
            envs: Variables to override. Entries replace existing overrides
                with the same key; other existing overrides are kept.
        """
        self.env_overrides.update(envs)

    def cleanup(self) -> None:
        """Clean up the context."""
        if self._log_file_handle is not None:
            self._log_file_handle.close()
            self._log_file_handle = None

    def __enter__(self) -> 'SkyPilotContext':
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        del exc_type, exc_val, exc_tb
        self.cleanup()

    def copy(self) -> 'SkyPilotContext':
        """Create a copy of the context.

        Changes to the current context after this call will not affect the copy.
        The new context will get its own handle/fd for the log file.
        The new context will get an independent copy of the env var overrides.
        The new context will get an independent copy of the config context.
        Cancellation of the current context will not be propagated to the copy.
        """
        new_context = SkyPilotContext()
        # Re-opening the same path gives the copy its own file handle.
        new_context.redirect_log(self._log_file)
        new_context.env_overrides = self.env_overrides.copy()
        new_context.config_context = copy.deepcopy(self.config_context)
        return new_context
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
# Context variable holding the SkyPilotContext visible to the current
# thread/coroutine; it defaults to None.
_CONTEXT = contextvars.ContextVar[Optional[SkyPilotContext]](
    'sky_context', default=None)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def get() -> Optional[SkyPilotContext]:
|
|
144
|
+
"""Get the current SkyPilot context.
|
|
145
|
+
|
|
146
|
+
If the context is not initialized, get() will return None. This helps
|
|
147
|
+
sync code to check whether it runs in a cancellable context and avoid
|
|
148
|
+
polling the cancellation event if it is not.
|
|
149
|
+
"""
|
|
150
|
+
return _CONTEXT.get()
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class ContextualEnviron(MutableMapping[str, str]):
    """Environment variables wrapper with contextual overrides.

    An instance of ContextualEnviron will typically be used to replace
    os.environ to make the environ access of the current process
    contextually aware.

    Lookups consult the env overrides of the current SkyPilot context first
    and fall back to the wrapped environ. An override value of ``None`` is a
    tombstone: the key is treated as deleted inside that context.

    Behavior of spawning a subprocess:
    - The contextual overrides will not be applied to the subprocess by
      default.
    - When using env=os.environ to pass the environment variables to the
      subprocess explicitly, the subprocess will inherit the contextual
      environment variables at the time of the spawn, that is, it will not
      see the updates to the environment variables after the spawn. Also,
      os.environ of the subprocess will not be a ContextualEnviron unless
      the subprocess hijacks os.environ explicitly.
    - Optionally, context.Popen() can be used to automatically pass
      os.environ with overrides to subprocess.


    Example:
    1. Parent process:
        # Hijack os.environ to be a ContextualEnviron
        os.environ = ContextualEnviron(os.environ)
        ctx = context.get()
        ctx.override_envs({'FOO': 'BAR1'})
        proc = subprocess.Popen(..., env=os.environ)
        # Or use context.Popen instead
        # proc = context.Popen(...)
        ctx.override_envs({'FOO': 'BAR2'})
    2. Subprocess:
        assert os.environ['FOO'] == 'BAR1'
        ctx = context.get()
        # Override the contextual env var in the subprocess does not take
        # effect since the os.environ is not hijacked.
        ctx.override_envs({'FOO': 'BAR3'})
        assert os.environ['FOO'] == 'BAR1'
    """

    def __init__(self, environ: 'os._Environ[str]') -> None:
        """Wrap ``environ``; it is used whenever no override applies."""
        self._environ = environ

    def __getitem__(self, key: str) -> str:
        """Return the contextual value of ``key``.

        Raises:
            KeyError: If the key is deleted in the current context, or is
                neither overridden nor present in the wrapped environ.
        """
        ctx = get()
        if ctx is not None:
            if key in ctx.env_overrides:
                value = ctx.env_overrides[key]
                # None is used to indicate that the key is deleted in the
                # context.
                if value is None:
                    raise KeyError(key)
                return value
        return self._environ[key]

    def __iter__(self) -> Iterator[str]:
        """Iterate over the keys visible in the current context."""

        def iter_from_context(ctx: 'SkyPilotContext') -> Iterator[str]:
            deleted_keys = set()
            # Overridden keys come first; tombstones are only remembered.
            for key, value in ctx.env_overrides.items():
                if value is None:
                    deleted_keys.add(key)
                else:
                    yield key
            for key in self._environ:
                # Deduplicate the keys
                if key not in ctx.env_overrides and key not in deleted_keys:
                    yield key

        ctx = get()
        if ctx is not None:
            return iter_from_context(ctx)
        else:
            return self._environ.__iter__()

    def __len__(self) -> int:
        """Return the number of keys visible in the current context."""
        return len(dict(self))

    def __setitem__(self, key: str, value: str) -> None:
        """Set ``key`` in the current context, or in the wrapped environ when
        no context is initialized."""
        ctx = get()
        if ctx is not None:
            ctx.env_overrides[key] = value
        else:
            self._environ.__setitem__(key, value)

    def __delitem__(self, key: str) -> None:
        """Delete ``key`` from the current context's view of the environ.

        Raises:
            KeyError: If a context is active and the key is neither in the
                wrapped environ nor in the context overrides.
        """
        ctx = get()
        if ctx is not None:
            if key in self._environ:
                # If the key is set in the environ of the process, we mark it as
                # deleted in the context by setting the value to None.
                # Note: we must do this even if it was also set in the context,
                # since it could be set in both, and deleting should delete it
                # from both.
                ctx.env_overrides[key] = None
            elif key in ctx.env_overrides:
                # If the key is set in the context, but not the original
                # environ, we can just delete the override.
                del ctx.env_overrides[key]
            else:
                # The key is not set in the context nor the process.
                raise KeyError(key)
        else:
            self._environ.__delitem__(key)

    def __repr__(self) -> str:
        # Adapted from os._Environ.__repr__
        formatted_items = ', '.join(
            f'{key!r}: {value!r}' for key, value in self.items())
        return f'ctx_environ({{{formatted_items}}})'

    def copy(self) -> Dict[str, str]:
        """Return a plain dict snapshot of the contextual environ.

        The snapshot starts from a copy of the wrapped environ and then
        applies the overrides of the current context, if any.
        """
        copied = self._environ.copy()
        ctx = get()
        if ctx is not None:
            for key, value in ctx.env_overrides.items():
                if value is None:
                    # A tombstone may refer to a key that is not (or no
                    # longer) in the wrapped environ, e.g. when the overrides
                    # were inherited from another context. Dropping it must
                    # not fail, matching __getitem__ and __iter__.
                    copied.pop(key, None)
                else:
                    copied[key] = value
        return copied

    def setdefault(self, key: str, default: str) -> str:
        # NOTE(review): this reads and writes the wrapped environ directly
        # and does not consult the context overrides - confirm that bypassing
        # the context here is intended.
        return self._environ.setdefault(key, default)

    def __ior__(self, other):
        if not isinstance(other, Mapping):
            return NotImplemented
        self.update(other)
        return self

    def __or__(self, other):
        if not isinstance(other, Mapping):
            return NotImplemented
        new = dict(self)
        new.update(other)
        return new

    def __ror__(self, other):
        if not isinstance(other, Mapping):
            return NotImplemented
        new = dict(other)
        new.update(self)
        return new
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
class Popen(subprocess.Popen):
    """subprocess.Popen that defaults ``env`` to a copy of os.environ."""

    def __init__(self, *args, **kwargs):
        """Start the process, filling in ``env`` when the caller gave none.

        An explicit, non-None ``env`` is forwarded untouched. Otherwise a
        copy of os.environ taken at construction time is passed, to avoid a
        race condition when the context is updated after the Popen is
        created.
        """
        requested_env = kwargs.pop('env', None)
        effective_env = (os.environ.copy()
                         if requested_env is None else requested_env)
        super().__init__(*args, env=effective_env,
                         **kwargs)  # type: ignore[call-overload]
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
P = ParamSpec('P')
T = TypeVar('T')


def contextual(func: Callable[P, T]) -> Callable[P, T]:
    """Decorator that runs ``func`` inside a freshly initialized context.

    When a SkyPilot context is already initialized at call time, the new
    context is created from it via ``initialize`` and so inherits its values.
    """

    def _call_with_new_context(*args: P.args, **kwargs: P.kwargs) -> T:
        # Runs inside the copied contextvars Context: set up the
        # SkyPilotContext there and leave it when func returns.
        parent_ctx = get()
        with initialize(parent_ctx):
            return func(*args, **kwargs)

    @functools.wraps(func)
    def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
        # Work on a copy of the current contextvars Context so that setting
        # the SkyPilotContext does not affect the caller's context in async
        # environments.
        isolated = contextvars.copy_context()
        return isolated.run(_call_with_new_context, *args, **kwargs)

    return wrapper
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def contextual_async(
    func: Callable[P, Coroutine[Any, Any, T]]
) -> Callable[P, Coroutine[Any, Any, T]]:
    """Decorator to initialize a context before executing the function.

    If a context is already initialized, this decorator will create a new
    context that inherits the values from the existing context.

    When the decorated coroutine finishes (normally, with an exception, or
    by cancellation), the SkyPilot context that was current for the caller
    is put back, so the caller never observes the temporary context.
    """

    @functools.wraps(func)
    async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
        # A coroutine body always executes in the contextvars Context of the
        # task that awaits it. Creating the coroutine through
        # contextvars.Context.run() therefore does NOT isolate it: the
        # _CONTEXT.set() done by initialize() lands in the caller's context.
        # Restore the caller's value explicitly instead.
        caller_ctx = get()
        try:
            with initialize(caller_ctx):
                return await func(*args, **kwargs)
        finally:
            _CONTEXT.set(caller_ctx)

    return wrapper
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
def initialize(
        base_context: Optional[SkyPilotContext] = None) -> SkyPilotContext:
    """Create a SkyPilot context and make it the current one.

    Args:
        base_context: Optional context to derive the new one from. When
            given, the new context is ``base_context.copy()``; otherwise a
            blank SkyPilotContext is created.

    Returns:
        The newly created context, which has been stored in ``_CONTEXT``.
    """
    if base_context is None:
        new_context = SkyPilotContext()
    else:
        new_context = base_context.copy()
    _CONTEXT.set(new_context)
    return new_context
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
class _ContextualStream:
    """Base class for streams that follow the current SkyPilot context.

    The TextIO interface is provided through __getattr__: attribute access
    is forwarded to the context's output stream when a context is active,
    and to the original stream otherwise.
    """
    _original_stream: TextIO

    def __init__(self, original_stream: TextIO):
        self._original_stream = original_stream

    def __getattr__(self, attr: str):
        # Only reached for attributes not found on the wrapper itself.
        target = self._active_stream()
        return getattr(target, attr)

    def _active_stream(self) -> TextIO:
        """Return the stream that should currently receive the output."""
        ctx = get()
        original = self._original_stream
        return original if ctx is None else ctx.output_stream(original)
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
class Stdout(_ContextualStream):
    """Contextual stream whose fallback is the sys.stdout object that is
    installed at construction time."""

    def __init__(self):
        super().__init__(sys.stdout)
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
class Stderr(_ContextualStream):
    """Contextual stream whose fallback is the sys.stderr object that is
    installed at construction time."""

    def __init__(self):
        super().__init__(sys.stderr)
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
"""Utilities for SkyPilot context."""
|
|
2
|
+
import asyncio
|
|
3
|
+
import concurrent.futures
|
|
4
|
+
import contextvars
|
|
5
|
+
import functools
|
|
6
|
+
import multiprocessing
|
|
7
|
+
import os
|
|
8
|
+
import select
|
|
9
|
+
import subprocess
|
|
10
|
+
import sys
|
|
11
|
+
import time
|
|
12
|
+
import typing
|
|
13
|
+
from typing import Any, Callable, IO, Optional, Tuple, TypeVar
|
|
14
|
+
|
|
15
|
+
from typing_extensions import ParamSpec
|
|
16
|
+
|
|
17
|
+
from sky import sky_logging
|
|
18
|
+
from sky.utils import context
|
|
19
|
+
from sky.utils import subprocess_utils
|
|
20
|
+
|
|
21
|
+
# Signature of a stream handler: it is called with (input stream, output
# stream) and returns a string.
StreamHandler = Callable[[IO[Any], IO[Any]], str]
# Interval in seconds used by passthrough_stream_handler, both as its poll
# timeout and as the minimum time between two flushes of the output stream.
PASSTHROUGH_FLUSH_INTERVAL_SECONDS = 0.5

logger = sky_logging.init_logger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# TODO(aylei): call hijack_sys_attrs() proactively in module init at server-side
|
|
28
|
+
# once we have context widely adopted.
|
|
29
|
+
def hijack_sys_attrs():
    """Replace process-wide system attributes with context-aware versions.

    This function should be called at the very beginning of the processes
    that might use sky.utils.context. It swaps sys.stdout, sys.stderr,
    os.environ and subprocess.Popen for their contextual counterparts.
    """
    # Make stdout and stderr of the uvicorn process contextually aware. The
    # wrappers are not TextIO subclasses, hence the type-check suppressions.
    sys.stdout = context.Stdout()  # type: ignore[assignment]
    sys.stderr = context.Stderr()  # type: ignore[assignment]
    # Reload logger to apply latest stdout and stderr.
    sky_logging.reload_logger()
    # Make env variables contextually aware.
    os.environ = context.ContextualEnviron(  # type: ignore[assignment]
        os.environ)
    # Pass the contextual environ to subprocesses by default.
    subprocess.Popen = context.Popen  # type: ignore[misc]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def passthrough_stream_handler(in_stream: IO[Any], out_stream: IO[Any]) -> str:
    """Passthrough the stream from the process to the output stream.

    Bytes are read from the file descriptor of ``in_stream`` in chunks of up
    to 4 KiB, decoded as UTF-8 (undecodable bytes become U+FFFD) and written
    to ``out_stream`` until EOF.

    Args:
        in_stream: Stream to read from; only its ``fileno()`` is used.
        out_stream: Text stream that receives the decoded output.

    Returns:
        Always the empty string; the content is forwarded, not collected.
    """
    import codecs  # pylint: disable=import-outside-toplevel

    last_flush_time = time.time()
    has_unflushed_content = False
    # Decode incrementally: a multi-byte UTF-8 character may be split across
    # two reads, and decoding each chunk on its own would turn both halves
    # into replacement characters.
    decoder = codecs.getincrementaldecoder('utf-8')(errors='replace')

    # Use poll() with timeout instead of readline() to avoid blocking.
    # readline() blocks until a newline is available, which can take minutes
    # for tasks that emit logs infrequently (e.g. jupyter lab server).
    # While readline() is blocked, the timing code never executes, so buffered
    # logs never get flushed. poll() with timeout allows us to periodically
    # flush even when no new data is available, ensuring logs appear promptly.
    fd = in_stream.fileno()
    poller = select.poll()
    poller.register(fd, select.POLLIN)

    # Timeout in milliseconds for poll()
    poll_timeout_ms = int(PASSTHROUGH_FLUSH_INTERVAL_SECONDS * 1000)

    while True:
        # Poll with timeout - returns when data available or timeout
        events = poller.poll(poll_timeout_ms)

        current_time = time.time()

        if events:
            # Data is available, read a chunk
            chunk = os.read(fd, 4096)  # Read up to 4KB
            if not chunk:
                break  # EOF
            text = decoder.decode(chunk)
            # The decoder returns '' while it is still holding back an
            # incomplete character.
            if text:
                out_stream.write(text)
                has_unflushed_content = True

        # Flush only if we have unflushed content and timeout reached
        if (has_unflushed_content and current_time - last_flush_time >=
                PASSTHROUGH_FLUSH_INTERVAL_SECONDS):
            out_stream.flush()
            last_flush_time = current_time
            has_unflushed_content = False

    poller.unregister(fd)
    # Emit whatever the decoder still holds (a truncated trailing character
    # is rendered as U+FFFD).
    tail = decoder.decode(b'', final=True)
    if tail:
        out_stream.write(tail)
        has_unflushed_content = True
    # Final flush to ensure all data is written
    if has_unflushed_content:
        out_stream.flush()

    return ''
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def pipe_and_wait_process(
    ctx: context.SkyPilotContext,
    proc: subprocess.Popen,
    poll_interval: float = 0.5,
    cancel_callback: Optional[Callable[[], None]] = None,
    stdout_stream_handler: Optional[StreamHandler] = None,
    stderr_stream_handler: Optional[StreamHandler] = None
) -> Tuple[str, str]:
    """Wait for the process to finish or cancel it if the context is cancelled.

    While waiting, the process's stdout (and stderr, when it is piped) is
    consumed by stream handlers running in worker threads.

    Args:
        ctx: The context whose cancellation is observed and whose output
            stream receives the process output.
        proc: The process to wait for.
        poll_interval: The interval to poll the process.
        cancel_callback: The callback to call if the context is cancelled.
        stdout_stream_handler: An optional handler to handle the stdout stream,
            if None, the stdout stream will be passed through.
        stderr_stream_handler: An optional handler to handle the stderr stream,
            if None, the stderr stream will be passed through.

    Returns:
        A tuple of the values returned by the stdout handler and the stderr
        handler; the stderr value is '' when ``proc.stderr`` is None.

    Raises:
        asyncio.CancelledError: Raised by wait_process() when the context is
            cancelled.
    """
    # `import multiprocessing` alone does not load the `pool` submodule, so
    # `multiprocessing.pool` is only resolvable if some other module happened
    # to import it first. Import it explicitly here.
    import multiprocessing.pool  # pylint: disable=import-outside-toplevel

    if stdout_stream_handler is None:
        stdout_stream_handler = passthrough_stream_handler
    if stderr_stream_handler is None:
        stderr_stream_handler = passthrough_stream_handler

    # Threads are lazily created, so no harm if stderr is None
    with multiprocessing.pool.ThreadPool(processes=2) as pool:
        # Context will be lost in the new thread, capture current output stream
        # and pass it to the new thread directly.
        stdout_fut = pool.apply_async(
            stdout_stream_handler, (proc.stdout, ctx.output_stream(sys.stdout)))
        stderr_fut = None
        if proc.stderr is not None:
            stderr_fut = pool.apply_async(
                stderr_stream_handler,
                (proc.stderr, ctx.output_stream(sys.stderr)))
        try:
            wait_process(ctx,
                         proc,
                         poll_interval=poll_interval,
                         cancel_callback=cancel_callback)
        finally:
            # Wait for the stream handler threads to exit when process is done
            # or cancelled
            stdout_fut.wait()
            if stderr_fut is not None:
                stderr_fut.wait()
        # get() re-raises any exception raised inside a handler thread.
        stdout = stdout_fut.get()
        stderr = ''
        if stderr_fut is not None:
            stderr = stderr_fut.get()
        return stdout, stderr
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def wait_process(ctx: context.SkyPilotContext,
                 proc: subprocess.Popen,
                 poll_interval: float = 0.5,
                 cancel_callback: Optional[Callable[[], None]] = None):
    """Block until ``proc`` exits, aborting it if the context is cancelled.

    Cancellation is checked before every wait of at most ``poll_interval``
    seconds.

    Args:
        ctx: The context whose cancellation state is polled.
        proc: The process to wait for.
        poll_interval: The interval to poll the process.
        cancel_callback: The callback to call if the context is cancelled.

    Raises:
        asyncio.CancelledError: If the context is cancelled; the process is
            killed before the error is raised.
    """
    while True:
        if ctx.is_canceled():
            if cancel_callback is not None:
                cancel_callback()
            # Kill the process regardless of what the callback did; the
            # utility gracefully handles an already terminated process.
            # Bash scripts typically do not forward SIGTERM to their
            # children and thus cannot be killed gracefully, so a short
            # grace period is used for faster termination.
            subprocess_utils.kill_process_with_grace_period(proc,
                                                            grace_period=1)
            raise asyncio.CancelledError()
        try:
            proc.wait(poll_interval)
        except subprocess.TimeoutExpired:
            # Still running: go back and check for cancellation again.
            continue
        # Process exited
        return
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
F = TypeVar('F', bound=Callable[..., Any])


def cancellation_guard(func: F) -> F:
    """Decorator that makes a synchronous function cancellable via context.

    Each call first looks up the current SkyPilot context. If there is one
    and it reports being cancelled, asyncio.CancelledError is raised and
    ``func`` is not executed; otherwise the call is forwarded unchanged.

    This basically mimics the behavior of asyncio, which checks whether the
    coroutine is cancelled at an await call.

    Args:
        func: The function to be decorated.

    Returns:
        The wrapped function that checks cancellation before execution.

    Raises:
        asyncio.CancelledError: If the context is cancelled before execution.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        ctx = context.get()
        already_cancelled = ctx is not None and ctx.is_canceled()
        if already_cancelled:
            raise asyncio.CancelledError(
                f'Function {func.__name__} cancelled before execution')
        return func(*args, **kwargs)

    return typing.cast(F, wrapper)
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
P = ParamSpec('P')
T = TypeVar('T')


# TODO(aylei): replace this with asyncio.to_thread once we drop support for
# python 3.8
def to_thread(func: Callable[P, T], /, *args: P.args,
              **kwargs: P.kwargs) -> 'asyncio.Future[T]':
    """Run ``func(*args, **kwargs)`` asynchronously in a separate thread.

    Equivalent to asyncio.to_thread, which was added in python 3.9. The call
    is submitted with no explicit executor (``None``).

    Returns:
        A future for the result of the call.
    """
    return to_thread_with_executor(None, func, *args, **kwargs)
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def to_thread_with_executor(executor: Optional[concurrent.futures.Executor],
                            func: Callable[P, T], /, *args: P.args,
                            **kwargs: P.kwargs) -> 'asyncio.Future[T]':
    """Run ``func(*args, **kwargs)`` asynchronously on ``executor``.

    The call is executed inside a copy of the caller's contextvars Context,
    so context variables visible to the caller are visible to ``func``.

    Args:
        executor: Executor to submit the call to; ``None`` is passed through
            to ``loop.run_in_executor``.
        func: The callable to run.
        *args: Positional arguments for ``func``.
        **kwargs: Keyword arguments for ``func``.

    Returns:
        A future for the result of the call.
    """
    caller_context = contextvars.copy_context()
    bound_call: Callable[..., T] = functools.partial(caller_context.run, func,
                                                     *args, **kwargs)
    return asyncio.get_running_loop().run_in_executor(executor, bound_call)
|