skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251210__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +12 -2
- sky/adaptors/aws.py +27 -22
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +64 -0
- sky/adaptors/nebius.py +3 -1
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/adaptors/slurm.py +478 -0
- sky/admin_policy.py +20 -0
- sky/authentication.py +157 -263
- sky/backends/__init__.py +3 -2
- sky/backends/backend.py +11 -3
- sky/backends/backend_utils.py +630 -185
- sky/backends/cloud_vm_ray_backend.py +1111 -928
- sky/backends/local_docker_backend.py +9 -5
- sky/backends/task_codegen.py +971 -0
- sky/backends/wheel_utils.py +18 -0
- sky/catalog/__init__.py +8 -3
- sky/catalog/aws_catalog.py +4 -0
- sky/catalog/common.py +19 -1
- sky/catalog/data_fetchers/fetch_aws.py +102 -80
- sky/catalog/data_fetchers/fetch_gcp.py +30 -3
- sky/catalog/data_fetchers/fetch_nebius.py +9 -6
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/catalog/kubernetes_catalog.py +36 -32
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/catalog/runpod_catalog.py +5 -1
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/slurm_catalog.py +243 -0
- sky/check.py +87 -46
- sky/client/cli/command.py +1004 -434
- sky/client/cli/flags.py +4 -2
- sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +12 -2
- sky/client/sdk.py +188 -65
- sky/client/sdk_async.py +34 -33
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +8 -0
- sky/clouds/aws.py +337 -129
- sky/clouds/azure.py +24 -18
- sky/clouds/cloud.py +47 -13
- sky/clouds/cudo.py +16 -13
- sky/clouds/do.py +9 -7
- sky/clouds/fluidstack.py +12 -5
- sky/clouds/gcp.py +14 -7
- sky/clouds/hyperbolic.py +12 -5
- sky/clouds/ibm.py +12 -5
- sky/clouds/kubernetes.py +80 -45
- sky/clouds/lambda_cloud.py +12 -5
- sky/clouds/nebius.py +23 -9
- sky/clouds/oci.py +19 -12
- sky/clouds/paperspace.py +4 -1
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +85 -24
- sky/clouds/scp.py +12 -8
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/slurm.py +578 -0
- sky/clouds/ssh.py +6 -3
- sky/clouds/utils/scp_utils.py +61 -50
- sky/clouds/vast.py +43 -27
- sky/clouds/vsphere.py +14 -16
- sky/core.py +296 -195
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/KYAhEFa3FTfq4JyKVgo-s/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-9c810f01ff4f398a.js +11 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.ddda8c6c6f9f24dc.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-b589397dc09c5b4e.js} +1 -1
- sky/dashboard/out/_next/static/chunks/3850-fd5696f3bbbaddae.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-da20c5fd999f319c.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-09cbf02d3cd518c3.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-8369df1cf105221c.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-68b647e26f9d2793.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33f525539665fdfd.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a7565f586ef86467.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-9e5d47818b9bdadd.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{config-dfb9bf07b13045f4.js → config-718cdc365de82689.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-12c559ec4d81fdbd.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-d187cd0413d72475.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-895847b6cf200b04.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-8d0f4655400b4eb9.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-e5a98f17f8513a96.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/plugins/[...slug]-4f46050ca065d8f8.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-2f7646eb77785a2c.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-ef19d49c6d0e8500.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-96e0f298308da7e2.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-cb4da3abe08ebf19.js} +1 -1
- sky/dashboard/out/_next/static/chunks/webpack-fba3de387ff6bb08.js +1 -0
- sky/dashboard/out/_next/static/css/c5a4cfd2600fc715.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/plugins/[...slug].html +1 -0
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/data_utils.py +92 -1
- sky/data/mounting_utils.py +177 -30
- sky/data/storage.py +200 -19
- sky/data/storage_utils.py +10 -45
- sky/exceptions.py +18 -7
- sky/execution.py +74 -31
- sky/global_user_state.py +605 -191
- sky/jobs/__init__.py +2 -0
- sky/jobs/client/sdk.py +101 -4
- sky/jobs/client/sdk_async.py +31 -5
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +726 -284
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +250 -100
- sky/jobs/scheduler.py +271 -173
- sky/jobs/server/core.py +367 -114
- sky/jobs/server/server.py +81 -35
- sky/jobs/server/utils.py +89 -35
- sky/jobs/state.py +1498 -620
- sky/jobs/utils.py +771 -306
- sky/logs/agent.py +40 -5
- sky/logs/aws.py +9 -19
- sky/metrics/utils.py +282 -39
- sky/models.py +2 -0
- sky/optimizer.py +7 -6
- sky/provision/__init__.py +38 -1
- sky/provision/aws/config.py +34 -13
- sky/provision/aws/instance.py +5 -2
- sky/provision/azure/instance.py +5 -3
- sky/provision/common.py +22 -0
- sky/provision/cudo/instance.py +4 -3
- sky/provision/do/instance.py +4 -3
- sky/provision/docker_utils.py +112 -28
- sky/provision/fluidstack/instance.py +6 -5
- sky/provision/gcp/config.py +6 -1
- sky/provision/gcp/instance.py +4 -2
- sky/provision/hyperbolic/instance.py +4 -2
- sky/provision/instance_setup.py +66 -20
- sky/provision/kubernetes/__init__.py +2 -0
- sky/provision/kubernetes/config.py +7 -44
- sky/provision/kubernetes/constants.py +0 -1
- sky/provision/kubernetes/instance.py +609 -213
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +12 -8
- sky/provision/kubernetes/network_utils.py +8 -25
- sky/provision/kubernetes/utils.py +422 -422
- sky/provision/kubernetes/volume.py +150 -18
- sky/provision/lambda_cloud/instance.py +16 -13
- sky/provision/nebius/instance.py +6 -2
- sky/provision/nebius/utils.py +103 -86
- sky/provision/oci/instance.py +4 -2
- sky/provision/paperspace/instance.py +4 -3
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +45 -15
- sky/provision/runpod/__init__.py +2 -0
- sky/provision/runpod/instance.py +4 -3
- sky/provision/runpod/volume.py +69 -13
- sky/provision/scp/instance.py +307 -130
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/slurm/__init__.py +12 -0
- sky/provision/slurm/config.py +13 -0
- sky/provision/slurm/instance.py +572 -0
- sky/provision/slurm/utils.py +583 -0
- sky/provision/vast/instance.py +9 -4
- sky/provision/vast/utils.py +10 -6
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +3 -2
- sky/provision/vsphere/instance.py +8 -6
- sky/provision/vsphere/vsphere_utils.py +8 -1
- sky/resources.py +11 -3
- sky/schemas/api/responses.py +107 -6
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +2 -0
- sky/serve/client/impl.py +55 -21
- sky/serve/constants.py +4 -3
- sky/serve/controller.py +17 -11
- sky/serve/load_balancing_policies.py +1 -1
- sky/serve/replica_managers.py +219 -142
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +63 -54
- sky/serve/serve_utils.py +145 -109
- sky/serve/server/core.py +46 -25
- sky/serve/server/impl.py +311 -162
- sky/serve/server/server.py +21 -19
- sky/serve/service.py +84 -68
- sky/serve/service_spec.py +45 -7
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +12 -7
- sky/server/common.py +47 -24
- sky/server/config.py +62 -28
- sky/server/constants.py +9 -1
- sky/server/daemons.py +109 -38
- sky/server/metrics.py +76 -96
- sky/server/middleware_utils.py +166 -0
- sky/server/plugins.py +222 -0
- sky/server/requests/executor.py +384 -145
- sky/server/requests/payloads.py +83 -19
- sky/server/requests/preconditions.py +15 -13
- sky/server/requests/request_names.py +123 -0
- sky/server/requests/requests.py +511 -157
- sky/server/requests/serializers/decoders.py +48 -17
- sky/server/requests/serializers/encoders.py +102 -20
- sky/server/requests/serializers/return_value_serializers.py +60 -0
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +116 -24
- sky/server/server.py +497 -179
- sky/server/server_utils.py +30 -0
- sky/server/stream_utils.py +219 -45
- sky/server/uvicorn.py +30 -19
- sky/setup_files/MANIFEST.in +6 -1
- sky/setup_files/alembic.ini +8 -0
- sky/setup_files/dependencies.py +64 -19
- sky/setup_files/setup.py +44 -44
- sky/sky_logging.py +13 -5
- sky/skylet/attempt_skylet.py +116 -24
- sky/skylet/configs.py +3 -1
- sky/skylet/constants.py +139 -29
- sky/skylet/events.py +74 -14
- sky/skylet/executor/__init__.py +1 -0
- sky/skylet/executor/slurm.py +189 -0
- sky/skylet/job_lib.py +143 -105
- sky/skylet/log_lib.py +252 -8
- sky/skylet/log_lib.pyi +47 -7
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +524 -0
- sky/skylet/skylet.py +27 -2
- sky/skylet/subprocess_daemon.py +104 -28
- sky/skypilot_config.py +99 -79
- sky/ssh_node_pools/constants.py +12 -0
- sky/ssh_node_pools/core.py +40 -3
- sky/ssh_node_pools/deploy/__init__.py +4 -0
- sky/ssh_node_pools/deploy/deploy.py +952 -0
- sky/ssh_node_pools/deploy/tunnel_utils.py +199 -0
- sky/ssh_node_pools/deploy/utils.py +173 -0
- sky/ssh_node_pools/server.py +20 -21
- sky/{utils/kubernetes/ssh_utils.py → ssh_node_pools/utils.py} +9 -6
- sky/task.py +221 -104
- sky/templates/aws-ray.yml.j2 +1 -0
- sky/templates/azure-ray.yml.j2 +1 -0
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +1 -0
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +1 -0
- sky/templates/hyperbolic-ray.yml.j2 +1 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +3 -0
- sky/templates/kubernetes-ray.yml.j2 +204 -55
- sky/templates/lambda-ray.yml.j2 +1 -0
- sky/templates/nebius-ray.yml.j2 +3 -0
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +1 -0
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +1 -0
- sky/templates/scp-ray.yml.j2 +1 -0
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/slurm-ray.yml.j2 +85 -0
- sky/templates/vast-ray.yml.j2 +2 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +188 -43
- sky/usage/usage_lib.py +16 -4
- sky/users/model.conf +1 -1
- sky/users/permission.py +84 -44
- sky/users/rbac.py +31 -3
- sky/utils/accelerator_registry.py +6 -3
- sky/utils/admin_policy_utils.py +18 -5
- sky/utils/annotations.py +128 -6
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/cli_utils/status_utils.py +12 -7
- sky/utils/cluster_utils.py +28 -6
- sky/utils/command_runner.py +283 -30
- sky/utils/command_runner.pyi +63 -7
- sky/utils/common.py +3 -1
- sky/utils/common_utils.py +55 -7
- sky/utils/config_utils.py +1 -14
- sky/utils/context.py +127 -40
- sky/utils/context_utils.py +73 -18
- sky/utils/controller_utils.py +229 -70
- sky/utils/db/db_utils.py +95 -18
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +24 -7
- sky/utils/env_options.py +4 -0
- sky/utils/git.py +559 -1
- sky/utils/kubernetes/create_cluster.sh +15 -30
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/gpu_labeler.py +13 -3
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +187 -260
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes/ssh-tunnel.sh +7 -376
- sky/utils/kubernetes_enums.py +7 -15
- sky/utils/lock_events.py +4 -4
- sky/utils/locks.py +128 -31
- sky/utils/log_utils.py +0 -319
- sky/utils/resource_checker.py +13 -10
- sky/utils/resources_utils.py +53 -29
- sky/utils/rich_utils.py +8 -4
- sky/utils/schemas.py +138 -52
- sky/utils/subprocess_utils.py +17 -4
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +2 -1
- sky/utils/ux_utils.py +35 -1
- sky/utils/volume.py +88 -4
- sky/utils/yaml_utils.py +9 -0
- sky/volumes/client/sdk.py +48 -10
- sky/volumes/server/core.py +59 -22
- sky/volumes/server/server.py +46 -17
- sky/volumes/volume.py +54 -42
- sky/workspaces/core.py +57 -21
- sky/workspaces/server.py +13 -12
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/METADATA +343 -65
- skypilot_nightly-1.0.0.dev20251210.dist-info/RECORD +629 -0
- skypilot_nightly-1.0.0.dev20251210.dist-info/top_level.txt +2 -0
- sky/client/cli/git.py +0 -549
- sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
- sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
- sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
- sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
- sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
- sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +0 -1
- sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
- sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
- sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
- sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
- sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
- sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
- sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
- sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
- sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
- sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
- sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
- sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
- sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
- sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/_app-ce361c6959bc2001.js +0 -34
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
- sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
- sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/kubernetes/cleanup-tunnel.sh +0 -62
- sky/utils/kubernetes/deploy_remote_cluster.py +0 -1299
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
- skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
- /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → KYAhEFa3FTfq4JyKVgo-s}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/licenses/LICENSE +0 -0
sky/utils/common_utils.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
"""Utils shared between all of sky"""
|
|
2
2
|
|
|
3
|
+
import ctypes
|
|
3
4
|
import difflib
|
|
4
5
|
import enum
|
|
5
6
|
import functools
|
|
7
|
+
import gc
|
|
6
8
|
import getpass
|
|
7
9
|
import hashlib
|
|
8
10
|
import inspect
|
|
@@ -263,13 +265,16 @@ def get_global_job_id(job_timestamp: str,
|
|
|
263
265
|
|
|
264
266
|
class Backoff:
|
|
265
267
|
"""Exponential backoff with jittering."""
|
|
266
|
-
MULTIPLIER = 1.6
|
|
267
268
|
JITTER = 0.4
|
|
268
269
|
|
|
269
|
-
def __init__(self,
|
|
270
|
+
def __init__(self,
|
|
271
|
+
initial_backoff: float = 5,
|
|
272
|
+
max_backoff_factor: int = 5,
|
|
273
|
+
multiplier: float = 1.6):
|
|
270
274
|
self._initial = True
|
|
271
275
|
self._backoff = 0.0
|
|
272
276
|
self._initial_backoff = initial_backoff
|
|
277
|
+
self._multiplier = multiplier
|
|
273
278
|
self._max_backoff = max_backoff_factor * self._initial_backoff
|
|
274
279
|
|
|
275
280
|
# https://github.com/grpc/grpc/blob/2d4f3c56001cd1e1f85734b2f7c5ce5f2797c38a/doc/connection-backoff.md
|
|
@@ -281,7 +286,7 @@ class Backoff:
|
|
|
281
286
|
self._initial = False
|
|
282
287
|
self._backoff = min(self._initial_backoff, self._max_backoff)
|
|
283
288
|
else:
|
|
284
|
-
self._backoff = min(self._backoff * self.
|
|
289
|
+
self._backoff = min(self._backoff * self._multiplier,
|
|
285
290
|
self._max_backoff)
|
|
286
291
|
self._backoff += random.uniform(-self.JITTER * self._backoff,
|
|
287
292
|
self.JITTER * self._backoff)
|
|
@@ -295,6 +300,7 @@ _current_user: Optional['models.User'] = None
|
|
|
295
300
|
_current_request_id: Optional[str] = None
|
|
296
301
|
|
|
297
302
|
|
|
303
|
+
# TODO(aylei,hailong): request context should be contextual
|
|
298
304
|
def set_request_context(client_entrypoint: Optional[str],
|
|
299
305
|
client_command: Optional[str],
|
|
300
306
|
using_remote_api_server: bool,
|
|
@@ -336,19 +342,32 @@ def get_current_command() -> str:
|
|
|
336
342
|
|
|
337
343
|
|
|
338
344
|
def get_current_user() -> 'models.User':
|
|
339
|
-
"""Returns the current
|
|
345
|
+
"""Returns the user in current server session."""
|
|
340
346
|
if _current_user is not None:
|
|
341
347
|
return _current_user
|
|
342
348
|
return models.User.get_current_user()
|
|
343
349
|
|
|
344
350
|
|
|
345
351
|
def get_current_user_name() -> str:
|
|
346
|
-
"""Returns the current
|
|
352
|
+
"""Returns the user name in current server session."""
|
|
347
353
|
name = get_current_user().name
|
|
348
354
|
assert name is not None
|
|
349
355
|
return name
|
|
350
356
|
|
|
351
357
|
|
|
358
|
+
def get_local_user_name() -> str:
|
|
359
|
+
"""Returns the user name in local environment.
|
|
360
|
+
|
|
361
|
+
This is for backward compatibility where anonymous access is implicitly
|
|
362
|
+
allowed when no authentication method at server-side is configured and
|
|
363
|
+
the username from client environment variable will be used to identify the
|
|
364
|
+
user.
|
|
365
|
+
"""
|
|
366
|
+
name = os.getenv(constants.USER_ENV_VAR, getpass.getuser())
|
|
367
|
+
assert name is not None
|
|
368
|
+
return name
|
|
369
|
+
|
|
370
|
+
|
|
352
371
|
def set_current_user(user: 'models.User'):
|
|
353
372
|
"""Sets the current user."""
|
|
354
373
|
global _current_user
|
|
@@ -719,7 +738,8 @@ def find_free_port(start_port: int) -> int:
|
|
|
719
738
|
try:
|
|
720
739
|
s.bind(('', port))
|
|
721
740
|
return port
|
|
722
|
-
except OSError:
|
|
741
|
+
except OSError as e:
|
|
742
|
+
logger.debug(f'Error binding port {port}: {e}')
|
|
723
743
|
pass
|
|
724
744
|
raise OSError('No free ports available.')
|
|
725
745
|
|
|
@@ -994,7 +1014,17 @@ def get_mem_size_gb() -> float:
|
|
|
994
1014
|
except ValueError as e:
|
|
995
1015
|
with ux_utils.print_exception_no_traceback():
|
|
996
1016
|
raise ValueError(
|
|
997
|
-
f'Failed to parse the memory size from {mem_size}'
|
|
1017
|
+
f'Failed to parse the memory size from {mem_size} (GB)'
|
|
1018
|
+
) from e
|
|
1019
|
+
mem_size = os.getenv('SKYPILOT_POD_MEMORY_BYTES_LIMIT')
|
|
1020
|
+
if mem_size is not None:
|
|
1021
|
+
try:
|
|
1022
|
+
return float(mem_size) / (1024**3)
|
|
1023
|
+
except ValueError as e:
|
|
1024
|
+
with ux_utils.print_exception_no_traceback():
|
|
1025
|
+
raise ValueError(
|
|
1026
|
+
f'Failed to parse the memory size from {mem_size} (bytes)'
|
|
1027
|
+
) from e
|
|
998
1028
|
return _mem_size_gb()
|
|
999
1029
|
|
|
1000
1030
|
|
|
@@ -1090,3 +1120,21 @@ def removeprefix(string: str, prefix: str) -> str:
|
|
|
1090
1120
|
if string.startswith(prefix):
|
|
1091
1121
|
return string[len(prefix):]
|
|
1092
1122
|
return string
|
|
1123
|
+
|
|
1124
|
+
|
|
1125
|
+
def release_memory():
|
|
1126
|
+
"""Release the process memory"""
|
|
1127
|
+
# Do the best effort to release the python heap and let malloc_trim
|
|
1128
|
+
# be more efficient.
|
|
1129
|
+
try:
|
|
1130
|
+
gc.collect()
|
|
1131
|
+
if sys.platform.startswith('linux'):
|
|
1132
|
+
# Will fail on musl (alpine), but at least it works on our
|
|
1133
|
+
# official docker images.
|
|
1134
|
+
libc = ctypes.CDLL('libc.so.6')
|
|
1135
|
+
return libc.malloc_trim(0)
|
|
1136
|
+
return 0
|
|
1137
|
+
except Exception as e: # pylint: disable=broad-except
|
|
1138
|
+
logger.error(f'Failed to release memory: '
|
|
1139
|
+
f'{format_exception(e)}')
|
|
1140
|
+
return 0
|
sky/utils/config_utils.py
CHANGED
|
@@ -272,7 +272,7 @@ def get_cloud_config_value_from_dict(
|
|
|
272
272
|
"""
|
|
273
273
|
input_config = Config(dict_config)
|
|
274
274
|
region_key = None
|
|
275
|
-
if cloud
|
|
275
|
+
if cloud in ('kubernetes', 'ssh'):
|
|
276
276
|
region_key = 'context_configs'
|
|
277
277
|
elif cloud in _REGION_CONFIG_CLOUDS:
|
|
278
278
|
region_key = 'region_configs'
|
|
@@ -283,19 +283,6 @@ def get_cloud_config_value_from_dict(
|
|
|
283
283
|
keys=(cloud, region_key, region) + keys,
|
|
284
284
|
default_value=None,
|
|
285
285
|
override_configs=override_configs)
|
|
286
|
-
if not per_context_config and cloud in _REGION_CONFIG_CLOUDS:
|
|
287
|
-
# TODO (kyuds): Backward compatibility, remove after 0.11.0.
|
|
288
|
-
per_context_config = input_config.get_nested(
|
|
289
|
-
keys=(cloud, region) + keys,
|
|
290
|
-
default_value=None,
|
|
291
|
-
override_configs=override_configs)
|
|
292
|
-
if per_context_config is not None:
|
|
293
|
-
logger.info(
|
|
294
|
-
f'{cloud} configuration is using the legacy format. \n'
|
|
295
|
-
'This format will be deprecated after 0.11.0, refer to '
|
|
296
|
-
'`https://docs.skypilot.co/en/latest/reference/config.html` ' # pylint: disable=line-too-long
|
|
297
|
-
'for the new format. Please use `region_configs` to specify region specific configuration.'
|
|
298
|
-
)
|
|
299
286
|
# if no override found for specified region
|
|
300
287
|
general_config = input_config.get_nested(keys=(cloud,) + keys,
|
|
301
288
|
default_value=default_value,
|
sky/utils/context.py
CHANGED
|
@@ -2,18 +2,23 @@
|
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
from collections.abc import Mapping
|
|
5
|
-
from collections.abc import MutableMapping
|
|
6
5
|
import contextvars
|
|
6
|
+
import copy
|
|
7
7
|
import functools
|
|
8
8
|
import os
|
|
9
9
|
import pathlib
|
|
10
10
|
import subprocess
|
|
11
11
|
import sys
|
|
12
|
-
import
|
|
13
|
-
|
|
12
|
+
from typing import (Any, Callable, Coroutine, Dict, Iterator, MutableMapping,
|
|
13
|
+
Optional, TextIO, TYPE_CHECKING, TypeVar)
|
|
14
14
|
|
|
15
|
+
from typing_extensions import ParamSpec
|
|
15
16
|
|
|
16
|
-
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from sky.skypilot_config import ConfigContext
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class SkyPilotContext(object):
|
|
17
22
|
"""SkyPilot typed context vars for threads and coroutines.
|
|
18
23
|
|
|
19
24
|
This is a wrapper around `contextvars.ContextVar` that provides a typed
|
|
@@ -88,7 +93,7 @@ class Context(object):
|
|
|
88
93
|
else:
|
|
89
94
|
self._log_file_handle = open(log_file, 'a', encoding='utf-8')
|
|
90
95
|
self._log_file = log_file
|
|
91
|
-
if
|
|
96
|
+
if original_log_handle is not None:
|
|
92
97
|
original_log_handle.close()
|
|
93
98
|
return original_log_file
|
|
94
99
|
|
|
@@ -102,11 +107,40 @@ class Context(object):
|
|
|
102
107
|
for k, v in envs.items():
|
|
103
108
|
self.env_overrides[k] = v
|
|
104
109
|
|
|
110
|
+
def cleanup(self):
|
|
111
|
+
"""Clean up the context."""
|
|
112
|
+
if self._log_file_handle is not None:
|
|
113
|
+
self._log_file_handle.close()
|
|
114
|
+
self._log_file_handle = None
|
|
115
|
+
|
|
116
|
+
def __enter__(self):
|
|
117
|
+
return self
|
|
118
|
+
|
|
119
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
120
|
+
del exc_type, exc_val, exc_tb
|
|
121
|
+
self.cleanup()
|
|
122
|
+
|
|
123
|
+
def copy(self) -> 'SkyPilotContext':
|
|
124
|
+
"""Create a copy of the context.
|
|
125
|
+
|
|
126
|
+
Changes to the current context after this call will not affect the copy.
|
|
127
|
+
The new context will get its own handle/fd for the log file.
|
|
128
|
+
The new context will get an independent copy of the env var overrides.
|
|
129
|
+
The new context will get an independent copy of the config context.
|
|
130
|
+
Cancellation of the current context will not be propagated to the copy.
|
|
131
|
+
"""
|
|
132
|
+
new_context = SkyPilotContext()
|
|
133
|
+
new_context.redirect_log(self._log_file)
|
|
134
|
+
new_context.env_overrides = self.env_overrides.copy()
|
|
135
|
+
new_context.config_context = copy.deepcopy(self.config_context)
|
|
136
|
+
return new_context
|
|
137
|
+
|
|
105
138
|
|
|
106
|
-
_CONTEXT = contextvars.ContextVar('sky_context',
|
|
139
|
+
_CONTEXT = contextvars.ContextVar[Optional[SkyPilotContext]]('sky_context',
|
|
140
|
+
default=None)
|
|
107
141
|
|
|
108
142
|
|
|
109
|
-
def get() -> Optional[
|
|
143
|
+
def get() -> Optional[SkyPilotContext]:
|
|
110
144
|
"""Get the current SkyPilot context.
|
|
111
145
|
|
|
112
146
|
If the context is not initialized, get() will return None. This helps
|
|
@@ -116,7 +150,7 @@ def get() -> Optional[Context]:
|
|
|
116
150
|
return _CONTEXT.get()
|
|
117
151
|
|
|
118
152
|
|
|
119
|
-
class ContextualEnviron(MutableMapping):
|
|
153
|
+
class ContextualEnviron(MutableMapping[str, str]):
|
|
120
154
|
"""Environment variables wrapper with contextual overrides.
|
|
121
155
|
|
|
122
156
|
An instance of ContextualEnviron will typically be used to replace
|
|
@@ -124,7 +158,7 @@ class ContextualEnviron(MutableMapping):
|
|
|
124
158
|
aware.
|
|
125
159
|
|
|
126
160
|
Behavior of spawning a subprocess:
|
|
127
|
-
- The
|
|
161
|
+
- The contextual overrides will not be applied to the subprocess by
|
|
128
162
|
default.
|
|
129
163
|
- When using env=os.environ to pass the environment variables to the
|
|
130
164
|
subprocess explicitly. The subprocess will inherit the contextual
|
|
@@ -155,10 +189,10 @@ class ContextualEnviron(MutableMapping):
|
|
|
155
189
|
assert os.environ['FOO'] == 'BAR1'
|
|
156
190
|
"""
|
|
157
191
|
|
|
158
|
-
def __init__(self, environ):
|
|
192
|
+
def __init__(self, environ: 'os._Environ[str]') -> None:
|
|
159
193
|
self._environ = environ
|
|
160
194
|
|
|
161
|
-
def __getitem__(self, key):
|
|
195
|
+
def __getitem__(self, key: str) -> str:
|
|
162
196
|
ctx = get()
|
|
163
197
|
if ctx is not None:
|
|
164
198
|
if key in ctx.env_overrides:
|
|
@@ -170,51 +204,63 @@ class ContextualEnviron(MutableMapping):
|
|
|
170
204
|
return value
|
|
171
205
|
return self._environ[key]
|
|
172
206
|
|
|
173
|
-
def __iter__(self):
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
207
|
+
def __iter__(self) -> Iterator[str]:
|
|
208
|
+
|
|
209
|
+
def iter_from_context(ctx: SkyPilotContext) -> Iterator[str]:
|
|
210
|
+
deleted_keys = set()
|
|
177
211
|
for key, value in ctx.env_overrides.items():
|
|
178
212
|
if value is None:
|
|
179
213
|
deleted_keys.add(key)
|
|
180
|
-
|
|
214
|
+
else:
|
|
215
|
+
yield key
|
|
181
216
|
for key in self._environ:
|
|
182
217
|
# Deduplicate the keys
|
|
183
218
|
if key not in ctx.env_overrides and key not in deleted_keys:
|
|
184
219
|
yield key
|
|
220
|
+
|
|
221
|
+
ctx = get()
|
|
222
|
+
if ctx is not None:
|
|
223
|
+
return iter_from_context(ctx)
|
|
185
224
|
else:
|
|
186
225
|
return self._environ.__iter__()
|
|
187
226
|
|
|
188
|
-
def __len__(self):
|
|
227
|
+
def __len__(self) -> int:
|
|
189
228
|
return len(dict(self))
|
|
190
229
|
|
|
191
|
-
def __setitem__(self, key, value):
|
|
230
|
+
def __setitem__(self, key: str, value: str) -> None:
|
|
192
231
|
ctx = get()
|
|
193
232
|
if ctx is not None:
|
|
194
233
|
ctx.env_overrides[key] = value
|
|
195
234
|
else:
|
|
196
235
|
self._environ.__setitem__(key, value)
|
|
197
236
|
|
|
198
|
-
def __delitem__(self, key):
|
|
237
|
+
def __delitem__(self, key: str) -> None:
|
|
199
238
|
ctx = get()
|
|
200
239
|
if ctx is not None:
|
|
201
|
-
if key in
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
#
|
|
205
|
-
#
|
|
206
|
-
#
|
|
240
|
+
if key in self._environ:
|
|
241
|
+
# If the key is set in the environ of the process, we mark it as
|
|
242
|
+
# deleted in the context by setting the value to None.
|
|
243
|
+
# Note: we must do this even if it was also set in the context,
|
|
244
|
+
# since it could be set in both, and deleting should delete it
|
|
245
|
+
# from both.
|
|
207
246
|
ctx.env_overrides[key] = None
|
|
247
|
+
elif key in ctx.env_overrides:
|
|
248
|
+
# If the key is set in the context, but not the original
|
|
249
|
+
# environ, we can just delete the override.
|
|
250
|
+
del ctx.env_overrides[key]
|
|
208
251
|
else:
|
|
209
252
|
# The key is set in neither the context nor the process.
|
|
210
253
|
raise KeyError(key)
|
|
211
254
|
else:
|
|
212
255
|
self._environ.__delitem__(key)
|
|
213
256
|
|
|
214
|
-
def __repr__(self):
|
|
215
|
-
|
|
257
|
+
def __repr__(self) -> str:
|
|
258
|
+
# Adapted from os._Environ.__repr__
|
|
259
|
+
formatted_items = ', '.join(
|
|
260
|
+
f'{key!r}: {value!r}' for key, value in self.items())
|
|
261
|
+
return f'ctx_environ({{{formatted_items}}})'
|
|
216
262
|
|
|
217
|
-
def copy(self):
|
|
263
|
+
def copy(self) -> Dict[str, str]:
|
|
218
264
|
copied = self._environ.copy()
|
|
219
265
|
ctx = get()
|
|
220
266
|
if ctx is not None:
|
|
@@ -225,7 +271,7 @@ class ContextualEnviron(MutableMapping):
|
|
|
225
271
|
copied[key] = ctx.env_overrides[key]
|
|
226
272
|
return copied
|
|
227
273
|
|
|
228
|
-
def setdefault(self, key, default
|
|
274
|
+
def setdefault(self, key: str, default: str) -> str:
|
|
229
275
|
return self._environ.setdefault(key, default)
|
|
230
276
|
|
|
231
277
|
def __ior__(self, other):
|
|
@@ -257,30 +303,71 @@ class Popen(subprocess.Popen):
|
|
|
257
303
|
# Pass a copy of current context.environ to avoid race condition
|
|
258
304
|
# when the context is updated after the Popen is created.
|
|
259
305
|
env = os.environ.copy()
|
|
260
|
-
super().__init__(*args, env=env,
|
|
306
|
+
super().__init__(*args, env=env,
|
|
307
|
+
**kwargs) # type: ignore[call-overload]
|
|
261
308
|
|
|
262
309
|
|
|
263
|
-
|
|
310
|
+
P = ParamSpec('P')
|
|
311
|
+
T = TypeVar('T')
|
|
264
312
|
|
|
265
313
|
|
|
266
|
-
def contextual(func:
|
|
314
|
+
def contextual(func: Callable[P, T]) -> Callable[P, T]:
|
|
267
315
|
"""Decorator to initialize a context before executing the function.
|
|
268
316
|
|
|
269
|
-
If a context is already initialized, this decorator will
|
|
270
|
-
|
|
317
|
+
If a context is already initialized, this decorator will create a new
|
|
318
|
+
context that inherits the values from the existing context.
|
|
271
319
|
"""
|
|
272
320
|
|
|
321
|
+
def run_in_context(*args: P.args, **kwargs: P.kwargs) -> T:
|
|
322
|
+
# Within the new contextvars Context, set up the SkyPilotContext.
|
|
323
|
+
original_ctx = get()
|
|
324
|
+
with initialize(original_ctx):
|
|
325
|
+
return func(*args, **kwargs)
|
|
326
|
+
|
|
327
|
+
@functools.wraps(func)
|
|
328
|
+
def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
|
|
329
|
+
# Create a copy of the current contextvars Context so that setting the
|
|
330
|
+
# SkyPilotContext does not affect the caller's context in async
|
|
331
|
+
# environments.
|
|
332
|
+
context = contextvars.copy_context()
|
|
333
|
+
return context.run(run_in_context, *args, **kwargs)
|
|
334
|
+
|
|
335
|
+
return wrapper
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def contextual_async(
|
|
339
|
+
func: Callable[P, Coroutine[Any, Any, T]]
|
|
340
|
+
) -> Callable[P, Coroutine[Any, Any, T]]:
|
|
341
|
+
"""Decorator to initialize a context before executing the function.
|
|
342
|
+
|
|
343
|
+
If a context is already initialized, this decorator will create a new
|
|
344
|
+
context that inherits the values from the existing context.
|
|
345
|
+
"""
|
|
346
|
+
|
|
347
|
+
async def run_in_context(*args: P.args, **kwargs: P.kwargs) -> T:
|
|
348
|
+
# Within the new contextvars Context, set up the SkyPilotContext.
|
|
349
|
+
original_ctx = get()
|
|
350
|
+
with initialize(original_ctx):
|
|
351
|
+
return await func(*args, **kwargs)
|
|
352
|
+
|
|
273
353
|
@functools.wraps(func)
|
|
274
|
-
def wrapper(*args, **kwargs):
|
|
275
|
-
|
|
276
|
-
|
|
354
|
+
async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
|
|
355
|
+
# Create a copy of the current contextvars Context so that setting the
|
|
356
|
+
# SkyPilotContext does not affect the caller's context in async
|
|
357
|
+
# environments.
|
|
358
|
+
context = contextvars.copy_context()
|
|
359
|
+
return await context.run(run_in_context, *args, **kwargs)
|
|
277
360
|
|
|
278
|
-
return
|
|
361
|
+
return wrapper
|
|
279
362
|
|
|
280
363
|
|
|
281
|
-
def initialize(
|
|
364
|
+
def initialize(
|
|
365
|
+
base_context: Optional[SkyPilotContext] = None) -> SkyPilotContext:
|
|
282
366
|
"""Initialize the current SkyPilot context."""
|
|
283
|
-
|
|
367
|
+
new_context = base_context.copy(
|
|
368
|
+
) if base_context is not None else SkyPilotContext()
|
|
369
|
+
_CONTEXT.set(new_context)
|
|
370
|
+
return new_context
|
|
284
371
|
|
|
285
372
|
|
|
286
373
|
class _ContextualStream:
|
sky/utils/context_utils.py
CHANGED
|
@@ -1,20 +1,27 @@
|
|
|
1
1
|
"""Utilities for SkyPilot context."""
|
|
2
2
|
import asyncio
|
|
3
|
+
import concurrent.futures
|
|
3
4
|
import contextvars
|
|
4
5
|
import functools
|
|
5
|
-
import io
|
|
6
6
|
import multiprocessing
|
|
7
7
|
import os
|
|
8
|
+
import select
|
|
8
9
|
import subprocess
|
|
9
10
|
import sys
|
|
11
|
+
import time
|
|
10
12
|
import typing
|
|
11
13
|
from typing import Any, Callable, IO, Optional, Tuple, TypeVar
|
|
12
14
|
|
|
15
|
+
from typing_extensions import ParamSpec
|
|
16
|
+
|
|
13
17
|
from sky import sky_logging
|
|
14
18
|
from sky.utils import context
|
|
15
19
|
from sky.utils import subprocess_utils
|
|
16
20
|
|
|
17
21
|
StreamHandler = Callable[[IO[Any], IO[Any]], str]
|
|
22
|
+
PASSTHROUGH_FLUSH_INTERVAL_SECONDS = 0.5
|
|
23
|
+
|
|
24
|
+
logger = sky_logging.init_logger(__name__)
|
|
18
25
|
|
|
19
26
|
|
|
20
27
|
# TODO(aylei): call hijack_sys_attrs() proactively in module init at server-side
|
|
@@ -41,23 +48,53 @@ def hijack_sys_attrs():
|
|
|
41
48
|
|
|
42
49
|
def passthrough_stream_handler(in_stream: IO[Any], out_stream: IO[Any]) -> str:
|
|
43
50
|
"""Passthrough the stream from the process to the output stream"""
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
51
|
+
last_flush_time = time.time()
|
|
52
|
+
has_unflushed_content = False
|
|
53
|
+
|
|
54
|
+
# Use poll() with timeout instead of readline() to avoid blocking.
|
|
55
|
+
# readline() blocks until a newline is available, which can take minutes
|
|
56
|
+
# for tasks that emit logs infrequently (e.g. jupyter lab server).
|
|
57
|
+
# While readline() is blocked, the timing code never executes, so buffered
|
|
58
|
+
# logs never get flushed. poll() with timeout allows us to periodically
|
|
59
|
+
# flush even when no new data is available, ensuring logs appear promptly.
|
|
60
|
+
fd = in_stream.fileno()
|
|
61
|
+
poller = select.poll()
|
|
62
|
+
poller.register(fd, select.POLLIN)
|
|
63
|
+
|
|
64
|
+
# Timeout in milliseconds for poll()
|
|
65
|
+
poll_timeout_ms = int(PASSTHROUGH_FLUSH_INTERVAL_SECONDS * 1000)
|
|
66
|
+
|
|
49
67
|
while True:
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
68
|
+
# Poll with timeout - returns when data available or timeout
|
|
69
|
+
events = poller.poll(poll_timeout_ms)
|
|
70
|
+
|
|
71
|
+
current_time = time.time()
|
|
72
|
+
|
|
73
|
+
if events:
|
|
74
|
+
# Data is available, read a chunk
|
|
75
|
+
chunk = os.read(fd, 4096) # Read up to 4KB
|
|
76
|
+
if not chunk:
|
|
77
|
+
break # EOF
|
|
78
|
+
out_stream.write(chunk.decode('utf-8', errors='replace'))
|
|
79
|
+
has_unflushed_content = True
|
|
80
|
+
|
|
81
|
+
# Flush only if we have unflushed content and timeout reached
|
|
82
|
+
if (has_unflushed_content and current_time - last_flush_time >=
|
|
83
|
+
PASSTHROUGH_FLUSH_INTERVAL_SECONDS):
|
|
53
84
|
out_stream.flush()
|
|
54
|
-
|
|
55
|
-
|
|
85
|
+
last_flush_time = current_time
|
|
86
|
+
has_unflushed_content = False
|
|
87
|
+
|
|
88
|
+
poller.unregister(fd)
|
|
89
|
+
# Final flush to ensure all data is written
|
|
90
|
+
if has_unflushed_content:
|
|
91
|
+
out_stream.flush()
|
|
92
|
+
|
|
56
93
|
return ''
|
|
57
94
|
|
|
58
95
|
|
|
59
96
|
def pipe_and_wait_process(
|
|
60
|
-
ctx: context.
|
|
97
|
+
ctx: context.SkyPilotContext,
|
|
61
98
|
proc: subprocess.Popen,
|
|
62
99
|
poll_interval: float = 0.5,
|
|
63
100
|
cancel_callback: Optional[Callable[[], None]] = None,
|
|
@@ -110,7 +147,7 @@ def pipe_and_wait_process(
|
|
|
110
147
|
return stdout, stderr
|
|
111
148
|
|
|
112
149
|
|
|
113
|
-
def wait_process(ctx: context.
|
|
150
|
+
def wait_process(ctx: context.SkyPilotContext,
|
|
114
151
|
proc: subprocess.Popen,
|
|
115
152
|
poll_interval: float = 0.5,
|
|
116
153
|
cancel_callback: Optional[Callable[[], None]] = None):
|
|
@@ -128,7 +165,11 @@ def wait_process(ctx: context.Context,
|
|
|
128
165
|
# Kill the process despite the caller's callback, the utility
|
|
129
166
|
# function gracefully handles the case where the process is
|
|
130
167
|
# already terminated.
|
|
131
|
-
|
|
168
|
+
# A bash script typically does not forward SIGTERM to its children, thus
|
|
169
|
+
# cannot be killed gracefully, shorten the grace period for faster
|
|
170
|
+
# termination.
|
|
171
|
+
subprocess_utils.kill_process_with_grace_period(proc,
|
|
172
|
+
grace_period=1)
|
|
132
173
|
raise asyncio.CancelledError()
|
|
133
174
|
try:
|
|
134
175
|
proc.wait(poll_interval)
|
|
@@ -173,15 +214,29 @@ def cancellation_guard(func: F) -> F:
|
|
|
173
214
|
return typing.cast(F, wrapper)
|
|
174
215
|
|
|
175
216
|
|
|
217
|
+
P = ParamSpec('P')
|
|
218
|
+
T = TypeVar('T')
|
|
219
|
+
|
|
220
|
+
|
|
176
221
|
# TODO(aylei): replace this with asyncio.to_thread once we drop support for
|
|
177
222
|
# python 3.8
|
|
178
|
-
def to_thread(func, /, *args,
|
|
223
|
+
def to_thread(func: Callable[P, T], /, *args: P.args,
|
|
224
|
+
**kwargs: P.kwargs) -> 'asyncio.Future[T]':
|
|
179
225
|
"""Asynchronously run function *func* in a separate thread.
|
|
180
226
|
|
|
181
227
|
This is the same as asyncio.to_thread, added in Python 3.9
|
|
182
228
|
"""
|
|
229
|
+
return to_thread_with_executor(None, func, *args, **kwargs)
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def to_thread_with_executor(executor: Optional[concurrent.futures.Executor],
|
|
233
|
+
func: Callable[P, T], /, *args: P.args,
|
|
234
|
+
**kwargs: P.kwargs) -> 'asyncio.Future[T]':
|
|
235
|
+
"""Asynchronously run function *func* in a separate thread with
|
|
236
|
+
a custom executor."""
|
|
237
|
+
|
|
183
238
|
loop = asyncio.get_running_loop()
|
|
184
|
-
# This is critical to pass the current coroutine context to the new thread
|
|
185
239
|
pyctx = contextvars.copy_context()
|
|
186
|
-
func_call = functools.partial(pyctx.run, func, *args,
|
|
187
|
-
|
|
240
|
+
func_call: Callable[..., T] = functools.partial(pyctx.run, func, *args,
|
|
241
|
+
**kwargs)
|
|
242
|
+
return loop.run_in_executor(executor, func_call)
|