skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +81 -16
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +173 -30
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1323 -397
- sky/backends/cloud_vm_ray_backend.py +1749 -1029
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/task_codegen.py +633 -0
- sky/backends/wheel_utils.py +55 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +90 -49
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +338 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +533 -185
- sky/cli.py +5 -5975
- sky/client/{cli.py → cli/command.py} +2591 -1956
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +322 -0
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +78 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1219 -319
- sky/client/sdk_async.py +827 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +564 -164
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +140 -40
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +570 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +210 -81
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -86
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +231 -167
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +375 -173
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1460 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +126 -2
- sky/execution.py +216 -63
- sky/global_user_state.py +2390 -586
- sky/jobs/__init__.py +7 -0
- sky/jobs/client/sdk.py +300 -58
- sky/jobs/client/sdk_async.py +161 -0
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +848 -275
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +402 -152
- sky/jobs/scheduler.py +314 -189
- sky/jobs/server/core.py +836 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2109 -706
- sky/jobs/utils.py +1306 -215
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +453 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +136 -50
- sky/provision/azure/instance.py +11 -6
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +140 -33
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +101 -20
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +919 -280
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +44 -43
- sky/provision/kubernetes/utils.py +1221 -534
- sky/provision/kubernetes/volume.py +343 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +237 -137
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +117 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +214 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +707 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +4 -4
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +17 -20
- sky/py.typed +0 -0
- sky/resources.py +845 -119
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +227 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +12 -9
- sky/serve/controller.py +68 -17
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +116 -2
- sky/serve/replica_managers.py +434 -249
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +569 -257
- sky/serve/serve_utils.py +775 -265
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +192 -89
- sky/serve/service_spec.py +144 -20
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +202 -0
- sky/server/common.py +478 -182
- sky/server/config.py +85 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +295 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/middleware_utils.py +166 -0
- sky/server/requests/executor.py +558 -138
- sky/server/requests/payloads.py +364 -24
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +121 -0
- sky/server/requests/requests.py +822 -226
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +455 -0
- sky/server/server.py +1309 -285
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +327 -61
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +11 -1
- sky/setup_files/alembic.ini +160 -0
- sky/setup_files/dependencies.py +139 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +114 -7
- sky/skylet/attempt_skylet.py +106 -24
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +29 -20
- sky/skylet/constants.py +216 -25
- sky/skylet/events.py +101 -21
- sky/skylet/job_lib.py +345 -164
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +568 -0
- sky/skylet/skylet.py +72 -4
- sky/skylet/subprocess_daemon.py +104 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +685 -163
- sky/templates/aws-ray.yml.j2 +11 -3
- sky/templates/azure-ray.yml.j2 +2 -1
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +2 -1
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +62 -1
- sky/templates/hyperbolic-ray.yml.j2 +68 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +611 -50
- sky/templates/lambda-ray.yml.j2 +2 -1
- sky/templates/nebius-ray.yml.j2 +34 -12
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +2 -1
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +10 -1
- sky/templates/scp-ray.yml.j2 +4 -50
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/vast-ray.yml.j2 +1 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +212 -37
- sky/usage/usage_lib.py +31 -15
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +397 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +35 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +38 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +314 -91
- sky/utils/config_utils.py +74 -5
- sky/utils/context.py +403 -0
- sky/utils/context_utils.py +242 -0
- sky/utils/controller_utils.py +383 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +485 -0
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +137 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +15 -29
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +18 -8
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +416 -0
- sky/utils/log_utils.py +82 -107
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +217 -39
- sky/utils/schemas.py +955 -160
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +29 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +26 -53
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +165 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +150 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +270 -0
- sky/volumes/server/server.py +124 -0
- sky/volumes/volume.py +215 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
- skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
- skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/task.py
CHANGED
|
@@ -1,43 +1,34 @@
|
|
|
1
1
|
"""Task: a coarse-grained stage in an application."""
|
|
2
2
|
import collections
|
|
3
|
-
import inspect
|
|
4
3
|
import json
|
|
5
4
|
import os
|
|
6
5
|
import re
|
|
7
|
-
import typing
|
|
8
6
|
from typing import (Any, Callable, Dict, Iterable, List, Optional, Set, Tuple,
|
|
9
7
|
Union)
|
|
10
8
|
|
|
11
9
|
import colorama
|
|
10
|
+
from pydantic import SecretStr
|
|
12
11
|
|
|
13
|
-
import sky
|
|
14
12
|
from sky import clouds
|
|
13
|
+
from sky import dag as dag_lib
|
|
15
14
|
from sky import exceptions
|
|
15
|
+
from sky import resources as resources_lib
|
|
16
16
|
from sky import sky_logging
|
|
17
|
-
from sky.adaptors import common as adaptors_common
|
|
18
|
-
import sky.dag
|
|
19
17
|
from sky.data import data_utils
|
|
20
18
|
from sky.data import storage as storage_lib
|
|
21
19
|
from sky.provision import docker_utils
|
|
22
20
|
from sky.serve import service_spec
|
|
23
21
|
from sky.skylet import constants
|
|
24
22
|
from sky.utils import common_utils
|
|
23
|
+
from sky.utils import git
|
|
24
|
+
from sky.utils import registry
|
|
25
25
|
from sky.utils import schemas
|
|
26
26
|
from sky.utils import ux_utils
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
import yaml
|
|
30
|
-
|
|
31
|
-
from sky import resources as resources_lib
|
|
32
|
-
else:
|
|
33
|
-
yaml = adaptors_common.LazyImport('yaml')
|
|
27
|
+
from sky.utils import volume as volume_lib
|
|
28
|
+
from sky.utils import yaml_utils
|
|
34
29
|
|
|
35
30
|
logger = sky_logging.init_logger(__name__)
|
|
36
31
|
|
|
37
|
-
# A lambda generating commands (node rank_i, node addrs -> cmd_i).
|
|
38
|
-
CommandGen = Callable[[int, List[str]], Optional[str]]
|
|
39
|
-
CommandOrCommandGen = Union[str, CommandGen]
|
|
40
|
-
|
|
41
32
|
_VALID_NAME_REGEX = '[a-zA-Z0-9]+(?:[._-]{1,2}[a-zA-Z0-9]+)*'
|
|
42
33
|
_VALID_NAME_DESCR = ('ASCII characters and may contain lowercase and'
|
|
43
34
|
' uppercase letters, digits, underscores, periods,'
|
|
@@ -121,27 +112,61 @@ def _fill_in_env_vars(
|
|
|
121
112
|
return json.loads(yaml_field_str)
|
|
122
113
|
|
|
123
114
|
|
|
124
|
-
def _check_docker_login_config(task_envs: Dict[str, str]
|
|
125
|
-
|
|
115
|
+
def _check_docker_login_config(task_envs: Dict[str, str],
|
|
116
|
+
task_secrets: Dict[str, SecretStr]) -> bool:
|
|
117
|
+
"""Validates a valid docker login config in task_envs and task_secrets.
|
|
126
118
|
|
|
127
|
-
|
|
119
|
+
Docker login variables must be specified together either in envs OR secrets,
|
|
120
|
+
not split across both. If any of the docker login env vars is set, all of
|
|
121
|
+
them must be set in the same location.
|
|
122
|
+
|
|
123
|
+
Args:
|
|
124
|
+
task_envs: Environment variables
|
|
125
|
+
task_secrets: Secret variables (optional, defaults to empty dict)
|
|
128
126
|
|
|
129
127
|
Returns:
|
|
130
|
-
True if there is a valid docker login config
|
|
128
|
+
True if there is a valid docker login config.
|
|
131
129
|
False otherwise.
|
|
132
130
|
Raises:
|
|
133
|
-
ValueError: if
|
|
134
|
-
them are set.
|
|
131
|
+
ValueError: if docker login configuration is invalid.
|
|
135
132
|
"""
|
|
133
|
+
if task_secrets is None:
|
|
134
|
+
task_secrets = {}
|
|
135
|
+
|
|
136
136
|
all_keys = constants.DOCKER_LOGIN_ENV_VARS
|
|
137
|
-
|
|
138
|
-
|
|
137
|
+
envs_keys = all_keys & set(task_envs.keys())
|
|
138
|
+
secrets_keys = all_keys & set(task_secrets.keys())
|
|
139
|
+
|
|
140
|
+
# Check if any docker variables exist
|
|
141
|
+
if not envs_keys and not secrets_keys:
|
|
139
142
|
return False
|
|
140
|
-
|
|
143
|
+
|
|
144
|
+
# Check if variables are split across envs and secrets
|
|
145
|
+
if envs_keys and secrets_keys:
|
|
141
146
|
with ux_utils.print_exception_no_traceback():
|
|
142
147
|
raise ValueError(
|
|
143
|
-
|
|
144
|
-
|
|
148
|
+
'Docker login variables must be specified together either '
|
|
149
|
+
'in envs OR secrets, not split across both. '
|
|
150
|
+
f'Found in envs: {sorted(envs_keys)}, '
|
|
151
|
+
f'Found in secrets: {sorted(secrets_keys)}')
|
|
152
|
+
|
|
153
|
+
# Check if all variables are present in the chosen location
|
|
154
|
+
if envs_keys:
|
|
155
|
+
if len(envs_keys) != len(all_keys):
|
|
156
|
+
with ux_utils.print_exception_no_traceback():
|
|
157
|
+
raise ValueError(
|
|
158
|
+
'Docker login variables must be specified together '
|
|
159
|
+
'in envs. '
|
|
160
|
+
f'Missing from envs: {sorted(all_keys - envs_keys)}')
|
|
161
|
+
|
|
162
|
+
if secrets_keys:
|
|
163
|
+
if len(secrets_keys) != len(all_keys):
|
|
164
|
+
with ux_utils.print_exception_no_traceback():
|
|
165
|
+
raise ValueError(
|
|
166
|
+
'Docker login variables must be specified together '
|
|
167
|
+
'in secrets. '
|
|
168
|
+
f'Missing from secrets: {sorted(all_keys - secrets_keys)}')
|
|
169
|
+
|
|
145
170
|
return True
|
|
146
171
|
|
|
147
172
|
|
|
@@ -149,11 +174,14 @@ def _with_docker_login_config(
|
|
|
149
174
|
resources: Union[Set['resources_lib.Resources'],
|
|
150
175
|
List['resources_lib.Resources']],
|
|
151
176
|
task_envs: Dict[str, str],
|
|
177
|
+
task_secrets: Dict[str, SecretStr],
|
|
152
178
|
) -> Union[Set['resources_lib.Resources'], List['resources_lib.Resources']]:
|
|
153
|
-
if not _check_docker_login_config(task_envs):
|
|
179
|
+
if not _check_docker_login_config(task_envs, task_secrets):
|
|
154
180
|
return resources
|
|
155
|
-
|
|
156
|
-
|
|
181
|
+
envs = task_envs.copy()
|
|
182
|
+
for key, value in task_secrets.items():
|
|
183
|
+
envs[key] = value.get_secret_value()
|
|
184
|
+
docker_login_config = docker_utils.DockerLoginConfig.from_env_vars(envs)
|
|
157
185
|
|
|
158
186
|
def _add_docker_login_config(resources: 'resources_lib.Resources'):
|
|
159
187
|
docker_image = resources.extract_docker_image()
|
|
@@ -165,7 +193,8 @@ def _with_docker_login_config(
|
|
|
165
193
|
f'ignored.{colorama.Style.RESET_ALL}')
|
|
166
194
|
return resources
|
|
167
195
|
# Already checked in extract_docker_image
|
|
168
|
-
assert
|
|
196
|
+
assert resources.image_id is not None and len(
|
|
197
|
+
resources.image_id) == 1, resources.image_id
|
|
169
198
|
region = list(resources.image_id.keys())[0]
|
|
170
199
|
return resources.copy(image_id={region: 'docker:' + docker_image},
|
|
171
200
|
_docker_login_config=docker_login_config)
|
|
@@ -180,8 +209,12 @@ def _with_docker_username_for_runpod(
|
|
|
180
209
|
resources: Union[Set['resources_lib.Resources'],
|
|
181
210
|
List['resources_lib.Resources']],
|
|
182
211
|
task_envs: Dict[str, str],
|
|
212
|
+
task_secrets: Dict[str, SecretStr],
|
|
183
213
|
) -> Union[Set['resources_lib.Resources'], List['resources_lib.Resources']]:
|
|
184
|
-
|
|
214
|
+
envs = task_envs.copy()
|
|
215
|
+
for key, value in task_secrets.items():
|
|
216
|
+
envs[key] = value.get_secret_value()
|
|
217
|
+
docker_username_for_runpod = envs.get(
|
|
185
218
|
constants.RUNPOD_DOCKER_USERNAME_ENV_VAR)
|
|
186
219
|
|
|
187
220
|
# We should not call r.copy() if docker_username_for_runpod is None,
|
|
@@ -193,6 +226,18 @@ def _with_docker_username_for_runpod(
|
|
|
193
226
|
for r in resources))
|
|
194
227
|
|
|
195
228
|
|
|
229
|
+
def get_plaintext_envs_and_secrets(
|
|
230
|
+
envs_and_secrets: Dict[str, Union[str, SecretStr]],) -> Dict[str, str]:
|
|
231
|
+
return {
|
|
232
|
+
k: v.get_secret_value() if isinstance(v, SecretStr) else v
|
|
233
|
+
for k, v in envs_and_secrets.items()
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def get_plaintext_secrets(secrets: Dict[str, SecretStr]) -> Dict[str, str]:
|
|
238
|
+
return {k: v.get_secret_value() for k, v in secrets.items()}
|
|
239
|
+
|
|
240
|
+
|
|
196
241
|
class Task:
|
|
197
242
|
"""Task: a computation to be run on the cloud."""
|
|
198
243
|
|
|
@@ -200,17 +245,27 @@ class Task:
|
|
|
200
245
|
self,
|
|
201
246
|
name: Optional[str] = None,
|
|
202
247
|
*,
|
|
203
|
-
setup: Optional[str] = None,
|
|
204
|
-
run: Optional[
|
|
248
|
+
setup: Optional[Union[str, List[str]]] = None,
|
|
249
|
+
run: Optional[Union[str, List[str]]] = None,
|
|
205
250
|
envs: Optional[Dict[str, str]] = None,
|
|
206
|
-
|
|
251
|
+
secrets: Optional[Dict[str, str]] = None,
|
|
252
|
+
workdir: Optional[Union[str, Dict[str, Any]]] = None,
|
|
207
253
|
num_nodes: Optional[int] = None,
|
|
254
|
+
file_mounts: Optional[Dict[str, str]] = None,
|
|
255
|
+
storage_mounts: Optional[Dict[str, storage_lib.Storage]] = None,
|
|
256
|
+
volumes: Optional[Dict[str, Union[str, Dict[str, Any]]]] = None,
|
|
257
|
+
resources: Optional[Union['resources_lib.Resources',
|
|
258
|
+
List['resources_lib.Resources'],
|
|
259
|
+
Set['resources_lib.Resources']]] = None,
|
|
208
260
|
# Advanced:
|
|
209
261
|
docker_image: Optional[str] = None,
|
|
210
262
|
event_callback: Optional[str] = None,
|
|
211
263
|
blocked_resources: Optional[Iterable['resources_lib.Resources']] = None,
|
|
212
264
|
# Internal use only.
|
|
213
|
-
|
|
265
|
+
_file_mounts_mapping: Optional[Dict[str, str]] = None,
|
|
266
|
+
_volume_mounts: Optional[List[volume_lib.VolumeMount]] = None,
|
|
267
|
+
_metadata: Optional[Dict[str, Any]] = None,
|
|
268
|
+
_user_specified_yaml: Optional[str] = None,
|
|
214
269
|
):
|
|
215
270
|
"""Initializes a Task.
|
|
216
271
|
|
|
@@ -242,38 +297,86 @@ class Task:
|
|
|
242
297
|
|
|
243
298
|
Args:
|
|
244
299
|
name: A string name for the Task for display purposes.
|
|
245
|
-
setup: A setup command, which will be run before executing the run
|
|
300
|
+
setup: A setup command(s), which will be run before executing the run
|
|
246
301
|
commands ``run``, and executed under ``workdir``.
|
|
247
302
|
run: The actual command for the task. If not None, either a shell
|
|
248
|
-
command (str) or a command generator (callable).
|
|
249
|
-
must take a node rank and a list of node addresses as
|
|
250
|
-
return a shell command (str) (valid to return None for
|
|
251
|
-
in which case no commands are run on them).
|
|
252
|
-
run under ``workdir``. Note the command generator
|
|
253
|
-
self-contained lambda.
|
|
303
|
+
command(s) (str, list(str)) or a command generator (callable). If
|
|
304
|
+
latter, it must take a node rank and a list of node addresses as
|
|
305
|
+
input and return a shell command (str) (valid to return None for
|
|
306
|
+
some nodes, in which case no commands are run on them). Run
|
|
307
|
+
commands will be run under ``workdir``. Note the command generator
|
|
308
|
+
should be a self-contained lambda.
|
|
254
309
|
envs: A dictionary of environment variables to set before running the
|
|
255
310
|
setup and run commands.
|
|
256
|
-
|
|
311
|
+
secrets: A dictionary of secret environment variables to set before
|
|
312
|
+
running the setup and run commands. These will be redacted in logs
|
|
313
|
+
and YAML output.
|
|
314
|
+
workdir: The local working directory or a git repository.
|
|
315
|
+
For a local working directory, this directory will be synced
|
|
257
316
|
to a location on the remote VM(s), and ``setup`` and ``run``
|
|
258
317
|
commands will be run under that location (thus, they can rely on
|
|
259
318
|
relative paths when invoking binaries).
|
|
319
|
+
If a git repository is provided, the repository will be cloned to
|
|
320
|
+
the working directory and the ``setup`` and ``run`` commands will
|
|
321
|
+
be run under the cloned repository.
|
|
260
322
|
num_nodes: The number of nodes to provision for this Task. If None,
|
|
261
323
|
treated as 1 node. If > 1, each node will execute its own
|
|
262
324
|
setup/run command, where ``run`` can either be a str, meaning all
|
|
263
325
|
nodes get the same command, or a lambda, with the semantics
|
|
264
326
|
documented above.
|
|
327
|
+
file_mounts: An optional dict of ``{remote_path: (local_path|cloud
|
|
328
|
+
URI)}``, where remote means the VM(s) on which this Task will
|
|
329
|
+
eventually run on, and local means the node from which the task is
|
|
330
|
+
launched.
|
|
331
|
+
storage_mounts: an optional dict of ``{mount_path: sky.Storage
|
|
332
|
+
object}``, where mount_path is the path inside the remote VM(s)
|
|
333
|
+
where the Storage object will be mounted on.
|
|
334
|
+
volumes: A dict of volumes to be mounted for the task. The dict has
|
|
335
|
+
the form of ``{mount_path: volume_name}`` for external persistent
|
|
336
|
+
volumes, or ``{mount_path: volume_config}`` for ephemeral volumes
|
|
337
|
+
where volume_config is a dict with 'size', and optional type,
|
|
338
|
+
labels, and 'config' fields, etc.
|
|
339
|
+
resources: either a sky.Resources, a set of them, or a list of them.
|
|
340
|
+
A set or a list of resources asks the optimizer to "pick the
|
|
341
|
+
best of these resources" to run this task.
|
|
265
342
|
docker_image: (EXPERIMENTAL: Only in effect when LocalDockerBackend
|
|
266
343
|
is used.) The base docker image that this Task will be built on.
|
|
267
344
|
Defaults to 'gpuci/miniforge-cuda:11.4-devel-ubuntu18.04'.
|
|
345
|
+
event_callback: A bash script that will be executed when the task
|
|
346
|
+
changes state.
|
|
268
347
|
blocked_resources: A set of resources that this task cannot run on.
|
|
348
|
+
_file_mounts_mapping: (Internal use only) A dictionary of file mounts
|
|
349
|
+
mapping.
|
|
350
|
+
_volume_mounts: (Internal use only) A list of volume mounts.
|
|
351
|
+
_metadata: (Internal use only) A dictionary of metadata to be added to
|
|
352
|
+
the task.
|
|
353
|
+
_user_specified_yaml: (Internal use only) A string of user-specified
|
|
354
|
+
YAML config.
|
|
269
355
|
"""
|
|
270
356
|
self.name = name
|
|
271
|
-
self.run = run
|
|
272
357
|
self.storage_mounts: Dict[str, storage_lib.Storage] = {}
|
|
273
358
|
self.storage_plans: Dict[storage_lib.Storage,
|
|
274
359
|
storage_lib.StoreType] = {}
|
|
275
|
-
self.setup = setup
|
|
276
360
|
self._envs = envs or {}
|
|
361
|
+
self._secrets = {}
|
|
362
|
+
if secrets is not None:
|
|
363
|
+
self._secrets = {k: SecretStr(v) for k, v in secrets.items()}
|
|
364
|
+
self._volumes = volumes or {}
|
|
365
|
+
|
|
366
|
+
# concatenate commands if given as list
|
|
367
|
+
def _concat(commands: Optional[Union[str, List[str]]]) -> Optional[str]:
|
|
368
|
+
if isinstance(commands, list):
|
|
369
|
+
return '\n'.join(commands)
|
|
370
|
+
return commands
|
|
371
|
+
|
|
372
|
+
self.run = _concat(run)
|
|
373
|
+
self.setup = _concat(setup)
|
|
374
|
+
|
|
375
|
+
# Validate Docker login configuration early if both envs and secrets
|
|
376
|
+
# contain Docker variables
|
|
377
|
+
if self._envs or self._secrets:
|
|
378
|
+
_check_docker_login_config(self._envs, self._secrets)
|
|
379
|
+
|
|
277
380
|
self.workdir = workdir
|
|
278
381
|
self.docker_image = (docker_image if docker_image else
|
|
279
382
|
'gpuci/miniforge-cuda:11.4-devel-ubuntu18.04')
|
|
@@ -288,33 +391,50 @@ class Task:
|
|
|
288
391
|
self.estimated_inputs_size_gigabytes: Optional[float] = None
|
|
289
392
|
self.estimated_outputs_size_gigabytes: Optional[float] = None
|
|
290
393
|
# Default to CPU VM
|
|
291
|
-
self.resources: Union[List[
|
|
292
|
-
Set[
|
|
394
|
+
self.resources: Union[List['resources_lib.Resources'],
|
|
395
|
+
Set['resources_lib.Resources']] = {
|
|
396
|
+
resources_lib.Resources()
|
|
397
|
+
}
|
|
293
398
|
self._service: Optional[service_spec.SkyServiceSpec] = None
|
|
399
|
+
|
|
294
400
|
# Resources that this task cannot run on.
|
|
295
401
|
self.blocked_resources = blocked_resources
|
|
296
402
|
|
|
297
|
-
self.time_estimator_func: Optional[Callable[['
|
|
403
|
+
self.time_estimator_func: Optional[Callable[['resources_lib.Resources'],
|
|
298
404
|
int]] = None
|
|
299
405
|
self.file_mounts: Optional[Dict[str, str]] = None
|
|
300
406
|
|
|
301
407
|
# Only set when 'self' is a jobs controller task: 'self.managed_job_dag'
|
|
302
408
|
# is the underlying managed job dag (sky.Dag object).
|
|
303
|
-
self.managed_job_dag: Optional['
|
|
409
|
+
self.managed_job_dag: Optional['dag_lib.Dag'] = None
|
|
304
410
|
|
|
305
411
|
# Only set when 'self' is a sky serve controller task.
|
|
306
412
|
self.service_name: Optional[str] = None
|
|
307
413
|
|
|
308
414
|
# Filled in by the optimizer. If None, this Task is not planned.
|
|
309
|
-
self.best_resources: Optional[
|
|
415
|
+
self.best_resources: Optional['resources_lib.Resources'] = None
|
|
310
416
|
|
|
311
417
|
# For internal use only.
|
|
312
|
-
self.file_mounts_mapping
|
|
418
|
+
self.file_mounts_mapping: Optional[Dict[str,
|
|
419
|
+
str]] = _file_mounts_mapping
|
|
420
|
+
self.volume_mounts: Optional[List[volume_lib.VolumeMount]] = (
|
|
421
|
+
_volume_mounts)
|
|
313
422
|
|
|
314
|
-
|
|
423
|
+
self._metadata = _metadata if _metadata is not None else {}
|
|
424
|
+
|
|
425
|
+
if resources is not None:
|
|
426
|
+
self.set_resources(resources)
|
|
427
|
+
if storage_mounts is not None:
|
|
428
|
+
self.set_storage_mounts(storage_mounts)
|
|
429
|
+
if file_mounts is not None:
|
|
430
|
+
self.set_file_mounts(file_mounts)
|
|
431
|
+
|
|
432
|
+
dag = dag_lib.get_current_dag()
|
|
315
433
|
if dag is not None:
|
|
316
434
|
dag.add(self)
|
|
317
435
|
|
|
436
|
+
self._user_specified_yaml = _user_specified_yaml
|
|
437
|
+
|
|
318
438
|
def validate(self,
|
|
319
439
|
skip_file_mounts: bool = False,
|
|
320
440
|
skip_workdir: bool = False):
|
|
@@ -342,42 +462,9 @@ class Task:
|
|
|
342
462
|
|
|
343
463
|
def validate_run(self):
|
|
344
464
|
"""Validates if the run command is valid."""
|
|
345
|
-
if
|
|
346
|
-
run_sig = inspect.signature(self.run)
|
|
347
|
-
# Check that run is a function with 2 arguments.
|
|
348
|
-
if len(run_sig.parameters) != 2:
|
|
349
|
-
with ux_utils.print_exception_no_traceback():
|
|
350
|
-
raise ValueError(_RUN_FN_CHECK_FAIL_MSG.format(run_sig))
|
|
351
|
-
|
|
352
|
-
type_list = [int, List[str]]
|
|
353
|
-
# Check annotations, if exists
|
|
354
|
-
for i, param in enumerate(run_sig.parameters.values()):
|
|
355
|
-
if param.annotation != inspect.Parameter.empty:
|
|
356
|
-
if param.annotation != type_list[i]:
|
|
357
|
-
with ux_utils.print_exception_no_traceback():
|
|
358
|
-
raise ValueError(
|
|
359
|
-
_RUN_FN_CHECK_FAIL_MSG.format(run_sig))
|
|
360
|
-
|
|
361
|
-
# Check self containedness.
|
|
362
|
-
run_closure = inspect.getclosurevars(self.run)
|
|
363
|
-
if run_closure.nonlocals:
|
|
364
|
-
with ux_utils.print_exception_no_traceback():
|
|
365
|
-
raise ValueError(
|
|
366
|
-
'run command generator must be self contained. '
|
|
367
|
-
f'Found nonlocals: {run_closure.nonlocals}')
|
|
368
|
-
if run_closure.globals:
|
|
369
|
-
with ux_utils.print_exception_no_traceback():
|
|
370
|
-
raise ValueError(
|
|
371
|
-
'run command generator must be self contained. '
|
|
372
|
-
f'Found globals: {run_closure.globals}')
|
|
373
|
-
if run_closure.unbound:
|
|
374
|
-
# Do not raise an error here. Import statements, which are
|
|
375
|
-
# allowed, will be considered as unbounded.
|
|
376
|
-
pass
|
|
377
|
-
elif self.run is not None and not isinstance(self.run, str):
|
|
465
|
+
if self.run is not None and not isinstance(self.run, str):
|
|
378
466
|
with ux_utils.print_exception_no_traceback():
|
|
379
|
-
raise ValueError('run must be
|
|
380
|
-
f'a command generator ({CommandGen}). '
|
|
467
|
+
raise ValueError('run must be a shell script (str). '
|
|
381
468
|
f'Got {type(self.run)}')
|
|
382
469
|
|
|
383
470
|
def expand_and_validate_file_mounts(self):
|
|
@@ -390,12 +477,9 @@ class Task:
|
|
|
390
477
|
if self.file_mounts is None:
|
|
391
478
|
return
|
|
392
479
|
for target, source in self.file_mounts.items():
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
'File mount paths cannot end with a slash '
|
|
397
|
-
'(try "/mydir: /mydir" or "/myfile: /myfile"). '
|
|
398
|
-
f'Found: target={target} source={source}')
|
|
480
|
+
location = f'file_mounts.{target}: {source}'
|
|
481
|
+
self._validate_mount_path(target, location)
|
|
482
|
+
self._validate_path(source, location)
|
|
399
483
|
if data_utils.is_cloud_store_url(target):
|
|
400
484
|
with ux_utils.print_exception_no_traceback():
|
|
401
485
|
raise ValueError(
|
|
@@ -410,17 +494,25 @@ class Task:
|
|
|
410
494
|
f'File mount source {source!r} does not exist '
|
|
411
495
|
'locally. To fix: check if it exists, and correct '
|
|
412
496
|
'the path.')
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
497
|
+
|
|
498
|
+
def _validate_mount_path(self, path: str, location: str):
|
|
499
|
+
self._validate_path(path, location)
|
|
500
|
+
# TODO(zhwu): /home/username/sky_workdir as the target path need
|
|
501
|
+
# to be filtered out as well.
|
|
502
|
+
if (path == constants.SKY_REMOTE_WORKDIR and self.workdir is not None):
|
|
503
|
+
with ux_utils.print_exception_no_traceback():
|
|
504
|
+
raise ValueError(
|
|
505
|
+
f'Cannot use {constants.SKY_REMOTE_WORKDIR!r} as a '
|
|
506
|
+
'destination path of a file mount, as it will be used '
|
|
507
|
+
'by the workdir. If uploading a file/folder to the '
|
|
508
|
+
'workdir is needed, please specify the full path to '
|
|
509
|
+
'the file/folder.')
|
|
510
|
+
|
|
511
|
+
def _validate_path(self, path: str, location: str):
|
|
512
|
+
if path.endswith('/'):
|
|
513
|
+
with ux_utils.print_exception_no_traceback():
|
|
514
|
+
raise ValueError('Mount paths cannot end with a slash '
|
|
515
|
+
f'Found: {path} in {location}')
|
|
424
516
|
|
|
425
517
|
def expand_and_validate_workdir(self):
|
|
426
518
|
"""Expand workdir to absolute path and validate it.
|
|
@@ -431,6 +523,12 @@ class Task:
|
|
|
431
523
|
"""
|
|
432
524
|
if self.workdir is None:
|
|
433
525
|
return
|
|
526
|
+
# Only expand the workdir if it is a string
|
|
527
|
+
if isinstance(self.workdir, dict):
|
|
528
|
+
git_ref = self.workdir.get('ref')
|
|
529
|
+
if git_ref is not None:
|
|
530
|
+
self._metadata['git_commit'] = git_ref
|
|
531
|
+
return
|
|
434
532
|
user_workdir = self.workdir
|
|
435
533
|
self.workdir = os.path.abspath(os.path.expanduser(user_workdir))
|
|
436
534
|
if not os.path.isdir(self.workdir):
|
|
@@ -440,11 +538,16 @@ class Task:
|
|
|
440
538
|
'Workdir must be a valid directory (or '
|
|
441
539
|
f'a symlink to a directory). {user_workdir} not found.')
|
|
442
540
|
|
|
541
|
+
self._metadata['git_commit'] = common_utils.get_git_commit(self.workdir)
|
|
542
|
+
|
|
443
543
|
@staticmethod
|
|
444
544
|
def from_yaml_config(
|
|
445
545
|
config: Dict[str, Any],
|
|
446
546
|
env_overrides: Optional[List[Tuple[str, str]]] = None,
|
|
547
|
+
secrets_overrides: Optional[List[Tuple[str, str]]] = None,
|
|
447
548
|
) -> 'Task':
|
|
549
|
+
user_specified_yaml = config.pop('_user_specified_yaml',
|
|
550
|
+
yaml_utils.dump_yaml_str(config))
|
|
448
551
|
# More robust handling for 'envs': explicitly convert keys and values to
|
|
449
552
|
# str, since users may pass '123' as keys/values which will get parsed
|
|
450
553
|
# as int causing validate_schema() to fail.
|
|
@@ -457,6 +560,20 @@ class Task:
|
|
|
457
560
|
else:
|
|
458
561
|
new_envs[str(k)] = None
|
|
459
562
|
config['envs'] = new_envs
|
|
563
|
+
|
|
564
|
+
# More robust handling for 'secrets': explicitly convert keys and values
|
|
565
|
+
# to str, since users may pass '123' as keys/values which will get
|
|
566
|
+
# parsed as int causing validate_schema() to fail.
|
|
567
|
+
secrets = config.get('secrets')
|
|
568
|
+
if secrets is not None and isinstance(secrets, dict):
|
|
569
|
+
new_secrets: Dict[str, Optional[str]] = {}
|
|
570
|
+
for k, v in secrets.items():
|
|
571
|
+
if v is not None:
|
|
572
|
+
new_secrets[str(k)] = str(v)
|
|
573
|
+
else:
|
|
574
|
+
new_secrets[str(k)] = None
|
|
575
|
+
config['secrets'] = new_secrets
|
|
576
|
+
|
|
460
577
|
common_utils.validate_schema(config, schemas.get_task_schema(),
|
|
461
578
|
'Invalid task YAML: ')
|
|
462
579
|
if env_overrides is not None:
|
|
@@ -470,6 +587,12 @@ class Task:
|
|
|
470
587
|
new_envs.update(env_overrides)
|
|
471
588
|
config['envs'] = new_envs
|
|
472
589
|
|
|
590
|
+
if secrets_overrides is not None:
|
|
591
|
+
# Override secrets vars from CLI.
|
|
592
|
+
new_secrets = config.get('secrets', {})
|
|
593
|
+
new_secrets.update(secrets_overrides)
|
|
594
|
+
config['secrets'] = new_secrets
|
|
595
|
+
|
|
473
596
|
for k, v in config.get('envs', {}).items():
|
|
474
597
|
if v is None:
|
|
475
598
|
with ux_utils.print_exception_no_traceback():
|
|
@@ -479,21 +602,38 @@ class Task:
|
|
|
479
602
|
f'To set it to be empty, use an empty string ({k}: "" '
|
|
480
603
|
f'in task YAML or --env {k}="" in CLI).')
|
|
481
604
|
|
|
605
|
+
for k, v in config.get('secrets', {}).items():
|
|
606
|
+
if v is None:
|
|
607
|
+
with ux_utils.print_exception_no_traceback():
|
|
608
|
+
raise ValueError(
|
|
609
|
+
f'Secret variable {k!r} is None. Please set a '
|
|
610
|
+
'value for it in task YAML or with --secret flag. '
|
|
611
|
+
f'To set it to be empty, use an empty string ({k}: "" '
|
|
612
|
+
f'in task YAML or --secret {k}="" in CLI).')
|
|
613
|
+
|
|
482
614
|
# Fill in any Task.envs into file_mounts (src/dst paths, storage
|
|
483
615
|
# name/source).
|
|
616
|
+
env_vars = config.get('envs', {})
|
|
617
|
+
secrets = config.get('secrets', {})
|
|
618
|
+
env_and_secrets = env_vars.copy()
|
|
619
|
+
env_and_secrets.update(secrets)
|
|
484
620
|
if config.get('file_mounts') is not None:
|
|
485
621
|
config['file_mounts'] = _fill_in_env_vars(config['file_mounts'],
|
|
486
|
-
|
|
622
|
+
env_and_secrets)
|
|
487
623
|
|
|
488
624
|
# Fill in any Task.envs into service (e.g. MODEL_NAME).
|
|
489
625
|
if config.get('service') is not None:
|
|
490
626
|
config['service'] = _fill_in_env_vars(config['service'],
|
|
491
|
-
|
|
627
|
+
env_and_secrets)
|
|
492
628
|
|
|
493
629
|
# Fill in any Task.envs into workdir
|
|
494
630
|
if config.get('workdir') is not None:
|
|
495
631
|
config['workdir'] = _fill_in_env_vars(config['workdir'],
|
|
496
|
-
|
|
632
|
+
env_and_secrets)
|
|
633
|
+
|
|
634
|
+
if config.get('volumes') is not None:
|
|
635
|
+
config['volumes'] = _fill_in_env_vars(config['volumes'],
|
|
636
|
+
env_and_secrets)
|
|
497
637
|
|
|
498
638
|
task = Task(
|
|
499
639
|
config.pop('name', None),
|
|
@@ -502,8 +642,12 @@ class Task:
|
|
|
502
642
|
setup=config.pop('setup', None),
|
|
503
643
|
num_nodes=config.pop('num_nodes', None),
|
|
504
644
|
envs=config.pop('envs', None),
|
|
645
|
+
secrets=config.pop('secrets', None),
|
|
646
|
+
volumes=config.pop('volumes', None),
|
|
505
647
|
event_callback=config.pop('event_callback', None),
|
|
506
|
-
|
|
648
|
+
_file_mounts_mapping=config.pop('file_mounts_mapping', None),
|
|
649
|
+
_metadata=config.pop('_metadata', None),
|
|
650
|
+
_user_specified_yaml=user_specified_yaml,
|
|
507
651
|
)
|
|
508
652
|
|
|
509
653
|
# Create lists to store storage objects inlined in file_mounts.
|
|
@@ -511,6 +655,7 @@ class Task:
|
|
|
511
655
|
# storage objects with the storage/storage_mount objects.
|
|
512
656
|
fm_storages = []
|
|
513
657
|
file_mounts = config.pop('file_mounts', None)
|
|
658
|
+
volumes = []
|
|
514
659
|
if file_mounts is not None:
|
|
515
660
|
copy_mounts = {}
|
|
516
661
|
for dst_path, src in file_mounts.items():
|
|
@@ -520,7 +665,27 @@ class Task:
|
|
|
520
665
|
# If the src is not a str path, it is likely a dict. Try to
|
|
521
666
|
# parse storage object.
|
|
522
667
|
elif isinstance(src, dict):
|
|
523
|
-
|
|
668
|
+
if (src.get('store') ==
|
|
669
|
+
storage_lib.StoreType.VOLUME.value.lower()):
|
|
670
|
+
# Build the volumes config for resources.
|
|
671
|
+
volume_config = {
|
|
672
|
+
'path': dst_path,
|
|
673
|
+
}
|
|
674
|
+
if src.get('name'):
|
|
675
|
+
volume_config['name'] = src.get('name')
|
|
676
|
+
persistent = src.get('persistent', False)
|
|
677
|
+
volume_config['auto_delete'] = not persistent
|
|
678
|
+
volume_config_detail = src.get('config', {})
|
|
679
|
+
volume_config.update(volume_config_detail)
|
|
680
|
+
volumes.append(volume_config)
|
|
681
|
+
source_path = src.get('source')
|
|
682
|
+
if source_path:
|
|
683
|
+
# For volume, copy the source path to the
|
|
684
|
+
# data directory of the volume mount point.
|
|
685
|
+
copy_mounts[
|
|
686
|
+
f'{dst_path.rstrip("/")}/data'] = source_path
|
|
687
|
+
else:
|
|
688
|
+
fm_storages.append((dst_path, src))
|
|
524
689
|
else:
|
|
525
690
|
with ux_utils.print_exception_no_traceback():
|
|
526
691
|
raise ValueError(f'Unable to parse file_mount '
|
|
@@ -559,34 +724,9 @@ class Task:
|
|
|
559
724
|
task.set_outputs(outputs=outputs,
|
|
560
725
|
estimated_size_gigabytes=estimated_size_gigabytes)
|
|
561
726
|
|
|
562
|
-
# Experimental configs.
|
|
563
|
-
experimental_configs = config.pop('experimental', None)
|
|
564
|
-
|
|
565
727
|
# Handle the top-level config field
|
|
566
728
|
config_override = config.pop('config', None)
|
|
567
729
|
|
|
568
|
-
# Handle backward compatibility with experimental.config_overrides
|
|
569
|
-
# TODO: Remove experimental.config_overrides in 0.11.0.
|
|
570
|
-
if experimental_configs is not None:
|
|
571
|
-
exp_config_override = experimental_configs.pop(
|
|
572
|
-
'config_overrides', None)
|
|
573
|
-
if exp_config_override is not None:
|
|
574
|
-
logger.warning(
|
|
575
|
-
f'{colorama.Fore.YELLOW}`experimental.config_overrides` '
|
|
576
|
-
'field is deprecated in the task YAML. Use the `config` '
|
|
577
|
-
f'field to set config overrides.{colorama.Style.RESET_ALL}')
|
|
578
|
-
if config_override is not None:
|
|
579
|
-
logger.warning(
|
|
580
|
-
f'{colorama.Fore.YELLOW}Both top-level `config` and '
|
|
581
|
-
f'`experimental.config_overrides` are specified. '
|
|
582
|
-
f'Using top-level `config`.{colorama.Style.RESET_ALL}')
|
|
583
|
-
else:
|
|
584
|
-
config_override = exp_config_override
|
|
585
|
-
logger.debug('Overriding skypilot config with task-level config: '
|
|
586
|
-
f'{config_override}')
|
|
587
|
-
assert not experimental_configs, ('Invalid task args: '
|
|
588
|
-
f'{experimental_configs.keys()}')
|
|
589
|
-
|
|
590
730
|
# Store the final config override for use in resource setup
|
|
591
731
|
cluster_config_override = config_override
|
|
592
732
|
|
|
@@ -598,12 +738,35 @@ class Task:
|
|
|
598
738
|
'experimental.config_overrides')
|
|
599
739
|
resources_config[
|
|
600
740
|
'_cluster_config_overrides'] = cluster_config_override
|
|
601
|
-
|
|
741
|
+
if volumes:
|
|
742
|
+
resources_config['volumes'] = volumes
|
|
743
|
+
task.set_resources(
|
|
744
|
+
resources_lib.Resources.from_yaml_config(resources_config))
|
|
602
745
|
|
|
603
746
|
service = config.pop('service', None)
|
|
747
|
+
pool = config.pop('pool', None)
|
|
748
|
+
if service is not None and pool is not None:
|
|
749
|
+
with ux_utils.print_exception_no_traceback():
|
|
750
|
+
raise ValueError(
|
|
751
|
+
'Cannot set both service and pool in the same task.')
|
|
752
|
+
|
|
604
753
|
if service is not None:
|
|
605
754
|
service = service_spec.SkyServiceSpec.from_yaml_config(service)
|
|
606
|
-
|
|
755
|
+
task.set_service(service)
|
|
756
|
+
elif pool is not None:
|
|
757
|
+
pool['pool'] = True
|
|
758
|
+
pool = service_spec.SkyServiceSpec.from_yaml_config(pool)
|
|
759
|
+
task.set_service(pool)
|
|
760
|
+
|
|
761
|
+
volume_mounts = config.pop('volume_mounts', None)
|
|
762
|
+
if volume_mounts is not None:
|
|
763
|
+
task.volume_mounts = []
|
|
764
|
+
for vol in volume_mounts:
|
|
765
|
+
common_utils.validate_schema(vol,
|
|
766
|
+
schemas.get_volume_mount_schema(),
|
|
767
|
+
'Invalid volume mount config: ')
|
|
768
|
+
volume_mount = volume_lib.VolumeMount.from_yaml_config(vol)
|
|
769
|
+
task.volume_mounts.append(volume_mount)
|
|
607
770
|
|
|
608
771
|
assert not config, f'Invalid task args: {config.keys()}'
|
|
609
772
|
return task
|
|
@@ -628,17 +791,140 @@ class Task:
|
|
|
628
791
|
# TODO(zongheng): use
|
|
629
792
|
# https://github.com/yaml/pyyaml/issues/165#issuecomment-430074049
|
|
630
793
|
# to raise errors on duplicate keys.
|
|
631
|
-
|
|
794
|
+
user_specified_yaml = f.read()
|
|
795
|
+
return Task.from_yaml_str(user_specified_yaml)
|
|
796
|
+
|
|
797
|
+
@staticmethod
|
|
798
|
+
def from_yaml_str(yaml_str: str) -> 'Task':
|
|
799
|
+
"""Initializes a task from a task YAML string.
|
|
800
|
+
|
|
801
|
+
Example:
|
|
802
|
+
.. code-block:: python
|
|
803
|
+
|
|
804
|
+
task = sky.Task.from_yaml_str('yaml_str')
|
|
805
|
+
"""
|
|
806
|
+
config = yaml_utils.safe_load(yaml_str)
|
|
632
807
|
|
|
633
808
|
if isinstance(config, str):
|
|
634
809
|
with ux_utils.print_exception_no_traceback():
|
|
635
810
|
raise ValueError('YAML loaded as str, not as dict. '
|
|
636
|
-
f'Is it correct?
|
|
811
|
+
f'Is it correct? content:\n{yaml_str}')
|
|
637
812
|
|
|
638
813
|
if config is None:
|
|
639
814
|
config = {}
|
|
815
|
+
config['_user_specified_yaml'] = yaml_str
|
|
640
816
|
return Task.from_yaml_config(config)
|
|
641
817
|
|
|
818
|
+
def resolve_and_validate_volumes(self) -> None:
|
|
819
|
+
"""Resolve volumes config to volume mounts and validate them.
|
|
820
|
+
|
|
821
|
+
Raises:
|
|
822
|
+
exceptions.VolumeNotFoundError: if any volume is not found.
|
|
823
|
+
exceptions.VolumeTopologyConflictError: if there is conflict in the
|
|
824
|
+
volumes and compute topology.
|
|
825
|
+
"""
|
|
826
|
+
# Volumes has been resolved, a typical case is that the API server
|
|
827
|
+
# has resolved the volumes and the dag was then submitted to
|
|
828
|
+
# controllers.
|
|
829
|
+
if self.volume_mounts is not None:
|
|
830
|
+
return None
|
|
831
|
+
if not self._volumes:
|
|
832
|
+
return None
|
|
833
|
+
volume_mounts: List[volume_lib.VolumeMount] = []
|
|
834
|
+
for dst_path, vol in self._volumes.items():
|
|
835
|
+
self._validate_mount_path(dst_path, location='volumes')
|
|
836
|
+
# Shortcut for `dst_path: volume_name` (external persistent volume)
|
|
837
|
+
if isinstance(vol, str):
|
|
838
|
+
volume_mount = volume_lib.VolumeMount.resolve(dst_path, vol)
|
|
839
|
+
elif isinstance(vol, dict):
|
|
840
|
+
# Check if this is an ephemeral volume config or external volume
|
|
841
|
+
# with 'size' field
|
|
842
|
+
if 'size' in vol:
|
|
843
|
+
# This is an ephemeral volume config
|
|
844
|
+
volume_mount = (
|
|
845
|
+
volume_lib.VolumeMount.resolve_ephemeral_config(
|
|
846
|
+
dst_path, vol))
|
|
847
|
+
elif 'name' in vol:
|
|
848
|
+
# External volume with 'name' field
|
|
849
|
+
volume_mount = volume_lib.VolumeMount.resolve(
|
|
850
|
+
dst_path, vol['name'])
|
|
851
|
+
else:
|
|
852
|
+
raise ValueError(
|
|
853
|
+
f'Invalid volume config: {dst_path}: {vol}. '
|
|
854
|
+
'Either "size" (for ephemeral volume) or "name" '
|
|
855
|
+
'(for external volume) must be set.')
|
|
856
|
+
else:
|
|
857
|
+
raise ValueError(f'Invalid volume config: {dst_path}: {vol}')
|
|
858
|
+
volume_mounts.append(volume_mount)
|
|
859
|
+
# Disable certain access modes
|
|
860
|
+
disabled_modes = {}
|
|
861
|
+
if self.num_nodes > 1:
|
|
862
|
+
disabled_modes[
|
|
863
|
+
volume_lib.VolumeAccessMode.READ_WRITE_ONCE.value] = (
|
|
864
|
+
'access mode ReadWriteOnce is not supported for '
|
|
865
|
+
'multi-node tasks.')
|
|
866
|
+
disabled_modes[
|
|
867
|
+
volume_lib.VolumeAccessMode.READ_WRITE_ONCE_POD.value] = (
|
|
868
|
+
'access mode ReadWriteOncePod is not supported for '
|
|
869
|
+
'multi-node tasks.')
|
|
870
|
+
# TODO(aylei): generalize access mode to all volume types
|
|
871
|
+
# Record the required topology and the volume that requires it, e.g.
|
|
872
|
+
# {'cloud': ('volume_name', 'aws')}
|
|
873
|
+
topology: Dict[str, Tuple[str, Optional[str]]] = {
|
|
874
|
+
'cloud': ('', None),
|
|
875
|
+
'region': ('', None),
|
|
876
|
+
'zone': ('', None),
|
|
877
|
+
}
|
|
878
|
+
for vol in volume_mounts:
|
|
879
|
+
# Check access mode
|
|
880
|
+
access_mode = vol.volume_config.config.get('access_mode', '')
|
|
881
|
+
if access_mode in disabled_modes:
|
|
882
|
+
raise ValueError(f'Volume {vol.volume_name} with '
|
|
883
|
+
f'{disabled_modes[access_mode]}')
|
|
884
|
+
# Skip ephemeral volumes for topology check
|
|
885
|
+
if vol.is_ephemeral:
|
|
886
|
+
continue
|
|
887
|
+
# Check topology
|
|
888
|
+
for key, (vol_name, previous_req) in topology.items():
|
|
889
|
+
req = getattr(vol.volume_config, key)
|
|
890
|
+
if req is not None:
|
|
891
|
+
if previous_req is not None and req != previous_req:
|
|
892
|
+
raise exceptions.VolumeTopologyConflictError(
|
|
893
|
+
f'Volume {vol.volume_name} can only be attached on '
|
|
894
|
+
f'{key}:{req}, which conflicts with another volume '
|
|
895
|
+
f'{vol_name} that requires {key}:{previous_req}.'
|
|
896
|
+
f'Please use different volumes and retry.')
|
|
897
|
+
topology[key] = (vol_name, req)
|
|
898
|
+
# Now we have the topology requirements from the intersection of all
|
|
899
|
+
# volumes. Check if there is topology conflict with the resources.
|
|
900
|
+
# Volume must have no conflict with ALL resources even if user
|
|
901
|
+
# specifies 'any_of' resources to ensure no resources will conflict
|
|
902
|
+
# with the volumes during failover.
|
|
903
|
+
|
|
904
|
+
for res in self.resources:
|
|
905
|
+
for key, (vol_name, vol_req) in topology.items():
|
|
906
|
+
req = getattr(res, key)
|
|
907
|
+
if (req is not None and vol_req is not None and
|
|
908
|
+
str(req) != vol_req):
|
|
909
|
+
raise exceptions.VolumeTopologyConflictError(
|
|
910
|
+
f'The task requires {key}:{req}, which conflicts with '
|
|
911
|
+
f'the volume constraint {key}:{vol_req}. Please '
|
|
912
|
+
f'use different volumes and retry.')
|
|
913
|
+
# No topology conflict, we safely override the topology of resources to
|
|
914
|
+
# satisfy the volume constraints.
|
|
915
|
+
override_params = {}
|
|
916
|
+
for key, (vol_name, vol_req) in topology.items():
|
|
917
|
+
if vol_req is not None:
|
|
918
|
+
if key == 'cloud':
|
|
919
|
+
override_params[key] = registry.CLOUD_REGISTRY.from_str(
|
|
920
|
+
vol_req)
|
|
921
|
+
else:
|
|
922
|
+
override_params[key] = vol_req
|
|
923
|
+
logger.debug(
|
|
924
|
+
f'Override resources with volume constraints: {override_params}')
|
|
925
|
+
self.set_resources_override(override_params)
|
|
926
|
+
self.volume_mounts = volume_mounts
|
|
927
|
+
|
|
642
928
|
@property
def num_nodes(self) -> int:
    """Returns the node count currently stored in ``self._num_nodes``."""
    node_count = self._num_nodes
    return node_count
|
|
@@ -653,10 +939,42 @@ class Task:
|
|
|
653
939
|
f'num_nodes should be a positive int. Got: {num_nodes}')
|
|
654
940
|
self._num_nodes = num_nodes
|
|
655
941
|
|
|
942
|
+
@property
def metadata(self) -> Dict[str, Any]:
    """Returns the metadata dict held in ``self._metadata``.

    The stored object itself is returned (no copy is made).
    """
    task_metadata = self._metadata
    return task_metadata
|
|
945
|
+
|
|
946
|
+
@property
def metadata_json(self) -> str:
    """Returns ``self._metadata`` serialized with ``json.dumps``."""
    serialized = json.dumps(self._metadata)
    return serialized
|
|
949
|
+
|
|
656
950
|
@property
def envs(self) -> Dict[str, str]:
    """Returns the environment-variable mapping held in ``self._envs``.

    The stored object itself is returned (no copy is made).
    """
    env_vars = self._envs
    return env_vars
|
|
659
953
|
|
|
954
|
+
@property
def secrets(self) -> Dict[str, SecretStr]:
    """Returns the secrets mapping held in ``self._secrets``.

    The stored object itself is returned (no copy is made).
    """
    secret_vars = self._secrets
    return secret_vars
|
|
957
|
+
|
|
958
|
+
@property
def volumes(self) -> Dict[str, Union[str, Dict[str, Any]]]:
    """Returns the volumes mapping held in ``self._volumes``.

    The stored object itself is returned (no copy is made).
    """
    task_volumes = self._volumes
    return task_volumes
|
|
961
|
+
|
|
962
|
+
def set_volumes(self, volumes: Dict[str, Union[str, Dict[str,
                                                         Any]]]) -> None:
    """Replaces the volumes of this task.

    Args:
        volumes: mapping keyed by mount path. A value is either a volume
            name (``{mount_path: volume_name}``, external persistent
            volume) or a volume config dict
            (``{mount_path: volume_config}``, ephemeral volume).
    """
    # The given mapping is stored as-is; no copy is taken.
    setattr(self, '_volumes', volumes)
|
|
972
|
+
|
|
973
|
+
def update_volumes(self, volumes: Dict[str, Union[str, Dict[str,
                                                            Any]]]) -> None:
    """Merges ``volumes`` into the task's existing volumes.

    Entries in ``volumes`` overwrite existing entries with the same key
    (``dict.update()`` semantics).
    """
    current_volumes = self._volumes
    current_volumes.update(volumes)
|
|
977
|
+
|
|
660
978
|
def update_envs(
|
|
661
979
|
self, envs: Union[None, List[Tuple[str, str]],
|
|
662
980
|
Dict[str, str]]) -> 'Task':
|
|
@@ -697,17 +1015,71 @@ class Task:
|
|
|
697
1015
|
# If the update_envs() is called after set_resources(), we need to
|
|
698
1016
|
# manually update docker login config in task resources, in case the
|
|
699
1017
|
# docker login envs are newly added.
|
|
700
|
-
if _check_docker_login_config(self._envs):
|
|
1018
|
+
if _check_docker_login_config(self._envs, self._secrets):
|
|
1019
|
+
self.resources = _with_docker_login_config(self.resources,
|
|
1020
|
+
self._envs,
|
|
1021
|
+
self._secrets)
|
|
1022
|
+
self.resources = _with_docker_username_for_runpod(
|
|
1023
|
+
self.resources, self._envs, self._secrets)
|
|
1024
|
+
return self
|
|
1025
|
+
|
|
1026
|
+
def update_secrets(
        self, secrets: Union[None, List[Tuple[str, str]],
                             Dict[str, str]]) -> 'Task':
    """Adds or overwrites secret env vars for the setup/run commands.

    Args:
        secrets: (optional) either a list of ``(secret_name, value)``
            pairs or a dict ``{secret_name: value}``. ``None`` is treated
            as an empty dict.

    Returns:
        self: The current task, with secrets updated.

    Raises:
        ValueError: on duplicate keys in a list input, non-string or
            invalid keys, or an input that is neither list/tuple nor dict.
    """
    secrets = {} if secrets is None else secrets

    # Normalize a list/tuple of pairs to a dict, rejecting duplicates first
    # because dict() would silently keep only the last one.
    if isinstance(secrets, (list, tuple)):
        distinct_names = {pair[0] for pair in secrets}
        if len(distinct_names) != len(secrets):
            with ux_utils.print_exception_no_traceback():
                raise ValueError('Duplicate secret keys provided.')
        secrets = dict(secrets)

    if not isinstance(secrets, dict):
        with ux_utils.print_exception_no_traceback():
            raise ValueError(
                'secrets must be List[Tuple[str, str]] or Dict[str, str]: '
                f'{secrets}')

    for key in secrets:
        if not isinstance(key, str):
            with ux_utils.print_exception_no_traceback():
                raise ValueError('Secret keys must be strings.')
        if not common_utils.is_valid_env_var(key):
            with ux_utils.print_exception_no_traceback():
                raise ValueError(f'Invalid secret key: {key}')

    # Values are wrapped in SecretStr before being stored.
    for key, raw_value in secrets.items():
        self._secrets[key] = SecretStr(raw_value)

    # Re-derive the docker login config on the resources, since the new
    # secrets may have introduced the docker login variables.
    if _check_docker_login_config(self._envs, self._secrets):
        self.resources = _with_docker_login_config(self.resources,
                                                   self._envs,
                                                   self._secrets)
    self.resources = _with_docker_username_for_runpod(
        self.resources, self._envs, self._secrets)
    return self
|
|
706
1072
|
|
|
707
1073
|
@property
def use_spot(self) -> bool:
    """Returns True if at least one of the task's resources uses spot."""
    for candidate in self.resources:
        if candidate.use_spot:
            return True
    return False
|
|
710
1076
|
|
|
1077
|
+
@property
def envs_and_secrets(self) -> Dict[str, Union[str, SecretStr]]:
    """Returns a new dict combining ``self.envs`` and ``self.secrets``.

    When a key is present in both, the value from ``self.secrets`` wins.
    Neither underlying mapping is modified.
    """
    return {**self.envs, **self.secrets}
|
|
1082
|
+
|
|
711
1083
|
def set_inputs(self, inputs: str,
|
|
712
1084
|
estimated_size_gigabytes: float) -> 'Task':
|
|
713
1085
|
# E.g., 's3://bucket', 'gs://bucket', or None.
|
|
@@ -749,7 +1121,7 @@ class Task:
|
|
|
749
1121
|
def set_resources(
|
|
750
1122
|
self, resources: Union['resources_lib.Resources',
|
|
751
1123
|
List['resources_lib.Resources'],
|
|
752
|
-
Set['resources_lib.Resources']]
|
|
1124
|
+
Set['resources_lib.Resources'], Dict[str, Any]]
|
|
753
1125
|
) -> 'Task':
|
|
754
1126
|
"""Sets the required resources to execute this task.
|
|
755
1127
|
|
|
@@ -763,19 +1135,22 @@ class Task:
|
|
|
763
1135
|
Returns:
|
|
764
1136
|
self: The current task, with resources set.
|
|
765
1137
|
"""
|
|
766
|
-
if isinstance(resources,
|
|
1138
|
+
if isinstance(resources, dict):
|
|
1139
|
+
resources = resources_lib.Resources.from_yaml_config(resources)
|
|
1140
|
+
elif isinstance(resources, resources_lib.Resources):
|
|
767
1141
|
resources = {resources}
|
|
768
1142
|
# TODO(woosuk): Check if the resources are None.
|
|
769
|
-
self.resources = _with_docker_login_config(resources, self.envs
|
|
1143
|
+
self.resources = _with_docker_login_config(resources, self.envs,
|
|
1144
|
+
self.secrets)
|
|
770
1145
|
# Only have effect on RunPod.
|
|
771
1146
|
self.resources = _with_docker_username_for_runpod(
|
|
772
|
-
self.resources, self.envs)
|
|
1147
|
+
self.resources, self.envs, self.secrets)
|
|
773
1148
|
|
|
774
1149
|
# Evaluate if the task requires FUSE and set the requires_fuse flag
|
|
775
1150
|
for _, storage_obj in self.storage_mounts.items():
|
|
776
1151
|
if storage_obj.mode in storage_lib.MOUNTABLE_STORAGE_MODES:
|
|
777
1152
|
for r in self.resources:
|
|
778
|
-
r.
|
|
1153
|
+
r.set_requires_fuse(True)
|
|
779
1154
|
break
|
|
780
1155
|
|
|
781
1156
|
return self
|
|
@@ -790,6 +1165,10 @@ class Task:
|
|
|
790
1165
|
self.set_resources(type(self.resources)(new_resources_list))
|
|
791
1166
|
return self
|
|
792
1167
|
|
|
1168
|
+
def get_resource_config(self) -> Dict[str, Any]:
    """Returns the config dict for this task's resources.

    Delegates to ``_resources_to_config`` with
    ``factor_out_common_fields=True``.
    """
    resource_config = _resources_to_config(self.resources,
                                           factor_out_common_fields=True)
    return resource_config
|
|
1171
|
+
|
|
793
1172
|
@property
def service(self) -> Optional[service_spec.SkyServiceSpec]:
    """Returns the service spec held in ``self._service`` (may be None)."""
    spec = self._service
    return spec
|
|
@@ -807,8 +1186,8 @@ class Task:
|
|
|
807
1186
|
self._service = service
|
|
808
1187
|
return self
|
|
809
1188
|
|
|
810
|
-
def set_time_estimator(
|
|
811
|
-
|
|
1189
|
+
def set_time_estimator(
|
|
1190
|
+
self, func: Callable[['resources_lib.Resources'], int]) -> 'Task':
|
|
812
1191
|
"""Sets a func mapping resources to estimated time (secs).
|
|
813
1192
|
|
|
814
1193
|
This is EXPERIMENTAL.
|
|
@@ -864,7 +1243,7 @@ class Task:
|
|
|
864
1243
|
|
|
865
1244
|
Different from set_file_mounts(), this function updates into the
|
|
866
1245
|
existing file_mounts (calls ``dict.update()``), rather than
|
|
867
|
-
|
|
1246
|
+
overwriting it.
|
|
868
1247
|
|
|
869
1248
|
This should be called before provisioning in order to take effect.
|
|
870
1249
|
|
|
@@ -931,7 +1310,7 @@ class Task:
|
|
|
931
1310
|
self.storage_mounts = {}
|
|
932
1311
|
# Clear the requires_fuse flag if no storage mounts are set.
|
|
933
1312
|
for r in self.resources:
|
|
934
|
-
r.
|
|
1313
|
+
r.set_requires_fuse(False)
|
|
935
1314
|
return self
|
|
936
1315
|
for target, storage_obj in storage_mounts.items():
|
|
937
1316
|
# TODO(zhwu): /home/username/sky_workdir as the target path need
|
|
@@ -956,7 +1335,7 @@ class Task:
|
|
|
956
1335
|
# If any storage is using MOUNT mode, we need to enable FUSE in
|
|
957
1336
|
# the resources.
|
|
958
1337
|
for r in self.resources:
|
|
959
|
-
r.
|
|
1338
|
+
r.set_requires_fuse(True)
|
|
960
1339
|
# Storage source validation is done in Storage object
|
|
961
1340
|
self.storage_mounts = storage_mounts
|
|
962
1341
|
return self
|
|
@@ -1170,6 +1549,16 @@ class Task:
|
|
|
1170
1549
|
self.update_file_mounts({
|
|
1171
1550
|
mnt_path: blob_path,
|
|
1172
1551
|
})
|
|
1552
|
+
elif store_type is storage_lib.StoreType.COREWEAVE:
|
|
1553
|
+
if storage.source is not None and not isinstance(
|
|
1554
|
+
storage.source,
|
|
1555
|
+
list) and storage.source.startswith('cw://'):
|
|
1556
|
+
blob_path = storage.source
|
|
1557
|
+
else:
|
|
1558
|
+
blob_path = 'cw://' + storage.name
|
|
1559
|
+
self.update_file_mounts({
|
|
1560
|
+
mnt_path: blob_path,
|
|
1561
|
+
})
|
|
1173
1562
|
else:
|
|
1174
1563
|
with ux_utils.print_exception_no_traceback():
|
|
1175
1564
|
raise ValueError(f'Storage Type {store_type} '
|
|
@@ -1219,11 +1608,85 @@ class Task:
|
|
|
1219
1608
|
d[k] = v
|
|
1220
1609
|
return d
|
|
1221
1610
|
|
|
1222
|
-
def
|
|
1611
|
+
def update_workdir(self, workdir: Optional[str], git_url: Optional[str],
                   git_ref: Optional[str]) -> 'Task':
    """Overrides the task workdir with a local path or git settings.

    Args:
        workdir: Local workdir path. Only applied when the current workdir
            is unset or a string; takes precedence over ``git_url``.
        git_url: Git URL to store under the ``'url'`` key.
        git_ref: Git ref to store under the ``'ref'`` key.

    Returns:
        self: The current task.
    """
    has_git_workdir = not (self.workdir is None or
                           isinstance(self.workdir, str))
    if has_git_workdir:
        # Existing dict-style workdir: patch the provided fields in place.
        if git_url is not None:
            self.workdir['url'] = git_url
        if git_ref is not None:
            self.workdir['ref'] = git_ref
        return self

    if workdir is not None:
        self.workdir = workdir
    elif git_url is not None:
        # Switch from an unset/string workdir to a git one. A ref without
        # a URL is ignored on this path.
        git_workdir = {'url': git_url}
        if git_ref is not None:
            git_workdir['ref'] = git_ref
        self.workdir = git_workdir
    return self
|
|
1636
|
+
|
|
1637
|
+
def update_envs_and_secrets_from_workdir(self) -> 'Task':
    """Populates git-related envs and secrets from a dict-style workdir.

    Does nothing unless ``self.workdir`` is a dict. Otherwise the repo's
    clone info is resolved via ``git.GitRepo`` and written into
    ``self.envs`` (URL and, when a ref is given, the commit/branch/tag
    variable) and ``self.secrets`` (token and/or SSH key, when present).

    Returns:
        self: The current task.

    Raises:
        ValueError: if resolving the repo raises ``exceptions.GitError``.
    """
    # Covers both the unset (None) and the plain-string workdir cases.
    if not isinstance(self.workdir, dict):
        return self

    url = self.workdir['url']
    ref = self.workdir.get('ref', '')
    token = os.environ.get(git.GIT_TOKEN_ENV_VAR)
    ssh_key_path = os.environ.get(git.GIT_SSH_KEY_PATH_ENV_VAR)
    try:
        git_repo = git.GitRepo(url, ref, token, ssh_key_path)
        clone_info = git_repo.get_repo_clone_info()
        if clone_info is None:
            return self

        self.envs[git.GIT_URL_ENV_VAR] = clone_info.url
        if ref:
            ref_type = git_repo.get_ref_type()
            # Record the ref under the env var matching its kind; a ref
            # type outside these three sets nothing.
            env_var_by_ref_type = (
                (git.GitRefType.COMMIT, git.GIT_COMMIT_HASH_ENV_VAR),
                (git.GitRefType.BRANCH, git.GIT_BRANCH_ENV_VAR),
                (git.GitRefType.TAG, git.GIT_TAG_ENV_VAR),
            )
            for known_type, env_var in env_var_by_ref_type:
                if ref_type == known_type:
                    self.envs[env_var] = ref
                    break

        # Credentials go into secrets rather than envs.
        if clone_info.token is not None:
            self.secrets[git.GIT_TOKEN_ENV_VAR] = SecretStr(
                clone_info.token)
        if clone_info.ssh_key is not None:
            self.secrets[git.GIT_SSH_KEY_ENV_VAR] = SecretStr(
                clone_info.ssh_key)
    except exceptions.GitError as e:
        with ux_utils.print_exception_no_traceback():
            raise ValueError(f'{str(e)}') from None
    return self
|
|
1673
|
+
|
|
1674
|
+
def to_yaml_config(self,
                   use_user_specified_yaml: bool = False) -> Dict[str, Any]:
    """Returns a yaml-style dict representation of the task.

    INTERNAL: this method is internal-facing.

    Args:
        use_user_specified_yaml: if True, the result is loaded from
            ``self._user_specified_yaml`` with every value under
            ``secrets`` replaced by ``'<redacted>'``. If no such yaml is
            stored, the generated config with redacted secrets is returned
            instead. If False, the generated config is returned without
            redaction.
    """
    if not use_user_specified_yaml:
        return self._to_yaml_config()

    raw_yaml = self._user_specified_yaml
    if raw_yaml is None:
        return self._to_yaml_config(redact_secrets=True)

    config = yaml_utils.safe_load(raw_yaml)
    user_secrets = config.get('secrets')
    if user_secrets is not None:
        # Keep the secret names but never echo their values back.
        config['secrets'] = dict.fromkeys(user_secrets, '<redacted>')
    return config
|
|
1688
|
+
|
|
1689
|
+
def _to_yaml_config(self, redact_secrets: bool = False) -> Dict[str, Any]:
|
|
1227
1690
|
config = {}
|
|
1228
1691
|
|
|
1229
1692
|
def add_if_not_none(key, value, no_empty: bool = False):
|
|
@@ -1234,15 +1697,7 @@ class Task:
|
|
|
1234
1697
|
|
|
1235
1698
|
add_if_not_none('name', self.name)
|
|
1236
1699
|
|
|
1237
|
-
tmp_resource_config =
|
|
1238
|
-
if len(self.resources) > 1:
|
|
1239
|
-
resource_list = []
|
|
1240
|
-
for r in self.resources:
|
|
1241
|
-
resource_list.append(r.to_yaml_config())
|
|
1242
|
-
key = 'ordered' if isinstance(self.resources, list) else 'any_of'
|
|
1243
|
-
tmp_resource_config[key] = resource_list
|
|
1244
|
-
else:
|
|
1245
|
-
tmp_resource_config = list(self.resources)[0].to_yaml_config()
|
|
1700
|
+
tmp_resource_config = _resources_to_config(self.resources)
|
|
1246
1701
|
|
|
1247
1702
|
add_if_not_none('resources', tmp_resource_config)
|
|
1248
1703
|
|
|
@@ -1263,8 +1718,17 @@ class Task:
|
|
|
1263
1718
|
add_if_not_none('workdir', self.workdir)
|
|
1264
1719
|
add_if_not_none('event_callback', self.event_callback)
|
|
1265
1720
|
add_if_not_none('run', self.run)
|
|
1721
|
+
|
|
1722
|
+
# Add envs without redaction
|
|
1266
1723
|
add_if_not_none('envs', self.envs, no_empty=True)
|
|
1267
1724
|
|
|
1725
|
+
secrets = self.secrets
|
|
1726
|
+
if secrets and not redact_secrets:
|
|
1727
|
+
secrets = {k: v.get_secret_value() for k, v in secrets.items()}
|
|
1728
|
+
elif secrets and redact_secrets:
|
|
1729
|
+
secrets = {k: '<redacted>' for k, v in secrets.items()}
|
|
1730
|
+
add_if_not_none('secrets', secrets, no_empty=True)
|
|
1731
|
+
|
|
1268
1732
|
add_if_not_none('file_mounts', {})
|
|
1269
1733
|
|
|
1270
1734
|
if self.file_mounts is not None:
|
|
@@ -1277,6 +1741,15 @@ class Task:
|
|
|
1277
1741
|
})
|
|
1278
1742
|
|
|
1279
1743
|
add_if_not_none('file_mounts_mapping', self.file_mounts_mapping)
|
|
1744
|
+
add_if_not_none('volumes', self.volumes)
|
|
1745
|
+
if self.volume_mounts is not None:
|
|
1746
|
+
config['volume_mounts'] = [
|
|
1747
|
+
volume_mount.to_yaml_config()
|
|
1748
|
+
for volume_mount in self.volume_mounts
|
|
1749
|
+
]
|
|
1750
|
+
# We manually check if it's empty to not clog up the generated yaml.
|
|
1751
|
+
add_if_not_none('_metadata', self._metadata if self._metadata else None)
|
|
1752
|
+
add_if_not_none('_user_specified_yaml', self._user_specified_yaml)
|
|
1280
1753
|
return config
|
|
1281
1754
|
|
|
1282
1755
|
def get_required_cloud_features(
|
|
@@ -1304,7 +1777,12 @@ class Task:
|
|
|
1304
1777
|
return required_features
|
|
1305
1778
|
|
|
1306
1779
|
def __rshift__(self, b):
    """Adds an edge from this task to ``b`` in the current DAG.

    Raises:
        RuntimeError: if ``dag_lib.get_current_dag()`` returns None.
    """
    current_dag = dag_lib.get_current_dag()
    if current_dag is not None:
        current_dag.add_edge(self, b)
        return
    raise RuntimeError(
        'Cannot use >> operator outside of a DAG context. '
        'Please use "with sky.Dag() as dag:" to create a DAG context.')
|
|
1308
1786
|
|
|
1309
1787
|
def __repr__(self):
|
|
1310
1788
|
if isinstance(self.run, str):
|
|
@@ -1339,3 +1817,47 @@ class Task:
|
|
|
1339
1817
|
else:
|
|
1340
1818
|
s += '\n resources: default instances'
|
|
1341
1819
|
return s
|
|
1820
|
+
|
|
1821
|
+
|
|
1822
|
+
def _resources_to_config(
|
|
1823
|
+
resources: Union[List['resources_lib.Resources'],
|
|
1824
|
+
Set['resources_lib.Resources']],
|
|
1825
|
+
factor_out_common_fields: bool = False) -> Dict[str, Any]:
|
|
1826
|
+
if len(resources) > 1:
|
|
1827
|
+
resource_list: List[Dict[str, Union[str, int]]] = []
|
|
1828
|
+
for r in resources:
|
|
1829
|
+
resource_list.append(r.to_yaml_config())
|
|
1830
|
+
group_key = 'ordered' if isinstance(resources, list) else 'any_of'
|
|
1831
|
+
if factor_out_common_fields:
|
|
1832
|
+
return _factor_out_common_resource_fields(resource_list, group_key)
|
|
1833
|
+
return {group_key: resource_list}
|
|
1834
|
+
else:
|
|
1835
|
+
return list(resources)[0].to_yaml_config()
|
|
1836
|
+
|
|
1837
|
+
|
|
1838
|
+
def _factor_out_common_resource_fields(configs: List[Dict[str, Union[str,
|
|
1839
|
+
int]]],
|
|
1840
|
+
group_key: str) -> Dict[str, Any]:
|
|
1841
|
+
"""Factors out the fields that are common to all resources."""
|
|
1842
|
+
return_config: Dict[str, Any] = configs[0].copy()
|
|
1843
|
+
if len(configs) > 1:
|
|
1844
|
+
for config in configs[1:]:
|
|
1845
|
+
for key, value in config.items():
|
|
1846
|
+
if key in return_config and return_config[key] != value:
|
|
1847
|
+
del return_config[key]
|
|
1848
|
+
num_empty_configs = 0
|
|
1849
|
+
for config in configs:
|
|
1850
|
+
keys_to_delete = []
|
|
1851
|
+
for key, value in config.items():
|
|
1852
|
+
if key in return_config:
|
|
1853
|
+
keys_to_delete.append(key)
|
|
1854
|
+
for key in keys_to_delete:
|
|
1855
|
+
del config[key]
|
|
1856
|
+
if not config:
|
|
1857
|
+
num_empty_configs += 1
|
|
1858
|
+
|
|
1859
|
+
if num_empty_configs == len(configs):
|
|
1860
|
+
return return_config
|
|
1861
|
+
if len(configs) > 0:
|
|
1862
|
+
return_config[group_key] = configs
|
|
1863
|
+
return return_config
|