skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +81 -16
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +173 -30
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1323 -397
- sky/backends/cloud_vm_ray_backend.py +1749 -1029
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/task_codegen.py +633 -0
- sky/backends/wheel_utils.py +55 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +90 -49
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +338 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +533 -185
- sky/cli.py +5 -5975
- sky/client/{cli.py → cli/command.py} +2591 -1956
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +322 -0
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +78 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1219 -319
- sky/client/sdk_async.py +827 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +564 -164
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +140 -40
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +570 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +210 -81
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -86
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +231 -167
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +375 -173
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1460 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +126 -2
- sky/execution.py +216 -63
- sky/global_user_state.py +2390 -586
- sky/jobs/__init__.py +7 -0
- sky/jobs/client/sdk.py +300 -58
- sky/jobs/client/sdk_async.py +161 -0
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +848 -275
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +402 -152
- sky/jobs/scheduler.py +314 -189
- sky/jobs/server/core.py +836 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2109 -706
- sky/jobs/utils.py +1306 -215
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +453 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +136 -50
- sky/provision/azure/instance.py +11 -6
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +140 -33
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +101 -20
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +919 -280
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +44 -43
- sky/provision/kubernetes/utils.py +1221 -534
- sky/provision/kubernetes/volume.py +343 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +237 -137
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +117 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +214 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +707 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +4 -4
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +17 -20
- sky/py.typed +0 -0
- sky/resources.py +845 -119
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +227 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +12 -9
- sky/serve/controller.py +68 -17
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +116 -2
- sky/serve/replica_managers.py +434 -249
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +569 -257
- sky/serve/serve_utils.py +775 -265
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +192 -89
- sky/serve/service_spec.py +144 -20
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +202 -0
- sky/server/common.py +478 -182
- sky/server/config.py +85 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +295 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/middleware_utils.py +166 -0
- sky/server/requests/executor.py +558 -138
- sky/server/requests/payloads.py +364 -24
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +121 -0
- sky/server/requests/requests.py +822 -226
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +455 -0
- sky/server/server.py +1309 -285
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +327 -61
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +11 -1
- sky/setup_files/alembic.ini +160 -0
- sky/setup_files/dependencies.py +139 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +114 -7
- sky/skylet/attempt_skylet.py +106 -24
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +29 -20
- sky/skylet/constants.py +216 -25
- sky/skylet/events.py +101 -21
- sky/skylet/job_lib.py +345 -164
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +568 -0
- sky/skylet/skylet.py +72 -4
- sky/skylet/subprocess_daemon.py +104 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +685 -163
- sky/templates/aws-ray.yml.j2 +11 -3
- sky/templates/azure-ray.yml.j2 +2 -1
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +2 -1
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +62 -1
- sky/templates/hyperbolic-ray.yml.j2 +68 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +611 -50
- sky/templates/lambda-ray.yml.j2 +2 -1
- sky/templates/nebius-ray.yml.j2 +34 -12
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +2 -1
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +10 -1
- sky/templates/scp-ray.yml.j2 +4 -50
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/vast-ray.yml.j2 +1 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +212 -37
- sky/usage/usage_lib.py +31 -15
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +397 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +35 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +38 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +314 -91
- sky/utils/config_utils.py +74 -5
- sky/utils/context.py +403 -0
- sky/utils/context_utils.py +242 -0
- sky/utils/controller_utils.py +383 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +485 -0
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +137 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +15 -29
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +18 -8
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +416 -0
- sky/utils/log_utils.py +82 -107
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +217 -39
- sky/utils/schemas.py +955 -160
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +29 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +26 -53
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +165 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +150 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +270 -0
- sky/volumes/server/server.py +124 -0
- sky/volumes/volume.py +215 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
- skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
- skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/utils/common_utils.py
CHANGED
|
@@ -1,16 +1,19 @@
|
|
|
1
1
|
"""Utils shared between all of sky"""
|
|
2
2
|
|
|
3
|
+
import ctypes
|
|
3
4
|
import difflib
|
|
5
|
+
import enum
|
|
4
6
|
import functools
|
|
7
|
+
import gc
|
|
5
8
|
import getpass
|
|
6
9
|
import hashlib
|
|
7
10
|
import inspect
|
|
8
|
-
import io
|
|
9
11
|
import os
|
|
10
12
|
import platform
|
|
11
13
|
import random
|
|
12
14
|
import re
|
|
13
15
|
import socket
|
|
16
|
+
import subprocess
|
|
14
17
|
import sys
|
|
15
18
|
import time
|
|
16
19
|
import typing
|
|
@@ -20,6 +23,7 @@ import uuid
|
|
|
20
23
|
import jsonschema
|
|
21
24
|
|
|
22
25
|
from sky import exceptions
|
|
26
|
+
from sky import models
|
|
23
27
|
from sky import sky_logging
|
|
24
28
|
from sky.adaptors import common as adaptors_common
|
|
25
29
|
from sky.skylet import constants
|
|
@@ -31,13 +35,11 @@ from sky.utils import validator
|
|
|
31
35
|
if typing.TYPE_CHECKING:
|
|
32
36
|
import jinja2
|
|
33
37
|
import psutil
|
|
34
|
-
import yaml
|
|
35
38
|
else:
|
|
36
39
|
jinja2 = adaptors_common.LazyImport('jinja2')
|
|
37
40
|
psutil = adaptors_common.LazyImport('psutil')
|
|
38
|
-
yaml = adaptors_common.LazyImport('yaml')
|
|
39
41
|
|
|
40
|
-
|
|
42
|
+
USER_HASH_FILE = os.path.expanduser('~/.sky/user_hash')
|
|
41
43
|
USER_HASH_LENGTH = 8
|
|
42
44
|
|
|
43
45
|
# We are using base36 to reduce the length of the hash. 2 chars -> 36^2 = 1296
|
|
@@ -52,6 +54,25 @@ _VALID_ENV_VAR_REGEX = '[a-zA-Z_][a-zA-Z0-9_]*'
|
|
|
52
54
|
logger = sky_logging.init_logger(__name__)
|
|
53
55
|
|
|
54
56
|
|
|
57
|
+
class ProcessStatus(enum.Enum):
|
|
58
|
+
"""Process status."""
|
|
59
|
+
|
|
60
|
+
# The process is scheduled to run, but not started yet.
|
|
61
|
+
SCHEDULED = 'SCHEDULED'
|
|
62
|
+
|
|
63
|
+
# The process is running
|
|
64
|
+
RUNNING = 'RUNNING'
|
|
65
|
+
|
|
66
|
+
# The process is finished and succeeded
|
|
67
|
+
SUCCEEDED = 'SUCCEEDED'
|
|
68
|
+
|
|
69
|
+
# The process is interrupted
|
|
70
|
+
INTERRUPTED = 'INTERRUPTED'
|
|
71
|
+
|
|
72
|
+
# The process failed
|
|
73
|
+
FAILED = 'FAILED'
|
|
74
|
+
|
|
75
|
+
|
|
55
76
|
@annotations.lru_cache(scope='request')
|
|
56
77
|
def get_usage_run_id() -> str:
|
|
57
78
|
"""Returns a unique run id for each 'run'.
|
|
@@ -66,26 +87,37 @@ def get_usage_run_id() -> str:
|
|
|
66
87
|
return str(uuid.uuid4())
|
|
67
88
|
|
|
68
89
|
|
|
69
|
-
def
|
|
90
|
+
def is_valid_user_hash(user_hash: Optional[str]) -> bool:
|
|
70
91
|
if user_hash is None:
|
|
71
92
|
return False
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
return len(user_hash) == USER_HASH_LENGTH
|
|
93
|
+
# Must start with a letter, followed by alphanumeric characters and hyphens
|
|
94
|
+
# This covers both old hex format (e.g., "abc123") and new service account
|
|
95
|
+
# format (e.g., "sa-abc123-token-xyz")
|
|
96
|
+
return bool(re.match(r'^[a-zA-Z0-9][a-zA-Z0-9-]*$', user_hash))
|
|
77
97
|
|
|
78
98
|
|
|
79
99
|
def generate_user_hash() -> str:
|
|
80
100
|
"""Generates a unique user-machine specific hash."""
|
|
81
101
|
hash_str = user_and_hostname_hash()
|
|
82
102
|
user_hash = hashlib.md5(hash_str.encode()).hexdigest()[:USER_HASH_LENGTH]
|
|
83
|
-
if not
|
|
103
|
+
if not is_valid_user_hash(user_hash):
|
|
84
104
|
# A fallback in case the hash is invalid.
|
|
85
105
|
user_hash = uuid.uuid4().hex[:USER_HASH_LENGTH]
|
|
86
106
|
return user_hash
|
|
87
107
|
|
|
88
108
|
|
|
109
|
+
def get_git_commit(path: Optional[str] = None) -> Optional[str]:
|
|
110
|
+
try:
|
|
111
|
+
result = subprocess.run(['git', 'rev-parse', 'HEAD'],
|
|
112
|
+
capture_output=True,
|
|
113
|
+
text=True,
|
|
114
|
+
cwd=path,
|
|
115
|
+
check=True)
|
|
116
|
+
return result.stdout.strip()
|
|
117
|
+
except subprocess.CalledProcessError:
|
|
118
|
+
return None
|
|
119
|
+
|
|
120
|
+
|
|
89
121
|
def get_user_hash() -> str:
|
|
90
122
|
"""Returns a unique user-machine specific hash as a user id.
|
|
91
123
|
|
|
@@ -93,25 +125,30 @@ def get_user_hash() -> str:
|
|
|
93
125
|
hostname changes causing a new user hash to be generated.
|
|
94
126
|
"""
|
|
95
127
|
user_hash = os.getenv(constants.USER_ID_ENV_VAR)
|
|
96
|
-
if
|
|
128
|
+
if is_valid_user_hash(user_hash):
|
|
97
129
|
assert user_hash is not None
|
|
98
130
|
return user_hash
|
|
99
131
|
|
|
100
|
-
if os.path.exists(
|
|
132
|
+
if os.path.exists(USER_HASH_FILE):
|
|
101
133
|
# Read from cached user hash file.
|
|
102
|
-
with open(
|
|
134
|
+
with open(USER_HASH_FILE, 'r', encoding='utf-8') as f:
|
|
103
135
|
# Remove invalid characters.
|
|
104
136
|
user_hash = f.read().strip()
|
|
105
|
-
if
|
|
137
|
+
if is_valid_user_hash(user_hash):
|
|
106
138
|
return user_hash
|
|
107
139
|
|
|
108
140
|
user_hash = generate_user_hash()
|
|
109
|
-
|
|
110
|
-
with open(_USER_HASH_FILE, 'w', encoding='utf-8') as f:
|
|
111
|
-
f.write(user_hash)
|
|
141
|
+
set_user_hash_locally(user_hash)
|
|
112
142
|
return user_hash
|
|
113
143
|
|
|
114
144
|
|
|
145
|
+
def set_user_hash_locally(user_hash: str) -> None:
|
|
146
|
+
"""Sets the user hash to local file."""
|
|
147
|
+
os.makedirs(os.path.dirname(USER_HASH_FILE), exist_ok=True)
|
|
148
|
+
with open(USER_HASH_FILE, 'w', encoding='utf-8') as f:
|
|
149
|
+
f.write(user_hash)
|
|
150
|
+
|
|
151
|
+
|
|
115
152
|
def base36_encode(hex_str: str) -> str:
|
|
116
153
|
"""Converts a hex string to a base36 string."""
|
|
117
154
|
int_value = int(hex_str, 16)
|
|
@@ -228,13 +265,16 @@ def get_global_job_id(job_timestamp: str,
|
|
|
228
265
|
|
|
229
266
|
class Backoff:
|
|
230
267
|
"""Exponential backoff with jittering."""
|
|
231
|
-
MULTIPLIER = 1.6
|
|
232
268
|
JITTER = 0.4
|
|
233
269
|
|
|
234
|
-
def __init__(self,
|
|
270
|
+
def __init__(self,
|
|
271
|
+
initial_backoff: float = 5,
|
|
272
|
+
max_backoff_factor: int = 5,
|
|
273
|
+
multiplier: float = 1.6):
|
|
235
274
|
self._initial = True
|
|
236
275
|
self._backoff = 0.0
|
|
237
276
|
self._initial_backoff = initial_backoff
|
|
277
|
+
self._multiplier = multiplier
|
|
238
278
|
self._max_backoff = max_backoff_factor * self._initial_backoff
|
|
239
279
|
|
|
240
280
|
# https://github.com/grpc/grpc/blob/2d4f3c56001cd1e1f85734b2f7c5ce5f2797c38a/doc/connection-backoff.md
|
|
@@ -246,7 +286,7 @@ class Backoff:
|
|
|
246
286
|
self._initial = False
|
|
247
287
|
self._backoff = min(self._initial_backoff, self._max_backoff)
|
|
248
288
|
else:
|
|
249
|
-
self._backoff = min(self._backoff * self.
|
|
289
|
+
self._backoff = min(self._backoff * self._multiplier,
|
|
250
290
|
self._max_backoff)
|
|
251
291
|
self._backoff += random.uniform(-self.JITTER * self._backoff,
|
|
252
292
|
self.JITTER * self._backoff)
|
|
@@ -256,11 +296,14 @@ class Backoff:
|
|
|
256
296
|
_current_command: Optional[str] = None
|
|
257
297
|
_current_client_entrypoint: Optional[str] = None
|
|
258
298
|
_using_remote_api_server: Optional[bool] = None
|
|
299
|
+
_current_user: Optional['models.User'] = None
|
|
300
|
+
_current_request_id: Optional[str] = None
|
|
259
301
|
|
|
260
302
|
|
|
261
|
-
def
|
|
262
|
-
|
|
263
|
-
|
|
303
|
+
def set_request_context(client_entrypoint: Optional[str],
|
|
304
|
+
client_command: Optional[str],
|
|
305
|
+
using_remote_api_server: bool,
|
|
306
|
+
user: Optional['models.User'], request_id: str) -> None:
|
|
264
307
|
"""Override the current client entrypoint and command.
|
|
265
308
|
|
|
266
309
|
This is useful when we are on the SkyPilot API server side and we have a
|
|
@@ -269,9 +312,20 @@ def set_client_status(client_entrypoint: Optional[str],
|
|
|
269
312
|
global _current_command
|
|
270
313
|
global _current_client_entrypoint
|
|
271
314
|
global _using_remote_api_server
|
|
315
|
+
global _current_user
|
|
316
|
+
global _current_request_id
|
|
272
317
|
_current_command = client_command
|
|
273
318
|
_current_client_entrypoint = client_entrypoint
|
|
274
319
|
_using_remote_api_server = using_remote_api_server
|
|
320
|
+
_current_user = user
|
|
321
|
+
_current_request_id = request_id
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def get_current_request_id() -> str:
|
|
325
|
+
"""Returns the current request id."""
|
|
326
|
+
if _current_request_id is not None:
|
|
327
|
+
return _current_request_id
|
|
328
|
+
return 'dummy-request-id'
|
|
275
329
|
|
|
276
330
|
|
|
277
331
|
def get_current_command() -> str:
|
|
@@ -286,6 +340,26 @@ def get_current_command() -> str:
|
|
|
286
340
|
return get_pretty_entrypoint_cmd()
|
|
287
341
|
|
|
288
342
|
|
|
343
|
+
def get_current_user() -> 'models.User':
|
|
344
|
+
"""Returns the current user."""
|
|
345
|
+
if _current_user is not None:
|
|
346
|
+
return _current_user
|
|
347
|
+
return models.User.get_current_user()
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def get_current_user_name() -> str:
|
|
351
|
+
"""Returns the current user name."""
|
|
352
|
+
name = get_current_user().name
|
|
353
|
+
assert name is not None
|
|
354
|
+
return name
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def set_current_user(user: 'models.User'):
|
|
358
|
+
"""Sets the current user."""
|
|
359
|
+
global _current_user
|
|
360
|
+
_current_user = user
|
|
361
|
+
|
|
362
|
+
|
|
289
363
|
def get_current_client_entrypoint(server_entrypoint: str) -> str:
|
|
290
364
|
"""Returns the current client entrypoint.
|
|
291
365
|
|
|
@@ -324,9 +398,154 @@ def get_pretty_entrypoint_cmd() -> str:
|
|
|
324
398
|
# Turn '/.../anaconda/envs/py36/bin/sky' into 'sky', but keep other
|
|
325
399
|
# things like 'examples/app.py'.
|
|
326
400
|
argv[0] = basename
|
|
401
|
+
|
|
402
|
+
# Redact sensitive values from secrets arguments
|
|
403
|
+
argv = _redact_secrets_values(argv)
|
|
404
|
+
|
|
327
405
|
return ' '.join(argv)
|
|
328
406
|
|
|
329
407
|
|
|
408
|
+
def read_last_n_lines(file_path: str,
|
|
409
|
+
n: int,
|
|
410
|
+
chunk_size: int = 8192,
|
|
411
|
+
encoding: str = 'utf-8',
|
|
412
|
+
errors: str = 'replace') -> List[str]:
|
|
413
|
+
"""Read the last N lines of a file.
|
|
414
|
+
|
|
415
|
+
Args:
|
|
416
|
+
file_path: Path to the file to read.
|
|
417
|
+
n: Number of lines to read from the end of the file.
|
|
418
|
+
chunk_size: Size of chunks in bytes.
|
|
419
|
+
encoding: Encoding to use when decoding binary chunks.
|
|
420
|
+
errors: Error handling for decode errors (e.g., 'replace', 'ignore').
|
|
421
|
+
|
|
422
|
+
Returns:
|
|
423
|
+
A list of the last N lines, preserving newlines where applicable.
|
|
424
|
+
"""
|
|
425
|
+
|
|
426
|
+
assert n >= 0, f'n must be non-negative. Got {n}'
|
|
427
|
+
assert chunk_size > 0, f'chunk_size must be positive. Got {chunk_size}'
|
|
428
|
+
assert os.path.exists(file_path), f'File not found: {file_path}'
|
|
429
|
+
|
|
430
|
+
if n == 0:
|
|
431
|
+
return []
|
|
432
|
+
|
|
433
|
+
try:
|
|
434
|
+
with open(file_path, 'rb') as f:
|
|
435
|
+
# Start reading from the end of the file
|
|
436
|
+
f.seek(0, os.SEEK_END)
|
|
437
|
+
file_size = f.tell()
|
|
438
|
+
if file_size == 0:
|
|
439
|
+
return []
|
|
440
|
+
|
|
441
|
+
pos = file_size
|
|
442
|
+
lines_found = 0
|
|
443
|
+
chunks = []
|
|
444
|
+
|
|
445
|
+
# Read backwards in chunks until we've found at least n newlines
|
|
446
|
+
while pos > 0 and lines_found <= n:
|
|
447
|
+
read_size = min(chunk_size, pos)
|
|
448
|
+
pos -= read_size
|
|
449
|
+
f.seek(pos)
|
|
450
|
+
chunk = f.read(read_size)
|
|
451
|
+
chunks.append(chunk)
|
|
452
|
+
lines_found += chunk.count(b'\n')
|
|
453
|
+
|
|
454
|
+
# Combine all chunks in reverse order since we read backwards
|
|
455
|
+
full_bytes = b''.join(reversed(chunks))
|
|
456
|
+
|
|
457
|
+
# Split by newline byte. Note: this handles '\n' endings.
|
|
458
|
+
all_lines = full_bytes.split(b'\n')
|
|
459
|
+
|
|
460
|
+
# Handle edge case: if file ends with a newline, last element is b''
|
|
461
|
+
if all_lines and all_lines[-1] == b'':
|
|
462
|
+
result_bytes = all_lines[-n - 1:-1]
|
|
463
|
+
else:
|
|
464
|
+
result_bytes = all_lines[-n:]
|
|
465
|
+
|
|
466
|
+
# Decode each line and normalize CR/LF endings
|
|
467
|
+
decoded_lines = [
|
|
468
|
+
line.decode(encoding, errors=errors).rstrip('\r') + '\n'
|
|
469
|
+
for line in result_bytes[:-1]
|
|
470
|
+
]
|
|
471
|
+
|
|
472
|
+
# Decode the final line — only add newline if it was present
|
|
473
|
+
last_line = result_bytes[-1].decode(encoding,
|
|
474
|
+
errors=errors).rstrip('\r')
|
|
475
|
+
decoded_lines.append(last_line)
|
|
476
|
+
|
|
477
|
+
return decoded_lines
|
|
478
|
+
|
|
479
|
+
except OSError as e:
|
|
480
|
+
with ux_utils.print_exception_no_traceback():
|
|
481
|
+
raise RuntimeError(
|
|
482
|
+
f'Failed to read last {n} lines from {file_path}: {e}') from e
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
def _redact_secrets_values(argv: List[str]) -> List[str]:
|
|
486
|
+
"""Redact sensitive values from --secret arguments.
|
|
487
|
+
|
|
488
|
+
Args:
|
|
489
|
+
argv: Command line arguments
|
|
490
|
+
|
|
491
|
+
Returns:
|
|
492
|
+
Modified argv with redacted --secret values, or original argv if any
|
|
493
|
+
error
|
|
494
|
+
|
|
495
|
+
Examples:
|
|
496
|
+
['sky', 'launch', '--secret', 'HF_TOKEN=secret'] ->
|
|
497
|
+
['sky', 'launch', '--secret', 'HF_TOKEN=<redacted>']
|
|
498
|
+
|
|
499
|
+
['sky', 'launch', '--secret=HF_TOKEN=secret'] ->
|
|
500
|
+
['sky', 'launch', '--secret=HF_TOKEN=<redacted>']
|
|
501
|
+
|
|
502
|
+
['sky', 'launch', '--secret', 'HF_TOKEN'] ->
|
|
503
|
+
['sky', 'launch', '--secret', 'HF_TOKEN'] (no change)
|
|
504
|
+
"""
|
|
505
|
+
try:
|
|
506
|
+
if not argv:
|
|
507
|
+
return argv or []
|
|
508
|
+
|
|
509
|
+
result = []
|
|
510
|
+
i = 0
|
|
511
|
+
|
|
512
|
+
while i < len(argv):
|
|
513
|
+
arg = argv[i]
|
|
514
|
+
|
|
515
|
+
# Ensure arg is a string
|
|
516
|
+
if not isinstance(arg, str):
|
|
517
|
+
result.append(arg)
|
|
518
|
+
i += 1
|
|
519
|
+
continue
|
|
520
|
+
|
|
521
|
+
if arg == '--secret' and i + 1 < len(argv):
|
|
522
|
+
result.append(arg)
|
|
523
|
+
next_arg = argv[i + 1]
|
|
524
|
+
# Ensure next_arg is a string and handle redaction safely
|
|
525
|
+
if isinstance(next_arg, str):
|
|
526
|
+
redacted = re.sub(r'^([^=]+)=.*', r'\1=<redacted>',
|
|
527
|
+
next_arg)
|
|
528
|
+
result.append(redacted)
|
|
529
|
+
else:
|
|
530
|
+
result.append(next_arg)
|
|
531
|
+
i += 2
|
|
532
|
+
elif arg.startswith('--secret='):
|
|
533
|
+
# Redact only if there's a value after the key
|
|
534
|
+
redacted = re.sub(r'^(--secret=[^=]+)=.*', r'\1=<redacted>',
|
|
535
|
+
arg)
|
|
536
|
+
result.append(redacted)
|
|
537
|
+
i += 1
|
|
538
|
+
else:
|
|
539
|
+
result.append(arg)
|
|
540
|
+
i += 1
|
|
541
|
+
|
|
542
|
+
return result
|
|
543
|
+
except Exception: # pylint: disable=broad-except
|
|
544
|
+
# If anything goes wrong with redaction, return original argv
|
|
545
|
+
# This ensures the command can still execute
|
|
546
|
+
return argv or []
|
|
547
|
+
|
|
548
|
+
|
|
330
549
|
def user_and_hostname_hash() -> str:
|
|
331
550
|
"""Returns a string containing <user>-<hostname hash last 4 chars>.
|
|
332
551
|
|
|
@@ -356,69 +575,6 @@ def user_and_hostname_hash() -> str:
|
|
|
356
575
|
return f'{getpass.getuser()}-{hostname_hash}'
|
|
357
576
|
|
|
358
577
|
|
|
359
|
-
def read_yaml(path: Optional[str]) -> Dict[str, Any]:
|
|
360
|
-
if path is None:
|
|
361
|
-
raise ValueError('Attempted to read a None YAML.')
|
|
362
|
-
with open(path, 'r', encoding='utf-8') as f:
|
|
363
|
-
config = yaml.safe_load(f)
|
|
364
|
-
return config
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
def read_yaml_all_str(yaml_str: str) -> List[Dict[str, Any]]:
|
|
368
|
-
stream = io.StringIO(yaml_str)
|
|
369
|
-
config = yaml.safe_load_all(stream)
|
|
370
|
-
configs = list(config)
|
|
371
|
-
if not configs:
|
|
372
|
-
# Empty YAML file.
|
|
373
|
-
return [{}]
|
|
374
|
-
return configs
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
def read_yaml_all(path: str) -> List[Dict[str, Any]]:
|
|
378
|
-
with open(path, 'r', encoding='utf-8') as f:
|
|
379
|
-
return read_yaml_all_str(f.read())
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
def dump_yaml(path: str, config: Union[List[Dict[str, Any]],
|
|
383
|
-
Dict[str, Any]]) -> None:
|
|
384
|
-
"""Dumps a YAML file.
|
|
385
|
-
|
|
386
|
-
Args:
|
|
387
|
-
path: the path to the YAML file.
|
|
388
|
-
config: the configuration to dump.
|
|
389
|
-
"""
|
|
390
|
-
with open(path, 'w', encoding='utf-8') as f:
|
|
391
|
-
f.write(dump_yaml_str(config))
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
def dump_yaml_str(config: Union[List[Dict[str, Any]], Dict[str, Any]]) -> str:
|
|
395
|
-
"""Dumps a YAML string.
|
|
396
|
-
|
|
397
|
-
Args:
|
|
398
|
-
config: the configuration to dump.
|
|
399
|
-
|
|
400
|
-
Returns:
|
|
401
|
-
The YAML string.
|
|
402
|
-
"""
|
|
403
|
-
|
|
404
|
-
# https://github.com/yaml/pyyaml/issues/127
|
|
405
|
-
class LineBreakDumper(yaml.SafeDumper):
|
|
406
|
-
|
|
407
|
-
def write_line_break(self, data=None):
|
|
408
|
-
super().write_line_break(data)
|
|
409
|
-
if len(self.indents) == 1:
|
|
410
|
-
super().write_line_break()
|
|
411
|
-
|
|
412
|
-
if isinstance(config, list):
|
|
413
|
-
dump_func = yaml.dump_all # type: ignore
|
|
414
|
-
else:
|
|
415
|
-
dump_func = yaml.dump # type: ignore
|
|
416
|
-
return dump_func(config,
|
|
417
|
-
Dumper=LineBreakDumper,
|
|
418
|
-
sort_keys=False,
|
|
419
|
-
default_flow_style=False)
|
|
420
|
-
|
|
421
|
-
|
|
422
578
|
def make_decorator(cls, name_or_fn: Union[str, Callable],
|
|
423
579
|
**ctx_kwargs) -> Callable:
|
|
424
580
|
"""Make the cls a decorator.
|
|
@@ -668,7 +824,7 @@ def get_cleaned_username(username: str = '') -> str:
|
|
|
668
824
|
Returns:
|
|
669
825
|
A cleaned username.
|
|
670
826
|
"""
|
|
671
|
-
username = username or
|
|
827
|
+
username = username or get_current_user_name()
|
|
672
828
|
username = username.lower()
|
|
673
829
|
username = re.sub(r'[^a-z0-9-_]', '', username)
|
|
674
830
|
username = re.sub(r'^[0-9-]+', '', username)
|
|
@@ -723,10 +879,43 @@ def deprecated_function(
|
|
|
723
879
|
return new_func
|
|
724
880
|
|
|
725
881
|
|
|
726
|
-
def truncate_long_string(s: str,
|
|
727
|
-
|
|
882
|
+
def truncate_long_string(s: str,
|
|
883
|
+
max_length: int = 35,
|
|
884
|
+
truncate_middle: bool = False) -> str:
|
|
885
|
+
"""Truncate a string to a maximum length.
|
|
886
|
+
|
|
887
|
+
Args:
|
|
888
|
+
s: String to truncate.
|
|
889
|
+
max_length: Maximum length of the truncated string.
|
|
890
|
+
truncate_middle: Whether to truncate in the middle of the string.
|
|
891
|
+
If True, the middle part of the string is replaced with '...'.
|
|
892
|
+
If False, truncation happens at the end preserving whole words.
|
|
893
|
+
|
|
894
|
+
Returns:
|
|
895
|
+
Truncated string.
|
|
896
|
+
"""
|
|
728
897
|
if len(s) <= max_length:
|
|
729
898
|
return s
|
|
899
|
+
|
|
900
|
+
if truncate_middle:
|
|
901
|
+
# Reserve 3 characters for '...'
|
|
902
|
+
if max_length <= 3:
|
|
903
|
+
return '...'
|
|
904
|
+
|
|
905
|
+
# Calculate how many characters to keep from beginning and end
|
|
906
|
+
half_length = (max_length - 3) // 2
|
|
907
|
+
remainder = (max_length - 3) % 2
|
|
908
|
+
|
|
909
|
+
# Keep one more character at the beginning if max_length - 3 is odd
|
|
910
|
+
start_length = half_length + remainder
|
|
911
|
+
end_length = half_length
|
|
912
|
+
|
|
913
|
+
# When end_length is 0, just show the start part and '...'
|
|
914
|
+
if end_length == 0:
|
|
915
|
+
return s[:start_length] + '...'
|
|
916
|
+
return s[:start_length] + '...' + s[-end_length:]
|
|
917
|
+
|
|
918
|
+
# Original end-truncation logic
|
|
730
919
|
splits = s.split(' ')
|
|
731
920
|
if len(splits[0]) > max_length:
|
|
732
921
|
return splits[0][:max_length] + '...' # Use '…'?
|
|
@@ -810,7 +999,17 @@ def get_mem_size_gb() -> float:
|
|
|
810
999
|
except ValueError as e:
|
|
811
1000
|
with ux_utils.print_exception_no_traceback():
|
|
812
1001
|
raise ValueError(
|
|
813
|
-
f'Failed to parse the memory size from {mem_size}'
|
|
1002
|
+
f'Failed to parse the memory size from {mem_size} (GB)'
|
|
1003
|
+
) from e
|
|
1004
|
+
mem_size = os.getenv('SKYPILOT_POD_MEMORY_BYTES_LIMIT')
|
|
1005
|
+
if mem_size is not None:
|
|
1006
|
+
try:
|
|
1007
|
+
return float(mem_size) / (1024**3)
|
|
1008
|
+
except ValueError as e:
|
|
1009
|
+
with ux_utils.print_exception_no_traceback():
|
|
1010
|
+
raise ValueError(
|
|
1011
|
+
f'Failed to parse the memory size from {mem_size} (bytes)'
|
|
1012
|
+
) from e
|
|
814
1013
|
return _mem_size_gb()
|
|
815
1014
|
|
|
816
1015
|
|
|
@@ -900,3 +1099,27 @@ def _get_cgroup_memory_limit() -> Optional[int]:
|
|
|
900
1099
|
def _is_cgroup_v2() -> bool:
|
|
901
1100
|
"""Return True if the environment is running cgroup v2."""
|
|
902
1101
|
return os.path.isfile('/sys/fs/cgroup/cgroup.controllers')
|
|
1102
|
+
|
|
1103
|
+
|
|
1104
|
+
def removeprefix(string: str, prefix: str) -> str:
|
|
1105
|
+
if string.startswith(prefix):
|
|
1106
|
+
return string[len(prefix):]
|
|
1107
|
+
return string
|
|
1108
|
+
|
|
1109
|
+
|
|
1110
|
+
def release_memory():
|
|
1111
|
+
"""Release the process memory"""
|
|
1112
|
+
# Do the best effort to release the python heap and let malloc_trim
|
|
1113
|
+
# be more efficient.
|
|
1114
|
+
try:
|
|
1115
|
+
gc.collect()
|
|
1116
|
+
if sys.platform.startswith('linux'):
|
|
1117
|
+
# Will fail on musl (alpine), but at least it works on our
|
|
1118
|
+
# official docker images.
|
|
1119
|
+
libc = ctypes.CDLL('libc.so.6')
|
|
1120
|
+
return libc.malloc_trim(0)
|
|
1121
|
+
return 0
|
|
1122
|
+
except Exception as e: # pylint: disable=broad-except
|
|
1123
|
+
logger.error(f'Failed to release memory: '
|
|
1124
|
+
f'{format_exception(e)}')
|
|
1125
|
+
return 0
|
sky/utils/config_utils.py
CHANGED
|
@@ -6,6 +6,28 @@ from sky import sky_logging
|
|
|
6
6
|
|
|
7
7
|
logger = sky_logging.init_logger(__name__)
|
|
8
8
|
|
|
9
|
+
_REGION_CONFIG_CLOUDS = ['nebius', 'oci']
|
|
10
|
+
|
|
11
|
+
# Kubernetes API use list to represent dictionary fields with patch strategy
|
|
12
|
+
# merge and each item is indexed by the patch merge key. The following map
|
|
13
|
+
# maps the field name to the patch merge key.
|
|
14
|
+
# pylint: disable=line-too-long
|
|
15
|
+
# Ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#podspec-v1-core
|
|
16
|
+
# NOTE: field containers and imagePullSecrets are not included deliberately for
|
|
17
|
+
# backward compatibility (we only support one container per pod now).
|
|
18
|
+
_PATCH_MERGE_KEYS = {
|
|
19
|
+
'initContainers': 'name',
|
|
20
|
+
'ephemeralContainers': 'name',
|
|
21
|
+
'volumes': 'name',
|
|
22
|
+
'volumeMounts': 'name',
|
|
23
|
+
'resourceClaims': 'name',
|
|
24
|
+
'env': 'name',
|
|
25
|
+
'hostAliases': 'ip',
|
|
26
|
+
'topologySpreadConstraints': 'topologyKey',
|
|
27
|
+
'ports': 'containerPort',
|
|
28
|
+
'volumeDevices': 'devicePath',
|
|
29
|
+
}
|
|
30
|
+
|
|
9
31
|
|
|
10
32
|
class Config(Dict[str, Any]):
|
|
11
33
|
"""SkyPilot config that supports setting/getting values with nested keys."""
|
|
@@ -209,20 +231,67 @@ def merge_k8s_configs(
|
|
|
209
231
|
merge_k8s_configs(base_config[key][0], value[0],
|
|
210
232
|
next_allowed_override_keys,
|
|
211
233
|
next_disallowed_override_keys)
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
234
|
+
# For list fields with patch strategy "merge", we merge the list
|
|
235
|
+
# by the patch merge key.
|
|
236
|
+
elif key in _PATCH_MERGE_KEYS:
|
|
237
|
+
patch_merge_key = _PATCH_MERGE_KEYS[key]
|
|
215
238
|
for override_item in value:
|
|
216
|
-
override_item_name = override_item.get(
|
|
239
|
+
override_item_name = override_item.get(patch_merge_key)
|
|
217
240
|
if override_item_name is not None:
|
|
218
241
|
existing_base_item = next(
|
|
219
242
|
(v for v in base_config[key]
|
|
220
|
-
if v.get(
|
|
243
|
+
if v.get(patch_merge_key) == override_item_name),
|
|
244
|
+
None)
|
|
221
245
|
if existing_base_item is not None:
|
|
222
246
|
merge_k8s_configs(existing_base_item, override_item)
|
|
223
247
|
else:
|
|
224
248
|
base_config[key].append(override_item)
|
|
249
|
+
else:
|
|
250
|
+
base_config[key].append(override_item)
|
|
225
251
|
else:
|
|
226
252
|
base_config[key].extend(value)
|
|
227
253
|
else:
|
|
228
254
|
base_config[key] = value
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def get_cloud_config_value_from_dict(
|
|
258
|
+
dict_config: Dict[str, Any],
|
|
259
|
+
cloud: str,
|
|
260
|
+
keys: Tuple[str, ...],
|
|
261
|
+
region: Optional[str] = None,
|
|
262
|
+
default_value: Optional[Any] = None,
|
|
263
|
+
override_configs: Optional[Dict[str, Any]] = None) -> Any:
|
|
264
|
+
"""Returns the nested key value by reading from config
|
|
265
|
+
Order to get the property_name value:
|
|
266
|
+
1. if region is specified,
|
|
267
|
+
try to get the value from <cloud>/<region_key>/<region>/keys
|
|
268
|
+
2. if no region or no override,
|
|
269
|
+
try to get it at the cloud level <cloud>/keys
|
|
270
|
+
3. if not found at cloud level,
|
|
271
|
+
return either default_value if specified or None
|
|
272
|
+
"""
|
|
273
|
+
input_config = Config(dict_config)
|
|
274
|
+
region_key = None
|
|
275
|
+
if cloud in ('kubernetes', 'ssh'):
|
|
276
|
+
region_key = 'context_configs'
|
|
277
|
+
elif cloud in _REGION_CONFIG_CLOUDS:
|
|
278
|
+
region_key = 'region_configs'
|
|
279
|
+
|
|
280
|
+
per_context_config = None
|
|
281
|
+
if region is not None and region_key is not None:
|
|
282
|
+
per_context_config = input_config.get_nested(
|
|
283
|
+
keys=(cloud, region_key, region) + keys,
|
|
284
|
+
default_value=None,
|
|
285
|
+
override_configs=override_configs)
|
|
286
|
+
# if no override found for specified region
|
|
287
|
+
general_config = input_config.get_nested(keys=(cloud,) + keys,
|
|
288
|
+
default_value=default_value,
|
|
289
|
+
override_configs=override_configs)
|
|
290
|
+
|
|
291
|
+
if (cloud == 'kubernetes' and isinstance(general_config, dict) and
|
|
292
|
+
isinstance(per_context_config, dict)):
|
|
293
|
+
merge_k8s_configs(general_config, per_context_config)
|
|
294
|
+
return general_config
|
|
295
|
+
else:
|
|
296
|
+
return (general_config
|
|
297
|
+
if per_context_config is None else per_context_config)
|