skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +81 -16
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +173 -30
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1323 -397
- sky/backends/cloud_vm_ray_backend.py +1749 -1029
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/task_codegen.py +633 -0
- sky/backends/wheel_utils.py +55 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +90 -49
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +338 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +533 -185
- sky/cli.py +5 -5975
- sky/client/{cli.py → cli/command.py} +2591 -1956
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +322 -0
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +78 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1219 -319
- sky/client/sdk_async.py +827 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +564 -164
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +140 -40
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +570 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +210 -81
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -86
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +231 -167
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +375 -173
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1460 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +126 -2
- sky/execution.py +216 -63
- sky/global_user_state.py +2390 -586
- sky/jobs/__init__.py +7 -0
- sky/jobs/client/sdk.py +300 -58
- sky/jobs/client/sdk_async.py +161 -0
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +848 -275
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +402 -152
- sky/jobs/scheduler.py +314 -189
- sky/jobs/server/core.py +836 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2109 -706
- sky/jobs/utils.py +1306 -215
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +453 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +136 -50
- sky/provision/azure/instance.py +11 -6
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +140 -33
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +101 -20
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +919 -280
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +44 -43
- sky/provision/kubernetes/utils.py +1221 -534
- sky/provision/kubernetes/volume.py +343 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +237 -137
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +117 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +214 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +707 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +4 -4
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +17 -20
- sky/py.typed +0 -0
- sky/resources.py +845 -119
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +227 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +12 -9
- sky/serve/controller.py +68 -17
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +116 -2
- sky/serve/replica_managers.py +434 -249
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +569 -257
- sky/serve/serve_utils.py +775 -265
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +192 -89
- sky/serve/service_spec.py +144 -20
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +202 -0
- sky/server/common.py +478 -182
- sky/server/config.py +85 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +295 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/middleware_utils.py +166 -0
- sky/server/requests/executor.py +558 -138
- sky/server/requests/payloads.py +364 -24
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +121 -0
- sky/server/requests/requests.py +822 -226
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +455 -0
- sky/server/server.py +1309 -285
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +327 -61
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +11 -1
- sky/setup_files/alembic.ini +160 -0
- sky/setup_files/dependencies.py +139 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +114 -7
- sky/skylet/attempt_skylet.py +106 -24
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +29 -20
- sky/skylet/constants.py +216 -25
- sky/skylet/events.py +101 -21
- sky/skylet/job_lib.py +345 -164
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +568 -0
- sky/skylet/skylet.py +72 -4
- sky/skylet/subprocess_daemon.py +104 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +685 -163
- sky/templates/aws-ray.yml.j2 +11 -3
- sky/templates/azure-ray.yml.j2 +2 -1
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +2 -1
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +62 -1
- sky/templates/hyperbolic-ray.yml.j2 +68 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +611 -50
- sky/templates/lambda-ray.yml.j2 +2 -1
- sky/templates/nebius-ray.yml.j2 +34 -12
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +2 -1
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +10 -1
- sky/templates/scp-ray.yml.j2 +4 -50
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/vast-ray.yml.j2 +1 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +212 -37
- sky/usage/usage_lib.py +31 -15
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +397 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +35 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +38 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +314 -91
- sky/utils/config_utils.py +74 -5
- sky/utils/context.py +403 -0
- sky/utils/context_utils.py +242 -0
- sky/utils/controller_utils.py +383 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +485 -0
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +137 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +15 -29
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +18 -8
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +416 -0
- sky/utils/log_utils.py +82 -107
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +217 -39
- sky/utils/schemas.py +955 -160
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +29 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +26 -53
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +165 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +150 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +270 -0
- sky/volumes/server/server.py +124 -0
- sky/volumes/volume.py +215 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
- skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
- skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/templates/websocket_proxy.py
CHANGED
|
@@ -11,18 +11,43 @@ This script is useful for users who do not have local Kubernetes credentials.
|
|
|
11
11
|
import asyncio
|
|
12
12
|
from http.cookiejar import MozillaCookieJar
|
|
13
13
|
import os
|
|
14
|
+
import struct
|
|
14
15
|
import sys
|
|
15
|
-
|
|
16
|
+
import time
|
|
17
|
+
from typing import Dict, Optional
|
|
16
18
|
from urllib.request import Request
|
|
17
19
|
|
|
20
|
+
import requests
|
|
18
21
|
import websockets
|
|
22
|
+
from websockets.asyncio.client import ClientConnection
|
|
19
23
|
from websockets.asyncio.client import connect
|
|
20
24
|
|
|
25
|
+
from sky import exceptions
|
|
26
|
+
from sky.client import service_account_auth
|
|
27
|
+
from sky.server import constants
|
|
28
|
+
from sky.server.server import KubernetesSSHMessageType
|
|
29
|
+
from sky.skylet import constants as skylet_constants
|
|
30
|
+
|
|
31
|
+
BUFFER_SIZE = 2**16 # 64KB
|
|
32
|
+
HEARTBEAT_INTERVAL_SECONDS = 10
|
|
33
|
+
|
|
34
|
+
# Environment variable for a file path to the API cookie file.
|
|
35
|
+
# Keep in sync with server/constants.py
|
|
36
|
+
API_COOKIE_FILE_ENV_VAR = 'SKYPILOT_API_COOKIE_FILE'
|
|
37
|
+
# Default file if unset.
|
|
38
|
+
# Keep in sync with server/constants.py
|
|
39
|
+
API_COOKIE_FILE_DEFAULT_LOCATION = '~/.sky/cookies.txt'
|
|
40
|
+
|
|
41
|
+
MAX_UNANSWERED_PINGS = 100
|
|
42
|
+
|
|
21
43
|
|
|
22
44
|
def _get_cookie_header(url: str) -> Dict[str, str]:
|
|
23
45
|
"""Extract Cookie header value from a cookie jar for a specific URL"""
|
|
24
|
-
cookie_path = os.environ.get(
|
|
46
|
+
cookie_path = os.environ.get(API_COOKIE_FILE_ENV_VAR)
|
|
25
47
|
if cookie_path is None:
|
|
48
|
+
cookie_path = API_COOKIE_FILE_DEFAULT_LOCATION
|
|
49
|
+
cookie_path = os.path.expanduser(cookie_path)
|
|
50
|
+
if not os.path.exists(cookie_path):
|
|
26
51
|
return {}
|
|
27
52
|
|
|
28
53
|
request = Request(url)
|
|
@@ -36,68 +61,218 @@ def _get_cookie_header(url: str) -> Dict[str, str]:
|
|
|
36
61
|
return {'Cookie': cookie_header}
|
|
37
62
|
|
|
38
63
|
|
|
39
|
-
async def main(url: str) -> None:
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
64
|
+
async def main(url: str, timestamps_supported: bool, login_url: str) -> None:
|
|
65
|
+
headers = {}
|
|
66
|
+
headers.update(_get_cookie_header(url))
|
|
67
|
+
headers.update(service_account_auth.get_service_account_headers())
|
|
68
|
+
try:
|
|
69
|
+
async with connect(url, ping_interval=None,
|
|
70
|
+
additional_headers=headers) as websocket:
|
|
71
|
+
await run_websocket_proxy(websocket, timestamps_supported)
|
|
72
|
+
except websockets.exceptions.InvalidStatus as e:
|
|
73
|
+
if e.response.status_code == 403:
|
|
74
|
+
print(str(exceptions.ApiServerAuthenticationError(login_url)),
|
|
75
|
+
file=sys.stderr)
|
|
50
76
|
else:
|
|
51
|
-
|
|
77
|
+
print(f'Error ssh into cluster: {e}', file=sys.stderr)
|
|
78
|
+
sys.exit(1)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
async def run_websocket_proxy(websocket: ClientConnection,
|
|
82
|
+
timestamps_supported: bool) -> None:
|
|
83
|
+
if os.isatty(sys.stdin.fileno()):
|
|
84
|
+
# pylint: disable=import-outside-toplevel
|
|
85
|
+
import termios
|
|
86
|
+
import tty
|
|
87
|
+
old_settings = termios.tcgetattr(sys.stdin.fileno())
|
|
88
|
+
tty.setraw(sys.stdin.fileno())
|
|
89
|
+
else:
|
|
90
|
+
old_settings = None
|
|
91
|
+
|
|
92
|
+
try:
|
|
93
|
+
loop = asyncio.get_running_loop()
|
|
94
|
+
# Use asyncio.Stream primitives to wrap stdin and stdout, this is to
|
|
95
|
+
# avoid creating a new thread for each read/write operation
|
|
96
|
+
# excessively.
|
|
97
|
+
stdin_reader = asyncio.StreamReader()
|
|
98
|
+
protocol = asyncio.StreamReaderProtocol(stdin_reader)
|
|
99
|
+
await loop.connect_read_pipe(lambda: protocol, sys.stdin)
|
|
100
|
+
transport, protocol = await loop.connect_write_pipe(
|
|
101
|
+
asyncio.streams.FlowControlMixin, sys.stdout) # type: ignore
|
|
102
|
+
stdout_writer = asyncio.StreamWriter(transport, protocol, None, loop)
|
|
103
|
+
# Dictionary to store last ping time for latency measurement
|
|
104
|
+
last_ping_time_dict: Optional[Dict[int, float]] = None
|
|
105
|
+
if timestamps_supported:
|
|
106
|
+
last_ping_time_dict = {}
|
|
107
|
+
|
|
108
|
+
# Use an Event to signal when websocket is closed
|
|
109
|
+
websocket_closed_event = asyncio.Event()
|
|
110
|
+
websocket_lock = asyncio.Lock()
|
|
111
|
+
|
|
112
|
+
await asyncio.gather(
|
|
113
|
+
stdin_to_websocket(stdin_reader, websocket, timestamps_supported,
|
|
114
|
+
websocket_closed_event, websocket_lock),
|
|
115
|
+
websocket_to_stdout(websocket, stdout_writer, timestamps_supported,
|
|
116
|
+
last_ping_time_dict, websocket_closed_event,
|
|
117
|
+
websocket_lock),
|
|
118
|
+
latency_monitor(websocket, last_ping_time_dict,
|
|
119
|
+
websocket_closed_event, websocket_lock),
|
|
120
|
+
return_exceptions=True)
|
|
121
|
+
finally:
|
|
122
|
+
if old_settings:
|
|
123
|
+
termios.tcsetattr(sys.stdin.fileno(), termios.TCSADRAIN,
|
|
124
|
+
old_settings)
|
|
52
125
|
|
|
126
|
+
|
|
127
|
+
async def latency_monitor(websocket: ClientConnection,
                          last_ping_time_dict: Optional[dict],
                          websocket_closed_event: asyncio.Event,
                          websocket_lock: asyncio.Lock):
    """Send a PING message (type 1) every heartbeat interval.

    Each PING carries an incrementing id; the time it was sent is stored in
    ``last_ping_time_dict`` under that id.

    Args:
        websocket: Connection the PING messages are sent on.
        last_ping_time_dict: Mapping of ping id to send time. When None, the
            function returns immediately without sending anything.
        websocket_closed_event: Checked before every iteration to stop the
            loop; set by this function when an unexpected error occurs.
        websocket_lock: Held around every ``websocket.send`` call.

    Raises:
        Exception: Any error other than a closed connection during send is
            printed to stderr and re-raised after setting
            ``websocket_closed_event``.
    """
    if last_ping_time_dict is None:
        return

    ping_id = 0
    while True:
        if websocket_closed_event.is_set():
            return
        try:
            await asyncio.sleep(HEARTBEAT_INTERVAL_SECONDS)

            # Too many pings without a response: drop the pending entries
            # so that the dictionary cannot grow without bound.
            if len(last_ping_time_dict) >= MAX_UNANSWERED_PINGS:
                last_ping_time_dict.clear()

            sent_at = time.time()
            ping_id += 1
            last_ping_time_dict[ping_id] = sent_at

            # Header layout: 1-byte message type + 4-byte ping id, both in
            # network byte order.
            ping_frame = struct.pack('!BI',
                                     KubernetesSSHMessageType.PINGPONG.value,
                                     ping_id)
            try:
                async with websocket_lock:
                    await websocket.send(ping_frame)
            except websockets.exceptions.ConnectionClosed as e:
                # The websocket is already closed; stop monitoring.
                print(f'Failed to send PING message: {e}', file=sys.stderr)
                return
        except Exception as e:  # pylint: disable=broad-except
            print(f'Error in latency_monitor: {e}', file=sys.stderr)
            websocket_closed_event.set()
            raise
|
|
60
158
|
|
|
61
159
|
|
|
62
|
-
async def stdin_to_websocket(
|
|
160
|
+
async def stdin_to_websocket(reader: asyncio.StreamReader,
                             websocket: ClientConnection,
                             timestamps_supported: bool,
                             websocket_closed_event: asyncio.Event,
                             websocket_lock: asyncio.Lock):
    """Forward bytes read from ``reader`` to the websocket until EOF.

    Args:
        reader: Stream the outgoing bytes are read from.
        websocket: Connection the bytes are sent on.
        timestamps_supported: When True, every chunk is prefixed with the
            one-byte REGULAR_DATA message type (type 0).
        websocket_closed_event: Checked before every read to stop the loop;
            always set when this function exits.
        websocket_lock: Held around every ``websocket.send`` and around the
            final ``websocket.close``.

    Errors raised while forwarding are printed to stderr and not re-raised.
    """
    try:
        # The type prefix never changes, so build it once.
        type_prefix = b''
        if timestamps_supported:
            type_prefix = struct.pack(
                '!B', KubernetesSSHMessageType.REGULAR_DATA.value)

        while not websocket_closed_event.is_set():
            # read() returns as soon as at least one byte is available, so
            # asking for up to BUFFER_SIZE bytes does not hurt
            # responsiveness; the large upper bound improves throughput.
            chunk = await reader.read(BUFFER_SIZE)
            if not chunk:
                # EOF on the input stream.
                break

            payload = type_prefix + chunk if timestamps_supported else chunk
            async with websocket_lock:
                await websocket.send(payload)
    except Exception as e:  # pylint: disable=broad-except
        print(f'Error in stdin_to_websocket: {e}', file=sys.stderr)
    finally:
        async with websocket_lock:
            await websocket.close()
        websocket_closed_event.set()
|
|
74
190
|
|
|
75
191
|
|
|
76
|
-
async def websocket_to_stdout(websocket
|
|
192
|
+
async def websocket_to_stdout(websocket: ClientConnection,
                              writer: asyncio.StreamWriter,
                              timestamps_supported: bool,
                              last_ping_time_dict: Optional[dict],
                              websocket_closed_event: asyncio.Event,
                              websocket_lock: asyncio.Lock):
    """Forward websocket messages to ``writer`` and answer PONG messages.

    When ``timestamps_supported`` is True and ``last_ping_time_dict`` is not
    None, the first byte of every non-empty message is read as a message
    type:

    * REGULAR_DATA: the type byte is stripped and the rest is written.
    * PINGPONG: the matching ping time is popped from
      ``last_ping_time_dict``, the round-trip latency is computed and sent
      back as a LATENCY_MEASUREMENT message (type 2); nothing is written.
    * Any other type: the message is written unchanged.

    Otherwise every message is written unchanged.

    Args:
        websocket: Connection the messages are received from.
        writer: Stream the payload bytes are written to.
        timestamps_supported: Whether messages carry a leading type byte.
        last_ping_time_dict: Mapping of ping id to send time, or None.
        websocket_closed_event: Checked before every receive to stop the
            loop; always set when this function exits.
        websocket_lock: Held around ``websocket.send`` and the final
            ``websocket.close``.

    Raises:
        ValueError: A PINGPONG message does not have the expected length.
            Like any other unexpected error, it is printed to stderr and
            re-raised; a closed connection is only printed.
    """
    try:
        while not websocket_closed_event.is_set():
            message = await websocket.recv()
            if (timestamps_supported and len(message) > 0 and
                    last_ping_time_dict is not None):
                message_type = struct.unpack('!B', message[:1])[0]
                if message_type == KubernetesSSHMessageType.REGULAR_DATA.value:
                    # Regular data - strip type byte and write to stdout
                    message = message[1:]
                elif message_type == KubernetesSSHMessageType.PINGPONG.value:
                    # PONG response - calculate latency and send measurement
                    if len(message) != struct.calcsize('!BI'):
                        raise ValueError(
                            f'Invalid PONG message length: {len(message)}')
                    pong_id = struct.unpack('!I', message[1:5])[0]
                    pong_time = time.time()

                    ping_time = last_ping_time_dict.pop(pong_id, None)
                    if ping_time is None:
                        # Unknown ping id (the entry was already removed or
                        # never recorded): nothing to measure.
                        continue

                    latency_seconds = pong_time - ping_time
                    # time.time() is wall-clock time and may step backwards
                    # between the PING and the PONG. '!Q' is unsigned, so a
                    # negative value would raise struct.error and tear down
                    # the whole session; clamp to zero instead.
                    latency_ms = max(0, int(latency_seconds * 1000))

                    # Send latency measurement (type 2)
                    message_type_bytes = struct.pack(
                        '!B',
                        KubernetesSSHMessageType.LATENCY_MEASUREMENT.value)
                    latency_bytes = struct.pack('!Q', latency_ms)
                    message = message_type_bytes + latency_bytes
                    # Send to server.
                    async with websocket_lock:
                        await websocket.send(message)
                    continue
            # Reached for unframed messages, for REGULAR_DATA payloads, and
            # for unrecognized message types: write the bytes out.
            writer.write(message)
            await writer.drain()
    except websockets.exceptions.ConnectionClosed:
        print('WebSocket connection closed', file=sys.stderr)
    except Exception as e:  # pylint: disable=broad-except
        print(f'Error in websocket_to_stdout: {e}', file=sys.stderr)
        raise
    finally:
        async with websocket_lock:
            await websocket.close()
        websocket_closed_event.set()
|
|
87
245
|
|
|
88
246
|
|
|
89
247
|
if __name__ == '__main__':
    # argv[1] is the API server URL, argv[2] is the cluster name.
    server_url = sys.argv[1].strip('/')

    latency_opt_out = os.environ.get(
        skylet_constants.SSH_DISABLE_LATENCY_MEASUREMENT_ENV_VAR, '0')
    if latency_opt_out == '1':
        timestamps_are_supported = False
    else:
        # TODO(aylei): remove the separate /api/health call and use the header
        # during websocket handshake to determine the server version.
        health_url = f'{server_url}/api/health'
        health_data = requests.get(
            health_url, headers=_get_cookie_header(health_url)).json()
        # Timestamped messages are enabled only when the server reports an
        # api_version above 21; a missing field counts as 0.
        timestamps_are_supported = int(health_data.get('api_version', 0)) > 21

    # Keep the original API server URL (before it is rewritten to a
    # websocket URL) for the login hint if authentication is required.
    _login_url = server_url

    # Map http -> ws and https -> wss, keeping the rest of the URL.
    server_proto, server_fqdn = server_url.split('://')
    websocket_proto = 'wss' if server_proto == 'https' else 'ws'
    server_url = f'{websocket_proto}://{server_fqdn}'

    # The client version is only advertised when timestamps are in use.
    query = f'?cluster_name={sys.argv[2]}'
    if timestamps_are_supported:
        query += f'&client_version={constants.API_VERSION}'

    websocket_url = f'{server_url}/kubernetes-pod-ssh-proxy{query}'
    asyncio.run(main(websocket_url, timestamps_are_supported, _login_url))
|
sky/usage/usage_lib.py
CHANGED
|
@@ -10,13 +10,17 @@ import traceback
|
|
|
10
10
|
import typing
|
|
11
11
|
from typing import Any, Callable, Dict, List, Optional, Union
|
|
12
12
|
|
|
13
|
+
from typing_extensions import ParamSpec
|
|
14
|
+
|
|
13
15
|
import sky
|
|
14
16
|
from sky import sky_logging
|
|
17
|
+
from sky import skypilot_config
|
|
15
18
|
from sky.adaptors import common as adaptors_common
|
|
16
19
|
from sky.usage import constants
|
|
17
20
|
from sky.utils import common_utils
|
|
18
21
|
from sky.utils import env_options
|
|
19
22
|
from sky.utils import ux_utils
|
|
23
|
+
from sky.utils import yaml_utils
|
|
20
24
|
|
|
21
25
|
if typing.TYPE_CHECKING:
|
|
22
26
|
import inspect
|
|
@@ -164,6 +168,7 @@ class UsageMessageToReport(MessageToReport):
|
|
|
164
168
|
self.runtimes: Dict[str, float] = {} # update_runtime
|
|
165
169
|
self.exception: Optional[str] = None # entrypoint_context
|
|
166
170
|
self.stacktrace: Optional[str] = None # entrypoint_context
|
|
171
|
+
self.skypilot_config: Optional[Dict[str, Any]] = None
|
|
167
172
|
|
|
168
173
|
# Whether API server is deployed remotely.
|
|
169
174
|
self.using_remote_api_server: bool = (
|
|
@@ -174,6 +179,7 @@ class UsageMessageToReport(MessageToReport):
|
|
|
174
179
|
self.client_entrypoint = common_utils.get_current_client_entrypoint(
|
|
175
180
|
msg)
|
|
176
181
|
self.entrypoint = msg
|
|
182
|
+
self.skypilot_config = dict(skypilot_config.to_dict())
|
|
177
183
|
|
|
178
184
|
def set_internal(self):
|
|
179
185
|
self.internal = True
|
|
@@ -205,8 +211,8 @@ class UsageMessageToReport(MessageToReport):
|
|
|
205
211
|
logger.debug('Multiple accelerators are not supported: '
|
|
206
212
|
f'{resources.accelerators}.')
|
|
207
213
|
self.task_accelerators = list(resources.accelerators.keys())[0]
|
|
208
|
-
self.task_num_accelerators =
|
|
209
|
-
self.task_accelerators]
|
|
214
|
+
self.task_num_accelerators = int(
|
|
215
|
+
resources.accelerators[self.task_accelerators])
|
|
210
216
|
else:
|
|
211
217
|
self.task_accelerators = None
|
|
212
218
|
self.task_num_accelerators = None
|
|
@@ -245,7 +251,8 @@ class UsageMessageToReport(MessageToReport):
|
|
|
245
251
|
logger.debug('Multiple accelerators are not supported: '
|
|
246
252
|
f'{resources.accelerators}.')
|
|
247
253
|
self.accelerators = list(resources.accelerators.keys())[0]
|
|
248
|
-
self.num_accelerators =
|
|
254
|
+
self.num_accelerators = int(
|
|
255
|
+
resources.accelerators[self.accelerators])
|
|
249
256
|
else:
|
|
250
257
|
self.accelerators = None
|
|
251
258
|
self.num_accelerators = None
|
|
@@ -309,21 +316,30 @@ class MessageCollection:
|
|
|
309
316
|
"""A collection of messages."""
|
|
310
317
|
|
|
311
318
|
def __init__(self):
|
|
312
|
-
self._messages = {
|
|
319
|
+
self._messages: Dict[MessageType, MessageToReport] = {
|
|
313
320
|
MessageType.USAGE: UsageMessageToReport(),
|
|
314
321
|
MessageType.HEARTBEAT: HeartbeatMessageToReport()
|
|
315
322
|
}
|
|
316
323
|
|
|
317
324
|
@property
|
|
318
325
|
def usage(self) -> UsageMessageToReport:
|
|
319
|
-
|
|
326
|
+
msg = self._messages[MessageType.USAGE]
|
|
327
|
+
assert isinstance(msg, UsageMessageToReport)
|
|
328
|
+
return msg
|
|
320
329
|
|
|
321
330
|
@property
|
|
322
331
|
def heartbeat(self) -> HeartbeatMessageToReport:
|
|
323
|
-
|
|
332
|
+
msg = self._messages[MessageType.HEARTBEAT]
|
|
333
|
+
assert isinstance(msg, HeartbeatMessageToReport)
|
|
334
|
+
return msg
|
|
324
335
|
|
|
325
336
|
def reset(self, message_type: MessageType):
|
|
326
|
-
|
|
337
|
+
if message_type == MessageType.USAGE:
|
|
338
|
+
self._messages[message_type] = UsageMessageToReport()
|
|
339
|
+
elif message_type == MessageType.HEARTBEAT:
|
|
340
|
+
self._messages[message_type] = HeartbeatMessageToReport()
|
|
341
|
+
else:
|
|
342
|
+
raise ValueError(f'Unknown message type: {message_type}')
|
|
327
343
|
|
|
328
344
|
def __getitem__(self, key):
|
|
329
345
|
return self._messages[key]
|
|
@@ -399,7 +415,7 @@ def _clean_yaml(yaml_info: Dict[str, Optional[str]]):
|
|
|
399
415
|
contents = inspect.getsource(contents)
|
|
400
416
|
|
|
401
417
|
if type(contents) in constants.USAGE_MESSAGE_REDACT_TYPES:
|
|
402
|
-
lines =
|
|
418
|
+
lines = yaml_utils.dump_yaml_str({
|
|
403
419
|
redact_type: contents
|
|
404
420
|
}).strip().split('\n')
|
|
405
421
|
message = (f'{len(lines)} lines {redact_type.upper()}'
|
|
@@ -428,7 +444,7 @@ def prepare_json_from_yaml_config(
|
|
|
428
444
|
with open(yaml_config_or_path, 'r', encoding='utf-8') as f:
|
|
429
445
|
lines = f.readlines()
|
|
430
446
|
comment_lines = [line for line in lines if line.startswith('#')]
|
|
431
|
-
yaml_info =
|
|
447
|
+
yaml_info = yaml_utils.read_yaml_all(yaml_config_or_path)
|
|
432
448
|
|
|
433
449
|
for i in range(len(yaml_info)):
|
|
434
450
|
if yaml_info[i] is None:
|
|
@@ -516,26 +532,26 @@ def entrypoint_context(name: str, fallback: bool = False):
|
|
|
516
532
|
|
|
517
533
|
|
|
518
534
|
T = typing.TypeVar('T')
|
|
535
|
+
P = ParamSpec('P')
|
|
519
536
|
|
|
520
537
|
|
|
521
538
|
@typing.overload
|
|
522
539
|
def entrypoint(
|
|
523
540
|
name_or_fn: str,
|
|
524
|
-
fallback: bool = False
|
|
525
|
-
) -> Callable[[Callable[..., T]], Callable[..., T]]:
|
|
541
|
+
fallback: bool = False) -> Callable[[Callable[P, T]], Callable[P, T]]:
|
|
526
542
|
...
|
|
527
543
|
|
|
528
544
|
|
|
529
545
|
@typing.overload
|
|
530
|
-
def entrypoint(name_or_fn: Callable[
|
|
531
|
-
fallback: bool = False) -> Callable[
|
|
546
|
+
def entrypoint(name_or_fn: Callable[P, T],
|
|
547
|
+
fallback: bool = False) -> Callable[P, T]:
|
|
532
548
|
...
|
|
533
549
|
|
|
534
550
|
|
|
535
551
|
def entrypoint(
|
|
536
|
-
name_or_fn: Union[str, Callable[
|
|
552
|
+
name_or_fn: Union[str, Callable[P, T]],
|
|
537
553
|
fallback: bool = False
|
|
538
|
-
) -> Union[Callable[
|
|
554
|
+
) -> Union[Callable[P, T], Callable[[Callable[P, T]], Callable[P, T]]]:
|
|
539
555
|
return common_utils.make_decorator(entrypoint_context,
|
|
540
556
|
name_or_fn,
|
|
541
557
|
fallback=fallback)
|
sky/users/__init__.py
ADDED
|
File without changes
|
sky/users/model.conf
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# rbac_model.conf
|
|
2
|
+
[request_definition]
|
|
3
|
+
r = sub, obj, act
|
|
4
|
+
|
|
5
|
+
[policy_definition]
|
|
6
|
+
p = sub, obj, act
|
|
7
|
+
|
|
8
|
+
[role_definition]
|
|
9
|
+
g = _, _
|
|
10
|
+
|
|
11
|
+
[policy_effect]
|
|
12
|
+
e = some(where (p.eft == allow))
|
|
13
|
+
|
|
14
|
+
[matchers]
|
|
15
|
+
m = (g(r.sub, p.sub)|| p.sub == '*') && r.obj == p.obj && r.act == p.act
|