skypilot-nightly 1.0.0.dev20251203__py3-none-any.whl → 1.0.0.dev20260112__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +6 -2
- sky/adaptors/aws.py +1 -61
- sky/adaptors/slurm.py +565 -0
- sky/backends/backend_utils.py +95 -12
- sky/backends/cloud_vm_ray_backend.py +224 -65
- sky/backends/task_codegen.py +380 -4
- sky/catalog/__init__.py +0 -3
- sky/catalog/data_fetchers/fetch_gcp.py +9 -1
- sky/catalog/data_fetchers/fetch_nebius.py +1 -1
- sky/catalog/data_fetchers/fetch_vast.py +4 -2
- sky/catalog/kubernetes_catalog.py +12 -4
- sky/catalog/seeweb_catalog.py +30 -15
- sky/catalog/shadeform_catalog.py +5 -2
- sky/catalog/slurm_catalog.py +236 -0
- sky/catalog/vast_catalog.py +30 -6
- sky/check.py +25 -11
- sky/client/cli/command.py +391 -32
- sky/client/interactive_utils.py +190 -0
- sky/client/sdk.py +64 -2
- sky/client/sdk_async.py +9 -0
- sky/clouds/__init__.py +2 -0
- sky/clouds/aws.py +60 -2
- sky/clouds/azure.py +2 -0
- sky/clouds/cloud.py +7 -0
- sky/clouds/kubernetes.py +2 -0
- sky/clouds/runpod.py +38 -7
- sky/clouds/slurm.py +610 -0
- sky/clouds/ssh.py +3 -2
- sky/clouds/vast.py +39 -16
- sky/core.py +197 -37
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/3nu-b8raeKRNABZ2d4GAG/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-0565f8975a7dcd10.js +6 -0
- sky/dashboard/out/_next/static/chunks/2109-55a1546d793574a7.js +11 -0
- sky/dashboard/out/_next/static/chunks/2521-099b07cd9e4745bf.js +26 -0
- sky/dashboard/out/_next/static/chunks/2755.a636e04a928a700e.js +31 -0
- sky/dashboard/out/_next/static/chunks/3495.05eab4862217c1a5.js +6 -0
- sky/dashboard/out/_next/static/chunks/3785.cfc5dcc9434fd98c.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-fd5696f3bbbaddae.js +1 -0
- sky/dashboard/out/_next/static/chunks/3981.645d01bf9c8cad0c.js +21 -0
- sky/dashboard/out/_next/static/chunks/4083-0115d67c1fb57d6c.js +21 -0
- sky/dashboard/out/_next/static/chunks/{8640.5b9475a2d18c5416.js → 429.a58e9ba9742309ed.js} +2 -2
- sky/dashboard/out/_next/static/chunks/4555.8e221537181b5dc1.js +6 -0
- sky/dashboard/out/_next/static/chunks/4725.937865b81fdaaebb.js +6 -0
- sky/dashboard/out/_next/static/chunks/6082-edabd8f6092300ce.js +25 -0
- sky/dashboard/out/_next/static/chunks/6989-49cb7dca83a7a62d.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-630bd2a2257275f8.js +1 -0
- sky/dashboard/out/_next/static/chunks/7248-a99800d4db8edabd.js +1 -0
- sky/dashboard/out/_next/static/chunks/754-cfc5d4ad1b843d29.js +18 -0
- sky/dashboard/out/_next/static/chunks/8050-dd8aa107b17dce00.js +16 -0
- sky/dashboard/out/_next/static/chunks/8056-d4ae1e0cb81e7368.js +1 -0
- sky/dashboard/out/_next/static/chunks/8555.011023e296c127b3.js +6 -0
- sky/dashboard/out/_next/static/chunks/8821-93c25df904a8362b.js +1 -0
- sky/dashboard/out/_next/static/chunks/8969-0662594b69432ade.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.f15c91c97d124a5f.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-7ad6bd01858556f1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-5a86569acad99764.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-8297476714acb4ac.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-337c3ba1085f1210.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{clusters-ee39056f9851a3ff.js → clusters-57632ff3684a8b5c.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{config-dfb9bf07b13045f4.js → config-718cdc365de82689.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-5fd3a453c079c2ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-9f85c02c9c6cae9e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-90f16972cbecf354.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-2dd42fc37aad427a.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-ed806aeace26b972.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/plugins/[...slug]-449a9f5a3bb20fb3.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-bec34706b36f3524.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{volumes-b84b948ff357c43e.js → volumes-a83ba9b38dff7ea9.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-84a40f8c7c627fe4.js → [name]-c781e9c3e52ef9fc.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-91e0942f47310aae.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-cfe59cf684ee13b9.js +1 -0
- sky/dashboard/out/_next/static/css/b0dbca28f027cc19.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/plugins/[...slug].html +1 -0
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/data_utils.py +26 -12
- sky/data/mounting_utils.py +44 -5
- sky/global_user_state.py +111 -19
- sky/jobs/client/sdk.py +8 -3
- sky/jobs/controller.py +191 -31
- sky/jobs/recovery_strategy.py +109 -11
- sky/jobs/server/core.py +81 -4
- sky/jobs/server/server.py +14 -0
- sky/jobs/state.py +417 -19
- sky/jobs/utils.py +73 -80
- sky/models.py +11 -0
- sky/optimizer.py +8 -6
- sky/provision/__init__.py +12 -9
- sky/provision/common.py +20 -0
- sky/provision/docker_utils.py +15 -2
- sky/provision/kubernetes/utils.py +163 -20
- sky/provision/kubernetes/volume.py +52 -17
- sky/provision/provisioner.py +17 -7
- sky/provision/runpod/instance.py +3 -1
- sky/provision/runpod/utils.py +13 -1
- sky/provision/runpod/volume.py +25 -9
- sky/provision/slurm/__init__.py +12 -0
- sky/provision/slurm/config.py +13 -0
- sky/provision/slurm/instance.py +618 -0
- sky/provision/slurm/utils.py +689 -0
- sky/provision/vast/instance.py +4 -1
- sky/provision/vast/utils.py +11 -6
- sky/resources.py +135 -13
- sky/schemas/api/responses.py +4 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +1 -1
- sky/schemas/db/spot_jobs/008_add_full_resources.py +34 -0
- sky/schemas/db/spot_jobs/009_job_events.py +32 -0
- sky/schemas/db/spot_jobs/010_job_events_timestamp_with_timezone.py +43 -0
- sky/schemas/db/spot_jobs/011_add_links.py +34 -0
- sky/schemas/generated/jobsv1_pb2.py +9 -5
- sky/schemas/generated/jobsv1_pb2.pyi +12 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +44 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +32 -28
- sky/schemas/generated/managed_jobsv1_pb2.pyi +11 -2
- sky/serve/serve_utils.py +232 -40
- sky/serve/server/impl.py +1 -1
- sky/server/common.py +17 -0
- sky/server/constants.py +1 -1
- sky/server/metrics.py +6 -3
- sky/server/plugins.py +238 -0
- sky/server/requests/executor.py +5 -2
- sky/server/requests/payloads.py +30 -1
- sky/server/requests/request_names.py +4 -0
- sky/server/requests/requests.py +33 -11
- sky/server/requests/serializers/encoders.py +22 -0
- sky/server/requests/serializers/return_value_serializers.py +70 -0
- sky/server/server.py +506 -109
- sky/server/server_utils.py +30 -0
- sky/server/uvicorn.py +5 -0
- sky/setup_files/MANIFEST.in +1 -0
- sky/setup_files/dependencies.py +22 -9
- sky/sky_logging.py +2 -1
- sky/skylet/attempt_skylet.py +13 -3
- sky/skylet/constants.py +55 -13
- sky/skylet/events.py +10 -4
- sky/skylet/executor/__init__.py +1 -0
- sky/skylet/executor/slurm.py +187 -0
- sky/skylet/job_lib.py +91 -5
- sky/skylet/log_lib.py +22 -6
- sky/skylet/log_lib.pyi +8 -6
- sky/skylet/services.py +18 -3
- sky/skylet/skylet.py +5 -1
- sky/skylet/subprocess_daemon.py +2 -1
- sky/ssh_node_pools/constants.py +12 -0
- sky/ssh_node_pools/core.py +40 -3
- sky/ssh_node_pools/deploy/__init__.py +4 -0
- sky/{utils/kubernetes/deploy_ssh_node_pools.py → ssh_node_pools/deploy/deploy.py} +279 -504
- sky/ssh_node_pools/deploy/tunnel/ssh-tunnel.sh +379 -0
- sky/ssh_node_pools/deploy/tunnel_utils.py +199 -0
- sky/ssh_node_pools/deploy/utils.py +173 -0
- sky/ssh_node_pools/server.py +11 -13
- sky/{utils/kubernetes/ssh_utils.py → ssh_node_pools/utils.py} +9 -6
- sky/templates/kubernetes-ray.yml.j2 +12 -6
- sky/templates/slurm-ray.yml.j2 +115 -0
- sky/templates/vast-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +18 -41
- sky/users/model.conf +1 -1
- sky/users/permission.py +85 -52
- sky/users/rbac.py +31 -3
- sky/utils/annotations.py +108 -8
- sky/utils/auth_utils.py +42 -0
- sky/utils/cli_utils/status_utils.py +19 -5
- sky/utils/cluster_utils.py +10 -3
- sky/utils/command_runner.py +389 -35
- sky/utils/command_runner.pyi +43 -4
- sky/utils/common_utils.py +47 -31
- sky/utils/context.py +32 -0
- sky/utils/db/db_utils.py +36 -6
- sky/utils/db/migration_utils.py +41 -21
- sky/utils/infra_utils.py +5 -1
- sky/utils/instance_links.py +139 -0
- sky/utils/interactive_utils.py +49 -0
- sky/utils/kubernetes/generate_kubeconfig.sh +42 -33
- sky/utils/kubernetes/kubernetes_deploy_utils.py +2 -94
- sky/utils/kubernetes/rsync_helper.sh +5 -1
- sky/utils/kubernetes/ssh-tunnel.sh +7 -376
- sky/utils/plugin_extensions/__init__.py +14 -0
- sky/utils/plugin_extensions/external_failure_source.py +176 -0
- sky/utils/resources_utils.py +10 -8
- sky/utils/rich_utils.py +9 -11
- sky/utils/schemas.py +93 -19
- sky/utils/status_lib.py +7 -0
- sky/utils/subprocess_utils.py +17 -0
- sky/volumes/client/sdk.py +6 -3
- sky/volumes/server/core.py +65 -27
- sky_templates/ray/start_cluster +8 -4
- {skypilot_nightly-1.0.0.dev20251203.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/METADATA +67 -59
- {skypilot_nightly-1.0.0.dev20251203.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/RECORD +208 -180
- sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +0 -11
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +0 -6
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +0 -1
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +0 -1
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +0 -15
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +0 -26
- sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +0 -1
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +0 -1
- sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +0 -1
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +0 -1
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +0 -1
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +0 -15
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +0 -13
- sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +0 -1
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +0 -1
- sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +0 -1
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +0 -30
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +0 -41
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +0 -1
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +0 -1
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +0 -6
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +0 -1
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +0 -31
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +0 -30
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +0 -34
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +0 -21
- sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +0 -1
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +0 -3
- /sky/dashboard/out/_next/static/{96_E2yl3QAiIJGOYCkSpB → 3nu-b8raeKRNABZ2d4GAG}/_ssgManifest.js +0 -0
- /sky/{utils/kubernetes → ssh_node_pools/deploy/tunnel}/cleanup-tunnel.sh +0 -0
- {skypilot_nightly-1.0.0.dev20251203.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20251203.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20251203.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20251203.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/top_level.txt +0 -0
sky/utils/command_runner.pyi
CHANGED
|
@@ -6,7 +6,7 @@ determine the return type based on the value of require_outputs.
|
|
|
6
6
|
"""
|
|
7
7
|
import enum
|
|
8
8
|
import typing
|
|
9
|
-
from typing import Any, Iterable, List, Optional, Tuple, Union
|
|
9
|
+
from typing import Any, Callable, Iterable, List, Optional, Tuple, Union
|
|
10
10
|
|
|
11
11
|
from typing_extensions import Literal
|
|
12
12
|
|
|
@@ -27,6 +27,7 @@ def ssh_options_list(
|
|
|
27
27
|
ssh_control_name: Optional[str],
|
|
28
28
|
*,
|
|
29
29
|
ssh_proxy_command: Optional[str] = ...,
|
|
30
|
+
ssh_proxy_jump: Optional[str] = ...,
|
|
30
31
|
docker_ssh_proxy_command: Optional[str] = ...,
|
|
31
32
|
timeout: int = ...,
|
|
32
33
|
port: int = ...,
|
|
@@ -63,6 +64,7 @@ class CommandRunner:
|
|
|
63
64
|
connect_timeout: Optional[int] = ...,
|
|
64
65
|
source_bashrc: bool = ...,
|
|
65
66
|
skip_lines: int = ...,
|
|
67
|
+
run_in_background: bool = ...,
|
|
66
68
|
**kwargs) -> int:
|
|
67
69
|
...
|
|
68
70
|
|
|
@@ -78,6 +80,7 @@ class CommandRunner:
|
|
|
78
80
|
connect_timeout: Optional[int] = ...,
|
|
79
81
|
source_bashrc: bool = ...,
|
|
80
82
|
skip_lines: int = ...,
|
|
83
|
+
run_in_background: bool = ...,
|
|
81
84
|
**kwargs) -> Tuple[int, str, str]:
|
|
82
85
|
...
|
|
83
86
|
|
|
@@ -93,6 +96,7 @@ class CommandRunner:
|
|
|
93
96
|
connect_timeout: Optional[int] = ...,
|
|
94
97
|
source_bashrc: bool = ...,
|
|
95
98
|
skip_lines: int = ...,
|
|
99
|
+
run_in_background: bool = ...,
|
|
96
100
|
**kwargs) -> Union[Tuple[int, str, str], int]:
|
|
97
101
|
...
|
|
98
102
|
|
|
@@ -130,22 +134,25 @@ class SSHCommandRunner(CommandRunner):
|
|
|
130
134
|
ip: str
|
|
131
135
|
port: int
|
|
132
136
|
ssh_user: str
|
|
133
|
-
ssh_private_key: str
|
|
137
|
+
ssh_private_key: Optional[str]
|
|
134
138
|
ssh_control_name: Optional[str]
|
|
135
139
|
docker_user: str
|
|
136
140
|
disable_control_master: Optional[bool]
|
|
137
141
|
port_forward_execute_remote_command: Optional[bool]
|
|
142
|
+
enable_interactive_auth: bool
|
|
138
143
|
|
|
139
144
|
def __init__(
|
|
140
145
|
self,
|
|
141
146
|
node: Tuple[str, int],
|
|
142
147
|
ssh_user: str,
|
|
143
|
-
ssh_private_key: str,
|
|
148
|
+
ssh_private_key: Optional[str],
|
|
144
149
|
ssh_control_name: Optional[str] = ...,
|
|
145
150
|
ssh_proxy_command: Optional[str] = ...,
|
|
151
|
+
ssh_proxy_jump: Optional[str] = ...,
|
|
146
152
|
docker_user: Optional[str] = ...,
|
|
147
153
|
disable_control_master: Optional[bool] = ...,
|
|
148
154
|
port_forward_execute_remote_command: Optional[bool] = ...,
|
|
155
|
+
enable_interactive_auth: bool = ...,
|
|
149
156
|
) -> None:
|
|
150
157
|
...
|
|
151
158
|
|
|
@@ -163,6 +170,7 @@ class SSHCommandRunner(CommandRunner):
|
|
|
163
170
|
connect_timeout: Optional[int] = ...,
|
|
164
171
|
source_bashrc: bool = ...,
|
|
165
172
|
skip_lines: int = ...,
|
|
173
|
+
run_in_background: bool = ...,
|
|
166
174
|
**kwargs) -> int:
|
|
167
175
|
...
|
|
168
176
|
|
|
@@ -180,6 +188,7 @@ class SSHCommandRunner(CommandRunner):
|
|
|
180
188
|
connect_timeout: Optional[int] = ...,
|
|
181
189
|
source_bashrc: bool = ...,
|
|
182
190
|
skip_lines: int = ...,
|
|
191
|
+
run_in_background: bool = ...,
|
|
183
192
|
**kwargs) -> Tuple[int, str, str]:
|
|
184
193
|
...
|
|
185
194
|
|
|
@@ -197,6 +206,7 @@ class SSHCommandRunner(CommandRunner):
|
|
|
197
206
|
connect_timeout: Optional[int] = ...,
|
|
198
207
|
source_bashrc: bool = ...,
|
|
199
208
|
skip_lines: int = ...,
|
|
209
|
+
run_in_background: bool = ...,
|
|
200
210
|
**kwargs) -> Union[Tuple[int, str, str], int]:
|
|
201
211
|
...
|
|
202
212
|
|
|
@@ -216,7 +226,8 @@ class SSHCommandRunner(CommandRunner):
|
|
|
216
226
|
up: bool,
|
|
217
227
|
log_path: str = ...,
|
|
218
228
|
stream_logs: bool = ...,
|
|
219
|
-
max_retry: int = ...) -> None:
|
|
229
|
+
max_retry: int = ...,
|
|
230
|
+
get_remote_home_dir: Callable[[], str] = ...) -> None:
|
|
220
231
|
...
|
|
221
232
|
|
|
222
233
|
def port_forward_command(
|
|
@@ -251,6 +262,7 @@ class KubernetesCommandRunner(CommandRunner):
|
|
|
251
262
|
connect_timeout: Optional[int] = ...,
|
|
252
263
|
source_bashrc: bool = ...,
|
|
253
264
|
skip_lines: int = ...,
|
|
265
|
+
run_in_background: bool = ...,
|
|
254
266
|
**kwargs) -> int:
|
|
255
267
|
...
|
|
256
268
|
|
|
@@ -268,6 +280,7 @@ class KubernetesCommandRunner(CommandRunner):
|
|
|
268
280
|
connect_timeout: Optional[int] = ...,
|
|
269
281
|
source_bashrc: bool = ...,
|
|
270
282
|
skip_lines: int = ...,
|
|
283
|
+
run_in_background: bool = ...,
|
|
271
284
|
**kwargs) -> Tuple[int, str, str]:
|
|
272
285
|
...
|
|
273
286
|
|
|
@@ -285,6 +298,7 @@ class KubernetesCommandRunner(CommandRunner):
|
|
|
285
298
|
connect_timeout: Optional[int] = ...,
|
|
286
299
|
source_bashrc: bool = ...,
|
|
287
300
|
skip_lines: int = ...,
|
|
301
|
+
run_in_background: bool = ...,
|
|
288
302
|
**kwargs) -> Union[Tuple[int, str, str], int]:
|
|
289
303
|
...
|
|
290
304
|
|
|
@@ -306,6 +320,28 @@ class KubernetesCommandRunner(CommandRunner):
|
|
|
306
320
|
...
|
|
307
321
|
|
|
308
322
|
|
|
323
|
+
class SlurmCommandRunner(SSHCommandRunner):
|
|
324
|
+
"""Runner for Slurm commands."""
|
|
325
|
+
sky_dir: str
|
|
326
|
+
skypilot_runtime_dir: str
|
|
327
|
+
job_id: str
|
|
328
|
+
slurm_node: str
|
|
329
|
+
|
|
330
|
+
def __init__(
|
|
331
|
+
self,
|
|
332
|
+
node: Tuple[str, int],
|
|
333
|
+
ssh_user: str,
|
|
334
|
+
ssh_private_key: Optional[str],
|
|
335
|
+
*,
|
|
336
|
+
sky_dir: str,
|
|
337
|
+
skypilot_runtime_dir: str,
|
|
338
|
+
job_id: str,
|
|
339
|
+
slurm_node: str,
|
|
340
|
+
**kwargs,
|
|
341
|
+
) -> None:
|
|
342
|
+
...
|
|
343
|
+
|
|
344
|
+
|
|
309
345
|
class LocalProcessCommandRunner(CommandRunner):
|
|
310
346
|
|
|
311
347
|
def __init__(self) -> None:
|
|
@@ -325,6 +361,7 @@ class LocalProcessCommandRunner(CommandRunner):
|
|
|
325
361
|
connect_timeout: Optional[int] = ...,
|
|
326
362
|
source_bashrc: bool = ...,
|
|
327
363
|
skip_lines: int = ...,
|
|
364
|
+
run_in_background: bool = ...,
|
|
328
365
|
**kwargs) -> int:
|
|
329
366
|
...
|
|
330
367
|
|
|
@@ -342,6 +379,7 @@ class LocalProcessCommandRunner(CommandRunner):
|
|
|
342
379
|
connect_timeout: Optional[int] = ...,
|
|
343
380
|
source_bashrc: bool = ...,
|
|
344
381
|
skip_lines: int = ...,
|
|
382
|
+
run_in_background: bool = ...,
|
|
345
383
|
**kwargs) -> Tuple[int, str, str]:
|
|
346
384
|
...
|
|
347
385
|
|
|
@@ -359,5 +397,6 @@ class LocalProcessCommandRunner(CommandRunner):
|
|
|
359
397
|
connect_timeout: Optional[int] = ...,
|
|
360
398
|
source_bashrc: bool = ...,
|
|
361
399
|
skip_lines: int = ...,
|
|
400
|
+
run_in_background: bool = ...,
|
|
362
401
|
**kwargs) -> Union[Tuple[int, str, str], int]:
|
|
363
402
|
...
|
sky/utils/common_utils.py
CHANGED
|
@@ -29,6 +29,7 @@ from sky.adaptors import common as adaptors_common
|
|
|
29
29
|
from sky.skylet import constants
|
|
30
30
|
from sky.usage import constants as usage_constants
|
|
31
31
|
from sky.utils import annotations
|
|
32
|
+
from sky.utils import context
|
|
32
33
|
from sky.utils import ux_utils
|
|
33
34
|
from sky.utils import validator
|
|
34
35
|
|
|
@@ -293,11 +294,11 @@ class Backoff:
|
|
|
293
294
|
return self._backoff
|
|
294
295
|
|
|
295
296
|
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
297
|
+
_CLIENT_COMMAND_KEY = 'client_command'
|
|
298
|
+
_CLIENT_ENTRYPOINT_KEY = 'client_entrypoint'
|
|
299
|
+
_USING_REMOTE_API_SERVER_KEY = 'using_remote_api_server'
|
|
300
|
+
_USER_KEY = 'user'
|
|
301
|
+
_REQUEST_ID_KEY = 'request_id'
|
|
301
302
|
|
|
302
303
|
|
|
303
304
|
def set_request_context(client_entrypoint: Optional[str],
|
|
@@ -309,22 +310,21 @@ def set_request_context(client_entrypoint: Optional[str],
|
|
|
309
310
|
This is useful when we are on the SkyPilot API server side and we have a
|
|
310
311
|
client entrypoint and command from the client.
|
|
311
312
|
"""
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
_current_user = user
|
|
321
|
-
_current_request_id = request_id
|
|
313
|
+
# This function will be called in process executor and coroutine executor.
|
|
314
|
+
# context.set_context_var ensures the context is safe in both cases.
|
|
315
|
+
context.set_context_var(_CLIENT_ENTRYPOINT_KEY, client_entrypoint)
|
|
316
|
+
context.set_context_var(_CLIENT_COMMAND_KEY, client_command)
|
|
317
|
+
context.set_context_var(_USING_REMOTE_API_SERVER_KEY,
|
|
318
|
+
using_remote_api_server)
|
|
319
|
+
context.set_context_var(_USER_KEY, user)
|
|
320
|
+
context.set_context_var(_REQUEST_ID_KEY, request_id)
|
|
322
321
|
|
|
323
322
|
|
|
324
323
|
def get_current_request_id() -> str:
|
|
325
324
|
"""Returns the current request id."""
|
|
326
|
-
|
|
327
|
-
|
|
325
|
+
value = context.get_context_var('request_id')
|
|
326
|
+
if value is not None:
|
|
327
|
+
return value
|
|
328
328
|
return 'dummy-request-id'
|
|
329
329
|
|
|
330
330
|
|
|
@@ -334,30 +334,43 @@ def get_current_command() -> str:
|
|
|
334
334
|
Normally uses get_pretty_entry_point(), but will use the client command on
|
|
335
335
|
the server side.
|
|
336
336
|
"""
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
337
|
+
value = context.get_context_var(_CLIENT_COMMAND_KEY)
|
|
338
|
+
if value is not None:
|
|
339
|
+
return value
|
|
340
340
|
return get_pretty_entrypoint_cmd()
|
|
341
341
|
|
|
342
342
|
|
|
343
343
|
def get_current_user() -> 'models.User':
|
|
344
|
-
"""Returns the current
|
|
345
|
-
|
|
346
|
-
|
|
344
|
+
"""Returns the user in current server session."""
|
|
345
|
+
value = context.get_context_var(_USER_KEY)
|
|
346
|
+
if value is not None:
|
|
347
|
+
return value
|
|
347
348
|
return models.User.get_current_user()
|
|
348
349
|
|
|
349
350
|
|
|
350
351
|
def get_current_user_name() -> str:
|
|
351
|
-
"""Returns the current
|
|
352
|
+
"""Returns the user name in current server session."""
|
|
352
353
|
name = get_current_user().name
|
|
353
354
|
assert name is not None
|
|
354
355
|
return name
|
|
355
356
|
|
|
356
357
|
|
|
358
|
+
def get_local_user_name() -> str:
|
|
359
|
+
"""Returns the user name in local environment.
|
|
360
|
+
|
|
361
|
+
This is for backward compatibility where anonymous access is implicitly
|
|
362
|
+
allowed when no authentication method at server-side is configured and
|
|
363
|
+
the username from client environment variable will be used to identify the
|
|
364
|
+
user.
|
|
365
|
+
"""
|
|
366
|
+
name = os.getenv(constants.USER_ENV_VAR, getpass.getuser())
|
|
367
|
+
assert name is not None
|
|
368
|
+
return name
|
|
369
|
+
|
|
370
|
+
|
|
357
371
|
def set_current_user(user: 'models.User'):
|
|
358
372
|
"""Sets the current user."""
|
|
359
|
-
|
|
360
|
-
_current_user = user
|
|
373
|
+
context.set_context_var('user', user)
|
|
361
374
|
|
|
362
375
|
|
|
363
376
|
def get_current_client_entrypoint(server_entrypoint: str) -> str:
|
|
@@ -366,8 +379,9 @@ def get_current_client_entrypoint(server_entrypoint: str) -> str:
|
|
|
366
379
|
Gets the client entrypoint from the context, if it is not set, returns the
|
|
367
380
|
server entrypoint.
|
|
368
381
|
"""
|
|
369
|
-
|
|
370
|
-
|
|
382
|
+
value = context.get_context_var(_CLIENT_ENTRYPOINT_KEY)
|
|
383
|
+
if value is not None:
|
|
384
|
+
return value
|
|
371
385
|
return server_entrypoint
|
|
372
386
|
|
|
373
387
|
|
|
@@ -376,8 +390,9 @@ def get_using_remote_api_server() -> bool:
|
|
|
376
390
|
if os.getenv(constants.USING_REMOTE_API_SERVER_ENV_VAR) is not None:
|
|
377
391
|
return os.getenv(constants.USING_REMOTE_API_SERVER_ENV_VAR,
|
|
378
392
|
'').lower() in ('true', '1')
|
|
379
|
-
|
|
380
|
-
|
|
393
|
+
value = context.get_context_var(_USING_REMOTE_API_SERVER_KEY)
|
|
394
|
+
if value is not None:
|
|
395
|
+
return value
|
|
381
396
|
# This gets the right status for the local client.
|
|
382
397
|
# TODO(zhwu): This is to prevent circular import. We should refactor this.
|
|
383
398
|
# pylint: disable=import-outside-toplevel
|
|
@@ -724,7 +739,8 @@ def find_free_port(start_port: int) -> int:
|
|
|
724
739
|
try:
|
|
725
740
|
s.bind(('', port))
|
|
726
741
|
return port
|
|
727
|
-
except OSError:
|
|
742
|
+
except OSError as e:
|
|
743
|
+
logger.debug(f'Error binding port {port}: {e}')
|
|
728
744
|
pass
|
|
729
745
|
raise OSError('No free ports available.')
|
|
730
746
|
|
sky/utils/context.py
CHANGED
|
@@ -17,6 +17,8 @@ from typing_extensions import ParamSpec
|
|
|
17
17
|
if TYPE_CHECKING:
|
|
18
18
|
from sky.skypilot_config import ConfigContext
|
|
19
19
|
|
|
20
|
+
_PROCESS_GLOBAL_VARS = {}
|
|
21
|
+
|
|
20
22
|
|
|
21
23
|
class SkyPilotContext(object):
|
|
22
24
|
"""SkyPilot typed context vars for threads and coroutines.
|
|
@@ -65,6 +67,8 @@ class SkyPilotContext(object):
|
|
|
65
67
|
self._log_file_handle = None
|
|
66
68
|
self.env_overrides = {}
|
|
67
69
|
self.config_context = None
|
|
70
|
+
self.request_context = None
|
|
71
|
+
self.vars = {}
|
|
68
72
|
|
|
69
73
|
def cancel(self):
|
|
70
74
|
"""Cancel the context."""
|
|
@@ -113,6 +117,12 @@ class SkyPilotContext(object):
|
|
|
113
117
|
self._log_file_handle.close()
|
|
114
118
|
self._log_file_handle = None
|
|
115
119
|
|
|
120
|
+
def set_var(self, key: str, value: Any):
|
|
121
|
+
self.vars[key] = value
|
|
122
|
+
|
|
123
|
+
def get_var(self, key: str) -> Optional[Any]:
|
|
124
|
+
return self.vars.get(key)
|
|
125
|
+
|
|
116
126
|
def __enter__(self):
|
|
117
127
|
return self
|
|
118
128
|
|
|
@@ -150,6 +160,28 @@ def get() -> Optional[SkyPilotContext]:
|
|
|
150
160
|
return _CONTEXT.get()
|
|
151
161
|
|
|
152
162
|
|
|
163
|
+
def set_context_var(key: str, value: Any):
|
|
164
|
+
ctx = get()
|
|
165
|
+
if ctx is not None:
|
|
166
|
+
# Set the var in context
|
|
167
|
+
ctx.set_var(key, value)
|
|
168
|
+
else:
|
|
169
|
+
# Fallback to process-isolated assumption, where we thought
|
|
170
|
+
# modifying process-scope vars is safe.
|
|
171
|
+
_PROCESS_GLOBAL_VARS[key] = value
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def get_context_var(key: str) -> Any:
|
|
175
|
+
ctx = get()
|
|
176
|
+
if ctx is not None:
|
|
177
|
+
# Use `in` to check for key existence to distinguish
|
|
178
|
+
# "key not found" from "key's value is None".
|
|
179
|
+
if key in ctx.vars:
|
|
180
|
+
return ctx.get_var(key)
|
|
181
|
+
# Fallback to the variable set in process-scope
|
|
182
|
+
return _PROCESS_GLOBAL_VARS.get(key)
|
|
183
|
+
|
|
184
|
+
|
|
153
185
|
class ContextualEnviron(MutableMapping[str, str]):
|
|
154
186
|
"""Environment variables wrapper with contextual overrides.
|
|
155
187
|
|
sky/utils/db/db_utils.py
CHANGED
|
@@ -75,6 +75,18 @@ def safe_cursor(db_path: str):
|
|
|
75
75
|
conn.close()
|
|
76
76
|
|
|
77
77
|
|
|
78
|
+
@contextlib.contextmanager
|
|
79
|
+
def safe_cursor_on_connection(conn: 'sqlite3.Connection'):
|
|
80
|
+
"""A auto-committing, auto-closing cursor on an existing connection."""
|
|
81
|
+
# Ensure commit() is called when the context is exited.
|
|
82
|
+
with conn:
|
|
83
|
+
cursor = conn.cursor()
|
|
84
|
+
try:
|
|
85
|
+
yield cursor
|
|
86
|
+
finally:
|
|
87
|
+
cursor.close()
|
|
88
|
+
|
|
89
|
+
|
|
78
90
|
def add_column_to_table(
|
|
79
91
|
cursor: 'sqlite3.Cursor',
|
|
80
92
|
conn: 'sqlite3.Connection',
|
|
@@ -286,6 +298,11 @@ def drop_column_from_table_alembic(
|
|
|
286
298
|
raise
|
|
287
299
|
|
|
288
300
|
|
|
301
|
+
def fault_point():
|
|
302
|
+
"""For test fault injection."""
|
|
303
|
+
pass
|
|
304
|
+
|
|
305
|
+
|
|
289
306
|
class SQLiteConn(threading.local):
|
|
290
307
|
"""Thread-local connection to the sqlite3 database."""
|
|
291
308
|
|
|
@@ -345,8 +362,8 @@ class SQLiteConn(threading.local):
|
|
|
345
362
|
|
|
346
363
|
def exec_and_commit(sql: str, parameters: Optional[Iterable[Any]]):
|
|
347
364
|
# pylint: disable=protected-access
|
|
348
|
-
conn._conn
|
|
349
|
-
|
|
365
|
+
with safe_cursor_on_connection(conn._conn) as cursor:
|
|
366
|
+
cursor.execute(sql, parameters)
|
|
350
367
|
|
|
351
368
|
# pylint: disable=protected-access
|
|
352
369
|
await conn._execute(exec_and_commit, sql, parameters)
|
|
@@ -357,7 +374,20 @@ class SQLiteConn(threading.local):
|
|
|
357
374
|
parameters: Optional[Iterable[Any]] = None
|
|
358
375
|
) -> Iterable[sqlite3.Row]:
|
|
359
376
|
conn = await self._get_async_conn()
|
|
360
|
-
|
|
377
|
+
if parameters is None:
|
|
378
|
+
parameters = []
|
|
379
|
+
|
|
380
|
+
def exec_fetch_all(sql: str, parameters: Optional[Iterable[Any]]):
|
|
381
|
+
# pylint: disable=protected-access
|
|
382
|
+
with safe_cursor_on_connection(conn._conn) as cursor:
|
|
383
|
+
cursor.execute(sql, parameters)
|
|
384
|
+
# Note(dev): sqlite3.Connection cannot be patched, keep
|
|
385
|
+
# fault_point here to test the integrity of exec_fetch_all()
|
|
386
|
+
fault_point()
|
|
387
|
+
return cursor.fetchall()
|
|
388
|
+
|
|
389
|
+
# pylint: disable=protected-access
|
|
390
|
+
return await conn._execute(exec_fetch_all, sql, parameters)
|
|
361
391
|
|
|
362
392
|
async def execute_get_returning_value_async(
|
|
363
393
|
self,
|
|
@@ -372,9 +402,9 @@ class SQLiteConn(threading.local):
|
|
|
372
402
|
def exec_and_get_returning_value(sql: str,
|
|
373
403
|
parameters: Optional[Iterable[Any]]):
|
|
374
404
|
# pylint: disable=protected-access
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
405
|
+
with safe_cursor_on_connection(conn._conn) as cursor:
|
|
406
|
+
cursor.execute(sql, parameters)
|
|
407
|
+
return cursor.fetchone()
|
|
378
408
|
|
|
379
409
|
# pylint: disable=protected-access
|
|
380
410
|
return await conn._execute(exec_and_get_returning_value, sql,
|
sky/utils/db/migration_utils.py
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
import contextlib
|
|
4
4
|
import logging
|
|
5
5
|
import os
|
|
6
|
+
from typing import Optional
|
|
6
7
|
|
|
7
8
|
from alembic import command as alembic_command
|
|
8
9
|
from alembic.config import Config
|
|
@@ -22,7 +23,7 @@ GLOBAL_USER_STATE_VERSION = '011'
|
|
|
22
23
|
GLOBAL_USER_STATE_LOCK_PATH = f'~/.sky/locks/.{GLOBAL_USER_STATE_DB_NAME}.lock'
|
|
23
24
|
|
|
24
25
|
SPOT_JOBS_DB_NAME = 'spot_jobs_db'
|
|
25
|
-
SPOT_JOBS_VERSION = '007'
|
|
26
|
+
SPOT_JOBS_VERSION = '011'
|
|
26
27
|
SPOT_JOBS_LOCK_PATH = f'~/.sky/locks/.{SPOT_JOBS_DB_NAME}.lock'
|
|
27
28
|
|
|
28
29
|
SERVE_DB_NAME = 'serve_db'
|
|
@@ -52,12 +53,22 @@ def db_lock(db_name: str):
|
|
|
52
53
|
f'file if you believe it is stale.') from e
|
|
53
54
|
|
|
54
55
|
|
|
55
|
-
def get_alembic_config(engine: sqlalchemy.engine.Engine,
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
56
|
+
def get_alembic_config(engine: sqlalchemy.engine.Engine,
|
|
57
|
+
section: str,
|
|
58
|
+
alembic_ini_path: Optional[str] = None):
|
|
59
|
+
"""Get Alembic configuration for the given section.
|
|
60
|
+
|
|
61
|
+
Args:
|
|
62
|
+
engine: SQLAlchemy engine for the database.
|
|
63
|
+
section: Alembic section name (e.g., 'state_db' or 'spot_jobs_db').
|
|
64
|
+
alembic_ini_path: Optional path to a custom alembic.ini file.
|
|
65
|
+
If not provided, uses the default SkyPilot alembic.ini.
|
|
66
|
+
"""
|
|
67
|
+
if alembic_ini_path is None:
|
|
68
|
+
# Default to SkyPilot's alembic.ini
|
|
69
|
+
alembic_ini_path = os.path.join(
|
|
70
|
+
os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
|
|
71
|
+
'setup_files', 'alembic.ini')
|
|
61
72
|
alembic_cfg = Config(alembic_ini_path, ini_section=section)
|
|
62
73
|
|
|
63
74
|
# Override the database URL to match SkyPilot's current connection
|
|
@@ -73,19 +84,23 @@ def get_alembic_config(engine: sqlalchemy.engine.Engine, section: str):
|
|
|
73
84
|
return alembic_cfg
|
|
74
85
|
|
|
75
86
|
|
|
76
|
-
def needs_upgrade(engine: sqlalchemy.engine.Engine,
|
|
77
|
-
|
|
87
|
+
def needs_upgrade(engine: sqlalchemy.engine.Engine,
|
|
88
|
+
section: str,
|
|
89
|
+
target_revision: str,
|
|
90
|
+
alembic_ini_path: Optional[str] = None):
|
|
78
91
|
"""Check if the database needs to be upgraded.
|
|
79
92
|
|
|
80
93
|
Args:
|
|
81
|
-
engine: SQLAlchemy engine for the database
|
|
82
|
-
section: Alembic section to upgrade (e.g., 'state_db' or
|
|
83
|
-
|
|
94
|
+
engine: SQLAlchemy engine for the database.
|
|
95
|
+
section: Alembic section to upgrade (e.g., 'state_db' or
|
|
96
|
+
'spot_jobs_db').
|
|
97
|
+
target_revision: Target revision to upgrade to (e.g., '001').
|
|
98
|
+
alembic_ini_path: Optional path to a custom alembic.ini file.
|
|
84
99
|
"""
|
|
85
100
|
current_rev = None
|
|
86
101
|
|
|
87
102
|
# get alembic config for the given section
|
|
88
|
-
alembic_config = get_alembic_config(engine, section)
|
|
103
|
+
alembic_config = get_alembic_config(engine, section, alembic_ini_path)
|
|
89
104
|
version_table = alembic_config.get_section_option(
|
|
90
105
|
alembic_config.config_ini_section, 'version_table', 'alembic_version')
|
|
91
106
|
|
|
@@ -112,26 +127,31 @@ def needs_upgrade(engine: sqlalchemy.engine.Engine, section: str,
|
|
|
112
127
|
return current_rev_num < target_rev_num
|
|
113
128
|
|
|
114
129
|
|
|
115
|
-
def safe_alembic_upgrade(engine: sqlalchemy.engine.Engine,
|
|
116
|
-
|
|
130
|
+
def safe_alembic_upgrade(engine: sqlalchemy.engine.Engine,
|
|
131
|
+
section: str,
|
|
132
|
+
target_revision: str,
|
|
133
|
+
alembic_ini_path: Optional[str] = None):
|
|
117
134
|
"""Upgrade the database if needed. Uses a file lock to ensure
|
|
118
135
|
that only one process tries to upgrade the database at a time.
|
|
119
136
|
|
|
120
137
|
Args:
|
|
121
|
-
engine: SQLAlchemy engine for the database
|
|
122
|
-
section: Alembic section to upgrade (e.g., 'state_db' or
|
|
123
|
-
|
|
138
|
+
engine: SQLAlchemy engine for the database.
|
|
139
|
+
section: Alembic section to upgrade (e.g., 'state_db' or
|
|
140
|
+
'spot_jobs_db').
|
|
141
|
+
target_revision: Target revision to upgrade to (e.g., '001').
|
|
142
|
+
alembic_ini_path: Optional path to a custom alembic.ini file.
|
|
124
143
|
"""
|
|
125
144
|
# set alembic logger to warning level
|
|
126
145
|
alembic_logger = logging.getLogger('alembic')
|
|
127
146
|
alembic_logger.setLevel(logging.WARNING)
|
|
128
147
|
|
|
129
|
-
alembic_config = get_alembic_config(engine, section)
|
|
148
|
+
alembic_config = get_alembic_config(engine, section, alembic_ini_path)
|
|
130
149
|
|
|
131
150
|
# only acquire lock if db needs upgrade
|
|
132
|
-
if needs_upgrade(engine, section, target_revision):
|
|
151
|
+
if needs_upgrade(engine, section, target_revision, alembic_ini_path):
|
|
133
152
|
with db_lock(section):
|
|
134
153
|
# check again if db needs upgrade in case another
|
|
135
154
|
# process upgraded it while we were waiting for the lock
|
|
136
|
-
if needs_upgrade(engine, section, target_revision
|
|
155
|
+
if needs_upgrade(engine, section, target_revision,
|
|
156
|
+
alembic_ini_path):
|
|
137
157
|
alembic_command.upgrade(alembic_config, target_revision)
|
sky/utils/infra_utils.py
CHANGED
|
@@ -173,7 +173,11 @@ class InfraInfo:
|
|
|
173
173
|
return '-'
|
|
174
174
|
|
|
175
175
|
region_or_zone = None
|
|
176
|
-
|
|
176
|
+
# For Slurm, zones = partitions. We want to show the cluster
|
|
177
|
+
# name (region) instead of the partition name (zone), as different
|
|
178
|
+
# Slurm clusters can easily have same partition name.
|
|
179
|
+
is_slurm = self.cloud.lower() == 'slurm'
|
|
180
|
+
if not is_slurm and self.zone is not None and self.zone != '*':
|
|
177
181
|
region_or_zone = self.zone
|
|
178
182
|
elif self.region is not None and self.region != '*':
|
|
179
183
|
# If using region, we remove the ssh- prefix if it exists for SSH
|