skypilot-nightly 1.0.0.dev20251210__py3-none-any.whl → 1.0.0.dev20260112__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +4 -2
- sky/adaptors/slurm.py +159 -72
- sky/backends/backend_utils.py +52 -10
- sky/backends/cloud_vm_ray_backend.py +192 -32
- sky/backends/task_codegen.py +40 -2
- sky/catalog/data_fetchers/fetch_gcp.py +9 -1
- sky/catalog/data_fetchers/fetch_nebius.py +1 -1
- sky/catalog/data_fetchers/fetch_vast.py +4 -2
- sky/catalog/seeweb_catalog.py +30 -15
- sky/catalog/shadeform_catalog.py +5 -2
- sky/catalog/slurm_catalog.py +0 -7
- sky/catalog/vast_catalog.py +30 -6
- sky/check.py +11 -8
- sky/client/cli/command.py +106 -54
- sky/client/interactive_utils.py +190 -0
- sky/client/sdk.py +8 -0
- sky/client/sdk_async.py +9 -0
- sky/clouds/aws.py +60 -2
- sky/clouds/azure.py +2 -0
- sky/clouds/kubernetes.py +2 -0
- sky/clouds/runpod.py +38 -7
- sky/clouds/slurm.py +44 -12
- sky/clouds/ssh.py +1 -1
- sky/clouds/vast.py +30 -17
- sky/core.py +69 -1
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/3nu-b8raeKRNABZ2d4GAG/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-0565f8975a7dcd10.js +6 -0
- sky/dashboard/out/_next/static/chunks/2109-55a1546d793574a7.js +11 -0
- sky/dashboard/out/_next/static/chunks/2521-099b07cd9e4745bf.js +26 -0
- sky/dashboard/out/_next/static/chunks/2755.a636e04a928a700e.js +31 -0
- sky/dashboard/out/_next/static/chunks/3495.05eab4862217c1a5.js +6 -0
- sky/dashboard/out/_next/static/chunks/3785.cfc5dcc9434fd98c.js +1 -0
- sky/dashboard/out/_next/static/chunks/3981.645d01bf9c8cad0c.js +21 -0
- sky/dashboard/out/_next/static/chunks/4083-0115d67c1fb57d6c.js +21 -0
- sky/dashboard/out/_next/static/chunks/{8640.5b9475a2d18c5416.js → 429.a58e9ba9742309ed.js} +2 -2
- sky/dashboard/out/_next/static/chunks/4555.8e221537181b5dc1.js +6 -0
- sky/dashboard/out/_next/static/chunks/4725.937865b81fdaaebb.js +6 -0
- sky/dashboard/out/_next/static/chunks/6082-edabd8f6092300ce.js +25 -0
- sky/dashboard/out/_next/static/chunks/6989-49cb7dca83a7a62d.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-630bd2a2257275f8.js +1 -0
- sky/dashboard/out/_next/static/chunks/7248-a99800d4db8edabd.js +1 -0
- sky/dashboard/out/_next/static/chunks/754-cfc5d4ad1b843d29.js +18 -0
- sky/dashboard/out/_next/static/chunks/8050-dd8aa107b17dce00.js +16 -0
- sky/dashboard/out/_next/static/chunks/8056-d4ae1e0cb81e7368.js +1 -0
- sky/dashboard/out/_next/static/chunks/8555.011023e296c127b3.js +6 -0
- sky/dashboard/out/_next/static/chunks/8821-93c25df904a8362b.js +1 -0
- sky/dashboard/out/_next/static/chunks/8969-0662594b69432ade.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.f15c91c97d124a5f.js +6 -0
- sky/dashboard/out/_next/static/chunks/{9353-8369df1cf105221c.js → 9353-7ad6bd01858556f1.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-5a86569acad99764.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-8297476714acb4ac.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-337c3ba1085f1210.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{clusters-9e5d47818b9bdadd.js → clusters-57632ff3684a8b5c.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-5fd3a453c079c2ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-9f85c02c9c6cae9e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-90f16972cbecf354.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-2dd42fc37aad427a.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-ed806aeace26b972.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-bec34706b36f3524.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{volumes-ef19d49c6d0e8500.js → volumes-a83ba9b38dff7ea9.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-96e0f298308da7e2.js → [name]-c781e9c3e52ef9fc.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-91e0942f47310aae.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-cfe59cf684ee13b9.js +1 -0
- sky/dashboard/out/_next/static/css/b0dbca28f027cc19.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/plugins/[...slug].html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/data_utils.py +26 -12
- sky/data/mounting_utils.py +29 -4
- sky/global_user_state.py +108 -16
- sky/jobs/client/sdk.py +8 -3
- sky/jobs/controller.py +191 -31
- sky/jobs/recovery_strategy.py +109 -11
- sky/jobs/server/core.py +81 -4
- sky/jobs/server/server.py +14 -0
- sky/jobs/state.py +417 -19
- sky/jobs/utils.py +73 -80
- sky/models.py +9 -0
- sky/optimizer.py +2 -1
- sky/provision/__init__.py +11 -9
- sky/provision/kubernetes/utils.py +122 -15
- sky/provision/kubernetes/volume.py +52 -17
- sky/provision/provisioner.py +2 -1
- sky/provision/runpod/instance.py +3 -1
- sky/provision/runpod/utils.py +13 -1
- sky/provision/runpod/volume.py +25 -9
- sky/provision/slurm/instance.py +75 -29
- sky/provision/slurm/utils.py +213 -107
- sky/provision/vast/utils.py +1 -0
- sky/resources.py +135 -13
- sky/schemas/api/responses.py +4 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +1 -1
- sky/schemas/db/spot_jobs/008_add_full_resources.py +34 -0
- sky/schemas/db/spot_jobs/009_job_events.py +32 -0
- sky/schemas/db/spot_jobs/010_job_events_timestamp_with_timezone.py +43 -0
- sky/schemas/db/spot_jobs/011_add_links.py +34 -0
- sky/schemas/generated/jobsv1_pb2.py +9 -5
- sky/schemas/generated/jobsv1_pb2.pyi +12 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +44 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +32 -28
- sky/schemas/generated/managed_jobsv1_pb2.pyi +11 -2
- sky/serve/serve_utils.py +232 -40
- sky/server/common.py +17 -0
- sky/server/constants.py +1 -1
- sky/server/metrics.py +6 -3
- sky/server/plugins.py +16 -0
- sky/server/requests/payloads.py +18 -0
- sky/server/requests/request_names.py +2 -0
- sky/server/requests/requests.py +28 -10
- sky/server/requests/serializers/encoders.py +5 -0
- sky/server/requests/serializers/return_value_serializers.py +14 -4
- sky/server/server.py +434 -107
- sky/server/uvicorn.py +5 -0
- sky/setup_files/MANIFEST.in +1 -0
- sky/setup_files/dependencies.py +21 -10
- sky/sky_logging.py +2 -1
- sky/skylet/constants.py +22 -5
- sky/skylet/executor/slurm.py +4 -6
- sky/skylet/job_lib.py +89 -4
- sky/skylet/services.py +18 -3
- sky/ssh_node_pools/deploy/tunnel/cleanup-tunnel.sh +62 -0
- sky/ssh_node_pools/deploy/tunnel/ssh-tunnel.sh +379 -0
- sky/templates/kubernetes-ray.yml.j2 +4 -6
- sky/templates/slurm-ray.yml.j2 +32 -2
- sky/templates/websocket_proxy.py +18 -41
- sky/users/permission.py +61 -51
- sky/utils/auth_utils.py +42 -0
- sky/utils/cli_utils/status_utils.py +19 -5
- sky/utils/cluster_utils.py +10 -3
- sky/utils/command_runner.py +256 -94
- sky/utils/command_runner.pyi +16 -0
- sky/utils/common_utils.py +30 -29
- sky/utils/context.py +32 -0
- sky/utils/db/db_utils.py +36 -6
- sky/utils/db/migration_utils.py +41 -21
- sky/utils/infra_utils.py +5 -1
- sky/utils/instance_links.py +139 -0
- sky/utils/interactive_utils.py +49 -0
- sky/utils/kubernetes/generate_kubeconfig.sh +42 -33
- sky/utils/kubernetes/rsync_helper.sh +5 -1
- sky/utils/plugin_extensions/__init__.py +14 -0
- sky/utils/plugin_extensions/external_failure_source.py +176 -0
- sky/utils/resources_utils.py +10 -8
- sky/utils/rich_utils.py +9 -11
- sky/utils/schemas.py +63 -20
- sky/utils/status_lib.py +7 -0
- sky/utils/subprocess_utils.py +17 -0
- sky/volumes/client/sdk.py +6 -3
- sky/volumes/server/core.py +65 -27
- sky_templates/ray/start_cluster +8 -4
- {skypilot_nightly-1.0.0.dev20251210.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/METADATA +53 -57
- {skypilot_nightly-1.0.0.dev20251210.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/RECORD +172 -162
- sky/dashboard/out/_next/static/KYAhEFa3FTfq4JyKVgo-s/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/1141-9c810f01ff4f398a.js +0 -11
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +0 -6
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +0 -1
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +0 -1
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +0 -15
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +0 -26
- sky/dashboard/out/_next/static/chunks/3294.ddda8c6c6f9f24dc.js +0 -1
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +0 -1
- sky/dashboard/out/_next/static/chunks/3800-b589397dc09c5b4e.js +0 -1
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +0 -1
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +0 -15
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +0 -13
- sky/dashboard/out/_next/static/chunks/6856-da20c5fd999f319c.js +0 -1
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +0 -1
- sky/dashboard/out/_next/static/chunks/6990-09cbf02d3cd518c3.js +0 -1
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +0 -30
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +0 -41
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +0 -1
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +0 -1
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +0 -6
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +0 -31
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +0 -30
- sky/dashboard/out/_next/static/chunks/pages/_app-68b647e26f9d2793.js +0 -34
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33f525539665fdfd.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a7565f586ef86467.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-12c559ec4d81fdbd.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra-d187cd0413d72475.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-895847b6cf200b04.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-8d0f4655400b4eb9.js +0 -21
- sky/dashboard/out/_next/static/chunks/pages/jobs-e5a98f17f8513a96.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/users-2f7646eb77785a2c.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-cb4da3abe08ebf19.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-fba3de387ff6bb08.js +0 -1
- sky/dashboard/out/_next/static/css/c5a4cfd2600fc715.css +0 -3
- /sky/dashboard/out/_next/static/{KYAhEFa3FTfq4JyKVgo-s → 3nu-b8raeKRNABZ2d4GAG}/_ssgManifest.js +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/plugins/{[...slug]-4f46050ca065d8f8.js → [...slug]-449a9f5a3bb20fb3.js} +0 -0
- {skypilot_nightly-1.0.0.dev20251210.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20251210.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20251210.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20251210.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/top_level.txt +0 -0
sky/utils/command_runner.pyi
CHANGED
|
@@ -27,6 +27,7 @@ def ssh_options_list(
|
|
|
27
27
|
ssh_control_name: Optional[str],
|
|
28
28
|
*,
|
|
29
29
|
ssh_proxy_command: Optional[str] = ...,
|
|
30
|
+
ssh_proxy_jump: Optional[str] = ...,
|
|
30
31
|
docker_ssh_proxy_command: Optional[str] = ...,
|
|
31
32
|
timeout: int = ...,
|
|
32
33
|
port: int = ...,
|
|
@@ -63,6 +64,7 @@ class CommandRunner:
|
|
|
63
64
|
connect_timeout: Optional[int] = ...,
|
|
64
65
|
source_bashrc: bool = ...,
|
|
65
66
|
skip_lines: int = ...,
|
|
67
|
+
run_in_background: bool = ...,
|
|
66
68
|
**kwargs) -> int:
|
|
67
69
|
...
|
|
68
70
|
|
|
@@ -78,6 +80,7 @@ class CommandRunner:
|
|
|
78
80
|
connect_timeout: Optional[int] = ...,
|
|
79
81
|
source_bashrc: bool = ...,
|
|
80
82
|
skip_lines: int = ...,
|
|
83
|
+
run_in_background: bool = ...,
|
|
81
84
|
**kwargs) -> Tuple[int, str, str]:
|
|
82
85
|
...
|
|
83
86
|
|
|
@@ -93,6 +96,7 @@ class CommandRunner:
|
|
|
93
96
|
connect_timeout: Optional[int] = ...,
|
|
94
97
|
source_bashrc: bool = ...,
|
|
95
98
|
skip_lines: int = ...,
|
|
99
|
+
run_in_background: bool = ...,
|
|
96
100
|
**kwargs) -> Union[Tuple[int, str, str], int]:
|
|
97
101
|
...
|
|
98
102
|
|
|
@@ -135,6 +139,7 @@ class SSHCommandRunner(CommandRunner):
|
|
|
135
139
|
docker_user: str
|
|
136
140
|
disable_control_master: Optional[bool]
|
|
137
141
|
port_forward_execute_remote_command: Optional[bool]
|
|
142
|
+
enable_interactive_auth: bool
|
|
138
143
|
|
|
139
144
|
def __init__(
|
|
140
145
|
self,
|
|
@@ -143,9 +148,11 @@ class SSHCommandRunner(CommandRunner):
|
|
|
143
148
|
ssh_private_key: Optional[str],
|
|
144
149
|
ssh_control_name: Optional[str] = ...,
|
|
145
150
|
ssh_proxy_command: Optional[str] = ...,
|
|
151
|
+
ssh_proxy_jump: Optional[str] = ...,
|
|
146
152
|
docker_user: Optional[str] = ...,
|
|
147
153
|
disable_control_master: Optional[bool] = ...,
|
|
148
154
|
port_forward_execute_remote_command: Optional[bool] = ...,
|
|
155
|
+
enable_interactive_auth: bool = ...,
|
|
149
156
|
) -> None:
|
|
150
157
|
...
|
|
151
158
|
|
|
@@ -163,6 +170,7 @@ class SSHCommandRunner(CommandRunner):
|
|
|
163
170
|
connect_timeout: Optional[int] = ...,
|
|
164
171
|
source_bashrc: bool = ...,
|
|
165
172
|
skip_lines: int = ...,
|
|
173
|
+
run_in_background: bool = ...,
|
|
166
174
|
**kwargs) -> int:
|
|
167
175
|
...
|
|
168
176
|
|
|
@@ -180,6 +188,7 @@ class SSHCommandRunner(CommandRunner):
|
|
|
180
188
|
connect_timeout: Optional[int] = ...,
|
|
181
189
|
source_bashrc: bool = ...,
|
|
182
190
|
skip_lines: int = ...,
|
|
191
|
+
run_in_background: bool = ...,
|
|
183
192
|
**kwargs) -> Tuple[int, str, str]:
|
|
184
193
|
...
|
|
185
194
|
|
|
@@ -197,6 +206,7 @@ class SSHCommandRunner(CommandRunner):
|
|
|
197
206
|
connect_timeout: Optional[int] = ...,
|
|
198
207
|
source_bashrc: bool = ...,
|
|
199
208
|
skip_lines: int = ...,
|
|
209
|
+
run_in_background: bool = ...,
|
|
200
210
|
**kwargs) -> Union[Tuple[int, str, str], int]:
|
|
201
211
|
...
|
|
202
212
|
|
|
@@ -252,6 +262,7 @@ class KubernetesCommandRunner(CommandRunner):
|
|
|
252
262
|
connect_timeout: Optional[int] = ...,
|
|
253
263
|
source_bashrc: bool = ...,
|
|
254
264
|
skip_lines: int = ...,
|
|
265
|
+
run_in_background: bool = ...,
|
|
255
266
|
**kwargs) -> int:
|
|
256
267
|
...
|
|
257
268
|
|
|
@@ -269,6 +280,7 @@ class KubernetesCommandRunner(CommandRunner):
|
|
|
269
280
|
connect_timeout: Optional[int] = ...,
|
|
270
281
|
source_bashrc: bool = ...,
|
|
271
282
|
skip_lines: int = ...,
|
|
283
|
+
run_in_background: bool = ...,
|
|
272
284
|
**kwargs) -> Tuple[int, str, str]:
|
|
273
285
|
...
|
|
274
286
|
|
|
@@ -286,6 +298,7 @@ class KubernetesCommandRunner(CommandRunner):
|
|
|
286
298
|
connect_timeout: Optional[int] = ...,
|
|
287
299
|
source_bashrc: bool = ...,
|
|
288
300
|
skip_lines: int = ...,
|
|
301
|
+
run_in_background: bool = ...,
|
|
289
302
|
**kwargs) -> Union[Tuple[int, str, str], int]:
|
|
290
303
|
...
|
|
291
304
|
|
|
@@ -348,6 +361,7 @@ class LocalProcessCommandRunner(CommandRunner):
|
|
|
348
361
|
connect_timeout: Optional[int] = ...,
|
|
349
362
|
source_bashrc: bool = ...,
|
|
350
363
|
skip_lines: int = ...,
|
|
364
|
+
run_in_background: bool = ...,
|
|
351
365
|
**kwargs) -> int:
|
|
352
366
|
...
|
|
353
367
|
|
|
@@ -365,6 +379,7 @@ class LocalProcessCommandRunner(CommandRunner):
|
|
|
365
379
|
connect_timeout: Optional[int] = ...,
|
|
366
380
|
source_bashrc: bool = ...,
|
|
367
381
|
skip_lines: int = ...,
|
|
382
|
+
run_in_background: bool = ...,
|
|
368
383
|
**kwargs) -> Tuple[int, str, str]:
|
|
369
384
|
...
|
|
370
385
|
|
|
@@ -382,5 +397,6 @@ class LocalProcessCommandRunner(CommandRunner):
|
|
|
382
397
|
connect_timeout: Optional[int] = ...,
|
|
383
398
|
source_bashrc: bool = ...,
|
|
384
399
|
skip_lines: int = ...,
|
|
400
|
+
run_in_background: bool = ...,
|
|
385
401
|
**kwargs) -> Union[Tuple[int, str, str], int]:
|
|
386
402
|
...
|
sky/utils/common_utils.py
CHANGED
|
@@ -29,6 +29,7 @@ from sky.adaptors import common as adaptors_common
|
|
|
29
29
|
from sky.skylet import constants
|
|
30
30
|
from sky.usage import constants as usage_constants
|
|
31
31
|
from sky.utils import annotations
|
|
32
|
+
from sky.utils import context
|
|
32
33
|
from sky.utils import ux_utils
|
|
33
34
|
from sky.utils import validator
|
|
34
35
|
|
|
@@ -293,14 +294,13 @@ class Backoff:
|
|
|
293
294
|
return self._backoff
|
|
294
295
|
|
|
295
296
|
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
297
|
+
_CLIENT_COMMAND_KEY = 'client_command'
|
|
298
|
+
_CLIENT_ENTRYPOINT_KEY = 'client_entrypoint'
|
|
299
|
+
_USING_REMOTE_API_SERVER_KEY = 'using_remote_api_server'
|
|
300
|
+
_USER_KEY = 'user'
|
|
301
|
+
_REQUEST_ID_KEY = 'request_id'
|
|
301
302
|
|
|
302
303
|
|
|
303
|
-
# TODO(aylei,hailong): request context should be contextual
|
|
304
304
|
def set_request_context(client_entrypoint: Optional[str],
|
|
305
305
|
client_command: Optional[str],
|
|
306
306
|
using_remote_api_server: bool,
|
|
@@ -310,22 +310,21 @@ def set_request_context(client_entrypoint: Optional[str],
|
|
|
310
310
|
This is useful when we are on the SkyPilot API server side and we have a
|
|
311
311
|
client entrypoint and command from the client.
|
|
312
312
|
"""
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
_current_user = user
|
|
322
|
-
_current_request_id = request_id
|
|
313
|
+
# This function will be called in process executor and coroutine executor.
|
|
314
|
+
# context.set_context_var ensures the context is safe in both cases.
|
|
315
|
+
context.set_context_var(_CLIENT_ENTRYPOINT_KEY, client_entrypoint)
|
|
316
|
+
context.set_context_var(_CLIENT_COMMAND_KEY, client_command)
|
|
317
|
+
context.set_context_var(_USING_REMOTE_API_SERVER_KEY,
|
|
318
|
+
using_remote_api_server)
|
|
319
|
+
context.set_context_var(_USER_KEY, user)
|
|
320
|
+
context.set_context_var(_REQUEST_ID_KEY, request_id)
|
|
323
321
|
|
|
324
322
|
|
|
325
323
|
def get_current_request_id() -> str:
|
|
326
324
|
"""Returns the current request id."""
|
|
327
|
-
|
|
328
|
-
|
|
325
|
+
value = context.get_context_var('request_id')
|
|
326
|
+
if value is not None:
|
|
327
|
+
return value
|
|
329
328
|
return 'dummy-request-id'
|
|
330
329
|
|
|
331
330
|
|
|
@@ -335,16 +334,17 @@ def get_current_command() -> str:
|
|
|
335
334
|
Normally uses get_pretty_entry_point(), but will use the client command on
|
|
336
335
|
the server side.
|
|
337
336
|
"""
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
337
|
+
value = context.get_context_var(_CLIENT_COMMAND_KEY)
|
|
338
|
+
if value is not None:
|
|
339
|
+
return value
|
|
341
340
|
return get_pretty_entrypoint_cmd()
|
|
342
341
|
|
|
343
342
|
|
|
344
343
|
def get_current_user() -> 'models.User':
|
|
345
344
|
"""Returns the user in current server session."""
|
|
346
|
-
|
|
347
|
-
|
|
345
|
+
value = context.get_context_var(_USER_KEY)
|
|
346
|
+
if value is not None:
|
|
347
|
+
return value
|
|
348
348
|
return models.User.get_current_user()
|
|
349
349
|
|
|
350
350
|
|
|
@@ -370,8 +370,7 @@ def get_local_user_name() -> str:
|
|
|
370
370
|
|
|
371
371
|
def set_current_user(user: 'models.User'):
|
|
372
372
|
"""Sets the current user."""
|
|
373
|
-
|
|
374
|
-
_current_user = user
|
|
373
|
+
context.set_context_var('user', user)
|
|
375
374
|
|
|
376
375
|
|
|
377
376
|
def get_current_client_entrypoint(server_entrypoint: str) -> str:
|
|
@@ -380,8 +379,9 @@ def get_current_client_entrypoint(server_entrypoint: str) -> str:
|
|
|
380
379
|
Gets the client entrypoint from the context, if it is not set, returns the
|
|
381
380
|
server entrypoint.
|
|
382
381
|
"""
|
|
383
|
-
|
|
384
|
-
|
|
382
|
+
value = context.get_context_var(_CLIENT_ENTRYPOINT_KEY)
|
|
383
|
+
if value is not None:
|
|
384
|
+
return value
|
|
385
385
|
return server_entrypoint
|
|
386
386
|
|
|
387
387
|
|
|
@@ -390,8 +390,9 @@ def get_using_remote_api_server() -> bool:
|
|
|
390
390
|
if os.getenv(constants.USING_REMOTE_API_SERVER_ENV_VAR) is not None:
|
|
391
391
|
return os.getenv(constants.USING_REMOTE_API_SERVER_ENV_VAR,
|
|
392
392
|
'').lower() in ('true', '1')
|
|
393
|
-
|
|
394
|
-
|
|
393
|
+
value = context.get_context_var(_USING_REMOTE_API_SERVER_KEY)
|
|
394
|
+
if value is not None:
|
|
395
|
+
return value
|
|
395
396
|
# This gets the right status for the local client.
|
|
396
397
|
# TODO(zhwu): This is to prevent circular import. We should refactor this.
|
|
397
398
|
# pylint: disable=import-outside-toplevel
|
sky/utils/context.py
CHANGED
|
@@ -17,6 +17,8 @@ from typing_extensions import ParamSpec
|
|
|
17
17
|
if TYPE_CHECKING:
|
|
18
18
|
from sky.skypilot_config import ConfigContext
|
|
19
19
|
|
|
20
|
+
_PROCESS_GLOBAL_VARS = {}
|
|
21
|
+
|
|
20
22
|
|
|
21
23
|
class SkyPilotContext(object):
|
|
22
24
|
"""SkyPilot typed context vars for threads and coroutines.
|
|
@@ -65,6 +67,8 @@ class SkyPilotContext(object):
|
|
|
65
67
|
self._log_file_handle = None
|
|
66
68
|
self.env_overrides = {}
|
|
67
69
|
self.config_context = None
|
|
70
|
+
self.request_context = None
|
|
71
|
+
self.vars = {}
|
|
68
72
|
|
|
69
73
|
def cancel(self):
|
|
70
74
|
"""Cancel the context."""
|
|
@@ -113,6 +117,12 @@ class SkyPilotContext(object):
|
|
|
113
117
|
self._log_file_handle.close()
|
|
114
118
|
self._log_file_handle = None
|
|
115
119
|
|
|
120
|
+
def set_var(self, key: str, value: Any):
|
|
121
|
+
self.vars[key] = value
|
|
122
|
+
|
|
123
|
+
def get_var(self, key: str) -> Optional[Any]:
|
|
124
|
+
return self.vars.get(key)
|
|
125
|
+
|
|
116
126
|
def __enter__(self):
|
|
117
127
|
return self
|
|
118
128
|
|
|
@@ -150,6 +160,28 @@ def get() -> Optional[SkyPilotContext]:
|
|
|
150
160
|
return _CONTEXT.get()
|
|
151
161
|
|
|
152
162
|
|
|
163
|
+
def set_context_var(key: str, value: Any):
|
|
164
|
+
ctx = get()
|
|
165
|
+
if ctx is not None:
|
|
166
|
+
# Set the var in context
|
|
167
|
+
ctx.set_var(key, value)
|
|
168
|
+
else:
|
|
169
|
+
# Fallback to process-isolated assumption, where we thought
|
|
170
|
+
# modifying process-scope vars is safe.
|
|
171
|
+
_PROCESS_GLOBAL_VARS[key] = value
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def get_context_var(key: str) -> Any:
|
|
175
|
+
ctx = get()
|
|
176
|
+
if ctx is not None:
|
|
177
|
+
# Use `in` to check for key existence to distinguish
|
|
178
|
+
# "key not found" from "key's value is None".
|
|
179
|
+
if key in ctx.vars:
|
|
180
|
+
return ctx.get_var(key)
|
|
181
|
+
# Fallback to the variable set in process-scope
|
|
182
|
+
return _PROCESS_GLOBAL_VARS.get(key)
|
|
183
|
+
|
|
184
|
+
|
|
153
185
|
class ContextualEnviron(MutableMapping[str, str]):
|
|
154
186
|
"""Environment variables wrapper with contextual overrides.
|
|
155
187
|
|
sky/utils/db/db_utils.py
CHANGED
|
@@ -75,6 +75,18 @@ def safe_cursor(db_path: str):
|
|
|
75
75
|
conn.close()
|
|
76
76
|
|
|
77
77
|
|
|
78
|
+
@contextlib.contextmanager
|
|
79
|
+
def safe_cursor_on_connection(conn: 'sqlite3.Connection'):
|
|
80
|
+
"""A auto-committing, auto-closing cursor on an existing connection."""
|
|
81
|
+
# Ensure commit() is called when the context is exited.
|
|
82
|
+
with conn:
|
|
83
|
+
cursor = conn.cursor()
|
|
84
|
+
try:
|
|
85
|
+
yield cursor
|
|
86
|
+
finally:
|
|
87
|
+
cursor.close()
|
|
88
|
+
|
|
89
|
+
|
|
78
90
|
def add_column_to_table(
|
|
79
91
|
cursor: 'sqlite3.Cursor',
|
|
80
92
|
conn: 'sqlite3.Connection',
|
|
@@ -286,6 +298,11 @@ def drop_column_from_table_alembic(
|
|
|
286
298
|
raise
|
|
287
299
|
|
|
288
300
|
|
|
301
|
+
def fault_point():
|
|
302
|
+
"""For test fault injection."""
|
|
303
|
+
pass
|
|
304
|
+
|
|
305
|
+
|
|
289
306
|
class SQLiteConn(threading.local):
|
|
290
307
|
"""Thread-local connection to the sqlite3 database."""
|
|
291
308
|
|
|
@@ -345,8 +362,8 @@ class SQLiteConn(threading.local):
|
|
|
345
362
|
|
|
346
363
|
def exec_and_commit(sql: str, parameters: Optional[Iterable[Any]]):
|
|
347
364
|
# pylint: disable=protected-access
|
|
348
|
-
conn._conn
|
|
349
|
-
|
|
365
|
+
with safe_cursor_on_connection(conn._conn) as cursor:
|
|
366
|
+
cursor.execute(sql, parameters)
|
|
350
367
|
|
|
351
368
|
# pylint: disable=protected-access
|
|
352
369
|
await conn._execute(exec_and_commit, sql, parameters)
|
|
@@ -357,7 +374,20 @@ class SQLiteConn(threading.local):
|
|
|
357
374
|
parameters: Optional[Iterable[Any]] = None
|
|
358
375
|
) -> Iterable[sqlite3.Row]:
|
|
359
376
|
conn = await self._get_async_conn()
|
|
360
|
-
|
|
377
|
+
if parameters is None:
|
|
378
|
+
parameters = []
|
|
379
|
+
|
|
380
|
+
def exec_fetch_all(sql: str, parameters: Optional[Iterable[Any]]):
|
|
381
|
+
# pylint: disable=protected-access
|
|
382
|
+
with safe_cursor_on_connection(conn._conn) as cursor:
|
|
383
|
+
cursor.execute(sql, parameters)
|
|
384
|
+
# Note(dev): sqlite3.Connection cannot be patched, keep
|
|
385
|
+
# fault_point here to test the integrity of exec_fetch_all()
|
|
386
|
+
fault_point()
|
|
387
|
+
return cursor.fetchall()
|
|
388
|
+
|
|
389
|
+
# pylint: disable=protected-access
|
|
390
|
+
return await conn._execute(exec_fetch_all, sql, parameters)
|
|
361
391
|
|
|
362
392
|
async def execute_get_returning_value_async(
|
|
363
393
|
self,
|
|
@@ -372,9 +402,9 @@ class SQLiteConn(threading.local):
|
|
|
372
402
|
def exec_and_get_returning_value(sql: str,
|
|
373
403
|
parameters: Optional[Iterable[Any]]):
|
|
374
404
|
# pylint: disable=protected-access
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
405
|
+
with safe_cursor_on_connection(conn._conn) as cursor:
|
|
406
|
+
cursor.execute(sql, parameters)
|
|
407
|
+
return cursor.fetchone()
|
|
378
408
|
|
|
379
409
|
# pylint: disable=protected-access
|
|
380
410
|
return await conn._execute(exec_and_get_returning_value, sql,
|
sky/utils/db/migration_utils.py
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
import contextlib
|
|
4
4
|
import logging
|
|
5
5
|
import os
|
|
6
|
+
from typing import Optional
|
|
6
7
|
|
|
7
8
|
from alembic import command as alembic_command
|
|
8
9
|
from alembic.config import Config
|
|
@@ -22,7 +23,7 @@ GLOBAL_USER_STATE_VERSION = '011'
|
|
|
22
23
|
GLOBAL_USER_STATE_LOCK_PATH = f'~/.sky/locks/.{GLOBAL_USER_STATE_DB_NAME}.lock'
|
|
23
24
|
|
|
24
25
|
SPOT_JOBS_DB_NAME = 'spot_jobs_db'
|
|
25
|
-
SPOT_JOBS_VERSION = '
|
|
26
|
+
SPOT_JOBS_VERSION = '011'
|
|
26
27
|
SPOT_JOBS_LOCK_PATH = f'~/.sky/locks/.{SPOT_JOBS_DB_NAME}.lock'
|
|
27
28
|
|
|
28
29
|
SERVE_DB_NAME = 'serve_db'
|
|
@@ -52,12 +53,22 @@ def db_lock(db_name: str):
|
|
|
52
53
|
f'file if you believe it is stale.') from e
|
|
53
54
|
|
|
54
55
|
|
|
55
|
-
def get_alembic_config(engine: sqlalchemy.engine.Engine,
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
56
|
+
def get_alembic_config(engine: sqlalchemy.engine.Engine,
|
|
57
|
+
section: str,
|
|
58
|
+
alembic_ini_path: Optional[str] = None):
|
|
59
|
+
"""Get Alembic configuration for the given section.
|
|
60
|
+
|
|
61
|
+
Args:
|
|
62
|
+
engine: SQLAlchemy engine for the database.
|
|
63
|
+
section: Alembic section name (e.g., 'state_db' or 'spot_jobs_db').
|
|
64
|
+
alembic_ini_path: Optional path to a custom alembic.ini file.
|
|
65
|
+
If not provided, uses the default SkyPilot alembic.ini.
|
|
66
|
+
"""
|
|
67
|
+
if alembic_ini_path is None:
|
|
68
|
+
# Default to SkyPilot's alembic.ini
|
|
69
|
+
alembic_ini_path = os.path.join(
|
|
70
|
+
os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
|
|
71
|
+
'setup_files', 'alembic.ini')
|
|
61
72
|
alembic_cfg = Config(alembic_ini_path, ini_section=section)
|
|
62
73
|
|
|
63
74
|
# Override the database URL to match SkyPilot's current connection
|
|
@@ -73,19 +84,23 @@ def get_alembic_config(engine: sqlalchemy.engine.Engine, section: str):
|
|
|
73
84
|
return alembic_cfg
|
|
74
85
|
|
|
75
86
|
|
|
76
|
-
def needs_upgrade(engine: sqlalchemy.engine.Engine,
|
|
77
|
-
|
|
87
|
+
def needs_upgrade(engine: sqlalchemy.engine.Engine,
|
|
88
|
+
section: str,
|
|
89
|
+
target_revision: str,
|
|
90
|
+
alembic_ini_path: Optional[str] = None):
|
|
78
91
|
"""Check if the database needs to be upgraded.
|
|
79
92
|
|
|
80
93
|
Args:
|
|
81
|
-
engine: SQLAlchemy engine for the database
|
|
82
|
-
section: Alembic section to upgrade (e.g., 'state_db' or
|
|
83
|
-
|
|
94
|
+
engine: SQLAlchemy engine for the database.
|
|
95
|
+
section: Alembic section to upgrade (e.g., 'state_db' or
|
|
96
|
+
'spot_jobs_db').
|
|
97
|
+
target_revision: Target revision to upgrade to (e.g., '001').
|
|
98
|
+
alembic_ini_path: Optional path to a custom alembic.ini file.
|
|
84
99
|
"""
|
|
85
100
|
current_rev = None
|
|
86
101
|
|
|
87
102
|
# get alembic config for the given section
|
|
88
|
-
alembic_config = get_alembic_config(engine, section)
|
|
103
|
+
alembic_config = get_alembic_config(engine, section, alembic_ini_path)
|
|
89
104
|
version_table = alembic_config.get_section_option(
|
|
90
105
|
alembic_config.config_ini_section, 'version_table', 'alembic_version')
|
|
91
106
|
|
|
@@ -112,26 +127,31 @@ def needs_upgrade(engine: sqlalchemy.engine.Engine, section: str,
|
|
|
112
127
|
return current_rev_num < target_rev_num
|
|
113
128
|
|
|
114
129
|
|
|
115
|
-
def safe_alembic_upgrade(engine: sqlalchemy.engine.Engine,
|
|
116
|
-
|
|
130
|
+
def safe_alembic_upgrade(engine: sqlalchemy.engine.Engine,
|
|
131
|
+
section: str,
|
|
132
|
+
target_revision: str,
|
|
133
|
+
alembic_ini_path: Optional[str] = None):
|
|
117
134
|
"""Upgrade the database if needed. Uses a file lock to ensure
|
|
118
135
|
that only one process tries to upgrade the database at a time.
|
|
119
136
|
|
|
120
137
|
Args:
|
|
121
|
-
engine: SQLAlchemy engine for the database
|
|
122
|
-
section: Alembic section to upgrade (e.g., 'state_db' or
|
|
123
|
-
|
|
138
|
+
engine: SQLAlchemy engine for the database.
|
|
139
|
+
section: Alembic section to upgrade (e.g., 'state_db' or
|
|
140
|
+
'spot_jobs_db').
|
|
141
|
+
target_revision: Target revision to upgrade to (e.g., '001').
|
|
142
|
+
alembic_ini_path: Optional path to a custom alembic.ini file.
|
|
124
143
|
"""
|
|
125
144
|
# set alembic logger to warning level
|
|
126
145
|
alembic_logger = logging.getLogger('alembic')
|
|
127
146
|
alembic_logger.setLevel(logging.WARNING)
|
|
128
147
|
|
|
129
|
-
alembic_config = get_alembic_config(engine, section)
|
|
148
|
+
alembic_config = get_alembic_config(engine, section, alembic_ini_path)
|
|
130
149
|
|
|
131
150
|
# only acquire lock if db needs upgrade
|
|
132
|
-
if needs_upgrade(engine, section, target_revision):
|
|
151
|
+
if needs_upgrade(engine, section, target_revision, alembic_ini_path):
|
|
133
152
|
with db_lock(section):
|
|
134
153
|
# check again if db needs upgrade in case another
|
|
135
154
|
# process upgraded it while we were waiting for the lock
|
|
136
|
-
if needs_upgrade(engine, section, target_revision
|
|
155
|
+
if needs_upgrade(engine, section, target_revision,
|
|
156
|
+
alembic_ini_path):
|
|
137
157
|
alembic_command.upgrade(alembic_config, target_revision)
|
sky/utils/infra_utils.py
CHANGED
|
@@ -173,7 +173,11 @@ class InfraInfo:
|
|
|
173
173
|
return '-'
|
|
174
174
|
|
|
175
175
|
region_or_zone = None
|
|
176
|
-
|
|
176
|
+
# For Slurm, zones = partitions. We want to show the cluster
|
|
177
|
+
# name (region) instead of the partition name (zone), as different
|
|
178
|
+
# Slurm clusters can easily have the same partition name.
|
|
179
|
+
is_slurm = self.cloud.lower() == 'slurm'
|
|
180
|
+
if not is_slurm and self.zone is not None and self.zone != '*':
|
|
177
181
|
region_or_zone = self.zone
|
|
178
182
|
elif self.region is not None and self.region != '*':
|
|
179
183
|
# If using region, we remove the ssh- prefix if it exists for SSH
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
"""Utility functions for generating instance links for cloud providers."""
|
|
2
|
+
from typing import Dict
|
|
3
|
+
|
|
4
|
+
from sky import sky_logging
|
|
5
|
+
from sky.provision import common
|
|
6
|
+
from sky.provision import constants as provision_constants
|
|
7
|
+
|
|
8
|
+
logger = sky_logging.init_logger(__name__)
|
|
9
|
+
|
|
10
|
+
# Console URL templates, one per cloud provider; fill in with str.format().
# Placeholder legend:
#   {region}          - Cloud region
#   {project_id}      - GCP project ID
#   {subscription_id} - Azure subscription ID
#   {resource_group}  - Azure resource group
#   {tag_key}         - Tag key used to identify cluster instances
#   {cluster_name}    - Name of the cluster

# EC2 console instance list, pre-filtered on the cluster tag.
AWS_INSTANCES_URL = ('https://{region}.console.aws.amazon.com'
                     '/ec2/v2/home?region={region}'
                     '#Instances:tag:{tag_key}={cluster_name}')

# The Azure portal has no direct tag-filter URL, so the link targets the
# resource group overview instead.
AZURE_RESOURCE_GROUP_URL = ('https://portal.azure.com/#@/resource'
                            '/subscriptions/{subscription_id}'
                            '/resourceGroups/{resource_group}/overview')
|
|
26
|
+
|
|
27
|
+
# GCP Console base URL
|
|
28
|
+
GCP_INSTANCES_BASE_URL = 'https://console.cloud.google.com/compute/instances'
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _build_gcp_instances_url(project_id: str, tag_key: str,
|
|
32
|
+
cluster_name: str) -> str:
|
|
33
|
+
"""Build GCP instances URL with label filter.
|
|
34
|
+
|
|
35
|
+
GCP Console uses a pageState parameter with a specially encoded filter.
|
|
36
|
+
The filter JSON structure is:
|
|
37
|
+
[{"k":"","t":10,"v":"\"label_key:label_value\"","s":true}]
|
|
38
|
+
|
|
39
|
+
Where:
|
|
40
|
+
- k: filter key (empty for label filters)
|
|
41
|
+
- t: filter type (10 = label filter)
|
|
42
|
+
- v: filter value with escaped quotes around "label_key:label_value"
|
|
43
|
+
- s: unknown, always true
|
|
44
|
+
|
|
45
|
+
GCP uses a mix of:
|
|
46
|
+
- Standard URL encoding for outer structure (%22 for ")
|
|
47
|
+
- Underscore notation inside the filter (_22 for ", _3A for :, etc.)
|
|
48
|
+
- Double URL-encoding for brackets (%255B = %5B = [)
|
|
49
|
+
"""
|
|
50
|
+
# Build the filter value: \"tag_key:cluster_name\"
|
|
51
|
+
# Using underscore notation: _5C_22 = \", _3A = :
|
|
52
|
+
filter_value = f'_5C_22{tag_key}_3A{cluster_name}_5C_22'
|
|
53
|
+
|
|
54
|
+
# Build the filter object using underscore notation for internal quotes and
|
|
55
|
+
# colons.
|
|
56
|
+
# {"k":"","t":10,"v":"<filter_value>","s":true}
|
|
57
|
+
# _22 = ", _3A = :, _2C = ,
|
|
58
|
+
filter_obj = (
|
|
59
|
+
f'_22k_22_3A_22_22_2C' # "k":"",
|
|
60
|
+
f'_22t_22_3A10_2C' # "t":10,
|
|
61
|
+
f'_22v_22_3A_22{filter_value}_22_2C' # "v":"<value>",
|
|
62
|
+
f'_22s_22_3Atrue') # "s":true
|
|
63
|
+
|
|
64
|
+
# Wrap in array brackets (double URL-encoded: %255B = %5B = [, %257D = %7D)
|
|
65
|
+
filter_array = f'%255B%257B{filter_obj}%257D%255D'
|
|
66
|
+
|
|
67
|
+
# Build pageState: ("instances":("p":0,"f":"<filter>"))
|
|
68
|
+
# %22 = " (standard URL encoding)
|
|
69
|
+
page_state = f'(%22instances%22:(%22p%22:0,%22f%22:%22{filter_array}%22))'
|
|
70
|
+
|
|
71
|
+
return (
|
|
72
|
+
f'{GCP_INSTANCES_BASE_URL}?project={project_id}&pageState={page_state}')
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def generate_instance_links(
    cluster_info: common.ClusterInfo,
    cluster_name: str,
) -> Dict[str, str]:
    """Return cloud-console links for the instances of a cluster.

    Each link opens a console view scoped to the whole cluster, so all nodes
    of a multi-node job show up together.

    Args:
        cluster_info: ClusterInfo object; its ``provider_name`` selects the
            cloud and its ``provider_config`` supplies the region, project
            or subscription details needed for the URL.
        cluster_name: Cluster name for tag-based filtering.

    Returns:
        A dict mapping a link label to its URL. The dict is empty when the
        provider is Kubernetes, local, or any other unhandled provider, or
        when the provider config lacks a field the URL requires.
    """
    provider = cluster_info.provider_name.lower()
    config = cluster_info.provider_config or {}

    # Kubernetes and other non-cloud providers get no links.
    if provider in ('kubernetes', 'local'):
        return {}

    # Tag used by SkyPilot to identify cluster instances.
    cluster_tag = provision_constants.TAG_RAY_CLUSTER_NAME

    if provider == 'aws':
        aws_region = config.get('region')
        if aws_region:
            return {
                'AWS Instances': AWS_INSTANCES_URL.format(
                    region=aws_region,
                    tag_key=cluster_tag,
                    cluster_name=cluster_name,
                )
            }
        logger.debug('AWS region not found in provider config, '
                     'skipping instance links')
        return {}

    if provider == 'gcp':
        gcp_project = config.get('project_id')
        if gcp_project:
            return {
                'GCP Instances': _build_gcp_instances_url(
                    project_id=gcp_project,
                    tag_key=cluster_tag,
                    cluster_name=cluster_name,
                )
            }
        logger.debug('GCP project_id not found in provider config, '
                     'skipping instance links')
        return {}

    if provider == 'azure':
        azure_subscription = config.get('subscription_id')
        azure_group = config.get('resource_group')
        if azure_subscription and azure_group:
            return {
                'Azure Resource Group': AZURE_RESOURCE_GROUP_URL.format(
                    subscription_id=azure_subscription,
                    resource_group=azure_group,
                )
            }
        logger.debug('Azure subscription_id or resource_group not found '
                     'in provider config, skipping instance links')
        return {}

    # Any other provider: nothing to link to.
    return {}
|