skypilot-nightly 1.0.0.dev20250716__py3-none-any.whl → 1.0.0.dev20250718__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +4 -2
- sky/backends/backend.py +8 -4
- sky/backends/cloud_vm_ray_backend.py +50 -1
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +2 -1
- sky/catalog/common.py +60 -50
- sky/catalog/data_fetchers/fetch_gcp.py +1 -0
- sky/catalog/gcp_catalog.py +24 -7
- sky/catalog/kubernetes_catalog.py +5 -1
- sky/client/cli/command.py +180 -77
- sky/client/cli/git.py +549 -0
- sky/client/common.py +1 -1
- sky/client/sdk.py +1 -1
- sky/clouds/gcp.py +1 -1
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/{gVXjeFhvtWXyOsx9xYNvM → FUjweqdImyeYhMYFON-Se}/_buildManifest.js +1 -1
- sky/dashboard/out/_next/static/chunks/1043-734e57d2b27dfe5d.js +1 -0
- sky/dashboard/out/_next/static/chunks/4869.bdd42f14b51d1d6f.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-8e0b2055bf5dd499.js +1 -0
- sky/dashboard/out/_next/static/chunks/{9984.b56614f3c4c5961d.js → 9984.2b5e3fa69171bff9.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-fa406155b4223d0d.js +11 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/{[job]-14d404b7dd28502a.js → [job]-c5b357bfd9502fbe.js} +1 -1
- sky/dashboard/out/_next/static/chunks/webpack-6b0575ea521af4f3.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/exceptions.py +5 -0
- sky/execution.py +1 -1
- sky/jobs/client/sdk.py +1 -1
- sky/jobs/server/core.py +14 -0
- sky/provision/kubernetes/utils.py +6 -0
- sky/serve/client/sdk.py +1 -1
- sky/server/common.py +8 -3
- sky/server/rest.py +71 -26
- sky/setup_files/MANIFEST.in +1 -0
- sky/setup_files/dependencies.py +2 -0
- sky/task.py +12 -2
- sky/utils/command_runner.py +144 -35
- sky/utils/controller_utils.py +4 -3
- sky/utils/git.py +9 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/schemas.py +15 -1
- {skypilot_nightly-1.0.0.dev20250716.dist-info → skypilot_nightly-1.0.0.dev20250718.dist-info}/METADATA +3 -1
- {skypilot_nightly-1.0.0.dev20250716.dist-info → skypilot_nightly-1.0.0.dev20250718.dist-info}/RECORD +60 -57
- sky/dashboard/out/_next/static/chunks/1043-90a88c46f27b3df5.js +0 -1
- sky/dashboard/out/_next/static/chunks/4869.c139c0124e677fc8.js +0 -16
- sky/dashboard/out/_next/static/chunks/8969-743abf4bc86baf48.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-9096ea50b8e2cf9e.js +0 -6
- sky/dashboard/out/_next/static/chunks/webpack-3fad5d4a0541a02d.js +0 -1
- /sky/dashboard/out/_next/static/{gVXjeFhvtWXyOsx9xYNvM → FUjweqdImyeYhMYFON-Se}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250716.dist-info → skypilot_nightly-1.0.0.dev20250718.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250716.dist-info → skypilot_nightly-1.0.0.dev20250718.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250716.dist-info → skypilot_nightly-1.0.0.dev20250718.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250716.dist-info → skypilot_nightly-1.0.0.dev20250718.dist-info}/top_level.txt +0 -0
sky/utils/command_runner.py
CHANGED
@@ -6,14 +6,17 @@ import pathlib
|
|
6
6
|
import shlex
|
7
7
|
import sys
|
8
8
|
import time
|
9
|
-
from typing import Any, Callable, Iterable, List, Optional, Tuple, Type,
|
9
|
+
from typing import (Any, Callable, Dict, Iterable, List, Optional, Tuple, Type,
|
10
|
+
Union)
|
10
11
|
|
12
|
+
from sky import exceptions
|
11
13
|
from sky import sky_logging
|
12
14
|
from sky.skylet import constants
|
13
15
|
from sky.skylet import log_lib
|
14
16
|
from sky.utils import common_utils
|
15
17
|
from sky.utils import context_utils
|
16
18
|
from sky.utils import control_master_utils
|
19
|
+
from sky.utils import git as git_utils
|
17
20
|
from sky.utils import subprocess_utils
|
18
21
|
from sky.utils import timeline
|
19
22
|
|
@@ -177,6 +180,20 @@ class CommandRunner:
|
|
177
180
|
def node_id(self) -> str:
|
178
181
|
return '-'.join(str(x) for x in self.node)
|
179
182
|
|
183
|
+
def _get_remote_home_dir(self) -> str:
|
184
|
+
# Use `echo ~` to get the remote home directory, instead of pwd or
|
185
|
+
# echo $HOME, because pwd can be `/` when the remote user is root
|
186
|
+
# and $HOME is not always set.
|
187
|
+
rc, remote_home_dir, stderr = self.run('echo ~',
|
188
|
+
require_outputs=True,
|
189
|
+
separate_stderr=True,
|
190
|
+
stream_logs=False)
|
191
|
+
if rc != 0:
|
192
|
+
raise ValueError('Failed to get remote home directory: '
|
193
|
+
f'{remote_home_dir + stderr}')
|
194
|
+
remote_home_dir = remote_home_dir.strip()
|
195
|
+
return remote_home_dir
|
196
|
+
|
180
197
|
def _get_command_to_run(
|
181
198
|
self,
|
182
199
|
cmd: Union[str, List[str]],
|
@@ -228,6 +245,27 @@ class CommandRunner:
|
|
228
245
|
command_str = ' '.join(command)
|
229
246
|
return command_str
|
230
247
|
|
248
|
+
def _get_remote_home_dir_with_retry(
|
249
|
+
self,
|
250
|
+
max_retry: int,
|
251
|
+
get_remote_home_dir: Callable[[], str],
|
252
|
+
) -> str:
|
253
|
+
"""Returns the remote home directory with retry."""
|
254
|
+
backoff = common_utils.Backoff(initial_backoff=1, max_backoff_factor=5)
|
255
|
+
retries_left = max_retry
|
256
|
+
assert retries_left > 0, f'max_retry {max_retry} must be positive.'
|
257
|
+
while retries_left >= 0:
|
258
|
+
try:
|
259
|
+
return get_remote_home_dir()
|
260
|
+
except Exception: # pylint: disable=broad-except
|
261
|
+
if retries_left == 0:
|
262
|
+
raise
|
263
|
+
sleep_time = backoff.current_backoff()
|
264
|
+
logger.warning(f'Failed to get remote home dir '
|
265
|
+
f'- retrying in {sleep_time} seconds.')
|
266
|
+
retries_left -= 1
|
267
|
+
time.sleep(sleep_time)
|
268
|
+
|
231
269
|
def _rsync(
|
232
270
|
self,
|
233
271
|
source: str,
|
@@ -248,23 +286,6 @@ class CommandRunner:
|
|
248
286
|
rsync_command.append(prefix_command)
|
249
287
|
rsync_command += ['rsync', RSYNC_DISPLAY_OPTION]
|
250
288
|
|
251
|
-
def _get_remote_home_dir_with_retry():
|
252
|
-
backoff = common_utils.Backoff(initial_backoff=1,
|
253
|
-
max_backoff_factor=5)
|
254
|
-
retries_left = max_retry
|
255
|
-
assert retries_left > 0, f'max_retry {max_retry} must be positive.'
|
256
|
-
while retries_left >= 0:
|
257
|
-
try:
|
258
|
-
return get_remote_home_dir()
|
259
|
-
except Exception: # pylint: disable=broad-except
|
260
|
-
if retries_left == 0:
|
261
|
-
raise
|
262
|
-
sleep_time = backoff.current_backoff()
|
263
|
-
logger.warning(f'Failed to get remote home dir '
|
264
|
-
f'- retrying in {sleep_time} seconds.')
|
265
|
-
retries_left -= 1
|
266
|
-
time.sleep(sleep_time)
|
267
|
-
|
268
289
|
# --filter
|
269
290
|
# The source is a local path, so we need to resolve it.
|
270
291
|
resolved_source = pathlib.Path(source).expanduser().resolve()
|
@@ -297,7 +318,9 @@ class CommandRunner:
|
|
297
318
|
pathlib.Path(target).expanduser().resolve())
|
298
319
|
else:
|
299
320
|
if target.startswith('~'):
|
300
|
-
remote_home_dir = _get_remote_home_dir_with_retry(
|
321
|
+
remote_home_dir = self._get_remote_home_dir_with_retry(
|
322
|
+
max_retry=max_retry,
|
323
|
+
get_remote_home_dir=get_remote_home_dir)
|
301
324
|
resolved_target = target.replace('~', remote_home_dir)
|
302
325
|
full_source_str = str(resolved_source)
|
303
326
|
if resolved_source.is_dir():
|
@@ -316,7 +339,9 @@ class CommandRunner:
|
|
316
339
|
else:
|
317
340
|
resolved_target = os.path.expanduser(target)
|
318
341
|
if source.startswith('~'):
|
319
|
-
remote_home_dir = _get_remote_home_dir_with_retry(
|
342
|
+
remote_home_dir = self._get_remote_home_dir_with_retry(
|
343
|
+
max_retry=max_retry,
|
344
|
+
get_remote_home_dir=get_remote_home_dir)
|
320
345
|
resolved_source = source.replace('~', remote_home_dir)
|
321
346
|
rsync_command.extend([
|
322
347
|
f'{maybe_dest_prefix}{resolved_source!r}',
|
@@ -451,6 +476,104 @@ class CommandRunner:
|
|
451
476
|
"""
|
452
477
|
raise NotImplementedError
|
453
478
|
|
479
|
+
@timeline.event
|
480
|
+
def git_clone(
|
481
|
+
self,
|
482
|
+
target_dir: str,
|
483
|
+
*,
|
484
|
+
# Advanced options.
|
485
|
+
log_path: str = os.devnull,
|
486
|
+
stream_logs: bool = True,
|
487
|
+
connect_timeout: Optional[int] = None,
|
488
|
+
max_retry: int = 1,
|
489
|
+
envs_and_secrets: Optional[Dict[str, str]] = None,
|
490
|
+
) -> None:
|
491
|
+
"""Clones a Git repository on the remote machine using git_clone.sh.
|
492
|
+
|
493
|
+
Note: Git environment variables (GIT_URL, GIT_BRANCH, GIT_TOKEN, etc.)
|
494
|
+
must be passed via envs_and_secrets; they are exported in the remote command before the script runs.
|
495
|
+
|
496
|
+
Args:
|
497
|
+
target_dir: Target directory where the repository will be cloned.
|
498
|
+
log_path: Redirect stdout/stderr to the log_path.
|
499
|
+
stream_logs: Stream logs to the stdout/stderr.
|
500
|
+
connect_timeout: timeout in seconds for the connection.
|
501
|
+
max_retry: The maximum number of retries for the git clone command.
|
502
|
+
This value must be positive.
|
503
|
+
envs_and_secrets: Environment variables and secrets to be set
|
504
|
+
before running the script.
|
505
|
+
Raises:
|
506
|
+
exceptions.CommandError: git clone command failed.
|
507
|
+
"""
|
508
|
+
# Find the git_clone.sh script path
|
509
|
+
git_clone_script_path = os.path.join(
|
510
|
+
os.path.dirname(os.path.abspath(__file__)), 'git_clone.sh')
|
511
|
+
|
512
|
+
if not os.path.exists(git_clone_script_path):
|
513
|
+
error_msg = f'git_clone.sh {git_clone_script_path} not found'
|
514
|
+
logger.error(error_msg)
|
515
|
+
raise exceptions.CommandError(1, '', error_msg, None)
|
516
|
+
|
517
|
+
# Remote script path (use a unique name to avoid conflicts)
|
518
|
+
script_hash = hashlib.md5(
|
519
|
+
f'{self.node_id}_{target_dir}'.encode()).hexdigest()[:8]
|
520
|
+
remote_script_path = f'/tmp/sky_git_clone_{script_hash}.sh'
|
521
|
+
|
522
|
+
# Step 1: Transfer the script to remote machine using rsync
|
523
|
+
logger.debug(
|
524
|
+
f'Transferring git_clone.sh to {self.node_id}:{remote_script_path}')
|
525
|
+
self.rsync(
|
526
|
+
source=git_clone_script_path,
|
527
|
+
target=remote_script_path,
|
528
|
+
up=True,
|
529
|
+
log_path=log_path,
|
530
|
+
stream_logs=False # Don't spam logs for script transfer
|
531
|
+
)
|
532
|
+
|
533
|
+
# Step 2: Execute the script on remote machine
|
534
|
+
if target_dir.startswith('~'):
|
535
|
+
remote_home_dir = self._get_remote_home_dir_with_retry(
|
536
|
+
max_retry=max_retry,
|
537
|
+
get_remote_home_dir=self._get_remote_home_dir)
|
538
|
+
target_dir = target_dir.replace('~', remote_home_dir)
|
539
|
+
quoted_target_dir = shlex.quote(target_dir)
|
540
|
+
quoted_script_path = shlex.quote(remote_script_path)
|
541
|
+
cmd = ''
|
542
|
+
log_cmd = ''
|
543
|
+
if envs_and_secrets:
|
544
|
+
for key, value in envs_and_secrets.items():
|
545
|
+
value = shlex.quote(value)
|
546
|
+
cmd += f'export {key}={value} && '
|
547
|
+
if (key == git_utils.GIT_TOKEN_ENV_VAR or
|
548
|
+
key == git_utils.GIT_SSH_KEY_ENV_VAR):
|
549
|
+
log_cmd += f'export {key}=******** && '
|
550
|
+
else:
|
551
|
+
log_cmd += f'export {key}={value} && '
|
552
|
+
exec_cmd = (f'bash {quoted_script_path} {quoted_target_dir} '
|
553
|
+
f'&& rm -f {quoted_script_path}')
|
554
|
+
cmd += exec_cmd
|
555
|
+
log_cmd += exec_cmd
|
556
|
+
|
557
|
+
logger.debug(f'Running git clone script on {self.node_id}: {log_cmd}')
|
558
|
+
|
559
|
+
backoff = common_utils.Backoff(initial_backoff=5, max_backoff_factor=5)
|
560
|
+
assert max_retry > 0, f'max_retry {max_retry} must be positive.'
|
561
|
+
while max_retry >= 0:
|
562
|
+
returncode = self.run(cmd,
|
563
|
+
log_path=log_path,
|
564
|
+
stream_logs=stream_logs,
|
565
|
+
connect_timeout=connect_timeout,
|
566
|
+
require_outputs=False)
|
567
|
+
if returncode == 0:
|
568
|
+
break
|
569
|
+
max_retry -= 1
|
570
|
+
time.sleep(backoff.current_backoff())
|
571
|
+
|
572
|
+
if returncode != 0:
|
573
|
+
error_msg = f'Git clone failed on {self.node_id}: {target_dir}'
|
574
|
+
logger.error(error_msg)
|
575
|
+
raise exceptions.CommandError(returncode, log_cmd, error_msg, None)
|
576
|
+
|
454
577
|
|
455
578
|
class SSHCommandRunner(CommandRunner):
|
456
579
|
"""Runner for SSH commands."""
|
@@ -941,20 +1064,6 @@ class KubernetesCommandRunner(CommandRunner):
|
|
941
1064
|
exceptions.CommandError: rsync command failed.
|
942
1065
|
"""
|
943
1066
|
|
944
|
-
def get_remote_home_dir() -> str:
|
945
|
-
# Use `echo ~` to get the remote home directory, instead of pwd or
|
946
|
-
# echo $HOME, because pwd can be `/` when the remote user is root
|
947
|
-
# and $HOME is not always set.
|
948
|
-
rc, remote_home_dir, stderr = self.run('echo ~',
|
949
|
-
require_outputs=True,
|
950
|
-
separate_stderr=True,
|
951
|
-
stream_logs=False)
|
952
|
-
if rc != 0:
|
953
|
-
raise ValueError('Failed to get remote home directory: '
|
954
|
-
f'{remote_home_dir + stderr}')
|
955
|
-
remote_home_dir = remote_home_dir.strip()
|
956
|
-
return remote_home_dir
|
957
|
-
|
958
1067
|
# Build command.
|
959
1068
|
helper_path = shlex.quote(
|
960
1069
|
os.path.join(os.path.abspath(os.path.dirname(__file__)),
|
@@ -980,7 +1089,7 @@ class KubernetesCommandRunner(CommandRunner):
|
|
980
1089
|
# rsync with `kubectl` as the rsh command will cause ~/xx parsed as
|
981
1090
|
# /~/xx, so we need to replace ~ with the remote home directory. We
|
982
1091
|
# only need to do this when ~ is at the beginning of the path.
|
983
|
-
get_remote_home_dir=
|
1092
|
+
get_remote_home_dir=self._get_remote_home_dir)
|
984
1093
|
|
985
1094
|
|
986
1095
|
class LocalProcessCommandRunner(CommandRunner):
|
sky/utils/controller_utils.py
CHANGED
@@ -812,7 +812,7 @@ def translate_local_file_mounts_to_two_hop(
|
|
812
812
|
file_mount_id = 0
|
813
813
|
|
814
814
|
file_mounts_to_translate = task.file_mounts or {}
|
815
|
-
if task.workdir is not None:
|
815
|
+
if task.workdir is not None and isinstance(task.workdir, str):
|
816
816
|
file_mounts_to_translate[constants.SKY_REMOTE_WORKDIR] = task.workdir
|
817
817
|
task.workdir = None
|
818
818
|
|
@@ -880,7 +880,8 @@ def maybe_translate_local_file_mounts_and_sync_up(task: 'task_lib.Task',
|
|
880
880
|
copy_mounts = {}
|
881
881
|
|
882
882
|
has_local_source_paths_file_mounts = bool(copy_mounts)
|
883
|
-
has_local_source_paths_workdir = task.workdir is not None
|
883
|
+
has_local_source_paths_workdir = (task.workdir is not None and
|
884
|
+
isinstance(task.workdir, str))
|
884
885
|
|
885
886
|
msg = None
|
886
887
|
if has_local_source_paths_workdir and has_local_source_paths_file_mounts:
|
@@ -928,7 +929,7 @@ def maybe_translate_local_file_mounts_and_sync_up(task: 'task_lib.Task',
|
|
928
929
|
|
929
930
|
# Step 1: Translate the workdir to SkyPilot storage.
|
930
931
|
new_storage_mounts = {}
|
931
|
-
if task.workdir is not None:
|
932
|
+
if task.workdir is not None and isinstance(task.workdir, str):
|
932
933
|
workdir = task.workdir
|
933
934
|
task.workdir = None
|
934
935
|
if (constants.SKY_REMOTE_WORKDIR in original_file_mounts or
|
sky/utils/git.py
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
"""Git related constants."""
|
2
|
+
|
3
|
+
GIT_TOKEN_ENV_VAR = 'GIT_TOKEN'
|
4
|
+
GIT_SSH_KEY_PATH_ENV_VAR = 'GIT_SSH_KEY_PATH'
|
5
|
+
GIT_SSH_KEY_ENV_VAR = 'GIT_SSH_KEY'
|
6
|
+
GIT_URL_ENV_VAR = 'GIT_URL'
|
7
|
+
GIT_COMMIT_HASH_ENV_VAR = 'GIT_COMMIT_HASH'
|
8
|
+
GIT_BRANCH_ENV_VAR = 'GIT_BRANCH'
|
9
|
+
GIT_TAG_ENV_VAR = 'GIT_TAG'
|