skypilot-nightly 1.0.0.dev20250914__py3-none-any.whl → 1.0.0.dev20250916__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +2 -2
- sky/backends/cloud_vm_ray_backend.py +4 -4
- sky/catalog/data_fetchers/fetch_seeweb.py +2 -2
- sky/client/sdk.py +6 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/5339.4a881570243431a5.js +51 -0
- sky/dashboard/out/_next/static/chunks/{6990-11c8e9b982e8ffec.js → 6990-f6818c84ed8f1c86.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{webpack-e2e3d2d3de7d43e5.js → webpack-05f82d90d6fd7f82.js} +1 -1
- sky/dashboard/out/_next/static/{5iak5kYp9a9ezANCb74L8 → y8s7LlyyfhMzpzCkxuD2r}/_buildManifest.js +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/provision/docker_utils.py +44 -1
- sky/provision/instance_setup.py +15 -1
- sky/provision/kubernetes/instance.py +16 -2
- sky/provision/lambda_cloud/instance.py +12 -11
- sky/server/common.py +13 -0
- sky/server/constants.py +3 -0
- sky/server/requests/executor.py +20 -6
- sky/server/server.py +10 -5
- sky/skypilot_config.py +10 -3
- sky/utils/command_runner.py +21 -11
- {skypilot_nightly-1.0.0.dev20250914.dist-info → skypilot_nightly-1.0.0.dev20250916.dist-info}/METADATA +34 -34
- {skypilot_nightly-1.0.0.dev20250914.dist-info → skypilot_nightly-1.0.0.dev20250916.dist-info}/RECORD +42 -42
- sky/dashboard/out/_next/static/chunks/5339.c033b29835da0f35.js +0 -51
- /sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-7528cc0ef8c522c5.js} +0 -0
- /sky/dashboard/out/_next/static/{5iak5kYp9a9ezANCb74L8 → y8s7LlyyfhMzpzCkxuD2r}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250914.dist-info → skypilot_nightly-1.0.0.dev20250916.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250914.dist-info → skypilot_nightly-1.0.0.dev20250916.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250914.dist-info → skypilot_nightly-1.0.0.dev20250916.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250914.dist-info → skypilot_nightly-1.0.0.dev20250916.dist-info}/top_level.txt +0 -0
|
sky/dashboard/out/workspaces.html
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-05f82d90d6fd7f82.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces-7528cc0ef8c522c5.js" defer=""></script><script src="/dashboard/_next/static/y8s7LlyyfhMzpzCkxuD2r/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/y8s7LlyyfhMzpzCkxuD2r/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces","query":{},"buildId":"y8s7LlyyfhMzpzCkxuD2r","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/provision/docker_utils.py
CHANGED
|
@@ -32,6 +32,30 @@ DOCKER_SOCKET_NOT_READY_STR = ('Is the docker daemon running?')
|
|
|
32
32
|
|
|
33
33
|
_DOCKER_SOCKET_WAIT_TIMEOUT_SECONDS = 30
|
|
34
34
|
|
|
35
|
+
# Install AWS CLI v2 (not v1 from pip) as it's required for ECR authentication
|
|
36
|
+
# AWS CLI v2 is installed as a standalone binary, not a Python package. See:
|
|
37
|
+
# https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html
|
|
38
|
+
INSTALL_AWS_CLI_CMD = (
|
|
39
|
+
'which aws || ((command -v unzip >/dev/null 2>&1 || '
|
|
40
|
+
'(sudo apt-get update && sudo apt-get install -y unzip)) && '
|
|
41
|
+
'curl -fsSL "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" '
|
|
42
|
+
'-o "/tmp/awscliv2.zip" && '
|
|
43
|
+
'unzip -q /tmp/awscliv2.zip -d /tmp && sudo /tmp/aws/install '
|
|
44
|
+
'&& rm -rf /tmp/awscliv2.zip /tmp/aws)')
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _extract_region_from_ecr_server(server: str) -> str:
|
|
48
|
+
"""Extract AWS region from ECR server URL.
|
|
49
|
+
|
|
50
|
+
ECR server format: <account-id>.dkr.ecr.<region>.amazonaws.com
|
|
51
|
+
Returns the region part from the URL.
|
|
52
|
+
"""
|
|
53
|
+
# Split: ['<account-id>', 'dkr', 'ecr', '<region>', 'amazonaws', 'com']
|
|
54
|
+
parts = server.split('.')
|
|
55
|
+
if len(parts) >= 6 and parts[1] == 'dkr' and parts[2] == 'ecr':
|
|
56
|
+
return parts[3]
|
|
57
|
+
raise ValueError(f'Invalid ECR server format: {server}')
|
|
58
|
+
|
|
35
59
|
|
|
36
60
|
@dataclasses.dataclass
|
|
37
61
|
class DockerLoginConfig:
|
|
@@ -236,9 +260,9 @@ class DockerInitializer:
|
|
|
236
260
|
|
|
237
261
|
# SkyPilot: Docker login if user specified a private docker registry.
|
|
238
262
|
if 'docker_login_config' in self.docker_config:
|
|
239
|
-
# TODO(tian): Maybe support a command to get the login password?
|
|
240
263
|
docker_login_config = DockerLoginConfig(
|
|
241
264
|
**self.docker_config['docker_login_config'])
|
|
265
|
+
|
|
242
266
|
if docker_login_config.password:
|
|
243
267
|
# Password is allowed to be empty, in that case, we will not run
|
|
244
268
|
# the login command, and assume that the image pulling is
|
|
@@ -249,6 +273,25 @@ class DockerInitializer:
|
|
|
249
273
|
f'--password {shlex.quote(docker_login_config.password)} '
|
|
250
274
|
f'{shlex.quote(docker_login_config.server)}',
|
|
251
275
|
wait_for_docker_daemon=True)
|
|
276
|
+
elif (docker_login_config.server.endswith('.amazonaws.com') and
|
|
277
|
+
'.dkr.ecr.' in docker_login_config.server):
|
|
278
|
+
# AWS ECR: Use aws ecr get-login-password for authentication
|
|
279
|
+
# ECR format: <account-id>.dkr.ecr.<region>.amazonaws.com
|
|
280
|
+
# This command uses the IAM credentials from the EC2 instance
|
|
281
|
+
# Ref: https://docs.aws.amazon.com/AmazonECR/latest/userguide/registry_auth.html # pylint: disable=line-too-long
|
|
282
|
+
region = _extract_region_from_ecr_server(
|
|
283
|
+
docker_login_config.server)
|
|
284
|
+
|
|
285
|
+
# AWS CLI is not pre-installed on AWS instances, unlike gcloud
|
|
286
|
+
# on GCP instances, so we need to install it first
|
|
287
|
+
self._run(INSTALL_AWS_CLI_CMD, wait_for_docker_daemon=False)
|
|
288
|
+
|
|
289
|
+
self._run(
|
|
290
|
+
f'aws ecr get-login-password --region {region} | '
|
|
291
|
+
f'{self.docker_cmd} login --username AWS '
|
|
292
|
+
f'--password-stdin '
|
|
293
|
+
f'{shlex.quote(docker_login_config.server)}',
|
|
294
|
+
wait_for_docker_daemon=True)
|
|
252
295
|
elif docker_login_config.server.endswith('-docker.pkg.dev'):
|
|
253
296
|
# Docker image server is on GCR, we need to do additional setup
|
|
254
297
|
# to pull the image.
|
sky/provision/instance_setup.py
CHANGED
|
@@ -136,6 +136,20 @@ def _hint_worker_log_path(cluster_name: str, cluster_info: common.ClusterInfo,
|
|
|
136
136
|
logger.info(f'Logs of worker nodes can be found at: {worker_log_path}')
|
|
137
137
|
|
|
138
138
|
|
|
139
|
+
class SSHThreadPoolExecutor(futures.ThreadPoolExecutor):
|
|
140
|
+
"""ThreadPoolExecutor that kills children processes on exit."""
|
|
141
|
+
|
|
142
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
143
|
+
# ssh command runner eventually calls
|
|
144
|
+
# log_lib.run_with_log, which will spawn
|
|
145
|
+
# subprocesses. If we are exiting the context
|
|
146
|
+
# we need to kill the children processes
|
|
147
|
+
# to avoid leakage.
|
|
148
|
+
subprocess_utils.kill_children_processes()
|
|
149
|
+
self.shutdown()
|
|
150
|
+
return False
|
|
151
|
+
|
|
152
|
+
|
|
139
153
|
def _parallel_ssh_with_cache(func,
|
|
140
154
|
cluster_name: str,
|
|
141
155
|
stage_name: str,
|
|
@@ -148,7 +162,7 @@ def _parallel_ssh_with_cache(func,
|
|
|
148
162
|
# as 32 is too large for some machines.
|
|
149
163
|
max_workers = subprocess_utils.get_parallel_threads(
|
|
150
164
|
cluster_info.provider_name)
|
|
151
|
-
with
|
|
165
|
+
with SSHThreadPoolExecutor(max_workers=max_workers) as pool:
|
|
152
166
|
results = []
|
|
153
167
|
runners = provision.get_command_runners(cluster_info.provider_name,
|
|
154
168
|
cluster_info, **ssh_credentials)
|
|
sky/provision/kubernetes/instance.py
CHANGED
|
@@ -35,6 +35,9 @@ _TIMEOUT_FOR_POD_TERMINATION = 60 # 1 minutes
|
|
|
35
35
|
_MAX_RETRIES = 3
|
|
36
36
|
_NUM_THREADS = subprocess_utils.get_parallel_threads('kubernetes')
|
|
37
37
|
|
|
38
|
+
# Pattern to extract SSH user from command output, handling MOTD contamination
|
|
39
|
+
_SSH_USER_PATTERN = re.compile(r'SKYPILOT_SSH_USER: ([^\s\n]+)')
|
|
40
|
+
|
|
38
41
|
logger = sky_logging.init_logger(__name__)
|
|
39
42
|
|
|
40
43
|
|
|
@@ -1276,7 +1279,11 @@ def get_cluster_info(
|
|
|
1276
1279
|
assert cpu_request is not None, 'cpu_request should not be None'
|
|
1277
1280
|
|
|
1278
1281
|
ssh_user = 'sky'
|
|
1279
|
-
|
|
1282
|
+
# Use pattern matching to extract SSH user, handling MOTD contamination.
|
|
1283
|
+
# Some container images (like CUDA-Q) print MOTD when login shells start,
|
|
1284
|
+
# which can contaminate command output. We use a unique pattern to extract
|
|
1285
|
+
# the actual username reliably.
|
|
1286
|
+
get_k8s_ssh_user_cmd = 'echo "SKYPILOT_SSH_USER: $(whoami)"'
|
|
1280
1287
|
assert head_pod_name is not None
|
|
1281
1288
|
runner = command_runner.KubernetesCommandRunner(
|
|
1282
1289
|
((namespace, context), head_pod_name))
|
|
@@ -1286,7 +1293,14 @@ def get_cluster_info(
|
|
|
1286
1293
|
stream_logs=False)
|
|
1287
1294
|
_raise_command_running_error('get ssh user', get_k8s_ssh_user_cmd,
|
|
1288
1295
|
head_pod_name, rc, stdout + stderr)
|
|
1289
|
-
|
|
1296
|
+
|
|
1297
|
+
# Extract SSH user using pattern matching
|
|
1298
|
+
ssh_user_match = _SSH_USER_PATTERN.search(stdout)
|
|
1299
|
+
if ssh_user_match:
|
|
1300
|
+
ssh_user = ssh_user_match.group(1)
|
|
1301
|
+
else:
|
|
1302
|
+
raise ValueError('Failed to find SSH user identifier: '
|
|
1303
|
+
f'{stdout + stderr}')
|
|
1290
1304
|
logger.debug(
|
|
1291
1305
|
f'Using ssh user {ssh_user} for cluster {cluster_name_on_cloud}')
|
|
1292
1306
|
|
|
sky/provision/lambda_cloud/instance.py
CHANGED
|
@@ -106,34 +106,35 @@ def run_instances(region: str, cluster_name_on_cloud: str,
|
|
|
106
106
|
created_instance_ids = []
|
|
107
107
|
remote_ssh_key_name = config.authentication_config['remote_key_name']
|
|
108
108
|
|
|
109
|
-
def
|
|
109
|
+
def launch_node(node_type: str) -> str:
|
|
110
110
|
try:
|
|
111
111
|
instance_ids = lambda_client.create_instances(
|
|
112
112
|
instance_type=config.node_config['InstanceType'],
|
|
113
113
|
region=region,
|
|
114
114
|
name=f'{cluster_name_on_cloud}-{node_type}',
|
|
115
|
-
|
|
115
|
+
# Quantity cannot actually be greater than 1; see:
|
|
116
|
+
# https://github.com/skypilot-org/skypilot/issues/7084
|
|
117
|
+
quantity=1,
|
|
116
118
|
ssh_key_name=remote_ssh_key_name,
|
|
117
119
|
)
|
|
118
|
-
logger.info(f'Launched {
|
|
119
|
-
f'
|
|
120
|
-
return instance_ids
|
|
120
|
+
logger.info(f'Launched {node_type} node, '
|
|
121
|
+
f'instance_id: {instance_ids[0]}')
|
|
122
|
+
return instance_ids[0]
|
|
121
123
|
except Exception as e:
|
|
122
124
|
logger.warning(f'run_instances error: {e}')
|
|
123
125
|
raise
|
|
124
126
|
|
|
125
127
|
if head_instance_id is None:
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
created_instance_ids.append(instance_ids[0])
|
|
129
|
-
head_instance_id = instance_ids[0]
|
|
128
|
+
head_instance_id = launch_node('head')
|
|
129
|
+
created_instance_ids.append(head_instance_id)
|
|
130
130
|
|
|
131
131
|
assert head_instance_id is not None, 'head_instance_id should not be None'
|
|
132
132
|
|
|
133
133
|
worker_node_count = to_start_count - 1
|
|
134
134
|
if worker_node_count > 0:
|
|
135
|
-
|
|
136
|
-
|
|
135
|
+
for _ in range(worker_node_count):
|
|
136
|
+
worker_instance_id = launch_node('worker')
|
|
137
|
+
created_instance_ids.append(worker_instance_id)
|
|
137
138
|
|
|
138
139
|
while True:
|
|
139
140
|
instances = _filter_instances(cluster_name_on_cloud, ['active'])
|
sky/server/common.py
CHANGED
|
@@ -515,6 +515,19 @@ def get_request_id(response: 'requests.Response') -> RequestId[T]:
|
|
|
515
515
|
return RequestId[T](request_id)
|
|
516
516
|
|
|
517
517
|
|
|
518
|
+
def get_stream_request_id(
|
|
519
|
+
response: 'requests.Response') -> Optional[RequestId[T]]:
|
|
520
|
+
"""This is the same as the above function, but just for `sdk.stream_and_get`.
|
|
521
|
+
We do this because `/api/stream` may choose the latest request id, and
|
|
522
|
+
we need to keep track of that information. Request id in this case can
|
|
523
|
+
be None."""
|
|
524
|
+
handle_request_error(response)
|
|
525
|
+
request_id = response.headers.get(server_constants.STREAM_REQUEST_HEADER)
|
|
526
|
+
if request_id is not None:
|
|
527
|
+
return RequestId[T](request_id)
|
|
528
|
+
return None
|
|
529
|
+
|
|
530
|
+
|
|
518
531
|
def _start_api_server(deploy: bool = False,
|
|
519
532
|
host: str = '127.0.0.1',
|
|
520
533
|
foreground: bool = False,
|
sky/server/constants.py
CHANGED
|
@@ -61,3 +61,6 @@ DASHBOARD_DIR = os.path.join(os.path.dirname(__file__), '..', 'dashboard',
|
|
|
61
61
|
|
|
62
62
|
# The interval (seconds) for the event to be restarted in the background.
|
|
63
63
|
DAEMON_RESTART_INTERVAL_SECONDS = 20
|
|
64
|
+
|
|
65
|
+
# HTTP response header carrying the stream request id.
|
|
66
|
+
STREAM_REQUEST_HEADER = 'X-SkyPilot-Stream-Request-ID'
|
sky/server/requests/executor.py
CHANGED
|
@@ -282,8 +282,8 @@ def _get_queue(schedule_type: api_requests.ScheduleType) -> RequestQueue:
|
|
|
282
282
|
|
|
283
283
|
@contextlib.contextmanager
|
|
284
284
|
def override_request_env_and_config(
|
|
285
|
-
request_body: payloads.RequestBody,
|
|
286
|
-
|
|
285
|
+
request_body: payloads.RequestBody, request_id: str,
|
|
286
|
+
request_name: str) -> Generator[None, None, None]:
|
|
287
287
|
"""Override the environment and SkyPilot config for a request."""
|
|
288
288
|
original_env = os.environ.copy()
|
|
289
289
|
try:
|
|
@@ -319,9 +319,22 @@ def override_request_env_and_config(
|
|
|
319
319
|
with skypilot_config.override_skypilot_config(
|
|
320
320
|
request_body.override_skypilot_config,
|
|
321
321
|
request_body.override_skypilot_config_path):
|
|
322
|
-
#
|
|
323
|
-
#
|
|
324
|
-
|
|
322
|
+
# Skip permission check for sky.workspaces.get request
|
|
323
|
+
# as it is used to determine which workspaces the user
|
|
324
|
+
# has access to.
|
|
325
|
+
if request_name != 'sky.workspaces.get':
|
|
326
|
+
try:
|
|
327
|
+
# Reject requests that the user does not have permission
|
|
328
|
+
# to access.
|
|
329
|
+
workspaces_core.reject_request_for_unauthorized_workspace(
|
|
330
|
+
user)
|
|
331
|
+
except exceptions.PermissionDeniedError as e:
|
|
332
|
+
logger.debug(
|
|
333
|
+
f'{request_id} permission denied to workspace: '
|
|
334
|
+
f'{skypilot_config.get_active_workspace()}: {e}')
|
|
335
|
+
raise e
|
|
336
|
+
logger.debug(
|
|
337
|
+
f'{request_id} permission granted to {request_name} request')
|
|
325
338
|
yield
|
|
326
339
|
finally:
|
|
327
340
|
# We need to call the save_timeline() since atexit will not be
|
|
@@ -402,7 +415,8 @@ def _request_execution_wrapper(request_id: str,
|
|
|
402
415
|
# captured in the log file.
|
|
403
416
|
try:
|
|
404
417
|
with sky_logging.add_debug_log_handler(request_id), \
|
|
405
|
-
override_request_env_and_config(
|
|
418
|
+
override_request_env_and_config(
|
|
419
|
+
request_body, request_id, request_name), \
|
|
406
420
|
tempstore.tempdir():
|
|
407
421
|
if sky_logging.logging_enabled(logger, sky_logging.DEBUG):
|
|
408
422
|
config = skypilot_config.to_dict()
|
sky/server/server.py
CHANGED
|
@@ -1571,6 +1571,15 @@ async def stream(
|
|
|
1571
1571
|
detail=f'Log path {log_path!r} does not exist')
|
|
1572
1572
|
|
|
1573
1573
|
log_path_to_stream = resolved_log_path
|
|
1574
|
+
|
|
1575
|
+
headers = {
|
|
1576
|
+
'Cache-Control': 'no-cache, no-transform',
|
|
1577
|
+
'X-Accel-Buffering': 'no',
|
|
1578
|
+
'Transfer-Encoding': 'chunked'
|
|
1579
|
+
}
|
|
1580
|
+
if request_id is not None:
|
|
1581
|
+
headers[server_constants.STREAM_REQUEST_HEADER] = request_id
|
|
1582
|
+
|
|
1574
1583
|
return fastapi.responses.StreamingResponse(
|
|
1575
1584
|
content=stream_utils.log_streamer(request_id,
|
|
1576
1585
|
log_path_to_stream,
|
|
@@ -1578,11 +1587,7 @@ async def stream(
|
|
|
1578
1587
|
tail=tail,
|
|
1579
1588
|
follow=follow),
|
|
1580
1589
|
media_type='text/plain',
|
|
1581
|
-
headers=
|
|
1582
|
-
'Cache-Control': 'no-cache, no-transform',
|
|
1583
|
-
'X-Accel-Buffering': 'no',
|
|
1584
|
-
'Transfer-Encoding': 'chunked'
|
|
1585
|
-
},
|
|
1590
|
+
headers=headers,
|
|
1586
1591
|
)
|
|
1587
1592
|
|
|
1588
1593
|
|
sky/skypilot_config.py
CHANGED
|
@@ -415,10 +415,17 @@ def local_active_workspace_ctx(workspace: str) -> Iterator[None]:
|
|
|
415
415
|
def get_active_workspace(force_user_workspace: bool = False) -> str:
|
|
416
416
|
context_workspace = getattr(_active_workspace_context, 'workspace', None)
|
|
417
417
|
if not force_user_workspace and context_workspace is not None:
|
|
418
|
-
logger.debug(f'
|
|
418
|
+
logger.debug(f'Got context workspace: {context_workspace}')
|
|
419
419
|
return context_workspace
|
|
420
|
-
|
|
421
|
-
|
|
420
|
+
active_workspace = get_nested(keys=('active_workspace',),
|
|
421
|
+
default_value=None)
|
|
422
|
+
if active_workspace is None:
|
|
423
|
+
logger.debug(f'No active workspace found, using default workspace: '
|
|
424
|
+
f'{constants.SKYPILOT_DEFAULT_WORKSPACE}')
|
|
425
|
+
active_workspace = constants.SKYPILOT_DEFAULT_WORKSPACE
|
|
426
|
+
else:
|
|
427
|
+
logger.debug(f'Got active workspace: {active_workspace}')
|
|
428
|
+
return active_workspace
|
|
422
429
|
|
|
423
430
|
|
|
424
431
|
def set_nested(keys: Tuple[str, ...], value: Any) -> Dict[str, Any]:
|
sky/utils/command_runner.py
CHANGED
|
@@ -3,6 +3,7 @@ import enum
|
|
|
3
3
|
import hashlib
|
|
4
4
|
import os
|
|
5
5
|
import pathlib
|
|
6
|
+
import re
|
|
6
7
|
import shlex
|
|
7
8
|
import sys
|
|
8
9
|
import time
|
|
@@ -22,6 +23,9 @@ from sky.utils import timeline
|
|
|
22
23
|
|
|
23
24
|
logger = sky_logging.init_logger(__name__)
|
|
24
25
|
|
|
26
|
+
# Pattern to extract home directory from command output
|
|
27
|
+
_HOME_DIR_PATTERN = re.compile(r'SKYPILOT_HOME_DIR: ([^\s\n]+)')
|
|
28
|
+
|
|
25
29
|
# Rsync options
|
|
26
30
|
# TODO(zhwu): This will print a per-file progress bar (with -P),
|
|
27
31
|
# shooting a lot of messages to the output. --info=progress2 is used
|
|
@@ -183,17 +187,25 @@ class CommandRunner:
|
|
|
183
187
|
return '-'.join(str(x) for x in self.node)
|
|
184
188
|
|
|
185
189
|
def _get_remote_home_dir(self) -> str:
|
|
186
|
-
# Use
|
|
187
|
-
#
|
|
188
|
-
#
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
190
|
+
# Use pattern matching to extract home directory.
|
|
191
|
+
# Some container images print MOTD when login shells start, which can
|
|
192
|
+
# contaminate command output. We use a unique pattern to extract the
|
|
193
|
+
# actual home directory reliably.
|
|
194
|
+
rc, output, stderr = self.run('echo "SKYPILOT_HOME_DIR: $(echo ~)"',
|
|
195
|
+
require_outputs=True,
|
|
196
|
+
separate_stderr=True,
|
|
197
|
+
stream_logs=False)
|
|
193
198
|
if rc != 0:
|
|
194
199
|
raise ValueError('Failed to get remote home directory: '
|
|
195
|
-
f'{
|
|
196
|
-
|
|
200
|
+
f'{output + stderr}')
|
|
201
|
+
|
|
202
|
+
# Extract home directory using pattern matching
|
|
203
|
+
home_dir_match = _HOME_DIR_PATTERN.search(output)
|
|
204
|
+
if home_dir_match:
|
|
205
|
+
remote_home_dir = home_dir_match.group(1)
|
|
206
|
+
else:
|
|
207
|
+
raise ValueError('Failed to find remote home directory identifier: '
|
|
208
|
+
f'{output + stderr}')
|
|
197
209
|
return remote_home_dir
|
|
198
210
|
|
|
199
211
|
def _get_command_to_run(
|
|
@@ -414,7 +426,6 @@ class CommandRunner:
|
|
|
414
426
|
SkyPilot but we still want to get rid of some warning messages,
|
|
415
427
|
such as SSH warnings.
|
|
416
428
|
|
|
417
|
-
|
|
418
429
|
Returns:
|
|
419
430
|
returncode
|
|
420
431
|
or
|
|
@@ -991,7 +1002,6 @@ class KubernetesCommandRunner(CommandRunner):
|
|
|
991
1002
|
SkyPilot but we still want to get rid of some warning messages,
|
|
992
1003
|
such as SSH warnings.
|
|
993
1004
|
|
|
994
|
-
|
|
995
1005
|
Returns:
|
|
996
1006
|
returncode
|
|
997
1007
|
or
|
|
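{skypilot_nightly-1.0.0.dev20250914.dist-info → skypilot_nightly-1.0.0.dev20250916.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@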
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: skypilot-nightly
|
|
3
|
-
Version: 1.0.0.dev20250914
|
|
3
|
+
Version: 1.0.0.dev20250916
|
|
4
4
|
Summary: SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.
|
|
5
5
|
Author: SkyPilot Team
|
|
6
6
|
License: Apache 2.0
|
|
@@ -151,49 +151,49 @@ Requires-Dist: grpcio>=1.63.0; extra == "server"
|
|
|
151
151
|
Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "server"
|
|
152
152
|
Requires-Dist: aiosqlite; extra == "server"
|
|
153
153
|
Provides-Extra: all
|
|
154
|
-
Requires-Dist: sqlalchemy_adapter; extra == "all"
|
|
155
|
-
Requires-Dist: azure-core>=1.31.0; extra == "all"
|
|
156
|
-
Requires-Dist: azure-mgmt-compute>=33.0.0; extra == "all"
|
|
157
|
-
Requires-Dist: pydo>=0.3.0; extra == "all"
|
|
158
|
-
Requires-Dist: docker; extra == "all"
|
|
159
|
-
Requires-Dist: azure-core>=1.24.0; extra == "all"
|
|
160
|
-
Requires-Dist: azure-storage-blob>=12.23.1; extra == "all"
|
|
161
|
-
Requires-Dist: google-api-python-client>=2.69.0; extra == "all"
|
|
162
154
|
Requires-Dist: boto3>=1.26.1; extra == "all"
|
|
163
|
-
Requires-Dist: ibm-cloud-sdk-core; extra == "all"
|
|
164
|
-
Requires-Dist: anyio; extra == "all"
|
|
165
|
-
Requires-Dist: oci; extra == "all"
|
|
166
155
|
Requires-Dist: colorama<0.4.5; extra == "all"
|
|
167
|
-
Requires-Dist:
|
|
156
|
+
Requires-Dist: msgraph-sdk; extra == "all"
|
|
157
|
+
Requires-Dist: pyjwt; extra == "all"
|
|
158
|
+
Requires-Dist: google-api-python-client>=2.69.0; extra == "all"
|
|
159
|
+
Requires-Dist: passlib; extra == "all"
|
|
160
|
+
Requires-Dist: ecsapi>=0.2.0; extra == "all"
|
|
161
|
+
Requires-Dist: azure-common; extra == "all"
|
|
162
|
+
Requires-Dist: grpcio>=1.63.0; extra == "all"
|
|
163
|
+
Requires-Dist: ibm-vpc; extra == "all"
|
|
168
164
|
Requires-Dist: cudo-compute>=0.1.10; extra == "all"
|
|
165
|
+
Requires-Dist: azure-core>=1.24.0; extra == "all"
|
|
166
|
+
Requires-Dist: ibm-cos-sdk; extra == "all"
|
|
167
|
+
Requires-Dist: azure-identity>=1.19.0; extra == "all"
|
|
168
|
+
Requires-Dist: kubernetes!=32.0.0,>=20.0.0; extra == "all"
|
|
169
|
+
Requires-Dist: nebius>=0.2.47; extra == "all"
|
|
170
|
+
Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "all"
|
|
171
|
+
Requires-Dist: msrestazure; extra == "all"
|
|
172
|
+
Requires-Dist: oci; extra == "all"
|
|
173
|
+
Requires-Dist: azure-core>=1.31.0; extra == "all"
|
|
174
|
+
Requires-Dist: ray[default]>=2.6.1; extra == "all"
|
|
169
175
|
Requires-Dist: google-cloud-storage; extra == "all"
|
|
170
|
-
Requires-Dist:
|
|
171
|
-
Requires-Dist:
|
|
176
|
+
Requires-Dist: azure-mgmt-network>=27.0.0; extra == "all"
|
|
177
|
+
Requires-Dist: ibm-cloud-sdk-core; extra == "all"
|
|
178
|
+
Requires-Dist: awscli>=1.27.10; extra == "all"
|
|
179
|
+
Requires-Dist: aiosqlite; extra == "all"
|
|
172
180
|
Requires-Dist: pyvmomi==8.0.1.0.2; extra == "all"
|
|
173
|
-
Requires-Dist:
|
|
174
|
-
Requires-Dist:
|
|
181
|
+
Requires-Dist: websockets; extra == "all"
|
|
182
|
+
Requires-Dist: sqlalchemy_adapter; extra == "all"
|
|
175
183
|
Requires-Dist: python-dateutil; extra == "all"
|
|
176
|
-
Requires-Dist:
|
|
177
|
-
Requires-Dist: kubernetes!=32.0.0,>=20.0.0; extra == "all"
|
|
178
|
-
Requires-Dist: vastai-sdk>=0.1.12; extra == "all"
|
|
179
|
-
Requires-Dist: ibm-cos-sdk; extra == "all"
|
|
184
|
+
Requires-Dist: azure-cli>=2.65.0; extra == "all"
|
|
180
185
|
Requires-Dist: pyopenssl<24.3.0,>=23.2.0; extra == "all"
|
|
181
|
-
Requires-Dist:
|
|
182
|
-
Requires-Dist:
|
|
183
|
-
Requires-Dist:
|
|
184
|
-
Requires-Dist: aiosqlite; extra == "all"
|
|
185
|
-
Requires-Dist: msgraph-sdk; extra == "all"
|
|
186
|
-
Requires-Dist: botocore>=1.29.10; extra == "all"
|
|
186
|
+
Requires-Dist: vastai-sdk>=0.1.12; extra == "all"
|
|
187
|
+
Requires-Dist: docker; extra == "all"
|
|
188
|
+
Requires-Dist: azure-storage-blob>=12.23.1; extra == "all"
|
|
187
189
|
Requires-Dist: aiohttp; extra == "all"
|
|
188
|
-
Requires-Dist: azure-mgmt-network>=27.0.0; extra == "all"
|
|
189
|
-
Requires-Dist: awscli>=1.27.10; extra == "all"
|
|
190
190
|
Requires-Dist: ibm-platform-services>=0.48.0; extra == "all"
|
|
191
|
-
Requires-Dist:
|
|
192
|
-
Requires-Dist:
|
|
193
|
-
Requires-Dist: passlib; extra == "all"
|
|
194
|
-
Requires-Dist: nebius>=0.2.47; extra == "all"
|
|
191
|
+
Requires-Dist: azure-mgmt-compute>=33.0.0; extra == "all"
|
|
192
|
+
Requires-Dist: pydo>=0.3.0; extra == "all"
|
|
195
193
|
Requires-Dist: runpod>=1.6.1; extra == "all"
|
|
196
|
-
Requires-Dist:
|
|
194
|
+
Requires-Dist: botocore>=1.29.10; extra == "all"
|
|
195
|
+
Requires-Dist: anyio; extra == "all"
|
|
196
|
+
Requires-Dist: casbin; extra == "all"
|
|
197
197
|
Dynamic: author
|
|
198
198
|
Dynamic: classifier
|
|
199
199
|
Dynamic: description
|