skypilot-nightly 1.0.0.dev20250914__py3-none-any.whl → 1.0.0.dev20250916__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (43)
  1. sky/__init__.py +2 -2
  2. sky/backends/cloud_vm_ray_backend.py +4 -4
  3. sky/catalog/data_fetchers/fetch_seeweb.py +2 -2
  4. sky/client/sdk.py +6 -0
  5. sky/dashboard/out/404.html +1 -1
  6. sky/dashboard/out/_next/static/chunks/5339.4a881570243431a5.js +51 -0
  7. sky/dashboard/out/_next/static/chunks/{6990-11c8e9b982e8ffec.js → 6990-f6818c84ed8f1c86.js} +1 -1
  8. sky/dashboard/out/_next/static/chunks/{webpack-e2e3d2d3de7d43e5.js → webpack-05f82d90d6fd7f82.js} +1 -1
  9. sky/dashboard/out/_next/static/{5iak5kYp9a9ezANCb74L8 → y8s7LlyyfhMzpzCkxuD2r}/_buildManifest.js +1 -1
  10. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  11. sky/dashboard/out/clusters/[cluster].html +1 -1
  12. sky/dashboard/out/clusters.html +1 -1
  13. sky/dashboard/out/config.html +1 -1
  14. sky/dashboard/out/index.html +1 -1
  15. sky/dashboard/out/infra/[context].html +1 -1
  16. sky/dashboard/out/infra.html +1 -1
  17. sky/dashboard/out/jobs/[job].html +1 -1
  18. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  19. sky/dashboard/out/jobs.html +1 -1
  20. sky/dashboard/out/users.html +1 -1
  21. sky/dashboard/out/volumes.html +1 -1
  22. sky/dashboard/out/workspace/new.html +1 -1
  23. sky/dashboard/out/workspaces/[name].html +1 -1
  24. sky/dashboard/out/workspaces.html +1 -1
  25. sky/provision/docker_utils.py +44 -1
  26. sky/provision/instance_setup.py +15 -1
  27. sky/provision/kubernetes/instance.py +16 -2
  28. sky/provision/lambda_cloud/instance.py +12 -11
  29. sky/server/common.py +13 -0
  30. sky/server/constants.py +3 -0
  31. sky/server/requests/executor.py +20 -6
  32. sky/server/server.py +10 -5
  33. sky/skypilot_config.py +10 -3
  34. sky/utils/command_runner.py +21 -11
  35. {skypilot_nightly-1.0.0.dev20250914.dist-info → skypilot_nightly-1.0.0.dev20250916.dist-info}/METADATA +34 -34
  36. {skypilot_nightly-1.0.0.dev20250914.dist-info → skypilot_nightly-1.0.0.dev20250916.dist-info}/RECORD +42 -42
  37. sky/dashboard/out/_next/static/chunks/5339.c033b29835da0f35.js +0 -51
  38. /sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-7528cc0ef8c522c5.js} +0 -0
  39. /sky/dashboard/out/_next/static/{5iak5kYp9a9ezANCb74L8 → y8s7LlyyfhMzpzCkxuD2r}/_ssgManifest.js +0 -0
  40. {skypilot_nightly-1.0.0.dev20250914.dist-info → skypilot_nightly-1.0.0.dev20250916.dist-info}/WHEEL +0 -0
  41. {skypilot_nightly-1.0.0.dev20250914.dist-info → skypilot_nightly-1.0.0.dev20250916.dist-info}/entry_points.txt +0 -0
  42. {skypilot_nightly-1.0.0.dev20250914.dist-info → skypilot_nightly-1.0.0.dev20250916.dist-info}/licenses/LICENSE +0 -0
  43. {skypilot_nightly-1.0.0.dev20250914.dist-info → skypilot_nightly-1.0.0.dev20250916.dist-info}/top_level.txt +0 -0
@@ -1 +1 @@
1
- <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-e2e3d2d3de7d43e5.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces-7598c33a746cdc91.js" defer=""></script><script src="/dashboard/_next/static/5iak5kYp9a9ezANCb74L8/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/5iak5kYp9a9ezANCb74L8/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces","query":{},"buildId":"5iak5kYp9a9ezANCb74L8","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
1
+ <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-05f82d90d6fd7f82.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces-7528cc0ef8c522c5.js" defer=""></script><script src="/dashboard/_next/static/y8s7LlyyfhMzpzCkxuD2r/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/y8s7LlyyfhMzpzCkxuD2r/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces","query":{},"buildId":"y8s7LlyyfhMzpzCkxuD2r","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
@@ -32,6 +32,30 @@ DOCKER_SOCKET_NOT_READY_STR = ('Is the docker daemon running?')
32
32
 
33
33
  _DOCKER_SOCKET_WAIT_TIMEOUT_SECONDS = 30
34
34
 
35
+ # Install AWS CLI v2 (not v1 from pip) as it's required for ECR authentication
36
+ # AWS CLI v2 is installed as a standalone binary, not a Python package. See:
37
+ # https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html
38
+ INSTALL_AWS_CLI_CMD = (
39
+ 'which aws || ((command -v unzip >/dev/null 2>&1 || '
40
+ '(sudo apt-get update && sudo apt-get install -y unzip)) && '
41
+ 'curl -fsSL "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" '
42
+ '-o "/tmp/awscliv2.zip" && '
43
+ 'unzip -q /tmp/awscliv2.zip -d /tmp && sudo /tmp/aws/install '
44
+ '&& rm -rf /tmp/awscliv2.zip /tmp/aws)')
45
+
46
+
47
+ def _extract_region_from_ecr_server(server: str) -> str:
48
+ """Extract AWS region from ECR server URL.
49
+
50
+ ECR server format: <account-id>.dkr.ecr.<region>.amazonaws.com
51
+ Returns the region part from the URL.
52
+ """
53
+ # Split: ['<account-id>', 'dkr', 'ecr', '<region>', 'amazonaws', 'com']
54
+ parts = server.split('.')
55
+ if len(parts) >= 6 and parts[1] == 'dkr' and parts[2] == 'ecr':
56
+ return parts[3]
57
+ raise ValueError(f'Invalid ECR server format: {server}')
58
+
35
59
 
36
60
  @dataclasses.dataclass
37
61
  class DockerLoginConfig:
@@ -236,9 +260,9 @@ class DockerInitializer:
236
260
 
237
261
  # SkyPilot: Docker login if user specified a private docker registry.
238
262
  if 'docker_login_config' in self.docker_config:
239
- # TODO(tian): Maybe support a command to get the login password?
240
263
  docker_login_config = DockerLoginConfig(
241
264
  **self.docker_config['docker_login_config'])
265
+
242
266
  if docker_login_config.password:
243
267
  # Password is allowed to be empty, in that case, we will not run
244
268
  # the login command, and assume that the image pulling is
@@ -249,6 +273,25 @@ class DockerInitializer:
249
273
  f'--password {shlex.quote(docker_login_config.password)} '
250
274
  f'{shlex.quote(docker_login_config.server)}',
251
275
  wait_for_docker_daemon=True)
276
+ elif (docker_login_config.server.endswith('.amazonaws.com') and
277
+ '.dkr.ecr.' in docker_login_config.server):
278
+ # AWS ECR: Use aws ecr get-login-password for authentication
279
+ # ECR format: <account-id>.dkr.ecr.<region>.amazonaws.com
280
+ # This command uses the IAM credentials from the EC2 instance
281
+ # Ref: https://docs.aws.amazon.com/AmazonECR/latest/userguide/registry_auth.html # pylint: disable=line-too-long
282
+ region = _extract_region_from_ecr_server(
283
+ docker_login_config.server)
284
+
285
+ # AWS CLI is not pre-installed on AWS instances, unlike gcloud
286
+ # on GCP instances, so we need to install it first
287
+ self._run(INSTALL_AWS_CLI_CMD, wait_for_docker_daemon=False)
288
+
289
+ self._run(
290
+ f'aws ecr get-login-password --region {region} | '
291
+ f'{self.docker_cmd} login --username AWS '
292
+ f'--password-stdin '
293
+ f'{shlex.quote(docker_login_config.server)}',
294
+ wait_for_docker_daemon=True)
252
295
  elif docker_login_config.server.endswith('-docker.pkg.dev'):
253
296
  # Docker image server is on GCR, we need to do additional setup
254
297
  # to pull the image.
@@ -136,6 +136,20 @@ def _hint_worker_log_path(cluster_name: str, cluster_info: common.ClusterInfo,
136
136
  logger.info(f'Logs of worker nodes can be found at: {worker_log_path}')
137
137
 
138
138
 
139
+ class SSHThreadPoolExecutor(futures.ThreadPoolExecutor):
140
+ """ThreadPoolExecutor that kills children processes on exit."""
141
+
142
+ def __exit__(self, exc_type, exc_val, exc_tb):
143
+ # ssh command runner eventually calls
144
+ # log_lib.run_with_log, which will spawn
145
+ # subprocesses. If we are exiting the context
146
+ # we need to kill the children processes
147
+ # to avoid leakage.
148
+ subprocess_utils.kill_children_processes()
149
+ self.shutdown()
150
+ return False
151
+
152
+
139
153
  def _parallel_ssh_with_cache(func,
140
154
  cluster_name: str,
141
155
  stage_name: str,
@@ -148,7 +162,7 @@ def _parallel_ssh_with_cache(func,
148
162
  # as 32 is too large for some machines.
149
163
  max_workers = subprocess_utils.get_parallel_threads(
150
164
  cluster_info.provider_name)
151
- with futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
165
+ with SSHThreadPoolExecutor(max_workers=max_workers) as pool:
152
166
  results = []
153
167
  runners = provision.get_command_runners(cluster_info.provider_name,
154
168
  cluster_info, **ssh_credentials)
@@ -35,6 +35,9 @@ _TIMEOUT_FOR_POD_TERMINATION = 60 # 1 minutes
35
35
  _MAX_RETRIES = 3
36
36
  _NUM_THREADS = subprocess_utils.get_parallel_threads('kubernetes')
37
37
 
38
+ # Pattern to extract SSH user from command output, handling MOTD contamination
39
+ _SSH_USER_PATTERN = re.compile(r'SKYPILOT_SSH_USER: ([^\s\n]+)')
40
+
38
41
  logger = sky_logging.init_logger(__name__)
39
42
 
40
43
 
@@ -1276,7 +1279,11 @@ def get_cluster_info(
1276
1279
  assert cpu_request is not None, 'cpu_request should not be None'
1277
1280
 
1278
1281
  ssh_user = 'sky'
1279
- get_k8s_ssh_user_cmd = 'echo $(whoami)'
1282
+ # Use pattern matching to extract SSH user, handling MOTD contamination.
1283
+ # Some container images (like CUDA-Q) print MOTD when login shells start,
1284
+ # which can contaminate command output. We use a unique pattern to extract
1285
+ # the actual username reliably.
1286
+ get_k8s_ssh_user_cmd = 'echo "SKYPILOT_SSH_USER: $(whoami)"'
1280
1287
  assert head_pod_name is not None
1281
1288
  runner = command_runner.KubernetesCommandRunner(
1282
1289
  ((namespace, context), head_pod_name))
@@ -1286,7 +1293,14 @@ def get_cluster_info(
1286
1293
  stream_logs=False)
1287
1294
  _raise_command_running_error('get ssh user', get_k8s_ssh_user_cmd,
1288
1295
  head_pod_name, rc, stdout + stderr)
1289
- ssh_user = stdout.strip()
1296
+
1297
+ # Extract SSH user using pattern matching
1298
+ ssh_user_match = _SSH_USER_PATTERN.search(stdout)
1299
+ if ssh_user_match:
1300
+ ssh_user = ssh_user_match.group(1)
1301
+ else:
1302
+ raise ValueError('Failed to find SSH user identifier: '
1303
+ f'{stdout + stderr}')
1290
1304
  logger.debug(
1291
1305
  f'Using ssh user {ssh_user} for cluster {cluster_name_on_cloud}')
1292
1306
 
@@ -106,34 +106,35 @@ def run_instances(region: str, cluster_name_on_cloud: str,
106
106
  created_instance_ids = []
107
107
  remote_ssh_key_name = config.authentication_config['remote_key_name']
108
108
 
109
- def launch_nodes(node_type: str, quantity: int) -> List[str]:
109
+ def launch_node(node_type: str) -> str:
110
110
  try:
111
111
  instance_ids = lambda_client.create_instances(
112
112
  instance_type=config.node_config['InstanceType'],
113
113
  region=region,
114
114
  name=f'{cluster_name_on_cloud}-{node_type}',
115
- quantity=quantity,
115
+ # Quantity cannot actually be greater than 1; see:
116
+ # https://github.com/skypilot-org/skypilot/issues/7084
117
+ quantity=1,
116
118
  ssh_key_name=remote_ssh_key_name,
117
119
  )
118
- logger.info(f'Launched {len(instance_ids)} {node_type} node(s), '
119
- f'instance_ids: {instance_ids}')
120
- return instance_ids
120
+ logger.info(f'Launched {node_type} node, '
121
+ f'instance_id: {instance_ids[0]}')
122
+ return instance_ids[0]
121
123
  except Exception as e:
122
124
  logger.warning(f'run_instances error: {e}')
123
125
  raise
124
126
 
125
127
  if head_instance_id is None:
126
- instance_ids = launch_nodes('head', 1)
127
- assert len(instance_ids) == 1
128
- created_instance_ids.append(instance_ids[0])
129
- head_instance_id = instance_ids[0]
128
+ head_instance_id = launch_node('head')
129
+ created_instance_ids.append(head_instance_id)
130
130
 
131
131
  assert head_instance_id is not None, 'head_instance_id should not be None'
132
132
 
133
133
  worker_node_count = to_start_count - 1
134
134
  if worker_node_count > 0:
135
- instance_ids = launch_nodes('worker', worker_node_count)
136
- created_instance_ids.extend(instance_ids)
135
+ for _ in range(worker_node_count):
136
+ worker_instance_id = launch_node('worker')
137
+ created_instance_ids.append(worker_instance_id)
137
138
 
138
139
  while True:
139
140
  instances = _filter_instances(cluster_name_on_cloud, ['active'])
sky/server/common.py CHANGED
@@ -515,6 +515,19 @@ def get_request_id(response: 'requests.Response') -> RequestId[T]:
515
515
  return RequestId[T](request_id)
516
516
 
517
517
 
518
+ def get_stream_request_id(
519
+ response: 'requests.Response') -> Optional[RequestId[T]]:
520
+ """This is the same as the above function, but just for `sdk.stream_and_get`.
521
+ We do this because `/api/stream` may choose the latest request id, and
522
+ we need to keep track of that information. Request id in this case can
523
+ be None."""
524
+ handle_request_error(response)
525
+ request_id = response.headers.get(server_constants.STREAM_REQUEST_HEADER)
526
+ if request_id is not None:
527
+ return RequestId[T](request_id)
528
+ return None
529
+
530
+
518
531
  def _start_api_server(deploy: bool = False,
519
532
  host: str = '127.0.0.1',
520
533
  foreground: bool = False,
sky/server/constants.py CHANGED
@@ -61,3 +61,6 @@ DASHBOARD_DIR = os.path.join(os.path.dirname(__file__), '..', 'dashboard',
61
61
 
62
62
  # The interval (seconds) for the event to be restarted in the background.
63
63
  DAEMON_RESTART_INTERVAL_SECONDS = 20
64
+
65
+ # HTTP response header carrying the stream request id.
66
+ STREAM_REQUEST_HEADER = 'X-SkyPilot-Stream-Request-ID'
@@ -282,8 +282,8 @@ def _get_queue(schedule_type: api_requests.ScheduleType) -> RequestQueue:
282
282
 
283
283
  @contextlib.contextmanager
284
284
  def override_request_env_and_config(
285
- request_body: payloads.RequestBody,
286
- request_id: str) -> Generator[None, None, None]:
285
+ request_body: payloads.RequestBody, request_id: str,
286
+ request_name: str) -> Generator[None, None, None]:
287
287
  """Override the environment and SkyPilot config for a request."""
288
288
  original_env = os.environ.copy()
289
289
  try:
@@ -319,9 +319,22 @@ def override_request_env_and_config(
319
319
  with skypilot_config.override_skypilot_config(
320
320
  request_body.override_skypilot_config,
321
321
  request_body.override_skypilot_config_path):
322
- # Rejecting requests to workspaces that the user does not have
323
- # permission to access.
324
- workspaces_core.reject_request_for_unauthorized_workspace(user)
322
+ # Skip permission check for sky.workspaces.get request
323
+ # as it is used to determine which workspaces the user
324
+ # has access to.
325
+ if request_name != 'sky.workspaces.get':
326
+ try:
327
+ # Reject requests that the user does not have permission
328
+ # to access.
329
+ workspaces_core.reject_request_for_unauthorized_workspace(
330
+ user)
331
+ except exceptions.PermissionDeniedError as e:
332
+ logger.debug(
333
+ f'{request_id} permission denied to workspace: '
334
+ f'{skypilot_config.get_active_workspace()}: {e}')
335
+ raise e
336
+ logger.debug(
337
+ f'{request_id} permission granted to {request_name} request')
325
338
  yield
326
339
  finally:
327
340
  # We need to call the save_timeline() since atexit will not be
@@ -402,7 +415,8 @@ def _request_execution_wrapper(request_id: str,
402
415
  # captured in the log file.
403
416
  try:
404
417
  with sky_logging.add_debug_log_handler(request_id), \
405
- override_request_env_and_config(request_body, request_id), \
418
+ override_request_env_and_config(
419
+ request_body, request_id, request_name), \
406
420
  tempstore.tempdir():
407
421
  if sky_logging.logging_enabled(logger, sky_logging.DEBUG):
408
422
  config = skypilot_config.to_dict()
sky/server/server.py CHANGED
@@ -1571,6 +1571,15 @@ async def stream(
1571
1571
  detail=f'Log path {log_path!r} does not exist')
1572
1572
 
1573
1573
  log_path_to_stream = resolved_log_path
1574
+
1575
+ headers = {
1576
+ 'Cache-Control': 'no-cache, no-transform',
1577
+ 'X-Accel-Buffering': 'no',
1578
+ 'Transfer-Encoding': 'chunked'
1579
+ }
1580
+ if request_id is not None:
1581
+ headers[server_constants.STREAM_REQUEST_HEADER] = request_id
1582
+
1574
1583
  return fastapi.responses.StreamingResponse(
1575
1584
  content=stream_utils.log_streamer(request_id,
1576
1585
  log_path_to_stream,
@@ -1578,11 +1587,7 @@ async def stream(
1578
1587
  tail=tail,
1579
1588
  follow=follow),
1580
1589
  media_type='text/plain',
1581
- headers={
1582
- 'Cache-Control': 'no-cache, no-transform',
1583
- 'X-Accel-Buffering': 'no',
1584
- 'Transfer-Encoding': 'chunked'
1585
- },
1590
+ headers=headers,
1586
1591
  )
1587
1592
 
1588
1593
 
sky/skypilot_config.py CHANGED
@@ -415,10 +415,17 @@ def local_active_workspace_ctx(workspace: str) -> Iterator[None]:
415
415
  def get_active_workspace(force_user_workspace: bool = False) -> str:
416
416
  context_workspace = getattr(_active_workspace_context, 'workspace', None)
417
417
  if not force_user_workspace and context_workspace is not None:
418
- logger.debug(f'Get context workspace: {context_workspace}')
418
+ logger.debug(f'Got context workspace: {context_workspace}')
419
419
  return context_workspace
420
- return get_nested(keys=('active_workspace',),
421
- default_value=constants.SKYPILOT_DEFAULT_WORKSPACE)
420
+ active_workspace = get_nested(keys=('active_workspace',),
421
+ default_value=None)
422
+ if active_workspace is None:
423
+ logger.debug(f'No active workspace found, using default workspace: '
424
+ f'{constants.SKYPILOT_DEFAULT_WORKSPACE}')
425
+ active_workspace = constants.SKYPILOT_DEFAULT_WORKSPACE
426
+ else:
427
+ logger.debug(f'Got active workspace: {active_workspace}')
428
+ return active_workspace
422
429
 
423
430
 
424
431
  def set_nested(keys: Tuple[str, ...], value: Any) -> Dict[str, Any]:
@@ -3,6 +3,7 @@ import enum
3
3
  import hashlib
4
4
  import os
5
5
  import pathlib
6
+ import re
6
7
  import shlex
7
8
  import sys
8
9
  import time
@@ -22,6 +23,9 @@ from sky.utils import timeline
22
23
 
23
24
  logger = sky_logging.init_logger(__name__)
24
25
 
26
+ # Pattern to extract home directory from command output
27
+ _HOME_DIR_PATTERN = re.compile(r'SKYPILOT_HOME_DIR: ([^\s\n]+)')
28
+
25
29
  # Rsync options
26
30
  # TODO(zhwu): This will print a per-file progress bar (with -P),
27
31
  # shooting a lot of messages to the output. --info=progress2 is used
@@ -183,17 +187,25 @@ class CommandRunner:
183
187
  return '-'.join(str(x) for x in self.node)
184
188
 
185
189
  def _get_remote_home_dir(self) -> str:
186
- # Use `echo ~` to get the remote home directory, instead of pwd or
187
- # echo $HOME, because pwd can be `/` when the remote user is root
188
- # and $HOME is not always set.
189
- rc, remote_home_dir, stderr = self.run('echo ~',
190
- require_outputs=True,
191
- separate_stderr=True,
192
- stream_logs=False)
190
+ # Use pattern matching to extract home directory.
191
+ # Some container images print MOTD when login shells start, which can
192
+ # contaminate command output. We use a unique pattern to extract the
193
+ # actual home directory reliably.
194
+ rc, output, stderr = self.run('echo "SKYPILOT_HOME_DIR: $(echo ~)"',
195
+ require_outputs=True,
196
+ separate_stderr=True,
197
+ stream_logs=False)
193
198
  if rc != 0:
194
199
  raise ValueError('Failed to get remote home directory: '
195
- f'{remote_home_dir + stderr}')
196
- remote_home_dir = remote_home_dir.strip()
200
+ f'{output + stderr}')
201
+
202
+ # Extract home directory using pattern matching
203
+ home_dir_match = _HOME_DIR_PATTERN.search(output)
204
+ if home_dir_match:
205
+ remote_home_dir = home_dir_match.group(1)
206
+ else:
207
+ raise ValueError('Failed to find remote home directory identifier: '
208
+ f'{output + stderr}')
197
209
  return remote_home_dir
198
210
 
199
211
  def _get_command_to_run(
@@ -414,7 +426,6 @@ class CommandRunner:
414
426
  SkyPilot but we still want to get rid of some warning messages,
415
427
  such as SSH warnings.
416
428
 
417
-
418
429
  Returns:
419
430
  returncode
420
431
  or
@@ -991,7 +1002,6 @@ class KubernetesCommandRunner(CommandRunner):
991
1002
  SkyPilot but we still want to get rid of some warning messages,
992
1003
  such as SSH warnings.
993
1004
 
994
-
995
1005
  Returns:
996
1006
  returncode
997
1007
  or
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: skypilot-nightly
3
- Version: 1.0.0.dev20250914
3
+ Version: 1.0.0.dev20250916
4
4
  Summary: SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.
5
5
  Author: SkyPilot Team
6
6
  License: Apache 2.0
@@ -151,49 +151,49 @@ Requires-Dist: grpcio>=1.63.0; extra == "server"
151
151
  Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "server"
152
152
  Requires-Dist: aiosqlite; extra == "server"
153
153
  Provides-Extra: all
154
- Requires-Dist: sqlalchemy_adapter; extra == "all"
155
- Requires-Dist: azure-core>=1.31.0; extra == "all"
156
- Requires-Dist: azure-mgmt-compute>=33.0.0; extra == "all"
157
- Requires-Dist: pydo>=0.3.0; extra == "all"
158
- Requires-Dist: docker; extra == "all"
159
- Requires-Dist: azure-core>=1.24.0; extra == "all"
160
- Requires-Dist: azure-storage-blob>=12.23.1; extra == "all"
161
- Requires-Dist: google-api-python-client>=2.69.0; extra == "all"
162
154
  Requires-Dist: boto3>=1.26.1; extra == "all"
163
- Requires-Dist: ibm-cloud-sdk-core; extra == "all"
164
- Requires-Dist: anyio; extra == "all"
165
- Requires-Dist: oci; extra == "all"
166
155
  Requires-Dist: colorama<0.4.5; extra == "all"
167
- Requires-Dist: msrestazure; extra == "all"
156
+ Requires-Dist: msgraph-sdk; extra == "all"
157
+ Requires-Dist: pyjwt; extra == "all"
158
+ Requires-Dist: google-api-python-client>=2.69.0; extra == "all"
159
+ Requires-Dist: passlib; extra == "all"
160
+ Requires-Dist: ecsapi>=0.2.0; extra == "all"
161
+ Requires-Dist: azure-common; extra == "all"
162
+ Requires-Dist: grpcio>=1.63.0; extra == "all"
163
+ Requires-Dist: ibm-vpc; extra == "all"
168
164
  Requires-Dist: cudo-compute>=0.1.10; extra == "all"
165
+ Requires-Dist: azure-core>=1.24.0; extra == "all"
166
+ Requires-Dist: ibm-cos-sdk; extra == "all"
167
+ Requires-Dist: azure-identity>=1.19.0; extra == "all"
168
+ Requires-Dist: kubernetes!=32.0.0,>=20.0.0; extra == "all"
169
+ Requires-Dist: nebius>=0.2.47; extra == "all"
170
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "all"
171
+ Requires-Dist: msrestazure; extra == "all"
172
+ Requires-Dist: oci; extra == "all"
173
+ Requires-Dist: azure-core>=1.31.0; extra == "all"
174
+ Requires-Dist: ray[default]>=2.6.1; extra == "all"
169
175
  Requires-Dist: google-cloud-storage; extra == "all"
170
- Requires-Dist: casbin; extra == "all"
171
- Requires-Dist: azure-cli>=2.65.0; extra == "all"
176
+ Requires-Dist: azure-mgmt-network>=27.0.0; extra == "all"
177
+ Requires-Dist: ibm-cloud-sdk-core; extra == "all"
178
+ Requires-Dist: awscli>=1.27.10; extra == "all"
179
+ Requires-Dist: aiosqlite; extra == "all"
172
180
  Requires-Dist: pyvmomi==8.0.1.0.2; extra == "all"
173
- Requires-Dist: ecsapi>=0.2.0; extra == "all"
174
- Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "all"
181
+ Requires-Dist: websockets; extra == "all"
182
+ Requires-Dist: sqlalchemy_adapter; extra == "all"
175
183
  Requires-Dist: python-dateutil; extra == "all"
176
- Requires-Dist: ray[default]>=2.6.1; extra == "all"
177
- Requires-Dist: kubernetes!=32.0.0,>=20.0.0; extra == "all"
178
- Requires-Dist: vastai-sdk>=0.1.12; extra == "all"
179
- Requires-Dist: ibm-cos-sdk; extra == "all"
184
+ Requires-Dist: azure-cli>=2.65.0; extra == "all"
180
185
  Requires-Dist: pyopenssl<24.3.0,>=23.2.0; extra == "all"
181
- Requires-Dist: websockets; extra == "all"
182
- Requires-Dist: azure-identity>=1.19.0; extra == "all"
183
- Requires-Dist: ibm-vpc; extra == "all"
184
- Requires-Dist: aiosqlite; extra == "all"
185
- Requires-Dist: msgraph-sdk; extra == "all"
186
- Requires-Dist: botocore>=1.29.10; extra == "all"
186
+ Requires-Dist: vastai-sdk>=0.1.12; extra == "all"
187
+ Requires-Dist: docker; extra == "all"
188
+ Requires-Dist: azure-storage-blob>=12.23.1; extra == "all"
187
189
  Requires-Dist: aiohttp; extra == "all"
188
- Requires-Dist: azure-mgmt-network>=27.0.0; extra == "all"
189
- Requires-Dist: awscli>=1.27.10; extra == "all"
190
190
  Requires-Dist: ibm-platform-services>=0.48.0; extra == "all"
191
- Requires-Dist: pyjwt; extra == "all"
192
- Requires-Dist: grpcio>=1.63.0; extra == "all"
193
- Requires-Dist: passlib; extra == "all"
194
- Requires-Dist: nebius>=0.2.47; extra == "all"
191
+ Requires-Dist: azure-mgmt-compute>=33.0.0; extra == "all"
192
+ Requires-Dist: pydo>=0.3.0; extra == "all"
195
193
  Requires-Dist: runpod>=1.6.1; extra == "all"
196
- Requires-Dist: azure-common; extra == "all"
194
+ Requires-Dist: botocore>=1.29.10; extra == "all"
195
+ Requires-Dist: anyio; extra == "all"
196
+ Requires-Dist: casbin; extra == "all"
197
197
  Dynamic: author
198
198
  Dynamic: classifier
199
199
  Dynamic: description