skypilot-nightly 1.0.0.dev20250417__py3-none-any.whl → 1.0.0.dev20250422__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/aws.py +2 -13
  3. sky/backends/backend_utils.py +28 -0
  4. sky/backends/wheel_utils.py +9 -0
  5. sky/cli.py +93 -24
  6. sky/client/cli.py +93 -24
  7. sky/client/common.py +10 -3
  8. sky/client/sdk.py +6 -3
  9. sky/clouds/aws.py +5 -5
  10. sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +9 -9
  11. sky/dashboard/out/404.html +1 -0
  12. sky/dashboard/out/_next/static/2GsKhI8XKYj9B2969iIDf/_buildManifest.js +1 -0
  13. sky/dashboard/out/_next/static/2GsKhI8XKYj9B2969iIDf/_ssgManifest.js +1 -0
  14. sky/dashboard/out/_next/static/chunks/236-d437cf66e68a6f64.js +6 -0
  15. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +15 -0
  16. sky/dashboard/out/_next/static/chunks/37-72fdc8f71d6e4784.js +6 -0
  17. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +59 -0
  18. sky/dashboard/out/_next/static/chunks/845-2ea1cc63ba1f4067.js +1 -0
  19. sky/dashboard/out/_next/static/chunks/979-7cd0778078b9cfad.js +1 -0
  20. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +1 -0
  21. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +33 -0
  22. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +1 -0
  23. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +1 -0
  24. sky/dashboard/out/_next/static/chunks/pages/_app-3001e84c61acddfb.js +1 -0
  25. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +1 -0
  26. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-b09f7fbf6d5d74f6.js +1 -0
  27. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-b57ec043f09c5813.js +1 -0
  28. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +1 -0
  29. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +1 -0
  30. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-ef2e0e91a9222cac.js +1 -0
  31. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +1 -0
  32. sky/dashboard/out/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js +1 -0
  33. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +1 -0
  34. sky/dashboard/out/_next/static/css/f3538cd90cfca88c.css +3 -0
  35. sky/dashboard/out/clusters/[cluster]/[job].html +1 -0
  36. sky/dashboard/out/clusters/[cluster].html +1 -0
  37. sky/dashboard/out/clusters.html +1 -0
  38. sky/dashboard/out/favicon.ico +0 -0
  39. sky/dashboard/out/index.html +1 -0
  40. sky/dashboard/out/jobs/[job].html +1 -0
  41. sky/dashboard/out/jobs.html +1 -0
  42. sky/dashboard/out/skypilot.svg +15 -0
  43. sky/dashboard/out/videos/cursor-small.mp4 +0 -0
  44. sky/data/data_transfer.py +2 -1
  45. sky/data/storage.py +24 -14
  46. sky/optimizer.py +7 -9
  47. sky/provision/provisioner.py +2 -1
  48. sky/provision/runpod/utils.py +32 -6
  49. sky/resources.py +11 -2
  50. sky/serve/__init__.py +2 -0
  51. sky/serve/autoscalers.py +6 -2
  52. sky/serve/client/sdk.py +61 -0
  53. sky/serve/replica_managers.py +6 -8
  54. sky/serve/serve_utils.py +33 -1
  55. sky/serve/server/core.py +187 -5
  56. sky/serve/server/server.py +28 -0
  57. sky/server/common.py +19 -1
  58. sky/server/constants.py +6 -0
  59. sky/server/requests/executor.py +4 -0
  60. sky/server/requests/payloads.py +27 -15
  61. sky/server/server.py +43 -0
  62. sky/setup_files/MANIFEST.in +1 -0
  63. sky/sky_logging.py +10 -0
  64. sky/skypilot_config.py +58 -37
  65. sky/templates/kubernetes-ray.yml.j2 +6 -2
  66. sky/utils/config_utils.py +0 -1
  67. sky/utils/controller_utils.py +0 -1
  68. {skypilot_nightly-1.0.0.dev20250417.dist-info → skypilot_nightly-1.0.0.dev20250422.dist-info}/METADATA +1 -1
  69. {skypilot_nightly-1.0.0.dev20250417.dist-info → skypilot_nightly-1.0.0.dev20250422.dist-info}/RECORD +73 -40
  70. {skypilot_nightly-1.0.0.dev20250417.dist-info → skypilot_nightly-1.0.0.dev20250422.dist-info}/WHEEL +1 -1
  71. {skypilot_nightly-1.0.0.dev20250417.dist-info → skypilot_nightly-1.0.0.dev20250422.dist-info}/entry_points.txt +0 -0
  72. {skypilot_nightly-1.0.0.dev20250417.dist-info → skypilot_nightly-1.0.0.dev20250422.dist-info}/licenses/LICENSE +0 -0
  73. {skypilot_nightly-1.0.0.dev20250417.dist-info → skypilot_nightly-1.0.0.dev20250422.dist-info}/top_level.txt +0 -0
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
5
5
  import urllib.request
6
6
 
7
7
  # Replaced with the current commit when building the wheels.
8
- _SKYPILOT_COMMIT_SHA = 'ff17963ce2804e65e3aa2cb1fb02dcd247f680e6'
8
+ _SKYPILOT_COMMIT_SHA = '69aebc34b3963a1a8d4026e68b3cffd86347c1e2'
9
9
 
10
10
 
11
11
  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
35
35
 
36
36
 
37
37
  __commit__ = _get_git_commit()
38
- __version__ = '1.0.0.dev20250417'
38
+ __version__ = '1.0.0.dev20250422'
39
39
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
40
40
 
41
41
 
sky/adaptors/aws.py CHANGED
@@ -28,7 +28,6 @@ This is informed by the following boto3 docs:
28
28
 
29
29
  # pylint: disable=import-outside-toplevel
30
30
 
31
- import functools
32
31
  import logging
33
32
  import threading
34
33
  import time
@@ -60,23 +59,13 @@ class _ThreadLocalLRUCache(threading.local):
60
59
 
61
60
  def __init__(self, maxsize=32):
62
61
  super().__init__()
63
- self.cache = annotations.lru_cache(scope='global', maxsize=maxsize)
62
+ self.cache = annotations.lru_cache(scope='request', maxsize=maxsize)
64
63
 
65
64
 
66
65
  def _thread_local_lru_cache(maxsize=32):
67
66
  # Create thread-local storage for the LRU cache
68
67
  local_cache = _ThreadLocalLRUCache(maxsize)
69
-
70
- def decorator(func):
71
-
72
- @functools.wraps(func)
73
- def wrapper(*args, **kwargs):
74
- # Use the thread-local LRU cache
75
- return local_cache.cache(func)(*args, **kwargs)
76
-
77
- return wrapper
78
-
79
- return decorator
68
+ return local_cache.cache
80
69
 
81
70
 
82
71
  def _assert_kwargs_builtin_type(kwargs):
@@ -2582,11 +2582,36 @@ def get_clusters(
2582
2582
  logger.info(f'Cluster(s) not found: {bright}{clusters_str}{reset}.')
2583
2583
  records = new_records
2584
2584
 
2585
+ def _update_record_with_resources(record: Optional[Dict[str, Any]]) -> None:
2586
+ """Add the resources to the record."""
2587
+ if record is None:
2588
+ return
2589
+ handle = record['handle']
2590
+ if handle is None:
2591
+ return
2592
+ record['nodes'] = handle.launched_nodes
2593
+ if handle.launched_resources is None:
2594
+ return
2595
+ record['cloud'] = (f'{handle.launched_resources.cloud}'
2596
+ if handle.launched_resources.cloud else None)
2597
+ record['region'] = (f'{handle.launched_resources.region}'
2598
+ if handle.launched_resources.region else None)
2599
+ record['cpus'] = (f'{handle.launched_resources.cpus}'
2600
+ if handle.launched_resources.cpus else None)
2601
+ record['memory'] = (f'{handle.launched_resources.memory}'
2602
+ if handle.launched_resources.memory else None)
2603
+ record['accelerators'] = (f'{handle.launched_resources.accelerators}'
2604
+ if handle.launched_resources.accelerators else
2605
+ None)
2606
+
2585
2607
  # Add auth_config to the records
2586
2608
  for record in records:
2587
2609
  _update_record_with_credentials_and_resources_str(record)
2588
2610
 
2589
2611
  if refresh == common.StatusRefreshMode.NONE:
2612
+ # Add resources to the records
2613
+ for record in records:
2614
+ _update_record_with_resources(record)
2590
2615
  return records
2591
2616
 
2592
2617
  plural = 's' if len(records) > 1 else ''
@@ -2662,6 +2687,9 @@ def get_clusters(
2662
2687
  for cluster_name, e in failed_clusters:
2663
2688
  logger.warning(f' {bright}{cluster_name}{reset}: {e}')
2664
2689
 
2690
+ # Add resources to the records
2691
+ for record in kept_records:
2692
+ _update_record_with_resources(record)
2665
2693
  return kept_records
2666
2694
 
2667
2695
 
@@ -85,6 +85,15 @@ def _build_sky_wheel() -> pathlib.Path:
85
85
  for f in setup_files_dir.iterdir():
86
86
  if f.is_file() and f.name != 'setup.py':
87
87
  shutil.copy(str(f), str(tmp_dir))
88
+ if f.name == 'MANIFEST.in':
89
+ # Remove the line `sky/dashboard/out`, so we do not
90
+ # include the dashboard files in the internal wheel
91
+ import fileinput # pylint: disable=import-outside-toplevel
92
+ with fileinput.input(tmp_dir / f.name,
93
+ inplace=True) as file:
94
+ for line in file:
95
+ if 'sky/dashboard/out' not in line:
96
+ print(line, end='')
88
97
 
89
98
  init_file_path = SKY_PACKAGE_PATH / '__init__.py'
90
99
  init_file_content = init_file_path.read_text()
sky/cli.py CHANGED
@@ -28,6 +28,7 @@ import datetime
28
28
  import functools
29
29
  import getpass
30
30
  import os
31
+ import pathlib
31
32
  import shlex
32
33
  import shutil
33
34
  import subprocess
@@ -301,13 +302,9 @@ def config_option(expose_value: bool):
301
302
  try:
302
303
  if len(value) == 0:
303
304
  return None
304
- elif len(value) > 1:
305
- raise ValueError('argument specified multiple times. '
306
- 'To specify multiple configs, use '
307
- '--config nested.key1=val1,another.key2=val2')
308
305
  else:
309
306
  # Apply the config overrides to the skypilot config.
310
- return skypilot_config.apply_cli_config(value[0])
307
+ return skypilot_config.apply_cli_config(value)
311
308
  except ValueError as e:
312
309
  raise click.BadParameter(f'{str(e)}') from e
313
310
 
@@ -4867,8 +4864,14 @@ def serve_down(
4867
4864
  default=False,
4868
4865
  required=False,
4869
4866
  help='Show the load balancer logs of this service.')
4867
+ @click.option('--sync-down',
4868
+ '-s',
4869
+ is_flag=True,
4870
+ default=False,
4871
+ help='Sync down logs to the local machine. Can be combined with '
4872
+ '--controller, --load-balancer, or a replica ID to narrow scope.')
4870
4873
  @click.argument('service_name', required=True, type=str)
4871
- @click.argument('replica_id', required=False, type=int)
4874
+ @click.argument('replica_ids', required=False, type=int, nargs=-1)
4872
4875
  @usage_lib.entrypoint
4873
4876
  # TODO(tian): Add default argument for this CLI if none of the flags are
4874
4877
  # specified.
@@ -4877,9 +4880,13 @@ def serve_logs(
4877
4880
  follow: bool,
4878
4881
  controller: bool,
4879
4882
  load_balancer: bool,
4880
- replica_id: Optional[int],
4883
+ replica_ids: Tuple[int, ...],
4884
+ sync_down: bool,
4881
4885
  ):
4882
- """Tail the log of a service.
4886
+ """Tail or sync down logs of a service.
4887
+
4888
+ Logs can be tailed from one target (controller, load balancer, or a single
4889
+ replica) or synced down from multiple targets simultaneously.
4883
4890
 
4884
4891
  Example:
4885
4892
 
@@ -4893,27 +4900,89 @@ def serve_logs(
4893
4900
  \b
4894
4901
  # Tail the logs of replica 1
4895
4902
  sky serve logs [SERVICE_NAME] 1
4903
+ \b
4904
+ # Sync down all logs of the service (controller, LB, all replicas)
4905
+ sky serve logs [SERVICE_NAME] --sync-down
4906
+ \b
4907
+ # Sync down controller logs and logs for replicas 1 and 3
4908
+ sky serve logs [SERVICE_NAME] 1 3 --controller --sync-down
4896
4909
  """
4897
- have_replica_id = replica_id is not None
4898
- num_flags = (controller + load_balancer + have_replica_id)
4899
- if num_flags > 1:
4900
- raise click.UsageError('At most one of --controller, --load-balancer, '
4901
- '[REPLICA_ID] can be specified.')
4902
- if num_flags == 0:
4903
- raise click.UsageError('One of --controller, --load-balancer, '
4904
- '[REPLICA_ID] must be specified.')
4910
+ chosen_components: Set[serve_lib.ServiceComponent] = set()
4905
4911
  if controller:
4906
- target_component = serve_lib.ServiceComponent.CONTROLLER
4907
- elif load_balancer:
4908
- target_component = serve_lib.ServiceComponent.LOAD_BALANCER
4909
- else:
4910
- # Already checked that num_flags == 1.
4911
- assert replica_id is not None
4912
- target_component = serve_lib.ServiceComponent.REPLICA
4912
+ chosen_components.add(serve_lib.ServiceComponent.CONTROLLER)
4913
+ if load_balancer:
4914
+ chosen_components.add(serve_lib.ServiceComponent.LOAD_BALANCER)
4915
+ # replica_ids contains the specific replica IDs provided by the user.
4916
+ # If it's not empty, it implies the user wants replica logs.
4917
+ if replica_ids:
4918
+ chosen_components.add(serve_lib.ServiceComponent.REPLICA)
4919
+
4920
+ if sync_down:
4921
+ # For sync-down, multiple targets are allowed.
4922
+ # If no specific components/replicas are mentioned, sync all.
4923
+ # Note: Multiple replicas or targets can only be specified when
4924
+ # using --sync-down.
4925
+ targets_to_sync = list(chosen_components)
4926
+ if not targets_to_sync and not replica_ids:
4927
+ # Default to all components if nothing specific is requested
4928
+ targets_to_sync = [
4929
+ serve_lib.ServiceComponent.CONTROLLER,
4930
+ serve_lib.ServiceComponent.LOAD_BALANCER,
4931
+ serve_lib.ServiceComponent.REPLICA,
4932
+ ]
4933
+
4934
+ timestamp = sky_logging.get_run_timestamp()
4935
+ log_dir = (pathlib.Path(constants.SKY_LOGS_DIRECTORY) / 'service' /
4936
+ f'{service_name}_{timestamp}').expanduser()
4937
+ log_dir.mkdir(parents=True, exist_ok=True)
4938
+
4939
+ with rich_utils.client_status(
4940
+ ux_utils.spinner_message('Downloading service logs...')):
4941
+ serve_lib.sync_down_logs(service_name,
4942
+ local_dir=str(log_dir),
4943
+ targets=targets_to_sync,
4944
+ replica_ids=list(replica_ids))
4945
+ style = colorama.Style
4946
+ fore = colorama.Fore
4947
+ logger.info(f'{fore.CYAN}Service {service_name} logs: '
4948
+ f'{log_dir}{style.RESET_ALL}')
4949
+ return
4950
+
4951
+ # Tailing requires exactly one target.
4952
+ num_targets = len(chosen_components)
4953
+ # If REPLICA component is chosen, len(replica_ids) must be 1 for tailing.
4954
+ if serve_lib.ServiceComponent.REPLICA in chosen_components:
4955
+ if len(replica_ids) != 1:
4956
+ raise click.UsageError(
4957
+ 'Can only tail logs from a single replica at a time. '
4958
+ 'Provide exactly one REPLICA_ID or use --sync-down '
4959
+ 'to download logs from multiple replicas.')
4960
+ # If replica is chosen and len is 1, num_targets effectively counts it.
4961
+ # We need to ensure no other component (controller/LB) is selected.
4962
+ if num_targets > 1:
4963
+ raise click.UsageError(
4964
+ 'Can only tail logs from one target at a time (controller, '
4965
+ 'load balancer, or a single replica). Use --sync-down '
4966
+ 'to download logs from multiple sources.')
4967
+ elif num_targets == 0:
4968
+ raise click.UsageError(
4969
+ 'Specify a target to tail: --controller, --load-balancer, or '
4970
+ 'a REPLICA_ID.')
4971
+ elif num_targets > 1:
4972
+ raise click.UsageError(
4973
+ 'Can only tail logs from one target at a time. Use --sync-down '
4974
+ 'to download logs from multiple sources.')
4975
+
4976
+ # At this point, we have exactly one target for tailing.
4977
+ assert len(chosen_components) == 1
4978
+ assert len(replica_ids) in [0, 1]
4979
+ target_component = chosen_components.pop()
4980
+ target_replica_id: Optional[int] = replica_ids[0] if replica_ids else None
4981
+
4913
4982
  try:
4914
4983
  serve_lib.tail_logs(service_name,
4915
4984
  target=target_component,
4916
- replica_id=replica_id,
4985
+ replica_id=target_replica_id,
4917
4986
  follow=follow)
4918
4987
  except exceptions.ClusterNotUpError:
4919
4988
  with ux_utils.print_exception_no_traceback():
sky/client/cli.py CHANGED
@@ -28,6 +28,7 @@ import datetime
28
28
  import functools
29
29
  import getpass
30
30
  import os
31
+ import pathlib
31
32
  import shlex
32
33
  import shutil
33
34
  import subprocess
@@ -301,13 +302,9 @@ def config_option(expose_value: bool):
301
302
  try:
302
303
  if len(value) == 0:
303
304
  return None
304
- elif len(value) > 1:
305
- raise ValueError('argument specified multiple times. '
306
- 'To specify multiple configs, use '
307
- '--config nested.key1=val1,another.key2=val2')
308
305
  else:
309
306
  # Apply the config overrides to the skypilot config.
310
- return skypilot_config.apply_cli_config(value[0])
307
+ return skypilot_config.apply_cli_config(value)
311
308
  except ValueError as e:
312
309
  raise click.BadParameter(f'{str(e)}') from e
313
310
 
@@ -4867,8 +4864,14 @@ def serve_down(
4867
4864
  default=False,
4868
4865
  required=False,
4869
4866
  help='Show the load balancer logs of this service.')
4867
+ @click.option('--sync-down',
4868
+ '-s',
4869
+ is_flag=True,
4870
+ default=False,
4871
+ help='Sync down logs to the local machine. Can be combined with '
4872
+ '--controller, --load-balancer, or a replica ID to narrow scope.')
4870
4873
  @click.argument('service_name', required=True, type=str)
4871
- @click.argument('replica_id', required=False, type=int)
4874
+ @click.argument('replica_ids', required=False, type=int, nargs=-1)
4872
4875
  @usage_lib.entrypoint
4873
4876
  # TODO(tian): Add default argument for this CLI if none of the flags are
4874
4877
  # specified.
@@ -4877,9 +4880,13 @@ def serve_logs(
4877
4880
  follow: bool,
4878
4881
  controller: bool,
4879
4882
  load_balancer: bool,
4880
- replica_id: Optional[int],
4883
+ replica_ids: Tuple[int, ...],
4884
+ sync_down: bool,
4881
4885
  ):
4882
- """Tail the log of a service.
4886
+ """Tail or sync down logs of a service.
4887
+
4888
+ Logs can be tailed from one target (controller, load balancer, or a single
4889
+ replica) or synced down from multiple targets simultaneously.
4883
4890
 
4884
4891
  Example:
4885
4892
 
@@ -4893,27 +4900,89 @@ def serve_logs(
4893
4900
  \b
4894
4901
  # Tail the logs of replica 1
4895
4902
  sky serve logs [SERVICE_NAME] 1
4903
+ \b
4904
+ # Sync down all logs of the service (controller, LB, all replicas)
4905
+ sky serve logs [SERVICE_NAME] --sync-down
4906
+ \b
4907
+ # Sync down controller logs and logs for replicas 1 and 3
4908
+ sky serve logs [SERVICE_NAME] 1 3 --controller --sync-down
4896
4909
  """
4897
- have_replica_id = replica_id is not None
4898
- num_flags = (controller + load_balancer + have_replica_id)
4899
- if num_flags > 1:
4900
- raise click.UsageError('At most one of --controller, --load-balancer, '
4901
- '[REPLICA_ID] can be specified.')
4902
- if num_flags == 0:
4903
- raise click.UsageError('One of --controller, --load-balancer, '
4904
- '[REPLICA_ID] must be specified.')
4910
+ chosen_components: Set[serve_lib.ServiceComponent] = set()
4905
4911
  if controller:
4906
- target_component = serve_lib.ServiceComponent.CONTROLLER
4907
- elif load_balancer:
4908
- target_component = serve_lib.ServiceComponent.LOAD_BALANCER
4909
- else:
4910
- # Already checked that num_flags == 1.
4911
- assert replica_id is not None
4912
- target_component = serve_lib.ServiceComponent.REPLICA
4912
+ chosen_components.add(serve_lib.ServiceComponent.CONTROLLER)
4913
+ if load_balancer:
4914
+ chosen_components.add(serve_lib.ServiceComponent.LOAD_BALANCER)
4915
+ # replica_ids contains the specific replica IDs provided by the user.
4916
+ # If it's not empty, it implies the user wants replica logs.
4917
+ if replica_ids:
4918
+ chosen_components.add(serve_lib.ServiceComponent.REPLICA)
4919
+
4920
+ if sync_down:
4921
+ # For sync-down, multiple targets are allowed.
4922
+ # If no specific components/replicas are mentioned, sync all.
4923
+ # Note: Multiple replicas or targets can only be specified when
4924
+ # using --sync-down.
4925
+ targets_to_sync = list(chosen_components)
4926
+ if not targets_to_sync and not replica_ids:
4927
+ # Default to all components if nothing specific is requested
4928
+ targets_to_sync = [
4929
+ serve_lib.ServiceComponent.CONTROLLER,
4930
+ serve_lib.ServiceComponent.LOAD_BALANCER,
4931
+ serve_lib.ServiceComponent.REPLICA,
4932
+ ]
4933
+
4934
+ timestamp = sky_logging.get_run_timestamp()
4935
+ log_dir = (pathlib.Path(constants.SKY_LOGS_DIRECTORY) / 'service' /
4936
+ f'{service_name}_{timestamp}').expanduser()
4937
+ log_dir.mkdir(parents=True, exist_ok=True)
4938
+
4939
+ with rich_utils.client_status(
4940
+ ux_utils.spinner_message('Downloading service logs...')):
4941
+ serve_lib.sync_down_logs(service_name,
4942
+ local_dir=str(log_dir),
4943
+ targets=targets_to_sync,
4944
+ replica_ids=list(replica_ids))
4945
+ style = colorama.Style
4946
+ fore = colorama.Fore
4947
+ logger.info(f'{fore.CYAN}Service {service_name} logs: '
4948
+ f'{log_dir}{style.RESET_ALL}')
4949
+ return
4950
+
4951
+ # Tailing requires exactly one target.
4952
+ num_targets = len(chosen_components)
4953
+ # If REPLICA component is chosen, len(replica_ids) must be 1 for tailing.
4954
+ if serve_lib.ServiceComponent.REPLICA in chosen_components:
4955
+ if len(replica_ids) != 1:
4956
+ raise click.UsageError(
4957
+ 'Can only tail logs from a single replica at a time. '
4958
+ 'Provide exactly one REPLICA_ID or use --sync-down '
4959
+ 'to download logs from multiple replicas.')
4960
+ # If replica is chosen and len is 1, num_targets effectively counts it.
4961
+ # We need to ensure no other component (controller/LB) is selected.
4962
+ if num_targets > 1:
4963
+ raise click.UsageError(
4964
+ 'Can only tail logs from one target at a time (controller, '
4965
+ 'load balancer, or a single replica). Use --sync-down '
4966
+ 'to download logs from multiple sources.')
4967
+ elif num_targets == 0:
4968
+ raise click.UsageError(
4969
+ 'Specify a target to tail: --controller, --load-balancer, or '
4970
+ 'a REPLICA_ID.')
4971
+ elif num_targets > 1:
4972
+ raise click.UsageError(
4973
+ 'Can only tail logs from one target at a time. Use --sync-down '
4974
+ 'to download logs from multiple sources.')
4975
+
4976
+ # At this point, we have exactly one target for tailing.
4977
+ assert len(chosen_components) == 1
4978
+ assert len(replica_ids) in [0, 1]
4979
+ target_component = chosen_components.pop()
4980
+ target_replica_id: Optional[int] = replica_ids[0] if replica_ids else None
4981
+
4913
4982
  try:
4914
4983
  serve_lib.tail_logs(service_name,
4915
4984
  target=target_component,
4916
- replica_id=replica_id,
4985
+ replica_id=target_replica_id,
4917
4986
  follow=follow)
4918
4987
  except exceptions.ClusterNotUpError:
4919
4988
  with ux_utils.print_exception_no_traceback():
sky/client/common.py CHANGED
@@ -53,11 +53,18 @@ API_SERVER_REQUEST_CONNECTION_TIMEOUT_SECONDS = 5
53
53
 
54
54
 
55
55
  def download_logs_from_api_server(
56
- paths_on_api_server: Iterable[str]) -> Dict[str, str]:
56
+ paths_on_api_server: Iterable[str],
57
+ remote_machine_prefix: str = str(
58
+ server_common.api_server_user_logs_dir_prefix()),
59
+ local_machine_prefix: str = constants.SKY_LOGS_DIRECTORY
60
+ ) -> Dict[str, str]:
57
61
  """Downloads the logs from the API server.
58
62
 
59
63
  Args:
60
64
  paths_on_api_server: The paths on the API server to download.
65
+ remote_machine_prefix: The prefix of the remote machine to save the
66
+ logs.
67
+ local_machine_prefix: The prefix of the local machine to save the logs.
61
68
 
62
69
  Returns:
63
70
  A dictionary mapping the remote path on API server to the local path.
@@ -69,8 +76,8 @@ def download_logs_from_api_server(
69
76
  # This should be moved to remote API server. A proper way might be
70
77
  # set the returned path to be started with a special prefix, instead
71
78
  # of using the `api_server_user_logs_dir_prefix()`.
72
- str(server_common.api_server_user_logs_dir_prefix()),
73
- constants.SKY_LOGS_DIRECTORY) for remote_path in paths_on_api_server
79
+ remote_machine_prefix,
80
+ local_machine_prefix) for remote_path in paths_on_api_server
74
81
  }
75
82
  body = payloads.DownloadBody(folder_paths=list(paths_on_api_server),)
76
83
  response = requests.post(f'{server_common.get_server_url()}/download',
sky/client/sdk.py CHANGED
@@ -1821,6 +1821,9 @@ def api_login(endpoint: Optional[str] = None) -> None:
1821
1821
  else:
1822
1822
  config = skypilot_config.get_user_config()
1823
1823
  config.set_nested(('api_server', 'endpoint'), endpoint)
1824
- common_utils.dump_yaml(str(config_path), config)
1825
- click.secho(f'Logged in to SkyPilot API server at {endpoint}',
1826
- fg='green')
1824
+ common_utils.dump_yaml(str(config_path), dict(config))
1825
+ dashboard_msg = f'Dashboard: {endpoint}/dashboard'
1826
+ click.secho(
1827
+ f'Logged in to SkyPilot API server at {endpoint}.'
1828
+ f' {dashboard_msg}',
1829
+ fg='green')
sky/clouds/aws.py CHANGED
@@ -571,7 +571,7 @@ class AWS(clouds.Cloud):
571
571
  return cls._check_credentials()
572
572
 
573
573
  @classmethod
574
- @annotations.lru_cache(scope='global',
574
+ @annotations.lru_cache(scope='request',
575
575
  maxsize=1) # Cache since getting identity is slow.
576
576
  def _check_credentials(cls) -> Tuple[bool, Optional[str]]:
577
577
  """Checks if the user has access credentials to AWS."""
@@ -710,7 +710,7 @@ class AWS(clouds.Cloud):
710
710
  return AWSIdentityType.SHARED_CREDENTIALS_FILE
711
711
 
712
712
  @classmethod
713
- @annotations.lru_cache(scope='global', maxsize=1)
713
+ @annotations.lru_cache(scope='request', maxsize=1)
714
714
  def _aws_configure_list(cls) -> Optional[bytes]:
715
715
  proc = subprocess.run('aws configure list',
716
716
  shell=True,
@@ -722,7 +722,7 @@ class AWS(clouds.Cloud):
722
722
  return proc.stdout
723
723
 
724
724
  @classmethod
725
- @annotations.lru_cache(scope='global',
725
+ @annotations.lru_cache(scope='request',
726
726
  maxsize=1) # Cache since getting identity is slow.
727
727
  def _sts_get_caller_identity(cls) -> Optional[List[List[str]]]:
728
728
  try:
@@ -804,7 +804,7 @@ class AWS(clouds.Cloud):
804
804
  return [user_ids]
805
805
 
806
806
  @classmethod
807
- @annotations.lru_cache(scope='global',
807
+ @annotations.lru_cache(scope='request',
808
808
  maxsize=1) # Cache since getting identity is slow.
809
809
  def get_user_identities(cls) -> Optional[List[List[str]]]:
810
810
  """Returns a [UserId, Account] list that uniquely identifies the user.
@@ -909,7 +909,7 @@ class AWS(clouds.Cloud):
909
909
  if os.path.exists(os.path.expanduser(f'~/.aws/{filename}'))
910
910
  }
911
911
 
912
- @annotations.lru_cache(scope='global', maxsize=1)
912
+ @annotations.lru_cache(scope='request', maxsize=1)
913
913
  def can_credential_expire(self) -> bool:
914
914
  identity_type = self._current_identity_type()
915
915
  return (identity_type is not None and
@@ -60,8 +60,9 @@ HIDDEN_TPU_DF = pd.read_csv(
60
60
  ,tpu-v3-2048,1,,,tpu-v3-2048,2048.0,614.4,us-east1,us-east1-d
61
61
  """)))
62
62
 
63
- # TPU V6e price for the following regions is missing in the SKUs.
64
- TPU_V6E_MISSING_REGIONS = ['us-central2', 'southamerica-west1']
63
+ # Maximum price for TPU V6e is $691.2/hour. Here we set a higher price
64
+ # so the failover will go to the region with precise pricing info first.
65
+ TPU_V6E_MAX_PRICE = 700
65
66
 
66
67
  # TPU V5 is not visible in specific zones. We hardcode the missing zones here.
67
68
  # NOTE(dev): Keep the zones and the df in sync.
@@ -699,13 +700,12 @@ def get_tpu_df(gce_skus: List[Dict[str, Any]],
699
700
  spot_str = 'spot ' if spot else ''
700
701
  print(f'The {spot_str}price of {tpu_name} in {tpu_region} is '
701
702
  'not found in SKUs or hidden TPU price DF.')
702
- if (tpu_name.startswith('tpu-v6e') and
703
- tpu_region in TPU_V6E_MISSING_REGIONS):
704
- if not spot:
705
- tpu_price = 0.0
706
- else:
707
- assert spot or tpu_price is not None, (row, hidden_tpu,
708
- HIDDEN_TPU_DF)
703
+ # GCP's TPU V6e pricing info is not stable and there are some
704
+ # regions that are missing the pricing info. We set the price to
705
+ # the maximum price so the failover will go to the region with
706
+ # precise pricing info first.
707
+ if tpu_name.startswith('tpu-v6e'):
708
+ tpu_price = TPU_V6E_MAX_PRICE
709
709
  return tpu_price
710
710
 
711
711
  df['Price'] = df.apply(lambda row: get_tpu_price(row, spot=False), axis=1)
@@ -0,0 +1 @@
1
+ <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><title>404: This page could not be found</title><meta name="next-head-count" content="3"/><link rel="preload" href="/dashboard/_next/static/css/f3538cd90cfca88c.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/f3538cd90cfca88c.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-830f59b8404e96b8.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-87d061ee6ed71b28.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-e0e2335212e72357.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-3001e84c61acddfb.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_error-1be831200e60c5c0.js" defer=""></script><script src="/dashboard/_next/static/2GsKhI8XKYj9B2969iIDf/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/2GsKhI8XKYj9B2969iIDf/_ssgManifest.js" defer=""></script></head><body><div id="__next"><div style="font-family:system-ui,&quot;Segoe UI&quot;,Roboto,Helvetica,Arial,sans-serif,&quot;Apple Color Emoji&quot;,&quot;Segoe UI Emoji&quot;;height:100vh;text-align:center;display:flex;flex-direction:column;align-items:center;justify-content:center"><div style="line-height:48px"><style>body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}</style><h1 class="next-error-h1" style="display:inline-block;margin:0 20px 0 0;padding-right:23px;font-size:24px;font-weight:500;vertical-align:top">404</h1><div style="display:inline-block"><h2 style="font-size:14px;font-weight:400;line-height:28px">This page could not be found<!-- 
-->.</h2></div></div></div></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"statusCode":404}},"page":"/_error","query":{},"buildId":"2GsKhI8XKYj9B2969iIDf","assetPrefix":"/dashboard","nextExport":true,"isFallback":false,"gip":true,"scriptLoader":[]}</script></body></html>
@@ -0,0 +1 @@
1
+ self.__BUILD_MANIFEST=function(s,c,e,t,a,r){return{__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/":["static/chunks/pages/index-f9f039532ca8cbc4.js"],"/_error":["static/chunks/pages/_error-1be831200e60c5c0.js"],"/clusters":[s,e,c,t,a,"static/chunks/pages/clusters-a93b93e10b8b074e.js"],"/clusters/[cluster]":[s,e,c,t,r,a,"static/chunks/pages/clusters/[cluster]-b57ec043f09c5813.js"],"/clusters/[cluster]/[job]":[s,c,"static/chunks/pages/clusters/[cluster]/[job]-b09f7fbf6d5d74f6.js"],"/jobs":[s,e,c,t,r,"static/chunks/pages/jobs-a75029b67aab6a2e.js"],"/jobs/[job]":[s,c,"static/chunks/pages/jobs/[job]-ef2e0e91a9222cac.js"],sortedPages:["/","/_app","/_error","/clusters","/clusters/[cluster]","/clusters/[cluster]/[job]","/jobs","/jobs/[job]"]}}("static/chunks/678-206dddca808e6d16.js","static/chunks/979-7cd0778078b9cfad.js","static/chunks/312-c3c8845990db8ffc.js","static/chunks/845-2ea1cc63ba1f4067.js","static/chunks/37-72fdc8f71d6e4784.js","static/chunks/236-d437cf66e68a6f64.js"),self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();
@@ -0,0 +1 @@
1
+ self.__SSG_MANIFEST=new Set([]);self.__SSG_MANIFEST_CB&&self.__SSG_MANIFEST_CB()