skypilot-nightly 1.0.0.dev20241113__py3-none-any.whl → 1.0.0.dev20241115__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
sky/serve/serve_utils.py CHANGED
@@ -46,8 +46,14 @@ NUM_SERVICE_THRESHOLD = (_SYSTEM_MEMORY_GB //
46
46
  constants.CONTROLLER_MEMORY_USAGE_GB)
47
47
  _CONTROLLER_URL = 'http://localhost:{CONTROLLER_PORT}'
48
48
 
49
- _SKYPILOT_PROVISION_LOG_PATTERN = r'.*tail -n100 -f (.*provision\.log).*'
50
- _SKYPILOT_LOG_PATTERN = r'.*tail -n100 -f (.*\.log).*'
49
+ # NOTE(dev): We assume log paths are either in ~/sky_logs/... or ~/.sky/...
50
+ # and always appear after a space. Be careful when changing UX as this
51
+ # assumption is used to expand some log files while ignoring others.
52
+ _SKYPILOT_LOG_DIRS = r'~/(sky_logs|\.sky)'
53
+ _SKYPILOT_PROVISION_LOG_PATTERN = (
54
+ fr'.* ({_SKYPILOT_LOG_DIRS}/.*provision\.log)')
55
+ _SKYPILOT_LOG_PATTERN = fr'.* ({_SKYPILOT_LOG_DIRS}/.*\.log)'
56
+
51
57
  # TODO(tian): Find all existing replica id and print here.
52
58
  _FAILED_TO_FIND_REPLICA_MSG = (
53
59
  f'{colorama.Fore.RED}Failed to find replica '
@@ -591,7 +597,7 @@ def get_latest_version_with_min_replicas(
591
597
  return active_versions[-1] if active_versions else None
592
598
 
593
599
 
594
- def _follow_replica_logs(
600
+ def _follow_logs_with_provision_expanding(
595
601
  file: TextIO,
596
602
  cluster_name: str,
597
603
  *,
@@ -599,7 +605,7 @@ def _follow_replica_logs(
599
605
  stop_on_eof: bool = False,
600
606
  idle_timeout_seconds: Optional[int] = None,
601
607
  ) -> Iterator[str]:
602
- """Follows logs for a replica, handling nested log files.
608
+ """Follows logs and expands any provision.log references found.
603
609
 
604
610
  Args:
605
611
  file: Log file to read from.
@@ -610,7 +616,7 @@ def _follow_replica_logs(
610
616
  new content.
611
617
 
612
618
  Yields:
613
- Log lines from the main file and any nested log files.
619
+ Log lines, including expanded content from referenced provision logs.
614
620
  """
615
621
 
616
622
  def cluster_is_up() -> bool:
@@ -620,36 +626,35 @@ def _follow_replica_logs(
620
626
  return cluster_record['status'] == status_lib.ClusterStatus.UP
621
627
 
622
628
  def process_line(line: str) -> Iterator[str]:
623
- # Tailing detailed progress for user. All logs in skypilot is
624
- # of format `To view detailed progress: tail -n100 -f *.log`.
625
- # Check if the line is directing users to view logs
629
+ # The line might be directing users to view logs, like
630
+ # `✓ Cluster launched: new-http. View logs at: *.log`
631
+ # We should tail the detailed logs for user.
626
632
  provision_log_prompt = re.match(_SKYPILOT_PROVISION_LOG_PATTERN, line)
627
- other_log_prompt = re.match(_SKYPILOT_LOG_PATTERN, line)
633
+ log_prompt = re.match(_SKYPILOT_LOG_PATTERN, line)
628
634
 
629
635
  if provision_log_prompt is not None:
630
636
  nested_log_path = os.path.expanduser(provision_log_prompt.group(1))
631
- with open(nested_log_path, 'r', newline='', encoding='utf-8') as f:
632
- # We still exit if more than 10 seconds without new content
633
- # to avoid any internal bug that causes the launch to fail
634
- # while cluster status remains INIT.
635
- # Originally, we output the next line first before printing
636
- # the launching logs. Since the next line is always
637
- # `Launching on <cloud> <region> (<zone>)`, we output it first
638
- # to indicate the process is starting.
639
- # TODO(andyl): After refactor #4323, the above logic is broken,
640
- # but coincidentally with the new UX 3.0, the `Cluster launched`
641
- # message is printed first, making the output appear correct.
642
- # Explaining this since it's technically a breaking change
643
- # for this refactor PR #4323. Will remove soon in a fix PR
644
- # for adapting the serve.follow_logs to the new UX.
645
- yield from _follow_replica_logs(f,
646
- cluster_name,
647
- should_stop=cluster_is_up,
648
- stop_on_eof=stop_on_eof,
649
- idle_timeout_seconds=10)
637
+
638
+ try:
639
+ with open(nested_log_path, 'r', newline='',
640
+ encoding='utf-8') as f:
641
+ # We still exit if more than 10 seconds without new content
642
+ # to avoid any internal bug that causes the launch to fail
643
+ # while cluster status remains INIT.
644
+ yield from log_utils.follow_logs(f,
645
+ should_stop=cluster_is_up,
646
+ stop_on_eof=stop_on_eof,
647
+ idle_timeout_seconds=10)
648
+ except FileNotFoundError:
649
+ yield line
650
+
651
+ yield (f'{colorama.Fore.YELLOW}{colorama.Style.BRIGHT}'
652
+ f'Try to expand log file {nested_log_path} but not '
653
+ f'found. Skipping...{colorama.Style.RESET_ALL}')
654
+ pass
650
655
  return
651
656
 
652
- if other_log_prompt is not None:
657
+ if log_prompt is not None:
653
658
  # Now we skip other logs (file sync logs) since we lack
654
659
  # utility to determine when these log files are finished
655
660
  # writing.
@@ -702,7 +707,7 @@ def stream_replica_logs(service_name: str, replica_id: int,
702
707
  replica_provisioned = (
703
708
  lambda: _get_replica_status() != serve_state.ReplicaStatus.PROVISIONING)
704
709
  with open(launch_log_file_name, 'r', newline='', encoding='utf-8') as f:
705
- for line in _follow_replica_logs(
710
+ for line in _follow_logs_with_provision_expanding(
706
711
  f,
707
712
  replica_cluster_name,
708
713
  should_stop=replica_provisioned,
sky/skylet/constants.py CHANGED
@@ -75,7 +75,7 @@ TASK_ID_LIST_ENV_VAR = 'SKYPILOT_TASK_IDS'
75
75
  # cluster yaml is updated.
76
76
  #
77
77
  # TODO(zongheng,zhanghao): make the upgrading of skylet automatic?
78
- SKYLET_VERSION = '8'
78
+ SKYLET_VERSION = '9'
79
79
  # The version of the lib files that skylet/jobs use. Whenever there is an API
80
80
  # change for the job_lib or log_lib, we need to bump this version, so that the
81
81
  # user can be notified to update their SkyPilot version on the remote cluster.