skypilot-nightly 1.0.0.dev20241114__py3-none-any.whl → 1.0.0.dev20241115__py3-none-any.whl
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/backends/backend_utils.py +7 -4
- sky/backends/cloud_vm_ray_backend.py +14 -10
- sky/clouds/oci.py +0 -2
- sky/clouds/utils/oci_utils.py +5 -0
- sky/execution.py +37 -22
- sky/jobs/core.py +0 -1
- sky/jobs/utils.py +4 -3
- sky/provision/oci/instance.py +12 -11
- sky/provision/oci/query_utils.py +212 -6
- sky/serve/core.py +1 -0
- sky/serve/serve_utils.py +35 -30
- sky/skylet/constants.py +1 -1
- sky/skylet/job_lib.py +249 -138
- sky/skylet/log_lib.py +1 -34
- sky/skylet/subprocess_daemon.py +33 -13
- sky/utils/controller_utils.py +10 -9
- sky/utils/subprocess_utils.py +50 -0
- {skypilot_nightly-1.0.0.dev20241114.dist-info → skypilot_nightly-1.0.0.dev20241115.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20241114.dist-info → skypilot_nightly-1.0.0.dev20241115.dist-info}/RECORD +24 -24
- {skypilot_nightly-1.0.0.dev20241114.dist-info → skypilot_nightly-1.0.0.dev20241115.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20241114.dist-info → skypilot_nightly-1.0.0.dev20241115.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20241114.dist-info → skypilot_nightly-1.0.0.dev20241115.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20241114.dist-info → skypilot_nightly-1.0.0.dev20241115.dist-info}/top_level.txt +0 -0
sky/serve/serve_utils.py
CHANGED
@@ -46,8 +46,14 @@ NUM_SERVICE_THRESHOLD = (_SYSTEM_MEMORY_GB //
|
|
46
46
|
constants.CONTROLLER_MEMORY_USAGE_GB)
|
47
47
|
_CONTROLLER_URL = 'http://localhost:{CONTROLLER_PORT}'
|
48
48
|
|
49
|
-
|
50
|
-
|
49
|
+
# NOTE(dev): We assume log paths are either in ~/sky_logs/... or ~/.sky/...
|
50
|
+
# and always appear after a space. Be careful when changing UX as this
|
51
|
+
# assumption is used to expand some log files while ignoring others.
|
52
|
+
_SKYPILOT_LOG_DIRS = r'~/(sky_logs|\.sky)'
|
53
|
+
_SKYPILOT_PROVISION_LOG_PATTERN = (
|
54
|
+
fr'.* ({_SKYPILOT_LOG_DIRS}/.*provision\.log)')
|
55
|
+
_SKYPILOT_LOG_PATTERN = fr'.* ({_SKYPILOT_LOG_DIRS}/.*\.log)'
|
56
|
+
|
51
57
|
# TODO(tian): Find all existing replica id and print here.
|
52
58
|
_FAILED_TO_FIND_REPLICA_MSG = (
|
53
59
|
f'{colorama.Fore.RED}Failed to find replica '
|
@@ -591,7 +597,7 @@ def get_latest_version_with_min_replicas(
|
|
591
597
|
return active_versions[-1] if active_versions else None
|
592
598
|
|
593
599
|
|
594
|
-
def _follow_replica_logs(
|
600
|
+
def _follow_logs_with_provision_expanding(
|
595
601
|
file: TextIO,
|
596
602
|
cluster_name: str,
|
597
603
|
*,
|
@@ -599,7 +605,7 @@ def _follow_replica_logs(
|
|
599
605
|
stop_on_eof: bool = False,
|
600
606
|
idle_timeout_seconds: Optional[int] = None,
|
601
607
|
) -> Iterator[str]:
|
602
|
-
"""Follows logs
|
608
|
+
"""Follows logs and expands any provision.log references found.
|
603
609
|
|
604
610
|
Args:
|
605
611
|
file: Log file to read from.
|
@@ -610,7 +616,7 @@ def _follow_replica_logs(
|
|
610
616
|
new content.
|
611
617
|
|
612
618
|
Yields:
|
613
|
-
Log lines
|
619
|
+
Log lines, including expanded content from referenced provision logs.
|
614
620
|
"""
|
615
621
|
|
616
622
|
def cluster_is_up() -> bool:
|
@@ -620,36 +626,35 @@ def _follow_replica_logs(
|
|
620
626
|
return cluster_record['status'] == status_lib.ClusterStatus.UP
|
621
627
|
|
622
628
|
def process_line(line: str) -> Iterator[str]:
|
623
|
-
#
|
624
|
-
#
|
625
|
-
#
|
629
|
+
# The line might be directing users to view logs, like
|
630
|
+
# `✓ Cluster launched: new-http. View logs at: *.log`
|
631
|
+
# We should tail the detailed logs for user.
|
626
632
|
provision_log_prompt = re.match(_SKYPILOT_PROVISION_LOG_PATTERN, line)
|
627
|
-
|
633
|
+
log_prompt = re.match(_SKYPILOT_LOG_PATTERN, line)
|
628
634
|
|
629
635
|
if provision_log_prompt is not None:
|
630
636
|
nested_log_path = os.path.expanduser(provision_log_prompt.group(1))
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
yield
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
idle_timeout_seconds=10)
|
637
|
+
|
638
|
+
try:
|
639
|
+
with open(nested_log_path, 'r', newline='',
|
640
|
+
encoding='utf-8') as f:
|
641
|
+
# We still exit if more than 10 seconds without new content
|
642
|
+
# to avoid any internal bug that causes the launch to fail
|
643
|
+
# while cluster status remains INIT.
|
644
|
+
yield from log_utils.follow_logs(f,
|
645
|
+
should_stop=cluster_is_up,
|
646
|
+
stop_on_eof=stop_on_eof,
|
647
|
+
idle_timeout_seconds=10)
|
648
|
+
except FileNotFoundError:
|
649
|
+
yield line
|
650
|
+
|
651
|
+
yield (f'{colorama.Fore.YELLOW}{colorama.Style.BRIGHT}'
|
652
|
+
f'Try to expand log file {nested_log_path} but not '
|
653
|
+
f'found. Skipping...{colorama.Style.RESET_ALL}')
|
654
|
+
pass
|
650
655
|
return
|
651
656
|
|
652
|
-
if
|
657
|
+
if log_prompt is not None:
|
653
658
|
# Now we skip other logs (file sync logs) since we lack
|
654
659
|
# utility to determine when these log files are finished
|
655
660
|
# writing.
|
@@ -702,7 +707,7 @@ def stream_replica_logs(service_name: str, replica_id: int,
|
|
702
707
|
replica_provisioned = (
|
703
708
|
lambda: _get_replica_status() != serve_state.ReplicaStatus.PROVISIONING)
|
704
709
|
with open(launch_log_file_name, 'r', newline='', encoding='utf-8') as f:
|
705
|
-
for line in _follow_replica_logs(
|
710
|
+
for line in _follow_logs_with_provision_expanding(
|
706
711
|
f,
|
707
712
|
replica_cluster_name,
|
708
713
|
should_stop=replica_provisioned,
|
sky/skylet/constants.py
CHANGED
@@ -75,7 +75,7 @@ TASK_ID_LIST_ENV_VAR = 'SKYPILOT_TASK_IDS'
|
|
75
75
|
# cluster yaml is updated.
|
76
76
|
#
|
77
77
|
# TODO(zongheng,zhanghao): make the upgrading of skylet automatic?
|
78
|
-
SKYLET_VERSION = '8'
|
78
|
+
SKYLET_VERSION = '9'
|
79
79
|
# The version of the lib files that skylet/jobs use. Whenever there is an API
|
80
80
|
# change for the job_lib or log_lib, we need to bump this version, so that the
|
81
81
|
# user can be notified to update their SkyPilot version on the remote cluster.
|