skypilot-nightly 1.0.0.dev20250718__py3-none-any.whl → 1.0.0.dev20250723__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +4 -2
- sky/admin_policy.py +11 -4
- sky/backends/backend_utils.py +50 -24
- sky/backends/cloud_vm_ray_backend.py +41 -38
- sky/catalog/__init__.py +3 -1
- sky/catalog/aws_catalog.py +8 -5
- sky/catalog/azure_catalog.py +8 -5
- sky/catalog/common.py +8 -2
- sky/catalog/cudo_catalog.py +5 -2
- sky/catalog/do_catalog.py +4 -1
- sky/catalog/fluidstack_catalog.py +5 -2
- sky/catalog/gcp_catalog.py +8 -5
- sky/catalog/hyperbolic_catalog.py +5 -2
- sky/catalog/ibm_catalog.py +8 -5
- sky/catalog/lambda_catalog.py +8 -5
- sky/catalog/nebius_catalog.py +8 -5
- sky/catalog/oci_catalog.py +8 -5
- sky/catalog/paperspace_catalog.py +4 -1
- sky/catalog/runpod_catalog.py +5 -2
- sky/catalog/scp_catalog.py +8 -5
- sky/catalog/vast_catalog.py +5 -2
- sky/catalog/vsphere_catalog.py +4 -1
- sky/client/cli/command.py +63 -25
- sky/client/sdk.py +61 -11
- sky/clouds/aws.py +12 -7
- sky/clouds/azure.py +12 -7
- sky/clouds/cloud.py +9 -8
- sky/clouds/cudo.py +13 -7
- sky/clouds/do.py +12 -7
- sky/clouds/fluidstack.py +11 -6
- sky/clouds/gcp.py +12 -7
- sky/clouds/hyperbolic.py +11 -6
- sky/clouds/ibm.py +11 -6
- sky/clouds/kubernetes.py +7 -3
- sky/clouds/lambda_cloud.py +11 -6
- sky/clouds/nebius.py +14 -12
- sky/clouds/oci.py +12 -7
- sky/clouds/paperspace.py +12 -7
- sky/clouds/runpod.py +12 -7
- sky/clouds/scp.py +11 -6
- sky/clouds/vast.py +14 -8
- sky/clouds/vsphere.py +11 -6
- sky/core.py +6 -1
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/{1043-734e57d2b27dfe5d.js → 1043-869d9c78bf5dd3df.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{1141-d8c6404a7c6fffe6.js → 1141-e49a159c30a6c4a7.js} +1 -1
- sky/dashboard/out/_next/static/chunks/1559-18717d96ef2fcbe9.js +30 -0
- sky/dashboard/out/_next/static/chunks/1871-ea0e7283886407ca.js +6 -0
- sky/dashboard/out/_next/static/chunks/2003.b82e6db40ec4c463.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.23778a2b19aabd33.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.2d6e4757f8dfc2b7.js +15 -0
- sky/dashboard/out/_next/static/chunks/{2641.35edc9ccaeaad9e3.js → 2641.74c19c4d45a2c034.js} +1 -1
- sky/dashboard/out/_next/static/chunks/3785.59705416215ff08b.js +1 -0
- sky/dashboard/out/_next/static/chunks/{4725.4c849b1e05c8e9ad.js → 4725.66125dcd9832aa5d.js} +1 -1
- sky/dashboard/out/_next/static/chunks/4869.da729a7db3a31f43.js +16 -0
- sky/dashboard/out/_next/static/chunks/4937.d75809403fc264ac.js +15 -0
- sky/dashboard/out/_next/static/chunks/6135-2abbd0352f8ee061.js +1 -0
- sky/dashboard/out/_next/static/chunks/691.488b4aef97c28727.js +55 -0
- sky/dashboard/out/_next/static/chunks/6990-f64e03df359e04f7.js +1 -0
- sky/dashboard/out/_next/static/chunks/7411-2cc31dc0fdf2a9ad.js +41 -0
- sky/dashboard/out/_next/static/chunks/9025.4a9099bdf3ed4875.js +6 -0
- sky/dashboard/out/_next/static/chunks/938-7ee806653aef0609.js +1 -0
- sky/dashboard/out/_next/static/chunks/9847.387abf8a14d722db.js +30 -0
- sky/dashboard/out/_next/static/chunks/{9984.2b5e3fa69171bff9.js → 9984.0460de9d3adf5582.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-da491665d4289aae.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/{[job]-fa406155b4223d0d.js → [job]-2186770cc2de1623.js} +2 -2
- sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-0c37ee1ac5f3474d.js → [cluster]-95afb019ab85801c.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-3d4be4961e1c94eb.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-89e7daf7b7df02e0.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-a90b4fe4616dc501.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-0d3d1f890c5d188a.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/{[job]-c5b357bfd9502fbe.js → [job]-dc0299ffefebcdbe.js} +2 -2
- sky/dashboard/out/_next/static/chunks/pages/jobs-49f790d12a85027c.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{users-19e98664bdd61643.js → users-6790fcefd5487b13.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-6bcd4b20914d76c9.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-5f7fe4b7d55b8612.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-a305898dc479711e.js +1 -0
- sky/dashboard/out/_next/static/css/b3227360726f12eb.css +3 -0
- sky/dashboard/out/_next/static/mym3Ciwp-zqU7ZpOLGnrW/_buildManifest.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/mounting_utils.py +93 -32
- sky/global_user_state.py +12 -143
- sky/jobs/state.py +9 -88
- sky/jobs/utils.py +28 -13
- sky/provision/nebius/utils.py +3 -6
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/serve/client/sdk.py +6 -2
- sky/serve/controller.py +7 -3
- sky/serve/serve_state.py +1 -1
- sky/serve/serve_utils.py +171 -75
- sky/serve/server/core.py +17 -6
- sky/server/common.py +4 -3
- sky/server/requests/payloads.py +2 -0
- sky/server/requests/requests.py +1 -1
- sky/setup_files/MANIFEST.in +2 -0
- sky/setup_files/alembic.ini +148 -0
- sky/setup_files/dependencies.py +1 -0
- sky/skylet/configs.py +1 -1
- sky/skylet/constants.py +4 -0
- sky/skylet/job_lib.py +1 -1
- sky/skypilot_config.py +1 -1
- sky/users/permission.py +1 -1
- sky/utils/common_utils.py +85 -3
- sky/utils/config_utils.py +15 -0
- sky/utils/db/__init__.py +0 -0
- sky/utils/{db_utils.py → db/db_utils.py} +59 -0
- sky/utils/db/migration_utils.py +93 -0
- sky/utils/locks.py +319 -0
- sky/utils/schemas.py +38 -34
- sky/utils/timeline.py +41 -0
- {skypilot_nightly-1.0.0.dev20250718.dist-info → skypilot_nightly-1.0.0.dev20250723.dist-info}/METADATA +2 -1
- {skypilot_nightly-1.0.0.dev20250718.dist-info → skypilot_nightly-1.0.0.dev20250723.dist-info}/RECORD +134 -125
- sky/dashboard/out/_next/static/FUjweqdImyeYhMYFON-Se/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/1746.27d40aedc22bd2d6.js +0 -60
- sky/dashboard/out/_next/static/chunks/1871-76491ac174a95278.js +0 -6
- sky/dashboard/out/_next/static/chunks/2544.27f70672535675ed.js +0 -1
- sky/dashboard/out/_next/static/chunks/2875.c24c6d57dc82e436.js +0 -25
- sky/dashboard/out/_next/static/chunks/3785.95b94f18aaec7233.js +0 -1
- sky/dashboard/out/_next/static/chunks/3947-b059261d6fa88a1f.js +0 -35
- sky/dashboard/out/_next/static/chunks/430.ed51037d1a4a438b.js +0 -1
- sky/dashboard/out/_next/static/chunks/4869.bdd42f14b51d1d6f.js +0 -16
- sky/dashboard/out/_next/static/chunks/5491.918ffed0ba7a5294.js +0 -20
- sky/dashboard/out/_next/static/chunks/6990-dcb411b566e64cde.js +0 -1
- sky/dashboard/out/_next/static/chunks/804-9f5e98ce84d46bdd.js +0 -21
- sky/dashboard/out/_next/static/chunks/9025.133e9ba5c780afeb.js +0 -6
- sky/dashboard/out/_next/static/chunks/938-6a9ffdaa21eee969.js +0 -1
- sky/dashboard/out/_next/static/chunks/9470-b6f6a35283863a6f.js +0 -1
- sky/dashboard/out/_next/static/chunks/9847.46e613d000c55859.js +0 -30
- sky/dashboard/out/_next/static/chunks/pages/_app-771a40cde532309b.js +0 -20
- sky/dashboard/out/_next/static/chunks/pages/clusters-102d169e87913ba1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-927ddeebe57a8ac3.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-8b0809f59034d509.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra-ae9d2f705ce582c9.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-5bbdc71878f0a068.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-7c0187f43757a548.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-a1e43d9ef51a9cea.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-6b0575ea521af4f3.js +0 -1
- sky/dashboard/out/_next/static/css/219887b94512388c.css +0 -3
- /sky/dashboard/out/_next/static/{FUjweqdImyeYhMYFON-Se → mym3Ciwp-zqU7ZpOLGnrW}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250718.dist-info → skypilot_nightly-1.0.0.dev20250723.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250718.dist-info → skypilot_nightly-1.0.0.dev20250723.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250718.dist-info → skypilot_nightly-1.0.0.dev20250723.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250718.dist-info → skypilot_nightly-1.0.0.dev20250723.dist-info}/top_level.txt +0 -0
sky/serve/serve_utils.py
CHANGED
|
@@ -12,8 +12,8 @@ import shutil
|
|
|
12
12
|
import threading
|
|
13
13
|
import time
|
|
14
14
|
import typing
|
|
15
|
-
from typing import (Any, Callable, DefaultDict, Dict, Generic, Iterator,
|
|
16
|
-
Optional, TextIO, Type, TypeVar, Union)
|
|
15
|
+
from typing import (Any, Callable, DefaultDict, Deque, Dict, Generic, Iterator,
|
|
16
|
+
List, Optional, TextIO, Type, TypeVar, Union)
|
|
17
17
|
import uuid
|
|
18
18
|
|
|
19
19
|
import colorama
|
|
@@ -782,6 +782,54 @@ def get_latest_version_with_min_replicas(
|
|
|
782
782
|
return active_versions[-1] if active_versions else None
|
|
783
783
|
|
|
784
784
|
|
|
785
|
+
def _process_line(line: str,
|
|
786
|
+
cluster_name: str,
|
|
787
|
+
stop_on_eof: bool = False) -> Iterator[str]:
|
|
788
|
+
# The line might be directing users to view logs, like
|
|
789
|
+
# `✓ Cluster launched: new-http. View logs at: *.log`
|
|
790
|
+
# We should tail the detailed logs for user.
|
|
791
|
+
def cluster_is_up() -> bool:
|
|
792
|
+
cluster_record = global_user_state.get_cluster_from_name(cluster_name)
|
|
793
|
+
if cluster_record is None:
|
|
794
|
+
return False
|
|
795
|
+
return cluster_record['status'] == status_lib.ClusterStatus.UP
|
|
796
|
+
|
|
797
|
+
provision_log_prompt = re.match(_SKYPILOT_PROVISION_LOG_PATTERN, line)
|
|
798
|
+
log_prompt = re.match(_SKYPILOT_LOG_PATTERN, line)
|
|
799
|
+
|
|
800
|
+
if provision_log_prompt is not None:
|
|
801
|
+
nested_log_path = os.path.expanduser(provision_log_prompt.group(1))
|
|
802
|
+
|
|
803
|
+
try:
|
|
804
|
+
with open(nested_log_path, 'r', newline='', encoding='utf-8') as f:
|
|
805
|
+
# We still exit if more than 10 seconds without new content
|
|
806
|
+
# to avoid any internal bug that causes the launch to fail
|
|
807
|
+
# while cluster status remains INIT.
|
|
808
|
+
yield from log_utils.follow_logs(f,
|
|
809
|
+
should_stop=cluster_is_up,
|
|
810
|
+
stop_on_eof=stop_on_eof,
|
|
811
|
+
idle_timeout_seconds=10)
|
|
812
|
+
except FileNotFoundError:
|
|
813
|
+
yield line
|
|
814
|
+
|
|
815
|
+
yield (f'{colorama.Fore.YELLOW}{colorama.Style.BRIGHT}'
|
|
816
|
+
f'Try to expand log file {nested_log_path} but not '
|
|
817
|
+
f'found. Skipping...{colorama.Style.RESET_ALL}')
|
|
818
|
+
pass
|
|
819
|
+
return
|
|
820
|
+
|
|
821
|
+
if log_prompt is not None:
|
|
822
|
+
# Now we skip other logs (file sync logs) since we lack
|
|
823
|
+
# utility to determine when these log files are finished
|
|
824
|
+
# writing.
|
|
825
|
+
# TODO(tian): We should not skip these logs since there are
|
|
826
|
+
# small chance that error will happen in file sync. Need to
|
|
827
|
+
# find a better way to do this.
|
|
828
|
+
return
|
|
829
|
+
|
|
830
|
+
yield line
|
|
831
|
+
|
|
832
|
+
|
|
785
833
|
def _follow_logs_with_provision_expanding(
|
|
786
834
|
file: TextIO,
|
|
787
835
|
cluster_name: str,
|
|
@@ -804,51 +852,8 @@ def _follow_logs_with_provision_expanding(
|
|
|
804
852
|
Log lines, including expanded content from referenced provision logs.
|
|
805
853
|
"""
|
|
806
854
|
|
|
807
|
-
def cluster_is_up() -> bool:
|
|
808
|
-
cluster_record = global_user_state.get_cluster_from_name(cluster_name)
|
|
809
|
-
if cluster_record is None:
|
|
810
|
-
return False
|
|
811
|
-
return cluster_record['status'] == status_lib.ClusterStatus.UP
|
|
812
|
-
|
|
813
855
|
def process_line(line: str) -> Iterator[str]:
|
|
814
|
-
|
|
815
|
-
# `✓ Cluster launched: new-http. View logs at: *.log`
|
|
816
|
-
# We should tail the detailed logs for user.
|
|
817
|
-
provision_log_prompt = re.match(_SKYPILOT_PROVISION_LOG_PATTERN, line)
|
|
818
|
-
log_prompt = re.match(_SKYPILOT_LOG_PATTERN, line)
|
|
819
|
-
|
|
820
|
-
if provision_log_prompt is not None:
|
|
821
|
-
nested_log_path = os.path.expanduser(provision_log_prompt.group(1))
|
|
822
|
-
|
|
823
|
-
try:
|
|
824
|
-
with open(nested_log_path, 'r', newline='',
|
|
825
|
-
encoding='utf-8') as f:
|
|
826
|
-
# We still exit if more than 10 seconds without new content
|
|
827
|
-
# to avoid any internal bug that causes the launch to fail
|
|
828
|
-
# while cluster status remains INIT.
|
|
829
|
-
yield from log_utils.follow_logs(f,
|
|
830
|
-
should_stop=cluster_is_up,
|
|
831
|
-
stop_on_eof=stop_on_eof,
|
|
832
|
-
idle_timeout_seconds=10)
|
|
833
|
-
except FileNotFoundError:
|
|
834
|
-
yield line
|
|
835
|
-
|
|
836
|
-
yield (f'{colorama.Fore.YELLOW}{colorama.Style.BRIGHT}'
|
|
837
|
-
f'Try to expand log file {nested_log_path} but not '
|
|
838
|
-
f'found. Skipping...{colorama.Style.RESET_ALL}')
|
|
839
|
-
pass
|
|
840
|
-
return
|
|
841
|
-
|
|
842
|
-
if log_prompt is not None:
|
|
843
|
-
# Now we skip other logs (file sync logs) since we lack
|
|
844
|
-
# utility to determine when these log files are finished
|
|
845
|
-
# writing.
|
|
846
|
-
# TODO(tian): We should not skip these logs since there are
|
|
847
|
-
# small chance that error will happen in file sync. Need to
|
|
848
|
-
# find a better way to do this.
|
|
849
|
-
return
|
|
850
|
-
|
|
851
|
-
yield line
|
|
856
|
+
yield from _process_line(line, cluster_name, stop_on_eof=stop_on_eof)
|
|
852
857
|
|
|
853
858
|
return log_utils.follow_logs(file,
|
|
854
859
|
should_stop=should_stop,
|
|
@@ -857,18 +862,51 @@ def _follow_logs_with_provision_expanding(
|
|
|
857
862
|
idle_timeout_seconds=idle_timeout_seconds)
|
|
858
863
|
|
|
859
864
|
|
|
860
|
-
def
|
|
861
|
-
|
|
865
|
+
def _capped_follow_logs_with_provision_expanding(
|
|
866
|
+
log_list: List[str],
|
|
867
|
+
cluster_name: str,
|
|
868
|
+
*,
|
|
869
|
+
line_cap: int = 100,
|
|
870
|
+
) -> Iterator[str]:
|
|
871
|
+
"""Follows logs and expands any provision.log references found.
|
|
872
|
+
|
|
873
|
+
Args:
|
|
874
|
+
log_list: List of Log Lines to read from.
|
|
875
|
+
cluster_name: Name of the cluster being launched.
|
|
876
|
+
line_cap: Number of last lines to return
|
|
877
|
+
|
|
878
|
+
Yields:
|
|
879
|
+
Log lines, including expanded content from referenced provision logs.
|
|
880
|
+
"""
|
|
881
|
+
all_lines: Deque[str] = collections.deque(maxlen=line_cap)
|
|
882
|
+
|
|
883
|
+
for line in log_list:
|
|
884
|
+
for processed in _process_line(line=line,
|
|
885
|
+
cluster_name=cluster_name,
|
|
886
|
+
stop_on_eof=False):
|
|
887
|
+
all_lines.append(processed)
|
|
888
|
+
|
|
889
|
+
yield from all_lines
|
|
890
|
+
|
|
891
|
+
|
|
892
|
+
def stream_replica_logs(service_name: str, replica_id: int, follow: bool,
|
|
893
|
+
tail: Optional[int]) -> str:
|
|
862
894
|
msg = check_service_status_healthy(service_name)
|
|
863
895
|
if msg is not None:
|
|
864
896
|
return msg
|
|
865
897
|
print(f'{colorama.Fore.YELLOW}Start streaming logs for launching process '
|
|
866
898
|
f'of replica {replica_id}.{colorama.Style.RESET_ALL}')
|
|
867
|
-
|
|
868
899
|
log_file_name = generate_replica_log_file_name(service_name, replica_id)
|
|
869
900
|
if os.path.exists(log_file_name):
|
|
870
|
-
|
|
871
|
-
|
|
901
|
+
if tail is not None:
|
|
902
|
+
lines = common_utils.read_last_n_lines(log_file_name, tail)
|
|
903
|
+
for line in lines:
|
|
904
|
+
if not line.endswith('\n'):
|
|
905
|
+
line += '\n'
|
|
906
|
+
print(line, end='', flush=True)
|
|
907
|
+
else:
|
|
908
|
+
with open(log_file_name, 'r', encoding='utf-8') as f:
|
|
909
|
+
print(f.read(), flush=True)
|
|
872
910
|
return ''
|
|
873
911
|
|
|
874
912
|
launch_log_file_name = generate_replica_launch_log_file_name(
|
|
@@ -891,24 +929,48 @@ def stream_replica_logs(service_name: str, replica_id: int,
|
|
|
891
929
|
|
|
892
930
|
replica_provisioned = (
|
|
893
931
|
lambda: _get_replica_status() != serve_state.ReplicaStatus.PROVISIONING)
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
932
|
+
|
|
933
|
+
# Handle launch logs based on number parameter
|
|
934
|
+
final_lines_to_print = []
|
|
935
|
+
if tail is not None:
|
|
936
|
+
static_lines = common_utils.read_last_n_lines(launch_log_file_name,
|
|
937
|
+
tail)
|
|
938
|
+
lines = list(
|
|
939
|
+
_capped_follow_logs_with_provision_expanding(
|
|
940
|
+
log_list=static_lines,
|
|
941
|
+
cluster_name=replica_cluster_name,
|
|
942
|
+
line_cap=tail,
|
|
943
|
+
))
|
|
944
|
+
final_lines_to_print += lines
|
|
945
|
+
else:
|
|
946
|
+
with open(launch_log_file_name, 'r', newline='', encoding='utf-8') as f:
|
|
947
|
+
for line in _follow_logs_with_provision_expanding(
|
|
948
|
+
f,
|
|
949
|
+
replica_cluster_name,
|
|
950
|
+
should_stop=replica_provisioned,
|
|
951
|
+
stop_on_eof=not follow,
|
|
952
|
+
):
|
|
953
|
+
print(line, end='', flush=True)
|
|
902
954
|
|
|
903
955
|
if (not follow and
|
|
904
956
|
_get_replica_status() == serve_state.ReplicaStatus.PROVISIONING):
|
|
905
957
|
# Early exit if not following the logs.
|
|
958
|
+
if tail is not None:
|
|
959
|
+
for line in final_lines_to_print:
|
|
960
|
+
if not line.endswith('\n'):
|
|
961
|
+
line += '\n'
|
|
962
|
+
print(line, end='', flush=True)
|
|
906
963
|
return ''
|
|
907
964
|
|
|
908
965
|
backend = backends.CloudVmRayBackend()
|
|
909
966
|
handle = global_user_state.get_handle_from_cluster_name(
|
|
910
967
|
replica_cluster_name)
|
|
911
968
|
if handle is None:
|
|
969
|
+
if tail is not None:
|
|
970
|
+
for line in final_lines_to_print:
|
|
971
|
+
if not line.endswith('\n'):
|
|
972
|
+
line += '\n'
|
|
973
|
+
print(line, end='', flush=True)
|
|
912
974
|
return _FAILED_TO_FIND_REPLICA_MSG.format(replica_id=replica_id)
|
|
913
975
|
assert isinstance(handle, backends.CloudVmRayResourceHandle), handle
|
|
914
976
|
|
|
@@ -917,15 +979,37 @@ def stream_replica_logs(service_name: str, replica_id: int,
|
|
|
917
979
|
f'of replica {replica_id}...{colorama.Style.RESET_ALL}')
|
|
918
980
|
|
|
919
981
|
# Always tail the latest logs, which represent user setup & run.
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
982
|
+
if tail is None:
|
|
983
|
+
returncode = backend.tail_logs(handle, job_id=None, follow=follow)
|
|
984
|
+
if returncode != 0:
|
|
985
|
+
return (f'{colorama.Fore.RED}Failed to stream logs for replica '
|
|
986
|
+
f'{replica_id}.{colorama.Style.RESET_ALL}')
|
|
987
|
+
elif not follow and tail > 0:
|
|
988
|
+
final = backend.tail_logs(handle,
|
|
989
|
+
job_id=None,
|
|
990
|
+
follow=follow,
|
|
991
|
+
tail=tail,
|
|
992
|
+
stream_logs=False,
|
|
993
|
+
require_outputs=True,
|
|
994
|
+
process_stream=True)
|
|
995
|
+
if isinstance(final, int) or (final[0] != 0 and final[0] != 101):
|
|
996
|
+
if tail is not None:
|
|
997
|
+
for line in final_lines_to_print:
|
|
998
|
+
if not line.endswith('\n'):
|
|
999
|
+
line += '\n'
|
|
1000
|
+
print(line, end='', flush=True)
|
|
1001
|
+
return (f'{colorama.Fore.RED}Failed to stream logs for replica '
|
|
1002
|
+
f'{replica_id}.{colorama.Style.RESET_ALL}')
|
|
1003
|
+
final_lines_to_print += final[1].splitlines()
|
|
1004
|
+
for line in final_lines_to_print[-tail:]:
|
|
1005
|
+
if not line.endswith('\n'):
|
|
1006
|
+
line += '\n'
|
|
1007
|
+
print(line, end='', flush=True)
|
|
924
1008
|
return ''
|
|
925
1009
|
|
|
926
1010
|
|
|
927
1011
|
def stream_serve_process_logs(service_name: str, stream_controller: bool,
|
|
928
|
-
follow: bool) -> str:
|
|
1012
|
+
follow: bool, tail: Optional[int]) -> str:
|
|
929
1013
|
msg = check_service_status_healthy(service_name)
|
|
930
1014
|
if msg is not None:
|
|
931
1015
|
return msg
|
|
@@ -940,14 +1024,24 @@ def stream_serve_process_logs(service_name: str, stream_controller: bool,
|
|
|
940
1024
|
return True
|
|
941
1025
|
return record['status'] in serve_state.ServiceStatus.failed_statuses()
|
|
942
1026
|
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
):
|
|
1027
|
+
if tail is not None:
|
|
1028
|
+
lines = common_utils.read_last_n_lines(os.path.expanduser(log_file),
|
|
1029
|
+
tail)
|
|
1030
|
+
for line in lines:
|
|
1031
|
+
if not line.endswith('\n'):
|
|
1032
|
+
line += '\n'
|
|
950
1033
|
print(line, end='', flush=True)
|
|
1034
|
+
else:
|
|
1035
|
+
with open(os.path.expanduser(log_file),
|
|
1036
|
+
'r',
|
|
1037
|
+
newline='',
|
|
1038
|
+
encoding='utf-8') as f:
|
|
1039
|
+
for line in log_utils.follow_logs(
|
|
1040
|
+
f,
|
|
1041
|
+
should_stop=_service_is_terminal,
|
|
1042
|
+
stop_on_eof=not follow,
|
|
1043
|
+
):
|
|
1044
|
+
print(line, end='', flush=True)
|
|
951
1045
|
return ''
|
|
952
1046
|
|
|
953
1047
|
|
|
@@ -1140,20 +1234,22 @@ class ServeCodeGen:
|
|
|
1140
1234
|
|
|
1141
1235
|
@classmethod
|
|
1142
1236
|
def stream_replica_logs(cls, service_name: str, replica_id: int,
|
|
1143
|
-
follow: bool) -> str:
|
|
1237
|
+
follow: bool, tail: Optional[int]) -> str:
|
|
1144
1238
|
code = [
|
|
1145
1239
|
'msg = serve_utils.stream_replica_logs('
|
|
1146
|
-
f'{service_name!r}, {replica_id!r}, follow={follow})',
|
|
1240
|
+
f'{service_name!r}, {replica_id!r}, follow={follow}, tail={tail})',
|
|
1147
1241
|
'print(msg, flush=True)'
|
|
1148
1242
|
]
|
|
1149
1243
|
return cls._build(code)
|
|
1150
1244
|
|
|
1151
1245
|
@classmethod
|
|
1152
1246
|
def stream_serve_process_logs(cls, service_name: str,
|
|
1153
|
-
stream_controller: bool, follow: bool
|
|
1247
|
+
stream_controller: bool, follow: bool,
|
|
1248
|
+
tail: Optional[int]) -> str:
|
|
1154
1249
|
code = [
|
|
1155
1250
|
f'msg = serve_utils.stream_serve_process_logs({service_name!r}, '
|
|
1156
|
-
f'{stream_controller}, follow={follow}
|
|
1251
|
+
f'{stream_controller}, follow={follow}, tail={tail})',
|
|
1252
|
+
'print(msg, flush=True)'
|
|
1157
1253
|
]
|
|
1158
1254
|
return cls._build(code)
|
|
1159
1255
|
|
sky/serve/server/core.py
CHANGED
|
@@ -740,6 +740,7 @@ def tail_logs(
|
|
|
740
740
|
target: ServiceComponentOrStr,
|
|
741
741
|
replica_id: Optional[int] = None,
|
|
742
742
|
follow: bool = True,
|
|
743
|
+
tail: Optional[int] = None,
|
|
743
744
|
) -> None:
|
|
744
745
|
"""Tails logs for a service.
|
|
745
746
|
|
|
@@ -805,11 +806,14 @@ def tail_logs(
|
|
|
805
806
|
service_name,
|
|
806
807
|
stream_controller=(
|
|
807
808
|
target == serve_utils.ServiceComponent.CONTROLLER),
|
|
808
|
-
follow=follow
|
|
809
|
+
follow=follow,
|
|
810
|
+
tail=tail)
|
|
809
811
|
else:
|
|
810
812
|
assert replica_id is not None, service_name
|
|
811
|
-
code = serve_utils.ServeCodeGen.stream_replica_logs(
|
|
812
|
-
|
|
813
|
+
code = serve_utils.ServeCodeGen.stream_replica_logs(service_name,
|
|
814
|
+
replica_id,
|
|
815
|
+
follow,
|
|
816
|
+
tail=tail)
|
|
813
817
|
|
|
814
818
|
# With the stdin=subprocess.DEVNULL, the ctrl-c will not directly
|
|
815
819
|
# kill the process, so we need to handle it manually here.
|
|
@@ -834,6 +838,7 @@ def sync_down_logs(
|
|
|
834
838
|
targets: Union[ServiceComponentOrStr, List[ServiceComponentOrStr],
|
|
835
839
|
None] = None,
|
|
836
840
|
replica_ids: Optional[List[int]] = None,
|
|
841
|
+
tail: Optional[int] = None,
|
|
837
842
|
) -> str:
|
|
838
843
|
"""Sync down logs from the controller for the given service.
|
|
839
844
|
|
|
@@ -936,16 +941,22 @@ def sync_down_logs(
|
|
|
936
941
|
if component == serve_utils.ServiceComponent.CONTROLLER:
|
|
937
942
|
stream_logs_code = (
|
|
938
943
|
serve_utils.ServeCodeGen.stream_serve_process_logs(
|
|
939
|
-
service_name,
|
|
944
|
+
service_name,
|
|
945
|
+
stream_controller=True,
|
|
946
|
+
follow=False,
|
|
947
|
+
tail=tail))
|
|
940
948
|
elif component == serve_utils.ServiceComponent.LOAD_BALANCER:
|
|
941
949
|
stream_logs_code = (
|
|
942
950
|
serve_utils.ServeCodeGen.stream_serve_process_logs(
|
|
943
|
-
service_name,
|
|
951
|
+
service_name,
|
|
952
|
+
stream_controller=False,
|
|
953
|
+
follow=False,
|
|
954
|
+
tail=tail))
|
|
944
955
|
elif component == serve_utils.ServiceComponent.REPLICA:
|
|
945
956
|
replica_id = target.replica_id
|
|
946
957
|
assert replica_id is not None, service_name
|
|
947
958
|
stream_logs_code = serve_utils.ServeCodeGen.stream_replica_logs(
|
|
948
|
-
service_name, replica_id, follow=False)
|
|
959
|
+
service_name, replica_id, follow=False, tail=tail)
|
|
949
960
|
else:
|
|
950
961
|
assert False, component
|
|
951
962
|
|
sky/server/common.py
CHANGED
|
@@ -252,8 +252,9 @@ def get_dashboard_url(server_url: str,
|
|
|
252
252
|
|
|
253
253
|
|
|
254
254
|
@annotations.lru_cache(scope='global')
|
|
255
|
-
def is_api_server_local():
|
|
256
|
-
|
|
255
|
+
def is_api_server_local(endpoint: Optional[str] = None):
|
|
256
|
+
server_url = endpoint if endpoint is not None else get_server_url()
|
|
257
|
+
return server_url in AVAILABLE_LOCAL_API_SERVER_URLS
|
|
257
258
|
|
|
258
259
|
|
|
259
260
|
def _handle_non_200_server_status(
|
|
@@ -566,7 +567,7 @@ def check_server_healthy(
|
|
|
566
567
|
api_server_status = api_server_info.status
|
|
567
568
|
if api_server_status == ApiServerStatus.VERSION_MISMATCH:
|
|
568
569
|
msg = api_server_info.error
|
|
569
|
-
if is_api_server_local():
|
|
570
|
+
if is_api_server_local(endpoint):
|
|
570
571
|
# For local server, just hint user to restart the server to get
|
|
571
572
|
# a consistent version.
|
|
572
573
|
msg = _LOCAL_API_SERVER_RESTART_HINT
|
sky/server/requests/payloads.py
CHANGED
|
@@ -557,6 +557,7 @@ class ServeLogsBody(RequestBody):
|
|
|
557
557
|
target: Union[str, serve.ServiceComponent]
|
|
558
558
|
replica_id: Optional[int] = None
|
|
559
559
|
follow: bool = True
|
|
560
|
+
tail: Optional[int] = None
|
|
560
561
|
|
|
561
562
|
|
|
562
563
|
class ServeDownloadLogsBody(RequestBody):
|
|
@@ -566,6 +567,7 @@ class ServeDownloadLogsBody(RequestBody):
|
|
|
566
567
|
targets: Optional[Union[str, serve.ServiceComponent,
|
|
567
568
|
List[Union[str, serve.ServiceComponent]]]]
|
|
568
569
|
replica_ids: Optional[List[int]] = None
|
|
570
|
+
tail: Optional[int] = None
|
|
569
571
|
|
|
570
572
|
|
|
571
573
|
class ServeStatusBody(RequestBody):
|
sky/server/requests/requests.py
CHANGED
|
@@ -29,10 +29,10 @@ from sky.server.requests.serializers import decoders
|
|
|
29
29
|
from sky.server.requests.serializers import encoders
|
|
30
30
|
from sky.utils import common
|
|
31
31
|
from sky.utils import common_utils
|
|
32
|
-
from sky.utils import db_utils
|
|
33
32
|
from sky.utils import env_options
|
|
34
33
|
from sky.utils import subprocess_utils
|
|
35
34
|
from sky.utils import ux_utils
|
|
35
|
+
from sky.utils.db import db_utils
|
|
36
36
|
|
|
37
37
|
logger = sky_logging.init_logger(__name__)
|
|
38
38
|
|
sky/setup_files/MANIFEST.in
CHANGED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# alembic configuration for global user state, jobs state, and sky config db migrations.
|
|
2
|
+
|
|
3
|
+
[DEFAULT]
|
|
4
|
+
# path to migration scripts.
|
|
5
|
+
# this is typically a path given in POSIX (e.g. forward slashes)
|
|
6
|
+
# format, relative to the token %(here)s which refers to the location of this
|
|
7
|
+
# ini file
|
|
8
|
+
script_location = %(here)s/../schemas/db
|
|
9
|
+
|
|
10
|
+
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
|
|
11
|
+
# Uncomment the line below if you want the files to be prepended with date and time
|
|
12
|
+
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
|
|
13
|
+
# for all available tokens
|
|
14
|
+
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
|
|
15
|
+
|
|
16
|
+
# sys.path path, will be prepended to sys.path if present.
|
|
17
|
+
# defaults to the current working directory. for multiple paths, the path separator
|
|
18
|
+
# is defined by "path_separator" below.
|
|
19
|
+
prepend_sys_path = .
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# timezone to use when rendering the date within the migration file
|
|
23
|
+
# as well as the filename.
|
|
24
|
+
# If specified, requires the python>=3.9 or backports.zoneinfo library and tzdata library.
|
|
25
|
+
# Any required deps can installed by adding `alembic[tz]` to the pip requirements
|
|
26
|
+
# string value is passed to ZoneInfo()
|
|
27
|
+
# leave blank for localtime
|
|
28
|
+
# timezone =
|
|
29
|
+
|
|
30
|
+
# max length of characters to apply to the "slug" field
|
|
31
|
+
# truncate_slug_length = 40
|
|
32
|
+
|
|
33
|
+
# set to 'true' to run the environment during
|
|
34
|
+
# the 'revision' command, regardless of autogenerate
|
|
35
|
+
# revision_environment = false
|
|
36
|
+
|
|
37
|
+
# set to 'true' to allow .pyc and .pyo files without
|
|
38
|
+
# a source .py file to be detected as revisions in the
|
|
39
|
+
# versions/ directory
|
|
40
|
+
# sourceless = false
|
|
41
|
+
|
|
42
|
+
# version location specification; This defaults
|
|
43
|
+
# to <script_location>/versions. When using multiple version
|
|
44
|
+
# directories, initial revisions must be specified with --version-path.
|
|
45
|
+
# The path separator used here should be the separator specified by "path_separator"
|
|
46
|
+
# below.
|
|
47
|
+
# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions
|
|
48
|
+
|
|
49
|
+
# path_separator; This indicates what character is used to split lists of file
|
|
50
|
+
# paths, including version_locations and prepend_sys_path within configparser
|
|
51
|
+
# files such as alembic.ini.
|
|
52
|
+
# The default rendered in new alembic.ini files is "os", which uses os.pathsep
|
|
53
|
+
# to provide os-dependent path splitting.
|
|
54
|
+
#
|
|
55
|
+
# Note that in order to support legacy alembic.ini files, this default does NOT
|
|
56
|
+
# take place if path_separator is not present in alembic.ini. If this
|
|
57
|
+
# option is omitted entirely, fallback logic is as follows:
|
|
58
|
+
#
|
|
59
|
+
# 1. Parsing of the version_locations option falls back to using the legacy
|
|
60
|
+
# "version_path_separator" key, which if absent then falls back to the legacy
|
|
61
|
+
# behavior of splitting on spaces and/or commas.
|
|
62
|
+
# 2. Parsing of the prepend_sys_path option falls back to the legacy
|
|
63
|
+
# behavior of splitting on spaces, commas, or colons.
|
|
64
|
+
#
|
|
65
|
+
# Valid values for path_separator are:
|
|
66
|
+
#
|
|
67
|
+
# path_separator = :
|
|
68
|
+
# path_separator = ;
|
|
69
|
+
# path_separator = space
|
|
70
|
+
# path_separator = newline
|
|
71
|
+
#
|
|
72
|
+
# Use os.pathsep. Default configuration used for new projects.
|
|
73
|
+
path_separator = os
|
|
74
|
+
|
|
75
|
+
# set to 'true' to search source files recursively
|
|
76
|
+
# in each "version_locations" directory
|
|
77
|
+
# new in Alembic version 1.10
|
|
78
|
+
# recursive_version_locations = false
|
|
79
|
+
|
|
80
|
+
# the output encoding used when revision files
|
|
81
|
+
# are written from script.py.mako
|
|
82
|
+
# output_encoding = utf-8
|
|
83
|
+
|
|
84
|
+
# database URL. This is consumed by the user-maintained env.py script only.
|
|
85
|
+
# other means of configuring database URLs may be customized within the env.py
|
|
86
|
+
# file.
|
|
87
|
+
# sqlalchemy.url = driver://user:pass@localhost/dbname
|
|
88
|
+
|
|
89
|
+
[state_db]
|
|
90
|
+
version_locations = %(here)s/../schemas/db/global_user_state
|
|
91
|
+
version_table = alembic_version_state_db
|
|
92
|
+
|
|
93
|
+
[spot_jobs_db]
|
|
94
|
+
version_locations = %(here)s/../schemas/db/spot_jobs
|
|
95
|
+
version_table = alembic_version_spot_jobs_db
|
|
96
|
+
|
|
97
|
+
[post_write_hooks]
|
|
98
|
+
# post_write_hooks defines scripts or Python functions that are run
|
|
99
|
+
# on newly generated revision scripts. See the documentation for further
|
|
100
|
+
# detail and examples
|
|
101
|
+
|
|
102
|
+
# format using "black" - use the console_scripts runner, against the "black" entrypoint
|
|
103
|
+
# hooks = black
|
|
104
|
+
# black.type = console_scripts
|
|
105
|
+
# black.entrypoint = black
|
|
106
|
+
# black.options = -l 79 REVISION_SCRIPT_FILENAME
|
|
107
|
+
|
|
108
|
+
# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
|
|
109
|
+
# hooks = ruff
|
|
110
|
+
# ruff.type = exec
|
|
111
|
+
# ruff.executable = %(here)s/.venv/bin/ruff
|
|
112
|
+
# ruff.options = check --fix REVISION_SCRIPT_FILENAME
|
|
113
|
+
|
|
114
|
+
# Logging configuration. This is also consumed by the user-maintained
|
|
115
|
+
# env.py script only.
|
|
116
|
+
[loggers]
|
|
117
|
+
keys = root,sqlalchemy,alembic
|
|
118
|
+
|
|
119
|
+
[handlers]
|
|
120
|
+
keys = console
|
|
121
|
+
|
|
122
|
+
[formatters]
|
|
123
|
+
keys = generic
|
|
124
|
+
|
|
125
|
+
[logger_root]
|
|
126
|
+
level = WARNING
|
|
127
|
+
handlers = console
|
|
128
|
+
qualname =
|
|
129
|
+
|
|
130
|
+
[logger_sqlalchemy]
|
|
131
|
+
level = WARNING
|
|
132
|
+
handlers =
|
|
133
|
+
qualname = sqlalchemy.engine
|
|
134
|
+
|
|
135
|
+
[logger_alembic]
|
|
136
|
+
level = WARNING
|
|
137
|
+
handlers =
|
|
138
|
+
qualname = alembic
|
|
139
|
+
|
|
140
|
+
[handler_console]
|
|
141
|
+
class = StreamHandler
|
|
142
|
+
args = (sys.stderr,)
|
|
143
|
+
level = NOTSET
|
|
144
|
+
formatter = generic
|
|
145
|
+
|
|
146
|
+
[formatter_generic]
|
|
147
|
+
format = %(levelname)-5.5s [%(name)s] %(message)s
|
|
148
|
+
datefmt = %H:%M:%S
|
sky/setup_files/dependencies.py
CHANGED
sky/skylet/configs.py
CHANGED
sky/skylet/constants.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""Constants for SkyPilot."""
|
|
2
|
+
import os
|
|
2
3
|
from typing import List, Tuple
|
|
3
4
|
|
|
4
5
|
from packaging import version
|
|
@@ -491,3 +492,6 @@ DEFAULT_PRIORITY = 0
|
|
|
491
492
|
|
|
492
493
|
GRACE_PERIOD_SECONDS_ENV_VAR = SKYPILOT_ENV_VAR_PREFIX + 'GRACE_PERIOD_SECONDS'
|
|
493
494
|
COST_REPORT_DEFAULT_DAYS = 30
|
|
495
|
+
|
|
496
|
+
# The directory for file locks.
|
|
497
|
+
SKY_LOCKS_DIR = os.path.expanduser('~/.sky/locks')
|
sky/skylet/job_lib.py
CHANGED
|
@@ -24,10 +24,10 @@ from sky import sky_logging
|
|
|
24
24
|
from sky.adaptors import common as adaptors_common
|
|
25
25
|
from sky.skylet import constants
|
|
26
26
|
from sky.utils import common_utils
|
|
27
|
-
from sky.utils import db_utils
|
|
28
27
|
from sky.utils import log_utils
|
|
29
28
|
from sky.utils import message_utils
|
|
30
29
|
from sky.utils import subprocess_utils
|
|
30
|
+
from sky.utils.db import db_utils
|
|
31
31
|
|
|
32
32
|
if typing.TYPE_CHECKING:
|
|
33
33
|
import psutil
|
sky/skypilot_config.py
CHANGED
|
@@ -73,9 +73,9 @@ from sky.skylet import constants
|
|
|
73
73
|
from sky.utils import common_utils
|
|
74
74
|
from sky.utils import config_utils
|
|
75
75
|
from sky.utils import context
|
|
76
|
-
from sky.utils import db_utils
|
|
77
76
|
from sky.utils import schemas
|
|
78
77
|
from sky.utils import ux_utils
|
|
78
|
+
from sky.utils.db import db_utils
|
|
79
79
|
from sky.utils.kubernetes import config_map_utils
|
|
80
80
|
|
|
81
81
|
if typing.TYPE_CHECKING:
|
sky/users/permission.py
CHANGED
|
@@ -15,7 +15,7 @@ from sky import sky_logging
|
|
|
15
15
|
from sky.skylet import constants
|
|
16
16
|
from sky.users import rbac
|
|
17
17
|
from sky.utils import common_utils
|
|
18
|
-
from sky.utils import db_utils
|
|
18
|
+
from sky.utils.db import db_utils
|
|
19
19
|
|
|
20
20
|
logging.getLogger('casbin.policy').setLevel(sky_logging.ERROR)
|
|
21
21
|
logging.getLogger('casbin.role').setLevel(sky_logging.ERROR)
|