skypilot-nightly 1.0.0.dev20250827__py3-none-any.whl → 1.0.0.dev20250829__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +2 -2
- sky/admin_policy.py +11 -10
- sky/authentication.py +1 -1
- sky/backends/backend.py +3 -5
- sky/backends/backend_utils.py +140 -52
- sky/backends/cloud_vm_ray_backend.py +30 -25
- sky/backends/local_docker_backend.py +3 -8
- sky/backends/wheel_utils.py +35 -8
- sky/client/cli/command.py +41 -9
- sky/client/sdk.py +23 -8
- sky/client/sdk_async.py +6 -2
- sky/clouds/aws.py +118 -1
- sky/core.py +1 -4
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/global_user_state.py +82 -22
- sky/jobs/client/sdk.py +5 -2
- sky/jobs/recovery_strategy.py +9 -4
- sky/jobs/server/server.py +2 -1
- sky/logs/agent.py +2 -2
- sky/logs/aws.py +6 -3
- sky/provision/aws/config.py +78 -3
- sky/provision/aws/instance.py +45 -6
- sky/provision/do/utils.py +2 -1
- sky/provision/kubernetes/instance.py +55 -11
- sky/provision/kubernetes/utils.py +11 -2
- sky/provision/nebius/utils.py +36 -2
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/serve/client/impl.py +5 -4
- sky/serve/replica_managers.py +4 -3
- sky/serve/serve_utils.py +2 -2
- sky/serve/server/impl.py +3 -2
- sky/serve/server/server.py +2 -1
- sky/server/auth/oauth2_proxy.py +10 -4
- sky/server/common.py +4 -4
- sky/server/daemons.py +16 -5
- sky/server/requests/executor.py +5 -3
- sky/server/requests/payloads.py +3 -1
- sky/server/requests/preconditions.py +3 -2
- sky/server/requests/requests.py +121 -19
- sky/server/server.py +85 -60
- sky/server/stream_utils.py +7 -5
- sky/setup_files/dependencies.py +6 -1
- sky/sky_logging.py +28 -0
- sky/skylet/constants.py +6 -0
- sky/skylet/events.py +2 -3
- sky/skypilot_config.py +10 -10
- sky/task.py +1 -1
- sky/templates/aws-ray.yml.j2 +1 -0
- sky/templates/nebius-ray.yml.j2 +4 -8
- sky/usage/usage_lib.py +3 -2
- sky/utils/annotations.py +8 -2
- sky/utils/cluster_utils.py +3 -3
- sky/utils/common_utils.py +0 -72
- sky/utils/controller_utils.py +4 -3
- sky/utils/dag_utils.py +4 -4
- sky/utils/db/db_utils.py +11 -0
- sky/utils/db/migration_utils.py +1 -1
- sky/utils/kubernetes/config_map_utils.py +3 -3
- sky/utils/kubernetes_enums.py +1 -0
- sky/utils/lock_events.py +94 -0
- sky/utils/schemas.py +3 -0
- sky/utils/timeline.py +24 -93
- sky/utils/yaml_utils.py +77 -10
- {skypilot_nightly-1.0.0.dev20250827.dist-info → skypilot_nightly-1.0.0.dev20250829.dist-info}/METADATA +8 -2
- {skypilot_nightly-1.0.0.dev20250827.dist-info → skypilot_nightly-1.0.0.dev20250829.dist-info}/RECORD +86 -84
- /sky/dashboard/out/_next/static/{-eL7Ky3bxVivzeLHNB9U6 → hYJYFIxp_ZFONR4wTIJqZ}/_buildManifest.js +0 -0
- /sky/dashboard/out/_next/static/{-eL7Ky3bxVivzeLHNB9U6 → hYJYFIxp_ZFONR4wTIJqZ}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250827.dist-info → skypilot_nightly-1.0.0.dev20250829.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250827.dist-info → skypilot_nightly-1.0.0.dev20250829.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250827.dist-info → skypilot_nightly-1.0.0.dev20250829.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250827.dist-info → skypilot_nightly-1.0.0.dev20250829.dist-info}/top_level.txt +0 -0
sky/utils/timeline.py
CHANGED
|
@@ -4,7 +4,6 @@ The timeline follows the trace event format defined here:
|
|
|
4
4
|
https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview
|
|
5
5
|
""" # pylint: disable=line-too-long
|
|
6
6
|
import atexit
|
|
7
|
-
import functools
|
|
8
7
|
import json
|
|
9
8
|
import os
|
|
10
9
|
import threading
|
|
@@ -12,14 +11,15 @@ import time
|
|
|
12
11
|
import traceback
|
|
13
12
|
from typing import Callable, Optional, Union
|
|
14
13
|
|
|
15
|
-
import filelock
|
|
16
|
-
|
|
17
14
|
from sky.utils import common_utils
|
|
18
|
-
from sky.utils import locks
|
|
19
15
|
|
|
20
16
|
_events = []
|
|
21
17
|
|
|
22
18
|
|
|
19
|
+
def _get_events_file_path():
|
|
20
|
+
return os.environ.get('SKYPILOT_TIMELINE_FILE_PATH')
|
|
21
|
+
|
|
22
|
+
|
|
23
23
|
class Event:
|
|
24
24
|
"""Record an event.
|
|
25
25
|
|
|
@@ -29,6 +29,10 @@ class Event:
|
|
|
29
29
|
"""
|
|
30
30
|
|
|
31
31
|
def __init__(self, name: str, message: Optional[str] = None):
|
|
32
|
+
self._skipped = False
|
|
33
|
+
if not _get_events_file_path():
|
|
34
|
+
self._skipped = True
|
|
35
|
+
return
|
|
32
36
|
self._name = name
|
|
33
37
|
self._message = message
|
|
34
38
|
# See the module doc for the event format.
|
|
@@ -45,6 +49,8 @@ class Event:
|
|
|
45
49
|
self._event['args'] = {'message': self._message}
|
|
46
50
|
|
|
47
51
|
def begin(self):
|
|
52
|
+
if self._skipped:
|
|
53
|
+
return
|
|
48
54
|
event_begin = self._event.copy()
|
|
49
55
|
event_begin.update({
|
|
50
56
|
'ph': 'B',
|
|
@@ -56,6 +62,8 @@ class Event:
|
|
|
56
62
|
_events.append(event_begin)
|
|
57
63
|
|
|
58
64
|
def end(self):
|
|
65
|
+
if self._skipped:
|
|
66
|
+
return
|
|
59
67
|
event_end = self._event.copy()
|
|
60
68
|
event_end.update({
|
|
61
69
|
'ph': 'E',
|
|
@@ -77,103 +85,26 @@ def event(name_or_fn: Union[str, Callable], message: Optional[str] = None):
|
|
|
77
85
|
return common_utils.make_decorator(Event, name_or_fn, message=message)
|
|
78
86
|
|
|
79
87
|
|
|
80
|
-
class DistributedLockEvent:
|
|
81
|
-
"""Serve both as a distributed lock and event for the lock."""
|
|
82
|
-
|
|
83
|
-
def __init__(self, lock_id: str, timeout: Optional[float] = None):
|
|
84
|
-
self._lock_id = lock_id
|
|
85
|
-
self._lock = locks.get_lock(lock_id, timeout)
|
|
86
|
-
self._hold_lock_event = Event(f'[DistributedLock.hold]:{lock_id}')
|
|
87
|
-
|
|
88
|
-
def acquire(self):
|
|
89
|
-
was_locked = self._lock.is_locked
|
|
90
|
-
with Event(f'[DistributedLock.acquire]:{self._lock_id}'):
|
|
91
|
-
self._lock.acquire()
|
|
92
|
-
if not was_locked and self._lock.is_locked:
|
|
93
|
-
# start holding the lock after initial acquiring
|
|
94
|
-
self._hold_lock_event.begin()
|
|
95
|
-
|
|
96
|
-
def release(self):
|
|
97
|
-
was_locked = self._lock.is_locked
|
|
98
|
-
self._lock.release()
|
|
99
|
-
if was_locked and not self._lock.is_locked:
|
|
100
|
-
# stop holding the lock after initial releasing
|
|
101
|
-
self._hold_lock_event.end()
|
|
102
|
-
|
|
103
|
-
def __enter__(self):
|
|
104
|
-
self.acquire()
|
|
105
|
-
return self
|
|
106
|
-
|
|
107
|
-
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
108
|
-
self.release()
|
|
109
|
-
|
|
110
|
-
def __call__(self, f):
|
|
111
|
-
|
|
112
|
-
@functools.wraps(f)
|
|
113
|
-
def wrapper(*args, **kwargs):
|
|
114
|
-
with self:
|
|
115
|
-
return f(*args, **kwargs)
|
|
116
|
-
|
|
117
|
-
return wrapper
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
class FileLockEvent:
|
|
121
|
-
"""Serve both as a file lock and event for the lock."""
|
|
122
|
-
|
|
123
|
-
def __init__(self, lockfile: Union[str, os.PathLike], timeout: float = -1):
|
|
124
|
-
self._lockfile = lockfile
|
|
125
|
-
os.makedirs(os.path.dirname(os.path.abspath(self._lockfile)),
|
|
126
|
-
exist_ok=True)
|
|
127
|
-
self._lock = filelock.FileLock(self._lockfile, timeout)
|
|
128
|
-
self._hold_lock_event = Event(f'[FileLock.hold]:{self._lockfile}')
|
|
129
|
-
|
|
130
|
-
def acquire(self):
|
|
131
|
-
was_locked = self._lock.is_locked
|
|
132
|
-
with Event(f'[FileLock.acquire]:{self._lockfile}'):
|
|
133
|
-
self._lock.acquire()
|
|
134
|
-
if not was_locked and self._lock.is_locked:
|
|
135
|
-
# start holding the lock after initial acquiring
|
|
136
|
-
self._hold_lock_event.begin()
|
|
137
|
-
|
|
138
|
-
def release(self):
|
|
139
|
-
was_locked = self._lock.is_locked
|
|
140
|
-
self._lock.release()
|
|
141
|
-
if was_locked and not self._lock.is_locked:
|
|
142
|
-
# stop holding the lock after initial releasing
|
|
143
|
-
self._hold_lock_event.end()
|
|
144
|
-
|
|
145
|
-
def __enter__(self):
|
|
146
|
-
self.acquire()
|
|
147
|
-
return self
|
|
148
|
-
|
|
149
|
-
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
150
|
-
self.release()
|
|
151
|
-
|
|
152
|
-
def __call__(self, f):
|
|
153
|
-
# Make this class callable as a decorator.
|
|
154
|
-
@functools.wraps(f)
|
|
155
|
-
def wrapper(*args, **kwargs):
|
|
156
|
-
with self:
|
|
157
|
-
return f(*args, **kwargs)
|
|
158
|
-
|
|
159
|
-
return wrapper
|
|
160
|
-
|
|
161
|
-
|
|
162
88
|
def save_timeline():
|
|
163
|
-
|
|
164
|
-
if not
|
|
89
|
+
events_file_path = _get_events_file_path()
|
|
90
|
+
if not events_file_path:
|
|
165
91
|
return
|
|
92
|
+
global _events
|
|
93
|
+
events_to_write = _events
|
|
94
|
+
_events = []
|
|
166
95
|
json_output = {
|
|
167
|
-
'traceEvents': _events,
|
|
96
|
+
'traceEvents': events_to_write,
|
|
168
97
|
'displayTimeUnit': 'ms',
|
|
169
98
|
'otherData': {
|
|
170
|
-
'log_dir': os.path.dirname(os.path.abspath(
|
|
99
|
+
'log_dir': os.path.dirname(os.path.abspath(events_file_path)),
|
|
171
100
|
}
|
|
172
101
|
}
|
|
173
|
-
os.makedirs(os.path.dirname(os.path.abspath(
|
|
174
|
-
|
|
102
|
+
os.makedirs(os.path.dirname(os.path.abspath(events_file_path)),
|
|
103
|
+
exist_ok=True)
|
|
104
|
+
with open(events_file_path, 'w', encoding='utf-8') as f:
|
|
175
105
|
json.dump(json_output, f)
|
|
106
|
+
del events_to_write
|
|
176
107
|
|
|
177
108
|
|
|
178
|
-
if
|
|
109
|
+
if _get_events_file_path():
|
|
179
110
|
atexit.register(save_timeline)
|
sky/utils/yaml_utils.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""YAML utilities."""
|
|
2
|
-
|
|
2
|
+
import io
|
|
3
|
+
from typing import Any, Dict, List, Optional, TYPE_CHECKING, Union
|
|
3
4
|
|
|
4
5
|
from sky.adaptors import common
|
|
5
6
|
|
|
@@ -8,28 +9,94 @@ if TYPE_CHECKING:
|
|
|
8
9
|
else:
|
|
9
10
|
yaml = common.LazyImport('yaml')
|
|
10
11
|
|
|
11
|
-
|
|
12
|
+
_c_extension_unavailable = False
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
def safe_load(stream) -> Any:
|
|
15
|
-
global
|
|
16
|
-
if
|
|
16
|
+
global _c_extension_unavailable
|
|
17
|
+
if _c_extension_unavailable:
|
|
17
18
|
return yaml.load(stream, Loader=yaml.SafeLoader)
|
|
18
19
|
|
|
19
20
|
try:
|
|
20
21
|
return yaml.load(stream, Loader=yaml.CSafeLoader)
|
|
21
|
-
except
|
|
22
|
-
|
|
22
|
+
except AttributeError:
|
|
23
|
+
_c_extension_unavailable = True
|
|
23
24
|
return yaml.load(stream, Loader=yaml.SafeLoader)
|
|
24
25
|
|
|
25
26
|
|
|
26
27
|
def safe_load_all(stream) -> Any:
|
|
27
|
-
global
|
|
28
|
-
if
|
|
28
|
+
global _c_extension_unavailable
|
|
29
|
+
if _c_extension_unavailable:
|
|
29
30
|
return yaml.load_all(stream, Loader=yaml.SafeLoader)
|
|
30
31
|
|
|
31
32
|
try:
|
|
32
33
|
return yaml.load_all(stream, Loader=yaml.CSafeLoader)
|
|
33
|
-
except
|
|
34
|
-
|
|
34
|
+
except AttributeError:
|
|
35
|
+
_c_extension_unavailable = True
|
|
35
36
|
return yaml.load_all(stream, Loader=yaml.SafeLoader)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def read_yaml(path: Optional[str]) -> Dict[str, Any]:
|
|
40
|
+
if path is None:
|
|
41
|
+
raise ValueError('Attempted to read a None YAML.')
|
|
42
|
+
with open(path, 'r', encoding='utf-8') as f:
|
|
43
|
+
config = safe_load(f)
|
|
44
|
+
return config
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def read_yaml_all_str(yaml_str: str) -> List[Dict[str, Any]]:
|
|
48
|
+
stream = io.StringIO(yaml_str)
|
|
49
|
+
config = safe_load_all(stream)
|
|
50
|
+
configs = list(config)
|
|
51
|
+
if not configs:
|
|
52
|
+
# Empty YAML file.
|
|
53
|
+
return [{}]
|
|
54
|
+
return configs
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def read_yaml_all(path: str) -> List[Dict[str, Any]]:
|
|
58
|
+
with open(path, 'r', encoding='utf-8') as f:
|
|
59
|
+
return read_yaml_all_str(f.read())
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def dump_yaml(path: str,
|
|
63
|
+
config: Union[List[Dict[str, Any]], Dict[str, Any]],
|
|
64
|
+
blank: bool = False) -> None:
|
|
65
|
+
"""Dumps a YAML file.
|
|
66
|
+
|
|
67
|
+
Args:
|
|
68
|
+
path: the path to the YAML file.
|
|
69
|
+
config: the configuration to dump.
|
|
70
|
+
"""
|
|
71
|
+
with open(path, 'w', encoding='utf-8') as f:
|
|
72
|
+
contents = dump_yaml_str(config)
|
|
73
|
+
if blank and isinstance(config, dict) and len(config) == 0:
|
|
74
|
+
# when dumping to yaml, an empty dict will go in as {}.
|
|
75
|
+
contents = ''
|
|
76
|
+
f.write(contents)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def dump_yaml_str(config: Union[List[Dict[str, Any]], Dict[str, Any]]) -> str:
|
|
80
|
+
"""Dumps a YAML string.
|
|
81
|
+
Args:
|
|
82
|
+
config: the configuration to dump.
|
|
83
|
+
Returns:
|
|
84
|
+
The YAML string.
|
|
85
|
+
"""
|
|
86
|
+
|
|
87
|
+
# https://github.com/yaml/pyyaml/issues/127
|
|
88
|
+
class LineBreakDumper(yaml.SafeDumper):
|
|
89
|
+
|
|
90
|
+
def write_line_break(self, data=None):
|
|
91
|
+
super().write_line_break(data)
|
|
92
|
+
if len(self.indents) == 1:
|
|
93
|
+
super().write_line_break()
|
|
94
|
+
|
|
95
|
+
if isinstance(config, list):
|
|
96
|
+
dump_func = yaml.dump_all # type: ignore
|
|
97
|
+
else:
|
|
98
|
+
dump_func = yaml.dump # type: ignore
|
|
99
|
+
return dump_func(config,
|
|
100
|
+
Dumper=LineBreakDumper,
|
|
101
|
+
sort_keys=False,
|
|
102
|
+
default_flow_style=False)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: skypilot-nightly
|
|
3
|
-
Version: 1.0.0.dev20250827
|
|
3
|
+
Version: 1.0.0.dev20250829
|
|
4
4
|
Summary: SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.
|
|
5
5
|
Author: SkyPilot Team
|
|
6
6
|
License: Apache 2.0
|
|
@@ -34,7 +34,7 @@ Requires-Dist: python-dotenv
|
|
|
34
34
|
Requires-Dist: rich
|
|
35
35
|
Requires-Dist: tabulate
|
|
36
36
|
Requires-Dist: typing_extensions
|
|
37
|
-
Requires-Dist: filelock>=3.
|
|
37
|
+
Requires-Dist: filelock>=3.15.0
|
|
38
38
|
Requires-Dist: packaging
|
|
39
39
|
Requires-Dist: psutil
|
|
40
40
|
Requires-Dist: pulp
|
|
@@ -59,6 +59,8 @@ Requires-Dist: gitpython
|
|
|
59
59
|
Requires-Dist: types-paramiko
|
|
60
60
|
Requires-Dist: alembic
|
|
61
61
|
Requires-Dist: aiohttp
|
|
62
|
+
Requires-Dist: aiosqlite
|
|
63
|
+
Requires-Dist: anyio
|
|
62
64
|
Provides-Extra: aws
|
|
63
65
|
Requires-Dist: awscli>=1.27.10; extra == "aws"
|
|
64
66
|
Requires-Dist: botocore>=1.29.10; extra == "aws"
|
|
@@ -136,8 +138,10 @@ Requires-Dist: sqlalchemy_adapter; extra == "server"
|
|
|
136
138
|
Requires-Dist: passlib; extra == "server"
|
|
137
139
|
Requires-Dist: pyjwt; extra == "server"
|
|
138
140
|
Requires-Dist: aiohttp; extra == "server"
|
|
141
|
+
Requires-Dist: anyio; extra == "server"
|
|
139
142
|
Requires-Dist: grpcio>=1.63.0; extra == "server"
|
|
140
143
|
Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "server"
|
|
144
|
+
Requires-Dist: aiosqlite; extra == "server"
|
|
141
145
|
Provides-Extra: all
|
|
142
146
|
Requires-Dist: awscli>=1.27.10; extra == "all"
|
|
143
147
|
Requires-Dist: botocore>=1.29.10; extra == "all"
|
|
@@ -194,8 +198,10 @@ Requires-Dist: sqlalchemy_adapter; extra == "all"
|
|
|
194
198
|
Requires-Dist: passlib; extra == "all"
|
|
195
199
|
Requires-Dist: pyjwt; extra == "all"
|
|
196
200
|
Requires-Dist: aiohttp; extra == "all"
|
|
201
|
+
Requires-Dist: anyio; extra == "all"
|
|
197
202
|
Requires-Dist: grpcio>=1.63.0; extra == "all"
|
|
198
203
|
Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "all"
|
|
204
|
+
Requires-Dist: aiosqlite; extra == "all"
|
|
199
205
|
Dynamic: author
|
|
200
206
|
Dynamic: classifier
|
|
201
207
|
Dynamic: description
|