skypilot-nightly 1.0.0.dev20250413__py3-none-any.whl → 1.0.0.dev20250417__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/kubernetes.py +7 -0
- sky/authentication.py +2 -2
- sky/backends/backend_utils.py +3 -3
- sky/backends/cloud_vm_ray_backend.py +22 -29
- sky/check.py +1 -1
- sky/cli.py +161 -55
- sky/client/cli.py +161 -55
- sky/client/sdk.py +5 -5
- sky/clouds/aws.py +2 -2
- sky/clouds/kubernetes.py +0 -8
- sky/clouds/oci.py +1 -1
- sky/core.py +17 -11
- sky/exceptions.py +5 -0
- sky/jobs/constants.py +8 -1
- sky/jobs/server/core.py +12 -8
- sky/models.py +28 -0
- sky/provision/kubernetes/config.py +1 -1
- sky/provision/kubernetes/instance.py +16 -14
- sky/provision/kubernetes/network_utils.py +1 -1
- sky/provision/kubernetes/utils.py +50 -22
- sky/resources.py +47 -2
- sky/serve/constants.py +6 -0
- sky/serve/load_balancing_policies.py +0 -4
- sky/serve/serve_state.py +0 -6
- sky/serve/server/core.py +5 -2
- sky/server/common.py +133 -46
- sky/server/constants.py +1 -1
- sky/server/requests/serializers/decoders.py +2 -5
- sky/server/requests/serializers/encoders.py +2 -5
- sky/server/server.py +1 -1
- sky/setup_files/dependencies.py +1 -0
- sky/sky_logging.py +2 -2
- sky/skylet/constants.py +5 -7
- sky/skylet/job_lib.py +3 -3
- sky/skypilot_config.py +194 -73
- sky/templates/kubernetes-ray.yml.j2 +1 -1
- sky/utils/cli_utils/status_utils.py +12 -5
- sky/utils/config_utils.py +39 -14
- sky/utils/controller_utils.py +44 -6
- sky/utils/kubernetes/generate_kubeconfig.sh +2 -2
- sky/utils/kubernetes/gpu_labeler.py +99 -16
- sky/utils/schemas.py +24 -0
- {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250417.dist-info}/METADATA +2 -1
- {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250417.dist-info}/RECORD +49 -49
- {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250417.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250417.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250417.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250417.dist-info}/top_level.txt +0 -0
sky/skypilot_config.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
"""Immutable user configurations (EXPERIMENTAL).
|
2
2
|
|
3
3
|
On module import, we attempt to parse the config located at _USER_CONFIG_PATH
|
4
|
-
(default: ~/.sky/
|
4
|
+
(default: ~/.sky/config.yaml). Caller can then use
|
5
5
|
|
6
6
|
>> skypilot_config.loaded()
|
7
7
|
|
@@ -35,14 +35,14 @@ Consider the following config contents:
|
|
35
35
|
|
36
36
|
then:
|
37
37
|
|
38
|
-
# Assuming ~/.sky/
|
38
|
+
# Assuming ~/.sky/config.yaml exists and can be loaded:
|
39
39
|
skypilot_config.loaded() # ==> True
|
40
40
|
|
41
41
|
skypilot_config.get_nested(('a', 'nested'), None) # ==> 1
|
42
42
|
skypilot_config.get_nested(('a', 'nonexist'), None) # ==> None
|
43
43
|
skypilot_config.get_nested(('a',), None) # ==> {'nested': 1}
|
44
44
|
|
45
|
-
# If ~/.sky/
|
45
|
+
# If ~/.sky/config.yaml doesn't exist or failed to be loaded:
|
46
46
|
skypilot_config.loaded() # ==> False
|
47
47
|
skypilot_config.get_nested(('a', 'nested'), None) # ==> None
|
48
48
|
skypilot_config.get_nested(('a', 'nonexist'), None) # ==> None
|
@@ -52,8 +52,11 @@ import contextlib
|
|
52
52
|
import copy
|
53
53
|
import os
|
54
54
|
import pprint
|
55
|
+
import threading
|
55
56
|
import typing
|
56
|
-
from typing import Any, Dict, Iterator, Optional, Tuple
|
57
|
+
from typing import Any, Dict, Iterator, List, Optional, Tuple
|
58
|
+
|
59
|
+
from omegaconf import OmegaConf
|
57
60
|
|
58
61
|
from sky import exceptions
|
59
62
|
from sky import sky_logging
|
@@ -99,34 +102,115 @@ ENV_VAR_SKYPILOT_CONFIG = f'{constants.SKYPILOT_ENV_VAR_PREFIX}CONFIG'
|
|
99
102
|
ENV_VAR_USER_CONFIG = f'{constants.SKYPILOT_ENV_VAR_PREFIX}USER_CONFIG'
|
100
103
|
ENV_VAR_PROJECT_CONFIG = f'{constants.SKYPILOT_ENV_VAR_PREFIX}PROJECT_CONFIG'
|
101
104
|
|
102
|
-
#
|
103
|
-
|
104
|
-
|
105
|
-
|
105
|
+
# (Used by server) Environment variable for setting the server config file.
|
106
|
+
ENV_VAR_SERVER_CONFIG = f'{constants.SKYPILOT_ENV_VAR_PREFIX}SERVER_CONFIG'
|
107
|
+
|
108
|
+
# Path to the client config files.
|
109
|
+
_USER_CONFIG_PATH = '~/.sky/config.yaml'
|
110
|
+
_PROJECT_CONFIG_PATH = '.sky.yaml'
|
111
|
+
|
112
|
+
# Path to the server config file.
|
113
|
+
_SERVER_CONFIG_PATH = _USER_CONFIG_PATH
|
106
114
|
|
107
115
|
# The loaded config.
|
108
116
|
_dict = config_utils.Config()
|
109
117
|
_loaded_config_path: Optional[str] = None
|
110
118
|
_config_overridden: bool = False
|
119
|
+
_reload_config_lock = threading.Lock()
|
111
120
|
|
112
121
|
|
113
|
-
# This function exists solely to maintain backward compatibility with the
|
114
|
-
# legacy user config file located at ~/.sky/config.yaml.
|
115
122
|
def get_user_config_path() -> str:
|
116
|
-
"""Returns the path to the user config file.
|
117
|
-
|
118
|
-
If only the legacy user config file exists, return
|
119
|
-
the legacy user config path.
|
120
|
-
Otherwise, return the new user config path.
|
121
|
-
"""
|
122
|
-
user_config_path = os.path.expanduser(_USER_CONFIG_PATH)
|
123
|
-
legacy_user_config_path = os.path.expanduser(_LEGACY_USER_CONFIG_PATH)
|
124
|
-
if (os.path.exists(legacy_user_config_path) and
|
125
|
-
not os.path.exists(user_config_path)):
|
126
|
-
return _LEGACY_USER_CONFIG_PATH
|
123
|
+
"""Returns the path to the user config file."""
|
127
124
|
return _USER_CONFIG_PATH
|
128
125
|
|
129
126
|
|
127
|
+
def get_user_config() -> config_utils.Config:
|
128
|
+
"""Returns the user config."""
|
129
|
+
# find the user config file
|
130
|
+
user_config_path = _get_config_file_path(ENV_VAR_USER_CONFIG)
|
131
|
+
if user_config_path:
|
132
|
+
logger.debug('using user config file specified by '
|
133
|
+
f'{ENV_VAR_USER_CONFIG}: {user_config_path}')
|
134
|
+
user_config_path = os.path.expanduser(user_config_path)
|
135
|
+
if not os.path.exists(user_config_path):
|
136
|
+
with ux_utils.print_exception_no_traceback():
|
137
|
+
raise FileNotFoundError(
|
138
|
+
'Config file specified by env var '
|
139
|
+
f'{ENV_VAR_USER_CONFIG} ({user_config_path!r}) '
|
140
|
+
'does not exist. Please double check the path or unset the '
|
141
|
+
f'env var: unset {ENV_VAR_USER_CONFIG}')
|
142
|
+
else:
|
143
|
+
user_config_path = get_user_config_path()
|
144
|
+
logger.debug(f'using default user config file: {user_config_path}')
|
145
|
+
user_config_path = os.path.expanduser(user_config_path)
|
146
|
+
|
147
|
+
# load the user config file
|
148
|
+
if os.path.exists(user_config_path):
|
149
|
+
user_config = _parse_config_file(user_config_path)
|
150
|
+
_validate_config(user_config, user_config_path)
|
151
|
+
else:
|
152
|
+
user_config = config_utils.Config()
|
153
|
+
return user_config
|
154
|
+
|
155
|
+
|
156
|
+
def _get_project_config() -> config_utils.Config:
|
157
|
+
# find the project config file
|
158
|
+
project_config_path = _get_config_file_path(ENV_VAR_PROJECT_CONFIG)
|
159
|
+
if project_config_path:
|
160
|
+
logger.debug('using project config file specified by '
|
161
|
+
f'{ENV_VAR_PROJECT_CONFIG}: {project_config_path}')
|
162
|
+
project_config_path = os.path.expanduser(project_config_path)
|
163
|
+
if not os.path.exists(project_config_path):
|
164
|
+
with ux_utils.print_exception_no_traceback():
|
165
|
+
raise FileNotFoundError(
|
166
|
+
'Config file specified by env var '
|
167
|
+
f'{ENV_VAR_PROJECT_CONFIG} ({project_config_path!r}) '
|
168
|
+
'does not exist. Please double check the path or unset the '
|
169
|
+
f'env var: unset {ENV_VAR_PROJECT_CONFIG}')
|
170
|
+
else:
|
171
|
+
logger.debug(
|
172
|
+
f'using default project config file: {_PROJECT_CONFIG_PATH}')
|
173
|
+
project_config_path = _PROJECT_CONFIG_PATH
|
174
|
+
project_config_path = os.path.expanduser(project_config_path)
|
175
|
+
|
176
|
+
# load the project config file
|
177
|
+
if os.path.exists(project_config_path):
|
178
|
+
project_config = _parse_config_file(project_config_path)
|
179
|
+
_validate_config(project_config, project_config_path)
|
180
|
+
else:
|
181
|
+
project_config = config_utils.Config()
|
182
|
+
return project_config
|
183
|
+
|
184
|
+
|
185
|
+
def get_server_config() -> config_utils.Config:
|
186
|
+
"""Returns the server config."""
|
187
|
+
# find the server config file
|
188
|
+
server_config_path = _get_config_file_path(ENV_VAR_SERVER_CONFIG)
|
189
|
+
if server_config_path:
|
190
|
+
logger.debug('using server config file specified by '
|
191
|
+
f'{ENV_VAR_SERVER_CONFIG}: {server_config_path}')
|
192
|
+
server_config_path = os.path.expanduser(server_config_path)
|
193
|
+
if not os.path.exists(server_config_path):
|
194
|
+
with ux_utils.print_exception_no_traceback():
|
195
|
+
raise FileNotFoundError(
|
196
|
+
'Config file specified by env var '
|
197
|
+
f'{ENV_VAR_SERVER_CONFIG} ({server_config_path!r}) '
|
198
|
+
'does not exist. Please double check the path or unset the '
|
199
|
+
f'env var: unset {ENV_VAR_SERVER_CONFIG}')
|
200
|
+
else:
|
201
|
+
server_config_path = _SERVER_CONFIG_PATH
|
202
|
+
logger.debug(f'using default server config file: {server_config_path}')
|
203
|
+
server_config_path = os.path.expanduser(server_config_path)
|
204
|
+
|
205
|
+
# load the server config file
|
206
|
+
if os.path.exists(server_config_path):
|
207
|
+
server_config = _parse_config_file(server_config_path)
|
208
|
+
_validate_config(server_config, server_config_path)
|
209
|
+
else:
|
210
|
+
server_config = config_utils.Config()
|
211
|
+
return server_config
|
212
|
+
|
213
|
+
|
130
214
|
def get_nested(keys: Tuple[str, ...],
|
131
215
|
default_value: Any,
|
132
216
|
override_configs: Optional[Dict[str, Any]] = None) -> Any:
|
@@ -177,18 +261,18 @@ def _get_config_file_path(envvar: str) -> Optional[str]:
|
|
177
261
|
return None
|
178
262
|
|
179
263
|
|
180
|
-
def _validate_config(config: Dict[str, Any],
|
264
|
+
def _validate_config(config: Dict[str, Any], config_source: str) -> None:
|
181
265
|
"""Validates the config."""
|
182
266
|
common_utils.validate_schema(
|
183
267
|
config,
|
184
268
|
schemas.get_config_schema(),
|
185
|
-
f'Invalid config YAML ({
|
269
|
+
f'Invalid config YAML from ({config_source}). See: '
|
186
270
|
'https://docs.skypilot.co/en/latest/reference/config.html. ' # pylint: disable=line-too-long
|
187
271
|
'Error: ',
|
188
272
|
skip_none=False)
|
189
273
|
|
190
274
|
|
191
|
-
def
|
275
|
+
def overlay_skypilot_config(
|
192
276
|
original_config: Optional[config_utils.Config],
|
193
277
|
override_configs: Optional[config_utils.Config]) -> config_utils.Config:
|
194
278
|
"""Overlays the override configs on the original configs."""
|
@@ -202,6 +286,12 @@ def _overlay_skypilot_config(
|
|
202
286
|
return config
|
203
287
|
|
204
288
|
|
289
|
+
def safe_reload_config() -> None:
|
290
|
+
"""Reloads the config, safe to be called concurrently."""
|
291
|
+
with _reload_config_lock:
|
292
|
+
_reload_config()
|
293
|
+
|
294
|
+
|
205
295
|
def _reload_config() -> None:
|
206
296
|
internal_config_path = os.environ.get(ENV_VAR_SKYPILOT_CONFIG)
|
207
297
|
if internal_config_path is not None:
|
@@ -213,7 +303,10 @@ def _reload_config() -> None:
|
|
213
303
|
_reload_config_from_internal_file(internal_config_path)
|
214
304
|
return
|
215
305
|
|
216
|
-
|
306
|
+
if os.environ.get(constants.ENV_VAR_IS_SKYPILOT_SERVER) is not None:
|
307
|
+
_reload_config_as_server()
|
308
|
+
else:
|
309
|
+
_reload_config_as_client()
|
217
310
|
|
218
311
|
|
219
312
|
def _parse_config_file(config_path: str) -> config_utils.Config:
|
@@ -251,67 +344,44 @@ def _reload_config_from_internal_file(internal_config_path: str) -> None:
|
|
251
344
|
_loaded_config_path = config_path
|
252
345
|
|
253
346
|
|
254
|
-
def
|
347
|
+
def _reload_config_as_server() -> None:
|
255
348
|
global _dict
|
256
349
|
# Reset the global variables, to avoid using stale values.
|
257
350
|
_dict = config_utils.Config()
|
258
351
|
|
259
|
-
|
260
|
-
|
261
|
-
if
|
262
|
-
|
263
|
-
f'{ENV_VAR_USER_CONFIG}: {user_config_path}')
|
264
|
-
user_config_path = os.path.expanduser(user_config_path)
|
265
|
-
if not os.path.exists(user_config_path):
|
266
|
-
with ux_utils.print_exception_no_traceback():
|
267
|
-
raise FileNotFoundError(
|
268
|
-
'Config file specified by env var '
|
269
|
-
f'{ENV_VAR_USER_CONFIG} ({user_config_path!r}) '
|
270
|
-
'does not exist. Please double check the path or unset the '
|
271
|
-
f'env var: unset {ENV_VAR_USER_CONFIG}')
|
272
|
-
else:
|
273
|
-
user_config_path = get_user_config_path()
|
274
|
-
logger.debug(f'using default user config file: {user_config_path}')
|
275
|
-
user_config_path = os.path.expanduser(user_config_path)
|
352
|
+
overrides: List[config_utils.Config] = []
|
353
|
+
server_config = get_server_config()
|
354
|
+
if server_config:
|
355
|
+
overrides.append(server_config)
|
276
356
|
|
277
|
-
|
357
|
+
# layer the configs on top of each other based on priority
|
358
|
+
overlaid_server_config: config_utils.Config = config_utils.Config()
|
359
|
+
for override in overrides:
|
360
|
+
overlaid_server_config = overlay_skypilot_config(
|
361
|
+
original_config=overlaid_server_config, override_configs=override)
|
362
|
+
logger.debug(f'final server config: {overlaid_server_config}')
|
363
|
+
_dict = overlaid_server_config
|
278
364
|
|
279
|
-
# find the project config file
|
280
|
-
project_config_path = _get_config_file_path(ENV_VAR_PROJECT_CONFIG)
|
281
|
-
if project_config_path:
|
282
|
-
logger.debug('using project config file specified by '
|
283
|
-
f'{ENV_VAR_PROJECT_CONFIG}: {project_config_path}')
|
284
|
-
project_config_path = os.path.expanduser(project_config_path)
|
285
|
-
if not os.path.exists(project_config_path):
|
286
|
-
with ux_utils.print_exception_no_traceback():
|
287
|
-
raise FileNotFoundError(
|
288
|
-
'Config file specified by env var '
|
289
|
-
f'{ENV_VAR_PROJECT_CONFIG} ({project_config_path!r}) '
|
290
|
-
'does not exist. Please double check the path or unset the '
|
291
|
-
f'env var: unset {ENV_VAR_PROJECT_CONFIG}')
|
292
|
-
else:
|
293
|
-
logger.debug(
|
294
|
-
f'using default project config file: {_PROJECT_CONFIG_PATH}')
|
295
|
-
project_config_path = _PROJECT_CONFIG_PATH
|
296
|
-
project_config_path = os.path.expanduser(project_config_path)
|
297
365
|
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
overrides.append(user_config)
|
366
|
+
def _reload_config_as_client() -> None:
|
367
|
+
global _dict
|
368
|
+
# Reset the global variables, to avoid using stale values.
|
369
|
+
_dict = config_utils.Config()
|
303
370
|
|
304
|
-
|
305
|
-
|
306
|
-
|
371
|
+
overrides: List[config_utils.Config] = []
|
372
|
+
user_config = get_user_config()
|
373
|
+
if user_config:
|
374
|
+
overrides.append(user_config)
|
375
|
+
project_config = _get_project_config()
|
376
|
+
if project_config:
|
307
377
|
overrides.append(project_config)
|
308
378
|
|
309
379
|
# layer the configs on top of each other based on priority
|
310
380
|
overlaid_client_config: config_utils.Config = config_utils.Config()
|
311
381
|
for override in overrides:
|
312
|
-
overlaid_client_config =
|
382
|
+
overlaid_client_config = overlay_skypilot_config(
|
313
383
|
original_config=overlaid_client_config, override_configs=override)
|
314
|
-
logger.debug(f'final config: {overlaid_client_config}')
|
384
|
+
logger.debug(f'final client config: {overlaid_client_config}')
|
315
385
|
_dict = overlaid_client_config
|
316
386
|
|
317
387
|
|
@@ -323,7 +393,7 @@ def loaded_config_path() -> Optional[str]:
|
|
323
393
|
return _loaded_config_path
|
324
394
|
|
325
395
|
|
326
|
-
# Load on import.
|
396
|
+
# Load on import; synchronization is guaranteed by the Python interpreter.
|
327
397
|
_reload_config()
|
328
398
|
|
329
399
|
|
@@ -374,3 +444,54 @@ def override_skypilot_config(
|
|
374
444
|
finally:
|
375
445
|
_dict = original_config
|
376
446
|
_config_overridden = False
|
447
|
+
|
448
|
+
|
449
|
+
def _compose_cli_config(cli_config: Optional[str],) -> config_utils.Config:
|
450
|
+
"""Composes the skypilot CLI config.
|
451
|
+
CLI config can either be:
|
452
|
+
- A path to a config file
|
453
|
+
- A comma-separated list of key-value pairs
|
454
|
+
"""
|
455
|
+
|
456
|
+
if not cli_config:
|
457
|
+
return config_utils.Config()
|
458
|
+
|
459
|
+
config_source = 'CLI'
|
460
|
+
maybe_config_path = os.path.expanduser(cli_config)
|
461
|
+
try:
|
462
|
+
if os.path.isfile(maybe_config_path):
|
463
|
+
config_source = maybe_config_path
|
464
|
+
# cli_config is a path to a config file
|
465
|
+
parsed_config = OmegaConf.to_object(
|
466
|
+
OmegaConf.load(maybe_config_path))
|
467
|
+
else: # cli_config is a comma-separated list of key-value pairs
|
468
|
+
variables: List[str] = []
|
469
|
+
variables = cli_config.split(',')
|
470
|
+
parsed_config = OmegaConf.to_object(
|
471
|
+
OmegaConf.from_dotlist(variables))
|
472
|
+
_validate_config(parsed_config, config_source)
|
473
|
+
except ValueError as e:
|
474
|
+
raise ValueError(f'Invalid config override: {cli_config}. '
|
475
|
+
f'Check if config file exists or if the dotlist '
|
476
|
+
f'is formatted as: key1=value1,key2=value2') from e
|
477
|
+
logger.debug('CLI overrides config syntax check passed.')
|
478
|
+
|
479
|
+
return parsed_config
|
480
|
+
|
481
|
+
|
482
|
+
def apply_cli_config(cli_config: Optional[str]) -> Dict[str, Any]:
|
483
|
+
"""Applies the CLI provided config.
|
484
|
+
SAFETY:
|
485
|
+
This function directly modifies the global _dict variable.
|
486
|
+
This is considered fine in CLI context because the program will exit after
|
487
|
+
a single CLI command is executed.
|
488
|
+
Args:
|
489
|
+
cli_config: A path to a config file or a comma-separated
|
490
|
+
list of key-value pairs.
|
491
|
+
"""
|
492
|
+
global _dict
|
493
|
+
parsed_config = _compose_cli_config(cli_config)
|
494
|
+
logger.debug(f'applying following CLI overrides: {parsed_config}')
|
495
|
+
_dict = overlay_skypilot_config(original_config=_dict,
|
496
|
+
override_configs=parsed_config)
|
497
|
+
return parsed_config
|
@@ -96,7 +96,7 @@ provider:
|
|
96
96
|
name: skypilot-service-account-role
|
97
97
|
apiGroup: rbac.authorization.k8s.io
|
98
98
|
|
99
|
-
# Role for the skypilot-system namespace to create
|
99
|
+
# Role for the skypilot-system namespace to create fusermount-server and
|
100
100
|
# any other system components.
|
101
101
|
autoscaler_skypilot_system_role:
|
102
102
|
kind: Role
|
@@ -6,8 +6,8 @@ import click
|
|
6
6
|
import colorama
|
7
7
|
|
8
8
|
from sky import backends
|
9
|
-
from sky.skylet import constants
|
10
9
|
from sky.utils import common_utils
|
10
|
+
from sky.utils import controller_utils
|
11
11
|
from sky.utils import log_utils
|
12
12
|
from sky.utils import resources_utils
|
13
13
|
from sky.utils import status_lib
|
@@ -198,12 +198,19 @@ def show_cost_report_table(cluster_records: List[_ClusterCostReportRecord],
|
|
198
198
|
|
199
199
|
if cluster_records:
|
200
200
|
if controller_name is not None:
|
201
|
-
|
201
|
+
controller = controller_utils.Controllers.from_name(controller_name)
|
202
|
+
if controller is None:
|
203
|
+
raise ValueError(f'Controller {controller_name} not found.')
|
204
|
+
autostop_minutes, _ = (
|
205
|
+
controller_utils.get_controller_autostop_config(
|
206
|
+
controller=controller))
|
207
|
+
if autostop_minutes is not None:
|
208
|
+
autostop_str = (f'{colorama.Style.DIM} (will be autostopped if '
|
209
|
+
f'idle for {autostop_minutes}min)'
|
210
|
+
f'{colorama.Style.RESET_ALL}')
|
202
211
|
click.echo(f'\n{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
|
203
212
|
f'{controller_name}{colorama.Style.RESET_ALL}'
|
204
|
-
f'{
|
205
|
-
f'{autostop_minutes}min)'
|
206
|
-
f'{colorama.Style.RESET_ALL}')
|
213
|
+
f'{autostop_str}')
|
207
214
|
else:
|
208
215
|
click.echo(f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}Clusters'
|
209
216
|
f'{colorama.Style.RESET_ALL}')
|
sky/utils/config_utils.py
CHANGED
@@ -112,14 +112,39 @@ def _recursive_update(
|
|
112
112
|
disallowed_override_keys: Optional[List[Tuple[str,
|
113
113
|
...]]] = None) -> Config:
|
114
114
|
"""Recursively updates base configuration with override configuration"""
|
115
|
+
|
116
|
+
def _update_k8s_config(
|
117
|
+
base_config: Config,
|
118
|
+
override_config: Dict[str, Any],
|
119
|
+
allowed_override_keys: Optional[List[Tuple[str, ...]]] = None,
|
120
|
+
disallowed_override_keys: Optional[List[Tuple[str,
|
121
|
+
...]]] = None) -> Config:
|
122
|
+
"""Updates the top-level k8s config with the override config."""
|
123
|
+
for key, value in override_config.items():
|
124
|
+
(next_allowed_override_keys, next_disallowed_override_keys
|
125
|
+
) = _check_allowed_and_disallowed_override_keys(
|
126
|
+
key, allowed_override_keys, disallowed_override_keys)
|
127
|
+
if key in ['custom_metadata', 'pod_config'] and key in base_config:
|
128
|
+
merge_k8s_configs(base_config[key], value,
|
129
|
+
next_allowed_override_keys,
|
130
|
+
next_disallowed_override_keys)
|
131
|
+
elif (isinstance(value, dict) and key in base_config and
|
132
|
+
isinstance(base_config[key], dict)):
|
133
|
+
_recursive_update(base_config[key], value,
|
134
|
+
next_allowed_override_keys,
|
135
|
+
next_disallowed_override_keys)
|
136
|
+
else:
|
137
|
+
base_config[key] = value
|
138
|
+
return base_config
|
139
|
+
|
115
140
|
for key, value in override_config.items():
|
116
141
|
(next_allowed_override_keys, next_disallowed_override_keys
|
117
142
|
) = _check_allowed_and_disallowed_override_keys(
|
118
143
|
key, allowed_override_keys, disallowed_override_keys)
|
119
144
|
if key == 'kubernetes' and key in base_config:
|
120
|
-
|
121
|
-
|
122
|
-
|
145
|
+
_update_k8s_config(base_config[key], value,
|
146
|
+
next_allowed_override_keys,
|
147
|
+
next_disallowed_override_keys)
|
123
148
|
elif (isinstance(value, dict) and key in base_config and
|
124
149
|
isinstance(base_config[key], dict)):
|
125
150
|
_recursive_update(base_config[key], value,
|
@@ -185,19 +210,19 @@ def merge_k8s_configs(
|
|
185
210
|
merge_k8s_configs(base_config[key][0], value[0],
|
186
211
|
next_allowed_override_keys,
|
187
212
|
next_disallowed_override_keys)
|
188
|
-
elif key in ['volumes', 'volumeMounts']:
|
189
|
-
# If the key is 'volumes'
|
190
|
-
# item with the same name and merge it.
|
191
|
-
for
|
192
|
-
|
193
|
-
if
|
194
|
-
|
213
|
+
elif key in ['volumes', 'volumeMounts', 'initContainers']:
|
214
|
+
# If the key is 'volumes', 'volumeMounts', or 'initContainers',
|
215
|
+
# we search for item with the same name and merge it.
|
216
|
+
for override_item in value:
|
217
|
+
override_item_name = override_item.get('name')
|
218
|
+
if override_item_name is not None:
|
219
|
+
existing_base_item = next(
|
195
220
|
(v for v in base_config[key]
|
196
|
-
if v.get('name') ==
|
197
|
-
if
|
198
|
-
merge_k8s_configs(
|
221
|
+
if v.get('name') == override_item_name), None)
|
222
|
+
if existing_base_item is not None:
|
223
|
+
merge_k8s_configs(existing_base_item, override_item)
|
199
224
|
else:
|
200
|
-
base_config[key].append(
|
225
|
+
base_config[key].append(override_item)
|
201
226
|
else:
|
202
227
|
base_config[key].extend(value)
|
203
228
|
else:
|
sky/utils/controller_utils.py
CHANGED
@@ -6,7 +6,7 @@ import getpass
|
|
6
6
|
import os
|
7
7
|
import tempfile
|
8
8
|
import typing
|
9
|
-
from typing import Any, Dict, Iterable, List, Optional, Set
|
9
|
+
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
|
10
10
|
import uuid
|
11
11
|
|
12
12
|
import colorama
|
@@ -46,7 +46,7 @@ logger = sky_logging.init_logger(__name__)
|
|
46
46
|
# controller resources spec.
|
47
47
|
CONTROLLER_RESOURCES_NOT_VALID_MESSAGE = (
|
48
48
|
'{controller_type} controller resources is not valid, please check '
|
49
|
-
'~/.sky/
|
49
|
+
'~/.sky/config.yaml file and make sure '
|
50
50
|
'{controller_type}.controller.resources is a valid resources spec. '
|
51
51
|
'Details:\n {err}')
|
52
52
|
|
@@ -72,6 +72,7 @@ class _ControllerSpec:
|
|
72
72
|
default_hint_if_non_existent: str
|
73
73
|
connection_error_hint: str
|
74
74
|
default_resources_config: Dict[str, Any]
|
75
|
+
default_autostop_config: Dict[str, Any]
|
75
76
|
|
76
77
|
@property
|
77
78
|
def decline_down_when_failed_to_fetch_status_hint(self) -> str:
|
@@ -118,7 +119,8 @@ class Controllers(enum.Enum):
|
|
118
119
|
default_hint_if_non_existent='No in-progress managed jobs.',
|
119
120
|
connection_error_hint=(
|
120
121
|
'Failed to connect to jobs controller, please try again later.'),
|
121
|
-
default_resources_config=managed_job_constants.CONTROLLER_RESOURCES
|
122
|
+
default_resources_config=managed_job_constants.CONTROLLER_RESOURCES,
|
123
|
+
default_autostop_config=managed_job_constants.CONTROLLER_AUTOSTOP)
|
122
124
|
SKY_SERVE_CONTROLLER = _ControllerSpec(
|
123
125
|
controller_type='serve',
|
124
126
|
name='serve controller',
|
@@ -148,7 +150,8 @@ class Controllers(enum.Enum):
|
|
148
150
|
default_hint_if_non_existent='No live services.',
|
149
151
|
connection_error_hint=(
|
150
152
|
'Failed to connect to serve controller, please try again later.'),
|
151
|
-
default_resources_config=serve_constants.CONTROLLER_RESOURCES
|
153
|
+
default_resources_config=serve_constants.CONTROLLER_RESOURCES,
|
154
|
+
default_autostop_config=serve_constants.CONTROLLER_AUTOSTOP)
|
152
155
|
|
153
156
|
@classmethod
|
154
157
|
def from_name(cls, name: Optional[str]) -> Optional['Controllers']:
|
@@ -262,8 +265,9 @@ def _get_cloud_dependencies_installation_commands(
|
|
262
265
|
' ARCH="amd64"; '
|
263
266
|
'fi && '
|
264
267
|
'(command -v kubectl &>/dev/null || '
|
265
|
-
'("https://dl.k8s.io/release/v1.31.6
|
266
|
-
'&&
|
268
|
+
'(curl -s -LO "https://dl.k8s.io/release/v1.31.6'
|
269
|
+
'/bin/linux/$ARCH/kubectl" && '
|
270
|
+
'sudo install -o root -g root -m 0755 '
|
267
271
|
'kubectl /usr/local/bin/kubectl))')
|
268
272
|
elif isinstance(cloud, clouds.Cudo):
|
269
273
|
step_prefix = prefix_str.replace('<step>', str(len(commands) + 1))
|
@@ -601,6 +605,40 @@ def get_controller_resources(
|
|
601
605
|
return result
|
602
606
|
|
603
607
|
|
608
|
+
def get_controller_autostop_config(
|
609
|
+
controller: Controllers) -> Tuple[Optional[int], bool]:
|
610
|
+
"""Get the autostop config for the controller.
|
611
|
+
|
612
|
+
Returns:
|
613
|
+
A tuple of (idle_minutes_to_autostop, down), which correspond to the
|
614
|
+
values passed to execution.launch().
|
615
|
+
"""
|
616
|
+
controller_autostop_config_copied: Dict[str, Any] = copy.copy(
|
617
|
+
controller.value.default_autostop_config)
|
618
|
+
if skypilot_config.loaded():
|
619
|
+
custom_controller_autostop_config = skypilot_config.get_nested(
|
620
|
+
(controller.value.controller_type, 'controller', 'autostop'), None)
|
621
|
+
if custom_controller_autostop_config is False:
|
622
|
+
# Disabled with `autostop: false` in config.
|
623
|
+
# To indicate autostop is disabled, we return None for
|
624
|
+
# idle_minutes_to_autostop.
|
625
|
+
return None, False
|
626
|
+
elif custom_controller_autostop_config is True:
|
627
|
+
# Enabled with default values. There is no change in behavior, but
|
628
|
+
# this is included for completeness, since `False` is valid.
|
629
|
+
pass
|
630
|
+
elif custom_controller_autostop_config is not None:
|
631
|
+
# We have specific config values.
|
632
|
+
# Override the controller autostop config with the ones specified in
|
633
|
+
# the config.
|
634
|
+
assert isinstance(custom_controller_autostop_config, dict)
|
635
|
+
controller_autostop_config_copied.update(
|
636
|
+
custom_controller_autostop_config)
|
637
|
+
|
638
|
+
return (controller_autostop_config_copied['idle_minutes'],
|
639
|
+
controller_autostop_config_copied['down'])
|
640
|
+
|
641
|
+
|
604
642
|
def _setup_proxy_command_on_controller(
|
605
643
|
controller_launched_cloud: 'clouds.Cloud',
|
606
644
|
user_config: Dict[str, Any]) -> config_utils.Config:
|
@@ -328,9 +328,9 @@ cp kubeconfig ~/.kube/config
|
|
328
328
|
# Verify that you can access the cluster
|
329
329
|
kubectl get pods
|
330
330
|
|
331
|
-
Also add this to your ~/.sky/
|
331
|
+
Also add this to your ~/.sky/config.yaml to use the new service account:
|
332
332
|
|
333
|
-
# ~/.sky/
|
333
|
+
# ~/.sky/config.yaml
|
334
334
|
kubernetes:
|
335
335
|
remote_identity: ${SKYPILOT_SA}
|
336
336
|
"
|