skypilot-nightly 1.0.0.dev20250827__py3-none-any.whl → 1.0.0.dev20250829__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +2 -2
- sky/admin_policy.py +11 -10
- sky/authentication.py +1 -1
- sky/backends/backend.py +3 -5
- sky/backends/backend_utils.py +140 -52
- sky/backends/cloud_vm_ray_backend.py +30 -25
- sky/backends/local_docker_backend.py +3 -8
- sky/backends/wheel_utils.py +35 -8
- sky/client/cli/command.py +41 -9
- sky/client/sdk.py +23 -8
- sky/client/sdk_async.py +6 -2
- sky/clouds/aws.py +118 -1
- sky/core.py +1 -4
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/global_user_state.py +82 -22
- sky/jobs/client/sdk.py +5 -2
- sky/jobs/recovery_strategy.py +9 -4
- sky/jobs/server/server.py +2 -1
- sky/logs/agent.py +2 -2
- sky/logs/aws.py +6 -3
- sky/provision/aws/config.py +78 -3
- sky/provision/aws/instance.py +45 -6
- sky/provision/do/utils.py +2 -1
- sky/provision/kubernetes/instance.py +55 -11
- sky/provision/kubernetes/utils.py +11 -2
- sky/provision/nebius/utils.py +36 -2
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/serve/client/impl.py +5 -4
- sky/serve/replica_managers.py +4 -3
- sky/serve/serve_utils.py +2 -2
- sky/serve/server/impl.py +3 -2
- sky/serve/server/server.py +2 -1
- sky/server/auth/oauth2_proxy.py +10 -4
- sky/server/common.py +4 -4
- sky/server/daemons.py +16 -5
- sky/server/requests/executor.py +5 -3
- sky/server/requests/payloads.py +3 -1
- sky/server/requests/preconditions.py +3 -2
- sky/server/requests/requests.py +121 -19
- sky/server/server.py +85 -60
- sky/server/stream_utils.py +7 -5
- sky/setup_files/dependencies.py +6 -1
- sky/sky_logging.py +28 -0
- sky/skylet/constants.py +6 -0
- sky/skylet/events.py +2 -3
- sky/skypilot_config.py +10 -10
- sky/task.py +1 -1
- sky/templates/aws-ray.yml.j2 +1 -0
- sky/templates/nebius-ray.yml.j2 +4 -8
- sky/usage/usage_lib.py +3 -2
- sky/utils/annotations.py +8 -2
- sky/utils/cluster_utils.py +3 -3
- sky/utils/common_utils.py +0 -72
- sky/utils/controller_utils.py +4 -3
- sky/utils/dag_utils.py +4 -4
- sky/utils/db/db_utils.py +11 -0
- sky/utils/db/migration_utils.py +1 -1
- sky/utils/kubernetes/config_map_utils.py +3 -3
- sky/utils/kubernetes_enums.py +1 -0
- sky/utils/lock_events.py +94 -0
- sky/utils/schemas.py +3 -0
- sky/utils/timeline.py +24 -93
- sky/utils/yaml_utils.py +77 -10
- {skypilot_nightly-1.0.0.dev20250827.dist-info → skypilot_nightly-1.0.0.dev20250829.dist-info}/METADATA +8 -2
- {skypilot_nightly-1.0.0.dev20250827.dist-info → skypilot_nightly-1.0.0.dev20250829.dist-info}/RECORD +86 -84
- /sky/dashboard/out/_next/static/{-eL7Ky3bxVivzeLHNB9U6 → hYJYFIxp_ZFONR4wTIJqZ}/_buildManifest.js +0 -0
- /sky/dashboard/out/_next/static/{-eL7Ky3bxVivzeLHNB9U6 → hYJYFIxp_ZFONR4wTIJqZ}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250827.dist-info → skypilot_nightly-1.0.0.dev20250829.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250827.dist-info → skypilot_nightly-1.0.0.dev20250829.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250827.dist-info → skypilot_nightly-1.0.0.dev20250829.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250827.dist-info → skypilot_nightly-1.0.0.dev20250829.dist-info}/top_level.txt +0 -0
sky/skypilot_config.py
CHANGED
|
@@ -494,7 +494,7 @@ def reload_config() -> None:
|
|
|
494
494
|
def parse_and_validate_config_file(config_path: str) -> config_utils.Config:
|
|
495
495
|
config = config_utils.Config()
|
|
496
496
|
try:
|
|
497
|
-
config_dict =
|
|
497
|
+
config_dict = yaml_utils.read_yaml(config_path)
|
|
498
498
|
config = config_utils.Config.from_dict(config_dict)
|
|
499
499
|
# pop the db url from the config, and set it to the env var.
|
|
500
500
|
# this is to avoid db url (considered a sensitive value)
|
|
@@ -504,7 +504,7 @@ def parse_and_validate_config_file(config_path: str) -> config_utils.Config:
|
|
|
504
504
|
os.environ[constants.ENV_VAR_DB_CONNECTION_URI] = db_url
|
|
505
505
|
if sky_logging.logging_enabled(logger, sky_logging.DEBUG):
|
|
506
506
|
logger.debug(f'Config loaded from {config_path}:\n'
|
|
507
|
-
f'{
|
|
507
|
+
f'{yaml_utils.dump_yaml_str(dict(config))}')
|
|
508
508
|
except yaml.YAMLError as e:
|
|
509
509
|
logger.error(f'Error in loading config file ({config_path}):', e)
|
|
510
510
|
if config:
|
|
@@ -600,7 +600,7 @@ def _reload_config_as_server() -> None:
|
|
|
600
600
|
sqlalchemy_engine.dispose()
|
|
601
601
|
if sky_logging.logging_enabled(logger, sky_logging.DEBUG):
|
|
602
602
|
logger.debug(f'server config: \n'
|
|
603
|
-
f'{
|
|
603
|
+
f'{yaml_utils.dump_yaml_str(dict(server_config))}')
|
|
604
604
|
_set_loaded_config(server_config)
|
|
605
605
|
_set_loaded_config_path(server_config_path)
|
|
606
606
|
|
|
@@ -628,7 +628,7 @@ def _reload_config_as_client() -> None:
|
|
|
628
628
|
if sky_logging.logging_enabled(logger, sky_logging.DEBUG):
|
|
629
629
|
logger.debug(
|
|
630
630
|
f'client config (before task and CLI overrides): \n'
|
|
631
|
-
f'{
|
|
631
|
+
f'{yaml_utils.dump_yaml_str(dict(overlaid_client_config))}')
|
|
632
632
|
_set_loaded_config(overlaid_client_config)
|
|
633
633
|
_set_loaded_config_path([user_config_path, project_config_path])
|
|
634
634
|
|
|
@@ -738,9 +738,9 @@ def override_skypilot_config(
|
|
|
738
738
|
'Failed to override the SkyPilot config on API '
|
|
739
739
|
'server with your local SkyPilot config:\n'
|
|
740
740
|
'=== SkyPilot config on API server ===\n'
|
|
741
|
-
f'{
|
|
741
|
+
f'{yaml_utils.dump_yaml_str(dict(original_config))}\n'
|
|
742
742
|
'=== Your local SkyPilot config ===\n'
|
|
743
|
-
f'{
|
|
743
|
+
f'{yaml_utils.dump_yaml_str(dict(override_configs))}\n'
|
|
744
744
|
f'Details: {e}') from e
|
|
745
745
|
finally:
|
|
746
746
|
_set_loaded_config(original_config)
|
|
@@ -767,7 +767,7 @@ def replace_skypilot_config(new_configs: config_utils.Config) -> Iterator[None]:
|
|
|
767
767
|
mode='w',
|
|
768
768
|
prefix='mutated-skypilot-config-',
|
|
769
769
|
suffix='.yaml') as temp_file:
|
|
770
|
-
|
|
770
|
+
yaml_utils.dump_yaml(temp_file.name, dict(**new_configs))
|
|
771
771
|
# Modify the env var of current process or context so that the
|
|
772
772
|
# new config will be used by spawned sub-processes.
|
|
773
773
|
# Note that this code modifies os.environ directly because it
|
|
@@ -831,7 +831,7 @@ def apply_cli_config(cli_config: Optional[List[str]]) -> Dict[str, Any]:
|
|
|
831
831
|
parsed_config = _compose_cli_config(cli_config)
|
|
832
832
|
if sky_logging.logging_enabled(logger, sky_logging.DEBUG):
|
|
833
833
|
logger.debug(f'applying following CLI overrides: \n'
|
|
834
|
-
f'{
|
|
834
|
+
f'{yaml_utils.dump_yaml_str(dict(parsed_config))}')
|
|
835
835
|
_set_loaded_config(
|
|
836
836
|
overlay_skypilot_config(original_config=_get_loaded_config(),
|
|
837
837
|
override_configs=parsed_config))
|
|
@@ -875,7 +875,7 @@ def update_api_server_config_no_lock(config: config_utils.Config) -> None:
|
|
|
875
875
|
def _set_config_yaml_to_db(key: str,
|
|
876
876
|
config: config_utils.Config):
|
|
877
877
|
assert sqlalchemy_engine is not None
|
|
878
|
-
config_str =
|
|
878
|
+
config_str = yaml_utils.dump_yaml_str(dict(config))
|
|
879
879
|
with orm.Session(sqlalchemy_engine) as session:
|
|
880
880
|
if (sqlalchemy_engine.dialect.name ==
|
|
881
881
|
db_utils.SQLAlchemyDialect.SQLITE.value):
|
|
@@ -901,7 +901,7 @@ def update_api_server_config_no_lock(config: config_utils.Config) -> None:
|
|
|
901
901
|
|
|
902
902
|
if not db_updated:
|
|
903
903
|
# save to the local file (PVC in Kubernetes, local file otherwise)
|
|
904
|
-
|
|
904
|
+
yaml_utils.dump_yaml(global_config_path, dict(config))
|
|
905
905
|
|
|
906
906
|
if config_map_utils.is_running_in_kubernetes():
|
|
907
907
|
# In Kubernetes, sync the PVC config to ConfigMap for user
|
sky/task.py
CHANGED
|
@@ -564,7 +564,7 @@ class Task:
|
|
|
564
564
|
secrets_overrides: Optional[List[Tuple[str, str]]] = None,
|
|
565
565
|
) -> 'Task':
|
|
566
566
|
user_specified_yaml = config.pop('_user_specified_yaml',
|
|
567
|
-
|
|
567
|
+
yaml_utils.dump_yaml_str(config))
|
|
568
568
|
# More robust handling for 'envs': explicitly convert keys and values to
|
|
569
569
|
# str, since users may pass '123' as keys/values which will get parsed
|
|
570
570
|
# as int causing validate_schema() to fail.
|
sky/templates/aws-ray.yml.j2
CHANGED
sky/templates/nebius-ray.yml.j2
CHANGED
|
@@ -56,15 +56,11 @@ available_node_types:
|
|
|
56
56
|
filesystem_mount_path: {{ fs.filesystem_mount_path }}
|
|
57
57
|
{%- endfor %}
|
|
58
58
|
UserData: |
|
|
59
|
-
runcmd:
|
|
60
|
-
- sudo sed -i 's/^#\?AllowTcpForwarding.*/AllowTcpForwarding yes/' /etc/ssh/sshd_config
|
|
61
|
-
- systemctl restart sshd
|
|
62
|
-
|
|
63
59
|
{# Two available OS images:
|
|
64
|
-
1.
|
|
65
|
-
2.
|
|
66
|
-
To optimize deployment speed, Docker is only installed when using
|
|
67
|
-
{%- if docker_image is not none and image_id
|
|
60
|
+
1. ubuntu24.04-driverless - requires Docker installation
|
|
61
|
+
2. ubuntu24.04-cuda12 - comes with Docker pre-installed
|
|
62
|
+
To optimize deployment speed, Docker is only installed when using ubuntu24.04-driverless #}
|
|
63
|
+
{%- if docker_image is not none and image_id.endswith('-driverless') %}
|
|
68
64
|
apt:
|
|
69
65
|
sources:
|
|
70
66
|
docker.list:
|
sky/usage/usage_lib.py
CHANGED
|
@@ -19,6 +19,7 @@ from sky.usage import constants
|
|
|
19
19
|
from sky.utils import common_utils
|
|
20
20
|
from sky.utils import env_options
|
|
21
21
|
from sky.utils import ux_utils
|
|
22
|
+
from sky.utils import yaml_utils
|
|
22
23
|
|
|
23
24
|
if typing.TYPE_CHECKING:
|
|
24
25
|
import inspect
|
|
@@ -402,7 +403,7 @@ def _clean_yaml(yaml_info: Dict[str, Optional[str]]):
|
|
|
402
403
|
contents = inspect.getsource(contents)
|
|
403
404
|
|
|
404
405
|
if type(contents) in constants.USAGE_MESSAGE_REDACT_TYPES:
|
|
405
|
-
lines =
|
|
406
|
+
lines = yaml_utils.dump_yaml_str({
|
|
406
407
|
redact_type: contents
|
|
407
408
|
}).strip().split('\n')
|
|
408
409
|
message = (f'{len(lines)} lines {redact_type.upper()}'
|
|
@@ -431,7 +432,7 @@ def prepare_json_from_yaml_config(
|
|
|
431
432
|
with open(yaml_config_or_path, 'r', encoding='utf-8') as f:
|
|
432
433
|
lines = f.readlines()
|
|
433
434
|
comment_lines = [line for line in lines if line.startswith('#')]
|
|
434
|
-
yaml_info =
|
|
435
|
+
yaml_info = yaml_utils.read_yaml_all(yaml_config_or_path)
|
|
435
436
|
|
|
436
437
|
for i in range(len(yaml_info)):
|
|
437
438
|
if yaml_info[i] is None:
|
sky/utils/annotations.py
CHANGED
|
@@ -7,7 +7,7 @@ from typing_extensions import ParamSpec
|
|
|
7
7
|
|
|
8
8
|
# Whether the current process is a SkyPilot API server process.
|
|
9
9
|
is_on_api_server = True
|
|
10
|
-
|
|
10
|
+
_FUNCTIONS_NEED_RELOAD_CACHE = []
|
|
11
11
|
|
|
12
12
|
T = TypeVar('T')
|
|
13
13
|
P = ParamSpec('P')
|
|
@@ -50,7 +50,13 @@ def lru_cache(scope: Literal['global', 'request'], *lru_cache_args,
|
|
|
50
50
|
else:
|
|
51
51
|
cached_func = functools.lru_cache(*lru_cache_args,
|
|
52
52
|
**lru_cache_kwargs)(func)
|
|
53
|
-
|
|
53
|
+
_FUNCTIONS_NEED_RELOAD_CACHE.append(cached_func)
|
|
54
54
|
return cached_func
|
|
55
55
|
|
|
56
56
|
return decorator
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def clear_request_level_cache():
|
|
60
|
+
"""Clear the request-level cache."""
|
|
61
|
+
for func in _FUNCTIONS_NEED_RELOAD_CACHE:
|
|
62
|
+
func.cache_clear()
|
sky/utils/cluster_utils.py
CHANGED
|
@@ -11,7 +11,7 @@ import uuid
|
|
|
11
11
|
from sky.skylet import constants
|
|
12
12
|
from sky.utils import command_runner
|
|
13
13
|
from sky.utils import common_utils
|
|
14
|
-
from sky.utils import
|
|
14
|
+
from sky.utils import lock_events
|
|
15
15
|
|
|
16
16
|
# The cluster yaml used to create the current cluster where the module is
|
|
17
17
|
# called.
|
|
@@ -107,7 +107,7 @@ class SSHConfigHelper(object):
|
|
|
107
107
|
return auth_config['ssh_private_key']
|
|
108
108
|
|
|
109
109
|
@classmethod
|
|
110
|
-
@
|
|
110
|
+
@lock_events.FileLockEvent(ssh_conf_lock_path)
|
|
111
111
|
def add_cluster(
|
|
112
112
|
cls,
|
|
113
113
|
cluster_name: str,
|
|
@@ -334,7 +334,7 @@ class SSHConfigHelper(object):
|
|
|
334
334
|
cluster_name: Cluster name.
|
|
335
335
|
"""
|
|
336
336
|
|
|
337
|
-
with
|
|
337
|
+
with lock_events.FileLockEvent(
|
|
338
338
|
cls.ssh_conf_per_cluster_lock_path.format(cluster_name)):
|
|
339
339
|
cluster_config_path = os.path.expanduser(
|
|
340
340
|
cls.ssh_cluster_path.format(cluster_name))
|
sky/utils/common_utils.py
CHANGED
|
@@ -6,7 +6,6 @@ import functools
|
|
|
6
6
|
import getpass
|
|
7
7
|
import hashlib
|
|
8
8
|
import inspect
|
|
9
|
-
import io
|
|
10
9
|
import os
|
|
11
10
|
import platform
|
|
12
11
|
import random
|
|
@@ -30,16 +29,13 @@ from sky.usage import constants as usage_constants
|
|
|
30
29
|
from sky.utils import annotations
|
|
31
30
|
from sky.utils import ux_utils
|
|
32
31
|
from sky.utils import validator
|
|
33
|
-
from sky.utils import yaml_utils
|
|
34
32
|
|
|
35
33
|
if typing.TYPE_CHECKING:
|
|
36
34
|
import jinja2
|
|
37
35
|
import psutil
|
|
38
|
-
import yaml
|
|
39
36
|
else:
|
|
40
37
|
jinja2 = adaptors_common.LazyImport('jinja2')
|
|
41
38
|
psutil = adaptors_common.LazyImport('psutil')
|
|
42
|
-
yaml = adaptors_common.LazyImport('yaml')
|
|
43
39
|
|
|
44
40
|
USER_HASH_FILE = os.path.expanduser('~/.sky/user_hash')
|
|
45
41
|
USER_HASH_LENGTH = 8
|
|
@@ -574,74 +570,6 @@ def user_and_hostname_hash() -> str:
|
|
|
574
570
|
return f'{getpass.getuser()}-{hostname_hash}'
|
|
575
571
|
|
|
576
572
|
|
|
577
|
-
def read_yaml(path: Optional[str]) -> Dict[str, Any]:
|
|
578
|
-
if path is None:
|
|
579
|
-
raise ValueError('Attempted to read a None YAML.')
|
|
580
|
-
with open(path, 'r', encoding='utf-8') as f:
|
|
581
|
-
config = yaml_utils.safe_load(f)
|
|
582
|
-
return config
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
def read_yaml_all_str(yaml_str: str) -> List[Dict[str, Any]]:
|
|
586
|
-
stream = io.StringIO(yaml_str)
|
|
587
|
-
config = yaml_utils.safe_load_all(stream)
|
|
588
|
-
configs = list(config)
|
|
589
|
-
if not configs:
|
|
590
|
-
# Empty YAML file.
|
|
591
|
-
return [{}]
|
|
592
|
-
return configs
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
def read_yaml_all(path: str) -> List[Dict[str, Any]]:
|
|
596
|
-
with open(path, 'r', encoding='utf-8') as f:
|
|
597
|
-
return read_yaml_all_str(f.read())
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
def dump_yaml(path: str,
|
|
601
|
-
config: Union[List[Dict[str, Any]], Dict[str, Any]],
|
|
602
|
-
blank: bool = False) -> None:
|
|
603
|
-
"""Dumps a YAML file.
|
|
604
|
-
|
|
605
|
-
Args:
|
|
606
|
-
path: the path to the YAML file.
|
|
607
|
-
config: the configuration to dump.
|
|
608
|
-
"""
|
|
609
|
-
with open(path, 'w', encoding='utf-8') as f:
|
|
610
|
-
contents = dump_yaml_str(config)
|
|
611
|
-
if blank and isinstance(config, dict) and len(config) == 0:
|
|
612
|
-
# when dumping to yaml, an empty dict will go in as {}.
|
|
613
|
-
contents = ''
|
|
614
|
-
f.write(contents)
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
def dump_yaml_str(config: Union[List[Dict[str, Any]], Dict[str, Any]]) -> str:
|
|
618
|
-
"""Dumps a YAML string.
|
|
619
|
-
|
|
620
|
-
Args:
|
|
621
|
-
config: the configuration to dump.
|
|
622
|
-
|
|
623
|
-
Returns:
|
|
624
|
-
The YAML string.
|
|
625
|
-
"""
|
|
626
|
-
|
|
627
|
-
# https://github.com/yaml/pyyaml/issues/127
|
|
628
|
-
class LineBreakDumper(yaml.SafeDumper):
|
|
629
|
-
|
|
630
|
-
def write_line_break(self, data=None):
|
|
631
|
-
super().write_line_break(data)
|
|
632
|
-
if len(self.indents) == 1:
|
|
633
|
-
super().write_line_break()
|
|
634
|
-
|
|
635
|
-
if isinstance(config, list):
|
|
636
|
-
dump_func = yaml.dump_all # type: ignore
|
|
637
|
-
else:
|
|
638
|
-
dump_func = yaml.dump # type: ignore
|
|
639
|
-
return dump_func(config,
|
|
640
|
-
Dumper=LineBreakDumper,
|
|
641
|
-
sort_keys=False,
|
|
642
|
-
default_flow_style=False)
|
|
643
|
-
|
|
644
|
-
|
|
645
573
|
def make_decorator(cls, name_or_fn: Union[str, Callable],
|
|
646
574
|
**ctx_kwargs) -> Callable:
|
|
647
575
|
"""Make the cls a decorator.
|
sky/utils/controller_utils.py
CHANGED
|
@@ -38,6 +38,7 @@ from sky.utils import env_options
|
|
|
38
38
|
from sky.utils import registry
|
|
39
39
|
from sky.utils import rich_utils
|
|
40
40
|
from sky.utils import ux_utils
|
|
41
|
+
from sky.utils import yaml_utils
|
|
41
42
|
|
|
42
43
|
if typing.TYPE_CHECKING:
|
|
43
44
|
import psutil
|
|
@@ -497,7 +498,7 @@ def shared_controller_vars_to_fill(
|
|
|
497
498
|
with tempfile.NamedTemporaryFile(
|
|
498
499
|
delete=False,
|
|
499
500
|
suffix=_LOCAL_SKYPILOT_CONFIG_PATH_SUFFIX) as temp_file:
|
|
500
|
-
|
|
501
|
+
yaml_utils.dump_yaml(temp_file.name, dict(**local_user_config))
|
|
501
502
|
local_user_config_path = temp_file.name
|
|
502
503
|
|
|
503
504
|
vars_to_fill: Dict[str, Any] = {
|
|
@@ -786,9 +787,9 @@ def replace_skypilot_config_path_in_file_mounts(
|
|
|
786
787
|
continue
|
|
787
788
|
if local_path.endswith(_LOCAL_SKYPILOT_CONFIG_PATH_SUFFIX):
|
|
788
789
|
with tempfile.NamedTemporaryFile('w', delete=False) as f:
|
|
789
|
-
user_config =
|
|
790
|
+
user_config = yaml_utils.read_yaml(local_path)
|
|
790
791
|
config = _setup_proxy_command_on_controller(cloud, user_config)
|
|
791
|
-
|
|
792
|
+
yaml_utils.dump_yaml(f.name, dict(**config))
|
|
792
793
|
file_mounts[remote_path] = f.name
|
|
793
794
|
replaced = True
|
|
794
795
|
if replaced:
|
sky/utils/dag_utils.py
CHANGED
|
@@ -6,9 +6,9 @@ from sky import dag as dag_lib
|
|
|
6
6
|
from sky import sky_logging
|
|
7
7
|
from sky import task as task_lib
|
|
8
8
|
from sky.utils import cluster_utils
|
|
9
|
-
from sky.utils import common_utils
|
|
10
9
|
from sky.utils import registry
|
|
11
10
|
from sky.utils import ux_utils
|
|
11
|
+
from sky.utils import yaml_utils
|
|
12
12
|
|
|
13
13
|
logger = sky_logging.init_logger(__name__)
|
|
14
14
|
|
|
@@ -117,7 +117,7 @@ def load_chain_dag_from_yaml(
|
|
|
117
117
|
A chain Dag with 1 or more tasks (an empty entrypoint would create a
|
|
118
118
|
trivial task).
|
|
119
119
|
"""
|
|
120
|
-
configs =
|
|
120
|
+
configs = yaml_utils.read_yaml_all(path)
|
|
121
121
|
return _load_chain_dag(configs, env_overrides, secret_overrides)
|
|
122
122
|
|
|
123
123
|
|
|
@@ -143,7 +143,7 @@ def load_chain_dag_from_yaml_str(
|
|
|
143
143
|
A chain Dag with 1 or more tasks (an empty entrypoint would create a
|
|
144
144
|
trivial task).
|
|
145
145
|
"""
|
|
146
|
-
configs =
|
|
146
|
+
configs = yaml_utils.read_yaml_all_str(yaml_str)
|
|
147
147
|
return _load_chain_dag(configs, env_overrides, secrets_overrides)
|
|
148
148
|
|
|
149
149
|
|
|
@@ -164,7 +164,7 @@ def dump_chain_dag_to_yaml_str(dag: dag_lib.Dag,
|
|
|
164
164
|
configs.append(
|
|
165
165
|
task.to_yaml_config(
|
|
166
166
|
use_user_specified_yaml=use_user_specified_yaml))
|
|
167
|
-
return
|
|
167
|
+
return yaml_utils.dump_yaml_str(configs)
|
|
168
168
|
|
|
169
169
|
|
|
170
170
|
def dump_chain_dag_to_yaml(dag: dag_lib.Dag, path: str) -> None:
|
sky/utils/db/db_utils.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""Utils for sky databases."""
|
|
2
|
+
import asyncio
|
|
2
3
|
import contextlib
|
|
3
4
|
import enum
|
|
4
5
|
import sqlite3
|
|
@@ -6,6 +7,7 @@ import threading
|
|
|
6
7
|
import typing
|
|
7
8
|
from typing import Any, Callable, Optional
|
|
8
9
|
|
|
10
|
+
import aiosqlite
|
|
9
11
|
import sqlalchemy
|
|
10
12
|
from sqlalchemy import exc as sqlalchemy_exc
|
|
11
13
|
|
|
@@ -283,3 +285,12 @@ class SQLiteConn(threading.local):
|
|
|
283
285
|
self.conn = sqlite3.connect(db_path, timeout=_DB_TIMEOUT_S)
|
|
284
286
|
self.cursor = self.conn.cursor()
|
|
285
287
|
create_table(self.cursor, self.conn)
|
|
288
|
+
self._async_conn: Optional[aiosqlite.Connection] = None
|
|
289
|
+
self._async_conn_lock = asyncio.Lock()
|
|
290
|
+
|
|
291
|
+
async def async_conn(self) -> aiosqlite.Connection:
|
|
292
|
+
if self._async_conn is None:
|
|
293
|
+
async with self._async_conn_lock:
|
|
294
|
+
if self._async_conn is None:
|
|
295
|
+
self._async_conn = await aiosqlite.connect(self.db_path)
|
|
296
|
+
return self._async_conn
|
sky/utils/db/migration_utils.py
CHANGED
|
@@ -19,7 +19,7 @@ logger = sky_logging.init_logger(__name__)
|
|
|
19
19
|
DB_INIT_LOCK_TIMEOUT_SECONDS = 10
|
|
20
20
|
|
|
21
21
|
GLOBAL_USER_STATE_DB_NAME = 'state_db'
|
|
22
|
-
GLOBAL_USER_STATE_VERSION = '
|
|
22
|
+
GLOBAL_USER_STATE_VERSION = '007'
|
|
23
23
|
GLOBAL_USER_STATE_LOCK_PATH = '~/.sky/locks/.state_db.lock'
|
|
24
24
|
|
|
25
25
|
SPOT_JOBS_DB_NAME = 'spot_jobs_db'
|
sky/utils/kubernetes/config_map_utils.py
CHANGED
|
@@ -4,7 +4,7 @@ import os
|
|
|
4
4
|
from sky import sky_logging
|
|
5
5
|
from sky import skypilot_config
|
|
6
6
|
from sky.adaptors import kubernetes
|
|
7
|
-
from sky.utils import
|
|
7
|
+
from sky.utils import yaml_utils
|
|
8
8
|
|
|
9
9
|
logger = sky_logging.init_logger(__name__)
|
|
10
10
|
|
|
@@ -69,7 +69,7 @@ def initialize_configmap_sync_on_startup(config_file_path: str) -> None:
|
|
|
69
69
|
|
|
70
70
|
current_config = skypilot_config.parse_and_validate_config_file(
|
|
71
71
|
config_file_path)
|
|
72
|
-
config_yaml =
|
|
72
|
+
config_yaml = yaml_utils.dump_yaml_str(dict(current_config))
|
|
73
73
|
|
|
74
74
|
configmap_body = {
|
|
75
75
|
'apiVersion': 'v1',
|
|
@@ -111,7 +111,7 @@ def patch_configmap_with_config(config, config_file_path: str) -> None:
|
|
|
111
111
|
try:
|
|
112
112
|
namespace = _get_kubernetes_namespace()
|
|
113
113
|
configmap_name = _get_configmap_name()
|
|
114
|
-
config_yaml =
|
|
114
|
+
config_yaml = yaml_utils.dump_yaml_str(dict(config))
|
|
115
115
|
patch_body = {'data': {'config.yaml': config_yaml}}
|
|
116
116
|
|
|
117
117
|
try:
|
sky/utils/kubernetes_enums.py
CHANGED
sky/utils/lock_events.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
"""Lock events."""
|
|
2
|
+
|
|
3
|
+
import functools
|
|
4
|
+
import os
|
|
5
|
+
from typing import Optional, Union
|
|
6
|
+
|
|
7
|
+
import filelock
|
|
8
|
+
|
|
9
|
+
from sky.utils import locks
|
|
10
|
+
from sky.utils import timeline
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class DistributedLockEvent:
|
|
14
|
+
"""Serve both as a distributed lock and event for the lock."""
|
|
15
|
+
|
|
16
|
+
def __init__(self, lock_id: str, timeout: Optional[float] = None):
|
|
17
|
+
self._lock_id = lock_id
|
|
18
|
+
self._lock = locks.get_lock(lock_id, timeout)
|
|
19
|
+
self._hold_lock_event = timeline.Event(
|
|
20
|
+
f'[DistributedLock.hold]:{lock_id}')
|
|
21
|
+
|
|
22
|
+
def acquire(self):
|
|
23
|
+
was_locked = self._lock.is_locked
|
|
24
|
+
with timeline.Event(f'[DistributedLock.acquire]:{self._lock_id}'):
|
|
25
|
+
self._lock.acquire()
|
|
26
|
+
if not was_locked and self._lock.is_locked:
|
|
27
|
+
# start holding the lock after initial acquiring
|
|
28
|
+
self._hold_lock_event.begin()
|
|
29
|
+
|
|
30
|
+
def release(self):
|
|
31
|
+
was_locked = self._lock.is_locked
|
|
32
|
+
self._lock.release()
|
|
33
|
+
if was_locked and not self._lock.is_locked:
|
|
34
|
+
# stop holding the lock after initial releasing
|
|
35
|
+
self._hold_lock_event.end()
|
|
36
|
+
|
|
37
|
+
def __enter__(self):
|
|
38
|
+
self.acquire()
|
|
39
|
+
return self
|
|
40
|
+
|
|
41
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
42
|
+
self.release()
|
|
43
|
+
|
|
44
|
+
def __call__(self, f):
|
|
45
|
+
|
|
46
|
+
@functools.wraps(f)
|
|
47
|
+
def wrapper(*args, **kwargs):
|
|
48
|
+
with self:
|
|
49
|
+
return f(*args, **kwargs)
|
|
50
|
+
|
|
51
|
+
return wrapper
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class FileLockEvent:
|
|
55
|
+
"""Serve both as a file lock and event for the lock."""
|
|
56
|
+
|
|
57
|
+
def __init__(self, lockfile: Union[str, os.PathLike], timeout: float = -1):
|
|
58
|
+
self._lockfile = lockfile
|
|
59
|
+
os.makedirs(os.path.dirname(os.path.abspath(self._lockfile)),
|
|
60
|
+
exist_ok=True)
|
|
61
|
+
self._lock = filelock.FileLock(self._lockfile, timeout)
|
|
62
|
+
self._hold_lock_event = timeline.Event(
|
|
63
|
+
f'[FileLock.hold]:{self._lockfile}')
|
|
64
|
+
|
|
65
|
+
def acquire(self):
|
|
66
|
+
was_locked = self._lock.is_locked
|
|
67
|
+
with timeline.Event(f'[FileLock.acquire]:{self._lockfile}'):
|
|
68
|
+
self._lock.acquire()
|
|
69
|
+
if not was_locked and self._lock.is_locked:
|
|
70
|
+
# start holding the lock after initial acquiring
|
|
71
|
+
self._hold_lock_event.begin()
|
|
72
|
+
|
|
73
|
+
def release(self):
|
|
74
|
+
was_locked = self._lock.is_locked
|
|
75
|
+
self._lock.release()
|
|
76
|
+
if was_locked and not self._lock.is_locked:
|
|
77
|
+
# stop holding the lock after initial releasing
|
|
78
|
+
self._hold_lock_event.end()
|
|
79
|
+
|
|
80
|
+
def __enter__(self):
|
|
81
|
+
self.acquire()
|
|
82
|
+
return self
|
|
83
|
+
|
|
84
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
85
|
+
self.release()
|
|
86
|
+
|
|
87
|
+
def __call__(self, f):
|
|
88
|
+
# Make this class callable as a decorator.
|
|
89
|
+
@functools.wraps(f)
|
|
90
|
+
def wrapper(*args, **kwargs):
|
|
91
|
+
with self:
|
|
92
|
+
return f(*args, **kwargs)
|
|
93
|
+
|
|
94
|
+
return wrapper
|