skypilot-nightly 1.0.0.dev20250827__py3-none-any.whl → 1.0.0.dev20250829__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (86)
  1. sky/__init__.py +2 -2
  2. sky/admin_policy.py +11 -10
  3. sky/authentication.py +1 -1
  4. sky/backends/backend.py +3 -5
  5. sky/backends/backend_utils.py +140 -52
  6. sky/backends/cloud_vm_ray_backend.py +30 -25
  7. sky/backends/local_docker_backend.py +3 -8
  8. sky/backends/wheel_utils.py +35 -8
  9. sky/client/cli/command.py +41 -9
  10. sky/client/sdk.py +23 -8
  11. sky/client/sdk_async.py +6 -2
  12. sky/clouds/aws.py +118 -1
  13. sky/core.py +1 -4
  14. sky/dashboard/out/404.html +1 -1
  15. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  16. sky/dashboard/out/clusters/[cluster].html +1 -1
  17. sky/dashboard/out/clusters.html +1 -1
  18. sky/dashboard/out/config.html +1 -1
  19. sky/dashboard/out/index.html +1 -1
  20. sky/dashboard/out/infra/[context].html +1 -1
  21. sky/dashboard/out/infra.html +1 -1
  22. sky/dashboard/out/jobs/[job].html +1 -1
  23. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  24. sky/dashboard/out/jobs.html +1 -1
  25. sky/dashboard/out/users.html +1 -1
  26. sky/dashboard/out/volumes.html +1 -1
  27. sky/dashboard/out/workspace/new.html +1 -1
  28. sky/dashboard/out/workspaces/[name].html +1 -1
  29. sky/dashboard/out/workspaces.html +1 -1
  30. sky/global_user_state.py +82 -22
  31. sky/jobs/client/sdk.py +5 -2
  32. sky/jobs/recovery_strategy.py +9 -4
  33. sky/jobs/server/server.py +2 -1
  34. sky/logs/agent.py +2 -2
  35. sky/logs/aws.py +6 -3
  36. sky/provision/aws/config.py +78 -3
  37. sky/provision/aws/instance.py +45 -6
  38. sky/provision/do/utils.py +2 -1
  39. sky/provision/kubernetes/instance.py +55 -11
  40. sky/provision/kubernetes/utils.py +11 -2
  41. sky/provision/nebius/utils.py +36 -2
  42. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  43. sky/serve/client/impl.py +5 -4
  44. sky/serve/replica_managers.py +4 -3
  45. sky/serve/serve_utils.py +2 -2
  46. sky/serve/server/impl.py +3 -2
  47. sky/serve/server/server.py +2 -1
  48. sky/server/auth/oauth2_proxy.py +10 -4
  49. sky/server/common.py +4 -4
  50. sky/server/daemons.py +16 -5
  51. sky/server/requests/executor.py +5 -3
  52. sky/server/requests/payloads.py +3 -1
  53. sky/server/requests/preconditions.py +3 -2
  54. sky/server/requests/requests.py +121 -19
  55. sky/server/server.py +85 -60
  56. sky/server/stream_utils.py +7 -5
  57. sky/setup_files/dependencies.py +6 -1
  58. sky/sky_logging.py +28 -0
  59. sky/skylet/constants.py +6 -0
  60. sky/skylet/events.py +2 -3
  61. sky/skypilot_config.py +10 -10
  62. sky/task.py +1 -1
  63. sky/templates/aws-ray.yml.j2 +1 -0
  64. sky/templates/nebius-ray.yml.j2 +4 -8
  65. sky/usage/usage_lib.py +3 -2
  66. sky/utils/annotations.py +8 -2
  67. sky/utils/cluster_utils.py +3 -3
  68. sky/utils/common_utils.py +0 -72
  69. sky/utils/controller_utils.py +4 -3
  70. sky/utils/dag_utils.py +4 -4
  71. sky/utils/db/db_utils.py +11 -0
  72. sky/utils/db/migration_utils.py +1 -1
  73. sky/utils/kubernetes/config_map_utils.py +3 -3
  74. sky/utils/kubernetes_enums.py +1 -0
  75. sky/utils/lock_events.py +94 -0
  76. sky/utils/schemas.py +3 -0
  77. sky/utils/timeline.py +24 -93
  78. sky/utils/yaml_utils.py +77 -10
  79. {skypilot_nightly-1.0.0.dev20250827.dist-info → skypilot_nightly-1.0.0.dev20250829.dist-info}/METADATA +8 -2
  80. {skypilot_nightly-1.0.0.dev20250827.dist-info → skypilot_nightly-1.0.0.dev20250829.dist-info}/RECORD +86 -84
  81. /sky/dashboard/out/_next/static/{-eL7Ky3bxVivzeLHNB9U6 → hYJYFIxp_ZFONR4wTIJqZ}/_buildManifest.js +0 -0
  82. /sky/dashboard/out/_next/static/{-eL7Ky3bxVivzeLHNB9U6 → hYJYFIxp_ZFONR4wTIJqZ}/_ssgManifest.js +0 -0
  83. {skypilot_nightly-1.0.0.dev20250827.dist-info → skypilot_nightly-1.0.0.dev20250829.dist-info}/WHEEL +0 -0
  84. {skypilot_nightly-1.0.0.dev20250827.dist-info → skypilot_nightly-1.0.0.dev20250829.dist-info}/entry_points.txt +0 -0
  85. {skypilot_nightly-1.0.0.dev20250827.dist-info → skypilot_nightly-1.0.0.dev20250829.dist-info}/licenses/LICENSE +0 -0
  86. {skypilot_nightly-1.0.0.dev20250827.dist-info → skypilot_nightly-1.0.0.dev20250829.dist-info}/top_level.txt +0 -0
sky/skypilot_config.py CHANGED
@@ -494,7 +494,7 @@ def reload_config() -> None:
494
494
  def parse_and_validate_config_file(config_path: str) -> config_utils.Config:
495
495
  config = config_utils.Config()
496
496
  try:
497
- config_dict = common_utils.read_yaml(config_path)
497
+ config_dict = yaml_utils.read_yaml(config_path)
498
498
  config = config_utils.Config.from_dict(config_dict)
499
499
  # pop the db url from the config, and set it to the env var.
500
500
  # this is to avoid db url (considered a sensitive value)
@@ -504,7 +504,7 @@ def parse_and_validate_config_file(config_path: str) -> config_utils.Config:
504
504
  os.environ[constants.ENV_VAR_DB_CONNECTION_URI] = db_url
505
505
  if sky_logging.logging_enabled(logger, sky_logging.DEBUG):
506
506
  logger.debug(f'Config loaded from {config_path}:\n'
507
- f'{common_utils.dump_yaml_str(dict(config))}')
507
+ f'{yaml_utils.dump_yaml_str(dict(config))}')
508
508
  except yaml.YAMLError as e:
509
509
  logger.error(f'Error in loading config file ({config_path}):', e)
510
510
  if config:
@@ -600,7 +600,7 @@ def _reload_config_as_server() -> None:
600
600
  sqlalchemy_engine.dispose()
601
601
  if sky_logging.logging_enabled(logger, sky_logging.DEBUG):
602
602
  logger.debug(f'server config: \n'
603
- f'{common_utils.dump_yaml_str(dict(server_config))}')
603
+ f'{yaml_utils.dump_yaml_str(dict(server_config))}')
604
604
  _set_loaded_config(server_config)
605
605
  _set_loaded_config_path(server_config_path)
606
606
 
@@ -628,7 +628,7 @@ def _reload_config_as_client() -> None:
628
628
  if sky_logging.logging_enabled(logger, sky_logging.DEBUG):
629
629
  logger.debug(
630
630
  f'client config (before task and CLI overrides): \n'
631
- f'{common_utils.dump_yaml_str(dict(overlaid_client_config))}')
631
+ f'{yaml_utils.dump_yaml_str(dict(overlaid_client_config))}')
632
632
  _set_loaded_config(overlaid_client_config)
633
633
  _set_loaded_config_path([user_config_path, project_config_path])
634
634
 
@@ -738,9 +738,9 @@ def override_skypilot_config(
738
738
  'Failed to override the SkyPilot config on API '
739
739
  'server with your local SkyPilot config:\n'
740
740
  '=== SkyPilot config on API server ===\n'
741
- f'{common_utils.dump_yaml_str(dict(original_config))}\n'
741
+ f'{yaml_utils.dump_yaml_str(dict(original_config))}\n'
742
742
  '=== Your local SkyPilot config ===\n'
743
- f'{common_utils.dump_yaml_str(dict(override_configs))}\n'
743
+ f'{yaml_utils.dump_yaml_str(dict(override_configs))}\n'
744
744
  f'Details: {e}') from e
745
745
  finally:
746
746
  _set_loaded_config(original_config)
@@ -767,7 +767,7 @@ def replace_skypilot_config(new_configs: config_utils.Config) -> Iterator[None]:
767
767
  mode='w',
768
768
  prefix='mutated-skypilot-config-',
769
769
  suffix='.yaml') as temp_file:
770
- common_utils.dump_yaml(temp_file.name, dict(**new_configs))
770
+ yaml_utils.dump_yaml(temp_file.name, dict(**new_configs))
771
771
  # Modify the env var of current process or context so that the
772
772
  # new config will be used by spawned sub-processes.
773
773
  # Note that this code modifies os.environ directly because it
@@ -831,7 +831,7 @@ def apply_cli_config(cli_config: Optional[List[str]]) -> Dict[str, Any]:
831
831
  parsed_config = _compose_cli_config(cli_config)
832
832
  if sky_logging.logging_enabled(logger, sky_logging.DEBUG):
833
833
  logger.debug(f'applying following CLI overrides: \n'
834
- f'{common_utils.dump_yaml_str(dict(parsed_config))}')
834
+ f'{yaml_utils.dump_yaml_str(dict(parsed_config))}')
835
835
  _set_loaded_config(
836
836
  overlay_skypilot_config(original_config=_get_loaded_config(),
837
837
  override_configs=parsed_config))
@@ -875,7 +875,7 @@ def update_api_server_config_no_lock(config: config_utils.Config) -> None:
875
875
  def _set_config_yaml_to_db(key: str,
876
876
  config: config_utils.Config):
877
877
  assert sqlalchemy_engine is not None
878
- config_str = common_utils.dump_yaml_str(dict(config))
878
+ config_str = yaml_utils.dump_yaml_str(dict(config))
879
879
  with orm.Session(sqlalchemy_engine) as session:
880
880
  if (sqlalchemy_engine.dialect.name ==
881
881
  db_utils.SQLAlchemyDialect.SQLITE.value):
@@ -901,7 +901,7 @@ def update_api_server_config_no_lock(config: config_utils.Config) -> None:
901
901
 
902
902
  if not db_updated:
903
903
  # save to the local file (PVC in Kubernetes, local file otherwise)
904
- common_utils.dump_yaml(global_config_path, dict(config))
904
+ yaml_utils.dump_yaml(global_config_path, dict(config))
905
905
 
906
906
  if config_map_utils.is_running_in_kubernetes():
907
907
  # In Kubernetes, sync the PVC config to ConfigMap for user
sky/task.py CHANGED
@@ -564,7 +564,7 @@ class Task:
564
564
  secrets_overrides: Optional[List[Tuple[str, str]]] = None,
565
565
  ) -> 'Task':
566
566
  user_specified_yaml = config.pop('_user_specified_yaml',
567
- common_utils.dump_yaml_str(config))
567
+ yaml_utils.dump_yaml_str(config))
568
568
  # More robust handling for 'envs': explicitly convert keys and values to
569
569
  # str, since users may pass '123' as keys/values which will get parsed
570
570
  # as int causing validate_schema() to fail.
@@ -48,6 +48,7 @@ provider:
48
48
  # The upper-level SkyPilot code has make sure there will not be resource
49
49
  # leakage.
50
50
  disable_launch_config_check: true
51
+ max_efa_interfaces: {{max_efa_interfaces}}
51
52
 
52
53
  auth:
53
54
  ssh_user: {{ssh_user}}
@@ -56,15 +56,11 @@ available_node_types:
56
56
  filesystem_mount_path: {{ fs.filesystem_mount_path }}
57
57
  {%- endfor %}
58
58
  UserData: |
59
- runcmd:
60
- - sudo sed -i 's/^#\?AllowTcpForwarding.*/AllowTcpForwarding yes/' /etc/ssh/sshd_config
61
- - systemctl restart sshd
62
-
63
59
  {# Two available OS images:
64
- 1. ubuntu22.04-driverless - requires Docker installation
65
- 2. ubuntu22.04-cuda12 - comes with Docker pre-installed
66
- To optimize deployment speed, Docker is only installed when using ubuntu22.04-driverless #}
67
- {%- if docker_image is not none and image_id == 'ubuntu22.04-driverless' %}
60
+ 1. ubuntu24.04-driverless - requires Docker installation
61
+ 2. ubuntu24.04-cuda12 - comes with Docker pre-installed
62
+ To optimize deployment speed, Docker is only installed when using ubuntu24.04-driverless #}
63
+ {%- if docker_image is not none and image_id.endswith('-driverless') %}
68
64
  apt:
69
65
  sources:
70
66
  docker.list:
sky/usage/usage_lib.py CHANGED
@@ -19,6 +19,7 @@ from sky.usage import constants
19
19
  from sky.utils import common_utils
20
20
  from sky.utils import env_options
21
21
  from sky.utils import ux_utils
22
+ from sky.utils import yaml_utils
22
23
 
23
24
  if typing.TYPE_CHECKING:
24
25
  import inspect
@@ -402,7 +403,7 @@ def _clean_yaml(yaml_info: Dict[str, Optional[str]]):
402
403
  contents = inspect.getsource(contents)
403
404
 
404
405
  if type(contents) in constants.USAGE_MESSAGE_REDACT_TYPES:
405
- lines = common_utils.dump_yaml_str({
406
+ lines = yaml_utils.dump_yaml_str({
406
407
  redact_type: contents
407
408
  }).strip().split('\n')
408
409
  message = (f'{len(lines)} lines {redact_type.upper()}'
@@ -431,7 +432,7 @@ def prepare_json_from_yaml_config(
431
432
  with open(yaml_config_or_path, 'r', encoding='utf-8') as f:
432
433
  lines = f.readlines()
433
434
  comment_lines = [line for line in lines if line.startswith('#')]
434
- yaml_info = common_utils.read_yaml_all(yaml_config_or_path)
435
+ yaml_info = yaml_utils.read_yaml_all(yaml_config_or_path)
435
436
 
436
437
  for i in range(len(yaml_info)):
437
438
  if yaml_info[i] is None:
sky/utils/annotations.py CHANGED
@@ -7,7 +7,7 @@ from typing_extensions import ParamSpec
7
7
 
8
8
  # Whether the current process is a SkyPilot API server process.
9
9
  is_on_api_server = True
10
- FUNCTIONS_NEED_RELOAD_CACHE = []
10
+ _FUNCTIONS_NEED_RELOAD_CACHE = []
11
11
 
12
12
  T = TypeVar('T')
13
13
  P = ParamSpec('P')
@@ -50,7 +50,13 @@ def lru_cache(scope: Literal['global', 'request'], *lru_cache_args,
50
50
  else:
51
51
  cached_func = functools.lru_cache(*lru_cache_args,
52
52
  **lru_cache_kwargs)(func)
53
- FUNCTIONS_NEED_RELOAD_CACHE.append(cached_func)
53
+ _FUNCTIONS_NEED_RELOAD_CACHE.append(cached_func)
54
54
  return cached_func
55
55
 
56
56
  return decorator
57
+
58
+
59
+ def clear_request_level_cache():
60
+ """Clear the request-level cache."""
61
+ for func in _FUNCTIONS_NEED_RELOAD_CACHE:
62
+ func.cache_clear()
@@ -11,7 +11,7 @@ import uuid
11
11
  from sky.skylet import constants
12
12
  from sky.utils import command_runner
13
13
  from sky.utils import common_utils
14
- from sky.utils import timeline
14
+ from sky.utils import lock_events
15
15
 
16
16
  # The cluster yaml used to create the current cluster where the module is
17
17
  # called.
@@ -107,7 +107,7 @@ class SSHConfigHelper(object):
107
107
  return auth_config['ssh_private_key']
108
108
 
109
109
  @classmethod
110
- @timeline.FileLockEvent(ssh_conf_lock_path)
110
+ @lock_events.FileLockEvent(ssh_conf_lock_path)
111
111
  def add_cluster(
112
112
  cls,
113
113
  cluster_name: str,
@@ -334,7 +334,7 @@ class SSHConfigHelper(object):
334
334
  cluster_name: Cluster name.
335
335
  """
336
336
 
337
- with timeline.FileLockEvent(
337
+ with lock_events.FileLockEvent(
338
338
  cls.ssh_conf_per_cluster_lock_path.format(cluster_name)):
339
339
  cluster_config_path = os.path.expanduser(
340
340
  cls.ssh_cluster_path.format(cluster_name))
sky/utils/common_utils.py CHANGED
@@ -6,7 +6,6 @@ import functools
6
6
  import getpass
7
7
  import hashlib
8
8
  import inspect
9
- import io
10
9
  import os
11
10
  import platform
12
11
  import random
@@ -30,16 +29,13 @@ from sky.usage import constants as usage_constants
30
29
  from sky.utils import annotations
31
30
  from sky.utils import ux_utils
32
31
  from sky.utils import validator
33
- from sky.utils import yaml_utils
34
32
 
35
33
  if typing.TYPE_CHECKING:
36
34
  import jinja2
37
35
  import psutil
38
- import yaml
39
36
  else:
40
37
  jinja2 = adaptors_common.LazyImport('jinja2')
41
38
  psutil = adaptors_common.LazyImport('psutil')
42
- yaml = adaptors_common.LazyImport('yaml')
43
39
 
44
40
  USER_HASH_FILE = os.path.expanduser('~/.sky/user_hash')
45
41
  USER_HASH_LENGTH = 8
@@ -574,74 +570,6 @@ def user_and_hostname_hash() -> str:
574
570
  return f'{getpass.getuser()}-{hostname_hash}'
575
571
 
576
572
 
577
- def read_yaml(path: Optional[str]) -> Dict[str, Any]:
578
- if path is None:
579
- raise ValueError('Attempted to read a None YAML.')
580
- with open(path, 'r', encoding='utf-8') as f:
581
- config = yaml_utils.safe_load(f)
582
- return config
583
-
584
-
585
- def read_yaml_all_str(yaml_str: str) -> List[Dict[str, Any]]:
586
- stream = io.StringIO(yaml_str)
587
- config = yaml_utils.safe_load_all(stream)
588
- configs = list(config)
589
- if not configs:
590
- # Empty YAML file.
591
- return [{}]
592
- return configs
593
-
594
-
595
- def read_yaml_all(path: str) -> List[Dict[str, Any]]:
596
- with open(path, 'r', encoding='utf-8') as f:
597
- return read_yaml_all_str(f.read())
598
-
599
-
600
- def dump_yaml(path: str,
601
- config: Union[List[Dict[str, Any]], Dict[str, Any]],
602
- blank: bool = False) -> None:
603
- """Dumps a YAML file.
604
-
605
- Args:
606
- path: the path to the YAML file.
607
- config: the configuration to dump.
608
- """
609
- with open(path, 'w', encoding='utf-8') as f:
610
- contents = dump_yaml_str(config)
611
- if blank and isinstance(config, dict) and len(config) == 0:
612
- # when dumping to yaml, an empty dict will go in as {}.
613
- contents = ''
614
- f.write(contents)
615
-
616
-
617
- def dump_yaml_str(config: Union[List[Dict[str, Any]], Dict[str, Any]]) -> str:
618
- """Dumps a YAML string.
619
-
620
- Args:
621
- config: the configuration to dump.
622
-
623
- Returns:
624
- The YAML string.
625
- """
626
-
627
- # https://github.com/yaml/pyyaml/issues/127
628
- class LineBreakDumper(yaml.SafeDumper):
629
-
630
- def write_line_break(self, data=None):
631
- super().write_line_break(data)
632
- if len(self.indents) == 1:
633
- super().write_line_break()
634
-
635
- if isinstance(config, list):
636
- dump_func = yaml.dump_all # type: ignore
637
- else:
638
- dump_func = yaml.dump # type: ignore
639
- return dump_func(config,
640
- Dumper=LineBreakDumper,
641
- sort_keys=False,
642
- default_flow_style=False)
643
-
644
-
645
573
  def make_decorator(cls, name_or_fn: Union[str, Callable],
646
574
  **ctx_kwargs) -> Callable:
647
575
  """Make the cls a decorator.
@@ -38,6 +38,7 @@ from sky.utils import env_options
38
38
  from sky.utils import registry
39
39
  from sky.utils import rich_utils
40
40
  from sky.utils import ux_utils
41
+ from sky.utils import yaml_utils
41
42
 
42
43
  if typing.TYPE_CHECKING:
43
44
  import psutil
@@ -497,7 +498,7 @@ def shared_controller_vars_to_fill(
497
498
  with tempfile.NamedTemporaryFile(
498
499
  delete=False,
499
500
  suffix=_LOCAL_SKYPILOT_CONFIG_PATH_SUFFIX) as temp_file:
500
- common_utils.dump_yaml(temp_file.name, dict(**local_user_config))
501
+ yaml_utils.dump_yaml(temp_file.name, dict(**local_user_config))
501
502
  local_user_config_path = temp_file.name
502
503
 
503
504
  vars_to_fill: Dict[str, Any] = {
@@ -786,9 +787,9 @@ def replace_skypilot_config_path_in_file_mounts(
786
787
  continue
787
788
  if local_path.endswith(_LOCAL_SKYPILOT_CONFIG_PATH_SUFFIX):
788
789
  with tempfile.NamedTemporaryFile('w', delete=False) as f:
789
- user_config = common_utils.read_yaml(local_path)
790
+ user_config = yaml_utils.read_yaml(local_path)
790
791
  config = _setup_proxy_command_on_controller(cloud, user_config)
791
- common_utils.dump_yaml(f.name, dict(**config))
792
+ yaml_utils.dump_yaml(f.name, dict(**config))
792
793
  file_mounts[remote_path] = f.name
793
794
  replaced = True
794
795
  if replaced:
sky/utils/dag_utils.py CHANGED
@@ -6,9 +6,9 @@ from sky import dag as dag_lib
6
6
  from sky import sky_logging
7
7
  from sky import task as task_lib
8
8
  from sky.utils import cluster_utils
9
- from sky.utils import common_utils
10
9
  from sky.utils import registry
11
10
  from sky.utils import ux_utils
11
+ from sky.utils import yaml_utils
12
12
 
13
13
  logger = sky_logging.init_logger(__name__)
14
14
 
@@ -117,7 +117,7 @@ def load_chain_dag_from_yaml(
117
117
  A chain Dag with 1 or more tasks (an empty entrypoint would create a
118
118
  trivial task).
119
119
  """
120
- configs = common_utils.read_yaml_all(path)
120
+ configs = yaml_utils.read_yaml_all(path)
121
121
  return _load_chain_dag(configs, env_overrides, secret_overrides)
122
122
 
123
123
 
@@ -143,7 +143,7 @@ def load_chain_dag_from_yaml_str(
143
143
  A chain Dag with 1 or more tasks (an empty entrypoint would create a
144
144
  trivial task).
145
145
  """
146
- configs = common_utils.read_yaml_all_str(yaml_str)
146
+ configs = yaml_utils.read_yaml_all_str(yaml_str)
147
147
  return _load_chain_dag(configs, env_overrides, secrets_overrides)
148
148
 
149
149
 
@@ -164,7 +164,7 @@ def dump_chain_dag_to_yaml_str(dag: dag_lib.Dag,
164
164
  configs.append(
165
165
  task.to_yaml_config(
166
166
  use_user_specified_yaml=use_user_specified_yaml))
167
- return common_utils.dump_yaml_str(configs)
167
+ return yaml_utils.dump_yaml_str(configs)
168
168
 
169
169
 
170
170
  def dump_chain_dag_to_yaml(dag: dag_lib.Dag, path: str) -> None:
sky/utils/db/db_utils.py CHANGED
@@ -1,4 +1,5 @@
1
1
  """Utils for sky databases."""
2
+ import asyncio
2
3
  import contextlib
3
4
  import enum
4
5
  import sqlite3
@@ -6,6 +7,7 @@ import threading
6
7
  import typing
7
8
  from typing import Any, Callable, Optional
8
9
 
10
+ import aiosqlite
9
11
  import sqlalchemy
10
12
  from sqlalchemy import exc as sqlalchemy_exc
11
13
 
@@ -283,3 +285,12 @@ class SQLiteConn(threading.local):
283
285
  self.conn = sqlite3.connect(db_path, timeout=_DB_TIMEOUT_S)
284
286
  self.cursor = self.conn.cursor()
285
287
  create_table(self.cursor, self.conn)
288
+ self._async_conn: Optional[aiosqlite.Connection] = None
289
+ self._async_conn_lock = asyncio.Lock()
290
+
291
+ async def async_conn(self) -> aiosqlite.Connection:
292
+ if self._async_conn is None:
293
+ async with self._async_conn_lock:
294
+ if self._async_conn is None:
295
+ self._async_conn = await aiosqlite.connect(self.db_path)
296
+ return self._async_conn
@@ -19,7 +19,7 @@ logger = sky_logging.init_logger(__name__)
19
19
  DB_INIT_LOCK_TIMEOUT_SECONDS = 10
20
20
 
21
21
  GLOBAL_USER_STATE_DB_NAME = 'state_db'
22
- GLOBAL_USER_STATE_VERSION = '006'
22
+ GLOBAL_USER_STATE_VERSION = '007'
23
23
  GLOBAL_USER_STATE_LOCK_PATH = '~/.sky/locks/.state_db.lock'
24
24
 
25
25
  SPOT_JOBS_DB_NAME = 'spot_jobs_db'
@@ -4,7 +4,7 @@ import os
4
4
  from sky import sky_logging
5
5
  from sky import skypilot_config
6
6
  from sky.adaptors import kubernetes
7
- from sky.utils import common_utils
7
+ from sky.utils import yaml_utils
8
8
 
9
9
  logger = sky_logging.init_logger(__name__)
10
10
 
@@ -69,7 +69,7 @@ def initialize_configmap_sync_on_startup(config_file_path: str) -> None:
69
69
 
70
70
  current_config = skypilot_config.parse_and_validate_config_file(
71
71
  config_file_path)
72
- config_yaml = common_utils.dump_yaml_str(dict(current_config))
72
+ config_yaml = yaml_utils.dump_yaml_str(dict(current_config))
73
73
 
74
74
  configmap_body = {
75
75
  'apiVersion': 'v1',
@@ -111,7 +111,7 @@ def patch_configmap_with_config(config, config_file_path: str) -> None:
111
111
  try:
112
112
  namespace = _get_kubernetes_namespace()
113
113
  configmap_name = _get_configmap_name()
114
- config_yaml = common_utils.dump_yaml_str(dict(config))
114
+ config_yaml = yaml_utils.dump_yaml_str(dict(config))
115
115
  patch_body = {'data': {'config.yaml': config_yaml}}
116
116
 
117
117
  try:
@@ -42,4 +42,5 @@ class KubernetesAutoscalerType(enum.Enum):
42
42
  """Enum for the different types of cluster autoscalers for Kubernetes."""
43
43
  GKE = 'gke'
44
44
  KARPENTER = 'karpenter'
45
+ COREWEAVE = 'coreweave'
45
46
  GENERIC = 'generic'
@@ -0,0 +1,94 @@
1
+ """Lock events."""
2
+
3
+ import functools
4
+ import os
5
+ from typing import Optional, Union
6
+
7
+ import filelock
8
+
9
+ from sky.utils import locks
10
+ from sky.utils import timeline
11
+
12
+
13
+ class DistributedLockEvent:
14
+ """Serve both as a distributed lock and event for the lock."""
15
+
16
+ def __init__(self, lock_id: str, timeout: Optional[float] = None):
17
+ self._lock_id = lock_id
18
+ self._lock = locks.get_lock(lock_id, timeout)
19
+ self._hold_lock_event = timeline.Event(
20
+ f'[DistributedLock.hold]:{lock_id}')
21
+
22
+ def acquire(self):
23
+ was_locked = self._lock.is_locked
24
+ with timeline.Event(f'[DistributedLock.acquire]:{self._lock_id}'):
25
+ self._lock.acquire()
26
+ if not was_locked and self._lock.is_locked:
27
+ # start holding the lock after initial acquiring
28
+ self._hold_lock_event.begin()
29
+
30
+ def release(self):
31
+ was_locked = self._lock.is_locked
32
+ self._lock.release()
33
+ if was_locked and not self._lock.is_locked:
34
+ # stop holding the lock after initial releasing
35
+ self._hold_lock_event.end()
36
+
37
+ def __enter__(self):
38
+ self.acquire()
39
+ return self
40
+
41
+ def __exit__(self, exc_type, exc_val, exc_tb):
42
+ self.release()
43
+
44
+ def __call__(self, f):
45
+
46
+ @functools.wraps(f)
47
+ def wrapper(*args, **kwargs):
48
+ with self:
49
+ return f(*args, **kwargs)
50
+
51
+ return wrapper
52
+
53
+
54
+ class FileLockEvent:
55
+ """Serve both as a file lock and event for the lock."""
56
+
57
+ def __init__(self, lockfile: Union[str, os.PathLike], timeout: float = -1):
58
+ self._lockfile = lockfile
59
+ os.makedirs(os.path.dirname(os.path.abspath(self._lockfile)),
60
+ exist_ok=True)
61
+ self._lock = filelock.FileLock(self._lockfile, timeout)
62
+ self._hold_lock_event = timeline.Event(
63
+ f'[FileLock.hold]:{self._lockfile}')
64
+
65
+ def acquire(self):
66
+ was_locked = self._lock.is_locked
67
+ with timeline.Event(f'[FileLock.acquire]:{self._lockfile}'):
68
+ self._lock.acquire()
69
+ if not was_locked and self._lock.is_locked:
70
+ # start holding the lock after initial acquiring
71
+ self._hold_lock_event.begin()
72
+
73
+ def release(self):
74
+ was_locked = self._lock.is_locked
75
+ self._lock.release()
76
+ if was_locked and not self._lock.is_locked:
77
+ # stop holding the lock after initial releasing
78
+ self._hold_lock_event.end()
79
+
80
+ def __enter__(self):
81
+ self.acquire()
82
+ return self
83
+
84
+ def __exit__(self, exc_type, exc_val, exc_tb):
85
+ self.release()
86
+
87
+ def __call__(self, f):
88
+ # Make this class callable as a decorator.
89
+ @functools.wraps(f)
90
+ def wrapper(*args, **kwargs):
91
+ with self:
92
+ return f(*args, **kwargs)
93
+
94
+ return wrapper
sky/utils/schemas.py CHANGED
@@ -1555,6 +1555,9 @@ def get_config_schema():
1555
1555
  'cluster_event_retention_hours': {
1556
1556
  'type': 'number',
1557
1557
  },
1558
+ 'cluster_debug_event_retention_hours': {
1559
+ 'type': 'number',
1560
+ },
1558
1561
  }
1559
1562
  }
1560
1563