skypilot-nightly 1.0.0.dev20250410__py3-none-any.whl → 1.0.0.dev20250412__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. sky/__init__.py +2 -2
  2. sky/adaptors/oci.py +2 -2
  3. sky/authentication.py +2 -2
  4. sky/backends/backend_utils.py +1 -1
  5. sky/backends/cloud_vm_ray_backend.py +3 -3
  6. sky/check.py +1 -1
  7. sky/cli.py +51 -47
  8. sky/client/cli.py +51 -47
  9. sky/client/common.py +4 -2
  10. sky/client/sdk.py +60 -27
  11. sky/clouds/aws.py +2 -2
  12. sky/clouds/cloud.py +3 -2
  13. sky/clouds/kubernetes.py +20 -3
  14. sky/clouds/nebius.py +2 -4
  15. sky/clouds/oci.py +2 -2
  16. sky/clouds/utils/oci_utils.py +1 -1
  17. sky/core.py +12 -17
  18. sky/data/mounting_utils.py +34 -10
  19. sky/exceptions.py +1 -1
  20. sky/execution.py +5 -4
  21. sky/jobs/client/sdk.py +5 -0
  22. sky/optimizer.py +1 -2
  23. sky/provision/instance_setup.py +3 -1
  24. sky/provision/kubernetes/config.py +41 -36
  25. sky/provision/kubernetes/instance.py +4 -7
  26. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +54 -0
  27. sky/provision/kubernetes/network_utils.py +1 -1
  28. sky/provision/kubernetes/utils.py +51 -35
  29. sky/serve/client/sdk.py +6 -0
  30. sky/server/common.py +16 -1
  31. sky/server/constants.py +5 -0
  32. sky/server/requests/payloads.py +2 -0
  33. sky/setup_files/dependencies.py +1 -1
  34. sky/skylet/constants.py +2 -2
  35. sky/skypilot_config.py +197 -70
  36. sky/templates/kubernetes-ray.yml.j2 +66 -25
  37. sky/templates/websocket_proxy.py +41 -2
  38. sky/utils/config_utils.py +1 -1
  39. sky/utils/controller_utils.py +1 -1
  40. sky/utils/kubernetes/generate_kubeconfig.sh +2 -2
  41. sky/utils/kubernetes/rsync_helper.sh +26 -11
  42. {skypilot_nightly-1.0.0.dev20250410.dist-info → skypilot_nightly-1.0.0.dev20250412.dist-info}/METADATA +3 -1
  43. {skypilot_nightly-1.0.0.dev20250410.dist-info → skypilot_nightly-1.0.0.dev20250412.dist-info}/RECORD +47 -48
  44. sky/provision/kubernetes/manifests/smarter-device-manager-configmap.yaml +0 -10
  45. sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +0 -68
  46. {skypilot_nightly-1.0.0.dev20250410.dist-info → skypilot_nightly-1.0.0.dev20250412.dist-info}/WHEEL +0 -0
  47. {skypilot_nightly-1.0.0.dev20250410.dist-info → skypilot_nightly-1.0.0.dev20250412.dist-info}/entry_points.txt +0 -0
  48. {skypilot_nightly-1.0.0.dev20250410.dist-info → skypilot_nightly-1.0.0.dev20250412.dist-info}/licenses/LICENSE +0 -0
  49. {skypilot_nightly-1.0.0.dev20250410.dist-info → skypilot_nightly-1.0.0.dev20250412.dist-info}/top_level.txt +0 -0
@@ -1454,14 +1454,14 @@ def is_kubeconfig_exec_auth(
1454
1454
 
1455
1455
 
1456
1456
  Using exec-based authentication is problematic when used in conjunction
1457
- with kubernetes.remote_identity = LOCAL_CREDENTIAL in ~/.sky/config.yaml.
1457
+ with kubernetes.remote_identity = LOCAL_CREDENTIAL in ~/.sky/skyconfig.yaml.
1458
1458
  This is because the exec-based authentication may not have the relevant
1459
1459
  dependencies installed on the remote cluster or may have hardcoded paths
1460
1460
  that are not available on the remote cluster.
1461
1461
 
1462
1462
  Returns:
1463
1463
  bool: True if exec-based authentication is used and LOCAL_CREDENTIAL
1464
- mode is used for remote_identity in ~/.sky/config.yaml.
1464
+ mode is used for remote_identity in ~/.sky/skyconfig.yaml.
1465
1465
  str: Error message if exec-based authentication is used, None otherwise
1466
1466
  """
1467
1467
  k8s = kubernetes.kubernetes
@@ -1514,7 +1514,7 @@ def is_kubeconfig_exec_auth(
1514
1514
  'Managed Jobs or SkyServe controller on Kubernetes. '
1515
1515
  'To fix, configure SkyPilot to create a service account '
1516
1516
  'for running pods by setting the following in '
1517
- '~/.sky/config.yaml:\n'
1517
+ '~/.sky/skyconfig.yaml:\n'
1518
1518
  ' kubernetes:\n'
1519
1519
  ' remote_identity: SERVICE_ACCOUNT\n'
1520
1520
  ' More: https://docs.skypilot.co/en/latest/'
@@ -2148,32 +2148,35 @@ def fill_ssh_jump_template(ssh_key_secret: str, ssh_jump_image: str,
2148
2148
  return content
2149
2149
 
2150
2150
 
2151
- def check_port_forward_mode_dependencies() -> None:
2152
- """Checks if 'socat' and 'nc' are installed"""
2151
+ def check_port_forward_mode_dependencies(
2152
+ raise_error: bool = True) -> Optional[List[str]]:
2153
+ """Checks if 'socat' and 'nc' are installed
2153
2154
 
2154
- # Construct runtime errors
2155
- socat_default_error = RuntimeError(
2156
- f'`socat` is required to setup Kubernetes cloud with '
2155
+ Args:
2156
+ raise_error: set to true when the dependencies need to be present.
2157
+ set to false for `sky check`, where reason strings are compiled
2158
+ at the end.
2159
+
2160
+ Returns: the reasons list if there are missing dependencies.
2161
+ """
2162
+
2163
+ # errors
2164
+ socat_message = (
2165
+ '`socat` is required to setup Kubernetes cloud with '
2157
2166
  f'`{kubernetes_enums.KubernetesNetworkingMode.PORTFORWARD.value}` ' # pylint: disable=line-too-long
2158
- 'default networking mode and it is not installed. '
2159
- 'On Debian/Ubuntu, install it with:\n'
2160
- f' $ sudo apt install socat\n'
2161
- f'On MacOS, install it with: \n'
2162
- f' $ brew install socat')
2163
- netcat_default_error = RuntimeError(
2164
- f'`nc` is required to setup Kubernetes cloud with '
2167
+ 'default networking mode and it is not installed. ')
2168
+ netcat_default_message = (
2169
+ '`nc` is required to setup Kubernetes cloud with '
2165
2170
  f'`{kubernetes_enums.KubernetesNetworkingMode.PORTFORWARD.value}` ' # pylint: disable=line-too-long
2166
- 'default networking mode and it is not installed. '
2167
- 'On Debian/Ubuntu, install it with:\n'
2168
- f' $ sudo apt install netcat\n'
2169
- f'On MacOS, install it with: \n'
2170
- f' $ brew install netcat')
2171
- mac_installed_error = RuntimeError(
2172
- f'The default MacOS `nc` is installed. However, for '
2171
+ 'default networking mode and it is not installed. ')
2172
+ netcat_macos_message = (
2173
+ 'The default MacOS `nc` is installed. However, for '
2173
2174
  f'`{kubernetes_enums.KubernetesNetworkingMode.PORTFORWARD.value}` ' # pylint: disable=line-too-long
2174
- 'default networking mode, GNU netcat is required. '
2175
- f'On MacOS, install it with: \n'
2176
- f' $ brew install netcat')
2175
+ 'default networking mode, GNU netcat is required. ')
2176
+
2177
+ # save
2178
+ reasons = []
2179
+ required_binaries = []
2177
2180
 
2178
2181
  # Ensure socat is installed
2179
2182
  try:
@@ -2182,8 +2185,8 @@ def check_port_forward_mode_dependencies() -> None:
2182
2185
  stderr=subprocess.DEVNULL,
2183
2186
  check=True)
2184
2187
  except (FileNotFoundError, subprocess.CalledProcessError):
2185
- with ux_utils.print_exception_no_traceback():
2186
- raise socat_default_error from None
2188
+ required_binaries.append('socat')
2189
+ reasons.append(socat_message)
2187
2190
 
2188
2191
  # Ensure netcat is installed
2189
2192
  #
@@ -2198,15 +2201,28 @@ def check_port_forward_mode_dependencies() -> None:
2198
2201
  netcat_output.stderr)
2199
2202
 
2200
2203
  if nc_mac_installed:
2201
- with ux_utils.print_exception_no_traceback():
2202
- raise mac_installed_error from None
2204
+ required_binaries.append('netcat')
2205
+ reasons.append(netcat_macos_message)
2203
2206
  elif netcat_output.returncode != 0:
2204
- with ux_utils.print_exception_no_traceback():
2205
- raise netcat_default_error from None
2207
+ required_binaries.append('netcat')
2208
+ reasons.append(netcat_default_message)
2206
2209
 
2207
2210
  except FileNotFoundError:
2208
- with ux_utils.print_exception_no_traceback():
2209
- raise netcat_default_error from None
2211
+ required_binaries.append('netcat')
2212
+ reasons.append(netcat_default_message)
2213
+
2214
+ if required_binaries:
2215
+ reasons.extend([
2216
+ 'On Debian/Ubuntu, install the missing dependenc(ies) with:',
2217
+ f' $ sudo apt install {" ".join(required_binaries)}',
2218
+ 'On MacOS, install with: ',
2219
+ f' $ brew install {" ".join(required_binaries)}',
2220
+ ])
2221
+ if raise_error:
2222
+ with ux_utils.print_exception_no_traceback():
2223
+ raise RuntimeError('\n'.join(reasons))
2224
+ return reasons
2225
+ return None
2210
2226
 
2211
2227
 
2212
2228
  def get_endpoint_debug_message() -> str:
@@ -2236,7 +2252,7 @@ def combine_pod_config_fields(
2236
2252
  cluster_config_overrides: Dict[str, Any],
2237
2253
  ) -> None:
2238
2254
  """Adds or updates fields in the YAML with fields from the
2239
- ~/.sky/config.yaml's kubernetes.pod_spec dict.
2255
+ ~/.sky/skyconfig.yaml's kubernetes.pod_spec dict.
2240
2256
  This can be used to add fields to the YAML that are not supported by
2241
2257
  SkyPilot yet, or require simple configuration (e.g., adding an
2242
2258
  imagePullSecrets field).
@@ -2296,7 +2312,7 @@ def combine_pod_config_fields(
2296
2312
 
2297
2313
  def combine_metadata_fields(cluster_yaml_path: str) -> None:
2298
2314
  """Updates the metadata for all Kubernetes objects created by SkyPilot with
2299
- fields from the ~/.sky/config.yaml's kubernetes.custom_metadata dict.
2315
+ fields from the ~/.sky/skyconfig.yaml's kubernetes.custom_metadata dict.
2300
2316
 
2301
2317
  Obeys the same add or update semantics as combine_pod_config_fields().
2302
2318
  """
sky/serve/client/sdk.py CHANGED
@@ -74,6 +74,7 @@ def up(
74
74
  f'{server_common.get_server_url()}/serve/up',
75
75
  json=json.loads(body.model_dump_json()),
76
76
  timeout=(5, None),
77
+ cookies=server_common.get_api_cookie_jar(),
77
78
  )
78
79
  return server_common.get_request_id(response)
79
80
 
@@ -132,6 +133,7 @@ def update(
132
133
  f'{server_common.get_server_url()}/serve/update',
133
134
  json=json.loads(body.model_dump_json()),
134
135
  timeout=(5, None),
136
+ cookies=server_common.get_api_cookie_jar(),
135
137
  )
136
138
  return server_common.get_request_id(response)
137
139
 
@@ -173,6 +175,7 @@ def down(
173
175
  f'{server_common.get_server_url()}/serve/down',
174
176
  json=json.loads(body.model_dump_json()),
175
177
  timeout=(5, None),
178
+ cookies=server_common.get_api_cookie_jar(),
176
179
  )
177
180
  return server_common.get_request_id(response)
178
181
 
@@ -207,6 +210,7 @@ def terminate_replica(service_name: str, replica_id: int,
207
210
  f'{server_common.get_server_url()}/serve/terminate-replica',
208
211
  json=json.loads(body.model_dump_json()),
209
212
  timeout=(5, None),
213
+ cookies=server_common.get_api_cookie_jar(),
210
214
  )
211
215
  return server_common.get_request_id(response)
212
216
 
@@ -279,6 +283,7 @@ def status(
279
283
  f'{server_common.get_server_url()}/serve/status',
280
284
  json=json.loads(body.model_dump_json()),
281
285
  timeout=(5, None),
286
+ cookies=server_common.get_api_cookie_jar(),
282
287
  )
283
288
  return server_common.get_request_id(response)
284
289
 
@@ -365,6 +370,7 @@ def tail_logs(service_name: str,
365
370
  json=json.loads(body.model_dump_json()),
366
371
  timeout=(5, None),
367
372
  stream=True,
373
+ cookies=server_common.get_api_cookie_jar(),
368
374
  )
369
375
  request_id = server_common.get_request_id(response)
370
376
  sdk.stream_response(request_id, response, output_stream)
sky/server/common.py CHANGED
@@ -3,6 +3,7 @@
3
3
  import dataclasses
4
4
  import enum
5
5
  import functools
6
+ from http.cookiejar import MozillaCookieJar
6
7
  import json
7
8
  import os
8
9
  import pathlib
@@ -80,6 +81,18 @@ class ApiServerInfo:
80
81
  api_version: ApiVersion
81
82
 
82
83
 
84
+ def get_api_cookie_jar() -> requests.cookies.RequestsCookieJar:
85
+ """Returns the cookie jar used by the client to access the API server."""
86
+ cookie_file = os.environ.get(server_constants.API_COOKIE_FILE_ENV_VAR)
87
+ cookie_jar = requests.cookies.RequestsCookieJar()
88
+ if cookie_file and os.path.exists(cookie_file):
89
+ cookie_path = pathlib.Path(cookie_file).expanduser().resolve()
90
+ file_cookie_jar = MozillaCookieJar(cookie_path)
91
+ file_cookie_jar.load()
92
+ cookie_jar.update(file_cookie_jar)
93
+ return cookie_jar
94
+
95
+
83
96
  @annotations.lru_cache(scope='global')
84
97
  def get_server_url(host: Optional[str] = None) -> str:
85
98
  endpoint = DEFAULT_SERVER_URL
@@ -117,7 +130,9 @@ def get_api_server_status(endpoint: Optional[str] = None) -> ApiServerInfo:
117
130
  server_url = endpoint if endpoint is not None else get_server_url()
118
131
  while time_out_try_count <= RETRY_COUNT_ON_TIMEOUT:
119
132
  try:
120
- response = requests.get(f'{server_url}/api/health', timeout=2.5)
133
+ response = requests.get(f'{server_url}/api/health',
134
+ timeout=2.5,
135
+ cookies=get_api_cookie_jar())
121
136
  if response.status_code == 200:
122
137
  try:
123
138
  result = response.json()
sky/server/constants.py CHANGED
@@ -1,5 +1,7 @@
1
1
  """Constants for the API servers."""
2
2
 
3
+ from sky.skylet import constants
4
+
3
5
  # API server version, whenever there is a change in API server that requires a
4
6
  # restart of the local API server or error out when the client does not match
5
7
  # the server version.
@@ -19,3 +21,6 @@ API_SERVER_REQUEST_DB_PATH = '~/.sky/api_server/requests.db'
19
21
  # The interval (seconds) for the cluster status to be refreshed in the
20
22
  # background.
21
23
  CLUSTER_REFRESH_DAEMON_INTERVAL_SECONDS = 60
24
+
25
+ # Environment variable for a file path to the API cookie file.
26
+ API_COOKIE_FILE_ENV_VAR = f'{constants.SKYPILOT_ENV_VAR_PREFIX}API_COOKIE_FILE'
@@ -47,6 +47,8 @@ def request_body_env_vars() -> dict:
47
47
  # Remove the path to config file, as the config content is included in the
48
48
  # request body and will be merged with the config on the server side.
49
49
  env_vars.pop(skypilot_config.ENV_VAR_SKYPILOT_CONFIG, None)
50
+ env_vars.pop(skypilot_config.ENV_VAR_USER_CONFIG, None)
51
+ env_vars.pop(skypilot_config.ENV_VAR_PROJECT_CONFIG, None)
50
52
  return env_vars
51
53
 
52
54
 
@@ -131,7 +131,7 @@ extras_require: Dict[str, List[str]] = {
131
131
  'scp': local_ray,
132
132
  'oci': ['oci'] + local_ray,
133
133
  # Kubernetes 32.0.0 has an authentication bug: https://github.com/kubernetes-client/python/issues/2333 # pylint: disable=line-too-long
134
- 'kubernetes': ['kubernetes>=20.0.0,!=32.0.0'],
134
+ 'kubernetes': ['kubernetes>=20.0.0,!=32.0.0', 'websockets'],
135
135
  'remote': remote,
136
136
  # For the container registry auth api. Reference:
137
137
  # https://github.com/runpod/runpod-python/releases/tag/1.6.1
sky/skylet/constants.py CHANGED
@@ -117,7 +117,7 @@ RUNPOD_DOCKER_USERNAME_ENV_VAR = 'SKYPILOT_RUNPOD_DOCKER_USERNAME'
117
117
 
118
118
  # Commands for disable GPU ECC, which can improve the performance of the GPU
119
119
  # for some workloads by 30%. This will only be applied when a user specify
120
- # `nvidia_gpus.disable_ecc: true` in ~/.sky/config.yaml.
120
+ # `nvidia_gpus.disable_ecc: true` in ~/.sky/skyconfig.yaml.
121
121
  # Running this command will reboot the machine, introducing overhead for
122
122
  # provisioning the machine.
123
123
  # https://portal.nutanix.com/page/documents/kbs/details?targetId=kA00e000000LKjOCAW
@@ -337,7 +337,7 @@ RCLONE_LOG_DIR = '~/.sky/rclone_log'
337
337
  RCLONE_CACHE_DIR = '~/.cache/rclone'
338
338
  RCLONE_CACHE_REFRESH_INTERVAL = 10
339
339
 
340
- # The keys that can be overridden in the `~/.sky/config.yaml` file. The
340
+ # The keys that can be overridden in the `~/.sky/skyconfig.yaml` file. The
341
341
  # overrides are specified in task YAMLs.
342
342
  OVERRIDEABLE_CONFIG_KEYS_IN_TASK: List[Tuple[str, ...]] = [
343
343
  ('docker', 'run_options'),
sky/skypilot_config.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """Immutable user configurations (EXPERIMENTAL).
2
2
 
3
- On module import, we attempt to parse the config located at CONFIG_PATH
4
- (default: ~/.sky/config.yaml). Caller can then use
3
+ On module import, we attempt to parse the config located at _USER_CONFIG_PATH
4
+ (default: ~/.sky/skyconfig.yaml). Caller can then use
5
5
 
6
6
  >> skypilot_config.loaded()
7
7
 
@@ -35,14 +35,14 @@ Consider the following config contents:
35
35
 
36
36
  then:
37
37
 
38
- # Assuming ~/.sky/config.yaml exists and can be loaded:
38
+ # Assuming ~/.sky/skyconfig.yaml exists and can be loaded:
39
39
  skypilot_config.loaded() # ==> True
40
40
 
41
41
  skypilot_config.get_nested(('a', 'nested'), None) # ==> 1
42
42
  skypilot_config.get_nested(('a', 'nonexist'), None) # ==> None
43
43
  skypilot_config.get_nested(('a',), None) # ==> {'nested': 1}
44
44
 
45
- # If ~/.sky/config.yaml doesn't exist or failed to be loaded:
45
+ # If ~/.sky/skyconfig.yaml doesn't exist or failed to be loaded:
46
46
  skypilot_config.loaded() # ==> False
47
47
  skypilot_config.get_nested(('a', 'nested'), None) # ==> None
48
48
  skypilot_config.get_nested(('a', 'nonexist'), None) # ==> None
@@ -52,7 +52,6 @@ import contextlib
52
52
  import copy
53
53
  import os
54
54
  import pprint
55
- import tempfile
56
55
  import typing
57
56
  from typing import Any, Dict, Iterator, Optional, Tuple
58
57
 
@@ -72,26 +71,60 @@ else:
72
71
 
73
72
  logger = sky_logging.init_logger(__name__)
74
73
 
75
- # The config path is discovered in this order:
74
+ # The config is generated as described below:
76
75
  #
77
- # (1) (Used internally) If env var {ENV_VAR_SKYPILOT_CONFIG} exists, use its
78
- # path;
79
- # (2) If file {CONFIG_PATH} exists, use this file.
76
+ # (*) (Used internally) If env var {ENV_VAR_SKYPILOT_CONFIG} exists, use its
77
+ # path as the config file. Do not use any other config files.
78
+ # This behavior is subject to change and should not be relied on by users.
79
+ # Else,
80
+ # (1) If env var {ENV_VAR_USER_CONFIG} exists, use its path as the user
81
+ # config file. Else, use the default path {_USER_CONFIG_PATH}.
82
+ # (2) If env var {ENV_VAR_PROJECT_CONFIG} exists, use its path as the project
83
+ # config file. Else, use the default path {_PROJECT_CONFIG_PATH}.
84
+ # (3) Override any config keys in (1) with the ones in (2).
85
+ # (4) Validate the final config.
80
86
  #
81
- # If the path discovered by (1) fails to load, we do not attempt to go to step
82
- # 2 in the list.
87
+ # (*) is used internally to implement the behavior of the jobs controller.
88
+ # It is not intended to be used by end users.
89
+ # (1) and (2) are used by end users to set non-default user and project config
90
+ # files on clients.
83
91
 
84
92
  # (Used internally) An env var holding the path to the local config file. This
85
93
  # is only used by jobs controller tasks to ensure recoveries of the same job
86
94
  # use the same config file.
87
95
  ENV_VAR_SKYPILOT_CONFIG = f'{constants.SKYPILOT_ENV_VAR_PREFIX}CONFIG'
88
96
 
89
- # Path to the local config file.
90
- CONFIG_PATH = '~/.sky/config.yaml'
97
+ # (Used by users) Environment variables for setting non-default user and
98
+ # project config files on clients.
99
+ ENV_VAR_USER_CONFIG = f'{constants.SKYPILOT_ENV_VAR_PREFIX}USER_CONFIG'
100
+ ENV_VAR_PROJECT_CONFIG = f'{constants.SKYPILOT_ENV_VAR_PREFIX}PROJECT_CONFIG'
101
+
102
+ # Path to the local config files.
103
+ _LEGACY_USER_CONFIG_PATH = '~/.sky/config.yaml'
104
+ _USER_CONFIG_PATH = '~/.sky/skyconfig.yaml'
105
+ _PROJECT_CONFIG_PATH = 'skyconfig.yaml'
91
106
 
92
107
  # The loaded config.
93
108
  _dict = config_utils.Config()
94
109
  _loaded_config_path: Optional[str] = None
110
+ _config_overridden: bool = False
111
+
112
+
113
+ # This function exists solely to maintain backward compatibility with the
114
+ # legacy user config file located at ~/.sky/config.yaml.
115
+ def get_user_config_path() -> str:
116
+ """Returns the path to the user config file.
117
+
118
+ If only the legacy user config file exists, return
119
+ the legacy user config path.
120
+ Otherwise, return the new user config path.
121
+ """
122
+ user_config_path = os.path.expanduser(_USER_CONFIG_PATH)
123
+ legacy_user_config_path = os.path.expanduser(_LEGACY_USER_CONFIG_PATH)
124
+ if (os.path.exists(legacy_user_config_path) and
125
+ not os.path.exists(user_config_path)):
126
+ return _LEGACY_USER_CONFIG_PATH
127
+ return _USER_CONFIG_PATH
95
128
 
96
129
 
97
130
  def get_nested(keys: Tuple[str, ...],
@@ -137,48 +170,156 @@ def to_dict() -> config_utils.Config:
137
170
  return copy.deepcopy(_dict)
138
171
 
139
172
 
173
+ def _get_config_file_path(envvar: str) -> Optional[str]:
174
+ config_path_via_env_var = os.environ.get(envvar)
175
+ if config_path_via_env_var is not None:
176
+ return os.path.expanduser(config_path_via_env_var)
177
+ return None
178
+
179
+
180
+ def _validate_config(config: Dict[str, Any], config_path: str) -> None:
181
+ """Validates the config."""
182
+ common_utils.validate_schema(
183
+ config,
184
+ schemas.get_config_schema(),
185
+ f'Invalid config YAML ({config_path}). See: '
186
+ 'https://docs.skypilot.co/en/latest/reference/config.html. ' # pylint: disable=line-too-long
187
+ 'Error: ',
188
+ skip_none=False)
189
+
190
+
191
+ def _overlay_skypilot_config(
192
+ original_config: Optional[config_utils.Config],
193
+ override_configs: Optional[config_utils.Config]) -> config_utils.Config:
194
+ """Overlays the override configs on the original configs."""
195
+ if original_config is None:
196
+ original_config = config_utils.Config()
197
+ config = original_config.get_nested(keys=tuple(),
198
+ default_value=None,
199
+ override_configs=override_configs,
200
+ allowed_override_keys=None,
201
+ disallowed_override_keys=None)
202
+ return config
203
+
204
+
140
205
  def _reload_config() -> None:
206
+ internal_config_path = os.environ.get(ENV_VAR_SKYPILOT_CONFIG)
207
+ if internal_config_path is not None:
208
+ # {ENV_VAR_SKYPILOT_CONFIG} is used internally.
209
+ # When this environment variable is set, the config loading
210
+ # behavior is not defined in the public interface.
211
+ # SkyPilot reserves the right to change the config loading behavior
212
+ # at any time when this environment variable is set.
213
+ _reload_config_from_internal_file(internal_config_path)
214
+ return
215
+
216
+ _reload_config_hierarchical()
217
+
218
+
219
+ def _parse_config_file(config_path: str) -> config_utils.Config:
220
+ config = config_utils.Config()
221
+ try:
222
+ config_dict = common_utils.read_yaml(config_path)
223
+ config = config_utils.Config.from_dict(config_dict)
224
+ logger.debug(
225
+ f'Config loaded from {config_path}:\n{pprint.pformat(config)}')
226
+ except yaml.YAMLError as e:
227
+ logger.error(f'Error in loading config file ({config_path}):', e)
228
+ if config:
229
+ _validate_config(config, config_path)
230
+
231
+ logger.debug(f'Config syntax check passed for path: {config_path}')
232
+ return config
233
+
234
+
235
+ def _reload_config_from_internal_file(internal_config_path: str) -> None:
141
236
  global _dict, _loaded_config_path
142
237
  # Reset the global variables, to avoid using stale values.
143
238
  _dict = config_utils.Config()
144
239
  _loaded_config_path = None
145
240
 
146
- config_path_via_env_var = os.environ.get(ENV_VAR_SKYPILOT_CONFIG)
147
- if config_path_via_env_var is not None:
148
- config_path = os.path.expanduser(config_path_via_env_var)
149
- if not os.path.exists(config_path):
241
+ config_path = os.path.expanduser(internal_config_path)
242
+ if not os.path.exists(config_path):
243
+ with ux_utils.print_exception_no_traceback():
244
+ raise FileNotFoundError(
245
+ 'Config file specified by env var '
246
+ f'{ENV_VAR_SKYPILOT_CONFIG} ({config_path!r}) does not '
247
+ 'exist. Please double check the path or unset the env var: '
248
+ f'unset {ENV_VAR_SKYPILOT_CONFIG}')
249
+ logger.debug(f'Using config path: {config_path}')
250
+ _dict = _parse_config_file(config_path)
251
+ _loaded_config_path = config_path
252
+
253
+
254
+ def _reload_config_hierarchical() -> None:
255
+ global _dict
256
+ # Reset the global variables, to avoid using stale values.
257
+ _dict = config_utils.Config()
258
+
259
+ # find the user config file
260
+ user_config_path = _get_config_file_path(ENV_VAR_USER_CONFIG)
261
+ if user_config_path:
262
+ logger.debug('using user config file specified by '
263
+ f'{ENV_VAR_USER_CONFIG}: {user_config_path}')
264
+ user_config_path = os.path.expanduser(user_config_path)
265
+ if not os.path.exists(user_config_path):
266
+ with ux_utils.print_exception_no_traceback():
267
+ raise FileNotFoundError(
268
+ 'Config file specified by env var '
269
+ f'{ENV_VAR_USER_CONFIG} ({user_config_path!r}) '
270
+ 'does not exist. Please double check the path or unset the '
271
+ f'env var: unset {ENV_VAR_USER_CONFIG}')
272
+ else:
273
+ user_config_path = get_user_config_path()
274
+ logger.debug(f'using default user config file: {user_config_path}')
275
+ user_config_path = os.path.expanduser(user_config_path)
276
+
277
+ overrides = []
278
+
279
+ # find the project config file
280
+ project_config_path = _get_config_file_path(ENV_VAR_PROJECT_CONFIG)
281
+ if project_config_path:
282
+ logger.debug('using project config file specified by '
283
+ f'{ENV_VAR_PROJECT_CONFIG}: {project_config_path}')
284
+ project_config_path = os.path.expanduser(project_config_path)
285
+ if not os.path.exists(project_config_path):
150
286
  with ux_utils.print_exception_no_traceback():
151
287
  raise FileNotFoundError(
152
288
  'Config file specified by env var '
153
- f'{ENV_VAR_SKYPILOT_CONFIG} ({config_path!r}) does not '
154
- 'exist. Please double check the path or unset the env var: '
155
- f'unset {ENV_VAR_SKYPILOT_CONFIG}')
289
+ f'{ENV_VAR_PROJECT_CONFIG} ({project_config_path!r}) '
290
+ 'does not exist. Please double check the path or unset the '
291
+ f'env var: unset {ENV_VAR_PROJECT_CONFIG}')
156
292
  else:
157
- config_path = CONFIG_PATH
158
- config_path = os.path.expanduser(config_path)
159
- if os.path.exists(config_path):
160
- logger.debug(f'Using config path: {config_path}')
161
- try:
162
- config = common_utils.read_yaml(config_path)
163
- _dict = config_utils.Config.from_dict(config)
164
- _loaded_config_path = config_path
165
- logger.debug(f'Config loaded:\n{pprint.pformat(_dict)}')
166
- except yaml.YAMLError as e:
167
- logger.error(f'Error in loading config file ({config_path}):', e)
168
- if _dict:
169
- common_utils.validate_schema(
170
- _dict,
171
- schemas.get_config_schema(),
172
- f'Invalid config YAML ({config_path}). See: '
173
- 'https://docs.skypilot.co/en/latest/reference/config.html. ' # pylint: disable=line-too-long
174
- 'Error: ',
175
- skip_none=False)
176
-
177
- logger.debug('Config syntax check passed.')
293
+ logger.debug(
294
+ f'using default project config file: {_PROJECT_CONFIG_PATH}')
295
+ project_config_path = _PROJECT_CONFIG_PATH
296
+ project_config_path = os.path.expanduser(project_config_path)
297
+
298
+ # load the user config file
299
+ if os.path.exists(user_config_path):
300
+ user_config = _parse_config_file(user_config_path)
301
+ _validate_config(user_config, user_config_path)
302
+ overrides.append(user_config)
303
+
304
+ if os.path.exists(project_config_path):
305
+ project_config = _parse_config_file(project_config_path)
306
+ _validate_config(project_config, project_config_path)
307
+ overrides.append(project_config)
308
+
309
+ # layer the configs on top of each other based on priority
310
+ overlaid_client_config: config_utils.Config = config_utils.Config()
311
+ for override in overrides:
312
+ overlaid_client_config = _overlay_skypilot_config(
313
+ original_config=overlaid_client_config, override_configs=override)
314
+ logger.debug(f'final config: {overlaid_client_config}')
315
+ _dict = overlaid_client_config
178
316
 
179
317
 
180
318
  def loaded_config_path() -> Optional[str]:
181
- """Returns the path to the loaded config file."""
319
+ """Returns the path to the loaded config file, or
320
+ '<overridden>' if the config is overridden."""
321
+ if _config_overridden:
322
+ return '<overridden>'
182
323
  return _loaded_config_path
183
324
 
184
325
 
@@ -195,31 +336,30 @@ def loaded() -> bool:
195
336
  def override_skypilot_config(
196
337
  override_configs: Optional[Dict[str, Any]]) -> Iterator[None]:
197
338
  """Overrides the user configurations."""
339
+ global _dict, _config_overridden
198
340
  # TODO(SKY-1215): allow admin user to extend the disallowed keys or specify
199
341
  # allowed keys.
200
342
  if not override_configs:
201
343
  # If no override configs (None or empty dict), do nothing.
202
344
  yield
203
345
  return
204
- original_env_config_path = _loaded_config_path
205
- original_config = dict(_dict)
346
+ original_config = _dict
206
347
  config = _dict.get_nested(
207
348
  keys=tuple(),
208
349
  default_value=None,
209
350
  override_configs=override_configs,
210
351
  allowed_override_keys=None,
211
352
  disallowed_override_keys=constants.SKIPPED_CLIENT_OVERRIDE_KEYS)
212
- with tempfile.NamedTemporaryFile(
213
- mode='w',
214
- prefix='skypilot_config',
215
- # Have to avoid deleting the file as the underlying function needs
216
- # to read the config file, and we need to close the file mode='w'
217
- # to enable reading.
218
- delete=False) as f:
219
- common_utils.dump_yaml(f.name, dict(config))
220
- os.environ[ENV_VAR_SKYPILOT_CONFIG] = f.name
221
353
  try:
222
- _reload_config()
354
+ common_utils.validate_schema(
355
+ config,
356
+ schemas.get_config_schema(),
357
+ 'Invalid config. See: '
358
+ 'https://docs.skypilot.co/en/latest/reference/config.html. ' # pylint: disable=line-too-long
359
+ 'Error: ',
360
+ skip_none=False)
361
+ _config_overridden = True
362
+ _dict = config
223
363
  yield
224
364
  except exceptions.InvalidSkyPilotConfigError as e:
225
365
  with ux_utils.print_exception_no_traceback():
@@ -227,23 +367,10 @@ def override_skypilot_config(
227
367
  'Failed to override the SkyPilot config on API '
228
368
  'server with your local SkyPilot config:\n'
229
369
  '=== SkyPilot config on API server ===\n'
230
- f'{common_utils.dump_yaml_str(original_config)}\n'
370
+ f'{common_utils.dump_yaml_str(dict(original_config))}\n'
231
371
  '=== Your local SkyPilot config ===\n'
232
372
  f'{common_utils.dump_yaml_str(override_configs)}\n'
233
373
  f'Details: {e}') from e
234
-
235
374
  finally:
236
- if original_env_config_path is not None:
237
- os.environ[ENV_VAR_SKYPILOT_CONFIG] = original_env_config_path
238
- else:
239
- os.environ.pop(ENV_VAR_SKYPILOT_CONFIG, None)
240
- # Reload the config to restore the original config to avoid the next
241
- # request reusing the same process to use the config for the current
242
- # request.
243
- _reload_config()
244
-
245
- try:
246
- os.remove(f.name)
247
- except Exception: # pylint: disable=broad-except
248
- # Failing to delete the file is not critical.
249
- pass
375
+ _dict = original_config
376
+ _config_overridden = False