skypilot-nightly 1.0.0.dev20250215__py3-none-any.whl → 1.0.0.dev20250217__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. sky/__init__.py +48 -22
  2. sky/adaptors/aws.py +2 -1
  3. sky/adaptors/azure.py +4 -4
  4. sky/adaptors/cloudflare.py +4 -4
  5. sky/adaptors/kubernetes.py +8 -8
  6. sky/authentication.py +42 -45
  7. sky/backends/backend.py +2 -2
  8. sky/backends/backend_utils.py +108 -221
  9. sky/backends/cloud_vm_ray_backend.py +283 -282
  10. sky/benchmark/benchmark_utils.py +6 -2
  11. sky/check.py +40 -28
  12. sky/cli.py +1213 -1116
  13. sky/client/__init__.py +1 -0
  14. sky/client/cli.py +5644 -0
  15. sky/client/common.py +345 -0
  16. sky/client/sdk.py +1757 -0
  17. sky/cloud_stores.py +12 -6
  18. sky/clouds/__init__.py +0 -2
  19. sky/clouds/aws.py +20 -13
  20. sky/clouds/azure.py +5 -3
  21. sky/clouds/cloud.py +1 -1
  22. sky/clouds/cudo.py +2 -1
  23. sky/clouds/do.py +2 -1
  24. sky/clouds/fluidstack.py +3 -2
  25. sky/clouds/gcp.py +10 -8
  26. sky/clouds/ibm.py +8 -7
  27. sky/clouds/kubernetes.py +7 -6
  28. sky/clouds/lambda_cloud.py +8 -7
  29. sky/clouds/oci.py +4 -3
  30. sky/clouds/paperspace.py +2 -1
  31. sky/clouds/runpod.py +2 -1
  32. sky/clouds/scp.py +8 -7
  33. sky/clouds/service_catalog/__init__.py +3 -3
  34. sky/clouds/service_catalog/aws_catalog.py +7 -1
  35. sky/clouds/service_catalog/common.py +4 -2
  36. sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +2 -2
  37. sky/clouds/utils/oci_utils.py +1 -1
  38. sky/clouds/vast.py +2 -1
  39. sky/clouds/vsphere.py +2 -1
  40. sky/core.py +263 -99
  41. sky/dag.py +4 -0
  42. sky/data/mounting_utils.py +2 -1
  43. sky/data/storage.py +97 -35
  44. sky/data/storage_utils.py +69 -9
  45. sky/exceptions.py +138 -5
  46. sky/execution.py +47 -50
  47. sky/global_user_state.py +105 -22
  48. sky/jobs/__init__.py +12 -14
  49. sky/jobs/client/__init__.py +0 -0
  50. sky/jobs/client/sdk.py +296 -0
  51. sky/jobs/constants.py +30 -1
  52. sky/jobs/controller.py +12 -6
  53. sky/jobs/dashboard/dashboard.py +2 -6
  54. sky/jobs/recovery_strategy.py +22 -29
  55. sky/jobs/server/__init__.py +1 -0
  56. sky/jobs/{core.py → server/core.py} +101 -34
  57. sky/jobs/server/dashboard_utils.py +64 -0
  58. sky/jobs/server/server.py +182 -0
  59. sky/jobs/utils.py +32 -23
  60. sky/models.py +27 -0
  61. sky/optimizer.py +9 -11
  62. sky/provision/__init__.py +6 -3
  63. sky/provision/aws/config.py +2 -2
  64. sky/provision/aws/instance.py +1 -1
  65. sky/provision/azure/instance.py +1 -1
  66. sky/provision/cudo/instance.py +1 -1
  67. sky/provision/do/instance.py +1 -1
  68. sky/provision/do/utils.py +0 -5
  69. sky/provision/fluidstack/fluidstack_utils.py +4 -3
  70. sky/provision/fluidstack/instance.py +4 -2
  71. sky/provision/gcp/instance.py +1 -1
  72. sky/provision/instance_setup.py +2 -2
  73. sky/provision/kubernetes/constants.py +8 -0
  74. sky/provision/kubernetes/instance.py +1 -1
  75. sky/provision/kubernetes/utils.py +67 -76
  76. sky/provision/lambda_cloud/instance.py +3 -15
  77. sky/provision/logging.py +1 -1
  78. sky/provision/oci/instance.py +7 -4
  79. sky/provision/paperspace/instance.py +1 -1
  80. sky/provision/provisioner.py +3 -2
  81. sky/provision/runpod/instance.py +1 -1
  82. sky/provision/vast/instance.py +1 -1
  83. sky/provision/vast/utils.py +2 -1
  84. sky/provision/vsphere/instance.py +2 -11
  85. sky/resources.py +55 -40
  86. sky/serve/__init__.py +6 -10
  87. sky/serve/client/__init__.py +0 -0
  88. sky/serve/client/sdk.py +366 -0
  89. sky/serve/constants.py +3 -0
  90. sky/serve/replica_managers.py +10 -10
  91. sky/serve/serve_utils.py +56 -36
  92. sky/serve/server/__init__.py +0 -0
  93. sky/serve/{core.py → server/core.py} +37 -17
  94. sky/serve/server/server.py +117 -0
  95. sky/serve/service.py +8 -1
  96. sky/server/__init__.py +1 -0
  97. sky/server/common.py +441 -0
  98. sky/server/constants.py +21 -0
  99. sky/server/html/log.html +174 -0
  100. sky/server/requests/__init__.py +0 -0
  101. sky/server/requests/executor.py +462 -0
  102. sky/server/requests/payloads.py +481 -0
  103. sky/server/requests/queues/__init__.py +0 -0
  104. sky/server/requests/queues/mp_queue.py +76 -0
  105. sky/server/requests/requests.py +567 -0
  106. sky/server/requests/serializers/__init__.py +0 -0
  107. sky/server/requests/serializers/decoders.py +192 -0
  108. sky/server/requests/serializers/encoders.py +166 -0
  109. sky/server/server.py +1095 -0
  110. sky/server/stream_utils.py +144 -0
  111. sky/setup_files/MANIFEST.in +1 -0
  112. sky/setup_files/dependencies.py +12 -4
  113. sky/setup_files/setup.py +1 -1
  114. sky/sky_logging.py +9 -13
  115. sky/skylet/autostop_lib.py +2 -2
  116. sky/skylet/constants.py +46 -12
  117. sky/skylet/events.py +5 -6
  118. sky/skylet/job_lib.py +78 -66
  119. sky/skylet/log_lib.py +17 -11
  120. sky/skypilot_config.py +79 -94
  121. sky/task.py +119 -73
  122. sky/templates/aws-ray.yml.j2 +4 -4
  123. sky/templates/azure-ray.yml.j2 +3 -2
  124. sky/templates/cudo-ray.yml.j2 +3 -2
  125. sky/templates/fluidstack-ray.yml.j2 +3 -2
  126. sky/templates/gcp-ray.yml.j2 +3 -2
  127. sky/templates/ibm-ray.yml.j2 +3 -2
  128. sky/templates/jobs-controller.yaml.j2 +1 -12
  129. sky/templates/kubernetes-ray.yml.j2 +3 -2
  130. sky/templates/lambda-ray.yml.j2 +3 -2
  131. sky/templates/oci-ray.yml.j2 +3 -2
  132. sky/templates/paperspace-ray.yml.j2 +3 -2
  133. sky/templates/runpod-ray.yml.j2 +3 -2
  134. sky/templates/scp-ray.yml.j2 +3 -2
  135. sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
  136. sky/templates/vsphere-ray.yml.j2 +4 -2
  137. sky/templates/websocket_proxy.py +64 -0
  138. sky/usage/constants.py +8 -0
  139. sky/usage/usage_lib.py +45 -11
  140. sky/utils/accelerator_registry.py +33 -53
  141. sky/utils/admin_policy_utils.py +2 -1
  142. sky/utils/annotations.py +51 -0
  143. sky/utils/cli_utils/status_utils.py +33 -3
  144. sky/utils/cluster_utils.py +356 -0
  145. sky/utils/command_runner.py +69 -14
  146. sky/utils/common.py +74 -0
  147. sky/utils/common_utils.py +133 -93
  148. sky/utils/config_utils.py +204 -0
  149. sky/utils/control_master_utils.py +2 -3
  150. sky/utils/controller_utils.py +133 -147
  151. sky/utils/dag_utils.py +72 -24
  152. sky/utils/kubernetes/deploy_remote_cluster.sh +2 -2
  153. sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
  154. sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
  155. sky/utils/log_utils.py +83 -23
  156. sky/utils/message_utils.py +81 -0
  157. sky/utils/registry.py +127 -0
  158. sky/utils/resources_utils.py +2 -2
  159. sky/utils/rich_utils.py +213 -34
  160. sky/utils/schemas.py +19 -2
  161. sky/{status_lib.py → utils/status_lib.py} +12 -7
  162. sky/utils/subprocess_utils.py +51 -35
  163. sky/utils/timeline.py +7 -2
  164. sky/utils/ux_utils.py +95 -25
  165. {skypilot_nightly-1.0.0.dev20250215.dist-info → skypilot_nightly-1.0.0.dev20250217.dist-info}/METADATA +8 -3
  166. {skypilot_nightly-1.0.0.dev20250215.dist-info → skypilot_nightly-1.0.0.dev20250217.dist-info}/RECORD +170 -132
  167. sky/clouds/cloud_registry.py +0 -76
  168. sky/utils/cluster_yaml_utils.py +0 -24
  169. {skypilot_nightly-1.0.0.dev20250215.dist-info → skypilot_nightly-1.0.0.dev20250217.dist-info}/LICENSE +0 -0
  170. {skypilot_nightly-1.0.0.dev20250215.dist-info → skypilot_nightly-1.0.0.dev20250217.dist-info}/WHEEL +0 -0
  171. {skypilot_nightly-1.0.0.dev20250215.dist-info → skypilot_nightly-1.0.0.dev20250217.dist-info}/entry_points.txt +0 -0
  172. {skypilot_nightly-1.0.0.dev20250215.dist-info → skypilot_nightly-1.0.0.dev20250217.dist-info}/top_level.txt +0 -0
sky/server/common.py ADDED
@@ -0,0 +1,441 @@
1
+ """Common data structures and constants used in the API."""
2
+
3
+ import dataclasses
4
+ import enum
5
+ import functools
6
+ import importlib
7
+ import json
8
+ import os
9
+ import pathlib
10
+ import subprocess
11
+ import time
12
+ import typing
13
+ from typing import Any, Dict, Optional
14
+ import uuid
15
+
16
+ import colorama
17
+ import filelock
18
+ import psutil
19
+ import pydantic
20
+ import requests
21
+
22
+ from sky import exceptions
23
+ from sky import sky_logging
24
+ from sky import skypilot_config
25
+ from sky.data import data_utils
26
+ from sky.server import constants as server_constants
27
+ from sky.skylet import constants
28
+ from sky.usage import usage_lib
29
+ from sky.utils import annotations
30
+ from sky.utils import common
31
+ from sky.utils import common_utils
32
+ from sky.utils import rich_utils
33
+ from sky.utils import ux_utils
34
+
35
+ if typing.TYPE_CHECKING:
36
+ from sky import dag as dag_lib
37
+ from sky.server.requests import payloads
38
+
39
# Default URL of the API server when nothing else is configured.
DEFAULT_SERVER_URL = 'http://127.0.0.1:46580'
# Hosts that are treated as "local" for the API server.
# NOTE: the name is misspelled ('AVAILBLE'); it is kept as-is because other
# modules may reference it.
AVAILBLE_LOCAL_API_SERVER_HOSTS = ['0.0.0.0', 'localhost', '127.0.0.1']
# URLs (host + fixed port 46580) that `is_api_server_local` and
# `_start_api_server` accept as local.
AVAILABLE_LOCAL_API_SERVER_URLS = [
    f'http://{host}:46580' for host in AVAILBLE_LOCAL_API_SERVER_HOSTS
]

# Shell command used by `start_uvicorn_in_background` to launch the server.
API_SERVER_CMD = 'python -m sky.server.server'
# The client dir on the API server for storing user-specific data, such as file
# mounts, logs, etc. This dir is ephemeral and will be cleaned up when the API
# server is restarted.
API_SERVER_CLIENT_DIR = pathlib.Path('~/.sky/api_server/clients')
# Maximum number of health-check attempts made by `get_api_server_status` when
# requests time out.
RETRY_COUNT_ON_TIMEOUT = 3

# Template for the version-mismatch error; `server_version` and
# `client_version` are filled in later with `str.format`.
SKY_API_VERSION_WARNING = (
    f'{colorama.Fore.YELLOW}SkyPilot API server is too old: '
    f'v{{server_version}} (client version is v{{client_version}}). '
    'Please restart the SkyPilot API server with: '
    'sky api stop; sky api start'
    f'{colorama.Style.RESET_ALL}')
# Type aliases used in signatures across the API client/server code.
RequestId = str
ApiVersion = Optional[str]

logger = sky_logging.init_logger(__name__)
62
+
63
+
64
class ApiServerStatus(enum.Enum):
    """Health states of the API server as reported by `get_api_server_status`."""
    # The health endpoint answered and its API version equals the client's.
    HEALTHY = 'healthy'
    # The server could not be reached, answered with a non-200 status, or
    # returned a response without usable version info.
    UNHEALTHY = 'unhealthy'
    # The health endpoint answered but with a different API version.
    VERSION_MISMATCH = 'version_mismatch'
68
+
69
+
70
@dataclasses.dataclass
class ApiServerInfo:
    """Result of a health probe against the API server."""
    # Health classification of the server.
    status: ApiServerStatus
    # API version reported by the server; None when the server is unhealthy.
    api_version: ApiVersion
74
+
75
+
76
@annotations.lru_cache(scope='global')
def get_server_url(host: Optional[str] = None) -> str:
    """Resolve the URL of the SkyPilot API server.

    Precedence, highest first: the environment variable named by
    `constants.SKY_API_SERVER_URL_ENV_VAR`, the `api_server.endpoint` config
    entry, then `http://<host>:46580` if `host` is given, otherwise
    `DEFAULT_SERVER_URL`.

    Args:
        host: Optional host used to build the fallback URL.

    Returns:
        The resolved URL with any trailing '/' stripped.
    """
    fallback = DEFAULT_SERVER_URL if host is None else f'http://{host}:46580'
    # The config lookup is always performed, even when the env var is set.
    configured = skypilot_config.get_nested(('api_server', 'endpoint'),
                                            fallback)
    server_url = os.environ.get(constants.SKY_API_SERVER_URL_ENV_VAR,
                                configured)
    return server_url.rstrip('/')
86
+
87
+
88
@annotations.lru_cache(scope='global')
def is_api_server_local():
    """Return True if the resolved server URL is one of the known local URLs."""
    resolved_url = get_server_url()
    return resolved_url in AVAILABLE_LOCAL_API_SERVER_URLS
91
+
92
+
93
def get_api_server_status(endpoint: Optional[str] = None) -> ApiServerInfo:
    """Probe the API server's health endpoint and classify the result.

    A GET request is sent to `<server>/api/health`. Timeouts are retried up to
    `RETRY_COUNT_ON_TIMEOUT` attempts in total; any other outcome is final.

    Args:
        endpoint: The endpoint of the API server. If None, the URL from
            `get_server_url()` is used.

    Returns:
        ApiServerInfo: HEALTHY when the reported API version equals the
        client's, VERSION_MISMATCH when it differs, and UNHEALTHY (with
        `api_version=None`) for connection errors, repeated timeouts, non-200
        responses, or responses without parseable version info.
    """

    def _unhealthy() -> ApiServerInfo:
        # A fresh object per call, so callers never share a mutable instance.
        return ApiServerInfo(status=ApiServerStatus.UNHEALTHY,
                             api_version=None)

    server_url = get_server_url() if endpoint is None else endpoint
    for _ in range(RETRY_COUNT_ON_TIMEOUT):
        try:
            response = requests.get(f'{server_url}/api/health', timeout=2.5)
            if response.status_code != 200:
                return _unhealthy()
            try:
                api_version = response.json().get('api_version')
                if api_version is None:
                    logger.warning(f'API server response missing '
                                   f'version info. {server_url} may '
                                   f'not be running SkyPilot API server.')
                    return _unhealthy()
                if api_version == server_constants.API_VERSION:
                    status = ApiServerStatus.HEALTHY
                else:
                    status = ApiServerStatus.VERSION_MISMATCH
                return ApiServerInfo(status=status, api_version=api_version)
            except (json.JSONDecodeError, AttributeError) as e:
                logger.warning('Failed to parse API server response: '
                               f'{str(e)}')
                return _unhealthy()
        except requests.exceptions.Timeout:
            # Only timeouts are retried; fall through to the next attempt.
            continue
        except requests.exceptions.ConnectionError:
            return _unhealthy()

    # Every attempt timed out.
    return _unhealthy()
149
+
150
+
151
def start_uvicorn_in_background(deploy: bool = False, host: str = '127.0.0.1'):
    """Launch the API server as a detached process and wait until it is healthy.

    The server command (`API_SERVER_CMD`) is run through the shell with its
    output redirected to `constants.API_SERVER_LOGS`. The function then polls
    the health endpoint every 0.5 seconds until the server reports HEALTHY or
    the start-up timeout expires.

    Args:
        deploy: If True, `--deploy` is appended to the server command.
        host: Host passed to the server via `--host`.

    Raises:
        RuntimeError: If the configured server URL is not local, or the server
            does not become healthy within the timeout.
        AssertionError: If the server responds with a mismatching API version
            while starting.
    """
    # pylint: disable=import-outside-toplevel
    import shlex

    if not is_api_server_local():
        raise RuntimeError(
            f'Cannot start API server: {get_server_url()} is not a local URL')

    # Check available memory before starting the server.
    avail_mem_size_gb: float = psutil.virtual_memory().available / (1024**3)
    if avail_mem_size_gb <= server_constants.MIN_AVAIL_MEM_GB:
        logger.warning(
            f'{colorama.Fore.YELLOW}Your SkyPilot API server machine only has '
            f'{avail_mem_size_gb:.1f}GB memory available. '
            f'At least {server_constants.MIN_AVAIL_MEM_GB}GB is recommended to '
            f'support higher load with better performance.'
            f'{colorama.Style.RESET_ALL}')
    log_path = os.path.expanduser(constants.API_SERVER_LOGS)
    os.makedirs(os.path.dirname(log_path), exist_ok=True)

    # Build the shell command that runs the API server. `host` and the log
    # path are quoted because the command is executed with shell=True: an
    # unquoted value containing spaces or shell metacharacters would break
    # the command (or be interpreted by the shell).
    api_server_cmd = API_SERVER_CMD
    if deploy:
        api_server_cmd += ' --deploy'
    if host is not None:
        api_server_cmd += f' --host {shlex.quote(host)}'
    cmd = f'{api_server_cmd} > {shlex.quote(log_path)} 2>&1'

    # Start the uvicorn process in the background and don't wait for it.
    # If this is called from a CLI invocation, we need start_new_session=True so
    # that SIGINT on the CLI will not also kill the API server.
    subprocess.Popen(cmd, shell=True, start_new_session=True)

    # Wait for the server to start until timeout.
    # Conservative upper time bound for starting the server based on profiling.
    timeout_sec = 12
    start_time = time.time()
    while True:
        api_server_info = get_api_server_status()
        assert api_server_info.status != ApiServerStatus.VERSION_MISMATCH, (
            f'API server version mismatch when starting the server. '
            f'Server version: {api_server_info.api_version} '
            f'Client version: {server_constants.API_VERSION}')
        if api_server_info.status == ApiServerStatus.HEALTHY:
            break
        elif time.time() - start_time >= timeout_sec:
            with ux_utils.print_exception_no_traceback():
                raise RuntimeError(
                    'Failed to start SkyPilot API server at '
                    f'{get_server_url(host)}'
                    f'\nView logs at: {constants.API_SERVER_LOGS}')
        time.sleep(0.5)
201
+
202
+
203
def handle_request_error(response: requests.Response) -> None:
    """Raise if the API server answered with anything other than HTTP 200.

    Args:
        response: The response received from the API server.

    Raises:
        RuntimeError: If `response.status_code` is not 200. The message
            includes the server URL, the status code and the response text.
    """
    if response.status_code == 200:
        return
    with ux_utils.print_exception_no_traceback():
        raise RuntimeError(
            'Failed to process response from SkyPilot API server at '
            f'{get_server_url()}. '
            f'Response: {response.status_code} '
            f'{response.text}')
211
+
212
+
213
def get_request_id(response: requests.Response) -> RequestId:
    """Extract the request ID from an API server response.

    The response is first validated with `handle_request_error`; the ID is
    then read from the `X-Request-ID` header.

    Args:
        response: The response received from the API server.

    Returns:
        The value of the `X-Request-ID` header.

    Raises:
        RuntimeError: If the response is not HTTP 200 or the header is missing.
    """
    handle_request_error(response)
    header_value = response.headers.get('X-Request-ID')
    if header_value is not None:
        return header_value
    with ux_utils.print_exception_no_traceback():
        raise RuntimeError(
            'Failed to get request ID from SkyPilot API server at '
            f'{get_server_url()}. Response: {response.status_code} '
            f'{response.text}')
223
+
224
+
225
def _start_api_server(deploy: bool = False, host: str = '127.0.0.1'):
    """Starts a SkyPilot API server locally.

    Args:
        deploy: Forwarded to `start_uvicorn_in_background`.
        host: Host used to resolve the server URL and forwarded to
            `start_uvicorn_in_background`.
    """
    # Only a URL from the known local list may be started from here.
    server_url = get_server_url(host)
    is_local_url = server_url in AVAILABLE_LOCAL_API_SERVER_URLS
    assert is_local_url, f'server url {server_url} is not a local url'

    with rich_utils.client_status('Starting SkyPilot API server'):
        connect_failed_msg = (f'{colorama.Style.DIM}Failed to connect to '
                              f'SkyPilot API server at {server_url}. '
                              'Starting a local server.'
                              f'{colorama.Style.RESET_ALL}')
        logger.info(connect_failed_msg)
        start_uvicorn_in_background(deploy=deploy, host=host)
        started_msg = ux_utils.finishing_message('SkyPilot API server started.')
        logger.info(started_msg)
239
+
240
+
241
def check_server_healthy(endpoint: Optional[str] = None,) -> None:
    """Verify that the API server is reachable and version-compatible.

    Args:
        endpoint (Optional[str]): The endpoint of the API server.
            If None, the URL from `get_server_url()` is used.

    Raises:
        RuntimeError: If the server's API version does not match the client's.
        exceptions.ApiServerConnectionError: If the server is unhealthy.
    """
    if endpoint is None:
        endpoint = get_server_url()
    server_info = get_api_server_status(endpoint)

    if server_info.status == ApiServerStatus.VERSION_MISMATCH:
        with ux_utils.print_exception_no_traceback():
            raise RuntimeError(
                SKY_API_VERSION_WARNING.format(
                    server_version=server_info.api_version,
                    client_version=server_constants.API_VERSION))

    if server_info.status == ApiServerStatus.UNHEALTHY:
        with ux_utils.print_exception_no_traceback():
            raise exceptions.ApiServerConnectionError(endpoint)
264
+
265
+
266
def check_server_healthy_or_start_fn(deploy: bool = False,
                                     host: str = '127.0.0.1'):
    """Ensure an API server is available, starting a local one if needed.

    If the health check fails with a connection error and the configured
    server URL is local, a local server is started under a file lock. For a
    non-local URL the connection error is re-raised.

    Args:
        deploy: Forwarded to `_start_api_server`.
        host: Forwarded to `_start_api_server`.

    Raises:
        exceptions.ApiServerConnectionError: If the server is unreachable and
            its URL is not local.
    """
    try:
        check_server_healthy()
    except exceptions.ApiServerConnectionError as exc:
        endpoint = get_server_url()
        if not is_api_server_local():
            with ux_utils.print_exception_no_traceback():
                raise exceptions.ApiServerConnectionError(endpoint) from exc
        lock_path = os.path.expanduser(constants.API_SERVER_CREATION_LOCK_PATH)
        with filelock.FileLock(lock_path):
            # Re-probe under the lock: another process may have started the
            # server while this one was waiting.
            recheck = get_api_server_status(endpoint)
            if recheck.status == ApiServerStatus.UNHEALTHY:
                _start_api_server(deploy, host)
282
+
283
+
284
def check_server_healthy_or_start(func):
    """Decorator that makes sure the API server is up before calling `func`.

    The wrapper consumes the keyword-only arguments `deploy` and `host`, passes
    them to `check_server_healthy_or_start_fn`, and then calls `func` with the
    remaining positional and keyword arguments.
    """

    @functools.wraps(func)
    def _ensure_server_then_call(*args,
                                 deploy: bool = False,
                                 host: str = '127.0.0.1',
                                 **kwargs):
        check_server_healthy_or_start_fn(deploy, host)
        result = func(*args, **kwargs)
        return result

    return _ensure_server_then_call
292
+
293
+
294
def process_mounts_in_task_on_api_server(task: str, env_vars: Dict[str, str],
                                         workdir_only: bool) -> 'dag_lib.Dag':
    """Translates the file mounts path in a task to the path on API server.

    When a task involves file mounts, the client is expected to upload those
    local files to the API server first (via `upload_mounts_to_api_server`,
    which is defined outside this module). This function then translates the
    paths in the task to be the actual file paths on the API server, based on
    the `file_mounts_mapping` in the task set by the client.

    Args:
        task: The task to be translated, as YAML text (it is written to a file
            and parsed back with `common_utils.read_yaml_all`).
        env_vars: The environment variables of the task. Only
            `constants.USER_ID_ENV_VAR` is read here, defaulting to 'unknown'.
        workdir_only: Whether to only translate the workdir, which is used for
            `exec`, as it does not need other files/folders in file_mounts.

    Returns:
        The translated task as a single-task dag.

    Raises:
        ValueError: If a `file_mounts` value is neither a string nor a dict.
        KeyError: If a path to translate is missing from
            `file_mounts_mapping`.
    """
    from sky.utils import dag_utils  # pylint: disable=import-outside-toplevel

    user_hash = env_vars.get(constants.USER_ID_ENV_VAR, 'unknown')

    # We should not use int(time.time()) as there can be multiple requests at
    # the same second.
    task_id = str(uuid.uuid4().hex)
    # Per-user directory under the API server's client dir.
    client_dir = (API_SERVER_CLIENT_DIR.expanduser().resolve() / user_hash)
    client_task_dir = client_dir / 'tasks'
    client_task_dir.mkdir(parents=True, exist_ok=True)

    # Persist the incoming task YAML so that it can be parsed from a file.
    client_task_path = client_task_dir / f'{task_id}.yaml'
    client_task_path.write_text(task)

    client_file_mounts_dir = client_dir / 'file_mounts'
    client_file_mounts_dir.mkdir(parents=True, exist_ok=True)

    def _get_client_file_mounts_path(
            original_path: str, file_mounts_mapping: Dict[str, str]) -> str:
        # Look up the mapped path for `original_path` and re-root it under the
        # per-user file_mounts dir; the leading '/' is stripped so that the
        # join does not discard the directory prefix.
        return str(client_file_mounts_dir /
                   file_mounts_mapping[original_path].lstrip('/'))

    task_configs = common_utils.read_yaml_all(str(client_task_path))
    for task_config in task_configs:
        # Skip empty YAML documents.
        if task_config is None:
            continue
        file_mounts_mapping = task_config.get('file_mounts_mapping', {})
        if not file_mounts_mapping:
            # We did not mount any files to new paths on the remote server
            # so no need to resolve filepaths.
            continue
        if 'workdir' in task_config:
            workdir = task_config['workdir']
            task_config['workdir'] = str(
                client_file_mounts_dir /
                file_mounts_mapping[workdir].lstrip('/'))
        if workdir_only:
            continue
        if 'file_mounts' in task_config:
            file_mounts = task_config['file_mounts']
            for dst, src in file_mounts.items():
                if isinstance(src, str):
                    # Cloud store URLs are left untouched; only other
                    # (presumably local) paths are translated.
                    if not data_utils.is_cloud_store_url(src):
                        file_mounts[dst] = _get_client_file_mounts_path(
                            src, file_mounts_mapping)
                elif isinstance(src, dict):
                    if 'source' in src:
                        source = src['source']
                        if isinstance(source, str):
                            if data_utils.is_cloud_store_url(source):
                                continue
                            src['source'] = _get_client_file_mounts_path(
                                source, file_mounts_mapping)
                        else:
                            # Non-string sources are iterated item by item;
                            # every item is translated (no cloud-URL check).
                            new_source = []
                            for src_item in source:
                                new_source.append(
                                    _get_client_file_mounts_path(
                                        src_item, file_mounts_mapping))
                            src['source'] = new_source
                else:
                    raise ValueError(f'Unexpected file_mounts value: {src}')
        if 'service' in task_config:
            service = task_config['service']
            if 'tls' in service:
                tls = service['tls']
                # TLS key/cert paths are translated like file mounts.
                for key in ['keyfile', 'certfile']:
                    if key in tls:
                        tls[key] = _get_client_file_mounts_path(
                            tls[key], file_mounts_mapping)

    # We can switch to using string, but this is to make it easier to debug, by
    # persisting the translated task yaml file.
    # NOTE: the translated file is written to `client_dir`, not
    # `client_task_dir`.
    translated_client_task_path = client_dir / f'{task_id}_translated.yaml'
    common_utils.dump_yaml(str(translated_client_task_path), task_configs)

    dag = dag_utils.load_chain_dag_from_yaml(str(translated_client_task_path))
    return dag
391
+
392
+
393
def api_server_user_logs_dir_prefix(
        user_hash: Optional[str] = None) -> pathlib.Path:
    """Return the per-user `sky_logs` directory under the API server client dir.

    Args:
        user_hash: The user hash to use. If None, the value of
            `common_utils.get_user_hash()` is used.

    Returns:
        `API_SERVER_CLIENT_DIR / <user_hash> / 'sky_logs'`. The path is
        returned as-is; `~` is not expanded here.
    """
    effective_hash = (common_utils.get_user_hash()
                      if user_hash is None else user_hash)
    return API_SERVER_CLIENT_DIR / effective_hash / 'sky_logs'
398
+
399
+
400
def request_body_to_params(body: pydantic.BaseModel) -> Dict[str, Any]:
    """Convert a request body model into a dict of non-None parameters.

    Args:
        body: The pydantic model to convert; it is dumped in JSON mode.

    Returns:
        The dumped fields, with every entry whose value is None removed.
    """
    dumped_fields = body.model_dump(mode='json')
    params: Dict[str, Any] = {}
    for field_name, field_value in dumped_fields.items():
        if field_value is None:
            continue
        params[field_name] = field_value
    return params
404
+
405
+
406
def reload_for_new_request(client_entrypoint: Optional[str],
                           client_command: Optional[str]) -> None:
    """Reload modules, global variables, and usage message for a new request.

    Args:
        client_entrypoint: Client entrypoint recorded for the usage message.
        client_command: Client command recorded for the usage message.
    """
    # When a user request is sent to api server, it changes the user hash in the
    # env vars, but since controller_utils is imported before the env vars are
    # set, it doesn't get updated. So we need to reload it here.
    # pylint: disable=import-outside-toplevel
    from sky.utils import controller_utils
    # Recompute the controller names stored as globals on `common`.
    common.SKY_SERVE_CONTROLLER_NAME = common.get_controller_name(
        common.ControllerType.SERVE)
    common.JOB_CONTROLLER_NAME = common.get_controller_name(
        common.ControllerType.JOBS)
    # TODO(zhwu): We should avoid reloading the controller_utils module.
    # Instead, we should reload required cache or global variables.
    # TODO(zhwu): Reloading the controller_utils module may cause the global
    # variables in other modules referring the `controller_utils.Controllers`
    # dangling, as they will be pointing to the old object. We should not use
    # it in global variables.
    importlib.reload(controller_utils)

    # Reset the client entrypoint and command for the usage message.
    common_utils.set_client_entrypoint_and_command(
        client_entrypoint=client_entrypoint,
        client_command=client_command,
    )
    # We need to reset usage message, so that the message is up-to-date with the
    # latest information in the context, e.g. client entrypoint and run id.
    usage_lib.messages.reset(usage_lib.MessageType.USAGE)

    # Clear the cache of every function registered in
    # `annotations.FUNCTIONS_NEED_RELOAD_CACHE`.
    for func in annotations.FUNCTIONS_NEED_RELOAD_CACHE:
        func.cache_clear()

    # Make sure the logger takes the new environment variables. This is
    # necessary because the logger is initialized before the environment
    # variables are set, such as SKYPILOT_DEBUG.
    sky_logging.reload_logger()
@@ -0,0 +1,21 @@
1
"""Constants for the API servers."""

# API server version. Bump it whenever a change to the API server requires
# restarting the local API server, or requires erroring out when the client
# version does not match the server version.
API_VERSION = '1'

# Prefix for API request names.
REQUEST_NAME_PREFIX = 'sky.'
# The user ID of the SkyPilot system.
SKYPILOT_SYSTEM_USER_ID = 'skypilot-system'
# The memory (GB) that SkyPilot tries to not use to prevent OOM.
MIN_AVAIL_MEM_GB = 2
# Default encoder/decoder handler name.
DEFAULT_HANDLER_NAME = 'default'
# The path to the API request database.
API_SERVER_REQUEST_DB_PATH = '~/.sky/api_server/requests.db'

# The interval (seconds) for the cluster status to be refreshed in the
# background.
CLUSTER_REFRESH_DAEMON_INTERVAL_SECONDS = 60
@@ -0,0 +1,174 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <style>
5
+ body {
6
+ margin: 0;
7
+ padding: 10px;
8
+ background: #1e1e1e;
9
+ color: #d4d4d4;
10
+ font-family: monospace;
11
+ }
12
+ #output {
13
+ white-space: pre-wrap;
14
+ word-wrap: break-word;
15
+ font-size: 14px;
16
+ line-height: 1.4;
17
+ }
18
+ .ansi-black-fg { color: #000000; }
19
+ .ansi-red-fg { color: #cd0000; }
20
+ .ansi-green-fg { color: #00cd00; }
21
+ .ansi-yellow-fg { color: #cdcd00; }
22
+ .ansi-blue-fg { color: #0000ee; }
23
+ .ansi-magenta-fg { color: #cd00cd; }
24
+ .ansi-cyan-fg { color: #00cdcd; }
25
+ .ansi-white-fg { color: #e5e5e5; }
26
+ .ansi-bright-black-fg { color: #7f7f7f; }
27
+ .ansi-bright-red-fg { color: #ff0000; }
28
+ .ansi-bright-green-fg { color: #00ff00; }
29
+ .ansi-bright-yellow-fg { color: #ffff00; }
30
+ .ansi-bright-blue-fg { color: #5c5cff; }
31
+ .ansi-bright-magenta-fg { color: #ff00ff; }
32
+ .ansi-bright-cyan-fg { color: #00ffff; }
33
+ .ansi-bright-white-fg { color: #ffffff; }
34
+ .ansi-bold { font-weight: bold; }
35
+ .ansi-dim { opacity: 0.7; }
36
+ .ansi-italic { font-style: italic; }
37
+ .ansi-underline { text-decoration: underline; }
38
+ </style>
39
+ </head>
40
+ <body>
41
+ <pre id="output"></pre>
42
+ <script>
43
+ const output = document.getElementById('output');
44
+
45
+ // ANSI escape code parser
46
// Stateful parser that converts text containing ANSI SGR escape sequences
// into HTML. Style state (colors, bold, ...) persists across parse() calls so
// that a style opened in one streamed chunk carries over into the next.
class AnsiParser {
    constructor() {
        this.reset();
    }

    // Clear all style state (SGR code 0).
    reset() {
        this.fg = null;
        this.bg = null;
        this.bold = false;
        this.dim = false;
        this.italic = false;
        this.underline = false;
    }

    // Return the space-separated CSS class list for the current style state.
    getStyle() {
        const classes = [];
        if (this.fg) classes.push(`ansi-${this.fg}-fg`);
        if (this.bg) classes.push(`ansi-${this.bg}-bg`);
        if (this.bold) classes.push('ansi-bold');
        if (this.dim) classes.push('ansi-dim');
        if (this.italic) classes.push('ansi-italic');
        if (this.underline) classes.push('ansi-underline');
        return classes.join(' ');
    }

    // Escape characters that are significant in HTML, so that log text is
    // always rendered as text and never interpreted as markup.
    static escapeHtml(text) {
        return text
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    }

    // Apply a single numeric SGR parameter to the style state. Unknown codes
    // are ignored.
    applyCode(code) {
        switch (code) {
            case 0: this.reset(); break;
            case 1: this.bold = true; break;
            case 2: this.dim = true; break;
            case 3: this.italic = true; break;
            case 4: this.underline = true; break;
            case 30: this.fg = 'black'; break;
            case 31: this.fg = 'red'; break;
            case 32: this.fg = 'green'; break;
            case 33: this.fg = 'yellow'; break;
            case 34: this.fg = 'blue'; break;
            case 35: this.fg = 'magenta'; break;
            case 36: this.fg = 'cyan'; break;
            case 37: this.fg = 'white'; break;
            case 90: this.fg = 'bright-black'; break;
            case 91: this.fg = 'bright-red'; break;
            case 92: this.fg = 'bright-green'; break;
            case 93: this.fg = 'bright-yellow'; break;
            case 94: this.fg = 'bright-blue'; break;
            case 95: this.fg = 'bright-magenta'; break;
            case 96: this.fg = 'bright-cyan'; break;
            case 97: this.fg = 'bright-white'; break;
        }
    }

    // Convert `text` to an HTML string, wrapping styled runs in <span>
    // elements and escaping all literal text.
    parse(text) {
        const result = [];
        // Alternatives: an SGR sequence with zero or more ';'-separated
        // parameters (e.g. "\x1b[1;31m"), a run of plain text, or a stray ESC
        // that does not start an SGR sequence (dropped).
        const regex = /\x1b\[([0-9;]*)m|([^\x1b]+)|\x1b/g;
        let match;

        while ((match = regex.exec(text)) !== null) {
            if (match[1] !== undefined) {
                // An empty parameter (including a bare "\x1b[m") means 0.
                for (const param of match[1].split(';')) {
                    this.applyCode(param === '' ? 0 : parseInt(param, 10));
                }
            } else if (match[2]) {
                const style = this.getStyle();
                const safeText = AnsiParser.escapeHtml(match[2]);
                result.push(style
                    ? `<span class="${style}">${safeText}</span>`
                    : safeText);
            }
        }
        return result.join('');
    }
}
119
+
120
+ const parser = new AnsiParser();
121
+ const decoder = new TextDecoder();
122
+
123
// Append `message` to the output as a bold red "Error: ..." line (ANSI codes
// are rendered through the shared parser) and scroll to the bottom.
function displayError(message) {
    const BOLD_RED = '\x1b[1;31m';
    const RESET = '\x1b[0m';
    const errorText = '\n' + BOLD_RED + 'Error: ' + message + RESET + '\n';
    const rendered = parser.parse(errorText);
    output.innerHTML += rendered;
    window.scrollTo(0, document.body.scrollHeight);
}
128
+
129
// Fetch the log stream and render it incrementally into the output element.
fetch('{stream_url}')
    .then(response => {
        if (!response.ok) {
            // For HTTP errors, read the error message from the response body.
            return response.text().then(text => {
                let message = text;
                try {
                    // Try to parse as JSON (FastAPI error format).
                    const error = JSON.parse(text);
                    message = error.detail || error.message || text;
                } catch (e) {
                    // Not JSON (or not an object): keep the raw text.
                }
                // `detail` may be a structured value; show it as JSON text.
                if (typeof message !== 'string') {
                    message = JSON.stringify(message);
                }
                // Thrown outside the try block so that the parsed message is
                // not swallowed by the catch above.
                throw new Error(message);
            });
        }
        const reader = response.body.getReader();

        // Read chunks one at a time, appending each to the page.
        function readStream() {
            reader.read().then(({ done, value }) => {
                if (done) {
                    // Flush bytes the decoder may still hold from an
                    // incomplete multi-byte sequence.
                    const tail = decoder.decode();
                    if (tail) {
                        output.innerHTML += parser.parse(tail);
                        window.scrollTo(0, document.body.scrollHeight);
                    }
                    return;
                }
                try {
                    const text = decoder.decode(value, { stream: true });
                    output.innerHTML += parser.parse(text);
                    window.scrollTo(0, document.body.scrollHeight);
                    readStream();
                } catch (err) {
                    displayError(`Failed to process stream: ${err.message}`);
                    throw err;
                }
            }).catch(err => {
                displayError(`Failed to read stream: ${err.message}`);
                console.error('Stream read error:', err);
            });
        }

        readStream();
    })
    .catch(err => {
        displayError(err.message);
        console.error('Fetch error:', err);
    });
172
+ </script>
173
+ </body>
174
+ </html>
File without changes