skypilot-nightly 1.0.0.dev20250520__py3-none-any.whl → 1.0.0.dev20250522__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/backends/backend_utils.py +4 -1
- sky/backends/cloud_vm_ray_backend.py +56 -37
- sky/check.py +3 -3
- sky/cli.py +89 -16
- sky/client/cli.py +89 -16
- sky/client/sdk.py +92 -4
- sky/clouds/__init__.py +2 -0
- sky/clouds/cloud.py +6 -0
- sky/clouds/gcp.py +156 -21
- sky/clouds/service_catalog/__init__.py +3 -0
- sky/clouds/service_catalog/common.py +9 -2
- sky/clouds/service_catalog/constants.py +1 -0
- sky/core.py +6 -8
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/CzOVV6JpRQBRt5GhZuhyK/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/236-1a3a9440417720eb.js +6 -0
- sky/dashboard/out/_next/static/chunks/37-d584022b0da4ac3b.js +6 -0
- sky/dashboard/out/_next/static/chunks/393-e1eaa440481337ec.js +1 -0
- sky/dashboard/out/_next/static/chunks/480-f28cd152a98997de.js +1 -0
- sky/dashboard/out/_next/static/chunks/{678-206dddca808e6d16.js → 582-683f4f27b81996dc.js} +2 -2
- sky/dashboard/out/_next/static/chunks/pages/_app-8cfab319f9fb3ae8.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33bc2bec322249b1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-e2fc2dd1955e6c36.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-3a748bd76e5c2984.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-9180cd91cee64b96.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-70756c2dad850a7e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-ecd804b9272f4a7c.js +1 -0
- sky/dashboard/out/_next/static/css/7e7ce4ff31d3977b.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/data/storage.py +1 -0
- sky/execution.py +57 -8
- sky/jobs/server/core.py +5 -3
- sky/jobs/utils.py +38 -7
- sky/optimizer.py +41 -39
- sky/provision/gcp/constants.py +147 -4
- sky/provision/gcp/instance_utils.py +10 -0
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/provisioner.py +16 -7
- sky/resources.py +233 -18
- sky/serve/serve_utils.py +5 -13
- sky/serve/server/core.py +2 -4
- sky/server/common.py +60 -14
- sky/server/constants.py +2 -0
- sky/server/html/token_page.html +154 -0
- sky/server/requests/executor.py +3 -6
- sky/server/requests/payloads.py +3 -3
- sky/server/server.py +40 -8
- sky/skypilot_config.py +117 -31
- sky/task.py +24 -1
- sky/templates/gcp-ray.yml.j2 +44 -1
- sky/templates/nebius-ray.yml.j2 +0 -2
- sky/utils/admin_policy_utils.py +26 -22
- sky/utils/cli_utils/status_utils.py +95 -56
- sky/utils/common_utils.py +35 -2
- sky/utils/context.py +36 -6
- sky/utils/context_utils.py +15 -0
- sky/utils/infra_utils.py +175 -0
- sky/utils/resources_utils.py +55 -21
- sky/utils/schemas.py +111 -5
- {skypilot_nightly-1.0.0.dev20250520.dist-info → skypilot_nightly-1.0.0.dev20250522.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20250520.dist-info → skypilot_nightly-1.0.0.dev20250522.dist-info}/RECORD +73 -68
- {skypilot_nightly-1.0.0.dev20250520.dist-info → skypilot_nightly-1.0.0.dev20250522.dist-info}/WHEEL +1 -1
- sky/dashboard/out/_next/static/8hlc2dkbIDDBOkxtEW7X6/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/845-0ca6f2c1ba667c3b.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- /sky/dashboard/out/_next/static/{8hlc2dkbIDDBOkxtEW7X6 → CzOVV6JpRQBRt5GhZuhyK}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250520.dist-info → skypilot_nightly-1.0.0.dev20250522.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250520.dist-info → skypilot_nightly-1.0.0.dev20250522.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250520.dist-info → skypilot_nightly-1.0.0.dev20250522.dist-info}/top_level.txt +0 -0
sky/server/common.py
CHANGED
@@ -12,7 +12,7 @@ import subprocess
|
|
12
12
|
import sys
|
13
13
|
import time
|
14
14
|
import typing
|
15
|
-
from typing import Any, Dict, Optional
|
15
|
+
from typing import Any, Dict, Literal, Optional
|
16
16
|
from urllib import parse
|
17
17
|
import uuid
|
18
18
|
|
@@ -116,6 +116,7 @@ class ApiServerStatus(enum.Enum):
|
|
116
116
|
HEALTHY = 'healthy'
|
117
117
|
UNHEALTHY = 'unhealthy'
|
118
118
|
VERSION_MISMATCH = 'version_mismatch'
|
119
|
+
NEEDS_AUTH = 'needs_auth'
|
119
120
|
|
120
121
|
|
121
122
|
@dataclasses.dataclass
|
@@ -127,15 +128,21 @@ class ApiServerInfo:
|
|
127
128
|
commit: Optional[str] = None
|
128
129
|
|
129
130
|
|
131
|
+
def get_api_cookie_jar_path() -> str:
|
132
|
+
return os.environ.get(server_constants.API_COOKIE_FILE_ENV_VAR,
|
133
|
+
server_constants.API_COOKIE_FILE_DEFAULT_LOCATION)
|
134
|
+
|
135
|
+
|
130
136
|
def get_api_cookie_jar() -> requests.cookies.RequestsCookieJar:
|
131
137
|
"""Returns the cookie jar used by the client to access the API server."""
|
132
|
-
cookie_file = os.environ.get(server_constants.API_COOKIE_FILE_ENV_VAR)
|
133
138
|
cookie_jar = requests.cookies.RequestsCookieJar()
|
134
|
-
|
139
|
+
cookie_file = get_api_cookie_jar_path()
|
140
|
+
if cookie_file:
|
135
141
|
cookie_path = pathlib.Path(cookie_file).expanduser().resolve()
|
136
|
-
|
137
|
-
|
138
|
-
|
142
|
+
if cookie_path.exists():
|
143
|
+
file_cookie_jar = MozillaCookieJar(cookie_path)
|
144
|
+
file_cookie_jar.load()
|
145
|
+
cookie_jar.update(file_cookie_jar)
|
139
146
|
return cookie_jar
|
140
147
|
|
141
148
|
|
@@ -196,6 +203,7 @@ def get_api_server_status(endpoint: Optional[str] = None) -> ApiServerInfo:
|
|
196
203
|
response = requests.get(f'{server_url}/api/health',
|
197
204
|
timeout=2.5,
|
198
205
|
cookies=get_api_cookie_jar())
|
206
|
+
logger.debug(f'Health check status: {response.status_code}')
|
199
207
|
if response.status_code == 200:
|
200
208
|
try:
|
201
209
|
result = response.json()
|
@@ -217,9 +225,24 @@ def get_api_server_status(endpoint: Optional[str] = None) -> ApiServerInfo:
|
|
217
225
|
server_info.status = ApiServerStatus.VERSION_MISMATCH
|
218
226
|
return server_info
|
219
227
|
except (json.JSONDecodeError, AttributeError) as e:
|
228
|
+
# Try to check if we got redirected to a login page.
|
229
|
+
for prev_response in response.history:
|
230
|
+
logger.debug(f'Previous response: {prev_response.url}')
|
231
|
+
# Heuristic: check if the url looks like a login page or
|
232
|
+
# oauth flow.
|
233
|
+
if any(key in prev_response.url
|
234
|
+
for key in ['login', 'oauth2']):
|
235
|
+
logger.debug(
|
236
|
+
f'URL {prev_response.url} looks like '
|
237
|
+
'a login page or oauth flow, so try to '
|
238
|
+
'get the cookie.')
|
239
|
+
return ApiServerInfo(
|
240
|
+
status=ApiServerStatus.NEEDS_AUTH)
|
220
241
|
logger.warning('Failed to parse API server response: '
|
221
242
|
f'{str(e)}')
|
222
243
|
return ApiServerInfo(status=ApiServerStatus.UNHEALTHY)
|
244
|
+
elif response.status_code == 401:
|
245
|
+
return ApiServerInfo(status=ApiServerStatus.NEEDS_AUTH)
|
223
246
|
else:
|
224
247
|
return ApiServerInfo(status=ApiServerStatus.UNHEALTHY)
|
225
248
|
except requests.exceptions.Timeout:
|
@@ -297,7 +320,6 @@ def _start_api_server(deploy: bool = False,
|
|
297
320
|
|
298
321
|
log_path = os.path.expanduser(constants.API_SERVER_LOGS)
|
299
322
|
os.makedirs(os.path.dirname(log_path), exist_ok=True)
|
300
|
-
cmd = f'{" ".join(args)} > {log_path} 2>&1 < /dev/null'
|
301
323
|
|
302
324
|
# Start the API server process in the background and don't wait for it.
|
303
325
|
# If this is called from a CLI invocation, we need
|
@@ -305,10 +327,20 @@ def _start_api_server(deploy: bool = False,
|
|
305
327
|
# the API server.
|
306
328
|
server_env = os.environ.copy()
|
307
329
|
server_env[constants.ENV_VAR_IS_SKYPILOT_SERVER] = 'true'
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
330
|
+
with open(log_path, 'w', encoding='utf-8') as log_file:
|
331
|
+
# Because the log file is opened using a with statement, it may seem
|
332
|
+
# that the file will be closed when the with statement is exited
|
333
|
+
# causing the child process to be unable to write to the log file.
|
334
|
+
# However, Popen makes the file descriptor inheritable which means
|
335
|
+
# the child process will inherit its own copy of the fd,
|
336
|
+
# independent of the parent's fd table, which enables the child
|
337
|
+
# process to continue writing to the log file.
|
338
|
+
proc = subprocess.Popen(args,
|
339
|
+
stdout=log_file,
|
340
|
+
stderr=subprocess.STDOUT,
|
341
|
+
stdin=subprocess.DEVNULL,
|
342
|
+
start_new_session=True,
|
343
|
+
env=server_env)
|
312
344
|
|
313
345
|
start_time = time.time()
|
314
346
|
while True:
|
@@ -360,7 +392,12 @@ def _start_api_server(deploy: bool = False,
|
|
360
392
|
f'SkyPilot API server started. {dashboard_msg}'))
|
361
393
|
|
362
394
|
|
363
|
-
def check_server_healthy(endpoint: Optional[str] = None,) -> None:
|
395
|
+
def check_server_healthy(
|
396
|
+
endpoint: Optional[str] = None
|
397
|
+
) -> Literal[
|
398
|
+
# Use an incomplete list of Literals here to enforce raising for other
|
399
|
+
# enum values.
|
400
|
+
ApiServerStatus.HEALTHY, ApiServerStatus.NEEDS_AUTH]:
|
364
401
|
"""Check if the API server is healthy.
|
365
402
|
|
366
403
|
Args:
|
@@ -370,6 +407,11 @@ def check_server_healthy(endpoint: Optional[str] = None,) -> None:
|
|
370
407
|
Raises:
|
371
408
|
RuntimeError: If the server is not healthy or the client version does
|
372
409
|
not match the server version.
|
410
|
+
|
411
|
+
Returns:
|
412
|
+
ApiServerStatus: The status of the API server, unless the server is
|
413
|
+
unhealthy or the client version does not match the server version,
|
414
|
+
in which case an exception is raised.
|
373
415
|
"""
|
374
416
|
endpoint = endpoint if endpoint is not None else get_server_url()
|
375
417
|
api_server_info = get_api_server_status(endpoint)
|
@@ -432,6 +474,8 @@ def check_server_healthy(endpoint: Optional[str] = None,) -> None:
|
|
432
474
|
|
433
475
|
hinted_for_server_install_version_mismatch = True
|
434
476
|
|
477
|
+
return api_server_status
|
478
|
+
|
435
479
|
|
436
480
|
def _get_version_info_hint(server_info: ApiServerInfo) -> str:
|
437
481
|
assert server_info.version is not None, 'Server version is None'
|
@@ -482,11 +526,13 @@ def get_skypilot_version_on_disk() -> str:
|
|
482
526
|
def check_server_healthy_or_start_fn(deploy: bool = False,
|
483
527
|
host: str = '127.0.0.1',
|
484
528
|
foreground: bool = False):
|
529
|
+
api_server_status = None
|
485
530
|
try:
|
486
|
-
check_server_healthy()
|
531
|
+
api_server_status = check_server_healthy()
|
487
532
|
except exceptions.ApiServerConnectionError as exc:
|
488
533
|
endpoint = get_server_url()
|
489
|
-
if not is_api_server_local():
|
534
|
+
if (not is_api_server_local() or
|
535
|
+
api_server_status == ApiServerStatus.NEEDS_AUTH):
|
490
536
|
with ux_utils.print_exception_no_traceback():
|
491
537
|
raise exceptions.ApiServerConnectionError(endpoint) from exc
|
492
538
|
# Lock to prevent multiple processes from starting the server at the
|
sky/server/constants.py
CHANGED
@@ -26,6 +26,8 @@ CLUSTER_REFRESH_DAEMON_INTERVAL_SECONDS = 60
|
|
26
26
|
|
27
27
|
# Environment variable for a file path to the API cookie file.
|
28
28
|
API_COOKIE_FILE_ENV_VAR = f'{constants.SKYPILOT_ENV_VAR_PREFIX}API_COOKIE_FILE'
|
29
|
+
# Default file if unset.
|
30
|
+
API_COOKIE_FILE_DEFAULT_LOCATION = '~/.sky/cookies.txt'
|
29
31
|
|
30
32
|
# The path to the dashboard build output
|
31
33
|
DASHBOARD_DIR = os.path.join(os.path.dirname(__file__), '..', 'dashboard',
|
@@ -0,0 +1,154 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html lang="en">
|
3
|
+
<head>
|
4
|
+
<meta charset="UTF-8">
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6
|
+
<title>SkyPilot API Server Login</title>
|
7
|
+
<style>
|
8
|
+
body {
|
9
|
+
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
|
10
|
+
display: flex;
|
11
|
+
flex-direction: column;
|
12
|
+
align-items: center;
|
13
|
+
justify-content: center;
|
14
|
+
min-height: 100vh;
|
15
|
+
margin: 0;
|
16
|
+
background-color: #f8f9fa;
|
17
|
+
color: #202124;
|
18
|
+
padding: 20px;
|
19
|
+
box-sizing: border-box;
|
20
|
+
}
|
21
|
+
.container {
|
22
|
+
background-color: #ffffff;
|
23
|
+
padding: 48px;
|
24
|
+
border-radius: 8px;
|
25
|
+
box-shadow: 0 1px 3px rgba(0,0,0,0.12), 0 1px 2px rgba(0,0,0,0.24);
|
26
|
+
text-align: center;
|
27
|
+
max-width: 600px;
|
28
|
+
width: 100%;
|
29
|
+
}
|
30
|
+
.logo {
|
31
|
+
width: 64px;
|
32
|
+
height: 64px;
|
33
|
+
margin-bottom: 20px;
|
34
|
+
display: inline-block;
|
35
|
+
}
|
36
|
+
.logo svg {
|
37
|
+
width: 100%;
|
38
|
+
height: 100%;
|
39
|
+
}
|
40
|
+
h1 {
|
41
|
+
font-size: 24px;
|
42
|
+
font-weight: 500;
|
43
|
+
margin-bottom: 20px;
|
44
|
+
color: #202124;
|
45
|
+
}
|
46
|
+
p {
|
47
|
+
font-size: 14px;
|
48
|
+
line-height: 1.5;
|
49
|
+
margin-bottom: 20px;
|
50
|
+
color: #5f6368;
|
51
|
+
}
|
52
|
+
.code-block {
|
53
|
+
background-color: #f1f3f4;
|
54
|
+
border: 1px solid #dadce0;
|
55
|
+
border-radius: 4px;
|
56
|
+
padding: 16px;
|
57
|
+
margin-top: 24px;
|
58
|
+
margin-bottom: 24px;
|
59
|
+
margin-left: auto;
|
60
|
+
margin-right: auto;
|
61
|
+
text-align: left;
|
62
|
+
word-break: break-all;
|
63
|
+
white-space: pre-wrap;
|
64
|
+
font-family: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, Courier, monospace;
|
65
|
+
font-size: 13px;
|
66
|
+
line-height: 1.4;
|
67
|
+
max-width: 480px;
|
68
|
+
}
|
69
|
+
#token-box { /* Specifically for the token */
|
70
|
+
height: auto;
|
71
|
+
min-height: 6em; /* Ensure it's a reasonable size */
|
72
|
+
max-height: 15em; /* Prevent it from getting too large */
|
73
|
+
overflow-y: auto;
|
74
|
+
}
|
75
|
+
.copy-button {
|
76
|
+
background-color: #1a73e8;
|
77
|
+
color: white;
|
78
|
+
border: none;
|
79
|
+
border-radius: 4px;
|
80
|
+
padding: 10px 24px;
|
81
|
+
font-size: 14px;
|
82
|
+
font-weight: 500;
|
83
|
+
cursor: pointer;
|
84
|
+
transition: background-color 0.3s;
|
85
|
+
margin-top: 10px;
|
86
|
+
}
|
87
|
+
.copy-button:hover {
|
88
|
+
background-color: #287ae6;
|
89
|
+
}
|
90
|
+
.copy-button:active {
|
91
|
+
background-color: #1b66c9;
|
92
|
+
}
|
93
|
+
.footer-text {
|
94
|
+
font-size: 12px;
|
95
|
+
color: #5f6368;
|
96
|
+
margin-top: 30px;
|
97
|
+
}
|
98
|
+
</style>
|
99
|
+
</head>
|
100
|
+
<body>
|
101
|
+
<div class="container">
|
102
|
+
<div class="logo">
|
103
|
+
<!-- SkyPilot Logo Icon -->
|
104
|
+
<svg viewBox="0 0 50 50" fill="none" xmlns="http://www.w3.org/2000/svg">
|
105
|
+
<path d="M25.1258 30.8274L19.2842 31.6783L33.8316 46.2268L31.492 37.1925L25.1258 30.8274Z" fill="#372F8A"/>
|
106
|
+
<path d="M46.9433 0.000976562L0.719727 13.1148L15.2661 27.6601L16.633 21.3925L10.3728 15.1323L40.183 6.74118C40.183 6.74118 46.102 0.855027 46.9444 0.00203721L46.9433 0.000976562Z" fill="#372F8A"/>
|
107
|
+
<path d="M40.1821 6.74021L31.4922 37.1925L33.8318 46.2257L46.9445 0C46.1022 0.85299 40.1831 6.73915 40.1831 6.73915L40.1821 6.74021Z" fill="#372F8A"/>
|
108
|
+
<path d="M21.3356 25.6089L19.2842 31.6783L25.1258 30.8275L30.3741 16.6011L30.3275 16.617L21.3356 25.6089Z" fill="#195D7F"/>
|
109
|
+
<path d="M16.632 21.3918L15.2651 27.6605L21.3357 25.6091L30.3276 16.6172L16.632 21.3918Z" fill="#39A4DD"/>
|
110
|
+
</svg>
|
111
|
+
</div>
|
112
|
+
<h1>Sign in to SkyPilot CLI</h1>
|
113
|
+
<p>You are seeing this page because a SkyPilot command requires authentication.</p>
|
114
|
+
|
115
|
+
<p>Please copy the following token and paste it into your SkyPilot CLI prompt:</p>
|
116
|
+
<div id="token-box" class="code-block">SKYPILOT_API_SERVER_USER_TOKEN_PLACEHOLDER</div>
|
117
|
+
<button id="copy-btn" class="copy-button">Copy Token</button>
|
118
|
+
|
119
|
+
<p class="footer-text">You can close this tab after copying the token.</p>
|
120
|
+
</div>
|
121
|
+
|
122
|
+
<script>
|
123
|
+
const tokenBox = document.getElementById('token-box');
|
124
|
+
const copyBtn = document.getElementById('copy-btn');
|
125
|
+
|
126
|
+
function selectToken() {
|
127
|
+
// For <pre> or <div>, create a range to select its content
|
128
|
+
const range = document.createRange();
|
129
|
+
range.selectNodeContents(tokenBox);
|
130
|
+
const sel = window.getSelection();
|
131
|
+
sel.removeAllRanges();
|
132
|
+
sel.addRange(range);
|
133
|
+
}
|
134
|
+
|
135
|
+
// Optional: Select the token when the page loads or when token box is clicked
|
136
|
+
tokenBox.addEventListener('click', selectToken);
|
137
|
+
window.addEventListener('load', selectToken);
|
138
|
+
|
139
|
+
copyBtn.addEventListener('click', () => {
|
140
|
+
selectToken(); // Select the text
|
141
|
+
try {
|
142
|
+
document.execCommand('copy');
|
143
|
+
copyBtn.textContent = 'Copied!';
|
144
|
+
} catch (err) {
|
145
|
+
copyBtn.textContent = 'Error!';
|
146
|
+
console.error('Failed to copy text: ', err);
|
147
|
+
}
|
148
|
+
setTimeout(() => {
|
149
|
+
copyBtn.textContent = 'Copy Token';
|
150
|
+
}, 2000);
|
151
|
+
});
|
152
|
+
</script>
|
153
|
+
</body>
|
154
|
+
</html>
|
sky/server/requests/executor.py
CHANGED
@@ -20,8 +20,6 @@ See the [README.md](../README.md) for detailed architecture of the executor.
|
|
20
20
|
"""
|
21
21
|
import asyncio
|
22
22
|
import contextlib
|
23
|
-
import contextvars
|
24
|
-
import functools
|
25
23
|
import multiprocessing
|
26
24
|
import os
|
27
25
|
import queue as queue_lib
|
@@ -52,6 +50,7 @@ from sky.skylet import constants
|
|
52
50
|
from sky.utils import annotations
|
53
51
|
from sky.utils import common_utils
|
54
52
|
from sky.utils import context
|
53
|
+
from sky.utils import context_utils
|
55
54
|
from sky.utils import subprocess_utils
|
56
55
|
from sky.utils import timeline
|
57
56
|
|
@@ -368,10 +367,8 @@ async def execute_request_coroutine(request: api_requests.Request):
|
|
368
367
|
# 1. skypilot config is not contextual
|
369
368
|
# 2. envs that read directly from os.environ are not contextual
|
370
369
|
ctx.override_envs(request_body.env_vars)
|
371
|
-
|
372
|
-
|
373
|
-
func_call = functools.partial(pyctx.run, func, **request_body.to_kwargs())
|
374
|
-
fut: asyncio.Future = loop.run_in_executor(None, func_call)
|
370
|
+
fut: asyncio.Future = context_utils.to_thread(func,
|
371
|
+
**request_body.to_kwargs())
|
375
372
|
|
376
373
|
async def poll_task(request_id: str) -> bool:
|
377
374
|
request = api_requests.get_request(request_id)
|
sky/server/requests/payloads.py
CHANGED
@@ -443,9 +443,9 @@ class ServeStatusBody(RequestBody):
|
|
443
443
|
|
444
444
|
class RealtimeGpuAvailabilityRequestBody(RequestBody):
|
445
445
|
"""The request body for the realtime GPU availability endpoint."""
|
446
|
-
context: Optional[str]
|
447
|
-
name_filter: Optional[str]
|
448
|
-
quantity_filter: Optional[int]
|
446
|
+
context: Optional[str] = None
|
447
|
+
name_filter: Optional[str] = None
|
448
|
+
quantity_filter: Optional[int] = None
|
449
449
|
|
450
450
|
|
451
451
|
class KubernetesNodeInfoRequestBody(RequestBody):
|
sky/server/server.py
CHANGED
@@ -2,9 +2,11 @@
|
|
2
2
|
|
3
3
|
import argparse
|
4
4
|
import asyncio
|
5
|
+
import base64
|
5
6
|
import contextlib
|
6
7
|
import dataclasses
|
7
8
|
import datetime
|
9
|
+
import json
|
8
10
|
import logging
|
9
11
|
import multiprocessing
|
10
12
|
import os
|
@@ -49,6 +51,7 @@ from sky.utils import admin_policy_utils
|
|
49
51
|
from sky.utils import common as common_lib
|
50
52
|
from sky.utils import common_utils
|
51
53
|
from sky.utils import context
|
54
|
+
from sky.utils import context_utils
|
52
55
|
from sky.utils import dag_utils
|
53
56
|
from sky.utils import env_options
|
54
57
|
from sky.utils import status_lib
|
@@ -218,6 +221,34 @@ app.include_router(jobs_rest.router, prefix='/jobs', tags=['jobs'])
|
|
218
221
|
app.include_router(serve_rest.router, prefix='/serve', tags=['serve'])
|
219
222
|
|
220
223
|
|
224
|
+
@app.get('/token')
|
225
|
+
async def token(request: fastapi.Request) -> fastapi.responses.HTMLResponse:
|
226
|
+
# Use base64 encoding to avoid having to escape anything in the HTML.
|
227
|
+
json_bytes = json.dumps(request.cookies).encode('utf-8')
|
228
|
+
base64_str = base64.b64encode(json_bytes).decode('utf-8')
|
229
|
+
|
230
|
+
html_dir = pathlib.Path(__file__).parent / 'html'
|
231
|
+
token_page_path = html_dir / 'token_page.html'
|
232
|
+
try:
|
233
|
+
with open(token_page_path, 'r', encoding='utf-8') as f:
|
234
|
+
html_content = f.read()
|
235
|
+
except FileNotFoundError as e:
|
236
|
+
raise fastapi.HTTPException(
|
237
|
+
status_code=500, detail='Token page template not found.') from e
|
238
|
+
|
239
|
+
html_content = html_content.replace(
|
240
|
+
'SKYPILOT_API_SERVER_USER_TOKEN_PLACEHOLDER', base64_str)
|
241
|
+
|
242
|
+
return fastapi.responses.HTMLResponse(
|
243
|
+
content=html_content,
|
244
|
+
headers={
|
245
|
+
'Cache-Control': 'no-cache, no-transform',
|
246
|
+
# X-Accel-Buffering: no is useful for preventing buffering issues
|
247
|
+
# with some reverse proxies.
|
248
|
+
'X-Accel-Buffering': 'no'
|
249
|
+
})
|
250
|
+
|
251
|
+
|
221
252
|
@app.post('/check')
|
222
253
|
async def check(request: fastapi.Request,
|
223
254
|
check_body: payloads.CheckBody) -> None:
|
@@ -327,25 +358,26 @@ async def validate(validate_body: payloads.ValidateBody) -> None:
|
|
327
358
|
# pairs.
|
328
359
|
logger.debug(f'Validating tasks: {validate_body.dag}')
|
329
360
|
|
361
|
+
context.initialize()
|
362
|
+
|
330
363
|
def validate_dag(dag: dag_utils.dag_lib.Dag):
|
331
364
|
# TODO: Admin policy may contain arbitrary code, which may be expensive
|
332
365
|
# to run and may block the server thread. However, moving it into the
|
333
366
|
# executor adds a ~150ms penalty on the local API server because of
|
334
367
|
# added RTTs. For now, we stick to doing the validation inline in the
|
335
368
|
# server thread.
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
369
|
+
with admin_policy_utils.apply_and_use_config_in_current_request(
|
370
|
+
dag, request_options=validate_body.request_options) as dag:
|
371
|
+
# Skip validating workdir and file_mounts, as those need to be
|
372
|
+
# validated after the files are uploaded to the SkyPilot API server
|
373
|
+
# with `upload_mounts_to_api_server`.
|
374
|
+
dag.validate(skip_file_mounts=True, skip_workdir=True)
|
342
375
|
|
343
376
|
try:
|
344
377
|
dag = dag_utils.load_chain_dag_from_yaml_str(validate_body.dag)
|
345
|
-
loop = asyncio.get_running_loop()
|
346
378
|
# Apply admin policy and validate DAG is blocking, run it in a separate
|
347
379
|
# thread executor to avoid blocking the uvicorn event loop.
|
348
|
-
await
|
380
|
+
await context_utils.to_thread(validate_dag, dag)
|
349
381
|
except Exception as e: # pylint: disable=broad-except
|
350
382
|
raise fastapi.HTTPException(
|
351
383
|
status_code=400, detail=exceptions.serialize_exception(e)) from e
|