skypilot-nightly 1.0.0.dev20250523__py3-none-any.whl → 1.0.0.dev20250526__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/backends/backend_utils.py +62 -45
- sky/backends/cloud_vm_ray_backend.py +3 -1
- sky/check.py +335 -170
- sky/cli.py +56 -13
- sky/client/cli.py +56 -13
- sky/client/sdk.py +54 -10
- sky/clouds/gcp.py +19 -3
- sky/core.py +5 -2
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/7GEgRyZKRaSnYZCV1Jwol/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/25-062253ea41fb8eec.js +6 -0
- sky/dashboard/out/_next/static/chunks/480-5a0de8b6570ea105.js +1 -0
- sky/dashboard/out/_next/static/chunks/488-50d843fdb5396d32.js +15 -0
- sky/dashboard/out/_next/static/chunks/498-d7722313e5e5b4e6.js +21 -0
- sky/dashboard/out/_next/static/chunks/573-f17bd89d9f9118b3.js +66 -0
- sky/dashboard/out/_next/static/chunks/578-d351125af46c293f.js +6 -0
- sky/dashboard/out/_next/static/chunks/734-a6e01d7f98904741.js +1 -0
- sky/dashboard/out/_next/static/chunks/937.f97f83652028e944.js +1 -0
- sky/dashboard/out/_next/static/chunks/938-59956af3950b02ed.js +1 -0
- sky/dashboard/out/_next/static/chunks/9f96d65d-5a3e4af68c26849e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-96a715a6fb01e228.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-3b5aad09a25f64b7.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-9529d9e882a0e75c.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-9e6d1ec6e1ac5b29.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-abb7d744ecf15109.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-48dc8d67d4b60be1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-73d5e0c369d00346.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/users-b8acf6e6735323a2.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-bbf436f41381e169.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-7733c960685b4385.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-5ed48b3201b998c8.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-deda68c926e8d0bc.js +1 -0
- sky/dashboard/out/_next/static/css/28558d57108b05ae.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/storage.py +1 -1
- sky/global_user_state.py +606 -543
- sky/jobs/constants.py +1 -1
- sky/jobs/server/core.py +72 -56
- sky/jobs/state.py +26 -5
- sky/jobs/utils.py +65 -13
- sky/optimizer.py +6 -3
- sky/provision/fluidstack/instance.py +1 -0
- sky/serve/server/core.py +9 -6
- sky/server/html/token_page.html +6 -1
- sky/server/requests/executor.py +1 -0
- sky/server/requests/payloads.py +28 -0
- sky/server/server.py +59 -5
- sky/setup_files/dependencies.py +1 -0
- sky/skylet/constants.py +4 -1
- sky/skypilot_config.py +107 -11
- sky/utils/cli_utils/status_utils.py +18 -8
- sky/utils/db_utils.py +53 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/deploy_remote_cluster.py +166 -147
- sky/utils/kubernetes/kubernetes_deploy_utils.py +49 -5
- sky/utils/kubernetes/ssh-tunnel.sh +20 -28
- sky/utils/log_utils.py +4 -0
- sky/utils/schemas.py +54 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +295 -0
- sky/workspaces/server.py +62 -0
- {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/METADATA +2 -1
- {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/RECORD +79 -63
- sky/dashboard/out/_next/static/ECKwDNS9v9y3_IKFZ2lpp/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-1a3a9440417720eb.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-d584022b0da4ac3b.js +0 -6
- sky/dashboard/out/_next/static/chunks/393-e1eaa440481337ec.js +0 -1
- sky/dashboard/out/_next/static/chunks/480-f28cd152a98997de.js +0 -1
- sky/dashboard/out/_next/static/chunks/582-683f4f27b81996dc.js +0 -59
- sky/dashboard/out/_next/static/chunks/pages/_app-8cfab319f9fb3ae8.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33bc2bec322249b1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-e2fc2dd1955e6c36.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-3a748bd76e5c2984.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra-abf08c4384190a39.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-70756c2dad850a7e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-ecd804b9272f4a7c.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/7e7ce4ff31d3977b.css +0 -3
- /sky/dashboard/out/_next/static/{ECKwDNS9v9y3_IKFZ2lpp → 7GEgRyZKRaSnYZCV1Jwol}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/top_level.txt +0 -0
sky/server/server.py
CHANGED
@@ -6,6 +6,7 @@ import base64
|
|
6
6
|
import contextlib
|
7
7
|
import dataclasses
|
8
8
|
import datetime
|
9
|
+
import hashlib
|
9
10
|
import json
|
10
11
|
import logging
|
11
12
|
import multiprocessing
|
@@ -31,6 +32,7 @@ from sky import core
|
|
31
32
|
from sky import exceptions
|
32
33
|
from sky import execution
|
33
34
|
from sky import global_user_state
|
35
|
+
from sky import models
|
34
36
|
from sky import sky_logging
|
35
37
|
from sky.clouds import service_catalog
|
36
38
|
from sky.data import storage_utils
|
@@ -56,6 +58,7 @@ from sky.utils import dag_utils
|
|
56
58
|
from sky.utils import env_options
|
57
59
|
from sky.utils import status_lib
|
58
60
|
from sky.utils import subprocess_utils
|
61
|
+
from sky.workspaces import server as workspaces_rest
|
59
62
|
|
60
63
|
# pylint: disable=ungrouped-imports
|
61
64
|
if sys.version_info >= (3, 10):
|
@@ -110,6 +113,38 @@ class RequestIDMiddleware(starlette.middleware.base.BaseHTTPMiddleware):
|
|
110
113
|
return response
|
111
114
|
|
112
115
|
|
116
|
+
def _get_auth_user_header(request: fastapi.Request) -> Optional[models.User]:
    """Build a user from the ``X-Auth-Request-Email`` request header.

    Args:
        request: The incoming request whose headers are inspected.

    Returns:
        None when the header is absent. Otherwise a ``models.User`` whose
        name is the header value and whose id is the MD5 hex digest of that
        value, truncated to ``common_utils.USER_HASH_LENGTH`` characters.
    """
    email = request.headers.get('X-Auth-Request-Email')
    if email is None:
        return None
    digest = hashlib.md5(email.encode()).hexdigest()
    return models.User(id=digest[:common_utils.USER_HASH_LENGTH], name=email)
|
123
|
+
|
124
|
+
|
125
|
+
class AuthProxyMiddleware(starlette.middleware.base.BaseHTTPMiddleware):
    """Middleware to handle auth proxy.

    When the request carries the header read by _get_auth_user_header() and
    its body is a JSON object containing an ``env_vars`` mapping, the user id
    and user name entries of ``env_vars`` are overwritten with the values
    derived from the header before the request is passed on.
    """

    async def dispatch(self, request: fastapi.Request, call_next):
        """Override the user env vars in the JSON body, then forward.

        Args:
            request: The incoming request.
            call_next: Callable that forwards the request down the stack.

        Returns:
            The response produced by ``call_next``.
        """
        auth_user = _get_auth_user_header(request)
        if not auth_user:
            # Nothing to override: forward without buffering the body, so
            # large request payloads are not read into memory needlessly.
            return await call_next(request)

        body = await request.body()
        if body:
            try:
                original_json = await request.json()
            except ValueError as e:
                # json.JSONDecodeError and UnicodeDecodeError (raised for a
                # body that is not valid text) are both ValueError
                # subclasses; neither should fail the request here.
                logger.error(f'Error parsing request JSON: {e}')
            else:
                logger.debug(f'Overriding user for {request.state.request_id}: '
                             f'{auth_user.name}, {auth_user.id}')
                # Only a JSON object with a mapping under 'env_vars' can be
                # patched; other payload shapes are forwarded untouched.
                env_vars = (original_json.get('env_vars') if isinstance(
                    original_json, dict) else None)
                if isinstance(env_vars, dict):
                    env_vars[constants.USER_ID_ENV_VAR] = auth_user.id
                    env_vars[constants.USER_ENV_VAR] = auth_user.name
                    # Replace the cached body so downstream handlers read
                    # the patched payload.
                    request._body = json.dumps(original_json).encode('utf-8')  # pylint: disable=protected-access
        return await call_next(request)
|
146
|
+
|
147
|
+
|
113
148
|
# Default expiration time for upload ids before cleanup.
|
114
149
|
_DEFAULT_UPLOAD_EXPIRATION_TIME = datetime.timedelta(hours=1)
|
115
150
|
# Key: (upload_id, user_hash), Value: the time when the upload id needs to be
|
@@ -216,15 +251,29 @@ app.add_middleware(
|
|
216
251
|
allow_headers=['*'],
|
217
252
|
# TODO(syang): remove X-Request-ID when v0.10.0 is released.
|
218
253
|
expose_headers=['X-Request-ID', 'X-Skypilot-Request-ID'])
|
254
|
+
app.add_middleware(AuthProxyMiddleware)
|
219
255
|
app.add_middleware(RequestIDMiddleware)
|
220
256
|
app.include_router(jobs_rest.router, prefix='/jobs', tags=['jobs'])
|
221
257
|
app.include_router(serve_rest.router, prefix='/serve', tags=['serve'])
|
258
|
+
app.include_router(workspaces_rest.router,
|
259
|
+
prefix='/workspaces',
|
260
|
+
tags=['workspaces'])
|
222
261
|
|
223
262
|
|
224
263
|
@app.get('/token')
|
225
264
|
async def token(request: fastapi.Request) -> fastapi.responses.HTMLResponse:
|
265
|
+
# If we have auth info, save this user to the database.
|
266
|
+
user = _get_auth_user_header(request)
|
267
|
+
if user is not None:
|
268
|
+
global_user_state.add_or_update_user(user)
|
269
|
+
|
270
|
+
token_data = {
|
271
|
+
'v': 1, # Token version number, bump for backwards incompatible.
|
272
|
+
'user': user.id if user is not None else None,
|
273
|
+
'cookies': request.cookies,
|
274
|
+
}
|
226
275
|
# Use base64 encoding to avoid having to escape anything in the HTML.
|
227
|
-
json_bytes = json.dumps(
|
276
|
+
json_bytes = json.dumps(token_data).encode('utf-8')
|
228
277
|
base64_str = base64.b64encode(json_bytes).decode('utf-8')
|
229
278
|
|
230
279
|
html_dir = pathlib.Path(__file__).parent / 'html'
|
@@ -236,8 +285,10 @@ async def token(request: fastapi.Request) -> fastapi.responses.HTMLResponse:
|
|
236
285
|
raise fastapi.HTTPException(
|
237
286
|
status_code=500, detail='Token page template not found.') from e
|
238
287
|
|
288
|
+
user_info_string = f'Logged in as {user.name}' if user is not None else ''
|
239
289
|
html_content = html_content.replace(
|
240
|
-
'SKYPILOT_API_SERVER_USER_TOKEN_PLACEHOLDER',
|
290
|
+
'SKYPILOT_API_SERVER_USER_TOKEN_PLACEHOLDER',
|
291
|
+
base64_str).replace('USER_PLACEHOLDER', user_info_string)
|
241
292
|
|
242
293
|
return fastapi.responses.HTMLResponse(
|
243
294
|
content=html_content,
|
@@ -263,12 +314,13 @@ async def check(request: fastapi.Request,
|
|
263
314
|
|
264
315
|
|
265
316
|
@app.get('/enabled_clouds')
|
266
|
-
async def enabled_clouds(request: fastapi.Request
|
317
|
+
async def enabled_clouds(request: fastapi.Request,
|
318
|
+
workspace: Optional[str] = None) -> None:
|
267
319
|
"""Gets enabled clouds on the server."""
|
268
320
|
executor.schedule_request(
|
269
321
|
request_id=request.state.request_id,
|
270
322
|
request_name='enabled_clouds',
|
271
|
-
request_body=payloads.
|
323
|
+
request_body=payloads.EnabledCloudsBody(workspace=workspace),
|
272
324
|
func=core.enabled_clouds,
|
273
325
|
schedule_type=requests_lib.ScheduleType.SHORT,
|
274
326
|
)
|
@@ -1113,7 +1165,7 @@ async def api_status(
|
|
1113
1165
|
|
1114
1166
|
|
1115
1167
|
@app.get('/api/health')
|
1116
|
-
async def health() -> Dict[str,
|
1168
|
+
async def health(request: fastapi.Request) -> Dict[str, Any]:
|
1117
1169
|
"""Checks the health of the API server.
|
1118
1170
|
|
1119
1171
|
Returns:
|
@@ -1125,12 +1177,14 @@ async def health() -> Dict[str, str]:
|
|
1125
1177
|
disk, which can be used to warn about restarting the API server
|
1126
1178
|
- commit: str; The commit hash of SkyPilot used for API server.
|
1127
1179
|
"""
|
1180
|
+
user = _get_auth_user_header(request)
|
1128
1181
|
return {
|
1129
1182
|
'status': common.ApiServerStatus.HEALTHY.value,
|
1130
1183
|
'api_version': server_constants.API_VERSION,
|
1131
1184
|
'version': sky.__version__,
|
1132
1185
|
'version_on_disk': common.get_skypilot_version_on_disk(),
|
1133
1186
|
'commit': sky.__commit__,
|
1187
|
+
'user': user.to_dict() if user is not None else None,
|
1134
1188
|
}
|
1135
1189
|
|
1136
1190
|
|
sky/setup_files/dependencies.py
CHANGED
sky/skylet/constants.py
CHANGED
@@ -378,7 +378,8 @@ OVERRIDEABLE_CONFIG_KEYS_IN_TASK: List[Tuple[str, ...]] = [
|
|
378
378
|
# we skip the following keys because they are meant to be client-side configs.
|
379
379
|
SKIPPED_CLIENT_OVERRIDE_KEYS: List[Tuple[str, ...]] = [('admin_policy',),
|
380
380
|
('api_server',),
|
381
|
-
('allowed_clouds',)
|
381
|
+
('allowed_clouds',),
|
382
|
+
('workspaces',)]
|
382
383
|
|
383
384
|
# Constants for Azure blob storage
|
384
385
|
WAIT_FOR_STORAGE_ACCOUNT_CREATION = 60
|
@@ -405,3 +406,5 @@ SKY_USER_FILE_PATH = '~/.sky/generated'
|
|
405
406
|
|
406
407
|
# Environment variable that is set to 'true' if this is a skypilot server.
|
407
408
|
ENV_VAR_IS_SKYPILOT_SERVER = 'IS_SKYPILOT_SERVER'
|
409
|
+
|
410
|
+
SKYPILOT_DEFAULT_WORKSPACE = 'default'
|
sky/skypilot_config.py
CHANGED
@@ -57,6 +57,8 @@ import threading
|
|
57
57
|
import typing
|
58
58
|
from typing import Any, Dict, Iterator, List, Optional, Tuple
|
59
59
|
|
60
|
+
import filelock
|
61
|
+
|
60
62
|
from sky import exceptions
|
61
63
|
from sky import sky_logging
|
62
64
|
from sky.adaptors import common as adaptors_common
|
@@ -66,6 +68,7 @@ from sky.utils import config_utils
|
|
66
68
|
from sky.utils import context
|
67
69
|
from sky.utils import schemas
|
68
70
|
from sky.utils import ux_utils
|
71
|
+
from sky.utils.kubernetes import config_map_utils
|
69
72
|
|
70
73
|
if typing.TYPE_CHECKING:
|
71
74
|
import yaml
|
@@ -120,8 +123,17 @@ class ConfigContext:
|
|
120
123
|
|
121
124
|
|
122
125
|
# The global loaded config.
|
126
|
+
_active_workspace_context = threading.local()
|
123
127
|
_global_config_context = ConfigContext()
|
124
|
-
|
128
|
+
|
129
|
+
SKYPILOT_CONFIG_LOCK_PATH = '~/.sky/locks/.skypilot_config.lock'
|
130
|
+
|
131
|
+
|
132
|
+
def get_skypilot_config_lock_path() -> str:
    """Return the SkyPilot config lock file path, creating its directory.

    Returns:
        ``SKYPILOT_CONFIG_LOCK_PATH`` with ``~`` expanded. The parent
        directory is created if it does not exist yet.
    """
    expanded = os.path.expanduser(SKYPILOT_CONFIG_LOCK_PATH)
    parent_dir = os.path.dirname(expanded)
    os.makedirs(parent_dir, exist_ok=True)
    return expanded
|
125
137
|
|
126
138
|
|
127
139
|
def _get_config_context() -> ConfigContext:
|
@@ -194,8 +206,7 @@ def get_user_config() -> config_utils.Config:
|
|
194
206
|
|
195
207
|
# load the user config file
|
196
208
|
if os.path.exists(user_config_path):
|
197
|
-
user_config =
|
198
|
-
_validate_config(user_config, user_config_path)
|
209
|
+
user_config = parse_and_validate_config_file(user_config_path)
|
199
210
|
else:
|
200
211
|
user_config = config_utils.Config()
|
201
212
|
return user_config
|
@@ -223,8 +234,7 @@ def _get_project_config() -> config_utils.Config:
|
|
223
234
|
|
224
235
|
# load the project config file
|
225
236
|
if os.path.exists(project_config_path):
|
226
|
-
project_config =
|
227
|
-
_validate_config(project_config, project_config_path)
|
237
|
+
project_config = parse_and_validate_config_file(project_config_path)
|
228
238
|
else:
|
229
239
|
project_config = config_utils.Config()
|
230
240
|
return project_config
|
@@ -252,8 +262,7 @@ def get_server_config() -> config_utils.Config:
|
|
252
262
|
|
253
263
|
# load the server config file
|
254
264
|
if os.path.exists(server_config_path):
|
255
|
-
server_config =
|
256
|
-
_validate_config(server_config, server_config_path)
|
265
|
+
server_config = parse_and_validate_config_file(server_config_path)
|
257
266
|
else:
|
258
267
|
server_config = config_utils.Config()
|
259
268
|
return server_config
|
@@ -287,6 +296,60 @@ def get_nested(keys: Tuple[str, ...],
|
|
287
296
|
disallowed_override_keys=None)
|
288
297
|
|
289
298
|
|
299
|
+
def get_workspace_cloud(cloud: str,
                        workspace: Optional[str] = None) -> config_utils.Config:
    """Return the config entry for ``cloud`` under a workspace.

    Looks up ``workspaces.<workspace>`` in the loaded config and returns the
    entry stored under the lower-cased cloud name.

    Args:
        cloud: Cloud name; lower-cased before the lookup.
        workspace: Workspace to look in. When None, the result of
            get_active_workspace() is used.

    Returns:
        The entry for the cloud, or an empty ``config_utils.Config()`` when
        either the workspace or the cloud has no entry.
    """
    target = get_active_workspace() if workspace is None else workspace
    workspace_entry = get_nested(keys=('workspaces', target),
                                 default_value=None)
    if workspace_entry is not None:
        return workspace_entry.get(cloud.lower(), config_utils.Config())
    return config_utils.Config()
|
311
|
+
|
312
|
+
|
313
|
+
@contextlib.contextmanager
def local_active_workspace_ctx(workspace: str) -> Iterator[None]:
    """Temporarily set the active workspace IN CURRENT THREAD.

    Note: having this function thread-local is error-prone, as wrapping some
    operations with this will not have the underlying threads to get the
    correct active workspace. However, we cannot make it global either, as
    backend_utils.refresh_cluster_status() will be called in multiple threads,
    and they may have different active workspaces for different threads.

    # TODO(zhwu): make this function global by default and able to be set
    # it to thread-local with an argument.

    The workspace that was active on entry is restored on exit, including
    when the wrapped block raises.

    Args:
        workspace: The workspace to set as active.
    """
    original_workspace = get_active_workspace()
    if original_workspace == workspace:
        # No change, do nothing.
        yield
        return
    _active_workspace_context.workspace = workspace
    logger.debug(f'Set context workspace: {workspace}')
    try:
        yield
    finally:
        # Without the finally, an exception in the wrapped block would leave
        # this thread pinned to the temporary workspace.
        logger.debug(f'Reset context workspace: {original_workspace}')
        _active_workspace_context.workspace = original_workspace
|
342
|
+
|
343
|
+
|
344
|
+
def get_active_workspace(force_user_workspace: bool = False) -> str:
    """Return the name of the active workspace.

    Args:
        force_user_workspace: If True, ignore any thread-local workspace
            override and read the ``active_workspace`` config key instead.

    Returns:
        The thread-local override when one is set and not bypassed;
        otherwise the ``active_workspace`` config value, falling back to
        ``constants.SKYPILOT_DEFAULT_WORKSPACE``.
    """
    override = getattr(_active_workspace_context, 'workspace', None)
    use_override = override is not None and not force_user_workspace
    if not use_override:
        return get_nested(keys=('active_workspace',),
                          default_value=constants.SKYPILOT_DEFAULT_WORKSPACE)
    logger.debug(f'Get context workspace: {override}')
    return override
|
351
|
+
|
352
|
+
|
290
353
|
def set_nested(keys: Tuple[str, ...], value: Any) -> Dict[str, Any]:
|
291
354
|
"""Returns a deep-copied config with the nested key set to value.
|
292
355
|
|
@@ -336,7 +399,7 @@ def overlay_skypilot_config(
|
|
336
399
|
|
337
400
|
def safe_reload_config() -> None:
    """Reload the config while holding the config file lock.

    The file lock at get_skypilot_config_lock_path() is held for the
    duration of the reload, so concurrent callers are serialized.
    """
    config_lock = filelock.FileLock(get_skypilot_config_lock_path())
    with config_lock:
        _reload_config()
|
341
404
|
|
342
405
|
|
@@ -357,7 +420,7 @@ def _reload_config() -> None:
|
|
357
420
|
_reload_config_as_client()
|
358
421
|
|
359
422
|
|
360
|
-
def
|
423
|
+
def parse_and_validate_config_file(config_path: str) -> config_utils.Config:
|
361
424
|
config = config_utils.Config()
|
362
425
|
try:
|
363
426
|
config_dict = common_utils.read_yaml(config_path)
|
@@ -413,7 +476,7 @@ def _reload_config_from_internal_file(internal_config_path: str) -> None:
|
|
413
476
|
'exist. Please double check the path or unset the env var: '
|
414
477
|
f'unset {ENV_VAR_SKYPILOT_CONFIG}')
|
415
478
|
logger.debug(f'Using config path: {config_path}')
|
416
|
-
_set_loaded_config(
|
479
|
+
_set_loaded_config(parse_and_validate_config_file(config_path))
|
417
480
|
_set_loaded_config_path(config_path)
|
418
481
|
|
419
482
|
|
@@ -512,6 +575,19 @@ def override_skypilot_config(
|
|
512
575
|
override_configs=dict(override_configs),
|
513
576
|
allowed_override_keys=None,
|
514
577
|
disallowed_override_keys=constants.SKIPPED_CLIENT_OVERRIDE_KEYS)
|
578
|
+
workspace = config.get_nested(
|
579
|
+
keys=('active_workspace',),
|
580
|
+
default_value=constants.SKYPILOT_DEFAULT_WORKSPACE)
|
581
|
+
if (workspace != constants.SKYPILOT_DEFAULT_WORKSPACE and workspace
|
582
|
+
not in get_nested(keys=('workspaces',), default_value={})):
|
583
|
+
raise ValueError(f'Workspace {workspace} does not exist. '
|
584
|
+
'Use `sky check` to see if it is defined on the API '
|
585
|
+
'server and try again.')
|
586
|
+
# Initialize the active workspace context to the workspace specified, so
|
587
|
+
# that a new request is not affected by the previous request's workspace.
|
588
|
+
global _active_workspace_context
|
589
|
+
_active_workspace_context = threading.local()
|
590
|
+
|
515
591
|
try:
|
516
592
|
common_utils.validate_schema(
|
517
593
|
config,
|
@@ -592,7 +668,7 @@ def _compose_cli_config(cli_config: Optional[List[str]]) -> config_utils.Config:
|
|
592
668
|
'Cannot use multiple --config flags with a config file.')
|
593
669
|
config_source = maybe_config_path
|
594
670
|
# cli_config is a path to a config file
|
595
|
-
parsed_config =
|
671
|
+
parsed_config = parse_and_validate_config_file(maybe_config_path)
|
596
672
|
else: # cli_config is a comma-separated list of key-value pairs
|
597
673
|
parsed_config = _parse_dotlist(cli_config)
|
598
674
|
_validate_config(parsed_config, config_source)
|
@@ -623,3 +699,23 @@ def apply_cli_config(cli_config: Optional[List[str]]) -> Dict[str, Any]:
|
|
623
699
|
overlay_skypilot_config(original_config=_get_loaded_config(),
|
624
700
|
override_configs=parsed_config))
|
625
701
|
return parsed_config
|
702
|
+
|
703
|
+
|
704
|
+
def update_config_no_lock(config: config_utils.Config) -> None:
    """Write ``config`` to the user config file, mirror it, and reload.

    This function does not take the config lock itself.
    NOTE(review): presumably callers are expected to hold it — confirm.

    Args:
        config: The config to save and sync.
    """
    config_file = os.path.expanduser(get_user_config_path())

    # The local file is always written (PVC in Kubernetes, local file
    # otherwise).
    common_utils.dump_yaml(config_file, dict(config))

    in_kubernetes = config_map_utils.is_running_in_kubernetes()
    if in_kubernetes:
        # The PVC file is the source of truth; the ConfigMap is only a
        # mirror kept for easy access.
        config_map_utils.patch_configmap_with_config(config, config_file)

    _reload_config()
|
@@ -48,7 +48,8 @@ class StatusColumn:
|
|
48
48
|
def show_status_table(cluster_records: List[_ClusterRecord],
|
49
49
|
show_all: bool,
|
50
50
|
show_user: bool,
|
51
|
-
query_clusters: Optional[List[str]] = None
|
51
|
+
query_clusters: Optional[List[str]] = None,
|
52
|
+
show_workspaces: bool = False) -> int:
|
52
53
|
"""Compute cluster table values and display.
|
53
54
|
|
54
55
|
Returns:
|
@@ -56,7 +57,6 @@ def show_status_table(cluster_records: List[_ClusterRecord],
|
|
56
57
|
STOPPED.
|
57
58
|
"""
|
58
59
|
# TODO(zhwu): Update the information for autostop clusters.
|
59
|
-
|
60
60
|
status_columns = [
|
61
61
|
StatusColumn('NAME', _get_name),
|
62
62
|
]
|
@@ -66,6 +66,9 @@ def show_status_table(cluster_records: List[_ClusterRecord],
|
|
66
66
|
StatusColumn('USER_ID', _get_user_hash, show_by_default=False))
|
67
67
|
|
68
68
|
status_columns += [
|
69
|
+
StatusColumn('WORKSPACE',
|
70
|
+
_get_workspace,
|
71
|
+
show_by_default=show_workspaces),
|
69
72
|
StatusColumn('INFRA', _get_infra, truncate=not show_all),
|
70
73
|
StatusColumn('RESOURCES', _get_resources, truncate=not show_all),
|
71
74
|
StatusColumn('STATUS', _get_status_colored),
|
@@ -106,12 +109,13 @@ def show_status_table(cluster_records: List[_ClusterRecord],
|
|
106
109
|
for cluster in query_clusters
|
107
110
|
if cluster not in cluster_names
|
108
111
|
]
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
112
|
+
if not_found_clusters:
|
113
|
+
cluster_str = 'Cluster'
|
114
|
+
if len(not_found_clusters) > 1:
|
115
|
+
cluster_str += 's'
|
116
|
+
cluster_str += ' '
|
117
|
+
cluster_str += ', '.join(not_found_clusters)
|
118
|
+
click.echo(f'{cluster_str} not found.')
|
115
119
|
elif not cluster_records:
|
116
120
|
click.echo('No existing clusters.')
|
117
121
|
return num_pending_autostop
|
@@ -243,6 +247,12 @@ def _get_status(cluster_record: _ClusterRecord,
|
|
243
247
|
return cluster_record['status']
|
244
248
|
|
245
249
|
|
250
|
+
def _get_workspace(cluster_record: _ClusterRecord,
                   truncate: bool = True) -> str:
    """Return the ``workspace`` field of a cluster record.

    Args:
        cluster_record: The record to read from.
        truncate: Ignored; accepted so the signature matches the other
            column getters in this module.
    """
    del truncate  # Unused.
    workspace = cluster_record['workspace']
    return workspace
|
254
|
+
|
255
|
+
|
246
256
|
def _get_status_colored(cluster_record: _ClusterRecord,
|
247
257
|
truncate: bool = True) -> str:
|
248
258
|
del truncate
|
sky/utils/db_utils.py
CHANGED
@@ -1,9 +1,17 @@
|
|
1
1
|
"""Utils for sky databases."""
|
2
2
|
import contextlib
|
3
|
+
import enum
|
3
4
|
import sqlite3
|
4
5
|
import threading
|
6
|
+
import typing
|
5
7
|
from typing import Any, Callable, Optional
|
6
8
|
|
9
|
+
import sqlalchemy
|
10
|
+
from sqlalchemy import exc as sqlalchemy_exc
|
11
|
+
|
12
|
+
if typing.TYPE_CHECKING:
|
13
|
+
from sqlalchemy.orm import Session
|
14
|
+
|
7
15
|
# This parameter (passed to sqlite3.connect) controls how long we will wait to
|
8
16
|
# obtains a database lock (not necessarily during connection, but whenever it is
|
9
17
|
# needed). It is not a connection timeout.
|
@@ -21,6 +29,11 @@ from typing import Any, Callable, Optional
|
|
21
29
|
_DB_TIMEOUT_S = 60
|
22
30
|
|
23
31
|
|
32
|
+
@enum.unique
class SQLAlchemyDialect(enum.Enum):
    """SQLAlchemy dialect names, compared against ``dialect.name``."""
    # File-based SQLite database.
    SQLITE = 'sqlite'
    # PostgreSQL server.
    POSTGRESQL = 'postgresql'
|
35
|
+
|
36
|
+
|
24
37
|
@contextlib.contextmanager
|
25
38
|
def safe_cursor(db_path: str):
|
26
39
|
"""A newly created, auto-committing, auto-closing cursor."""
|
@@ -71,6 +84,46 @@ def add_column_to_table(
|
|
71
84
|
conn.commit()
|
72
85
|
|
73
86
|
|
87
|
+
def add_column_to_table_sqlalchemy(
    session: 'Session',
    table_name: str,
    column_name: str,
    column_type: str,
    copy_from: Optional[str] = None,
    value_to_replace_existing_entries: Optional[Any] = None,
):
    """Add a column to a table through a SQLAlchemy session.

    Only the SQLite dialect is handled. If the column already exists (the
    error text contains 'duplicate column name'), the error is ignored and
    the follow-up UPDATE statements are skipped. The session is committed at
    the end.

    Args:
        session: Session bound to the target database.
        table_name: Table to alter; interpolated directly into the SQL.
        column_name: Name of the new column; interpolated directly.
        column_type: Column type clause; interpolated directly.
        copy_from: If given, an existing column whose values are copied into
            the new column.
        value_to_replace_existing_entries: If given, value written into rows
            where the new column is NULL (passed as a bound parameter).

    Raises:
        ValueError: If the session's dialect is not SQLite; the session is
            rolled back first.
        sqlalchemy.exc.OperationalError: For any operational error other
            than a duplicate column.
    """
    dialect_name = session.bind.dialect.name
    if dialect_name != SQLAlchemyDialect.SQLITE.value:
        # TODO(syang) support postgres dialect
        session.rollback()
        raise ValueError('Unsupported database dialect')

    try:
        add_stmt = sqlalchemy.text(f'ALTER TABLE {table_name} '
                                   f'ADD COLUMN {column_name} {column_type}')
        session.execute(add_stmt)
        if copy_from is not None:
            copy_stmt = sqlalchemy.text(f'UPDATE {table_name} '
                                        f'SET {column_name} = {copy_from}')
            session.execute(copy_stmt)
        if value_to_replace_existing_entries is not None:
            fill_stmt = sqlalchemy.text(
                f'UPDATE {table_name} '
                f'SET {column_name} = :replacement_value '
                f'WHERE {column_name} IS NULL')
            session.execute(
                fill_stmt,
                {'replacement_value': value_to_replace_existing_entries})
    except sqlalchemy_exc.OperationalError as e:
        # An already-present column is not an error here.
        if 'duplicate column name' not in str(e):
            raise
    session.commit()
|
125
|
+
|
126
|
+
|
74
127
|
def rename_column(
|
75
128
|
cursor: 'sqlite3.Cursor',
|
76
129
|
conn: 'sqlite3.Connection',
|
@@ -0,0 +1,133 @@
|
|
1
|
+
"""Utilities for Kubernetes ConfigMap operations in SkyPilot."""
|
2
|
+
import os
|
3
|
+
|
4
|
+
from sky import sky_logging
|
5
|
+
from sky import skypilot_config
|
6
|
+
from sky.adaptors import kubernetes
|
7
|
+
from sky.utils import common_utils
|
8
|
+
|
9
|
+
logger = sky_logging.init_logger(__name__)
|
10
|
+
|
11
|
+
# Kubernetes ConfigMap sync constants
|
12
|
+
_KUBE_SERVICE_ACCOUNT_PATH = '/var/run/secrets/kubernetes.io/serviceaccount'
|
13
|
+
_CONFIGMAP_SYNC_TIMEOUT = 10 # seconds
|
14
|
+
|
15
|
+
|
16
|
+
def is_running_in_kubernetes() -> bool:
    """Report whether the service account token file exists.

    Returns:
        True if ``<_KUBE_SERVICE_ACCOUNT_PATH>/token`` exists, which is used
        as the signal that the process runs inside a Kubernetes pod.
    """
    token_file = f'{_KUBE_SERVICE_ACCOUNT_PATH}/token'
    return os.path.exists(token_file)
|
19
|
+
|
20
|
+
|
21
|
+
def _get_kubernetes_namespace() -> str:
    """Read the current namespace from the service account directory.

    Returns:
        The stripped contents of ``<_KUBE_SERVICE_ACCOUNT_PATH>/namespace``,
        or 'default' when that file is missing or cannot be opened or read.
    """
    namespace_path = f'{_KUBE_SERVICE_ACCOUNT_PATH}/namespace'
    try:
        with open(namespace_path, encoding='utf-8') as ns_file:
            contents = ns_file.read()
    except OSError:
        # Covers a missing file as well as any other open/read failure.
        return 'default'
    return contents.strip()
|
31
|
+
|
32
|
+
|
33
|
+
def _get_configmap_name() -> str:
|
34
|
+
"""Get the ConfigMap name for the SkyPilot config."""
|
35
|
+
release_name = (os.getenv('HELM_RELEASE_NAME') or
|
36
|
+
os.getenv('SKYPILOT_RELEASE_NAME') or 'skypilot')
|
37
|
+
return f'{release_name}-config'
|
38
|
+
|
39
|
+
|
40
|
+
def initialize_configmap_sync_on_startup(config_file_path: str) -> None:
    """Initialize ConfigMap sync on API server startup.

    This syncs existing PVC config to ConfigMap if ConfigMap doesn't exist.
    This handles the upgrade scenario where an existing deployment has
    workspace configs on PVC but no ConfigMap exists.

    Does nothing when not running inside a Kubernetes pod or when the config
    file does not exist. Any failure is logged as a warning and not raised.

    Args:
        config_file_path: Path to the config file to sync.
    """
    config_file_path = os.path.expanduser(config_file_path)
    if not is_running_in_kubernetes() or not os.path.exists(config_file_path):
        return

    try:
        namespace = _get_kubernetes_namespace()
        configmap_name = _get_configmap_name()

        # Check if ConfigMap exists
        try:
            # Bound the existence check with the same timeout as the
            # create/patch calls so an unresponsive API server cannot block
            # here indefinitely.
            kubernetes.core_api().read_namespaced_config_map(
                name=configmap_name,
                namespace=namespace,
                _request_timeout=_CONFIGMAP_SYNC_TIMEOUT)
            # ConfigMap exists, don't overwrite it
            logger.debug(f'ConfigMap {configmap_name} already exists')
            return
        except kubernetes.kubernetes.client.rest.ApiException as e:
            if e.status != 404:
                raise
            # ConfigMap doesn't exist, create it

        current_config = skypilot_config.parse_and_validate_config_file(
            config_file_path)
        config_yaml = common_utils.dump_yaml_str(dict(current_config))

        configmap_body = {
            'apiVersion': 'v1',
            'kind': 'ConfigMap',
            'metadata': {
                'name': configmap_name,
                'namespace': namespace,
                'labels': {
                    'app.kubernetes.io/name': 'skypilot',
                    'app.kubernetes.io/component': 'config'
                }
            },
            'data': {
                'config.yaml': config_yaml
            }
        }

        kubernetes.core_api().create_namespaced_config_map(
            namespace=namespace,
            body=configmap_body,
            _request_timeout=_CONFIGMAP_SYNC_TIMEOUT)

        logger.info(f'Synced PVC config to new ConfigMap {configmap_name}')

    except Exception as e:  # pylint: disable=broad-except
        # Best-effort: the ConfigMap is only a mirror, so startup continues.
        logger.warning(f'Failed to initialize ConfigMap sync: {e}')
|
99
|
+
|
100
|
+
|
101
|
+
def patch_configmap_with_config(config, config_file_path: str) -> None:
    """Patch the Kubernetes ConfigMap with the updated config.

    Does nothing outside a Kubernetes pod. If the ConfigMap does not exist
    (HTTP 404), it is created from ``config_file_path`` via
    initialize_configmap_sync_on_startup(). Any other failure is logged as a
    warning and not raised.

    Args:
        config: The updated config to sync to the ConfigMap.
        config_file_path: Path to the config file for fallback sync.
    """
    if not is_running_in_kubernetes():
        return

    try:
        ns = _get_kubernetes_namespace()
        cm_name = _get_configmap_name()
        serialized = common_utils.dump_yaml_str(dict(config))

        try:
            kubernetes.core_api().patch_namespaced_config_map(
                name=cm_name,
                namespace=ns,
                body={'data': {
                    'config.yaml': serialized
                }},
                _request_timeout=_CONFIGMAP_SYNC_TIMEOUT)
            logger.debug(f'Synced config to ConfigMap {cm_name}')
        except kubernetes.kubernetes.client.rest.ApiException as api_err:
            if api_err.status != 404:
                raise
            # ConfigMap doesn't exist, create it
            logger.info(f'ConfigMap {cm_name} not found, creating')
            initialize_configmap_sync_on_startup(config_file_path)

    except Exception as e:  # pylint: disable=broad-except
        # Best-effort mirror: a failed sync must not fail the caller.
        logger.warning(f'Failed to sync config to ConfigMap: {e}')
|