skypilot-nightly 1.0.0.dev20250526__py3-none-any.whl → 1.0.0.dev20250527__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/check.py +28 -5
- sky/cli.py +5 -22
- sky/client/cli.py +5 -22
- sky/client/sdk.py +5 -2
- sky/clouds/cloud.py +2 -2
- sky/clouds/kubernetes.py +10 -5
- sky/clouds/service_catalog/kubernetes_catalog.py +4 -0
- sky/clouds/ssh.py +24 -8
- sky/core.py +20 -2
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/D5bjIfl4Ob3SV3LJz3CO0/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/236-e220ba0c35bf089e.js +6 -0
- sky/dashboard/out/_next/static/chunks/{498-d7722313e5e5b4e6.js → 320-afea3ddcc5bd1c6c.js} +1 -16
- sky/dashboard/out/_next/static/chunks/{480-5a0de8b6570ea105.js → 470-1d784f5c8750744a.js} +1 -1
- sky/dashboard/out/_next/static/chunks/627-31b701e69f52db0c.js +1 -0
- sky/dashboard/out/_next/static/chunks/843-e35d71cf1c7f706e.js +11 -0
- sky/dashboard/out/_next/static/chunks/990-f85643b521f7ca65.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-339b59921ccfe266.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-e23fcddf60578a0d.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{clusters-9e6d1ec6e1ac5b29.js → clusters-8afda8efa5b74997.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/config-72b8c6c2edfd0e39.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-1521baab6992916b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-4d913940b4fa6f5a.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-ff7e8e377d02b651.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-9900af52acf8648d.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/{new-bbf436f41381e169.js → new-63763ffa3edb4508.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-7733c960685b4385.js → [name]-3ede7a13caf23375.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-72330c4d0fc9a4a2.js +1 -0
- sky/dashboard/out/_next/static/css/6a1c0d711a4bdaf1.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/server/constants.py +1 -1
- sky/server/requests/payloads.py +18 -5
- sky/server/requests/serializers/decoders.py +0 -11
- sky/server/server.py +19 -9
- sky/skypilot_config.py +4 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +19 -0
- sky/utils/schemas.py +57 -5
- sky/workspaces/core.py +186 -50
- sky/workspaces/server.py +25 -0
- {skypilot_nightly-1.0.0.dev20250526.dist-info → skypilot_nightly-1.0.0.dev20250527.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20250526.dist-info → skypilot_nightly-1.0.0.dev20250527.dist-info}/RECORD +61 -58
- {skypilot_nightly-1.0.0.dev20250526.dist-info → skypilot_nightly-1.0.0.dev20250527.dist-info}/WHEEL +1 -1
- sky/dashboard/out/_next/static/7GEgRyZKRaSnYZCV1Jwol/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/25-062253ea41fb8eec.js +0 -6
- sky/dashboard/out/_next/static/chunks/734-a6e01d7f98904741.js +0 -1
- sky/dashboard/out/_next/static/chunks/938-59956af3950b02ed.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-3b5aad09a25f64b7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-9529d9e882a0e75c.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/infra-abb7d744ecf15109.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-48dc8d67d4b60be1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-73d5e0c369d00346.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/users-b8acf6e6735323a2.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-5ed48b3201b998c8.js +0 -1
- sky/dashboard/out/_next/static/css/28558d57108b05ae.css +0 -3
- /sky/dashboard/out/_next/static/{7GEgRyZKRaSnYZCV1Jwol → D5bjIfl4Ob3SV3LJz3CO0}/_ssgManifest.js +0 -0
- /sky/dashboard/out/_next/static/chunks/{573-f17bd89d9f9118b3.js → 573-82bd40a37af834f1.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/{578-d351125af46c293f.js → 578-24f35aa98d38d638.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/{_app-96a715a6fb01e228.js → _app-3985f074c163a856.js} +0 -0
- {skypilot_nightly-1.0.0.dev20250526.dist-info → skypilot_nightly-1.0.0.dev20250527.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250526.dist-info → skypilot_nightly-1.0.0.dev20250527.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250526.dist-info → skypilot_nightly-1.0.0.dev20250527.dist-info}/top_level.txt +0 -0
sky/server/requests/payloads.py
CHANGED
@@ -72,6 +72,8 @@ def request_body_env_vars() -> dict:
|
|
72
72
|
|
73
73
|
def get_override_skypilot_config_from_client() -> Dict[str, Any]:
|
74
74
|
"""Returns the override configs from the client."""
|
75
|
+
if annotations.is_on_api_server:
|
76
|
+
return {}
|
75
77
|
config = skypilot_config.to_dict()
|
76
78
|
# Remove the API server config, as we should not specify the SkyPilot
|
77
79
|
# server endpoint on the server side. This avoids the warning at
|
@@ -134,6 +136,12 @@ class CheckBody(RequestBody):
|
|
134
136
|
workspace: Optional[str] = None
|
135
137
|
|
136
138
|
|
139
|
+
class EnabledCloudsBody(RequestBody):
|
140
|
+
"""The request body for the enabled clouds endpoint."""
|
141
|
+
workspace: Optional[str] = None
|
142
|
+
expand: bool = False
|
143
|
+
|
144
|
+
|
137
145
|
class DagRequestBody(RequestBody):
|
138
146
|
"""Request body base class for endpoints with a dag."""
|
139
147
|
dag: str
|
@@ -533,11 +541,6 @@ class UploadZipFileResponse(pydantic.BaseModel):
|
|
533
541
|
missing_chunks: Optional[List[str]] = None
|
534
542
|
|
535
543
|
|
536
|
-
class EnabledCloudsBody(RequestBody):
|
537
|
-
"""The request body for the enabled clouds endpoint."""
|
538
|
-
workspace: Optional[str] = None
|
539
|
-
|
540
|
-
|
541
544
|
class UpdateWorkspaceBody(RequestBody):
|
542
545
|
"""The request body for updating a specific workspace configuration."""
|
543
546
|
workspace_name: str = '' # Will be set from path parameter
|
@@ -553,3 +556,13 @@ class CreateWorkspaceBody(RequestBody):
|
|
553
556
|
class DeleteWorkspaceBody(RequestBody):
|
554
557
|
"""The request body for deleting a workspace."""
|
555
558
|
workspace_name: str
|
559
|
+
|
560
|
+
|
561
|
+
class UpdateConfigBody(RequestBody):
|
562
|
+
"""The request body for updating the entire SkyPilot configuration."""
|
563
|
+
config: Dict[str, Any]
|
564
|
+
|
565
|
+
|
566
|
+
class GetConfigBody(RequestBody):
|
567
|
+
"""The request body for getting the entire SkyPilot configuration."""
|
568
|
+
pass
|
sky/server/requests/serializers/decoders.py
CHANGED
@@ -12,7 +12,6 @@ from sky.provision.kubernetes import utils as kubernetes_utils
|
|
12
12
|
from sky.serve import serve_state
|
13
13
|
from sky.server import constants as server_constants
|
14
14
|
from sky.skylet import job_lib
|
15
|
-
from sky.utils import registry
|
16
15
|
from sky.utils import status_lib
|
17
16
|
|
18
17
|
if typing.TYPE_CHECKING:
|
@@ -135,16 +134,6 @@ def decode_cost_report(
|
|
135
134
|
return return_value
|
136
135
|
|
137
136
|
|
138
|
-
@register_decoders('enabled_clouds')
|
139
|
-
def decode_enabled_clouds(return_value: List[str]) -> List['clouds.Cloud']:
|
140
|
-
clouds = []
|
141
|
-
for cloud_name in return_value:
|
142
|
-
cloud = registry.CLOUD_REGISTRY.from_str(cloud_name)
|
143
|
-
assert cloud is not None, return_value
|
144
|
-
clouds.append(cloud)
|
145
|
-
return clouds
|
146
|
-
|
147
|
-
|
148
137
|
@register_decoders('list_accelerators')
|
149
138
|
def decode_list_accelerators(
|
150
139
|
return_value: Dict[str, List[List[Any]]]
|
sky/server/server.py
CHANGED
@@ -127,6 +127,11 @@ class AuthProxyMiddleware(starlette.middleware.base.BaseHTTPMiddleware):
|
|
127
127
|
|
128
128
|
async def dispatch(self, request: fastapi.Request, call_next):
|
129
129
|
auth_user = _get_auth_user_header(request)
|
130
|
+
|
131
|
+
# Add user to database if auth_user is present
|
132
|
+
if auth_user is not None:
|
133
|
+
global_user_state.add_or_update_user(auth_user)
|
134
|
+
|
130
135
|
body = await request.body()
|
131
136
|
if auth_user and body:
|
132
137
|
try:
|
@@ -137,10 +142,16 @@ class AuthProxyMiddleware(starlette.middleware.base.BaseHTTPMiddleware):
|
|
137
142
|
logger.debug(f'Overriding user for {request.state.request_id}: '
|
138
143
|
f'{auth_user.name}, {auth_user.id}')
|
139
144
|
if 'env_vars' in original_json:
|
140
|
-
original_json
|
141
|
-
|
142
|
-
|
143
|
-
|
145
|
+
if isinstance(original_json.get('env_vars'), dict):
|
146
|
+
original_json['env_vars'][
|
147
|
+
constants.USER_ID_ENV_VAR] = auth_user.id
|
148
|
+
original_json['env_vars'][
|
149
|
+
constants.USER_ENV_VAR] = auth_user.name
|
150
|
+
else:
|
151
|
+
logger.warning(
|
152
|
+
f'"env_vars" in request body is not a dictionary '
|
153
|
+
f'for request {request.state.request_id}. '
|
154
|
+
'Skipping user info injection into body.')
|
144
155
|
request._body = json.dumps(original_json).encode('utf-8') # pylint: disable=protected-access
|
145
156
|
return await call_next(request)
|
146
157
|
|
@@ -262,10 +273,7 @@ app.include_router(workspaces_rest.router,
|
|
262
273
|
|
263
274
|
@app.get('/token')
|
264
275
|
async def token(request: fastapi.Request) -> fastapi.responses.HTMLResponse:
|
265
|
-
# If we have auth info, save this user to the database.
|
266
276
|
user = _get_auth_user_header(request)
|
267
|
-
if user is not None:
|
268
|
-
global_user_state.add_or_update_user(user)
|
269
277
|
|
270
278
|
token_data = {
|
271
279
|
'v': 1, # Token version number, bump for backwards incompatible.
|
@@ -315,12 +323,14 @@ async def check(request: fastapi.Request,
|
|
315
323
|
|
316
324
|
@app.get('/enabled_clouds')
|
317
325
|
async def enabled_clouds(request: fastapi.Request,
|
318
|
-
workspace: Optional[str] = None
|
326
|
+
workspace: Optional[str] = None,
|
327
|
+
expand: bool = False) -> None:
|
319
328
|
"""Gets enabled clouds on the server."""
|
320
329
|
executor.schedule_request(
|
321
330
|
request_id=request.state.request_id,
|
322
331
|
request_name='enabled_clouds',
|
323
|
-
request_body=payloads.EnabledCloudsBody(workspace=workspace
|
332
|
+
request_body=payloads.EnabledCloudsBody(workspace=workspace,
|
333
|
+
expand=expand),
|
324
334
|
func=core.enabled_clouds,
|
325
335
|
schedule_type=requests_lib.ScheduleType.SHORT,
|
326
336
|
)
|
sky/skypilot_config.py
CHANGED
@@ -299,6 +299,10 @@ def get_nested(keys: Tuple[str, ...],
|
|
299
299
|
def get_workspace_cloud(cloud: str,
|
300
300
|
workspace: Optional[str] = None) -> config_utils.Config:
|
301
301
|
"""Returns the workspace config."""
|
302
|
+
# TODO(zhwu): Instead of just returning the workspace specific config, we
|
303
|
+
# should return the config that already merges the global config, so that
|
304
|
+
# the caller does not need to manually merge the global config with
|
305
|
+
# the workspace specific config.
|
302
306
|
if workspace is None:
|
303
307
|
workspace = get_active_workspace()
|
304
308
|
clouds = get_nested(keys=(
|
sky/utils/kubernetes/exec_kubeconfig_converter.py
CHANGED
@@ -5,6 +5,9 @@ the 'command' field in the exec configuration, leaving only the executable name.
|
|
5
5
|
This is useful when moving between different environments where auth plugin
|
6
6
|
executables might be installed in different locations.
|
7
7
|
|
8
|
+
For Nebius kubeconfigs, it also changes the --profile argument to 'sky' to
|
9
|
+
ensure compatibility with SkyPilot's expected profile configuration.
|
10
|
+
|
8
11
|
It assumes the target environment has the auth executable available in PATH.
|
9
12
|
If not, you'll need to update your environment container to include the auth
|
10
13
|
executable in PATH.
|
@@ -21,6 +24,8 @@ import yaml
|
|
21
24
|
def strip_auth_plugin_paths(kubeconfig_path: str, output_path: str):
|
22
25
|
"""Strip path information from exec plugin commands in a kubeconfig file.
|
23
26
|
|
27
|
+
For Nebius kubeconfigs, also changes the --profile argument to 'sky'.
|
28
|
+
|
24
29
|
Args:
|
25
30
|
kubeconfig_path (str): Path to the input kubeconfig file
|
26
31
|
output_path (str): Path where the modified kubeconfig will be saved
|
@@ -40,6 +45,20 @@ def strip_auth_plugin_paths(kubeconfig_path: str, output_path: str):
|
|
40
45
|
exec_info['command'] = executable
|
41
46
|
updated = True
|
42
47
|
|
48
|
+
# Handle Nebius kubeconfigs: change --profile to 'sky'
|
49
|
+
if executable == 'nebius' or current_command == 'nebius':
|
50
|
+
args = exec_info.get('args', [])
|
51
|
+
if args and '--profile' in args:
|
52
|
+
try:
|
53
|
+
profile_index = args.index('--profile')
|
54
|
+
if profile_index + 1 < len(args):
|
55
|
+
old_profile = args[profile_index + 1]
|
56
|
+
if old_profile != 'sky':
|
57
|
+
args[profile_index + 1] = 'sky'
|
58
|
+
updated = True
|
59
|
+
except ValueError:
|
60
|
+
pass # --profile not found in args
|
61
|
+
|
43
62
|
if updated:
|
44
63
|
with open(output_path, 'w', encoding='utf-8') as file:
|
45
64
|
yaml.safe_dump(config, file)
|
sky/utils/schemas.py
CHANGED
@@ -1044,6 +1044,25 @@ def get_config_schema():
|
|
1044
1044
|
},
|
1045
1045
|
}
|
1046
1046
|
},
|
1047
|
+
'ssh': {
|
1048
|
+
'type': 'object',
|
1049
|
+
'required': [],
|
1050
|
+
'additionalProperties': False,
|
1051
|
+
'properties': {
|
1052
|
+
'allowed_node_pools': {
|
1053
|
+
'type': 'array',
|
1054
|
+
'items': {
|
1055
|
+
'type': 'string',
|
1056
|
+
},
|
1057
|
+
},
|
1058
|
+
'pod_config': {
|
1059
|
+
'type': 'object',
|
1060
|
+
'required': [],
|
1061
|
+
# Allow arbitrary keys since validating pod spec is hard
|
1062
|
+
'additionalProperties': True,
|
1063
|
+
},
|
1064
|
+
}
|
1065
|
+
},
|
1047
1066
|
'oci': {
|
1048
1067
|
'type': 'object',
|
1049
1068
|
'required': [],
|
@@ -1177,12 +1196,13 @@ def get_config_schema():
|
|
1177
1196
|
|
1178
1197
|
allowed_workspace_cloud_names = list(
|
1179
1198
|
service_catalog.ALL_CLOUDS) + ['cloudflare']
|
1180
|
-
# Create pattern for
|
1181
|
-
|
1199
|
+
# Create pattern for not supported clouds, i.e.
|
1200
|
+
# all clouds except gcp, kubernetes, ssh
|
1201
|
+
not_supported_clouds = [
|
1182
1202
|
cloud for cloud in allowed_workspace_cloud_names
|
1183
|
-
if cloud.lower()
|
1203
|
+
if cloud.lower() not in ['gcp', 'kubernetes', 'ssh']
|
1184
1204
|
]
|
1185
|
-
|
1205
|
+
not_supported_cloud_regex = '|'.join(not_supported_clouds)
|
1186
1206
|
workspaces_schema = {
|
1187
1207
|
'type': 'object',
|
1188
1208
|
'required': [],
|
@@ -1192,7 +1212,7 @@ def get_config_schema():
|
|
1192
1212
|
'additionalProperties': False,
|
1193
1213
|
'patternProperties': {
|
1194
1214
|
# Pattern for non-GCP clouds - only allows 'disabled' property
|
1195
|
-
f'^({
|
1215
|
+
f'^({not_supported_cloud_regex})$': {
|
1196
1216
|
'type': 'object',
|
1197
1217
|
'additionalProperties': False,
|
1198
1218
|
'properties': {
|
@@ -1217,6 +1237,38 @@ def get_config_schema():
|
|
1217
1237
|
},
|
1218
1238
|
'additionalProperties': False,
|
1219
1239
|
},
|
1240
|
+
'ssh': {
|
1241
|
+
'type': 'object',
|
1242
|
+
'required': [],
|
1243
|
+
'properties': {
|
1244
|
+
'allowed_node_pools': {
|
1245
|
+
'type': 'array',
|
1246
|
+
'items': {
|
1247
|
+
'type': 'string',
|
1248
|
+
},
|
1249
|
+
},
|
1250
|
+
'disabled': {
|
1251
|
+
'type': 'boolean'
|
1252
|
+
},
|
1253
|
+
},
|
1254
|
+
'additionalProperties': False,
|
1255
|
+
},
|
1256
|
+
'kubernetes': {
|
1257
|
+
'type': 'object',
|
1258
|
+
'required': [],
|
1259
|
+
'properties': {
|
1260
|
+
'allowed_contexts': {
|
1261
|
+
'type': 'array',
|
1262
|
+
'items': {
|
1263
|
+
'type': 'string',
|
1264
|
+
},
|
1265
|
+
},
|
1266
|
+
'disabled': {
|
1267
|
+
'type': 'boolean'
|
1268
|
+
},
|
1269
|
+
},
|
1270
|
+
'additionalProperties': False,
|
1271
|
+
},
|
1220
1272
|
},
|
1221
1273
|
},
|
1222
1274
|
}
|
sky/workspaces/core.py
CHANGED
@@ -13,6 +13,7 @@ from sky import skypilot_config
|
|
13
13
|
from sky.skylet import constants
|
14
14
|
from sky.usage import usage_lib
|
15
15
|
from sky.utils import common_utils
|
16
|
+
from sky.utils import config_utils
|
16
17
|
from sky.utils import schemas
|
17
18
|
|
18
19
|
logger = sky_logging.init_logger(__name__)
|
@@ -88,70 +89,103 @@ def _check_workspace_has_no_active_resources(workspace_name: str,
|
|
88
89
|
Raises:
|
89
90
|
ValueError: If the workspace has active clusters or managed jobs.
|
90
91
|
"""
|
92
|
+
_check_workspaces_have_no_active_resources([(workspace_name, operation)])
|
91
93
|
|
92
|
-
def check_clusters():
|
93
|
-
# Check for active clusters
|
94
|
-
all_clusters = global_user_state.get_clusters()
|
95
|
-
workspace_clusters = [
|
96
|
-
cluster for cluster in all_clusters
|
97
|
-
if (cluster.get('workspace', constants.SKYPILOT_DEFAULT_WORKSPACE)
|
98
|
-
== workspace_name)
|
99
|
-
]
|
100
|
-
return workspace_clusters
|
101
94
|
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
from sky.jobs.server import core as managed_jobs_core
|
95
|
+
def _check_workspaces_have_no_active_resources(
|
96
|
+
workspace_operations: list) -> None:
|
97
|
+
"""Check if workspaces have active clusters or managed jobs.
|
106
98
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
managed_jobs = managed_jobs_core.queue(refresh=False,
|
111
|
-
skip_finished=True,
|
112
|
-
all_users=True)
|
113
|
-
|
114
|
-
workspace_active_jobs = [
|
115
|
-
job for job in managed_jobs
|
116
|
-
if job.get('workspace', constants.SKYPILOT_DEFAULT_WORKSPACE) ==
|
117
|
-
workspace_name
|
118
|
-
]
|
99
|
+
Args:
|
100
|
+
workspace_operations: List of tuples (workspace_name, operation) where
|
101
|
+
operation is 'update' or 'delete'.
|
119
102
|
|
120
|
-
|
103
|
+
Raises:
|
104
|
+
ValueError: If any workspace has active clusters or managed jobs.
|
105
|
+
The error message will include all workspaces with issues.
|
106
|
+
"""
|
107
|
+
if not workspace_operations:
|
108
|
+
return
|
109
|
+
|
110
|
+
def get_all_clusters():
|
111
|
+
return global_user_state.get_clusters()
|
121
112
|
|
113
|
+
def get_all_managed_jobs():
|
114
|
+
# pylint: disable=import-outside-toplevel
|
115
|
+
from sky.jobs.server import core as managed_jobs_core
|
116
|
+
try:
|
117
|
+
return managed_jobs_core.queue(refresh=False,
|
118
|
+
skip_finished=True,
|
119
|
+
all_users=True)
|
122
120
|
except exceptions.ClusterNotUpError:
|
123
|
-
# If we can't check managed jobs (e.g., controller not running),
|
124
|
-
# log a warning but don't fail the operation
|
125
121
|
logger.warning('All jobs should be finished in workspace.')
|
126
122
|
return []
|
127
123
|
|
128
|
-
#
|
124
|
+
# Fetch both clusters and jobs in parallel
|
129
125
|
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
|
130
|
-
|
131
|
-
jobs_future = executor.submit(
|
126
|
+
clusters_future = executor.submit(get_all_clusters)
|
127
|
+
jobs_future = executor.submit(get_all_managed_jobs)
|
128
|
+
|
129
|
+
all_clusters = clusters_future.result()
|
130
|
+
all_managed_jobs = jobs_future.result()
|
132
131
|
|
133
|
-
|
134
|
-
|
135
|
-
|
132
|
+
# Collect all error messages instead of raising immediately
|
133
|
+
error_messages = []
|
134
|
+
|
135
|
+
# Check each workspace against the fetched data
|
136
|
+
for workspace_name, operation in workspace_operations:
|
137
|
+
# Filter clusters for this workspace
|
138
|
+
workspace_clusters = [
|
139
|
+
cluster for cluster in all_clusters
|
140
|
+
if (cluster.get('workspace', constants.SKYPILOT_DEFAULT_WORKSPACE)
|
141
|
+
== workspace_name)
|
142
|
+
]
|
136
143
|
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
144
|
+
# Filter managed jobs for this workspace
|
145
|
+
workspace_active_jobs = [
|
146
|
+
job for job in all_managed_jobs
|
147
|
+
if job.get('workspace', constants.SKYPILOT_DEFAULT_WORKSPACE) ==
|
148
|
+
workspace_name
|
141
149
|
]
|
142
|
-
cluster_list = ', '.join(active_cluster_names)
|
143
|
-
raise ValueError(
|
144
|
-
f'Cannot {operation} workspace {workspace_name!r} because it has '
|
145
|
-
f'{len(workspace_clusters)} active cluster(s): {cluster_list}. '
|
146
|
-
f'Please terminate these clusters first.')
|
147
150
|
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
151
|
+
# Collect error messages for this workspace
|
152
|
+
workspace_errors = []
|
153
|
+
|
154
|
+
if workspace_clusters:
|
155
|
+
active_cluster_names = [
|
156
|
+
cluster['name'] for cluster in workspace_clusters
|
157
|
+
]
|
158
|
+
cluster_list = ', '.join(active_cluster_names)
|
159
|
+
workspace_errors.append(
|
160
|
+
f'{len(workspace_clusters)} active cluster(s): {cluster_list}')
|
161
|
+
|
162
|
+
if workspace_active_jobs:
|
163
|
+
job_names = [job['job_id'] for job in workspace_active_jobs]
|
164
|
+
job_list = ', '.join(job_names)
|
165
|
+
workspace_errors.append(
|
166
|
+
f'{len(workspace_active_jobs)} active managed job(s): '
|
167
|
+
f'{job_list}')
|
168
|
+
|
169
|
+
# If this workspace has issues, add to overall error messages
|
170
|
+
if workspace_errors:
|
171
|
+
workspace_error_summary = ' and '.join(workspace_errors)
|
172
|
+
error_messages.append(
|
173
|
+
f'Cannot {operation} workspace {workspace_name!r} because it '
|
174
|
+
f'has {workspace_error_summary}.')
|
175
|
+
|
176
|
+
# If we collected any errors, raise them all together
|
177
|
+
if error_messages:
|
178
|
+
if len(error_messages) == 1:
|
179
|
+
# Single workspace error
|
180
|
+
full_message = error_messages[
|
181
|
+
0] + ' Please terminate these resources first.'
|
182
|
+
else:
|
183
|
+
# Multiple workspace errors
|
184
|
+
full_message = (f'Cannot proceed due to active resources in '
|
185
|
+
f'{len(error_messages)} workspace(s):\n' +
|
186
|
+
'\n'.join(f'• {msg}' for msg in error_messages) +
|
187
|
+
'\nPlease terminate these resources first.')
|
188
|
+
raise ValueError(full_message)
|
155
189
|
|
156
190
|
|
157
191
|
def _validate_workspace_config(workspace_name: str,
|
@@ -293,3 +327,105 @@ def delete_workspace(workspace_name: str) -> Dict[str, Any]:
|
|
293
327
|
|
294
328
|
# Use the internal helper function to save
|
295
329
|
return _update_workspaces_config(delete_workspace_fn)
|
330
|
+
|
331
|
+
|
332
|
+
# =========================
|
333
|
+
# = Config Management =
|
334
|
+
# =========================
|
335
|
+
|
336
|
+
|
337
|
+
@usage_lib.entrypoint
|
338
|
+
def get_config() -> Dict[str, Any]:
|
339
|
+
"""Returns the entire SkyPilot configuration.
|
340
|
+
|
341
|
+
Returns:
|
342
|
+
The complete SkyPilot configuration as a dictionary.
|
343
|
+
"""
|
344
|
+
return skypilot_config.to_dict()
|
345
|
+
|
346
|
+
|
347
|
+
@usage_lib.entrypoint
|
348
|
+
def update_config(config: Dict[str, Any]) -> Dict[str, Any]:
|
349
|
+
"""Updates the entire SkyPilot configuration.
|
350
|
+
|
351
|
+
Args:
|
352
|
+
config: The new configuration to save.
|
353
|
+
|
354
|
+
Returns:
|
355
|
+
The updated configuration.
|
356
|
+
|
357
|
+
Raises:
|
358
|
+
ValueError: If the configuration is invalid, or if there are
|
359
|
+
active clusters or managed jobs in workspaces being modified.
|
360
|
+
FileNotFoundError: If the config file cannot be found.
|
361
|
+
PermissionError: If the config file cannot be written.
|
362
|
+
"""
|
363
|
+
# Validate the configuration using the schema
|
364
|
+
try:
|
365
|
+
common_utils.validate_schema(config, schemas.get_config_schema(),
|
366
|
+
'Invalid SkyPilot configuration: ')
|
367
|
+
except exceptions.InvalidSkyPilotConfigError as e:
|
368
|
+
raise ValueError(str(e)) from e
|
369
|
+
|
370
|
+
# Check for API server changes and validate them
|
371
|
+
current_config = skypilot_config.to_dict()
|
372
|
+
|
373
|
+
current_endpoint = current_config.get('api_server', {}).get('endpoint')
|
374
|
+
new_endpoint = config.get('api_server', {}).get('endpoint')
|
375
|
+
if current_endpoint != new_endpoint:
|
376
|
+
raise ValueError('API server endpoint should not be changed to avoid '
|
377
|
+
'unexpected behavior.')
|
378
|
+
|
379
|
+
# Check for workspace changes and validate them
|
380
|
+
current_workspaces = current_config.get('workspaces', {})
|
381
|
+
new_workspaces = config.get('workspaces', {})
|
382
|
+
|
383
|
+
# Collect all workspaces that need to be checked for active resources
|
384
|
+
workspaces_to_check = []
|
385
|
+
|
386
|
+
# Check each workspace that is being modified
|
387
|
+
for workspace_name, new_workspace_config in new_workspaces.items():
|
388
|
+
current_workspace_config = current_workspaces.get(workspace_name, {})
|
389
|
+
|
390
|
+
# If workspace configuration is changing, validate and mark for checking
|
391
|
+
if current_workspace_config != new_workspace_config:
|
392
|
+
_validate_workspace_config(workspace_name, new_workspace_config)
|
393
|
+
workspaces_to_check.append((workspace_name, 'update'))
|
394
|
+
|
395
|
+
# Check for workspace deletions
|
396
|
+
for workspace_name in current_workspaces:
|
397
|
+
if workspace_name not in new_workspaces:
|
398
|
+
# Workspace is being deleted
|
399
|
+
if workspace_name == constants.SKYPILOT_DEFAULT_WORKSPACE:
|
400
|
+
raise ValueError(f'Cannot delete the default workspace '
|
401
|
+
f'{constants.SKYPILOT_DEFAULT_WORKSPACE!r}.')
|
402
|
+
workspaces_to_check.append((workspace_name, 'delete'))
|
403
|
+
|
404
|
+
# Check all workspaces for active resources in one efficient call
|
405
|
+
_check_workspaces_have_no_active_resources(workspaces_to_check)
|
406
|
+
|
407
|
+
# Use file locking to prevent race conditions
|
408
|
+
lock_path = skypilot_config.get_skypilot_config_lock_path()
|
409
|
+
try:
|
410
|
+
with filelock.FileLock(lock_path,
|
411
|
+
_WORKSPACE_CONFIG_LOCK_TIMEOUT_SECONDS):
|
412
|
+
# Convert to config_utils.Config and save
|
413
|
+
config_obj = config_utils.Config.from_dict(config)
|
414
|
+
skypilot_config.update_config_no_lock(config_obj)
|
415
|
+
except filelock.Timeout as e:
|
416
|
+
raise RuntimeError(
|
417
|
+
f'Failed to update configuration due to a timeout '
|
418
|
+
f'when trying to acquire the lock at {lock_path}. This may '
|
419
|
+
'indicate another SkyPilot process is currently updating the '
|
420
|
+
'configuration. Please try again or manually remove the lock '
|
421
|
+
f'file if you believe it is stale.') from e
|
422
|
+
|
423
|
+
# Validate the configuration by running sky check
|
424
|
+
try:
|
425
|
+
sky_check.check(quiet=True)
|
426
|
+
except Exception as e: # pylint: disable=broad-except
|
427
|
+
logger.warning(f'Configuration saved but '
|
428
|
+
f'validation check failed: {e}')
|
429
|
+
# Don't fail the update if the check fails, just warn
|
430
|
+
|
431
|
+
return config
|
sky/workspaces/server.py
CHANGED
@@ -60,3 +60,28 @@ async def delete(request: fastapi.Request,
|
|
60
60
|
func=core.delete_workspace,
|
61
61
|
schedule_type=api_requests.ScheduleType.SHORT,
|
62
62
|
)
|
63
|
+
|
64
|
+
|
65
|
+
@router.get('/config')
|
66
|
+
async def get_config(request: fastapi.Request) -> None:
|
67
|
+
"""Gets the entire SkyPilot configuration."""
|
68
|
+
executor.schedule_request(
|
69
|
+
request_id=request.state.request_id,
|
70
|
+
request_name='workspaces.get_config',
|
71
|
+
request_body=payloads.GetConfigBody(),
|
72
|
+
func=core.get_config,
|
73
|
+
schedule_type=api_requests.ScheduleType.SHORT,
|
74
|
+
)
|
75
|
+
|
76
|
+
|
77
|
+
@router.post('/config')
|
78
|
+
async def update_config(request: fastapi.Request,
|
79
|
+
update_config_body: payloads.UpdateConfigBody) -> None:
|
80
|
+
"""Updates the entire SkyPilot configuration."""
|
81
|
+
executor.schedule_request(
|
82
|
+
request_id=request.state.request_id,
|
83
|
+
request_name='workspaces.update_config',
|
84
|
+
request_body=update_config_body,
|
85
|
+
func=core.update_config,
|
86
|
+
schedule_type=api_requests.ScheduleType.SHORT,
|
87
|
+
)
|