skypilot-nightly 1.0.0.dev20250524__py3-none-any.whl → 1.0.0.dev20250527__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/check.py +32 -6
- sky/cli.py +17 -24
- sky/client/cli.py +17 -24
- sky/client/sdk.py +5 -2
- sky/clouds/cloud.py +2 -2
- sky/clouds/kubernetes.py +10 -5
- sky/clouds/service_catalog/kubernetes_catalog.py +4 -0
- sky/clouds/ssh.py +24 -8
- sky/core.py +20 -2
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/D5bjIfl4Ob3SV3LJz3CO0/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/236-e220ba0c35bf089e.js +6 -0
- sky/dashboard/out/_next/static/chunks/{498-d7722313e5e5b4e6.js → 320-afea3ddcc5bd1c6c.js} +1 -16
- sky/dashboard/out/_next/static/chunks/470-1d784f5c8750744a.js +1 -0
- sky/dashboard/out/_next/static/chunks/578-24f35aa98d38d638.js +6 -0
- sky/dashboard/out/_next/static/chunks/627-31b701e69f52db0c.js +1 -0
- sky/dashboard/out/_next/static/chunks/843-e35d71cf1c7f706e.js +11 -0
- sky/dashboard/out/_next/static/chunks/990-f85643b521f7ca65.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-3985f074c163a856.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-339b59921ccfe266.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-e23fcddf60578a0d.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{clusters-9e6d1ec6e1ac5b29.js → clusters-8afda8efa5b74997.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/config-72b8c6c2edfd0e39.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-1521baab6992916b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-4d913940b4fa6f5a.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-ff7e8e377d02b651.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-9900af52acf8648d.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-63763ffa3edb4508.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-3ede7a13caf23375.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-72330c4d0fc9a4a2.js +1 -0
- sky/dashboard/out/_next/static/css/6a1c0d711a4bdaf1.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -1
- sky/global_user_state.py +592 -552
- sky/server/constants.py +1 -1
- sky/server/requests/payloads.py +33 -3
- sky/server/requests/serializers/decoders.py +0 -11
- sky/server/server.py +23 -22
- sky/setup_files/dependencies.py +1 -0
- sky/skypilot_config.py +35 -9
- sky/utils/db_utils.py +53 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/deploy_remote_cluster.py +20 -4
- sky/utils/kubernetes/exec_kubeconfig_converter.py +19 -0
- sky/utils/kubernetes/kubernetes_deploy_utils.py +49 -5
- sky/utils/kubernetes/ssh-tunnel.sh +20 -28
- sky/utils/schemas.py +57 -5
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +431 -0
- sky/workspaces/server.py +87 -0
- {skypilot_nightly-1.0.0.dev20250524.dist-info → skypilot_nightly-1.0.0.dev20250527.dist-info}/METADATA +2 -1
- {skypilot_nightly-1.0.0.dev20250524.dist-info → skypilot_nightly-1.0.0.dev20250527.dist-info}/RECORD +69 -57
- {skypilot_nightly-1.0.0.dev20250524.dist-info → skypilot_nightly-1.0.0.dev20250527.dist-info}/WHEEL +1 -1
- sky/dashboard/out/_next/static/aHej19bZyl4hoHgrzPCn7/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/480-ee58038f1a4afd5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/578-7a4795009a56430c.js +0 -6
- sky/dashboard/out/_next/static/chunks/734-5f5ce8f347b7f417.js +0 -1
- sky/dashboard/out/_next/static/chunks/938-f347f6144075b0c8.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-dec800f9ef1b10f4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-37c042a356f8e608.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-9529d9e882a0e75c.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/infra-e690d864aa00e2ea.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-db6558a5ec687011.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-73d5e0c369d00346.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/users-2d319455c3f1c3e2.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-02a7b60f2ead275f.js +0 -1
- sky/dashboard/out/_next/static/css/d2cdba64c9202dd7.css +0 -3
- /sky/dashboard/out/_next/static/{aHej19bZyl4hoHgrzPCn7 → D5bjIfl4Ob3SV3LJz3CO0}/_ssgManifest.js +0 -0
- /sky/dashboard/out/_next/static/chunks/{573-f17bd89d9f9118b3.js → 573-82bd40a37af834f1.js} +0 -0
- {skypilot_nightly-1.0.0.dev20250524.dist-info → skypilot_nightly-1.0.0.dev20250527.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250524.dist-info → skypilot_nightly-1.0.0.dev20250527.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250524.dist-info → skypilot_nightly-1.0.0.dev20250527.dist-info}/top_level.txt +0 -0
@@ -188,14 +188,17 @@ generate_credentials_json() {
|
|
188
188
|
debug_log "Key data length: $(echo -n "$client_key_data" | wc -c) bytes"
|
189
189
|
|
190
190
|
# Check if we can create proper JSON with `jq`
|
191
|
-
if command -v jq &>/dev/null; then
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
191
|
+
if ! command -v jq &>/dev/null; then
|
192
|
+
echo "jq is not installed. Please install jq to use this script." >&2
|
193
|
+
exit 1
|
194
|
+
fi
|
195
|
+
debug_log "Using jq for JSON formatting"
|
196
|
+
|
197
|
+
# Create a temporary file for the JSON output to avoid shell escaping issues
|
198
|
+
local TEMP_JSON_FILE=$(mktemp)
|
199
|
+
|
200
|
+
# Write the JSON to the temporary file using jq for proper JSON formatting
|
201
|
+
cat > "$TEMP_JSON_FILE" << EOL
|
199
202
|
{
|
200
203
|
"apiVersion": "client.authentication.k8s.io/v1beta1",
|
201
204
|
"kind": "ExecCredential",
|
@@ -207,25 +210,14 @@ generate_credentials_json() {
|
|
207
210
|
}
|
208
211
|
EOL
|
209
212
|
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
else
|
219
|
-
debug_log "jq is not available, using simpler formatting method"
|
220
|
-
|
221
|
-
# Alternative approach: encode with base64 and use the token field instead
|
222
|
-
# This works because kubectl will decode token data properly
|
223
|
-
local combined_data=$(echo -n "${client_cert_data}:${client_key_data}" | base64 | tr -d '\n')
|
224
|
-
|
225
|
-
echo "{\"apiVersion\":\"client.authentication.k8s.io/v1beta1\",\"kind\":\"ExecCredential\",\"status\":{\"token\":\"$combined_data\",\"expirationTimestamp\":\"$expiration_time\"}}"
|
226
|
-
|
227
|
-
debug_log "Sent certificate data as encoded token instead of direct certificate fields"
|
228
|
-
fi
|
213
|
+
# Read the JSON from the file
|
214
|
+
local json_response=$(cat "$TEMP_JSON_FILE")
|
215
|
+
|
216
|
+
# Clean up
|
217
|
+
rm -f "$TEMP_JSON_FILE"
|
218
|
+
|
219
|
+
# Output the JSON
|
220
|
+
echo "$json_response"
|
229
221
|
else
|
230
222
|
# Fallback to token-based credential for tunnel-only authentication
|
231
223
|
echo "{\"apiVersion\":\"client.authentication.k8s.io/v1beta1\",\"kind\":\"ExecCredential\",\"status\":{\"token\":\"k8s-ssh-tunnel-token\",\"expirationTimestamp\":\"$expiration_time\"}}"
|
@@ -384,4 +376,4 @@ fi
|
|
384
376
|
|
385
377
|
# Return valid credential format with certificates if available
|
386
378
|
generate_credentials_json
|
387
|
-
exit 0
|
379
|
+
exit 0
|
sky/utils/schemas.py
CHANGED
@@ -1044,6 +1044,25 @@ def get_config_schema():
|
|
1044
1044
|
},
|
1045
1045
|
}
|
1046
1046
|
},
|
1047
|
+
'ssh': {
|
1048
|
+
'type': 'object',
|
1049
|
+
'required': [],
|
1050
|
+
'additionalProperties': False,
|
1051
|
+
'properties': {
|
1052
|
+
'allowed_node_pools': {
|
1053
|
+
'type': 'array',
|
1054
|
+
'items': {
|
1055
|
+
'type': 'string',
|
1056
|
+
},
|
1057
|
+
},
|
1058
|
+
'pod_config': {
|
1059
|
+
'type': 'object',
|
1060
|
+
'required': [],
|
1061
|
+
# Allow arbitrary keys since validating pod spec is hard
|
1062
|
+
'additionalProperties': True,
|
1063
|
+
},
|
1064
|
+
}
|
1065
|
+
},
|
1047
1066
|
'oci': {
|
1048
1067
|
'type': 'object',
|
1049
1068
|
'required': [],
|
@@ -1177,12 +1196,13 @@ def get_config_schema():
|
|
1177
1196
|
|
1178
1197
|
allowed_workspace_cloud_names = list(
|
1179
1198
|
service_catalog.ALL_CLOUDS) + ['cloudflare']
|
1180
|
-
# Create pattern for
|
1181
|
-
|
1199
|
+
# Create pattern for not supported clouds, i.e.
|
1200
|
+
# all clouds except gcp, kubernetes, ssh
|
1201
|
+
not_supported_clouds = [
|
1182
1202
|
cloud for cloud in allowed_workspace_cloud_names
|
1183
|
-
if cloud.lower()
|
1203
|
+
if cloud.lower() not in ['gcp', 'kubernetes', 'ssh']
|
1184
1204
|
]
|
1185
|
-
|
1205
|
+
not_supported_cloud_regex = '|'.join(not_supported_clouds)
|
1186
1206
|
workspaces_schema = {
|
1187
1207
|
'type': 'object',
|
1188
1208
|
'required': [],
|
@@ -1192,7 +1212,7 @@ def get_config_schema():
|
|
1192
1212
|
'additionalProperties': False,
|
1193
1213
|
'patternProperties': {
|
1194
1214
|
# Pattern for not supported clouds (all except gcp, kubernetes, ssh) - only allows 'disabled' property
|
1195
|
-
f'^({
|
1215
|
+
f'^({not_supported_cloud_regex})$': {
|
1196
1216
|
'type': 'object',
|
1197
1217
|
'additionalProperties': False,
|
1198
1218
|
'properties': {
|
@@ -1217,6 +1237,38 @@ def get_config_schema():
|
|
1217
1237
|
},
|
1218
1238
|
'additionalProperties': False,
|
1219
1239
|
},
|
1240
|
+
'ssh': {
|
1241
|
+
'type': 'object',
|
1242
|
+
'required': [],
|
1243
|
+
'properties': {
|
1244
|
+
'allowed_node_pools': {
|
1245
|
+
'type': 'array',
|
1246
|
+
'items': {
|
1247
|
+
'type': 'string',
|
1248
|
+
},
|
1249
|
+
},
|
1250
|
+
'disabled': {
|
1251
|
+
'type': 'boolean'
|
1252
|
+
},
|
1253
|
+
},
|
1254
|
+
'additionalProperties': False,
|
1255
|
+
},
|
1256
|
+
'kubernetes': {
|
1257
|
+
'type': 'object',
|
1258
|
+
'required': [],
|
1259
|
+
'properties': {
|
1260
|
+
'allowed_contexts': {
|
1261
|
+
'type': 'array',
|
1262
|
+
'items': {
|
1263
|
+
'type': 'string',
|
1264
|
+
},
|
1265
|
+
},
|
1266
|
+
'disabled': {
|
1267
|
+
'type': 'boolean'
|
1268
|
+
},
|
1269
|
+
},
|
1270
|
+
'additionalProperties': False,
|
1271
|
+
},
|
1220
1272
|
},
|
1221
1273
|
},
|
1222
1274
|
}
|
File without changes
|
sky/workspaces/core.py
ADDED
@@ -0,0 +1,431 @@
|
|
1
|
+
"""Workspace management core."""
|
2
|
+
|
3
|
+
import concurrent.futures
|
4
|
+
from typing import Any, Callable, Dict
|
5
|
+
|
6
|
+
import filelock
|
7
|
+
|
8
|
+
from sky import check as sky_check
|
9
|
+
from sky import exceptions
|
10
|
+
from sky import global_user_state
|
11
|
+
from sky import sky_logging
|
12
|
+
from sky import skypilot_config
|
13
|
+
from sky.skylet import constants
|
14
|
+
from sky.usage import usage_lib
|
15
|
+
from sky.utils import common_utils
|
16
|
+
from sky.utils import config_utils
|
17
|
+
from sky.utils import schemas
|
18
|
+
|
19
|
+
logger = sky_logging.init_logger(__name__)
|
20
|
+
|
21
|
+
# Timeout (seconds) for acquiring the config lock that guards workspace configuration updates against race conditions
|
22
|
+
_WORKSPACE_CONFIG_LOCK_TIMEOUT_SECONDS = 60
|
23
|
+
|
24
|
+
# =========================
|
25
|
+
# = Workspace Management =
|
26
|
+
# =========================
|
27
|
+
|
28
|
+
|
29
|
+
def get_workspaces() -> Dict[str, Any]:
    """Return the 'workspaces' section of the SkyPilot config.

    The default workspace is always present in the result: if the config does
    not define it, an empty entry is inserted for it before returning.

    Returns:
        A dict mapping workspace name to that workspace's config.
    """
    configured = skypilot_config.get_nested(('workspaces',), default_value={})
    # Only adds the default workspace when it is missing; an existing entry
    # is left untouched.
    configured.setdefault(constants.SKYPILOT_DEFAULT_WORKSPACE, {})
    return configured
|
35
|
+
|
36
|
+
|
37
|
+
def _update_workspaces_config(
        workspace_modifier_fn: Callable[[Dict[str, Any]],
                                        None]) -> Dict[str, Any]:
    """Apply a change to the 'workspaces' config section under a file lock.

    Reading the current config, running ``workspace_modifier_fn`` and writing
    the result back all happen while the SkyPilot config lock is held, so the
    whole read-modify-write sequence is serialized across processes.

    Args:
        workspace_modifier_fn: Callable that receives a shallow copy of the
            current workspaces dict and mutates it in place. Its return
            value is ignored.

    Returns:
        The workspaces dict after the modifier has been applied.

    Raises:
        RuntimeError: If acquiring the lock times out
            (``filelock.Timeout``).
    """
    lock_path = skypilot_config.get_skypilot_config_lock_path()
    try:
        config_lock = filelock.FileLock(
            lock_path, _WORKSPACE_CONFIG_LOCK_TIMEOUT_SECONDS)
        with config_lock:
            # Load the config only once the lock is held so that the
            # modifier sees the latest state.
            full_config = skypilot_config.to_dict()
            workspaces = full_config.get('workspaces', {}).copy()

            workspace_modifier_fn(workspaces)

            # Put the modified section back and persist the whole config.
            full_config['workspaces'] = workspaces
            skypilot_config.update_config_no_lock(full_config)

            return workspaces
    except filelock.Timeout as e:
        timeout_message = (
            f'Failed to update workspace configuration due to a timeout '
            f'when trying to acquire the lock at {lock_path}. This may '
            'indicate another SkyPilot process is currently updating the '
            'configuration. Please try again or manually remove the lock '
            f'file if you believe it is stale.')
        raise RuntimeError(timeout_message) from e
|
79
|
+
|
80
|
+
|
81
|
+
def _check_workspace_has_no_active_resources(workspace_name: str,
                                             operation: str) -> None:
    """Ensure one workspace has no active clusters or managed jobs.

    Single-workspace convenience wrapper around
    ``_check_workspaces_have_no_active_resources``.

    Args:
        workspace_name: Name of the workspace to inspect.
        operation: Operation being attempted ('update' or 'delete'); it is
            forwarded unchanged to the multi-workspace check.

    Raises:
        ValueError: If the workspace has active clusters or managed jobs.
    """
    single_operation = (workspace_name, operation)
    _check_workspaces_have_no_active_resources([single_operation])
|
93
|
+
|
94
|
+
|
95
|
+
def _check_workspaces_have_no_active_resources(
        workspace_operations: list) -> None:
    """Check that none of the given workspaces has active resources.

    Clusters and unfinished managed jobs are fetched once (in parallel) and
    then matched against every requested workspace. All offending workspaces
    are reported together in a single error.

    Args:
        workspace_operations: List of ``(workspace_name, operation)`` tuples
            where ``operation`` is 'update' or 'delete'. An empty list is a
            no-op.

    Raises:
        ValueError: If any workspace has active clusters or managed jobs.
            The message lists every workspace with issues.
    """
    if not workspace_operations:
        return

    def get_all_clusters():
        return global_user_state.get_clusters()

    def get_all_managed_jobs():
        # pylint: disable=import-outside-toplevel
        from sky.jobs.server import core as managed_jobs_core
        try:
            return managed_jobs_core.queue(refresh=False,
                                           skip_finished=True,
                                           all_users=True)
        except exceptions.ClusterNotUpError:
            # Treated as "no active managed jobs" (presumably the jobs
            # controller is not running - TODO confirm).
            logger.warning('All jobs should be finished in workspace.')
            return []

    # Fetch both clusters and jobs in parallel.
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        clusters_future = executor.submit(get_all_clusters)
        jobs_future = executor.submit(get_all_managed_jobs)

        all_clusters = clusters_future.result()
        all_managed_jobs = jobs_future.result()

    # Collect all error messages instead of raising on the first one.
    error_messages = []

    for workspace_name, operation in workspace_operations:
        # Records without a 'workspace' key count as belonging to the
        # default workspace.
        workspace_clusters = [
            cluster for cluster in all_clusters
            if (cluster.get('workspace', constants.SKYPILOT_DEFAULT_WORKSPACE)
                == workspace_name)
        ]
        workspace_active_jobs = [
            job for job in all_managed_jobs
            if job.get('workspace', constants.SKYPILOT_DEFAULT_WORKSPACE) ==
            workspace_name
        ]

        workspace_errors = []

        if workspace_clusters:
            cluster_list = ', '.join(
                cluster['name'] for cluster in workspace_clusters)
            workspace_errors.append(
                f'{len(workspace_clusters)} active cluster(s): {cluster_list}')

        if workspace_active_jobs:
            # Job IDs may not be strings; str.join() raises TypeError on
            # non-string items, so convert before joining.
            job_list = ', '.join(
                str(job['job_id']) for job in workspace_active_jobs)
            workspace_errors.append(
                f'{len(workspace_active_jobs)} active managed job(s): '
                f'{job_list}')

        if workspace_errors:
            workspace_error_summary = ' and '.join(workspace_errors)
            error_messages.append(
                f'Cannot {operation} workspace {workspace_name!r} because it '
                f'has {workspace_error_summary}.')

    if not error_messages:
        return

    if len(error_messages) == 1:
        # Single workspace error.
        full_message = error_messages[
            0] + ' Please terminate these resources first.'
    else:
        # Multiple workspace errors: one bullet per workspace.
        full_message = (f'Cannot proceed due to active resources in '
                        f'{len(error_messages)} workspace(s):\n' +
                        '\n'.join(f'• {msg}' for msg in error_messages) +
                        '\nPlease terminate these resources first.')
    raise ValueError(full_message)
|
189
|
+
|
190
|
+
|
191
|
+
def _validate_workspace_config(workspace_name: str,
                               workspace_config: Dict[str, Any]) -> None:
    """Validate a single workspace's config against the workspace schema.

    The schema used is the ``additionalProperties`` entry of the 'workspaces'
    property in the overall SkyPilot config schema.

    Args:
        workspace_name: Name of the workspace; used only in the error
            message prefix.
        workspace_config: The workspace configuration to validate.

    Raises:
        ValueError: If the configuration does not pass schema validation.
    """
    config_schema = schemas.get_config_schema()
    per_workspace_schema = (
        config_schema['properties']['workspaces']['additionalProperties'])
    try:
        error_prefix = (
            f'Invalid configuration for workspace {workspace_name!r}: ')
        common_utils.validate_schema(workspace_config, per_workspace_schema,
                                     error_prefix)
    except exceptions.InvalidSkyPilotConfigError as e:
        # Surface the failure as a ValueError: a) it is more user-friendly
        # and b) it will not be swallowed by a caller's try-except for
        # InvalidSkyPilotConfigError, which could cause confusion.
        raise ValueError(str(e)) from e
|
206
|
+
|
207
|
+
|
208
|
+
@usage_lib.entrypoint
def update_workspace(workspace_name: str, config: Dict[str,
                                                       Any]) -> Dict[str, Any]:
    """Replace the configuration of one workspace.

    Args:
        workspace_name: The name of the workspace to update.
        config: The new configuration for the workspace.

    Returns:
        The full workspaces configuration after the update.

    Raises:
        ValueError: If the workspace has active clusters or managed jobs, or
            if the new configuration fails schema validation.
        RuntimeError: If the config lock cannot be acquired in time.
    """
    # Order matters for which error the caller sees first: active resources
    # are checked before the new config is validated.
    _check_workspace_has_no_active_resources(workspace_name, 'update')
    _validate_workspace_config(workspace_name, config)

    def _store_workspace(all_workspaces: Dict[str, Any]) -> None:
        """Write the new config; executed while the config lock is held."""
        all_workspaces[workspace_name] = config

    updated_workspaces = _update_workspaces_config(_store_workspace)

    # Run sky check for the workspace as a post-save sanity check. This is
    # best-effort: a failure is only logged and the update still succeeds.
    try:
        sky_check.check(quiet=True, workspace=workspace_name)
    except Exception as e:  # pylint: disable=broad-except
        logger.warning(f'Workspace {workspace_name} configuration saved but '
                       f'validation check failed: {e}')

    return updated_workspaces
|
247
|
+
|
248
|
+
|
249
|
+
@usage_lib.entrypoint
def create_workspace(workspace_name: str, config: Dict[str,
                                                       Any]) -> Dict[str, Any]:
    """Add a new workspace to the configuration.

    Args:
        workspace_name: The name of the workspace to create.
        config: The configuration for the new workspace.

    Returns:
        The full workspaces configuration after the creation.

    Raises:
        ValueError: If the name is empty or not a string, the configuration
            fails schema validation, or the workspace already exists.
        RuntimeError: If the config lock cannot be acquired in time.
    """
    name_is_valid = workspace_name and isinstance(workspace_name, str)
    if not name_is_valid:
        raise ValueError('Workspace name must be a non-empty string.')

    _validate_workspace_config(workspace_name, config)

    def _add_workspace(all_workspaces: Dict[str, Any]) -> None:
        """Insert the workspace; executed while the config lock is held.

        The existence check lives here (inside the lock) so that it is made
        against the freshly read configuration.
        """
        if workspace_name in all_workspaces:
            raise ValueError(f'Workspace {workspace_name!r} already exists. '
                             'Use update instead.')
        all_workspaces[workspace_name] = config

    updated_workspaces = _update_workspaces_config(_add_workspace)

    # Run sky check for the workspace as a post-save sanity check. This is
    # best-effort: a failure is only logged and the creation still succeeds.
    try:
        sky_check.check(quiet=True, workspace=workspace_name)
    except Exception as e:  # pylint: disable=broad-except
        logger.warning(f'Workspace {workspace_name} configuration saved but '
                       f'validation check failed: {e}')

    return updated_workspaces
|
291
|
+
|
292
|
+
|
293
|
+
@usage_lib.entrypoint
def delete_workspace(workspace_name: str) -> Dict[str, Any]:
    """Remove a workspace from the configuration.

    Args:
        workspace_name: The name of the workspace to delete.

    Returns:
        The full workspaces configuration after the deletion.

    Raises:
        ValueError: If the workspace is the default workspace, does not
            exist, or has active clusters or managed jobs.
        RuntimeError: If the config lock cannot be acquired in time.
    """
    # The default workspace can never be deleted.
    if workspace_name == constants.SKYPILOT_DEFAULT_WORKSPACE:
        raise ValueError(f'Cannot delete the default workspace '
                         f'{constants.SKYPILOT_DEFAULT_WORKSPACE!r}.')

    # Early existence check, before the more expensive resource check.
    if workspace_name not in get_workspaces():
        raise ValueError(f'Workspace {workspace_name!r} does not exist.')

    _check_workspace_has_no_active_resources(workspace_name, 'delete')

    def _remove_workspace(all_workspaces: Dict[str, Any]) -> None:
        """Drop the workspace; executed while the config lock is held.

        Existence is re-checked here because the configuration is re-read
        inside the lock and may differ from the earlier check.
        """
        if workspace_name not in all_workspaces:
            raise ValueError(f'Workspace {workspace_name!r} does not exist.')
        del all_workspaces[workspace_name]

    remaining_workspaces = _update_workspaces_config(_remove_workspace)
    return remaining_workspaces
|
330
|
+
|
331
|
+
|
332
|
+
# =========================
|
333
|
+
# = Config Management =
|
334
|
+
# =========================
|
335
|
+
|
336
|
+
|
337
|
+
@usage_lib.entrypoint
def get_config() -> Dict[str, Any]:
    """Return the entire SkyPilot configuration.

    Returns:
        The configuration as produced by ``skypilot_config.to_dict()``.
    """
    full_config = skypilot_config.to_dict()
    return full_config
|
345
|
+
|
346
|
+
|
347
|
+
@usage_lib.entrypoint
def update_config(config: Dict[str, Any]) -> Dict[str, Any]:
    """Replace the entire SkyPilot configuration.

    Before saving, the new configuration is schema-validated, the API server
    endpoint is required to be unchanged, and every workspace whose config
    changes or that is removed must have no active clusters or managed jobs.

    Args:
        config: The new configuration to save.

    Returns:
        The ``config`` argument that was saved.

    Raises:
        ValueError: If the configuration is invalid, the API server endpoint
            differs from the current one, the default workspace would be
            deleted, or an affected workspace has active resources.
        RuntimeError: If the config lock cannot be acquired in time.
    """
    # Schema validation of the whole config.
    try:
        common_utils.validate_schema(config, schemas.get_config_schema(),
                                     'Invalid SkyPilot configuration: ')
    except exceptions.InvalidSkyPilotConfigError as e:
        raise ValueError(str(e)) from e

    existing_config = skypilot_config.to_dict()

    # The API server endpoint must not change through this call.
    existing_endpoint = existing_config.get('api_server', {}).get('endpoint')
    requested_endpoint = config.get('api_server', {}).get('endpoint')
    if existing_endpoint != requested_endpoint:
        raise ValueError('API server endpoint should not be changed to avoid '
                         'unexpected behavior.')

    existing_workspaces = existing_config.get('workspaces', {})
    requested_workspaces = config.get('workspaces', {})

    # (workspace_name, operation) pairs that need the active-resource check.
    pending_checks = []

    # Workspaces that are added or whose config differs from the current one.
    for name, requested in requested_workspaces.items():
        if existing_workspaces.get(name, {}) == requested:
            continue
        _validate_workspace_config(name, requested)
        pending_checks.append((name, 'update'))

    # Workspaces present today but absent from the new config are deletions.
    for name in existing_workspaces:
        if name in requested_workspaces:
            continue
        if name == constants.SKYPILOT_DEFAULT_WORKSPACE:
            raise ValueError(f'Cannot delete the default workspace '
                             f'{constants.SKYPILOT_DEFAULT_WORKSPACE!r}.')
        pending_checks.append((name, 'delete'))

    # One batched check so clusters and jobs are fetched only once.
    _check_workspaces_have_no_active_resources(pending_checks)

    # Persist the new config while holding the config lock.
    lock_path = skypilot_config.get_skypilot_config_lock_path()
    try:
        config_lock = filelock.FileLock(
            lock_path, _WORKSPACE_CONFIG_LOCK_TIMEOUT_SECONDS)
        with config_lock:
            new_config = config_utils.Config.from_dict(config)
            skypilot_config.update_config_no_lock(new_config)
    except filelock.Timeout as e:
        timeout_message = (
            f'Failed to update configuration due to a timeout '
            f'when trying to acquire the lock at {lock_path}. This may '
            'indicate another SkyPilot process is currently updating the '
            'configuration. Please try again or manually remove the lock '
            f'file if you believe it is stale.')
        raise RuntimeError(timeout_message) from e

    # Run sky check as a post-save sanity check. This is best-effort: a
    # failure is only logged and the update still succeeds.
    try:
        sky_check.check(quiet=True)
    except Exception as e:  # pylint: disable=broad-except
        logger.warning(f'Configuration saved but '
                       f'validation check failed: {e}')

    return config
|
sky/workspaces/server.py
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
"""REST API for workspace management."""
|
2
|
+
|
3
|
+
import fastapi
|
4
|
+
|
5
|
+
from sky.server.requests import executor
|
6
|
+
from sky.server.requests import payloads
|
7
|
+
from sky.server.requests import requests as api_requests
|
8
|
+
from sky.workspaces import core
|
9
|
+
|
10
|
+
router = fastapi.APIRouter()
|
11
|
+
|
12
|
+
|
13
|
+
@router.get('')
# pylint: disable=redefined-builtin
async def get(request: fastapi.Request) -> None:
    """Schedule a request that fetches the workspace config on the server.

    The handler only schedules ``core.get_workspaces`` as a SHORT request
    under the incoming request's ID; it returns nothing itself.
    """
    request_id = request.state.request_id
    executor.schedule_request(
        request_id=request_id,
        request_name='workspaces.get',
        request_body=payloads.RequestBody(),
        func=core.get_workspaces,
        schedule_type=api_requests.ScheduleType.SHORT,
    )
|
24
|
+
|
25
|
+
|
26
|
+
@router.post('/update')
async def update(request: fastapi.Request,
                 update_workspace_body: payloads.UpdateWorkspaceBody) -> None:
    """Schedule an update of a specific workspace configuration.

    The handler only schedules ``core.update_workspace`` as a SHORT request
    with the posted body; it returns nothing itself.
    """
    request_id = request.state.request_id
    executor.schedule_request(
        request_id=request_id,
        request_name='workspaces.update',
        request_body=update_workspace_body,
        func=core.update_workspace,
        schedule_type=api_requests.ScheduleType.SHORT,
    )
|
37
|
+
|
38
|
+
|
39
|
+
@router.post('/create')
async def create(request: fastapi.Request,
                 create_workspace_body: payloads.CreateWorkspaceBody) -> None:
    """Schedule the creation of a new workspace configuration.

    The handler only schedules ``core.create_workspace`` as a SHORT request
    with the posted body; it returns nothing itself.
    """
    request_id = request.state.request_id
    executor.schedule_request(
        request_id=request_id,
        request_name='workspaces.create',
        request_body=create_workspace_body,
        func=core.create_workspace,
        schedule_type=api_requests.ScheduleType.SHORT,
    )
|
50
|
+
|
51
|
+
|
52
|
+
@router.post('/delete')
async def delete(request: fastapi.Request,
                 delete_workspace_body: payloads.DeleteWorkspaceBody) -> None:
    """Schedule the deletion of a workspace configuration.

    The handler only schedules ``core.delete_workspace`` as a SHORT request
    with the posted body; it returns nothing itself.
    """
    request_id = request.state.request_id
    executor.schedule_request(
        request_id=request_id,
        request_name='workspaces.delete',
        request_body=delete_workspace_body,
        func=core.delete_workspace,
        schedule_type=api_requests.ScheduleType.SHORT,
    )
|
63
|
+
|
64
|
+
|
65
|
+
@router.get('/config')
async def get_config(request: fastapi.Request) -> None:
    """Schedule a request that fetches the entire SkyPilot configuration.

    The handler only schedules ``core.get_config`` as a SHORT request under
    the incoming request's ID; it returns nothing itself.
    """
    request_id = request.state.request_id
    executor.schedule_request(
        request_id=request_id,
        request_name='workspaces.get_config',
        request_body=payloads.GetConfigBody(),
        func=core.get_config,
        schedule_type=api_requests.ScheduleType.SHORT,
    )
|
75
|
+
|
76
|
+
|
77
|
+
@router.post('/config')
async def update_config(request: fastapi.Request,
                        update_config_body: payloads.UpdateConfigBody) -> None:
    """Schedule an update of the entire SkyPilot configuration.

    The handler only schedules ``core.update_config`` as a SHORT request
    with the posted body; it returns nothing itself.
    """
    request_id = request.state.request_id
    executor.schedule_request(
        request_id=request_id,
        request_name='workspaces.update_config',
        request_body=update_config_body,
        func=core.update_config,
        schedule_type=api_requests.ScheduleType.SHORT,
    )
|