skypilot-nightly 1.0.0.dev20250909__py3-none-any.whl → 1.0.0.dev20250912__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +2 -2
- sky/authentication.py +19 -4
- sky/backends/backend_utils.py +160 -23
- sky/backends/cloud_vm_ray_backend.py +226 -74
- sky/catalog/__init__.py +7 -0
- sky/catalog/aws_catalog.py +4 -0
- sky/catalog/common.py +18 -0
- sky/catalog/data_fetchers/fetch_aws.py +13 -1
- sky/client/cli/command.py +2 -71
- sky/client/sdk.py +20 -0
- sky/client/sdk_async.py +23 -18
- sky/clouds/aws.py +26 -6
- sky/clouds/cloud.py +8 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/3294.ba6586f9755b0edb.js +6 -0
- sky/dashboard/out/_next/static/chunks/{webpack-d4fabc08788e14af.js → webpack-e8a0c4c3c6f408fb.js} +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/storage.py +5 -1
- sky/execution.py +21 -14
- sky/global_user_state.py +34 -0
- sky/jobs/client/sdk_async.py +4 -2
- sky/jobs/constants.py +3 -0
- sky/jobs/controller.py +734 -310
- sky/jobs/recovery_strategy.py +251 -129
- sky/jobs/scheduler.py +247 -174
- sky/jobs/server/core.py +20 -4
- sky/jobs/server/utils.py +2 -2
- sky/jobs/state.py +709 -508
- sky/jobs/utils.py +90 -40
- sky/logs/agent.py +10 -2
- sky/provision/aws/config.py +4 -1
- sky/provision/gcp/config.py +6 -1
- sky/provision/kubernetes/config.py +7 -2
- sky/provision/kubernetes/instance.py +84 -41
- sky/provision/kubernetes/utils.py +17 -8
- sky/provision/provisioner.py +1 -0
- sky/provision/vast/instance.py +1 -1
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/serve/replica_managers.py +0 -7
- sky/serve/serve_utils.py +5 -0
- sky/serve/server/impl.py +1 -2
- sky/serve/service.py +0 -2
- sky/server/common.py +8 -3
- sky/server/config.py +55 -27
- sky/server/constants.py +1 -0
- sky/server/daemons.py +7 -11
- sky/server/metrics.py +41 -8
- sky/server/requests/executor.py +41 -4
- sky/server/requests/serializers/encoders.py +1 -1
- sky/server/server.py +9 -1
- sky/server/uvicorn.py +11 -5
- sky/setup_files/dependencies.py +4 -2
- sky/skylet/attempt_skylet.py +1 -0
- sky/skylet/constants.py +14 -7
- sky/skylet/events.py +2 -10
- sky/skylet/log_lib.py +11 -0
- sky/skylet/log_lib.pyi +9 -0
- sky/task.py +62 -0
- sky/templates/kubernetes-ray.yml.j2 +120 -3
- sky/utils/accelerator_registry.py +3 -1
- sky/utils/command_runner.py +35 -11
- sky/utils/command_runner.pyi +25 -3
- sky/utils/common_utils.py +11 -1
- sky/utils/context_utils.py +15 -2
- sky/utils/controller_utils.py +5 -0
- sky/utils/db/db_utils.py +31 -2
- sky/utils/db/migration_utils.py +1 -1
- sky/utils/git.py +559 -1
- sky/utils/resource_checker.py +8 -7
- sky/utils/rich_utils.py +3 -1
- sky/utils/subprocess_utils.py +9 -0
- sky/volumes/volume.py +2 -0
- sky/workspaces/core.py +57 -21
- {skypilot_nightly-1.0.0.dev20250909.dist-info → skypilot_nightly-1.0.0.dev20250912.dist-info}/METADATA +38 -36
- {skypilot_nightly-1.0.0.dev20250909.dist-info → skypilot_nightly-1.0.0.dev20250912.dist-info}/RECORD +95 -95
- sky/client/cli/git.py +0 -549
- sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
- /sky/dashboard/out/_next/static/{eWytLgin5zvayQw3Xk46m → DAiq7V2xJnO1LSfmunZl6}/_buildManifest.js +0 -0
- /sky/dashboard/out/_next/static/{eWytLgin5zvayQw3Xk46m → DAiq7V2xJnO1LSfmunZl6}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250909.dist-info → skypilot_nightly-1.0.0.dev20250912.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250909.dist-info → skypilot_nightly-1.0.0.dev20250912.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250909.dist-info → skypilot_nightly-1.0.0.dev20250912.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250909.dist-info → skypilot_nightly-1.0.0.dev20250912.dist-info}/top_level.txt +0 -0
sky/workspaces/core.py
CHANGED
|
@@ -14,6 +14,7 @@ from sky.backends import backend_utils
|
|
|
14
14
|
from sky.skylet import constants
|
|
15
15
|
from sky.usage import usage_lib
|
|
16
16
|
from sky.users import permission
|
|
17
|
+
from sky.users import rbac
|
|
17
18
|
from sky.utils import annotations
|
|
18
19
|
from sky.utils import common_utils
|
|
19
20
|
from sky.utils import config_utils
|
|
@@ -147,11 +148,15 @@ def _compare_workspace_configs(
|
|
|
147
148
|
private_new = new_config.get('private', False)
|
|
148
149
|
private_changed = private_old != private_new
|
|
149
150
|
|
|
151
|
+
admin_user_ids = permission.permission_service.get_users_for_role(
|
|
152
|
+
rbac.RoleName.ADMIN.value)
|
|
150
153
|
# Get allowed users (resolve to user IDs for comparison)
|
|
151
154
|
allowed_users_old = workspaces_utils.get_workspace_users(
|
|
152
155
|
current_config) if private_old else []
|
|
156
|
+
allowed_users_old += admin_user_ids
|
|
153
157
|
allowed_users_new = workspaces_utils.get_workspace_users(
|
|
154
158
|
new_config) if private_new else []
|
|
159
|
+
allowed_users_new += admin_user_ids
|
|
155
160
|
|
|
156
161
|
# Convert to sets for easier comparison
|
|
157
162
|
old_users_set = set(allowed_users_old)
|
|
@@ -188,6 +193,24 @@ def _compare_workspace_configs(
|
|
|
188
193
|
added_users=added_users)
|
|
189
194
|
|
|
190
195
|
|
|
196
|
+
def _validate_workspace_config_changes_with_lock(
|
|
197
|
+
workspace_name: str, current_config: Dict[str, Any],
|
|
198
|
+
new_config: Dict[str, Any]) -> None:
|
|
199
|
+
lock_id = backend_utils.workspace_lock_id(workspace_name)
|
|
200
|
+
lock_timeout = backend_utils.WORKSPACE_LOCK_TIMEOUT_SECONDS
|
|
201
|
+
try:
|
|
202
|
+
with locks.get_lock(lock_id, lock_timeout):
|
|
203
|
+
# Validate the configuration changes based on active resources
|
|
204
|
+
_validate_workspace_config_changes(workspace_name, current_config,
|
|
205
|
+
new_config)
|
|
206
|
+
except locks.LockTimeout as e:
|
|
207
|
+
raise RuntimeError(
|
|
208
|
+
f'Failed to validate workspace {workspace_name!r} due to '
|
|
209
|
+
'a timeout when trying to access database. Please '
|
|
210
|
+
f'try again or manually remove the lock at {lock_id}. '
|
|
211
|
+
f'{common_utils.format_exception(e)}') from None
|
|
212
|
+
|
|
213
|
+
|
|
191
214
|
def _validate_workspace_config_changes(workspace_name: str,
|
|
192
215
|
current_config: Dict[str, Any],
|
|
193
216
|
new_config: Dict[str, Any]) -> None:
|
|
@@ -232,7 +255,7 @@ def _validate_workspace_config_changes(workspace_name: str,
|
|
|
232
255
|
f' private. Checking that all active resources belong'
|
|
233
256
|
f' to allowed users.')
|
|
234
257
|
|
|
235
|
-
error_summary, missed_users_names = (
|
|
258
|
+
error_summary, missed_users_names, _ = (
|
|
236
259
|
resource_checker.check_users_workspaces_active_resources(
|
|
237
260
|
config_comparison.allowed_users_new, [workspace_name]))
|
|
238
261
|
if error_summary:
|
|
@@ -259,11 +282,35 @@ def _validate_workspace_config_changes(workspace_name: str,
|
|
|
259
282
|
f'Checking that removed users'
|
|
260
283
|
f' {config_comparison.removed_users} do not have'
|
|
261
284
|
f' active resources in workspace {workspace_name!r}.')
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
285
|
+
error_summary, missed_users_names, missed_user_dict = (
|
|
286
|
+
resource_checker.check_users_workspaces_active_resources(
|
|
287
|
+
config_comparison.allowed_users_new, [workspace_name]))
|
|
288
|
+
if error_summary:
|
|
289
|
+
error_user_ids = []
|
|
290
|
+
for user_id in config_comparison.removed_users:
|
|
291
|
+
if user_id in missed_user_dict:
|
|
292
|
+
error_user_ids.append(user_id)
|
|
293
|
+
error_user_names = []
|
|
294
|
+
if error_user_ids:
|
|
295
|
+
error_user_names = [
|
|
296
|
+
missed_user_dict[user_id]
|
|
297
|
+
for user_id in error_user_ids
|
|
298
|
+
]
|
|
299
|
+
|
|
300
|
+
error_msg = 'Cannot '
|
|
301
|
+
error_users_list = ', '.join(error_user_names)
|
|
302
|
+
if len(error_user_names) == 1:
|
|
303
|
+
error_msg += f'remove user {error_users_list!r} ' \
|
|
304
|
+
f'from workspace {workspace_name!r} because the ' \
|
|
305
|
+
f'user has {error_summary}'
|
|
306
|
+
else:
|
|
307
|
+
error_msg += f'remove users {error_users_list!r}' \
|
|
308
|
+
f' from workspace {workspace_name!r} because the' \
|
|
309
|
+
f' users have {error_summary}'
|
|
310
|
+
error_msg += ', but not in the allowed_users list.' \
|
|
311
|
+
' Please either add the users to allowed_users or' \
|
|
312
|
+
' ask them to terminate their resources.'
|
|
313
|
+
raise ValueError(error_msg)
|
|
267
314
|
else:
|
|
268
315
|
# Other configuration changes - check that workspace has no active
|
|
269
316
|
# resources
|
|
@@ -310,20 +357,8 @@ def update_workspace(workspace_name: str, config: Dict[str,
|
|
|
310
357
|
default_value={})
|
|
311
358
|
current_config = current_workspaces.get(workspace_name, {})
|
|
312
359
|
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
lock_timeout = backend_utils.WORKSPACE_LOCK_TIMEOUT_SECONDS
|
|
316
|
-
try:
|
|
317
|
-
with locks.get_lock(lock_id, lock_timeout):
|
|
318
|
-
# Validate the configuration changes based on active resources
|
|
319
|
-
_validate_workspace_config_changes(workspace_name,
|
|
320
|
-
current_config, config)
|
|
321
|
-
except locks.LockTimeout as e:
|
|
322
|
-
raise RuntimeError(
|
|
323
|
-
f'Failed to validate workspace {workspace_name!r} due to '
|
|
324
|
-
'a timeout when trying to access database. Please '
|
|
325
|
-
f'try again or manually remove the lock at {lock_id}. '
|
|
326
|
-
f'{common_utils.format_exception(e)}') from None
|
|
360
|
+
_validate_workspace_config_changes_with_lock(workspace_name, current_config,
|
|
361
|
+
config)
|
|
327
362
|
|
|
328
363
|
def update_workspace_fn(workspaces: Dict[str, Any]) -> None:
|
|
329
364
|
"""Function to update workspace inside the lock."""
|
|
@@ -510,7 +545,8 @@ def update_config(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
510
545
|
# If workspace configuration is changing, validate and mark for checking
|
|
511
546
|
if current_workspace_config != new_workspace_config:
|
|
512
547
|
_validate_workspace_config(workspace_name, new_workspace_config)
|
|
513
|
-
|
|
548
|
+
_validate_workspace_config_changes_with_lock(
|
|
549
|
+
workspace_name, current_workspace_config, new_workspace_config)
|
|
514
550
|
users = workspaces_utils.get_workspace_users(new_workspace_config)
|
|
515
551
|
workspaces_to_check_policy['update'][workspace_name] = users
|
|
516
552
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: skypilot-nightly
|
|
3
|
-
Version: 1.0.0.dev20250909
|
|
3
|
+
Version: 1.0.0.dev20250912
|
|
4
4
|
Summary: SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.
|
|
5
5
|
Author: SkyPilot Team
|
|
6
6
|
License: Apache 2.0
|
|
@@ -53,6 +53,8 @@ Requires-Dist: httpx
|
|
|
53
53
|
Requires-Dist: setproctitle
|
|
54
54
|
Requires-Dist: sqlalchemy
|
|
55
55
|
Requires-Dist: psycopg2-binary
|
|
56
|
+
Requires-Dist: aiosqlite
|
|
57
|
+
Requires-Dist: asyncpg
|
|
56
58
|
Requires-Dist: casbin
|
|
57
59
|
Requires-Dist: sqlalchemy_adapter
|
|
58
60
|
Requires-Dist: prometheus_client>=0.8.0
|
|
@@ -88,10 +90,10 @@ Requires-Dist: ibm-cloud-sdk-core; extra == "ibm"
|
|
|
88
90
|
Requires-Dist: ibm-vpc; extra == "ibm"
|
|
89
91
|
Requires-Dist: ibm-platform-services>=0.48.0; extra == "ibm"
|
|
90
92
|
Requires-Dist: ibm-cos-sdk; extra == "ibm"
|
|
91
|
-
Requires-Dist: ray[default]!=2.6.0,>=2.2.0; extra == "ibm"
|
|
93
|
+
Requires-Dist: ray[default]>=2.6.1; extra == "ibm"
|
|
92
94
|
Provides-Extra: docker
|
|
93
95
|
Requires-Dist: docker; extra == "docker"
|
|
94
|
-
Requires-Dist: ray[default]!=2.6.0,>=2.2.0; extra == "docker"
|
|
96
|
+
Requires-Dist: ray[default]>=2.6.1; extra == "docker"
|
|
95
97
|
Provides-Extra: lambda
|
|
96
98
|
Provides-Extra: cloudflare
|
|
97
99
|
Requires-Dist: awscli>=1.27.10; extra == "cloudflare"
|
|
@@ -99,7 +101,7 @@ Requires-Dist: botocore>=1.29.10; extra == "cloudflare"
|
|
|
99
101
|
Requires-Dist: boto3>=1.26.1; extra == "cloudflare"
|
|
100
102
|
Requires-Dist: colorama<0.4.5; extra == "cloudflare"
|
|
101
103
|
Provides-Extra: scp
|
|
102
|
-
Requires-Dist: ray[default]!=2.6.0,>=2.2.0; extra == "scp"
|
|
104
|
+
Requires-Dist: ray[default]>=2.6.1; extra == "scp"
|
|
103
105
|
Provides-Extra: oci
|
|
104
106
|
Requires-Dist: oci; extra == "oci"
|
|
105
107
|
Provides-Extra: kubernetes
|
|
@@ -145,48 +147,48 @@ Requires-Dist: grpcio>=1.63.0; extra == "server"
|
|
|
145
147
|
Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "server"
|
|
146
148
|
Requires-Dist: aiosqlite; extra == "server"
|
|
147
149
|
Provides-Extra: all
|
|
150
|
+
Requires-Dist: oci; extra == "all"
|
|
151
|
+
Requires-Dist: azure-identity>=1.19.0; extra == "all"
|
|
152
|
+
Requires-Dist: msgraph-sdk; extra == "all"
|
|
153
|
+
Requires-Dist: ibm-cos-sdk; extra == "all"
|
|
148
154
|
Requires-Dist: passlib; extra == "all"
|
|
149
|
-
Requires-Dist:
|
|
150
|
-
Requires-Dist:
|
|
155
|
+
Requires-Dist: cudo-compute>=0.1.10; extra == "all"
|
|
156
|
+
Requires-Dist: azure-mgmt-network>=27.0.0; extra == "all"
|
|
157
|
+
Requires-Dist: colorama<0.4.5; extra == "all"
|
|
151
158
|
Requires-Dist: awscli>=1.27.10; extra == "all"
|
|
152
|
-
Requires-Dist:
|
|
153
|
-
Requires-Dist:
|
|
159
|
+
Requires-Dist: ibm-platform-services>=0.48.0; extra == "all"
|
|
160
|
+
Requires-Dist: ibm-cloud-sdk-core; extra == "all"
|
|
161
|
+
Requires-Dist: casbin; extra == "all"
|
|
154
162
|
Requires-Dist: anyio; extra == "all"
|
|
155
|
-
Requires-Dist:
|
|
163
|
+
Requires-Dist: azure-mgmt-compute>=33.0.0; extra == "all"
|
|
164
|
+
Requires-Dist: azure-cli>=2.65.0; extra == "all"
|
|
165
|
+
Requires-Dist: botocore>=1.29.10; extra == "all"
|
|
166
|
+
Requires-Dist: pydo>=0.3.0; extra == "all"
|
|
167
|
+
Requires-Dist: grpcio>=1.63.0; extra == "all"
|
|
168
|
+
Requires-Dist: azure-core>=1.24.0; extra == "all"
|
|
169
|
+
Requires-Dist: pyopenssl<24.3.0,>=23.2.0; extra == "all"
|
|
170
|
+
Requires-Dist: ray[default]>=2.6.1; extra == "all"
|
|
171
|
+
Requires-Dist: boto3>=1.26.1; extra == "all"
|
|
172
|
+
Requires-Dist: docker; extra == "all"
|
|
156
173
|
Requires-Dist: ibm-vpc; extra == "all"
|
|
157
|
-
Requires-Dist:
|
|
158
|
-
Requires-Dist:
|
|
159
|
-
Requires-Dist:
|
|
174
|
+
Requires-Dist: aiosqlite; extra == "all"
|
|
175
|
+
Requires-Dist: websockets; extra == "all"
|
|
176
|
+
Requires-Dist: azure-core>=1.31.0; extra == "all"
|
|
160
177
|
Requires-Dist: nebius>=0.2.47; extra == "all"
|
|
161
178
|
Requires-Dist: azure-common; extra == "all"
|
|
162
|
-
Requires-Dist: azure-core>=1.31.0; extra == "all"
|
|
163
|
-
Requires-Dist: azure-mgmt-network>=27.0.0; extra == "all"
|
|
164
|
-
Requires-Dist: pydo>=0.3.0; extra == "all"
|
|
165
|
-
Requires-Dist: aiosqlite; extra == "all"
|
|
166
179
|
Requires-Dist: google-cloud-storage; extra == "all"
|
|
167
|
-
Requires-Dist:
|
|
168
|
-
Requires-Dist:
|
|
169
|
-
Requires-Dist:
|
|
170
|
-
Requires-Dist:
|
|
171
|
-
Requires-Dist:
|
|
180
|
+
Requires-Dist: vastai-sdk>=0.1.12; extra == "all"
|
|
181
|
+
Requires-Dist: kubernetes!=32.0.0,>=20.0.0; extra == "all"
|
|
182
|
+
Requires-Dist: python-dateutil; extra == "all"
|
|
183
|
+
Requires-Dist: google-api-python-client>=2.69.0; extra == "all"
|
|
184
|
+
Requires-Dist: sqlalchemy_adapter; extra == "all"
|
|
185
|
+
Requires-Dist: runpod>=1.6.1; extra == "all"
|
|
172
186
|
Requires-Dist: msrestazure; extra == "all"
|
|
173
|
-
Requires-Dist:
|
|
174
|
-
Requires-Dist: azure-mgmt-compute>=33.0.0; extra == "all"
|
|
175
|
-
Requires-Dist: msgraph-sdk; extra == "all"
|
|
176
|
-
Requires-Dist: ibm-platform-services>=0.48.0; extra == "all"
|
|
177
|
-
Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "all"
|
|
178
|
-
Requires-Dist: docker; extra == "all"
|
|
179
|
-
Requires-Dist: ibm-cos-sdk; extra == "all"
|
|
187
|
+
Requires-Dist: pyjwt; extra == "all"
|
|
180
188
|
Requires-Dist: aiohttp; extra == "all"
|
|
181
|
-
Requires-Dist:
|
|
189
|
+
Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "all"
|
|
182
190
|
Requires-Dist: pyvmomi==8.0.1.0.2; extra == "all"
|
|
183
|
-
Requires-Dist:
|
|
184
|
-
Requires-Dist: botocore>=1.29.10; extra == "all"
|
|
185
|
-
Requires-Dist: vastai-sdk>=0.1.12; extra == "all"
|
|
186
|
-
Requires-Dist: ray[default]!=2.6.0,>=2.2.0; extra == "all"
|
|
187
|
-
Requires-Dist: sqlalchemy_adapter; extra == "all"
|
|
188
|
-
Requires-Dist: azure-core>=1.24.0; extra == "all"
|
|
189
|
-
Requires-Dist: cudo-compute>=0.1.10; extra == "all"
|
|
191
|
+
Requires-Dist: azure-storage-blob>=12.23.1; extra == "all"
|
|
190
192
|
Dynamic: author
|
|
191
193
|
Dynamic: classifier
|
|
192
194
|
Dynamic: description
|