skypilot-nightly 1.0.0.dev20250807__py3-none-any.whl → 1.0.0.dev20250812__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of skypilot-nightly might be problematic; the original page links to further details.

Files changed (91)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/kubernetes.py +5 -2
  3. sky/backends/backend_utils.py +57 -7
  4. sky/backends/cloud_vm_ray_backend.py +50 -8
  5. sky/client/cli/command.py +60 -26
  6. sky/client/sdk.py +132 -65
  7. sky/client/sdk_async.py +1 -1
  8. sky/core.py +10 -2
  9. sky/dashboard/out/404.html +1 -1
  10. sky/dashboard/out/_next/static/{YAirOGsV1z6B2RJ0VIUmD → Fuy7OzApYTUMz2QgoP7dP}/_buildManifest.js +1 -1
  11. sky/dashboard/out/_next/static/chunks/{6601-3e21152fe16da09c.js → 6601-06114c982db410b6.js} +1 -1
  12. sky/dashboard/out/_next/static/chunks/8056-5bdeda81199c0def.js +1 -0
  13. sky/dashboard/out/_next/static/chunks/{8969-318c3dca725e8e5d.js → 8969-c9686994ddafcf01.js} +1 -1
  14. sky/dashboard/out/_next/static/chunks/pages/{_app-1e6de35d15a8d432.js → _app-491a4d699d95e808.js} +1 -1
  15. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-078751bad714c017.js +11 -0
  16. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-da9cc0901349c2e9.js +1 -0
  17. sky/dashboard/out/_next/static/chunks/webpack-7fd0cf9dbecff10f.js +1 -0
  18. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  19. sky/dashboard/out/clusters/[cluster].html +1 -1
  20. sky/dashboard/out/clusters.html +1 -1
  21. sky/dashboard/out/config.html +1 -1
  22. sky/dashboard/out/index.html +1 -1
  23. sky/dashboard/out/infra/[context].html +1 -1
  24. sky/dashboard/out/infra.html +1 -1
  25. sky/dashboard/out/jobs/[job].html +1 -1
  26. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  27. sky/dashboard/out/jobs.html +1 -1
  28. sky/dashboard/out/users.html +1 -1
  29. sky/dashboard/out/volumes.html +1 -1
  30. sky/dashboard/out/workspace/new.html +1 -1
  31. sky/dashboard/out/workspaces/[name].html +1 -1
  32. sky/dashboard/out/workspaces.html +1 -1
  33. sky/execution.py +21 -4
  34. sky/global_user_state.py +110 -1
  35. sky/jobs/client/sdk.py +27 -20
  36. sky/jobs/controller.py +2 -1
  37. sky/jobs/recovery_strategy.py +3 -0
  38. sky/jobs/server/core.py +4 -0
  39. sky/jobs/utils.py +9 -2
  40. sky/provision/__init__.py +3 -2
  41. sky/provision/aws/instance.py +5 -4
  42. sky/provision/azure/instance.py +5 -4
  43. sky/provision/cudo/instance.py +5 -4
  44. sky/provision/do/instance.py +5 -4
  45. sky/provision/fluidstack/instance.py +5 -4
  46. sky/provision/gcp/instance.py +5 -4
  47. sky/provision/hyperbolic/instance.py +5 -4
  48. sky/provision/kubernetes/instance.py +36 -6
  49. sky/provision/lambda_cloud/instance.py +5 -4
  50. sky/provision/nebius/instance.py +5 -4
  51. sky/provision/oci/instance.py +5 -4
  52. sky/provision/paperspace/instance.py +5 -4
  53. sky/provision/provisioner.py +6 -0
  54. sky/provision/runpod/instance.py +5 -4
  55. sky/provision/scp/instance.py +5 -5
  56. sky/provision/vast/instance.py +5 -5
  57. sky/provision/vsphere/instance.py +5 -4
  58. sky/schemas/db/global_user_state/001_initial_schema.py +1 -1
  59. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  60. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  61. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  62. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  63. sky/schemas/db/spot_jobs/001_initial_schema.py +1 -1
  64. sky/serve/client/impl.py +11 -8
  65. sky/serve/client/sdk.py +7 -7
  66. sky/serve/serve_state.py +437 -340
  67. sky/serve/serve_utils.py +37 -3
  68. sky/serve/server/impl.py +2 -2
  69. sky/server/common.py +12 -8
  70. sky/server/constants.py +1 -1
  71. sky/setup_files/alembic.ini +4 -0
  72. sky/skypilot_config.py +4 -4
  73. sky/users/permission.py +1 -1
  74. sky/utils/cli_utils/status_utils.py +10 -1
  75. sky/utils/db/db_utils.py +53 -1
  76. sky/utils/db/migration_utils.py +5 -1
  77. sky/utils/kubernetes/deploy_remote_cluster.py +3 -1
  78. sky/utils/resource_checker.py +162 -21
  79. sky/volumes/client/sdk.py +4 -4
  80. sky/workspaces/core.py +210 -6
  81. {skypilot_nightly-1.0.0.dev20250807.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/METADATA +2 -2
  82. {skypilot_nightly-1.0.0.dev20250807.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/RECORD +87 -83
  83. sky/dashboard/out/_next/static/chunks/8056-019615038d6ce427.js +0 -1
  84. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6fd1d2d8441aa54b.js +0 -11
  85. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-155d477a6c3e04e2.js +0 -1
  86. sky/dashboard/out/_next/static/chunks/webpack-76efbdad99742559.js +0 -1
  87. sky/dashboard/out/_next/static/{YAirOGsV1z6B2RJ0VIUmD → Fuy7OzApYTUMz2QgoP7dP}/_ssgManifest.js +0 -0
  88. {skypilot_nightly-1.0.0.dev20250807.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/WHEEL +0 -0
  89. {skypilot_nightly-1.0.0.dev20250807.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/entry_points.txt +0 -0
  90. {skypilot_nightly-1.0.0.dev20250807.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/licenses/LICENSE +0 -0
  91. {skypilot_nightly-1.0.0.dev20250807.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/top_level.txt +0 -0
sky/workspaces/core.py CHANGED
@@ -1,5 +1,6 @@
1
1
  """Workspace management core."""
2
2
 
3
+ from dataclasses import dataclass
3
4
  from typing import Any, Callable, Dict, List, Tuple
4
5
 
5
6
  import filelock
@@ -9,12 +10,14 @@ from sky import exceptions
9
10
  from sky import models
10
11
  from sky import sky_logging
11
12
  from sky import skypilot_config
13
+ from sky.backends import backend_utils
12
14
  from sky.skylet import constants
13
15
  from sky.usage import usage_lib
14
16
  from sky.users import permission
15
17
  from sky.utils import annotations
16
18
  from sky.utils import common_utils
17
19
  from sky.utils import config_utils
20
+ from sky.utils import locks
18
21
  from sky.utils import resource_checker
19
22
  from sky.utils import schemas
20
23
  from sky.workspaces import utils as workspaces_utils
@@ -24,6 +27,37 @@ logger = sky_logging.init_logger(__name__)
24
27
  # Lock for workspace configuration updates to prevent race conditions
25
28
  _WORKSPACE_CONFIG_LOCK_TIMEOUT_SECONDS = 60
26
29
 
30
+
31
+ @dataclass
32
+ class WorkspaceConfigComparison:
33
+ """Result of comparing current and new workspace configurations.
34
+
35
+ This class encapsulates the results of analyzing differences between
36
+ workspace configurations, particularly focusing on user access changes
37
+ and their implications for resource validation.
38
+
39
+ Attributes:
40
+ only_user_access_changes: True if only allowed_users or private changed
41
+ private_changed: True if private setting changed
42
+ private_old: Old private setting value
43
+ private_new: New private setting value
44
+ allowed_users_changed: True if allowed_users changed
45
+ allowed_users_old: Old allowed users list
46
+ allowed_users_new: New allowed users list
47
+ removed_users: Users removed from allowed_users
48
+ added_users: Users added to allowed_users
49
+ """
50
+ only_user_access_changes: bool
51
+ private_changed: bool
52
+ private_old: bool
53
+ private_new: bool
54
+ allowed_users_changed: bool
55
+ allowed_users_old: List[str]
56
+ allowed_users_new: List[str]
57
+ removed_users: List[str]
58
+ added_users: List[str]
59
+
60
+
27
61
  # =========================
28
62
  # = Workspace Management =
29
63
  # =========================
@@ -95,6 +129,153 @@ def _validate_workspace_config(workspace_name: str,
95
129
  raise ValueError(str(e)) from e
96
130
 
97
131
 
132
+ def _compare_workspace_configs(
133
+ current_config: Dict[str, Any],
134
+ new_config: Dict[str, Any],
135
+ ) -> WorkspaceConfigComparison:
136
+ """Compare current and new workspace configurations.
137
+
138
+ Args:
139
+ current_config: The current workspace configuration.
140
+ new_config: The new workspace configuration.
141
+
142
+ Returns:
143
+ WorkspaceConfigComparison object containing the comparison results.
144
+ """
145
+ # Get private settings
146
+ private_old = current_config.get('private', False)
147
+ private_new = new_config.get('private', False)
148
+ private_changed = private_old != private_new
149
+
150
+ # Get allowed users (resolve to user IDs for comparison)
151
+ allowed_users_old = workspaces_utils.get_workspace_users(
152
+ current_config) if private_old else []
153
+ allowed_users_new = workspaces_utils.get_workspace_users(
154
+ new_config) if private_new else []
155
+
156
+ # Convert to sets for easier comparison
157
+ old_users_set = set(allowed_users_old)
158
+ new_users_set = set(allowed_users_new)
159
+
160
+ allowed_users_changed = old_users_set != new_users_set
161
+ removed_users = list(old_users_set - new_users_set)
162
+ added_users = list(new_users_set - old_users_set)
163
+
164
+ # Check if only user access related fields changed
165
+ # Create copies without the user access fields for comparison
166
+ current_without_access = {
167
+ k: v
168
+ for k, v in current_config.items()
169
+ if k not in ['private', 'allowed_users']
170
+ }
171
+ new_without_access = {
172
+ k: v
173
+ for k, v in new_config.items()
174
+ if k not in ['private', 'allowed_users']
175
+ }
176
+
177
+ only_user_access_changes = current_without_access == new_without_access
178
+
179
+ return WorkspaceConfigComparison(
180
+ only_user_access_changes=only_user_access_changes,
181
+ private_changed=private_changed,
182
+ private_old=private_old,
183
+ private_new=private_new,
184
+ allowed_users_changed=allowed_users_changed,
185
+ allowed_users_old=allowed_users_old,
186
+ allowed_users_new=allowed_users_new,
187
+ removed_users=removed_users,
188
+ added_users=added_users)
189
+
190
+
191
+ def _validate_workspace_config_changes(workspace_name: str,
192
+ current_config: Dict[str, Any],
193
+ new_config: Dict[str, Any]) -> None:
194
+ """Validate workspace configuration changes based on active resources.
195
+
196
+ This function implements the logic:
197
+ - If only allowed_users or private changed:
198
+ - If private changed from true to false: allow it
199
+ - If private changed from false to true: check that all active resources
200
+ belong to allowed_users
201
+ - If private didn't change: check that removed users don't have active
202
+ resources
203
+ - Otherwise: check that workspace has no active resources
204
+
205
+ Args:
206
+ workspace_name: The name of the workspace.
207
+ current_config: The current workspace configuration.
208
+ new_config: The new workspace configuration.
209
+
210
+ Raises:
211
+ ValueError: If the configuration change is not allowed due to active
212
+ resources.
213
+ """
214
+ config_comparison = _compare_workspace_configs(current_config, new_config)
215
+
216
+ if config_comparison.only_user_access_changes:
217
+ # Only user access settings changed
218
+ if config_comparison.private_changed:
219
+ if (config_comparison.private_old and
220
+ not config_comparison.private_new):
221
+ # Changed from private to public - always allow
222
+ logger.info(
223
+ f'Workspace {workspace_name!r} changed from private to'
224
+ f' public.')
225
+ return
226
+ elif (not config_comparison.private_old and
227
+ config_comparison.private_new):
228
+ # Changed from public to private - check that all active
229
+ # resources belong to the new allowed users
230
+ logger.info(
231
+ f'Workspace {workspace_name!r} changed from public to'
232
+ f' private. Checking that all active resources belong'
233
+ f' to allowed users.')
234
+
235
+ error_summary, missed_users_names = (
236
+ resource_checker.check_users_workspaces_active_resources(
237
+ config_comparison.allowed_users_new, [workspace_name]))
238
+ if error_summary:
239
+ error_msg=f'Cannot change workspace {workspace_name!r}' \
240
+ f' to private '
241
+ if missed_users_names:
242
+ missed_users_list = ', '.join(missed_users_names)
243
+ if len(missed_users_names) == 1:
244
+ error_msg += f'because the user ' \
245
+ f'{missed_users_list!r} has {error_summary}'
246
+ else:
247
+ error_msg += f'because the users ' \
248
+ f'{missed_users_list!r} have {error_summary}'
249
+ error_msg += ' but not in the allowed_users list.' \
250
+ ' Please either add the users to allowed_users or' \
251
+ ' ask them to terminate their resources.'
252
+ raise ValueError(error_msg)
253
+ else:
254
+ # Private setting didn't change, but allowed_users changed
255
+ if (config_comparison.allowed_users_changed and
256
+ config_comparison.removed_users):
257
+ # Check that removed users don't have active resources
258
+ logger.info(
259
+ f'Checking that removed users'
260
+ f' {config_comparison.removed_users} do not have'
261
+ f' active resources in workspace {workspace_name!r}.')
262
+ user_operations = []
263
+ for user_id in config_comparison.removed_users:
264
+ user_operations.append((user_id, 'remove'))
265
+ resource_checker.check_no_active_resources_for_users(
266
+ user_operations)
267
+ else:
268
+ # Other configuration changes - check that workspace has no active
269
+ # resources
270
+ logger.info(
271
+ f'Non-user-access configuration changes detected for'
272
+ f' workspace {workspace_name!r}. Checking that workspace has'
273
+ f' no active resources.')
274
+ resource_checker.check_no_active_resources_for_workspaces([
275
+ (workspace_name, 'update')
276
+ ])
277
+
278
+
98
279
  @usage_lib.entrypoint
99
280
  def update_workspace(workspace_name: str, config: Dict[str,
100
281
  Any]) -> Dict[str, Any]:
@@ -109,17 +290,40 @@ def update_workspace(workspace_name: str, config: Dict[str,
109
290
 
110
291
  Raises:
111
292
  ValueError: If the workspace configuration is invalid, or if there are
112
- active clusters or managed jobs in the workspace.
293
+ active clusters or managed jobs that prevent the configuration
294
+ change.
295
+ The validation logic depends on what changed:
296
+ - If only allowed_users or private changed:
297
+ - Private true->false: Always allowed
298
+ - Private false->true: All active resources must belong to
299
+ allowed_users
300
+ - allowed_users changes: Removed users must not have active
301
+ resources
302
+ - Other changes: Workspace must have no active resources
113
303
  FileNotFoundError: If the config file cannot be found.
114
304
  PermissionError: If the config file cannot be written.
115
305
  """
116
306
  _validate_workspace_config(workspace_name, config)
117
307
 
118
- # Check for active clusters and managed jobs in the workspace
119
- # TODO(zhwu): we should allow the edits that only contain changes to
120
- # allowed_users or private.
121
- resource_checker.check_no_active_resources_for_workspaces([(workspace_name,
122
- 'update')])
308
+ # Get the current workspace configuration for comparison
309
+ current_workspaces = skypilot_config.get_nested(('workspaces',),
310
+ default_value={})
311
+ current_config = current_workspaces.get(workspace_name, {})
312
+
313
+ if current_config:
314
+ lock_id = backend_utils.workspace_lock_id(workspace_name)
315
+ lock_timeout = backend_utils.WORKSPACE_LOCK_TIMEOUT_SECONDS
316
+ try:
317
+ with locks.get_lock(lock_id, lock_timeout):
318
+ # Validate the configuration changes based on active resources
319
+ _validate_workspace_config_changes(workspace_name,
320
+ current_config, config)
321
+ except locks.LockTimeout as e:
322
+ raise RuntimeError(
323
+ f'Failed to validate workspace {workspace_name!r} due to '
324
+ 'a timeout when trying to access database. Please '
325
+ f'try again or manually remove the lock at {lock_id}. '
326
+ f'{common_utils.format_exception(e)}') from None
123
327
 
124
328
  def update_workspace_fn(workspaces: Dict[str, Any]) -> None:
125
329
  """Function to update workspace inside the lock."""
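{skypilot_nightly-1.0.0.dev20250807.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@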
1
1
  Metadata-Version: 2.4
2
2
  Name: skypilot-nightly
3
- Version: 1.0.0.dev20250807
3
+ Version: 1.0.0.dev20250812
4
4
  Summary: SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.
5
5
  Author: SkyPilot Team
6
6
  License: Apache 2.0
@@ -238,7 +238,7 @@ Dynamic: summary
238
238
  ----
239
239
 
240
240
  :fire: *News* :fire:
241
- - [Aug 2025] Run and serve **OpenAI GPT-OSS models** (gpt-oss-120b, gpt-oss-20b) with one command on any infra: [**example**](./llm/gpt-oss/)
241
+ - [Aug 2025] Serve and finetune **OpenAI GPT-OSS models** (gpt-oss-120b, gpt-oss-20b) with one command on any infra: [**serve**](./llm/gpt-oss/) + [**LoRA and full finetuning**](./llm/gpt-oss-finetuning/)
242
242
  - [Jul 2025] Run distributed **RL training for LLMs** with Verl (PPO, GRPO) on any cloud: [**example**](./llm/verl/)
243
243
  - [Jul 2025] 🎉 SkyPilot v0.10.0 released! [**blog post**](https://blog.skypilot.co/announcing-skypilot-0.10.0/), [**release notes**](https://github.com/skypilot-org/skypilot/releases/tag/v0.10.0)
244
244
  - [Jul 2025] Finetune **Llama4** on any distributed cluster/cloud: [**example**](./llm/llama-4-finetuning/)