skypilot-nightly 1.0.0.dev20250523__py3-none-any.whl → 1.0.0.dev20250526__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. sky/__init__.py +2 -2
  2. sky/backends/backend_utils.py +62 -45
  3. sky/backends/cloud_vm_ray_backend.py +3 -1
  4. sky/check.py +335 -170
  5. sky/cli.py +56 -13
  6. sky/client/cli.py +56 -13
  7. sky/client/sdk.py +54 -10
  8. sky/clouds/gcp.py +19 -3
  9. sky/core.py +5 -2
  10. sky/dashboard/out/404.html +1 -1
  11. sky/dashboard/out/_next/static/7GEgRyZKRaSnYZCV1Jwol/_buildManifest.js +1 -0
  12. sky/dashboard/out/_next/static/chunks/25-062253ea41fb8eec.js +6 -0
  13. sky/dashboard/out/_next/static/chunks/480-5a0de8b6570ea105.js +1 -0
  14. sky/dashboard/out/_next/static/chunks/488-50d843fdb5396d32.js +15 -0
  15. sky/dashboard/out/_next/static/chunks/498-d7722313e5e5b4e6.js +21 -0
  16. sky/dashboard/out/_next/static/chunks/573-f17bd89d9f9118b3.js +66 -0
  17. sky/dashboard/out/_next/static/chunks/578-d351125af46c293f.js +6 -0
  18. sky/dashboard/out/_next/static/chunks/734-a6e01d7f98904741.js +1 -0
  19. sky/dashboard/out/_next/static/chunks/937.f97f83652028e944.js +1 -0
  20. sky/dashboard/out/_next/static/chunks/938-59956af3950b02ed.js +1 -0
  21. sky/dashboard/out/_next/static/chunks/9f96d65d-5a3e4af68c26849e.js +1 -0
  22. sky/dashboard/out/_next/static/chunks/pages/_app-96a715a6fb01e228.js +1 -0
  23. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-3b5aad09a25f64b7.js +1 -0
  24. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-9529d9e882a0e75c.js +16 -0
  25. sky/dashboard/out/_next/static/chunks/pages/clusters-9e6d1ec6e1ac5b29.js +1 -0
  26. sky/dashboard/out/_next/static/chunks/pages/infra-abb7d744ecf15109.js +1 -0
  27. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-48dc8d67d4b60be1.js +1 -0
  28. sky/dashboard/out/_next/static/chunks/pages/jobs-73d5e0c369d00346.js +16 -0
  29. sky/dashboard/out/_next/static/chunks/pages/users-b8acf6e6735323a2.js +1 -0
  30. sky/dashboard/out/_next/static/chunks/pages/workspace/new-bbf436f41381e169.js +1 -0
  31. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-7733c960685b4385.js +1 -0
  32. sky/dashboard/out/_next/static/chunks/pages/workspaces-5ed48b3201b998c8.js +1 -0
  33. sky/dashboard/out/_next/static/chunks/webpack-deda68c926e8d0bc.js +1 -0
  34. sky/dashboard/out/_next/static/css/28558d57108b05ae.css +3 -0
  35. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  36. sky/dashboard/out/clusters/[cluster].html +1 -1
  37. sky/dashboard/out/clusters.html +1 -1
  38. sky/dashboard/out/index.html +1 -1
  39. sky/dashboard/out/infra.html +1 -1
  40. sky/dashboard/out/jobs/[job].html +1 -1
  41. sky/dashboard/out/jobs.html +1 -1
  42. sky/dashboard/out/users.html +1 -0
  43. sky/dashboard/out/workspace/new.html +1 -0
  44. sky/dashboard/out/workspaces/[name].html +1 -0
  45. sky/dashboard/out/workspaces.html +1 -0
  46. sky/data/storage.py +1 -1
  47. sky/global_user_state.py +606 -543
  48. sky/jobs/constants.py +1 -1
  49. sky/jobs/server/core.py +72 -56
  50. sky/jobs/state.py +26 -5
  51. sky/jobs/utils.py +65 -13
  52. sky/optimizer.py +6 -3
  53. sky/provision/fluidstack/instance.py +1 -0
  54. sky/serve/server/core.py +9 -6
  55. sky/server/html/token_page.html +6 -1
  56. sky/server/requests/executor.py +1 -0
  57. sky/server/requests/payloads.py +28 -0
  58. sky/server/server.py +59 -5
  59. sky/setup_files/dependencies.py +1 -0
  60. sky/skylet/constants.py +4 -1
  61. sky/skypilot_config.py +107 -11
  62. sky/utils/cli_utils/status_utils.py +18 -8
  63. sky/utils/db_utils.py +53 -0
  64. sky/utils/kubernetes/config_map_utils.py +133 -0
  65. sky/utils/kubernetes/deploy_remote_cluster.py +166 -147
  66. sky/utils/kubernetes/kubernetes_deploy_utils.py +49 -5
  67. sky/utils/kubernetes/ssh-tunnel.sh +20 -28
  68. sky/utils/log_utils.py +4 -0
  69. sky/utils/schemas.py +54 -0
  70. sky/workspaces/__init__.py +0 -0
  71. sky/workspaces/core.py +295 -0
  72. sky/workspaces/server.py +62 -0
  73. {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/METADATA +2 -1
  74. {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/RECORD +79 -63
  75. sky/dashboard/out/_next/static/ECKwDNS9v9y3_IKFZ2lpp/_buildManifest.js +0 -1
  76. sky/dashboard/out/_next/static/chunks/236-1a3a9440417720eb.js +0 -6
  77. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  78. sky/dashboard/out/_next/static/chunks/37-d584022b0da4ac3b.js +0 -6
  79. sky/dashboard/out/_next/static/chunks/393-e1eaa440481337ec.js +0 -1
  80. sky/dashboard/out/_next/static/chunks/480-f28cd152a98997de.js +0 -1
  81. sky/dashboard/out/_next/static/chunks/582-683f4f27b81996dc.js +0 -59
  82. sky/dashboard/out/_next/static/chunks/pages/_app-8cfab319f9fb3ae8.js +0 -1
  83. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33bc2bec322249b1.js +0 -1
  84. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-e2fc2dd1955e6c36.js +0 -1
  85. sky/dashboard/out/_next/static/chunks/pages/clusters-3a748bd76e5c2984.js +0 -1
  86. sky/dashboard/out/_next/static/chunks/pages/infra-abf08c4384190a39.js +0 -1
  87. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-70756c2dad850a7e.js +0 -1
  88. sky/dashboard/out/_next/static/chunks/pages/jobs-ecd804b9272f4a7c.js +0 -1
  89. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  90. sky/dashboard/out/_next/static/css/7e7ce4ff31d3977b.css +0 -3
  91. /sky/dashboard/out/_next/static/{ECKwDNS9v9y3_IKFZ2lpp → 7GEgRyZKRaSnYZCV1Jwol}/_ssgManifest.js +0 -0
  92. {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/WHEEL +0 -0
  93. {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/entry_points.txt +0 -0
  94. {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/licenses/LICENSE +0 -0
  95. {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/top_level.txt +0 -0
sky/workspaces/core.py ADDED
@@ -0,0 +1,295 @@
1
+ """Workspace management core."""
2
+
3
+ import concurrent.futures
4
+ from typing import Any, Callable, Dict
5
+
6
+ import filelock
7
+
8
+ from sky import check as sky_check
9
+ from sky import exceptions
10
+ from sky import global_user_state
11
+ from sky import sky_logging
12
+ from sky import skypilot_config
13
+ from sky.skylet import constants
14
+ from sky.usage import usage_lib
15
+ from sky.utils import common_utils
16
+ from sky.utils import schemas
17
+
18
+ logger = sky_logging.init_logger(__name__)
19
+
20
+ # Lock for workspace configuration updates to prevent race conditions
21
+ _WORKSPACE_CONFIG_LOCK_TIMEOUT_SECONDS = 60
22
+
23
+ # =========================
24
+ # = Workspace Management =
25
+ # =========================
26
+
27
+
28
def get_workspaces() -> Dict[str, Any]:
    """Returns the workspace config.

    The 'workspaces' section is read from the SkyPilot config. If the default
    workspace has no entry there, an empty config is added for it before the
    mapping is returned.
    """
    workspace_configs = skypilot_config.get_nested(('workspaces',),
                                                   default_value={})
    # Guarantee that the default workspace is always listed.
    workspace_configs.setdefault(constants.SKYPILOT_DEFAULT_WORKSPACE, {})
    return workspace_configs
34
+
35
+
36
def _update_workspaces_config(
        workspace_modifier_fn: Callable[[Dict[str, Any]],
                                        None]) -> Dict[str, Any]:
    """Apply a modification to the 'workspaces' section of the config file.

    The whole read-modify-write cycle runs while holding the SkyPilot config
    file lock, so that concurrent processes do not overwrite each other's
    changes.

    Args:
        workspace_modifier_fn: Callback that receives a shallow copy of the
            current workspaces dict and mutates it in place. It is invoked
            while the lock is held; any exception it raises aborts the
            update before anything is written.

    Returns:
        The workspaces dict after the modification was applied and saved.

    Raises:
        RuntimeError: If the config lock cannot be acquired within
            _WORKSPACE_CONFIG_LOCK_TIMEOUT_SECONDS.
    """
    lock_path = skypilot_config.get_skypilot_config_lock_path()
    config_lock = filelock.FileLock(lock_path,
                                    _WORKSPACE_CONFIG_LOCK_TIMEOUT_SECONDS)
    try:
        with config_lock:
            # Load the config only after the lock is held so that we start
            # from the latest state.
            full_config = skypilot_config.to_dict()
            workspaces = full_config.get('workspaces', {}).copy()

            # Let the caller edit the copy, then splice it back into the
            # full config and persist it.
            workspace_modifier_fn(workspaces)
            full_config['workspaces'] = workspaces
            skypilot_config.update_config_no_lock(full_config)

            return workspaces
    except filelock.Timeout as e:
        timeout_message = (
            f'Failed to update workspace configuration due to a timeout '
            f'when trying to acquire the lock at {lock_path}. This may '
            'indicate another SkyPilot process is currently updating the '
            'configuration. Please try again or manually remove the lock '
            f'file if you believe it is stale.')
        raise RuntimeError(timeout_message) from e
78
+
79
+
80
def _check_workspace_has_no_active_resources(workspace_name: str,
                                             operation: str) -> None:
    """Check if a workspace has active clusters or managed jobs.

    The cluster lookup and the managed-job lookup run concurrently in a
    two-worker thread pool, and both results are collected before any error
    is raised. Clusters are reported first: if the workspace has both clusters
    and managed jobs, only the cluster error is raised.

    Cluster/job records that carry no 'workspace' key are treated as belonging
    to the default workspace.

    Args:
        workspace_name: The name of the workspace to check.
        operation: The operation being performed ('update' or 'delete'). Only
            used to build the error message.

    Raises:
        ValueError: If the workspace has active clusters or managed jobs.
    """

    def check_clusters():
        # Clusters known to the global user state that belong to this
        # workspace.
        all_clusters = global_user_state.get_clusters()
        return [
            cluster for cluster in all_clusters
            if (cluster.get('workspace', constants.SKYPILOT_DEFAULT_WORKSPACE)
                == workspace_name)
        ]

    def check_managed_jobs():
        # Imported lazily inside the function rather than at module top level.
        # pylint: disable=import-outside-toplevel
        from sky.jobs.server import core as managed_jobs_core

        try:
            # Only unfinished jobs are of interest (skip_finished=True), but
            # across all users.
            managed_jobs = managed_jobs_core.queue(refresh=False,
                                                   skip_finished=True,
                                                   all_users=True)
        except exceptions.ClusterNotUpError:
            # If we can't check managed jobs (e.g., controller not running),
            # log a warning but don't fail the operation.
            logger.warning('All jobs should be finished in workspace.')
            return []

        return [
            job for job in managed_jobs
            if job.get('workspace', constants.SKYPILOT_DEFAULT_WORKSPACE) ==
            workspace_name
        ]

    # Run both checks in parallel and wait for both to complete.
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        cluster_future = executor.submit(check_clusters)
        jobs_future = executor.submit(check_managed_jobs)

        workspace_clusters = cluster_future.result()
        workspace_active_jobs = jobs_future.result()

    if workspace_clusters:
        cluster_list = ', '.join(
            cluster['name'] for cluster in workspace_clusters)
        raise ValueError(
            f'Cannot {operation} workspace {workspace_name!r} because it has '
            f'{len(workspace_clusters)} active cluster(s): {cluster_list}. '
            f'Please terminate these clusters first.')

    if workspace_active_jobs:
        # str.join() raises TypeError on non-string items, which would mask
        # the intended ValueError. Job IDs are presumably integers
        # (NOTE(review): confirm against managed_jobs_core.queue), so convert
        # them explicitly.
        job_list = ', '.join(
            str(job['job_id']) for job in workspace_active_jobs)
        raise ValueError(
            f'Cannot {operation} workspace {workspace_name!r} because it has '
            f'{len(workspace_active_jobs)} active managed job(s): '
            f'{job_list}. Please cancel these jobs first.')
155
+
156
+
157
def _validate_workspace_config(workspace_name: str,
                               workspace_config: Dict[str, Any]) -> None:
    """Validate one workspace's configuration against the config schema.

    The schema used is the 'additionalProperties' entry of the 'workspaces'
    property in the overall SkyPilot config schema.

    Args:
        workspace_name: Name of the workspace; used in the error message.
        workspace_config: The configuration to validate.

    Raises:
        ValueError: If the configuration does not conform to the schema.
    """
    config_schema = schemas.get_config_schema()
    per_workspace_schema = (
        config_schema['properties']['workspaces']['additionalProperties'])
    error_prefix = f'Invalid configuration for workspace {workspace_name!r}: '
    try:
        common_utils.validate_schema(workspace_config, per_workspace_schema,
                                     error_prefix)
    except exceptions.InvalidSkyPilotConfigError as e:
        # Re-raise as ValueError because: a) it is more user-friendly, and
        # b) it will not be caught by a try-except in the caller, which could
        # otherwise cause confusion.
        raise ValueError(str(e)) from e
172
+
173
+
174
@usage_lib.entrypoint
def update_workspace(workspace_name: str, config: Dict[str,
                                                       Any]) -> Dict[str, Any]:
    """Replaces the configuration of a single workspace.

    The update is rejected if the workspace still has active clusters or
    managed jobs, or if the new configuration fails schema validation. After
    the configuration is saved, `sky check` is run for the workspace; a
    failure of that check is only logged as a warning.

    Args:
        workspace_name: The name of the workspace to update.
        config: The new configuration for the workspace.

    Returns:
        The updated workspaces configuration.

    Raises:
        ValueError: If the workspace configuration is invalid, or if there are
            active clusters or managed jobs in the workspace.

    Errors raised while saving the configuration are not caught here and
    propagate to the caller.
    """
    # Preconditions: nothing is running in the workspace, and the new config
    # is well-formed.
    _check_workspace_has_no_active_resources(workspace_name, 'update')
    _validate_workspace_config(workspace_name, config)

    def _set_workspace(workspaces: Dict[str, Any]) -> None:
        """Overwrite this workspace's entry; runs inside the config lock."""
        workspaces[workspace_name] = config

    updated_workspaces = _update_workspaces_config(_set_workspace)

    # Best-effort validation of the saved workspace via sky check: a failure
    # is reported but does not undo or fail the update.
    try:
        sky_check.check(quiet=True, workspace=workspace_name)
    except Exception as e:  # pylint: disable=broad-except
        logger.warning(f'Workspace {workspace_name} configuration saved but '
                       f'validation check failed: {e}')

    return updated_workspaces
213
+
214
+
215
@usage_lib.entrypoint
def create_workspace(workspace_name: str, config: Dict[str,
                                                       Any]) -> Dict[str, Any]:
    """Adds a new workspace to the configuration.

    The name must be a non-empty string and the configuration must pass
    schema validation. The existence check happens inside the config lock.
    After the configuration is saved, `sky check` is run for the workspace; a
    failure of that check is only logged as a warning.

    Args:
        workspace_name: The name of the workspace to create.
        config: The configuration for the new workspace.

    Returns:
        The updated workspaces configuration.

    Raises:
        ValueError: If the name is empty or not a string, the workspace
            already exists, or the configuration is invalid.

    Errors raised while saving the configuration are not caught here and
    propagate to the caller.
    """
    if not (workspace_name and isinstance(workspace_name, str)):
        raise ValueError('Workspace name must be a non-empty string.')

    _validate_workspace_config(workspace_name, config)

    def _add_workspace(workspaces: Dict[str, Any]) -> None:
        """Insert the new entry; runs inside the config lock."""
        if workspace_name in workspaces:
            raise ValueError(f'Workspace {workspace_name!r} already exists. '
                             'Use update instead.')
        workspaces[workspace_name] = config

    updated_workspaces = _update_workspaces_config(_add_workspace)

    # Best-effort validation of the saved workspace via sky check: a failure
    # is reported but does not undo or fail the creation.
    try:
        sky_check.check(quiet=True, workspace=workspace_name)
    except Exception as e:  # pylint: disable=broad-except
        logger.warning(f'Workspace {workspace_name} configuration saved but '
                       f'validation check failed: {e}')

    return updated_workspaces
257
+
258
+
259
@usage_lib.entrypoint
def delete_workspace(workspace_name: str) -> Dict[str, Any]:
    """Removes a workspace from the configuration.

    The default workspace can never be deleted. Existence is checked once up
    front and again inside the config lock, since the configuration may
    change between the two points.

    Args:
        workspace_name: The name of the workspace to delete.

    Returns:
        The updated workspaces configuration.

    Raises:
        ValueError: If the workspace doesn't exist, is the default workspace,
            or has active clusters or managed jobs.

    Errors raised while saving the configuration are not caught here and
    propagate to the caller.
    """
    if workspace_name == constants.SKYPILOT_DEFAULT_WORKSPACE:
        raise ValueError(f'Cannot delete the default workspace '
                         f'{constants.SKYPILOT_DEFAULT_WORKSPACE!r}.')

    # Fail early on unknown workspaces, before the resource check below.
    if workspace_name not in get_workspaces():
        raise ValueError(f'Workspace {workspace_name!r} does not exist.')

    _check_workspace_has_no_active_resources(workspace_name, 'delete')

    def _remove_workspace(workspaces: Dict[str, Any]) -> None:
        """Drop the entry; runs inside the config lock."""
        if workspace_name not in workspaces:
            raise ValueError(f'Workspace {workspace_name!r} does not exist.')
        del workspaces[workspace_name]

    return _update_workspaces_config(_remove_workspace)
@@ -0,0 +1,62 @@
1
+ """REST API for workspace management."""
2
+
3
+ import fastapi
4
+
5
+ from sky.server.requests import executor
6
+ from sky.server.requests import payloads
7
+ from sky.server.requests import requests as api_requests
8
+ from sky.workspaces import core
9
+
10
+ router = fastapi.APIRouter()
11
+
12
+
13
@router.get('')
# pylint: disable=redefined-builtin
async def get(request: fastapi.Request) -> None:
    """Gets workspace config on the server.

    Schedules `core.get_workspaces` on the request executor as a SHORT
    request with an empty request body, under the request ID stored on
    `request.state`.
    """
    request_id = request.state.request_id
    empty_body = payloads.RequestBody()
    executor.schedule_request(
        request_id=request_id,
        request_name='workspaces.get',
        request_body=empty_body,
        func=core.get_workspaces,
        schedule_type=api_requests.ScheduleType.SHORT,
    )
24
+
25
+
26
@router.post('/update')
async def update(request: fastapi.Request,
                 update_workspace_body: payloads.UpdateWorkspaceBody) -> None:
    """Updates a specific workspace configuration.

    Schedules `core.update_workspace` on the request executor as a SHORT
    request, passing the posted body through as the request body.
    """
    request_id = request.state.request_id
    executor.schedule_request(
        request_id=request_id,
        request_name='workspaces.update',
        request_body=update_workspace_body,
        func=core.update_workspace,
        schedule_type=api_requests.ScheduleType.SHORT,
    )
37
+
38
+
39
@router.post('/create')
async def create(request: fastapi.Request,
                 create_workspace_body: payloads.CreateWorkspaceBody) -> None:
    """Creates a new workspace configuration.

    Schedules `core.create_workspace` on the request executor as a SHORT
    request, passing the posted body through as the request body.
    """
    request_id = request.state.request_id
    executor.schedule_request(
        request_id=request_id,
        request_name='workspaces.create',
        request_body=create_workspace_body,
        func=core.create_workspace,
        schedule_type=api_requests.ScheduleType.SHORT,
    )
50
+
51
+
52
@router.post('/delete')
async def delete(request: fastapi.Request,
                 delete_workspace_body: payloads.DeleteWorkspaceBody) -> None:
    """Deletes a workspace configuration.

    Schedules `core.delete_workspace` on the request executor as a SHORT
    request, passing the posted body through as the request body.
    """
    request_id = request.state.request_id
    executor.schedule_request(
        request_id=request_id,
        request_name='workspaces.delete',
        request_body=delete_workspace_body,
        func=core.delete_workspace,
        schedule_type=api_requests.ScheduleType.SHORT,
    )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: skypilot-nightly
3
- Version: 1.0.0.dev20250523
3
+ Version: 1.0.0.dev20250526
4
4
  Summary: SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.
5
5
  Author: SkyPilot Team
6
6
  License: Apache 2.0
@@ -47,6 +47,7 @@ Requires-Dist: python-multipart
47
47
  Requires-Dist: aiofiles
48
48
  Requires-Dist: httpx
49
49
  Requires-Dist: setproctitle
50
+ Requires-Dist: sqlalchemy
50
51
  Provides-Extra: aws
51
52
  Requires-Dist: awscli>=1.27.10; extra == "aws"
52
53
  Requires-Dist: botocore>=1.29.10; extra == "aws"