skypilot-nightly 1.0.0.dev20250617__py3-none-any.whl → 1.0.0.dev20250619__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (74)
  1. sky/__init__.py +2 -2
  2. sky/backends/backend_utils.py +7 -0
  3. sky/backends/cloud_vm_ray_backend.py +48 -36
  4. sky/cli.py +5 -5729
  5. sky/client/cli/__init__.py +0 -0
  6. sky/client/{cli.py → cli/command.py} +108 -632
  7. sky/client/cli/deprecation_utils.py +99 -0
  8. sky/client/cli/flags.py +342 -0
  9. sky/client/sdk.py +22 -2
  10. sky/clouds/kubernetes.py +5 -0
  11. sky/dashboard/out/404.html +1 -1
  12. sky/dashboard/out/_next/static/chunks/641.c8e452bc5070a630.js +1 -0
  13. sky/dashboard/out/_next/static/chunks/984.ae8c08791d274ca0.js +50 -0
  14. sky/dashboard/out/_next/static/chunks/pages/users-928edf039219e47b.js +1 -0
  15. sky/dashboard/out/_next/static/chunks/webpack-0263b00d6a10e64a.js +1 -0
  16. sky/dashboard/out/_next/static/css/6c12ecc3bd2239b6.css +3 -0
  17. sky/dashboard/out/_next/static/{vA3PPpkBwpRTRNBHFYAw_ → whetcrnbXtqQcMRbXUbhW}/_buildManifest.js +1 -1
  18. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  19. sky/dashboard/out/clusters/[cluster].html +1 -1
  20. sky/dashboard/out/clusters.html +1 -1
  21. sky/dashboard/out/config.html +1 -1
  22. sky/dashboard/out/index.html +1 -1
  23. sky/dashboard/out/infra/[context].html +1 -1
  24. sky/dashboard/out/infra.html +1 -1
  25. sky/dashboard/out/jobs/[job].html +1 -1
  26. sky/dashboard/out/jobs.html +1 -1
  27. sky/dashboard/out/users.html +1 -1
  28. sky/dashboard/out/workspace/new.html +1 -1
  29. sky/dashboard/out/workspaces/[name].html +1 -1
  30. sky/dashboard/out/workspaces.html +1 -1
  31. sky/global_user_state.py +50 -11
  32. sky/jobs/constants.py +0 -2
  33. sky/jobs/scheduler.py +7 -4
  34. sky/jobs/server/core.py +6 -3
  35. sky/jobs/state.py +9 -8
  36. sky/jobs/utils.py +1 -1
  37. sky/logs/__init__.py +17 -0
  38. sky/logs/agent.py +73 -0
  39. sky/logs/gcp.py +91 -0
  40. sky/models.py +1 -0
  41. sky/provision/common.py +10 -0
  42. sky/provision/instance_setup.py +35 -0
  43. sky/provision/provisioner.py +11 -0
  44. sky/resources.py +7 -6
  45. sky/serve/server/core.py +5 -0
  46. sky/server/common.py +21 -9
  47. sky/server/requests/payloads.py +19 -1
  48. sky/server/server.py +121 -29
  49. sky/setup_files/dependencies.py +11 -1
  50. sky/skylet/constants.py +13 -1
  51. sky/skylet/job_lib.py +75 -19
  52. sky/templates/kubernetes-ray.yml.j2 +9 -0
  53. sky/users/permission.py +49 -19
  54. sky/users/rbac.py +10 -1
  55. sky/users/server.py +274 -9
  56. sky/utils/env_options.py +6 -0
  57. sky/utils/schemas.py +42 -2
  58. {skypilot_nightly-1.0.0.dev20250617.dist-info → skypilot_nightly-1.0.0.dev20250619.dist-info}/METADATA +9 -1
  59. {skypilot_nightly-1.0.0.dev20250617.dist-info → skypilot_nightly-1.0.0.dev20250619.dist-info}/RECORD +70 -63
  60. sky/dashboard/out/_next/static/chunks/600.bd2ed8c076b720ec.js +0 -16
  61. sky/dashboard/out/_next/static/chunks/pages/users-c69ffcab9d6e5269.js +0 -1
  62. sky/dashboard/out/_next/static/chunks/webpack-1b69b196a4dbffef.js +0 -1
  63. sky/dashboard/out/_next/static/css/8e97adcaacc15293.css +0 -3
  64. /sky/dashboard/out/_next/static/chunks/{37-824c707421f6f003.js → 37-3a4d77ad62932eaf.js} +0 -0
  65. /sky/dashboard/out/_next/static/chunks/{843-ab9c4f609239155f.js → 843-b3040e493f6e7947.js} +0 -0
  66. /sky/dashboard/out/_next/static/chunks/{938-385d190b95815e11.js → 938-1493ac755eadeb35.js} +0 -0
  67. /sky/dashboard/out/_next/static/chunks/{973-c807fc34f09c7df3.js → 973-db3c97c2bfbceb65.js} +0 -0
  68. /sky/dashboard/out/_next/static/chunks/pages/{_app-32b2caae3445bf3b.js → _app-c416e87d5c2715cf.js} +0 -0
  69. /sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-c8c2191328532b7d.js → [name]-c4ff1ec05e2f3daf.js} +0 -0
  70. /sky/dashboard/out/_next/static/{vA3PPpkBwpRTRNBHFYAw_ → whetcrnbXtqQcMRbXUbhW}/_ssgManifest.js +0 -0
  71. {skypilot_nightly-1.0.0.dev20250617.dist-info → skypilot_nightly-1.0.0.dev20250619.dist-info}/WHEEL +0 -0
  72. {skypilot_nightly-1.0.0.dev20250617.dist-info → skypilot_nightly-1.0.0.dev20250619.dist-info}/entry_points.txt +0 -0
  73. {skypilot_nightly-1.0.0.dev20250617.dist-info → skypilot_nightly-1.0.0.dev20250619.dist-info}/licenses/LICENSE +0 -0
  74. {skypilot_nightly-1.0.0.dev20250617.dist-info → skypilot_nightly-1.0.0.dev20250619.dist-info}/top_level.txt +0 -0
sky/users/server.py CHANGED
@@ -1,19 +1,30 @@
1
1
  """REST API for workspace management."""
2
2
 
3
- from typing import Any, Dict, List
3
+ import contextlib
4
+ import hashlib
5
+ import os
6
+ from typing import Any, Dict, Generator, List
4
7
 
5
8
  import fastapi
9
+ import filelock
10
+ from passlib.hash import apr_md5_crypt
6
11
 
7
12
  from sky import global_user_state
13
+ from sky import models
8
14
  from sky import sky_logging
9
15
  from sky.server.requests import payloads
10
16
  from sky.skylet import constants
11
17
  from sky.users import permission
12
18
  from sky.users import rbac
13
19
  from sky.utils import common
20
+ from sky.utils import common_utils
14
21
 
15
22
  logger = sky_logging.init_logger(__name__)
16
23
 
24
+ # Filelocks for the user management.
25
+ USER_LOCK_PATH = os.path.expanduser('~/.sky/.{user_id}.lock')
26
+ USER_LOCK_TIMEOUT_SECONDS = 20
27
+
17
28
  router = fastapi.APIRouter()
18
29
 
19
30
 
@@ -39,29 +50,283 @@ async def get_current_user_role(request: fastapi.Request):
39
50
  # hash for the request without 'X-Auth-Request-Email' header?
40
51
  auth_user = request.state.auth_user
41
52
  if auth_user is None:
42
- return {'name': '', 'role': rbac.RoleName.ADMIN.value}
53
+ return {'id': '', 'name': '', 'role': rbac.RoleName.ADMIN.value}
43
54
  user_roles = permission.permission_service.get_user_roles(auth_user.id)
44
- return {'name': auth_user.name, 'role': user_roles[0] if user_roles else ''}
55
+ return {
56
+ 'id': auth_user.id,
57
+ 'name': auth_user.name,
58
+ 'role': user_roles[0] if user_roles else ''
59
+ }
60
+
61
+
62
+ @router.post('/create')
63
+ async def user_create(user_create_body: payloads.UserCreateBody) -> None:
64
+ username = user_create_body.username
65
+ password = user_create_body.password
66
+ role = user_create_body.role
67
+
68
+ if not username or not password:
69
+ raise fastapi.HTTPException(status_code=400,
70
+ detail='Username and password are required')
71
+ if role and role not in rbac.get_supported_roles():
72
+ raise fastapi.HTTPException(status_code=400,
73
+ detail=f'Invalid role: {role}')
74
+
75
+ if not role:
76
+ role = rbac.get_default_role()
77
+
78
+ # Create user
79
+ password_hash = apr_md5_crypt.hash(password)
80
+ user_hash = hashlib.md5(
81
+ username.encode()).hexdigest()[:common_utils.USER_HASH_LENGTH]
82
+ with _user_lock(user_hash):
83
+ # Check if user already exists
84
+ if global_user_state.get_user_by_name(username):
85
+ raise fastapi.HTTPException(
86
+ status_code=400, detail=f'User {username!r} already exists')
87
+ global_user_state.add_or_update_user(
88
+ models.User(id=user_hash, name=username, password=password_hash))
89
+ permission.permission_service.update_role(user_hash, role)
45
90
 
46
91
 
47
92
  @router.post('/update')
48
- async def user_update(user_update_body: payloads.UserUpdateBody) -> None:
93
+ async def user_update(request: fastapi.Request,
94
+ user_update_body: payloads.UserUpdateBody) -> None:
49
95
  """Updates the user role."""
50
96
  user_id = user_update_body.user_id
51
97
  role = user_update_body.role
98
+ password = user_update_body.password
52
99
  supported_roles = rbac.get_supported_roles()
53
- if role not in supported_roles:
100
+ if role and role not in supported_roles:
54
101
  raise fastapi.HTTPException(status_code=400,
55
102
  detail=f'Invalid role: {role}')
103
+ target_user_roles = permission.permission_service.get_user_roles(user_id)
104
+ need_update_role = role and (not target_user_roles or
105
+ (role != target_user_roles[0]))
106
+ current_user = request.state.auth_user
107
+ if current_user is not None:
108
+ current_user_roles = permission.permission_service.get_user_roles(
109
+ current_user.id)
110
+ if not current_user_roles:
111
+ raise fastapi.HTTPException(status_code=403, detail='Invalid user')
112
+ if current_user_roles[0] != rbac.RoleName.ADMIN.value:
113
+ if need_update_role:
114
+ raise fastapi.HTTPException(
115
+ status_code=403, detail='Only admin can update user role')
116
+ if password and user_id != current_user.id:
117
+ raise fastapi.HTTPException(
118
+ status_code=403,
119
+ detail='Only admin can update password for other users')
56
120
  user_info = global_user_state.get_user(user_id)
57
121
  if user_info is None:
58
122
  raise fastapi.HTTPException(status_code=400,
59
123
  detail=f'User {user_id} does not exist')
60
- # Disallow updating roles for the internal users.
61
- if user_info.id in [common.SERVER_ID, constants.SKYPILOT_SYSTEM_USER_ID]:
124
+ # Disallow updating the internal users.
125
+ if need_update_role and user_info.id in [
126
+ common.SERVER_ID, constants.SKYPILOT_SYSTEM_USER_ID
127
+ ]:
62
128
  raise fastapi.HTTPException(status_code=400,
63
129
  detail=f'Cannot update role for internal '
64
130
  f'API server user {user_info.name}')
131
+ if password and user_info.id == constants.SKYPILOT_SYSTEM_USER_ID:
132
+ raise fastapi.HTTPException(
133
+ status_code=400,
134
+ detail=f'Cannot update password for internal '
135
+ f'API server user {user_info.name}')
136
+
137
+ with _user_lock(user_info.id):
138
+ if password:
139
+ password_hash = apr_md5_crypt.hash(password)
140
+ global_user_state.add_or_update_user(
141
+ models.User(id=user_info.id,
142
+ name=user_info.name,
143
+ password=password_hash))
144
+ if role and need_update_role:
145
+ # Update user role in casbin policy
146
+ permission.permission_service.update_role(user_info.id, role)
147
+
148
+
149
+ @router.post('/delete')
150
+ async def user_delete(user_delete_body: payloads.UserDeleteBody) -> None:
151
+ user_id = user_delete_body.user_id
152
+
153
+ user_info = global_user_state.get_user(user_id)
154
+ if user_info is None:
155
+ raise fastapi.HTTPException(status_code=400,
156
+ detail=f'User {user_id} does not exist')
157
+ # Disallow deleting the internal users.
158
+ if user_info.id in [common.SERVER_ID, constants.SKYPILOT_SYSTEM_USER_ID]:
159
+ raise fastapi.HTTPException(status_code=400,
160
+ detail=f'Cannot delete internal '
161
+ f'API server user {user_info.name}')
162
+ with _user_lock(user_id):
163
+ global_user_state.delete_user(user_id)
164
+ permission.permission_service.delete_user(user_id)
165
+
166
+
167
+ @router.post('/import')
168
+ async def user_import(
169
+ user_import_body: payloads.UserImportBody) -> Dict[str, Any]:
170
+ """Import users from CSV content."""
171
+ csv_content = user_import_body.csv_content
172
+
173
+ if not csv_content:
174
+ raise fastapi.HTTPException(status_code=400,
175
+ detail='CSV content is required')
176
+
177
+ # Parse CSV content
178
+ lines = csv_content.strip().split('\n')
179
+ if len(lines) < 2:
180
+ raise fastapi.HTTPException(
181
+ status_code=400,
182
+ detail='CSV must have at least a header row and one data row')
183
+
184
+ # Parse headers
185
+ headers = [h.strip().lower() for h in lines[0].split(',')]
186
+ required_headers = ['username', 'password', 'role']
187
+
188
+ # Check if all required headers are present
189
+ missing_headers = [
190
+ header for header in required_headers if header not in headers
191
+ ]
192
+ if missing_headers:
193
+ raise fastapi.HTTPException(
194
+ status_code=400,
195
+ detail=f'Missing required columns: {", ".join(missing_headers)}')
196
+
197
+ # Parse user data
198
+ users_to_create = []
199
+ parse_errors = []
200
+
201
+ for i, line in enumerate(lines[1:], start=2):
202
+ if not line.strip():
203
+ continue # Skip empty lines
204
+
205
+ values = [v.strip() for v in line.split(',')]
206
+ if len(values) != len(headers):
207
+ parse_errors.append(f'Line {i}: Invalid number of columns')
208
+ continue
209
+
210
+ user_data = dict(zip(headers, values))
211
+
212
+ # Validate required fields
213
+ if not user_data.get('username') or not user_data.get('password'):
214
+ parse_errors.append(f'Line {i}: Username and password are required')
215
+ continue
216
+
217
+ # Validate role
218
+ role = user_data.get('role', '').lower()
219
+ if role and role not in rbac.get_supported_roles():
220
+ role = rbac.get_default_role() # Default to default role if invalid
221
+ elif not role:
222
+ role = rbac.get_default_role()
223
+
224
+ users_to_create.append({
225
+ 'username': user_data['username'],
226
+ 'password': user_data['password'],
227
+ 'role': role
228
+ })
229
+
230
+ if not users_to_create and parse_errors:
231
+ raise fastapi.HTTPException(
232
+ status_code=400,
233
+ detail=f'No valid users found. Errors: {"; ".join(parse_errors)}')
234
+
235
+ # Create users
236
+ success_count = 0
237
+ error_count = 0
238
+ creation_errors = []
239
+
240
+ for user_data in users_to_create:
241
+ try:
242
+ username = user_data['username']
243
+ password = user_data['password']
244
+ role = user_data['role']
245
+
246
+ # Check if user already exists
247
+ if global_user_state.get_user_by_name(username):
248
+ error_count += 1
249
+ creation_errors.append(f'{username}: User already exists')
250
+ continue
251
+
252
+ # Check if password is already hashed (APR1 hash)
253
+ if password.startswith('$apr1$'):
254
+ # Password is already hashed, use it directly
255
+ password_hash = password
256
+ else:
257
+ # Password is plain text, hash it
258
+ password_hash = apr_md5_crypt.hash(password)
259
+
260
+ user_hash = hashlib.md5(
261
+ username.encode()).hexdigest()[:common_utils.USER_HASH_LENGTH]
262
+
263
+ with _user_lock(user_hash):
264
+ global_user_state.add_or_update_user(
265
+ models.User(id=user_hash,
266
+ name=username,
267
+ password=password_hash))
268
+ permission.permission_service.update_role(user_hash, role)
269
+
270
+ success_count += 1
271
+
272
+ except Exception as e: # pylint: disable=broad-except
273
+ error_count += 1
274
+ creation_errors.append(f'{user_data["username"]}: {str(e)}')
275
+
276
+ return {
277
+ 'success_count': success_count,
278
+ 'error_count': error_count,
279
+ 'total_processed': len(users_to_create),
280
+ 'parse_errors': parse_errors,
281
+ 'creation_errors': creation_errors
282
+ }
283
+
284
+
285
+ @router.get('/export')
286
+ async def user_export() -> Dict[str, Any]:
287
+ """Export all users as CSV content."""
288
+ try:
289
+ # Get all users
290
+ user_list = global_user_state.get_all_users()
291
+
292
+ # Create CSV content
293
+ csv_lines = ['username,password,role'] # Header
294
+
295
+ for user in user_list:
296
+ # Get user role
297
+ user_roles = permission.permission_service.get_user_roles(user.id)
298
+ role = user_roles[0] if user_roles else rbac.get_default_role()
299
+ # Avoid exporting `None` values
300
+ line = ''
301
+ if user.name:
302
+ line += user.name
303
+ line += ','
304
+ if user.password:
305
+ line += user.password
306
+ line += ','
307
+ if role:
308
+ line += role
309
+ csv_lines.append(line)
310
+
311
+ csv_content = '\n'.join(csv_lines)
312
+
313
+ return {'csv_content': csv_content, 'user_count': len(user_list)}
314
+
315
+ except Exception as e:
316
+ raise fastapi.HTTPException(status_code=500,
317
+ detail=f'Failed to export users: {str(e)}')
318
+
65
319
 
66
- # Update user role in casbin policy
67
- permission.permission_service.update_role(user_info.id, role)
320
+ @contextlib.contextmanager
321
+ def _user_lock(user_id: str) -> Generator[None, None, None]:
322
+ """Context manager for user lock."""
323
+ try:
324
+ with filelock.FileLock(USER_LOCK_PATH.format(user_id=user_id),
325
+ USER_LOCK_TIMEOUT_SECONDS):
326
+ yield
327
+ except filelock.Timeout as e:
328
+ raise RuntimeError(f'Failed to update user due to a timeout '
329
+ f'when trying to acquire the lock at '
330
+ f'{USER_LOCK_PATH.format(user_id=user_id)}. '
331
+ 'Please try again or manually remove the lock '
332
+ f'file if you believe it is stale.') from e
sky/utils/env_options.py CHANGED
@@ -19,8 +19,14 @@ class Options(enum.Enum):
19
19
  # will not be multiple identities, and skipping the check can increase
20
20
  # robustness.
21
21
  SKIP_CLOUD_IDENTITY_CHECK = ('SKYPILOT_SKIP_CLOUD_IDENTITY_CHECK', False)
22
+ # Internal: This environment variable is set to "true" by Buildkite
23
+ # agent when running tests. It is used to identify when SkyPilot is
24
+ # running in a Buildkite container environment, which requires special
25
+ # handling for networking between containers.
26
+ RUNNING_IN_BUILDKITE = ('BUILDKITE', False)
22
27
 
23
28
  def __init__(self, env_var: str, default: bool) -> None:
29
+ super().__init__()
24
30
  self.env_var = env_var
25
31
  self.default = default
26
32
 
sky/utils/schemas.py CHANGED
@@ -291,8 +291,8 @@ def _get_single_resources_schema():
291
291
  'autostop': _AUTOSTOP_SCHEMA,
292
292
  'priority': {
293
293
  'type': 'integer',
294
- 'minimum': 0,
295
- 'maximum': 1000,
294
+ 'minimum': constants.MIN_PRIORITY,
295
+ 'maximum': constants.MAX_PRIORITY,
296
296
  },
297
297
  # The following fields are for internal use only. Should not be
298
298
  # specified in the task config.
@@ -1090,6 +1090,16 @@ def get_config_schema():
1090
1090
  }
1091
1091
  }
1092
1092
  },
1093
+ 'kueue': {
1094
+ 'type': 'object',
1095
+ 'required': [],
1096
+ 'additionalProperties': False,
1097
+ 'properties': {
1098
+ 'local_queue_name': {
1099
+ 'type': 'string',
1100
+ },
1101
+ },
1102
+ },
1093
1103
  }
1094
1104
  },
1095
1105
  'ssh': {
@@ -1380,6 +1390,35 @@ def get_config_schema():
1380
1390
  }
1381
1391
  }
1382
1392
 
1393
+ logs_schema = {
1394
+ 'type': 'object',
1395
+ 'required': ['store'],
1396
+ 'additionalProperties': False,
1397
+ 'properties': {
1398
+ 'store': {
1399
+ 'type': 'string',
1400
+ 'case_insensitive_enum': ['gcp'],
1401
+ },
1402
+ 'gcp': {
1403
+ 'type': 'object',
1404
+ 'properties': {
1405
+ 'project_id': {
1406
+ 'type': 'string',
1407
+ },
1408
+ 'credentials_file': {
1409
+ 'type': 'string',
1410
+ },
1411
+ 'additional_labels': {
1412
+ 'type': 'object',
1413
+ 'additionalProperties': {
1414
+ 'type': 'string',
1415
+ },
1416
+ },
1417
+ },
1418
+ },
1419
+ },
1420
+ }
1421
+
1383
1422
  for cloud, config in cloud_configs.items():
1384
1423
  if cloud == 'aws':
1385
1424
  config['properties'].update(
@@ -1412,6 +1451,7 @@ def get_config_schema():
1412
1451
  'workspaces': workspaces_schema,
1413
1452
  'provision': provision_configs,
1414
1453
  'rbac': rbac_schema,
1454
+ 'logs': logs_schema,
1415
1455
  **cloud_configs,
1416
1456
  },
1417
1457
  }
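{skypilot_nightly-1.0.0.dev20250617.dist-info → skypilot_nightly-1.0.0.dev20250619.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@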
1
1
  Metadata-Version: 2.4
2
2
  Name: skypilot-nightly
3
- Version: 1.0.0.dev20250617
3
+ Version: 1.0.0.dev20250619
4
4
  Summary: SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.
5
5
  Author: SkyPilot Team
6
6
  License: Apache 2.0
@@ -51,6 +51,7 @@ Requires-Dist: sqlalchemy
51
51
  Requires-Dist: psycopg2-binary
52
52
  Requires-Dist: casbin
53
53
  Requires-Dist: sqlalchemy_adapter
54
+ Requires-Dist: passlib
54
55
  Provides-Extra: aws
55
56
  Requires-Dist: awscli>=1.27.10; extra == "aws"
56
57
  Requires-Dist: botocore>=1.29.10; extra == "aws"
@@ -120,6 +121,10 @@ Requires-Dist: botocore>=1.29.10; extra == "nebius"
120
121
  Requires-Dist: boto3>=1.26.1; extra == "nebius"
121
122
  Requires-Dist: colorama<0.4.5; extra == "nebius"
122
123
  Provides-Extra: hyperbolic
124
+ Provides-Extra: server
125
+ Requires-Dist: casbin; extra == "server"
126
+ Requires-Dist: sqlalchemy_adapter; extra == "server"
127
+ Requires-Dist: passlib; extra == "server"
123
128
  Provides-Extra: all
124
129
  Requires-Dist: awscli>=1.27.10; extra == "all"
125
130
  Requires-Dist: botocore>=1.29.10; extra == "all"
@@ -169,6 +174,9 @@ Requires-Dist: awscli>=1.27.10; extra == "all"
169
174
  Requires-Dist: botocore>=1.29.10; extra == "all"
170
175
  Requires-Dist: boto3>=1.26.1; extra == "all"
171
176
  Requires-Dist: colorama<0.4.5; extra == "all"
177
+ Requires-Dist: casbin; extra == "all"
178
+ Requires-Dist: sqlalchemy_adapter; extra == "all"
179
+ Requires-Dist: passlib; extra == "all"
172
180
  Dynamic: author
173
181
  Dynamic: classifier
174
182
  Dynamic: description