skypilot-nightly 1.0.0.dev20250627__py3-none-any.whl → 1.0.0.dev20250628__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/kubernetes.py +7 -0
  3. sky/adaptors/nebius.py +2 -2
  4. sky/authentication.py +12 -5
  5. sky/backends/backend_utils.py +92 -26
  6. sky/check.py +5 -2
  7. sky/client/cli/command.py +38 -6
  8. sky/client/sdk.py +217 -167
  9. sky/client/service_account_auth.py +47 -0
  10. sky/clouds/aws.py +10 -4
  11. sky/clouds/azure.py +5 -2
  12. sky/clouds/cloud.py +5 -2
  13. sky/clouds/gcp.py +31 -18
  14. sky/clouds/kubernetes.py +54 -34
  15. sky/clouds/nebius.py +8 -2
  16. sky/clouds/ssh.py +5 -2
  17. sky/clouds/utils/aws_utils.py +10 -4
  18. sky/clouds/utils/gcp_utils.py +22 -7
  19. sky/clouds/utils/oci_utils.py +62 -14
  20. sky/dashboard/out/404.html +1 -1
  21. sky/dashboard/out/_next/static/{HudU4f4Xsy-cP51JvXSZ- → ZYLkkWSYZjJhLVsObh20y}/_buildManifest.js +1 -1
  22. sky/dashboard/out/_next/static/chunks/43-f38a531f6692f281.js +1 -0
  23. sky/dashboard/out/_next/static/chunks/601-111d06d9ded11d00.js +1 -0
  24. sky/dashboard/out/_next/static/chunks/{616-d6128fa9e7cae6e6.js → 616-50a620ac4a23deb4.js} +1 -1
  25. sky/dashboard/out/_next/static/chunks/691.fd9292250ab089af.js +21 -0
  26. sky/dashboard/out/_next/static/chunks/{785.dc2686c3c1235554.js → 785.3446c12ffdf3d188.js} +1 -1
  27. sky/dashboard/out/_next/static/chunks/871-e547295e7e21399c.js +6 -0
  28. sky/dashboard/out/_next/static/chunks/937.72796f7afe54075b.js +1 -0
  29. sky/dashboard/out/_next/static/chunks/938-0a770415b5ce4649.js +1 -0
  30. sky/dashboard/out/_next/static/chunks/982.d7bd80ed18cad4cc.js +1 -0
  31. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-21080826c6095f21.js +6 -0
  32. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-77d4816945b04793.js +6 -0
  33. sky/dashboard/out/_next/static/chunks/pages/{clusters-f119a5630a1efd61.js → clusters-65b2c90320b8afb8.js} +1 -1
  34. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-64bdc0b2d3a44709.js +16 -0
  35. sky/dashboard/out/_next/static/chunks/pages/{jobs-0a5695ff3075d94a.js → jobs-df7407b5e37d3750.js} +1 -1
  36. sky/dashboard/out/_next/static/chunks/pages/{users-4978cbb093e141e7.js → users-d7684eaa04c4f58f.js} +1 -1
  37. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-cb7e720b739de53a.js → [name]-04e1b3ad4207b1e9.js} +1 -1
  38. sky/dashboard/out/_next/static/chunks/pages/{workspaces-50e230828730cfb3.js → workspaces-c470366a6179f16e.js} +1 -1
  39. sky/dashboard/out/_next/static/chunks/{webpack-08fdb9e6070127fc.js → webpack-75a3310ef922a299.js} +1 -1
  40. sky/dashboard/out/_next/static/css/605ac87514049058.css +3 -0
  41. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  42. sky/dashboard/out/clusters/[cluster].html +1 -1
  43. sky/dashboard/out/clusters.html +1 -1
  44. sky/dashboard/out/config.html +1 -1
  45. sky/dashboard/out/index.html +1 -1
  46. sky/dashboard/out/infra/[context].html +1 -1
  47. sky/dashboard/out/infra.html +1 -1
  48. sky/dashboard/out/jobs/[job].html +1 -1
  49. sky/dashboard/out/jobs.html +1 -1
  50. sky/dashboard/out/users.html +1 -1
  51. sky/dashboard/out/volumes.html +1 -1
  52. sky/dashboard/out/workspace/new.html +1 -1
  53. sky/dashboard/out/workspaces/[name].html +1 -1
  54. sky/dashboard/out/workspaces.html +1 -1
  55. sky/data/storage.py +8 -3
  56. sky/global_user_state.py +257 -9
  57. sky/jobs/client/sdk.py +20 -25
  58. sky/models.py +16 -0
  59. sky/provision/kubernetes/config.py +1 -1
  60. sky/provision/kubernetes/instance.py +7 -4
  61. sky/provision/kubernetes/network.py +15 -9
  62. sky/provision/kubernetes/network_utils.py +42 -23
  63. sky/provision/kubernetes/utils.py +73 -35
  64. sky/provision/nebius/utils.py +10 -4
  65. sky/resources.py +10 -4
  66. sky/serve/client/sdk.py +28 -34
  67. sky/server/common.py +51 -3
  68. sky/server/constants.py +3 -0
  69. sky/server/requests/executor.py +4 -0
  70. sky/server/requests/payloads.py +33 -0
  71. sky/server/requests/requests.py +19 -0
  72. sky/server/rest.py +6 -15
  73. sky/server/server.py +121 -6
  74. sky/skylet/constants.py +6 -0
  75. sky/skypilot_config.py +32 -4
  76. sky/users/permission.py +29 -0
  77. sky/users/server.py +384 -5
  78. sky/users/token_service.py +196 -0
  79. sky/utils/common_utils.py +4 -5
  80. sky/utils/config_utils.py +41 -0
  81. sky/utils/controller_utils.py +5 -1
  82. sky/utils/resource_checker.py +153 -0
  83. sky/utils/resources_utils.py +12 -4
  84. sky/utils/schemas.py +87 -60
  85. sky/utils/subprocess_utils.py +2 -6
  86. sky/workspaces/core.py +9 -117
  87. {skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/METADATA +1 -1
  88. {skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/RECORD +94 -91
  89. sky/dashboard/out/_next/static/chunks/43-36177d00f6956ab2.js +0 -1
  90. sky/dashboard/out/_next/static/chunks/690.55f9eed3be903f56.js +0 -16
  91. sky/dashboard/out/_next/static/chunks/871-3db673be3ee3750b.js +0 -6
  92. sky/dashboard/out/_next/static/chunks/937.3759f538f11a0953.js +0 -1
  93. sky/dashboard/out/_next/static/chunks/938-068520cc11738deb.js +0 -1
  94. sky/dashboard/out/_next/static/chunks/973-81b2d057178adb76.js +0 -1
  95. sky/dashboard/out/_next/static/chunks/982.1b61658204416b0f.js +0 -1
  96. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-aff040d7bc5d0086.js +0 -6
  97. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-8040f2483897ed0c.js +0 -6
  98. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-e4b23128db0774cd.js +0 -16
  99. sky/dashboard/out/_next/static/css/52082cf558ec9705.css +0 -3
  100. /sky/dashboard/out/_next/static/{HudU4f4Xsy-cP51JvXSZ- → ZYLkkWSYZjJhLVsObh20y}/_ssgManifest.js +0 -0
  101. /sky/dashboard/out/_next/static/chunks/pages/{_app-9a3ce3170d2edcec.js → _app-050a9e637b057b24.js} +0 -0
  102. {skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/WHEEL +0 -0
  103. {skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/entry_points.txt +0 -0
  104. {skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/licenses/LICENSE +0 -0
  105. {skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/top_level.txt +0 -0
sky/users/server.py CHANGED
@@ -3,6 +3,9 @@
3
3
  import contextlib
4
4
  import hashlib
5
5
  import os
6
+ import re
7
+ import secrets
8
+ import time
6
9
  from typing import Any, Dict, Generator, List
7
10
 
8
11
  import fastapi
@@ -16,8 +19,10 @@ from sky.server.requests import payloads
16
19
  from sky.skylet import constants
17
20
  from sky.users import permission
18
21
  from sky.users import rbac
22
+ from sky.users import token_service
19
23
  from sky.utils import common
20
24
  from sky.utils import common_utils
25
+ from sky.utils import resource_checker
21
26
 
22
27
  logger = sky_logging.init_logger(__name__)
23
28
 
@@ -34,10 +39,15 @@ async def users() -> List[Dict[str, Any]]:
34
39
  all_users = []
35
40
  user_list = global_user_state.get_all_users()
36
41
  for user in user_list:
42
+ # Filter out service accounts - they have IDs starting with "sa-"
43
+ if user.is_service_account():
44
+ continue
45
+
37
46
  user_roles = permission.permission_service.get_user_roles(user.id)
38
47
  all_users.append({
39
48
  'id': user.id,
40
49
  'name': user.name,
50
+ 'created_at': user.created_at,
41
51
  'role': user_roles[0] if user_roles else ''
42
52
  })
43
53
  return all_users
@@ -146,10 +156,8 @@ async def user_update(request: fastapi.Request,
146
156
  permission.permission_service.update_role(user_info.id, role)
147
157
 
148
158
 
149
- @router.post('/delete')
150
- async def user_delete(user_delete_body: payloads.UserDeleteBody) -> None:
151
- user_id = user_delete_body.user_id
152
-
159
+ def _delete_user(user_id: str) -> None:
160
+ """Delete a user."""
153
161
  user_info = global_user_state.get_user(user_id)
154
162
  if user_info is None:
155
163
  raise fastapi.HTTPException(status_code=400,
@@ -159,11 +167,25 @@ async def user_delete(user_delete_body: payloads.UserDeleteBody) -> None:
159
167
  raise fastapi.HTTPException(status_code=400,
160
168
  detail=f'Cannot delete internal '
161
169
  f'API server user {user_info.name}')
170
+
171
+ # Check for active clusters and managed jobs owned by the user
172
+ try:
173
+ resource_checker.check_no_active_resources_for_users([(user_id,
174
+ 'delete')])
175
+ except ValueError as e:
176
+ raise fastapi.HTTPException(status_code=400, detail=str(e))
177
+
162
178
  with _user_lock(user_id):
163
179
  global_user_state.delete_user(user_id)
164
180
  permission.permission_service.delete_user(user_id)
165
181
 
166
182
 
183
+ @router.post('/delete')
184
+ async def user_delete(user_delete_body: payloads.UserDeleteBody) -> None:
185
+ user_id = user_delete_body.user_id
186
+ _delete_user(user_id)
187
+
188
+
167
189
  @router.post('/import')
168
190
  async def user_import(
169
191
  user_import_body: payloads.UserImportBody) -> Dict[str, Any]:
@@ -292,7 +314,12 @@ async def user_export() -> Dict[str, Any]:
292
314
  # Create CSV content
293
315
  csv_lines = ['username,password,role'] # Header
294
316
 
317
+ exported_users = []
295
318
  for user in user_list:
319
+ # Filter out service accounts - they have IDs starting with "sa-"
320
+ if user.is_service_account():
321
+ continue
322
+
296
323
  # Get user role
297
324
  user_roles = permission.permission_service.get_user_roles(user.id)
298
325
  role = user_roles[0] if user_roles else rbac.get_default_role()
@@ -307,10 +334,11 @@ async def user_export() -> Dict[str, Any]:
307
334
  if role:
308
335
  line += role
309
336
  csv_lines.append(line)
337
+ exported_users.append(user)
310
338
 
311
339
  csv_content = '\n'.join(csv_lines)
312
340
 
313
- return {'csv_content': csv_content, 'user_count': len(user_list)}
341
+ return {'csv_content': csv_content, 'user_count': len(exported_users)}
314
342
 
315
343
  except Exception as e:
316
344
  raise fastapi.HTTPException(status_code=500,
@@ -330,3 +358,354 @@ def _user_lock(user_id: str) -> Generator[None, None, None]:
330
358
  f'{USER_LOCK_PATH.format(user_id=user_id)}. '
331
359
  'Please try again or manually remove the lock '
332
360
  f'file if you believe it is stale.') from e
361
+
362
+
363
+ # ===============================
364
+ # Service account tokens
365
+ # ===============================
366
+ # SkyPilot currently does not distinguish between service accounts and service
367
+ # account tokens, i.e. service accounts have a 1-1 mapping to service account
368
+ # tokens.
369
+
370
+
371
+ @router.get('/service-account-tokens')
372
+ async def get_service_account_tokens(
373
+ request: fastapi.Request) -> List[Dict[str, Any]]:
374
+ """Get service account tokens. All users can see all tokens."""
375
+ auth_user = request.state.auth_user
376
+ if auth_user is None:
377
+ raise fastapi.HTTPException(status_code=401,
378
+ detail='Authentication required')
379
+
380
+ # All authenticated users can see all tokens
381
+ tokens = global_user_state.get_all_service_account_tokens()
382
+
383
+ result = []
384
+ for token in tokens:
385
+ token_info = {
386
+ 'token_id': token['token_id'],
387
+ 'token_name': token['token_name'],
388
+ 'created_at': token['created_at'],
389
+ 'last_used_at': token['last_used_at'],
390
+ 'expires_at': token['expires_at'],
391
+ 'creator_user_hash': token['creator_user_hash'],
392
+ 'service_account_user_id': token['service_account_user_id'],
393
+ }
394
+
395
+ # Add creator display name
396
+ creator_user = global_user_state.get_user(token['creator_user_hash'])
397
+ token_info[
398
+ 'creator_name'] = creator_user.name if creator_user else 'Unknown'
399
+
400
+ # Add service account name
401
+ sa_user = global_user_state.get_user(token['service_account_user_id'])
402
+ token_info['service_account_name'] = (sa_user.name if sa_user else
403
+ token['token_name'])
404
+
405
+ # Add service account roles
406
+ roles = permission.permission_service.get_user_roles(
407
+ token['service_account_user_id'])
408
+ token_info['service_account_roles'] = roles
409
+
410
+ result.append(token_info)
411
+
412
+ return result
413
+
414
+
415
+ def _generate_service_account_user_id() -> str:
416
+ """Generate a unique user ID for a service account."""
417
+ random_suffix = secrets.token_hex(16) # 16 random bytes -> 32-character hex string
418
+ service_account_id = (f'sa-{random_suffix}')
419
+ return service_account_id
420
+
421
+
422
+ @router.post('/service-account-tokens')
423
+ async def create_service_account_token(
424
+ request: fastapi.Request,
425
+ token_body: payloads.ServiceAccountTokenCreateBody) -> Dict[str, Any]:
426
+ """Create a new service account token."""
427
+ auth_user = request.state.auth_user
428
+ if auth_user is None:
429
+ raise fastapi.HTTPException(status_code=401,
430
+ detail='Authentication required')
431
+
432
+ token_name = token_body.token_name.strip()
433
+
434
+ # Check if token follows a valid format
435
+ if not re.match(constants.CLUSTER_NAME_VALID_REGEX, token_name):
436
+ raise fastapi.HTTPException(
437
+ status_code=400,
438
+ detail='Token name must contain only letters, numbers, and '
439
+ 'underscores. Please use a different name.')
440
+
441
+ # Validate expiration (allow 0 as special value for "never expire")
442
+ if (token_body.expires_in_days is not None and
443
+ token_body.expires_in_days < 0):
444
+ raise fastapi.HTTPException(
445
+ status_code=400,
446
+ detail='Expiration days must be positive or 0 for never expire')
447
+
448
+ try:
449
+ # Generate a unique service account user ID
450
+ service_account_user_id = _generate_service_account_user_id()
451
+
452
+ # Create a user entry for the service account
453
+ service_account_user = models.User(id=service_account_user_id,
454
+ name=token_name)
455
+ is_new_user = global_user_state.add_or_update_user(
456
+ service_account_user, allow_duplicate_name=False)
457
+
458
+ if not is_new_user:
459
+ raise fastapi.HTTPException(
460
+ status_code=400,
461
+ detail=f'Service account with name {token_name!r} '
462
+ f'already exists ({service_account_user_id}). '
463
+ 'Please use a different name.')
464
+
465
+ # Add service account to permission system with default role
466
+ # Import here to avoid circular imports
467
+ # pylint: disable=import-outside-toplevel
468
+ from sky.users.permission import permission_service
469
+ permission_service.add_user_if_not_exists(service_account_user_id)
470
+
471
+ # Handle expiration: 0 means "never expire"
472
+ expires_in_days = token_body.expires_in_days
473
+ if expires_in_days == 0:
474
+ expires_in_days = None
475
+
476
+ # Create JWT-based token with service account user ID
477
+ token_data = token_service.token_service.create_token(
478
+ creator_user_id=auth_user.id,
479
+ service_account_user_id=service_account_user_id,
480
+ token_name=token_name,
481
+ expires_in_days=expires_in_days)
482
+
483
+ # Store token metadata in database
484
+ global_user_state.add_service_account_token(
485
+ token_id=token_data['token_id'],
486
+ token_name=token_name,
487
+ token_hash=token_data['token_hash'],
488
+ creator_user_hash=auth_user.id,
489
+ service_account_user_id=service_account_user_id,
490
+ expires_at=token_data['expires_at'])
491
+
492
+ # Return the JWT token only once (never stored in plain text)
493
+ return {
494
+ 'token_id': token_data['token_id'],
495
+ 'token_name': token_name,
496
+ 'token': token_data['token'], # Full JWT token with sky_ prefix
497
+ 'expires_at': token_data['expires_at'],
498
+ 'service_account_user_id': service_account_user_id,
499
+ 'creator_user_id': auth_user.id,
500
+ 'message': 'Please save this token - it will not be shown again!'
501
+ }
502
+
503
+ except Exception as e: # pylint: disable=broad-except
504
+ logger.error(f'Failed to create service account token: {e}')
505
+ raise fastapi.HTTPException(
506
+ status_code=500,
507
+ detail=f'Failed to create service account token: {e}')
508
+
509
+
510
+ @router.post('/service-account-tokens/delete')
511
+ async def delete_service_account_token(
512
+ request: fastapi.Request,
513
+ token_body: payloads.ServiceAccountTokenDeleteBody) -> Dict[str, str]:
514
+ """Delete a service account token.
515
+
516
+ Admins can delete any token, users can only delete their own.
517
+ """
518
+ auth_user = request.state.auth_user
519
+ if auth_user is None:
520
+ raise fastapi.HTTPException(status_code=401,
521
+ detail='Authentication required')
522
+
523
+ # Get token info first
524
+ token_info = global_user_state.get_service_account_token(
525
+ token_body.token_id)
526
+ if token_info is None:
527
+ raise fastapi.HTTPException(status_code=404, detail='Token not found')
528
+
529
+ # Check permissions using Casbin policy system
530
+ if not permission.permission_service.check_service_account_token_permission(
531
+ auth_user.id, token_info['creator_user_hash'], 'delete'):
532
+ raise fastapi.HTTPException(
533
+ status_code=403,
534
+ detail='You can only delete your own tokens. Only admins can '
535
+ 'delete tokens owned by other users.')
536
+
537
+ # Try to delete the service account user first to make sure there are no
538
+ # active resources owned by the service account.
539
+ service_account_user_id = token_info['service_account_user_id']
540
+ _delete_user(service_account_user_id)
541
+
542
+ # Delete the token
543
+ deleted = global_user_state.delete_service_account_token(
544
+ token_body.token_id)
545
+ if not deleted:
546
+ raise fastapi.HTTPException(status_code=404, detail='Token not found')
547
+
548
+ return {'message': 'Token deleted successfully'}
549
+
550
+
551
+ @router.post('/service-account-tokens/get-role')
552
+ async def get_service_account_role(
553
+ request: fastapi.Request,
554
+ role_body: payloads.ServiceAccountTokenRoleBody) -> Dict[str, Any]:
555
+ """Get the role of a service account."""
556
+ auth_user = request.state.auth_user
557
+ if auth_user is None:
558
+ raise fastapi.HTTPException(status_code=401,
559
+ detail='Authentication required')
560
+
561
+ # Get token info to find the service account user ID
562
+ token_info = global_user_state.get_service_account_token(role_body.token_id)
563
+ if token_info is None:
564
+ raise fastapi.HTTPException(status_code=404, detail='Token not found')
565
+
566
+ # Check permissions - only creator or admin can view roles
567
+ if not permission.permission_service.check_service_account_token_permission(
568
+ auth_user.id, token_info['creator_user_hash'], 'view'):
569
+ raise fastapi.HTTPException(
570
+ status_code=403,
571
+ detail='You can only view roles for your own service accounts. '
572
+ 'Only admins can view roles for service accounts owned by other '
573
+ 'users.')
574
+
575
+ # Get service account roles
576
+ service_account_user_id = token_info['service_account_user_id']
577
+ roles = permission.permission_service.get_user_roles(
578
+ service_account_user_id)
579
+
580
+ return {
581
+ 'token_id': role_body.token_id,
582
+ 'service_account_user_id': service_account_user_id,
583
+ 'roles': roles
584
+ }
585
+
586
+
587
+ @router.post('/service-account-tokens/update-role')
588
+ async def update_service_account_role(
589
+ request: fastapi.Request,
590
+ role_body: payloads.ServiceAccountTokenUpdateRoleBody
591
+ ) -> Dict[str, str]:
592
+ """Update the role of a service account."""
593
+ auth_user = request.state.auth_user
594
+ if auth_user is None:
595
+ raise fastapi.HTTPException(status_code=401,
596
+ detail='Authentication required')
597
+
598
+ # Get token info to find the service account user ID
599
+ token_info = global_user_state.get_service_account_token(role_body.token_id)
600
+ if token_info is None:
601
+ raise fastapi.HTTPException(status_code=404, detail='Token not found')
602
+
603
+ # Check permissions - only creator or admin can update roles
604
+ if not permission.permission_service.check_service_account_token_permission(
605
+ auth_user.id, token_info['creator_user_hash'], 'update'):
606
+ raise fastapi.HTTPException(
607
+ status_code=403,
608
+ detail='You can only update roles for your own service accounts. '
609
+ 'Only admins can update roles for service accounts owned by other '
610
+ 'users.')
611
+
612
+ try:
613
+ # Update service account role
614
+ service_account_user_id = token_info['service_account_user_id']
615
+ permission.permission_service.update_role(service_account_user_id,
616
+ role_body.role)
617
+
618
+ return {
619
+ 'message': f'Service account role updated to {role_body.role}',
620
+ 'token_id': role_body.token_id,
621
+ 'service_account_user_id': service_account_user_id,
622
+ 'new_role': role_body.role
623
+ }
624
+ except Exception as e: # pylint: disable=broad-except
625
+ logger.error(f'Failed to update service account role: {e}')
626
+ raise fastapi.HTTPException(
627
+ status_code=500, detail='Failed to update service account role')
628
+
629
+
630
+ @router.post('/service-account-tokens/rotate')
631
+ async def rotate_service_account_token(
632
+ request: fastapi.Request,
633
+ token_body: payloads.ServiceAccountTokenRotateBody) -> Dict[str, Any]:
634
+ """Rotate a service account token.
635
+
636
+ Generates a new token value for an existing service account while keeping
637
+ the same service account identity and roles.
638
+ """
639
+ auth_user = request.state.auth_user
640
+ if auth_user is None:
641
+ raise fastapi.HTTPException(status_code=401,
642
+ detail='Authentication required')
643
+
644
+ # Get token info
645
+ token_info = global_user_state.get_service_account_token(
646
+ token_body.token_id)
647
+ if token_info is None:
648
+ raise fastapi.HTTPException(status_code=404, detail='Token not found')
649
+
650
+ # Check permissions - same as delete permission (only creator or admin)
651
+ if not permission.permission_service.check_service_account_token_permission(
652
+ auth_user.id, token_info['creator_user_hash'], 'delete'):
653
+ raise fastapi.HTTPException(
654
+ status_code=403,
655
+ detail='You can only rotate your own tokens. Only admins can '
656
+ 'rotate tokens owned by other users.')
657
+
658
+ # Validate expiration if provided (allow 0 as special value for "never
659
+ # expire")
660
+ if (token_body.expires_in_days is not None and
661
+ token_body.expires_in_days < 0):
662
+ raise fastapi.HTTPException(
663
+ status_code=400,
664
+ detail='Expiration days must be positive or 0 for never expire')
665
+
666
+ try:
667
+ # Use provided expiration or preserve original expiration logic
668
+ expires_in_days = token_body.expires_in_days
669
+ if expires_in_days == 0:
670
+ # Special value 0 means "never expire"
671
+ expires_in_days = None
672
+ elif expires_in_days is None:
673
+ # No expiration specified, try to preserve original expiration
674
+ if token_info['expires_at']:
675
+ current_time = time.time()
676
+ remaining_seconds = token_info['expires_at'] - current_time
677
+ if remaining_seconds > 0:
678
+ expires_in_days = max(1,
679
+ int(remaining_seconds / (24 * 3600)))
680
+ else:
681
+ # Token already expired, default to 30 days
682
+ expires_in_days = 30
683
+
684
+ # Generate new JWT token with same service account user ID
685
+ token_data = token_service.token_service.create_token(
686
+ creator_user_id=token_info['creator_user_hash'],
687
+ service_account_user_id=token_info['service_account_user_id'],
688
+ token_name=token_info['token_name'],
689
+ expires_in_days=expires_in_days)
690
+
691
+ # Update token in database with new token hash
692
+ global_user_state.rotate_service_account_token(
693
+ token_id=token_body.token_id,
694
+ new_token_hash=token_data['token_hash'],
695
+ new_expires_at=token_data['expires_at'])
696
+
697
+ # Return the new JWT token only once (never stored in plain text)
698
+ return {
699
+ 'token_id': token_body.token_id,
700
+ 'token_name': token_info['token_name'],
701
+ 'token': token_data['token'], # Full JWT token with sky_ prefix
702
+ 'expires_at': token_data['expires_at'],
703
+ 'service_account_user_id': token_info['service_account_user_id'],
704
+ 'message': ('Token rotated successfully! Please save this new '
705
+ 'token - it will not be shown again!')
706
+ }
707
+
708
+ except Exception as e: # pylint: disable=broad-except
709
+ logger.error(f'Failed to rotate service account token: {e}')
710
+ raise fastapi.HTTPException(
711
+ status_code=500, detail='Failed to rotate service account token')
@@ -0,0 +1,196 @@
1
+ """JWT-based service account token management for SkyPilot."""
2
+
3
+ import contextlib
4
+ import datetime
5
+ import hashlib
6
+ import os
7
+ import secrets
8
+ from typing import Any, Dict, Generator, Optional
9
+
10
+ import filelock
11
+ import jwt
12
+
13
+ from sky import global_user_state
14
+ from sky import sky_logging
15
+
16
+ logger = sky_logging.init_logger(__name__)
17
+
18
+ # JWT Configuration
19
+ JWT_ALGORITHM = 'HS256'
20
+ JWT_ISSUER = 'sky' # Shortened for compact tokens
21
+ JWT_SECRET_DB_KEY = 'jwt_secret'
22
+
23
+ # File lock for JWT secret initialization
24
+ JWT_SECRET_LOCK_PATH = os.path.expanduser('~/.sky/.jwt_secret_init.lock')
25
+ JWT_SECRET_LOCK_TIMEOUT_SECONDS = 20
26
+
27
+
28
+ @contextlib.contextmanager
29
+ def _jwt_secret_lock() -> Generator[None, None, None]:
30
+ """Context manager for JWT secret initialization lock."""
31
+ try:
32
+ with filelock.FileLock(JWT_SECRET_LOCK_PATH,
33
+ JWT_SECRET_LOCK_TIMEOUT_SECONDS):
34
+ yield
35
+ except filelock.Timeout as e:
36
+ raise RuntimeError(f'Failed to initialize JWT secret due to a timeout '
37
+ f'when trying to acquire the lock at '
38
+ f'{JWT_SECRET_LOCK_PATH}. '
39
+ 'Please try again or manually remove the lock '
40
+ f'file if you believe it is stale.') from e
41
+
42
+
43
+ class TokenService:
44
+ """Service for managing JWT-based service account tokens."""
45
+
46
+ def __init__(self):
47
+ self.secret_key = self._get_or_generate_secret()
48
+
49
+ def _get_or_generate_secret(self) -> str:
50
+ """Get JWT secret from database or generate a new one."""
51
+ with _jwt_secret_lock():
52
+ # Try to get from database (persistent across deployments)
53
+ try:
54
+ db_secret = global_user_state.get_system_config(
55
+ JWT_SECRET_DB_KEY)
56
+ if db_secret:
57
+ logger.debug('Retrieved existing JWT secret from database')
58
+ return db_secret
59
+ except Exception as e: # pylint: disable=broad-except
60
+ logger.debug(f'Failed to get JWT secret from database: {e}')
61
+
62
+ # Generate a new secret and store in database
63
+ new_secret = secrets.token_urlsafe(64)
64
+ try:
65
+ global_user_state.set_system_config(JWT_SECRET_DB_KEY,
66
+ new_secret)
67
+ logger.info(
68
+ 'Generated new JWT secret and stored in database. '
69
+ 'This secret will persist across API server restarts.')
70
+ except Exception as e: # pylint: disable=broad-except
71
+ logger.warning(
72
+ f'Failed to store new JWT secret in database: {e}. '
73
+ f'Using in-memory secret (tokens will not persist '
74
+ f'across restarts).')
75
+
76
+ return new_secret
77
+
78
+ def create_token(self,
79
+ creator_user_id: str,
80
+ service_account_user_id: str,
81
+ token_name: str,
82
+ expires_in_days: Optional[int] = None) -> Dict[str, Any]:
83
+ """Create a new JWT service account token.
84
+
85
+ Args:
86
+ creator_user_id: The creator's user hash
87
+ service_account_user_id: The service account's own user ID
88
+ token_name: Descriptive name for the token
89
+ expires_in_days: Optional expiration in days
90
+
91
+ Returns:
92
+ Dict containing token info including the JWT token
93
+ """
94
+ now = datetime.datetime.now(datetime.timezone.utc)
95
+ token_id = secrets.token_urlsafe(12) # Shorter ID for JWT
96
+
97
+ # Build minimal JWT payload with single-character field names for
98
+ # compactness
99
+ payload = {
100
+ 'i': JWT_ISSUER, # Issuer (use constant)
101
+ 't': int(now.timestamp()), # Issued at (shortened from 'iat')
102
+ # Service account user ID (shortened from 'sub')
103
+ 'u': service_account_user_id,
104
+ 'k': token_id, # Token ID (shortened from 'token_id')
105
+ 'y': 'sa', # Type: service account (shortened from 'type')
106
+ }
107
+
108
+ # Add expiration if specified
109
+ expires_at = None
110
+ if expires_in_days:
111
+ exp_time = now + datetime.timedelta(days=expires_in_days)
112
+ payload['e'] = int(
113
+ exp_time.timestamp()) # Expiration (shortened from 'exp')
114
+ expires_at = int(exp_time.timestamp())
115
+
116
+ # Generate JWT
117
+ jwt_token = jwt.encode(payload,
118
+ self.secret_key,
119
+ algorithm=JWT_ALGORITHM)
120
+
121
+ # Create token with SkyPilot prefix
122
+ full_token = f'sky_{jwt_token}'
123
+
124
+ # Generate hash for database storage (we still hash the full token)
125
+ token_hash = hashlib.sha256(full_token.encode()).hexdigest()
126
+
127
+ return {
128
+ 'token_id': token_id,
129
+ 'token': full_token,
130
+ 'token_hash': token_hash,
131
+ 'creator_user_id': creator_user_id,
132
+ 'service_account_user_id': service_account_user_id,
133
+ 'token_name': token_name,
134
+ 'created_at': int(now.timestamp()),
135
+ 'expires_at': expires_at,
136
+ }
137
+
138
+ def verify_token(self, token: str) -> Optional[Dict[str, Any]]:
139
+ """Verify and decode a JWT token.
140
+
141
+ Args:
142
+ token: The full token (with sky_ prefix)
143
+
144
+ Returns:
145
+ Decoded token payload or None if invalid
146
+ """
147
+ if not token.startswith('sky_'):
148
+ return None
149
+
150
+ # Remove the sky_ prefix
151
+ jwt_token = token[4:]
152
+
153
+ try:
154
+ # Decode and verify JWT (without issuer verification)
155
+ payload = jwt.decode(jwt_token,
156
+ self.secret_key,
157
+ algorithms=[JWT_ALGORITHM])
158
+
159
+ # Manually verify issuer using our shortened field name
160
+ token_issuer = payload.get('i')
161
+ if token_issuer != JWT_ISSUER:
162
+ logger.warning(f'Invalid token issuer: {token_issuer}')
163
+ return None
164
+
165
+ # Verify token type
166
+ token_type = payload.get('y')
167
+ if token_type != 'sa':
168
+ logger.warning(f'Invalid token type: {token_type}')
169
+ return None
170
+
171
+ # Convert shortened field names back to standard names for
172
+ # compatibility
173
+ normalized_payload = {
174
+ 'iss': payload.get('i'), # issuer
175
+ 'iat': payload.get('t'), # issued at
176
+ 'sub': payload.get('u'), # subject (service account user ID)
177
+ 'token_id': payload.get('k'), # token ID
178
+ 'type': 'service_account', # expand shortened type
179
+ }
180
+
181
+ # Add expiration if present
182
+ if 'e' in payload:
183
+ normalized_payload['exp'] = payload['e']
184
+
185
+ return normalized_payload
186
+
187
+ except jwt.ExpiredSignatureError:
188
+ logger.warning('Token has expired')
189
+ return None
190
+ except jwt.InvalidTokenError as e:
191
+ logger.warning(f'Invalid token: {e}')
192
+ return None
193
+
194
+
195
+ # Singleton instance
196
+ token_service = TokenService()
sky/utils/common_utils.py CHANGED
@@ -71,11 +71,10 @@ def get_usage_run_id() -> str:
71
71
  def is_valid_user_hash(user_hash: Optional[str]) -> bool:
72
72
  if user_hash is None:
73
73
  return False
74
- try:
75
- int(user_hash, 16)
76
- except (TypeError, ValueError):
77
- return False
78
- return len(user_hash) == USER_HASH_LENGTH
74
+ # Must start with a letter or digit, followed by alphanumeric characters and hyphens
75
+ # This covers both old hex format (e.g., "abc123") and new service account
76
+ # format (e.g., "sa-abc123-token-xyz")
77
+ return bool(re.match(r'^[a-zA-Z0-9][a-zA-Z0-9-]*$', user_hash))
79
78
 
80
79
 
81
80
  def generate_user_hash() -> str: