skypilot-nightly 1.0.0.dev20250627__py3-none-any.whl → 1.0.0.dev20250630__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/kubernetes.py +14 -0
  3. sky/adaptors/nebius.py +2 -2
  4. sky/authentication.py +12 -5
  5. sky/backends/backend_utils.py +92 -26
  6. sky/check.py +5 -2
  7. sky/client/cli/command.py +39 -8
  8. sky/client/sdk.py +217 -167
  9. sky/client/service_account_auth.py +47 -0
  10. sky/clouds/aws.py +10 -4
  11. sky/clouds/azure.py +5 -2
  12. sky/clouds/cloud.py +5 -2
  13. sky/clouds/gcp.py +31 -18
  14. sky/clouds/kubernetes.py +54 -34
  15. sky/clouds/nebius.py +8 -2
  16. sky/clouds/ssh.py +5 -2
  17. sky/clouds/utils/aws_utils.py +10 -4
  18. sky/clouds/utils/gcp_utils.py +22 -7
  19. sky/clouds/utils/oci_utils.py +62 -14
  20. sky/dashboard/out/404.html +1 -1
  21. sky/dashboard/out/_next/static/NdypbqMxaYucRGfopkKXa/_buildManifest.js +1 -0
  22. sky/dashboard/out/_next/static/chunks/1043-1b39779691bb4030.js +1 -0
  23. sky/dashboard/out/_next/static/chunks/{141-fa5a20cbf401b351.js → 1141-726e5a3f00b67185.js} +2 -2
  24. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  25. sky/dashboard/out/_next/static/chunks/1664-d65361e92b85e786.js +1 -0
  26. sky/dashboard/out/_next/static/chunks/1691.44e378727a41f3b5.js +21 -0
  27. sky/dashboard/out/_next/static/chunks/1871-80dea41717729fa5.js +6 -0
  28. sky/dashboard/out/_next/static/chunks/2544.27f70672535675ed.js +1 -0
  29. sky/dashboard/out/_next/static/chunks/{875.52c962183328b3f2.js → 2875.c24c6d57dc82e436.js} +1 -1
  30. sky/dashboard/out/_next/static/chunks/3256.7257acd01b481bed.js +11 -0
  31. sky/dashboard/out/_next/static/chunks/3698-52ad1ca228faa776.js +1 -0
  32. sky/dashboard/out/_next/static/chunks/3785.b3cc2bc1d49d2c3c.js +1 -0
  33. sky/dashboard/out/_next/static/chunks/3937.d7f1c55d1916c7f2.js +1 -0
  34. sky/dashboard/out/_next/static/chunks/{947-6620842ef80ae879.js → 3947-b059261d6fa88a1f.js} +1 -1
  35. sky/dashboard/out/_next/static/chunks/{697.6460bf72e760addd.js → 4697.f5421144224da9fc.js} +1 -1
  36. sky/dashboard/out/_next/static/chunks/4725.4c849b1e05c8e9ad.js +1 -0
  37. sky/dashboard/out/_next/static/chunks/5230-df791914b54d91d9.js +1 -0
  38. sky/dashboard/out/_next/static/chunks/{491.b3d264269613fe09.js → 5491.918ffed0ba7a5294.js} +1 -1
  39. sky/dashboard/out/_next/static/chunks/5739-5ea3ffa10fc884f2.js +8 -0
  40. sky/dashboard/out/_next/static/chunks/616-162f3033ffcd3d31.js +39 -0
  41. sky/dashboard/out/_next/static/chunks/6601-fcfad0ddf92ec7ab.js +1 -0
  42. sky/dashboard/out/_next/static/chunks/6989-6ff4e45dfb49d11d.js +1 -0
  43. sky/dashboard/out/_next/static/chunks/6990-d0dc765474fa0eca.js +1 -0
  44. sky/dashboard/out/_next/static/chunks/8969-909d53833da080cb.js +1 -0
  45. sky/dashboard/out/_next/static/chunks/8982.a2e214068f30a857.js +1 -0
  46. sky/dashboard/out/_next/static/chunks/{25.76c246239df93d50.js → 9025.a7c44babfe56ce09.js} +2 -2
  47. sky/dashboard/out/_next/static/chunks/938-044ad21de8b4626b.js +1 -0
  48. sky/dashboard/out/_next/static/chunks/9470-21d059a1dfa03f61.js +1 -0
  49. sky/dashboard/out/_next/static/chunks/9984.739ae958a066298d.js +1 -0
  50. sky/dashboard/out/_next/static/chunks/fd9d1056-61f2257a9cd8b32b.js +1 -0
  51. sky/dashboard/out/_next/static/chunks/{framework-87d061ee6ed71b28.js → framework-efc06c2733009cd3.js} +1 -1
  52. sky/dashboard/out/_next/static/chunks/main-app-68c028b1bc5e1b72.js +1 -0
  53. sky/dashboard/out/_next/static/chunks/{main-e0e2335212e72357.js → main-c0a4f1ea606d48d2.js} +1 -1
  54. sky/dashboard/out/_next/static/chunks/pages/{_app-9a3ce3170d2edcec.js → _app-a37b06ddb64521fd.js} +2 -2
  55. sky/dashboard/out/_next/static/chunks/pages/_error-c72a1f77a3c0be1b.js +1 -0
  56. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-8135aba0712bda37.js +6 -0
  57. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-b8e1114e6d38218c.js +6 -0
  58. sky/dashboard/out/_next/static/chunks/pages/clusters-9744c271a1642f76.js +1 -0
  59. sky/dashboard/out/_next/static/chunks/pages/config-a2673b256b6d416f.js +1 -0
  60. sky/dashboard/out/_next/static/chunks/pages/index-927ddeebe57a8ac3.js +1 -0
  61. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-8b0809f59034d509.js +1 -0
  62. sky/dashboard/out/_next/static/chunks/pages/infra-ae9d2f705ce582c9.js +1 -0
  63. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-c4d5cfac7fbc0668.js +16 -0
  64. sky/dashboard/out/_next/static/chunks/pages/jobs-5bbdc71878f0a068.js +1 -0
  65. sky/dashboard/out/_next/static/chunks/pages/users-cd43fb3c122eedde.js +1 -0
  66. sky/dashboard/out/_next/static/chunks/pages/volumes-4ebf6484f7216387.js +1 -0
  67. sky/dashboard/out/_next/static/chunks/pages/workspace/new-5629d4e551dba1ee.js +1 -0
  68. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-7c0187f43757a548.js +1 -0
  69. sky/dashboard/out/_next/static/chunks/pages/workspaces-06bde99155fa6292.js +1 -0
  70. sky/dashboard/out/_next/static/chunks/webpack-d427db53e54de9ce.js +1 -0
  71. sky/dashboard/out/_next/static/css/0da6afe66176678a.css +3 -0
  72. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  73. sky/dashboard/out/clusters/[cluster].html +1 -1
  74. sky/dashboard/out/clusters.html +1 -1
  75. sky/dashboard/out/config.html +1 -1
  76. sky/dashboard/out/index.html +1 -1
  77. sky/dashboard/out/infra/[context].html +1 -1
  78. sky/dashboard/out/infra.html +1 -1
  79. sky/dashboard/out/jobs/[job].html +1 -1
  80. sky/dashboard/out/jobs.html +1 -1
  81. sky/dashboard/out/users.html +1 -1
  82. sky/dashboard/out/volumes.html +1 -1
  83. sky/dashboard/out/workspace/new.html +1 -1
  84. sky/dashboard/out/workspaces/[name].html +1 -1
  85. sky/dashboard/out/workspaces.html +1 -1
  86. sky/data/storage.py +8 -3
  87. sky/global_user_state.py +257 -9
  88. sky/jobs/client/sdk.py +20 -25
  89. sky/models.py +16 -0
  90. sky/optimizer.py +46 -0
  91. sky/provision/__init__.py +14 -6
  92. sky/provision/kubernetes/config.py +1 -1
  93. sky/provision/kubernetes/constants.py +9 -0
  94. sky/provision/kubernetes/instance.py +24 -18
  95. sky/provision/kubernetes/network.py +15 -9
  96. sky/provision/kubernetes/network_utils.py +42 -23
  97. sky/provision/kubernetes/utils.py +73 -35
  98. sky/provision/kubernetes/volume.py +77 -15
  99. sky/provision/nebius/utils.py +10 -4
  100. sky/resources.py +10 -4
  101. sky/serve/client/sdk.py +28 -34
  102. sky/server/common.py +51 -3
  103. sky/server/constants.py +3 -0
  104. sky/server/requests/executor.py +4 -0
  105. sky/server/requests/payloads.py +33 -0
  106. sky/server/requests/requests.py +19 -0
  107. sky/server/rest.py +6 -15
  108. sky/server/server.py +121 -6
  109. sky/skylet/constants.py +7 -0
  110. sky/skypilot_config.py +32 -4
  111. sky/task.py +12 -0
  112. sky/users/permission.py +29 -0
  113. sky/users/server.py +384 -5
  114. sky/users/token_service.py +196 -0
  115. sky/utils/common_utils.py +4 -5
  116. sky/utils/config_utils.py +41 -0
  117. sky/utils/controller_utils.py +5 -1
  118. sky/utils/log_utils.py +68 -0
  119. sky/utils/resource_checker.py +153 -0
  120. sky/utils/resources_utils.py +12 -4
  121. sky/utils/schemas.py +87 -60
  122. sky/utils/subprocess_utils.py +2 -6
  123. sky/volumes/server/core.py +103 -78
  124. sky/volumes/utils.py +22 -5
  125. sky/workspaces/core.py +9 -117
  126. {skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250630.dist-info}/METADATA +1 -1
  127. {skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250630.dist-info}/RECORD +133 -128
  128. sky/dashboard/out/_next/static/HudU4f4Xsy-cP51JvXSZ-/_buildManifest.js +0 -1
  129. sky/dashboard/out/_next/static/chunks/230-d6e363362017ff3a.js +0 -1
  130. sky/dashboard/out/_next/static/chunks/43-36177d00f6956ab2.js +0 -1
  131. sky/dashboard/out/_next/static/chunks/470-92dd1614396389be.js +0 -1
  132. sky/dashboard/out/_next/static/chunks/544.110e53813fb98e2e.js +0 -1
  133. sky/dashboard/out/_next/static/chunks/616-d6128fa9e7cae6e6.js +0 -39
  134. sky/dashboard/out/_next/static/chunks/645.961f08e39b8ce447.js +0 -1
  135. sky/dashboard/out/_next/static/chunks/664-047bc03493fda379.js +0 -1
  136. sky/dashboard/out/_next/static/chunks/690.55f9eed3be903f56.js +0 -16
  137. sky/dashboard/out/_next/static/chunks/785.dc2686c3c1235554.js +0 -1
  138. sky/dashboard/out/_next/static/chunks/798-c0525dc3f21e488d.js +0 -1
  139. sky/dashboard/out/_next/static/chunks/799-3625946b2ec2eb30.js +0 -8
  140. sky/dashboard/out/_next/static/chunks/871-3db673be3ee3750b.js +0 -6
  141. sky/dashboard/out/_next/static/chunks/937.3759f538f11a0953.js +0 -1
  142. sky/dashboard/out/_next/static/chunks/938-068520cc11738deb.js +0 -1
  143. sky/dashboard/out/_next/static/chunks/969-d3a0b53f728d280a.js +0 -1
  144. sky/dashboard/out/_next/static/chunks/973-81b2d057178adb76.js +0 -1
  145. sky/dashboard/out/_next/static/chunks/982.1b61658204416b0f.js +0 -1
  146. sky/dashboard/out/_next/static/chunks/984.e8bac186a24e5178.js +0 -1
  147. sky/dashboard/out/_next/static/chunks/989-db34c16ad7ea6155.js +0 -1
  148. sky/dashboard/out/_next/static/chunks/990-0ad5ea1699e03ee8.js +0 -1
  149. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  150. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  151. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  152. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-aff040d7bc5d0086.js +0 -6
  153. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-8040f2483897ed0c.js +0 -6
  154. sky/dashboard/out/_next/static/chunks/pages/clusters-f119a5630a1efd61.js +0 -1
  155. sky/dashboard/out/_next/static/chunks/pages/config-6b255eae088da6a3.js +0 -1
  156. sky/dashboard/out/_next/static/chunks/pages/index-6b0d9e5031b70c58.js +0 -1
  157. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-b302aea4d65766bf.js +0 -1
  158. sky/dashboard/out/_next/static/chunks/pages/infra-ee8cc4d449945d19.js +0 -1
  159. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-e4b23128db0774cd.js +0 -16
  160. sky/dashboard/out/_next/static/chunks/pages/jobs-0a5695ff3075d94a.js +0 -1
  161. sky/dashboard/out/_next/static/chunks/pages/users-4978cbb093e141e7.js +0 -1
  162. sky/dashboard/out/_next/static/chunks/pages/volumes-476b670ef33d1ecd.js +0 -1
  163. sky/dashboard/out/_next/static/chunks/pages/workspace/new-5b59bce9eb208d84.js +0 -1
  164. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-cb7e720b739de53a.js +0 -1
  165. sky/dashboard/out/_next/static/chunks/pages/workspaces-50e230828730cfb3.js +0 -1
  166. sky/dashboard/out/_next/static/chunks/webpack-08fdb9e6070127fc.js +0 -1
  167. sky/dashboard/out/_next/static/css/52082cf558ec9705.css +0 -3
  168. /sky/dashboard/out/_next/static/{HudU4f4Xsy-cP51JvXSZ- → NdypbqMxaYucRGfopkKXa}/_ssgManifest.js +0 -0
  169. /sky/dashboard/out/_next/static/chunks/{804-4c9fc53aa74bc191.js → 804-9f5e98ce84d46bdd.js} +0 -0
  170. {skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250630.dist-info}/WHEEL +0 -0
  171. {skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250630.dist-info}/entry_points.txt +0 -0
  172. {skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250630.dist-info}/licenses/LICENSE +0 -0
  173. {skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250630.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,196 @@
1
+ """JWT-based service account token management for SkyPilot."""
2
+
3
+ import contextlib
4
+ import datetime
5
+ import hashlib
6
+ import os
7
+ import secrets
8
+ from typing import Any, Dict, Generator, Optional
9
+
10
+ import filelock
11
+ import jwt
12
+
13
+ from sky import global_user_state
14
+ from sky import sky_logging
15
+
16
+ logger = sky_logging.init_logger(__name__)
17
+
18
+ # JWT Configuration
19
+ JWT_ALGORITHM = 'HS256'
20
+ JWT_ISSUER = 'sky' # Shortened for compact tokens
21
+ JWT_SECRET_DB_KEY = 'jwt_secret'
22
+
23
+ # File lock for JWT secret initialization
24
+ JWT_SECRET_LOCK_PATH = os.path.expanduser('~/.sky/.jwt_secret_init.lock')
25
+ JWT_SECRET_LOCK_TIMEOUT_SECONDS = 20
26
+
27
+
28
@contextlib.contextmanager
def _jwt_secret_lock() -> Generator[None, None, None]:
    """Context manager holding the JWT secret initialization file lock.

    The lock file lives at JWT_SECRET_LOCK_PATH and acquisition waits at
    most JWT_SECRET_LOCK_TIMEOUT_SECONDS.

    Yields:
        None, while the lock is held.

    Raises:
        RuntimeError: If the lock cannot be acquired before the timeout.
    """
    lock = filelock.FileLock(JWT_SECRET_LOCK_PATH,
                             JWT_SECRET_LOCK_TIMEOUT_SECONDS)
    # Only the acquisition is guarded: a filelock.Timeout raised by the
    # caller's body (e.g. from some other lock) must not be reported as a
    # failure to acquire *this* lock.
    try:
        lock.acquire()
    except filelock.Timeout as e:
        raise RuntimeError('Failed to initialize JWT secret due to a timeout '
                           'when trying to acquire the lock at '
                           f'{JWT_SECRET_LOCK_PATH}. '
                           'Please try again or manually remove the lock '
                           'file if you believe it is stale.') from e
    try:
        yield
    finally:
        # Always release, even if the body raised.
        lock.release()
41
+
42
+
43
class TokenService:
    """Service for managing JWT-based service account tokens.

    Tokens are HS256-signed JWTs prefixed with ``sky_``. To keep them
    compact, the payload uses single-character claim names instead of the
    registered JWT names:

        i: issuer, t: issued-at, u: service account user ID,
        k: token ID, y: token type ('sa'), e: expiration (optional).

    Because ``e`` is not the registered ``exp`` claim, the JWT library does
    not validate expiry on its own; ``verify_token`` checks it explicitly.
    """

    def __init__(self):
        # Signing key shared by create_token() and verify_token().
        self.secret_key = self._get_or_generate_secret()

    def _get_or_generate_secret(self) -> str:
        """Get JWT secret from database or generate a new one.

        Runs under the JWT secret file lock. If reading the stored secret
        raises, the error is logged at debug level and a new secret is
        generated. If storing the new secret raises, a warning is logged and
        the new secret is still returned (in-memory only).

        Returns:
            The signing secret.
        """
        with _jwt_secret_lock():
            # Try to get from database (persistent across deployments)
            try:
                db_secret = global_user_state.get_system_config(
                    JWT_SECRET_DB_KEY)
                if db_secret:
                    logger.debug('Retrieved existing JWT secret from database')
                    return db_secret
            except Exception as e:  # pylint: disable=broad-except
                logger.debug(f'Failed to get JWT secret from database: {e}')

            # Generate a new secret and store in database
            new_secret = secrets.token_urlsafe(64)
            try:
                global_user_state.set_system_config(JWT_SECRET_DB_KEY,
                                                    new_secret)
                logger.info(
                    'Generated new JWT secret and stored in database. '
                    'This secret will persist across API server restarts.')
            except Exception as e:  # pylint: disable=broad-except
                logger.warning(
                    f'Failed to store new JWT secret in database: {e}. '
                    f'Using in-memory secret (tokens will not persist '
                    f'across restarts).')

            return new_secret

    def create_token(self,
                     creator_user_id: str,
                     service_account_user_id: str,
                     token_name: str,
                     expires_in_days: Optional[int] = None) -> Dict[str, Any]:
        """Create a new JWT service account token.

        Args:
            creator_user_id: The creator's user hash
            service_account_user_id: The service account's own user ID
            token_name: Descriptive name for the token
            expires_in_days: Optional expiration in days. A falsy value
                (None or 0) produces a token without an expiration claim.

        Returns:
            Dict with keys 'token_id', 'token' (the full ``sky_``-prefixed
            token), 'token_hash' (SHA-256 hex digest of the full token),
            'creator_user_id', 'service_account_user_id', 'token_name',
            'created_at' and 'expires_at' (Unix seconds, or None).
        """
        now = datetime.datetime.now(datetime.timezone.utc)
        issued_at = int(now.timestamp())
        token_id = secrets.token_urlsafe(12)  # Shorter ID for JWT

        # Build minimal JWT payload with single-character field names for
        # compactness
        payload = {
            'i': JWT_ISSUER,  # Issuer (use constant)
            't': issued_at,  # Issued at (shortened from 'iat')
            # Service account user ID (shortened from 'sub')
            'u': service_account_user_id,
            'k': token_id,  # Token ID (shortened from 'token_id')
            'y': 'sa',  # Type: service account (shortened from 'type')
        }

        # Add expiration if specified
        expires_at = None
        if expires_in_days:
            exp_time = now + datetime.timedelta(days=expires_in_days)
            expires_at = int(exp_time.timestamp())
            payload['e'] = expires_at  # Expiration (shortened from 'exp')

        # Generate JWT
        jwt_token = jwt.encode(payload,
                               self.secret_key,
                               algorithm=JWT_ALGORITHM)

        # Create token with SkyPilot prefix
        full_token = f'sky_{jwt_token}'

        # Generate hash for database storage (we still hash the full token)
        token_hash = hashlib.sha256(full_token.encode()).hexdigest()

        return {
            'token_id': token_id,
            'token': full_token,
            'token_hash': token_hash,
            'creator_user_id': creator_user_id,
            'service_account_user_id': service_account_user_id,
            'token_name': token_name,
            'created_at': issued_at,
            'expires_at': expires_at,
        }

    def verify_token(self, token: str) -> Optional[Dict[str, Any]]:
        """Verify and decode a JWT token.

        Checks the ``sky_`` prefix, the signature, the issuer, the token
        type, and (when present) the expiration claim.

        Args:
            token: The full token (with sky_ prefix)

        Returns:
            Decoded token payload with standard claim names ('iss', 'iat',
            'sub', 'token_id', 'type', and 'exp' if set), or None if the
            token is invalid or expired.
        """
        if not token.startswith('sky_'):
            return None

        # Remove the sky_ prefix
        jwt_token = token[4:]

        try:
            # Decode and verify JWT (without issuer verification)
            payload = jwt.decode(jwt_token,
                                 self.secret_key,
                                 algorithms=[JWT_ALGORITHM])
        except jwt.ExpiredSignatureError:
            # Only reachable if a token carries the registered 'exp' claim.
            logger.warning('Token has expired')
            return None
        except jwt.InvalidTokenError as e:
            logger.warning(f'Invalid token: {e}')
            return None

        # Manually verify issuer using our shortened field name
        token_issuer = payload.get('i')
        if token_issuer != JWT_ISSUER:
            logger.warning(f'Invalid token issuer: {token_issuer}')
            return None

        # Verify token type
        token_type = payload.get('y')
        if token_type != 'sa':
            logger.warning(f'Invalid token type: {token_type}')
            return None

        # Manually verify expiration: the shortened 'e' claim is invisible
        # to the JWT library, which only validates the registered 'exp'.
        expires_at = payload.get('e')
        if expires_at is not None:
            if (isinstance(expires_at, bool) or
                    not isinstance(expires_at, (int, float))):
                logger.warning('Invalid token: malformed expiration claim')
                return None
            now_ts = datetime.datetime.now(datetime.timezone.utc).timestamp()
            if expires_at <= now_ts:
                logger.warning('Token has expired')
                return None

        # Convert shortened field names back to standard names for
        # compatibility
        normalized_payload = {
            'iss': token_issuer,  # issuer
            'iat': payload.get('t'),  # issued at
            'sub': payload.get('u'),  # subject (service account user ID)
            'token_id': payload.get('k'),  # token ID
            'type': 'service_account',  # expand shortened type
        }

        # Add expiration if present
        if 'e' in payload:
            normalized_payload['exp'] = payload['e']

        return normalized_payload
193
+
194
+
195
+ # Singleton instance
196
+ token_service = TokenService()
sky/utils/common_utils.py CHANGED
@@ -71,11 +71,10 @@ def get_usage_run_id() -> str:
def is_valid_user_hash(user_hash: Optional[str]) -> bool:
    """Return whether ``user_hash`` is a syntactically valid user hash.

    A valid hash starts with an ASCII letter or digit and continues with
    ASCII letters, digits and hyphens only. This covers both the old hex
    format (e.g., "abc123") and the new service account format
    (e.g., "sa-abc123-token-xyz").

    Args:
        user_hash: Candidate user hash; None and non-string values are
            rejected.

    Returns:
        True if the whole string matches the allowed format, else False.
    """
    if not isinstance(user_hash, str):
        return False
    # fullmatch instead of match(...'$'): '$' also matches just before a
    # trailing newline, which would let "abc\n" through.
    return re.fullmatch(r'[a-zA-Z0-9][a-zA-Z0-9-]*', user_hash) is not None
79
78
 
80
79
 
81
80
  def generate_user_hash() -> str:
sky/utils/config_utils.py CHANGED
@@ -226,3 +226,44 @@ def merge_k8s_configs(
226
226
  base_config[key].extend(value)
227
227
  else:
228
228
  base_config[key] = value
229
+
230
+
231
def get_cloud_config_value_from_dict(
        dict_config: Dict[str, Any],
        cloud: str,
        keys: Tuple[str, ...],
        region: Optional[str] = None,
        default_value: Optional[Any] = None,
        override_configs: Optional[Dict[str, Any]] = None) -> Any:
    """Look up a nested cloud setting, honoring per-region overrides.

    Resolution order:
      1. If ``region`` is given and the cloud has a region key (only
         'kubernetes', via 'context_configs'), read
         <cloud>/<region_key>/<region>/<keys>.
      2. Read the cloud-level value <cloud>/<keys>, falling back to
         ``default_value``.
      3. For kubernetes, when both values are dicts, the per-region dict is
         merged into the cloud-level dict with merge_k8s_configs and the
         merged dict is returned. Otherwise the per-region value wins when
         it is not None, else the cloud-level value is returned.
    """
    config = Config(dict_config)
    is_kubernetes = cloud == 'kubernetes'
    # Only kubernetes defines a per-region (per-context) section.
    region_key = 'context_configs' if is_kubernetes else None

    region_value = None
    if region_key is not None and region is not None:
        region_value = config.get_nested(keys=(cloud, region_key, region) +
                                         keys,
                                         default_value=None,
                                         override_configs=override_configs)
    cloud_value = config.get_nested(keys=(cloud,) + keys,
                                    default_value=default_value,
                                    override_configs=override_configs)

    # No region-specific override: the cloud-level value is the answer.
    if region_value is None:
        return cloud_value

    both_dicts = (isinstance(cloud_value, dict) and
                  isinstance(region_value, dict))
    if is_kubernetes and both_dicts:
        merge_k8s_configs(cloud_value, region_value)
        return cloud_value
    return region_value
@@ -733,7 +733,11 @@ def _setup_proxy_command_on_controller(
733
733
  config = config_utils.Config.from_dict(user_config)
734
734
  proxy_command_key = (str(controller_launched_cloud).lower(),
735
735
  'ssh_proxy_command')
736
- ssh_proxy_command = config.get_nested(proxy_command_key, None)
736
+ ssh_proxy_command = skypilot_config.get_effective_region_config(
737
+ cloud=str(controller_launched_cloud).lower(),
738
+ region=None,
739
+ keys=('ssh_proxy_command',),
740
+ default_value=None)
737
741
  if isinstance(ssh_proxy_command, str):
738
742
  config.set_nested(proxy_command_key, None)
739
743
  elif isinstance(ssh_proxy_command, dict):
sky/utils/log_utils.py CHANGED
@@ -573,6 +573,74 @@ def readable_time_duration(start: Optional[float],
573
573
  return diff
574
574
 
575
575
 
576
def human_duration(start: int, end: Optional[int] = None) -> str:
    """Format the time elapsed between two Unix timestamps compactly.

    The output resembles Kubernetes' duration format: at most two adjacent
    units are shown, and precision drops as the duration grows.

    Args:
        start: The start time as a Unix timestamp (seconds since epoch).
        end: The end time as a Unix timestamp (seconds since epoch).
            If None, current time is used.

    Returns:
        A string such as "30s", "2m5s", "15m", "3h30m", "2d3h", "10d",
        "2y5d" or "9y". Returns "0s" when ``start`` is falsy or
        non-positive, or when the duration is zero or negative.
    """
    if not start or start <= 0:
        return '0s'

    if end is None:
        end = int(time.time())
    elapsed = end - start

    if elapsed <= 0:
        return '0s'
    if elapsed < 2 * 60:
        return f'{elapsed}s'

    total_minutes, rem_seconds = divmod(int(elapsed), 60)
    if total_minutes < 10:
        if rem_seconds:
            return f'{total_minutes}m{rem_seconds}s'
        return f'{total_minutes}m'
    if total_minutes < 3 * 60:
        return f'{total_minutes}m'

    total_hours, rem_minutes = divmod(total_minutes, 60)
    if total_hours < 8:
        if rem_minutes:
            return f'{total_hours}h{rem_minutes}m'
        return f'{total_hours}h'
    if total_hours < 48:
        return f'{total_hours}h'

    total_days, rem_hours = divmod(total_hours, 24)
    if total_hours < 8 * 24:
        if rem_hours:
            return f'{total_days}d{rem_hours}h'
        return f'{total_days}d'
    if total_hours < 2 * 365 * 24:
        return f'{total_days}d'

    total_years, rem_days = divmod(total_days, 365)
    if total_hours < 8 * 365 * 24 and rem_days:
        return f'{total_years}y{rem_days}d'
    return f'{total_years}y'
642
+
643
+
576
644
  def follow_logs(
577
645
  file: TextIO,
578
646
  *,
@@ -0,0 +1,153 @@
1
+ """Resource checking utilities for finding active clusters and managed jobs."""
2
+
3
+ import concurrent.futures
4
+ from typing import Any, Callable, Dict, List, Tuple
5
+
6
+ from sky import exceptions
7
+ from sky import global_user_state
8
+ from sky import sky_logging
9
+ from sky.skylet import constants
10
+
11
+ logger = sky_logging.init_logger(__name__)
12
+
13
+
14
def check_no_active_resources_for_users(
        user_operations: List[Tuple[str, str]]) -> None:
    """Ensure none of the given users own active clusters or managed jobs.

    Args:
        user_operations: (user_id, operation) pairs; the operation name
            ('update' or 'delete') is used in the error message.

    Raises:
        ValueError: If any user has active clusters or managed jobs. The
            message covers every offending user.
    """
    if user_operations:

        def make_user_filter(user_id: str):
            # Clusters and jobs are matched on their 'user_hash' field.

            def owned_by_user(resource) -> bool:
                return resource.get('user_hash') == user_id

            return owned_by_user

        _check_active_resources(user_operations, make_user_filter, 'user')
33
+
34
+
35
def check_no_active_resources_for_workspaces(
        workspace_operations: List[Tuple[str, str]]) -> None:
    """Ensure none of the given workspaces hold active clusters or jobs.

    Args:
        workspace_operations: (workspace_name, operation) pairs; the
            operation name ('update' or 'delete') is used in the error
            message.

    Raises:
        ValueError: If any workspace has active clusters or managed jobs.
            The message covers every offending workspace.
    """
    if workspace_operations:

        def make_workspace_filter(workspace_name: str):

            def in_workspace(resource) -> bool:
                # Resources without a 'workspace' field count as belonging
                # to the default workspace.
                current = resource.get('workspace',
                                       constants.SKYPILOT_DEFAULT_WORKSPACE)
                return current == workspace_name

            return in_workspace

        _check_active_resources(workspace_operations, make_workspace_filter,
                                'workspace')
57
+
58
+
59
def _check_active_resources(resource_operations: List[Tuple[str, str]],
                            filter_factory: Callable[[str],
                                                     Callable[[Dict[str, Any]],
                                                              bool]],
                            resource_type: str) -> None:
    """Reject operations on entities that still own clusters or managed jobs.

    Args:
        resource_operations: (resource_name, operation) pairs; the operation
            name ('update' or 'delete') is used in the error message.
        filter_factory: Given a resource_name, returns a predicate that is
            applied to each cluster record and each managed job record.
        resource_type: Label used in messages ('user' or 'workspace').

    Raises:
        ValueError: If any entity has matching clusters or managed jobs. All
            offending entities are reported in a single message.
    """

    def _fetch_clusters():
        return global_user_state.get_clusters()

    def _fetch_unfinished_managed_jobs():
        # pylint: disable=import-outside-toplevel
        from sky.jobs.server import core as managed_jobs_core
        try:
            return managed_jobs_core.queue(refresh=False,
                                           skip_finished=True,
                                           all_users=True)
        except exceptions.ClusterNotUpError:
            logger.warning('All jobs should be finished.')
            return []

    # Run both lookups concurrently and wait for both results.
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as pool:
        pending_clusters = pool.submit(_fetch_clusters)
        pending_jobs = pool.submit(_fetch_unfinished_managed_jobs)

        clusters = pending_clusters.result()
        managed_jobs = pending_jobs.result()

    # One entry per offending entity; raised together at the end.
    problems = []
    for resource_name, operation in resource_operations:
        matches = filter_factory(resource_name)
        owned_clusters = [record for record in clusters if matches(record)]
        owned_jobs = [record for record in managed_jobs if matches(record)]

        details = []
        if owned_clusters:
            cluster_list = ', '.join(
                record['name'] for record in owned_clusters)
            details.append(
                f'{len(owned_clusters)} active cluster(s): {cluster_list}')
        if owned_jobs:
            job_list = ', '.join(
                str(record['job_id']) for record in owned_jobs)
            details.append(f'{len(owned_jobs)} active managed job(s): '
                           f'{job_list}')

        if not details:
            continue
        summary = ' and '.join(details)
        problems.append(
            f'Cannot {operation} {resource_type} {resource_name!r} '
            f'because it has {summary}.')

    if not problems:
        return

    if len(problems) == 1:
        # Single entity: keep the message on one line.
        full_message = problems[0] + ' Please terminate these resources first.'
    else:
        # Several entities: bullet list under a summary header.
        bullets = '\n'.join(f'• {msg}' for msg in problems)
        full_message = (f'Cannot proceed due to active resources in '
                        f'{len(problems)} {resource_type}(s):\n' + bullets +
                        '\nPlease terminate these resources first.')
    raise ValueError(full_message)
@@ -273,10 +273,18 @@ def need_to_query_reservations() -> bool:
273
273
  clouds that do not use reservations.
274
274
  """
275
275
  for cloud_str in registry.CLOUD_REGISTRY.keys():
276
- cloud_specific_reservations = skypilot_config.get_nested(
277
- (cloud_str, 'specific_reservations'), None)
278
- cloud_prioritize_reservations = skypilot_config.get_nested(
279
- (cloud_str, 'prioritize_reservations'), False)
276
+ cloud_specific_reservations = (
277
+ skypilot_config.get_effective_region_config(
278
+ cloud=cloud_str,
279
+ region=None,
280
+ keys=('specific_reservations',),
281
+ default_value=None))
282
+ cloud_prioritize_reservations = (
283
+ skypilot_config.get_effective_region_config(
284
+ cloud=cloud_str,
285
+ region=None,
286
+ keys=('prioritize_reservations',),
287
+ default_value=False))
280
288
  if (cloud_specific_reservations is not None or
281
289
  cloud_prioritize_reservations):
282
290
  return True