skypilot-nightly 1.0.0.dev20250720__py3-none-any.whl → 1.0.0.dev20250724__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (113) hide show
  1. sky/__init__.py +2 -2
  2. sky/admin_policy.py +11 -4
  3. sky/backends/backend_utils.py +27 -11
  4. sky/backends/cloud_vm_ray_backend.py +22 -27
  5. sky/client/cli/command.py +44 -28
  6. sky/client/sdk.py +52 -7
  7. sky/client/sdk.pyi +296 -0
  8. sky/clouds/nebius.py +2 -5
  9. sky/clouds/utils/oci_utils.py +16 -40
  10. sky/clouds/vast.py +2 -1
  11. sky/dashboard/out/404.html +1 -1
  12. sky/dashboard/out/_next/static/BURfWrKsQk9psMPv0OXrh/_buildManifest.js +1 -0
  13. sky/dashboard/out/_next/static/chunks/{1141-d8c6404a7c6fffe6.js → 1141-e49a159c30a6c4a7.js} +1 -1
  14. sky/dashboard/out/_next/static/chunks/1559-18717d96ef2fcbe9.js +30 -0
  15. sky/dashboard/out/_next/static/chunks/{1871-a821dcaaae2a3823.js → 1871-ea0e7283886407ca.js} +2 -2
  16. sky/dashboard/out/_next/static/chunks/2003.b82e6db40ec4c463.js +1 -0
  17. sky/dashboard/out/_next/static/chunks/2350.23778a2b19aabd33.js +1 -0
  18. sky/dashboard/out/_next/static/chunks/2369.2d6e4757f8dfc2b7.js +15 -0
  19. sky/dashboard/out/_next/static/chunks/{2641.5233e938f14e31a7.js → 2641.74c19c4d45a2c034.js} +1 -1
  20. sky/dashboard/out/_next/static/chunks/3785.59705416215ff08b.js +1 -0
  21. sky/dashboard/out/_next/static/chunks/4869.da729a7db3a31f43.js +16 -0
  22. sky/dashboard/out/_next/static/chunks/4937.d75809403fc264ac.js +15 -0
  23. sky/dashboard/out/_next/static/chunks/6135-2abbd0352f8ee061.js +1 -0
  24. sky/dashboard/out/_next/static/chunks/691.488b4aef97c28727.js +55 -0
  25. sky/dashboard/out/_next/static/chunks/6990-f64e03df359e04f7.js +1 -0
  26. sky/dashboard/out/_next/static/chunks/7411-2cc31dc0fdf2a9ad.js +41 -0
  27. sky/dashboard/out/_next/static/chunks/9025.4a9099bdf3ed4875.js +6 -0
  28. sky/dashboard/out/_next/static/chunks/{938-63fc419cb82ad9b3.js → 938-7ee806653aef0609.js} +1 -1
  29. sky/dashboard/out/_next/static/chunks/9847.387abf8a14d722db.js +30 -0
  30. sky/dashboard/out/_next/static/chunks/{9984.2b5e3fa69171bff9.js → 9984.0460de9d3adf5582.js} +1 -1
  31. sky/dashboard/out/_next/static/chunks/pages/_app-da491665d4289aae.js +34 -0
  32. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/{[job]-fa406155b4223d0d.js → [job]-2186770cc2de1623.js} +2 -2
  33. sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-0c37ee1ac5f3474d.js → [cluster]-95afb019ab85801c.js} +1 -1
  34. sky/dashboard/out/_next/static/chunks/pages/clusters-3d4be4961e1c94eb.js +1 -0
  35. sky/dashboard/out/_next/static/chunks/pages/index-89e7daf7b7df02e0.js +1 -0
  36. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-a90b4fe4616dc501.js +1 -0
  37. sky/dashboard/out/_next/static/chunks/pages/infra-0d3d1f890c5d188a.js +1 -0
  38. sky/dashboard/out/_next/static/chunks/pages/jobs/{[job]-c5b357bfd9502fbe.js → [job]-dc0299ffefebcdbe.js} +2 -2
  39. sky/dashboard/out/_next/static/chunks/pages/jobs-49f790d12a85027c.js +1 -0
  40. sky/dashboard/out/_next/static/chunks/pages/{users-19e98664bdd61643.js → users-6790fcefd5487b13.js} +1 -1
  41. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-6bcd4b20914d76c9.js +1 -0
  42. sky/dashboard/out/_next/static/chunks/pages/workspaces-5f7fe4b7d55b8612.js +1 -0
  43. sky/dashboard/out/_next/static/chunks/webpack-b6447da22305b14a.js +1 -0
  44. sky/dashboard/out/_next/static/css/b3227360726f12eb.css +3 -0
  45. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  46. sky/dashboard/out/clusters/[cluster].html +1 -1
  47. sky/dashboard/out/clusters.html +1 -1
  48. sky/dashboard/out/config.html +1 -1
  49. sky/dashboard/out/index.html +1 -1
  50. sky/dashboard/out/infra/[context].html +1 -1
  51. sky/dashboard/out/infra.html +1 -1
  52. sky/dashboard/out/jobs/[job].html +1 -1
  53. sky/dashboard/out/jobs.html +1 -1
  54. sky/dashboard/out/users.html +1 -1
  55. sky/dashboard/out/volumes.html +1 -1
  56. sky/dashboard/out/workspace/new.html +1 -1
  57. sky/dashboard/out/workspaces/[name].html +1 -1
  58. sky/dashboard/out/workspaces.html +1 -1
  59. sky/data/mounting_utils.py +93 -32
  60. sky/exceptions.py +8 -0
  61. sky/global_user_state.py +2 -3
  62. sky/jobs/state.py +2 -2
  63. sky/logs/__init__.py +4 -0
  64. sky/logs/agent.py +14 -0
  65. sky/logs/aws.py +276 -0
  66. sky/provision/nebius/utils.py +3 -6
  67. sky/server/common.py +9 -4
  68. sky/server/requests/payloads.py +20 -4
  69. sky/server/rest.py +6 -0
  70. sky/server/server.py +2 -1
  71. sky/setup_files/MANIFEST.in +1 -1
  72. sky/setup_files/alembic.ini +0 -4
  73. sky/skylet/constants.py +4 -0
  74. sky/skypilot_config.py +5 -31
  75. sky/utils/common_utils.py +8 -3
  76. sky/utils/config_utils.py +17 -0
  77. sky/utils/db/migration_utils.py +44 -4
  78. sky/utils/locks.py +319 -0
  79. sky/utils/rich_utils.py +2 -3
  80. sky/utils/schemas.py +92 -56
  81. sky/utils/timeline.py +41 -0
  82. {skypilot_nightly-1.0.0.dev20250720.dist-info → skypilot_nightly-1.0.0.dev20250724.dist-info}/METADATA +1 -1
  83. {skypilot_nightly-1.0.0.dev20250720.dist-info → skypilot_nightly-1.0.0.dev20250724.dist-info}/RECORD +88 -86
  84. sky/dashboard/out/_next/static/chunks/1746.27d40aedc22bd2d6.js +0 -60
  85. sky/dashboard/out/_next/static/chunks/2544.27f70672535675ed.js +0 -1
  86. sky/dashboard/out/_next/static/chunks/2875.c24c6d57dc82e436.js +0 -25
  87. sky/dashboard/out/_next/static/chunks/3785.95b94f18aaec7233.js +0 -1
  88. sky/dashboard/out/_next/static/chunks/3947-b059261d6fa88a1f.js +0 -35
  89. sky/dashboard/out/_next/static/chunks/430.ed51037d1a4a438b.js +0 -1
  90. sky/dashboard/out/_next/static/chunks/4869.c7c055a5c2814f33.js +0 -16
  91. sky/dashboard/out/_next/static/chunks/5491.918ffed0ba7a5294.js +0 -20
  92. sky/dashboard/out/_next/static/chunks/6990-dcb411b566e64cde.js +0 -1
  93. sky/dashboard/out/_next/static/chunks/804-9f5e98ce84d46bdd.js +0 -21
  94. sky/dashboard/out/_next/static/chunks/9025.133e9ba5c780afeb.js +0 -6
  95. sky/dashboard/out/_next/static/chunks/9470-8178183f3bae198f.js +0 -1
  96. sky/dashboard/out/_next/static/chunks/9847.46e613d000c55859.js +0 -30
  97. sky/dashboard/out/_next/static/chunks/pages/_app-507712f30cd3cec3.js +0 -20
  98. sky/dashboard/out/_next/static/chunks/pages/clusters-102d169e87913ba1.js +0 -1
  99. sky/dashboard/out/_next/static/chunks/pages/index-927ddeebe57a8ac3.js +0 -1
  100. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-8b0809f59034d509.js +0 -1
  101. sky/dashboard/out/_next/static/chunks/pages/infra-ae9d2f705ce582c9.js +0 -1
  102. sky/dashboard/out/_next/static/chunks/pages/jobs-5bbdc71878f0a068.js +0 -1
  103. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-7c0187f43757a548.js +0 -1
  104. sky/dashboard/out/_next/static/chunks/pages/workspaces-a1e43d9ef51a9cea.js +0 -1
  105. sky/dashboard/out/_next/static/chunks/webpack-26cdc782eed15a7d.js +0 -1
  106. sky/dashboard/out/_next/static/css/5122cb0a08486fd3.css +0 -3
  107. sky/dashboard/out/_next/static/pTQKG61ng32Zc7gsAROFJ/_buildManifest.js +0 -1
  108. sky/schemas/db/skypilot_config/001_initial_schema.py +0 -30
  109. /sky/dashboard/out/_next/static/{pTQKG61ng32Zc7gsAROFJ → BURfWrKsQk9psMPv0OXrh}/_ssgManifest.js +0 -0
  110. {skypilot_nightly-1.0.0.dev20250720.dist-info → skypilot_nightly-1.0.0.dev20250724.dist-info}/WHEEL +0 -0
  111. {skypilot_nightly-1.0.0.dev20250720.dist-info → skypilot_nightly-1.0.0.dev20250724.dist-info}/entry_points.txt +0 -0
  112. {skypilot_nightly-1.0.0.dev20250720.dist-info → skypilot_nightly-1.0.0.dev20250724.dist-info}/licenses/LICENSE +0 -0
  113. {skypilot_nightly-1.0.0.dev20250720.dist-info → skypilot_nightly-1.0.0.dev20250724.dist-info}/top_level.txt +0 -0
sky/utils/locks.py ADDED
@@ -0,0 +1,319 @@
1
+ """Lock for SkyPilot.
2
+
3
+ This module provides an abstraction for locking that can use
4
+ either local file locks or database-based distributed locks.
5
+ """
6
+ import abc
7
+ import hashlib
8
+ import logging
9
+ import os
10
+ import time
11
+ from typing import Any, Optional
12
+
13
+ import filelock
14
+ import sqlalchemy
15
+
16
+ from sky import global_user_state
17
+ from sky.skylet import constants
18
+ from sky.utils import common_utils
19
+ from sky.utils.db import db_utils
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
class LockTimeout(RuntimeError):
    """Signal that a lock could not be acquired.

    The lock implementations in this module raise it both when a
    non-blocking attempt fails and when a blocking attempt times out.
    """
27
+
28
+
29
class AcquireReturnProxy:
    """Context-manager handle for a lock that was already acquired.

    ``acquire()`` returns one of these so callers can write
    ``with lock.acquire(): ...``. Entering the block does not acquire
    anything again; leaving it releases the wrapped lock.
    """

    def __init__(self, lock: 'DistributedLock') -> None:
        # Public attribute: __enter__ hands this object back to the caller.
        self.lock = lock

    def __enter__(self) -> 'DistributedLock':
        """Return the wrapped lock without touching its state."""
        return self.lock

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Release the wrapped lock; exceptions from the body propagate."""
        self.lock.release()
44
+
45
+
46
class DistributedLock(abc.ABC):
    """Common interface for the lock backends in this module.

    Subclasses supply acquire/release/force_unlock/is_locked; this base
    class stores the shared settings and provides ``with lock:`` support.
    The intent is a lock usable across processes and, depending on the
    backend, across machines.
    """

    def __init__(self,
                 lock_id: str,
                 timeout: Optional[float] = None,
                 poll_interval: float = 0.1):
        """Store the lock settings.

        Args:
            lock_id: Unique identifier for the lock.
            timeout: Maximum number of seconds to wait for acquisition;
                None means wait indefinitely.
            poll_interval: Seconds to wait between acquisition attempts.
        """
        self.lock_id = lock_id
        self.timeout = timeout
        self.poll_interval = poll_interval

    @abc.abstractmethod
    def acquire(self, blocking: bool = True) -> AcquireReturnProxy:
        """Acquire the lock.

        Args:
            blocking: When True, wait until the lock is acquired or the
                timeout elapses. When False, do not wait.

        Returns:
            An AcquireReturnProxy usable as a context manager.

        Raises:
            LockTimeout: If the lock cannot be acquired.
        """

    @abc.abstractmethod
    def release(self) -> None:
        """Release the lock."""

    @abc.abstractmethod
    def force_unlock(self) -> None:
        """Force unlock the lock if it is acquired."""

    @abc.abstractmethod
    def is_locked(self) -> bool:
        """Report whether the lock is acquired."""

    def __enter__(self) -> 'DistributedLock':
        """Acquire on entry and hand back the lock itself."""
        self.acquire()
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Release on exit; exceptions from the body propagate."""
        self.release()
108
+
109
+
110
class FileLock(DistributedLock):
    """File-based lock backed by ``filelock.FileLock``.

    Works across multiple processes as long as they share the filesystem
    that holds ``constants.SKY_LOCKS_DIR``.
    """

    def __init__(self,
                 lock_id: str,
                 timeout: Optional[float] = None,
                 poll_interval: float = 0.1):
        """Create the lock directory if needed and build the file lock.

        Args:
            lock_id: Unique identifier for the lock; it becomes part of the
                lock file name (``.<lock_id>.lock``).
            timeout: Maximum number of seconds to wait for acquisition;
                None means wait indefinitely.
            poll_interval: Seconds to wait between acquisition attempts.
        """
        super().__init__(lock_id, timeout, poll_interval)
        os.makedirs(constants.SKY_LOCKS_DIR, exist_ok=True)
        self.lock_path = os.path.join(constants.SKY_LOCKS_DIR,
                                      f'.{lock_id}.lock')
        # filelock expresses "wait forever" as a negative timeout.
        filelock_timeout = -1 if timeout is None else timeout
        self._filelock: filelock.FileLock = filelock.FileLock(
            self.lock_path, timeout=filelock_timeout)

    def acquire(self, blocking: bool = True) -> AcquireReturnProxy:
        """Acquire the file lock.

        Args:
            blocking: When False, fail immediately if the lock is held
                elsewhere instead of waiting.

        Returns:
            An AcquireReturnProxy that releases this lock on exit.

        Raises:
            LockTimeout: If the lock cannot be acquired.
        """
        try:
            # filelock either returns a (truthy) proxy or raises Timeout,
            # so there is no falsy return value to check. The configured
            # poll interval is forwarded so it actually takes effect.
            self._filelock.acquire(poll_interval=self.poll_interval,
                                   blocking=blocking)
        except filelock.Timeout as e:
            raise LockTimeout(
                f'Failed to acquire file lock {self.lock_id}') from e
        return AcquireReturnProxy(self)

    def release(self) -> None:
        """Release the file lock."""
        self._filelock.release()

    def force_unlock(self) -> None:
        """Force unlock by deleting the lock file.

        NOTE(review): this only removes the file; whether a process that
        still holds the old file descriptor keeps its lock is up to the
        platform's file-locking semantics - confirm before relying on it.
        """
        common_utils.remove_file_if_exists(self.lock_path)

    def is_locked(self) -> bool:
        """Report whether this object currently holds the file lock."""
        # ``is_locked`` is a property on filelock's lock classes; calling
        # it would raise TypeError ('bool' object is not callable).
        return self._filelock.is_locked
158
+
159
+
160
class PostgresLock(DistributedLock):
    """PostgreSQL advisory lock implementation.

    Uses session-level PostgreSQL advisory locks so that processes on
    different machines sharing the same database can coordinate. The lock
    is tied to the database connection taken in ``acquire()``, which is
    kept until ``release()``.

    Reference:
    https://www.postgresql.org/docs/current/explicit-locking.html
    #ADVISORY-LOCKS
    """

    def __init__(self,
                 lock_id: str,
                 timeout: Optional[float] = None,
                 poll_interval: float = 1):
        """Initialize the postgres lock.

        Args:
            lock_id: Unique identifier for the lock.
            timeout: Maximum number of seconds to wait for acquisition;
                None means wait indefinitely.
            poll_interval: Seconds between acquisition attempts; defaults
                to 1 second to avoid storming the database.
        """
        super().__init__(lock_id, timeout, poll_interval)
        # Advisory locks are keyed by integer, so hash the string id.
        self._lock_key = self._string_to_lock_key(lock_id)
        self._acquired = False
        self._connection: Optional[sqlalchemy.pool.PoolProxiedConnection] = None

    def _string_to_lock_key(self, s: str) -> int:
        """Map a string to a non-negative 63-bit integer lock key."""
        hash_digest = hashlib.sha256(s.encode('utf-8')).digest()
        # First 8 bytes of the digest, with the top bit masked off so the
        # value stays within the positive range of a signed 64-bit integer.
        return int.from_bytes(hash_digest[:8], 'big') & ((1 << 63) - 1)

    def _get_connection(self) -> sqlalchemy.pool.PoolProxiedConnection:
        """Return a raw DB connection.

        Raises:
            ValueError: If the configured database is not PostgreSQL.
        """
        engine = global_user_state.initialize_and_get_db()
        if engine.dialect.name != db_utils.SQLAlchemyDialect.POSTGRESQL.value:
            raise ValueError('PostgresLock requires PostgreSQL database. '
                             f'Current dialect: {engine.dialect.name}')
        return engine.raw_connection()

    def acquire(self, blocking: bool = True) -> AcquireReturnProxy:
        """Acquire the postgres advisory lock.

        Polls ``pg_try_advisory_lock`` every ``poll_interval`` seconds.
        If this object already holds the lock, returns immediately.

        Args:
            blocking: When False, make a single attempt and fail at once
                if the lock is held elsewhere.

        Returns:
            An AcquireReturnProxy that releases this lock on exit.

        Raises:
            LockTimeout: If the lock is not acquired (non-blocking failure
                or timeout exceeded).
        """
        if self._acquired:
            return AcquireReturnProxy(self)

        self._connection = self._get_connection()
        # Monotonic clock: wall-clock adjustments must not stretch or
        # shorten the timeout.
        start_time = time.monotonic()

        try:
            # Created inside the try so that a failure here still closes
            # the connection below.
            cursor = self._connection.cursor()
            try:
                while True:
                    cursor.execute('SELECT pg_try_advisory_lock(%s)',
                                   (self._lock_key,))
                    if cursor.fetchone()[0]:
                        self._acquired = True
                        return AcquireReturnProxy(self)

                    if not blocking:
                        raise LockTimeout(
                            f'Failed to immediately acquire postgres lock '
                            f'{self.lock_id}')

                    if (self.timeout is not None and
                            time.monotonic() - start_time > self.timeout):
                        raise LockTimeout(
                            f'Failed to acquire postgres lock {self.lock_id} '
                            f'within {self.timeout} seconds')

                    time.sleep(self.poll_interval)
            finally:
                # Only the cursor is closed; the connection stays open on
                # success because it is what holds the lock.
                cursor.close()
        except Exception:
            if self._connection:
                self._connection.close()
                self._connection = None
            raise

    def release(self) -> None:
        """Release the postgres advisory lock and drop the connection.

        Does nothing if this object does not hold the lock.
        """
        if not self._acquired or not self._connection:
            return

        connection = self._connection
        try:
            cursor = connection.cursor()
            try:
                cursor.execute('SELECT pg_advisory_unlock(%s)',
                               (self._lock_key,))
            finally:
                cursor.close()
            connection.commit()
        finally:
            # The connection is dropped whether or not the unlock
            # succeeded, so this object can no longer manage the lock.
            # Clear the flag too; otherwise a later acquire() would return
            # immediately without actually taking the lock.
            self._acquired = False
            self._connection = None
            connection.close()

    def force_unlock(self) -> None:
        """Force unlock the postgres advisory lock.

        NOTE(review): advisory locks are session-scoped, so when this
        object does not hold the lock and a fresh connection is used, the
        unlock call presumably cannot release a lock held by another
        session - confirm against the PostgreSQL documentation.

        Raises:
            RuntimeError: If the unlock statement fails.
        """
        try:
            if not self._connection:
                self._connection = self._get_connection()
            cursor = self._connection.cursor()
            try:
                cursor.execute('SELECT pg_advisory_unlock(%s)',
                               (self._lock_key,))
            finally:
                cursor.close()
            self._connection.commit()
        except Exception as e:
            raise RuntimeError(
                f'Failed to force unlock postgres lock {self.lock_id}: {e}'
            ) from e
        finally:
            # Keep the flag consistent with the connection being dropped.
            self._acquired = False
            if self._connection:
                self._connection.close()
                self._connection = None

    def is_locked(self) -> bool:
        """Report whether this object currently holds the lock."""
        return self._acquired
276
+
277
+
278
def get_lock(lock_id: str,
             timeout: Optional[float] = None,
             lock_type: Optional[str] = None,
             poll_interval: Optional[float] = None) -> DistributedLock:
    """Build a lock of the requested (or auto-detected) backend.

    Args:
        lock_id: Unique identifier for the lock.
        timeout: Maximum number of seconds to wait for lock acquisition;
            None means wait indefinitely.
        lock_type: Backend to use, 'filelock' or 'postgres'. When None,
            the backend is chosen from the database configuration.
        poll_interval: Seconds between acquisition attempts. When None,
            the chosen backend's own default is used.

    Returns:
        DistributedLock instance.

    Raises:
        ValueError: If the lock type is not recognized.
    """
    chosen = _detect_lock_type() if lock_type is None else lock_type

    # Forward poll_interval only when the caller set it, so each backend
    # keeps its own default otherwise.
    extra_args = () if poll_interval is None else (poll_interval,)

    if chosen == 'postgres':
        return PostgresLock(lock_id, timeout, *extra_args)
    if chosen == 'filelock':
        return FileLock(lock_id, timeout, *extra_args)
    raise ValueError(f'Unknown lock type: {chosen}')
307
+
308
+
309
+ def _detect_lock_type() -> str:
310
+ """Auto-detect the appropriate lock type based on configuration."""
311
+ try:
312
+ engine = global_user_state.initialize_and_get_db()
313
+ if engine.dialect.name == db_utils.SQLAlchemyDialect.POSTGRESQL.value:
314
+ return 'postgres'
315
+ except Exception: # pylint: disable=broad-except
316
+ # Fall back to filelock if database detection fails
317
+ pass
318
+
319
+ return 'filelock'
sky/utils/rich_utils.py CHANGED
@@ -368,9 +368,8 @@ def decode_rich_status(
368
368
  continue
369
369
 
370
370
  if control == Control.RETRY:
371
- raise exceptions.ServerTemporarilyUnavailableError(
372
- 'The server is temporarily unavailable. Please try '
373
- 'again.')
371
+ raise exceptions.RequestInterruptedError(
372
+ 'Streaming interrupted. Please retry.')
374
373
  # control is not None, i.e. it is a rich status control message.
375
374
  if threading.current_thread() is not threading.main_thread():
376
375
  yield None
sky/utils/schemas.py CHANGED
@@ -1323,27 +1323,33 @@ def get_config_schema():
1323
1323
  'oci': {
1324
1324
  'type': 'object',
1325
1325
  'required': [],
1326
- 'properties': {},
1327
- # Properties are either 'default' or a region name.
1328
- 'additionalProperties': {
1329
- 'type': 'object',
1330
- 'required': [],
1331
- 'additionalProperties': False,
1332
- 'properties': {
1333
- 'compartment_ocid': {
1334
- 'type': 'string',
1335
- },
1336
- 'image_tag_general': {
1337
- 'type': 'string',
1338
- },
1339
- 'image_tag_gpu': {
1340
- 'type': 'string',
1341
- },
1342
- 'vcn_ocid': {
1343
- 'type': 'string',
1344
- },
1345
- 'vcn_subnet': {
1346
- 'type': 'string',
1326
+ 'properties': {
1327
+ 'region_configs': {
1328
+ 'type': 'object',
1329
+ 'required': [],
1330
+ 'properties': {},
1331
+ # Properties are either 'default' or a region name.
1332
+ 'additionalProperties': {
1333
+ 'type': 'object',
1334
+ 'required': [],
1335
+ 'additionalProperties': False,
1336
+ 'properties': {
1337
+ 'compartment_ocid': {
1338
+ 'type': 'string',
1339
+ },
1340
+ 'image_tag_general': {
1341
+ 'type': 'string',
1342
+ },
1343
+ 'image_tag_gpu': {
1344
+ 'type': 'string',
1345
+ },
1346
+ 'vcn_ocid': {
1347
+ 'type': 'string',
1348
+ },
1349
+ 'vcn_subnet': {
1350
+ 'type': 'string',
1351
+ },
1352
+ }
1347
1353
  },
1348
1354
  }
1349
1355
  },
@@ -1352,43 +1358,47 @@ def get_config_schema():
1352
1358
  'type': 'object',
1353
1359
  'required': [],
1354
1360
  'properties': {
1355
- **_NETWORK_CONFIG_SCHEMA,
1356
- 'tenant_id': {
1361
+ **_NETWORK_CONFIG_SCHEMA, 'tenant_id': {
1357
1362
  'type': 'string',
1358
1363
  },
1359
- },
1360
- 'additionalProperties': {
1361
- 'type': 'object',
1362
- 'required': [],
1363
- 'additionalProperties': False,
1364
- 'properties': {
1365
- 'project_id': {
1366
- 'type': 'string',
1367
- },
1368
- 'fabric': {
1369
- 'type': 'string',
1370
- },
1371
- 'filesystems': {
1372
- 'type': 'array',
1373
- 'items': {
1374
- 'type': 'object',
1375
- 'additionalProperties': False,
1376
- 'properties': {
1377
- 'filesystem_id': {
1378
- 'type': 'string',
1379
- },
1380
- 'attach_mode': {
1381
- 'type': 'string',
1382
- 'case_sensitive_enum': [
1383
- 'READ_WRITE', 'READ_ONLY'
1384
- ]
1385
- },
1386
- 'mount_path': {
1387
- 'type': 'string',
1364
+ 'region_configs': {
1365
+ 'type': 'object',
1366
+ 'required': [],
1367
+ 'properties': {},
1368
+ 'additionalProperties': {
1369
+ 'type': 'object',
1370
+ 'required': [],
1371
+ 'additionalProperties': False,
1372
+ 'properties': {
1373
+ 'project_id': {
1374
+ 'type': 'string',
1375
+ },
1376
+ 'fabric': {
1377
+ 'type': 'string',
1378
+ },
1379
+ 'filesystems': {
1380
+ 'type': 'array',
1381
+ 'items': {
1382
+ 'type': 'object',
1383
+ 'additionalProperties': False,
1384
+ 'properties': {
1385
+ 'filesystem_id': {
1386
+ 'type': 'string',
1387
+ },
1388
+ 'attach_mode': {
1389
+ 'type': 'string',
1390
+ 'case_sensitive_enum': [
1391
+ 'READ_WRITE', 'READ_ONLY'
1392
+ ]
1393
+ },
1394
+ 'mount_path': {
1395
+ 'type': 'string',
1396
+ }
1397
+ }
1388
1398
  }
1389
- }
1390
- }
1391
- },
1399
+ },
1400
+ },
1401
+ }
1392
1402
  }
1393
1403
  },
1394
1404
  }
@@ -1611,7 +1621,7 @@ def get_config_schema():
1611
1621
  'properties': {
1612
1622
  'store': {
1613
1623
  'type': 'string',
1614
- 'case_insensitive_enum': ['gcp'],
1624
+ 'case_insensitive_enum': ['gcp', 'aws'],
1615
1625
  },
1616
1626
  'gcp': {
1617
1627
  'type': 'object',
@@ -1630,6 +1640,32 @@ def get_config_schema():
1630
1640
  },
1631
1641
  },
1632
1642
  },
1643
+ 'aws': {
1644
+ 'type': 'object',
1645
+ 'properties': {
1646
+ 'region': {
1647
+ 'type': 'string',
1648
+ },
1649
+ 'credentials_file': {
1650
+ 'type': 'string',
1651
+ },
1652
+ 'log_group_name': {
1653
+ 'type': 'string',
1654
+ },
1655
+ 'log_stream_prefix': {
1656
+ 'type': 'string',
1657
+ },
1658
+ 'auto_create_group': {
1659
+ 'type': 'boolean',
1660
+ },
1661
+ 'additional_tags': {
1662
+ 'type': 'object',
1663
+ 'additionalProperties': {
1664
+ 'type': 'string',
1665
+ },
1666
+ },
1667
+ },
1668
+ },
1633
1669
  },
1634
1670
  }
1635
1671
 
sky/utils/timeline.py CHANGED
@@ -15,6 +15,7 @@ from typing import Callable, Optional, Union
15
15
  import filelock
16
16
 
17
17
  from sky.utils import common_utils
18
+ from sky.utils import locks
18
19
 
19
20
  _events = []
20
21
 
@@ -76,6 +77,46 @@ def event(name_or_fn: Union[str, Callable], message: Optional[str] = None):
76
77
  return common_utils.make_decorator(Event, name_or_fn, message=message)
77
78
 
78
79
 
80
class DistributedLockEvent:
    """Serve both as a distributed lock and event for the lock.

    Wraps a lock from ``locks.get_lock`` and records two timeline events:
    one around each acquisition attempt and one spanning the time the lock
    is held. Usable as a context manager or as a function decorator.
    """

    def __init__(self, lock_id: str, timeout: Optional[float] = None):
        """Create the underlying lock and the hold event.

        Args:
            lock_id: Unique identifier for the lock.
            timeout: Maximum number of seconds to wait for acquisition;
                None means wait indefinitely.
        """
        self._lock_id = lock_id
        self._lock = locks.get_lock(lock_id, timeout)
        self._hold_lock_event = Event(f'[DistributedLock.hold]:{lock_id}')

    def _is_locked(self) -> bool:
        """Best-effort query of whether the underlying lock is held.

        ``is_locked`` is a method on the lock interface, so it must be
        called; reading the attribute alone yields a bound method that is
        always truthy. A property-style attribute is accepted as well.
        Timeline bookkeeping must never break locking, so a lock whose
        state cannot be queried is treated as not held.
        """
        try:
            state = self._lock.is_locked
            if callable(state):
                state = state()
        except Exception:  # pylint: disable=broad-except
            return False
        return bool(state)

    def acquire(self):
        """Acquire the lock, timing the attempt and starting the hold event."""
        was_locked = self._is_locked()
        with Event(f'[DistributedLock.acquire]:{self._lock_id}'):
            self._lock.acquire()
        if not was_locked and self._is_locked():
            # start holding the lock after initial acquiring
            self._hold_lock_event.begin()

    def release(self):
        """Release the lock and end the hold event once it is fully released."""
        was_locked = self._is_locked()
        self._lock.release()
        if was_locked and not self._is_locked():
            # stop holding the lock after initial releasing
            self._hold_lock_event.end()

    def __enter__(self):
        self.acquire()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.release()

    def __call__(self, f):
        """Decorate ``f`` so that it runs while holding this lock."""

        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            with self:
                return f(*args, **kwargs)

        return wrapper
118
+
119
+
79
120
  class FileLockEvent:
80
121
  """Serve both as a file lock and event for the lock."""
81
122
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: skypilot-nightly
3
- Version: 1.0.0.dev20250720
3
+ Version: 1.0.0.dev20250724
4
4
  Summary: SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.
5
5
  Author: SkyPilot Team
6
6
  License: Apache 2.0