skypilot-nightly 1.0.0.dev20250720__py3-none-any.whl → 1.0.0.dev20250723__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (103) hide show
  1. sky/__init__.py +2 -2
  2. sky/admin_policy.py +11 -4
  3. sky/backends/backend_utils.py +27 -11
  4. sky/backends/cloud_vm_ray_backend.py +22 -27
  5. sky/client/cli/command.py +38 -23
  6. sky/client/sdk.py +52 -7
  7. sky/clouds/nebius.py +2 -5
  8. sky/clouds/vast.py +2 -1
  9. sky/dashboard/out/404.html +1 -1
  10. sky/dashboard/out/_next/static/chunks/{1141-d8c6404a7c6fffe6.js → 1141-e49a159c30a6c4a7.js} +1 -1
  11. sky/dashboard/out/_next/static/chunks/1559-18717d96ef2fcbe9.js +30 -0
  12. sky/dashboard/out/_next/static/chunks/{1871-a821dcaaae2a3823.js → 1871-ea0e7283886407ca.js} +2 -2
  13. sky/dashboard/out/_next/static/chunks/2003.b82e6db40ec4c463.js +1 -0
  14. sky/dashboard/out/_next/static/chunks/2350.23778a2b19aabd33.js +1 -0
  15. sky/dashboard/out/_next/static/chunks/2369.2d6e4757f8dfc2b7.js +15 -0
  16. sky/dashboard/out/_next/static/chunks/{2641.5233e938f14e31a7.js → 2641.74c19c4d45a2c034.js} +1 -1
  17. sky/dashboard/out/_next/static/chunks/3785.59705416215ff08b.js +1 -0
  18. sky/dashboard/out/_next/static/chunks/4869.da729a7db3a31f43.js +16 -0
  19. sky/dashboard/out/_next/static/chunks/4937.d75809403fc264ac.js +15 -0
  20. sky/dashboard/out/_next/static/chunks/6135-2abbd0352f8ee061.js +1 -0
  21. sky/dashboard/out/_next/static/chunks/691.488b4aef97c28727.js +55 -0
  22. sky/dashboard/out/_next/static/chunks/6990-f64e03df359e04f7.js +1 -0
  23. sky/dashboard/out/_next/static/chunks/7411-2cc31dc0fdf2a9ad.js +41 -0
  24. sky/dashboard/out/_next/static/chunks/9025.4a9099bdf3ed4875.js +6 -0
  25. sky/dashboard/out/_next/static/chunks/{938-63fc419cb82ad9b3.js → 938-7ee806653aef0609.js} +1 -1
  26. sky/dashboard/out/_next/static/chunks/9847.387abf8a14d722db.js +30 -0
  27. sky/dashboard/out/_next/static/chunks/{9984.2b5e3fa69171bff9.js → 9984.0460de9d3adf5582.js} +1 -1
  28. sky/dashboard/out/_next/static/chunks/pages/_app-da491665d4289aae.js +34 -0
  29. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/{[job]-fa406155b4223d0d.js → [job]-2186770cc2de1623.js} +2 -2
  30. sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-0c37ee1ac5f3474d.js → [cluster]-95afb019ab85801c.js} +1 -1
  31. sky/dashboard/out/_next/static/chunks/pages/clusters-3d4be4961e1c94eb.js +1 -0
  32. sky/dashboard/out/_next/static/chunks/pages/index-89e7daf7b7df02e0.js +1 -0
  33. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-a90b4fe4616dc501.js +1 -0
  34. sky/dashboard/out/_next/static/chunks/pages/infra-0d3d1f890c5d188a.js +1 -0
  35. sky/dashboard/out/_next/static/chunks/pages/jobs/{[job]-c5b357bfd9502fbe.js → [job]-dc0299ffefebcdbe.js} +2 -2
  36. sky/dashboard/out/_next/static/chunks/pages/jobs-49f790d12a85027c.js +1 -0
  37. sky/dashboard/out/_next/static/chunks/pages/{users-19e98664bdd61643.js → users-6790fcefd5487b13.js} +1 -1
  38. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-6bcd4b20914d76c9.js +1 -0
  39. sky/dashboard/out/_next/static/chunks/pages/workspaces-5f7fe4b7d55b8612.js +1 -0
  40. sky/dashboard/out/_next/static/chunks/webpack-a305898dc479711e.js +1 -0
  41. sky/dashboard/out/_next/static/css/b3227360726f12eb.css +3 -0
  42. sky/dashboard/out/_next/static/mym3Ciwp-zqU7ZpOLGnrW/_buildManifest.js +1 -0
  43. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  44. sky/dashboard/out/clusters/[cluster].html +1 -1
  45. sky/dashboard/out/clusters.html +1 -1
  46. sky/dashboard/out/config.html +1 -1
  47. sky/dashboard/out/index.html +1 -1
  48. sky/dashboard/out/infra/[context].html +1 -1
  49. sky/dashboard/out/infra.html +1 -1
  50. sky/dashboard/out/jobs/[job].html +1 -1
  51. sky/dashboard/out/jobs.html +1 -1
  52. sky/dashboard/out/users.html +1 -1
  53. sky/dashboard/out/volumes.html +1 -1
  54. sky/dashboard/out/workspace/new.html +1 -1
  55. sky/dashboard/out/workspaces/[name].html +1 -1
  56. sky/dashboard/out/workspaces.html +1 -1
  57. sky/data/mounting_utils.py +93 -32
  58. sky/global_user_state.py +2 -3
  59. sky/jobs/state.py +2 -2
  60. sky/provision/nebius/utils.py +3 -6
  61. sky/server/common.py +4 -3
  62. sky/setup_files/MANIFEST.in +1 -1
  63. sky/setup_files/alembic.ini +0 -4
  64. sky/skylet/constants.py +4 -0
  65. sky/skypilot_config.py +5 -31
  66. sky/utils/common_utils.py +8 -3
  67. sky/utils/config_utils.py +15 -0
  68. sky/utils/db/migration_utils.py +44 -4
  69. sky/utils/locks.py +319 -0
  70. sky/utils/schemas.py +38 -34
  71. sky/utils/timeline.py +41 -0
  72. {skypilot_nightly-1.0.0.dev20250720.dist-info → skypilot_nightly-1.0.0.dev20250723.dist-info}/METADATA +1 -1
  73. {skypilot_nightly-1.0.0.dev20250720.dist-info → skypilot_nightly-1.0.0.dev20250723.dist-info}/RECORD +78 -78
  74. sky/dashboard/out/_next/static/chunks/1746.27d40aedc22bd2d6.js +0 -60
  75. sky/dashboard/out/_next/static/chunks/2544.27f70672535675ed.js +0 -1
  76. sky/dashboard/out/_next/static/chunks/2875.c24c6d57dc82e436.js +0 -25
  77. sky/dashboard/out/_next/static/chunks/3785.95b94f18aaec7233.js +0 -1
  78. sky/dashboard/out/_next/static/chunks/3947-b059261d6fa88a1f.js +0 -35
  79. sky/dashboard/out/_next/static/chunks/430.ed51037d1a4a438b.js +0 -1
  80. sky/dashboard/out/_next/static/chunks/4869.c7c055a5c2814f33.js +0 -16
  81. sky/dashboard/out/_next/static/chunks/5491.918ffed0ba7a5294.js +0 -20
  82. sky/dashboard/out/_next/static/chunks/6990-dcb411b566e64cde.js +0 -1
  83. sky/dashboard/out/_next/static/chunks/804-9f5e98ce84d46bdd.js +0 -21
  84. sky/dashboard/out/_next/static/chunks/9025.133e9ba5c780afeb.js +0 -6
  85. sky/dashboard/out/_next/static/chunks/9470-8178183f3bae198f.js +0 -1
  86. sky/dashboard/out/_next/static/chunks/9847.46e613d000c55859.js +0 -30
  87. sky/dashboard/out/_next/static/chunks/pages/_app-507712f30cd3cec3.js +0 -20
  88. sky/dashboard/out/_next/static/chunks/pages/clusters-102d169e87913ba1.js +0 -1
  89. sky/dashboard/out/_next/static/chunks/pages/index-927ddeebe57a8ac3.js +0 -1
  90. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-8b0809f59034d509.js +0 -1
  91. sky/dashboard/out/_next/static/chunks/pages/infra-ae9d2f705ce582c9.js +0 -1
  92. sky/dashboard/out/_next/static/chunks/pages/jobs-5bbdc71878f0a068.js +0 -1
  93. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-7c0187f43757a548.js +0 -1
  94. sky/dashboard/out/_next/static/chunks/pages/workspaces-a1e43d9ef51a9cea.js +0 -1
  95. sky/dashboard/out/_next/static/chunks/webpack-26cdc782eed15a7d.js +0 -1
  96. sky/dashboard/out/_next/static/css/5122cb0a08486fd3.css +0 -3
  97. sky/dashboard/out/_next/static/pTQKG61ng32Zc7gsAROFJ/_buildManifest.js +0 -1
  98. sky/schemas/db/skypilot_config/001_initial_schema.py +0 -30
  99. /sky/dashboard/out/_next/static/{pTQKG61ng32Zc7gsAROFJ → mym3Ciwp-zqU7ZpOLGnrW}/_ssgManifest.js +0 -0
  100. {skypilot_nightly-1.0.0.dev20250720.dist-info → skypilot_nightly-1.0.0.dev20250723.dist-info}/WHEEL +0 -0
  101. {skypilot_nightly-1.0.0.dev20250720.dist-info → skypilot_nightly-1.0.0.dev20250723.dist-info}/entry_points.txt +0 -0
  102. {skypilot_nightly-1.0.0.dev20250720.dist-info → skypilot_nightly-1.0.0.dev20250723.dist-info}/licenses/LICENSE +0 -0
  103. {skypilot_nightly-1.0.0.dev20250720.dist-info → skypilot_nightly-1.0.0.dev20250723.dist-info}/top_level.txt +0 -0
sky/utils/locks.py ADDED
@@ -0,0 +1,319 @@
1
+ """Lock for SkyPilot.
2
+
3
+ This module provides an abstraction for locking that can use
4
+ either local file locks or database-based distributed locks.
5
+ """
6
+ import abc
7
+ import hashlib
8
+ import logging
9
+ import os
10
+ import time
11
+ from typing import Any, Optional
12
+
13
+ import filelock
14
+ import sqlalchemy
15
+
16
+ from sky import global_user_state
17
+ from sky.skylet import constants
18
+ from sky.utils import common_utils
19
+ from sky.utils.db import db_utils
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
class LockTimeout(RuntimeError):
    """Error signalling that a lock could not be acquired in time."""
27
+
28
+
29
class AcquireReturnProxy:
    """Context-manager handle handed back by ``acquire()``.

    Entering the ``with`` block yields the wrapped lock; leaving the
    block (normally or via an exception) calls ``release()`` on it.
    """

    def __init__(self, lock: 'DistributedLock') -> None:
        # The lock whose release() is invoked on exit.
        self.lock = lock

    def __enter__(self) -> 'DistributedLock':
        wrapped = self.lock
        return wrapped

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        # Exception details are ignored: the lock is released regardless and
        # the implicit None return lets any exception propagate.
        wrapped = self.lock
        wrapped.release()
44
+
45
+
46
class DistributedLock(abc.ABC):
    """Common interface implemented by every lock backend in this module.

    Instances can be used directly as context managers (``with lock:``):
    entering acquires the lock and exiting releases it. Backends are meant
    to coordinate multiple processes and, depending on the backend,
    multiple machines.
    """

    def __init__(self,
                 lock_id: str,
                 timeout: Optional[float] = None,
                 poll_interval: float = 0.1):
        """Store the lock parameters.

        Args:
            lock_id: Unique identifier for the lock.
            timeout: Maximum time to wait for lock acquisition.
                If None, wait indefinitely.
            poll_interval: Interval in seconds to poll for lock acquisition.
        """
        self.lock_id = lock_id
        self.timeout = timeout
        self.poll_interval = poll_interval

    @abc.abstractmethod
    def acquire(self, blocking: bool = True) -> AcquireReturnProxy:
        """Take the lock.

        Args:
            blocking: When True, wait until the lock is obtained or the
                timeout elapses. When False, do not wait.

        Returns:
            An AcquireReturnProxy usable as a context manager.

        Raises:
            LockTimeout: The lock could not be acquired.
        """

    @abc.abstractmethod
    def release(self) -> None:
        """Give the lock back."""

    @abc.abstractmethod
    def force_unlock(self) -> None:
        """Forcibly unlock the lock if it is acquired."""

    @abc.abstractmethod
    def is_locked(self) -> bool:
        """Report whether the lock is acquired."""

    def __enter__(self) -> 'DistributedLock':
        """Acquire on entry and hand back the lock itself."""
        self.acquire()
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Release on exit; exceptions from the block are not suppressed."""
        self.release()
108
+
109
+
110
class FileLock(DistributedLock):
    """A wrapper around filelock.FileLock.

    This implements a distributed lock that works across multiple processes
    when they share the same filesystem. The lock file lives under
    ``constants.SKY_LOCKS_DIR`` and is named ``.<lock_id>.lock``.
    """

    def __init__(self,
                 lock_id: str,
                 timeout: Optional[float] = None,
                 poll_interval: float = 0.1):
        """Initialize the file lock.

        Args:
            lock_id: Unique identifier for the lock; also used to build the
                lock file name.
            timeout: Maximum time in seconds to wait for lock acquisition.
                None means wait indefinitely.
            poll_interval: Interval in seconds to poll for lock acquisition.
        """
        super().__init__(lock_id, timeout, poll_interval)
        os.makedirs(constants.SKY_LOCKS_DIR, exist_ok=True)
        self.lock_path = os.path.join(constants.SKY_LOCKS_DIR,
                                      f'.{lock_id}.lock')
        # filelock treats a negative timeout as "no timeout".
        if timeout is None:
            timeout = -1
        self._filelock: filelock.FileLock = filelock.FileLock(self.lock_path,
                                                              timeout=timeout)

    def acquire(self, blocking: bool = True) -> AcquireReturnProxy:
        """Acquire the file lock.

        Args:
            blocking: If False, fail immediately when the lock is busy
                instead of waiting up to the configured timeout.

        Returns:
            AcquireReturnProxy that releases this lock when used in a
            ``with`` statement.

        Raises:
            LockTimeout: If the lock cannot be acquired.
        """
        try:
            # Forward the configured poll interval; previously it was stored
            # on the instance but never handed to filelock.
            acquired = self._filelock.acquire(
                poll_interval=self.poll_interval, blocking=blocking)
        except filelock.Timeout as e:
            raise LockTimeout(
                f'Failed to acquire file lock {self.lock_id}') from e
        if not acquired:
            raise LockTimeout(f'Failed to acquire file lock {self.lock_id}')
        return AcquireReturnProxy(self)

    def release(self) -> None:
        """Release the file lock."""
        self._filelock.release()

    def force_unlock(self) -> None:
        """Force unlock the file lock by removing the lock file."""
        common_utils.remove_file_if_exists(self.lock_path)

    def is_locked(self) -> bool:
        """Check if this instance currently holds the file lock."""
        # filelock exposes ``is_locked`` as a property, not a method; calling
        # it would raise TypeError ('bool' object is not callable).
        return self._filelock.is_locked
158
+
159
+
160
class PostgresLock(DistributedLock):
    """PostgreSQL advisory lock implementation.

    Uses PostgreSQL advisory locks to implement distributed locking
    that works across multiple machines sharing the same database.
    The connection that took the lock is kept on the instance until the
    lock is released.

    Reference:
    https://www.postgresql.org/docs/current/explicit-locking.html
    #ADVISORY-LOCKS
    """

    def __init__(self,
                 lock_id: str,
                 timeout: Optional[float] = None,
                 poll_interval: float = 1):
        """Initialize the postgres lock.

        Args:
            lock_id: Unique identifier for the lock.
            timeout: Maximum time in seconds to wait for lock acquisition.
                None means wait indefinitely.
            poll_interval: Interval in seconds to poll for lock acquisition,
                default to 1 second to avoid storming the database.
        """
        super().__init__(lock_id, timeout, poll_interval)
        # Convert string lock_id to integer for postgres advisory locks
        self._lock_key = self._string_to_lock_key(lock_id)
        self._acquired = False
        self._connection: Optional[sqlalchemy.pool.PoolProxiedConnection] = None

    def _string_to_lock_key(self, s: str) -> int:
        """Convert string to a 64-bit integer for advisory lock key."""
        hash_digest = hashlib.sha256(s.encode('utf-8')).digest()
        # Take first 8 bytes and convert to int, ensure positive 64-bit
        return int.from_bytes(hash_digest[:8], 'big') & ((1 << 63) - 1)

    def _get_connection(self) -> sqlalchemy.pool.PoolProxiedConnection:
        """Get a raw database connection.

        Raises:
            ValueError: If the configured database is not PostgreSQL.
        """
        engine = global_user_state.initialize_and_get_db()
        if engine.dialect.name != db_utils.SQLAlchemyDialect.POSTGRESQL.value:
            raise ValueError('PostgresLock requires PostgreSQL database. '
                             f'Current dialect: {engine.dialect.name}')
        return engine.raw_connection()

    def _close_connection(self) -> None:
        """Close the held connection, if any, and forget it."""
        connection, self._connection = self._connection, None
        if connection is not None:
            connection.close()

    def _unlock(self) -> None:
        """Run pg_advisory_unlock on the held connection and commit."""
        assert self._connection is not None
        cursor = self._connection.cursor()
        try:
            cursor.execute('SELECT pg_advisory_unlock(%s)', (self._lock_key,))
        finally:
            cursor.close()
        self._connection.commit()

    def acquire(self, blocking: bool = True) -> AcquireReturnProxy:
        """Acquire the postgres advisory lock.

        Polls ``pg_try_advisory_lock`` every ``poll_interval`` seconds.
        Calling this while the instance already holds the lock is a no-op.

        Args:
            blocking: If False, fail right after the first unsuccessful
                attempt instead of polling.

        Returns:
            AcquireReturnProxy that releases this lock when used in a
            ``with`` statement.

        Raises:
            LockTimeout: If the lock is busy and ``blocking`` is False, or
                if it could not be acquired within ``timeout`` seconds.
        """
        if self._acquired:
            return AcquireReturnProxy(self)

        self._connection = self._get_connection()
        # Monotonic clock: wall-clock adjustments must not stretch or
        # shorten the timeout.
        start_time = time.monotonic()

        try:
            # Created inside the try so a failure here also closes the
            # connection instead of leaking it.
            cursor = self._connection.cursor()
            try:
                while True:
                    cursor.execute('SELECT pg_try_advisory_lock(%s)',
                                   (self._lock_key,))
                    result = cursor.fetchone()[0]

                    if result:
                        self._acquired = True
                        return AcquireReturnProxy(self)

                    if not blocking:
                        raise LockTimeout(
                            f'Failed to immediately acquire postgres lock '
                            f'{self.lock_id}')

                    if (self.timeout is not None and
                            time.monotonic() - start_time > self.timeout):
                        raise LockTimeout(
                            f'Failed to acquire postgres lock {self.lock_id} '
                            f'within {self.timeout} seconds')

                    time.sleep(self.poll_interval)
            finally:
                cursor.close()

        except Exception:
            self._close_connection()
            raise

    def release(self) -> None:
        """Release the postgres advisory lock.

        Does nothing if this instance does not hold the lock.
        """
        if not self._acquired or not self._connection:
            return

        try:
            self._unlock()
        finally:
            # The connection is given up either way, so this instance can no
            # longer manage the lock. Leaving _acquired set would make the
            # next acquire() return immediately without locking anything.
            self._acquired = False
            self._close_connection()

    def force_unlock(self) -> None:
        """Force unlock the postgres advisory lock.

        NOTE(review): PostgreSQL documents advisory locks as owned by the
        session that took them, so when a fresh connection is opened here
        the unlock presumably cannot release a lock held by another
        session - confirm the intended semantics.

        Raises:
            RuntimeError: If the unlock statement fails.
        """
        try:
            if not self._connection:
                self._connection = self._get_connection()
            self._unlock()
        except Exception as e:
            raise RuntimeError(
                f'Failed to force unlock postgres lock {self.lock_id}: {e}'
            ) from e
        finally:
            # Same reasoning as release(): without the connection the
            # instance must not keep reporting the lock as held.
            self._acquired = False
            self._close_connection()

    def is_locked(self) -> bool:
        """Check if the postgres advisory lock is acquired by this instance."""
        return self._acquired
276
+
277
+
278
def get_lock(lock_id: str,
             timeout: Optional[float] = None,
             lock_type: Optional[str] = None,
             poll_interval: Optional[float] = None) -> DistributedLock:
    """Build a lock object for ``lock_id``.

    Args:
        lock_id: Unique identifier for the lock.
        timeout: Maximum number of seconds to wait for acquisition;
            None means wait indefinitely.
        lock_type: Backend to use, either 'filelock' or 'postgres'.
            When None, the backend is chosen by ``_detect_lock_type()``.
        poll_interval: Polling interval in seconds. When None, the chosen
            lock class falls back to its own default.

    Returns:
        DistributedLock instance.

    Raises:
        ValueError: If ``lock_type`` names an unknown backend.
    """
    chosen_type = _detect_lock_type() if lock_type is None else lock_type

    if chosen_type == 'postgres':
        lock_cls = PostgresLock
    elif chosen_type == 'filelock':
        lock_cls = FileLock
    else:
        raise ValueError(f'Unknown lock type: {chosen_type}')

    # Only forward poll_interval when the caller supplied one, so each
    # backend keeps its own default otherwise.
    extra_args = () if poll_interval is None else (poll_interval,)
    return lock_cls(lock_id, timeout, *extra_args)
307
+
308
+
309
+ def _detect_lock_type() -> str:
310
+ """Auto-detect the appropriate lock type based on configuration."""
311
+ try:
312
+ engine = global_user_state.initialize_and_get_db()
313
+ if engine.dialect.name == db_utils.SQLAlchemyDialect.POSTGRESQL.value:
314
+ return 'postgres'
315
+ except Exception: # pylint: disable=broad-except
316
+ # Fall back to filelock if database detection fails
317
+ pass
318
+
319
+ return 'filelock'
sky/utils/schemas.py CHANGED
@@ -1352,43 +1352,47 @@ def get_config_schema():
1352
1352
  'type': 'object',
1353
1353
  'required': [],
1354
1354
  'properties': {
1355
- **_NETWORK_CONFIG_SCHEMA,
1356
- 'tenant_id': {
1355
+ **_NETWORK_CONFIG_SCHEMA, 'tenant_id': {
1357
1356
  'type': 'string',
1358
1357
  },
1359
- },
1360
- 'additionalProperties': {
1361
- 'type': 'object',
1362
- 'required': [],
1363
- 'additionalProperties': False,
1364
- 'properties': {
1365
- 'project_id': {
1366
- 'type': 'string',
1367
- },
1368
- 'fabric': {
1369
- 'type': 'string',
1370
- },
1371
- 'filesystems': {
1372
- 'type': 'array',
1373
- 'items': {
1374
- 'type': 'object',
1375
- 'additionalProperties': False,
1376
- 'properties': {
1377
- 'filesystem_id': {
1378
- 'type': 'string',
1379
- },
1380
- 'attach_mode': {
1381
- 'type': 'string',
1382
- 'case_sensitive_enum': [
1383
- 'READ_WRITE', 'READ_ONLY'
1384
- ]
1385
- },
1386
- 'mount_path': {
1387
- 'type': 'string',
1358
+ 'region_configs': {
1359
+ 'type': 'object',
1360
+ 'required': [],
1361
+ 'properties': {},
1362
+ 'additionalProperties': {
1363
+ 'type': 'object',
1364
+ 'required': [],
1365
+ 'additionalProperties': False,
1366
+ 'properties': {
1367
+ 'project_id': {
1368
+ 'type': 'string',
1369
+ },
1370
+ 'fabric': {
1371
+ 'type': 'string',
1372
+ },
1373
+ 'filesystems': {
1374
+ 'type': 'array',
1375
+ 'items': {
1376
+ 'type': 'object',
1377
+ 'additionalProperties': False,
1378
+ 'properties': {
1379
+ 'filesystem_id': {
1380
+ 'type': 'string',
1381
+ },
1382
+ 'attach_mode': {
1383
+ 'type': 'string',
1384
+ 'case_sensitive_enum': [
1385
+ 'READ_WRITE', 'READ_ONLY'
1386
+ ]
1387
+ },
1388
+ 'mount_path': {
1389
+ 'type': 'string',
1390
+ }
1391
+ }
1388
1392
  }
1389
- }
1390
- }
1391
- },
1393
+ },
1394
+ },
1395
+ }
1392
1396
  }
1393
1397
  },
1394
1398
  }
sky/utils/timeline.py CHANGED
@@ -15,6 +15,7 @@ from typing import Callable, Optional, Union
15
15
  import filelock
16
16
 
17
17
  from sky.utils import common_utils
18
+ from sky.utils import locks
18
19
 
19
20
  _events = []
20
21
 
@@ -76,6 +77,46 @@ def event(name_or_fn: Union[str, Callable], message: Optional[str] = None):
76
77
  return common_utils.make_decorator(Event, name_or_fn, message=message)
77
78
 
78
79
 
80
class DistributedLockEvent:
    """Serve both as a distributed lock and event for the lock.

    Wraps the lock returned by ``locks.get_lock`` and records two timeline
    events: one around each acquisition attempt and one spanning the time
    the lock is held. Usable as a context manager or as a decorator.
    """

    def __init__(self, lock_id: str, timeout: Optional[float] = None):
        """Create the lock and its hold event.

        Args:
            lock_id: Unique identifier for the lock.
            timeout: Maximum time in seconds to wait for acquisition,
                passed through to ``locks.get_lock``.
        """
        self._lock_id = lock_id
        self._lock = locks.get_lock(lock_id, timeout)
        self._hold_lock_event = Event(f'[DistributedLock.hold]:{lock_id}')

    def _is_held(self) -> bool:
        """Return whether the underlying lock reports being held.

        ``is_locked`` is a method on the lock and must be called; the bare
        attribute is a bound method object, which is always truthy.
        """
        try:
            return bool(self._lock.is_locked())
        except Exception:  # pylint: disable=broad-except
            # The events are diagnostics only. Raising from here after the
            # lock was taken would abort __enter__, so __exit__ would never
            # run and the lock would stay held. A backend that cannot
            # report its state simply produces no hold event.
            return False

    def acquire(self):
        """Acquire the lock, timing the attempt and starting the hold event."""
        was_locked = self._is_held()
        with Event(f'[DistributedLock.acquire]:{self._lock_id}'):
            self._lock.acquire()
        if not was_locked and self._is_held():
            # start holding the lock after initial acquiring
            self._hold_lock_event.begin()

    def release(self):
        """Release the lock and end the hold event once it is fully released."""
        was_locked = self._is_held()
        self._lock.release()
        if was_locked and not self._is_held():
            # stop holding the lock after initial releasing
            self._hold_lock_event.end()

    def __enter__(self):
        self.acquire()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.release()

    def __call__(self, f):
        """Decorate ``f`` so that each call runs while holding the lock."""

        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            with self:
                return f(*args, **kwargs)

        return wrapper
118
+
119
+
79
120
  class FileLockEvent:
80
121
  """Serve both as a file lock and event for the lock."""
81
122
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: skypilot-nightly
3
- Version: 1.0.0.dev20250720
3
+ Version: 1.0.0.dev20250723
4
4
  Summary: SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.
5
5
  Author: SkyPilot Team
6
6
  License: Apache 2.0