b10-transfer 0.0.2__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- b10_transfer/__init__.py +1 -1
- b10_transfer/async_transfers.py +8 -0
- b10_transfer/constants.py +3 -0
- b10_transfer/core.py +12 -3
- b10_transfer/space_monitor.py +13 -0
- b10_transfer/torch_cache.py +2 -0
- {b10_transfer-0.0.2.dist-info → b10_transfer-0.1.0.dist-info}/METADATA +1 -1
- b10_transfer-0.1.0.dist-info/RECORD +15 -0
- b10_transfer-0.0.2.dist-info/RECORD +0 -15
- {b10_transfer-0.0.2.dist-info → b10_transfer-0.1.0.dist-info}/WHEEL +0 -0
b10_transfer/__init__.py
CHANGED
b10_transfer/async_transfers.py
CHANGED
@@ -39,6 +39,7 @@ class TransferProgress:
|
|
39
39
|
AsyncTransferStatus.ERROR,
|
40
40
|
AsyncTransferStatus.INTERRUPTED,
|
41
41
|
AsyncTransferStatus.CANCELLED,
|
42
|
+
AsyncTransferStatus.DOES_NOT_EXIST,
|
42
43
|
]:
|
43
44
|
self.completed_at = datetime.now()
|
44
45
|
|
@@ -140,6 +141,12 @@ class AsyncTransferManager:
|
|
140
141
|
"Transfer interrupted due to insufficient disk space",
|
141
142
|
)
|
142
143
|
logger.warning(f"Transfer interrupted: {operation_id}")
|
144
|
+
elif result == TransferStatus.DOES_NOT_EXIST:
|
145
|
+
progress.update_status(
|
146
|
+
AsyncTransferStatus.DOES_NOT_EXIST,
|
147
|
+
"Cache file not found",
|
148
|
+
)
|
149
|
+
logger.info(f"Transfer failed - file not found: {operation_id}")
|
143
150
|
else:
|
144
151
|
progress.update_status(
|
145
152
|
AsyncTransferStatus.ERROR, "Transfer operation failed"
|
@@ -166,6 +173,7 @@ class AsyncTransferManager:
|
|
166
173
|
AsyncTransferStatus.ERROR,
|
167
174
|
AsyncTransferStatus.INTERRUPTED,
|
168
175
|
AsyncTransferStatus.CANCELLED,
|
176
|
+
AsyncTransferStatus.DOES_NOT_EXIST,
|
169
177
|
]
|
170
178
|
|
171
179
|
def wait_for_completion(
|
b10_transfer/constants.py
CHANGED
@@ -113,6 +113,7 @@ class WorkerStatus(Enum):
|
|
113
113
|
SUCCESS = auto()
|
114
114
|
ERROR = auto()
|
115
115
|
CANCELLED = auto()
|
116
|
+
FILE_NOT_FOUND = auto()
|
116
117
|
|
117
118
|
|
118
119
|
class LoadStatus(Enum):
|
@@ -138,6 +139,7 @@ class TransferStatus(Enum):
|
|
138
139
|
SUCCESS = auto()
|
139
140
|
ERROR = auto()
|
140
141
|
INTERRUPTED = auto()
|
142
|
+
DOES_NOT_EXIST = auto()
|
141
143
|
|
142
144
|
|
143
145
|
class AsyncTransferStatus(Enum):
|
@@ -147,3 +149,4 @@ class AsyncTransferStatus(Enum):
|
|
147
149
|
ERROR = auto()
|
148
150
|
INTERRUPTED = auto()
|
149
151
|
CANCELLED = auto()
|
152
|
+
DOES_NOT_EXIST = auto()
|
b10_transfer/core.py
CHANGED
@@ -11,6 +11,7 @@ from .space_monitor import (
|
|
11
11
|
check_sufficient_disk_space,
|
12
12
|
CacheSpaceMonitor,
|
13
13
|
CacheOperationInterrupted,
|
14
|
+
CacheFileNotFoundError,
|
14
15
|
run_monitored_process,
|
15
16
|
)
|
16
17
|
from .constants import (
|
@@ -118,9 +119,13 @@ def transfer(
|
|
118
119
|
if primary_monitor is None:
|
119
120
|
# No monitoring requested, execute callback directly
|
120
121
|
logger.info(f"Starting transfer (no monitoring): {source} -> {dest}")
|
121
|
-
|
122
|
-
|
123
|
-
|
122
|
+
try:
|
123
|
+
callback(source, dest, *callback_args, **callback_kwargs)
|
124
|
+
logger.info("Transfer complete")
|
125
|
+
return TransferStatus.SUCCESS
|
126
|
+
except (FileNotFoundError, CacheFileNotFoundError) as e:
|
127
|
+
logger.info(f"Transfer failed - file not found: {e}")
|
128
|
+
return TransferStatus.DOES_NOT_EXIST
|
124
129
|
|
125
130
|
# Start the primary space monitor
|
126
131
|
primary_monitor.start()
|
@@ -151,6 +156,10 @@ def transfer(
|
|
151
156
|
logger.info("Transfer complete (unmonitored)")
|
152
157
|
return TransferStatus.SUCCESS
|
153
158
|
|
159
|
+
except (FileNotFoundError, CacheFileNotFoundError) as e:
|
160
|
+
logger.info(f"Transfer failed - file not found: {e}")
|
161
|
+
return TransferStatus.DOES_NOT_EXIST
|
162
|
+
|
154
163
|
except CacheOperationInterrupted as e:
|
155
164
|
logger.warning(f"Transfer interrupted: {e}")
|
156
165
|
return TransferStatus.INTERRUPTED
|
b10_transfer/space_monitor.py
CHANGED
@@ -24,6 +24,12 @@ class CacheOperationInterrupted(Exception):
|
|
24
24
|
pass
|
25
25
|
|
26
26
|
|
27
|
+
class CacheFileNotFoundError(Exception):
|
28
|
+
"""Raised when a cache file is not found during transfer operations."""
|
29
|
+
|
30
|
+
pass
|
31
|
+
|
32
|
+
|
27
33
|
def worker_process(cancelled_message: str):
|
28
34
|
"""Decorator for worker process functions to handle common try/catch/result_queue pattern.
|
29
35
|
|
@@ -60,6 +66,8 @@ def worker_process(cancelled_message: str):
|
|
60
66
|
# If we get here, the function completed successfully
|
61
67
|
result_queue.put((WorkerStatus.SUCCESS.value, None))
|
62
68
|
|
69
|
+
except FileNotFoundError as e:
|
70
|
+
result_queue.put((WorkerStatus.FILE_NOT_FOUND.value, str(e)))
|
63
71
|
except Exception as e:
|
64
72
|
result_queue.put((WorkerStatus.ERROR.value, str(e)))
|
65
73
|
|
@@ -286,6 +294,11 @@ def run_monitored_process(
|
|
286
294
|
if cleanup_func:
|
287
295
|
cleanup_func()
|
288
296
|
raise CacheOperationInterrupted(error_msg)
|
297
|
+
elif status == WorkerStatus.FILE_NOT_FOUND.value:
|
298
|
+
logger.info(
|
299
|
+
f"{operation_name} worker failed - file not found: {error_msg}"
|
300
|
+
)
|
301
|
+
raise CacheFileNotFoundError(error_msg)
|
289
302
|
# status == WorkerStatus.SUCCESS.value - continue normally
|
290
303
|
|
291
304
|
logger.debug(f"{operation_name} completed successfully")
|
b10_transfer/torch_cache.py
CHANGED
@@ -136,6 +136,8 @@ def torch_cache_load_callback(
|
|
136
136
|
@critical_section_b10fs_file_lock("copy_out")
|
137
137
|
def _copy_from_b10fs():
|
138
138
|
logger.info(f"Copying from b10fs: {source_file} -> {temp_archive}")
|
139
|
+
if not source_file.exists():
|
140
|
+
raise FileNotFoundError(f"Cache file not found: {source_file}")
|
139
141
|
shutil.copy2(source_file, temp_archive)
|
140
142
|
|
141
143
|
_copy_from_b10fs()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: b10-transfer
|
3
|
-
Version: 0.0.2
|
3
|
+
Version: 0.1.0
|
4
4
|
Summary: Distributed PyTorch compilation cache for Baseten - Environment-aware, lock-free compilation cache management
|
5
5
|
License: MIT
|
6
6
|
Keywords: pytorch,torch.compile,cache,machine-learning,inference
|
@@ -0,0 +1,15 @@
|
|
1
|
+
b10_transfer/__init__.py,sha256=Z_p771iwuROcCSNWKjUZ9j-V7ICmbtwr_qet5FCsnkQ,1400
|
2
|
+
b10_transfer/archive.py,sha256=GKb0mi0-YeM7ch4FLAoOLHXw0T6LkRerYad2N2y9TYM,6400
|
3
|
+
b10_transfer/async_torch_cache.py,sha256=4hMjVR44SLlGes25e_cjgMTywFfIYjH0TnUmg9o-iyI,1903
|
4
|
+
b10_transfer/async_transfers.py,sha256=luqdIStT_j4YduImY67HvX5WDurqV9Q5RjEyMI7bh1k,9476
|
5
|
+
b10_transfer/cleanup.py,sha256=xjKStmBjaarZPxhPTT1-Ds_pvUR7kdJw5Kp19BLvzzY,6224
|
6
|
+
b10_transfer/constants.py,sha256=R2JE_634Ri_9rf8adwiAzcfiej5weAGP1x1ccSZLX8k,4829
|
7
|
+
b10_transfer/core.py,sha256=d-aaQwKYqKIafBYBNahNcnOpwcanOSrWLwdzXpjVLBs,6350
|
8
|
+
b10_transfer/environment.py,sha256=aC0biEMQrtHk0ke_3epdcq1X9J5fPmPpBVt0fH7XF2Y,5625
|
9
|
+
b10_transfer/info.py,sha256=I3iOuImZ5r6DMJTDeBtVvzlSn6IuyPJbLJYUO_OF0ks,6299
|
10
|
+
b10_transfer/space_monitor.py,sha256=G_3wLSJa7HTCihSpLoow2oKo2cARJ2PtvY1XOQZl3-s,11028
|
11
|
+
b10_transfer/torch_cache.py,sha256=e41mDdnP_h61WNwB7TG5c4a7ecw0-K63ytJiKsX0keY,14907
|
12
|
+
b10_transfer/utils.py,sha256=Stee0DFK-8MRRYNIocqaK64cJvfs4jPW3Mpx7zkWV6Y,11932
|
13
|
+
b10_transfer-0.1.0.dist-info/METADATA,sha256=wc0a--Bgr-7filvyS4uUAic9fO1JJbKqc5iNp36A-iU,7502
|
14
|
+
b10_transfer-0.1.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
15
|
+
b10_transfer-0.1.0.dist-info/RECORD,,
|
@@ -1,15 +0,0 @@
|
|
1
|
-
b10_transfer/__init__.py,sha256=rQP-cwO-4tMz8CqXI-AE9K0eQMgQ-QIFhm9SFH8_7cc,1400
|
2
|
-
b10_transfer/archive.py,sha256=GKb0mi0-YeM7ch4FLAoOLHXw0T6LkRerYad2N2y9TYM,6400
|
3
|
-
b10_transfer/async_torch_cache.py,sha256=4hMjVR44SLlGes25e_cjgMTywFfIYjH0TnUmg9o-iyI,1903
|
4
|
-
b10_transfer/async_transfers.py,sha256=AAML562qYzF9NyX9AdfiJ0OcQw6vXr985IZWXZSot9Q,9083
|
5
|
-
b10_transfer/cleanup.py,sha256=xjKStmBjaarZPxhPTT1-Ds_pvUR7kdJw5Kp19BLvzzY,6224
|
6
|
-
b10_transfer/constants.py,sha256=KjSUO6heScDJXQwFlHdeNV4KBBqKz7CKeJzo44-9qMM,4745
|
7
|
-
b10_transfer/core.py,sha256=BOnA6FXkZRm74_CtQBMudpx3q7HTEGEORUV26fb6cvQ,5920
|
8
|
-
b10_transfer/environment.py,sha256=aC0biEMQrtHk0ke_3epdcq1X9J5fPmPpBVt0fH7XF2Y,5625
|
9
|
-
b10_transfer/info.py,sha256=I3iOuImZ5r6DMJTDeBtVvzlSn6IuyPJbLJYUO_OF0ks,6299
|
10
|
-
b10_transfer/space_monitor.py,sha256=5pwW643KAHI3mtT61hYf29953UD9LekzWFF1K-QeYbw,10529
|
11
|
-
b10_transfer/torch_cache.py,sha256=g001niSJyEKSXeqZcKbXUOofhULTk6aj2uPjah8tz-4,14778
|
12
|
-
b10_transfer/utils.py,sha256=Stee0DFK-8MRRYNIocqaK64cJvfs4jPW3Mpx7zkWV6Y,11932
|
13
|
-
b10_transfer-0.0.2.dist-info/METADATA,sha256=Hn47gD8xg-PrGRF2giMUX9Zm8TEDe4z2u9Tqz3sSLmU,7502
|
14
|
-
b10_transfer-0.0.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
15
|
-
b10_transfer-0.0.2.dist-info/RECORD,,
|
File without changes
|