b10-transfer 0.0.1__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
b10_transfer/__init__.py CHANGED
@@ -21,7 +21,7 @@ from .info import get_cache_info, list_available_caches
21
21
  from .constants import SaveStatus, LoadStatus, TransferStatus, AsyncTransferStatus
22
22
 
23
23
  # Version
24
- __version__ = "0.0.1"
24
+ __version__ = "0.1.0"
25
25
 
26
26
  __all__ = [
27
27
  "CacheError",
@@ -39,6 +39,7 @@ class TransferProgress:
39
39
  AsyncTransferStatus.ERROR,
40
40
  AsyncTransferStatus.INTERRUPTED,
41
41
  AsyncTransferStatus.CANCELLED,
42
+ AsyncTransferStatus.DOES_NOT_EXIST,
42
43
  ]:
43
44
  self.completed_at = datetime.now()
44
45
 
@@ -140,6 +141,12 @@ class AsyncTransferManager:
140
141
  "Transfer interrupted due to insufficient disk space",
141
142
  )
142
143
  logger.warning(f"Transfer interrupted: {operation_id}")
144
+ elif result == TransferStatus.DOES_NOT_EXIST:
145
+ progress.update_status(
146
+ AsyncTransferStatus.DOES_NOT_EXIST,
147
+ "Cache file not found",
148
+ )
149
+ logger.info(f"Transfer failed - file not found: {operation_id}")
143
150
  else:
144
151
  progress.update_status(
145
152
  AsyncTransferStatus.ERROR, "Transfer operation failed"
@@ -166,6 +173,7 @@ class AsyncTransferManager:
166
173
  AsyncTransferStatus.ERROR,
167
174
  AsyncTransferStatus.INTERRUPTED,
168
175
  AsyncTransferStatus.CANCELLED,
176
+ AsyncTransferStatus.DOES_NOT_EXIST,
169
177
  ]
170
178
 
171
179
  def wait_for_completion(
b10_transfer/constants.py CHANGED
@@ -113,6 +113,7 @@ class WorkerStatus(Enum):
113
113
  SUCCESS = auto()
114
114
  ERROR = auto()
115
115
  CANCELLED = auto()
116
+ FILE_NOT_FOUND = auto()
116
117
 
117
118
 
118
119
  class LoadStatus(Enum):
@@ -138,6 +139,7 @@ class TransferStatus(Enum):
138
139
  SUCCESS = auto()
139
140
  ERROR = auto()
140
141
  INTERRUPTED = auto()
142
+ DOES_NOT_EXIST = auto()
141
143
 
142
144
 
143
145
  class AsyncTransferStatus(Enum):
@@ -147,3 +149,4 @@ class AsyncTransferStatus(Enum):
147
149
  ERROR = auto()
148
150
  INTERRUPTED = auto()
149
151
  CANCELLED = auto()
152
+ DOES_NOT_EXIST = auto()
b10_transfer/core.py CHANGED
@@ -11,6 +11,7 @@ from .space_monitor import (
11
11
  check_sufficient_disk_space,
12
12
  CacheSpaceMonitor,
13
13
  CacheOperationInterrupted,
14
+ CacheFileNotFoundError,
14
15
  run_monitored_process,
15
16
  )
16
17
  from .constants import (
@@ -118,9 +119,13 @@ def transfer(
118
119
  if primary_monitor is None:
119
120
  # No monitoring requested, execute callback directly
120
121
  logger.info(f"Starting transfer (no monitoring): {source} -> {dest}")
121
- callback(source, dest, *callback_args, **callback_kwargs)
122
- logger.info("Transfer complete")
123
- return TransferStatus.SUCCESS
122
+ try:
123
+ callback(source, dest, *callback_args, **callback_kwargs)
124
+ logger.info("Transfer complete")
125
+ return TransferStatus.SUCCESS
126
+ except (FileNotFoundError, CacheFileNotFoundError) as e:
127
+ logger.info(f"Transfer failed - file not found: {e}")
128
+ return TransferStatus.DOES_NOT_EXIST
124
129
 
125
130
  # Start the primary space monitor
126
131
  primary_monitor.start()
@@ -151,6 +156,10 @@ def transfer(
151
156
  logger.info("Transfer complete (unmonitored)")
152
157
  return TransferStatus.SUCCESS
153
158
 
159
+ except (FileNotFoundError, CacheFileNotFoundError) as e:
160
+ logger.info(f"Transfer failed - file not found: {e}")
161
+ return TransferStatus.DOES_NOT_EXIST
162
+
154
163
  except CacheOperationInterrupted as e:
155
164
  logger.warning(f"Transfer interrupted: {e}")
156
165
  return TransferStatus.INTERRUPTED
@@ -24,6 +24,12 @@ class CacheOperationInterrupted(Exception):
24
24
  pass
25
25
 
26
26
 
27
+ class CacheFileNotFoundError(Exception):
28
+ """Raised when a cache file is not found during transfer operations."""
29
+
30
+ pass
31
+
32
+
27
33
  def worker_process(cancelled_message: str):
28
34
  """Decorator for worker process functions to handle common try/catch/result_queue pattern.
29
35
 
@@ -60,6 +66,8 @@ def worker_process(cancelled_message: str):
60
66
  # If we get here, the function completed successfully
61
67
  result_queue.put((WorkerStatus.SUCCESS.value, None))
62
68
 
69
+ except FileNotFoundError as e:
70
+ result_queue.put((WorkerStatus.FILE_NOT_FOUND.value, str(e)))
63
71
  except Exception as e:
64
72
  result_queue.put((WorkerStatus.ERROR.value, str(e)))
65
73
 
@@ -286,6 +294,11 @@ def run_monitored_process(
286
294
  if cleanup_func:
287
295
  cleanup_func()
288
296
  raise CacheOperationInterrupted(error_msg)
297
+ elif status == WorkerStatus.FILE_NOT_FOUND.value:
298
+ logger.info(
299
+ f"{operation_name} worker failed - file not found: {error_msg}"
300
+ )
301
+ raise CacheFileNotFoundError(error_msg)
289
302
  # status == WorkerStatus.SUCCESS.value - continue normally
290
303
 
291
304
  logger.debug(f"{operation_name} completed successfully")
@@ -40,7 +40,7 @@ logger = logging.getLogger(__name__)
40
40
 
41
41
 
42
42
  def torch_cache_save_callback(
43
- source_dir: Path, dest_file: Path, max_size_mb: int
43
+ source_dir: Path, dest_file: Path, max_size_mb: int = None, *args, **kwargs
44
44
  ) -> None:
45
45
  """Callback function for saving torch cache: compress then copy to b10fs.
46
46
 
@@ -51,8 +51,14 @@ def torch_cache_save_callback(
51
51
  Args:
52
52
  source_dir: Path to the torch cache directory to compress
53
53
  dest_file: Path to the final cache file in b10fs
54
- max_size_mb: Maximum allowed archive size in megabytes
54
+ max_size_mb: Maximum allowed archive size in megabytes (can be passed as kwarg)
55
+ *args: Additional arguments passed by the transfer system (ignored)
56
+ **kwargs: Additional keyword arguments passed by the transfer system (may contain max_size_mb)
55
57
  """
58
+ # Handle max_size_mb from kwargs if not provided as positional argument
59
+ if max_size_mb is None:
60
+ max_size_mb = kwargs.get("max_size_mb", MAX_CACHE_SIZE_MB)
61
+
56
62
  work_dir = Path(LOCAL_WORK_DIR)
57
63
 
58
64
  # Create temporary archive in local work directory
@@ -99,7 +105,9 @@ def torch_cache_save_callback(
99
105
  raise
100
106
 
101
107
 
102
- def torch_cache_load_callback(source_file: Path, dest_dir: Path) -> None:
108
+ def torch_cache_load_callback(
109
+ source_file: Path, dest_dir: Path, *args, **kwargs
110
+ ) -> None:
103
111
  """Callback function for loading torch cache: copy from b10fs then extract.
104
112
 
105
113
  This function handles the torch-specific load logic:
@@ -109,6 +117,8 @@ def torch_cache_load_callback(source_file: Path, dest_dir: Path) -> None:
109
117
  Args:
110
118
  source_file: Path to the cache file in b10fs
111
119
  dest_dir: Path to the torch cache directory where files will be extracted
120
+ *args: Additional arguments passed by the transfer system (ignored)
121
+ **kwargs: Additional keyword arguments passed by the transfer system (ignored)
112
122
  """
113
123
  work_dir = Path(LOCAL_WORK_DIR)
114
124
 
@@ -126,6 +136,8 @@ def torch_cache_load_callback(source_file: Path, dest_dir: Path) -> None:
126
136
  @critical_section_b10fs_file_lock("copy_out")
127
137
  def _copy_from_b10fs():
128
138
  logger.info(f"Copying from b10fs: {source_file} -> {temp_archive}")
139
+ if not source_file.exists():
140
+ raise FileNotFoundError(f"Cache file not found: {source_file}")
129
141
  shutil.copy2(source_file, temp_archive)
130
142
 
131
143
  _copy_from_b10fs()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: b10-transfer
3
- Version: 0.0.1
3
+ Version: 0.1.0
4
4
  Summary: Distributed PyTorch compilation cache for Baseten - Environment-aware, lock-free compilation cache management
5
5
  License: MIT
6
6
  Keywords: pytorch,torch.compile,cache,machine-learning,inference
@@ -0,0 +1,15 @@
1
+ b10_transfer/__init__.py,sha256=Z_p771iwuROcCSNWKjUZ9j-V7ICmbtwr_qet5FCsnkQ,1400
2
+ b10_transfer/archive.py,sha256=GKb0mi0-YeM7ch4FLAoOLHXw0T6LkRerYad2N2y9TYM,6400
3
+ b10_transfer/async_torch_cache.py,sha256=4hMjVR44SLlGes25e_cjgMTywFfIYjH0TnUmg9o-iyI,1903
4
+ b10_transfer/async_transfers.py,sha256=luqdIStT_j4YduImY67HvX5WDurqV9Q5RjEyMI7bh1k,9476
5
+ b10_transfer/cleanup.py,sha256=xjKStmBjaarZPxhPTT1-Ds_pvUR7kdJw5Kp19BLvzzY,6224
6
+ b10_transfer/constants.py,sha256=R2JE_634Ri_9rf8adwiAzcfiej5weAGP1x1ccSZLX8k,4829
7
+ b10_transfer/core.py,sha256=d-aaQwKYqKIafBYBNahNcnOpwcanOSrWLwdzXpjVLBs,6350
8
+ b10_transfer/environment.py,sha256=aC0biEMQrtHk0ke_3epdcq1X9J5fPmPpBVt0fH7XF2Y,5625
9
+ b10_transfer/info.py,sha256=I3iOuImZ5r6DMJTDeBtVvzlSn6IuyPJbLJYUO_OF0ks,6299
10
+ b10_transfer/space_monitor.py,sha256=G_3wLSJa7HTCihSpLoow2oKo2cARJ2PtvY1XOQZl3-s,11028
11
+ b10_transfer/torch_cache.py,sha256=e41mDdnP_h61WNwB7TG5c4a7ecw0-K63ytJiKsX0keY,14907
12
+ b10_transfer/utils.py,sha256=Stee0DFK-8MRRYNIocqaK64cJvfs4jPW3Mpx7zkWV6Y,11932
13
+ b10_transfer-0.1.0.dist-info/METADATA,sha256=wc0a--Bgr-7filvyS4uUAic9fO1JJbKqc5iNp36A-iU,7502
14
+ b10_transfer-0.1.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
15
+ b10_transfer-0.1.0.dist-info/RECORD,,
@@ -1,15 +0,0 @@
1
- b10_transfer/__init__.py,sha256=o1ej-OtAOsfrJbvh5C3PnqxW2qfcO7l8rllVD-07lXE,1400
2
- b10_transfer/archive.py,sha256=GKb0mi0-YeM7ch4FLAoOLHXw0T6LkRerYad2N2y9TYM,6400
3
- b10_transfer/async_torch_cache.py,sha256=4hMjVR44SLlGes25e_cjgMTywFfIYjH0TnUmg9o-iyI,1903
4
- b10_transfer/async_transfers.py,sha256=AAML562qYzF9NyX9AdfiJ0OcQw6vXr985IZWXZSot9Q,9083
5
- b10_transfer/cleanup.py,sha256=xjKStmBjaarZPxhPTT1-Ds_pvUR7kdJw5Kp19BLvzzY,6224
6
- b10_transfer/constants.py,sha256=KjSUO6heScDJXQwFlHdeNV4KBBqKz7CKeJzo44-9qMM,4745
7
- b10_transfer/core.py,sha256=BOnA6FXkZRm74_CtQBMudpx3q7HTEGEORUV26fb6cvQ,5920
8
- b10_transfer/environment.py,sha256=aC0biEMQrtHk0ke_3epdcq1X9J5fPmPpBVt0fH7XF2Y,5625
9
- b10_transfer/info.py,sha256=I3iOuImZ5r6DMJTDeBtVvzlSn6IuyPJbLJYUO_OF0ks,6299
10
- b10_transfer/space_monitor.py,sha256=5pwW643KAHI3mtT61hYf29953UD9LekzWFF1K-QeYbw,10529
11
- b10_transfer/torch_cache.py,sha256=Oe_OeUPGAlmK9wY-L9w4aPaXOoMnL_kD596hew6ETcw,14192
12
- b10_transfer/utils.py,sha256=Stee0DFK-8MRRYNIocqaK64cJvfs4jPW3Mpx7zkWV6Y,11932
13
- b10_transfer-0.0.1.dist-info/METADATA,sha256=hESeWyidAEbtWkIgepBn1Cxlo9--jIj9vcLxM4zP7lY,7502
14
- b10_transfer-0.0.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
15
- b10_transfer-0.0.1.dist-info/RECORD,,