rclone-api 1.5.8__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
Files changed (61)
  1. rclone_api/__init__.py +951 -0
  2. rclone_api/assets/example.txt +1 -0
  3. rclone_api/cli.py +15 -0
  4. rclone_api/cmd/analyze.py +51 -0
  5. rclone_api/cmd/copy_large_s3.py +111 -0
  6. rclone_api/cmd/copy_large_s3_finish.py +81 -0
  7. rclone_api/cmd/list_files.py +27 -0
  8. rclone_api/cmd/save_to_db.py +77 -0
  9. rclone_api/completed_process.py +60 -0
  10. rclone_api/config.py +87 -0
  11. rclone_api/convert.py +31 -0
  12. rclone_api/db/__init__.py +3 -0
  13. rclone_api/db/db.py +277 -0
  14. rclone_api/db/models.py +57 -0
  15. rclone_api/deprecated.py +24 -0
  16. rclone_api/detail/copy_file_parts_resumable.py +42 -0
  17. rclone_api/detail/walk.py +116 -0
  18. rclone_api/diff.py +164 -0
  19. rclone_api/dir.py +113 -0
  20. rclone_api/dir_listing.py +66 -0
  21. rclone_api/exec.py +40 -0
  22. rclone_api/experimental/flags.py +89 -0
  23. rclone_api/experimental/flags_base.py +58 -0
  24. rclone_api/file.py +205 -0
  25. rclone_api/file_item.py +68 -0
  26. rclone_api/file_part.py +198 -0
  27. rclone_api/file_stream.py +52 -0
  28. rclone_api/filelist.py +30 -0
  29. rclone_api/group_files.py +256 -0
  30. rclone_api/http_server.py +244 -0
  31. rclone_api/install.py +95 -0
  32. rclone_api/log.py +44 -0
  33. rclone_api/mount.py +55 -0
  34. rclone_api/mount_util.py +247 -0
  35. rclone_api/process.py +187 -0
  36. rclone_api/rclone_impl.py +1285 -0
  37. rclone_api/remote.py +21 -0
  38. rclone_api/rpath.py +102 -0
  39. rclone_api/s3/api.py +109 -0
  40. rclone_api/s3/basic_ops.py +61 -0
  41. rclone_api/s3/chunk_task.py +187 -0
  42. rclone_api/s3/create.py +107 -0
  43. rclone_api/s3/multipart/file_info.py +7 -0
  44. rclone_api/s3/multipart/finished_piece.py +69 -0
  45. rclone_api/s3/multipart/info_json.py +239 -0
  46. rclone_api/s3/multipart/merge_state.py +147 -0
  47. rclone_api/s3/multipart/upload_info.py +62 -0
  48. rclone_api/s3/multipart/upload_parts_inline.py +356 -0
  49. rclone_api/s3/multipart/upload_parts_resumable.py +304 -0
  50. rclone_api/s3/multipart/upload_parts_server_side_merge.py +546 -0
  51. rclone_api/s3/multipart/upload_state.py +165 -0
  52. rclone_api/s3/types.py +67 -0
  53. rclone_api/scan_missing_folders.py +153 -0
  54. rclone_api/types.py +402 -0
  55. rclone_api/util.py +324 -0
  56. rclone_api-1.5.8.dist-info/LICENSE +21 -0
  57. rclone_api-1.5.8.dist-info/METADATA +969 -0
  58. rclone_api-1.5.8.dist-info/RECORD +61 -0
  59. rclone_api-1.5.8.dist-info/WHEEL +5 -0
  60. rclone_api-1.5.8.dist-info/entry_points.txt +5 -0
  61. rclone_api-1.5.8.dist-info/top_level.txt +1 -0
rclone_api/s3/multipart/upload_parts_inline.py
@@ -0,0 +1,356 @@
+import _thread
+import os
+import traceback
+import warnings
+from concurrent.futures import Future, ThreadPoolExecutor
+from pathlib import Path
+from queue import Queue
+from threading import Event, Thread
+from typing import Any, Callable
+
+from botocore.client import BaseClient
+
+from rclone_api.file_part import FilePart
+from rclone_api.s3.chunk_task import file_chunker
+from rclone_api.s3.multipart.file_info import S3FileInfo
+from rclone_api.s3.multipart.finished_piece import FinishedPiece
+from rclone_api.s3.multipart.upload_info import UploadInfo
+from rclone_api.s3.multipart.upload_state import UploadState
+from rclone_api.s3.types import MultiUploadResult
+from rclone_api.types import EndOfStream
+from rclone_api.util import locked_print
+
+_MIN_UPLOAD_CHUNK_SIZE = 5 * 1024 * 1024  # 5MB
+
+
+def upload_task(
+    info: UploadInfo,
+    chunk: FilePart,
+    part_number: int,
+    retries: int,
+) -> FinishedPiece:
+    file_or_err: Path | Exception = chunk.get_file()
+    if isinstance(file_or_err, Exception):
+        raise file_or_err
+    file: Path = file_or_err
+    size = os.path.getsize(file)
+    retries = retries + 1  # Add one for the initial attempt
+    for retry in range(retries):
+        try:
+            if retry > 0:
+                locked_print(f"Retrying part {part_number} for {info.src_file_path}")
+            locked_print(
+                f"Uploading part {part_number} for {info.src_file_path} of size {size}"
+            )
+
+            with open(file, "rb") as f:
+                part = info.s3_client.upload_part(
+                    Bucket=info.bucket_name,
+                    Key=info.object_name,
+                    PartNumber=part_number,
+                    UploadId=info.upload_id,
+                    Body=f,
+                )
+                out: FinishedPiece = FinishedPiece(
+                    etag=part["ETag"], part_number=part_number
+                )
+                chunk.dispose()
+                return out
+        except Exception as e:
+            if retry == retries - 1:
+                locked_print(f"Error uploading part {part_number}: {e}")
+                chunk.dispose()
+                raise e
+            else:
+                locked_print(f"Error uploading part {part_number}: {e}, retrying")
+                continue
+    raise Exception("Should not reach here")
+
+
+def handle_upload(
+    upload_info: UploadInfo, fp: FilePart | EndOfStream
+) -> FinishedPiece | Exception | EndOfStream:
+    if isinstance(fp, EndOfStream):
+        eos: EndOfStream = fp
+        return eos
+    part_number: int | None = None
+    try:
+        assert isinstance(fp.extra, S3FileInfo)
+        extra: S3FileInfo = fp.extra
+        part_number = extra.part_number
+        print(f"Handling upload for {part_number}, size {fp.size}")
+
+        part: FinishedPiece = upload_task(
+            info=upload_info,
+            chunk=fp,
+            part_number=part_number,
+            retries=upload_info.retries,
+        )
+        return part
+    except Exception as e:
+        stacktrace = traceback.format_exc()
+        msg = f"Error uploading part {part_number}: {e}\n{stacktrace}"
+        warnings.warn(msg)
+        return e
+    finally:
+        fp.dispose()
+
+
+def prepare_upload_file_multipart(
+    s3_client: BaseClient,
+    bucket_name: str,
+    file_path: Path,
+    file_size: int | None,
+    object_name: str,
+    chunk_size: int,
+    retries: int,
+) -> UploadInfo:
+    """Upload a file to the bucket using multipart upload with customizable chunk size."""
+
+    # Initiate multipart upload
+    locked_print(
+        f"Creating multipart upload for {file_path} to {bucket_name}/{object_name}"
+    )
+    mpu = s3_client.create_multipart_upload(Bucket=bucket_name, Key=object_name)
+    upload_id = mpu["UploadId"]
+
+    file_size = file_size if file_size is not None else os.path.getsize(file_path)
+
+    upload_info: UploadInfo = UploadInfo(
+        s3_client=s3_client,
+        bucket_name=bucket_name,
+        object_name=object_name,
+        src_file_path=file_path,
+        upload_id=upload_id,
+        retries=retries,
+        chunk_size=chunk_size,
+        file_size=file_size,
+    )
+    return upload_info
+
+
+def _abort_previous_upload(upload_state: UploadState) -> None:
+    if upload_state.upload_info.upload_id:
+        try:
+            upload_state.upload_info.s3_client.abort_multipart_upload(
+                Bucket=upload_state.upload_info.bucket_name,
+                Key=upload_state.upload_info.object_name,
+                UploadId=upload_state.upload_info.upload_id,
+            )
+        except Exception as e:
+            locked_print(f"Error aborting previous upload: {e}")
+
+
+def upload_runner(
+    upload_state: UploadState,
+    upload_info: UploadInfo,
+    upload_threads: int,
+    queue_upload: Queue[FilePart | EndOfStream],
+    cancel_chunker_event: Event,
+) -> None:
+    # import semaphre
+    import threading
+
+    semaphore = threading.Semaphore(upload_threads)
+    with ThreadPoolExecutor(max_workers=upload_threads) as executor:
+        try:
+            while True:
+                file_chunk: FilePart | EndOfStream = queue_upload.get()
+                if isinstance(file_chunk, EndOfStream):
+                    break
+
+                def task(upload_info=upload_info, file_chunk=file_chunk):
+                    return handle_upload(upload_info, file_chunk)
+
+                semaphore.acquire()
+
+                fut = executor.submit(task)
+
+                def done_cb(fut=fut):
+                    semaphore.release()
+                    result = fut.result()
+                    if isinstance(result, Exception):
+                        warnings.warn(f"Error uploading part: {result}, skipping")
+                        return
+                    # upload_state.finished_parts.put(result)
+                    upload_state.add_finished(result)
+
+                fut.add_done_callback(done_cb)
+        except Exception:
+            cancel_chunker_event.set()
+            executor.shutdown(wait=False, cancel_futures=True)
+            raise
+
+
+def upload_file_multipart(
+    s3_client: BaseClient,
+    chunk_fetcher: Callable[[int, int, Any], Future[FilePart]],
+    bucket_name: str,
+    file_path: Path,
+    file_size: int | None,
+    object_name: str,
+    resumable_info_path: Path | None,
+    chunk_size: int = 16 * 1024 * 1024,  # Default chunk size is 16MB; can be overridden
+    upload_threads: int = 16,
+    retries: int = 20,
+    max_chunks_before_suspension: int | None = None,
+    abort_transfer_on_failure: bool = False,
+) -> MultiUploadResult:
+    """Upload a file to the bucket using multipart upload with customizable chunk size."""
+    file_size = file_size if file_size is not None else os.path.getsize(str(file_path))
+    if chunk_size < _MIN_UPLOAD_CHUNK_SIZE:
+        raise ValueError(
+            f"Chunk size {chunk_size} is less than minimum upload chunk size {_MIN_UPLOAD_CHUNK_SIZE}"
+        )
+
+    def get_upload_state() -> UploadState | None:
+        if resumable_info_path is None:
+            locked_print(f"No resumable info path provided for {file_path}")
+            return None
+        if not resumable_info_path.exists():
+            locked_print(
+                f"Resumable info path {resumable_info_path} does not exist for {file_path}"
+            )
+            return None
+        upload_state = UploadState.load(s3_client=s3_client, path=resumable_info_path)
+        return upload_state
+
+    def make_new_state() -> UploadState:
+        locked_print(f"Creating new upload state for {file_path}")
+        upload_info = prepare_upload_file_multipart(
+            s3_client=s3_client,
+            bucket_name=bucket_name,
+            file_path=file_path,
+            file_size=file_size,
+            object_name=object_name,
+            chunk_size=chunk_size,
+            retries=retries,
+        )
+        upload_state = UploadState(
+            upload_info=upload_info,
+            parts=[],
+            peristant=resumable_info_path,
+        )
+        return upload_state
+
+    work_que_max = 1
+
+    new_state = make_new_state()
+    loaded_state = get_upload_state()
+
+    if loaded_state is None:
+        upload_state = new_state
+    else:
+        # if the file size has changed, we cannot resume
+        if (
+            loaded_state.upload_info.fingerprint()
+            != new_state.upload_info.fingerprint()
+        ):
+            locked_print(
+                f"Cannot resume upload: file size changed, starting over for {file_path}"
+            )
+            _abort_previous_upload(loaded_state)
+            upload_state = new_state
+        else:
+            upload_state = loaded_state
+
+    try:
+        upload_state.update_source_file(file_path, file_size)
+    except ValueError as e:
+        locked_print(f"Cannot resume upload: {e}, size changed, starting over")
+        _abort_previous_upload(upload_state)
+        upload_state = make_new_state()
+    upload_state.save()
+    if upload_state.is_done():
+        return MultiUploadResult.ALREADY_DONE
+    finished = upload_state.finished()
+    if finished > 0:
+        locked_print(
+            f"Resuming upload for {file_path}, {finished} parts already uploaded"
+        )
+    started_new_upload = finished == 0
+    upload_info = upload_state.upload_info
+
+    queue_upload: Queue[FilePart | EndOfStream] = Queue(work_que_max)
+    chunker_errors: Queue[Exception] = Queue()
+    cancel_chunker_event = Event()
+
+    def chunker_task(
+        upload_state=upload_state,
+        chunk_fetcher=chunk_fetcher,
+        queue_upload=queue_upload,
+        max_chunks=max_chunks_before_suspension,
+        cancel_signal=cancel_chunker_event,
+        queue_errors=chunker_errors,
+    ) -> None:
+        try:
+            file_chunker(
+                upload_state=upload_state,
+                fetcher=chunk_fetcher,
+                queue_upload=queue_upload,
+                max_chunks=max_chunks,
+                cancel_signal=cancel_signal,
+            )
+        except Exception as e:
+            queue_errors.put(e)
+            _thread.interrupt_main()
+            raise
+        print("#########################################")
+        print("# CHUNKER TASK COMPLETED")
+        print("#########################################")
+
+    try:
+        thread_chunker = Thread(target=chunker_task, daemon=True)
+        thread_chunker.start()
+        upload_runner(
+            upload_state=upload_state,
+            upload_info=upload_info,
+            upload_threads=upload_threads,
+            queue_upload=queue_upload,
+            cancel_chunker_event=cancel_chunker_event,
+        )
+        # upload_state.finished_parts.put(None)  # Signal the end of the queue
+        upload_state.add_finished(EndOfStream())
+        thread_chunker.join()
+
+        if not chunker_errors.empty():
+            raise chunker_errors.get()
+        if not upload_state.is_done():
+            upload_state.save()
+            return MultiUploadResult.SUSPENDED
+        ######################## COMPLETE UPLOAD #######################
+        # Final part now is to complete the upload
+        msg = "\n########################################"
+        msg += f"# Upload complete, sorting {len(upload_state.parts)} parts to complete upload"
+        msg += "########################################\n"
+        locked_print(msg)
+        parts: list[FinishedPiece] = [
+            p for p in upload_state.parts if not isinstance(p, EndOfStream)
+        ]
+        locked_print(f"Upload complete, sorting {len(parts)} parts to complete upload")
+        parts.sort(key=lambda x: x.part_number)  # Some backends need this.
+        parts_s3: list[dict] = [
+            {"ETag": p.etag, "PartNumber": p.part_number} for p in parts
+        ]
+        locked_print(f"Sending multi part completion message for {file_path}")
+        s3_client.complete_multipart_upload(
+            Bucket=bucket_name,
+            Key=object_name,
+            UploadId=upload_info.upload_id,
+            MultipartUpload={"Parts": parts_s3},
+        )
+        locked_print(
+            f"Multipart upload completed: {file_path} to {bucket_name}/{object_name}"
+        )
+    except Exception:
+        if upload_info.upload_id and abort_transfer_on_failure:
+            try:
+                s3_client.abort_multipart_upload(
+                    Bucket=bucket_name, Key=object_name, UploadId=upload_info.upload_id
+                )
+            except Exception:
+                pass
+        raise
+    if started_new_upload:
+        return MultiUploadResult.UPLOADED_FRESH
+    return MultiUploadResult.UPLOADED_RESUME
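
The hunk above (upload_parts_inline.py) is essentially a queue-fed, retrying, resumable wrapper around the standard S3 multipart call sequence. For orientation, a minimal sketch of that underlying sequence with plain boto3 is shown below; the bucket, key, and file names are placeholders, and nothing here comes from the package beyond the S3 calls the hunk itself makes (create_multipart_upload, upload_part, complete_multipart_upload).

# Bare S3 multipart sequence that upload_parts_inline.py wraps with queueing,
# per-part retries and resume state. Bucket, key and file names are placeholders.
import boto3

s3 = boto3.client("s3")
bucket, key, src = "my-bucket", "backups/backup.tar.zst", "backup.tar.zst"
part_size = 16 * 1024 * 1024  # every part except the last must be >= 5MB

mpu = s3.create_multipart_upload(Bucket=bucket, Key=key)
upload_id = mpu["UploadId"]

parts: list[dict] = []
part_number = 1
with open(src, "rb") as f:
    while True:
        chunk = f.read(part_size)
        if not chunk:
            break
        resp = s3.upload_part(
            Bucket=bucket, Key=key, PartNumber=part_number,
            UploadId=upload_id, Body=chunk,
        )
        parts.append({"ETag": resp["ETag"], "PartNumber": part_number})
        part_number += 1

# Parts must be listed in ascending PartNumber order, as the hunk above also enforces.
s3.complete_multipart_upload(
    Bucket=bucket, Key=key, UploadId=upload_id,
    MultipartUpload={"Parts": parts},
)

On top of this sequence, the module above adds per-part retries, a bounded queue between the chunker thread and the upload workers, and an UploadState file so an interrupted transfer can be resumed (or suspended via max_chunks_before_suspension) without re-sending finished parts.
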
rclone_api/s3/multipart/upload_parts_resumable.py
@@ -0,0 +1,304 @@
+import _thread
+import atexit
+import os
+import shutil
+import threading
+import warnings
+from concurrent.futures import Future, ThreadPoolExecutor
+from dataclasses import dataclass
+from pathlib import Path
+
+from rclone_api.http_server import HttpServer
+from rclone_api.rclone_impl import RcloneImpl
+from rclone_api.s3.multipart.info_json import InfoJson
+from rclone_api.types import (
+    PartInfo,
+    Range,
+    SizeSuffix,
+)
+
+
+@dataclass
+class UploadPart:
+    chunk: Path
+    dst_part: str
+    exception: Exception | None = None
+    finished: bool = False
+
+    def dispose(self):
+        try:
+            if self.chunk.exists():
+                self.chunk.unlink()
+            self.finished = True
+        except Exception as e:
+            warnings.warn(f"Failed to delete file {self.chunk}: {e}")
+
+    def __del__(self):
+        self.dispose()
+
+
+def _gen_name(part_number: int, offset: SizeSuffix, end: SizeSuffix) -> str:
+    return f"part.{part_number:05d}_{offset.as_int()}-{end.as_int()}"
+
+
+def upload_task(self: RcloneImpl, upload_part: UploadPart) -> UploadPart:
+    try:
+        if upload_part.exception is not None:
+            return upload_part
+        # print(f"Uploading {upload_part.chunk} to {upload_part.dst_part}")
+        msg = "\n#########################################\n"
+        msg += f"# Uploading {upload_part.chunk} to {upload_part.dst_part}\n"
+        msg += "#########################################\n"
+        print(msg)
+        self.copy_to(upload_part.chunk.as_posix(), upload_part.dst_part)
+        return upload_part
+    except Exception as e:
+        upload_part.exception = e
+        return upload_part
+    finally:
+        upload_part.dispose()
+
+
+def read_task(
+    http_server: HttpServer,
+    src_name: str,
+    tmpdir: Path,
+    offset: SizeSuffix,
+    length: SizeSuffix,
+    part_dst: str,
+) -> UploadPart:
+    outchunk: Path = tmpdir / f"{offset.as_int()}-{(offset + length).as_int()}.chunk"
+    range = Range(offset.as_int(), (offset + length).as_int())
+
+    try:
+        err = http_server.download(
+            path=src_name,
+            range=range,
+            dst=outchunk,
+        )
+        if isinstance(err, Exception):
+            out = UploadPart(chunk=outchunk, dst_part="", exception=err)
+            out.dispose()
+            return out
+        return UploadPart(chunk=outchunk, dst_part=part_dst)
+    except KeyboardInterrupt as ke:
+        _thread.interrupt_main()
+        raise ke
+    except SystemExit as se:
+        _thread.interrupt_main()
+        raise se
+    except Exception as e:
+        return UploadPart(chunk=outchunk, dst_part=part_dst, exception=e)
+
+
+def collapse_runs(numbers: list[int]) -> list[str]:
+    if not numbers:
+        return []
+
+    runs = []
+    start = numbers[0]
+    prev = numbers[0]
+
+    for num in numbers[1:]:
+        if num == prev + 1:
+            # Continue current run
+            prev = num
+        else:
+            # End current run
+            if start == prev:
+                runs.append(str(start))
+            else:
+                runs.append(f"{start}-{prev}")
+            start = num
+            prev = num
+
+    # Append the final run
+    if start == prev:
+        runs.append(str(start))
+    else:
+        runs.append(f"{start}-{prev}")
+
+    return runs
+
+
+_MIN_PART_UPLOAD_SIZE = SizeSuffix("5MB")
+
+
+def _check_part_size(parts: list[PartInfo]) -> Exception | None:
+    if len(parts) == 0:
+        return Exception("No parts to upload")
+    part = parts[0]
+    chunk = part.range.end - part.range.start
+    if chunk < _MIN_PART_UPLOAD_SIZE:
+        return Exception(
+            f"Part size {chunk} is too small to upload. Minimum size for server side merge is {_MIN_PART_UPLOAD_SIZE}"
+        )
+    return None
+
+
+def upload_parts_resumable(
+    self: RcloneImpl,
+    src: str,  # src:/Bucket/path/myfile.large.zst
+    dst_dir: str,  # dst:/Bucket/path/myfile.large.zst-parts/
+    part_infos: list[PartInfo] | None = None,
+    threads: int = 1,
+    verbose: bool | None = None,
+) -> Exception | None:
+    """Copy parts of a file from source to destination."""
+    from rclone_api.util import random_str
+
+    def verbose_print(*args, **kwargs):
+        if verbose:
+            print(*args, **kwargs)
+
+    if dst_dir.endswith("/"):
+        dst_dir = dst_dir[:-1]
+    src_size = self.size_file(src)
+
+    if isinstance(src_size, Exception):
+        return src_size
+
+    part_info: PartInfo
+    src_dir = os.path.dirname(src)
+    src_name = os.path.basename(src)
+    http_server: HttpServer
+
+    full_part_infos: list[PartInfo] | Exception = PartInfo.split_parts(
+        src_size, SizeSuffix("96MB")
+    )
+    if isinstance(full_part_infos, Exception):
+        return full_part_infos
+    assert isinstance(full_part_infos, list)
+
+    if part_infos is None:
+        src_size = self.size_file(src)
+        if isinstance(src_size, Exception):
+            return src_size
+        part_infos = full_part_infos.copy()
+
+    err = _check_part_size(part_infos)
+    if err:
+        return err
+
+    all_part_numbers: list[int] = [p.part_number for p in part_infos]
+    src_info_json = f"{dst_dir}/info.json"
+    info_json = InfoJson(self, src, src_info_json)
+
+    if not info_json.load():
+        verbose_print(f"New: {src_info_json}")
+        # info_json.save()
+
+    all_numbers_already_done: set[int] = set(
+        info_json.fetch_all_finished_part_numbers()
+    )
+
+    first_part_number = part_infos[0].part_number
+    last_part_number = part_infos[-1].part_number
+
+    verbose_print(
+        f"all_numbers_already_done: {collapse_runs(sorted(list(all_numbers_already_done)))}"
+    )
+
+    filtered_part_infos: list[PartInfo] = []
+    for part_info in part_infos:
+        if part_info.part_number not in all_numbers_already_done:
+            filtered_part_infos.append(part_info)
+    part_infos = filtered_part_infos
+    remaining_part_numbers: list[int] = [p.part_number for p in part_infos]
+    verbose_print(f"remaining_part_numbers: {collapse_runs(remaining_part_numbers)}")
+    num_remaining_to_upload = len(part_infos)
+    verbose_print(
+        f"num_remaining_to_upload: {num_remaining_to_upload} / {len(full_part_infos)}"
+    )
+
+    if num_remaining_to_upload == 0:
+        return None
+    chunk_size = SizeSuffix(part_infos[0].range.end - part_infos[0].range.start)
+
+    info_json.chunksize = chunk_size
+
+    info_json.first_part = first_part_number
+    info_json.last_part = last_part_number
+    info_json.save()
+
+    # We are now validated
+    info_json.load()
+    info_json.print()
+
+    print(info_json)
+
+    finished_tasks: list[UploadPart] = []
+    tmp_dir = str(Path("chunks") / random_str(12))
+
+    atexit.register(lambda: shutil.rmtree(tmp_dir, ignore_errors=True))
+
+    with self.serve_http(src_dir) as http_server:
+        tmpdir: Path = Path(tmp_dir)
+        write_semaphore = threading.Semaphore(threads)
+        with ThreadPoolExecutor(max_workers=threads) as upload_executor:
+            with ThreadPoolExecutor(max_workers=threads) as read_executor:
+                for part_info in part_infos:
+                    part_number: int = part_info.part_number
+                    range: Range = part_info.range
+                    offset: SizeSuffix = SizeSuffix(range.start)
+                    length: SizeSuffix = SizeSuffix(range.end - range.start)
+                    end = offset + length
+                    suffix = _gen_name(part_number, offset, end)
+                    part_dst = f"{dst_dir}/{suffix}"
+
+                    def _read_task(
+                        src_name=src_name,
+                        http_server=http_server,
+                        tmpdir=tmpdir,
+                        offset=offset,
+                        length=length,
+                        part_dst=part_dst,
+                    ) -> UploadPart:
+                        return read_task(
+                            src_name=src_name,
+                            http_server=http_server,
+                            tmpdir=tmpdir,
+                            offset=offset,
+                            length=length,
+                            part_dst=part_dst,
+                        )
+
+                    read_fut: Future[UploadPart] = read_executor.submit(_read_task)
+
+                    # Releases the semaphore when the write task is done
+                    def queue_upload_task(
+                        read_fut=read_fut,
+                    ) -> None:
+                        upload_part = read_fut.result()
+                        upload_fut: Future[UploadPart] = upload_executor.submit(
+                            upload_task, self, upload_part
+                        )
+                        # SEMAPHORE RELEASE!!!
+                        upload_fut.add_done_callback(
+                            lambda _: write_semaphore.release()
+                        )
+                        upload_fut.add_done_callback(
+                            lambda fut: finished_tasks.append(fut.result())
+                        )
+
+                    read_fut.add_done_callback(queue_upload_task)
+                    # SEMAPHORE ACQUIRE!!!
+                    # If we are back filled on the writers, then we stall.
+                    write_semaphore.acquire()
+
+    exceptions: list[Exception] = [
+        t.exception for t in finished_tasks if t.exception is not None
+    ]
+
+    shutil.rmtree(tmp_dir, ignore_errors=True)
+
+    if len(exceptions) > 0:
+        return Exception(f"Failed to copy parts: {exceptions}", exceptions)
+
+    finished_parts: list[int] = info_json.fetch_all_finished_part_numbers()
+    print(f"finished_names: {finished_parts}")
+
+    diff_set = set(all_part_numbers).symmetric_difference(set(finished_parts))
+    all_part_numbers_done = len(diff_set) == 0
+    print(f"all_part_numbers_done: {all_part_numbers_done}")
+    return None
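
Two notes on the hunk above (upload_parts_resumable.py). First, the read/upload executor pair plus the write semaphore bounds local disk usage: each loop iteration acquires the semaphore before submitting the next read, and the slot is released only when the corresponding upload future completes, so at most `threads` chunks are buffered on disk at a time. Second, the collapse_runs helper used in the progress messages groups consecutive part numbers into "start-end" ranges; a quick check of its behavior as defined above:

# Behavior of collapse_runs as defined in the hunk above.
from rclone_api.s3.multipart.upload_parts_resumable import collapse_runs

print(collapse_runs([1, 2, 3, 7, 9, 10]))  # ['1-3', '7', '9-10']
print(collapse_runs([5]))                  # ['5']
print(collapse_runs([]))                   # []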