rclone-api 1.5.8 (rclone_api-1.5.8-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rclone_api/__init__.py +951 -0
- rclone_api/assets/example.txt +1 -0
- rclone_api/cli.py +15 -0
- rclone_api/cmd/analyze.py +51 -0
- rclone_api/cmd/copy_large_s3.py +111 -0
- rclone_api/cmd/copy_large_s3_finish.py +81 -0
- rclone_api/cmd/list_files.py +27 -0
- rclone_api/cmd/save_to_db.py +77 -0
- rclone_api/completed_process.py +60 -0
- rclone_api/config.py +87 -0
- rclone_api/convert.py +31 -0
- rclone_api/db/__init__.py +3 -0
- rclone_api/db/db.py +277 -0
- rclone_api/db/models.py +57 -0
- rclone_api/deprecated.py +24 -0
- rclone_api/detail/copy_file_parts_resumable.py +42 -0
- rclone_api/detail/walk.py +116 -0
- rclone_api/diff.py +164 -0
- rclone_api/dir.py +113 -0
- rclone_api/dir_listing.py +66 -0
- rclone_api/exec.py +40 -0
- rclone_api/experimental/flags.py +89 -0
- rclone_api/experimental/flags_base.py +58 -0
- rclone_api/file.py +205 -0
- rclone_api/file_item.py +68 -0
- rclone_api/file_part.py +198 -0
- rclone_api/file_stream.py +52 -0
- rclone_api/filelist.py +30 -0
- rclone_api/group_files.py +256 -0
- rclone_api/http_server.py +244 -0
- rclone_api/install.py +95 -0
- rclone_api/log.py +44 -0
- rclone_api/mount.py +55 -0
- rclone_api/mount_util.py +247 -0
- rclone_api/process.py +187 -0
- rclone_api/rclone_impl.py +1285 -0
- rclone_api/remote.py +21 -0
- rclone_api/rpath.py +102 -0
- rclone_api/s3/api.py +109 -0
- rclone_api/s3/basic_ops.py +61 -0
- rclone_api/s3/chunk_task.py +187 -0
- rclone_api/s3/create.py +107 -0
- rclone_api/s3/multipart/file_info.py +7 -0
- rclone_api/s3/multipart/finished_piece.py +69 -0
- rclone_api/s3/multipart/info_json.py +239 -0
- rclone_api/s3/multipart/merge_state.py +147 -0
- rclone_api/s3/multipart/upload_info.py +62 -0
- rclone_api/s3/multipart/upload_parts_inline.py +356 -0
- rclone_api/s3/multipart/upload_parts_resumable.py +304 -0
- rclone_api/s3/multipart/upload_parts_server_side_merge.py +546 -0
- rclone_api/s3/multipart/upload_state.py +165 -0
- rclone_api/s3/types.py +67 -0
- rclone_api/scan_missing_folders.py +153 -0
- rclone_api/types.py +402 -0
- rclone_api/util.py +324 -0
- rclone_api-1.5.8.dist-info/LICENSE +21 -0
- rclone_api-1.5.8.dist-info/METADATA +969 -0
- rclone_api-1.5.8.dist-info/RECORD +61 -0
- rclone_api-1.5.8.dist-info/WHEEL +5 -0
- rclone_api-1.5.8.dist-info/entry_points.txt +5 -0
- rclone_api-1.5.8.dist-info/top_level.txt +1 -0

rclone_api/s3/multipart/upload_parts_inline.py
@@ -0,0 +1,356 @@
+import _thread
+import os
+import traceback
+import warnings
+from concurrent.futures import Future, ThreadPoolExecutor
+from pathlib import Path
+from queue import Queue
+from threading import Event, Thread
+from typing import Any, Callable
+
+from botocore.client import BaseClient
+
+from rclone_api.file_part import FilePart
+from rclone_api.s3.chunk_task import file_chunker
+from rclone_api.s3.multipart.file_info import S3FileInfo
+from rclone_api.s3.multipart.finished_piece import FinishedPiece
+from rclone_api.s3.multipart.upload_info import UploadInfo
+from rclone_api.s3.multipart.upload_state import UploadState
+from rclone_api.s3.types import MultiUploadResult
+from rclone_api.types import EndOfStream
+from rclone_api.util import locked_print
+
+_MIN_UPLOAD_CHUNK_SIZE = 5 * 1024 * 1024 # 5MB
+
+
+def upload_task(
+    info: UploadInfo,
+    chunk: FilePart,
+    part_number: int,
+    retries: int,
+) -> FinishedPiece:
+    file_or_err: Path | Exception = chunk.get_file()
+    if isinstance(file_or_err, Exception):
+        raise file_or_err
+    file: Path = file_or_err
+    size = os.path.getsize(file)
+    retries = retries + 1 # Add one for the initial attempt
+    for retry in range(retries):
+        try:
+            if retry > 0:
+                locked_print(f"Retrying part {part_number} for {info.src_file_path}")
+            locked_print(
+                f"Uploading part {part_number} for {info.src_file_path} of size {size}"
+            )
+
+            with open(file, "rb") as f:
+                part = info.s3_client.upload_part(
+                    Bucket=info.bucket_name,
+                    Key=info.object_name,
+                    PartNumber=part_number,
+                    UploadId=info.upload_id,
+                    Body=f,
+                )
+                out: FinishedPiece = FinishedPiece(
+                    etag=part["ETag"], part_number=part_number
+                )
+            chunk.dispose()
+            return out
+        except Exception as e:
+            if retry == retries - 1:
+                locked_print(f"Error uploading part {part_number}: {e}")
+                chunk.dispose()
+                raise e
+            else:
+                locked_print(f"Error uploading part {part_number}: {e}, retrying")
+                continue
+    raise Exception("Should not reach here")
+
+
+def handle_upload(
+    upload_info: UploadInfo, fp: FilePart | EndOfStream
+) -> FinishedPiece | Exception | EndOfStream:
+    if isinstance(fp, EndOfStream):
+        eos: EndOfStream = fp
+        return eos
+    part_number: int | None = None
+    try:
+        assert isinstance(fp.extra, S3FileInfo)
+        extra: S3FileInfo = fp.extra
+        part_number = extra.part_number
+        print(f"Handling upload for {part_number}, size {fp.size}")
+
+        part: FinishedPiece = upload_task(
+            info=upload_info,
+            chunk=fp,
+            part_number=part_number,
+            retries=upload_info.retries,
+        )
+        return part
+    except Exception as e:
+        stacktrace = traceback.format_exc()
+        msg = f"Error uploading part {part_number}: {e}\n{stacktrace}"
+        warnings.warn(msg)
+        return e
+    finally:
+        fp.dispose()
+
+
+def prepare_upload_file_multipart(
+    s3_client: BaseClient,
+    bucket_name: str,
+    file_path: Path,
+    file_size: int | None,
+    object_name: str,
+    chunk_size: int,
+    retries: int,
+) -> UploadInfo:
+    """Upload a file to the bucket using multipart upload with customizable chunk size."""
+
+    # Initiate multipart upload
+    locked_print(
+        f"Creating multipart upload for {file_path} to {bucket_name}/{object_name}"
+    )
+    mpu = s3_client.create_multipart_upload(Bucket=bucket_name, Key=object_name)
+    upload_id = mpu["UploadId"]
+
+    file_size = file_size if file_size is not None else os.path.getsize(file_path)
+
+    upload_info: UploadInfo = UploadInfo(
+        s3_client=s3_client,
+        bucket_name=bucket_name,
+        object_name=object_name,
+        src_file_path=file_path,
+        upload_id=upload_id,
+        retries=retries,
+        chunk_size=chunk_size,
+        file_size=file_size,
+    )
+    return upload_info
+
+
+def _abort_previous_upload(upload_state: UploadState) -> None:
+    if upload_state.upload_info.upload_id:
+        try:
+            upload_state.upload_info.s3_client.abort_multipart_upload(
+                Bucket=upload_state.upload_info.bucket_name,
+                Key=upload_state.upload_info.object_name,
+                UploadId=upload_state.upload_info.upload_id,
+            )
+        except Exception as e:
+            locked_print(f"Error aborting previous upload: {e}")
+
+
+def upload_runner(
+    upload_state: UploadState,
+    upload_info: UploadInfo,
+    upload_threads: int,
+    queue_upload: Queue[FilePart | EndOfStream],
+    cancel_chunker_event: Event,
+) -> None:
+    # import semaphre
+    import threading
+
+    semaphore = threading.Semaphore(upload_threads)
+    with ThreadPoolExecutor(max_workers=upload_threads) as executor:
+        try:
+            while True:
+                file_chunk: FilePart | EndOfStream = queue_upload.get()
+                if isinstance(file_chunk, EndOfStream):
+                    break
+
+                def task(upload_info=upload_info, file_chunk=file_chunk):
+                    return handle_upload(upload_info, file_chunk)
+
+                semaphore.acquire()
+
+                fut = executor.submit(task)
+
+                def done_cb(fut=fut):
+                    semaphore.release()
+                    result = fut.result()
+                    if isinstance(result, Exception):
+                        warnings.warn(f"Error uploading part: {result}, skipping")
+                        return
+                    # upload_state.finished_parts.put(result)
+                    upload_state.add_finished(result)
+
+                fut.add_done_callback(done_cb)
+        except Exception:
+            cancel_chunker_event.set()
+            executor.shutdown(wait=False, cancel_futures=True)
+            raise
+
+
+def upload_file_multipart(
+    s3_client: BaseClient,
+    chunk_fetcher: Callable[[int, int, Any], Future[FilePart]],
+    bucket_name: str,
+    file_path: Path,
+    file_size: int | None,
+    object_name: str,
+    resumable_info_path: Path | None,
+    chunk_size: int = 16 * 1024 * 1024, # Default chunk size is 16MB; can be overridden
+    upload_threads: int = 16,
+    retries: int = 20,
+    max_chunks_before_suspension: int | None = None,
+    abort_transfer_on_failure: bool = False,
+) -> MultiUploadResult:
+    """Upload a file to the bucket using multipart upload with customizable chunk size."""
+    file_size = file_size if file_size is not None else os.path.getsize(str(file_path))
+    if chunk_size < _MIN_UPLOAD_CHUNK_SIZE:
+        raise ValueError(
+            f"Chunk size {chunk_size} is less than minimum upload chunk size {_MIN_UPLOAD_CHUNK_SIZE}"
+        )
+
+    def get_upload_state() -> UploadState | None:
+        if resumable_info_path is None:
+            locked_print(f"No resumable info path provided for {file_path}")
+            return None
+        if not resumable_info_path.exists():
+            locked_print(
+                f"Resumable info path {resumable_info_path} does not exist for {file_path}"
+            )
+            return None
+        upload_state = UploadState.load(s3_client=s3_client, path=resumable_info_path)
+        return upload_state
+
+    def make_new_state() -> UploadState:
+        locked_print(f"Creating new upload state for {file_path}")
+        upload_info = prepare_upload_file_multipart(
+            s3_client=s3_client,
+            bucket_name=bucket_name,
+            file_path=file_path,
+            file_size=file_size,
+            object_name=object_name,
+            chunk_size=chunk_size,
+            retries=retries,
+        )
+        upload_state = UploadState(
+            upload_info=upload_info,
+            parts=[],
+            peristant=resumable_info_path,
+        )
+        return upload_state
+
+    work_que_max = 1
+
+    new_state = make_new_state()
+    loaded_state = get_upload_state()
+
+    if loaded_state is None:
+        upload_state = new_state
+    else:
+        # if the file size has changed, we cannot resume
+        if (
+            loaded_state.upload_info.fingerprint()
+            != new_state.upload_info.fingerprint()
+        ):
+            locked_print(
+                f"Cannot resume upload: file size changed, starting over for {file_path}"
+            )
+            _abort_previous_upload(loaded_state)
+            upload_state = new_state
+        else:
+            upload_state = loaded_state
+
+    try:
+        upload_state.update_source_file(file_path, file_size)
+    except ValueError as e:
+        locked_print(f"Cannot resume upload: {e}, size changed, starting over")
+        _abort_previous_upload(upload_state)
+        upload_state = make_new_state()
+    upload_state.save()
+    if upload_state.is_done():
+        return MultiUploadResult.ALREADY_DONE
+    finished = upload_state.finished()
+    if finished > 0:
+        locked_print(
+            f"Resuming upload for {file_path}, {finished} parts already uploaded"
+        )
+    started_new_upload = finished == 0
+    upload_info = upload_state.upload_info
+
+    queue_upload: Queue[FilePart | EndOfStream] = Queue(work_que_max)
+    chunker_errors: Queue[Exception] = Queue()
+    cancel_chunker_event = Event()
+
+    def chunker_task(
+        upload_state=upload_state,
+        chunk_fetcher=chunk_fetcher,
+        queue_upload=queue_upload,
+        max_chunks=max_chunks_before_suspension,
+        cancel_signal=cancel_chunker_event,
+        queue_errors=chunker_errors,
+    ) -> None:
+        try:
+            file_chunker(
+                upload_state=upload_state,
+                fetcher=chunk_fetcher,
+                queue_upload=queue_upload,
+                max_chunks=max_chunks,
+                cancel_signal=cancel_signal,
+            )
+        except Exception as e:
+            queue_errors.put(e)
+            _thread.interrupt_main()
+            raise
+        print("#########################################")
+        print("# CHUNKER TASK COMPLETED")
+        print("#########################################")
+
+    try:
+        thread_chunker = Thread(target=chunker_task, daemon=True)
+        thread_chunker.start()
+        upload_runner(
+            upload_state=upload_state,
+            upload_info=upload_info,
+            upload_threads=upload_threads,
+            queue_upload=queue_upload,
+            cancel_chunker_event=cancel_chunker_event,
+        )
+        # upload_state.finished_parts.put(None) # Signal the end of the queue
+        upload_state.add_finished(EndOfStream())
+        thread_chunker.join()
+
+        if not chunker_errors.empty():
+            raise chunker_errors.get()
+        if not upload_state.is_done():
+            upload_state.save()
+            return MultiUploadResult.SUSPENDED
+        ######################## COMPLETE UPLOAD #######################
+        # Final part now is to complete the upload
+        msg = "\n########################################"
+        msg += f"# Upload complete, sorting {len(upload_state.parts)} parts to complete upload"
+        msg += "########################################\n"
+        locked_print(msg)
+        parts: list[FinishedPiece] = [
+            p for p in upload_state.parts if not isinstance(p, EndOfStream)
+        ]
+        locked_print(f"Upload complete, sorting {len(parts)} parts to complete upload")
+        parts.sort(key=lambda x: x.part_number) # Some backends need this.
+        parts_s3: list[dict] = [
+            {"ETag": p.etag, "PartNumber": p.part_number} for p in parts
+        ]
+        locked_print(f"Sending multi part completion message for {file_path}")
+        s3_client.complete_multipart_upload(
+            Bucket=bucket_name,
+            Key=object_name,
+            UploadId=upload_info.upload_id,
+            MultipartUpload={"Parts": parts_s3},
+        )
+        locked_print(
+            f"Multipart upload completed: {file_path} to {bucket_name}/{object_name}"
+        )
+    except Exception:
+        if upload_info.upload_id and abort_transfer_on_failure:
+            try:
+                s3_client.abort_multipart_upload(
+                    Bucket=bucket_name, Key=object_name, UploadId=upload_info.upload_id
+                )
+            except Exception:
+                pass
+        raise
+    if started_new_upload:
+        return MultiUploadResult.UPLOADED_FRESH
+    return MultiUploadResult.UPLOADED_RESUME
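
Note: the upload_runner/file_chunker pair above is a bounded producer-consumer pipeline: a daemon chunker thread feeds FilePart chunks into a size-1 queue, a semaphore caps how many parts are in flight, and an EndOfStream sentinel shuts the loop down. The following is a minimal, self-contained sketch of that pattern, not part of the package; the names (bounded_consumer, EOS) are hypothetical, and plain integers with a squaring task stand in for FilePart and handle_upload.

import threading
from concurrent.futures import ThreadPoolExecutor
from queue import Queue

EOS = object()  # sentinel, plays the role of EndOfStream

def bounded_consumer(queue: Queue, workers: int = 4) -> list[int]:
    results: list[int] = []
    gate = threading.Semaphore(workers)  # caps the number of tasks in flight
    with ThreadPoolExecutor(max_workers=workers) as executor:
        while True:
            item = queue.get()
            if item is EOS:
                break
            gate.acquire()  # block if `workers` tasks are already running
            fut = executor.submit(lambda it=item: it * it)  # stand-in for handle_upload
            def done(f, _gate=gate, _results=results):
                _gate.release()  # free a slot as soon as the task finishes
                _results.append(f.result())
            fut.add_done_callback(done)
    return results  # the executor context manager waits for pending futures

q: Queue = Queue(maxsize=1)  # size-1 queue, as in upload_file_multipart
producer = threading.Thread(target=lambda: [q.put(i) for i in range(8)] + [q.put(EOS)])
producer.start()
print(sorted(bounded_consumer(q)))  # [0, 1, 4, 9, 16, 25, 36, 49]
producer.join()

Releasing the semaphore from the done-callback rather than inside the submitting loop is what lets the loop stall as soon as upload_threads parts are queued or running.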

rclone_api/s3/multipart/upload_parts_resumable.py
@@ -0,0 +1,304 @@
+import _thread
+import atexit
+import os
+import shutil
+import threading
+import warnings
+from concurrent.futures import Future, ThreadPoolExecutor
+from dataclasses import dataclass
+from pathlib import Path
+
+from rclone_api.http_server import HttpServer
+from rclone_api.rclone_impl import RcloneImpl
+from rclone_api.s3.multipart.info_json import InfoJson
+from rclone_api.types import (
+    PartInfo,
+    Range,
+    SizeSuffix,
+)
+
+
+@dataclass
+class UploadPart:
+    chunk: Path
+    dst_part: str
+    exception: Exception | None = None
+    finished: bool = False
+
+    def dispose(self):
+        try:
+            if self.chunk.exists():
+                self.chunk.unlink()
+            self.finished = True
+        except Exception as e:
+            warnings.warn(f"Failed to delete file {self.chunk}: {e}")
+
+    def __del__(self):
+        self.dispose()
+
+
+def _gen_name(part_number: int, offset: SizeSuffix, end: SizeSuffix) -> str:
+    return f"part.{part_number:05d}_{offset.as_int()}-{end.as_int()}"
+
+
+def upload_task(self: RcloneImpl, upload_part: UploadPart) -> UploadPart:
+    try:
+        if upload_part.exception is not None:
+            return upload_part
+        # print(f"Uploading {upload_part.chunk} to {upload_part.dst_part}")
+        msg = "\n#########################################\n"
+        msg += f"# Uploading {upload_part.chunk} to {upload_part.dst_part}\n"
+        msg += "#########################################\n"
+        print(msg)
+        self.copy_to(upload_part.chunk.as_posix(), upload_part.dst_part)
+        return upload_part
+    except Exception as e:
+        upload_part.exception = e
+        return upload_part
+    finally:
+        upload_part.dispose()
+
+
+def read_task(
+    http_server: HttpServer,
+    src_name: str,
+    tmpdir: Path,
+    offset: SizeSuffix,
+    length: SizeSuffix,
+    part_dst: str,
+) -> UploadPart:
+    outchunk: Path = tmpdir / f"{offset.as_int()}-{(offset + length).as_int()}.chunk"
+    range = Range(offset.as_int(), (offset + length).as_int())
+
+    try:
+        err = http_server.download(
+            path=src_name,
+            range=range,
+            dst=outchunk,
+        )
+        if isinstance(err, Exception):
+            out = UploadPart(chunk=outchunk, dst_part="", exception=err)
+            out.dispose()
+            return out
+        return UploadPart(chunk=outchunk, dst_part=part_dst)
+    except KeyboardInterrupt as ke:
+        _thread.interrupt_main()
+        raise ke
+    except SystemExit as se:
+        _thread.interrupt_main()
+        raise se
+    except Exception as e:
+        return UploadPart(chunk=outchunk, dst_part=part_dst, exception=e)
+
+
+def collapse_runs(numbers: list[int]) -> list[str]:
+    if not numbers:
+        return []
+
+    runs = []
+    start = numbers[0]
+    prev = numbers[0]
+
+    for num in numbers[1:]:
+        if num == prev + 1:
+            # Continue current run
+            prev = num
+        else:
+            # End current run
+            if start == prev:
+                runs.append(str(start))
+            else:
+                runs.append(f"{start}-{prev}")
+            start = num
+            prev = num
+
+    # Append the final run
+    if start == prev:
+        runs.append(str(start))
+    else:
+        runs.append(f"{start}-{prev}")
+
+    return runs
+
+
+_MIN_PART_UPLOAD_SIZE = SizeSuffix("5MB")
+
+
+def _check_part_size(parts: list[PartInfo]) -> Exception | None:
+    if len(parts) == 0:
+        return Exception("No parts to upload")
+    part = parts[0]
+    chunk = part.range.end - part.range.start
+    if chunk < _MIN_PART_UPLOAD_SIZE:
+        return Exception(
+            f"Part size {chunk} is too small to upload. Minimum size for server side merge is {_MIN_PART_UPLOAD_SIZE}"
+        )
+    return None
+
+
+def upload_parts_resumable(
+    self: RcloneImpl,
+    src: str, # src:/Bucket/path/myfile.large.zst
+    dst_dir: str, # dst:/Bucket/path/myfile.large.zst-parts/
+    part_infos: list[PartInfo] | None = None,
+    threads: int = 1,
+    verbose: bool | None = None,
+) -> Exception | None:
+    """Copy parts of a file from source to destination."""
+    from rclone_api.util import random_str
+
+    def verbose_print(*args, **kwargs):
+        if verbose:
+            print(*args, **kwargs)
+
+    if dst_dir.endswith("/"):
+        dst_dir = dst_dir[:-1]
+    src_size = self.size_file(src)
+
+    if isinstance(src_size, Exception):
+        return src_size
+
+    part_info: PartInfo
+    src_dir = os.path.dirname(src)
+    src_name = os.path.basename(src)
+    http_server: HttpServer
+
+    full_part_infos: list[PartInfo] | Exception = PartInfo.split_parts(
+        src_size, SizeSuffix("96MB")
+    )
+    if isinstance(full_part_infos, Exception):
+        return full_part_infos
+    assert isinstance(full_part_infos, list)
+
+    if part_infos is None:
+        src_size = self.size_file(src)
+        if isinstance(src_size, Exception):
+            return src_size
+        part_infos = full_part_infos.copy()
+
+    err = _check_part_size(part_infos)
+    if err:
+        return err
+
+    all_part_numbers: list[int] = [p.part_number for p in part_infos]
+    src_info_json = f"{dst_dir}/info.json"
+    info_json = InfoJson(self, src, src_info_json)
+
+    if not info_json.load():
+        verbose_print(f"New: {src_info_json}")
+        # info_json.save()
+
+    all_numbers_already_done: set[int] = set(
+        info_json.fetch_all_finished_part_numbers()
+    )
+
+    first_part_number = part_infos[0].part_number
+    last_part_number = part_infos[-1].part_number
+
+    verbose_print(
+        f"all_numbers_already_done: {collapse_runs(sorted(list(all_numbers_already_done)))}"
+    )
+
+    filtered_part_infos: list[PartInfo] = []
+    for part_info in part_infos:
+        if part_info.part_number not in all_numbers_already_done:
+            filtered_part_infos.append(part_info)
+    part_infos = filtered_part_infos
+    remaining_part_numbers: list[int] = [p.part_number for p in part_infos]
+    verbose_print(f"remaining_part_numbers: {collapse_runs(remaining_part_numbers)}")
+    num_remaining_to_upload = len(part_infos)
+    verbose_print(
+        f"num_remaining_to_upload: {num_remaining_to_upload} / {len(full_part_infos)}"
+    )
+
+    if num_remaining_to_upload == 0:
+        return None
+    chunk_size = SizeSuffix(part_infos[0].range.end - part_infos[0].range.start)
+
+    info_json.chunksize = chunk_size
+
+    info_json.first_part = first_part_number
+    info_json.last_part = last_part_number
+    info_json.save()
+
+    # We are now validated
+    info_json.load()
+    info_json.print()
+
+    print(info_json)
+
+    finished_tasks: list[UploadPart] = []
+    tmp_dir = str(Path("chunks") / random_str(12))
+
+    atexit.register(lambda: shutil.rmtree(tmp_dir, ignore_errors=True))
+
+    with self.serve_http(src_dir) as http_server:
+        tmpdir: Path = Path(tmp_dir)
+        write_semaphore = threading.Semaphore(threads)
+        with ThreadPoolExecutor(max_workers=threads) as upload_executor:
+            with ThreadPoolExecutor(max_workers=threads) as read_executor:
+                for part_info in part_infos:
+                    part_number: int = part_info.part_number
+                    range: Range = part_info.range
+                    offset: SizeSuffix = SizeSuffix(range.start)
+                    length: SizeSuffix = SizeSuffix(range.end - range.start)
+                    end = offset + length
+                    suffix = _gen_name(part_number, offset, end)
+                    part_dst = f"{dst_dir}/{suffix}"
+
+                    def _read_task(
+                        src_name=src_name,
+                        http_server=http_server,
+                        tmpdir=tmpdir,
+                        offset=offset,
+                        length=length,
+                        part_dst=part_dst,
+                    ) -> UploadPart:
+                        return read_task(
+                            src_name=src_name,
+                            http_server=http_server,
+                            tmpdir=tmpdir,
+                            offset=offset,
+                            length=length,
+                            part_dst=part_dst,
+                        )
+
+                    read_fut: Future[UploadPart] = read_executor.submit(_read_task)
+
+                    # Releases the semaphore when the write task is done
+                    def queue_upload_task(
+                        read_fut=read_fut,
+                    ) -> None:
+                        upload_part = read_fut.result()
+                        upload_fut: Future[UploadPart] = upload_executor.submit(
+                            upload_task, self, upload_part
+                        )
+                        # SEMAPHORE RELEASE!!!
+                        upload_fut.add_done_callback(
+                            lambda _: write_semaphore.release()
+                        )
+                        upload_fut.add_done_callback(
+                            lambda fut: finished_tasks.append(fut.result())
+                        )
+
+                    read_fut.add_done_callback(queue_upload_task)
+                    # SEMAPHORE ACQUIRE!!!
+                    # If we are back filled on the writers, then we stall.
+                    write_semaphore.acquire()
+
+    exceptions: list[Exception] = [
+        t.exception for t in finished_tasks if t.exception is not None
+    ]
+
+    shutil.rmtree(tmp_dir, ignore_errors=True)
+
+    if len(exceptions) > 0:
+        return Exception(f"Failed to copy parts: {exceptions}", exceptions)
+
+    finished_parts: list[int] = info_json.fetch_all_finished_part_numbers()
+    print(f"finished_names: {finished_parts}")
+
+    diff_set = set(all_part_numbers).symmetric_difference(set(finished_parts))
+    all_part_numbers_done = len(diff_set) == 0
+    print(f"all_part_numbers_done: {all_part_numbers_done}")
+    return None
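
Note: collapse_runs above is used only to keep the progress logs compact. For reference, its behavior on small inputs (a quick check, not part of the package):

print(collapse_runs([1, 2, 3, 7, 9, 10]))  # ['1-3', '7', '9-10']
print(collapse_runs([5]))                  # ['5']
print(collapse_runs([]))                   # []

The read/upload loop uses the same semaphore-in-a-callback arrangement as upload_parts_inline.py: write_semaphore.acquire() in the submit loop stalls the reader once `threads` uploads are pending, and the done-callback on each upload future releases a slot.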