rclone-api 1.4.15__py2.py3-none-any.whl → 1.4.19__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rclone_api/cmd/copy_large_s3_finish.py +11 -108
- rclone_api/detail/copy_file_parts.py +4 -1
- rclone_api/process.py +65 -40
- rclone_api/s3/merge_state.py +147 -0
- rclone_api/s3/multipart/finished_piece.py +15 -5
- rclone_api/s3/s3_multipart_uploader_by_copy.py +400 -151
- rclone_api/util.py +84 -23
- {rclone_api-1.4.15.dist-info → rclone_api-1.4.19.dist-info}/METADATA +1 -1
- {rclone_api-1.4.15.dist-info → rclone_api-1.4.19.dist-info}/RECORD +13 -12
- {rclone_api-1.4.15.dist-info → rclone_api-1.4.19.dist-info}/LICENSE +0 -0
- {rclone_api-1.4.15.dist-info → rclone_api-1.4.19.dist-info}/WHEEL +0 -0
- {rclone_api-1.4.15.dist-info → rclone_api-1.4.19.dist-info}/entry_points.txt +0 -0
- {rclone_api-1.4.15.dist-info → rclone_api-1.4.19.dist-info}/top_level.txt +0 -0

rclone_api/s3/s3_multipart_uploader_by_copy.py:

```diff
@@ -6,38 +6,39 @@ This module provides functionality for S3 multipart uploads, including copying p
 from existing S3 objects using upload_part_copy.
 """
 
+import json
+import os
+import time
+import warnings
 from concurrent.futures import Future, ThreadPoolExecutor
-from
-from
-from
-
-
-from
-
+from queue import Queue
+from threading import Semaphore, Thread
+from typing import Callable
+
+from rclone_api.detail.copy_file_parts import InfoJson
+from rclone_api.rclone_impl import RcloneImpl
+from rclone_api.s3.create import (
+    BaseClient,
+    S3Config,
+    create_s3_client,
+)
+from rclone_api.s3.merge_state import MergeState, Part
 from rclone_api.s3.multipart.finished_piece import FinishedPiece
+from rclone_api.types import EndOfStream
 from rclone_api.util import locked_print
 
+DEFAULT_MAX_WORKERS = 5 # Backblaze can do 10 with exponential backoff, so let's try 5
 
-
-
-    """Simplified upload information for multipart uploads."""
-
-    s3_client: BaseClient
-    bucket_name: str
-    object_name: str
-    upload_id: str
-    chunk_size: int
-    retries: int
-    file_size: Optional[int] = None
-    src_file_path: Optional[Path] = None
+_TIMEOUT_READ = 900
+_TIMEOUT_CONNECTION = 900
 
 
-def
-
+def _upload_part_copy_task(
+    s3_client: BaseClient,
+    state: MergeState,
     source_bucket: str,
     source_key: str,
     part_number: int,
-    retries: int = 3,
 ) -> FinishedPiece | Exception:
     """
     Upload a part by copying from an existing S3 object.
```
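
The imports above pull in `MergeState` and `Part` from the new `rclone_api/s3/merge_state.py` (+147 lines in this release), whose contents are not shown in this section. Purely as a reading aid, here is a rough sketch of the shape those types appear to have, inferred only from how the later hunks use them (`remaining_parts()`, `on_finished()`, the `MergeState(...)` constructor call); the real module may differ:

```python
# Hypothetical reconstruction of the new merge_state.py types, inferred from the
# call sites in this diff. Anything beyond those call sites is a guess.
from dataclasses import dataclass, field


@dataclass
class Part:
    part_number: int
    s3_key: str  # key of the already-uploaded part object


@dataclass
class MergeState:
    rclone_impl: object    # used to persist the resume file via write_text()
    merge_path: str        # remote path of the merge.json resume file
    upload_id: str         # id returned by create_multipart_upload
    bucket: str
    dst_key: str
    all_parts: list[Part]
    finished: list = field(default_factory=list)  # FinishedPiece records
    # The real class also exposes to_json_str() / from_json() for persistence,
    # which the resume logic below relies on.

    def remaining_parts(self) -> list[Part]:
        """Parts that still need an upload_part_copy call."""
        done = {p.part_number for p in self.finished}
        return [p for p in self.all_parts if p.part_number not in done]

    def on_finished(self, piece) -> None:
        """Record a completed part so a resumed merge can skip it."""
        self.finished.append(piece)
```
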
```diff
@@ -56,51 +57,59 @@ def upload_part_copy_task(
     copy_source = {"Bucket": source_bucket, "Key": source_key}
 
     # from botocore.exceptions import NoSuchKey
-
-    retries =
+    default_retries = 9
+    retries = default_retries + 1 # Add one for the initial attempt
     for retry in range(retries):
         params: dict = {}
         try:
             if retry > 0:
-                locked_print(f"Retrying part copy {part_number} for {
+                locked_print(f"Retrying part copy {part_number} for {state.dst_key}")
 
             locked_print(
-                f"Copying part {part_number} for {
+                f"Copying part {part_number} for {state.dst_key} from {source_bucket}/{source_key}"
             )
 
             # Prepare the upload_part_copy parameters
             params = {
-                "Bucket":
+                "Bucket": state.bucket,
                 "CopySource": copy_source,
-                "Key":
+                "Key": state.dst_key,
                 "PartNumber": part_number,
-                "UploadId":
+                "UploadId": state.upload_id,
             }
 
             # Execute the copy operation
-            part =
+            part = s3_client.upload_part_copy(**params)
 
             # Extract ETag from the response
             etag = part["CopyPartResult"]["ETag"]
-
+            out = FinishedPiece(etag=etag, part_number=part_number)
+            locked_print(f"Finished part {part_number} for {state.dst_key}")
+            return out
 
         except Exception as e:
-            msg =
-
+            msg = (
+                f"Error copying {copy_source} -> {state.dst_key}: {e}, params={params}"
+            )
+            if "An error occurred (InternalError)" in str(e):
+                locked_print(msg)
+            elif "NoSuchKey" in str(e):
                 locked_print(msg)
-                return e
             if retry == retries - 1:
                 locked_print(msg)
                 return e
             else:
                 locked_print(f"{msg}, retrying")
+                # sleep
+                sleep_time = 2**retry
+                locked_print(f"Sleeping for {sleep_time} seconds")
                 continue
 
     return Exception("Should not reach here")
 
 
-def
-
+def _complete_multipart_upload_from_parts(
+    s3_client: BaseClient, state: MergeState, finished_parts: list[FinishedPiece]
 ) -> str:
     """
     Complete a multipart upload using the provided parts.
```
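
The hunk above replaces the fixed 3-retry loop with 10 attempts and adds backoff bookkeeping (`sleep_time = 2**retry`) plus special-casing of Backblaze `InternalError` / `NoSuchKey` messages. As a minimal standalone sketch of that retry-with-exponential-backoff pattern around boto3's `upload_part_copy` (the helper name and parameter handling here are illustrative, not the package's API):

```python
# Standalone sketch of retrying upload_part_copy with exponential backoff,
# mirroring the pattern this hunk introduces. Names are illustrative.
import time


def copy_part_with_retries(s3_client, params: dict, retries: int = 10):
    last_exc: Exception | None = None
    for attempt in range(retries):
        try:
            # Server-side copy of one part of a multipart upload (boto3 S3 client)
            return s3_client.upload_part_copy(**params)
        except Exception as exc:  # e.g. InternalError / NoSuchKey from the backend
            last_exc = exc
            if attempt == retries - 1:
                break
            time.sleep(2**attempt)  # back off before the next attempt
    return last_exc
```
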
```diff
@@ -113,105 +122,61 @@ def complete_multipart_upload_from_parts(
         The URL of the completed object
     """
     # Sort parts by part number to ensure correct order
-
-
-    # Prepare the parts list for the complete_multipart_upload call
-    multipart_parts = [
-        {"ETag": part.etag, "PartNumber": part.part_number} for part in parts
-    ]
+    finished_parts.sort(key=lambda x: x.part_number)
+    multipart_parts = FinishedPiece.to_json_array(finished_parts)
 
     # Complete the multipart upload
-    response =
-        Bucket=
-        Key=
-        UploadId=
+    response = s3_client.complete_multipart_upload(
+        Bucket=state.bucket,
+        Key=state.dst_key,
+        UploadId=state.upload_id,
         MultipartUpload={"Parts": multipart_parts},
     )
 
     # Return the URL of the completed object
-    return response.get("Location", f"s3://{
+    return response.get("Location", f"s3://{state.bucket}/{state.dst_key}")
 
 
-def
+def _do_upload_task(
     s3_client: BaseClient,
-
-
-
-
-    destination_key: str,
-    chunk_size: int, # 5MB default
-    max_workers: int = 100,
-    retries: int = 3,
-) -> str:
-    """
-    Finish a multipart upload by copying parts from existing S3 objects.
-
-    Args:
-        s3_client: Boto3 S3 client
-        source_bucket: Source bucket name
-        source_keys: List of source object keys to copy from
-        destination_bucket: Destination bucket name
-        destination_key: Destination object key
-        chunk_size: Size of each part in bytes
-        retries: Number of retry attempts
-        byte_ranges: Optional list of byte ranges corresponding to source_keys
-
-    Returns:
-        The URL of the completed object
-    """
-
-    # Initiate multipart upload
-    locked_print(
-        f"Creating multipart upload for {destination_bucket}/{destination_key} from {len(parts)} source objects"
-    )
-
-    create_params: dict[str, str] = {
-        "Bucket": destination_bucket,
-        "Key": destination_key,
-    }
-    print(f"Creating multipart upload with {create_params}")
-    mpu = s3_client.create_multipart_upload(**create_params)
-    print(f"Created multipart upload: {mpu}")
-    upload_id = mpu["UploadId"]
-
-    # Create upload info
-    upload_info = MultipartUploadInfo(
-        s3_client=s3_client,
-        bucket_name=destination_bucket,
-        object_name=destination_key,
-        upload_id=upload_id,
-        retries=retries,
-        chunk_size=chunk_size,
-        file_size=final_size,
-    )
-
+    max_workers: int,
+    merge_state: MergeState,
+    on_finished: Callable[[FinishedPiece | EndOfStream], None],
+) -> Exception | None:
     futures: list[Future[FinishedPiece | Exception]] = []
-
+    parts = merge_state.remaining_parts()
+    source_bucket = merge_state.bucket
     with ThreadPoolExecutor(max_workers=max_workers) as executor:
-
-
-
-        for part_number, source_key in parts:
+        semaphore = Semaphore(max_workers)
+        for part in parts:
+            part_number, s3_key = part.part_number, part.s3_key
 
             def task(
-
+                s3_client=s3_client,
+                state=merge_state,
                 source_bucket=source_bucket,
-
+                s3_key=s3_key,
                 part_number=part_number,
-                retries=retries,
             ):
-
-
+                out = _upload_part_copy_task(
+                    s3_client=s3_client,
+                    state=state,
                     source_bucket=source_bucket,
-                    source_key=
+                    source_key=s3_key,
                     part_number=part_number,
-                    retries=retries,
                 )
+                if isinstance(out, Exception):
+                    return out
+                # merge_state.on_finished(out)
+                on_finished(out)
+                return out
 
             fut = executor.submit(task)
             fut.add_done_callback(lambda x: semaphore.release())
             futures.append(fut)
-
+
+            while not semaphore.acquire(blocking=False):
+                time.sleep(0.1)
 
         # Upload parts by copying from source objects
         finished_parts: list[FinishedPiece] = []
```
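
The reworked `_do_upload_task` bounds how far the submit loop can run ahead of the copy workers: a `Semaphore(max_workers)` is released in each future's done-callback, and the loop spins on a non-blocking acquire before queuing the next part. A self-contained sketch of that throttling idea (function names are illustrative, not the package's API):

```python
# Minimal sketch of bounding in-flight work with a Semaphore + ThreadPoolExecutor,
# mirroring the pattern added in _do_upload_task.
import time
from concurrent.futures import Future, ThreadPoolExecutor
from threading import Semaphore


def run_throttled(work_items, do_one, max_workers: int = 5) -> list:
    futures: list[Future] = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        semaphore = Semaphore(max_workers)
        for item in work_items:
            fut = executor.submit(do_one, item)
            fut.add_done_callback(lambda _: semaphore.release())
            futures.append(fut)
            # Wait for a slot to free up before queuing more work
            while not semaphore.acquire(blocking=False):
                time.sleep(0.1)
        return [f.result() for f in futures]


print(run_throttled(range(10), lambda x: x * x, max_workers=3))
```
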
```diff
@@ -220,50 +185,334 @@ def finish_multipart_upload_from_keys(
             finished_part = fut.result()
             if isinstance(finished_part, Exception):
                 executor.shutdown(wait=True, cancel_futures=True)
-
+                return finished_part
             finished_parts.append(finished_part)
 
-
-
+    on_finished(EndOfStream())
+
+    try:
+        # Complete the multipart upload
+        _complete_multipart_upload_from_parts(
+            s3_client=s3_client, state=merge_state, finished_parts=finished_parts
+        )
+    except Exception as e:
+        warnings.warn(f"Error completing multipart upload: {e}")
+        return e
+    return None
+
+
+def _begin_upload(
+    s3_client: BaseClient,
+    parts: list[Part],
+    bucket: str,
+    dst_key: str,
+    verbose: bool,
+) -> str:
+    """
+    Finish a multipart upload by copying parts from existing S3 objects.
+
+    Args:
+        s3_client: Boto3 S3 client
+        source_bucket: Source bucket name
+        source_keys: List of source object keys to copy from
+        bucket: Destination bucket name
+        dst_key: Destination object key
+        retries: Number of retry attempts
+        byte_ranges: Optional list of byte ranges corresponding to source_keys
+
+    Returns:
+        The upload id of the multipart upload
+    """
+
+    # Initiate multipart upload
+    if verbose:
+        locked_print(
+            f"Creating multipart upload for {bucket}/{dst_key} from {len(parts)} source objects"
+        )
+    create_params: dict[str, str] = {
+        "Bucket": bucket,
+        "Key": dst_key,
+    }
+    if verbose:
+        locked_print(f"Creating multipart upload with {create_params}")
+    mpu = s3_client.create_multipart_upload(**create_params)
+    if verbose:
+        locked_print(f"Created multipart upload: {mpu}")
+    upload_id = mpu["UploadId"]
+    return upload_id
+
+
+class WriteMergeStateThread(Thread):
+    def __init__(self, rclone_impl: RcloneImpl, merge_state: MergeState):
+        super().__init__(daemon=True)
+        assert isinstance(merge_state, MergeState)
+        self.merge_state = merge_state
+        self.merge_path = merge_state.merge_path
+        self.rclone_impl = rclone_impl
+        self.queue: Queue[FinishedPiece | EndOfStream] = Queue()
+        self.start()
+
+    def _get_next(self) -> FinishedPiece | EndOfStream:
+        item = self.queue.get()
+        if isinstance(item, EndOfStream):
+            return item
+        # see if there are more items in the queue, only write the last one
+        while not self.queue.empty():
+            item = self.queue.get()
+            if isinstance(item, EndOfStream):
+                # put it back in for next time
+                self.queue.put(item)
+                return item
+        return item
+
+    def run(self):
+        while True:
+            item = self._get_next()
+            if isinstance(item, EndOfStream):
+                warnings.warn("End of stream")
+                break
+
+            assert isinstance(item, FinishedPiece)
+            # piece: FinishedPiece = item
+            # at this point just write out the whole json str
+            json_str = self.merge_state.to_json_str()
+            err = self.rclone_impl.write_text(self.merge_path, json_str)
+            if isinstance(err, Exception):
+                warnings.warn(f"Error writing merge state: {err}")
+                break
+
+    def add_finished(self, finished: FinishedPiece) -> None:
+        self.queue.put(finished)
+
+    def add_eos(self) -> None:
+        self.queue.put(EndOfStream())
+
+
+def _cleanup_merge(rclone: RcloneImpl, info: InfoJson) -> Exception | None:
+    size = info.size
+    dst = info.dst
+    parts_dir = info.parts_dir
+    if not rclone.exists(dst):
+        return FileNotFoundError(f"Destination file not found: {dst}")
+
+    write_size = rclone.size_file(dst)
+    if write_size != size:
+        return ValueError(f"Size mismatch: {write_size} != {size}")
+
+    print(f"Upload complete: {dst}")
+    cp = rclone.purge(parts_dir)
+    if cp.failed():
+        return Exception(f"Failed to purge parts dir: {cp}")
+    return None
+
+
+def _get_merge_path(info_path: str) -> str:
+    par_dir = os.path.dirname(info_path)
+    merge_path = f"{par_dir}/merge.json"
+    return merge_path
+
+
+def _begin_or_resume_merge(
+    rclone: RcloneImpl,
+    info: InfoJson,
+    max_workers: int = DEFAULT_MAX_WORKERS,
+) -> "S3MultiPartMerger | Exception":
+    try:
+        merger: S3MultiPartMerger = S3MultiPartMerger(
+            rclone_impl=rclone,
+            info=info,
+            verbose=True,
+            max_workers=max_workers,
+        )
+
+        s3_bucket = merger.bucket
+        is_done = info.fetch_is_done()
+        assert is_done, f"Upload is not done: {info}"
+
+        merge_path = _get_merge_path(info_path=info.src_info)
+        merge_json_text = rclone.read_text(merge_path)
+        if isinstance(merge_json_text, str):
+            # Attempt to do a resume
+            merge_data = json.loads(merge_json_text)
+            merge_state = MergeState.from_json(rclone_impl=rclone, json=merge_data)
+            if isinstance(merge_state, MergeState):
+                merger._begin_resume_merge(merge_state=merge_state)
+                return merger
+            warnings.warn(f"Failed to resume merge: {merge_state}, starting new merge")
+
+        parts_dir = info.parts_dir
+        source_keys = info.fetch_all_finished()
+
+        parts_path = parts_dir.split(s3_bucket)[1]
+        if parts_path.startswith("/"):
+            parts_path = parts_path[1:]
+
+        first_part: int | None = info.first_part
+        last_part: int | None = info.last_part
+
+        assert first_part is not None
+        assert last_part is not None
+
+        def _to_s3_key(name: str | None) -> str:
+            if name:
+                out = f"{parts_path}/{name}"
+                return out
+            out = f"{parts_path}"
+            return out
+
+        parts: list[Part] = []
+        part_num = first_part
+        for part_key in source_keys:
+            assert part_num <= last_part and part_num >= first_part
+            s3_key = _to_s3_key(name=part_key)
+            part = Part(part_number=part_num, s3_key=s3_key)
+            parts.append(part)
+            part_num += 1
+
+        dst_name = info.dst_name
+        dst_dir = os.path.dirname(parts_path)
+        dst_key = f"{dst_dir}/{dst_name}"
+
+        err = merger._begin_new_merge(
+            merge_path=merge_path,
+            parts=parts,
+            bucket=merger.bucket,
+            dst_key=dst_key,
+        )
+        if isinstance(err, Exception):
+            return err
+        return merger
+    except Exception as e:
+        return e
 
 
-class
-    def __init__(
-        self
+class S3MultiPartMerger:
+    def __init__(
+        self,
+        rclone_impl: RcloneImpl,
+        info: InfoJson,
+        s3_config: S3Config | None = None,
+        verbose: bool = False,
+        max_workers: int = DEFAULT_MAX_WORKERS,
+    ) -> None:
+        self.rclone_impl: RcloneImpl = rclone_impl
+        self.info = info
+        self.s3_creds = rclone_impl.get_s3_credentials(remote=info.dst)
         self.verbose = verbose
+        s3_config = s3_config or S3Config(
+            verbose=verbose,
+            timeout_read=_TIMEOUT_READ,
+            timeout_connection=_TIMEOUT_CONNECTION,
+            max_pool_connections=max_workers,
+        )
+        self.max_workers = s3_config.max_pool_connections or DEFAULT_MAX_WORKERS
+        self.client = create_s3_client(s3_creds=self.s3_creds, s3_config=s3_config)
+        self.state: MergeState | None = None
+        self.write_thread: WriteMergeStateThread | None = None
+
+    @staticmethod
+    def create(
+        rclone: RcloneImpl, info: InfoJson, max_workers: int
+    ) -> "S3MultiPartMerger | Exception":
+        return _begin_or_resume_merge(rclone=rclone, info=info, max_workers=max_workers)
+
+    @property
+    def bucket(self) -> str:
+        return self.s3_creds.bucket_name
+
+    def start_write_thread(self) -> None:
+        assert self.state is not None
+        assert self.write_thread is None
+        self.write_thread = WriteMergeStateThread(
+            rclone_impl=self.rclone_impl,
+            merge_state=self.state,
+        )
 
-    def
+    def _begin_new_merge(
         self,
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        parts: list[Part],
+        merge_path: str,
+        bucket: str,
+        dst_key: str,
+    ) -> Exception | None:
+        try:
+            upload_id: str = _begin_upload(
+                s3_client=self.client,
+                parts=parts,
+                bucket=bucket,
+                dst_key=dst_key,
+                verbose=self.verbose,
+            )
+            merge_state = MergeState(
+                rclone_impl=self.rclone_impl,
+                merge_path=merge_path,
+                upload_id=upload_id,
+                bucket=bucket,
+                dst_key=dst_key,
+                finished=[],
+                all_parts=parts,
+            )
+            self.state = merge_state
+            return None
+        except Exception as e:
+            return e
+
+    def _begin_resume_merge(
+        self,
+        merge_state: MergeState,
+    ) -> None:
+        self.state = merge_state
+
+    def _on_piece_finished(self, finished_piece: FinishedPiece | EndOfStream) -> None:
+        assert self.write_thread is not None
+        assert self.state is not None
+        if isinstance(finished_piece, EndOfStream):
+            self.write_thread.add_eos()
+        else:
+            self.state.on_finished(finished_piece)
+            self.write_thread.add_finished(finished_piece)
+
+    def merge(
+        self,
+    ) -> Exception | None:
+        state = self.state
+        if state is None:
+            return Exception("No merge state loaded")
+        self.start_write_thread()
+        err = _do_upload_task(
+            s3_client=self.client,
+            merge_state=state,
+            max_workers=self.max_workers,
+            on_finished=self._on_piece_finished,
+        )
+        if isinstance(err, Exception):
+            return err
+        return None
+
+    def cleanup(self) -> Exception | None:
+        return _cleanup_merge(rclone=self.rclone_impl, info=self.info)
+
+
+def s3_server_side_multi_part_merge(
+    rclone: RcloneImpl, info_path: str, max_workers: int = DEFAULT_MAX_WORKERS
+) -> Exception | None:
+    info = InfoJson(rclone, src=None, src_info=info_path)
+    loaded = info.load()
+    if not loaded:
+        return FileNotFoundError(
+            f"Info file not found, has the upload finished? {info_path}"
         )
+    merger: S3MultiPartMerger | Exception = S3MultiPartMerger.create(
+        rclone=rclone, info=info, max_workers=max_workers
+    )
+    if isinstance(merger, Exception):
+        return merger
+
+    err = merger.merge()
+    if isinstance(err, Exception):
+        return err
+
+    err = merger.cleanup()
+    if isinstance(err, Exception):
+        err
+    return None
```
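
Taken together, the new code turns the finishing step into a resumable, server-side merge driven by `s3_server_side_multi_part_merge`: it reads the upload's `info.json`, creates or resumes a `MergeState` persisted to a sibling `merge.json`, copies every part with `upload_part_copy`, completes the multipart upload, and purges the parts directory. Based only on the signature added above (the import path is taken from the file list; obtaining a configured `RcloneImpl` instance is outside this diff), a caller might wrap it like this:

```python
# Sketch of driving the new entry point; the signature is taken from the diff above.
# How the RcloneImpl instance is constructed is not shown in this release's diff.
from rclone_api.rclone_impl import RcloneImpl
from rclone_api.s3.s3_multipart_uploader_by_copy import s3_server_side_multi_part_merge


def merge_uploaded_parts(rclone: RcloneImpl, info_path: str) -> None:
    """Run the server-side merge and raise if any step reports an error."""
    err = s3_server_side_multi_part_merge(
        rclone=rclone,
        info_path=info_path,  # e.g. the info.json written next to the uploaded parts
        max_workers=5,
    )
    if isinstance(err, Exception):
        raise err
```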