rclone-api 1.4.7__py2.py3-none-any.whl → 1.4.8__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rclone_api/cmd/copy_large_s3.py +5 -13
- rclone_api/cmd/copy_large_s3_finish.py +217 -0
- rclone_api/detail/copy_file_parts.py +75 -15
- rclone_api/process.py +6 -14
- rclone_api/rclone_impl.py +61 -44
- rclone_api/s3/multipart/finished_piece.py +4 -1
- rclone_api/s3/s3_multipart_uploader_by_copy.py +395 -0
- rclone_api/s3/types.py +1 -0
- rclone_api/types.py +13 -0
- rclone_api/util.py +52 -9
- {rclone_api-1.4.7.dist-info → rclone_api-1.4.8.dist-info}/METADATA +1 -1
- {rclone_api-1.4.7.dist-info → rclone_api-1.4.8.dist-info}/RECORD +16 -15
- rclone_api/s3/s3_multipart_uploader.py +0 -138
- {rclone_api-1.4.7.dist-info → rclone_api-1.4.8.dist-info}/LICENSE +0 -0
- {rclone_api-1.4.7.dist-info → rclone_api-1.4.8.dist-info}/WHEEL +0 -0
- {rclone_api-1.4.7.dist-info → rclone_api-1.4.8.dist-info}/entry_points.txt +0 -0
- {rclone_api-1.4.7.dist-info → rclone_api-1.4.8.dist-info}/top_level.txt +0 -0
rclone_api/cmd/copy_large_s3.py
CHANGED
@@ -11,8 +11,7 @@ class Args:
     src: str
     dst: str
     chunk_size: SizeSuffix
-    read_threads: int
-    write_threads: int
+    threads: int
     retries: int
     save_state_json: Path
     verbose: bool
@@ -40,16 +39,10 @@ def _parse_args() -> Args:
         default="128MB",  # if this is too low or too high an s3 service
     )
     parser.add_argument(
-        "--read-threads",
-        help="Number of concurrent read threads per chunk, only one chunk will be read at a time",
-        type=int,
-        default=8,
-    )
-    parser.add_argument(
-        "--write-threads",
+        "--threads",
         help="Max number of chunks to upload in parallel to the destination, each chunk is uploaded in a separate thread",
         type=int,
-        default=
+        default=8,
     )
     parser.add_argument("--retries", help="Number of retries", type=int, default=3)
     parser.add_argument(
@@ -70,9 +63,8 @@ def _parse_args() -> Args:
         config_path=config,
         src=args.src,
         dst=args.dst,
+        threads=args.threads,
         chunk_size=SizeSuffix(args.chunk_size),
-        read_threads=args.read_threads,
-        write_threads=args.write_threads,
         retries=args.retries,
         save_state_json=args.resume_json,
         verbose=args.verbose,
@@ -122,6 +114,6 @@ if __name__ == "__main__":
         "45061:aa_misc_data/aa_misc_data/world_lending_library_2024_11.tar.zst"
     )
     sys.argv.append(
-        "dst:TorrentBooks/aa_misc_data/aa_misc_data/world_lending_library_2024_11.tar.zst"
+        "dst:TorrentBooks/aa_misc_data/aa_misc_data/world_lending_library_2024_11.tar.zst-parts"
     )
     main()
rclone_api/cmd/copy_large_s3_finish.py
ADDED
@@ -0,0 +1,217 @@
+import argparse
+from dataclasses import dataclass
+from pathlib import Path
+
+from rclone_api import Rclone
+from rclone_api.detail.copy_file_parts import InfoJson
+from rclone_api.s3.s3_multipart_uploader_by_copy import (
+    finish_multipart_upload_from_keys,
+)
+from rclone_api.types import SizeSuffix
+
+DATA_SOURCE = (
+    "dst:TorrentBooks/aa_misc_data/aa_misc_data/world_lending_library_2024_11.tar.zst"
+)
+
+
+# response = client.upload_part_copy(
+# Bucket='string',
+# CopySource='string' or {'Bucket': 'string', 'Key': 'string', 'VersionId': 'string'},
+# CopySourceIfMatch='string',
+# CopySourceIfModifiedSince=datetime(2015, 1, 1),
+# CopySourceIfNoneMatch='string',
+# CopySourceIfUnmodifiedSince=datetime(2015, 1, 1),
+# CopySourceRange='string',
+# Key='string',
+# PartNumber=123,
+# UploadId='string',
+# SSECustomerAlgorithm='string',
+# SSECustomerKey='string',
+# CopySourceSSECustomerAlgorithm='string',
+# CopySourceSSECustomerKey='string',
+# RequestPayer='requester',
+# ExpectedBucketOwner='string',
+# ExpectedSourceBucketOwner='string'
+# )
+
+
+@dataclass
+class Args:
+    config_path: Path
+    src: str  # like dst:TorrentBooks/aa_misc_data/aa_misc_data/world_lending_library_2024_11.tar.zst-parts/ (info.json will be located here)
+    dst: str  # like dst:TorrentBooks/aa_misc_data/aa_misc_data/world_lending_library_2024_11.tar.zst
+    verbose: bool
+
+
+def list_files(rclone: Rclone, path: str):
+    """List files in a remote path."""
+    for dirlisting in rclone.walk(path):
+        for file in dirlisting.files:
+            print(file.path)
+
+
+def _parse_args() -> Args:
+    parser = argparse.ArgumentParser(description="List files in a remote path.")
+    parser.add_argument("src", help="File to copy")
+    parser.add_argument("dst", help="Destination file")
+    parser.add_argument("-v", "--verbose", help="Verbose output", action="store_true")
+    parser.add_argument(
+        "--config", help="Path to rclone config file", type=Path, required=False
+    )
+    parser.add_argument(
+        "--chunk-size",
+        help="Chunk size that will be read and uploaded in SizeSuffix form, too low or too high will cause issues",
+        type=str,
+        default="128MB",  # if this is too low or too high an s3 service
+    )
+
+    args = parser.parse_args()
+    config: Path | None = args.config
+    if config is None:
+        config = Path("rclone.conf")
+        if not config.exists():
+            raise FileNotFoundError(f"Config file not found: {config}")
+    assert config is not None
+    out = Args(
+        config_path=config,
+        src=args.src,
+        dst=args.dst,
+        verbose=args.verbose,
+    )
+    return out
+
+
+# from dataclasses import dataclass
+
+# def parse_info_json(text: str) -> UploadInfo:
+#     import json
+#     data = json.loads(text)
+#     chunk_size = data["chunksize_int"]
+#     first_part = data["first_part"]
+#     last_part = data["last_part"]
+#     assert isinstance(chunk_size, int)
+#     assert isinstance(first_part, int)
+#     assert isinstance(last_part, int)
+#     assert first_part <= last_part
+#     parts: list[str] = []
+#     fmt = "part.{:05d}_{}-{}"
+#     for i in range(first_part, last_part + 1):
+#         offset: int = i * chunk_size
+#         end: int = (i + 1) * chunk_size
+#         part = fmt.format(i, offset, end)
+#         parts.append(part)
+#     return UploadInfo(chunk_size=chunk_size, parts=parts)
+
+
+def do_finish_part(rclone: Rclone, info: InfoJson, dst: str) -> None:
+    from rclone_api.s3.create import BaseClient, S3Credentials, create_s3_client
+
+    s3_creds: S3Credentials = rclone.impl.get_s3_credentials(remote=dst)
+    s3_client: BaseClient = create_s3_client(s3_creds)
+    s3_bucket = s3_creds.bucket_name
+    is_done = info.fetch_is_done()
+    assert is_done, f"Upload is not done: {info}"
+
+    parts_dir = info.parts_dir
+    if parts_dir.endswith("/"):
+        parts_dir = parts_dir[:-1]
+    source_keys = info.fetch_all_finished()
+
+    print(parts_dir)
+    print(source_keys)
+
+    parent_path = parts_dir.split(s3_bucket)[1]
+    if parent_path.startswith("/"):
+        parent_path = parent_path[1:]
+
+    first_part: int | None = info.first_part
+    last_part: int | None = info.last_part
+    size: SizeSuffix | None = info.size
+
+    assert first_part is not None
+    assert last_part is not None
+    assert size is not None
+
+    def _to_s3_key(name: str) -> str:
+        out = f"{parent_path}/{name}"
+        return out
+
+    # s3_keys: list[str] = [_to_s3_key(name=p) for p in source_keys]
+    parts: list[tuple[int, str]] = []
+    for i in range(first_part, last_part + 1):
+        part_name = f"part.{i:05d}"
+        s3_key = _to_s3_key(name=part_name)
+        parts.append((i, s3_key))
+
+    # for key in parts:
+    #     print(key)
+
+    chunksize = info.chunksize
+    assert chunksize is not None
+
+    finish_multipart_upload_from_keys(
+        s3_client=s3_client,
+        source_bucket=s3_creds.bucket_name,
+        parts=parts,
+        destination_bucket=s3_creds.bucket_name,
+        destination_key=dst,
+        chunk_size=chunksize.as_int(),
+        final_size=size.as_int(),
+        retries=3,
+    )
+
+    if False:
+        print(finish_multipart_upload_from_keys)
+        print(s3_client)
+    print("done")
+
+# def finish_multipart_upload_from_keys(
+#     s3_client: BaseClient,
+#     source_bucket: str,
+#     source_keys: list[str],
+#     destination_bucket: str,
+#     destination_key: str,
+#     chunk_size: int = 5 * 1024 * 1024,  # 5MB default
+#     retries: int = 3,
+#     byte_ranges: list[str] | None = None,
+
+# if False:
+#     finish_multipart_upload_from_keys(
+#         s3_client=s3_client,
+#         source_bucket="TODO",
+#         source_keys=[p.key for p in all_parts],
+#         destination_bucket=info.dst_bucket,
+#         destination_key=info.dst_key,
+#         chunk_size=5 * 1024 * 1024,
+#         retries=3,
+#         byte_ranges=None,
+#     )
+
+# print(all_parts)
+
+
+def main() -> int:
+    """Main entry point."""
+    args = _parse_args()
+    rclone = Rclone(rclone_conf=args.config_path)
+    info_json = f"{args.src}/info.json".replace("//", "/")
+    info = InfoJson(rclone.impl, src=None, src_info=info_json)
+    loaded = info.load()
+    assert loaded
+    print(info)
+    do_finish_part(rclone=rclone, info=info, dst=args.dst)
+    return 0
+
+
+if __name__ == "__main__":
+    import sys
+
+    sys.argv.append("--config")
+    sys.argv.append("rclone.conf")
+    sys.argv.append(
+        "dst:TorrentBooks/aa_misc_data/aa_misc_data/world_lending_library_2024_11.tar.zst-parts/"
+    )
+    sys.argv.append(
+        "dst:TorrentBooks/aa_misc_data/aa_misc_data/world_lending_library_2024_11.tar.zst"
+    )
+    main()
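The finish command above only needs the metadata recorded by the splitter. As a hedged illustration, not part of the package, here is roughly how the (part_number, key) pairs handed to finish_multipart_upload_from_keys are derived from the fields that InfoJson exposes; the field names follow this diff, while the concrete values and prefix are invented.

# Hypothetical sketch: derive part keys from info.json-style fields.
info = {"chunksize_int": 128 * 1024 * 1024, "first_part": 0, "last_part": 37}
parent_path = "aa_misc_data/world_lending_library_2024_11.tar.zst-parts"  # invented prefix
parts = [
    (i, f"{parent_path}/part.{i:05d}")  # matches the part.{i:05d} naming in do_finish_part
    for i in range(info["first_part"], info["last_part"] + 1)
]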
rclone_api/detail/copy_file_parts.py
CHANGED
@@ -53,7 +53,7 @@ def upload_task(self: RcloneImpl, upload_part: UploadPart) -> UploadPart:
         msg = "\n#########################################\n"
         msg += f"# Uploading {upload_part.chunk} to {upload_part.dst_part}\n"
         msg += "#########################################\n"
-        print
+        print(msg)
         self.copy_to(upload_part.chunk.as_posix(), upload_part.dst_part)
         return upload_part
     except Exception as e:
@@ -106,11 +106,24 @@ def _fetch_all_names(
     return filtered


-def _get_info_json(self: RcloneImpl, src: str, src_info: str) -> dict:
+def _get_info_json(self: RcloneImpl, src: str | None, src_info: str) -> dict:
     from rclone_api.file import File

+    data: dict
+    text: str
+    if src is None:
+        # just try to load the file
+        text_or_err = self.read_text(src_info)
+        if isinstance(text_or_err, Exception):
+            raise FileNotFoundError(f"Could not load {src_info}: {text_or_err}")
+        assert isinstance(text_or_err, str)
+        text = text_or_err
+        data = json.loads(text)
+        return data
+
     src_stat: File | Exception = self.stat(src)
     if isinstance(src_stat, Exception):
+        # just try to load the file
         raise FileNotFoundError(f"Failed to stat {src}: {src_stat}")

     now: datetime = datetime.now()
@@ -133,12 +146,11 @@ def _get_info_json(self: RcloneImpl, src: str, src_info: str) -> dict:
         warnings.warn(f"Failed to read {src_info}: {text_or_err}")
         return new_data
     assert isinstance(text_or_err, str)
-    text
+    text = text_or_err

     if err is not None:
         return new_data

-    data: dict = {}
     try:
         data = json.loads(text)
         return data
@@ -168,13 +180,14 @@ def _save_info_json(self: RcloneImpl, src: str, data: dict) -> None:


 class InfoJson:
-    def __init__(self, rclone: RcloneImpl, src: str, src_info: str) -> None:
+    def __init__(self, rclone: RcloneImpl, src: str | None, src_info: str) -> None:
         self.rclone = rclone
         self.src = src
         self.src_info = src_info
         self.data: dict = {}

     def load(self) -> bool:
+        """Returns true if the file exist and is now loaded."""
         self.data = _get_info_json(self.rclone, self.src, self.src_info)
         return not self.data.get("new", False)

@@ -194,16 +207,60 @@ class InfoJson:
         part_numbers = [int(name.split("_")[0].split(".")[1]) for name in names]
         return part_numbers

+    @property
+    def parts_dir(self) -> str:
+        return os.path.dirname(self.src_info)
+
+    def compute_all_parts(self) -> list[PartInfo] | Exception:
+        # full_part_infos: list[PartInfo] | Exception = PartInfo.split_parts(
+        #     src_size, SizeSuffix("96MB")
+        try:
+
+            src_size = self.size
+            chunk_size = self.chunksize
+            assert isinstance(src_size, SizeSuffix)
+            assert isinstance(chunk_size, SizeSuffix)
+            first_part = self.data["first_part"]
+            last_part = self.data["last_part"]
+            full_part_infos: list[PartInfo] = PartInfo.split_parts(src_size, chunk_size)
+            return full_part_infos[first_part : last_part + 1]
+        except Exception as e:
+            return e
+
+    def compute_all_part_numbers(self) -> list[int] | Exception:
+        all_parts: list[PartInfo] | Exception = self.compute_all_parts()
+        if isinstance(all_parts, Exception):
+            raise all_parts
+
+        all_part_nums: list[int] = [p.part_number for p in all_parts]
+        return all_part_nums
+
+    def fetch_remaining_part_numbers(self) -> list[int] | Exception:
+        all_part_nums: list[int] | Exception = self.compute_all_part_numbers()
+        if isinstance(all_part_nums, Exception):
+            return all_part_nums
+        finished_part_nums: list[int] = self.fetch_all_finished_part_numbers()
+        remaining_part_nums: list[int] = list(
+            set(all_part_nums) - set(finished_part_nums)
+        )
+        return sorted(remaining_part_nums)
+
+    def fetch_is_done(self) -> bool:
+        remaining_part_nums: list[int] | Exception = self.fetch_remaining_part_numbers()
+        if isinstance(remaining_part_nums, Exception):
+            return False
+        return len(remaining_part_nums) == 0
+
     @property
     def new(self) -> bool:
         return self.data.get("new", False)

     @property
     def chunksize(self) -> SizeSuffix | None:
-
-        if
+        chunksize_int: int | None = self.data.get("chunksize_int")
+        if chunksize_int is None:
             return None
-        return SizeSuffix(
+        return SizeSuffix(chunksize_int)

     @chunksize.setter
     def chunksize(self, value: SizeSuffix) -> None:
@@ -219,21 +276,24 @@ class InfoJson:
         self.data["src_modtime"] = value.isoformat()

     @property
-    def
+    def size(self) -> SizeSuffix:
+        return SizeSuffix(self.data["size"])
+
+    def _get_first_part(self) -> int | None:
         return self.data.get("first_part")

-
-    def first_part(self, value: int) -> None:
+    def _set_first_part(self, value: int) -> None:
         self.data["first_part"] = value

-
-    def last_part(self) -> int | None:
+    def _get_last_part(self) -> int | None:
         return self.data.get("last_part")

-
-    def last_part(self, value: int) -> None:
+    def _set_last_part(self, value: int) -> None:
         self.data["last_part"] = value

+    first_part: int | None = property(_get_first_part, _set_first_part)  # type: ignore
+    last_part: int | None = property(_get_last_part, _set_last_part)  # type: ignore
+
     @property
     def hash(self) -> str | None:
         return self.data.get("hash")
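InfoJson switches first_part and last_part from decorator-based properties to explicit property(getter, setter) assignments, so the same attribute stays both readable and assignable. A minimal, self-contained illustration of that pattern (not taken from the package):

# Hypothetical example of the property(getter, setter) idiom used above.
class Example:
    def __init__(self) -> None:
        self.data: dict = {}

    def _get_first_part(self) -> int | None:
        return self.data.get("first_part")

    def _set_first_part(self, value: int) -> None:
        self.data["first_part"] = value

    first_part = property(_get_first_part, _set_first_part)

e = Example()
e.first_part = 3          # calls _set_first_part
print(e.first_part)       # calls _get_first_part -> 3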
rclone_api/process.py
CHANGED
@@ -5,11 +5,10 @@ import time
 import weakref
 from dataclasses import dataclass
 from pathlib import Path
-from tempfile import TemporaryDirectory
 from typing import Any

 from rclone_api.config import Config
-from rclone_api.util import get_verbose
+from rclone_api.util import clear_temp_config_file, get_verbose, make_temp_config_file


 @dataclass
@@ -28,17 +27,14 @@ class Process:
         assert args.rclone_exe.exists()
         self.args = args
         self.log = args.log
-        self.
+        self.tempfile: Path | None = None
         verbose = get_verbose(args.verbose)
         if isinstance(args.rclone_conf, Config):
-            self.
-            tmpfile
-
-            rclone_conf = tmpfile
-            self.needs_cleanup = True
+            self.tmpfile = make_temp_config_file()
+            self.tmpfile.write_text(args.rclone_conf.text, encoding="utf-8")
+            rclone_conf = self.tmpfile
         else:
             rclone_conf = args.rclone_conf
-            self.needs_cleanup = False

         assert rclone_conf.exists()

@@ -81,11 +77,7 @@ class Process:
         self.cleanup()

     def cleanup(self) -> None:
-
-        try:
-            self.tempdir.cleanup()
-        except Exception as e:
-            print(f"Error cleaning up tempdir: {e}")
+        clear_temp_config_file(self.tempfile)

     def _atexit_terminate(self) -> None:
         """
rclone_api/rclone_impl.py
CHANGED
@@ -32,6 +32,7 @@ from rclone_api.mount import Mount, clean_mount, prepare_mount
 from rclone_api.process import Process
 from rclone_api.remote import Remote
 from rclone_api.rpath import RPath
+from rclone_api.s3.create import S3Credentials
 from rclone_api.s3.types import (
     MultiUploadResult,
     S3MutliPartUploadConfig,
@@ -862,6 +863,64 @@ class RcloneImpl:
         )
         return SizeSuffix(out.total_size)

+    def get_s3_credentials(
+        self, remote: str, verbose: bool | None = None
+    ) -> S3Credentials:
+        from rclone_api.util import S3PathInfo, split_s3_path
+
+        verbose = get_verbose(verbose)
+        path_info: S3PathInfo = split_s3_path(remote)
+
+        # path_info: S3PathInfo = split_s3_path(remote)
+        remote = path_info.remote
+        bucket_name = path_info.bucket
+
+        remote = path_info.remote
+        parsed: Parsed = self.config.parse()
+        sections: dict[str, Section] = parsed.sections
+        if remote not in sections:
+            raise ValueError(
+                f"Remote {remote} not found in rclone config, remotes are: {sections.keys()}"
+            )
+
+        section: Section = sections[remote]
+        dst_type = section.type()
+        if dst_type != "s3" and dst_type != "b2":
+            raise ValueError(
+                f"Remote {remote} is not an S3 remote, it is of type {dst_type}"
+            )
+
+        def get_provider_str(section=section) -> str | None:
+            type: str = section.type()
+            provider: str | None = section.provider()
+            if provider is not None:
+                return provider
+            if type == "b2":
+                return S3Provider.BACKBLAZE.value
+            if type != "s3":
+                raise ValueError(f"Remote {remote} is not an S3 remote")
+            return S3Provider.S3.value
+
+        provider: str
+        if provided_provider_str := get_provider_str():
+            if verbose:
+                print(f"Using provided provider: {provided_provider_str}")
+            provider = provided_provider_str
+        else:
+            if verbose:
+                print(f"Using default provider: {S3Provider.S3.value}")
+            provider = S3Provider.S3.value
+        provider_enum = S3Provider.from_str(provider)
+
+        s3_creds: S3Credentials = S3Credentials(
+            bucket_name=bucket_name,
+            provider=provider_enum,
+            access_key_id=section.access_key_id(),
+            secret_access_key=section.secret_access_key(),
+            endpoint_url=section.endpoint(),
+        )
+        return s3_creds
+
     def copy_file_resumable_s3(
         self,
         src: str,
@@ -878,7 +937,6 @@ class RcloneImpl:
         """For massive files that rclone can't handle in one go, this function will copy the file in chunks to an S3 store"""
         from rclone_api.http_server import HttpFetcher, HttpServer
         from rclone_api.s3.api import S3Client
-        from rclone_api.s3.create import S3Credentials
         from rclone_api.util import S3PathInfo, split_s3_path

         src_path = Path(src)
@@ -909,51 +967,10 @@
         )

         path_info: S3PathInfo = split_s3_path(dst)
-        remote = path_info.remote
+        # remote = path_info.remote
         bucket_name = path_info.bucket
         s3_key = path_info.key
-
-        sections: dict[str, Section] = parsed.sections
-        if remote not in sections:
-            raise ValueError(
-                f"Remote {remote} not found in rclone config, remotes are: {sections.keys()}"
-            )
-
-        section: Section = sections[remote]
-        dst_type = section.type()
-        if dst_type != "s3" and dst_type != "b2":
-            raise ValueError(
-                f"Remote {remote} is not an S3 remote, it is of type {dst_type}"
-            )
-
-        def get_provider_str(section=section) -> str | None:
-            type: str = section.type()
-            provider: str | None = section.provider()
-            if provider is not None:
-                return provider
-            if type == "b2":
-                return S3Provider.BACKBLAZE.value
-            if type != "s3":
-                raise ValueError(f"Remote {remote} is not an S3 remote")
-            return S3Provider.S3.value
-
-        provider: str
-        if provided_provider_str := get_provider_str():
-            if verbose:
-                print(f"Using provided provider: {provided_provider_str}")
-            provider = provided_provider_str
-        else:
-            if verbose:
-                print(f"Using default provider: {S3Provider.S3.value}")
-            provider = S3Provider.S3.value
-        provider_enum = S3Provider.from_str(provider)
-
-        s3_creds: S3Credentials = S3Credentials(
-            provider=provider_enum,
-            access_key_id=section.access_key_id(),
-            secret_access_key=section.secret_access_key(),
-            endpoint_url=section.endpoint(),
-        )
+        s3_creds: S3Credentials = self.get_s3_credentials(dst, verbose=verbose)

         port = random.randint(10000, 20000)
         http_server: HttpServer = self.serve_http(
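get_s3_credentials centralizes the config lookup that copy_file_resumable_s3 previously inlined: it splits the remote path, finds the matching section in the parsed rclone config, maps type/provider to an S3Provider, and returns S3Credentials. A hedged usage sketch; the remote name and path are invented and must exist as an s3 or b2 section in rclone.conf:

# Hypothetical example; "dst" must be a configured s3/b2 remote in rclone.conf.
from pathlib import Path
from rclone_api import Rclone

rclone = Rclone(rclone_conf=Path("rclone.conf"))
creds = rclone.impl.get_s3_credentials("dst:my-bucket/backups/big.tar.zst")
print(creds.bucket_name, creds.endpoint_url)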
rclone_api/s3/multipart/finished_piece.py
CHANGED
@@ -1,4 +1,5 @@
 import json
+import warnings
 from dataclasses import dataclass

 from rclone_api.types import EndOfStream
@@ -28,7 +29,9 @@ class FinishedPiece:
         for p in parts:
             if p is EndOfStream:
                 count_eos += 1
-        assert count_eos <= 1, "Only one EndOfStream should be present"
+        # assert count_eos <= 1, "Only one EndOfStream should be present"
+        if count_eos > 1:
+            warnings.warn(f"Only one EndOfStream should be present, found {count_eos}")
         return [p.to_json() for p in non_none]

     @staticmethod
rclone_api/s3/s3_multipart_uploader_by_copy.py
ADDED
@@ -0,0 +1,395 @@
+"""
+https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/client/upload_part_copy.html
+* client.upload_part_copy
+
+This module provides functionality for S3 multipart uploads, including copying parts
+from existing S3 objects using upload_part_copy.
+"""
+
+from concurrent.futures import Future, ThreadPoolExecutor
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Optional
+
+from botocore.client import BaseClient
+
+from rclone_api.s3.multipart.finished_piece import FinishedPiece
+from rclone_api.util import locked_print
+
+
+@dataclass
+class MultipartUploadInfo:
+    """Simplified upload information for multipart uploads."""
+
+    s3_client: BaseClient
+    bucket_name: str
+    object_name: str
+    upload_id: str
+    chunk_size: int
+    retries: int
+    file_size: Optional[int] = None
+    src_file_path: Optional[Path] = None
+
+
+# response = client.upload_part_copy(
+# Bucket='string',
+# CopySource='string' or {'Bucket': 'string', 'Key': 'string', 'VersionId': 'string'},
+# CopySourceIfMatch='string',
+# CopySourceIfModifiedSince=datetime(2015, 1, 1),
+# CopySourceIfNoneMatch='string',
+# CopySourceIfUnmodifiedSince=datetime(2015, 1, 1),
+# CopySourceRange='string',
+# Key='string',
+# PartNumber=123,
+# UploadId='string',
+# SSECustomerAlgorithm='string',
+# SSECustomerKey='string',
+# CopySourceSSECustomerAlgorithm='string',
+# CopySourceSSECustomerKey='string',
+# RequestPayer='requester',
+# ExpectedBucketOwner='string',
+# ExpectedSourceBucketOwner='string'
+# )
+
+# import _thread
+# import os
+# import traceback
+# import warnings
+# from concurrent.futures import Future, ThreadPoolExecutor
+# from pathlib import Path
+# from queue import Queue
+# from threading import Event, Thread
+# from typing import Any, Callable
+
+# from botocore.client import BaseClient
+
+# from rclone_api.mount_read_chunker import FilePart
+# from rclone_api.s3.chunk_task import S3FileInfo, file_chunker
+# from rclone_api.s3.chunk_types import (
+#     FinishedPiece,
+#     UploadInfo,
+#     UploadState,
+# )
+# from rclone_api.s3.types import MultiUploadResult
+# from rclone_api.types import EndOfStream
+# from rclone_api.util import locked_print
+
+
+# This is how you upload large parts through multi part upload, then the final call
+# is to assemble the parts that have already been uploaded through a multi part uploader
+# and then call complete_multipart_upload to finish the upload
+# response = (
+#     client.upload_part_copy(
+#         Bucket='string',
+#         CopySource='string' or {'Bucket': 'string', 'Key': 'string', 'VersionId': 'string'},
+#         CopySourceIfMatch='string',
+#         CopySourceIfModifiedSince=datetime(2015, 1, 1),
+#         CopySourceIfNoneMatch='string',
+#         CopySourceIfUnmodifiedSince=datetime(2015, 1, 1),
+#         CopySourceRange='string',
+#         Key='string',
+#         PartNumber=123,
+#         UploadId='string',
+#         SSECustomerAlgorithm='string',
+#         SSECustomerKey='string',
+#         CopySourceSSECustomerAlgorithm='string',
+#         CopySourceSSECustomerKey='string',
+#         RequestPayer='requester',
+#         ExpectedBucketOwner='string',
+#         ExpectedSourceBucketOwner='string'
+# )
+
+
+# def upload_task(
+#     info: UploadInfo,
+#     chunk: FilePart,
+#     part_number: int,
+#     retries: int,
+# ) -> FinishedPiece:
+#     file_or_err: Path | Exception = chunk.get_file()
+#     if isinstance(file_or_err, Exception):
+#         raise file_or_err
+#     file: Path = file_or_err
+#     size = os.path.getsize(file)
+#     retries = retries + 1  # Add one for the initial attempt
+#     for retry in range(retries):
+#         try:
+#             if retry > 0:
+#                 locked_print(f"Retrying part {part_number} for {info.src_file_path}")
+#             locked_print(
+#                 f"Uploading part {part_number} for {info.src_file_path} of size {size}"
+#             )
+
+#             with open(file, "rb") as f:
+#                 part = info.s3_client.upload_part(
+#                     Bucket=info.bucket_name,
+#                     Key=info.object_name,
+#                     PartNumber=part_number,
+#                     UploadId=info.upload_id,
+#                     Body=f,
+#                 )
+#             out: FinishedPiece = FinishedPiece(
+#                 etag=part["ETag"], part_number=part_number
+#             )
+#             chunk.dispose()
+#             return out
+#         except Exception as e:
+#             if retry == retries - 1:
+#                 locked_print(f"Error uploading part {part_number}: {e}")
+#                 chunk.dispose()
+#                 raise e
+#             else:
+#                 locked_print(f"Error uploading part {part_number}: {e}, retrying")
+#                 continue
+#     raise Exception("Should not reach here")
+
+
+# def prepare_upload_file_multipart(
+#     s3_client: BaseClient,
+#     bucket_name: str,
+#     file_path: Path,
+#     file_size: int | None,
+#     object_name: str,
+#     chunk_size: int,
+#     retries: int,
+# ) -> UploadInfo:
+#     """Upload a file to the bucket using multipart upload with customizable chunk size."""
+
+#     # Initiate multipart upload
+#     locked_print(
+#         f"Creating multipart upload for {file_path} to {bucket_name}/{object_name}"
+#     )
+#     mpu = s3_client.create_multipart_upload(Bucket=bucket_name, Key=object_name)
+#     upload_id = mpu["UploadId"]
+
+#     file_size = file_size if file_size is not None else os.path.getsize(file_path)
+
+#     upload_info: UploadInfo = UploadInfo(
+#         s3_client=s3_client,
+#         bucket_name=bucket_name,
+#         object_name=object_name,
+#         src_file_path=file_path,
+#         upload_id=upload_id,
+#         retries=retries,
+#         chunk_size=chunk_size,
+#         file_size=file_size,
+#     )
+#     return upload_info
+
+
+def upload_part_copy_task(
+    info: MultipartUploadInfo,
+    source_bucket: str,
+    source_key: str,
+    part_number: int,
+    retries: int = 3,
+) -> FinishedPiece:
+    """
+    Upload a part by copying from an existing S3 object.
+
+    Args:
+        info: Upload information
+        source_bucket: Source bucket name
+        source_key: Source object key
+        part_number: Part number (1-10000)
+        byte_range: Optional byte range in format 'bytes=start-end'
+        retries: Number of retry attempts
+
+    Returns:
+        FinishedPiece with ETag and part number
+    """
+    copy_source = {"Bucket": source_bucket, "Key": source_key}
+
+    retries = retries + 1  # Add one for the initial attempt
+    for retry in range(retries):
+        try:
+            if retry > 0:
+                locked_print(f"Retrying part copy {part_number} for {info.object_name}")
+
+            locked_print(
+                f"Copying part {part_number} for {info.object_name} from {source_bucket}/{source_key}"
+            )
+
+            # Prepare the upload_part_copy parameters
+            params = {
+                "Bucket": info.bucket_name,
+                "CopySource": copy_source,
+                "Key": info.object_name,
+                "PartNumber": part_number,
+                "UploadId": info.upload_id,
+            }
+
+            # Execute the copy operation
+            part = info.s3_client.upload_part_copy(**params)
+
+            # Extract ETag from the response
+            etag = part["CopyPartResult"]["ETag"]
+
+            return FinishedPiece(etag=etag, part_number=part_number)
+
+        except Exception as e:
+            if retry == retries - 1:
+                locked_print(f"Error copying part {part_number}: {e}")
+                raise e
+            else:
+                locked_print(f"Error copying part {part_number}: {e}, retrying")
+                continue
+
+    raise Exception("Should not reach here")
+
+
+def complete_multipart_upload_from_parts(
+    info: MultipartUploadInfo, parts: list[FinishedPiece]
+) -> str:
+    """
+    Complete a multipart upload using the provided parts.
+
+    Args:
+        info: Upload information
+        parts: List of finished pieces with ETags
+
+    Returns:
+        The URL of the completed object
+    """
+    # Sort parts by part number to ensure correct order
+    parts.sort(key=lambda x: x.part_number)
+
+    # Prepare the parts list for the complete_multipart_upload call
+    multipart_parts = [
+        {"ETag": part.etag, "PartNumber": part.part_number} for part in parts
+    ]
+
+    # Complete the multipart upload
+    response = info.s3_client.complete_multipart_upload(
+        Bucket=info.bucket_name,
+        Key=info.object_name,
+        UploadId=info.upload_id,
+        MultipartUpload={"Parts": multipart_parts},
+    )
+
+    # Return the URL of the completed object
+    return response.get("Location", f"s3://{info.bucket_name}/{info.object_name}")
+
+
+def finish_multipart_upload_from_keys(
+    s3_client: BaseClient,
+    source_bucket: str,
+    parts: list[tuple[int, str]],
+    final_size: int,
+    destination_bucket: str,
+    destination_key: str,
+    chunk_size: int,  # 5MB default
+    max_workers: int = 100,
+    retries: int = 3,
+) -> str:
+    """
+    Finish a multipart upload by copying parts from existing S3 objects.
+
+    Args:
+        s3_client: Boto3 S3 client
+        source_bucket: Source bucket name
+        source_keys: List of source object keys to copy from
+        destination_bucket: Destination bucket name
+        destination_key: Destination object key
+        chunk_size: Size of each part in bytes
+        retries: Number of retry attempts
+        byte_ranges: Optional list of byte ranges corresponding to source_keys
+
+    Returns:
+        The URL of the completed object
+    """
+
+    # Initiate multipart upload
+    locked_print(
+        f"Creating multipart upload for {destination_bucket}/{destination_key} from {len(parts)} source objects"
+    )
+    mpu = s3_client.create_multipart_upload(
+        Bucket=destination_bucket, Key=destination_key
+    )
+    upload_id = mpu["UploadId"]
+
+    # Create upload info
+    upload_info = MultipartUploadInfo(
+        s3_client=s3_client,
+        bucket_name=destination_bucket,
+        object_name=destination_key,
+        upload_id=upload_id,
+        retries=retries,
+        chunk_size=chunk_size,
+        file_size=final_size,
+    )
+
+    futures: list[Future[FinishedPiece]] = []
+
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
+        for part_number, source_key in parts:
+
+            def task(
+                info=upload_info,
+                source_bucket=source_bucket,
+                source_key=source_key,
+                part_number=part_number,
+                retries=retries,
+            ):
+                return upload_part_copy_task(
+                    info=info,
+                    source_bucket=source_bucket,
+                    source_key=source_key,
+                    part_number=part_number,
+                    retries=retries,
+                )
+
+            fut = executor.submit(task)
+            futures.append(fut)
+
+        # Upload parts by copying from source objects
+        finished_parts = []
+
+        for fut in futures:
+            finished_part = fut.result()
+            finished_parts.append(finished_part)
+
+    # Complete the multipart upload
+    return complete_multipart_upload_from_parts(upload_info, finished_parts)
+
+
+class S3MultiPartUploader:
+    def __init__(self, s3_client: BaseClient, verbose: bool) -> None:
+        self.s3_client = s3_client
+        self.verbose = verbose
+
+    def finish_from_keys(
+        self,
+        source_bucket: str,
+        parts: list[tuple[int, str]],
+        destination_bucket: str,
+        destination_key: str,
+        chunk_size: int,
+        final_size: int,
+        retries: int = 3,
+    ) -> str:
+        """
+        Finish a multipart upload by copying parts from existing S3 objects.
+
+        Args:
+            source_bucket: Source bucket name
+            source_keys: List of source object keys to copy from
+            destination_bucket: Destination bucket name
+            destination_key: Destination object key
+            chunk_size: Size of each part in bytes
+            retries: Number of retry attempts
+            byte_ranges: Optional list of byte ranges corresponding to source_keys
+
+        Returns:
+            The URL of the completed object
+        """
+        return finish_multipart_upload_from_keys(
+            s3_client=self.s3_client,
+            source_bucket=source_bucket,
+            parts=parts,
+            destination_bucket=destination_bucket,
+            destination_key=destination_key,
+            chunk_size=chunk_size,
+            final_size=final_size,
+            retries=retries,
+        )
rclone_api/s3/types.py
CHANGED
rclone_api/types.py
CHANGED
@@ -309,6 +309,13 @@ class Range:
         val = f"bytes={self.start.as_int()}-{last.as_int()}"
         return {"Range": val}

+    def __repr__(self) -> str:
+        length = self.end - self.start
+        return f"Range(start={self.start}, length={length})"
+
+    def __str__(self) -> str:
+        return self.__repr__()
+

 _MAX_PART_NUMBER = 10000

@@ -387,3 +394,9 @@ class PartInfo:
         end = SizeSuffix(self.range.end._size).as_int()
         dst_name = f"part.{partnumber}_{offset}-{end}"
         return dst_name
+
+    def __repr__(self) -> str:
+        return f"PartInfo(part_number={self.part_number}, range={self.range})"
+
+    def __str__(self) -> str:
+        return self.__repr__()
rclone_api/util.py
CHANGED
@@ -1,10 +1,11 @@
+import atexit
 import os
 import random
 import shutil
+import signal
 import subprocess
 import warnings
 from pathlib import Path
-from tempfile import TemporaryDirectory
 from threading import Lock
 from typing import Any

@@ -18,6 +19,53 @@ from rclone_api.types import S3PathInfo

 _PRINT_LOCK = Lock()

+_TMP_CONFIG_DIR = Path(".") / ".rclone" / "tmp_config"
+_RCLONE_CONFIGS_LIST: list[Path] = []
+_DO_CLEANUP = os.getenv("RCLONE_API_CLEANUP", "1") == "1"
+
+
+def _clean_configs(signum=None, frame=None) -> None:
+    if not _DO_CLEANUP:
+        return
+    for config in _RCLONE_CONFIGS_LIST:
+        try:
+            config.unlink()
+        except Exception as e:
+            print(f"Error deleting config file: {config}, {e}")
+    _RCLONE_CONFIGS_LIST.clear()
+    if signum is not None:
+        signal.signal(signum, signal.SIG_DFL)
+        os.kill(os.getpid(), signum)
+
+
+def _init_cleanup() -> None:
+    atexit.register(_clean_configs)
+
+    for sig in (signal.SIGINT, signal.SIGTERM):
+        signal.signal(sig, _clean_configs)
+
+
+_init_cleanup()
+
+
+def make_temp_config_file() -> Path:
+    from rclone_api.util import random_str
+
+    tmpdir = _TMP_CONFIG_DIR / random_str(32)
+    tmpdir.mkdir(parents=True, exist_ok=True)
+    tmpfile = tmpdir / "rclone.conf"
+    _RCLONE_CONFIGS_LIST.append(tmpfile)
+    return tmpfile
+
+
+def clear_temp_config_file(path: Path | None) -> None:
+    if (path is None) or (not path.exists()) or (not _DO_CLEANUP):
+        return
+    try:
+        path.unlink()
+    except Exception as e:
+        print(f"Error deleting config file: {path}, {e}")
+

 def locked_print(*args, **kwargs):
     with _PRINT_LOCK:
@@ -116,7 +164,7 @@ def rclone_execute(
     capture: bool | Path | None = None,
     verbose: bool | None = None,
 ) -> subprocess.CompletedProcess:
-
+    tmpfile: Path | None = None
     verbose = get_verbose(verbose)

     # Handle the Path case for capture
@@ -131,8 +179,7 @@

     try:
         if isinstance(rclone_conf, Config):
-
-            tmpfile = Path(tempdir.name) / "rclone.conf"
+            tmpfile = make_temp_config_file()
             tmpfile.write_text(rclone_conf.text, encoding="utf-8")
             rclone_conf = tmpfile
         cmd = (
@@ -168,11 +215,7 @@
         )
         return cp
     finally:
-
-        try:
-            tempdir.cleanup()
-        except Exception as e:
-            print(f"Error cleaning up tempdir: {e}")
+        clear_temp_config_file(tmpfile)


 def split_s3_path(path: str) -> S3PathInfo:
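The temporary-config helpers replace the earlier TemporaryDirectory handling: make_temp_config_file registers the file for cleanup at exit (and on SIGINT/SIGTERM), and clear_temp_config_file removes it unless RCLONE_API_CLEANUP=0. A hedged sketch of the intended lifecycle, mirroring how process.py and rclone_execute use these helpers; the config body and runner step are invented placeholders:

from rclone_api.util import clear_temp_config_file, make_temp_config_file

tmpfile = make_temp_config_file()
tmpfile.write_text("[dst]\ntype = s3\n", encoding="utf-8")  # invented config body
try:
    ...  # run rclone with ["--config", str(tmpfile), ...]
finally:
    clear_temp_config_file(tmpfile)  # no-op when RCLONE_API_CLEANUP=0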
{rclone_api-1.4.7.dist-info → rclone_api-1.4.8.dist-info}/RECORD
RENAMED
@@ -17,22 +17,23 @@ rclone_api/group_files.py,sha256=H92xPW9lQnbNw5KbtZCl00bD6iRh9yRbCuxku4j_3dg,803
 rclone_api/http_server.py,sha256=3fPBV6l50erTe32DyeJBNmsDrn5KuujsbmEAbx13T-c,8720
 rclone_api/log.py,sha256=VZHM7pNSXip2ZLBKMP7M1u-rp_F7zoafFDuR8CPUoKI,1271
 rclone_api/mount.py,sha256=TE_VIBMW7J1UkF_6HRCt8oi_jGdMov4S51bm2OgxFAM,10045
-rclone_api/process.py,sha256=
-rclone_api/rclone_impl.py,sha256=
+rclone_api/process.py,sha256=I7B4arAIbFcTBsek27cZ0t-l5YRWVHJJPji7G6ZLGjQ,4989
+rclone_api/rclone_impl.py,sha256=xTTriz6-zn_aSrkY8B7wzT-zRXax7Og7ns6xu6-7O6g,48769
 rclone_api/remote.py,sha256=mTgMTQTwxUmbLjTpr-AGTId2ycXKI9mLX5L7PPpDIoc,520
 rclone_api/rpath.py,sha256=Y1JjQWcie39EgQrq-UtbfDz5yDLCwwfu27W7AQXllSE,2860
 rclone_api/scan_missing_folders.py,sha256=-8NCwpCaHeHrX-IepCoAEsX1rl8S-GOCxcIhTr_w3gA,4747
-rclone_api/types.py,sha256=
-rclone_api/util.py,sha256=
+rclone_api/types.py,sha256=HkpEZgZWhr5Gb04iHq5NxMRXxieWoN-PKmOfJFrg5Qg,12155
+rclone_api/util.py,sha256=9w_m6W62l_X42Jw5q8p_p30h-QoxAqufvnCLI4PTMOE,7056
 rclone_api/assets/example.txt,sha256=lTBovRjiz0_TgtAtbA1C5hNi2ffbqnNPqkKg6UiKCT8,54
 rclone_api/cmd/analyze.py,sha256=RHbvk1G5ZUc3qLqlm1AZEyQzd_W_ZjcbCNDvW4YpTKQ,1252
-rclone_api/cmd/copy_large_s3.py,sha256=
+rclone_api/cmd/copy_large_s3.py,sha256=B17GliDQyAauNglJCpsey0d3eArT2DAcT9g684TMQk8,3514
+rclone_api/cmd/copy_large_s3_finish.py,sha256=ybZWaRa_89lyq7WpXUjpbhc_TQctUC-fKCZmdffPpKo,6731
 rclone_api/cmd/list_files.py,sha256=x8FHODEilwKqwdiU1jdkeJbLwOqUkUQuDWPo2u_zpf0,741
 rclone_api/cmd/save_to_db.py,sha256=ylvnhg_yzexM-m6Zr7XDiswvoDVSl56ELuFAdb9gqBY,1957
 rclone_api/db/__init__.py,sha256=OSRUdnSWUlDTOHmjdjVmxYTUNpTbtaJ5Ll9sl-PfZg0,40
 rclone_api/db/db.py,sha256=YRnYrCaXHwytQt07uEZ_mMpvPHo9-0IWcOb95fVOOfs,10086
 rclone_api/db/models.py,sha256=v7qaXUehvsDvU51uk69JI23fSIs9JFGcOa-Tv1c_wVs,1600
-rclone_api/detail/copy_file_parts.py,sha256=
+rclone_api/detail/copy_file_parts.py,sha256=CXiFuCJKSAdopDzigdO0j8uYrNl1N2Y9X9sbeGffqDU,15919
 rclone_api/detail/walk.py,sha256=-54NVE8EJcCstwDoaC_UtHm73R2HrZwVwQmsnv55xNU,3369
 rclone_api/experimental/flags.py,sha256=qCVD--fSTmzlk9hloRLr0q9elzAOFzPsvVpKM3aB1Mk,2739
 rclone_api/experimental/flags_base.py,sha256=ajU_czkTcAxXYU-SlmiCfHY7aCQGHvpCLqJ-Z8uZLk0,2102
@@ -40,16 +41,16 @@ rclone_api/s3/api.py,sha256=PafsIEyWDpLWAXsZAjFm9CY14vJpsDr9lOsn0kGRLZ0,4009
 rclone_api/s3/basic_ops.py,sha256=hK3366xhVEzEcjz9Gk_8lFx6MRceAk72cax6mUrr6ko,2104
 rclone_api/s3/chunk_task.py,sha256=waEYe-iYQ1_BR3NCS4BrzVrK9UANvH1EcbXx2I6Z_NM,6839
 rclone_api/s3/create.py,sha256=wgfkapv_j904CfKuWyiBIWJVxfAx_ftemFSUV14aT68,3149
-rclone_api/s3/
-rclone_api/s3/types.py,sha256=
+rclone_api/s3/s3_multipart_uploader_by_copy.py,sha256=kXKAqpxi6WuyTjJ4BsBQ0wH3Px8aJEc8CH9Ugcni82A,12944
+rclone_api/s3/types.py,sha256=cYI5MbXRNdT-ps5kGIRQaYrseHyx_ozT4AcwBABTKwk,1616
 rclone_api/s3/upload_file_multipart.py,sha256=V7syKjFyVIe4U9Ahl5XgqVTzt9akiew3MFjGmufLo2w,12503
 rclone_api/s3/multipart/file_info.py,sha256=8v_07_eADo0K-Nsv7F0Ac1wcv3lkIsrR3MaRCmkYLTQ,105
-rclone_api/s3/multipart/finished_piece.py,sha256=
+rclone_api/s3/multipart/finished_piece.py,sha256=9nMWnVZ8S99wi2VFQsm1h1ZHqmebkhMGgd2s56wNj9w,1331
 rclone_api/s3/multipart/upload_info.py,sha256=d6_OfzFR_vtDzCEegFfzCfWi2kUBUV4aXZzqAEVp1c4,1874
 rclone_api/s3/multipart/upload_state.py,sha256=f-Aq2NqtAaMUMhYitlICSNIxCKurWAl2gDEUVizLIqw,6019
-rclone_api-1.4.
-rclone_api-1.4.
-rclone_api-1.4.
-rclone_api-1.4.
-rclone_api-1.4.
-rclone_api-1.4.
+rclone_api-1.4.8.dist-info/LICENSE,sha256=b6pOoifSXiUaz_lDS84vWlG3fr4yUKwB8fzkrH9R8bQ,1064
+rclone_api-1.4.8.dist-info/METADATA,sha256=GfvhP_JxvsCbW0r6NVyiEHzkac5QhkVjasG7vyP8IMo,4627
+rclone_api-1.4.8.dist-info/WHEEL,sha256=rF4EZyR2XVS6irmOHQIJx2SUqXLZKRMUrjsg8UwN-XQ,109
+rclone_api-1.4.8.dist-info/entry_points.txt,sha256=fJteOlYVwgX3UbNuL9jJ0zUTuX2O79JFAeNgK7Sw7EQ,255
+rclone_api-1.4.8.dist-info/top_level.txt,sha256=EvZ7uuruUpe9RiUyEp25d1Keq7PWYNT0O_-mr8FCG5g,11
+rclone_api-1.4.8.dist-info/RECORD,,
rclone_api/s3/s3_multipart_uploader.py
REMOVED
@@ -1,138 +0,0 @@
-"""
-https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/client/upload_part_copy.html
-* client.upload_part_copy
-
-
-"""
-
-# import _thread
-# import os
-# import traceback
-# import warnings
-# from concurrent.futures import Future, ThreadPoolExecutor
-# from pathlib import Path
-# from queue import Queue
-# from threading import Event, Thread
-# from typing import Any, Callable
-
-# from botocore.client import BaseClient
-
-# from rclone_api.mount_read_chunker import FilePart
-# from rclone_api.s3.chunk_task import S3FileInfo, file_chunker
-# from rclone_api.s3.chunk_types import (
-#     FinishedPiece,
-#     UploadInfo,
-#     UploadState,
-# )
-# from rclone_api.s3.types import MultiUploadResult
-# from rclone_api.types import EndOfStream
-# from rclone_api.util import locked_print
-
-
-# This is how you upload large parts through multi part upload, then the final call
-# is to assemble the parts that have already been uploaded through a multi part uploader
-# and then call complete_multipart_upload to finish the upload
-# response = (
-#     client.upload_part_copy(
-#         Bucket='string',
-#         CopySource='string' or {'Bucket': 'string', 'Key': 'string', 'VersionId': 'string'},
-#         CopySourceIfMatch='string',
-#         CopySourceIfModifiedSince=datetime(2015, 1, 1),
-#         CopySourceIfNoneMatch='string',
-#         CopySourceIfUnmodifiedSince=datetime(2015, 1, 1),
-#         CopySourceRange='string',
-#         Key='string',
-#         PartNumber=123,
-#         UploadId='string',
-#         SSECustomerAlgorithm='string',
-#         SSECustomerKey='string',
-#         CopySourceSSECustomerAlgorithm='string',
-#         CopySourceSSECustomerKey='string',
-#         RequestPayer='requester',
-#         ExpectedBucketOwner='string',
-#         ExpectedSourceBucketOwner='string'
-# )
-
-
-# def upload_task(
-#     info: UploadInfo,
-#     chunk: FilePart,
-#     part_number: int,
-#     retries: int,
-# ) -> FinishedPiece:
-#     file_or_err: Path | Exception = chunk.get_file()
-#     if isinstance(file_or_err, Exception):
-#         raise file_or_err
-#     file: Path = file_or_err
-#     size = os.path.getsize(file)
-#     retries = retries + 1  # Add one for the initial attempt
-#     for retry in range(retries):
-#         try:
-#             if retry > 0:
-#                 locked_print(f"Retrying part {part_number} for {info.src_file_path}")
-#             locked_print(
-#                 f"Uploading part {part_number} for {info.src_file_path} of size {size}"
-#             )
-
-#             with open(file, "rb") as f:
-#                 part = info.s3_client.upload_part(
-#                     Bucket=info.bucket_name,
-#                     Key=info.object_name,
-#                     PartNumber=part_number,
-#                     UploadId=info.upload_id,
-#                     Body=f,
-#                 )
-#             out: FinishedPiece = FinishedPiece(
-#                 etag=part["ETag"], part_number=part_number
-#             )
-#             chunk.dispose()
-#             return out
-#         except Exception as e:
-#             if retry == retries - 1:
-#                 locked_print(f"Error uploading part {part_number}: {e}")
-#                 chunk.dispose()
-#                 raise e
-#             else:
-#                 locked_print(f"Error uploading part {part_number}: {e}, retrying")
-#                 continue
-#     raise Exception("Should not reach here")
-
-
-# def prepare_upload_file_multipart(
-#     s3_client: BaseClient,
-#     bucket_name: str,
-#     file_path: Path,
-#     file_size: int | None,
-#     object_name: str,
-#     chunk_size: int,
-#     retries: int,
-# ) -> UploadInfo:
-#     """Upload a file to the bucket using multipart upload with customizable chunk size."""
-
-#     # Initiate multipart upload
-#     locked_print(
-#         f"Creating multipart upload for {file_path} to {bucket_name}/{object_name}"
-#     )
-#     mpu = s3_client.create_multipart_upload(Bucket=bucket_name, Key=object_name)
-#     upload_id = mpu["UploadId"]
-
-#     file_size = file_size if file_size is not None else os.path.getsize(file_path)
-
-#     upload_info: UploadInfo = UploadInfo(
-#         s3_client=s3_client,
-#         bucket_name=bucket_name,
-#         object_name=object_name,
-#         src_file_path=file_path,
-#         upload_id=upload_id,
-#         retries=retries,
-#         chunk_size=chunk_size,
-#         file_size=file_size,
-#     )
-#     return upload_info
-
-# class S3MultiPartUploader:
-#     def __init__(self, s3_client: BaseClient, verbose: bool) -> None:
-#         self.s3_client = s3_client
-#         self.verbose = verbose
-
-#     def prepare(self) -> UploadInfo:
{rclone_api-1.4.7.dist-info → rclone_api-1.4.8.dist-info}/LICENSE
File without changes
{rclone_api-1.4.7.dist-info → rclone_api-1.4.8.dist-info}/WHEEL
File without changes
{rclone_api-1.4.7.dist-info → rclone_api-1.4.8.dist-info}/entry_points.txt
File without changes
{rclone_api-1.4.7.dist-info → rclone_api-1.4.8.dist-info}/top_level.txt
File without changes