rclone-api 1.4.18__py2.py3-none-any.whl → 1.4.20__py2.py3-none-any.whl

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
@@ -1,25 +1,17 @@
1
1
  import argparse
2
- import os
3
2
  from dataclasses import dataclass
4
3
  from pathlib import Path
5
4
 
6
5
  from rclone_api import Rclone
7
- from rclone_api.detail.copy_file_parts import InfoJson
8
6
  from rclone_api.s3.s3_multipart_uploader_by_copy import (
9
- Part,
10
- S3MultiPartUploader,
7
+ s3_server_side_multi_part_merge,
11
8
  )
12
9
 
13
- _TIMEOUT_READ = 900
14
- _TIMEOUT_CONNECTION = 900
15
- _MAX_WORKERS = 10
16
-
17
10
 
18
11
  @dataclass
19
12
  class Args:
20
13
  config_path: Path
21
14
  src: str # like dst:TorrentBooks/aa_misc_data/aa_misc_data/world_lending_library_2024_11.tar.zst-parts/ (info.json will be located here)
22
- dst: str # like dst:TorrentBooks/aa_misc_data/aa_misc_data/world_lending_library_2024_11.tar.zst
23
15
  verbose: bool
24
16
 
25
17
 
@@ -32,8 +24,7 @@ def list_files(rclone: Rclone, path: str):
32
24
 
33
25
  def _parse_args() -> Args:
34
26
  parser = argparse.ArgumentParser(description="List files in a remote path.")
35
- parser.add_argument("src", help="File to copy")
36
- parser.add_argument("dst", help="Destination file")
27
+ parser.add_argument("src", help="Directory that holds the info.json file")
37
28
  parser.add_argument("-v", "--verbose", help="Verbose output", action="store_true")
38
29
  parser.add_argument(
39
30
  "--config", help="Path to rclone config file", type=Path, required=False
@@ -48,166 +39,26 @@ def _parse_args() -> Args:
48
39
  out = Args(
49
40
  config_path=config,
50
41
  src=args.src,
51
- dst=args.dst,
52
42
  verbose=args.verbose,
53
43
  )
54
44
  return out
55
45
 
56
46
 
57
- # def finish_multipart_upload_from_keys(
58
- # s3_client: BaseClient,
59
- # source_bucket: str,
60
- # parts: list[Part],
61
- # destination_bucket: str,
62
- # destination_key: str,
63
- # chunk_size: int, # 5MB default
64
- # max_workers: int = 100,
65
- # retries: int = 3,
66
- # ) -> str | Exception:
67
- # """
68
- # Finish a multipart upload by copying parts from existing S3 objects.
69
-
70
- # Args:
71
- # s3_client: Boto3 S3 client
72
- # source_bucket: Source bucket name
73
- # source_keys: List of source object keys to copy from
74
- # destination_bucket: Destination bucket name
75
- # destination_key: Destination object key
76
- # chunk_size: Size of each part in bytes
77
- # retries: Number of retry attempts
78
- # byte_ranges: Optional list of byte ranges corresponding to source_keys
79
-
80
- # Returns:
81
- # The URL of the completed object
82
- # """
83
-
84
- # # Create upload info
85
- # info = begin_upload(
86
- # s3_client=s3_client,
87
- # parts=parts,
88
- # destination_bucket=destination_bucket,
89
- # destination_key=destination_key,
90
- # chunk_size=chunk_size,
91
- # retries=retries,
92
- # )
93
-
94
- # out = do_body_work(
95
- # info=info,
96
- # source_bucket=source_bucket,
97
- # parts=parts,
98
- # max_workers=max_workers,
99
- # retries=retries,
100
- # )
101
-
102
- # return out
103
-
104
-
105
- def do_finish_part(rclone: Rclone, info: InfoJson, dst: str) -> Exception | None:
106
- from rclone_api.s3.create import (
107
- BaseClient,
108
- S3Config,
109
- S3Credentials,
110
- create_s3_client,
111
- )
112
-
113
- s3_config = S3Config(
114
- verbose=False,
115
- timeout_read=_TIMEOUT_READ,
116
- timeout_connection=_TIMEOUT_CONNECTION,
117
- )
118
-
119
- s3_creds: S3Credentials = rclone.impl.get_s3_credentials(remote=dst)
120
- s3_client: BaseClient = create_s3_client(s3_creds=s3_creds, s3_config=s3_config)
121
- s3_bucket = s3_creds.bucket_name
122
- is_done = info.fetch_is_done()
123
- size = info.size
124
- assert is_done, f"Upload is not done: {info}"
125
-
126
- parts_dir = info.parts_dir
127
- if parts_dir.endswith("/"):
128
- parts_dir = parts_dir[:-1]
129
- source_keys = info.fetch_all_finished()
130
- # print(parts_dir)
131
- # print(source_keys)
132
-
133
- parts_path = parts_dir.split(s3_bucket)[1]
134
- if parts_path.startswith("/"):
135
- parts_path = parts_path[1:]
136
-
137
- first_part: int | None = info.first_part
138
- last_part: int | None = info.last_part
139
-
140
- assert first_part is not None
141
- assert last_part is not None
142
-
143
- def _to_s3_key(name: str | None) -> str:
144
- if name:
145
- out = f"{parts_path}/{name}"
146
- return out
147
- out = f"{parts_path}"
148
- return out
149
-
150
- parts: list[Part] = []
151
- part_num = 1
152
- for part_key in source_keys:
153
- s3_key = _to_s3_key(name=part_key)
154
- part = Part(part_number=part_num, s3_key=s3_key)
155
- parts.append(part)
156
- part_num += 1
157
-
158
- chunksize = info.chunksize
159
- assert chunksize is not None
160
-
161
- dst_name = info.dst_name
162
- dst_dir = os.path.dirname(parts_path)
163
- # dst_key =
164
- dst_key = f"{dst_dir}/{dst_name}"
165
-
166
- uploader: S3MultiPartUploader = S3MultiPartUploader(
167
- s3_client=s3_client,
168
- verbose=True,
169
- )
170
-
171
- from rclone_api.s3.s3_multipart_uploader_by_copy import MultipartUploadInfo
172
-
173
- state: MultipartUploadInfo = uploader.begin_new_upload(
174
- parts=parts,
175
- destination_bucket=s3_creds.bucket_name,
176
- destination_key=dst_key,
177
- chunk_size=chunksize.as_int(),
178
- )
179
-
180
- uploader.start_upload(info=state, parts=parts, max_workers=_MAX_WORKERS)
181
-
182
- # now check if the dst now exists, if so, delete the parts folder.
183
- # if rclone.exists(dst):
184
- # rclone.purge(parts_dir)
185
-
186
- if not rclone.exists(dst):
187
- return FileNotFoundError(f"Destination file not found: {dst}")
188
-
189
- write_size = rclone.size_file(dst)
190
- if write_size != size:
191
- return ValueError(f"Size mismatch: {write_size} != {size}")
192
-
193
- print(f"Upload complete: {dst}")
194
- rclone.purge(parts_dir)
195
- return None
47
+ def _get_info_path(src: str) -> str:
48
+ if src.endswith("/"):
49
+ src = src[:-1]
50
+ info_path = f"{src}/info.json"
51
+ return info_path
196
52
 
197
53
 
198
54
  def main() -> int:
199
55
  """Main entry point."""
200
56
  args = _parse_args()
201
57
  rclone = Rclone(rclone_conf=args.config_path)
202
- info_json = f"{args.src}/info.json".replace("//", "/")
203
- info = InfoJson(rclone.impl, src=None, src_info=info_json)
204
- loaded = info.load()
205
- if not loaded:
206
- raise FileNotFoundError(
207
- f"Info file not found, has the upload finished? {info_json}"
208
- )
209
- print(info)
210
- do_finish_part(rclone=rclone, info=info, dst=args.dst)
58
+ info_path = _get_info_path(src=args.src)
59
+ s3_server_side_multi_part_merge(
60
+ rclone=rclone.impl, info_path=info_path, max_workers=5
61
+ )
211
62
  return 0
212
63
 
213
64
 
@@ -219,7 +70,4 @@ if __name__ == "__main__":
219
70
  sys.argv.append(
220
71
  "dst:TorrentBooks/aa_misc_data/aa_misc_data/world_lending_library_2024_11.tar.zst-parts/"
221
72
  )
222
- sys.argv.append(
223
- "dst:TorrentBooks/aa_misc_data/aa_misc_data/world_lending_library_2024_11.tar.zst"
224
- )
225
73
  main()
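
The rewritten command above now does little more than derive the info.json path from its single src argument and delegate to s3_server_side_multi_part_merge. A minimal sketch of driving the same flow programmatically, assuming only the imports and signatures visible in this diff (the config path is illustrative; the CLI takes it from --config):

    from pathlib import Path

    from rclone_api import Rclone
    from rclone_api.s3.s3_multipart_uploader_by_copy import s3_server_side_multi_part_merge

    # illustrative values; the CLI parses these from argv
    rclone = Rclone(rclone_conf=Path("rclone.conf"))
    src = "dst:TorrentBooks/aa_misc_data/aa_misc_data/world_lending_library_2024_11.tar.zst-parts/"

    err = s3_server_side_multi_part_merge(
        rclone=rclone.impl,
        info_path=f"{src.rstrip('/')}/info.json",
        max_workers=5,
    )
    if err is not None:
        raise err
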
@@ -209,7 +209,10 @@ class InfoJson:
209
209
 
210
210
  @property
211
211
  def parts_dir(self) -> str:
212
- return os.path.dirname(self.src_info)
212
+ parts_dir = os.path.dirname(self.src_info)
213
+ if parts_dir.endswith("/"):
214
+ parts_dir = parts_dir[:-1]
215
+ return parts_dir
213
216
 
214
217
  @property
215
218
  def dst(self) -> str:
@@ -8,8 +8,9 @@ from existing S3 objects using upload_part_copy.
8
8
 
9
9
  import json
10
10
  from dataclasses import dataclass
11
- from typing import Any, Callable
11
+ from typing import Any
12
12
 
13
+ from rclone_api.rclone_impl import RcloneImpl
13
14
  from rclone_api.s3.multipart.finished_piece import FinishedPiece
14
15
 
15
16
 
@@ -46,34 +47,59 @@ class Part:
46
47
 
47
48
  class MergeState:
48
49
 
49
- def __init__(self, finished: list[FinishedPiece], all_parts: list[Part]) -> None:
50
+ def __init__(
51
+ self,
52
+ rclone_impl: RcloneImpl,
53
+ merge_path: str,
54
+ upload_id: str,
55
+ bucket: str,
56
+ dst_key: str,
57
+ finished: list[FinishedPiece],
58
+ all_parts: list[Part],
59
+ ) -> None:
60
+ self.rclone_impl: RcloneImpl = rclone_impl
61
+ self.merge_path: str = merge_path
62
+ self.merge_parts_path: str = f"{merge_path}/merge" # future use?
63
+ self.upload_id: str = upload_id
64
+ self.bucket: str = bucket
65
+ self.dst_key: str = dst_key
50
66
  self.finished: list[FinishedPiece] = list(finished)
51
67
  self.all_parts: list[Part] = list(all_parts)
52
- self.callbacks: list[Callable[[FinishedPiece], None]] = []
53
-
54
- def add_callback(self, callback: Callable[[FinishedPiece], None]) -> None:
55
- self.callbacks.append(callback)
56
68
 
57
69
  def on_finished(self, finished_piece: FinishedPiece) -> None:
58
- for callback in list(self.callbacks):
59
- callback(finished_piece)
70
+ self.finished.append(finished_piece)
71
+
72
+ def remaining_parts(self) -> list[Part]:
73
+ finished_parts: set[int] = set([p.part_number for p in self.finished])
74
+ remaining = [p for p in self.all_parts if p.part_number not in finished_parts]
75
+ return remaining
60
76
 
61
77
  @staticmethod
62
- def from_json_array(json_array: dict) -> "MergeState | Exception":
78
+ def from_json(rclone_impl: RcloneImpl, json: dict) -> "MergeState | Exception":
63
79
  try:
80
+ merge_path = json["merge_path"]
81
+ bucket = json["bucket"]
82
+ dst_key = json["dst_key"]
64
83
  finished: list[FinishedPiece] = FinishedPiece.from_json_array(
65
- json_array["finished"]
84
+ json["finished"]
66
85
  )
67
- all_parts: list[Part | Exception] = [
68
- Part.from_json(j) for j in json_array["all"]
69
- ]
86
+ all_parts: list[Part | Exception] = [Part.from_json(j) for j in json["all"]]
70
87
  all_parts_no_err: list[Part] = [
71
88
  p for p in all_parts if not isinstance(p, Exception)
72
89
  ]
90
+ upload_id: str = json["upload_id"]
73
91
  errs: list[Exception] = [p for p in all_parts if isinstance(p, Exception)]
74
92
  if len(errs):
75
93
  return Exception(f"Errors in parts: {errs}")
76
- return MergeState(finished=finished, all_parts=all_parts_no_err)
94
+ return MergeState(
95
+ rclone_impl=rclone_impl,
96
+ merge_path=merge_path,
97
+ upload_id=upload_id,
98
+ bucket=bucket,
99
+ dst_key=dst_key,
100
+ finished=finished,
101
+ all_parts=all_parts_no_err,
102
+ )
77
103
  except Exception as e:
78
104
  return e
79
105
 
@@ -81,12 +107,18 @@ class MergeState:
81
107
  finished = self.finished.copy()
82
108
  all_parts = self.all_parts.copy()
83
109
  return {
110
+ "merge_path": self.merge_path,
111
+ "bucket": self.bucket,
112
+ "dst_key": self.dst_key,
113
+ "upload_id": self.upload_id,
84
114
  "finished": FinishedPiece.to_json_array(finished),
85
115
  "all": [part.to_json() for part in all_parts],
86
116
  }
87
117
 
88
118
  def to_json_str(self) -> str:
89
- return json.dumps(self.to_json(), indent=1)
119
+ data = self.to_json()
120
+ out = json.dumps(data, indent=2)
121
+ return out
90
122
 
91
123
  def __str__(self):
92
124
  return self.to_json_str()
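
Taken together, the new constructor fields and to_json() keys above form the resume record that gets persisted next to info.json as merge.json. A hypothetical example of the dict returned by to_json(), with placeholder values and assuming Part.to_json simply mirrors its part_number/s3_key fields:

    # hypothetical resume record; the keys come from MergeState.to_json() above
    {
        "merge_path": "dst:bucket/path/file-parts/merge.json",
        "bucket": "bucket",
        "dst_key": "path/file",
        "upload_id": "example-upload-id",
        "finished": [{"part_number": 1, "etag": "example-etag"}],
        "all": [
            {"part_number": 1, "s3_key": "path/file-parts/part-1"},
            {"part_number": 2, "s3_key": "path/file-parts/part-2"},
        ],
    }
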
@@ -1,4 +1,3 @@
1
- import json
2
1
  import warnings
3
2
  from dataclasses import dataclass
4
3
 
@@ -13,9 +12,6 @@ class FinishedPiece:
13
12
  def to_json(self) -> dict:
14
13
  return {"part_number": self.part_number, "etag": self.etag}
15
14
 
16
- def to_json_str(self) -> str:
17
- return json.dumps(self.to_json(), indent=0)
18
-
19
15
  @staticmethod
20
16
  def to_json_array(
21
17
  parts: list["FinishedPiece | EndOfStream"] | list["FinishedPiece"],
@@ -6,33 +6,36 @@ This module provides functionality for S3 multipart uploads, including copying p
6
6
  from existing S3 objects using upload_part_copy.
7
7
  """
8
8
 
9
+ import json
10
+ import os
11
+ import time
12
+ import warnings
9
13
  from concurrent.futures import Future, ThreadPoolExecutor
10
- from dataclasses import dataclass
11
- from pathlib import Path
12
- from threading import Semaphore
13
- from typing import Optional
14
-
15
- from botocore.client import BaseClient
16
-
14
+ from queue import Queue
15
+ from threading import Semaphore, Thread
16
+ from typing import Callable
17
+
18
+ from rclone_api.detail.copy_file_parts import InfoJson
19
+ from rclone_api.rclone_impl import RcloneImpl
20
+ from rclone_api.s3.create import (
21
+ BaseClient,
22
+ S3Config,
23
+ create_s3_client,
24
+ )
17
25
  from rclone_api.s3.merge_state import MergeState, Part
18
26
  from rclone_api.s3.multipart.finished_piece import FinishedPiece
27
+ from rclone_api.types import EndOfStream
19
28
  from rclone_api.util import locked_print
20
29
 
30
+ DEFAULT_MAX_WORKERS = 5 # Backblaze can do 10 with exponential backoff, so let's try 5
21
31
 
22
- @dataclass
23
- class MultipartUploadInfo:
24
- """Simplified upload information for multipart uploads."""
25
-
26
- s3_client: BaseClient
27
- bucket_name: str
28
- object_name: str
29
- upload_id: str
30
- chunk_size: int
31
- src_file_path: Optional[Path] = None
32
+ _TIMEOUT_READ = 900
33
+ _TIMEOUT_CONNECTION = 900
32
34
 
33
35
 
34
- def upload_part_copy_task(
35
- info: MultipartUploadInfo,
36
+ def _upload_part_copy_task(
37
+ s3_client: BaseClient,
38
+ state: MergeState,
36
39
  source_bucket: str,
37
40
  source_key: str,
38
41
  part_number: int,
@@ -60,32 +63,34 @@ def upload_part_copy_task(
60
63
  params: dict = {}
61
64
  try:
62
65
  if retry > 0:
63
- locked_print(f"Retrying part copy {part_number} for {info.object_name}")
66
+ locked_print(f"Retrying part copy {part_number} for {state.dst_key}")
64
67
 
65
68
  locked_print(
66
- f"Copying part {part_number} for {info.object_name} from {source_bucket}/{source_key}"
69
+ f"Copying part {part_number} for {state.dst_key} from {source_bucket}/{source_key}"
67
70
  )
68
71
 
69
72
  # Prepare the upload_part_copy parameters
70
73
  params = {
71
- "Bucket": info.bucket_name,
74
+ "Bucket": state.bucket,
72
75
  "CopySource": copy_source,
73
- "Key": info.object_name,
76
+ "Key": state.dst_key,
74
77
  "PartNumber": part_number,
75
- "UploadId": info.upload_id,
78
+ "UploadId": state.upload_id,
76
79
  }
77
80
 
78
81
  # Execute the copy operation
79
- part = info.s3_client.upload_part_copy(**params)
82
+ part = s3_client.upload_part_copy(**params)
80
83
 
81
84
  # Extract ETag from the response
82
85
  etag = part["CopyPartResult"]["ETag"]
83
86
  out = FinishedPiece(etag=etag, part_number=part_number)
84
- locked_print(f"Finished part {part_number} for {info.object_name}")
87
+ locked_print(f"Finished part {part_number} for {state.dst_key}")
85
88
  return out
86
89
 
87
90
  except Exception as e:
88
- msg = f"Error copying {copy_source} -> {info.object_name}: {e}, params={params}"
91
+ msg = (
92
+ f"Error copying {copy_source} -> {state.dst_key}: {e}, params={params}"
93
+ )
89
94
  if "An error occurred (InternalError)" in str(e):
90
95
  locked_print(msg)
91
96
  elif "NoSuchKey" in str(e):
@@ -103,8 +108,8 @@ def upload_part_copy_task(
103
108
  return Exception("Should not reach here")
104
109
 
105
110
 
106
- def complete_multipart_upload_from_parts(
107
- info: MultipartUploadInfo, parts: list[FinishedPiece]
111
+ def _complete_multipart_upload_from_parts(
112
+ s3_client: BaseClient, state: MergeState, finished_parts: list[FinishedPiece]
108
113
  ) -> str:
109
114
  """
110
115
  Complete a multipart upload using the provided parts.
@@ -117,61 +122,61 @@ def complete_multipart_upload_from_parts(
117
122
  The URL of the completed object
118
123
  """
119
124
  # Sort parts by part number to ensure correct order
120
- parts.sort(key=lambda x: x.part_number)
121
-
122
- # Prepare the parts list for the complete_multipart_upload call
123
- multipart_parts = [
124
- {"ETag": part.etag, "PartNumber": part.part_number} for part in parts
125
- ]
125
+ finished_parts.sort(key=lambda x: x.part_number)
126
+ multipart_parts = [{"ETag": p.etag, "PartNumber": p.part_number} for p in finished_parts]
126
127
 
127
128
  # Complete the multipart upload
128
- response = info.s3_client.complete_multipart_upload(
129
- Bucket=info.bucket_name,
130
- Key=info.object_name,
131
- UploadId=info.upload_id,
129
+ response = s3_client.complete_multipart_upload(
130
+ Bucket=state.bucket,
131
+ Key=state.dst_key,
132
+ UploadId=state.upload_id,
132
133
  MultipartUpload={"Parts": multipart_parts},
133
134
  )
134
135
 
135
136
  # Return the URL of the completed object
136
- return response.get("Location", f"s3://{info.bucket_name}/{info.object_name}")
137
+ return response.get("Location", f"s3://{state.bucket}/{state.dst_key}")
137
138
 
138
139
 
139
- def do_body_work(
140
- info: MultipartUploadInfo,
141
- source_bucket: str,
140
+ def _do_upload_task(
141
+ s3_client: BaseClient,
142
142
  max_workers: int,
143
143
  merge_state: MergeState,
144
- ) -> str | Exception:
145
-
144
+ on_finished: Callable[[FinishedPiece | EndOfStream], None],
145
+ ) -> Exception | None:
146
146
  futures: list[Future[FinishedPiece | Exception]] = []
147
- parts = list(merge_state.all_parts)
148
-
147
+ parts = merge_state.remaining_parts()
148
+ source_bucket = merge_state.bucket
149
149
  with ThreadPoolExecutor(max_workers=max_workers) as executor:
150
150
  semaphore = Semaphore(max_workers)
151
151
  for part in parts:
152
152
  part_number, s3_key = part.part_number, part.s3_key
153
153
 
154
154
  def task(
155
- info=info,
155
+ s3_client=s3_client,
156
+ state=merge_state,
156
157
  source_bucket=source_bucket,
157
158
  s3_key=s3_key,
158
159
  part_number=part_number,
159
160
  ):
160
- out = upload_part_copy_task(
161
- info=info,
161
+ out = _upload_part_copy_task(
162
+ s3_client=s3_client,
163
+ state=state,
162
164
  source_bucket=source_bucket,
163
165
  source_key=s3_key,
164
166
  part_number=part_number,
165
167
  )
166
168
  if isinstance(out, Exception):
167
169
  return out
168
- merge_state.on_finished(out)
170
+ # merge_state.on_finished(out)
171
+ on_finished(out)
169
172
  return out
170
173
 
171
174
  fut = executor.submit(task)
172
175
  fut.add_done_callback(lambda x: semaphore.release())
173
176
  futures.append(fut)
174
- semaphore.acquire()
177
+
178
+ while not semaphore.acquire(blocking=False):
179
+ time.sleep(0.1)
175
180
 
176
181
  # Upload parts by copying from source objects
177
182
  finished_parts: list[FinishedPiece] = []
@@ -183,17 +188,26 @@ def do_body_work(
183
188
  return finished_part
184
189
  finished_parts.append(finished_part)
185
190
 
186
- # Complete the multipart upload
187
- return complete_multipart_upload_from_parts(info, finished_parts)
191
+ on_finished(EndOfStream())
188
192
 
193
+ try:
194
+ # Complete the multipart upload
195
+ _complete_multipart_upload_from_parts(
196
+ s3_client=s3_client, state=merge_state, finished_parts=finished_parts
197
+ )
198
+ except Exception as e:
199
+ warnings.warn(f"Error completing multipart upload: {e}")
200
+ return e
201
+ return None
189
202
 
190
- def begin_upload(
203
+
204
+ def _begin_upload(
191
205
  s3_client: BaseClient,
192
206
  parts: list[Part],
193
- destination_bucket: str,
194
- destination_key: str,
195
- chunk_size: int,
196
- ) -> MultipartUploadInfo:
207
+ bucket: str,
208
+ dst_key: str,
209
+ verbose: bool,
210
+ ) -> str:
197
211
  """
198
212
  Finish a multipart upload by copying parts from existing S3 objects.
199
213
 
@@ -201,127 +215,310 @@ def begin_upload(
201
215
  s3_client: Boto3 S3 client
202
216
  source_bucket: Source bucket name
203
217
  source_keys: List of source object keys to copy from
204
- destination_bucket: Destination bucket name
205
- destination_key: Destination object key
206
- chunk_size: Size of each part in bytes
218
+ bucket: Destination bucket name
219
+ dst_key: Destination object key
207
220
  retries: Number of retry attempts
208
221
  byte_ranges: Optional list of byte ranges corresponding to source_keys
209
222
 
210
223
  Returns:
211
- The URL of the completed object
224
+ The upload id of the multipart upload
212
225
  """
213
226
 
214
227
  # Initiate multipart upload
215
- locked_print(
216
- f"Creating multipart upload for {destination_bucket}/{destination_key} from {len(parts)} source objects"
217
- )
228
+ if verbose:
229
+ locked_print(
230
+ f"Creating multipart upload for {bucket}/{dst_key} from {len(parts)} source objects"
231
+ )
218
232
  create_params: dict[str, str] = {
219
- "Bucket": destination_bucket,
220
- "Key": destination_key,
233
+ "Bucket": bucket,
234
+ "Key": dst_key,
221
235
  }
222
- print(f"Creating multipart upload with {create_params}")
236
+ if verbose:
237
+ locked_print(f"Creating multipart upload with {create_params}")
223
238
  mpu = s3_client.create_multipart_upload(**create_params)
224
- print(f"Created multipart upload: {mpu}")
239
+ if verbose:
240
+ locked_print(f"Created multipart upload: {mpu}")
225
241
  upload_id = mpu["UploadId"]
242
+ return upload_id
243
+
244
+
245
+ class WriteMergeStateThread(Thread):
246
+ def __init__(self, rclone_impl: RcloneImpl, merge_state: MergeState):
247
+ super().__init__(daemon=True)
248
+ assert isinstance(merge_state, MergeState)
249
+ self.merge_state = merge_state
250
+ self.merge_path = merge_state.merge_path
251
+ self.rclone_impl = rclone_impl
252
+ self.queue: Queue[FinishedPiece | EndOfStream] = Queue()
253
+ self.start()
254
+
255
+ def _get_next(self) -> FinishedPiece | EndOfStream:
256
+ item = self.queue.get()
257
+ if isinstance(item, EndOfStream):
258
+ return item
259
+ # see if there are more items in the queue, only write the last one
260
+ while not self.queue.empty():
261
+ item = self.queue.get()
262
+ if isinstance(item, EndOfStream):
263
+ # put it back in for next time
264
+ self.queue.put(item)
265
+ return item
266
+ return item
267
+
268
+ def run(self):
269
+ while True:
270
+ item = self._get_next()
271
+ if isinstance(item, EndOfStream):
272
+ warnings.warn("End of stream")
273
+ break
274
+
275
+ assert isinstance(item, FinishedPiece)
276
+ # piece: FinishedPiece = item
277
+ # at this point just write out the whole json str
278
+ json_str = self.merge_state.to_json_str()
279
+ err = self.rclone_impl.write_text(self.merge_path, json_str)
280
+ if isinstance(err, Exception):
281
+ warnings.warn(f"Error writing merge state: {err}")
282
+ break
283
+
284
+ def add_finished(self, finished: FinishedPiece) -> None:
285
+ self.queue.put(finished)
286
+
287
+ def add_eos(self) -> None:
288
+ self.queue.put(EndOfStream())
289
+
290
+
291
+ def _cleanup_merge(rclone: RcloneImpl, info: InfoJson) -> Exception | None:
292
+ size = info.size
293
+ dst = info.dst
294
+ parts_dir = info.parts_dir
295
+ if not rclone.exists(dst):
296
+ return FileNotFoundError(f"Destination file not found: {dst}")
297
+
298
+ write_size = rclone.size_file(dst)
299
+ if write_size != size:
300
+ return ValueError(f"Size mismatch: {write_size} != {size}")
301
+
302
+ print(f"Upload complete: {dst}")
303
+ cp = rclone.purge(parts_dir)
304
+ if cp.failed():
305
+ return Exception(f"Failed to purge parts dir: {cp}")
306
+ return None
307
+
308
+
309
+ def _get_merge_path(info_path: str) -> str:
310
+ par_dir = os.path.dirname(info_path)
311
+ merge_path = f"{par_dir}/merge.json"
312
+ return merge_path
313
+
314
+
315
+ def _begin_or_resume_merge(
316
+ rclone: RcloneImpl,
317
+ info: InfoJson,
318
+ verbose: bool = False,
319
+ max_workers: int = DEFAULT_MAX_WORKERS,
320
+ ) -> "S3MultiPartMerger | Exception":
321
+ try:
322
+ merger: S3MultiPartMerger = S3MultiPartMerger(
323
+ rclone_impl=rclone,
324
+ info=info,
325
+ verbose=verbose,
326
+ max_workers=max_workers,
327
+ )
226
328
 
227
- # Create upload info
228
- info = MultipartUploadInfo(
229
- s3_client=s3_client,
230
- bucket_name=destination_bucket,
231
- object_name=destination_key,
232
- upload_id=upload_id,
233
- chunk_size=chunk_size,
234
- )
235
- return info
236
-
237
-
238
- def finish_multipart_upload_from_keys(
239
- s3_client: BaseClient,
240
- source_bucket: str,
241
- parts: list[Part],
242
- destination_bucket: str,
243
- destination_key: str,
244
- chunk_size: int, # 5MB default
245
- max_workers: int = 100,
246
- ) -> str | Exception:
247
- """
248
- Finish a multipart upload by copying parts from existing S3 objects.
249
-
250
- Args:
251
- s3_client: Boto3 S3 client
252
- source_bucket: Source bucket name
253
- source_keys: List of source object keys to copy from
254
- destination_bucket: Destination bucket name
255
- destination_key: Destination object key
256
- chunk_size: Size of each part in bytes
257
- retries: Number of retry attempts
258
- byte_ranges: Optional list of byte ranges corresponding to source_keys
259
-
260
- Returns:
261
- The URL of the completed object
262
- """
263
-
264
- merge_state = MergeState(finished=[], all_parts=parts)
265
-
266
- # Create upload info
267
- info = begin_upload(
268
- s3_client=s3_client,
269
- parts=merge_state.all_parts,
270
- destination_bucket=destination_bucket,
271
- destination_key=destination_key,
272
- chunk_size=chunk_size,
273
- )
274
-
275
- out = do_body_work(
276
- info=info,
277
- source_bucket=source_bucket,
278
- max_workers=max_workers,
279
- merge_state=merge_state,
280
- )
329
+ s3_bucket = merger.bucket
330
+ is_done = info.fetch_is_done()
331
+ assert is_done, f"Upload is not done: {info}"
332
+
333
+ merge_path = _get_merge_path(info_path=info.src_info)
334
+ merge_json_text = rclone.read_text(merge_path)
335
+ if isinstance(merge_json_text, str):
336
+ # Attempt to do a resume
337
+ merge_data = json.loads(merge_json_text)
338
+ merge_state = MergeState.from_json(rclone_impl=rclone, json=merge_data)
339
+ if isinstance(merge_state, MergeState):
340
+ merger._begin_resume_merge(merge_state=merge_state)
341
+ return merger
342
+ warnings.warn(f"Failed to resume merge: {merge_state}, starting new merge")
343
+
344
+ parts_dir = info.parts_dir
345
+ source_keys = info.fetch_all_finished()
346
+
347
+ parts_path = parts_dir.split(s3_bucket)[1]
348
+ if parts_path.startswith("/"):
349
+ parts_path = parts_path[1:]
350
+
351
+ first_part: int | None = info.first_part
352
+ last_part: int | None = info.last_part
353
+
354
+ assert first_part is not None
355
+ assert last_part is not None
356
+
357
+ def _to_s3_key(name: str | None) -> str:
358
+ if name:
359
+ out = f"{parts_path}/{name}"
360
+ return out
361
+ out = f"{parts_path}"
362
+ return out
281
363
 
282
- return out
364
+ parts: list[Part] = []
365
+ part_num = first_part
366
+ for part_key in source_keys:
367
+ assert part_num <= last_part and part_num >= first_part
368
+ s3_key = _to_s3_key(name=part_key)
369
+ part = Part(part_number=part_num, s3_key=s3_key)
370
+ parts.append(part)
371
+ part_num += 1
372
+
373
+ dst_name = info.dst_name
374
+ dst_dir = os.path.dirname(parts_path)
375
+ dst_key = f"{dst_dir}/{dst_name}"
376
+
377
+ err = merger._begin_new_merge(
378
+ merge_path=merge_path,
379
+ parts=parts,
380
+ bucket=merger.bucket,
381
+ dst_key=dst_key,
382
+ )
383
+ if isinstance(err, Exception):
384
+ return err
385
+ return merger
386
+ except Exception as e:
387
+ return e
283
388
 
284
389
 
285
- _DEFAULT_MAX_WORKERS = 10
390
+ class S3MultiPartMerger:
391
+ def __init__(
392
+ self,
393
+ rclone_impl: RcloneImpl,
394
+ info: InfoJson,
395
+ s3_config: S3Config | None = None,
396
+ verbose: bool = False,
397
+ max_workers: int = DEFAULT_MAX_WORKERS,
398
+ ) -> None:
399
+ self.rclone_impl: RcloneImpl = rclone_impl
400
+ self.info = info
401
+ self.s3_creds = rclone_impl.get_s3_credentials(remote=info.dst)
402
+ self.verbose = verbose
403
+ s3_config = s3_config or S3Config(
404
+ verbose=verbose,
405
+ timeout_read=_TIMEOUT_READ,
406
+ timeout_connection=_TIMEOUT_CONNECTION,
407
+ max_pool_connections=max_workers,
408
+ )
409
+ self.max_workers = s3_config.max_pool_connections or DEFAULT_MAX_WORKERS
410
+ self.client = create_s3_client(s3_creds=self.s3_creds, s3_config=s3_config)
411
+ self.state: MergeState | None = None
412
+ self.write_thread: WriteMergeStateThread | None = None
413
+
414
+ @staticmethod
415
+ def create(
416
+ rclone: RcloneImpl, info: InfoJson, max_workers: int, verbose: bool
417
+ ) -> "S3MultiPartMerger | Exception":
418
+ return _begin_or_resume_merge(
419
+ rclone=rclone, info=info, max_workers=max_workers, verbose=verbose
420
+ )
286
421
 
422
+ @property
423
+ def bucket(self) -> str:
424
+ return self.s3_creds.bucket_name
287
425
 
288
- class S3MultiPartUploader:
289
- def __init__(self, s3_client: BaseClient, verbose: bool = False) -> None:
290
- self.verbose = verbose
291
- self.client: BaseClient = s3_client
426
+ def start_write_thread(self) -> None:
427
+ assert self.state is not None
428
+ assert self.write_thread is None
429
+ self.write_thread = WriteMergeStateThread(
430
+ rclone_impl=self.rclone_impl,
431
+ merge_state=self.state,
432
+ )
292
433
 
293
- def begin_new_upload(
434
+ def _begin_new_merge(
294
435
  self,
295
436
  parts: list[Part],
296
- destination_bucket: str,
297
- destination_key: str,
298
- chunk_size: int,
299
- ) -> MultipartUploadInfo:
300
- return begin_upload(
301
- s3_client=self.client,
302
- parts=parts,
303
- destination_bucket=destination_bucket,
304
- destination_key=destination_key,
305
- chunk_size=chunk_size,
306
- )
437
+ merge_path: str,
438
+ bucket: str,
439
+ dst_key: str,
440
+ ) -> Exception | None:
441
+ try:
442
+ upload_id: str = _begin_upload(
443
+ s3_client=self.client,
444
+ parts=parts,
445
+ bucket=bucket,
446
+ dst_key=dst_key,
447
+ verbose=self.verbose,
448
+ )
449
+ merge_state = MergeState(
450
+ rclone_impl=self.rclone_impl,
451
+ merge_path=merge_path,
452
+ upload_id=upload_id,
453
+ bucket=bucket,
454
+ dst_key=dst_key,
455
+ finished=[],
456
+ all_parts=parts,
457
+ )
458
+ self.state = merge_state
459
+ return None
460
+ except Exception as e:
461
+ return e
307
462
 
308
- def start_upload_resume(
463
+ def _begin_resume_merge(
309
464
  self,
310
- info: MultipartUploadInfo,
311
- state: MergeState,
312
- max_workers: int = _DEFAULT_MAX_WORKERS,
313
- ) -> MultipartUploadInfo | Exception:
314
- return Exception("Not implemented")
315
-
316
- def start_upload(
465
+ merge_state: MergeState,
466
+ ) -> None:
467
+ self.state = merge_state
468
+
469
+ def _on_piece_finished(self, finished_piece: FinishedPiece | EndOfStream) -> None:
470
+ assert self.write_thread is not None
471
+ assert self.state is not None
472
+ if isinstance(finished_piece, EndOfStream):
473
+ self.write_thread.add_eos()
474
+ else:
475
+ self.state.on_finished(finished_piece)
476
+ self.write_thread.add_finished(finished_piece)
477
+
478
+ def merge(
317
479
  self,
318
- info: MultipartUploadInfo,
319
- parts: list[Part],
320
- max_workers: int = _DEFAULT_MAX_WORKERS,
321
- ) -> str | Exception:
322
- return do_body_work(
323
- info=info,
324
- source_bucket=info.bucket_name,
325
- max_workers=max_workers,
326
- merge_state=MergeState(finished=[], all_parts=parts),
480
+ ) -> Exception | None:
481
+ state = self.state
482
+ if state is None:
483
+ return Exception("No merge state loaded")
484
+ self.start_write_thread()
485
+ err = _do_upload_task(
486
+ s3_client=self.client,
487
+ merge_state=state,
488
+ max_workers=self.max_workers,
489
+ on_finished=self._on_piece_finished,
327
490
  )
491
+ if isinstance(err, Exception):
492
+ return err
493
+ return None
494
+
495
+ def cleanup(self) -> Exception | None:
496
+ return _cleanup_merge(rclone=self.rclone_impl, info=self.info)
497
+
498
+
499
+ def s3_server_side_multi_part_merge(
500
+ rclone: RcloneImpl,
501
+ info_path: str,
502
+ max_workers: int = DEFAULT_MAX_WORKERS,
503
+ verbose: bool = False,
504
+ ) -> Exception | None:
505
+ info = InfoJson(rclone, src=None, src_info=info_path)
506
+ loaded = info.load()
507
+ if not loaded:
508
+ return FileNotFoundError(
509
+ f"Info file not found, has the upload finished? {info_path}"
510
+ )
511
+ merger: S3MultiPartMerger | Exception = S3MultiPartMerger.create(
512
+ rclone=rclone, info=info, max_workers=max_workers, verbose=verbose
513
+ )
514
+ if isinstance(merger, Exception):
515
+ return merger
516
+
517
+ err = merger.merge()
518
+ if isinstance(err, Exception):
519
+ return err
520
+
521
+ err = merger.cleanup()
522
+ if isinstance(err, Exception):
523
+ return err
524
+ return None
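
For reference, s3_server_side_multi_part_merge above is a thin orchestration of the new S3MultiPartMerger: create (or resume from an existing merge.json), merge, then cleanup. A hedged sketch of using the class directly, with an illustrative info.json path and rclone_impl assumed to be an RcloneImpl (e.g. Rclone(...).impl):

    from rclone_api.detail.copy_file_parts import InfoJson
    from rclone_api.s3.s3_multipart_uploader_by_copy import S3MultiPartMerger

    info = InfoJson(rclone_impl, src=None, src_info="dst:bucket/path/file-parts/info.json")
    assert info.load(), "upload has not finished yet"

    merger = S3MultiPartMerger.create(
        rclone=rclone_impl, info=info, max_workers=5, verbose=True
    )
    if isinstance(merger, Exception):
        raise merger

    err = merger.merge()        # progress is checkpointed to merge.json as parts finish
    if err is None:
        err = merger.cleanup()  # verifies the destination size, then purges the parts dir
    if err is not None:
        raise err
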
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rclone_api
3
- Version: 1.4.18
3
+ Version: 1.4.20
4
4
  Summary: rclone api in python
5
5
  Home-page: https://github.com/zackees/rclone-api
6
6
  License: BSD 3-Clause License
@@ -27,13 +27,13 @@ rclone_api/util.py,sha256=yY72YKpmpT_ZM7AleVtPpl0YZZYQPTwTdqKn9qPwm8Y,9290
27
27
  rclone_api/assets/example.txt,sha256=lTBovRjiz0_TgtAtbA1C5hNi2ffbqnNPqkKg6UiKCT8,54
28
28
  rclone_api/cmd/analyze.py,sha256=RHbvk1G5ZUc3qLqlm1AZEyQzd_W_ZjcbCNDvW4YpTKQ,1252
29
29
  rclone_api/cmd/copy_large_s3.py,sha256=B17GliDQyAauNglJCpsey0d3eArT2DAcT9g684TMQk8,3514
30
- rclone_api/cmd/copy_large_s3_finish.py,sha256=FeeFZsuc1Gh7Xpso3alvirTX1_eXvHoAIsddMrVdCyw,6521
30
+ rclone_api/cmd/copy_large_s3_finish.py,sha256=EQjynfT1Iwyh7eObO-sNL26QtMmtd4GI6LkSFnGIHMo,2050
31
31
  rclone_api/cmd/list_files.py,sha256=x8FHODEilwKqwdiU1jdkeJbLwOqUkUQuDWPo2u_zpf0,741
32
32
  rclone_api/cmd/save_to_db.py,sha256=ylvnhg_yzexM-m6Zr7XDiswvoDVSl56ELuFAdb9gqBY,1957
33
33
  rclone_api/db/__init__.py,sha256=OSRUdnSWUlDTOHmjdjVmxYTUNpTbtaJ5Ll9sl-PfZg0,40
34
34
  rclone_api/db/db.py,sha256=YRnYrCaXHwytQt07uEZ_mMpvPHo9-0IWcOb95fVOOfs,10086
35
35
  rclone_api/db/models.py,sha256=v7qaXUehvsDvU51uk69JI23fSIs9JFGcOa-Tv1c_wVs,1600
36
- rclone_api/detail/copy_file_parts.py,sha256=dpqZ0d7l195dZg6Vob2Ty43Uah1v0ozQu5kMtblGqYc,16175
36
+ rclone_api/detail/copy_file_parts.py,sha256=1h-5JJmZdB0_TuVcuYMIClHqAgCXUI4eLyZHbdRiCHg,16280
37
37
  rclone_api/detail/walk.py,sha256=-54NVE8EJcCstwDoaC_UtHm73R2HrZwVwQmsnv55xNU,3369
38
38
  rclone_api/experimental/flags.py,sha256=qCVD--fSTmzlk9hloRLr0q9elzAOFzPsvVpKM3aB1Mk,2739
39
39
  rclone_api/experimental/flags_base.py,sha256=ajU_czkTcAxXYU-SlmiCfHY7aCQGHvpCLqJ-Z8uZLk0,2102
@@ -41,17 +41,17 @@ rclone_api/s3/api.py,sha256=owoQ1H-R0hXcUozxC6sl53D7NmMOewHk2pUxK-ye8ms,4061
41
41
  rclone_api/s3/basic_ops.py,sha256=hK3366xhVEzEcjz9Gk_8lFx6MRceAk72cax6mUrr6ko,2104
42
42
  rclone_api/s3/chunk_task.py,sha256=waEYe-iYQ1_BR3NCS4BrzVrK9UANvH1EcbXx2I6Z_NM,6839
43
43
  rclone_api/s3/create.py,sha256=_Q-faQ4Zl8XKTB28gireRxVXWP-YNxoAK4bligxDtiI,3998
44
- rclone_api/s3/merge_state.py,sha256=FJOQvOVLvAe9E_M_UgPffrTSN2YzRYfBMnzkOtBTqwg,3939
45
- rclone_api/s3/s3_multipart_uploader_by_copy.py,sha256=Rww9S81pbCUUu_f72xkxU4HQ_xOf69_C5MyMmmNchcw,10339
44
+ rclone_api/s3/merge_state.py,sha256=ziTB9CYV-OWaky5C1fOT9hifSY2zgUrk5HmX1Xeu2UA,4978
45
+ rclone_api/s3/s3_multipart_uploader_by_copy.py,sha256=qBUZlNooqA151u9sV6hrZKXIZbpT68kKzXMuuwJuv3s,17331
46
46
  rclone_api/s3/types.py,sha256=cYI5MbXRNdT-ps5kGIRQaYrseHyx_ozT4AcwBABTKwk,1616
47
47
  rclone_api/s3/upload_file_multipart.py,sha256=V7syKjFyVIe4U9Ahl5XgqVTzt9akiew3MFjGmufLo2w,12503
48
48
  rclone_api/s3/multipart/file_info.py,sha256=8v_07_eADo0K-Nsv7F0Ac1wcv3lkIsrR3MaRCmkYLTQ,105
49
- rclone_api/s3/multipart/finished_piece.py,sha256=6ev7MFOV3dWqylJFEttOIeoaEA74RMqNWh258L_ENnY,1732
49
+ rclone_api/s3/multipart/finished_piece.py,sha256=LtlX_mm6_hsADR8FxgfC2_pcO5Wou_20-jE34IcRXew,1633
50
50
  rclone_api/s3/multipart/upload_info.py,sha256=d6_OfzFR_vtDzCEegFfzCfWi2kUBUV4aXZzqAEVp1c4,1874
51
51
  rclone_api/s3/multipart/upload_state.py,sha256=f-Aq2NqtAaMUMhYitlICSNIxCKurWAl2gDEUVizLIqw,6019
52
- rclone_api-1.4.18.dist-info/LICENSE,sha256=b6pOoifSXiUaz_lDS84vWlG3fr4yUKwB8fzkrH9R8bQ,1064
53
- rclone_api-1.4.18.dist-info/METADATA,sha256=Ym9QwhV3G0YNmRao9cluDShuY7AEwclfGTDEz-B2xio,4628
54
- rclone_api-1.4.18.dist-info/WHEEL,sha256=rF4EZyR2XVS6irmOHQIJx2SUqXLZKRMUrjsg8UwN-XQ,109
55
- rclone_api-1.4.18.dist-info/entry_points.txt,sha256=fJteOlYVwgX3UbNuL9jJ0zUTuX2O79JFAeNgK7Sw7EQ,255
56
- rclone_api-1.4.18.dist-info/top_level.txt,sha256=EvZ7uuruUpe9RiUyEp25d1Keq7PWYNT0O_-mr8FCG5g,11
57
- rclone_api-1.4.18.dist-info/RECORD,,
52
+ rclone_api-1.4.20.dist-info/LICENSE,sha256=b6pOoifSXiUaz_lDS84vWlG3fr4yUKwB8fzkrH9R8bQ,1064
53
+ rclone_api-1.4.20.dist-info/METADATA,sha256=vd-0g-1byVjdVbQX_fN4AsQB-xA4Z1pI8vjj8RVF3N0,4628
54
+ rclone_api-1.4.20.dist-info/WHEEL,sha256=rF4EZyR2XVS6irmOHQIJx2SUqXLZKRMUrjsg8UwN-XQ,109
55
+ rclone_api-1.4.20.dist-info/entry_points.txt,sha256=fJteOlYVwgX3UbNuL9jJ0zUTuX2O79JFAeNgK7Sw7EQ,255
56
+ rclone_api-1.4.20.dist-info/top_level.txt,sha256=EvZ7uuruUpe9RiUyEp25d1Keq7PWYNT0O_-mr8FCG5g,11
57
+ rclone_api-1.4.20.dist-info/RECORD,,