rclone-api 1.3.27__py2.py3-none-any.whl → 1.4.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. rclone_api/__init__.py +491 -4
  2. rclone_api/cmd/copy_large_s3.py +17 -10
  3. rclone_api/db/db.py +3 -3
  4. rclone_api/detail/copy_file_parts.py +382 -0
  5. rclone_api/dir.py +1 -1
  6. rclone_api/dir_listing.py +1 -1
  7. rclone_api/file.py +8 -0
  8. rclone_api/file_part.py +198 -0
  9. rclone_api/file_stream.py +52 -0
  10. rclone_api/http_server.py +15 -21
  11. rclone_api/{rclone.py → rclone_impl.py} +153 -321
  12. rclone_api/remote.py +3 -3
  13. rclone_api/rpath.py +11 -4
  14. rclone_api/s3/chunk_task.py +3 -19
  15. rclone_api/s3/multipart/file_info.py +7 -0
  16. rclone_api/s3/multipart/finished_piece.py +38 -0
  17. rclone_api/s3/multipart/upload_info.py +62 -0
  18. rclone_api/s3/{chunk_types.py → multipart/upload_state.py} +3 -99
  19. rclone_api/s3/s3_multipart_uploader.py +138 -0
  20. rclone_api/s3/types.py +1 -1
  21. rclone_api/s3/upload_file_multipart.py +14 -14
  22. rclone_api/scan_missing_folders.py +1 -1
  23. rclone_api/types.py +136 -165
  24. rclone_api/util.py +22 -2
  25. {rclone_api-1.3.27.dist-info → rclone_api-1.4.1.dist-info}/METADATA +1 -1
  26. rclone_api-1.4.1.dist-info/RECORD +55 -0
  27. rclone_api/mount_read_chunker.py +0 -130
  28. rclone_api/profile/mount_copy_bytes.py +0 -311
  29. rclone_api-1.3.27.dist-info/RECORD +0 -50
  30. /rclone_api/{walk.py → detail/walk.py} +0 -0
  31. {rclone_api-1.3.27.dist-info → rclone_api-1.4.1.dist-info}/LICENSE +0 -0
  32. {rclone_api-1.3.27.dist-info → rclone_api-1.4.1.dist-info}/WHEEL +0 -0
  33. {rclone_api-1.3.27.dist-info → rclone_api-1.4.1.dist-info}/entry_points.txt +0 -0
  34. {rclone_api-1.3.27.dist-info → rclone_api-1.4.1.dist-info}/top_level.txt +0 -0
rclone_api/s3/chunk_task.py CHANGED
@@ -6,30 +6,14 @@ from queue import Queue
  from threading import Event, Lock
  from typing import Any, Callable

- from rclone_api.mount_read_chunker import FilePart
- from rclone_api.s3.chunk_types import S3FileInfo, UploadState
+ from rclone_api.file_part import FilePart
+ from rclone_api.s3.multipart.file_info import S3FileInfo
+ from rclone_api.s3.multipart.upload_state import UploadState
  from rclone_api.types import EndOfStream

  logger = logging.getLogger(__name__) # noqa


- # def _get_file_size(file_path: Path, timeout: int = 60) -> int:
- #     sleep_time = timeout / 60 if timeout > 0 else 1
- #     start = time.time()
- #     while True:
- #         expired = time.time() - start > timeout
- #         try:
- #             time.sleep(sleep_time)
- #             if file_path.exists():
- #                 return file_path.stat().st_size
- #         except FileNotFoundError as e:
- #             if expired:
- #                 print(f"File not found: {file_path}, exception is {e}")
- #                 raise
- #         if expired:
- #             raise TimeoutError(f"File {file_path} not found after {timeout} seconds")
-
-
  class _ShouldStopChecker:
      def __init__(self, max_chunks: int | None) -> None:
          self.count = 0
rclone_api/s3/multipart/file_info.py ADDED
@@ -0,0 +1,7 @@
+ from dataclasses import dataclass
+
+
+ @dataclass
+ class S3FileInfo:
+     upload_id: str
+     part_number: int
rclone_api/s3/multipart/finished_piece.py ADDED
@@ -0,0 +1,38 @@
+ import json
+ from dataclasses import dataclass
+
+ from rclone_api.types import EndOfStream
+
+
+ @dataclass
+ class FinishedPiece:
+     part_number: int
+     etag: str
+
+     def to_json(self) -> dict:
+         return {"part_number": self.part_number, "etag": self.etag}
+
+     def to_json_str(self) -> str:
+         return json.dumps(self.to_json(), indent=0)
+
+     @staticmethod
+     def to_json_array(parts: list["FinishedPiece | EndOfStream"]) -> list[dict]:
+         non_none: list[FinishedPiece] = []
+         for p in parts:
+             if not isinstance(p, EndOfStream):
+                 non_none.append(p)
+         non_none.sort(key=lambda x: x.part_number)
+         # all_nones: list[None] = [None for p in parts if p is None]
+         # assert len(all_nones) <= 1, "Only one None should be present"
+         count_eos = 0
+         for p in parts:
+             if p is EndOfStream:
+                 count_eos += 1
+         assert count_eos <= 1, "Only one EndOfStream should be present"
+         return [p.to_json() for p in non_none]
+
+     @staticmethod
+     def from_json(json: dict | None) -> "FinishedPiece | EndOfStream":
+         if json is None:
+             return EndOfStream()
+         return FinishedPiece(**json)
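For orientation, a minimal sketch (not part of the package) of how the new FinishedPiece helpers round-trip through JSON; the part numbers and etags below are made up:

    from rclone_api.s3.multipart.finished_piece import FinishedPiece
    from rclone_api.types import EndOfStream

    # Pieces may finish out of order, and the stream is terminated by an EndOfStream sentinel.
    pieces = [
        FinishedPiece(part_number=2, etag='"bbb"'),
        FinishedPiece(part_number=1, etag='"aaa"'),
        EndOfStream(),
    ]

    # EndOfStream is dropped and the remaining pieces are sorted by part number.
    as_json = FinishedPiece.to_json_array(pieces)
    # [{'part_number': 1, 'etag': '"aaa"'}, {'part_number': 2, 'etag': '"bbb"'}]

    # from_json(None) maps back to the sentinel; a dict maps back to a FinishedPiece.
    assert isinstance(FinishedPiece.from_json(None), EndOfStream)
    restored = FinishedPiece.from_json(as_json[0])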
rclone_api/s3/multipart/upload_info.py ADDED
@@ -0,0 +1,62 @@
+ import hashlib
+ from dataclasses import dataclass, fields
+ from pathlib import Path
+
+ from botocore.client import BaseClient
+
+
+ @dataclass
+ class UploadInfo:
+     s3_client: BaseClient
+     bucket_name: str
+     object_name: str
+     src_file_path: Path
+     upload_id: str
+     retries: int
+     chunk_size: int
+     file_size: int
+     _total_chunks: int | None = None
+
+     def total_chunks(self) -> int:
+         out = self.file_size // self.chunk_size
+         if self.file_size % self.chunk_size:
+             return out + 1
+         return out
+
+     def __post_init__(self):
+         if self._total_chunks is not None:
+             return
+         self._total_chunks = self.total_chunks()
+
+     def fingerprint(self) -> str:
+         # hash the attributes that are used to identify the upload
+         hasher = hashlib.sha256()
+         # first is file size
+         hasher.update(str(self.file_size).encode("utf-8"))
+         # next is chunk size
+         hasher.update(str(self.chunk_size).encode("utf-8"))
+         # next is the number of parts
+         hasher.update(str(self._total_chunks).encode("utf-8"))
+         return hasher.hexdigest()
+
+     def to_json(self) -> dict:
+         json_dict = {}
+         for f in fields(self):
+             value = getattr(self, f.name)
+             # Convert non-serializable objects (like s3_client) to a string representation.
+             if f.name == "s3_client":
+                 continue
+             else:
+                 if isinstance(value, Path):
+                     value = str(value)
+                 json_dict[f.name] = value
+
+         return json_dict
+
+     @staticmethod
+     def from_json(s3_client: BaseClient, json_dict: dict) -> "UploadInfo":
+         # json_dict.pop("s3_client") # Remove the placeholder string
+         if "s3_client" in json_dict:
+             json_dict.pop("s3_client")
+
+         return UploadInfo(s3_client=s3_client, **json_dict)
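As a rough usage sketch (the bucket, key, sizes, and upload id below are illustrative, not taken from the package), the resume metadata survives a to_json/from_json round trip while the boto3 client itself is skipped on save and re-injected on load:

    from pathlib import Path

    import boto3

    from rclone_api.s3.multipart.upload_info import UploadInfo

    s3 = boto3.client("s3")  # placeholder client for the sketch

    info = UploadInfo(
        s3_client=s3,
        bucket_name="my-bucket",
        object_name="backups/big.bin",
        src_file_path=Path("big.bin"),
        upload_id="example-upload-id",
        retries=3,
        chunk_size=64 * 1024 * 1024,   # 64 MiB parts
        file_size=200 * 1024 * 1024,   # 200 MiB file
    )

    info.total_chunks()   # 4 -- 200 MiB / 64 MiB, rounded up
    info.fingerprint()    # stable sha256 over file size, chunk size, and part count

    state = info.to_json()                     # JSON-safe dict; s3_client omitted, Path -> str
    resumed = UploadInfo.from_json(s3, state)  # client re-injected when resuming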
rclone_api/s3/{chunk_types.py → multipart/upload_state.py} RENAMED
@@ -1,12 +1,13 @@
- import hashlib
  import json
  import os
- from dataclasses import dataclass, field, fields
+ from dataclasses import dataclass, field
  from pathlib import Path
  from threading import Lock

  from botocore.client import BaseClient

+ from rclone_api.s3.multipart.finished_piece import FinishedPiece
+ from rclone_api.s3.multipart.upload_info import UploadInfo
  from rclone_api.types import EndOfStream, SizeSuffix
  from rclone_api.util import locked_print

@@ -14,103 +15,6 @@ from rclone_api.util import locked_print
  _SAVE_STATE_LOCK = Lock()


- @dataclass
- class S3FileInfo:
-     upload_id: str
-     part_number: int
-
-
- @dataclass
- class UploadInfo:
-     s3_client: BaseClient
-     bucket_name: str
-     object_name: str
-     src_file_path: Path
-     upload_id: str
-     retries: int
-     chunk_size: int
-     file_size: int
-     _total_chunks: int | None = None
-
-     def total_chunks(self) -> int:
-         out = self.file_size // self.chunk_size
-         if self.file_size % self.chunk_size:
-             return out + 1
-         return out
-
-     def __post_init__(self):
-         if self._total_chunks is not None:
-             return
-         self._total_chunks = self.total_chunks()
-
-     def fingerprint(self) -> str:
-         # hash the attributes that are used to identify the upload
-         hasher = hashlib.sha256()
-         # first is file size
-         hasher.update(str(self.file_size).encode("utf-8"))
-         # next is chunk size
-         hasher.update(str(self.chunk_size).encode("utf-8"))
-         # next is the number of parts
-         hasher.update(str(self._total_chunks).encode("utf-8"))
-         return hasher.hexdigest()
-
-     def to_json(self) -> dict:
-         json_dict = {}
-         for f in fields(self):
-             value = getattr(self, f.name)
-             # Convert non-serializable objects (like s3_client) to a string representation.
-             if f.name == "s3_client":
-                 continue
-             else:
-                 if isinstance(value, Path):
-                     value = str(value)
-                 json_dict[f.name] = value
-
-         return json_dict
-
-     @staticmethod
-     def from_json(s3_client: BaseClient, json_dict: dict) -> "UploadInfo":
-         # json_dict.pop("s3_client") # Remove the placeholder string
-         if "s3_client" in json_dict:
-             json_dict.pop("s3_client")
-
-         return UploadInfo(s3_client=s3_client, **json_dict)
-
-
- @dataclass
- class FinishedPiece:
-     part_number: int
-     etag: str
-
-     def to_json(self) -> dict:
-         return {"part_number": self.part_number, "etag": self.etag}
-
-     def to_json_str(self) -> str:
-         return json.dumps(self.to_json(), indent=0)
-
-     @staticmethod
-     def to_json_array(parts: list["FinishedPiece | EndOfStream"]) -> list[dict]:
-         non_none: list[FinishedPiece] = []
-         for p in parts:
-             if not isinstance(p, EndOfStream):
-                 non_none.append(p)
-         non_none.sort(key=lambda x: x.part_number)
-         # all_nones: list[None] = [None for p in parts if p is None]
-         # assert len(all_nones) <= 1, "Only one None should be present"
-         count_eos = 0
-         for p in parts:
-             if p is EndOfStream:
-                 count_eos += 1
-         assert count_eos <= 1, "Only one EndOfStream should be present"
-         return [p.to_json() for p in non_none]
-
-     @staticmethod
-     def from_json(json: dict | None) -> "FinishedPiece | EndOfStream":
-         if json is None:
-             return EndOfStream()
-         return FinishedPiece(**json)
-
-
  @dataclass
  class UploadState:
      upload_info: UploadInfo
rclone_api/s3/s3_multipart_uploader.py ADDED
@@ -0,0 +1,138 @@
+ """
+ https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/client/upload_part_copy.html
+ * client.upload_part_copy
+
+
+ """
+
+ # import _thread
+ # import os
+ # import traceback
+ # import warnings
+ # from concurrent.futures import Future, ThreadPoolExecutor
+ # from pathlib import Path
+ # from queue import Queue
+ # from threading import Event, Thread
+ # from typing import Any, Callable
+
+ # from botocore.client import BaseClient
+
+ # from rclone_api.mount_read_chunker import FilePart
+ # from rclone_api.s3.chunk_task import S3FileInfo, file_chunker
+ # from rclone_api.s3.chunk_types import (
+ #     FinishedPiece,
+ #     UploadInfo,
+ #     UploadState,
+ # )
+ # from rclone_api.s3.types import MultiUploadResult
+ # from rclone_api.types import EndOfStream
+ # from rclone_api.util import locked_print
+
+
+ # This is how you upload large parts through multi part upload, then the final call
+ # is to assemble the parts that have already been uploaded through a multi part uploader
+ # and then call complete_multipart_upload to finish the upload
+ # response = (
+ #     client.upload_part_copy(
+ #         Bucket='string',
+ #         CopySource='string' or {'Bucket': 'string', 'Key': 'string', 'VersionId': 'string'},
+ #         CopySourceIfMatch='string',
+ #         CopySourceIfModifiedSince=datetime(2015, 1, 1),
+ #         CopySourceIfNoneMatch='string',
+ #         CopySourceIfUnmodifiedSince=datetime(2015, 1, 1),
+ #         CopySourceRange='string',
+ #         Key='string',
+ #         PartNumber=123,
+ #         UploadId='string',
+ #         SSECustomerAlgorithm='string',
+ #         SSECustomerKey='string',
+ #         CopySourceSSECustomerAlgorithm='string',
+ #         CopySourceSSECustomerKey='string',
+ #         RequestPayer='requester',
+ #         ExpectedBucketOwner='string',
+ #         ExpectedSourceBucketOwner='string'
+ #     )
+
+
+ # def upload_task(
+ #     info: UploadInfo,
+ #     chunk: FilePart,
+ #     part_number: int,
+ #     retries: int,
+ # ) -> FinishedPiece:
+ #     file_or_err: Path | Exception = chunk.get_file()
+ #     if isinstance(file_or_err, Exception):
+ #         raise file_or_err
+ #     file: Path = file_or_err
+ #     size = os.path.getsize(file)
+ #     retries = retries + 1 # Add one for the initial attempt
+ #     for retry in range(retries):
+ #         try:
+ #             if retry > 0:
+ #                 locked_print(f"Retrying part {part_number} for {info.src_file_path}")
+ #             locked_print(
+ #                 f"Uploading part {part_number} for {info.src_file_path} of size {size}"
+ #             )
+
+ #             with open(file, "rb") as f:
+ #                 part = info.s3_client.upload_part(
+ #                     Bucket=info.bucket_name,
+ #                     Key=info.object_name,
+ #                     PartNumber=part_number,
+ #                     UploadId=info.upload_id,
+ #                     Body=f,
+ #                 )
+ #             out: FinishedPiece = FinishedPiece(
+ #                 etag=part["ETag"], part_number=part_number
+ #             )
+ #             chunk.dispose()
+ #             return out
+ #         except Exception as e:
+ #             if retry == retries - 1:
+ #                 locked_print(f"Error uploading part {part_number}: {e}")
+ #                 chunk.dispose()
+ #                 raise e
+ #             else:
+ #                 locked_print(f"Error uploading part {part_number}: {e}, retrying")
+ #                 continue
+ #     raise Exception("Should not reach here")
+
+
+ # def prepare_upload_file_multipart(
+ #     s3_client: BaseClient,
+ #     bucket_name: str,
+ #     file_path: Path,
+ #     file_size: int | None,
+ #     object_name: str,
+ #     chunk_size: int,
+ #     retries: int,
+ # ) -> UploadInfo:
+ #     """Upload a file to the bucket using multipart upload with customizable chunk size."""
+
+ #     # Initiate multipart upload
+ #     locked_print(
+ #         f"Creating multipart upload for {file_path} to {bucket_name}/{object_name}"
+ #     )
+ #     mpu = s3_client.create_multipart_upload(Bucket=bucket_name, Key=object_name)
+ #     upload_id = mpu["UploadId"]
+
+ #     file_size = file_size if file_size is not None else os.path.getsize(file_path)
+
+ #     upload_info: UploadInfo = UploadInfo(
+ #         s3_client=s3_client,
+ #         bucket_name=bucket_name,
+ #         object_name=object_name,
+ #         src_file_path=file_path,
+ #         upload_id=upload_id,
+ #         retries=retries,
+ #         chunk_size=chunk_size,
+ #         file_size=file_size,
+ #     )
+ #     return upload_info
+
+ # class S3MultiPartUploader:
+ #     def __init__(self, s3_client: BaseClient, verbose: bool) -> None:
+ #         self.s3_client = s3_client
+ #         self.verbose = verbose
+
+ #     def prepare(self) -> UploadInfo:
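The new module ships entirely commented out; its docstring and comments describe assembling a destination object server-side from parts that already exist in S3 via upload_part_copy, then sealing it with complete_multipart_upload. A minimal, hypothetical boto3 sketch of that flow (the bucket and key names are placeholders, not taken from the package):

    import boto3

    s3 = boto3.client("s3")

    # Start a multipart upload for the destination object.
    mpu = s3.create_multipart_upload(Bucket="dst-bucket", Key="assembled/object.bin")
    upload_id = mpu["UploadId"]

    # Copy each already-uploaded source object in as one part, server-side (no local download).
    parts = []
    for part_number, src_key in enumerate(["parts/000.bin", "parts/001.bin"], start=1):
        resp = s3.upload_part_copy(
            Bucket="dst-bucket",
            Key="assembled/object.bin",
            PartNumber=part_number,
            UploadId=upload_id,
            CopySource={"Bucket": "src-bucket", "Key": src_key},
        )
        parts.append({"ETag": resp["CopyPartResult"]["ETag"], "PartNumber": part_number})

    # The final call stitches the copied parts into the destination object.
    s3.complete_multipart_upload(
        Bucket="dst-bucket",
        Key="assembled/object.bin",
        UploadId=upload_id,
        MultipartUpload={"Parts": parts},
    )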
rclone_api/s3/types.py CHANGED
@@ -4,7 +4,7 @@ from enum import Enum
  from pathlib import Path
  from typing import Any, Callable

- from rclone_api.mount_read_chunker import FilePart
+ from rclone_api.file_part import FilePart


  class S3Provider(Enum):
rclone_api/s3/upload_file_multipart.py CHANGED
@@ -10,13 +10,12 @@ from typing import Any, Callable

  from botocore.client import BaseClient

- from rclone_api.mount_read_chunker import FilePart
- from rclone_api.s3.chunk_task import S3FileInfo, file_chunker
- from rclone_api.s3.chunk_types import (
-     FinishedPiece,
-     UploadInfo,
-     UploadState,
- )
+ from rclone_api.file_part import FilePart
+ from rclone_api.s3.chunk_task import file_chunker
+ from rclone_api.s3.multipart.file_info import S3FileInfo
+ from rclone_api.s3.multipart.finished_piece import FinishedPiece
+ from rclone_api.s3.multipart.upload_info import UploadInfo
+ from rclone_api.s3.multipart.upload_state import UploadState
  from rclone_api.s3.types import MultiUploadResult
  from rclone_api.types import EndOfStream
  from rclone_api.util import locked_print
@@ -149,6 +148,10 @@ def upload_runner(
      queue_upload: Queue[FilePart | EndOfStream],
      cancel_chunker_event: Event,
  ) -> None:
+     # import semaphre
+     import threading
+
+     semaphore = threading.Semaphore(upload_threads)
      with ThreadPoolExecutor(max_workers=upload_threads) as executor:
          try:
              while True:
@@ -159,9 +162,12 @@
                  def task(upload_info=upload_info, file_chunk=file_chunk):
                      return handle_upload(upload_info, file_chunk)

+                 semaphore.acquire()
+
                  fut = executor.submit(task)

                  def done_cb(fut=fut):
+                     semaphore.release()
                      result = fut.result()
                      if isinstance(result, Exception):
                          warnings.warn(f"Error uploading part: {result}, skipping")
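The added semaphore caps how many parts are handed to the executor at once, so the chunker cannot race ahead of the uploads and pin an unbounded number of file parts in memory. A standalone sketch of the same bounded-submission pattern (the names here are illustrative, not from the package):

    import threading
    from concurrent.futures import ThreadPoolExecutor

    def bounded_submit(items, handler, max_workers: int):
        # At most `max_workers` tasks are outstanding; submission blocks until a slot frees.
        gate = threading.Semaphore(max_workers)
        futures = []
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            for item in items:
                gate.acquire()                                     # wait for a free slot
                fut = executor.submit(handler, item)
                fut.add_done_callback(lambda _f: gate.release())   # free the slot when done
                futures.append(fut)
        return [f.result() for f in futures]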
@@ -192,12 +198,6 @@ def upload_file_multipart(
  ) -> MultiUploadResult:
      """Upload a file to the bucket using multipart upload with customizable chunk size."""
      file_size = file_size if file_size is not None else os.path.getsize(str(file_path))
-     # if chunk_size > file_size:
-     #     warnings.warn(
-     #         f"Chunk size {chunk_size} is greater than file size {file_size}, using file size"
-     #     )
-     #     chunk_size = file_size
-
      if chunk_size < _MIN_UPLOAD_CHUNK_SIZE:
          raise ValueError(
              f"Chunk size {chunk_size} is less than minimum upload chunk size {_MIN_UPLOAD_CHUNK_SIZE}"
@@ -233,7 +233,7 @@
         )
         return upload_state

-     work_que_max = upload_threads // 2 + 2
+     work_que_max = 1

      new_state = make_new_state()
      loaded_state = get_upload_state()
rclone_api/scan_missing_folders.py CHANGED
@@ -6,9 +6,9 @@ from threading import Thread
  from typing import Generator

  from rclone_api import Dir
+ from rclone_api.detail.walk import walk_runner_depth_first
  from rclone_api.dir_listing import DirListing
  from rclone_api.types import ListingOption, Order
- from rclone_api.walk import walk_runner_depth_first

  _MAX_OUT_QUEUE_SIZE = 50