rclone-api 1.3.28__py2.py3-none-any.whl → 1.4.2__py2.py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in that registry.
- rclone_api/__init__.py +491 -4
- rclone_api/cmd/copy_large_s3.py +18 -10
- rclone_api/db/db.py +3 -3
- rclone_api/detail/copy_file_parts.py +382 -0
- rclone_api/dir.py +1 -1
- rclone_api/dir_listing.py +1 -1
- rclone_api/file.py +8 -0
- rclone_api/file_part.py +198 -0
- rclone_api/file_stream.py +52 -0
- rclone_api/http_server.py +15 -21
- rclone_api/{rclone.py → rclone_impl.py} +153 -321
- rclone_api/remote.py +3 -3
- rclone_api/rpath.py +11 -4
- rclone_api/s3/chunk_task.py +3 -19
- rclone_api/s3/multipart/file_info.py +7 -0
- rclone_api/s3/multipart/finished_piece.py +38 -0
- rclone_api/s3/multipart/upload_info.py +62 -0
- rclone_api/s3/{chunk_types.py → multipart/upload_state.py} +3 -99
- rclone_api/s3/s3_multipart_uploader.py +138 -28
- rclone_api/s3/types.py +1 -1
- rclone_api/s3/upload_file_multipart.py +6 -13
- rclone_api/scan_missing_folders.py +1 -1
- rclone_api/types.py +136 -165
- rclone_api/util.py +22 -2
- {rclone_api-1.3.28.dist-info → rclone_api-1.4.2.dist-info}/METADATA +1 -1
- rclone_api-1.4.2.dist-info/RECORD +55 -0
- rclone_api/mount_read_chunker.py +0 -130
- rclone_api/profile/mount_copy_bytes.py +0 -311
- rclone_api-1.3.28.dist-info/RECORD +0 -51
- /rclone_api/{walk.py → detail/walk.py} +0 -0
- {rclone_api-1.3.28.dist-info → rclone_api-1.4.2.dist-info}/LICENSE +0 -0
- {rclone_api-1.3.28.dist-info → rclone_api-1.4.2.dist-info}/WHEEL +0 -0
- {rclone_api-1.3.28.dist-info → rclone_api-1.4.2.dist-info}/entry_points.txt +0 -0
- {rclone_api-1.3.28.dist-info → rclone_api-1.4.2.dist-info}/top_level.txt +0 -0
rclone_api/s3/chunk_task.py
CHANGED
@@ -6,30 +6,14 @@ from queue import Queue
 from threading import Event, Lock
 from typing import Any, Callable
 
-from rclone_api.
-from rclone_api.s3.
+from rclone_api.file_part import FilePart
+from rclone_api.s3.multipart.file_info import S3FileInfo
+from rclone_api.s3.multipart.upload_state import UploadState
 from rclone_api.types import EndOfStream
 
 logger = logging.getLogger(__name__) # noqa
 
 
-# def _get_file_size(file_path: Path, timeout: int = 60) -> int:
-#     sleep_time = timeout / 60 if timeout > 0 else 1
-#     start = time.time()
-#     while True:
-#         expired = time.time() - start > timeout
-#         try:
-#             time.sleep(sleep_time)
-#             if file_path.exists():
-#                 return file_path.stat().st_size
-#         except FileNotFoundError as e:
-#             if expired:
-#                 print(f"File not found: {file_path}, exception is {e}")
-#                 raise
-#             if expired:
-#                 raise TimeoutError(f"File {file_path} not found after {timeout} seconds")
-
-
 class _ShouldStopChecker:
     def __init__(self, max_chunks: int | None) -> None:
         self.count = 0
rclone_api/s3/multipart/finished_piece.py
ADDED
@@ -0,0 +1,38 @@
+import json
+from dataclasses import dataclass
+
+from rclone_api.types import EndOfStream
+
+
+@dataclass
+class FinishedPiece:
+    part_number: int
+    etag: str
+
+    def to_json(self) -> dict:
+        return {"part_number": self.part_number, "etag": self.etag}
+
+    def to_json_str(self) -> str:
+        return json.dumps(self.to_json(), indent=0)
+
+    @staticmethod
+    def to_json_array(parts: list["FinishedPiece | EndOfStream"]) -> list[dict]:
+        non_none: list[FinishedPiece] = []
+        for p in parts:
+            if not isinstance(p, EndOfStream):
+                non_none.append(p)
+        non_none.sort(key=lambda x: x.part_number)
+        # all_nones: list[None] = [None for p in parts if p is None]
+        # assert len(all_nones) <= 1, "Only one None should be present"
+        count_eos = 0
+        for p in parts:
+            if p is EndOfStream:
+                count_eos += 1
+        assert count_eos <= 1, "Only one EndOfStream should be present"
+        return [p.to_json() for p in non_none]
+
+    @staticmethod
+    def from_json(json: dict | None) -> "FinishedPiece | EndOfStream":
+        if json is None:
+            return EndOfStream()
+        return FinishedPiece(**json)
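The new FinishedPiece dataclass is the unit of resume state for a multipart upload: each completed part is reduced to its part_number and ETag, and a None payload deserializes back to the EndOfStream sentinel. A minimal sketch of the round trip, using only the methods shown in the hunk above; the parts list itself is hypothetical:

from rclone_api.s3.multipart.finished_piece import FinishedPiece
from rclone_api.types import EndOfStream

# Hypothetical set of completed parts, out of order, plus the end-of-stream sentinel.
parts: list[FinishedPiece | EndOfStream] = [
    FinishedPiece(part_number=2, etag="etag-2"),
    FinishedPiece(part_number=1, etag="etag-1"),
    EndOfStream(),
]

# The sentinel is dropped and the pieces come back sorted by part number.
serialized = FinishedPiece.to_json_array(parts)
# -> [{"part_number": 1, "etag": "etag-1"}, {"part_number": 2, "etag": "etag-2"}]

# from_json restores a piece; a missing (None) entry maps back to EndOfStream.
restored = [FinishedPiece.from_json(d) for d in serialized]
assert all(isinstance(p, FinishedPiece) for p in restored)
assert isinstance(FinishedPiece.from_json(None), EndOfStream)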
rclone_api/s3/multipart/upload_info.py
ADDED
@@ -0,0 +1,62 @@
+import hashlib
+from dataclasses import dataclass, fields
+from pathlib import Path
+
+from botocore.client import BaseClient
+
+
+@dataclass
+class UploadInfo:
+    s3_client: BaseClient
+    bucket_name: str
+    object_name: str
+    src_file_path: Path
+    upload_id: str
+    retries: int
+    chunk_size: int
+    file_size: int
+    _total_chunks: int | None = None
+
+    def total_chunks(self) -> int:
+        out = self.file_size // self.chunk_size
+        if self.file_size % self.chunk_size:
+            return out + 1
+        return out
+
+    def __post_init__(self):
+        if self._total_chunks is not None:
+            return
+        self._total_chunks = self.total_chunks()
+
+    def fingerprint(self) -> str:
+        # hash the attributes that are used to identify the upload
+        hasher = hashlib.sha256()
+        # first is file size
+        hasher.update(str(self.file_size).encode("utf-8"))
+        # next is chunk size
+        hasher.update(str(self.chunk_size).encode("utf-8"))
+        # next is the number of parts
+        hasher.update(str(self._total_chunks).encode("utf-8"))
+        return hasher.hexdigest()
+
+    def to_json(self) -> dict:
+        json_dict = {}
+        for f in fields(self):
+            value = getattr(self, f.name)
+            # Convert non-serializable objects (like s3_client) to a string representation.
+            if f.name == "s3_client":
+                continue
+            else:
+                if isinstance(value, Path):
+                    value = str(value)
+                json_dict[f.name] = value
+
+        return json_dict
+
+    @staticmethod
+    def from_json(s3_client: BaseClient, json_dict: dict) -> "UploadInfo":
+        # json_dict.pop("s3_client") # Remove the placeholder string
+        if "s3_client" in json_dict:
+            json_dict.pop("s3_client")
+
+        return UploadInfo(s3_client=s3_client, **json_dict)
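UploadInfo.fingerprint() hashes the parameters that determine how the file is split (file size, chunk size, and the derived part count), so a resumed run can tell whether a previously saved upload state still matches the source file. A rough sketch of that check under assumed values; the bucket, key, and upload id are placeholders, and None stands in for a real botocore client:

from pathlib import Path

from rclone_api.s3.multipart.upload_info import UploadInfo

info = UploadInfo(
    s3_client=None,  # a real BaseClient in practice; None keeps the sketch self-contained
    bucket_name="example-bucket",
    object_name="backups/big.bin",
    src_file_path=Path("/tmp/big.bin"),
    upload_id="example-upload-id",
    retries=3,
    chunk_size=64 * 1024 * 1024,   # 64 MiB parts
    file_size=10 * 1024**3,        # 10 GiB source
)

# Persist everything except the client (Path fields are stringified).
saved = info.to_json()

# On resume, rebuild the dataclass around a fresh client and compare fingerprints:
# a mismatch means the file or chunking changed and the saved parts cannot be reused.
restored = UploadInfo.from_json(None, dict(saved))
assert restored.fingerprint() == info.fingerprint()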
rclone_api/s3/{chunk_types.py → multipart/upload_state.py}
RENAMED
@@ -1,12 +1,13 @@
-import hashlib
 import json
 import os
-from dataclasses import dataclass, field
+from dataclasses import dataclass, field
 from pathlib import Path
 from threading import Lock
 
 from botocore.client import BaseClient
 
+from rclone_api.s3.multipart.finished_piece import FinishedPiece
+from rclone_api.s3.multipart.upload_info import UploadInfo
 from rclone_api.types import EndOfStream, SizeSuffix
 from rclone_api.util import locked_print
 
@@ -14,103 +15,6 @@ from rclone_api.util import locked_print
 _SAVE_STATE_LOCK = Lock()
 
 
-@dataclass
-class S3FileInfo:
-    upload_id: str
-    part_number: int
-
-
-@dataclass
-class UploadInfo:
-    s3_client: BaseClient
-    bucket_name: str
-    object_name: str
-    src_file_path: Path
-    upload_id: str
-    retries: int
-    chunk_size: int
-    file_size: int
-    _total_chunks: int | None = None
-
-    def total_chunks(self) -> int:
-        out = self.file_size // self.chunk_size
-        if self.file_size % self.chunk_size:
-            return out + 1
-        return out
-
-    def __post_init__(self):
-        if self._total_chunks is not None:
-            return
-        self._total_chunks = self.total_chunks()
-
-    def fingerprint(self) -> str:
-        # hash the attributes that are used to identify the upload
-        hasher = hashlib.sha256()
-        # first is file size
-        hasher.update(str(self.file_size).encode("utf-8"))
-        # next is chunk size
-        hasher.update(str(self.chunk_size).encode("utf-8"))
-        # next is the number of parts
-        hasher.update(str(self._total_chunks).encode("utf-8"))
-        return hasher.hexdigest()
-
-    def to_json(self) -> dict:
-        json_dict = {}
-        for f in fields(self):
-            value = getattr(self, f.name)
-            # Convert non-serializable objects (like s3_client) to a string representation.
-            if f.name == "s3_client":
-                continue
-            else:
-                if isinstance(value, Path):
-                    value = str(value)
-                json_dict[f.name] = value
-
-        return json_dict
-
-    @staticmethod
-    def from_json(s3_client: BaseClient, json_dict: dict) -> "UploadInfo":
-        # json_dict.pop("s3_client") # Remove the placeholder string
-        if "s3_client" in json_dict:
-            json_dict.pop("s3_client")
-
-        return UploadInfo(s3_client=s3_client, **json_dict)
-
-
-@dataclass
-class FinishedPiece:
-    part_number: int
-    etag: str
-
-    def to_json(self) -> dict:
-        return {"part_number": self.part_number, "etag": self.etag}
-
-    def to_json_str(self) -> str:
-        return json.dumps(self.to_json(), indent=0)
-
-    @staticmethod
-    def to_json_array(parts: list["FinishedPiece | EndOfStream"]) -> list[dict]:
-        non_none: list[FinishedPiece] = []
-        for p in parts:
-            if not isinstance(p, EndOfStream):
-                non_none.append(p)
-        non_none.sort(key=lambda x: x.part_number)
-        # all_nones: list[None] = [None for p in parts if p is None]
-        # assert len(all_nones) <= 1, "Only one None should be present"
-        count_eos = 0
-        for p in parts:
-            if p is EndOfStream:
-                count_eos += 1
-        assert count_eos <= 1, "Only one EndOfStream should be present"
-        return [p.to_json() for p in non_none]
-
-    @staticmethod
-    def from_json(json: dict | None) -> "FinishedPiece | EndOfStream":
-        if json is None:
-            return EndOfStream()
-        return FinishedPiece(**json)
-
-
 @dataclass
 class UploadState:
     upload_info: UploadInfo
rclone_api/s3/s3_multipart_uploader.py
CHANGED
@@ -1,28 +1,138 @@
-
-
-
-
-
-
-
-
-
-
-
-
-from
-from
-from
-
-
-
-
-from rclone_api.
-from rclone_api.
-from rclone_api.
-
-
-
-
-
-
+"""
+https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/client/upload_part_copy.html
+* client.upload_part_copy
+
+
+"""
+
+# import _thread
+# import os
+# import traceback
+# import warnings
+# from concurrent.futures import Future, ThreadPoolExecutor
+# from pathlib import Path
+# from queue import Queue
+# from threading import Event, Thread
+# from typing import Any, Callable
+
+# from botocore.client import BaseClient
+
+# from rclone_api.mount_read_chunker import FilePart
+# from rclone_api.s3.chunk_task import S3FileInfo, file_chunker
+# from rclone_api.s3.chunk_types import (
+#     FinishedPiece,
+#     UploadInfo,
+#     UploadState,
+# )
+# from rclone_api.s3.types import MultiUploadResult
+# from rclone_api.types import EndOfStream
+# from rclone_api.util import locked_print
+
+
+# This is how you upload large parts through multi part upload, then the final call
+# is to assemble the parts that have already been uploaded through a multi part uploader
+# and then call complete_multipart_upload to finish the upload
+# response = (
+#     client.upload_part_copy(
+#         Bucket='string',
+#         CopySource='string' or {'Bucket': 'string', 'Key': 'string', 'VersionId': 'string'},
+#         CopySourceIfMatch='string',
+#         CopySourceIfModifiedSince=datetime(2015, 1, 1),
+#         CopySourceIfNoneMatch='string',
+#         CopySourceIfUnmodifiedSince=datetime(2015, 1, 1),
+#         CopySourceRange='string',
+#         Key='string',
+#         PartNumber=123,
+#         UploadId='string',
+#         SSECustomerAlgorithm='string',
+#         SSECustomerKey='string',
+#         CopySourceSSECustomerAlgorithm='string',
+#         CopySourceSSECustomerKey='string',
+#         RequestPayer='requester',
+#         ExpectedBucketOwner='string',
+#         ExpectedSourceBucketOwner='string'
+#     )
+
+
+# def upload_task(
+#     info: UploadInfo,
+#     chunk: FilePart,
+#     part_number: int,
+#     retries: int,
+# ) -> FinishedPiece:
+#     file_or_err: Path | Exception = chunk.get_file()
+#     if isinstance(file_or_err, Exception):
+#         raise file_or_err
+#     file: Path = file_or_err
+#     size = os.path.getsize(file)
+#     retries = retries + 1 # Add one for the initial attempt
+#     for retry in range(retries):
+#         try:
+#             if retry > 0:
+#                 locked_print(f"Retrying part {part_number} for {info.src_file_path}")
+#             locked_print(
+#                 f"Uploading part {part_number} for {info.src_file_path} of size {size}"
+#             )
+
+#             with open(file, "rb") as f:
+#                 part = info.s3_client.upload_part(
+#                     Bucket=info.bucket_name,
+#                     Key=info.object_name,
+#                     PartNumber=part_number,
+#                     UploadId=info.upload_id,
+#                     Body=f,
+#                 )
+#             out: FinishedPiece = FinishedPiece(
+#                 etag=part["ETag"], part_number=part_number
+#             )
+#             chunk.dispose()
+#             return out
+#         except Exception as e:
+#             if retry == retries - 1:
+#                 locked_print(f"Error uploading part {part_number}: {e}")
+#                 chunk.dispose()
+#                 raise e
+#             else:
+#                 locked_print(f"Error uploading part {part_number}: {e}, retrying")
+#                 continue
+#     raise Exception("Should not reach here")
+
+
+# def prepare_upload_file_multipart(
+#     s3_client: BaseClient,
+#     bucket_name: str,
+#     file_path: Path,
+#     file_size: int | None,
+#     object_name: str,
+#     chunk_size: int,
+#     retries: int,
+# ) -> UploadInfo:
+#     """Upload a file to the bucket using multipart upload with customizable chunk size."""
+
+#     # Initiate multipart upload
+#     locked_print(
+#         f"Creating multipart upload for {file_path} to {bucket_name}/{object_name}"
+#     )
+#     mpu = s3_client.create_multipart_upload(Bucket=bucket_name, Key=object_name)
+#     upload_id = mpu["UploadId"]
+
+#     file_size = file_size if file_size is not None else os.path.getsize(file_path)
+
+#     upload_info: UploadInfo = UploadInfo(
+#         s3_client=s3_client,
+#         bucket_name=bucket_name,
+#         object_name=object_name,
+#         src_file_path=file_path,
+#         upload_id=upload_id,
+#         retries=retries,
+#         chunk_size=chunk_size,
+#         file_size=file_size,
+#     )
+#     return upload_info
+
+# class S3MultiPartUploader:
+#     def __init__(self, s3_client: BaseClient, verbose: bool) -> None:
+#         self.s3_client = s3_client
+#         self.verbose = verbose
+
+#     def prepare(self) -> UploadInfo:
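The module docstring and the large comment block point at client.upload_part_copy: parts that already exist as S3 objects can be copied server-side into a pending multipart upload, with complete_multipart_upload as the final call that assembles them into the destination object. A hedged sketch of that flow using standard boto3 calls with placeholder bucket and key names (this is the generic S3 pattern, not the package's own API):

import boto3

client = boto3.client("s3")
bucket, dest_key = "example-bucket", "assembled/big.bin"
# Hypothetical objects that already hold the individual parts, in order.
part_keys = ["parts/big.bin.000", "parts/big.bin.001", "parts/big.bin.002"]

mpu = client.create_multipart_upload(Bucket=bucket, Key=dest_key)
upload_id = mpu["UploadId"]

finished = []
for part_number, src_key in enumerate(part_keys, start=1):
    # Server-side copy: no part data passes through this machine.
    resp = client.upload_part_copy(
        Bucket=bucket,
        Key=dest_key,
        PartNumber=part_number,
        UploadId=upload_id,
        CopySource={"Bucket": bucket, "Key": src_key},
    )
    finished.append({"PartNumber": part_number, "ETag": resp["CopyPartResult"]["ETag"]})

# Final assembly of the copied parts into the destination object.
client.complete_multipart_upload(
    Bucket=bucket,
    Key=dest_key,
    UploadId=upload_id,
    MultipartUpload={"Parts": finished},
)

Every part other than the last must be at least 5 MiB, which is the same constraint the package enforces through its minimum chunk size check in upload_file_multipart.py below.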
rclone_api/s3/types.py
CHANGED
rclone_api/s3/upload_file_multipart.py
CHANGED
@@ -10,13 +10,12 @@ from typing import Any, Callable
 
 from botocore.client import BaseClient
 
-from rclone_api.
-from rclone_api.s3.chunk_task import
-from rclone_api.s3.
-
-
-
-)
+from rclone_api.file_part import FilePart
+from rclone_api.s3.chunk_task import file_chunker
+from rclone_api.s3.multipart.file_info import S3FileInfo
+from rclone_api.s3.multipart.finished_piece import FinishedPiece
+from rclone_api.s3.multipart.upload_info import UploadInfo
+from rclone_api.s3.multipart.upload_state import UploadState
 from rclone_api.s3.types import MultiUploadResult
 from rclone_api.types import EndOfStream
 from rclone_api.util import locked_print
@@ -199,12 +198,6 @@ def upload_file_multipart(
 ) -> MultiUploadResult:
     """Upload a file to the bucket using multipart upload with customizable chunk size."""
     file_size = file_size if file_size is not None else os.path.getsize(str(file_path))
-    # if chunk_size > file_size:
-    #     warnings.warn(
-    #         f"Chunk size {chunk_size} is greater than file size {file_size}, using file size"
-    #     )
-    #     chunk_size = file_size
-
     if chunk_size < _MIN_UPLOAD_CHUNK_SIZE:
         raise ValueError(
             f"Chunk size {chunk_size} is less than minimum upload chunk size {_MIN_UPLOAD_CHUNK_SIZE}"
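The deleted block used to clamp chunk_size down to the file size with a warning; the surviving check instead rejects undersized chunks outright, since S3 refuses multipart parts smaller than 5 MiB (except the last one). A small illustration of that guard together with the ceil-division part count; the 5 MiB constant is assumed here, not read from the package:

MIN_UPLOAD_CHUNK_SIZE = 5 * 1024 * 1024  # assumed value of the package's _MIN_UPLOAD_CHUNK_SIZE

def plan_parts(file_size: int, chunk_size: int) -> int:
    if chunk_size < MIN_UPLOAD_CHUNK_SIZE:
        raise ValueError(
            f"Chunk size {chunk_size} is less than minimum upload chunk size {MIN_UPLOAD_CHUNK_SIZE}"
        )
    # Ceil division: a partial final chunk still needs its own part.
    return -(-file_size // chunk_size)

assert plan_parts(10 * 1024**3, 64 * 1024 * 1024) == 160      # exact multiple of the chunk size
assert plan_parts(10 * 1024**3 + 1, 64 * 1024 * 1024) == 161  # one extra byte, one extra part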
rclone_api/scan_missing_folders.py
CHANGED
@@ -6,9 +6,9 @@ from threading import Thread
 from typing import Generator
 
 from rclone_api import Dir
+from rclone_api.detail.walk import walk_runner_depth_first
 from rclone_api.dir_listing import DirListing
 from rclone_api.types import ListingOption, Order
-from rclone_api.walk import walk_runner_depth_first
 
 _MAX_OUT_QUEUE_SIZE = 50
 