rclone-api 1.5.8__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
- rclone_api/__init__.py +951 -0
- rclone_api/assets/example.txt +1 -0
- rclone_api/cli.py +15 -0
- rclone_api/cmd/analyze.py +51 -0
- rclone_api/cmd/copy_large_s3.py +111 -0
- rclone_api/cmd/copy_large_s3_finish.py +81 -0
- rclone_api/cmd/list_files.py +27 -0
- rclone_api/cmd/save_to_db.py +77 -0
- rclone_api/completed_process.py +60 -0
- rclone_api/config.py +87 -0
- rclone_api/convert.py +31 -0
- rclone_api/db/__init__.py +3 -0
- rclone_api/db/db.py +277 -0
- rclone_api/db/models.py +57 -0
- rclone_api/deprecated.py +24 -0
- rclone_api/detail/copy_file_parts_resumable.py +42 -0
- rclone_api/detail/walk.py +116 -0
- rclone_api/diff.py +164 -0
- rclone_api/dir.py +113 -0
- rclone_api/dir_listing.py +66 -0
- rclone_api/exec.py +40 -0
- rclone_api/experimental/flags.py +89 -0
- rclone_api/experimental/flags_base.py +58 -0
- rclone_api/file.py +205 -0
- rclone_api/file_item.py +68 -0
- rclone_api/file_part.py +198 -0
- rclone_api/file_stream.py +52 -0
- rclone_api/filelist.py +30 -0
- rclone_api/group_files.py +256 -0
- rclone_api/http_server.py +244 -0
- rclone_api/install.py +95 -0
- rclone_api/log.py +44 -0
- rclone_api/mount.py +55 -0
- rclone_api/mount_util.py +247 -0
- rclone_api/process.py +187 -0
- rclone_api/rclone_impl.py +1285 -0
- rclone_api/remote.py +21 -0
- rclone_api/rpath.py +102 -0
- rclone_api/s3/api.py +109 -0
- rclone_api/s3/basic_ops.py +61 -0
- rclone_api/s3/chunk_task.py +187 -0
- rclone_api/s3/create.py +107 -0
- rclone_api/s3/multipart/file_info.py +7 -0
- rclone_api/s3/multipart/finished_piece.py +69 -0
- rclone_api/s3/multipart/info_json.py +239 -0
- rclone_api/s3/multipart/merge_state.py +147 -0
- rclone_api/s3/multipart/upload_info.py +62 -0
- rclone_api/s3/multipart/upload_parts_inline.py +356 -0
- rclone_api/s3/multipart/upload_parts_resumable.py +304 -0
- rclone_api/s3/multipart/upload_parts_server_side_merge.py +546 -0
- rclone_api/s3/multipart/upload_state.py +165 -0
- rclone_api/s3/types.py +67 -0
- rclone_api/scan_missing_folders.py +153 -0
- rclone_api/types.py +402 -0
- rclone_api/util.py +324 -0
- rclone_api-1.5.8.dist-info/LICENSE +21 -0
- rclone_api-1.5.8.dist-info/METADATA +969 -0
- rclone_api-1.5.8.dist-info/RECORD +61 -0
- rclone_api-1.5.8.dist-info/WHEEL +5 -0
- rclone_api-1.5.8.dist-info/entry_points.txt +5 -0
- rclone_api-1.5.8.dist-info/top_level.txt +1 -0
rclone_api/remote.py
ADDED
@@ -0,0 +1,21 @@
```python
from typing import Any


class Remote:
    """Remote (root) directory."""

    def __init__(self, name: str, rclone: Any) -> None:
        from rclone_api.rclone_impl import RcloneImpl

        if ":" in name:
            raise ValueError("Remote name cannot contain ':'")

        assert isinstance(rclone, RcloneImpl)
        self.name = name
        self.rclone: RcloneImpl = rclone

    def __str__(self) -> str:
        return f"{self.name}:"

    def __repr__(self) -> str:
        return f"Remote({self.name!r})"
```
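A minimal usage sketch of `Remote`, assuming an already-constructed `RcloneImpl` instance named `impl` (hypothetical, not part of the diff above):

```python
# Sketch only: `impl` is a hypothetical, already-constructed RcloneImpl instance.
remote = Remote("dst", impl)
print(str(remote))   # -> "dst:"
print(repr(remote))  # -> "Remote('dst')"
```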
rclone_api/rpath.py
ADDED
@@ -0,0 +1,102 @@
```python
import json
from datetime import datetime
from typing import Any

from rclone_api.remote import Remote


class RPath:
    """Remote file dataclass."""

    def __init__(
        self,
        remote: Remote,
        path: str,
        name: str,
        size: int,
        mime_type: str,
        mod_time: str,
        is_dir: bool,
    ) -> None:
        from rclone_api.rclone_impl import RcloneImpl

        if path.endswith("/"):
            path = path[:-1]
        self.remote = remote
        self.path = path
        self.name = name
        self.size = size
        self.mime_type = mime_type
        self.mod_time = mod_time
        self.is_dir = is_dir
        self.rclone: RcloneImpl | None = None

    def mod_time_dt(self) -> datetime:
        """Return the modification time as a datetime object."""
        return datetime.fromisoformat(self.mod_time)

    def set_rclone(self, rclone: Any) -> None:
        """Set the rclone object."""
        from rclone_api.rclone_impl import RcloneImpl

        assert isinstance(rclone, RcloneImpl)
        self.rclone = rclone

    @staticmethod
    def from_dict(
        data: dict, remote: Remote, parent_path: str | None = None
    ) -> "RPath":
        """Create a File from a dictionary."""
        path = data["Path"]
        if parent_path is not None:
            path = f"{parent_path}/{path}"
        return RPath(
            remote,
            path,
            data["Name"],
            data["Size"],
            data["MimeType"],
            data["ModTime"],
            data["IsDir"],
            # data["IsBucket"],
        )

    @staticmethod
    def from_array(
        data: list[dict], remote: Remote, parent_path: str | None = None
    ) -> list["RPath"]:
        """Create a File from a dictionary."""
        out: list[RPath] = []
        for d in data:
            file: RPath = RPath.from_dict(d, remote, parent_path)
            out.append(file)
        return out

    @staticmethod
    def from_json_str(
        json_str: str, remote: Remote, parent_path: str | None = None
    ) -> list["RPath"]:
        """Create a File from a JSON string."""
        json_obj = json.loads(json_str)
        if isinstance(json_obj, dict):
            return [RPath.from_dict(json_obj, remote, parent_path)]
        return RPath.from_array(json_obj, remote, parent_path)

    def to_json(self) -> dict:
        return {
            "Path": self.path,
            "Name": self.name,
            "Size": self.size,
            "MimeType": self.mime_type,
            "ModTime": self.mod_time,
            "IsDir": self.is_dir,
            # "IsBucket": self.is_bucket,
        }

    def __str__(self) -> str:
        return f"{self.remote.name}:{self.path}"

    def __repr__(self):
        data = self.to_json()
        data["Remote"] = self.remote.name
        return json.dumps(data)
```
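A minimal sketch of parsing an `rclone lsjson`-style listing with `RPath.from_json_str`; the `remote` instance and the JSON payload are assumed, but the keys mirror those read by `from_dict` above:

```python
# Sketch only: `remote` is an existing Remote; the JSON keys match from_dict above.
listing = (
    '[{"Path": "docs/readme.md", "Name": "readme.md", "Size": 1024,'
    ' "MimeType": "text/markdown", "ModTime": "2024-01-01T00:00:00", "IsDir": false}]'
)
paths = RPath.from_json_str(listing, remote)
print(paths[0])  # -> "<remote name>:docs/readme.md"
```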
rclone_api/s3/api.py
ADDED
@@ -0,0 +1,109 @@
```python
import json
import warnings

from botocore.client import BaseClient

from rclone_api.s3.basic_ops import (
    download_file,
    head,
    list_bucket_contents,
    upload_file,
)
from rclone_api.s3.create import S3Config, create_s3_client
from rclone_api.s3.multipart.upload_parts_inline import (
    MultiUploadResult,
    upload_file_multipart,
)
from rclone_api.s3.types import S3Credentials, S3MutliPartUploadConfig, S3UploadTarget

_MIN_THRESHOLD_FOR_CHUNKING = 5 * 1024 * 1024


class S3Client:
    def __init__(self, s3_creds: S3Credentials, verbose: bool = False) -> None:
        self.verbose = verbose
        self.credentials: S3Credentials = s3_creds
        self.client: BaseClient = create_s3_client(
            s3_creds=s3_creds, s3_config=S3Config(verbose=verbose)
        )

    def list_bucket_contents(self, bucket_name: str) -> None:
        list_bucket_contents(self.client, bucket_name)

    def upload_file(self, target: S3UploadTarget) -> Exception | None:
        bucket_name = target.bucket_name
        file_path = target.src_file
        object_name = target.s3_key
        return upload_file(
            s3_client=self.client,
            bucket_name=bucket_name,
            file_path=file_path,
            object_name=object_name,
        )

    def download_file(self, bucket_name: str, object_name: str, file_path: str) -> None:
        download_file(self.client, bucket_name, object_name, file_path)

    def head(self, bucket_name: str, object_name: str) -> dict | None:
        return head(self.client, bucket_name, object_name)

    def upload_file_multipart(
        self,
        upload_target: S3UploadTarget,
        upload_config: S3MutliPartUploadConfig,
    ) -> MultiUploadResult:

        chunk_size = upload_config.chunk_size
        retries = upload_config.retries
        resume_path_json = upload_config.resume_path_json
        max_chunks_before_suspension = upload_config.max_chunks_before_suspension
        bucket_name = upload_target.bucket_name

        try:

            if upload_target.src_file_size is None:
                filesize = upload_target.src_file.stat().st_size
            else:
                filesize = upload_target.src_file_size

            if filesize < _MIN_THRESHOLD_FOR_CHUNKING:
                warnings.warn(
                    f"File size {filesize} is less than the minimum threshold for chunking ({_MIN_THRESHOLD_FOR_CHUNKING}), switching to single threaded upload."
                )
                err = self.upload_file(upload_target)
                if err:
                    raise err
                return MultiUploadResult.UPLOADED_FRESH

            out = upload_file_multipart(
                s3_client=self.client,
                chunk_fetcher=upload_config.chunk_fetcher,
                bucket_name=bucket_name,
                file_path=upload_target.src_file,
                file_size=filesize,
                object_name=upload_target.s3_key,
                resumable_info_path=resume_path_json,
                chunk_size=chunk_size,
                retries=retries,
                max_chunks_before_suspension=max_chunks_before_suspension,
            )
            return out
        except Exception as e:
            key = upload_target.s3_key
            access_key_id = self.credentials.access_key_id[:4] + "..."
            secret = self.credentials.secret_access_key[:4] + "..."
            endpoint_url = self.credentials.endpoint_url
            provider = self.credentials.provider.value
            region_name = self.credentials.region_name
            info_json = {
                "bucket": bucket_name,
                "key": key,
                "access_key_id": access_key_id[:4] + "...",
                "secret": secret[:4] + "...",
                "endpoint_url": endpoint_url,
                "provider": provider,
                "region": region_name,
            }
            info_json_str = json.dumps(info_json, indent=2)
            warnings.warn(f"Error uploading file: {e}\nInfo:\n\n{info_json_str}")
            raise
```
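A hedged construction sketch for `S3Client`. The `S3Credentials` and `S3UploadTarget` field names are inferred from the attribute accesses in this file and are actually defined in `rclone_api/s3/types.py`, so treat them as assumptions rather than the documented signatures:

```python
from pathlib import Path

from rclone_api.s3.api import S3Client
from rclone_api.s3.types import S3Credentials, S3Provider, S3UploadTarget

# Field names below are inferred from attribute accesses in api.py/create.py;
# see rclone_api/s3/types.py for the authoritative dataclass definitions.
creds = S3Credentials(
    provider=S3Provider.BACKBLAZE,
    access_key_id="keyID...",
    secret_access_key="applicationKey...",
    endpoint_url="https://s3.us-west-002.backblazeb2.com",
    region_name="us-west-002",
)
client = S3Client(creds, verbose=True)
target = S3UploadTarget(
    src_file=Path("backup.tar"),
    src_file_size=None,  # None -> size is read via stat() in upload_file_multipart
    bucket_name="my-bucket",
    s3_key="backups/backup.tar",
)
err = client.upload_file(target)  # files under 5 MiB skip multipart entirely
```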
rclone_api/s3/basic_ops.py
ADDED
@@ -0,0 +1,61 @@
```python
from pathlib import Path

from botocore.client import BaseClient


def list_bucket_contents(s3_client: BaseClient, bucket_name: str) -> None:
    """List contents of the specified bucket."""
    try:
        response = s3_client.list_objects_v2(Bucket=bucket_name)
        if "Contents" in response:
            for obj in response["Contents"]:
                print(f"File: {obj['Key']} | Size: {obj['Size']} bytes")
        else:
            print(f"The bucket '{bucket_name}' is empty.")
    except Exception as e:
        print(f"Error listing bucket contents: {e}")


def upload_file(
    s3_client: BaseClient,
    bucket_name: str,
    file_path: Path,
    object_name: str,
) -> Exception | None:
    """Upload a file to the bucket."""
    try:
        s3_client.upload_file(str(file_path), bucket_name, object_name)
        print(f"Uploaded {file_path} to {bucket_name}/{object_name}")
    except Exception as e:
        print(f"Error uploading file: {e}")
        return e
    return None


def download_file(
    s3_client: BaseClient, bucket_name: str, object_name: str, file_path: str
) -> None:
    """Download a file from the bucket."""
    try:
        s3_client.download_file(bucket_name, object_name, file_path)
        print(f"Downloaded {object_name} from {bucket_name} to {file_path}")
    except Exception as e:
        print(f"Error downloading file: {e}")


def head(s3_client: BaseClient, bucket_name: str, object_name: str) -> dict | None:
    """
    Retrieve metadata for the specified object using a HEAD operation.

    :param s3_client: The S3 client to use.
    :param bucket_name: The name of the bucket containing the object.
    :param object_name: The key of the object.
    :return: A dictionary containing the object's metadata if successful, otherwise None.
    """
    try:
        response = s3_client.head_object(Bucket=bucket_name, Key=object_name)
        print(f"Metadata for {object_name} in {bucket_name}: {response}")
        return response
    except Exception as e:
        print(f"Error retrieving metadata for {object_name}: {e}")
        return None
```
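A short sketch of the `head` helper above; `s3_client` is assumed to come from `create_s3_client`, and `ContentLength` is the standard boto3 `head_object` response key:

```python
# Sketch only: `s3_client` is assumed to be a boto3 client from create_s3_client().
meta = head(s3_client, "my-bucket", "backups/backup.tar")
if meta is not None:
    print(meta["ContentLength"])  # object size in bytes from the HEAD response
```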
rclone_api/s3/chunk_task.py
ADDED
@@ -0,0 +1,187 @@
```python
import logging
import time
from concurrent.futures import Future
from pathlib import Path
from queue import Queue
from threading import Event, Lock
from typing import Any, Callable

from rclone_api.file_part import FilePart
from rclone_api.s3.multipart.file_info import S3FileInfo
from rclone_api.s3.multipart.upload_state import UploadState
from rclone_api.types import EndOfStream

logger = logging.getLogger(__name__)  # noqa


class _ShouldStopChecker:
    def __init__(self, max_chunks: int | None) -> None:
        self.count = 0
        self.max_chunks = max_chunks

    def should_stop(self) -> bool:
        if self.max_chunks is None:
            return False
        if self.count >= self.max_chunks:
            logger.info(
                f"Stopping file chunker after {self.count} chunks because it exceeded max_chunks {self.max_chunks}"
            )
            return True
        # self.count += 1
        return False

    def increment(self):
        self.count += 1


class _PartNumberTracker:
    def __init__(
        self, start_part_value: int, last_part_value: int, done_parts: set[int]
    ) -> None:
        # self._num_parts = (last_part_value - start_part_value) + 1
        self._start_part_value = start_part_value
        self._last_part_value = last_part_value
        self._done_part_numbers: set[int] = done_parts
        self._curr_part_number = start_part_value
        self._finished = False
        self._lock = Lock()

    def next_part_number(self) -> int | None:
        with self._lock:
            while self._curr_part_number in self._done_part_numbers:
                self._curr_part_number += 1
            if self._curr_part_number > self._last_part_value:
                self._finished = True
                return None
            curr_part_number = self._curr_part_number
            self._curr_part_number += (
                1  # prevent a second thread from getting the same part number
            )
            return curr_part_number

    def is_finished(self) -> bool:
        with self._lock:
            return self._finished

    def add_finished_part_number(self, part_number: int) -> None:
        with self._lock:
            self._done_part_numbers.add(part_number)


class _OnCompleteHandler:
    def __init__(
        self,
        part_number_tracker: _PartNumberTracker,
        file_path: Path,
        queue_upload: Queue[FilePart | EndOfStream],
    ) -> None:
        self.part_number_tracker = part_number_tracker
        self.file_path = file_path
        self.queue_upload = queue_upload

    def on_complete(self, fut: Future[FilePart]) -> None:
        logger.debug("Chunk read complete")
        fp: FilePart = fut.result()
        extra: S3FileInfo = fp.extra
        assert isinstance(extra, S3FileInfo)
        part_number = extra.part_number
        if fp.is_error():
            logger.warning(f"Error reading file: {fp}, skipping part {part_number}")
            return

        if fp.n_bytes() == 0:
            logger.warning(f"Empty data for part {part_number} of {self.file_path}")
            raise ValueError(f"Empty data for part {part_number} of {self.file_path}")

        if isinstance(fp.payload, Exception):
            logger.warning(f"Error reading file because of error: {fp.payload}")
            return

        # done_part_numbers.add(part_number)
        # queue_upload.put(fp)
        self.part_number_tracker.add_finished_part_number(
            part_number
        )  # in memory database, not persistant to resume.json
        self.queue_upload.put(fp)


def file_chunker(
    upload_state: UploadState,
    fetcher: Callable[[int, int, Any], Future[FilePart]],
    max_chunks: int | None,
    cancel_signal: Event,
    queue_upload: Queue[FilePart | EndOfStream],
) -> None:
    final_part_number = upload_state.upload_info.total_chunks() + 1
    should_stop_checker = _ShouldStopChecker(max_chunks)

    upload_info = upload_state.upload_info
    file_path = upload_info.src_file_path
    chunk_size = upload_info.chunk_size

    done_part_numbers: set[int] = {
        p.part_number for p in upload_state.parts if not isinstance(p, EndOfStream)
    }

    part_tracker = _PartNumberTracker(
        start_part_value=1,
        last_part_value=final_part_number,
        done_parts=done_part_numbers,
    )

    callback = _OnCompleteHandler(part_tracker, file_path, queue_upload)

    try:
        num_parts = upload_info.total_chunks()

        if cancel_signal.is_set():
            logger.info(
                f"Cancel signal is set for file chunker while processing {file_path}, returning"
            )
            return

        while not should_stop_checker.should_stop():
            should_stop_checker.increment()
            logger.debug("Processing next chunk")
            curr_part_number = part_tracker.next_part_number()
            if curr_part_number is None:
                logger.info(f"File {file_path} has completed chunking all parts")
                break

            assert curr_part_number is not None
            offset = (curr_part_number - 1) * chunk_size
            file_size = upload_info.file_size

            assert offset < file_size, f"Offset {offset} is greater than file size"
            fetch_size = max(0, min(chunk_size, file_size - offset))
            if fetch_size == 0:
                logger.error(
                    f"Empty data for part {curr_part_number} of {file_path}, is this the last chunk?"
                )
                # assert final_part_number == curr_part_number, f"Final part number is {final_part_number} but current part number is {curr_part_number}"
                if final_part_number != curr_part_number:
                    raise ValueError(
                        f"This should have been the last part, but it is not: {final_part_number} != {curr_part_number}"
                    )

            assert curr_part_number is not None
            logger.info(
                f"Reading chunk {curr_part_number} of {num_parts} for {file_path}"
            )
            logger.debug(
                f"Fetching part {curr_part_number} with offset {offset} and size {fetch_size}"
            )
            fut = fetcher(
                offset, fetch_size, S3FileInfo(upload_info.upload_id, curr_part_number)
            )
            fut.add_done_callback(callback.on_complete)
            # wait until the queue_upload queue can accept the next chunk
            qsize = queue_upload.qsize()
            print(f"queue_upload_size: {qsize}")
            while queue_upload.full():
                time.sleep(0.1)
    except Exception as e:
        logger.error(f"Error reading file: {e}", exc_info=True)
    finally:
        logger.info(f"Finishing FILE CHUNKER for {file_path} and adding EndOfStream")
        queue_upload.put(EndOfStream())
```
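For clarity, a worked example of the offset/fetch_size arithmetic used by `file_chunker` above; the sizes are illustrative, not defaults from the package:

```python
# Illustrative numbers only; mirrors offset = (part - 1) * chunk_size and the min/max clamp.
chunk_size = 64 * 1024 * 1024   # 64 MiB parts
file_size = 200 * 1024 * 1024   # 200 MiB file -> 3 full parts plus one 8 MiB tail
for part_number in (1, 2, 3, 4):
    offset = (part_number - 1) * chunk_size
    fetch_size = max(0, min(chunk_size, file_size - offset))
    print(part_number, offset, fetch_size)
```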
rclone_api/s3/create.py
ADDED
@@ -0,0 +1,107 @@
```python
import warnings
from dataclasses import dataclass

import boto3
from botocore.client import BaseClient
from botocore.config import Config

from rclone_api.s3.types import S3Credentials, S3Provider

_DEFAULT_BACKBLAZE_ENDPOINT = "https://s3.us-west-002.backblazeb2.com"
_MAX_CONNECTIONS = 10
_TIMEOUT_READ = 120
_TIMEOUT_CONNECT = 60


@dataclass
class S3Config:
    max_pool_connections: int | None = None
    timeout_connection: int | None = None
    timeout_read: int | None = None
    verbose: bool | None = None

    def resolve_defaults(self) -> None:
        self.max_pool_connections = self.max_pool_connections or _MAX_CONNECTIONS
        self.timeout_connection = self.timeout_connection or _TIMEOUT_CONNECT
        self.timeout_read = self.timeout_read or _TIMEOUT_READ
        self.verbose = self.verbose or False


# Create a Boto3 session and S3 client, this is back blaze specific.
# Add a function if you want to use a different S3 provider.
# If AWS support is added in a fork then please merge it back here.
def _create_backblaze_s3_client(
    s3_creds: S3Credentials, s3_config: S3Config
) -> BaseClient:
    """Create and return an S3 client."""
    region_name = s3_creds.region_name
    access_key = s3_creds.access_key_id
    secret_key = s3_creds.secret_access_key
    endpoint_url = s3_creds.endpoint_url
    endpoint_url = endpoint_url or _DEFAULT_BACKBLAZE_ENDPOINT
    s3_config.resolve_defaults()
    session = boto3.session.Session()  # type: ignore
    return session.client(
        service_name="s3",
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
        endpoint_url=endpoint_url,
        # verify=False,  # Disables SSL certificate verification
        config=Config(
            signature_version="s3v4",
            region_name=region_name,
            max_pool_connections=s3_config.max_pool_connections,
            read_timeout=s3_config.timeout_read,
            connect_timeout=s3_config.timeout_connection,
            # Note that BackBlase has a boko3 bug where it doesn't support the new
            # checksum header, the following line was an attempt of fix it on the newest
            # version of boto3, but it didn't work.
            s3={"payload_signing_enabled": False},  # Disable checksum header
        ),
    )


def _create_unknown_s3_client(
    s3_creds: S3Credentials, s3_config: S3Config
) -> BaseClient:
    """Create and return an S3 client."""
    access_key = s3_creds.access_key_id
    secret_key = s3_creds.secret_access_key
    endpoint_url = s3_creds.endpoint_url
    if (endpoint_url is not None) and not (endpoint_url.startswith("http")):
        if s3_config.verbose:
            warnings.warn(
                f"Endpoint URL is schema naive: {endpoint_url}, assuming HTTPS"
            )
        endpoint_url = f"https://{endpoint_url}"
    s3_config.resolve_defaults()
    session = boto3.session.Session()  # type: ignore
    return session.client(
        service_name="s3",
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
        endpoint_url=endpoint_url,
        config=Config(
            signature_version="s3v4",
            region_name=s3_creds.region_name,
            max_pool_connections=s3_config.max_pool_connections,
            read_timeout=s3_config.timeout_read,
            connect_timeout=s3_config.timeout_connection,
        ),
    )


def create_s3_client(
    s3_creds: S3Credentials, s3_config: S3Config | None = None
) -> BaseClient:
    """Create and return an S3 client."""
    s3_config = s3_config or S3Config()
    provider = s3_creds.provider
    if provider == S3Provider.BACKBLAZE:
        if s3_config.verbose:
            print("Creating BackBlaze S3 client")
        return _create_backblaze_s3_client(s3_creds=s3_creds, s3_config=s3_config)
    else:
        if s3_config.verbose:
            print("Creating generic/unknown S3 client")
        return _create_unknown_s3_client(s3_creds=s3_creds, s3_config=s3_config)
```
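A small sketch of how `S3Config.resolve_defaults` fills unset fields before the boto3 client is built:

```python
# Unset fields fall back to the module constants; explicit values are kept.
cfg = S3Config(max_pool_connections=20)
cfg.resolve_defaults()
print(cfg.max_pool_connections)  # 20 (kept)
print(cfg.timeout_connection)    # 60  (_TIMEOUT_CONNECT)
print(cfg.timeout_read)          # 120 (_TIMEOUT_READ)
print(cfg.verbose)               # False
```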
rclone_api/s3/multipart/finished_piece.py
ADDED
@@ -0,0 +1,69 @@
```python
import warnings
from dataclasses import dataclass

from rclone_api.types import EndOfStream


@dataclass
class FinishedPiece:
    part_number: int
    etag: str

    def to_json(self) -> dict:
        # return {"part_number": self.part_number, "etag": self.etag}
        # amazon s3 style dict
        tag = self.etag
        if not tag.startswith('"'):
            tag = f'"{tag}"'
        out = {"PartNumber": self.part_number, "ETag": self.etag}
        return out

    def __post_init__(self):
        assert isinstance(self.part_number, int)
        assert isinstance(self.etag, str)

    @staticmethod
    def to_json_array(
        parts: list["FinishedPiece | EndOfStream"] | list["FinishedPiece"],
    ) -> list[dict]:
        non_none: list[FinishedPiece] = []
        for p in parts:
            if not isinstance(p, EndOfStream):
                non_none.append(p)
        non_none.sort(key=lambda x: x.part_number)
        # all_nones: list[None] = [None for p in parts if p is None]
        # assert len(all_nones) <= 1, "Only one None should be present"
        count_eos = 0
        for p in parts:
            if p is EndOfStream:
                count_eos += 1
        # assert count_eos <= 1, "Only one EndOfStream should be present"
        if count_eos > 1:
            warnings.warn(f"Only one EndOfStream should be present, found {count_eos}")
        out = [p.to_json() for p in non_none]
        return out

    @staticmethod
    def from_json(json: dict | None) -> "FinishedPiece | EndOfStream":
        if json is None:
            return EndOfStream()
        part_number = json.get("PartNumber") or json.get("part_number")
        etag = json.get("ETag") or json.get("etag")
        assert isinstance(etag, str)
        # handle the double quotes around the etag
        etag = etag.replace('"', "")
        assert isinstance(part_number, int)
        assert isinstance(etag, str)
        return FinishedPiece(part_number=part_number, etag=etag)

    @staticmethod
    def from_json_array(json: dict) -> list["FinishedPiece"]:
        tmp = [FinishedPiece.from_json(j) for j in json]
        out: list[FinishedPiece] = []
        for t in tmp:
            if isinstance(t, FinishedPiece):
                out.append(t)
        return out

    def __hash__(self) -> int:
        return hash(self.part_number)
```
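A round-trip sketch for `FinishedPiece` using the S3-style dict form produced by `to_json` and consumed by `from_json`; the etag value is a placeholder, and `EndOfStream` is imported from `rclone_api.types` as in the module above:

```python
# Placeholder etag; to_json_array drops EndOfStream markers and sorts by part number.
piece = FinishedPiece(part_number=3, etag="9b2cf535f27731c974343645a3985328")
as_dict = piece.to_json()                 # {"PartNumber": 3, "ETag": "9b2c..."}
restored = FinishedPiece.from_json(as_dict)
assert restored == piece
print(FinishedPiece.to_json_array([piece, EndOfStream()]))
```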