rclone-api 1.5.8-py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in those registries.
- rclone_api/__init__.py +951 -0
- rclone_api/assets/example.txt +1 -0
- rclone_api/cli.py +15 -0
- rclone_api/cmd/analyze.py +51 -0
- rclone_api/cmd/copy_large_s3.py +111 -0
- rclone_api/cmd/copy_large_s3_finish.py +81 -0
- rclone_api/cmd/list_files.py +27 -0
- rclone_api/cmd/save_to_db.py +77 -0
- rclone_api/completed_process.py +60 -0
- rclone_api/config.py +87 -0
- rclone_api/convert.py +31 -0
- rclone_api/db/__init__.py +3 -0
- rclone_api/db/db.py +277 -0
- rclone_api/db/models.py +57 -0
- rclone_api/deprecated.py +24 -0
- rclone_api/detail/copy_file_parts_resumable.py +42 -0
- rclone_api/detail/walk.py +116 -0
- rclone_api/diff.py +164 -0
- rclone_api/dir.py +113 -0
- rclone_api/dir_listing.py +66 -0
- rclone_api/exec.py +40 -0
- rclone_api/experimental/flags.py +89 -0
- rclone_api/experimental/flags_base.py +58 -0
- rclone_api/file.py +205 -0
- rclone_api/file_item.py +68 -0
- rclone_api/file_part.py +198 -0
- rclone_api/file_stream.py +52 -0
- rclone_api/filelist.py +30 -0
- rclone_api/group_files.py +256 -0
- rclone_api/http_server.py +244 -0
- rclone_api/install.py +95 -0
- rclone_api/log.py +44 -0
- rclone_api/mount.py +55 -0
- rclone_api/mount_util.py +247 -0
- rclone_api/process.py +187 -0
- rclone_api/rclone_impl.py +1285 -0
- rclone_api/remote.py +21 -0
- rclone_api/rpath.py +102 -0
- rclone_api/s3/api.py +109 -0
- rclone_api/s3/basic_ops.py +61 -0
- rclone_api/s3/chunk_task.py +187 -0
- rclone_api/s3/create.py +107 -0
- rclone_api/s3/multipart/file_info.py +7 -0
- rclone_api/s3/multipart/finished_piece.py +69 -0
- rclone_api/s3/multipart/info_json.py +239 -0
- rclone_api/s3/multipart/merge_state.py +147 -0
- rclone_api/s3/multipart/upload_info.py +62 -0
- rclone_api/s3/multipart/upload_parts_inline.py +356 -0
- rclone_api/s3/multipart/upload_parts_resumable.py +304 -0
- rclone_api/s3/multipart/upload_parts_server_side_merge.py +546 -0
- rclone_api/s3/multipart/upload_state.py +165 -0
- rclone_api/s3/types.py +67 -0
- rclone_api/scan_missing_folders.py +153 -0
- rclone_api/types.py +402 -0
- rclone_api/util.py +324 -0
- rclone_api-1.5.8.dist-info/LICENSE +21 -0
- rclone_api-1.5.8.dist-info/METADATA +969 -0
- rclone_api-1.5.8.dist-info/RECORD +61 -0
- rclone_api-1.5.8.dist-info/WHEEL +5 -0
- rclone_api-1.5.8.dist-info/entry_points.txt +5 -0
- rclone_api-1.5.8.dist-info/top_level.txt +1 -0
rclone_api/s3/types.py
ADDED
@@ -0,0 +1,67 @@
from concurrent.futures import Future
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import Any, Callable

from rclone_api.file_part import FilePart


class S3Provider(Enum):
    S3 = "s3"  # generic S3
    BACKBLAZE = "b2"
    DIGITAL_OCEAN = "DigitalOcean"

    @staticmethod
    def from_str(value: str) -> "S3Provider":
        """Convert string to S3Provider."""
        if value == "s3":
            return S3Provider.S3
        if value == "b2":
            return S3Provider.BACKBLAZE
        if value == "DigitalOcean":
            return S3Provider.DIGITAL_OCEAN
        raise ValueError(f"Unknown S3Provider: {value}")


@dataclass
class S3Credentials:
    """Credentials for accessing S3."""

    bucket_name: str
    provider: S3Provider
    access_key_id: str
    secret_access_key: str
    session_token: str | None = None
    region_name: str | None = None
    endpoint_url: str | None = None


@dataclass
class S3UploadTarget:
    """Target information for an S3 upload."""

    src_file: Path
    src_file_size: int | None
    bucket_name: str
    s3_key: str


@dataclass
class S3MutliPartUploadConfig:
    """Input for a multi-part upload."""

    chunk_size: int
    retries: int
    chunk_fetcher: Callable[[int, int, Any], Future[FilePart]]
    resume_path_json: Path
    max_write_threads: int
    max_chunks_before_suspension: int | None = None
    # If set, this path is used to mount the src file; otherwise one is chosen
    # automatically.
    mount_path: Path | None = None


class MultiUploadResult(Enum):
    UPLOADED_FRESH = 1
    UPLOADED_RESUME = 2
    SUSPENDED = 3
    ALREADY_DONE = 4
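For orientation, a minimal sketch of how these types fit together. The bucket, key, and endpoint values are placeholders for illustration, not part of the package:

    from pathlib import Path

    from rclone_api.s3.types import S3Credentials, S3Provider, S3UploadTarget

    # Placeholder values for illustration only.
    creds = S3Credentials(
        bucket_name="my-bucket",
        provider=S3Provider.from_str("b2"),  # -> S3Provider.BACKBLAZE
        access_key_id="KEY_ID",
        secret_access_key="SECRET",
        endpoint_url="https://s3.example.com",  # provider-specific endpoint
    )
    target = S3UploadTarget(
        src_file=Path("backup.tar"),
        src_file_size=None,  # unknown up front; the uploader may probe it
        bucket_name=creds.bucket_name,
        s3_key="backups/backup.tar",
    )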
rclone_api/scan_missing_folders.py
ADDED
@@ -0,0 +1,153 @@
import random
import time
from concurrent.futures import ThreadPoolExecutor
from queue import Empty, Queue
from threading import Thread
from typing import Generator

from rclone_api import Dir
from rclone_api.detail.walk import walk_runner_depth_first
from rclone_api.dir_listing import DirListing
from rclone_api.types import ListingOption, Order

_MAX_OUT_QUEUE_SIZE = 50


def _reorder_inplace(data: list, order: Order) -> None:
    if order == Order.NORMAL:
        return
    elif order == Order.REVERSE:
        data.reverse()
        return
    elif order == Order.RANDOM:
        random.shuffle(data)
        return
    else:
        raise ValueError(f"Invalid order: {order}")


# Only works for src -> dst diffing.
def _async_diff_dir_walk_task(
    src: Dir, dst: Dir, max_depth: int, out_queue: Queue[Dir | None], order: Order
) -> None:
    can_scan_two_deep = max_depth > 1 or max_depth == -1
    ls_depth = 2 if can_scan_two_deep else 1
    # List both sides concurrently; each listing can be slow on a remote.
    with ThreadPoolExecutor(max_workers=2) as executor:
        t1 = executor.submit(
            src.ls,
            listing_option=ListingOption.DIRS_ONLY,
            order=order,
            max_depth=ls_depth,
        )
        t2 = executor.submit(
            dst.ls,
            listing_option=ListingOption.DIRS_ONLY,
            order=order,
            max_depth=ls_depth,
        )
        src_dir_listing: DirListing = t1.result()
        dst_dir_listing: DirListing = t2.result()
    next_depth = max_depth - ls_depth if max_depth > 0 else max_depth
    src_dirs: list[str] = [d.relative_to(src) for d in src_dir_listing.dirs]
    dst_dirs: list[str] = [d.relative_to(dst) for d in dst_dir_listing.dirs]
    dst_dirs_set: set[str] = set(dst_dirs)
    matching_dirs: list[str] = []
    _reorder_inplace(src_dirs, order)
    _reorder_inplace(dst_dirs, order)
    for src_dir in src_dirs:
        src_dir_dir = src / src_dir
        if src_dir not in dst_dirs_set:
            # The directory is missing on the destination: emit it, then walk
            # it and emit everything underneath it.
            out_queue.put(src_dir_dir)
            if next_depth > 0 or next_depth == -1:
                queue_dir_listing: Queue[DirListing | None] = Queue()
                walk_runner_depth_first(
                    dir=src_dir_dir,
                    out_queue=queue_dir_listing,
                    order=order,
                    max_depth=next_depth,
                )
                # The walker terminates its queue with None.
                while (dirlisting := queue_dir_listing.get()) is not None:
                    for d in dirlisting.dirs:
                        out_queue.put(d)
        else:
            matching_dirs.append(src_dir)

    for matching_dir in matching_dirs:
        # Both sides have this directory; recurse to diff its children.
        if next_depth > 0 or next_depth == -1:
            src_next = src / matching_dir
            dst_next = dst / matching_dir
            _async_diff_dir_walk_task(
                src=src_next,
                dst=dst_next,
                max_depth=next_depth,
                out_queue=out_queue,
                order=order,
            )


def async_diff_dir_walk_task(
    src: Dir, dst: Dir, max_depth: int, out_queue: Queue[Dir | None], order: Order
) -> None:
    try:
        _async_diff_dir_walk_task(
            src=src, dst=dst, max_depth=max_depth, out_queue=out_queue, order=order
        )
    except Exception:
        import _thread

        _thread.interrupt_main()
        raise
    finally:
        out_queue.put(None)


def scan_missing_folders(
    src: Dir,
    dst: Dir,
    max_depth: int = -1,
    order: Order = Order.NORMAL,
) -> Generator[Dir, None, None]:
    """Walk the source tree and yield directories missing from the destination.

    Args:
        src: Source directory or remote to scan
        dst: Destination directory or remote to compare against
        max_depth: Maximum depth to traverse (-1 for unlimited)
        order: Traversal order (normal, reverse, or random)

    Yields:
        Dir: Each directory present under src but absent under dst
    """

    try:
        out_queue: Queue[Dir | None] = Queue(maxsize=_MAX_OUT_QUEUE_SIZE)

        def task() -> None:
            async_diff_dir_walk_task(
                src=src,
                dst=dst,
                max_depth=max_depth,
                out_queue=out_queue,
                order=order,
            )

        worker = Thread(
            target=task,
            daemon=True,
        )
        worker.start()

        # Poll rather than block so the generator stays responsive to
        # KeyboardInterrupt while the worker thread fills the queue.
        while True:
            try:
                dir = out_queue.get_nowait()
                if dir is None:
                    break
                yield dir
            except Empty:
                time.sleep(0.1)

        worker.join()
    except KeyboardInterrupt:
        pass
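A usage sketch, assuming `src` and `dst` are `Dir` objects obtained elsewhere from the rclone_api client (their construction lives outside this module); `missing_on_destination` is a hypothetical helper name, not part of the package:

    from rclone_api import Dir
    from rclone_api.scan_missing_folders import scan_missing_folders
    from rclone_api.types import Order

    def missing_on_destination(src: Dir, dst: Dir) -> list[Dir]:
        # Collect every directory that exists under src but not under dst.
        # Order.RANDOM can spread listing load when several scans run at once.
        return list(
            scan_missing_folders(src=src, dst=dst, max_depth=-1, order=Order.NORMAL)
        )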
rclone_api/types.py
ADDED
@@ -0,0 +1,402 @@
import os
import re
import time
import warnings
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from threading import Lock


class ModTimeStrategy(Enum):
    USE_SERVER_MODTIME = "use-server-modtime"
    NO_MODTIME = "no-modtime"


class ListingOption(Enum):
    DIRS_ONLY = "dirs-only"
    FILES_ONLY = "files-only"
    ALL = "all"


class Order(Enum):
    NORMAL = "normal"
    REVERSE = "reverse"
    RANDOM = "random"


@dataclass
class S3PathInfo:
    remote: str
    bucket: str
    key: str


@dataclass
class SizeResult:
    """Size result dataclass."""

    prefix: str
    total_size: int
    file_sizes: dict[str, int]


def _to_size_suffix(size: int) -> str:
    def _convert(size: int) -> tuple[float, str]:
        val: float
        unit: str
        if size < 1024:
            val = size
            unit = "B"
        elif size < 1024**2:
            val = size / 1024
            unit = "K"
        elif size < 1024**3:
            val = size / (1024**2)
            unit = "M"
        elif size < 1024**4:
            val = size / (1024**3)
            unit = "G"
        elif size < 1024**5:
            val = size / (1024**4)
            unit = "T"
        elif size < 1024**6:
            val = size / (1024**5)
            unit = "P"
        else:
            raise ValueError(f"Invalid size: {size}")

        return val, unit

    def _fmt(_val: float | int, _unit: str) -> str:
        # If the value is a whole number, drop the decimal; otherwise keep
        # one decimal place.
        if float(_val).is_integer():
            first_str = str(int(_val))
        else:
            first_str = f"{_val:.1f}"
        return first_str + _unit

    val, unit = _convert(size)
    out = _fmt(val, unit)
    # Round-trip the value to smooth over floating point error via rounding.
    int_val = _from_size_suffix(out)
    val, unit = _convert(int_val)
    out = _fmt(val, unit)
    return out


# The regex allows decimals (e.g., 16.5MB).
_PATTERN_SIZE_SUFFIX = re.compile(r"^(\d+(?:\.\d+)?)([A-Za-z]+)$")


def _parse_elements(value: str) -> tuple[str, str] | None:
    match = _PATTERN_SIZE_SUFFIX.match(value)
    if match is None:
        return None
    return match.group(1), match.group(2)


def _from_size_suffix(size: str) -> int:
    if size == "0":
        return 0
    pair = _parse_elements(size)
    if pair is None:
        raise ValueError(f"Invalid size suffix: {size}")
    num_str, suffix = pair
    n = float(num_str)
    # Determine the unit from the first letter (e.g., "M" from "MB").
    unit = suffix[0].upper()
    if unit == "B":
        return int(n)
    if unit == "K":
        return int(n * 1024)
    if unit == "M":
        return int(n * 1024**2)
    if unit == "G":
        return int(n * 1024**3)
    if unit == "T":
        return int(n * 1024**4)
    if unit == "P":
        return int(n * 1024**5)
    raise ValueError(f"Invalid size suffix: {suffix}")


class SizeSuffix:
    def __init__(self, size: "int | float | str | SizeSuffix"):
        self._size: int
        if isinstance(size, SizeSuffix):
            self._size = size._size
        elif isinstance(size, int):
            self._size = size
        elif isinstance(size, str):
            self._size = _from_size_suffix(size)
        elif isinstance(size, float):
            self._size = int(size)
        else:
            raise ValueError(f"Invalid type for size: {type(size)}")

    def as_int(self) -> int:
        return self._size

    def as_str(self) -> str:
        return _to_size_suffix(self._size)

    def __repr__(self) -> str:
        return self.as_str()

    def __str__(self) -> str:
        return self.as_str()

    @staticmethod
    def _to_size(size: "int | SizeSuffix") -> int:
        if isinstance(size, int):
            return size
        elif isinstance(size, SizeSuffix):
            return size._size
        else:
            raise ValueError(f"Invalid type for size: {type(size)}")

    def __mul__(self, other: "int | SizeSuffix") -> "SizeSuffix":
        other_int = SizeSuffix(other)
        return SizeSuffix(self._size * other_int._size)

    # Multiply when the int is on the left.
    def __rmul__(self, other: "int | SizeSuffix") -> "SizeSuffix":
        return self.__mul__(other)

    def __add__(self, other: "int | SizeSuffix") -> "SizeSuffix":
        other_int = SizeSuffix(other)
        return SizeSuffix(self._size + other_int._size)

    def __radd__(self, other: "int | SizeSuffix") -> "SizeSuffix":
        return self.__add__(other)

    def __sub__(self, other: "int | SizeSuffix") -> "SizeSuffix":
        other_int = SizeSuffix(other)
        return SizeSuffix(self._size - other_int._size)

    def __rsub__(self, other: "int | SizeSuffix") -> "SizeSuffix":
        other_int = SizeSuffix(other)
        return SizeSuffix(other_int._size - self._size)

    def __truediv__(self, other: "int | SizeSuffix") -> "SizeSuffix":
        other_int = SizeSuffix(other)
        if other_int._size == 0:
            raise ZeroDivisionError("Division by zero is undefined")
        # Use floor division to maintain integer arithmetic.
        return SizeSuffix(self._size // other_int._size)

    def __rtruediv__(self, other: "int | SizeSuffix") -> "SizeSuffix":
        other_int = SizeSuffix(other)
        if self._size == 0:
            raise ZeroDivisionError("Division by zero is undefined")
        # Use floor division to maintain integer arithmetic.
        return SizeSuffix(other_int._size // self._size)

    def __floordiv__(self, other: "int | SizeSuffix") -> "SizeSuffix":
        other_int = SizeSuffix(other)
        if other_int._size == 0:
            raise ZeroDivisionError("Division by zero is undefined")
        return SizeSuffix(self._size // other_int._size)

    def __rfloordiv__(self, other: "int | SizeSuffix") -> "SizeSuffix":
        other_int = SizeSuffix(other)
        if self._size == 0:
            raise ZeroDivisionError("Division by zero is undefined")
        return SizeSuffix(other_int._size // self._size)

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, SizeSuffix) and not isinstance(other, int):
            return False
        return self._size == SizeSuffix(other)._size

    def __ne__(self, other: object) -> bool:
        return not self.__eq__(other)

    def __lt__(self, other: "int | SizeSuffix") -> bool:
        return self._size < SizeSuffix(other)._size

    def __le__(self, other: "int | SizeSuffix") -> bool:
        return self._size <= SizeSuffix(other)._size

    def __gt__(self, other: "int | SizeSuffix") -> bool:
        return self._size > SizeSuffix(other)._size

    def __ge__(self, other: "int | SizeSuffix") -> bool:
        return self._size >= SizeSuffix(other)._size

    def __hash__(self) -> int:
        return hash(self._size)

    def __int__(self) -> int:
        return self._size

    def __iadd__(self, other: "int | SizeSuffix") -> "SizeSuffix":
        other_int = SizeSuffix(other)
        self._size += other_int._size
        return self

    def __isub__(self, other: "int | SizeSuffix") -> "SizeSuffix":
        other_int = SizeSuffix(other)
        self._size -= other_int._size
        return self


_TMP_DIR_ACCESS_LOCK = Lock()


def _clean_old_files(out: Path) -> None:
    # Remove files older than one day, then purge empty directories.
    from rclone_api.util import locked_print

    now = time.time()
    for root, _dirs, files in os.walk(out):
        for name in files:
            f = Path(root) / name
            filemod = f.stat().st_mtime
            diff_secs = now - filemod
            diff_days = diff_secs / (60 * 60 * 24)
            if diff_days > 1:
                locked_print(f"Removing old file: {f}")
                f.unlink()

    for root, dirs, _ in os.walk(out):
        for dir in dirs:
            d = Path(root) / dir
            if not list(d.iterdir()):
                locked_print(f"Removing empty directory: {d}")
                d.rmdir()


def get_chunk_tmpdir() -> Path:
    with _TMP_DIR_ACCESS_LOCK:
        dat = get_chunk_tmpdir.__dict__
        if "out" in dat:
            return dat["out"]  # Folder already validated.
        out = Path("chunk_store")
        if out.exists():
            # First access: clean up stale entries in the directory.
            _clean_old_files(out)
        out.mkdir(exist_ok=True, parents=True)
        dat["out"] = out
        return out


class EndOfStream:
    pass


class Range:
    def __init__(self, start: int | SizeSuffix, end: int | SizeSuffix):
        self.start: SizeSuffix = SizeSuffix(start)  # inclusive
        # Exclusive, unlike an HTTP byte range, which is inclusive.
        self.end: SizeSuffix = SizeSuffix(end)

    def to_header(self) -> dict[str, str]:
        last = self.end - 1
        val = f"bytes={self.start.as_int()}-{last.as_int()}"
        return {"Range": val}

    def __repr__(self) -> str:
        length = self.end - self.start
        return f"Range(start={self.start}, length={length})"

    def __str__(self) -> str:
        return self.__repr__()


_MAX_PART_NUMBER = 10000


def _get_chunk_size(
    src_size: int | SizeSuffix, target_chunk_size: int | SizeSuffix
) -> SizeSuffix:
    src_size = SizeSuffix(src_size)
    target_chunk_size = SizeSuffix(target_chunk_size)
    # The smallest chunk size that still fits within the part-number limit.
    min_chunk_size = src_size // (_MAX_PART_NUMBER - 1)
    if min_chunk_size > target_chunk_size:
        warnings.warn(
            f"min_chunk_size: {min_chunk_size} is greater than target_chunk_size: "
            f"{target_chunk_size}, adjusting target_chunk_size to min_chunk_size"
        )
        chunk_size = SizeSuffix(min_chunk_size)
    else:
        chunk_size = SizeSuffix(target_chunk_size)
    return chunk_size


def _create_part_infos(
    src_size: int | SizeSuffix, target_chunk_size: int | SizeSuffix
) -> list["PartInfo"]:
    # Break the source into parts of (at most) the computed chunk size.
    target_chunk_size = SizeSuffix(target_chunk_size)
    src_size = SizeSuffix(src_size)
    chunk_size = _get_chunk_size(src_size=src_size, target_chunk_size=target_chunk_size)

    part_infos: list[PartInfo] = []
    curr_offset: int = 0
    part_number: int = 0
    while True:
        part_number += 1
        done = False
        end = curr_offset + chunk_size
        if end > src_size:
            done = True
            chunk_size = src_size - curr_offset  # The final part may be short.
        range: Range = Range(start=curr_offset, end=curr_offset + chunk_size)
        part_info = PartInfo(
            part_number=part_number,
            range=range,
        )
        part_infos.append(part_info)
        curr_offset += chunk_size.as_int()
        if curr_offset >= src_size:
            break
        if done:
            break
    return part_infos


@dataclass
class PartInfo:
    part_number: int
    range: Range

    @staticmethod
    def split_parts(
        size: int | SizeSuffix, target_chunk_size: int | SizeSuffix
    ) -> list["PartInfo"]:
        out = _create_part_infos(size, target_chunk_size)
        return out

    def __post_init__(self):
        assert self.part_number >= 0
        assert self.part_number <= _MAX_PART_NUMBER
        assert self.range.start >= 0
        assert self.range.end > self.range.start

    @property
    def name(self) -> str:
        partnumber = f"{self.part_number:05d}"
        offset = self.range.start.as_int()
        end = self.range.end.as_int()
        dst_name = f"part.{partnumber}_{offset}-{end}"
        return dst_name

    def __repr__(self) -> str:
        return f"PartInfo(part_number={self.part_number}, range={self.range})"

    def __str__(self) -> str:
        return self.__repr__()
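A short sketch of the size arithmetic and part splitting these helpers provide; the printed values follow directly from the 1024-based units defined above:

    from rclone_api.types import PartInfo, SizeSuffix

    chunk = SizeSuffix("16M")
    print(chunk.as_int())              # 16777216
    print(SizeSuffix("1G") // chunk)   # 64B (a unitless count, rendered with the byte suffix)

    parts = PartInfo.split_parts(SizeSuffix("1G"), SizeSuffix("256M"))
    print(len(parts))                  # 4
    print(parts[0].name)               # part.00001_0-268435456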