rclone-api 1.5.38__py3-none-any.whl → 1.5.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rclone_api/__init__.py +1003 -1003
- rclone_api/config.py +186 -153
- rclone_api/db/__init__.py +3 -3
- rclone_api/detail/walk.py +116 -116
- rclone_api/dir.py +113 -113
- rclone_api/log.py +44 -44
- rclone_api/rclone_impl.py +1360 -1360
- rclone_api/s3/multipart/upload_parts_server_side_merge.py +546 -546
- rclone_api/scan_missing_folders.py +153 -153
- {rclone_api-1.5.38.dist-info → rclone_api-1.5.39.dist-info}/METADATA +1100 -1100
- {rclone_api-1.5.38.dist-info → rclone_api-1.5.39.dist-info}/RECORD +15 -15
- {rclone_api-1.5.38.dist-info → rclone_api-1.5.39.dist-info}/WHEEL +0 -0
- {rclone_api-1.5.38.dist-info → rclone_api-1.5.39.dist-info}/entry_points.txt +0 -0
- {rclone_api-1.5.38.dist-info → rclone_api-1.5.39.dist-info}/licenses/LICENSE +0 -0
- {rclone_api-1.5.38.dist-info → rclone_api-1.5.39.dist-info}/top_level.txt +0 -0
rclone_api/rclone_impl.py
CHANGED
@@ -1,1360 +1,1360 @@
The viewer marks every line as replaced, but the added side repeats the removed side verbatim (as far as the capture shows), so the file is reproduced once below, reconstructed from the diff with the gutter numbers and +/- markers stripped:

```python
"""
Unit test file.
"""

import os
import random
import subprocess
import time
import tracemalloc
import warnings
from concurrent.futures import Future, ThreadPoolExecutor
from datetime import datetime
from fnmatch import fnmatch
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Generator

from rclone_api import Dir
from rclone_api.completed_process import CompletedProcess
from rclone_api.config import Config, Parsed, Section
from rclone_api.convert import convert_to_filestr_list, convert_to_str
from rclone_api.deprecated import deprecated
from rclone_api.detail.walk import walk
from rclone_api.diff import DiffItem, DiffOption, diff_stream_from_running_process
from rclone_api.dir_listing import DirListing
from rclone_api.exec import RcloneExec
from rclone_api.file import File
from rclone_api.file_stream import FilesStream
from rclone_api.fs import FSPath, RemoteFS
from rclone_api.group_files import group_files
from rclone_api.http_server import HttpServer
from rclone_api.mount import Mount
from rclone_api.process import Process
from rclone_api.remote import Remote
from rclone_api.rpath import RPath
from rclone_api.s3.create import S3Credentials
from rclone_api.s3.types import (
    S3Provider,
)
from rclone_api.types import (
    ListingOption,
    ModTimeStrategy,
    Order,
    PartInfo,
    SizeResult,
    SizeSuffix,
)
from rclone_api.util import (
    find_free_port,
    get_check,
    get_rclone_exe,
    get_verbose,
    to_path,
)

# Enable tracing memory usage always
tracemalloc.start()


def rclone_verbose(verbose: bool | None) -> bool:
    if verbose is not None:
        os.environ["RCLONE_API_VERBOSE"] = "1" if verbose else "0"
    return bool(int(os.getenv("RCLONE_API_VERBOSE", "0")))


def _to_rclone_conf(config: Config | Path) -> Config:
    if isinstance(config, Path):
        content = config.read_text(encoding="utf-8")
        return Config(content)
    else:
        return config


def _parse_paths(src: str) -> list[Path] | Exception:
    # Config file: C:\Users\niteris\AppData\Roaming\rclone\rclone.conf
    # Cache dir: C:\Users\niteris\AppData\Local\rclone
    # Temp dir: C:\Users\niteris\AppData\Local\Temp
    lines = src.splitlines()
    paths: list[Path] = []
    for line in lines:
        try:
            parts = line.split(":")
            if len(parts) != 2:
                continue
            path = Path(parts[1].strip())
            paths.append(path)
        except Exception as e:
            return e
    return paths
```
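The module-level `rclone_verbose` helper doubles as getter and setter for the `RCLONE_API_VERBOSE` environment flag. A minimal usage sketch (mine, not part of the diff; the import path simply mirrors the file shown here):

```python
from rclone_api.rclone_impl import rclone_verbose

rclone_verbose(True)            # sets RCLONE_API_VERBOSE=1 and returns True
enabled = rclone_verbose(None)  # None leaves the flag alone and just reads it
assert enabled is True
```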
```python
class RcloneImpl:
    def __init__(
        self, rclone_conf: Path | Config | None, rclone_exe: Path | None = None
    ) -> None:
        if isinstance(rclone_conf, Path):
            if not rclone_conf.exists():
                raise ValueError(f"Rclone config file not found: {rclone_conf}")
        if rclone_conf is None:
            from rclone_api.config import find_conf_file

            maybe_path = find_conf_file(self)
            if not isinstance(maybe_path, Path):
                raise ValueError("Rclone config file not found")
            rclone_conf = _to_rclone_conf(maybe_path)
        self._exec = RcloneExec(rclone_conf, get_rclone_exe(rclone_exe))
        self.config: Config = _to_rclone_conf(rclone_conf)

    def _run(
        self, cmd: list[str], check: bool = False, capture: bool | Path | None = None
    ) -> subprocess.CompletedProcess:
        return self._exec.execute(cmd, check=check, capture=capture)

    def _launch_process(
        self, cmd: list[str], capture: bool | None = None, log: Path | None = None
    ) -> Process:
        return self._exec.launch_process(cmd, capture=capture, log=log)

    def _get_tmp_mount_dir(self) -> Path:
        return Path("tmp_mnts")

    def _get_cache_dir(self) -> Path:
        return Path("cache")

    def webgui(self, other_args: list[str] | None = None) -> Process:
        """Launch the Rclone web GUI."""
        cmd = ["rcd", "--rc-web-gui"]
        if other_args:
            cmd += other_args
        return self._launch_process(cmd, capture=False)

    def filesystem(self, src: str) -> RemoteFS:
        return RemoteFS(self.config, src)

    def cwd(self, src: str) -> FSPath:
        return self.filesystem(src).cwd()

    def launch_server(
        self,
        addr: str,
        user: str | None = None,
        password: str | None = None,
        other_args: list[str] | None = None,
    ) -> Process:
        """Launch the Rclone server so it can receive commands"""
        cmd = ["rcd"]
        if addr is not None:
            cmd += ["--rc-addr", addr]
        if user is not None:
            cmd += ["--rc-user", user]
        if password is not None:
            cmd += ["--rc-pass", password]
        if other_args:
            cmd += other_args
        out = self._launch_process(cmd, capture=False)
        time.sleep(1)  # Give it some time to launch
        return out

    def remote_control(
        self,
        addr: str,
        user: str | None = None,
        password: str | None = None,
        capture: bool | None = None,
        other_args: list[str] | None = None,
    ) -> CompletedProcess:
        cmd = ["rc"]
        if addr:
            cmd += ["--rc-addr", addr]
        if user is not None:
            cmd += ["--rc-user", user]
        if password is not None:
            cmd += ["--rc-pass", password]
        if other_args:
            cmd += other_args
        cp = self._run(cmd, capture=capture)
        return CompletedProcess.from_subprocess(cp)

    def obscure(self, password: str) -> str:
        """Obscure a password for use in rclone config files."""
        cmd_list: list[str] = ["obscure", password]
        cp = self._run(cmd_list)
        return cp.stdout.strip()

    def ls_stream(
        self,
        src: str,
        max_depth: int = -1,
        fast_list: bool = False,
    ) -> FilesStream:
        """
        List files in the given path

        Args:
            src: Remote path to list
            max_depth: Maximum recursion depth (-1 for unlimited)
            fast_list: Use fast list (only use when getting THE entire data repository from the root/bucket, or it's small)
        """
        cmd = ["lsjson", src, "--files-only"]
        recurse = max_depth < 0 or max_depth > 1
        if recurse:
            cmd.append("-R")
            if max_depth > 1:
                cmd += ["--max-depth", str(max_depth)]
        if fast_list:
            cmd.append("--fast-list")
        streamer = FilesStream(src, self._launch_process(cmd, capture=True))
        return streamer

    def save_to_db(
        self,
        src: str,
        db_url: str,
        max_depth: int = -1,
        fast_list: bool = False,
    ) -> None:
        """
        Save files to a database (sqlite, mysql, postgres)

        Args:
            src: Remote path to list, this will be used to populate an entire table, so always use the root-most path.
            db_url: Database URL, like sqlite:///data.db or mysql://user:pass@localhost/db or postgres://user:pass@localhost/db
            max_depth: Maximum depth to traverse (-1 for unlimited)
            fast_list: Use fast list (only use when getting THE entire data repository from the root/bucket)

        """
        from rclone_api.db import DB

        db = DB(db_url)
        with self.ls_stream(src, max_depth, fast_list) as stream:
            for page in stream.files_paged(page_size=10000):
                db.add_files(page)
```
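`save_to_db` composes `ls_stream` with the package's DB layer, committing listings in 10,000-file pages so memory stays flat on huge buckets. A hedged sketch (not from the package docs; the remote name, conf path, and DB URL are placeholders):

```python
from pathlib import Path
from rclone_api.rclone_impl import RcloneImpl

rclone = RcloneImpl(Path("rclone.conf"))  # placeholder conf path
# One row per file under the root-most path, paged through SQLite.
rclone.save_to_db("remote:my-bucket", "sqlite:///files.db", max_depth=-1)
```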
```python
    def ls(
        self,
        src: Dir | Remote | str | None = None,
        max_depth: int | None = None,
        glob: str | None = None,
        order: Order = Order.NORMAL,
        listing_option: ListingOption = ListingOption.ALL,
    ) -> DirListing:
        """List files in the given path.

        Args:
            src: Remote path or Remote object to list
            max_depth: Maximum recursion depth (0 means no recursion)

        Returns:
            List of File objects found at the path
        """

        if src is None:
            # list remotes instead
            list_remotes: list[Remote] = self.listremotes()
            dirs: list[Dir] = [Dir(remote) for remote in list_remotes]
            for d in dirs:
                d.path.path = ""
            rpaths = [d.path for d in dirs]
            return DirListing(rpaths)

        if isinstance(src, str):
            src = Dir(
                to_path(src, self)
            )  # assume it's a directory if ls is being called.

        cmd = ["lsjson"]
        if max_depth is not None:
            if max_depth < 0:
                cmd.append("--recursive")
            if max_depth > 0:
                cmd.append("--max-depth")
                cmd.append(str(max_depth))
        if listing_option != ListingOption.ALL:
            cmd.append(f"--{listing_option.value}")

        cmd.append(str(src))
        remote = src.remote if isinstance(src, Dir) else src
        assert isinstance(remote, Remote)

        cp = self._run(cmd, check=True)
        text = cp.stdout
        parent_path: str | None = None
        if isinstance(src, Dir):
            parent_path = src.path.path
        paths: list[RPath] = RPath.from_json_str(text, remote, parent_path=parent_path)
        # print(parent_path)
        for o in paths:
            o.set_rclone(self)

        # do we have a glob pattern?
        if glob is not None:
            paths = [p for p in paths if fnmatch(p.path, glob)]

        if order == Order.REVERSE:
            paths.reverse()
        elif order == Order.RANDOM:
            random.shuffle(paths)
        return DirListing(paths)
```
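Note that `ls` applies the glob filter and ordering client-side, after `lsjson` returns. A sketch (placeholders throughout, assuming a configured `remote:`):

```python
from pathlib import Path
from rclone_api.rclone_impl import RcloneImpl
from rclone_api.types import Order

rclone = RcloneImpl(Path("rclone.conf"))  # placeholder conf path
listing = rclone.ls("remote:my-bucket/photos", max_depth=1,
                    glob="*.jpg", order=Order.RANDOM)
for f in listing.files:
    print(f)  # File objects from rclone_api.file
```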
```python
    def print(self, src: str) -> Exception | None:
        """Print the contents of a file."""
        try:
            text_or_err = self.read_text(src)
            if isinstance(text_or_err, Exception):
                return text_or_err
            print(text_or_err)
        except Exception as e:
            return e
        return None

    def stat(self, src: str) -> File | Exception:
        """Get the status of a file or directory."""
        dirlist: DirListing = self.ls(src)
        if len(dirlist.files) == 0:
            # raise FileNotFoundError(f"File not found: {src}")
            return FileNotFoundError(f"File not found: {src}")
        try:
            file: File = dirlist.files[0]
            return file
        except Exception as e:
            return e

    def modtime(self, src: str) -> str | Exception:
        """Get the modification time of a file or directory."""
        try:
            file: File | Exception = self.stat(src)
            if isinstance(file, Exception):
                return file
            return file.mod_time()
        except Exception as e:
            return e

    def modtime_dt(self, src: str) -> datetime | Exception:
        """Get the modification time of a file or directory."""
        modtime: str | Exception = self.modtime(src)
        if isinstance(modtime, Exception):
            return modtime
        return datetime.fromisoformat(modtime)

    def listremotes(self) -> list[Remote]:
        cmd = ["listremotes"]
        cp = self._run(cmd)
        text: str = cp.stdout
        tmp = text.splitlines()
        tmp = [t.strip() for t in tmp]
        # strip out ":" from the end
        tmp = [t.replace(":", "") for t in tmp]
        out = [Remote(name=t, rclone=self) for t in tmp]
        return out

    def diff(
        self,
        src: str,
        dst: str,
        min_size: (
            str | None
        ) = None,  # e. g. "1MB" - see rclone documentation: https://rclone.org/commands/rclone_check/
        max_size: (
            str | None
        ) = None,  # e. g. "1GB" - see rclone documentation: https://rclone.org/commands/rclone_check/
        diff_option: DiffOption = DiffOption.COMBINED,
        fast_list: bool = True,
        size_only: bool | None = None,
        checkers: int | None = None,
        other_args: list[str] | None = None,
    ) -> Generator[DiffItem, None, None]:
        """Be extra careful with the src and dst values. If you are off by one
        parent directory, you will get a huge amount of false diffs."""
        other_args = other_args or []
        if checkers is None or checkers < 1:
            checkers = 1000
        cmd = [
            "check",
            src,
            dst,
            "--checkers",
            str(checkers),
            "--log-level",
            "INFO",
            f"--{diff_option.value}",
            "-",
        ]
        if size_only is None:
            size_only = diff_option in [
                DiffOption.MISSING_ON_DST,
                DiffOption.MISSING_ON_SRC,
            ]
        if size_only:
            cmd += ["--size-only"]
        if fast_list:
            cmd += ["--fast-list"]
        if min_size:
            cmd += ["--min-size", min_size]
        if max_size:
            cmd += ["--max-size", max_size]
        if diff_option == DiffOption.MISSING_ON_DST:
            cmd += ["--one-way"]
        if other_args:
            cmd += other_args
        proc = self._launch_process(cmd, capture=True)
        item: DiffItem
        for item in diff_stream_from_running_process(
            running_process=proc, src_slug=src, dst_slug=dst, diff_option=diff_option
        ):
            if item is None:
                break
            yield item
```
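`diff` is a generator over the output of `rclone check ... --combined -`, so results stream while the check is still running. A hedged sketch (bucket names are placeholders):

```python
from pathlib import Path
from rclone_api.rclone_impl import RcloneImpl
from rclone_api.diff import DiffOption

rclone = RcloneImpl(Path("rclone.conf"))  # placeholder conf path
for item in rclone.diff("remote:bucket-a", "remote:bucket-b",
                        diff_option=DiffOption.MISSING_ON_DST):
    print(item)  # DiffItems arrive as rclone emits them
```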
```python
    def walk(
        self,
        src: Dir | Remote | str,
        max_depth: int = -1,
        breadth_first: bool = True,
        order: Order = Order.NORMAL,
    ) -> Generator[DirListing, None, None]:
        """Walk through the given path recursively.

        Args:
            src: Remote path or Remote object to walk through
            max_depth: Maximum depth to traverse (-1 for unlimited)

        Yields:
            DirListing: Directory listing for each directory encountered
        """
        dir_obj: Dir
        if isinstance(src, Dir):
            # Create a Remote object for the path
            remote = src.remote
            rpath = RPath(
                remote=remote,
                path=src.path.path,
                name=src.path.name,
                size=0,
                mime_type="inode/directory",
                mod_time="",
                is_dir=True,
            )
            rpath.set_rclone(self)
            dir_obj = Dir(rpath)
        elif isinstance(src, str):
            dir_obj = Dir(to_path(src, self))
        elif isinstance(src, Remote):
            dir_obj = Dir(src)
        else:
            dir_obj = Dir(src)  # shut up pyright
            assert f"Invalid type for path: {type(src)}"

        yield from walk(
            dir_obj, max_depth=max_depth, breadth_first=breadth_first, order=order
        )
```
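`walk` yields one `DirListing` per directory encountered, breadth-first by default. A sketch (placeholder remote):

```python
from pathlib import Path
from rclone_api.rclone_impl import RcloneImpl

rclone = RcloneImpl(Path("rclone.conf"))  # placeholder conf path
for dirlisting in rclone.walk("remote:my-bucket", max_depth=2):
    for f in dirlisting.files:
        print(f)
```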
```python
    def scan_missing_folders(
        self,
        src: Dir | Remote | str,
        dst: Dir | Remote | str,
        max_depth: int = -1,
        order: Order = Order.NORMAL,
    ) -> Generator[Dir, None, None]:
        """Walk through the given path recursively.

        WORK IN PROGRESS!!

        Args:
            src: Source directory or Remote to walk through
            dst: Destination directory or Remote to walk through
            max_depth: Maximum depth to traverse (-1 for unlimited)

        Yields:
            DirListing: Directory listing for each directory encountered
        """
        from rclone_api.scan_missing_folders import scan_missing_folders

        src_dir = Dir(to_path(src, self))
        dst_dir = Dir(to_path(dst, self))
        yield from scan_missing_folders(
            src=src_dir, dst=dst_dir, max_depth=max_depth, order=order
        )

    def cleanup(
        self, src: str, other_args: list[str] | None = None
    ) -> CompletedProcess:
        """Cleanup any resources used by the Rclone instance."""
        # rclone cleanup remote:path [flags]
        cmd = ["cleanup", src]
        if other_args:
            cmd += other_args
        out = self._run(cmd)
        return CompletedProcess.from_subprocess(out)

    def get_verbose(self) -> bool:
        return get_verbose(None)

    def copy_to(
        self,
        src: File | str,
        dst: File | str,
        check: bool | None = None,
        verbose: bool | None = None,
        other_args: list[str] | None = None,
    ) -> CompletedProcess:
        """Copy one file from source to destination.

        Warning - slow.

        """
        check = get_check(check)
        verbose = get_verbose(verbose)
        src = src if isinstance(src, str) else str(src.path)
        dst = dst if isinstance(dst, str) else str(dst.path)
        cmd_list: list[str] = ["copyto", src, dst, "--s3-no-check-bucket"]
        if other_args is not None:
            cmd_list += other_args
        cp = self._run(cmd_list, check=check)
        return CompletedProcess.from_subprocess(cp)
```
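`copy_to` shells out once per file (hence the "Warning - slow" in its docstring); for batches, `copy_files` below is the intended path. A single-file sketch (placeholder paths):

```python
from pathlib import Path
from rclone_api.rclone_impl import RcloneImpl

rclone = RcloneImpl(Path("rclone.conf"))  # placeholder conf path
cp = rclone.copy_to("remote:bucket/src.txt", "remote:bucket/dst.txt", check=True)
assert cp.returncode == 0
```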
```python
    def copy_files(
        self,
        src: str,
        dst: str,
        files: list[str] | Path,
        check: bool | None = None,
        max_backlog: int | None = None,
        verbose: bool | None = None,
        checkers: int | None = None,
        transfers: int | None = None,
        low_level_retries: int | None = None,
        retries: int | None = None,
        retries_sleep: str | None = None,
        metadata: bool | None = None,
        timeout: str | None = None,
        max_partition_workers: int | None = None,
        multi_thread_streams: int | None = None,
        other_args: list[str] | None = None,
    ) -> list[CompletedProcess]:
        """Copy multiple files from source to destination.

        Args:
            payload: Dictionary of source and destination file paths
        """
        check = get_check(check)
        max_partition_workers = max_partition_workers or 1
        low_level_retries = low_level_retries or 10
        retries = retries or 3
        other_args = other_args or []
        other_args.append("--s3-no-check-bucket")
        checkers = checkers or 1000
        transfers = transfers or 32
        verbose = get_verbose(verbose)
        payload: list[str] = (
            files
            if isinstance(files, list)
            else [f.strip() for f in files.read_text().splitlines() if f.strip()]
        )
        if len(payload) == 0:
            return []

        for p in payload:
            if ":" in p:
                raise ValueError(
                    f"Invalid file path, contains a remote, which is not allowed for copy_files: {p}"
                )

        using_fast_list = "--fast-list" in other_args
        if using_fast_list:
            warnings.warn(
                "It's not recommended to use --fast-list with copy_files as this will perform poorly on large repositories since the entire repository has to be scanned."
            )

        if max_partition_workers > 1:
            datalists: dict[str, list[str]] = group_files(
                payload, fully_qualified=False
            )
        else:
            datalists = {"": payload}
        # out: subprocess.CompletedProcess | None = None
        out: list[CompletedProcess] = []

        futures: list[Future] = []

        with ThreadPoolExecutor(max_workers=max_partition_workers) as executor:
            for common_prefix, files in datalists.items():

                def _task(
                    files: list[str] | Path = files,
                ) -> subprocess.CompletedProcess:
                    with TemporaryDirectory() as tmpdir:
                        filelist: list[str] = []
                        filepath: Path
                        if isinstance(files, list):
                            include_files_txt = Path(tmpdir) / "include_files.txt"
                            include_files_txt.write_text(
                                "\n".join(files), encoding="utf-8"
                            )
                            filelist = list(files)
                            filepath = Path(include_files_txt)
                        elif isinstance(files, Path):
                            filelist = [
                                f.strip()
                                for f in files.read_text().splitlines()
                                if f.strip()
                            ]
                            filepath = files
                        if common_prefix:
                            src_path = f"{src}/{common_prefix}"
                            dst_path = f"{dst}/{common_prefix}"
                        else:
                            src_path = src
                            dst_path = dst

                        if verbose:
                            nfiles = len(filelist)
                            files_fqdn = [f" {src_path}/{f}" for f in filelist]
                            print(f"Copying {nfiles} files:")
                            chunk_size = 100
                            for i in range(0, nfiles, chunk_size):
                                chunk = files_fqdn[i : i + chunk_size]
                                files_str = "\n".join(chunk)
                                print(f"{files_str}")
                        cmd_list: list[str] = [
                            "copy",
                            src_path,
                            dst_path,
                            "--files-from",
                            str(filepath),
                            "--checkers",
                            str(checkers),
                            "--transfers",
                            str(transfers),
                            "--low-level-retries",
                            str(low_level_retries),
                            "--retries",
                            str(retries),
                        ]
                        if metadata:
                            cmd_list.append("--metadata")
                        if retries_sleep is not None:
                            cmd_list += ["--retries-sleep", retries_sleep]
                        if timeout is not None:
                            cmd_list += ["--timeout", timeout]
                        if max_backlog is not None:
                            cmd_list += ["--max-backlog", str(max_backlog)]
                        if multi_thread_streams is not None:
                            cmd_list += [
                                "--multi-thread-streams",
                                str(multi_thread_streams),
                            ]
                        if verbose:
                            if not any(["-v" in x for x in other_args]):
                                cmd_list.append("-vvvv")
                            if not any(["--progress" in x for x in other_args]):
                                cmd_list.append("--progress")
                        if other_args:
                            cmd_list += other_args
                        out = self._run(cmd_list, capture=not verbose)
                        return out

                fut: Future = executor.submit(_task)
                futures.append(fut)
            for fut in futures:
                cp: subprocess.CompletedProcess = fut.result()
                assert cp is not None
                out.append(CompletedProcess.from_subprocess(cp))
                if cp.returncode != 0:
                    if check:
                        raise ValueError(f"Error deleting files: {cp.stderr}")
                    else:
                        warnings.warn(f"Error deleting files: {cp.stderr}")
        return out
```
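`copy_files` partitions the file list by common prefix (when `max_partition_workers > 1`) and drives one `rclone copy --files-from` per partition in a thread pool; paths must be relative to `src`, and anything containing `:` is rejected. A hedged sketch (bucket names and file paths are placeholders):

```python
from pathlib import Path
from rclone_api.rclone_impl import RcloneImpl

rclone = RcloneImpl(Path("rclone.conf"))  # placeholder conf path
results = rclone.copy_files(
    src="remote:bucket-a/data",
    dst="remote:bucket-b/data",
    files=["2024/a.bin", "2024/b.bin"],  # relative paths only, no "remote:" prefix
    max_partition_workers=2,
)
assert all(r.returncode == 0 for r in results)
```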
```python
    def copy(
        self,
        src: Dir | str,
        dst: Dir | str,
        check: bool | None = None,
        transfers: int | None = None,
        checkers: int | None = None,
        multi_thread_streams: int | None = None,
        low_level_retries: int | None = None,
        retries: int | None = None,
        other_args: list[str] | None = None,
    ) -> CompletedProcess:
        """Copy files from source to destination.

        Args:
            src: Source directory
            dst: Destination directory
        """
        # src_dir = src.path.path
        # dst_dir = dst.path.path
        src_dir = convert_to_str(src)
        dst_dir = convert_to_str(dst)
        check = get_check(check)
        checkers = checkers or 1000
        transfers = transfers or 32
        low_level_retries = low_level_retries or 10
        retries = retries or 3
        cmd_list: list[str] = ["copy", src_dir, dst_dir]
        cmd_list += ["--checkers", str(checkers)]
        cmd_list += ["--transfers", str(transfers)]
        cmd_list += ["--low-level-retries", str(low_level_retries)]
        cmd_list.append("--s3-no-check-bucket")
        if multi_thread_streams is not None:
            cmd_list += ["--multi-thread-streams", str(multi_thread_streams)]
        if other_args:
            cmd_list += other_args
        cp = self._run(cmd_list, check=check, capture=False)
        return CompletedProcess.from_subprocess(cp)

    def purge(self, src: Dir | str) -> CompletedProcess:
        """Purge a directory"""
        # path should always be a string
        src = src if isinstance(src, str) else str(src.path)
        cmd_list: list[str] = ["purge", str(src)]
        cp = self._run(cmd_list)
        return CompletedProcess.from_subprocess(cp)

    def delete_files(
        self,
        files: str | File | list[str] | list[File],
        check: bool | None = None,
        rmdirs=False,
        verbose: bool | None = None,
        max_partition_workers: int | None = None,
        other_args: list[str] | None = None,
    ) -> CompletedProcess:
        """Delete a directory"""
        check = get_check(check)
        verbose = get_verbose(verbose)
        payload: list[str] = convert_to_filestr_list(files)
        if len(payload) == 0:
            if verbose:
                print("No files to delete")
            cp = subprocess.CompletedProcess(
                args=["rclone", "delete", "--files-from", "[]"],
                returncode=0,
                stdout="",
                stderr="",
            )
            return CompletedProcess.from_subprocess(cp)

        datalists: dict[str, list[str]] = group_files(payload)
        completed_processes: list[subprocess.CompletedProcess] = []

        futures: list[Future] = []

        with ThreadPoolExecutor(max_workers=max_partition_workers) as executor:

            for remote, files in datalists.items():

                def _task(
                    files=files, check=check, remote=remote
                ) -> subprocess.CompletedProcess:
                    with TemporaryDirectory() as tmpdir:
                        include_files_txt = Path(tmpdir) / "include_files.txt"
                        include_files_txt.write_text("\n".join(files), encoding="utf-8")

                        # print(include_files_txt)
                        cmd_list: list[str] = [
                            "delete",
                            remote,
                            "--files-from",
                            str(include_files_txt),
                            "--checkers",
                            "1000",
                            "--transfers",
                            "1000",
                        ]
                        if verbose:
                            cmd_list.append("-vvvv")
                        if rmdirs:
                            cmd_list.append("--rmdirs")
                        if other_args:
                            cmd_list += other_args
                        out = self._run(cmd_list, check=check)
                        if out.returncode != 0:
                            if check:
                                completed_processes.append(out)
                                raise ValueError(f"Error deleting files: {out}")
                            else:
                                warnings.warn(f"Error deleting files: {out}")
                        return out

                fut: Future = executor.submit(_task)
                futures.append(fut)

            for fut in futures:
                out = fut.result()
                assert out is not None
                completed_processes.append(out)

        return CompletedProcess(completed_processes)

    @deprecated("delete_files")
    def deletefiles(
        self, files: str | File | list[str] | list[File]
    ) -> CompletedProcess:
        out = self.delete_files(files)
        return out

    def exists(self, src: Dir | Remote | str | File) -> bool:
        """Check if a file or directory exists."""
        arg: str = convert_to_str(src)
        assert isinstance(arg, str)
        try:
            dir_listing = self.ls(arg)
            # print(dir_listing)
            return len(dir_listing.dirs) > 0 or len(dir_listing.files) > 0
        except subprocess.CalledProcessError:
            return False

    def is_synced(self, src: str | Dir, dst: str | Dir) -> bool:
        """Check if two directories are in sync."""
        src = convert_to_str(src)
        dst = convert_to_str(dst)
        cmd_list: list[str] = ["check", str(src), str(dst)]
        try:
            self._run(cmd_list, check=True)
            return True
        except subprocess.CalledProcessError:
            return False

    def copy_file_s3_resumable(
        self,
        src: str,  # src:/Bucket/path/myfile.large.zst
        dst: str,  # dst:/Bucket/path/myfile.large
        part_infos: list[PartInfo] | None = None,
        upload_threads: int = 8,
        merge_threads: int = 4,
    ) -> Exception | None:
        """Copy parts of a file from source to destination."""
        from rclone_api.detail.copy_file_parts_resumable import (
            copy_file_parts_resumable,
        )

        if dst.endswith("/"):
            dst = dst[:-1]
        dst_dir = f"{dst}-parts"

        out = copy_file_parts_resumable(
            self=self,
            src=src,
            dst_dir=dst_dir,
            part_infos=part_infos,
            upload_threads=upload_threads,
            merge_threads=merge_threads,
        )
        return out

    def write_text(
        self,
        dst: str,
        text: str,
    ) -> Exception | None:
        """Write text to a file."""
        data = text.encode("utf-8")
        return self.write_bytes(dst=dst, data=data)

    def write_bytes(
        self,
        dst: str,
        data: bytes,
    ) -> Exception | None:
        """Write bytes to a file."""
        with TemporaryDirectory() as tmpdir:
            tmpfile = Path(tmpdir) / "file.bin"
            tmpfile.write_bytes(data)
            completed_proc = self.copy_to(str(tmpfile), dst, check=True)
            if completed_proc.returncode != 0:
                return Exception(f"Failed to write bytes to {dst}", completed_proc)
        return None

    def read_bytes(self, src: str) -> bytes | Exception:
        """Read bytes from a file."""
        with TemporaryDirectory() as tmpdir:
            tmpfile = Path(tmpdir) / "file.bin"
            completed_proc = self.copy_to(src, str(tmpfile), check=True)
            if completed_proc.returncode != 0:
                return Exception(f"Failed to read bytes from {src}", completed_proc)

            if not tmpfile.exists():
                return Exception(f"Failed to read bytes from {src}, file not found")
            try:
                return tmpfile.read_bytes()
            except Exception as e:
                return Exception(f"Failed to read bytes from {src}", e)

    def read_text(self, src: str) -> str | Exception:
        """Read text from a file."""
        data = self.read_bytes(src)
        if isinstance(data, Exception):
            return data
        try:
            return data.decode("utf-8")
        except UnicodeDecodeError as e:
            return Exception(f"Failed to decode text from {src}", e)
```
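The read/write helpers round-trip through a temp file plus `copyto`, returning exceptions instead of raising. A sketch (placeholder remote path):

```python
from pathlib import Path
from rclone_api.rclone_impl import RcloneImpl

rclone = RcloneImpl(Path("rclone.conf"))  # placeholder conf path
err = rclone.write_text("remote:bucket/notes.txt", "hello")
assert err is None
text = rclone.read_text("remote:bucket/notes.txt")
if isinstance(text, Exception):
    raise text
assert text == "hello"
```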
```python
    def size_file(self, src: str) -> SizeSuffix | Exception:
        """Get the size of a file or directory."""
        # src_parent = os.path.dirname(src)
        # src_name = os.path.basename(src)
        # can't use this because it's only one file.
        # out: SizeResult = self.size_files(src_parent, [src_name])
        # one_file = len(out.file_sizes) == 1
        # if not one_file:
        #     return Exception(
        #         f"More than one result returned, is this is a directory? {out}"
        #     )
        # return SizeSuffix(out.total_size)
        dirlist: DirListing = self.ls(
            src, listing_option=ListingOption.FILES_ONLY, max_depth=0
        )
        if len(dirlist.files) == 0:
            return FileNotFoundError(f"File not found: {src}")
        if len(dirlist.files) > 1:
            return Exception(f"More than one file found: {src}")
        file: File = dirlist.files[0]
        return SizeSuffix(file.size)

    def get_s3_credentials(
        self, remote: str, verbose: bool | None = None
    ) -> S3Credentials:
        from rclone_api.util import S3PathInfo, split_s3_path

        verbose = get_verbose(verbose)
        path_info: S3PathInfo = split_s3_path(remote)

        # path_info: S3PathInfo = split_s3_path(remote)
        remote = path_info.remote
        bucket_name = path_info.bucket

        remote = path_info.remote
        parsed: Parsed = self.config.parse()
        sections: dict[str, Section] = parsed.sections
        if remote not in sections:
            raise ValueError(
                f"Remote {remote} not found in rclone config, remotes are: {sections.keys()}"
            )

        section: Section = sections[remote]
        dst_type = section.type()
        if dst_type != "s3" and dst_type != "b2":
            raise ValueError(
                f"Remote {remote} is not an S3 remote, it is of type {dst_type}"
            )

        def get_provider_str(section=section) -> str | None:
            type: str = section.type()
            provider: str | None = section.provider()
            if provider is not None:
                return provider
            if type == "b2":
                return S3Provider.BACKBLAZE.value
            if type != "s3":
                raise ValueError(f"Remote {remote} is not an S3 remote")
            return S3Provider.S3.value

        provider: str
        if provided_provider_str := get_provider_str():
            if verbose:
                print(f"Using provided provider: {provided_provider_str}")
            provider = provided_provider_str
        else:
            if verbose:
                print(f"Using default provider: {S3Provider.S3.value}")
            provider = S3Provider.S3.value
        provider_enum = S3Provider.from_str(provider)

        s3_creds: S3Credentials = S3Credentials(
            bucket_name=bucket_name,
            provider=provider_enum,
            access_key_id=section.access_key_id(),
            secret_access_key=section.secret_access_key(),
            endpoint_url=section.endpoint(),
        )
        return s3_creds
```
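`get_s3_credentials` resolves the remote's config section into raw S3 credentials, mapping `b2` sections to the Backblaze provider. A hedged sketch (remote and bucket are placeholders):

```python
from pathlib import Path
from rclone_api.rclone_impl import RcloneImpl

rclone = RcloneImpl(Path("rclone.conf"))  # placeholder conf path
creds = rclone.get_s3_credentials("remote:my-bucket/prefix")
print(creds.bucket_name, creds.endpoint_url)  # fields read from the config section
```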
```python
    def copy_bytes(
        self,
        src: str,
        offset: int | SizeSuffix,
        length: int | SizeSuffix,
        outfile: Path,
        other_args: list[str] | None = None,
    ) -> Exception | None:
        """Copy a slice of bytes from the src file to dst."""
        offset = SizeSuffix(offset).as_int()
        length = SizeSuffix(length).as_int()
        cmd_list: list[str] = [
            "cat",
            "--offset",
            str(offset),
            "--count",
            str(length),
            src,
        ]
        if other_args:
            cmd_list.extend(other_args)
        try:
            cp = self._run(cmd_list, capture=outfile)
            if cp.returncode == 0:
                return None
            return Exception(cp.stderr)
        except subprocess.CalledProcessError as e:
            return e
```
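`copy_bytes` maps to `rclone cat --offset --count`, with the slice captured into `outfile` via the `capture=` path of `_run`. A hedged sketch (remote path and sizes are placeholders):

```python
from pathlib import Path
from rclone_api.rclone_impl import RcloneImpl

rclone = RcloneImpl(Path("rclone.conf"))  # placeholder conf path
# Fetch the first 1 MiB of a large remote object into a local file.
err = rclone.copy_bytes("remote:bucket/big.iso", offset=0,
                        length=1024 * 1024, outfile=Path("head.bin"))
assert err is None
```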
```python
    def copy_dir(
        self, src: str | Dir, dst: str | Dir, args: list[str] | None = None
    ) -> CompletedProcess:
        """Copy a directory from source to destination."""
        # convert src to str, also dst
        src = convert_to_str(src)
        dst = convert_to_str(dst)
        cmd_list: list[str] = ["copy", src, dst, "--s3-no-check-bucket"]
        if args is not None:
            cmd_list += args
        cp = self._run(cmd_list)
        return CompletedProcess.from_subprocess(cp)

    def copy_remote(
        self, src: Remote, dst: Remote, args: list[str] | None = None
    ) -> CompletedProcess:
        """Copy a remote to another remote."""
        cmd_list: list[str] = ["copy", str(src), str(dst), "--s3-no-check-bucket"]
        if args is not None:
            cmd_list += args
        # return self._run(cmd_list)
        cp = self._run(cmd_list)
        return CompletedProcess.from_subprocess(cp)

    def mount(
        self,
        src: Remote | Dir | str,
        outdir: Path,
        allow_writes: bool | None = False,
        transfers: int | None = None,
        use_links: bool | None = None,
        vfs_cache_mode: str | None = None,
        verbose: bool | None = None,
        cache_dir: Path | None = None,
        cache_dir_delete_on_exit: bool | None = None,
        log: Path | None = None,
        other_args: list[str] | None = None,
    ) -> Mount:
        """Mount a remote or directory to a local path.

        Args:
            src: Remote or directory to mount
            outdir: Local path to mount to

        Returns:
            CompletedProcess from the mount command execution

        Raises:
            subprocess.CalledProcessError: If the mount operation fails
        """
        from rclone_api.mount_util import clean_mount, prepare_mount

        allow_writes = allow_writes or False
        use_links = use_links or True
        verbose = get_verbose(verbose) or (log is not None)
        vfs_cache_mode = vfs_cache_mode or "full"
        clean_mount(outdir, verbose=verbose)
        prepare_mount(outdir, verbose=verbose)
        debug_fuse = log is not None
        src_str = convert_to_str(src)
        cmd_list: list[str] = ["mount", src_str, str(outdir)]
        if not allow_writes:
            cmd_list.append("--read-only")
        if use_links:
            cmd_list.append("--links")
        if vfs_cache_mode:
            cmd_list.append("--vfs-cache-mode")
            cmd_list.append(vfs_cache_mode)
        if cache_dir:
            cmd_list.append("--cache-dir")
            cmd_list.append(str(cache_dir.absolute()))
        if transfers is not None:
            cmd_list.append("--transfers")
            cmd_list.append(str(transfers))
        if debug_fuse:
            cmd_list.append("--debug-fuse")
        if verbose:
            cmd_list.append("-vvvv")
        if other_args:
            cmd_list += other_args
        proc = self._launch_process(cmd_list, log=log)
        mount_read_only = not allow_writes
        mount: Mount = Mount(
            src=src_str,
            mount_path=outdir,
            process=proc,
            read_only=mount_read_only,
            cache_dir=cache_dir,
            cache_dir_delete_on_exit=cache_dir_delete_on_exit,
        )
        return mount

    # Settings optimized for s3.
    def mount_s3(
        self,
        url: str,
        outdir: Path,
        allow_writes=False,
        vfs_cache_mode="full",
        dir_cache_time: str | None = "1h",
        attribute_timeout: str | None = "1h",
        vfs_disk_space_total_size: str | None = "100M",
        transfers: int | None = 128,
        modtime_strategy: (
            ModTimeStrategy | None
        ) = ModTimeStrategy.USE_SERVER_MODTIME,  # speeds up S3 operations
        vfs_read_chunk_streams: int | None = 16,
        vfs_read_chunk_size: str | None = "4M",
        vfs_fast_fingerprint: bool = True,
        # vfs-refresh
        vfs_refresh: bool = True,
        other_args: list[str] | None = None,
    ) -> Mount:
        """Mount a remote or directory to a local path.

        Args:
            src: Remote or directory to mount
            outdir: Local path to mount to
        """
        other_args = other_args or []
        if modtime_strategy is not None:
            other_args.append(f"--{modtime_strategy.value}")
        if (vfs_cache_mode == "full" or vfs_cache_mode == "writes") and (
            transfers is not None and "--transfers" not in other_args
        ):
            other_args.append("--transfers")
            other_args.append(str(transfers))
        if dir_cache_time is not None and "--dir-cache-time" not in other_args:
            other_args.append("--dir-cache-time")
            other_args.append(dir_cache_time)
        if (
            vfs_disk_space_total_size is not None
            and "--vfs-cache-max-size" not in other_args
        ):
            other_args.append("--vfs-cache-max-size")
            other_args.append(vfs_disk_space_total_size)
        if vfs_refresh and "--vfs-refresh" not in other_args:
            other_args.append("--vfs-refresh")
        if attribute_timeout is not None and "--attr-timeout" not in other_args:
            other_args.append("--attr-timeout")
            other_args.append(attribute_timeout)
        if vfs_read_chunk_streams:
            other_args.append("--vfs-read-chunk-streams")
            other_args.append(str(vfs_read_chunk_streams))
        if vfs_read_chunk_size:
            other_args.append("--vfs-read-chunk-size")
            other_args.append(vfs_read_chunk_size)
        if vfs_fast_fingerprint:
            other_args.append("--vfs-fast-fingerprint")

        other_args = other_args if other_args else None
        return self.mount(
            url,
            outdir,
            allow_writes=allow_writes,
            vfs_cache_mode=vfs_cache_mode,
            other_args=other_args,
        )
```
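`mount_s3` is `mount` with S3-leaning defaults (server modtime, chunked reads, a capped VFS cache) pushed through `other_args`. A hedged sketch (remote and mount point are placeholders):

```python
from pathlib import Path
from rclone_api.rclone_impl import RcloneImpl

rclone = RcloneImpl(Path("rclone.conf"))  # placeholder conf path
mount = rclone.mount_s3("remote:my-bucket", Path("./mnt"))  # read-only by default
# ... use ./mnt like a local filesystem while the mount process runs ...
```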
```python
    def serve_webdav(
        self,
        src: Remote | Dir | str,
        user: str,
        password: str,
        addr: str = "localhost:2049",
        allow_other: bool = False,
        other_args: list[str] | None = None,
    ) -> Process:
        """Serve a remote or directory via NFS.

        Args:
            src: Remote or directory to serve
            addr: Network address and port to serve on (default: localhost:2049)
            allow_other: Allow other users to access the share

        Returns:
            Process: The running webdev server process

        Raises:
            ValueError: If the NFS server fails to start
        """
        src_str = convert_to_str(src)
        cmd_list: list[str] = ["serve", "webdav", "--addr", addr, src_str]
        cmd_list.extend(["--user", user, "--pass", password])
        if allow_other:
            cmd_list.append("--allow-other")
        if other_args:
            cmd_list += other_args
        proc = self._launch_process(cmd_list)
        time.sleep(2)  # give it a moment to start
        if proc.poll() is not None:
            raise ValueError("NFS serve process failed to start")
        return proc

    def serve_http(
        self,
        src: str,
        cache_mode: str | None,
        addr: str | None = None,
        serve_http_log: Path | None = None,
        other_args: list[str] | None = None,
    ) -> HttpServer:
        """Serve a remote or directory via HTTP.

        Args:
            src: Remote or directory to serve
            addr: Network address and port to serve on (default: localhost:8080)
        """
        addr = addr or f"localhost:{find_free_port()}"
        _, subpath = src.split(":", 1)  # might not work on local paths.
        cmd_list: list[str] = [
            "serve",
            "http",
            "--addr",
            addr,
            src,
            "--vfs-disk-space-total-size",
            "0",
            "--vfs-read-chunk-size-limit",
            "512M",
        ]

        if cache_mode:
            cmd_list += [
                "--vfs-cache-mode",
                cache_mode,
            ]
        if serve_http_log:
            cmd_list += ["--log-file", str(serve_http_log)]
            cmd_list += ["-vvvv"]
        if other_args:
            cmd_list += other_args
        proc = self._launch_process(cmd_list, log=serve_http_log)
        time.sleep(2)
        if proc.poll() is not None:
            raise ValueError("HTTP serve process failed to start")
        out: HttpServer = HttpServer(
            url=f"http://{addr}", subpath=subpath, process=proc
        )
        return out
```
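`serve_http` picks a free local port when `addr` is omitted and fails fast if the child process exits within the two-second grace period. A hedged sketch (placeholder remote; `src` must contain a `:` for the subpath split):

```python
from pathlib import Path
from rclone_api.rclone_impl import RcloneImpl

rclone = RcloneImpl(Path("rclone.conf"))  # placeholder conf path
server = rclone.serve_http("remote:my-bucket", cache_mode="minimal")
print(server.url)  # e.g. http://localhost:<free-port>
```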
```python
    def config_paths(
        self, remote: str | None = None, obscure: bool = False, no_obscure: bool = False
    ) -> list[Path] | Exception:
        """Show the current configuration.

        Args:
            remote: Optional remote name to show configuration for
            obscure: Show obscured passwords
            no_obscure: Show passwords in plain text

        Returns:
            Configuration as text or an Exception if an error occurred
        """
        cmd_list: list[str] = ["config", "show"]

        if remote is not None:
            cmd_list.append(remote)

        if obscure:
            cmd_list.append("--obscure")

        if no_obscure:
            cmd_list.append("--no-obscure")

        try:
            cp = self._run(cmd_list, capture=True, check=True)
            stdout: str | bytes = cp.stdout
            if isinstance(stdout, bytes):
                stdout = stdout.decode("utf-8")
            out = _parse_paths(stdout)
            return out
        except subprocess.CalledProcessError as e:
            return e

    def size_files(
        self,
        src: str,
        files: list[str],
        fast_list: bool = False,  # Recommend that this is False
        other_args: list[str] | None = None,
        check: bool | None = False,
        verbose: bool | None = None,
    ) -> SizeResult | Exception:
        """Get the size of a list of files. Example of files items: "remote:bucket/to/file"."""
        verbose = get_verbose(verbose)
        check = get_check(check)
        if len(files) < 2:
            tmp = self.size_file(files[0])
            if isinstance(tmp, Exception):
                return tmp
            assert isinstance(tmp, SizeSuffix)
            return SizeResult(
                prefix=src, total_size=tmp.as_int(), file_sizes={files[0]: tmp.as_int()}
            )
        if fast_list or (other_args and "--fast-list" in other_args):
            warnings.warn(
                "It's not recommended to use --fast-list with size_files as this will perform poorly on large repositories since the entire repository has to be scanned."
            )
        files = list(files)
        all_files: list[File] = []
        # prefix, files = group_under_one_prefix(src, files)
        cmd = ["lsjson", src, "--files-only", "-R"]
        with TemporaryDirectory() as tmpdir:
            # print("files: " + ",".join(files))
            include_files_txt = Path(tmpdir) / "include_files.txt"
            include_files_txt.write_text("\n".join(files), encoding="utf-8")
            cmd += ["--files-from", str(include_files_txt)]
            if fast_list:
                cmd.append("--fast-list")
            if other_args:
                cmd += other_args
            cp = self._run(cmd, check=check)

            if cp.returncode != 0:
                if check:
                    raise ValueError(f"Error getting file sizes: {cp.stderr}")
                else:
                    warnings.warn(f"Error getting file sizes: {cp.stderr}")
            stdout = cp.stdout
            pieces = src.split(":", 1)
            remote_name = pieces[0]
            parent_path: str | None
            if len(pieces) > 1:
                parent_path = pieces[1]
            else:
                parent_path = None
            remote = Remote(name=remote_name, rclone=self)
            paths: list[RPath] = RPath.from_json_str(
                stdout, remote, parent_path=parent_path
            )
            # print(paths)
            all_files += [File(p) for p in paths]
        file_sizes: dict[str, int] = {}
        f: File
        for f in all_files:
            p = f.to_string(include_remote=True)
            if p in file_sizes:
                warnings.warn(f"Duplicate file found: {p}")
                continue
            size = f.size
            if size == 0:
                warnings.warn(f"File size is 0: {p}")
            file_sizes[p] = f.size
        total_size = sum(file_sizes.values())
        file_sizes_path_corrected: dict[str, int] = {}
        for path, size in file_sizes.items():
            # remove the prefix
            path_path = Path(path)
            path_str = path_path.relative_to(src).as_posix()
            file_sizes_path_corrected[path_str] = size
        out: SizeResult = SizeResult(
            prefix=src, total_size=total_size, file_sizes=file_sizes_path_corrected
        )
        return out
```
1
|
+
"""
|
2
|
+
Unit test file.
|
3
|
+
"""
|
4
|
+
|
5
|
+
import os
|
6
|
+
import random
|
7
|
+
import subprocess
|
8
|
+
import time
|
9
|
+
import tracemalloc
|
10
|
+
import warnings
|
11
|
+
from concurrent.futures import Future, ThreadPoolExecutor
|
12
|
+
from datetime import datetime
|
13
|
+
from fnmatch import fnmatch
|
14
|
+
from pathlib import Path
|
15
|
+
from tempfile import TemporaryDirectory
|
16
|
+
from typing import Generator
|
17
|
+
|
18
|
+
from rclone_api import Dir
|
19
|
+
from rclone_api.completed_process import CompletedProcess
|
20
|
+
from rclone_api.config import Config, Parsed, Section
|
21
|
+
from rclone_api.convert import convert_to_filestr_list, convert_to_str
|
22
|
+
from rclone_api.deprecated import deprecated
|
23
|
+
from rclone_api.detail.walk import walk
|
24
|
+
from rclone_api.diff import DiffItem, DiffOption, diff_stream_from_running_process
|
25
|
+
from rclone_api.dir_listing import DirListing
|
26
|
+
from rclone_api.exec import RcloneExec
|
27
|
+
from rclone_api.file import File
|
28
|
+
from rclone_api.file_stream import FilesStream
|
29
|
+
from rclone_api.fs import FSPath, RemoteFS
|
30
|
+
from rclone_api.group_files import group_files
|
31
|
+
from rclone_api.http_server import HttpServer
|
32
|
+
from rclone_api.mount import Mount
|
33
|
+
from rclone_api.process import Process
|
34
|
+
from rclone_api.remote import Remote
|
35
|
+
from rclone_api.rpath import RPath
|
36
|
+
from rclone_api.s3.create import S3Credentials
|
37
|
+
from rclone_api.s3.types import (
|
38
|
+
S3Provider,
|
39
|
+
)
|
40
|
+
from rclone_api.types import (
|
41
|
+
ListingOption,
|
42
|
+
ModTimeStrategy,
|
43
|
+
Order,
|
44
|
+
PartInfo,
|
45
|
+
SizeResult,
|
46
|
+
SizeSuffix,
|
47
|
+
)
|
48
|
+
from rclone_api.util import (
|
49
|
+
find_free_port,
|
50
|
+
get_check,
|
51
|
+
get_rclone_exe,
|
52
|
+
get_verbose,
|
53
|
+
to_path,
|
54
|
+
)
|
55
|
+
|
56
|
+
# Enable tracing memory usage always
|
57
|
+
tracemalloc.start()
|
58
|
+
|
59
|
+
|
60
|
+
def rclone_verbose(verbose: bool | None) -> bool:
|
61
|
+
if verbose is not None:
|
62
|
+
os.environ["RCLONE_API_VERBOSE"] = "1" if verbose else "0"
|
63
|
+
return bool(int(os.getenv("RCLONE_API_VERBOSE", "0")))
|
64
|
+
|
65
|
+
|
66
|
+
def _to_rclone_conf(config: Config | Path) -> Config:
|
67
|
+
if isinstance(config, Path):
|
68
|
+
content = config.read_text(encoding="utf-8")
|
69
|
+
return Config(content)
|
70
|
+
else:
|
71
|
+
return config
|
72
|
+
|
73
|
+
|
74
|
+
def _parse_paths(src: str) -> list[Path] | Exception:
|
75
|
+
# Config file: C:\Users\niteris\AppData\Roaming\rclone\rclone.conf
|
76
|
+
# Cache dir: C:\Users\niteris\AppData\Local\rclone
|
77
|
+
# Temp dir: C:\Users\niteris\AppData\Local\Temp
|
78
|
+
lines = src.splitlines()
|
79
|
+
paths: list[Path] = []
|
80
|
+
for line in lines:
|
81
|
+
try:
|
82
|
+
parts = line.split(":")
|
83
|
+
if len(parts) != 2:
|
84
|
+
continue
|
85
|
+
path = Path(parts[1].strip())
|
86
|
+
paths.append(path)
|
87
|
+
except Exception as e:
|
88
|
+
return e
|
89
|
+
return paths
|
90
|
+
|
91
|
+
|
92
|
+
class RcloneImpl:
|
93
|
+
def __init__(
|
94
|
+
self, rclone_conf: Path | Config | None, rclone_exe: Path | None = None
|
95
|
+
) -> None:
|
96
|
+
if isinstance(rclone_conf, Path):
|
97
|
+
if not rclone_conf.exists():
|
98
|
+
raise ValueError(f"Rclone config file not found: {rclone_conf}")
|
99
|
+
if rclone_conf is None:
|
100
|
+
from rclone_api.config import find_conf_file
|
101
|
+
|
102
|
+
maybe_path = find_conf_file(self)
|
103
|
+
if not isinstance(maybe_path, Path):
|
104
|
+
raise ValueError("Rclone config file not found")
|
105
|
+
rclone_conf = _to_rclone_conf(maybe_path)
|
106
|
+
self._exec = RcloneExec(rclone_conf, get_rclone_exe(rclone_exe))
|
107
|
+
self.config: Config = _to_rclone_conf(rclone_conf)
|
108
|
+
|
109
|
+
    def _run(
        self, cmd: list[str], check: bool = False, capture: bool | Path | None = None
    ) -> subprocess.CompletedProcess:
        return self._exec.execute(cmd, check=check, capture=capture)

    def _launch_process(
        self, cmd: list[str], capture: bool | None = None, log: Path | None = None
    ) -> Process:
        return self._exec.launch_process(cmd, capture=capture, log=log)

    def _get_tmp_mount_dir(self) -> Path:
        return Path("tmp_mnts")

    def _get_cache_dir(self) -> Path:
        return Path("cache")

    def webgui(self, other_args: list[str] | None = None) -> Process:
        """Launch the Rclone web GUI."""
        cmd = ["rcd", "--rc-web-gui"]
        if other_args:
            cmd += other_args
        return self._launch_process(cmd, capture=False)

    def filesystem(self, src: str) -> RemoteFS:
        return RemoteFS(self.config, src)

    def cwd(self, src: str) -> FSPath:
        return self.filesystem(src).cwd()

    def launch_server(
        self,
        addr: str,
        user: str | None = None,
        password: str | None = None,
        other_args: list[str] | None = None,
    ) -> Process:
        """Launch the Rclone server so it can receive commands."""
        cmd = ["rcd"]
        if addr is not None:
            cmd += ["--rc-addr", addr]
        if user is not None:
            cmd += ["--rc-user", user]
        if password is not None:
            cmd += ["--rc-pass", password]
        if other_args:
            cmd += other_args
        out = self._launch_process(cmd, capture=False)
        time.sleep(1)  # Give it some time to launch
        return out

    def remote_control(
        self,
        addr: str,
        user: str | None = None,
        password: str | None = None,
        capture: bool | None = None,
        other_args: list[str] | None = None,
    ) -> CompletedProcess:
        cmd = ["rc"]
        if addr:
            cmd += ["--rc-addr", addr]
        if user is not None:
            cmd += ["--rc-user", user]
        if password is not None:
            cmd += ["--rc-pass", password]
        if other_args:
            cmd += other_args
        cp = self._run(cmd, capture=capture)
        return CompletedProcess.from_subprocess(cp)

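    # Usage sketch for the rcd/rc pair above (hedged: the address,
    # credentials, and the "core/stats" rc command are illustrative,
    # not part of this API):
    #
    #   rclone = RcloneImpl(rclone_conf=None)
    #   proc = rclone.launch_server(addr="localhost:5572", user="u", password="p")
    #   rclone.remote_control(
    #       "localhost:5572", user="u", password="p", other_args=["core/stats"]
    #   )
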
    def obscure(self, password: str) -> str:
        """Obscure a password for use in rclone config files."""
        cmd_list: list[str] = ["obscure", password]
        cp = self._run(cmd_list)
        return cp.stdout.strip()

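    # Minimal sketch: obscure a plaintext secret before pasting it into a
    # config section by hand (the password value is hypothetical):
    #
    #   hidden = rclone.obscure("my-plaintext-password")
    #   print(hidden)  # suitable for a `pass = ...` line in rclone.conf
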
    def ls_stream(
        self,
        src: str,
        max_depth: int = -1,
        fast_list: bool = False,
    ) -> FilesStream:
        """
        List files in the given path.

        Args:
            src: Remote path to list
            max_depth: Maximum recursion depth (-1 for unlimited)
            fast_list: Use fast list (only use when listing the entire data repository from the root/bucket, or when the repository is small)
        """
        cmd = ["lsjson", src, "--files-only"]
        recurse = max_depth < 0 or max_depth > 1
        if recurse:
            cmd.append("-R")
            if max_depth > 1:
                cmd += ["--max-depth", str(max_depth)]
        if fast_list:
            cmd.append("--fast-list")
        streamer = FilesStream(src, self._launch_process(cmd, capture=True))
        return streamer

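    # Streaming sketch (assumes a configured remote named "dst:"; the bucket
    # name is hypothetical). Paging keeps memory bounded on huge listings:
    #
    #   with rclone.ls_stream("dst:Bucket", max_depth=-1) as stream:
    #       for page in stream.files_paged(page_size=1000):
    #           for file in page:
    #               print(file)
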
    def save_to_db(
        self,
        src: str,
        db_url: str,
        max_depth: int = -1,
        fast_list: bool = False,
    ) -> None:
        """
        Save files to a database (sqlite, mysql, postgres).

        Args:
            src: Remote path to list; this will be used to populate an entire table, so always use the root-most path.
            db_url: Database URL, like sqlite:///data.db or mysql://user:pass@localhost/db or postgres://user:pass@localhost/db
            max_depth: Maximum depth to traverse (-1 for unlimited)
            fast_list: Use fast list (only use when listing the entire data repository from the root/bucket)
        """
        from rclone_api.db import DB

        db = DB(db_url)
        with self.ls_stream(src, max_depth, fast_list) as stream:
            for page in stream.files_paged(page_size=10000):
                db.add_files(page)

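    # Sketch: snapshot an entire bucket listing into a local sqlite file
    # (remote and database path are hypothetical):
    #
    #   rclone.save_to_db("dst:Bucket", "sqlite:///files.db", max_depth=-1)
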
    def ls(
        self,
        src: Dir | Remote | str | None = None,
        max_depth: int | None = None,
        glob: str | None = None,
        order: Order = Order.NORMAL,
        listing_option: ListingOption = ListingOption.ALL,
    ) -> DirListing:
        """List files in the given path.

        Args:
            src: Remote path or Remote object to list
            max_depth: Maximum recursion depth (0 means no recursion)

        Returns:
            List of File objects found at the path
        """

        if src is None:
            # list remotes instead
            list_remotes: list[Remote] = self.listremotes()
            dirs: list[Dir] = [Dir(remote) for remote in list_remotes]
            for d in dirs:
                d.path.path = ""
            rpaths = [d.path for d in dirs]
            return DirListing(rpaths)

        if isinstance(src, str):
            src = Dir(
                to_path(src, self)
            )  # assume it's a directory if ls is being called.

        cmd = ["lsjson"]
        if max_depth is not None:
            if max_depth < 0:
                cmd.append("--recursive")
            if max_depth > 0:
                cmd.append("--max-depth")
                cmd.append(str(max_depth))
        if listing_option != ListingOption.ALL:
            cmd.append(f"--{listing_option.value}")

        cmd.append(str(src))
        remote = src.remote if isinstance(src, Dir) else src
        assert isinstance(remote, Remote)

        cp = self._run(cmd, check=True)
        text = cp.stdout
        parent_path: str | None = None
        if isinstance(src, Dir):
            parent_path = src.path.path
        paths: list[RPath] = RPath.from_json_str(text, remote, parent_path=parent_path)
        for o in paths:
            o.set_rclone(self)

        # do we have a glob pattern?
        if glob is not None:
            paths = [p for p in paths if fnmatch(p.path, glob)]

        if order == Order.REVERSE:
            paths.reverse()
        elif order == Order.RANDOM:
            random.shuffle(paths)
        return DirListing(paths)

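    # Listing sketch (hypothetical remote and path). With src=None the call
    # lists the configured remotes instead:
    #
    #   listing = rclone.ls("dst:Bucket/path", max_depth=0, glob="*.zst")
    #   for f in listing.files:
    #       print(f)
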
    def print(self, src: str) -> Exception | None:
        """Print the contents of a file."""
        try:
            text_or_err = self.read_text(src)
            if isinstance(text_or_err, Exception):
                return text_or_err
            print(text_or_err)
        except Exception as e:
            return e
        return None

    def stat(self, src: str) -> File | Exception:
        """Get the status of a file or directory."""
        dirlist: DirListing = self.ls(src)
        if len(dirlist.files) == 0:
            return FileNotFoundError(f"File not found: {src}")
        try:
            file: File = dirlist.files[0]
            return file
        except Exception as e:
            return e

    def modtime(self, src: str) -> str | Exception:
        """Get the modification time of a file or directory."""
        try:
            file: File | Exception = self.stat(src)
            if isinstance(file, Exception):
                return file
            return file.mod_time()
        except Exception as e:
            return e

    def modtime_dt(self, src: str) -> datetime | Exception:
        """Get the modification time of a file or directory as a datetime."""
        modtime: str | Exception = self.modtime(src)
        if isinstance(modtime, Exception):
            return modtime
        return datetime.fromisoformat(modtime)

    def listremotes(self) -> list[Remote]:
        cmd = ["listremotes"]
        cp = self._run(cmd)
        text: str = cp.stdout
        tmp = text.splitlines()
        tmp = [t.strip() for t in tmp]
        # strip only the trailing ":", not every colon in the line
        tmp = [t.removesuffix(":") for t in tmp]
        out = [Remote(name=t, rclone=self) for t in tmp]
        return out

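    # Metadata sketch (hypothetical path). stat/modtime return exceptions as
    # values instead of raising, so check the result type:
    #
    #   file_or_err = rclone.stat("dst:Bucket/path/file.bin")
    #   if not isinstance(file_or_err, Exception):
    #       when = rclone.modtime_dt("dst:Bucket/path/file.bin")
    #       print(when)
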
    def diff(
        self,
        src: str,
        dst: str,
        min_size: (
            str | None
        ) = None,  # e.g. "1MB" - see rclone documentation: https://rclone.org/commands/rclone_check/
        max_size: (
            str | None
        ) = None,  # e.g. "1GB" - see rclone documentation: https://rclone.org/commands/rclone_check/
        diff_option: DiffOption = DiffOption.COMBINED,
        fast_list: bool = True,
        size_only: bool | None = None,
        checkers: int | None = None,
        other_args: list[str] | None = None,
    ) -> Generator[DiffItem, None, None]:
        """Be extra careful with the src and dst values. If you are off by one
        parent directory, you will get a huge amount of false diffs."""
        other_args = other_args or []
        if checkers is None or checkers < 1:
            checkers = 1000
        cmd = [
            "check",
            src,
            dst,
            "--checkers",
            str(checkers),
            "--log-level",
            "INFO",
            f"--{diff_option.value}",
            "-",
        ]
        if size_only is None:
            size_only = diff_option in [
                DiffOption.MISSING_ON_DST,
                DiffOption.MISSING_ON_SRC,
            ]
        if size_only:
            cmd += ["--size-only"]
        if fast_list:
            cmd += ["--fast-list"]
        if min_size:
            cmd += ["--min-size", min_size]
        if max_size:
            cmd += ["--max-size", max_size]
        if diff_option == DiffOption.MISSING_ON_DST:
            cmd += ["--one-way"]
        if other_args:
            cmd += other_args
        proc = self._launch_process(cmd, capture=True)
        item: DiffItem
        for item in diff_stream_from_running_process(
            running_process=proc, src_slug=src, dst_slug=dst, diff_option=diff_option
        ):
            if item is None:
                break
            yield item

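    # Diff sketch (hypothetical remotes). Items stream as `rclone check`
    # produces them, so large trees can be compared without buffering:
    #
    #   for item in rclone.diff("src:BucketA", "dst:BucketB"):
    #       print(item)
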
    def walk(
        self,
        src: Dir | Remote | str,
        max_depth: int = -1,
        breadth_first: bool = True,
        order: Order = Order.NORMAL,
    ) -> Generator[DirListing, None, None]:
        """Walk through the given path recursively.

        Args:
            src: Remote path or Remote object to walk through
            max_depth: Maximum depth to traverse (-1 for unlimited)

        Yields:
            DirListing: Directory listing for each directory encountered
        """
        dir_obj: Dir
        if isinstance(src, Dir):
            # Create a Remote object for the path
            remote = src.remote
            rpath = RPath(
                remote=remote,
                path=src.path.path,
                name=src.path.name,
                size=0,
                mime_type="inode/directory",
                mod_time="",
                is_dir=True,
            )
            rpath.set_rclone(self)
            dir_obj = Dir(rpath)
        elif isinstance(src, str):
            dir_obj = Dir(to_path(src, self))
        elif isinstance(src, Remote):
            dir_obj = Dir(src)
        else:
            # The previous `assert f"Invalid type..."` asserted a non-empty
            # string and could never fire; fail explicitly instead.
            raise TypeError(f"Invalid type for path: {type(src)}")

        yield from walk(
            dir_obj, max_depth=max_depth, breadth_first=breadth_first, order=order
        )

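    # Walk sketch (hypothetical remote). Each yielded DirListing covers one
    # directory, so deep trees can be traversed incrementally:
    #
    #   for dirlisting in rclone.walk("dst:Bucket", max_depth=2):
    #       for f in dirlisting.files:
    #           print(f)
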
    def scan_missing_folders(
        self,
        src: Dir | Remote | str,
        dst: Dir | Remote | str,
        max_depth: int = -1,
        order: Order = Order.NORMAL,
    ) -> Generator[Dir, None, None]:
        """Scan for folders that exist under src but are missing under dst.

        WORK IN PROGRESS!!

        Args:
            src: Source directory or Remote to walk through
            dst: Destination directory or Remote to walk through
            max_depth: Maximum depth to traverse (-1 for unlimited)

        Yields:
            Dir: Each directory found under src that is missing on dst
        """
        from rclone_api.scan_missing_folders import scan_missing_folders

        src_dir = Dir(to_path(src, self))
        dst_dir = Dir(to_path(dst, self))
        yield from scan_missing_folders(
            src=src_dir, dst=dst_dir, max_depth=max_depth, order=order
        )

    def cleanup(
        self, src: str, other_args: list[str] | None = None
    ) -> CompletedProcess:
        """Run `rclone cleanup`: empty the trash or remove old file versions, where the remote supports it."""
        # rclone cleanup remote:path [flags]
        cmd = ["cleanup", src]
        if other_args:
            cmd += other_args
        out = self._run(cmd)
        return CompletedProcess.from_subprocess(out)

    def get_verbose(self) -> bool:
        return get_verbose(None)

    def copy_to(
        self,
        src: File | str,
        dst: File | str,
        check: bool | None = None,
        verbose: bool | None = None,
        other_args: list[str] | None = None,
    ) -> CompletedProcess:
        """Copy one file from source to destination.

        Warning - slow.
        """
        check = get_check(check)
        verbose = get_verbose(verbose)
        src = src if isinstance(src, str) else str(src.path)
        dst = dst if isinstance(dst, str) else str(dst.path)
        cmd_list: list[str] = ["copyto", src, dst, "--s3-no-check-bucket"]
        if other_args is not None:
            cmd_list += other_args
        cp = self._run(cmd_list, check=check)
        return CompletedProcess.from_subprocess(cp)

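    # Single-file copy sketch (hypothetical paths); `copyto` renames in
    # flight, so the destination is a file path rather than a directory:
    #
    #   rclone.copy_to("src:Bucket/a.txt", "dst:Bucket/renamed.txt", check=True)
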
    def copy_files(
        self,
        src: str,
        dst: str,
        files: list[str] | Path,
        check: bool | None = None,
        max_backlog: int | None = None,
        verbose: bool | None = None,
        checkers: int | None = None,
        transfers: int | None = None,
        low_level_retries: int | None = None,
        retries: int | None = None,
        retries_sleep: str | None = None,
        metadata: bool | None = None,
        timeout: str | None = None,
        max_partition_workers: int | None = None,
        multi_thread_streams: int | None = None,
        other_args: list[str] | None = None,
    ) -> list[CompletedProcess]:
        """Copy multiple files from source to destination.

        Args:
            src: Root path on the source remote
            dst: Root path on the destination remote
            files: Relative file paths (or a Path to a text file with one path per line)
        """
        check = get_check(check)
        max_partition_workers = max_partition_workers or 1
        low_level_retries = low_level_retries or 10
        retries = retries or 3
        other_args = other_args or []
        other_args.append("--s3-no-check-bucket")
        checkers = checkers or 1000
        transfers = transfers or 32
        verbose = get_verbose(verbose)
        payload: list[str] = (
            files
            if isinstance(files, list)
            else [f.strip() for f in files.read_text().splitlines() if f.strip()]
        )
        if len(payload) == 0:
            return []

        for p in payload:
            if ":" in p:
                raise ValueError(
                    f"Invalid file path, contains a remote, which is not allowed for copy_files: {p}"
                )

        using_fast_list = "--fast-list" in other_args
        if using_fast_list:
            warnings.warn(
                "It's not recommended to use --fast-list with copy_files as this will perform poorly on large repositories since the entire repository has to be scanned."
            )

        if max_partition_workers > 1:
            datalists: dict[str, list[str]] = group_files(
                payload, fully_qualified=False
            )
        else:
            datalists = {"": payload}
        out: list[CompletedProcess] = []

        futures: list[Future] = []

        with ThreadPoolExecutor(max_workers=max_partition_workers) as executor:
            for common_prefix, files in datalists.items():

                def _task(
                    files: list[str] | Path = files,
                    common_prefix: str = common_prefix,  # bind now; late binding would read the wrong partition
                ) -> subprocess.CompletedProcess:
                    with TemporaryDirectory() as tmpdir:
                        filelist: list[str] = []
                        filepath: Path
                        if isinstance(files, list):
                            include_files_txt = Path(tmpdir) / "include_files.txt"
                            include_files_txt.write_text(
                                "\n".join(files), encoding="utf-8"
                            )
                            filelist = list(files)
                            filepath = Path(include_files_txt)
                        elif isinstance(files, Path):
                            filelist = [
                                f.strip()
                                for f in files.read_text().splitlines()
                                if f.strip()
                            ]
                            filepath = files
                        if common_prefix:
                            src_path = f"{src}/{common_prefix}"
                            dst_path = f"{dst}/{common_prefix}"
                        else:
                            src_path = src
                            dst_path = dst

                        if verbose:
                            nfiles = len(filelist)
                            files_fqdn = [f" {src_path}/{f}" for f in filelist]
                            print(f"Copying {nfiles} files:")
                            chunk_size = 100
                            for i in range(0, nfiles, chunk_size):
                                chunk = files_fqdn[i : i + chunk_size]
                                files_str = "\n".join(chunk)
                                print(f"{files_str}")
                        cmd_list: list[str] = [
                            "copy",
                            src_path,
                            dst_path,
                            "--files-from",
                            str(filepath),
                            "--checkers",
                            str(checkers),
                            "--transfers",
                            str(transfers),
                            "--low-level-retries",
                            str(low_level_retries),
                            "--retries",
                            str(retries),
                        ]
                        if metadata:
                            cmd_list.append("--metadata")
                        if retries_sleep is not None:
                            cmd_list += ["--retries-sleep", retries_sleep]
                        if timeout is not None:
                            cmd_list += ["--timeout", timeout]
                        if max_backlog is not None:
                            cmd_list += ["--max-backlog", str(max_backlog)]
                        if multi_thread_streams is not None:
                            cmd_list += [
                                "--multi-thread-streams",
                                str(multi_thread_streams),
                            ]
                        if verbose:
                            if not any(["-v" in x for x in other_args]):
                                cmd_list.append("-vvvv")
                            if not any(["--progress" in x for x in other_args]):
                                cmd_list.append("--progress")
                        if other_args:
                            cmd_list += other_args
                        cp = self._run(cmd_list, capture=not verbose)
                        return cp

                fut: Future = executor.submit(_task)
                futures.append(fut)
            for fut in futures:
                cp: subprocess.CompletedProcess = fut.result()
                assert cp is not None
                out.append(CompletedProcess.from_subprocess(cp))
                if cp.returncode != 0:
                    if check:
                        raise ValueError(f"Error copying files: {cp.stderr}")
                    else:
                        warnings.warn(f"Error copying files: {cp.stderr}")
        return out

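    # Bulk copy sketch (hypothetical remotes). Paths are relative to src/dst
    # and must not carry a remote prefix; partitioning only kicks in when
    # max_partition_workers > 1:
    #
    #   rclone.copy_files(
    #       src="src:Bucket",
    #       dst="dst:Bucket",
    #       files=["path/a.bin", "path/b.bin"],
    #   )
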
    def copy(
        self,
        src: Dir | str,
        dst: Dir | str,
        check: bool | None = None,
        transfers: int | None = None,
        checkers: int | None = None,
        multi_thread_streams: int | None = None,
        low_level_retries: int | None = None,
        retries: int | None = None,
        other_args: list[str] | None = None,
    ) -> CompletedProcess:
        """Copy files from source to destination.

        Args:
            src: Source directory
            dst: Destination directory
        """
        src_dir = convert_to_str(src)
        dst_dir = convert_to_str(dst)
        check = get_check(check)
        checkers = checkers or 1000
        transfers = transfers or 32
        low_level_retries = low_level_retries or 10
        retries = retries or 3
        cmd_list: list[str] = ["copy", src_dir, dst_dir]
        cmd_list += ["--checkers", str(checkers)]
        cmd_list += ["--transfers", str(transfers)]
        cmd_list += ["--low-level-retries", str(low_level_retries)]
        cmd_list += ["--retries", str(retries)]  # was normalized above but never passed through
        cmd_list.append("--s3-no-check-bucket")
        if multi_thread_streams is not None:
            cmd_list += ["--multi-thread-streams", str(multi_thread_streams)]
        if other_args:
            cmd_list += other_args
        cp = self._run(cmd_list, check=check, capture=False)
        return CompletedProcess.from_subprocess(cp)

    def purge(self, src: Dir | str) -> CompletedProcess:
        """Purge a directory."""
        # path should always be a string
        src = src if isinstance(src, str) else str(src.path)
        cmd_list: list[str] = ["purge", str(src)]
        cp = self._run(cmd_list)
        return CompletedProcess.from_subprocess(cp)

    def delete_files(
        self,
        files: str | File | list[str] | list[File],
        check: bool | None = None,
        rmdirs=False,
        verbose: bool | None = None,
        max_partition_workers: int | None = None,
        other_args: list[str] | None = None,
    ) -> CompletedProcess:
        """Delete files (to remove a whole directory, see purge)."""
        check = get_check(check)
        verbose = get_verbose(verbose)
        payload: list[str] = convert_to_filestr_list(files)
        if len(payload) == 0:
            if verbose:
                print("No files to delete")
            cp = subprocess.CompletedProcess(
                args=["rclone", "delete", "--files-from", "[]"],
                returncode=0,
                stdout="",
                stderr="",
            )
            return CompletedProcess.from_subprocess(cp)

        datalists: dict[str, list[str]] = group_files(payload)
        completed_processes: list[subprocess.CompletedProcess] = []

        futures: list[Future] = []

        with ThreadPoolExecutor(max_workers=max_partition_workers) as executor:
            for remote, files in datalists.items():

                def _task(
                    files=files, check=check, remote=remote
                ) -> subprocess.CompletedProcess:
                    with TemporaryDirectory() as tmpdir:
                        include_files_txt = Path(tmpdir) / "include_files.txt"
                        include_files_txt.write_text("\n".join(files), encoding="utf-8")

                        cmd_list: list[str] = [
                            "delete",
                            remote,
                            "--files-from",
                            str(include_files_txt),
                            "--checkers",
                            "1000",
                            "--transfers",
                            "1000",
                        ]
                        if verbose:
                            cmd_list.append("-vvvv")
                        if rmdirs:
                            cmd_list.append("--rmdirs")
                        if other_args:
                            cmd_list += other_args
                        out = self._run(cmd_list, check=check)
                        if out.returncode != 0:
                            if check:
                                completed_processes.append(out)
                                raise ValueError(f"Error deleting files: {out}")
                            else:
                                warnings.warn(f"Error deleting files: {out}")
                        return out

                fut: Future = executor.submit(_task)
                futures.append(fut)

            for fut in futures:
                out = fut.result()
                assert out is not None
                completed_processes.append(out)

        return CompletedProcess(completed_processes)

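    # Deletion sketch (hypothetical paths). Fully-qualified paths are grouped
    # per remote and deleted in parallel batches:
    #
    #   rclone.delete_files(
    #       ["dst:Bucket/path/a.bin", "dst:Bucket/path/b.bin"], rmdirs=True
    #   )
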
    @deprecated("delete_files")
    def deletefiles(
        self, files: str | File | list[str] | list[File]
    ) -> CompletedProcess:
        out = self.delete_files(files)
        return out

    def exists(self, src: Dir | Remote | str | File) -> bool:
        """Check if a file or directory exists."""
        arg: str = convert_to_str(src)
        assert isinstance(arg, str)
        try:
            dir_listing = self.ls(arg)
            return len(dir_listing.dirs) > 0 or len(dir_listing.files) > 0
        except subprocess.CalledProcessError:
            return False

    def is_synced(self, src: str | Dir, dst: str | Dir) -> bool:
        """Check if two directories are in sync."""
        src = convert_to_str(src)
        dst = convert_to_str(dst)
        cmd_list: list[str] = ["check", str(src), str(dst)]
        try:
            self._run(cmd_list, check=True)
            return True
        except subprocess.CalledProcessError:
            return False

    def copy_file_s3_resumable(
        self,
        src: str,  # src:/Bucket/path/myfile.large.zst
        dst: str,  # dst:/Bucket/path/myfile.large
        part_infos: list[PartInfo] | None = None,
        upload_threads: int = 8,
        merge_threads: int = 4,
    ) -> Exception | None:
        """Copy parts of a file from source to destination."""
        from rclone_api.detail.copy_file_parts_resumable import (
            copy_file_parts_resumable,
        )

        if dst.endswith("/"):
            dst = dst[:-1]
        dst_dir = f"{dst}-parts"

        out = copy_file_parts_resumable(
            self=self,
            src=src,
            dst_dir=dst_dir,
            part_infos=part_infos,
            upload_threads=upload_threads,
            merge_threads=merge_threads,
        )
        return out

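    # Resumable copy sketch (hypothetical paths). Parts are staged under a
    # "<dst>-parts" folder so an interrupted transfer can pick up where it
    # left off:
    #
    #   err = rclone.copy_file_s3_resumable(
    #       "src:Bucket/big.bin.zst", "dst:Bucket/big.bin"
    #   )
    #   assert err is None
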
    def write_text(
        self,
        dst: str,
        text: str,
    ) -> Exception | None:
        """Write text to a file."""
        data = text.encode("utf-8")
        return self.write_bytes(dst=dst, data=data)

    def write_bytes(
        self,
        dst: str,
        data: bytes,
    ) -> Exception | None:
        """Write bytes to a file."""
        with TemporaryDirectory() as tmpdir:
            tmpfile = Path(tmpdir) / "file.bin"
            tmpfile.write_bytes(data)
            completed_proc = self.copy_to(str(tmpfile), dst, check=True)
            if completed_proc.returncode != 0:
                return Exception(f"Failed to write bytes to {dst}", completed_proc)
        return None

    def read_bytes(self, src: str) -> bytes | Exception:
        """Read bytes from a file."""
        with TemporaryDirectory() as tmpdir:
            tmpfile = Path(tmpdir) / "file.bin"
            completed_proc = self.copy_to(src, str(tmpfile), check=True)
            if completed_proc.returncode != 0:
                return Exception(f"Failed to read bytes from {src}", completed_proc)

            if not tmpfile.exists():
                return Exception(f"Failed to read bytes from {src}, file not found")
            try:
                return tmpfile.read_bytes()
            except Exception as e:
                return Exception(f"Failed to read bytes from {src}", e)

    def read_text(self, src: str) -> str | Exception:
        """Read text from a file."""
        data = self.read_bytes(src)
        if isinstance(data, Exception):
            return data
        try:
            return data.decode("utf-8")
        except UnicodeDecodeError as e:
            return Exception(f"Failed to decode text from {src}", e)

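    # Round-trip sketch (hypothetical path). Errors come back as values,
    # not raised exceptions:
    #
    #   err = rclone.write_text("dst:Bucket/notes.txt", "hello")
    #   assert err is None
    #   text = rclone.read_text("dst:Bucket/notes.txt")
    #   assert not isinstance(text, Exception)
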
    def size_file(self, src: str) -> SizeSuffix | Exception:
        """Get the size of a single file."""
        # NOTE: size_files(src_parent, [src_name]) can't be used here because
        # it may return more than one result; is this a directory?
        dirlist: DirListing = self.ls(
            src, listing_option=ListingOption.FILES_ONLY, max_depth=0
        )
        if len(dirlist.files) == 0:
            return FileNotFoundError(f"File not found: {src}")
        if len(dirlist.files) > 1:
            return Exception(f"More than one file found: {src}")
        file: File = dirlist.files[0]
        return SizeSuffix(file.size)

    def get_s3_credentials(
        self, remote: str, verbose: bool | None = None
    ) -> S3Credentials:
        from rclone_api.util import S3PathInfo, split_s3_path

        verbose = get_verbose(verbose)
        path_info: S3PathInfo = split_s3_path(remote)
        remote = path_info.remote
        bucket_name = path_info.bucket

        parsed: Parsed = self.config.parse()
        sections: dict[str, Section] = parsed.sections
        if remote not in sections:
            raise ValueError(
                f"Remote {remote} not found in rclone config, remotes are: {sections.keys()}"
            )

        section: Section = sections[remote]
        dst_type = section.type()
        if dst_type != "s3" and dst_type != "b2":
            raise ValueError(
                f"Remote {remote} is not an S3 remote, it is of type {dst_type}"
            )

        def get_provider_str(section=section) -> str | None:
            type: str = section.type()
            provider: str | None = section.provider()
            if provider is not None:
                return provider
            if type == "b2":
                return S3Provider.BACKBLAZE.value
            if type != "s3":
                raise ValueError(f"Remote {remote} is not an S3 remote")
            return S3Provider.S3.value

        provider: str
        if provided_provider_str := get_provider_str():
            if verbose:
                print(f"Using provided provider: {provided_provider_str}")
            provider = provided_provider_str
        else:
            if verbose:
                print(f"Using default provider: {S3Provider.S3.value}")
            provider = S3Provider.S3.value
        provider_enum = S3Provider.from_str(provider)

        s3_creds: S3Credentials = S3Credentials(
            bucket_name=bucket_name,
            provider=provider_enum,
            access_key_id=section.access_key_id(),
            secret_access_key=section.secret_access_key(),
            endpoint_url=section.endpoint(),
        )
        return s3_creds

    def copy_bytes(
        self,
        src: str,
        offset: int | SizeSuffix,
        length: int | SizeSuffix,
        outfile: Path,
        other_args: list[str] | None = None,
    ) -> Exception | None:
        """Copy a slice of bytes from the src file to outfile."""
        offset = SizeSuffix(offset).as_int()
        length = SizeSuffix(length).as_int()
        cmd_list: list[str] = [
            "cat",
            "--offset",
            str(offset),
            "--count",
            str(length),
            src,
        ]
        if other_args:
            cmd_list.extend(other_args)
        try:
            cp = self._run(cmd_list, capture=outfile)
            if cp.returncode == 0:
                return None
            return Exception(cp.stderr)
        except subprocess.CalledProcessError as e:
            return e

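    # Byte-range sketch (hypothetical path): fetch the first MiB of a large
    # object into a local file via `rclone cat --offset/--count`:
    #
    #   err = rclone.copy_bytes(
    #       "dst:Bucket/big.bin",
    #       offset=0,
    #       length=1024 * 1024,
    #       outfile=Path("head.bin"),
    #   )
    #   assert err is None
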
    def copy_dir(
        self, src: str | Dir, dst: str | Dir, args: list[str] | None = None
    ) -> CompletedProcess:
        """Copy a directory from source to destination."""
        # convert src to str, also dst
        src = convert_to_str(src)
        dst = convert_to_str(dst)
        cmd_list: list[str] = ["copy", src, dst, "--s3-no-check-bucket"]
        if args is not None:
            cmd_list += args
        cp = self._run(cmd_list)
        return CompletedProcess.from_subprocess(cp)

    def copy_remote(
        self, src: Remote, dst: Remote, args: list[str] | None = None
    ) -> CompletedProcess:
        """Copy a remote to another remote."""
        cmd_list: list[str] = ["copy", str(src), str(dst), "--s3-no-check-bucket"]
        if args is not None:
            cmd_list += args
        cp = self._run(cmd_list)
        return CompletedProcess.from_subprocess(cp)

    def mount(
        self,
        src: Remote | Dir | str,
        outdir: Path,
        allow_writes: bool | None = False,
        transfers: int | None = None,
        use_links: bool | None = None,
        vfs_cache_mode: str | None = None,
        verbose: bool | None = None,
        cache_dir: Path | None = None,
        cache_dir_delete_on_exit: bool | None = None,
        log: Path | None = None,
        other_args: list[str] | None = None,
    ) -> Mount:
        """Mount a remote or directory to a local path.

        Args:
            src: Remote or directory to mount
            outdir: Local path to mount to

        Returns:
            Mount: Handle wrapping the running mount process

        Raises:
            subprocess.CalledProcessError: If the mount operation fails
        """
        from rclone_api.mount_util import clean_mount, prepare_mount

        allow_writes = allow_writes or False
        # `use_links or True` was always True; only default when unset.
        use_links = True if use_links is None else use_links
        verbose = get_verbose(verbose) or (log is not None)
        vfs_cache_mode = vfs_cache_mode or "full"
        clean_mount(outdir, verbose=verbose)
        prepare_mount(outdir, verbose=verbose)
        debug_fuse = log is not None
        src_str = convert_to_str(src)
        cmd_list: list[str] = ["mount", src_str, str(outdir)]
        if not allow_writes:
            cmd_list.append("--read-only")
        if use_links:
            cmd_list.append("--links")
        if vfs_cache_mode:
            cmd_list.append("--vfs-cache-mode")
            cmd_list.append(vfs_cache_mode)
        if cache_dir:
            cmd_list.append("--cache-dir")
            cmd_list.append(str(cache_dir.absolute()))
        if transfers is not None:
            cmd_list.append("--transfers")
            cmd_list.append(str(transfers))
        if debug_fuse:
            cmd_list.append("--debug-fuse")
        if verbose:
            cmd_list.append("-vvvv")
        if other_args:
            cmd_list += other_args
        proc = self._launch_process(cmd_list, log=log)
        mount_read_only = not allow_writes
        mount: Mount = Mount(
            src=src_str,
            mount_path=outdir,
            process=proc,
            read_only=mount_read_only,
            cache_dir=cache_dir,
            cache_dir_delete_on_exit=cache_dir_delete_on_exit,
        )
        return mount

    # Settings optimized for s3.
    def mount_s3(
        self,
        url: str,
        outdir: Path,
        allow_writes=False,
        vfs_cache_mode="full",
        dir_cache_time: str | None = "1h",
        attribute_timeout: str | None = "1h",
        vfs_disk_space_total_size: str | None = "100M",
        transfers: int | None = 128,
        modtime_strategy: (
            ModTimeStrategy | None
        ) = ModTimeStrategy.USE_SERVER_MODTIME,  # speeds up S3 operations
        vfs_read_chunk_streams: int | None = 16,
        vfs_read_chunk_size: str | None = "4M",
        vfs_fast_fingerprint: bool = True,
        # vfs-refresh
        vfs_refresh: bool = True,
        other_args: list[str] | None = None,
    ) -> Mount:
        """Mount a remote or directory to a local path, with S3-friendly defaults.

        Args:
            url: Remote or directory to mount
            outdir: Local path to mount to
        """
        other_args = other_args or []
        if modtime_strategy is not None:
            other_args.append(f"--{modtime_strategy.value}")
        if (vfs_cache_mode == "full" or vfs_cache_mode == "writes") and (
            transfers is not None and "--transfers" not in other_args
        ):
            other_args.append("--transfers")
            other_args.append(str(transfers))
        if dir_cache_time is not None and "--dir-cache-time" not in other_args:
            other_args.append("--dir-cache-time")
            other_args.append(dir_cache_time)
        if (
            vfs_disk_space_total_size is not None
            and "--vfs-cache-max-size" not in other_args
        ):
            other_args.append("--vfs-cache-max-size")
            other_args.append(vfs_disk_space_total_size)
        if vfs_refresh and "--vfs-refresh" not in other_args:
            other_args.append("--vfs-refresh")
        if attribute_timeout is not None and "--attr-timeout" not in other_args:
            other_args.append("--attr-timeout")
            other_args.append(attribute_timeout)
        if vfs_read_chunk_streams:
            other_args.append("--vfs-read-chunk-streams")
            other_args.append(str(vfs_read_chunk_streams))
        if vfs_read_chunk_size:
            other_args.append("--vfs-read-chunk-size")
            other_args.append(vfs_read_chunk_size)
        if vfs_fast_fingerprint:
            other_args.append("--vfs-fast-fingerprint")

        other_args = other_args if other_args else None
        return self.mount(
            url,
            outdir,
            allow_writes=allow_writes,
            vfs_cache_mode=vfs_cache_mode,
            other_args=other_args,
        )

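    # Mount sketch (hypothetical remote and mount point). Keep the returned
    # Mount handle alive for as long as the mount is in use:
    #
    #   mount = rclone.mount_s3("dst:Bucket", Path("mnt/bucket"))
    #   # ... read files under mnt/bucket ...
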
    def serve_webdav(
        self,
        src: Remote | Dir | str,
        user: str,
        password: str,
        addr: str = "localhost:2049",
        allow_other: bool = False,
        other_args: list[str] | None = None,
    ) -> Process:
        """Serve a remote or directory via WebDAV.

        Args:
            src: Remote or directory to serve
            addr: Network address and port to serve on (default: localhost:2049)
            allow_other: Allow other users to access the share

        Returns:
            Process: The running WebDAV server process

        Raises:
            ValueError: If the WebDAV server fails to start
        """
        src_str = convert_to_str(src)
        cmd_list: list[str] = ["serve", "webdav", "--addr", addr, src_str]
        cmd_list.extend(["--user", user, "--pass", password])
        if allow_other:
            cmd_list.append("--allow-other")
        if other_args:
            cmd_list += other_args
        proc = self._launch_process(cmd_list)
        time.sleep(2)  # give it a moment to start
        if proc.poll() is not None:
            raise ValueError("WebDAV serve process failed to start")
        return proc

    def serve_http(
        self,
        src: str,
        cache_mode: str | None,
        addr: str | None = None,
        serve_http_log: Path | None = None,
        other_args: list[str] | None = None,
    ) -> HttpServer:
        """Serve a remote or directory via HTTP.

        Args:
            src: Remote or directory to serve
            addr: Network address and port to serve on (default: localhost on a free port)
        """
        addr = addr or f"localhost:{find_free_port()}"
        _, subpath = src.split(":", 1)  # might not work on local paths.
        cmd_list: list[str] = [
            "serve",
            "http",
            "--addr",
            addr,
            src,
            "--vfs-disk-space-total-size",
            "0",
            "--vfs-read-chunk-size-limit",
            "512M",
        ]

        if cache_mode:
            cmd_list += [
                "--vfs-cache-mode",
                cache_mode,
            ]
        if serve_http_log:
            cmd_list += ["--log-file", str(serve_http_log)]
            cmd_list += ["-vvvv"]
        if other_args:
            cmd_list += other_args
        proc = self._launch_process(cmd_list, log=serve_http_log)
        time.sleep(2)
        if proc.poll() is not None:
            raise ValueError("HTTP serve process failed to start")
        out: HttpServer = HttpServer(
            url=f"http://{addr}", subpath=subpath, process=proc
        )
        return out

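    # HTTP serve sketch (hypothetical remote). A free local port is chosen
    # when addr is omitted:
    #
    #   server = rclone.serve_http("dst:Bucket", cache_mode=None)
    #   print(server.url)  # e.g. http://localhost:<port>
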
    def config_paths(
        self, remote: str | None = None, obscure: bool = False, no_obscure: bool = False
    ) -> list[Path] | Exception:
        """Show the filesystem paths reported by the current configuration.

        Args:
            remote: Optional remote name to show configuration for
            obscure: Show obscured passwords
            no_obscure: Show passwords in plain text

        Returns:
            Paths parsed from `rclone config show` (config file, cache dir,
            temp dir), or an Exception if an error occurred
        """
        cmd_list: list[str] = ["config", "show"]

        if remote is not None:
            cmd_list.append(remote)

        if obscure:
            cmd_list.append("--obscure")

        if no_obscure:
            cmd_list.append("--no-obscure")

        try:
            cp = self._run(cmd_list, capture=True, check=True)
            stdout: str | bytes = cp.stdout
            if isinstance(stdout, bytes):
                stdout = stdout.decode("utf-8")
            out = _parse_paths(stdout)
            return out
        except subprocess.CalledProcessError as e:
            return e

    def size_files(
        self,
        src: str,
        files: list[str],
        fast_list: bool = False,  # Recommend that this is False
        other_args: list[str] | None = None,
        check: bool | None = False,
        verbose: bool | None = None,
    ) -> SizeResult | Exception:
        """Get the size of a list of files. Example of files items: "remote:bucket/to/file"."""
        verbose = get_verbose(verbose)
        check = get_check(check)
        if len(files) == 0:
            # guard: files[0] below would raise IndexError on an empty list
            return SizeResult(prefix=src, total_size=0, file_sizes={})
        if len(files) == 1:
            tmp = self.size_file(files[0])
            if isinstance(tmp, Exception):
                return tmp
            assert isinstance(tmp, SizeSuffix)
            return SizeResult(
                prefix=src, total_size=tmp.as_int(), file_sizes={files[0]: tmp.as_int()}
            )
        if fast_list or (other_args and "--fast-list" in other_args):
            warnings.warn(
                "It's not recommended to use --fast-list with size_files as this will perform poorly on large repositories since the entire repository has to be scanned."
            )
        files = list(files)
        all_files: list[File] = []
        cmd = ["lsjson", src, "--files-only", "-R"]
        with TemporaryDirectory() as tmpdir:
            include_files_txt = Path(tmpdir) / "include_files.txt"
            include_files_txt.write_text("\n".join(files), encoding="utf-8")
            cmd += ["--files-from", str(include_files_txt)]
            if fast_list:
                cmd.append("--fast-list")
            if other_args:
                cmd += other_args
            cp = self._run(cmd, check=check)

            if cp.returncode != 0:
                if check:
                    raise ValueError(f"Error getting file sizes: {cp.stderr}")
                else:
                    warnings.warn(f"Error getting file sizes: {cp.stderr}")
            stdout = cp.stdout
            pieces = src.split(":", 1)
            remote_name = pieces[0]
            parent_path: str | None
            if len(pieces) > 1:
                parent_path = pieces[1]
            else:
                parent_path = None
            remote = Remote(name=remote_name, rclone=self)
            paths: list[RPath] = RPath.from_json_str(
                stdout, remote, parent_path=parent_path
            )
            all_files += [File(p) for p in paths]
        file_sizes: dict[str, int] = {}
        f: File
        for f in all_files:
            p = f.to_string(include_remote=True)
            if p in file_sizes:
                warnings.warn(f"Duplicate file found: {p}")
                continue
            size = f.size
            if size == 0:
                warnings.warn(f"File size is 0: {p}")
            file_sizes[p] = f.size
        total_size = sum(file_sizes.values())
        file_sizes_path_corrected: dict[str, int] = {}
        for path, size in file_sizes.items():
            # remove the src prefix so keys are relative paths
            path_path = Path(path)
            path_str = path_path.relative_to(src).as_posix()
            file_sizes_path_corrected[path_str] = size
        out: SizeResult = SizeResult(
            prefix=src, total_size=total_size, file_sizes=file_sizes_path_corrected
        )
        return out
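    # Sizing sketch (hypothetical paths, in the docstring's fully-qualified
    # form). The result maps src-relative paths to sizes:
    #
    #   result = rclone.size_files(
    #       "dst:Bucket", ["dst:Bucket/path/a.bin", "dst:Bucket/path/b.bin"]
    #   )
    #   if not isinstance(result, Exception):
    #       print(result.total_size, result.file_sizes)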