rclone-api 1.3.5-py2.py3-none-any.whl → 1.3.7-py2.py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
rclone_api/rclone.py CHANGED
@@ -1,1276 +1,1343 @@
- """
- Unit test file.
- """
-
- import os
- import random
- import shutil
- import subprocess
- import time
- import traceback
- import warnings
- from concurrent.futures import Future, ThreadPoolExecutor
- from contextlib import contextmanager
- from fnmatch import fnmatch
- from pathlib import Path
- from tempfile import TemporaryDirectory
- from typing import Generator
-
- from rclone_api import Dir
- from rclone_api.completed_process import CompletedProcess
- from rclone_api.config import Config, Parsed, Section
- from rclone_api.convert import convert_to_filestr_list, convert_to_str
- from rclone_api.deprecated import deprecated
- from rclone_api.diff import DiffItem, DiffOption, diff_stream_from_running_process
- from rclone_api.dir_listing import DirListing
- from rclone_api.exec import RcloneExec
- from rclone_api.file import File
- from rclone_api.group_files import group_files
- from rclone_api.mount import Mount, clean_mount, prepare_mount
- from rclone_api.mount_read_chunker import MultiMountFileChunker
- from rclone_api.process import Process
- from rclone_api.remote import Remote
- from rclone_api.rpath import RPath
- from rclone_api.s3.types import (
-     MultiUploadResult,
-     S3MutliPartUploadConfig,
-     S3Provider,
-     S3UploadTarget,
- )
- from rclone_api.types import (
-     ListingOption,
-     ModTimeStrategy,
-     Order,
-     SizeResult,
-     SizeSuffix,
- )
- from rclone_api.util import (
-     get_check,
-     get_rclone_exe,
-     get_verbose,
-     to_path,
- )
- from rclone_api.walk import walk
-
-
- def rclone_verbose(verbose: bool | None) -> bool:
-     if verbose is not None:
-         os.environ["RCLONE_API_VERBOSE"] = "1" if verbose else "0"
-     return bool(int(os.getenv("RCLONE_API_VERBOSE", "0")))
-
-
- def _to_rclone_conf(config: Config | Path) -> Config:
-     if isinstance(config, Path):
-         content = config.read_text(encoding="utf-8")
-         return Config(content)
-     else:
-         return config
-
-
- class Rclone:
-     def __init__(
-         self, rclone_conf: Path | Config, rclone_exe: Path | None = None
-     ) -> None:
-         if isinstance(rclone_conf, Path):
-             if not rclone_conf.exists():
-                 raise ValueError(f"Rclone config file not found: {rclone_conf}")
-         self._exec = RcloneExec(rclone_conf, get_rclone_exe(rclone_exe))
-         self.config: Config = _to_rclone_conf(rclone_conf)
-
-     def _run(
-         self, cmd: list[str], check: bool = False, capture: bool | None = None
-     ) -> subprocess.CompletedProcess:
-         return self._exec.execute(cmd, check=check, capture=capture)
-
-     def _launch_process(
-         self, cmd: list[str], capture: bool | None = None, log: Path | None = None
-     ) -> Process:
-         return self._exec.launch_process(cmd, capture=capture, log=log)
-
-     def webgui(self, other_args: list[str] | None = None) -> Process:
-         """Launch the Rclone web GUI."""
-         cmd = ["rcd", "--rc-web-gui"]
-         if other_args:
-             cmd += other_args
-         return self._launch_process(cmd, capture=False)
-
-     def launch_server(
-         self,
-         addr: str,
-         user: str | None = None,
-         password: str | None = None,
-         other_args: list[str] | None = None,
-     ) -> Process:
-         """Launch the Rclone server so it can receive commands"""
-         cmd = ["rcd"]
-         if addr is not None:
-             cmd += ["--rc-addr", addr]
-         if user is not None:
-             cmd += ["--rc-user", user]
-         if password is not None:
-             cmd += ["--rc-pass", password]
-         if other_args:
-             cmd += other_args
-         out = self._launch_process(cmd, capture=False)
-         time.sleep(1)  # Give it some time to launch
-         return out
-
-     def remote_control(
-         self,
-         addr: str,
-         user: str | None = None,
-         password: str | None = None,
-         capture: bool | None = None,
-         other_args: list[str] | None = None,
-     ) -> CompletedProcess:
-         cmd = ["rc"]
-         if addr:
-             cmd += ["--rc-addr", addr]
-         if user is not None:
-             cmd += ["--rc-user", user]
-         if password is not None:
-             cmd += ["--rc-pass", password]
-         if other_args:
-             cmd += other_args
-         cp = self._run(cmd, capture=capture)
-         return CompletedProcess.from_subprocess(cp)
-
-     def obscure(self, password: str) -> str:
-         """Obscure a password for use in rclone config files."""
-         cmd_list: list[str] = ["obscure", password]
-         cp = self._run(cmd_list)
-         return cp.stdout.strip()
-
-     def ls(
-         self,
-         path: Dir | Remote | str,
-         max_depth: int | None = None,
-         glob: str | None = None,
-         order: Order = Order.NORMAL,
-         listing_option: ListingOption = ListingOption.ALL,
-     ) -> DirListing:
-         """List files in the given path.
-
-         Args:
-             path: Remote path or Remote object to list
-             max_depth: Maximum recursion depth (0 means no recursion)
-
-         Returns:
-             List of File objects found at the path
-         """
-
-         if isinstance(path, str):
-             path = Dir(
-                 to_path(path, self)
-             )  # assume it's a directory if ls is being called.
-
-         cmd = ["lsjson"]
-         if max_depth is not None:
-             if max_depth < 0:
-                 cmd.append("--recursive")
-             if max_depth > 0:
-                 cmd.append("--max-depth")
-                 cmd.append(str(max_depth))
-         if listing_option != ListingOption.ALL:
-             cmd.append(f"--{listing_option.value}")
-
-         cmd.append(str(path))
-         remote = path.remote if isinstance(path, Dir) else path
-         assert isinstance(remote, Remote)
-
-         cp = self._run(cmd, check=True)
-         text = cp.stdout
-         parent_path: str | None = None
-         if isinstance(path, Dir):
-             parent_path = path.path.path
-         paths: list[RPath] = RPath.from_json_str(text, remote, parent_path=parent_path)
-         # print(parent_path)
-         for o in paths:
-             o.set_rclone(self)
-
-         # do we have a glob pattern?
-         if glob is not None:
-             paths = [p for p in paths if fnmatch(p.path, glob)]
-
-         if order == Order.REVERSE:
-             paths.reverse()
-         elif order == Order.RANDOM:
-             random.shuffle(paths)
-         return DirListing(paths)
-
-     def listremotes(self) -> list[Remote]:
-         cmd = ["listremotes"]
-         cp = self._run(cmd)
-         text: str = cp.stdout
-         tmp = text.splitlines()
-         tmp = [t.strip() for t in tmp]
-         # strip out ":" from the end
-         tmp = [t.replace(":", "") for t in tmp]
-         out = [Remote(name=t, rclone=self) for t in tmp]
-         return out
-
-     def diff(
-         self,
-         src: str,
-         dst: str,
-         min_size: (
-             str | None
-         ) = None,  # e. g. "1MB" - see rclone documentation: https://rclone.org/commands/rclone_check/
-         max_size: (
-             str | None
-         ) = None,  # e. g. "1GB" - see rclone documentation: https://rclone.org/commands/rclone_check/
-         diff_option: DiffOption = DiffOption.COMBINED,
-         fast_list: bool = True,
-         size_only: bool | None = None,
-         checkers: int | None = None,
-         other_args: list[str] | None = None,
-     ) -> Generator[DiffItem, None, None]:
-         """Be extra careful with the src and dst values. If you are off by one
-         parent directory, you will get a huge amount of false diffs."""
-         other_args = other_args or []
-         if checkers is None or checkers < 1:
-             checkers = 1000
-         cmd = [
-             "check",
-             src,
-             dst,
-             "--checkers",
-             str(checkers),
-             "--log-level",
-             "INFO",
-             f"--{diff_option.value}",
-             "-",
-         ]
-         if size_only is None:
-             size_only = diff_option in [
-                 DiffOption.MISSING_ON_DST,
-                 DiffOption.MISSING_ON_SRC,
-             ]
-         if size_only:
-             cmd += ["--size-only"]
-         if fast_list:
-             cmd += ["--fast-list"]
-         if min_size:
-             cmd += ["--min-size", min_size]
-         if max_size:
-             cmd += ["--max-size", max_size]
-         if diff_option == DiffOption.MISSING_ON_DST:
-             cmd += ["--one-way"]
-         if other_args:
-             cmd += other_args
-         proc = self._launch_process(cmd, capture=True)
-         item: DiffItem
-         for item in diff_stream_from_running_process(
-             running_process=proc, src_slug=src, dst_slug=dst, diff_option=diff_option
-         ):
-             if item is None:
-                 break
-             yield item
-
-     def walk(
-         self,
-         path: Dir | Remote | str,
-         max_depth: int = -1,
-         breadth_first: bool = True,
-         order: Order = Order.NORMAL,
-     ) -> Generator[DirListing, None, None]:
-         """Walk through the given path recursively.
-
-         Args:
-             path: Remote path or Remote object to walk through
-             max_depth: Maximum depth to traverse (-1 for unlimited)
-
-         Yields:
-             DirListing: Directory listing for each directory encountered
-         """
-         dir_obj: Dir
-         if isinstance(path, Dir):
-             # Create a Remote object for the path
-             remote = path.remote
-             rpath = RPath(
-                 remote=remote,
-                 path=path.path.path,
-                 name=path.path.name,
-                 size=0,
-                 mime_type="inode/directory",
-                 mod_time="",
-                 is_dir=True,
-             )
-             rpath.set_rclone(self)
-             dir_obj = Dir(rpath)
-         elif isinstance(path, str):
-             dir_obj = Dir(to_path(path, self))
-         elif isinstance(path, Remote):
-             dir_obj = Dir(path)
-         else:
-             dir_obj = Dir(path)  # shut up pyright
-             assert f"Invalid type for path: {type(path)}"
-
-         yield from walk(
-             dir_obj, max_depth=max_depth, breadth_first=breadth_first, order=order
-         )
-
-     def scan_missing_folders(
-         self,
-         src: Dir | Remote | str,
-         dst: Dir | Remote | str,
-         max_depth: int = -1,
-         order: Order = Order.NORMAL,
-     ) -> Generator[Dir, None, None]:
-         """Walk through the given path recursively.
-
-         WORK IN PROGRESS!!
-
-         Args:
-             src: Source directory or Remote to walk through
-             dst: Destination directory or Remote to walk through
-             max_depth: Maximum depth to traverse (-1 for unlimited)
-
-         Yields:
-             DirListing: Directory listing for each directory encountered
-         """
-         from rclone_api.scan_missing_folders import scan_missing_folders
-
-         src_dir = Dir(to_path(src, self))
-         dst_dir = Dir(to_path(dst, self))
-         yield from scan_missing_folders(
-             src=src_dir, dst=dst_dir, max_depth=max_depth, order=order
-         )
-
-     def cleanup(
-         self, path: str, other_args: list[str] | None = None
-     ) -> CompletedProcess:
-         """Cleanup any resources used by the Rclone instance."""
-         # rclone cleanup remote:path [flags]
-         cmd = ["cleanup", path]
-         if other_args:
-             cmd += other_args
-         out = self._run(cmd)
-         return CompletedProcess.from_subprocess(out)
-
-     def copy_to(
-         self,
-         src: File | str,
-         dst: File | str,
-         check: bool | None = None,
-         verbose: bool | None = None,
-         other_args: list[str] | None = None,
-     ) -> CompletedProcess:
-         """Copy one file from source to destination.
-
-         Warning - slow.
-
-         """
-         check = get_check(check)
-         verbose = get_verbose(verbose)
-         src = src if isinstance(src, str) else str(src.path)
-         dst = dst if isinstance(dst, str) else str(dst.path)
-         cmd_list: list[str] = ["copyto", src, dst]
-         if other_args is not None:
-             cmd_list += other_args
-         cp = self._run(cmd_list, check=check)
-         return CompletedProcess.from_subprocess(cp)
-
-     def copy_files(
-         self,
-         src: str,
-         dst: str,
-         files: list[str] | Path,
-         check: bool | None = None,
-         max_backlog: int | None = None,
-         verbose: bool | None = None,
-         checkers: int | None = None,
-         transfers: int | None = None,
-         low_level_retries: int | None = None,
-         retries: int | None = None,
-         retries_sleep: str | None = None,
-         metadata: bool | None = None,
-         timeout: str | None = None,
-         max_partition_workers: int | None = None,
-         multi_thread_streams: int | None = None,
-         other_args: list[str] | None = None,
-     ) -> list[CompletedProcess]:
-         """Copy multiple files from source to destination.
-
-         Args:
-             payload: Dictionary of source and destination file paths
-         """
-         check = get_check(check)
-         max_partition_workers = max_partition_workers or 1
-         low_level_retries = low_level_retries or 10
-         retries = retries or 3
-         other_args = other_args or []
-         checkers = checkers or 1000
-         transfers = transfers or 32
-         verbose = get_verbose(verbose)
-         payload: list[str] = (
-             files
-             if isinstance(files, list)
-             else [f.strip() for f in files.read_text().splitlines() if f.strip()]
-         )
-         if len(payload) == 0:
-             return []
-
-         for p in payload:
-             if ":" in p:
-                 raise ValueError(
-                     f"Invalid file path, contains a remote, which is not allowed for copy_files: {p}"
-                 )
-
-         using_fast_list = "--fast-list" in other_args
-         if using_fast_list:
-             warnings.warn(
-                 "It's not recommended to use --fast-list with copy_files as this will perform poorly on large repositories since the entire repository has to be scanned."
-             )
-
-         if max_partition_workers > 1:
-             datalists: dict[str, list[str]] = group_files(
-                 payload, fully_qualified=False
-             )
-         else:
-             datalists = {"": payload}
-         # out: subprocess.CompletedProcess | None = None
-         out: list[CompletedProcess] = []
-
-         futures: list[Future] = []
-
-         with ThreadPoolExecutor(max_workers=max_partition_workers) as executor:
-             for common_prefix, files in datalists.items():
-
-                 def _task(
-                     files: list[str] | Path = files,
-                 ) -> subprocess.CompletedProcess:
-                     with TemporaryDirectory() as tmpdir:
-                         filelist: list[str] = []
-                         filepath: Path
-                         if isinstance(files, list):
-                             include_files_txt = Path(tmpdir) / "include_files.txt"
-                             include_files_txt.write_text(
-                                 "\n".join(files), encoding="utf-8"
-                             )
-                             filelist = list(files)
-                             filepath = Path(include_files_txt)
-                         elif isinstance(files, Path):
-                             filelist = [
-                                 f.strip()
-                                 for f in files.read_text().splitlines()
-                                 if f.strip()
-                             ]
-                             filepath = files
-                         if common_prefix:
-                             src_path = f"{src}/{common_prefix}"
-                             dst_path = f"{dst}/{common_prefix}"
-                         else:
-                             src_path = src
-                             dst_path = dst
-
-                         if verbose:
-                             nfiles = len(filelist)
-                             files_fqdn = [f" {src_path}/{f}" for f in filelist]
-                             print(f"Copying {nfiles} files:")
-                             chunk_size = 100
-                             for i in range(0, nfiles, chunk_size):
-                                 chunk = files_fqdn[i : i + chunk_size]
-                                 files_str = "\n".join(chunk)
-                                 print(f"{files_str}")
-                         cmd_list: list[str] = [
-                             "copy",
-                             src_path,
-                             dst_path,
-                             "--files-from",
-                             str(filepath),
-                             "--checkers",
-                             str(checkers),
-                             "--transfers",
-                             str(transfers),
-                             "--low-level-retries",
-                             str(low_level_retries),
-                             "--retries",
-                             str(retries),
-                         ]
-                         if metadata:
-                             cmd_list.append("--metadata")
-                         if retries_sleep is not None:
-                             cmd_list += ["--retries-sleep", retries_sleep]
-                         if timeout is not None:
-                             cmd_list += ["--timeout", timeout]
-                         if max_backlog is not None:
-                             cmd_list += ["--max-backlog", str(max_backlog)]
-                         if multi_thread_streams is not None:
-                             cmd_list += [
-                                 "--multi-thread-streams",
-                                 str(multi_thread_streams),
-                             ]
-                         if verbose:
-                             if not any(["-v" in x for x in other_args]):
-                                 cmd_list.append("-vvvv")
-                             if not any(["--progress" in x for x in other_args]):
-                                 cmd_list.append("--progress")
-                         if other_args:
-                             cmd_list += other_args
-                         out = self._run(cmd_list, capture=not verbose)
-                         return out
-
-                 fut: Future = executor.submit(_task)
-                 futures.append(fut)
-             for fut in futures:
-                 cp: subprocess.CompletedProcess = fut.result()
-                 assert cp is not None
-                 out.append(CompletedProcess.from_subprocess(cp))
-                 if cp.returncode != 0:
-                     if check:
-                         raise ValueError(f"Error deleting files: {cp.stderr}")
-                     else:
-                         warnings.warn(f"Error deleting files: {cp.stderr}")
-         return out
-
-     def copy(
-         self,
-         src: Dir | str,
-         dst: Dir | str,
-         check: bool | None = None,
-         transfers: int | None = None,
-         checkers: int | None = None,
-         multi_thread_streams: int | None = None,
-         low_level_retries: int | None = None,
-         retries: int | None = None,
-         other_args: list[str] | None = None,
-     ) -> CompletedProcess:
-         """Copy files from source to destination.
-
-         Args:
-             src: Source directory
-             dst: Destination directory
-         """
-         # src_dir = src.path.path
-         # dst_dir = dst.path.path
-         src_dir = convert_to_str(src)
-         dst_dir = convert_to_str(dst)
-         check = get_check(check)
-         checkers = checkers or 1000
-         transfers = transfers or 32
-         low_level_retries = low_level_retries or 10
-         retries = retries or 3
-         cmd_list: list[str] = ["copy", src_dir, dst_dir]
-         cmd_list += ["--checkers", str(checkers)]
-         cmd_list += ["--transfers", str(transfers)]
-         cmd_list += ["--low-level-retries", str(low_level_retries)]
-         if multi_thread_streams is not None:
-             cmd_list += ["--multi-thread-streams", str(multi_thread_streams)]
-         if other_args:
-             cmd_list += other_args
-         cp = self._run(cmd_list, check=check, capture=False)
-         return CompletedProcess.from_subprocess(cp)
-
-     def purge(self, path: Dir | str) -> CompletedProcess:
-         """Purge a directory"""
-         # path should always be a string
-         path = path if isinstance(path, str) else str(path.path)
-         cmd_list: list[str] = ["purge", str(path)]
-         cp = self._run(cmd_list)
-         return CompletedProcess.from_subprocess(cp)
-
-     def delete_files(
-         self,
-         files: str | File | list[str] | list[File],
-         check: bool | None = None,
-         rmdirs=False,
-         verbose: bool | None = None,
-         max_partition_workers: int | None = None,
-         other_args: list[str] | None = None,
-     ) -> CompletedProcess:
-         """Delete a directory"""
-         check = get_check(check)
-         verbose = get_verbose(verbose)
-         payload: list[str] = convert_to_filestr_list(files)
-         if len(payload) == 0:
-             if verbose:
-                 print("No files to delete")
-             cp = subprocess.CompletedProcess(
-                 args=["rclone", "delete", "--files-from", "[]"],
-                 returncode=0,
-                 stdout="",
-                 stderr="",
-             )
-             return CompletedProcess.from_subprocess(cp)
-
-         datalists: dict[str, list[str]] = group_files(payload)
-         completed_processes: list[subprocess.CompletedProcess] = []
-
-         futures: list[Future] = []
-
-         with ThreadPoolExecutor(max_workers=max_partition_workers) as executor:
-
-             for remote, files in datalists.items():
-
-                 def _task(
-                     files=files, check=check, remote=remote
-                 ) -> subprocess.CompletedProcess:
-                     with TemporaryDirectory() as tmpdir:
-                         include_files_txt = Path(tmpdir) / "include_files.txt"
-                         include_files_txt.write_text("\n".join(files), encoding="utf-8")
-
-                         # print(include_files_txt)
-                         cmd_list: list[str] = [
-                             "delete",
-                             remote,
-                             "--files-from",
-                             str(include_files_txt),
-                             "--checkers",
-                             "1000",
-                             "--transfers",
-                             "1000",
-                         ]
-                         if verbose:
-                             cmd_list.append("-vvvv")
-                         if rmdirs:
-                             cmd_list.append("--rmdirs")
-                         if other_args:
-                             cmd_list += other_args
-                         out = self._run(cmd_list, check=check)
-                         if out.returncode != 0:
-                             if check:
-                                 completed_processes.append(out)
-                                 raise ValueError(f"Error deleting files: {out}")
-                             else:
-                                 warnings.warn(f"Error deleting files: {out}")
-                         return out
-
-                 fut: Future = executor.submit(_task)
-                 futures.append(fut)
-
-             for fut in futures:
-                 out = fut.result()
-                 assert out is not None
-                 completed_processes.append(out)
-
-         return CompletedProcess(completed_processes)
-
-     @deprecated("delete_files")
-     def deletefiles(
-         self, files: str | File | list[str] | list[File]
-     ) -> CompletedProcess:
-         out = self.delete_files(files)
-         return out
-
-     def exists(self, path: Dir | Remote | str | File) -> bool:
-         """Check if a file or directory exists."""
-         arg: str = convert_to_str(path)
-         assert isinstance(arg, str)
-         try:
-             dir_listing = self.ls(arg)
-             # print(dir_listing)
-             return len(dir_listing.dirs) > 0 or len(dir_listing.files) > 0
-         except subprocess.CalledProcessError:
-             return False
-
-     def is_synced(self, src: str | Dir, dst: str | Dir) -> bool:
-         """Check if two directories are in sync."""
-         src = convert_to_str(src)
-         dst = convert_to_str(dst)
-         cmd_list: list[str] = ["check", str(src), str(dst)]
-         try:
-             self._run(cmd_list, check=True)
-             return True
-         except subprocess.CalledProcessError:
-             return False
-
-     def copy_file_resumable_s3(
-         self,
-         src: str,
-         dst: str,
-         save_state_json: Path,
-         chunk_size: SizeSuffix | None = None,
-         read_threads: int = 8,
-         write_threads: int = 8,
-         retries: int = 3,
-         verbose: bool | None = None,
-         max_chunks_before_suspension: int | None = None,
-         mount_log: Path | None = None,
-     ) -> MultiUploadResult:
-         """For massive files that rclone can't handle in one go, this function will copy the file in chunks to an S3 store"""
-         from rclone_api.s3.api import S3Client
-         from rclone_api.s3.create import S3Credentials
-         from rclone_api.util import S3PathInfo, split_s3_path
-
-         other_args: list[str] = ["--no-modtime", "--vfs-read-wait", "1s"]
-         chunk_size = chunk_size or SizeSuffix("64M")
-         unit_chunk_size = chunk_size / read_threads
-         vfs_read_chunk_size = unit_chunk_size
-         vfs_read_chunk_size_limit = chunk_size
-         vfs_read_chunk_streams = read_threads
-         vfs_disk_space_total_size = chunk_size
-         assert (
-             chunk_size.as_int() % vfs_read_chunk_size.as_int() == 0
-         ), f"chunk_size {chunk_size} must be a multiple of vfs_read_chunk_size {vfs_read_chunk_size}"
-         other_args += ["--vfs-read-chunk-size", vfs_read_chunk_size.as_str()]
-         other_args += [
-             "--vfs-read-chunk-size-limit",
-             vfs_read_chunk_size_limit.as_str(),
-         ]
-         other_args += ["--vfs-read-chunk-streams", str(vfs_read_chunk_streams)]
-         other_args += [
-             "--vfs-disk-space-total-size",
-             vfs_disk_space_total_size.as_str(),
-         ]
-         other_args += ["--read-only"]
-         other_args += ["--direct-io"]
-         # --vfs-cache-max-size
-         other_args += ["--vfs-cache-max-size", vfs_disk_space_total_size.as_str()]
-         mount_path = Path("tmp_mnts") / "RCLONE_API_DYNAMIC_MOUNT"
-         src_path = Path(src)
-         name = src_path.name
-
-         src_parent_path = Path(src).parent.as_posix()
-         size_result: SizeResult = self.size_files(src_parent_path, [name])
-
-         target_size = SizeSuffix(size_result.total_size)
-         if target_size < SizeSuffix("5M"):
-             # fallback to normal copy
-             completed_proc = self.copy_to(src, dst, check=True)
-             if completed_proc.ok:
-                 return MultiUploadResult.UPLOADED_FRESH
-         if size_result.total_size <= 0:
-             raise ValueError(
-                 f"File {src} has size {size_result.total_size}, is this a directory?"
-             )
-
-         path_info: S3PathInfo = split_s3_path(dst)
-         remote = path_info.remote
-         bucket_name = path_info.bucket
-         s3_key = path_info.key
-         parsed: Parsed = self.config.parse()
-         sections: dict[str, Section] = parsed.sections
-         if remote not in sections:
-             raise ValueError(
-                 f"Remote {remote} not found in rclone config, remotes are: {sections.keys()}"
-             )
-
-         section: Section = sections[remote]
-         dst_type = section.type()
-         if dst_type != "s3" and dst_type != "b2":
-             raise ValueError(
-                 f"Remote {remote} is not an S3 remote, it is of type {dst_type}"
-             )
-
-         def get_provider_str(section=section) -> str | None:
-             type: str = section.type()
-             provider: str | None = section.provider()
-             if provider is not None:
-                 return provider
-             if type == "b2":
-                 return S3Provider.BACKBLAZE.value
-             if type != "s3":
-                 raise ValueError(f"Remote {remote} is not an S3 remote")
-             return S3Provider.S3.value
-
-         provider: str
-         if provided_provider_str := get_provider_str():
-             if verbose:
-                 print(f"Using provided provider: {provided_provider_str}")
-             provider = provided_provider_str
-         else:
-             if verbose:
-                 print(f"Using default provider: {S3Provider.S3.value}")
-             provider = S3Provider.S3.value
-         provider_enum = S3Provider.from_str(provider)
-
-         s3_creds: S3Credentials = S3Credentials(
-             provider=provider_enum,
-             access_key_id=section.access_key_id(),
-             secret_access_key=section.secret_access_key(),
-             endpoint_url=section.endpoint(),
-         )
-
-         chunk_fetcher: MultiMountFileChunker = self.get_multi_mount_file_chunker(
-             src=src_path.as_posix(),
-             chunk_size=chunk_size,
-             threads=read_threads,
-             mount_log=mount_log,
-             direct_io=True,
-         )
-
-         client = S3Client(s3_creds)
-         upload_config: S3MutliPartUploadConfig = S3MutliPartUploadConfig(
-             chunk_size=chunk_size.as_int(),
-             chunk_fetcher=chunk_fetcher.fetch,
-             max_write_threads=write_threads,
-             retries=retries,
-             resume_path_json=save_state_json,
-             max_chunks_before_suspension=max_chunks_before_suspension,
-         )
-
-         src_file = mount_path / name
-
-         print(f"Uploading {name} to {s3_key} in bucket {bucket_name}")
-         print(f"Source: {src_path}")
-         print(f"bucket_name: {bucket_name}")
-         print(f"upload_config: {upload_config}")
-
-         # get the file size
-
-         upload_target = S3UploadTarget(
-             src_file=src_file,
-             src_file_size=size_result.total_size,
-             bucket_name=bucket_name,
-             s3_key=s3_key,
-         )
-
-         try:
-             out: MultiUploadResult = client.upload_file_multipart(
-                 upload_target=upload_target,
-                 upload_config=upload_config,
-             )
-             return out
-         except Exception as e:
-             print(f"Error uploading file: {e}")
-             traceback.print_exc()
-             raise
-         finally:
-             chunk_fetcher.shutdown()
-
-     def get_multi_mount_file_chunker(
-         self,
-         src: str,
-         chunk_size: SizeSuffix,
-         threads: int,
-         mount_log: Path | None,
-         direct_io: bool,
-     ) -> MultiMountFileChunker:
-         from rclone_api.util import random_str
-
-         mounts: list[Mount] = []
-         vfs_read_chunk_size = chunk_size
-         vfs_read_chunk_size_limit = chunk_size
-         vfs_read_chunk_streams = 0
-         vfs_disk_space_total_size = chunk_size
-         other_args: list[str] = []
-         other_args += ["--no-modtime"]
-         other_args += ["--vfs-read-chunk-size", vfs_read_chunk_size.as_str()]
-         other_args += [
-             "--vfs-read-chunk-size-limit",
-             vfs_read_chunk_size_limit.as_str(),
-         ]
-         other_args += ["--vfs-read-chunk-streams", str(vfs_read_chunk_streams)]
-         other_args += [
-             "--vfs-disk-space-total-size",
-             vfs_disk_space_total_size.as_str(),
-         ]
-         other_args += ["--read-only"]
-         if direct_io:
-             other_args += ["--direct-io"]
-
-         base_mount_dir = Path("tmp_mnts")
-         base_cache_dir = Path("cache")
-
-         filename = Path(src).name
-         with ThreadPoolExecutor(max_workers=threads) as executor:
-             futures: list[Future] = []
-             try:
-                 for i in range(threads):
-                     tmp_mnts = base_mount_dir / random_str(12)
-                     verbose = mount_log is not None
-
-                     src_parent_path = Path(src).parent.as_posix()
-                     cache_dir = base_cache_dir / random_str(12)
-
-                     def task(
-                         src_parent_path=src_parent_path,
-                         tmp_mnts=tmp_mnts,
-                         cache_dir=cache_dir,
-                     ):
-                         clean_mount(tmp_mnts, verbose=verbose)
-                         prepare_mount(tmp_mnts, verbose=verbose)
-                         return self.mount(
-                             src=src_parent_path,
-                             outdir=tmp_mnts,
-                             allow_writes=False,
-                             use_links=True,
-                             vfs_cache_mode="minimal",
-                             verbose=False,
-                             cache_dir=cache_dir,
-                             cache_dir_delete_on_exit=True,
-                             log=mount_log,
-                             other_args=other_args,
-                         )
-
-                     futures.append(executor.submit(task))
-                 mount_errors: list[Exception] = []
-                 for fut in futures:
-                     try:
-                         mount = fut.result()
-                         mounts.append(mount)
-                     except Exception as er:
-                         warnings.warn(f"Error mounting: {er}")
-                         mount_errors.append(er)
-                 if mount_errors:
-                     warnings.warn(f"Error mounting: {mount_errors}")
-                     raise Exception(mount_errors)
-             except Exception:
-                 for mount in mounts:
-                     mount.close()
-                 raise
-
-         src_path: Path = Path(src)
-         src_parent_path = src_path.parent.as_posix()
-         name = src_path.name
-         size_result: SizeResult = self.size_files(src_parent_path, [name])
-         filesize = size_result.total_size
-
-         executor = ThreadPoolExecutor(max_workers=threads)
-         filechunker: MultiMountFileChunker = MultiMountFileChunker(
-             filename=filename,
-             filesize=filesize,
-             mounts=mounts,
-             executor=executor,
-             verbose=mount_log is not None,
-         )
-         return filechunker
-
-     def copy_bytes_multimount(
-         self,
-         src: str,
-         offset: int,
-         length: int,
-         chunk_size: SizeSuffix,
-         max_threads: int = 1,
-         # If outfile is supplied then bytes are written to this file and success returns bytes(0)
-         outfile: Path | None = None,
-         mount_log: Path | None = None,
-         direct_io: bool = True,
-     ) -> bytes | Exception:
-         """Copy a slice of bytes from the src file to dst. Parallelism is achieved through multiple mounted files."""
-         from rclone_api.types import FilePart
-
-         # determine number of threads from chunk size
-         threads = max(1, min(max_threads, length // chunk_size.as_int()))
-         filechunker = self.get_multi_mount_file_chunker(
-             src=src,
-             chunk_size=chunk_size,
-             threads=threads,
-             mount_log=mount_log,
-             direct_io=direct_io,
-         )
-         try:
-             fut = filechunker.fetch(offset, length, extra=None)
-             fp: FilePart = fut.result()
-             payload = fp.payload
-             if isinstance(payload, Exception):
-                 return payload
-             try:
-                 if outfile is None:
-                     return payload.read_bytes()
-                 shutil.move(payload, outfile)
-                 return bytes(0)
-             finally:
-                 fp.close()
-
-         except Exception as e:
-             warnings.warn(f"Error copying bytes: {e}")
-             return e
-         finally:
-             try:
-                 filechunker.shutdown()
-             except Exception as e:
-                 warnings.warn(f"Error closing filechunker: {e}")
-
-     def copy_dir(
-         self, src: str | Dir, dst: str | Dir, args: list[str] | None = None
-     ) -> CompletedProcess:
-         """Copy a directory from source to destination."""
-         # convert src to str, also dst
-         src = convert_to_str(src)
-         dst = convert_to_str(dst)
-         cmd_list: list[str] = ["copy", src, dst]
-         if args is not None:
-             cmd_list += args
-         cp = self._run(cmd_list)
-         return CompletedProcess.from_subprocess(cp)
-
-     def copy_remote(
-         self, src: Remote, dst: Remote, args: list[str] | None = None
-     ) -> CompletedProcess:
-         """Copy a remote to another remote."""
-         cmd_list: list[str] = ["copy", str(src), str(dst)]
-         if args is not None:
-             cmd_list += args
-         # return self._run(cmd_list)
-         cp = self._run(cmd_list)
-         return CompletedProcess.from_subprocess(cp)
-
-     def mount(
-         self,
-         src: Remote | Dir | str,
-         outdir: Path,
-         allow_writes: bool | None = False,
-         use_links: bool | None = None,
-         vfs_cache_mode: str | None = None,
-         verbose: bool | None = None,
-         cache_dir: Path | None = None,
-         cache_dir_delete_on_exit: bool | None = None,
-         log: Path | None = None,
-         other_args: list[str] | None = None,
-     ) -> Mount:
-         """Mount a remote or directory to a local path.
-
-         Args:
-             src: Remote or directory to mount
-             outdir: Local path to mount to
-
-         Returns:
-             CompletedProcess from the mount command execution
-
-         Raises:
-             subprocess.CalledProcessError: If the mount operation fails
-         """
-
-         allow_writes = allow_writes or False
-         use_links = use_links or True
-         verbose = get_verbose(verbose) or (log is not None)
-         vfs_cache_mode = vfs_cache_mode or "full"
-         clean_mount(outdir, verbose=verbose)
-         prepare_mount(outdir, verbose=verbose)
-         debug_fuse = log is not None
-         src_str = convert_to_str(src)
-         cmd_list: list[str] = ["mount", src_str, str(outdir)]
-         if not allow_writes:
-             cmd_list.append("--read-only")
-         if use_links:
-             cmd_list.append("--links")
-         if vfs_cache_mode:
-             cmd_list.append("--vfs-cache-mode")
-             cmd_list.append(vfs_cache_mode)
-         if cache_dir:
-             cmd_list.append("--cache-dir")
-             cmd_list.append(str(cache_dir.absolute()))
-         if debug_fuse:
-             cmd_list.append("--debug-fuse")
-         if verbose:
-             cmd_list.append("-vvvv")
-         if other_args:
-             cmd_list += other_args
-         proc = self._launch_process(cmd_list, log=log)
-         mount_read_only = not allow_writes
-         mount: Mount = Mount(
-             src=src_str,
-             mount_path=outdir,
-             process=proc,
-             read_only=mount_read_only,
-             cache_dir=cache_dir,
-             cache_dir_delete_on_exit=cache_dir_delete_on_exit,
-         )
-         return mount
-
-     @contextmanager
-     def scoped_mount(
-         self,
-         src: Remote | Dir | str,
-         outdir: Path,
-         allow_writes: bool | None = None,
-         use_links: bool | None = None,
-         vfs_cache_mode: str | None = None,
-         verbose: bool | None = None,
-         log: Path | None = None,
-         cache_dir: Path | None = None,
-         cache_dir_delete_on_exit: bool | None = None,
-         other_args: list[str] | None = None,
-     ) -> Generator[Mount, None, None]:
-         """Like mount, but can be used in a context manager."""
-         error_happened = False
-         mount: Mount = self.mount(
-             src,
-             outdir,
-             allow_writes=allow_writes,
-             use_links=use_links,
-             vfs_cache_mode=vfs_cache_mode,
-             verbose=verbose,
-             cache_dir=cache_dir,
-             cache_dir_delete_on_exit=cache_dir_delete_on_exit,
-             log=log,
-             other_args=other_args,
-         )
-         try:
-             yield mount
-         except Exception as e:
-             error_happened = True
-             stack_trace = traceback.format_exc()
-             warnings.warn(f"Error in scoped_mount: {e}\n\nStack Trace:\n{stack_trace}")
-             raise
-         finally:
-             if not error_happened or (not allow_writes):
-                 mount.close()
-
-     # Settings optimized for s3.
-     def mount_s3(
-         self,
-         url: str,
-         outdir: Path,
-         allow_writes=False,
-         vfs_cache_mode="full",
-         dir_cache_time: str | None = "1h",
-         attribute_timeout: str | None = "1h",
-         vfs_disk_space_total_size: str | None = "100M",
-         transfers: int | None = 128,
-         modtime_strategy: (
-             ModTimeStrategy | None
-         ) = ModTimeStrategy.USE_SERVER_MODTIME,  # speeds up S3 operations
-         vfs_read_chunk_streams: int | None = 16,
-         vfs_read_chunk_size: str | None = "4M",
-         vfs_fast_fingerprint: bool = True,
-         # vfs-refresh
-         vfs_refresh: bool = True,
-         other_args: list[str] | None = None,
-     ) -> Mount:
-         """Mount a remote or directory to a local path.
-
-         Args:
-             src: Remote or directory to mount
-             outdir: Local path to mount to
-         """
-         other_args = other_args or []
-         if modtime_strategy is not None:
-             other_args.append(f"--{modtime_strategy.value}")
-         if (vfs_cache_mode == "full" or vfs_cache_mode == "writes") and (
-             transfers is not None and "--transfers" not in other_args
-         ):
-             other_args.append("--transfers")
-             other_args.append(str(transfers))
-         if dir_cache_time is not None and "--dir-cache-time" not in other_args:
-             other_args.append("--dir-cache-time")
-             other_args.append(dir_cache_time)
-         if (
-             vfs_disk_space_total_size is not None
-             and "--vfs-cache-max-size" not in other_args
-         ):
-             other_args.append("--vfs-cache-max-size")
-             other_args.append(vfs_disk_space_total_size)
-         if vfs_refresh and "--vfs-refresh" not in other_args:
-             other_args.append("--vfs-refresh")
-         if attribute_timeout is not None and "--attr-timeout" not in other_args:
-             other_args.append("--attr-timeout")
-             other_args.append(attribute_timeout)
-         if vfs_read_chunk_streams:
-             other_args.append("--vfs-read-chunk-streams")
-             other_args.append(str(vfs_read_chunk_streams))
-         if vfs_read_chunk_size:
-             other_args.append("--vfs-read-chunk-size")
-             other_args.append(vfs_read_chunk_size)
-         if vfs_fast_fingerprint:
-             other_args.append("--vfs-fast-fingerprint")
-
-         other_args = other_args if other_args else None
-         return self.mount(
-             url,
-             outdir,
-             allow_writes=allow_writes,
-             vfs_cache_mode=vfs_cache_mode,
-             other_args=other_args,
-         )
-
-     def serve_webdav(
-         self,
-         src: Remote | Dir | str,
-         user: str,
-         password: str,
-         addr: str = "localhost:2049",
-         allow_other: bool = False,
-         other_args: list[str] | None = None,
-     ) -> Process:
-         """Serve a remote or directory via NFS.
-
-         Args:
-             src: Remote or directory to serve
-             addr: Network address and port to serve on (default: localhost:2049)
-             allow_other: Allow other users to access the share
-
-         Returns:
-             Process: The running NFS server process
-
-         Raises:
-             ValueError: If the NFS server fails to start
-         """
-         src_str = convert_to_str(src)
-         cmd_list: list[str] = ["serve", "webdav", "--addr", addr, src_str]
-         cmd_list.extend(["--user", user, "--pass", password])
-         if allow_other:
-             cmd_list.append("--allow-other")
-         if other_args:
-             cmd_list += other_args
-         proc = self._launch_process(cmd_list)
-         time.sleep(2)  # give it a moment to start
-         if proc.poll() is not None:
-             raise ValueError("NFS serve process failed to start")
-         return proc
-
-     def size_files(
-         self,
-         src: str,
-         files: list[str],
-         fast_list: bool = False,  # Recommend that this is False
-         other_args: list[str] | None = None,
-         check: bool | None = False,
-         verbose: bool | None = None,
-     ) -> SizeResult:
-         """Get the size of a list of files. Example of files items: "remote:bucket/to/file"."""
-         verbose = get_verbose(verbose)
-         check = get_check(check)
-         if fast_list or (other_args and "--fast-list" in other_args):
-             warnings.warn(
-                 "It's not recommended to use --fast-list with size_files as this will perform poorly on large repositories since the entire repository has to be scanned."
-             )
-         files = list(files)
-         all_files: list[File] = []
-         # prefix, files = group_under_one_prefix(src, files)
-         cmd = ["lsjson", src, "--files-only", "-R"]
-         with TemporaryDirectory() as tmpdir:
-             # print("files: " + ",".join(files))
-             include_files_txt = Path(tmpdir) / "include_files.txt"
-             include_files_txt.write_text("\n".join(files), encoding="utf-8")
-             cmd += ["--files-from", str(include_files_txt)]
-             if fast_list:
-                 cmd.append("--fast-list")
-             if other_args:
-                 cmd += other_args
-             cp = self._run(cmd, check=check)
-
-         if cp.returncode != 0:
-             if check:
-                 raise ValueError(f"Error getting file sizes: {cp.stderr}")
-             else:
-                 warnings.warn(f"Error getting file sizes: {cp.stderr}")
-         stdout = cp.stdout
-         pieces = src.split(":", 1)
-         remote_name = pieces[0]
-         parent_path: str | None
-         if len(pieces) > 1:
-             parent_path = pieces[1]
-         else:
-             parent_path = None
-         remote = Remote(name=remote_name, rclone=self)
-         paths: list[RPath] = RPath.from_json_str(
-             stdout, remote, parent_path=parent_path
-         )
-         # print(paths)
-         all_files += [File(p) for p in paths]
-         file_sizes: dict[str, int] = {}
-         f: File
-         for f in all_files:
-             p = f.to_string(include_remote=True)
-             if p in file_sizes:
-                 warnings.warn(f"Duplicate file found: {p}")
-                 continue
-             size = f.size
-             if size == 0:
-                 warnings.warn(f"File size is 0: {p}")
-             file_sizes[p] = f.size
-         total_size = sum(file_sizes.values())
-         file_sizes_path_corrected: dict[str, int] = {}
-         for path, size in file_sizes.items():
-             # remove the prefix
-             path_path = Path(path)
-             path_str = path_path.relative_to(src).as_posix()
-             file_sizes_path_corrected[path_str] = size
-         out: SizeResult = SizeResult(
-             prefix=src, total_size=total_size, file_sizes=file_sizes_path_corrected
-         )
-         return out
1
+ """
2
+ Unit test file.
3
+ """
4
+
5
+ import os
6
+ import random
7
+ import shutil
8
+ import subprocess
9
+ import time
10
+ import traceback
11
+ import warnings
12
+ from concurrent.futures import Future, ThreadPoolExecutor
13
+ from contextlib import contextmanager
14
+ from fnmatch import fnmatch
15
+ from pathlib import Path
16
+ from tempfile import TemporaryDirectory
17
+ from typing import Generator
18
+
19
+ from rclone_api import Dir
20
+ from rclone_api.completed_process import CompletedProcess
21
+ from rclone_api.config import Config, Parsed, Section
22
+ from rclone_api.convert import convert_to_filestr_list, convert_to_str
23
+ from rclone_api.deprecated import deprecated
24
+ from rclone_api.diff import DiffItem, DiffOption, diff_stream_from_running_process
25
+ from rclone_api.dir_listing import DirListing
26
+ from rclone_api.exec import RcloneExec
27
+ from rclone_api.file import File, FileItem
28
+ from rclone_api.group_files import group_files
29
+ from rclone_api.mount import Mount, clean_mount, prepare_mount
30
+ from rclone_api.mount_read_chunker import MultiMountFileChunker
31
+ from rclone_api.process import Process
32
+ from rclone_api.remote import Remote
33
+ from rclone_api.rpath import RPath
34
+ from rclone_api.s3.types import (
35
+ MultiUploadResult,
36
+ S3MutliPartUploadConfig,
37
+ S3Provider,
38
+ S3UploadTarget,
39
+ )
40
+ from rclone_api.types import (
41
+ ListingOption,
42
+ ModTimeStrategy,
43
+ Order,
44
+ SizeResult,
45
+ SizeSuffix,
46
+ )
47
+ from rclone_api.util import (
48
+ get_check,
49
+ get_rclone_exe,
50
+ get_verbose,
51
+ to_path,
52
+ )
53
+ from rclone_api.walk import walk
54
+
55
+
56
+ def rclone_verbose(verbose: bool | None) -> bool:
57
+ if verbose is not None:
58
+ os.environ["RCLONE_API_VERBOSE"] = "1" if verbose else "0"
59
+ return bool(int(os.getenv("RCLONE_API_VERBOSE", "0")))
60
+
61
+
62
+ def _to_rclone_conf(config: Config | Path) -> Config:
63
+ if isinstance(config, Path):
64
+ content = config.read_text(encoding="utf-8")
65
+ return Config(content)
66
+ else:
67
+ return config
68
+
69
+
70
+ class Rclone:
71
+ def __init__(
72
+ self, rclone_conf: Path | Config, rclone_exe: Path | None = None
73
+ ) -> None:
74
+ if isinstance(rclone_conf, Path):
75
+ if not rclone_conf.exists():
76
+ raise ValueError(f"Rclone config file not found: {rclone_conf}")
77
+ self._exec = RcloneExec(rclone_conf, get_rclone_exe(rclone_exe))
78
+ self.config: Config = _to_rclone_conf(rclone_conf)
79
+
80
+ def _run(
81
+ self, cmd: list[str], check: bool = False, capture: bool | Path | None = None
82
+ ) -> subprocess.CompletedProcess:
83
+ return self._exec.execute(cmd, check=check, capture=capture)
84
+
85
+ def _launch_process(
86
+ self, cmd: list[str], capture: bool | None = None, log: Path | None = None
87
+ ) -> Process:
88
+ return self._exec.launch_process(cmd, capture=capture, log=log)
89
+
90
+ def _get_tmp_mount_dir(self) -> Path:
91
+ return Path("tmp_mnts")
92
+
93
+ def _get_cache_dir(self) -> Path:
94
+ return Path("cache")
95
+
96
+ def webgui(self, other_args: list[str] | None = None) -> Process:
97
+ """Launch the Rclone web GUI."""
98
+ cmd = ["rcd", "--rc-web-gui"]
99
+ if other_args:
100
+ cmd += other_args
101
+ return self._launch_process(cmd, capture=False)
102
+
103
+ def launch_server(
104
+ self,
105
+ addr: str,
106
+ user: str | None = None,
107
+ password: str | None = None,
108
+ other_args: list[str] | None = None,
109
+ ) -> Process:
110
+ """Launch the Rclone server so it can receive commands"""
111
+ cmd = ["rcd"]
112
+ if addr is not None:
113
+ cmd += ["--rc-addr", addr]
114
+ if user is not None:
115
+ cmd += ["--rc-user", user]
116
+ if password is not None:
117
+ cmd += ["--rc-pass", password]
118
+ if other_args:
119
+ cmd += other_args
120
+ out = self._launch_process(cmd, capture=False)
121
+ time.sleep(1) # Give it some time to launch
122
+ return out
123
+
124
+ def remote_control(
125
+ self,
126
+ addr: str,
127
+ user: str | None = None,
128
+ password: str | None = None,
129
+ capture: bool | None = None,
130
+ other_args: list[str] | None = None,
131
+ ) -> CompletedProcess:
132
+ cmd = ["rc"]
133
+ if addr:
134
+ cmd += ["--rc-addr", addr]
135
+ if user is not None:
136
+ cmd += ["--rc-user", user]
137
+ if password is not None:
138
+ cmd += ["--rc-pass", password]
139
+ if other_args:
140
+ cmd += other_args
141
+ cp = self._run(cmd, capture=capture)
142
+ return CompletedProcess.from_subprocess(cp)
143
+
144
+ def obscure(self, password: str) -> str:
145
+ """Obscure a password for use in rclone config files."""
146
+ cmd_list: list[str] = ["obscure", password]
147
+ cp = self._run(cmd_list)
148
+ return cp.stdout.strip()
149
+
150
+ def ls_stream_files(
151
+ self,
152
+ path: str,
153
+ max_depth: int = -1,
154
+ fast_list: bool = False,
155
+ ) -> Generator[FileItem, None, None]:
156
+ """List files in the given path"""
157
+ cmd = ["lsjson", path]
158
+ if max_depth < 0:
159
+ cmd.append("--recursive")
160
+ elif max_depth > 0:
161
+ cmd += ["--max-depth", str(max_depth)]
162
+ if fast_list:
163
+ cmd.append("--fast-list")
164
+ with self._launch_process(cmd, capture=True) as process:
165
+ for line in process.stdout:
166
+ linestr = line.decode("utf-8").strip()
167
+ if linestr.startswith("["):
168
+ continue
169
+ if linestr.endswith(","):
170
+ linestr = linestr[:-1]
171
+ if linestr.endswith("]"):
172
+ continue
173
+ fileitem: FileItem | None = FileItem.from_json_str(linestr)
174
+ if fileitem is None:
175
+ continue
176
+ yield fileitem
177
+
178
+ def ls(
179
+ self,
180
+ path: Dir | Remote | str,
181
+ max_depth: int | None = None,
182
+ glob: str | None = None,
183
+ order: Order = Order.NORMAL,
184
+ listing_option: ListingOption = ListingOption.ALL,
185
+ ) -> DirListing:
186
+ """List files in the given path.
187
+
188
+ Args:
189
+ path: Remote path or Remote object to list
190
+ max_depth: Maximum recursion depth (0 means no recursion)
191
+
192
+ Returns:
193
+ List of File objects found at the path
194
+ """
195
+
196
+ if isinstance(path, str):
197
+ path = Dir(
198
+ to_path(path, self)
199
+ ) # assume it's a directory if ls is being called.
200
+
201
+ cmd = ["lsjson"]
202
+ if max_depth is not None:
203
+ if max_depth < 0:
204
+ cmd.append("--recursive")
205
+ if max_depth > 0:
206
+ cmd.append("--max-depth")
207
+ cmd.append(str(max_depth))
208
+ if listing_option != ListingOption.ALL:
209
+ cmd.append(f"--{listing_option.value}")
210
+
211
+ cmd.append(str(path))
212
+ remote = path.remote if isinstance(path, Dir) else path
213
+ assert isinstance(remote, Remote)
214
+
215
+ cp = self._run(cmd, check=True)
216
+ text = cp.stdout
217
+ parent_path: str | None = None
218
+ if isinstance(path, Dir):
219
+ parent_path = path.path.path
220
+ paths: list[RPath] = RPath.from_json_str(text, remote, parent_path=parent_path)
221
+ # print(parent_path)
222
+ for o in paths:
223
+ o.set_rclone(self)
224
+
225
+ # do we have a glob pattern?
226
+ if glob is not None:
227
+ paths = [p for p in paths if fnmatch(p.path, glob)]
228
+
229
+ if order == Order.REVERSE:
230
+ paths.reverse()
231
+ elif order == Order.RANDOM:
232
+ random.shuffle(paths)
233
+ return DirListing(paths)
234
+
235
+ def listremotes(self) -> list[Remote]:
236
+ cmd = ["listremotes"]
237
+ cp = self._run(cmd)
238
+ text: str = cp.stdout
239
+ tmp = text.splitlines()
240
+ tmp = [t.strip() for t in tmp]
241
+ # strip out ":" from the end
242
+ tmp = [t.replace(":", "") for t in tmp]
243
+ out = [Remote(name=t, rclone=self) for t in tmp]
244
+ return out
245
+
246
+ def diff(
247
+ self,
248
+ src: str,
249
+ dst: str,
250
+ min_size: (
251
+ str | None
252
+ ) = None, # e. g. "1MB" - see rclone documentation: https://rclone.org/commands/rclone_check/
253
+ max_size: (
254
+ str | None
255
+ ) = None, # e. g. "1GB" - see rclone documentation: https://rclone.org/commands/rclone_check/
256
+ diff_option: DiffOption = DiffOption.COMBINED,
257
+ fast_list: bool = True,
258
+ size_only: bool | None = None,
259
+ checkers: int | None = None,
260
+ other_args: list[str] | None = None,
261
+ ) -> Generator[DiffItem, None, None]:
262
+ """Be extra careful with the src and dst values. If you are off by one
263
+ parent directory, you will get a huge amount of false diffs."""
264
+ other_args = other_args or []
265
+ if checkers is None or checkers < 1:
266
+ checkers = 1000
267
+ cmd = [
268
+ "check",
269
+ src,
270
+ dst,
271
+ "--checkers",
272
+ str(checkers),
273
+ "--log-level",
274
+ "INFO",
275
+ f"--{diff_option.value}",
276
+ "-",
277
+ ]
278
+ if size_only is None:
279
+ size_only = diff_option in [
280
+ DiffOption.MISSING_ON_DST,
281
+ DiffOption.MISSING_ON_SRC,
282
+ ]
283
+ if size_only:
284
+ cmd += ["--size-only"]
285
+ if fast_list:
286
+ cmd += ["--fast-list"]
287
+ if min_size:
288
+ cmd += ["--min-size", min_size]
289
+ if max_size:
290
+ cmd += ["--max-size", max_size]
291
+ if diff_option == DiffOption.MISSING_ON_DST:
292
+ cmd += ["--one-way"]
293
+ if other_args:
294
+ cmd += other_args
295
+ proc = self._launch_process(cmd, capture=True)
296
+ item: DiffItem
297
+ for item in diff_stream_from_running_process(
298
+ running_process=proc, src_slug=src, dst_slug=dst, diff_option=diff_option
299
+ ):
300
+ if item is None:
301
+ break
302
+ yield item
303
+
304
+ def walk(
305
+ self,
306
+ path: Dir | Remote | str,
307
+ max_depth: int = -1,
308
+ breadth_first: bool = True,
309
+ order: Order = Order.NORMAL,
310
+ ) -> Generator[DirListing, None, None]:
311
+ """Walk through the given path recursively.
312
+
313
+ Args:
314
+ path: Remote path or Remote object to walk through
315
+ max_depth: Maximum depth to traverse (-1 for unlimited)
316
+
317
+ Yields:
318
+ DirListing: Directory listing for each directory encountered
319
+ """
320
+ dir_obj: Dir
321
+ if isinstance(path, Dir):
322
+ # Create a Remote object for the path
323
+ remote = path.remote
324
+ rpath = RPath(
325
+ remote=remote,
326
+ path=path.path.path,
327
+ name=path.path.name,
328
+ size=0,
329
+ mime_type="inode/directory",
330
+ mod_time="",
331
+ is_dir=True,
332
+ )
333
+ rpath.set_rclone(self)
334
+ dir_obj = Dir(rpath)
335
+ elif isinstance(path, str):
336
+ dir_obj = Dir(to_path(path, self))
337
+ elif isinstance(path, Remote):
338
+ dir_obj = Dir(path)
339
+ else:
340
+ dir_obj = Dir(path) # shut up pyright
341
+ assert f"Invalid type for path: {type(path)}"
342
+
343
+ yield from walk(
344
+ dir_obj, max_depth=max_depth, breadth_first=breadth_first, order=order
345
+ )
346
+
347
+ def scan_missing_folders(
348
+ self,
349
+ src: Dir | Remote | str,
350
+ dst: Dir | Remote | str,
351
+ max_depth: int = -1,
352
+ order: Order = Order.NORMAL,
353
+ ) -> Generator[Dir, None, None]:
354
+ """Walk through the given path recursively.
355
+
356
+ WORK IN PROGRESS!!
357
+
358
+ Args:
359
+ src: Source directory or Remote to walk through
360
+ dst: Destination directory or Remote to walk through
361
+ max_depth: Maximum depth to traverse (-1 for unlimited)
362
+
363
+ Yields:
364
+ DirListing: Directory listing for each directory encountered
365
+ """
366
+ from rclone_api.scan_missing_folders import scan_missing_folders
367
+
368
+ src_dir = Dir(to_path(src, self))
369
+ dst_dir = Dir(to_path(dst, self))
370
+ yield from scan_missing_folders(
371
+ src=src_dir, dst=dst_dir, max_depth=max_depth, order=order
372
+ )
373
+
374
+ def cleanup(
375
+ self, path: str, other_args: list[str] | None = None
376
+ ) -> CompletedProcess:
377
+ """Cleanup any resources used by the Rclone instance."""
378
+ # rclone cleanup remote:path [flags]
379
+ cmd = ["cleanup", path]
380
+ if other_args:
381
+ cmd += other_args
382
+ out = self._run(cmd)
383
+ return CompletedProcess.from_subprocess(out)
384
+
385
+ def copy_to(
386
+ self,
387
+ src: File | str,
388
+ dst: File | str,
389
+ check: bool | None = None,
390
+ verbose: bool | None = None,
391
+ other_args: list[str] | None = None,
392
+ ) -> CompletedProcess:
393
+ """Copy one file from source to destination.
394
+
395
+ Warning - slow.
396
+
397
+ """
398
+ check = get_check(check)
399
+ verbose = get_verbose(verbose)
400
+ src = src if isinstance(src, str) else str(src.path)
401
+ dst = dst if isinstance(dst, str) else str(dst.path)
402
+ cmd_list: list[str] = ["copyto", src, dst]
403
+ if other_args is not None:
404
+ cmd_list += other_args
405
+ cp = self._run(cmd_list, check=check)
406
+ return CompletedProcess.from_subprocess(cp)
407
+
408
+ def copy_files(
409
+ self,
410
+ src: str,
411
+ dst: str,
412
+ files: list[str] | Path,
413
+ check: bool | None = None,
414
+ max_backlog: int | None = None,
415
+ verbose: bool | None = None,
416
+ checkers: int | None = None,
417
+ transfers: int | None = None,
418
+ low_level_retries: int | None = None,
419
+ retries: int | None = None,
420
+ retries_sleep: str | None = None,
421
+ metadata: bool | None = None,
422
+ timeout: str | None = None,
423
+ max_partition_workers: int | None = None,
424
+ multi_thread_streams: int | None = None,
425
+ other_args: list[str] | None = None,
426
+ ) -> list[CompletedProcess]:
427
+ """Copy multiple files from source to destination.
428
+
429
+ Args:
430
+ payload: Dictionary of source and destination file paths
431
+ """
432
+ check = get_check(check)
433
+ max_partition_workers = max_partition_workers or 1
434
+ low_level_retries = low_level_retries or 10
435
+ retries = retries or 3
436
+ other_args = other_args or []
437
+ checkers = checkers or 1000
438
+ transfers = transfers or 32
439
+ verbose = get_verbose(verbose)
440
+ payload: list[str] = (
441
+ files
442
+ if isinstance(files, list)
443
+ else [f.strip() for f in files.read_text().splitlines() if f.strip()]
444
+ )
445
+ if len(payload) == 0:
446
+ return []
447
+
448
+ for p in payload:
449
+ if ":" in p:
450
+ raise ValueError(
451
+ f"Invalid file path, contains a remote, which is not allowed for copy_files: {p}"
452
+ )
453
+
454
+ using_fast_list = "--fast-list" in other_args
455
+ if using_fast_list:
456
+ warnings.warn(
457
+ "It's not recommended to use --fast-list with copy_files as this will perform poorly on large repositories since the entire repository has to be scanned."
458
+ )
459
+
460
+ if max_partition_workers > 1:
461
+ datalists: dict[str, list[str]] = group_files(
462
+ payload, fully_qualified=False
463
+ )
464
+ else:
465
+ datalists = {"": payload}
466
+ # out: subprocess.CompletedProcess | None = None
467
+ out: list[CompletedProcess] = []
468
+
469
+ futures: list[Future] = []
470
+
471
+ with ThreadPoolExecutor(max_workers=max_partition_workers) as executor:
472
+ for common_prefix, files in datalists.items():
473
+
474
+ def _task(
475
+ files: list[str] | Path = files,
476
+ ) -> subprocess.CompletedProcess:
477
+ with TemporaryDirectory() as tmpdir:
478
+ filelist: list[str] = []
479
+ filepath: Path
480
+ if isinstance(files, list):
481
+ include_files_txt = Path(tmpdir) / "include_files.txt"
482
+ include_files_txt.write_text(
483
+ "\n".join(files), encoding="utf-8"
484
+ )
485
+ filelist = list(files)
486
+ filepath = Path(include_files_txt)
487
+ elif isinstance(files, Path):
488
+ filelist = [
489
+ f.strip()
490
+ for f in files.read_text().splitlines()
491
+ if f.strip()
492
+ ]
493
+ filepath = files
494
+ if common_prefix:
495
+ src_path = f"{src}/{common_prefix}"
496
+ dst_path = f"{dst}/{common_prefix}"
497
+ else:
498
+ src_path = src
499
+ dst_path = dst
500
+
501
+ if verbose:
502
+ nfiles = len(filelist)
503
+ files_fqdn = [f" {src_path}/{f}" for f in filelist]
504
+ print(f"Copying {nfiles} files:")
505
+ chunk_size = 100
506
+ for i in range(0, nfiles, chunk_size):
507
+ chunk = files_fqdn[i : i + chunk_size]
508
+ files_str = "\n".join(chunk)
509
+ print(f"{files_str}")
510
+ cmd_list: list[str] = [
511
+ "copy",
512
+ src_path,
513
+ dst_path,
514
+ "--files-from",
515
+ str(filepath),
516
+ "--checkers",
517
+ str(checkers),
518
+ "--transfers",
519
+ str(transfers),
520
+ "--low-level-retries",
521
+ str(low_level_retries),
522
+ "--retries",
523
+ str(retries),
524
+ ]
525
+ if metadata:
526
+ cmd_list.append("--metadata")
527
+ if retries_sleep is not None:
528
+ cmd_list += ["--retries-sleep", retries_sleep]
529
+ if timeout is not None:
530
+ cmd_list += ["--timeout", timeout]
531
+ if max_backlog is not None:
532
+ cmd_list += ["--max-backlog", str(max_backlog)]
533
+ if multi_thread_streams is not None:
534
+ cmd_list += [
535
+ "--multi-thread-streams",
536
+ str(multi_thread_streams),
537
+ ]
538
+ if verbose:
539
+ if not any(["-v" in x for x in other_args]):
540
+ cmd_list.append("-vvvv")
541
+ if not any(["--progress" in x for x in other_args]):
542
+ cmd_list.append("--progress")
543
+ if other_args:
544
+ cmd_list += other_args
545
+ out = self._run(cmd_list, capture=not verbose)
546
+ return out
547
+
548
+ fut: Future = executor.submit(_task)
549
+ futures.append(fut)
550
+ for fut in futures:
551
+ cp: subprocess.CompletedProcess = fut.result()
552
+ assert cp is not None
553
+ out.append(CompletedProcess.from_subprocess(cp))
554
+ if cp.returncode != 0:
555
+ if check:
556
+ raise ValueError(f"Error deleting files: {cp.stderr}")
557
+ else:
558
+ warnings.warn(f"Error deleting files: {cp.stderr}")
559
+ return out
560
+
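
A bulk-copy sketch under assumed remotes "src:bucket" and "dst:bucket". The file list holds paths relative to src (a "remote:" prefix raises ValueError), and max_partition_workers > 1 makes the method group the list by common prefix and copy each partition on its own worker thread:

    from pathlib import Path
    from rclone_api.rclone import Rclone

    rclone = Rclone(Path("rclone.conf"))  # hypothetical config file
    files = ["a/1.bin", "a/2.bin", "b/3.bin"]  # relative paths, no remote prefix
    results = rclone.copy_files(
        "src:bucket", "dst:bucket", files, max_partition_workers=2, check=True
    )
    for result in results:
        print(result)
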
561
+ def copy(
562
+ self,
563
+ src: Dir | str,
564
+ dst: Dir | str,
565
+ check: bool | None = None,
566
+ transfers: int | None = None,
567
+ checkers: int | None = None,
568
+ multi_thread_streams: int | None = None,
569
+ low_level_retries: int | None = None,
570
+ retries: int | None = None,
571
+ other_args: list[str] | None = None,
572
+ ) -> CompletedProcess:
573
+ """Copy files from source to destination.
574
+
575
+ Args:
576
+ src: Source directory
577
+ dst: Destination directory
578
+ """
579
+ # src_dir = src.path.path
580
+ # dst_dir = dst.path.path
581
+ src_dir = convert_to_str(src)
582
+ dst_dir = convert_to_str(dst)
583
+ check = get_check(check)
584
+ checkers = checkers or 1000
585
+ transfers = transfers or 32
586
+ low_level_retries = low_level_retries or 10
587
+ retries = retries or 3
588
+ cmd_list: list[str] = ["copy", src_dir, dst_dir]
589
+ cmd_list += ["--checkers", str(checkers)]
590
+ cmd_list += ["--transfers", str(transfers)]
591
+ cmd_list += ["--low-level-retries", str(low_level_retries)]
592
+ if multi_thread_streams is not None:
593
+ cmd_list += ["--multi-thread-streams", str(multi_thread_streams)]
594
+ if other_args:
595
+ cmd_list += other_args
596
+ cp = self._run(cmd_list, check=check, capture=False)
597
+ return CompletedProcess.from_subprocess(cp)
598
+
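
A directory-copy sketch with placeholder remotes; unspecified knobs fall back to the method's defaults (checkers=1000, transfers=32, low_level_retries=10, retries=3):

    from pathlib import Path
    from rclone_api.rclone import Rclone

    rclone = Rclone(Path("rclone.conf"))  # hypothetical config file
    # Roughly: rclone copy src:bucket/data dst:bucket/data --checkers 1000 --transfers 64 ...
    rclone.copy("src:bucket/data", "dst:bucket/data", transfers=64, check=True)
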
599
+ def purge(self, path: Dir | str) -> CompletedProcess:
600
+ """Purge a directory"""
601
+ # path should always be a string
602
+ path = path if isinstance(path, str) else str(path.path)
603
+ cmd_list: list[str] = ["purge", str(path)]
604
+ cp = self._run(cmd_list)
605
+ return CompletedProcess.from_subprocess(cp)
606
+
607
+ def delete_files(
608
+ self,
609
+ files: str | File | list[str] | list[File],
610
+ check: bool | None = None,
611
+ rmdirs=False,
612
+ verbose: bool | None = None,
613
+ max_partition_workers: int | None = None,
614
+ other_args: list[str] | None = None,
615
+ ) -> CompletedProcess:
616
+ """Delete a directory"""
617
+ check = get_check(check)
618
+ verbose = get_verbose(verbose)
619
+ payload: list[str] = convert_to_filestr_list(files)
620
+ if len(payload) == 0:
621
+ if verbose:
622
+ print("No files to delete")
623
+ cp = subprocess.CompletedProcess(
624
+ args=["rclone", "delete", "--files-from", "[]"],
625
+ returncode=0,
626
+ stdout="",
627
+ stderr="",
628
+ )
629
+ return CompletedProcess.from_subprocess(cp)
630
+
631
+ datalists: dict[str, list[str]] = group_files(payload)
632
+ completed_processes: list[subprocess.CompletedProcess] = []
633
+
634
+ futures: list[Future] = []
635
+
636
+ with ThreadPoolExecutor(max_workers=max_partition_workers) as executor:
637
+
638
+ for remote, files in datalists.items():
639
+
640
+ def _task(
641
+ files=files, check=check, remote=remote
642
+ ) -> subprocess.CompletedProcess:
643
+ with TemporaryDirectory() as tmpdir:
644
+ include_files_txt = Path(tmpdir) / "include_files.txt"
645
+ include_files_txt.write_text("\n".join(files), encoding="utf-8")
646
+
647
+ # print(include_files_txt)
648
+ cmd_list: list[str] = [
649
+ "delete",
650
+ remote,
651
+ "--files-from",
652
+ str(include_files_txt),
653
+ "--checkers",
654
+ "1000",
655
+ "--transfers",
656
+ "1000",
657
+ ]
658
+ if verbose:
659
+ cmd_list.append("-vvvv")
660
+ if rmdirs:
661
+ cmd_list.append("--rmdirs")
662
+ if other_args:
663
+ cmd_list += other_args
664
+ out = self._run(cmd_list, check=check)
665
+ if out.returncode != 0:
666
+ if check:
667
+ completed_processes.append(out)
668
+ raise ValueError(f"Error deleting files: {out}")
669
+ else:
670
+ warnings.warn(f"Error deleting files: {out}")
671
+ return out
672
+
673
+ fut: Future = executor.submit(_task)
674
+ futures.append(fut)
675
+
676
+ for fut in futures:
677
+ out = fut.result()
678
+ assert out is not None
679
+ completed_processes.append(out)
680
+
681
+ return CompletedProcess(completed_processes)
682
+
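
A deletion sketch with placeholder paths; the method groups fully qualified entries per remote and issues one batched "rclone delete --files-from" per group:

    from pathlib import Path
    from rclone_api.rclone import Rclone

    rclone = Rclone(Path("rclone.conf"))  # hypothetical config file
    doomed = ["dst:bucket/tmp/1.bin", "dst:bucket/tmp/2.bin"]  # placeholder files
    # rmdirs=True additionally removes directories emptied by the delete.
    rclone.delete_files(doomed, rmdirs=True, check=True)
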
683
+ @deprecated("delete_files")
684
+ def deletefiles(
685
+ self, files: str | File | list[str] | list[File]
686
+ ) -> CompletedProcess:
687
+ out = self.delete_files(files)
688
+ return out
689
+
690
+ def exists(self, path: Dir | Remote | str | File) -> bool:
691
+ """Check if a file or directory exists."""
692
+ arg: str = convert_to_str(path)
693
+ assert isinstance(arg, str)
694
+ try:
695
+ dir_listing = self.ls(arg)
696
+ # print(dir_listing)
697
+ return len(dir_listing.dirs) > 0 or len(dir_listing.files) > 0
698
+ except subprocess.CalledProcessError:
699
+ return False
700
+
701
+ def is_synced(self, src: str | Dir, dst: str | Dir) -> bool:
702
+ """Check if two directories are in sync."""
703
+ src = convert_to_str(src)
704
+ dst = convert_to_str(dst)
705
+ cmd_list: list[str] = ["check", str(src), str(dst)]
706
+ try:
707
+ self._run(cmd_list, check=True)
708
+ return True
709
+ except subprocess.CalledProcessError:
710
+ return False
711
+
712
+ def copy_file_resumable_s3(
713
+ self,
714
+ src: str,
715
+ dst: str,
716
+ save_state_json: Path,
717
+ chunk_size: SizeSuffix | None = None,
718
+ read_threads: int = 8,
719
+ write_threads: int = 8,
720
+ retries: int = 3,
721
+ verbose: bool | None = None,
722
+ max_chunks_before_suspension: int | None = None,
723
+ mount_log: Path | None = None,
724
+ ) -> MultiUploadResult:
725
+ """For massive files that rclone can't handle in one go, this function will copy the file in chunks to an S3 store"""
726
+ from rclone_api.s3.api import S3Client
727
+ from rclone_api.s3.create import S3Credentials
728
+ from rclone_api.util import S3PathInfo, split_s3_path
729
+
730
+ other_args: list[str] = ["--no-modtime", "--vfs-read-wait", "1s"]
731
+ chunk_size = chunk_size or SizeSuffix("64M")
732
+ unit_chunk_size = chunk_size / read_threads
733
+ tmp_mount_dir = self._get_tmp_mount_dir()
734
+ vfs_read_chunk_size = unit_chunk_size
735
+ vfs_read_chunk_size_limit = chunk_size
736
+ vfs_read_chunk_streams = read_threads
737
+ vfs_disk_space_total_size = chunk_size
738
+ assert (
739
+ chunk_size.as_int() % vfs_read_chunk_size.as_int() == 0
740
+ ), f"chunk_size {chunk_size} must be a multiple of vfs_read_chunk_size {vfs_read_chunk_size}"
741
+ other_args += ["--vfs-read-chunk-size", vfs_read_chunk_size.as_str()]
742
+ other_args += [
743
+ "--vfs-read-chunk-size-limit",
744
+ vfs_read_chunk_size_limit.as_str(),
745
+ ]
746
+ other_args += ["--vfs-read-chunk-streams", str(vfs_read_chunk_streams)]
747
+ other_args += [
748
+ "--vfs-disk-space-total-size",
749
+ vfs_disk_space_total_size.as_str(),
750
+ ]
751
+ other_args += ["--read-only"]
752
+ other_args += ["--direct-io"]
753
+ # --vfs-cache-max-size
754
+ other_args += ["--vfs-cache-max-size", vfs_disk_space_total_size.as_str()]
755
+ mount_path = tmp_mount_dir / "RCLONE_API_DYNAMIC_MOUNT"
756
+ src_path = Path(src)
757
+ name = src_path.name
758
+
759
+ src_parent_path = Path(src).parent.as_posix()
760
+ size_result: SizeResult = self.size_files(src_parent_path, [name])
761
+
762
+ target_size = SizeSuffix(size_result.total_size)
763
+ if target_size < SizeSuffix("5M"):
764
+ # fallback to normal copy
765
+ completed_proc = self.copy_to(src, dst, check=True)
766
+ if completed_proc.ok:
767
+ return MultiUploadResult.UPLOADED_FRESH
768
+ if size_result.total_size <= 0:
769
+ raise ValueError(
770
+ f"File {src} has size {size_result.total_size}, is this a directory?"
771
+ )
772
+
773
+ path_info: S3PathInfo = split_s3_path(dst)
774
+ remote = path_info.remote
775
+ bucket_name = path_info.bucket
776
+ s3_key = path_info.key
777
+ parsed: Parsed = self.config.parse()
778
+ sections: dict[str, Section] = parsed.sections
779
+ if remote not in sections:
780
+ raise ValueError(
781
+ f"Remote {remote} not found in rclone config, remotes are: {sections.keys()}"
782
+ )
783
+
784
+ section: Section = sections[remote]
785
+ dst_type = section.type()
786
+ if dst_type != "s3" and dst_type != "b2":
787
+ raise ValueError(
788
+ f"Remote {remote} is not an S3 remote, it is of type {dst_type}"
789
+ )
790
+
791
+ def get_provider_str(section=section) -> str | None:
792
+ type: str = section.type()
793
+ provider: str | None = section.provider()
794
+ if provider is not None:
795
+ return provider
796
+ if type == "b2":
797
+ return S3Provider.BACKBLAZE.value
798
+ if type != "s3":
799
+ raise ValueError(f"Remote {remote} is not an S3 remote")
800
+ return S3Provider.S3.value
801
+
802
+ provider: str
803
+ if provided_provider_str := get_provider_str():
804
+ if verbose:
805
+ print(f"Using provided provider: {provided_provider_str}")
806
+ provider = provided_provider_str
807
+ else:
808
+ if verbose:
809
+ print(f"Using default provider: {S3Provider.S3.value}")
810
+ provider = S3Provider.S3.value
811
+ provider_enum = S3Provider.from_str(provider)
812
+
813
+ s3_creds: S3Credentials = S3Credentials(
814
+ provider=provider_enum,
815
+ access_key_id=section.access_key_id(),
816
+ secret_access_key=section.secret_access_key(),
817
+ endpoint_url=section.endpoint(),
818
+ )
819
+
820
+ chunk_fetcher: MultiMountFileChunker = self.get_multi_mount_file_chunker(
821
+ src=src_path.as_posix(),
822
+ chunk_size=chunk_size,
823
+ threads=read_threads,
824
+ mount_log=mount_log,
825
+ direct_io=True,
826
+ )
827
+
828
+ client = S3Client(s3_creds)
829
+ upload_config: S3MutliPartUploadConfig = S3MutliPartUploadConfig(
830
+ chunk_size=chunk_size.as_int(),
831
+ chunk_fetcher=chunk_fetcher.fetch,
832
+ max_write_threads=write_threads,
833
+ retries=retries,
834
+ resume_path_json=save_state_json,
835
+ max_chunks_before_suspension=max_chunks_before_suspension,
836
+ )
837
+
838
+ src_file = mount_path / name
839
+
840
+ print(f"Uploading {name} to {s3_key} in bucket {bucket_name}")
841
+ print(f"Source: {src_path}")
842
+ print(f"bucket_name: {bucket_name}")
843
+ print(f"upload_config: {upload_config}")
844
+
845
+ # get the file size
846
+
847
+ upload_target = S3UploadTarget(
848
+ src_file=src_file,
849
+ src_file_size=size_result.total_size,
850
+ bucket_name=bucket_name,
851
+ s3_key=s3_key,
852
+ )
853
+
854
+ try:
855
+ out: MultiUploadResult = client.upload_file_multipart(
856
+ upload_target=upload_target,
857
+ upload_config=upload_config,
858
+ )
859
+ return out
860
+ except Exception as e:
861
+ print(f"Error uploading file: {e}")
862
+ traceback.print_exc()
863
+ raise
864
+ finally:
865
+ chunk_fetcher.shutdown()
866
+
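
A resumable-upload sketch; the remotes, file names, and state path are placeholders. chunk_size should split evenly across read_threads (the method asserts this), and save_state_json lets an interrupted transfer pick up where it left off:

    from pathlib import Path
    from rclone_api.rclone import Rclone
    from rclone_api.s3.types import MultiUploadResult
    from rclone_api.types import SizeSuffix

    rclone = Rclone(Path("rclone.conf"))  # hypothetical config file
    result = rclone.copy_file_resumable_s3(
        src="src:bucket/huge.tar",
        dst="dst:bucket/huge.tar",
        save_state_json=Path("upload_state.json"),  # resume point across runs
        chunk_size=SizeSuffix("128M"),              # 128M / 8 threads = 16M reads
        read_threads=8,
        write_threads=8,
    )
    if result == MultiUploadResult.UPLOADED_FRESH:
        print("uploaded in one pass")
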
867
+ def get_multi_mount_file_chunker(
868
+ self,
869
+ src: str,
870
+ chunk_size: SizeSuffix,
871
+ threads: int,
872
+ mount_log: Path | None,
873
+ direct_io: bool,
874
+ ) -> MultiMountFileChunker:
875
+ from rclone_api.util import random_str
876
+
877
+ mounts: list[Mount] = []
878
+ vfs_read_chunk_size = chunk_size
879
+ vfs_read_chunk_size_limit = chunk_size
880
+ vfs_read_chunk_streams = 0
881
+ vfs_disk_space_total_size = chunk_size
882
+ other_args: list[str] = []
883
+ other_args += ["--no-modtime"]
884
+ other_args += ["--vfs-read-chunk-size", vfs_read_chunk_size.as_str()]
885
+ other_args += [
886
+ "--vfs-read-chunk-size-limit",
887
+ vfs_read_chunk_size_limit.as_str(),
888
+ ]
889
+ other_args += ["--vfs-read-chunk-streams", str(vfs_read_chunk_streams)]
890
+ other_args += [
891
+ "--vfs-disk-space-total-size",
892
+ vfs_disk_space_total_size.as_str(),
893
+ ]
894
+ other_args += ["--read-only"]
895
+ if direct_io:
896
+ other_args += ["--direct-io"]
897
+
898
+ base_mount_dir = self._get_tmp_mount_dir()
899
+ base_cache_dir = self._get_cache_dir()
900
+
901
+ filename = Path(src).name
902
+ with ThreadPoolExecutor(max_workers=threads) as executor:
903
+ futures: list[Future] = []
904
+ try:
905
+ for i in range(threads):
906
+ tmp_mnts = base_mount_dir / random_str(12)
907
+ verbose = mount_log is not None
908
+
909
+ src_parent_path = Path(src).parent.as_posix()
910
+ cache_dir = base_cache_dir / random_str(12)
911
+
912
+ def task(
913
+ src_parent_path=src_parent_path,
914
+ tmp_mnts=tmp_mnts,
915
+ cache_dir=cache_dir,
916
+ ):
917
+ clean_mount(tmp_mnts, verbose=verbose)
918
+ prepare_mount(tmp_mnts, verbose=verbose)
919
+ return self.mount(
920
+ src=src_parent_path,
921
+ outdir=tmp_mnts,
922
+ allow_writes=False,
923
+ use_links=True,
924
+ vfs_cache_mode="minimal",
925
+ verbose=False,
926
+ cache_dir=cache_dir,
927
+ cache_dir_delete_on_exit=True,
928
+ log=mount_log,
929
+ other_args=other_args,
930
+ )
931
+
932
+ futures.append(executor.submit(task))
933
+ mount_errors: list[Exception] = []
934
+ for fut in futures:
935
+ try:
936
+ mount = fut.result()
937
+ mounts.append(mount)
938
+ except Exception as er:
939
+ warnings.warn(f"Error mounting: {er}")
940
+ mount_errors.append(er)
941
+ if mount_errors:
942
+ warnings.warn(f"Error mounting: {mount_errors}")
943
+ raise Exception(mount_errors)
944
+ except Exception:
945
+ for mount in mounts:
946
+ mount.close()
947
+ raise
948
+
949
+ src_path: Path = Path(src)
950
+ src_parent_path = src_path.parent.as_posix()
951
+ name = src_path.name
952
+ size_result: SizeResult = self.size_files(src_parent_path, [name])
953
+ filesize = size_result.total_size
954
+
955
+ executor = ThreadPoolExecutor(max_workers=threads)
956
+ filechunker: MultiMountFileChunker = MultiMountFileChunker(
957
+ filename=filename,
958
+ filesize=filesize,
959
+ mounts=mounts,
960
+ executor=executor,
961
+ verbose=mount_log is not None,
962
+ )
963
+ return filechunker
964
+
965
+ def copy_bytes(
966
+ self,
967
+ src: str,
968
+ offset: int | SizeSuffix,
969
+ length: int | SizeSuffix,
970
+ outfile: Path,
971
+ other_args: list[str] | None = None,
972
+ ) -> Exception | None:
973
+ """Copy a slice of bytes from the src file to dst."""
974
+ offset = SizeSuffix(offset).as_int()
975
+ length = SizeSuffix(length).as_int()
976
+ cmd_list: list[str] = [
977
+ "cat",
978
+ "--offset",
979
+ str(offset),
980
+ "--count",
981
+ str(length),
982
+ src,
983
+ ]
984
+ if other_args:
985
+ cmd_list.extend(other_args)
986
+ try:
987
+ cp = self._run(cmd_list, capture=outfile)
988
+ if cp.returncode == 0:
989
+ return None
990
+ return Exception(cp.stderr)
991
+ except subprocess.CalledProcessError as e:
992
+ return e
993
+
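
A byte-range sketch with placeholder paths; under the hood this is "rclone cat --offset N --count M" captured into the output file:

    from pathlib import Path
    from rclone_api.rclone import Rclone
    from rclone_api.types import SizeSuffix

    rclone = Rclone(Path("rclone.conf"))  # hypothetical config file
    err = rclone.copy_bytes(
        src="src:bucket/huge.tar",
        offset=SizeSuffix("1M"),    # start of the slice
        length=SizeSuffix("64K"),   # number of bytes to read
        outfile=Path("slice.bin"),
    )
    if err is not None:
        raise err
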
994
+ def copy_bytes_mount(
995
+ self,
996
+ src: str,
997
+ offset: int | SizeSuffix,
998
+ length: int | SizeSuffix,
999
+ chunk_size: SizeSuffix,
1000
+ max_threads: int = 1,
1001
+ # If outfile is supplied, the bytes are written to that file and success returns empty bytes (bytes(0))
1002
+ outfile: Path | None = None,
1003
+ mount_log: Path | None = None,
1004
+ direct_io: bool = True,
1005
+ ) -> bytes | Exception:
1006
+ """Copy a slice of bytes from the src file to dst. Parallelism is achieved through multiple mounted files."""
1007
+ from rclone_api.types import FilePart
1008
+
1009
+ offset = SizeSuffix(offset).as_int()
1010
+ length = SizeSuffix(length).as_int()
1011
+ # determine number of threads from chunk size
1012
+ threads = max(1, min(max_threads, length // chunk_size.as_int()))
1013
+ # todo - implement max threads.
1014
+ filechunker = self.get_multi_mount_file_chunker(
1015
+ src=src,
1016
+ chunk_size=chunk_size,
1017
+ threads=threads,
1018
+ mount_log=mount_log,
1019
+ direct_io=direct_io,
1020
+ )
1021
+ try:
1022
+ fut = filechunker.fetch(offset, length, extra=None)
1023
+ fp: FilePart = fut.result()
1024
+ payload = fp.payload
1025
+ if isinstance(payload, Exception):
1026
+ return payload
1027
+ try:
1028
+ if outfile is None:
1029
+ return payload.read_bytes()
1030
+ shutil.move(payload, outfile)
1031
+ return bytes(0)
1032
+ finally:
1033
+ fp.close()
1034
+
1035
+ except Exception as e:
1036
+ warnings.warn(f"Error copying bytes: {e}")
1037
+ return e
1038
+ finally:
1039
+ try:
1040
+ filechunker.shutdown()
1041
+ except Exception as e:
1042
+ warnings.warn(f"Error closing filechunker: {e}")
1043
+
1044
+ def copy_dir(
1045
+ self, src: str | Dir, dst: str | Dir, args: list[str] | None = None
1046
+ ) -> CompletedProcess:
1047
+ """Copy a directory from source to destination."""
1048
+ # convert src to str, also dst
1049
+ src = convert_to_str(src)
1050
+ dst = convert_to_str(dst)
1051
+ cmd_list: list[str] = ["copy", src, dst]
1052
+ if args is not None:
1053
+ cmd_list += args
1054
+ cp = self._run(cmd_list)
1055
+ return CompletedProcess.from_subprocess(cp)
1056
+
1057
+ def copy_remote(
1058
+ self, src: Remote, dst: Remote, args: list[str] | None = None
1059
+ ) -> CompletedProcess:
1060
+ """Copy a remote to another remote."""
1061
+ cmd_list: list[str] = ["copy", str(src), str(dst)]
1062
+ if args is not None:
1063
+ cmd_list += args
1064
+ # return self._run(cmd_list)
1065
+ cp = self._run(cmd_list)
1066
+ return CompletedProcess.from_subprocess(cp)
1067
+
1068
+ def mount(
1069
+ self,
1070
+ src: Remote | Dir | str,
1071
+ outdir: Path,
1072
+ allow_writes: bool | None = False,
1073
+ use_links: bool | None = None,
1074
+ vfs_cache_mode: str | None = None,
1075
+ verbose: bool | None = None,
1076
+ cache_dir: Path | None = None,
1077
+ cache_dir_delete_on_exit: bool | None = None,
1078
+ log: Path | None = None,
1079
+ other_args: list[str] | None = None,
1080
+ ) -> Mount:
1081
+ """Mount a remote or directory to a local path.
1082
+
1083
+ Args:
1084
+ src: Remote or directory to mount
1085
+ outdir: Local path to mount to
1086
+
1087
+ Returns:
1088
+ Mount: handle for the mounted filesystem; call close() to unmount
1089
+
1090
+ Raises:
1091
+ subprocess.CalledProcessError: If the mount operation fails
1092
+ """
1093
+
1094
+ allow_writes = allow_writes or False
1095
+ use_links = True if use_links is None else use_links
1096
+ verbose = get_verbose(verbose) or (log is not None)
1097
+ vfs_cache_mode = vfs_cache_mode or "full"
1098
+ clean_mount(outdir, verbose=verbose)
1099
+ prepare_mount(outdir, verbose=verbose)
1100
+ debug_fuse = log is not None
1101
+ src_str = convert_to_str(src)
1102
+ cmd_list: list[str] = ["mount", src_str, str(outdir)]
1103
+ if not allow_writes:
1104
+ cmd_list.append("--read-only")
1105
+ if use_links:
1106
+ cmd_list.append("--links")
1107
+ if vfs_cache_mode:
1108
+ cmd_list.append("--vfs-cache-mode")
1109
+ cmd_list.append(vfs_cache_mode)
1110
+ if cache_dir:
1111
+ cmd_list.append("--cache-dir")
1112
+ cmd_list.append(str(cache_dir.absolute()))
1113
+ if debug_fuse:
1114
+ cmd_list.append("--debug-fuse")
1115
+ if verbose:
1116
+ cmd_list.append("-vvvv")
1117
+ if other_args:
1118
+ cmd_list += other_args
1119
+ proc = self._launch_process(cmd_list, log=log)
1120
+ mount_read_only = not allow_writes
1121
+ mount: Mount = Mount(
1122
+ src=src_str,
1123
+ mount_path=outdir,
1124
+ process=proc,
1125
+ read_only=mount_read_only,
1126
+ cache_dir=cache_dir,
1127
+ cache_dir_delete_on_exit=cache_dir_delete_on_exit,
1128
+ )
1129
+ return mount
1130
+
1131
+ @contextmanager
1132
+ def scoped_mount(
1133
+ self,
1134
+ src: Remote | Dir | str,
1135
+ outdir: Path,
1136
+ allow_writes: bool | None = None,
1137
+ use_links: bool | None = None,
1138
+ vfs_cache_mode: str | None = None,
1139
+ verbose: bool | None = None,
1140
+ log: Path | None = None,
1141
+ cache_dir: Path | None = None,
1142
+ cache_dir_delete_on_exit: bool | None = None,
1143
+ other_args: list[str] | None = None,
1144
+ ) -> Generator[Mount, None, None]:
1145
+ """Like mount, but can be used in a context manager."""
1146
+ error_happened = False
1147
+ mount: Mount = self.mount(
1148
+ src,
1149
+ outdir,
1150
+ allow_writes=allow_writes,
1151
+ use_links=use_links,
1152
+ vfs_cache_mode=vfs_cache_mode,
1153
+ verbose=verbose,
1154
+ cache_dir=cache_dir,
1155
+ cache_dir_delete_on_exit=cache_dir_delete_on_exit,
1156
+ log=log,
1157
+ other_args=other_args,
1158
+ )
1159
+ try:
1160
+ yield mount
1161
+ except Exception as e:
1162
+ error_happened = True
1163
+ stack_trace = traceback.format_exc()
1164
+ warnings.warn(f"Error in scoped_mount: {e}\n\nStack Trace:\n{stack_trace}")
1165
+ raise
1166
+ finally:
1167
+ if not error_happened or (not allow_writes):
1168
+ mount.close()
1169
+
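
A context-manager sketch; the remote and mount point are placeholders, and a working FUSE setup is assumed. The mount is torn down automatically on exit:

    from pathlib import Path
    from rclone_api.rclone import Rclone

    rclone = Rclone(Path("rclone.conf"))  # hypothetical config file
    with rclone.scoped_mount("src:bucket", Path("./mnt")) as mount:
        # While inside the block the remote is browsable as local files.
        for p in Path("./mnt").iterdir():
            print(p)
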
1170
+ # Settings optimized for s3.
1171
+ def mount_s3(
1172
+ self,
1173
+ url: str,
1174
+ outdir: Path,
1175
+ allow_writes=False,
1176
+ vfs_cache_mode="full",
1177
+ dir_cache_time: str | None = "1h",
1178
+ attribute_timeout: str | None = "1h",
1179
+ vfs_disk_space_total_size: str | None = "100M",
1180
+ transfers: int | None = 128,
1181
+ modtime_strategy: (
1182
+ ModTimeStrategy | None
1183
+ ) = ModTimeStrategy.USE_SERVER_MODTIME, # speeds up S3 operations
1184
+ vfs_read_chunk_streams: int | None = 16,
1185
+ vfs_read_chunk_size: str | None = "4M",
1186
+ vfs_fast_fingerprint: bool = True,
1187
+ # vfs-refresh
1188
+ vfs_refresh: bool = True,
1189
+ other_args: list[str] | None = None,
1190
+ ) -> Mount:
1191
+ """Mount a remote or directory to a local path.
1192
+
1193
+ Args:
1194
+ url: Remote or directory to mount
1195
+ outdir: Local path to mount to
1196
+ """
1197
+ other_args = other_args or []
1198
+ if modtime_strategy is not None:
1199
+ other_args.append(f"--{modtime_strategy.value}")
1200
+ if (vfs_cache_mode == "full" or vfs_cache_mode == "writes") and (
1201
+ transfers is not None and "--transfers" not in other_args
1202
+ ):
1203
+ other_args.append("--transfers")
1204
+ other_args.append(str(transfers))
1205
+ if dir_cache_time is not None and "--dir-cache-time" not in other_args:
1206
+ other_args.append("--dir-cache-time")
1207
+ other_args.append(dir_cache_time)
1208
+ if (
1209
+ vfs_disk_space_total_size is not None
1210
+ and "--vfs-cache-max-size" not in other_args
1211
+ ):
1212
+ other_args.append("--vfs-cache-max-size")
1213
+ other_args.append(vfs_disk_space_total_size)
1214
+ if vfs_refresh and "--vfs-refresh" not in other_args:
1215
+ other_args.append("--vfs-refresh")
1216
+ if attribute_timeout is not None and "--attr-timeout" not in other_args:
1217
+ other_args.append("--attr-timeout")
1218
+ other_args.append(attribute_timeout)
1219
+ if vfs_read_chunk_streams:
1220
+ other_args.append("--vfs-read-chunk-streams")
1221
+ other_args.append(str(vfs_read_chunk_streams))
1222
+ if vfs_read_chunk_size:
1223
+ other_args.append("--vfs-read-chunk-size")
1224
+ other_args.append(vfs_read_chunk_size)
1225
+ if vfs_fast_fingerprint:
1226
+ other_args.append("--vfs-fast-fingerprint")
1227
+
1228
+ other_args = other_args if other_args else None
1229
+ return self.mount(
1230
+ url,
1231
+ outdir,
1232
+ allow_writes=allow_writes,
1233
+ vfs_cache_mode=vfs_cache_mode,
1234
+ other_args=other_args,
1235
+ )
1236
+
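
An S3-tuned mount sketch with placeholder names; the defaults above translate into flags such as --use-server-modtime, --vfs-read-chunk-streams 16, and --vfs-refresh:

    from pathlib import Path
    from rclone_api.rclone import Rclone

    rclone = Rclone(Path("rclone.conf"))  # hypothetical config file
    mount = rclone.mount_s3("dst:bucket", Path("./mnt"))
    try:
        print(list(Path("./mnt").iterdir()))
    finally:
        mount.close()  # unmount when done
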
1237
+ def serve_webdav(
1238
+ self,
1239
+ src: Remote | Dir | str,
1240
+ user: str,
1241
+ password: str,
1242
+ addr: str = "localhost:2049",
1243
+ allow_other: bool = False,
1244
+ other_args: list[str] | None = None,
1245
+ ) -> Process:
1246
+ """Serve a remote or directory via NFS.
1247
+
1248
+ Args:
1249
+ src: Remote or directory to serve
1250
+ addr: Network address and port to serve on (default: localhost:2049)
1251
+ allow_other: Allow other users to access the share
1252
+
1253
+ Returns:
1254
+ Process: The running WebDAV server process
1255
+
1256
+ Raises:
1257
+ ValueError: If the WebDAV server fails to start
1258
+ """
1259
+ src_str = convert_to_str(src)
1260
+ cmd_list: list[str] = ["serve", "webdav", "--addr", addr, src_str]
1261
+ cmd_list.extend(["--user", user, "--pass", password])
1262
+ if allow_other:
1263
+ cmd_list.append("--allow-other")
1264
+ if other_args:
1265
+ cmd_list += other_args
1266
+ proc = self._launch_process(cmd_list)
1267
+ time.sleep(2) # give it a moment to start
1268
+ if proc.poll() is not None:
1269
+ raise ValueError("NFS serve process failed to start")
1270
+ return proc
1271
+
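
A serving sketch; the credentials and address are placeholders, and it is assumed the returned Process wrapper exposes the usual subprocess-style termination call alongside the poll() used above:

    from pathlib import Path
    from rclone_api.rclone import Rclone

    rclone = Rclone(Path("rclone.conf"))  # hypothetical config file
    proc = rclone.serve_webdav(
        "src:bucket", user="user", password="secret", addr="localhost:8080"
    )
    try:
        assert proc.poll() is None  # server is up; point a WebDAV client at :8080
    finally:
        proc.kill()  # assumed termination method on the Process wrapper
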
1272
+ def size_files(
1273
+ self,
1274
+ src: str,
1275
+ files: list[str],
1276
+ fast_list: bool = False, # recommended to keep this False; see warning below
1277
+ other_args: list[str] | None = None,
1278
+ check: bool | None = False,
1279
+ verbose: bool | None = None,
1280
+ ) -> SizeResult:
1281
+ """Get the size of a list of files. Example of files items: "remote:bucket/to/file"."""
1282
+ verbose = get_verbose(verbose)
1283
+ check = get_check(check)
1284
+ if fast_list or (other_args and "--fast-list" in other_args):
1285
+ warnings.warn(
1286
+ "It's not recommended to use --fast-list with size_files as this will perform poorly on large repositories since the entire repository has to be scanned."
1287
+ )
1288
+ files = list(files)
1289
+ all_files: list[File] = []
1290
+ # prefix, files = group_under_one_prefix(src, files)
1291
+ cmd = ["lsjson", src, "--files-only", "-R"]
1292
+ with TemporaryDirectory() as tmpdir:
1293
+ # print("files: " + ",".join(files))
1294
+ include_files_txt = Path(tmpdir) / "include_files.txt"
1295
+ include_files_txt.write_text("\n".join(files), encoding="utf-8")
1296
+ cmd += ["--files-from", str(include_files_txt)]
1297
+ if fast_list:
1298
+ cmd.append("--fast-list")
1299
+ if other_args:
1300
+ cmd += other_args
1301
+ cp = self._run(cmd, check=check)
1302
+
1303
+ if cp.returncode != 0:
1304
+ if check:
1305
+ raise ValueError(f"Error getting file sizes: {cp.stderr}")
1306
+ else:
1307
+ warnings.warn(f"Error getting file sizes: {cp.stderr}")
1308
+ stdout = cp.stdout
1309
+ pieces = src.split(":", 1)
1310
+ remote_name = pieces[0]
1311
+ parent_path: str | None
1312
+ if len(pieces) > 1:
1313
+ parent_path = pieces[1]
1314
+ else:
1315
+ parent_path = None
1316
+ remote = Remote(name=remote_name, rclone=self)
1317
+ paths: list[RPath] = RPath.from_json_str(
1318
+ stdout, remote, parent_path=parent_path
1319
+ )
1320
+ # print(paths)
1321
+ all_files += [File(p) for p in paths]
1322
+ file_sizes: dict[str, int] = {}
1323
+ f: File
1324
+ for f in all_files:
1325
+ p = f.to_string(include_remote=True)
1326
+ if p in file_sizes:
1327
+ warnings.warn(f"Duplicate file found: {p}")
1328
+ continue
1329
+ size = f.size
1330
+ if size == 0:
1331
+ warnings.warn(f"File size is 0: {p}")
1332
+ file_sizes[p] = f.size
1333
+ total_size = sum(file_sizes.values())
1334
+ file_sizes_path_corrected: dict[str, int] = {}
1335
+ for path, size in file_sizes.items():
1336
+ # remove the prefix
1337
+ path_path = Path(path)
1338
+ path_str = path_path.relative_to(src).as_posix()
1339
+ file_sizes_path_corrected[path_str] = size
1340
+ out: SizeResult = SizeResult(
1341
+ prefix=src, total_size=total_size, file_sizes=file_sizes_path_corrected
1342
+ )
1343
+ return out
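
A sizing sketch with placeholder names; files are paths relative to src, and the result exposes both the aggregate and per-file byte counts:

    from pathlib import Path
    from rclone_api.rclone import Rclone
    from rclone_api.types import SizeResult

    rclone = Rclone(Path("rclone.conf"))  # hypothetical config file
    result: SizeResult = rclone.size_files("dst:bucket", ["a/1.bin", "a/2.bin"])
    print(result.total_size)  # sum of sizes in bytes
    print(result.file_sizes)  # {relative path: size}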