rclone-api 1.3.15-py2.py3-none-any.whl → 1.3.18-py2.py3-none-any.whl

This diff shows the changes between publicly available package versions as published to their respective public registries. It is provided for informational purposes only.
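The bulk of this diff is the `rclone_api/rclone.py` module, shown first as removed from 1.3.15 and then as re-added in 1.3.18 (the visible changes include the relocated `FilesStream` class and new docstrings on `ls_stream` and `save_to_db`). As a rough orientation only, a hypothetical caller of the streaming listing API that appears in the added code might look like the sketch below; the remote path, config file name, and database URL are placeholders and are not part of the package diff.

```python
from pathlib import Path

from rclone_api.rclone import Rclone

# Placeholder config path and remote; substitute entries from your own rclone.conf.
rclone = Rclone(Path("rclone.conf"))

# Stream file entries without holding the whole listing in memory,
# using the FilesStream context manager defined in the diff below.
with rclone.ls_stream("remote:bucket/path", max_depth=-1) as stream:
    for page in stream.files_paged(page_size=1000):
        print(f"received a page of {len(page)} files")

# Or persist an entire listing to a database in one call, as documented in 1.3.18.
rclone.save_to_db("remote:bucket", "sqlite:///files.db", max_depth=-1)
```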
rclone_api/rclone.py CHANGED
@@ -1,1419 +1,1408 @@
1
- """
2
- Unit test file.
3
- """
4
-
5
- import os
6
- import random
7
- import shutil
8
- import subprocess
9
- import time
10
- import traceback
11
- import warnings
12
- from concurrent.futures import Future, ThreadPoolExecutor
13
- from contextlib import contextmanager
14
- from fnmatch import fnmatch
15
- from pathlib import Path
16
- from tempfile import TemporaryDirectory
17
- from typing import Generator
18
-
19
- from rclone_api import Dir
20
- from rclone_api.completed_process import CompletedProcess
21
- from rclone_api.config import Config, Parsed, Section
22
- from rclone_api.convert import convert_to_filestr_list, convert_to_str
23
- from rclone_api.deprecated import deprecated
24
- from rclone_api.diff import DiffItem, DiffOption, diff_stream_from_running_process
25
- from rclone_api.dir_listing import DirListing
26
- from rclone_api.exec import RcloneExec
27
- from rclone_api.file import File, FileItem
28
- from rclone_api.group_files import group_files
29
- from rclone_api.mount import Mount, clean_mount, prepare_mount
30
- from rclone_api.mount_read_chunker import MultiMountFileChunker
31
- from rclone_api.process import Process
32
- from rclone_api.remote import Remote
33
- from rclone_api.rpath import RPath
34
- from rclone_api.s3.types import (
35
- MultiUploadResult,
36
- S3MutliPartUploadConfig,
37
- S3Provider,
38
- S3UploadTarget,
39
- )
40
- from rclone_api.types import (
41
- ListingOption,
42
- ModTimeStrategy,
43
- Order,
44
- SizeResult,
45
- SizeSuffix,
46
- )
47
- from rclone_api.util import (
48
- get_check,
49
- get_rclone_exe,
50
- get_verbose,
51
- to_path,
52
- )
53
- from rclone_api.walk import walk
54
-
55
-
56
- def rclone_verbose(verbose: bool | None) -> bool:
57
- if verbose is not None:
58
- os.environ["RCLONE_API_VERBOSE"] = "1" if verbose else "0"
59
- return bool(int(os.getenv("RCLONE_API_VERBOSE", "0")))
60
-
61
-
62
- def _to_rclone_conf(config: Config | Path) -> Config:
63
- if isinstance(config, Path):
64
- content = config.read_text(encoding="utf-8")
65
- return Config(content)
66
- else:
67
- return config
68
-
69
-
70
- # class closing(AbstractContextManager):
71
- # """Context to automatically close something at the end of a block.
72
-
73
- # Code like this:
74
-
75
- # with closing(<module>.open(<arguments>)) as f:
76
- # <block>
77
-
78
- # is equivalent to this:
79
-
80
- # f = <module>.open(<arguments>)
81
- # try:
82
- # <block>
83
- # finally:
84
- # f.close()
85
-
86
- # """
87
- # def __init__(self, thing):
88
- # self.thing = thing
89
- # def __enter__(self):
90
- # return self.thing
91
- # def __exit__(self, *exc_info):
92
- # self.thing.close()
93
-
94
-
95
- # Process
96
-
97
-
98
- class FilesStream:
99
-
100
- def __init__(self, path: str, process: Process) -> None:
101
- self.path = path
102
- self.process = process
103
-
104
- def __enter__(self) -> "FilesStream":
105
- self.process.__enter__()
106
- return self
107
-
108
- def __exit__(self, *exc_info):
109
- self.process.__exit__(*exc_info)
110
-
111
- def files(self) -> Generator[FileItem, None, None]:
112
- for line in self.process.stdout:
113
- linestr = line.decode("utf-8").strip()
114
- if linestr.startswith("["):
115
- continue
116
- if linestr.endswith(","):
117
- linestr = linestr[:-1]
118
- if linestr.endswith("]"):
119
- continue
120
- fileitem: FileItem | None = FileItem.from_json_str(self.path, linestr)
121
- if fileitem is None:
122
- continue
123
- yield fileitem
124
-
125
- def files_paged(
126
- self, page_size: int = 1000
127
- ) -> Generator[list[FileItem], None, None]:
128
- page: list[FileItem] = []
129
- for fileitem in self.files():
130
- page.append(fileitem)
131
- if len(page) >= page_size:
132
- yield page
133
- page = []
134
- if len(page) > 0:
135
- yield page
136
-
137
- def __iter__(self) -> Generator[FileItem, None, None]:
138
- return self.files()
139
-
140
-
141
- class Rclone:
142
- def __init__(
143
- self, rclone_conf: Path | Config, rclone_exe: Path | None = None
144
- ) -> None:
145
- if isinstance(rclone_conf, Path):
146
- if not rclone_conf.exists():
147
- raise ValueError(f"Rclone config file not found: {rclone_conf}")
148
- self._exec = RcloneExec(rclone_conf, get_rclone_exe(rclone_exe))
149
- self.config: Config = _to_rclone_conf(rclone_conf)
150
-
151
- def _run(
152
- self, cmd: list[str], check: bool = False, capture: bool | Path | None = None
153
- ) -> subprocess.CompletedProcess:
154
- return self._exec.execute(cmd, check=check, capture=capture)
155
-
156
- def _launch_process(
157
- self, cmd: list[str], capture: bool | None = None, log: Path | None = None
158
- ) -> Process:
159
- return self._exec.launch_process(cmd, capture=capture, log=log)
160
-
161
- def _get_tmp_mount_dir(self) -> Path:
162
- return Path("tmp_mnts")
163
-
164
- def _get_cache_dir(self) -> Path:
165
- return Path("cache")
166
-
167
- def webgui(self, other_args: list[str] | None = None) -> Process:
168
- """Launch the Rclone web GUI."""
169
- cmd = ["rcd", "--rc-web-gui"]
170
- if other_args:
171
- cmd += other_args
172
- return self._launch_process(cmd, capture=False)
173
-
174
- def launch_server(
175
- self,
176
- addr: str,
177
- user: str | None = None,
178
- password: str | None = None,
179
- other_args: list[str] | None = None,
180
- ) -> Process:
181
- """Launch the Rclone server so it can receive commands"""
182
- cmd = ["rcd"]
183
- if addr is not None:
184
- cmd += ["--rc-addr", addr]
185
- if user is not None:
186
- cmd += ["--rc-user", user]
187
- if password is not None:
188
- cmd += ["--rc-pass", password]
189
- if other_args:
190
- cmd += other_args
191
- out = self._launch_process(cmd, capture=False)
192
- time.sleep(1) # Give it some time to launch
193
- return out
194
-
195
- def remote_control(
196
- self,
197
- addr: str,
198
- user: str | None = None,
199
- password: str | None = None,
200
- capture: bool | None = None,
201
- other_args: list[str] | None = None,
202
- ) -> CompletedProcess:
203
- cmd = ["rc"]
204
- if addr:
205
- cmd += ["--rc-addr", addr]
206
- if user is not None:
207
- cmd += ["--rc-user", user]
208
- if password is not None:
209
- cmd += ["--rc-pass", password]
210
- if other_args:
211
- cmd += other_args
212
- cp = self._run(cmd, capture=capture)
213
- return CompletedProcess.from_subprocess(cp)
214
-
215
- def obscure(self, password: str) -> str:
216
- """Obscure a password for use in rclone config files."""
217
- cmd_list: list[str] = ["obscure", password]
218
- cp = self._run(cmd_list)
219
- return cp.stdout.strip()
220
-
221
- def ls_stream(
222
- self,
223
- path: str,
224
- max_depth: int = -1,
225
- fast_list: bool = False,
226
- ) -> FilesStream:
227
- """List files in the given path"""
228
- cmd = ["lsjson", path, "--files-only"]
229
- recurse = max_depth < 0 or max_depth > 1
230
- if recurse:
231
- cmd.append("-R")
232
- if max_depth > 1:
233
- cmd += ["--max-depth", str(max_depth)]
234
- if fast_list:
235
- cmd.append("--fast-list")
236
- streamer = FilesStream(path, self._launch_process(cmd, capture=True))
237
- return streamer
238
-
239
- def save_to_db(
240
- self,
241
- src: str,
242
- db_url: str,
243
- max_depth: int = -1,
244
- fast_list: bool = False,
245
- ) -> None:
246
- """Save files to a database (sqlite, mysql, postgres)"""
247
- from rclone_api.db import DB
248
-
249
- db = DB(db_url)
250
- with self.ls_stream(src, max_depth, fast_list) as stream:
251
- for page in stream.files_paged(page_size=10000):
252
- db.add_files(page)
253
-
254
- def ls(
255
- self,
256
- path: Dir | Remote | str,
257
- max_depth: int | None = None,
258
- glob: str | None = None,
259
- order: Order = Order.NORMAL,
260
- listing_option: ListingOption = ListingOption.ALL,
261
- ) -> DirListing:
262
- """List files in the given path.
263
-
264
- Args:
265
- path: Remote path or Remote object to list
266
- max_depth: Maximum recursion depth (0 means no recursion)
267
-
268
- Returns:
269
- List of File objects found at the path
270
- """
271
-
272
- if isinstance(path, str):
273
- path = Dir(
274
- to_path(path, self)
275
- ) # assume it's a directory if ls is being called.
276
-
277
- cmd = ["lsjson"]
278
- if max_depth is not None:
279
- if max_depth < 0:
280
- cmd.append("--recursive")
281
- if max_depth > 0:
282
- cmd.append("--max-depth")
283
- cmd.append(str(max_depth))
284
- if listing_option != ListingOption.ALL:
285
- cmd.append(f"--{listing_option.value}")
286
-
287
- cmd.append(str(path))
288
- remote = path.remote if isinstance(path, Dir) else path
289
- assert isinstance(remote, Remote)
290
-
291
- cp = self._run(cmd, check=True)
292
- text = cp.stdout
293
- parent_path: str | None = None
294
- if isinstance(path, Dir):
295
- parent_path = path.path.path
296
- paths: list[RPath] = RPath.from_json_str(text, remote, parent_path=parent_path)
297
- # print(parent_path)
298
- for o in paths:
299
- o.set_rclone(self)
300
-
301
- # do we have a glob pattern?
302
- if glob is not None:
303
- paths = [p for p in paths if fnmatch(p.path, glob)]
304
-
305
- if order == Order.REVERSE:
306
- paths.reverse()
307
- elif order == Order.RANDOM:
308
- random.shuffle(paths)
309
- return DirListing(paths)
310
-
311
- def listremotes(self) -> list[Remote]:
312
- cmd = ["listremotes"]
313
- cp = self._run(cmd)
314
- text: str = cp.stdout
315
- tmp = text.splitlines()
316
- tmp = [t.strip() for t in tmp]
317
- # strip out ":" from the end
318
- tmp = [t.replace(":", "") for t in tmp]
319
- out = [Remote(name=t, rclone=self) for t in tmp]
320
- return out
321
-
322
- def diff(
323
- self,
324
- src: str,
325
- dst: str,
326
- min_size: (
327
- str | None
328
- ) = None, # e. g. "1MB" - see rclone documentation: https://rclone.org/commands/rclone_check/
329
- max_size: (
330
- str | None
331
- ) = None, # e. g. "1GB" - see rclone documentation: https://rclone.org/commands/rclone_check/
332
- diff_option: DiffOption = DiffOption.COMBINED,
333
- fast_list: bool = True,
334
- size_only: bool | None = None,
335
- checkers: int | None = None,
336
- other_args: list[str] | None = None,
337
- ) -> Generator[DiffItem, None, None]:
338
- """Be extra careful with the src and dst values. If you are off by one
339
- parent directory, you will get a huge amount of false diffs."""
340
- other_args = other_args or []
341
- if checkers is None or checkers < 1:
342
- checkers = 1000
343
- cmd = [
344
- "check",
345
- src,
346
- dst,
347
- "--checkers",
348
- str(checkers),
349
- "--log-level",
350
- "INFO",
351
- f"--{diff_option.value}",
352
- "-",
353
- ]
354
- if size_only is None:
355
- size_only = diff_option in [
356
- DiffOption.MISSING_ON_DST,
357
- DiffOption.MISSING_ON_SRC,
358
- ]
359
- if size_only:
360
- cmd += ["--size-only"]
361
- if fast_list:
362
- cmd += ["--fast-list"]
363
- if min_size:
364
- cmd += ["--min-size", min_size]
365
- if max_size:
366
- cmd += ["--max-size", max_size]
367
- if diff_option == DiffOption.MISSING_ON_DST:
368
- cmd += ["--one-way"]
369
- if other_args:
370
- cmd += other_args
371
- proc = self._launch_process(cmd, capture=True)
372
- item: DiffItem
373
- for item in diff_stream_from_running_process(
374
- running_process=proc, src_slug=src, dst_slug=dst, diff_option=diff_option
375
- ):
376
- if item is None:
377
- break
378
- yield item
379
-
380
- def walk(
381
- self,
382
- path: Dir | Remote | str,
383
- max_depth: int = -1,
384
- breadth_first: bool = True,
385
- order: Order = Order.NORMAL,
386
- ) -> Generator[DirListing, None, None]:
387
- """Walk through the given path recursively.
388
-
389
- Args:
390
- path: Remote path or Remote object to walk through
391
- max_depth: Maximum depth to traverse (-1 for unlimited)
392
-
393
- Yields:
394
- DirListing: Directory listing for each directory encountered
395
- """
396
- dir_obj: Dir
397
- if isinstance(path, Dir):
398
- # Create a Remote object for the path
399
- remote = path.remote
400
- rpath = RPath(
401
- remote=remote,
402
- path=path.path.path,
403
- name=path.path.name,
404
- size=0,
405
- mime_type="inode/directory",
406
- mod_time="",
407
- is_dir=True,
408
- )
409
- rpath.set_rclone(self)
410
- dir_obj = Dir(rpath)
411
- elif isinstance(path, str):
412
- dir_obj = Dir(to_path(path, self))
413
- elif isinstance(path, Remote):
414
- dir_obj = Dir(path)
415
- else:
416
- dir_obj = Dir(path) # shut up pyright
417
- assert f"Invalid type for path: {type(path)}"
418
-
419
- yield from walk(
420
- dir_obj, max_depth=max_depth, breadth_first=breadth_first, order=order
421
- )
422
-
423
- def scan_missing_folders(
424
- self,
425
- src: Dir | Remote | str,
426
- dst: Dir | Remote | str,
427
- max_depth: int = -1,
428
- order: Order = Order.NORMAL,
429
- ) -> Generator[Dir, None, None]:
430
- """Walk through the given path recursively.
431
-
432
- WORK IN PROGRESS!!
433
-
434
- Args:
435
- src: Source directory or Remote to walk through
436
- dst: Destination directory or Remote to walk through
437
- max_depth: Maximum depth to traverse (-1 for unlimited)
438
-
439
- Yields:
440
- DirListing: Directory listing for each directory encountered
441
- """
442
- from rclone_api.scan_missing_folders import scan_missing_folders
443
-
444
- src_dir = Dir(to_path(src, self))
445
- dst_dir = Dir(to_path(dst, self))
446
- yield from scan_missing_folders(
447
- src=src_dir, dst=dst_dir, max_depth=max_depth, order=order
448
- )
449
-
450
- def cleanup(
451
- self, path: str, other_args: list[str] | None = None
452
- ) -> CompletedProcess:
453
- """Cleanup any resources used by the Rclone instance."""
454
- # rclone cleanup remote:path [flags]
455
- cmd = ["cleanup", path]
456
- if other_args:
457
- cmd += other_args
458
- out = self._run(cmd)
459
- return CompletedProcess.from_subprocess(out)
460
-
461
- def copy_to(
462
- self,
463
- src: File | str,
464
- dst: File | str,
465
- check: bool | None = None,
466
- verbose: bool | None = None,
467
- other_args: list[str] | None = None,
468
- ) -> CompletedProcess:
469
- """Copy one file from source to destination.
470
-
471
- Warning - slow.
472
-
473
- """
474
- check = get_check(check)
475
- verbose = get_verbose(verbose)
476
- src = src if isinstance(src, str) else str(src.path)
477
- dst = dst if isinstance(dst, str) else str(dst.path)
478
- cmd_list: list[str] = ["copyto", src, dst]
479
- if other_args is not None:
480
- cmd_list += other_args
481
- cp = self._run(cmd_list, check=check)
482
- return CompletedProcess.from_subprocess(cp)
483
-
484
- def copy_files(
485
- self,
486
- src: str,
487
- dst: str,
488
- files: list[str] | Path,
489
- check: bool | None = None,
490
- max_backlog: int | None = None,
491
- verbose: bool | None = None,
492
- checkers: int | None = None,
493
- transfers: int | None = None,
494
- low_level_retries: int | None = None,
495
- retries: int | None = None,
496
- retries_sleep: str | None = None,
497
- metadata: bool | None = None,
498
- timeout: str | None = None,
499
- max_partition_workers: int | None = None,
500
- multi_thread_streams: int | None = None,
501
- other_args: list[str] | None = None,
502
- ) -> list[CompletedProcess]:
503
- """Copy multiple files from source to destination.
504
-
505
- Args:
506
- payload: Dictionary of source and destination file paths
507
- """
508
- check = get_check(check)
509
- max_partition_workers = max_partition_workers or 1
510
- low_level_retries = low_level_retries or 10
511
- retries = retries or 3
512
- other_args = other_args or []
513
- checkers = checkers or 1000
514
- transfers = transfers or 32
515
- verbose = get_verbose(verbose)
516
- payload: list[str] = (
517
- files
518
- if isinstance(files, list)
519
- else [f.strip() for f in files.read_text().splitlines() if f.strip()]
520
- )
521
- if len(payload) == 0:
522
- return []
523
-
524
- for p in payload:
525
- if ":" in p:
526
- raise ValueError(
527
- f"Invalid file path, contains a remote, which is not allowed for copy_files: {p}"
528
- )
529
-
530
- using_fast_list = "--fast-list" in other_args
531
- if using_fast_list:
532
- warnings.warn(
533
- "It's not recommended to use --fast-list with copy_files as this will perform poorly on large repositories since the entire repository has to be scanned."
534
- )
535
-
536
- if max_partition_workers > 1:
537
- datalists: dict[str, list[str]] = group_files(
538
- payload, fully_qualified=False
539
- )
540
- else:
541
- datalists = {"": payload}
542
- # out: subprocess.CompletedProcess | None = None
543
- out: list[CompletedProcess] = []
544
-
545
- futures: list[Future] = []
546
-
547
- with ThreadPoolExecutor(max_workers=max_partition_workers) as executor:
548
- for common_prefix, files in datalists.items():
549
-
550
- def _task(
551
- files: list[str] | Path = files,
552
- ) -> subprocess.CompletedProcess:
553
- with TemporaryDirectory() as tmpdir:
554
- filelist: list[str] = []
555
- filepath: Path
556
- if isinstance(files, list):
557
- include_files_txt = Path(tmpdir) / "include_files.txt"
558
- include_files_txt.write_text(
559
- "\n".join(files), encoding="utf-8"
560
- )
561
- filelist = list(files)
562
- filepath = Path(include_files_txt)
563
- elif isinstance(files, Path):
564
- filelist = [
565
- f.strip()
566
- for f in files.read_text().splitlines()
567
- if f.strip()
568
- ]
569
- filepath = files
570
- if common_prefix:
571
- src_path = f"{src}/{common_prefix}"
572
- dst_path = f"{dst}/{common_prefix}"
573
- else:
574
- src_path = src
575
- dst_path = dst
576
-
577
- if verbose:
578
- nfiles = len(filelist)
579
- files_fqdn = [f" {src_path}/{f}" for f in filelist]
580
- print(f"Copying {nfiles} files:")
581
- chunk_size = 100
582
- for i in range(0, nfiles, chunk_size):
583
- chunk = files_fqdn[i : i + chunk_size]
584
- files_str = "\n".join(chunk)
585
- print(f"{files_str}")
586
- cmd_list: list[str] = [
587
- "copy",
588
- src_path,
589
- dst_path,
590
- "--files-from",
591
- str(filepath),
592
- "--checkers",
593
- str(checkers),
594
- "--transfers",
595
- str(transfers),
596
- "--low-level-retries",
597
- str(low_level_retries),
598
- "--retries",
599
- str(retries),
600
- ]
601
- if metadata:
602
- cmd_list.append("--metadata")
603
- if retries_sleep is not None:
604
- cmd_list += ["--retries-sleep", retries_sleep]
605
- if timeout is not None:
606
- cmd_list += ["--timeout", timeout]
607
- if max_backlog is not None:
608
- cmd_list += ["--max-backlog", str(max_backlog)]
609
- if multi_thread_streams is not None:
610
- cmd_list += [
611
- "--multi-thread-streams",
612
- str(multi_thread_streams),
613
- ]
614
- if verbose:
615
- if not any(["-v" in x for x in other_args]):
616
- cmd_list.append("-vvvv")
617
- if not any(["--progress" in x for x in other_args]):
618
- cmd_list.append("--progress")
619
- if other_args:
620
- cmd_list += other_args
621
- out = self._run(cmd_list, capture=not verbose)
622
- return out
623
-
624
- fut: Future = executor.submit(_task)
625
- futures.append(fut)
626
- for fut in futures:
627
- cp: subprocess.CompletedProcess = fut.result()
628
- assert cp is not None
629
- out.append(CompletedProcess.from_subprocess(cp))
630
- if cp.returncode != 0:
631
- if check:
632
- raise ValueError(f"Error deleting files: {cp.stderr}")
633
- else:
634
- warnings.warn(f"Error deleting files: {cp.stderr}")
635
- return out
636
-
637
- def copy(
638
- self,
639
- src: Dir | str,
640
- dst: Dir | str,
641
- check: bool | None = None,
642
- transfers: int | None = None,
643
- checkers: int | None = None,
644
- multi_thread_streams: int | None = None,
645
- low_level_retries: int | None = None,
646
- retries: int | None = None,
647
- other_args: list[str] | None = None,
648
- ) -> CompletedProcess:
649
- """Copy files from source to destination.
650
-
651
- Args:
652
- src: Source directory
653
- dst: Destination directory
654
- """
655
- # src_dir = src.path.path
656
- # dst_dir = dst.path.path
657
- src_dir = convert_to_str(src)
658
- dst_dir = convert_to_str(dst)
659
- check = get_check(check)
660
- checkers = checkers or 1000
661
- transfers = transfers or 32
662
- low_level_retries = low_level_retries or 10
663
- retries = retries or 3
664
- cmd_list: list[str] = ["copy", src_dir, dst_dir]
665
- cmd_list += ["--checkers", str(checkers)]
666
- cmd_list += ["--transfers", str(transfers)]
667
- cmd_list += ["--low-level-retries", str(low_level_retries)]
668
- if multi_thread_streams is not None:
669
- cmd_list += ["--multi-thread-streams", str(multi_thread_streams)]
670
- if other_args:
671
- cmd_list += other_args
672
- cp = self._run(cmd_list, check=check, capture=False)
673
- return CompletedProcess.from_subprocess(cp)
674
-
675
- def purge(self, path: Dir | str) -> CompletedProcess:
676
- """Purge a directory"""
677
- # path should always be a string
678
- path = path if isinstance(path, str) else str(path.path)
679
- cmd_list: list[str] = ["purge", str(path)]
680
- cp = self._run(cmd_list)
681
- return CompletedProcess.from_subprocess(cp)
682
-
683
- def delete_files(
684
- self,
685
- files: str | File | list[str] | list[File],
686
- check: bool | None = None,
687
- rmdirs=False,
688
- verbose: bool | None = None,
689
- max_partition_workers: int | None = None,
690
- other_args: list[str] | None = None,
691
- ) -> CompletedProcess:
692
- """Delete a directory"""
693
- check = get_check(check)
694
- verbose = get_verbose(verbose)
695
- payload: list[str] = convert_to_filestr_list(files)
696
- if len(payload) == 0:
697
- if verbose:
698
- print("No files to delete")
699
- cp = subprocess.CompletedProcess(
700
- args=["rclone", "delete", "--files-from", "[]"],
701
- returncode=0,
702
- stdout="",
703
- stderr="",
704
- )
705
- return CompletedProcess.from_subprocess(cp)
706
-
707
- datalists: dict[str, list[str]] = group_files(payload)
708
- completed_processes: list[subprocess.CompletedProcess] = []
709
-
710
- futures: list[Future] = []
711
-
712
- with ThreadPoolExecutor(max_workers=max_partition_workers) as executor:
713
-
714
- for remote, files in datalists.items():
715
-
716
- def _task(
717
- files=files, check=check, remote=remote
718
- ) -> subprocess.CompletedProcess:
719
- with TemporaryDirectory() as tmpdir:
720
- include_files_txt = Path(tmpdir) / "include_files.txt"
721
- include_files_txt.write_text("\n".join(files), encoding="utf-8")
722
-
723
- # print(include_files_txt)
724
- cmd_list: list[str] = [
725
- "delete",
726
- remote,
727
- "--files-from",
728
- str(include_files_txt),
729
- "--checkers",
730
- "1000",
731
- "--transfers",
732
- "1000",
733
- ]
734
- if verbose:
735
- cmd_list.append("-vvvv")
736
- if rmdirs:
737
- cmd_list.append("--rmdirs")
738
- if other_args:
739
- cmd_list += other_args
740
- out = self._run(cmd_list, check=check)
741
- if out.returncode != 0:
742
- if check:
743
- completed_processes.append(out)
744
- raise ValueError(f"Error deleting files: {out}")
745
- else:
746
- warnings.warn(f"Error deleting files: {out}")
747
- return out
748
-
749
- fut: Future = executor.submit(_task)
750
- futures.append(fut)
751
-
752
- for fut in futures:
753
- out = fut.result()
754
- assert out is not None
755
- completed_processes.append(out)
756
-
757
- return CompletedProcess(completed_processes)
758
-
759
- @deprecated("delete_files")
760
- def deletefiles(
761
- self, files: str | File | list[str] | list[File]
762
- ) -> CompletedProcess:
763
- out = self.delete_files(files)
764
- return out
765
-
766
- def exists(self, path: Dir | Remote | str | File) -> bool:
767
- """Check if a file or directory exists."""
768
- arg: str = convert_to_str(path)
769
- assert isinstance(arg, str)
770
- try:
771
- dir_listing = self.ls(arg)
772
- # print(dir_listing)
773
- return len(dir_listing.dirs) > 0 or len(dir_listing.files) > 0
774
- except subprocess.CalledProcessError:
775
- return False
776
-
777
- def is_synced(self, src: str | Dir, dst: str | Dir) -> bool:
778
- """Check if two directories are in sync."""
779
- src = convert_to_str(src)
780
- dst = convert_to_str(dst)
781
- cmd_list: list[str] = ["check", str(src), str(dst)]
782
- try:
783
- self._run(cmd_list, check=True)
784
- return True
785
- except subprocess.CalledProcessError:
786
- return False
787
-
788
- def copy_file_resumable_s3(
789
- self,
790
- src: str,
791
- dst: str,
792
- save_state_json: Path,
793
- chunk_size: SizeSuffix | None = None,
794
- read_threads: int = 8,
795
- write_threads: int = 8,
796
- retries: int = 3,
797
- verbose: bool | None = None,
798
- max_chunks_before_suspension: int | None = None,
799
- mount_log: Path | None = None,
800
- ) -> MultiUploadResult:
801
- """For massive files that rclone can't handle in one go, this function will copy the file in chunks to an S3 store"""
802
- from rclone_api.s3.api import S3Client
803
- from rclone_api.s3.create import S3Credentials
804
- from rclone_api.util import S3PathInfo, split_s3_path
805
-
806
- other_args: list[str] = ["--no-modtime", "--vfs-read-wait", "1s"]
807
- chunk_size = chunk_size or SizeSuffix("64M")
808
- unit_chunk_size = chunk_size / read_threads
809
- tmp_mount_dir = self._get_tmp_mount_dir()
810
- vfs_read_chunk_size = unit_chunk_size
811
- vfs_read_chunk_size_limit = chunk_size
812
- vfs_read_chunk_streams = read_threads
813
- vfs_disk_space_total_size = chunk_size
814
- assert (
815
- chunk_size.as_int() % vfs_read_chunk_size.as_int() == 0
816
- ), f"chunk_size {chunk_size} must be a multiple of vfs_read_chunk_size {vfs_read_chunk_size}"
817
- other_args += ["--vfs-read-chunk-size", vfs_read_chunk_size.as_str()]
818
- other_args += [
819
- "--vfs-read-chunk-size-limit",
820
- vfs_read_chunk_size_limit.as_str(),
821
- ]
822
- other_args += ["--vfs-read-chunk-streams", str(vfs_read_chunk_streams)]
823
- other_args += [
824
- "--vfs-disk-space-total-size",
825
- vfs_disk_space_total_size.as_str(),
826
- ]
827
- other_args += ["--read-only"]
828
- other_args += ["--direct-io"]
829
- # --vfs-cache-max-size
830
- other_args += ["--vfs-cache-max-size", vfs_disk_space_total_size.as_str()]
831
- mount_path = tmp_mount_dir / "RCLONE_API_DYNAMIC_MOUNT"
832
- src_path = Path(src)
833
- name = src_path.name
834
-
835
- src_parent_path = Path(src).parent.as_posix()
836
- size_result: SizeResult = self.size_files(src_parent_path, [name])
837
-
838
- target_size = SizeSuffix(size_result.total_size)
839
- if target_size < SizeSuffix("5M"):
840
- # fallback to normal copy
841
- completed_proc = self.copy_to(src, dst, check=True)
842
- if completed_proc.ok:
843
- return MultiUploadResult.UPLOADED_FRESH
844
- if size_result.total_size <= 0:
845
- raise ValueError(
846
- f"File {src} has size {size_result.total_size}, is this a directory?"
847
- )
848
-
849
- path_info: S3PathInfo = split_s3_path(dst)
850
- remote = path_info.remote
851
- bucket_name = path_info.bucket
852
- s3_key = path_info.key
853
- parsed: Parsed = self.config.parse()
854
- sections: dict[str, Section] = parsed.sections
855
- if remote not in sections:
856
- raise ValueError(
857
- f"Remote {remote} not found in rclone config, remotes are: {sections.keys()}"
858
- )
859
-
860
- section: Section = sections[remote]
861
- dst_type = section.type()
862
- if dst_type != "s3" and dst_type != "b2":
863
- raise ValueError(
864
- f"Remote {remote} is not an S3 remote, it is of type {dst_type}"
865
- )
866
-
867
- def get_provider_str(section=section) -> str | None:
868
- type: str = section.type()
869
- provider: str | None = section.provider()
870
- if provider is not None:
871
- return provider
872
- if type == "b2":
873
- return S3Provider.BACKBLAZE.value
874
- if type != "s3":
875
- raise ValueError(f"Remote {remote} is not an S3 remote")
876
- return S3Provider.S3.value
877
-
878
- provider: str
879
- if provided_provider_str := get_provider_str():
880
- if verbose:
881
- print(f"Using provided provider: {provided_provider_str}")
882
- provider = provided_provider_str
883
- else:
884
- if verbose:
885
- print(f"Using default provider: {S3Provider.S3.value}")
886
- provider = S3Provider.S3.value
887
- provider_enum = S3Provider.from_str(provider)
888
-
889
- s3_creds: S3Credentials = S3Credentials(
890
- provider=provider_enum,
891
- access_key_id=section.access_key_id(),
892
- secret_access_key=section.secret_access_key(),
893
- endpoint_url=section.endpoint(),
894
- )
895
-
896
- chunk_fetcher: MultiMountFileChunker = self.get_multi_mount_file_chunker(
897
- src=src_path.as_posix(),
898
- chunk_size=chunk_size,
899
- threads=read_threads,
900
- mount_log=mount_log,
901
- direct_io=True,
902
- )
903
-
904
- client = S3Client(s3_creds)
905
- upload_config: S3MutliPartUploadConfig = S3MutliPartUploadConfig(
906
- chunk_size=chunk_size.as_int(),
907
- chunk_fetcher=chunk_fetcher.fetch,
908
- max_write_threads=write_threads,
909
- retries=retries,
910
- resume_path_json=save_state_json,
911
- max_chunks_before_suspension=max_chunks_before_suspension,
912
- )
913
-
914
- src_file = mount_path / name
915
-
916
- print(f"Uploading {name} to {s3_key} in bucket {bucket_name}")
917
- print(f"Source: {src_path}")
918
- print(f"bucket_name: {bucket_name}")
919
- print(f"upload_config: {upload_config}")
920
-
921
- # get the file size
922
-
923
- upload_target = S3UploadTarget(
924
- src_file=src_file,
925
- src_file_size=size_result.total_size,
926
- bucket_name=bucket_name,
927
- s3_key=s3_key,
928
- )
929
-
930
- try:
931
- out: MultiUploadResult = client.upload_file_multipart(
932
- upload_target=upload_target,
933
- upload_config=upload_config,
934
- )
935
- return out
936
- except Exception as e:
937
- print(f"Error uploading file: {e}")
938
- traceback.print_exc()
939
- raise
940
- finally:
941
- chunk_fetcher.shutdown()
942
-
943
- def get_multi_mount_file_chunker(
944
- self,
945
- src: str,
946
- chunk_size: SizeSuffix,
947
- threads: int,
948
- mount_log: Path | None,
949
- direct_io: bool,
950
- ) -> MultiMountFileChunker:
951
- from rclone_api.util import random_str
952
-
953
- mounts: list[Mount] = []
954
- vfs_read_chunk_size = chunk_size
955
- vfs_read_chunk_size_limit = chunk_size
956
- vfs_read_chunk_streams = 0
957
- vfs_disk_space_total_size = chunk_size
958
- other_args: list[str] = []
959
- other_args += ["--no-modtime"]
960
- other_args += ["--vfs-read-chunk-size", vfs_read_chunk_size.as_str()]
961
- other_args += [
962
- "--vfs-read-chunk-size-limit",
963
- vfs_read_chunk_size_limit.as_str(),
964
- ]
965
- other_args += ["--vfs-read-chunk-streams", str(vfs_read_chunk_streams)]
966
- other_args += [
967
- "--vfs-disk-space-total-size",
968
- vfs_disk_space_total_size.as_str(),
969
- ]
970
- other_args += ["--read-only"]
971
- if direct_io:
972
- other_args += ["--direct-io"]
973
-
974
- base_mount_dir = self._get_tmp_mount_dir()
975
- base_cache_dir = self._get_cache_dir()
976
-
977
- filename = Path(src).name
978
- with ThreadPoolExecutor(max_workers=threads) as executor:
979
- futures: list[Future] = []
980
- try:
981
- for i in range(threads):
982
- tmp_mnts = base_mount_dir / random_str(12)
983
- verbose = mount_log is not None
984
-
985
- src_parent_path = Path(src).parent.as_posix()
986
- cache_dir = base_cache_dir / random_str(12)
987
-
988
- def task(
989
- src_parent_path=src_parent_path,
990
- tmp_mnts=tmp_mnts,
991
- cache_dir=cache_dir,
992
- ):
993
- clean_mount(tmp_mnts, verbose=verbose)
994
- prepare_mount(tmp_mnts, verbose=verbose)
995
- return self.mount(
996
- src=src_parent_path,
997
- outdir=tmp_mnts,
998
- allow_writes=False,
999
- use_links=True,
1000
- vfs_cache_mode="minimal",
1001
- verbose=False,
1002
- cache_dir=cache_dir,
1003
- cache_dir_delete_on_exit=True,
1004
- log=mount_log,
1005
- other_args=other_args,
1006
- )
1007
-
1008
- futures.append(executor.submit(task))
1009
- mount_errors: list[Exception] = []
1010
- for fut in futures:
1011
- try:
1012
- mount = fut.result()
1013
- mounts.append(mount)
1014
- except Exception as er:
1015
- warnings.warn(f"Error mounting: {er}")
1016
- mount_errors.append(er)
1017
- if mount_errors:
1018
- warnings.warn(f"Error mounting: {mount_errors}")
1019
- raise Exception(mount_errors)
1020
- except Exception:
1021
- for mount in mounts:
1022
- mount.close()
1023
- raise
1024
-
1025
- src_path: Path = Path(src)
1026
- src_parent_path = src_path.parent.as_posix()
1027
- name = src_path.name
1028
- size_result: SizeResult = self.size_files(src_parent_path, [name])
1029
- filesize = size_result.total_size
1030
-
1031
- executor = ThreadPoolExecutor(max_workers=threads)
1032
- filechunker: MultiMountFileChunker = MultiMountFileChunker(
1033
- filename=filename,
1034
- filesize=filesize,
1035
- mounts=mounts,
1036
- executor=executor,
1037
- verbose=mount_log is not None,
1038
- )
1039
- return filechunker
1040
-
1041
- def copy_bytes(
1042
- self,
1043
- src: str,
1044
- offset: int | SizeSuffix,
1045
- length: int | SizeSuffix,
1046
- outfile: Path,
1047
- other_args: list[str] | None = None,
1048
- ) -> Exception | None:
1049
- """Copy a slice of bytes from the src file to dst."""
1050
- offset = SizeSuffix(offset).as_int()
1051
- length = SizeSuffix(length).as_int()
1052
- cmd_list: list[str] = [
1053
- "cat",
1054
- "--offset",
1055
- str(offset),
1056
- "--count",
1057
- str(length),
1058
- src,
1059
- ]
1060
- if other_args:
1061
- cmd_list.extend(other_args)
1062
- try:
1063
- cp = self._run(cmd_list, capture=outfile)
1064
- if cp.returncode == 0:
1065
- return None
1066
- return Exception(cp.stderr)
1067
- except subprocess.CalledProcessError as e:
1068
- return e
1069
-
1070
- def copy_bytes_mount(
1071
- self,
1072
- src: str,
1073
- offset: int | SizeSuffix,
1074
- length: int | SizeSuffix,
1075
- chunk_size: SizeSuffix,
1076
- max_threads: int = 1,
1077
- # If outfile is supplied then bytes are written to this file and success returns bytes(0)
1078
- outfile: Path | None = None,
1079
- mount_log: Path | None = None,
1080
- direct_io: bool = True,
1081
- ) -> bytes | Exception:
1082
- """Copy a slice of bytes from the src file to dst. Parallelism is achieved through multiple mounted files."""
1083
- from rclone_api.types import FilePart
1084
-
1085
- offset = SizeSuffix(offset).as_int()
1086
- length = SizeSuffix(length).as_int()
1087
- # determine number of threads from chunk size
1088
- threads = max(1, min(max_threads, length // chunk_size.as_int()))
1089
- # todo - implement max threads.
1090
- filechunker = self.get_multi_mount_file_chunker(
1091
- src=src,
1092
- chunk_size=chunk_size,
1093
- threads=threads,
1094
- mount_log=mount_log,
1095
- direct_io=direct_io,
1096
- )
1097
- try:
1098
- fut = filechunker.fetch(offset, length, extra=None)
1099
- fp: FilePart = fut.result()
1100
- payload = fp.payload
1101
- if isinstance(payload, Exception):
1102
- return payload
1103
- try:
1104
- if outfile is None:
1105
- return payload.read_bytes()
1106
- shutil.move(payload, outfile)
1107
- return bytes(0)
1108
- finally:
1109
- fp.close()
1110
-
1111
- except Exception as e:
1112
- warnings.warn(f"Error copying bytes: {e}")
1113
- return e
1114
- finally:
1115
- try:
1116
- filechunker.shutdown()
1117
- except Exception as e:
1118
- warnings.warn(f"Error closing filechunker: {e}")
1119
-
1120
- def copy_dir(
1121
- self, src: str | Dir, dst: str | Dir, args: list[str] | None = None
1122
- ) -> CompletedProcess:
1123
- """Copy a directory from source to destination."""
1124
- # convert src to str, also dst
1125
- src = convert_to_str(src)
1126
- dst = convert_to_str(dst)
1127
- cmd_list: list[str] = ["copy", src, dst]
1128
- if args is not None:
1129
- cmd_list += args
1130
- cp = self._run(cmd_list)
1131
- return CompletedProcess.from_subprocess(cp)
1132
-
1133
- def copy_remote(
1134
- self, src: Remote, dst: Remote, args: list[str] | None = None
1135
- ) -> CompletedProcess:
1136
- """Copy a remote to another remote."""
1137
- cmd_list: list[str] = ["copy", str(src), str(dst)]
1138
- if args is not None:
1139
- cmd_list += args
1140
- # return self._run(cmd_list)
1141
- cp = self._run(cmd_list)
1142
- return CompletedProcess.from_subprocess(cp)
1143
-
1144
- def mount(
1145
- self,
1146
- src: Remote | Dir | str,
1147
- outdir: Path,
1148
- allow_writes: bool | None = False,
1149
- use_links: bool | None = None,
1150
- vfs_cache_mode: str | None = None,
1151
- verbose: bool | None = None,
1152
- cache_dir: Path | None = None,
1153
- cache_dir_delete_on_exit: bool | None = None,
1154
- log: Path | None = None,
1155
- other_args: list[str] | None = None,
1156
- ) -> Mount:
1157
- """Mount a remote or directory to a local path.
1158
-
1159
- Args:
1160
- src: Remote or directory to mount
1161
- outdir: Local path to mount to
1162
-
1163
- Returns:
1164
- CompletedProcess from the mount command execution
1165
-
1166
- Raises:
1167
- subprocess.CalledProcessError: If the mount operation fails
1168
- """
1169
-
1170
- allow_writes = allow_writes or False
1171
- use_links = use_links or True
1172
- verbose = get_verbose(verbose) or (log is not None)
1173
- vfs_cache_mode = vfs_cache_mode or "full"
1174
- clean_mount(outdir, verbose=verbose)
1175
- prepare_mount(outdir, verbose=verbose)
1176
- debug_fuse = log is not None
1177
- src_str = convert_to_str(src)
1178
- cmd_list: list[str] = ["mount", src_str, str(outdir)]
1179
- if not allow_writes:
1180
- cmd_list.append("--read-only")
1181
- if use_links:
1182
- cmd_list.append("--links")
1183
- if vfs_cache_mode:
1184
- cmd_list.append("--vfs-cache-mode")
1185
- cmd_list.append(vfs_cache_mode)
1186
- if cache_dir:
1187
- cmd_list.append("--cache-dir")
1188
- cmd_list.append(str(cache_dir.absolute()))
1189
- if debug_fuse:
1190
- cmd_list.append("--debug-fuse")
1191
- if verbose:
1192
- cmd_list.append("-vvvv")
1193
- if other_args:
1194
- cmd_list += other_args
1195
- proc = self._launch_process(cmd_list, log=log)
1196
- mount_read_only = not allow_writes
1197
- mount: Mount = Mount(
1198
- src=src_str,
1199
- mount_path=outdir,
1200
- process=proc,
1201
- read_only=mount_read_only,
1202
- cache_dir=cache_dir,
1203
- cache_dir_delete_on_exit=cache_dir_delete_on_exit,
1204
- )
1205
- return mount
1206
-
1207
- @contextmanager
1208
- def scoped_mount(
1209
- self,
1210
- src: Remote | Dir | str,
1211
- outdir: Path,
1212
- allow_writes: bool | None = None,
1213
- use_links: bool | None = None,
1214
- vfs_cache_mode: str | None = None,
1215
- verbose: bool | None = None,
1216
- log: Path | None = None,
1217
- cache_dir: Path | None = None,
1218
- cache_dir_delete_on_exit: bool | None = None,
1219
- other_args: list[str] | None = None,
1220
- ) -> Generator[Mount, None, None]:
1221
- """Like mount, but can be used in a context manager."""
1222
- error_happened = False
1223
- mount: Mount = self.mount(
1224
- src,
1225
- outdir,
1226
- allow_writes=allow_writes,
1227
- use_links=use_links,
1228
- vfs_cache_mode=vfs_cache_mode,
1229
- verbose=verbose,
1230
- cache_dir=cache_dir,
1231
- cache_dir_delete_on_exit=cache_dir_delete_on_exit,
1232
- log=log,
1233
- other_args=other_args,
1234
- )
1235
- try:
1236
- yield mount
1237
- except Exception as e:
1238
- error_happened = True
1239
- stack_trace = traceback.format_exc()
1240
- warnings.warn(f"Error in scoped_mount: {e}\n\nStack Trace:\n{stack_trace}")
1241
- raise
1242
- finally:
1243
- if not error_happened or (not allow_writes):
1244
- mount.close()
1245
-
1246
- # Settings optimized for s3.
1247
- def mount_s3(
1248
- self,
1249
- url: str,
1250
- outdir: Path,
1251
- allow_writes=False,
1252
- vfs_cache_mode="full",
1253
- dir_cache_time: str | None = "1h",
1254
- attribute_timeout: str | None = "1h",
1255
- vfs_disk_space_total_size: str | None = "100M",
1256
- transfers: int | None = 128,
1257
- modtime_strategy: (
1258
- ModTimeStrategy | None
1259
- ) = ModTimeStrategy.USE_SERVER_MODTIME, # speeds up S3 operations
1260
- vfs_read_chunk_streams: int | None = 16,
1261
- vfs_read_chunk_size: str | None = "4M",
1262
- vfs_fast_fingerprint: bool = True,
1263
- # vfs-refresh
1264
- vfs_refresh: bool = True,
1265
- other_args: list[str] | None = None,
1266
- ) -> Mount:
1267
- """Mount a remote or directory to a local path.
1268
-
1269
- Args:
1270
- src: Remote or directory to mount
1271
- outdir: Local path to mount to
1272
- """
1273
- other_args = other_args or []
1274
- if modtime_strategy is not None:
1275
- other_args.append(f"--{modtime_strategy.value}")
1276
- if (vfs_cache_mode == "full" or vfs_cache_mode == "writes") and (
1277
- transfers is not None and "--transfers" not in other_args
1278
- ):
1279
- other_args.append("--transfers")
1280
- other_args.append(str(transfers))
1281
- if dir_cache_time is not None and "--dir-cache-time" not in other_args:
1282
- other_args.append("--dir-cache-time")
1283
- other_args.append(dir_cache_time)
1284
- if (
1285
- vfs_disk_space_total_size is not None
1286
- and "--vfs-cache-max-size" not in other_args
1287
- ):
1288
- other_args.append("--vfs-cache-max-size")
1289
- other_args.append(vfs_disk_space_total_size)
1290
- if vfs_refresh and "--vfs-refresh" not in other_args:
1291
- other_args.append("--vfs-refresh")
1292
- if attribute_timeout is not None and "--attr-timeout" not in other_args:
1293
- other_args.append("--attr-timeout")
1294
- other_args.append(attribute_timeout)
1295
- if vfs_read_chunk_streams:
1296
- other_args.append("--vfs-read-chunk-streams")
1297
- other_args.append(str(vfs_read_chunk_streams))
1298
- if vfs_read_chunk_size:
1299
- other_args.append("--vfs-read-chunk-size")
1300
- other_args.append(vfs_read_chunk_size)
1301
- if vfs_fast_fingerprint:
1302
- other_args.append("--vfs-fast-fingerprint")
1303
-
1304
- other_args = other_args if other_args else None
1305
- return self.mount(
1306
- url,
1307
- outdir,
1308
- allow_writes=allow_writes,
1309
- vfs_cache_mode=vfs_cache_mode,
1310
- other_args=other_args,
1311
- )
1312
-
1313
- def serve_webdav(
1314
- self,
1315
- src: Remote | Dir | str,
1316
- user: str,
1317
- password: str,
1318
- addr: str = "localhost:2049",
1319
- allow_other: bool = False,
1320
- other_args: list[str] | None = None,
1321
- ) -> Process:
1322
- """Serve a remote or directory via NFS.
1323
-
1324
- Args:
1325
- src: Remote or directory to serve
1326
- addr: Network address and port to serve on (default: localhost:2049)
1327
- allow_other: Allow other users to access the share
1328
-
1329
- Returns:
1330
- Process: The running NFS server process
1331
-
1332
- Raises:
1333
- ValueError: If the NFS server fails to start
1334
- """
1335
- src_str = convert_to_str(src)
1336
- cmd_list: list[str] = ["serve", "webdav", "--addr", addr, src_str]
1337
- cmd_list.extend(["--user", user, "--pass", password])
1338
- if allow_other:
1339
- cmd_list.append("--allow-other")
1340
- if other_args:
1341
- cmd_list += other_args
1342
- proc = self._launch_process(cmd_list)
1343
- time.sleep(2) # give it a moment to start
1344
- if proc.poll() is not None:
1345
- raise ValueError("NFS serve process failed to start")
1346
- return proc
1347
-
1348
- def size_files(
1349
- self,
1350
- src: str,
1351
- files: list[str],
1352
- fast_list: bool = False, # Recommend that this is False
1353
- other_args: list[str] | None = None,
1354
- check: bool | None = False,
1355
- verbose: bool | None = None,
1356
- ) -> SizeResult:
1357
- """Get the size of a list of files. Example of files items: "remote:bucket/to/file"."""
1358
- verbose = get_verbose(verbose)
1359
- check = get_check(check)
1360
- if fast_list or (other_args and "--fast-list" in other_args):
1361
- warnings.warn(
1362
- "It's not recommended to use --fast-list with size_files as this will perform poorly on large repositories since the entire repository has to be scanned."
1363
- )
1364
- files = list(files)
1365
- all_files: list[File] = []
1366
- # prefix, files = group_under_one_prefix(src, files)
1367
- cmd = ["lsjson", src, "--files-only", "-R"]
1368
- with TemporaryDirectory() as tmpdir:
1369
- # print("files: " + ",".join(files))
1370
- include_files_txt = Path(tmpdir) / "include_files.txt"
1371
- include_files_txt.write_text("\n".join(files), encoding="utf-8")
1372
- cmd += ["--files-from", str(include_files_txt)]
1373
- if fast_list:
1374
- cmd.append("--fast-list")
1375
- if other_args:
1376
- cmd += other_args
1377
- cp = self._run(cmd, check=check)
1378
-
1379
- if cp.returncode != 0:
1380
- if check:
1381
- raise ValueError(f"Error getting file sizes: {cp.stderr}")
1382
- else:
1383
- warnings.warn(f"Error getting file sizes: {cp.stderr}")
1384
- stdout = cp.stdout
1385
- pieces = src.split(":", 1)
1386
- remote_name = pieces[0]
1387
- parent_path: str | None
1388
- if len(pieces) > 1:
1389
- parent_path = pieces[1]
1390
- else:
1391
- parent_path = None
1392
- remote = Remote(name=remote_name, rclone=self)
1393
- paths: list[RPath] = RPath.from_json_str(
1394
- stdout, remote, parent_path=parent_path
1395
- )
1396
- # print(paths)
1397
- all_files += [File(p) for p in paths]
1398
- file_sizes: dict[str, int] = {}
1399
- f: File
1400
- for f in all_files:
1401
- p = f.to_string(include_remote=True)
1402
- if p in file_sizes:
1403
- warnings.warn(f"Duplicate file found: {p}")
1404
- continue
1405
- size = f.size
1406
- if size == 0:
1407
- warnings.warn(f"File size is 0: {p}")
1408
- file_sizes[p] = f.size
1409
- total_size = sum(file_sizes.values())
1410
- file_sizes_path_corrected: dict[str, int] = {}
1411
- for path, size in file_sizes.items():
1412
- # remove the prefix
1413
- path_path = Path(path)
1414
- path_str = path_path.relative_to(src).as_posix()
1415
- file_sizes_path_corrected[path_str] = size
1416
- out: SizeResult = SizeResult(
1417
- prefix=src, total_size=total_size, file_sizes=file_sizes_path_corrected
1418
- )
1419
- return out
1
+ """
2
+ Unit test file.
3
+ """
4
+
5
+ import os
6
+ import random
7
+ import shutil
8
+ import subprocess
9
+ import time
10
+ import traceback
11
+ import warnings
12
+ from concurrent.futures import Future, ThreadPoolExecutor
13
+ from contextlib import contextmanager
14
+ from fnmatch import fnmatch
15
+ from pathlib import Path
16
+ from tempfile import TemporaryDirectory
17
+ from typing import Generator
18
+
19
+ from rclone_api import Dir
20
+ from rclone_api.completed_process import CompletedProcess
21
+ from rclone_api.config import Config, Parsed, Section
22
+ from rclone_api.convert import convert_to_filestr_list, convert_to_str
23
+ from rclone_api.deprecated import deprecated
24
+ from rclone_api.diff import DiffItem, DiffOption, diff_stream_from_running_process
25
+ from rclone_api.dir_listing import DirListing
26
+ from rclone_api.exec import RcloneExec
27
+ from rclone_api.file import File, FileItem
28
+ from rclone_api.group_files import group_files
29
+ from rclone_api.mount import Mount, clean_mount, prepare_mount
30
+ from rclone_api.mount_read_chunker import MultiMountFileChunker
31
+ from rclone_api.process import Process
32
+ from rclone_api.remote import Remote
33
+ from rclone_api.rpath import RPath
34
+ from rclone_api.s3.types import (
35
+ MultiUploadResult,
36
+ S3MutliPartUploadConfig,
37
+ S3Provider,
38
+ S3UploadTarget,
39
+ )
40
+ from rclone_api.types import (
41
+ ListingOption,
42
+ ModTimeStrategy,
43
+ Order,
44
+ SizeResult,
45
+ SizeSuffix,
46
+ )
47
+ from rclone_api.util import (
48
+ get_check,
49
+ get_rclone_exe,
50
+ get_verbose,
51
+ to_path,
52
+ )
53
+ from rclone_api.walk import walk
54
+
55
+
56
+ def rclone_verbose(verbose: bool | None) -> bool:
57
+ if verbose is not None:
58
+ os.environ["RCLONE_API_VERBOSE"] = "1" if verbose else "0"
59
+ return bool(int(os.getenv("RCLONE_API_VERBOSE", "0")))
60
+
61
+
62
+ def _to_rclone_conf(config: Config | Path) -> Config:
63
+ if isinstance(config, Path):
64
+ content = config.read_text(encoding="utf-8")
65
+ return Config(content)
66
+ else:
67
+ return config
68
+
69
+
70
+ class FilesStream:
71
+
72
+ def __init__(self, path: str, process: Process) -> None:
73
+ self.path = path
74
+ self.process = process
75
+
76
+ def __enter__(self) -> "FilesStream":
77
+ self.process.__enter__()
78
+ return self
79
+
80
+ def __exit__(self, *exc_info):
81
+ self.process.__exit__(*exc_info)
82
+
83
+ def files(self) -> Generator[FileItem, None, None]:
84
+ line: bytes
85
+ for line in self.process.stdout:
86
+ linestr: str = line.decode("utf-8").strip()
87
+ if linestr.startswith("["):
88
+ continue
89
+ if linestr.endswith(","):
90
+ linestr = linestr[:-1]
91
+ if linestr.endswith("]"):
92
+ continue
93
+ fileitem: FileItem | None = FileItem.from_json_str(self.path, linestr)
94
+ if fileitem is None:
95
+ continue
96
+ yield fileitem
97
+
98
+ def files_paged(
99
+ self, page_size: int = 1000
100
+ ) -> Generator[list[FileItem], None, None]:
101
+ page: list[FileItem] = []
102
+ for fileitem in self.files():
103
+ page.append(fileitem)
104
+ if len(page) >= page_size:
105
+ yield page
106
+ page = []
107
+ if len(page) > 0:
108
+ yield page
109
+
110
+ def __iter__(self) -> Generator[FileItem, None, None]:
111
+ return self.files()
112
+
113
+
114
+ class Rclone:
115
+ def __init__(
116
+ self, rclone_conf: Path | Config, rclone_exe: Path | None = None
117
+ ) -> None:
118
+ if isinstance(rclone_conf, Path):
119
+ if not rclone_conf.exists():
120
+ raise ValueError(f"Rclone config file not found: {rclone_conf}")
121
+ self._exec = RcloneExec(rclone_conf, get_rclone_exe(rclone_exe))
122
+ self.config: Config = _to_rclone_conf(rclone_conf)
123
+
124
+ def _run(
125
+ self, cmd: list[str], check: bool = False, capture: bool | Path | None = None
126
+ ) -> subprocess.CompletedProcess:
127
+ return self._exec.execute(cmd, check=check, capture=capture)
128
+
129
+ def _launch_process(
130
+ self, cmd: list[str], capture: bool | None = None, log: Path | None = None
131
+ ) -> Process:
132
+ return self._exec.launch_process(cmd, capture=capture, log=log)
133
+
134
+ def _get_tmp_mount_dir(self) -> Path:
135
+ return Path("tmp_mnts")
136
+
137
+ def _get_cache_dir(self) -> Path:
138
+ return Path("cache")
139
+
140
+ def webgui(self, other_args: list[str] | None = None) -> Process:
141
+ """Launch the Rclone web GUI."""
142
+ cmd = ["rcd", "--rc-web-gui"]
143
+ if other_args:
144
+ cmd += other_args
145
+ return self._launch_process(cmd, capture=False)
146
+
147
+ def launch_server(
148
+ self,
149
+ addr: str,
150
+ user: str | None = None,
151
+ password: str | None = None,
152
+ other_args: list[str] | None = None,
153
+ ) -> Process:
154
+ """Launch the Rclone server so it can receive commands"""
155
+ cmd = ["rcd"]
156
+ if addr is not None:
157
+ cmd += ["--rc-addr", addr]
158
+ if user is not None:
159
+ cmd += ["--rc-user", user]
160
+ if password is not None:
161
+ cmd += ["--rc-pass", password]
162
+ if other_args:
163
+ cmd += other_args
164
+ out = self._launch_process(cmd, capture=False)
165
+ time.sleep(1) # Give it some time to launch
166
+ return out
167
+
168
+ def remote_control(
169
+ self,
170
+ addr: str,
171
+ user: str | None = None,
172
+ password: str | None = None,
173
+ capture: bool | None = None,
174
+ other_args: list[str] | None = None,
175
+ ) -> CompletedProcess:
176
+ cmd = ["rc"]
177
+ if addr:
178
+ cmd += ["--rc-addr", addr]
179
+ if user is not None:
180
+ cmd += ["--rc-user", user]
181
+ if password is not None:
182
+ cmd += ["--rc-pass", password]
183
+ if other_args:
184
+ cmd += other_args
185
+ cp = self._run(cmd, capture=capture)
186
+ return CompletedProcess.from_subprocess(cp)
187
+
188
+ def obscure(self, password: str) -> str:
189
+ """Obscure a password for use in rclone config files."""
190
+ cmd_list: list[str] = ["obscure", password]
191
+ cp = self._run(cmd_list)
192
+ return cp.stdout.strip()
193
+
194
+ def ls_stream(
195
+ self,
196
+ path: str,
197
+ max_depth: int = -1,
198
+ fast_list: bool = False,
199
+ ) -> FilesStream:
200
+ """
201
+ List files in the given path
202
+
203
+ Args:
204
+ src: Remote path to list
205
+ max_depth: Maximum recursion depth (-1 for unlimited)
206
+ fast_list: Use fast list (only use when getting THE entire data repository from the root/bucket, or it's small)
207
+ """
208
+ cmd = ["lsjson", path, "--files-only"]
209
+ recurse = max_depth < 0 or max_depth > 1
210
+ if recurse:
211
+ cmd.append("-R")
212
+ if max_depth > 1:
213
+ cmd += ["--max-depth", str(max_depth)]
214
+ if fast_list:
215
+ cmd.append("--fast-list")
216
+ streamer = FilesStream(path, self._launch_process(cmd, capture=True))
217
+ return streamer
218
+
219
+ def save_to_db(
220
+ self,
221
+ src: str,
222
+ db_url: str,
223
+ max_depth: int = -1,
224
+ fast_list: bool = False,
225
+ ) -> None:
226
+ """
227
+ Save files to a database (sqlite, mysql, postgres)
228
+
229
+ Args:
230
+ src: Remote path to list, this will be used to populate an entire table, so always use the root-most path.
231
+ db_url: Database URL, like sqlite:///data.db or mysql://user:pass@localhost/db or postgres://user:pass@localhost/db
232
+ max_depth: Maximum depth to traverse (-1 for unlimited)
233
+ fast_list: Use fast list (only use when getting THE entire data repository from the root/bucket)
234
+
235
+ """
236
+ from rclone_api.db import DB
237
+
238
+ db = DB(db_url)
239
+ with self.ls_stream(src, max_depth, fast_list) as stream:
240
+ for page in stream.files_paged(page_size=10000):
241
+ db.add_files(page)
242
+
243
+ def ls(
244
+ self,
245
+ path: Dir | Remote | str,
246
+ max_depth: int | None = None,
247
+ glob: str | None = None,
248
+ order: Order = Order.NORMAL,
249
+ listing_option: ListingOption = ListingOption.ALL,
250
+ ) -> DirListing:
251
+ """List files in the given path.
252
+
253
+ Args:
254
+ path: Remote path or Remote object to list
255
+ max_depth: Maximum recursion depth (0 means no recursion)
256
+
257
+ Returns:
258
+ List of File objects found at the path
259
+ """
260
+
261
+ if isinstance(path, str):
262
+ path = Dir(
263
+ to_path(path, self)
264
+ ) # assume it's a directory if ls is being called.
265
+
266
+ cmd = ["lsjson"]
267
+ if max_depth is not None:
268
+ if max_depth < 0:
269
+ cmd.append("--recursive")
270
+ if max_depth > 0:
271
+ cmd.append("--max-depth")
272
+ cmd.append(str(max_depth))
273
+ if listing_option != ListingOption.ALL:
274
+ cmd.append(f"--{listing_option.value}")
275
+
276
+ cmd.append(str(path))
277
+ remote = path.remote if isinstance(path, Dir) else path
278
+ assert isinstance(remote, Remote)
279
+
280
+ cp = self._run(cmd, check=True)
281
+ text = cp.stdout
282
+ parent_path: str | None = None
283
+ if isinstance(path, Dir):
284
+ parent_path = path.path.path
285
+ paths: list[RPath] = RPath.from_json_str(text, remote, parent_path=parent_path)
286
+ # print(parent_path)
287
+ for o in paths:
288
+ o.set_rclone(self)
289
+
290
+ # do we have a glob pattern?
291
+ if glob is not None:
292
+ paths = [p for p in paths if fnmatch(p.path, glob)]
293
+
294
+ if order == Order.REVERSE:
295
+ paths.reverse()
296
+ elif order == Order.RANDOM:
297
+ random.shuffle(paths)
298
+ return DirListing(paths)
299
+
300
+ def listremotes(self) -> list[Remote]:
301
+ cmd = ["listremotes"]
302
+ cp = self._run(cmd)
303
+ text: str = cp.stdout
304
+ tmp = text.splitlines()
305
+ tmp = [t.strip() for t in tmp]
306
+ # strip out ":" from the end
307
+ tmp = [t.replace(":", "") for t in tmp]
308
+ out = [Remote(name=t, rclone=self) for t in tmp]
309
+ return out
310
+
311
+ def diff(
312
+ self,
313
+ src: str,
314
+ dst: str,
315
+ min_size: (
316
+ str | None
317
+ ) = None, # e. g. "1MB" - see rclone documentation: https://rclone.org/commands/rclone_check/
318
+ max_size: (
319
+ str | None
320
+ ) = None, # e. g. "1GB" - see rclone documentation: https://rclone.org/commands/rclone_check/
321
+ diff_option: DiffOption = DiffOption.COMBINED,
322
+ fast_list: bool = True,
323
+ size_only: bool | None = None,
324
+ checkers: int | None = None,
325
+ other_args: list[str] | None = None,
326
+ ) -> Generator[DiffItem, None, None]:
327
+ """Be extra careful with the src and dst values. If you are off by one
328
+ parent directory, you will get a huge amount of false diffs."""
329
+ other_args = other_args or []
330
+ if checkers is None or checkers < 1:
331
+ checkers = 1000
332
+ cmd = [
333
+ "check",
334
+ src,
335
+ dst,
336
+ "--checkers",
337
+ str(checkers),
338
+ "--log-level",
339
+ "INFO",
340
+ f"--{diff_option.value}",
341
+ "-",
342
+ ]
343
+ if size_only is None:
344
+ size_only = diff_option in [
345
+ DiffOption.MISSING_ON_DST,
346
+ DiffOption.MISSING_ON_SRC,
347
+ ]
348
+ if size_only:
349
+ cmd += ["--size-only"]
350
+ if fast_list:
351
+ cmd += ["--fast-list"]
352
+ if min_size:
353
+ cmd += ["--min-size", min_size]
354
+ if max_size:
355
+ cmd += ["--max-size", max_size]
356
+ if diff_option == DiffOption.MISSING_ON_DST:
357
+ cmd += ["--one-way"]
358
+ if other_args:
359
+ cmd += other_args
360
+ proc = self._launch_process(cmd, capture=True)
361
+ item: DiffItem
362
+ for item in diff_stream_from_running_process(
363
+ running_process=proc, src_slug=src, dst_slug=dst, diff_option=diff_option
364
+ ):
365
+ if item is None:
366
+ break
367
+ yield item
368
+
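A sketch of consuming the diff() generator, reusing the hypothetical `rclone` instance from the ls() sketch above; the two remote prefixes are assumptions.

    # Stream differences between two prefixes; items arrive while the check runs.
    for item in rclone.diff("src:bucket/data", "dst:backup/data"):
        print(item)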
369
+ def walk(
370
+ self,
371
+ path: Dir | Remote | str,
372
+ max_depth: int = -1,
373
+ breadth_first: bool = True,
374
+ order: Order = Order.NORMAL,
375
+ ) -> Generator[DirListing, None, None]:
376
+ """Walk through the given path recursively.
377
+
378
+ Args:
379
+ path: Remote path or Remote object to walk through
380
+ max_depth: Maximum depth to traverse (-1 for unlimited)
381
+
382
+ Yields:
383
+ DirListing: Directory listing for each directory encountered
384
+ """
385
+ dir_obj: Dir
386
+ if isinstance(path, Dir):
387
+ # Create a Remote object for the path
388
+ remote = path.remote
389
+ rpath = RPath(
390
+ remote=remote,
391
+ path=path.path.path,
392
+ name=path.path.name,
393
+ size=0,
394
+ mime_type="inode/directory",
395
+ mod_time="",
396
+ is_dir=True,
397
+ )
398
+ rpath.set_rclone(self)
399
+ dir_obj = Dir(rpath)
400
+ elif isinstance(path, str):
401
+ dir_obj = Dir(to_path(path, self))
402
+ elif isinstance(path, Remote):
403
+ dir_obj = Dir(path)
404
+ else:
405
+ dir_obj = Dir(path) # shut up pyright
406
+ assert f"Invalid type for path: {type(path)}"
407
+
408
+ yield from walk(
409
+ dir_obj, max_depth=max_depth, breadth_first=breadth_first, order=order
410
+ )
411
+
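A sketch of walk(), again with the hypothetical `rclone` instance and a hypothetical remote.

    # Breadth-first traversal, two directory levels deep.
    for listing in rclone.walk("dst:bucket", max_depth=2):
        for f in listing.files:
            print(f)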
412
+ def scan_missing_folders(
413
+ self,
414
+ src: Dir | Remote | str,
415
+ dst: Dir | Remote | str,
416
+ max_depth: int = -1,
417
+ order: Order = Order.NORMAL,
418
+ ) -> Generator[Dir, None, None]:
419
+ """Walk through the given path recursively.
420
+
421
+ WORK IN PROGRESS!!
422
+
423
+ Args:
424
+ src: Source directory or Remote to walk through
425
+ dst: Destination directory or Remote to walk through
426
+ max_depth: Maximum depth to traverse (-1 for unlimited)
427
+
428
+ Yields:
429
+ Dir: Each directory present in src but missing in dst
430
+ """
431
+ from rclone_api.scan_missing_folders import scan_missing_folders
432
+
433
+ src_dir = Dir(to_path(src, self))
434
+ dst_dir = Dir(to_path(dst, self))
435
+ yield from scan_missing_folders(
436
+ src=src_dir, dst=dst_dir, max_depth=max_depth, order=order
437
+ )
438
+
439
+ def cleanup(
440
+ self, path: str, other_args: list[str] | None = None
441
+ ) -> CompletedProcess:
442
+ """Cleanup any resources used by the Rclone instance."""
443
+ # rclone cleanup remote:path [flags]
444
+ cmd = ["cleanup", path]
445
+ if other_args:
446
+ cmd += other_args
447
+ out = self._run(cmd)
448
+ return CompletedProcess.from_subprocess(out)
449
+
450
+ def copy_to(
451
+ self,
452
+ src: File | str,
453
+ dst: File | str,
454
+ check: bool | None = None,
455
+ verbose: bool | None = None,
456
+ other_args: list[str] | None = None,
457
+ ) -> CompletedProcess:
458
+ """Copy one file from source to destination.
459
+
460
+ Warning - slow.
461
+
462
+ """
463
+ check = get_check(check)
464
+ verbose = get_verbose(verbose)
465
+ src = src if isinstance(src, str) else str(src.path)
466
+ dst = dst if isinstance(dst, str) else str(dst.path)
467
+ cmd_list: list[str] = ["copyto", src, dst]
468
+ if other_args is not None:
469
+ cmd_list += other_args
470
+ cp = self._run(cmd_list, check=check)
471
+ return CompletedProcess.from_subprocess(cp)
472
+
473
+ def copy_files(
474
+ self,
475
+ src: str,
476
+ dst: str,
477
+ files: list[str] | Path,
478
+ check: bool | None = None,
479
+ max_backlog: int | None = None,
480
+ verbose: bool | None = None,
481
+ checkers: int | None = None,
482
+ transfers: int | None = None,
483
+ low_level_retries: int | None = None,
484
+ retries: int | None = None,
485
+ retries_sleep: str | None = None,
486
+ metadata: bool | None = None,
487
+ timeout: str | None = None,
488
+ max_partition_workers: int | None = None,
489
+ multi_thread_streams: int | None = None,
490
+ other_args: list[str] | None = None,
491
+ ) -> list[CompletedProcess]:
492
+ """Copy multiple files from source to destination.
493
+
494
+ Args:
495
+ payload: Dictionary of source and destination file paths
496
+ """
497
+ check = get_check(check)
498
+ max_partition_workers = max_partition_workers or 1
499
+ low_level_retries = low_level_retries or 10
500
+ retries = retries or 3
501
+ other_args = other_args or []
502
+ checkers = checkers or 1000
503
+ transfers = transfers or 32
504
+ verbose = get_verbose(verbose)
505
+ payload: list[str] = (
506
+ files
507
+ if isinstance(files, list)
508
+ else [f.strip() for f in files.read_text().splitlines() if f.strip()]
509
+ )
510
+ if len(payload) == 0:
511
+ return []
512
+
513
+ for p in payload:
514
+ if ":" in p:
515
+ raise ValueError(
516
+ f"Invalid file path, contains a remote, which is not allowed for copy_files: {p}"
517
+ )
518
+
519
+ using_fast_list = "--fast-list" in other_args
520
+ if using_fast_list:
521
+ warnings.warn(
522
+ "It's not recommended to use --fast-list with copy_files as this will perform poorly on large repositories since the entire repository has to be scanned."
523
+ )
524
+
525
+ if max_partition_workers > 1:
526
+ datalists: dict[str, list[str]] = group_files(
527
+ payload, fully_qualified=False
528
+ )
529
+ else:
530
+ datalists = {"": payload}
531
+ # out: subprocess.CompletedProcess | None = None
532
+ out: list[CompletedProcess] = []
533
+
534
+ futures: list[Future] = []
535
+
536
+ with ThreadPoolExecutor(max_workers=max_partition_workers) as executor:
537
+ for common_prefix, files in datalists.items():
538
+
539
+ def _task(
540
+ files: list[str] | Path = files,
541
+ ) -> subprocess.CompletedProcess:
542
+ with TemporaryDirectory() as tmpdir:
543
+ filelist: list[str] = []
544
+ filepath: Path
545
+ if isinstance(files, list):
546
+ include_files_txt = Path(tmpdir) / "include_files.txt"
547
+ include_files_txt.write_text(
548
+ "\n".join(files), encoding="utf-8"
549
+ )
550
+ filelist = list(files)
551
+ filepath = Path(include_files_txt)
552
+ elif isinstance(files, Path):
553
+ filelist = [
554
+ f.strip()
555
+ for f in files.read_text().splitlines()
556
+ if f.strip()
557
+ ]
558
+ filepath = files
559
+ if common_prefix:
560
+ src_path = f"{src}/{common_prefix}"
561
+ dst_path = f"{dst}/{common_prefix}"
562
+ else:
563
+ src_path = src
564
+ dst_path = dst
565
+
566
+ if verbose:
567
+ nfiles = len(filelist)
568
+ files_fqdn = [f" {src_path}/{f}" for f in filelist]
569
+ print(f"Copying {nfiles} files:")
570
+ chunk_size = 100
571
+ for i in range(0, nfiles, chunk_size):
572
+ chunk = files_fqdn[i : i + chunk_size]
573
+ files_str = "\n".join(chunk)
574
+ print(f"{files_str}")
575
+ cmd_list: list[str] = [
576
+ "copy",
577
+ src_path,
578
+ dst_path,
579
+ "--files-from",
580
+ str(filepath),
581
+ "--checkers",
582
+ str(checkers),
583
+ "--transfers",
584
+ str(transfers),
585
+ "--low-level-retries",
586
+ str(low_level_retries),
587
+ "--retries",
588
+ str(retries),
589
+ ]
590
+ if metadata:
591
+ cmd_list.append("--metadata")
592
+ if retries_sleep is not None:
593
+ cmd_list += ["--retries-sleep", retries_sleep]
594
+ if timeout is not None:
595
+ cmd_list += ["--timeout", timeout]
596
+ if max_backlog is not None:
597
+ cmd_list += ["--max-backlog", str(max_backlog)]
598
+ if multi_thread_streams is not None:
599
+ cmd_list += [
600
+ "--multi-thread-streams",
601
+ str(multi_thread_streams),
602
+ ]
603
+ if verbose:
604
+ if not any(["-v" in x for x in other_args]):
605
+ cmd_list.append("-vvvv")
606
+ if not any(["--progress" in x for x in other_args]):
607
+ cmd_list.append("--progress")
608
+ if other_args:
609
+ cmd_list += other_args
610
+ out = self._run(cmd_list, capture=not verbose)
611
+ return out
612
+
613
+ fut: Future = executor.submit(_task)
614
+ futures.append(fut)
615
+ for fut in futures:
616
+ cp: subprocess.CompletedProcess = fut.result()
617
+ assert cp is not None
618
+ out.append(CompletedProcess.from_subprocess(cp))
619
+ if cp.returncode != 0:
620
+ if check:
621
+ raise ValueError(f"Error deleting files: {cp.stderr}")
622
+ else:
623
+ warnings.warn(f"Error deleting files: {cp.stderr}")
624
+ return out
625
+
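A sketch of copy_files(), using the hypothetical `rclone` instance; note that the file paths are relative to src and must not contain a remote prefix, per the check above.

    results = rclone.copy_files(
        src="src:bucket/photos",
        dst="dst:backup/photos",
        files=["2024/a.jpg", "2024/b.jpg"],
        transfers=16,
    )
    for cp in results:
        print(cp)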
626
+ def copy(
627
+ self,
628
+ src: Dir | str,
629
+ dst: Dir | str,
630
+ check: bool | None = None,
631
+ transfers: int | None = None,
632
+ checkers: int | None = None,
633
+ multi_thread_streams: int | None = None,
634
+ low_level_retries: int | None = None,
635
+ retries: int | None = None,
636
+ other_args: list[str] | None = None,
637
+ ) -> CompletedProcess:
638
+ """Copy files from source to destination.
639
+
640
+ Args:
641
+ src: Source directory
642
+ dst: Destination directory
643
+ """
644
+ # src_dir = src.path.path
645
+ # dst_dir = dst.path.path
646
+ src_dir = convert_to_str(src)
647
+ dst_dir = convert_to_str(dst)
648
+ check = get_check(check)
649
+ checkers = checkers or 1000
650
+ transfers = transfers or 32
651
+ low_level_retries = low_level_retries or 10
652
+ retries = retries or 3
653
+ cmd_list: list[str] = ["copy", src_dir, dst_dir]
654
+ cmd_list += ["--checkers", str(checkers)]
655
+ cmd_list += ["--transfers", str(transfers)]
656
+ cmd_list += ["--low-level-retries", str(low_level_retries)]
657
+ if multi_thread_streams is not None:
658
+ cmd_list += ["--multi-thread-streams", str(multi_thread_streams)]
659
+ if other_args:
660
+ cmd_list += other_args
661
+ cp = self._run(cmd_list, check=check, capture=False)
662
+ return CompletedProcess.from_subprocess(cp)
663
+
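A sketch of copy() for whole-directory transfers, with the same hypothetical instance and remotes.

    # Recursively copy one prefix to another with a higher transfer count.
    cp = rclone.copy("src:bucket/data", "dst:backup/data", transfers=64)
    print(cp)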
664
+ def purge(self, path: Dir | str) -> CompletedProcess:
665
+ """Purge a directory"""
666
+ # path should always be a string
667
+ path = path if isinstance(path, str) else str(path.path)
668
+ cmd_list: list[str] = ["purge", str(path)]
669
+ cp = self._run(cmd_list)
670
+ return CompletedProcess.from_subprocess(cp)
671
+
672
+ def delete_files(
673
+ self,
674
+ files: str | File | list[str] | list[File],
675
+ check: bool | None = None,
676
+ rmdirs=False,
677
+ verbose: bool | None = None,
678
+ max_partition_workers: int | None = None,
679
+ other_args: list[str] | None = None,
680
+ ) -> CompletedProcess:
681
+ """Delete a directory"""
682
+ check = get_check(check)
683
+ verbose = get_verbose(verbose)
684
+ payload: list[str] = convert_to_filestr_list(files)
685
+ if len(payload) == 0:
686
+ if verbose:
687
+ print("No files to delete")
688
+ cp = subprocess.CompletedProcess(
689
+ args=["rclone", "delete", "--files-from", "[]"],
690
+ returncode=0,
691
+ stdout="",
692
+ stderr="",
693
+ )
694
+ return CompletedProcess.from_subprocess(cp)
695
+
696
+ datalists: dict[str, list[str]] = group_files(payload)
697
+ completed_processes: list[subprocess.CompletedProcess] = []
698
+
699
+ futures: list[Future] = []
700
+
701
+ with ThreadPoolExecutor(max_workers=max_partition_workers) as executor:
702
+
703
+ for remote, files in datalists.items():
704
+
705
+ def _task(
706
+ files=files, check=check, remote=remote
707
+ ) -> subprocess.CompletedProcess:
708
+ with TemporaryDirectory() as tmpdir:
709
+ include_files_txt = Path(tmpdir) / "include_files.txt"
710
+ include_files_txt.write_text("\n".join(files), encoding="utf-8")
711
+
712
+ # print(include_files_txt)
713
+ cmd_list: list[str] = [
714
+ "delete",
715
+ remote,
716
+ "--files-from",
717
+ str(include_files_txt),
718
+ "--checkers",
719
+ "1000",
720
+ "--transfers",
721
+ "1000",
722
+ ]
723
+ if verbose:
724
+ cmd_list.append("-vvvv")
725
+ if rmdirs:
726
+ cmd_list.append("--rmdirs")
727
+ if other_args:
728
+ cmd_list += other_args
729
+ out = self._run(cmd_list, check=check)
730
+ if out.returncode != 0:
731
+ if check:
732
+ completed_processes.append(out)
733
+ raise ValueError(f"Error deleting files: {out}")
734
+ else:
735
+ warnings.warn(f"Error deleting files: {out}")
736
+ return out
737
+
738
+ fut: Future = executor.submit(_task)
739
+ futures.append(fut)
740
+
741
+ for fut in futures:
742
+ out = fut.result()
743
+ assert out is not None
744
+ completed_processes.append(out)
745
+
746
+ return CompletedProcess(completed_processes)
747
+
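A sketch of delete_files(); unlike copy_files(), these paths are fully qualified with their remote, which is how group_files() partitions them.

    cp = rclone.delete_files(
        ["dst:bucket/tmp/a.txt", "dst:bucket/tmp/b.txt"],
        rmdirs=True,  # also remove directories emptied by the deletes
    )
    print(cp)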
748
+ @deprecated("delete_files")
749
+ def deletefiles(
750
+ self, files: str | File | list[str] | list[File]
751
+ ) -> CompletedProcess:
752
+ out = self.delete_files(files)
753
+ return out
754
+
755
+ def exists(self, path: Dir | Remote | str | File) -> bool:
756
+ """Check if a file or directory exists."""
757
+ arg: str = convert_to_str(path)
758
+ assert isinstance(arg, str)
759
+ try:
760
+ dir_listing = self.ls(arg)
761
+ # print(dir_listing)
762
+ return len(dir_listing.dirs) > 0 or len(dir_listing.files) > 0
763
+ except subprocess.CalledProcessError:
764
+ return False
765
+
766
+ def is_synced(self, src: str | Dir, dst: str | Dir) -> bool:
767
+ """Check if two directories are in sync."""
768
+ src = convert_to_str(src)
769
+ dst = convert_to_str(dst)
770
+ cmd_list: list[str] = ["check", str(src), str(dst)]
771
+ try:
772
+ self._run(cmd_list, check=True)
773
+ return True
774
+ except subprocess.CalledProcessError:
775
+ return False
776
+
777
+ def copy_file_resumable_s3(
778
+ self,
779
+ src: str,
780
+ dst: str,
781
+ save_state_json: Path,
782
+ chunk_size: SizeSuffix | None = None,
783
+ read_threads: int = 8,
784
+ write_threads: int = 8,
785
+ retries: int = 3,
786
+ verbose: bool | None = None,
787
+ max_chunks_before_suspension: int | None = None,
788
+ mount_log: Path | None = None,
789
+ ) -> MultiUploadResult:
790
+ """For massive files that rclone can't handle in one go, this function will copy the file in chunks to an S3 store"""
791
+ from rclone_api.s3.api import S3Client
792
+ from rclone_api.s3.create import S3Credentials
793
+ from rclone_api.util import S3PathInfo, split_s3_path
794
+
795
+ other_args: list[str] = ["--no-modtime", "--vfs-read-wait", "1s"]
796
+ chunk_size = chunk_size or SizeSuffix("64M")
797
+ unit_chunk_size = chunk_size / read_threads
798
+ tmp_mount_dir = self._get_tmp_mount_dir()
799
+ vfs_read_chunk_size = unit_chunk_size
800
+ vfs_read_chunk_size_limit = chunk_size
801
+ vfs_read_chunk_streams = read_threads
802
+ vfs_disk_space_total_size = chunk_size
803
+ assert (
804
+ chunk_size.as_int() % vfs_read_chunk_size.as_int() == 0
805
+ ), f"chunk_size {chunk_size} must be a multiple of vfs_read_chunk_size {vfs_read_chunk_size}"
806
+ other_args += ["--vfs-read-chunk-size", vfs_read_chunk_size.as_str()]
807
+ other_args += [
808
+ "--vfs-read-chunk-size-limit",
809
+ vfs_read_chunk_size_limit.as_str(),
810
+ ]
811
+ other_args += ["--vfs-read-chunk-streams", str(vfs_read_chunk_streams)]
812
+ other_args += [
813
+ "--vfs-disk-space-total-size",
814
+ vfs_disk_space_total_size.as_str(),
815
+ ]
816
+ other_args += ["--read-only"]
817
+ other_args += ["--direct-io"]
818
+ # --vfs-cache-max-size
819
+ other_args += ["--vfs-cache-max-size", vfs_disk_space_total_size.as_str()]
820
+ mount_path = tmp_mount_dir / "RCLONE_API_DYNAMIC_MOUNT"
821
+ src_path = Path(src)
822
+ name = src_path.name
823
+
824
+ src_parent_path = Path(src).parent.as_posix()
825
+ size_result: SizeResult = self.size_files(src_parent_path, [name])
826
+
827
+ target_size = SizeSuffix(size_result.total_size)
828
+ if target_size < SizeSuffix("5M"):
829
+ # fallback to normal copy
830
+ completed_proc = self.copy_to(src, dst, check=True)
831
+ if completed_proc.ok:
832
+ return MultiUploadResult.UPLOADED_FRESH
833
+ if size_result.total_size <= 0:
834
+ raise ValueError(
835
+ f"File {src} has size {size_result.total_size}, is this a directory?"
836
+ )
837
+
838
+ path_info: S3PathInfo = split_s3_path(dst)
839
+ remote = path_info.remote
840
+ bucket_name = path_info.bucket
841
+ s3_key = path_info.key
842
+ parsed: Parsed = self.config.parse()
843
+ sections: dict[str, Section] = parsed.sections
844
+ if remote not in sections:
845
+ raise ValueError(
846
+ f"Remote {remote} not found in rclone config, remotes are: {sections.keys()}"
847
+ )
848
+
849
+ section: Section = sections[remote]
850
+ dst_type = section.type()
851
+ if dst_type != "s3" and dst_type != "b2":
852
+ raise ValueError(
853
+ f"Remote {remote} is not an S3 remote, it is of type {dst_type}"
854
+ )
855
+
856
+ def get_provider_str(section=section) -> str | None:
857
+ type: str = section.type()
858
+ provider: str | None = section.provider()
859
+ if provider is not None:
860
+ return provider
861
+ if type == "b2":
862
+ return S3Provider.BACKBLAZE.value
863
+ if type != "s3":
864
+ raise ValueError(f"Remote {remote} is not an S3 remote")
865
+ return S3Provider.S3.value
866
+
867
+ provider: str
868
+ if provided_provider_str := get_provider_str():
869
+ if verbose:
870
+ print(f"Using provided provider: {provided_provider_str}")
871
+ provider = provided_provider_str
872
+ else:
873
+ if verbose:
874
+ print(f"Using default provider: {S3Provider.S3.value}")
875
+ provider = S3Provider.S3.value
876
+ provider_enum = S3Provider.from_str(provider)
877
+
878
+ s3_creds: S3Credentials = S3Credentials(
879
+ provider=provider_enum,
880
+ access_key_id=section.access_key_id(),
881
+ secret_access_key=section.secret_access_key(),
882
+ endpoint_url=section.endpoint(),
883
+ )
884
+
885
+ chunk_fetcher: MultiMountFileChunker = self.get_multi_mount_file_chunker(
886
+ src=src_path.as_posix(),
887
+ chunk_size=chunk_size,
888
+ threads=read_threads,
889
+ mount_log=mount_log,
890
+ direct_io=True,
891
+ )
892
+
893
+ client = S3Client(s3_creds)
894
+ upload_config: S3MutliPartUploadConfig = S3MutliPartUploadConfig(
895
+ chunk_size=chunk_size.as_int(),
896
+ chunk_fetcher=chunk_fetcher.fetch,
897
+ max_write_threads=write_threads,
898
+ retries=retries,
899
+ resume_path_json=save_state_json,
900
+ max_chunks_before_suspension=max_chunks_before_suspension,
901
+ )
902
+
903
+ src_file = mount_path / name
904
+
905
+ print(f"Uploading {name} to {s3_key} in bucket {bucket_name}")
906
+ print(f"Source: {src_path}")
907
+ print(f"bucket_name: {bucket_name}")
908
+ print(f"upload_config: {upload_config}")
909
+
910
+ # get the file size
911
+
912
+ upload_target = S3UploadTarget(
913
+ src_file=src_file,
914
+ src_file_size=size_result.total_size,
915
+ bucket_name=bucket_name,
916
+ s3_key=s3_key,
917
+ )
918
+
919
+ try:
920
+ out: MultiUploadResult = client.upload_file_multipart(
921
+ upload_target=upload_target,
922
+ upload_config=upload_config,
923
+ )
924
+ return out
925
+ except Exception as e:
926
+ print(f"Error uploading file: {e}")
927
+ traceback.print_exc()
928
+ raise
929
+ finally:
930
+ chunk_fetcher.shutdown()
931
+
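A sketch of copy_file_resumable_s3() for a very large object, using the hypothetical `rclone` instance; SizeSuffix comes from rclone_api.types as imported at the top of this module, and all paths shown are assumptions.

    from pathlib import Path
    from rclone_api.types import SizeSuffix

    result = rclone.copy_file_resumable_s3(
        src="src:bucket/huge.tar",
        dst="dst:bucket/huge.tar",
        save_state_json=Path("resume-state.json"),  # progress is persisted here
        chunk_size=SizeSuffix("128M"),
    )
    print(result)  # a MultiUploadResult value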
932
+ def get_multi_mount_file_chunker(
933
+ self,
934
+ src: str,
935
+ chunk_size: SizeSuffix,
936
+ threads: int,
937
+ mount_log: Path | None,
938
+ direct_io: bool,
939
+ ) -> MultiMountFileChunker:
940
+ from rclone_api.util import random_str
941
+
942
+ mounts: list[Mount] = []
943
+ vfs_read_chunk_size = chunk_size
944
+ vfs_read_chunk_size_limit = chunk_size
945
+ vfs_read_chunk_streams = 0
946
+ vfs_disk_space_total_size = chunk_size
947
+ other_args: list[str] = []
948
+ other_args += ["--no-modtime"]
949
+ other_args += ["--vfs-read-chunk-size", vfs_read_chunk_size.as_str()]
950
+ other_args += [
951
+ "--vfs-read-chunk-size-limit",
952
+ vfs_read_chunk_size_limit.as_str(),
953
+ ]
954
+ other_args += ["--vfs-read-chunk-streams", str(vfs_read_chunk_streams)]
955
+ other_args += [
956
+ "--vfs-disk-space-total-size",
957
+ vfs_disk_space_total_size.as_str(),
958
+ ]
959
+ other_args += ["--read-only"]
960
+ if direct_io:
961
+ other_args += ["--direct-io"]
962
+
963
+ base_mount_dir = self._get_tmp_mount_dir()
964
+ base_cache_dir = self._get_cache_dir()
965
+
966
+ filename = Path(src).name
967
+ with ThreadPoolExecutor(max_workers=threads) as executor:
968
+ futures: list[Future] = []
969
+ try:
970
+ for i in range(threads):
971
+ tmp_mnts = base_mount_dir / random_str(12)
972
+ verbose = mount_log is not None
973
+
974
+ src_parent_path = Path(src).parent.as_posix()
975
+ cache_dir = base_cache_dir / random_str(12)
976
+
977
+ def task(
978
+ src_parent_path=src_parent_path,
979
+ tmp_mnts=tmp_mnts,
980
+ cache_dir=cache_dir,
981
+ ):
982
+ clean_mount(tmp_mnts, verbose=verbose)
983
+ prepare_mount(tmp_mnts, verbose=verbose)
984
+ return self.mount(
985
+ src=src_parent_path,
986
+ outdir=tmp_mnts,
987
+ allow_writes=False,
988
+ use_links=True,
989
+ vfs_cache_mode="minimal",
990
+ verbose=False,
991
+ cache_dir=cache_dir,
992
+ cache_dir_delete_on_exit=True,
993
+ log=mount_log,
994
+ other_args=other_args,
995
+ )
996
+
997
+ futures.append(executor.submit(task))
998
+ mount_errors: list[Exception] = []
999
+ for fut in futures:
1000
+ try:
1001
+ mount = fut.result()
1002
+ mounts.append(mount)
1003
+ except Exception as er:
1004
+ warnings.warn(f"Error mounting: {er}")
1005
+ mount_errors.append(er)
1006
+ if mount_errors:
1007
+ warnings.warn(f"Error mounting: {mount_errors}")
1008
+ raise Exception(mount_errors)
1009
+ except Exception:
1010
+ for mount in mounts:
1011
+ mount.close()
1012
+ raise
1013
+
1014
+ src_path: Path = Path(src)
1015
+ src_parent_path = src_path.parent.as_posix()
1016
+ name = src_path.name
1017
+ size_result: SizeResult = self.size_files(src_parent_path, [name])
1018
+ filesize = size_result.total_size
1019
+
1020
+ executor = ThreadPoolExecutor(max_workers=threads)
1021
+ filechunker: MultiMountFileChunker = MultiMountFileChunker(
1022
+ filename=filename,
1023
+ filesize=filesize,
1024
+ mounts=mounts,
1025
+ executor=executor,
1026
+ verbose=mount_log is not None,
1027
+ )
1028
+ return filechunker
1029
+
1030
+ def copy_bytes(
1031
+ self,
1032
+ src: str,
1033
+ offset: int | SizeSuffix,
1034
+ length: int | SizeSuffix,
1035
+ outfile: Path,
1036
+ other_args: list[str] | None = None,
1037
+ ) -> Exception | None:
1038
+ """Copy a slice of bytes from the src file to dst."""
1039
+ offset = SizeSuffix(offset).as_int()
1040
+ length = SizeSuffix(length).as_int()
1041
+ cmd_list: list[str] = [
1042
+ "cat",
1043
+ "--offset",
1044
+ str(offset),
1045
+ "--count",
1046
+ str(length),
1047
+ src,
1048
+ ]
1049
+ if other_args:
1050
+ cmd_list.extend(other_args)
1051
+ try:
1052
+ cp = self._run(cmd_list, capture=outfile)
1053
+ if cp.returncode == 0:
1054
+ return None
1055
+ return Exception(cp.stderr)
1056
+ except subprocess.CalledProcessError as e:
1057
+ return e
1058
+
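A sketch of copy_bytes() pulling a single slice into a local file; offset and length accept either ints or SizeSuffix values, and the remote path is hypothetical.

    err = rclone.copy_bytes(
        src="dst:bucket/huge.tar",
        offset=SizeSuffix("512M"),
        length=SizeSuffix("1M"),
        outfile=Path("slice.bin"),
    )
    assert err is None, err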
1059
+ def copy_bytes_mount(
1060
+ self,
1061
+ src: str,
1062
+ offset: int | SizeSuffix,
1063
+ length: int | SizeSuffix,
1064
+ chunk_size: SizeSuffix,
1065
+ max_threads: int = 1,
1066
+ # If outfile is supplied then bytes are written to this file and success returns bytes(0)
1067
+ outfile: Path | None = None,
1068
+ mount_log: Path | None = None,
1069
+ direct_io: bool = True,
1070
+ ) -> bytes | Exception:
1071
+ """Copy a slice of bytes from the src file to dst. Parallelism is achieved through multiple mounted files."""
1072
+ from rclone_api.types import FilePart
1073
+
1074
+ offset = SizeSuffix(offset).as_int()
1075
+ length = SizeSuffix(length).as_int()
1076
+ # determine number of threads from chunk size
1077
+ threads = max(1, min(max_threads, length // chunk_size.as_int()))
1078
+ # todo - implement max threads.
1079
+ filechunker = self.get_multi_mount_file_chunker(
1080
+ src=src,
1081
+ chunk_size=chunk_size,
1082
+ threads=threads,
1083
+ mount_log=mount_log,
1084
+ direct_io=direct_io,
1085
+ )
1086
+ try:
1087
+ fut = filechunker.fetch(offset, length, extra=None)
1088
+ fp: FilePart = fut.result()
1089
+ payload = fp.payload
1090
+ if isinstance(payload, Exception):
1091
+ return payload
1092
+ try:
1093
+ if outfile is None:
1094
+ return payload.read_bytes()
1095
+ shutil.move(payload, outfile)
1096
+ return bytes(0)
1097
+ finally:
1098
+ fp.close()
1099
+
1100
+ except Exception as e:
1101
+ warnings.warn(f"Error copying bytes: {e}")
1102
+ return e
1103
+ finally:
1104
+ try:
1105
+ filechunker.shutdown()
1106
+ except Exception as e:
1107
+ warnings.warn(f"Error closing filechunker: {e}")
1108
+
1109
+ def copy_dir(
1110
+ self, src: str | Dir, dst: str | Dir, args: list[str] | None = None
1111
+ ) -> CompletedProcess:
1112
+ """Copy a directory from source to destination."""
1113
+ # convert src to str, also dst
1114
+ src = convert_to_str(src)
1115
+ dst = convert_to_str(dst)
1116
+ cmd_list: list[str] = ["copy", src, dst]
1117
+ if args is not None:
1118
+ cmd_list += args
1119
+ cp = self._run(cmd_list)
1120
+ return CompletedProcess.from_subprocess(cp)
1121
+
1122
+ def copy_remote(
1123
+ self, src: Remote, dst: Remote, args: list[str] | None = None
1124
+ ) -> CompletedProcess:
1125
+ """Copy a remote to another remote."""
1126
+ cmd_list: list[str] = ["copy", str(src), str(dst)]
1127
+ if args is not None:
1128
+ cmd_list += args
1129
+ # return self._run(cmd_list)
1130
+ cp = self._run(cmd_list)
1131
+ return CompletedProcess.from_subprocess(cp)
1132
+
1133
+ def mount(
1134
+ self,
1135
+ src: Remote | Dir | str,
1136
+ outdir: Path,
1137
+ allow_writes: bool | None = False,
1138
+ use_links: bool | None = None,
1139
+ vfs_cache_mode: str | None = None,
1140
+ verbose: bool | None = None,
1141
+ cache_dir: Path | None = None,
1142
+ cache_dir_delete_on_exit: bool | None = None,
1143
+ log: Path | None = None,
1144
+ other_args: list[str] | None = None,
1145
+ ) -> Mount:
1146
+ """Mount a remote or directory to a local path.
1147
+
1148
+ Args:
1149
+ src: Remote or directory to mount
1150
+ outdir: Local path to mount to
1151
+
1152
+ Returns:
1153
+ Mount: Object representing the mounted path and its background process
1154
+
1155
+ Raises:
1156
+ subprocess.CalledProcessError: If the mount operation fails
1157
+ """
1158
+
1159
+ allow_writes = allow_writes or False
1160
+ use_links = True if use_links is None else use_links
1161
+ verbose = get_verbose(verbose) or (log is not None)
1162
+ vfs_cache_mode = vfs_cache_mode or "full"
1163
+ clean_mount(outdir, verbose=verbose)
1164
+ prepare_mount(outdir, verbose=verbose)
1165
+ debug_fuse = log is not None
1166
+ src_str = convert_to_str(src)
1167
+ cmd_list: list[str] = ["mount", src_str, str(outdir)]
1168
+ if not allow_writes:
1169
+ cmd_list.append("--read-only")
1170
+ if use_links:
1171
+ cmd_list.append("--links")
1172
+ if vfs_cache_mode:
1173
+ cmd_list.append("--vfs-cache-mode")
1174
+ cmd_list.append(vfs_cache_mode)
1175
+ if cache_dir:
1176
+ cmd_list.append("--cache-dir")
1177
+ cmd_list.append(str(cache_dir.absolute()))
1178
+ if debug_fuse:
1179
+ cmd_list.append("--debug-fuse")
1180
+ if verbose:
1181
+ cmd_list.append("-vvvv")
1182
+ if other_args:
1183
+ cmd_list += other_args
1184
+ proc = self._launch_process(cmd_list, log=log)
1185
+ mount_read_only = not allow_writes
1186
+ mount: Mount = Mount(
1187
+ src=src_str,
1188
+ mount_path=outdir,
1189
+ process=proc,
1190
+ read_only=mount_read_only,
1191
+ cache_dir=cache_dir,
1192
+ cache_dir_delete_on_exit=cache_dir_delete_on_exit,
1193
+ )
1194
+ return mount
1195
+
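A sketch of mount(), assuming the hypothetical `rclone` instance and a local mount point; the Mount must be closed when finished.

    mnt = rclone.mount("dst:bucket", Path("/tmp/rclone-mnt"))
    try:
        print(list(Path("/tmp/rclone-mnt").iterdir()))
    finally:
        mnt.close()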
1196
+ @contextmanager
1197
+ def scoped_mount(
1198
+ self,
1199
+ src: Remote | Dir | str,
1200
+ outdir: Path,
1201
+ allow_writes: bool | None = None,
1202
+ use_links: bool | None = None,
1203
+ vfs_cache_mode: str | None = None,
1204
+ verbose: bool | None = None,
1205
+ log: Path | None = None,
1206
+ cache_dir: Path | None = None,
1207
+ cache_dir_delete_on_exit: bool | None = None,
1208
+ other_args: list[str] | None = None,
1209
+ ) -> Generator[Mount, None, None]:
1210
+ """Like mount, but can be used in a context manager."""
1211
+ error_happened = False
1212
+ mount: Mount = self.mount(
1213
+ src,
1214
+ outdir,
1215
+ allow_writes=allow_writes,
1216
+ use_links=use_links,
1217
+ vfs_cache_mode=vfs_cache_mode,
1218
+ verbose=verbose,
1219
+ cache_dir=cache_dir,
1220
+ cache_dir_delete_on_exit=cache_dir_delete_on_exit,
1221
+ log=log,
1222
+ other_args=other_args,
1223
+ )
1224
+ try:
1225
+ yield mount
1226
+ except Exception as e:
1227
+ error_happened = True
1228
+ stack_trace = traceback.format_exc()
1229
+ warnings.warn(f"Error in scoped_mount: {e}\n\nStack Trace:\n{stack_trace}")
1230
+ raise
1231
+ finally:
1232
+ if not error_happened or (not allow_writes):
1233
+ mount.close()
1234
+
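The context-managed variant handles the teardown itself:

    with rclone.scoped_mount("dst:bucket", Path("/tmp/rclone-mnt")) as mnt:
        data = (Path("/tmp/rclone-mnt") / "file.bin").read_bytes()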
1235
+ # Settings optimized for s3.
1236
+ def mount_s3(
1237
+ self,
1238
+ url: str,
1239
+ outdir: Path,
1240
+ allow_writes=False,
1241
+ vfs_cache_mode="full",
1242
+ dir_cache_time: str | None = "1h",
1243
+ attribute_timeout: str | None = "1h",
1244
+ vfs_disk_space_total_size: str | None = "100M",
1245
+ transfers: int | None = 128,
1246
+ modtime_strategy: (
1247
+ ModTimeStrategy | None
1248
+ ) = ModTimeStrategy.USE_SERVER_MODTIME, # speeds up S3 operations
1249
+ vfs_read_chunk_streams: int | None = 16,
1250
+ vfs_read_chunk_size: str | None = "4M",
1251
+ vfs_fast_fingerprint: bool = True,
1252
+ # vfs-refresh
1253
+ vfs_refresh: bool = True,
1254
+ other_args: list[str] | None = None,
1255
+ ) -> Mount:
1256
+ """Mount a remote or directory to a local path.
1257
+
1258
+ Args:
1259
+ url: Remote or directory to mount
1260
+ outdir: Local path to mount to
1261
+ """
1262
+ other_args = other_args or []
1263
+ if modtime_strategy is not None:
1264
+ other_args.append(f"--{modtime_strategy.value}")
1265
+ if (vfs_cache_mode == "full" or vfs_cache_mode == "writes") and (
1266
+ transfers is not None and "--transfers" not in other_args
1267
+ ):
1268
+ other_args.append("--transfers")
1269
+ other_args.append(str(transfers))
1270
+ if dir_cache_time is not None and "--dir-cache-time" not in other_args:
1271
+ other_args.append("--dir-cache-time")
1272
+ other_args.append(dir_cache_time)
1273
+ if (
1274
+ vfs_disk_space_total_size is not None
1275
+ and "--vfs-cache-max-size" not in other_args
1276
+ ):
1277
+ other_args.append("--vfs-cache-max-size")
1278
+ other_args.append(vfs_disk_space_total_size)
1279
+ if vfs_refresh and "--vfs-refresh" not in other_args:
1280
+ other_args.append("--vfs-refresh")
1281
+ if attribute_timeout is not None and "--attr-timeout" not in other_args:
1282
+ other_args.append("--attr-timeout")
1283
+ other_args.append(attribute_timeout)
1284
+ if vfs_read_chunk_streams:
1285
+ other_args.append("--vfs-read-chunk-streams")
1286
+ other_args.append(str(vfs_read_chunk_streams))
1287
+ if vfs_read_chunk_size:
1288
+ other_args.append("--vfs-read-chunk-size")
1289
+ other_args.append(vfs_read_chunk_size)
1290
+ if vfs_fast_fingerprint:
1291
+ other_args.append("--vfs-fast-fingerprint")
1292
+
1293
+ other_args = other_args if other_args else None
1294
+ return self.mount(
1295
+ url,
1296
+ outdir,
1297
+ allow_writes=allow_writes,
1298
+ vfs_cache_mode=vfs_cache_mode,
1299
+ other_args=other_args,
1300
+ )
1301
+
1302
+ def serve_webdav(
1303
+ self,
1304
+ src: Remote | Dir | str,
1305
+ user: str,
1306
+ password: str,
1307
+ addr: str = "localhost:2049",
1308
+ allow_other: bool = False,
1309
+ other_args: list[str] | None = None,
1310
+ ) -> Process:
1311
+ """Serve a remote or directory via NFS.
1312
+
1313
+ Args:
1314
+ src: Remote or directory to serve
1315
+ addr: Network address and port to serve on (default: localhost:2049)
1316
+ allow_other: Allow other users to access the share
1317
+
1318
+ Returns:
1319
+ Process: The running WebDAV server process
1320
+
1321
+ Raises:
1322
+ ValueError: If the WebDAV server fails to start
1323
+ """
1324
+ src_str = convert_to_str(src)
1325
+ cmd_list: list[str] = ["serve", "webdav", "--addr", addr, src_str]
1326
+ cmd_list.extend(["--user", user, "--pass", password])
1327
+ if allow_other:
1328
+ cmd_list.append("--allow-other")
1329
+ if other_args:
1330
+ cmd_list += other_args
1331
+ proc = self._launch_process(cmd_list)
1332
+ time.sleep(2) # give it a moment to start
1333
+ if proc.poll() is not None:
1334
+ raise ValueError("NFS serve process failed to start")
1335
+ return proc
1336
+
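A sketch of serve_webdav(); the credentials and port are assumptions, and stopping the server via terminate() assumes the returned Process mirrors the subprocess.Popen interface (poll() is used the same way above).

    proc = rclone.serve_webdav(
        "dst:bucket", user="user", password="secret", addr="localhost:8080"
    )
    try:
        pass  # point a WebDAV client at http://localhost:8080
    finally:
        proc.terminate()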
1337
+ def size_files(
1338
+ self,
1339
+ src: str,
1340
+ files: list[str],
1341
+ fast_list: bool = False, # Recommend that this is False
1342
+ other_args: list[str] | None = None,
1343
+ check: bool | None = False,
1344
+ verbose: bool | None = None,
1345
+ ) -> SizeResult:
1346
+ """Get the size of a list of files. Example of files items: "remote:bucket/to/file"."""
1347
+ verbose = get_verbose(verbose)
1348
+ check = get_check(check)
1349
+ if fast_list or (other_args and "--fast-list" in other_args):
1350
+ warnings.warn(
1351
+ "It's not recommended to use --fast-list with size_files as this will perform poorly on large repositories since the entire repository has to be scanned."
1352
+ )
1353
+ files = list(files)
1354
+ all_files: list[File] = []
1355
+ # prefix, files = group_under_one_prefix(src, files)
1356
+ cmd = ["lsjson", src, "--files-only", "-R"]
1357
+ with TemporaryDirectory() as tmpdir:
1358
+ # print("files: " + ",".join(files))
1359
+ include_files_txt = Path(tmpdir) / "include_files.txt"
1360
+ include_files_txt.write_text("\n".join(files), encoding="utf-8")
1361
+ cmd += ["--files-from", str(include_files_txt)]
1362
+ if fast_list:
1363
+ cmd.append("--fast-list")
1364
+ if other_args:
1365
+ cmd += other_args
1366
+ cp = self._run(cmd, check=check)
1367
+
1368
+ if cp.returncode != 0:
1369
+ if check:
1370
+ raise ValueError(f"Error getting file sizes: {cp.stderr}")
1371
+ else:
1372
+ warnings.warn(f"Error getting file sizes: {cp.stderr}")
1373
+ stdout = cp.stdout
1374
+ pieces = src.split(":", 1)
1375
+ remote_name = pieces[0]
1376
+ parent_path: str | None
1377
+ if len(pieces) > 1:
1378
+ parent_path = pieces[1]
1379
+ else:
1380
+ parent_path = None
1381
+ remote = Remote(name=remote_name, rclone=self)
1382
+ paths: list[RPath] = RPath.from_json_str(
1383
+ stdout, remote, parent_path=parent_path
1384
+ )
1385
+ # print(paths)
1386
+ all_files += [File(p) for p in paths]
1387
+ file_sizes: dict[str, int] = {}
1388
+ f: File
1389
+ for f in all_files:
1390
+ p = f.to_string(include_remote=True)
1391
+ if p in file_sizes:
1392
+ warnings.warn(f"Duplicate file found: {p}")
1393
+ continue
1394
+ size = f.size
1395
+ if size == 0:
1396
+ warnings.warn(f"File size is 0: {p}")
1397
+ file_sizes[p] = f.size
1398
+ total_size = sum(file_sizes.values())
1399
+ file_sizes_path_corrected: dict[str, int] = {}
1400
+ for path, size in file_sizes.items():
1401
+ # remove the prefix
1402
+ path_path = Path(path)
1403
+ path_str = path_path.relative_to(src).as_posix()
1404
+ file_sizes_path_corrected[path_str] = size
1405
+ out: SizeResult = SizeResult(
1406
+ prefix=src, total_size=total_size, file_sizes=file_sizes_path_corrected
1407
+ )
1408
+ return out
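A sketch of size_files(), summing a couple of known objects under one prefix; the paths are relative to src, matching the --files-from usage above.

    result = rclone.size_files("dst:bucket/photos", ["2024/a.jpg", "2024/b.jpg"])
    print(result.total_size)
    print(result.file_sizes)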