rclone-api 1.3.14-py2.py3-none-any.whl → 1.3.17-py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
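The changed file, rclone_api/rclone.py, defines the package's main Rclone wrapper class. For orientation, below is a minimal usage sketch based only on the signatures visible in this diff; the config path and the remote:bucket names are placeholders, and the top-level Rclone import is an assumption (if the package does not re-export it, import from rclone_api.rclone instead).

    from pathlib import Path

    from rclone_api import Rclone  # assumed re-export; otherwise: from rclone_api.rclone import Rclone

    # Placeholder config and remote names -- substitute your own.
    rclone = Rclone(Path("rclone.conf"))

    # One-level listing of a remote directory (ls returns a DirListing with .files and .dirs).
    listing = rclone.ls("remote:bucket/path", max_depth=1)
    for f in listing.files:
        print(f)

    # Stream a large recursive listing page by page instead of holding it all in memory.
    with rclone.ls_stream("remote:bucket", max_depth=-1) as stream:
        for page in stream.files_paged(page_size=1000):
            print(f"got {len(page)} entries")

Both calls shell out to the configured rclone executable through the RcloneExec helper shown in the diff below.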
rclone_api/rclone.py CHANGED
@@ -1,1418 +1,1408 @@
1
- """
2
- Rclone API wrapper around the rclone executable.
3
- """
4
-
5
- import os
6
- import random
7
- import shutil
8
- import subprocess
9
- import time
10
- import traceback
11
- import warnings
12
- from concurrent.futures import Future, ThreadPoolExecutor
13
- from contextlib import contextmanager
14
- from fnmatch import fnmatch
15
- from pathlib import Path
16
- from tempfile import TemporaryDirectory
17
- from typing import Generator
18
-
19
- from rclone_api import Dir
20
- from rclone_api.completed_process import CompletedProcess
21
- from rclone_api.config import Config, Parsed, Section
22
- from rclone_api.convert import convert_to_filestr_list, convert_to_str
23
- from rclone_api.deprecated import deprecated
24
- from rclone_api.diff import DiffItem, DiffOption, diff_stream_from_running_process
25
- from rclone_api.dir_listing import DirListing
26
- from rclone_api.exec import RcloneExec
27
- from rclone_api.file import File, FileItem
28
- from rclone_api.group_files import group_files
29
- from rclone_api.mount import Mount, clean_mount, prepare_mount
30
- from rclone_api.mount_read_chunker import MultiMountFileChunker
31
- from rclone_api.process import Process
32
- from rclone_api.remote import Remote
33
- from rclone_api.rpath import RPath
34
- from rclone_api.s3.types import (
35
- MultiUploadResult,
36
- S3MutliPartUploadConfig,
37
- S3Provider,
38
- S3UploadTarget,
39
- )
40
- from rclone_api.types import (
41
- ListingOption,
42
- ModTimeStrategy,
43
- Order,
44
- SizeResult,
45
- SizeSuffix,
46
- )
47
- from rclone_api.util import (
48
- get_check,
49
- get_rclone_exe,
50
- get_verbose,
51
- to_path,
52
- )
53
- from rclone_api.walk import walk
54
-
55
-
56
- def rclone_verbose(verbose: bool | None) -> bool:
57
- if verbose is not None:
58
- os.environ["RCLONE_API_VERBOSE"] = "1" if verbose else "0"
59
- return bool(int(os.getenv("RCLONE_API_VERBOSE", "0")))
60
-
61
-
62
- def _to_rclone_conf(config: Config | Path) -> Config:
63
- if isinstance(config, Path):
64
- content = config.read_text(encoding="utf-8")
65
- return Config(content)
66
- else:
67
- return config
68
-
69
-
70
- # class closing(AbstractContextManager):
71
- # """Context to automatically close something at the end of a block.
72
-
73
- # Code like this:
74
-
75
- # with closing(<module>.open(<arguments>)) as f:
76
- # <block>
77
-
78
- # is equivalent to this:
79
-
80
- # f = <module>.open(<arguments>)
81
- # try:
82
- # <block>
83
- # finally:
84
- # f.close()
85
-
86
- # """
87
- # def __init__(self, thing):
88
- # self.thing = thing
89
- # def __enter__(self):
90
- # return self.thing
91
- # def __exit__(self, *exc_info):
92
- # self.thing.close()
93
-
94
-
95
- # Process
96
-
97
-
98
- class FilesStream:
99
-
100
- def __init__(self, path: str, process: Process) -> None:
101
- self.path = path
102
- self.process = process
103
-
104
- def __enter__(self) -> "FilesStream":
105
- self.process.__enter__()
106
- return self
107
-
108
- def __exit__(self, *exc_info):
109
- self.process.__exit__(*exc_info)
110
-
111
- def files(self) -> Generator[FileItem, None, None]:
112
- for line in self.process.stdout:
113
- linestr = line.decode("utf-8").strip()
114
- if linestr.startswith("["):
115
- continue
116
- if linestr.endswith(","):
117
- linestr = linestr[:-1]
118
- if linestr.endswith("]"):
119
- continue
120
- fileitem: FileItem | None = FileItem.from_json_str(self.path, linestr)
121
- if fileitem is None:
122
- continue
123
- yield fileitem
124
-
125
- def files_paged(
126
- self, page_size: int = 1000
127
- ) -> Generator[list[FileItem], None, None]:
128
- page: list[FileItem] = []
129
- for fileitem in self.files():
130
- page.append(fileitem)
131
- if len(page) >= page_size:
132
- yield page
133
- page = []
134
- if len(page) > 0:
135
- yield page
136
-
137
- def __iter__(self) -> Generator[FileItem, None, None]:
138
- return self.files()
139
-
140
-
141
- class Rclone:
142
- def __init__(
143
- self, rclone_conf: Path | Config, rclone_exe: Path | None = None
144
- ) -> None:
145
- if isinstance(rclone_conf, Path):
146
- if not rclone_conf.exists():
147
- raise ValueError(f"Rclone config file not found: {rclone_conf}")
148
- self._exec = RcloneExec(rclone_conf, get_rclone_exe(rclone_exe))
149
- self.config: Config = _to_rclone_conf(rclone_conf)
150
-
151
- def _run(
152
- self, cmd: list[str], check: bool = False, capture: bool | Path | None = None
153
- ) -> subprocess.CompletedProcess:
154
- return self._exec.execute(cmd, check=check, capture=capture)
155
-
156
- def _launch_process(
157
- self, cmd: list[str], capture: bool | None = None, log: Path | None = None
158
- ) -> Process:
159
- return self._exec.launch_process(cmd, capture=capture, log=log)
160
-
161
- def _get_tmp_mount_dir(self) -> Path:
162
- return Path("tmp_mnts")
163
-
164
- def _get_cache_dir(self) -> Path:
165
- return Path("cache")
166
-
167
- def webgui(self, other_args: list[str] | None = None) -> Process:
168
- """Launch the Rclone web GUI."""
169
- cmd = ["rcd", "--rc-web-gui"]
170
- if other_args:
171
- cmd += other_args
172
- return self._launch_process(cmd, capture=False)
173
-
174
- def launch_server(
175
- self,
176
- addr: str,
177
- user: str | None = None,
178
- password: str | None = None,
179
- other_args: list[str] | None = None,
180
- ) -> Process:
181
- """Launch the Rclone server so it can receive commands"""
182
- cmd = ["rcd"]
183
- if addr is not None:
184
- cmd += ["--rc-addr", addr]
185
- if user is not None:
186
- cmd += ["--rc-user", user]
187
- if password is not None:
188
- cmd += ["--rc-pass", password]
189
- if other_args:
190
- cmd += other_args
191
- out = self._launch_process(cmd, capture=False)
192
- time.sleep(1) # Give it some time to launch
193
- return out
194
-
195
- def remote_control(
196
- self,
197
- addr: str,
198
- user: str | None = None,
199
- password: str | None = None,
200
- capture: bool | None = None,
201
- other_args: list[str] | None = None,
202
- ) -> CompletedProcess:
203
- cmd = ["rc"]
204
- if addr:
205
- cmd += ["--rc-addr", addr]
206
- if user is not None:
207
- cmd += ["--rc-user", user]
208
- if password is not None:
209
- cmd += ["--rc-pass", password]
210
- if other_args:
211
- cmd += other_args
212
- cp = self._run(cmd, capture=capture)
213
- return CompletedProcess.from_subprocess(cp)
214
-
215
- def obscure(self, password: str) -> str:
216
- """Obscure a password for use in rclone config files."""
217
- cmd_list: list[str] = ["obscure", password]
218
- cp = self._run(cmd_list)
219
- return cp.stdout.strip()
220
-
221
- def ls_stream(
222
- self,
223
- path: str,
224
- max_depth: int = -1,
225
- fast_list: bool = False,
226
- ) -> FilesStream:
227
- """List files in the given path"""
228
- cmd = ["lsjson", path]
229
- if max_depth < 0:
230
- cmd.append("--recursive")
231
- elif max_depth > 0:
232
- cmd += ["--max-depth", str(max_depth)]
233
- if fast_list:
234
- cmd.append("--fast-list")
235
- streamer = FilesStream(path, self._launch_process(cmd, capture=True))
236
- return streamer
237
-
238
- def save_to_db(
239
- self,
240
- src: str,
241
- db_url: str,
242
- max_depth: int = -1,
243
- fast_list: bool = False,
244
- ) -> None:
245
- """Save files to a database (sqlite, mysql, postgres)"""
246
- from rclone_api.db import DB
247
-
248
- db = DB(db_url)
249
- with self.ls_stream(src, max_depth, fast_list) as stream:
250
- for page in stream.files_paged(page_size=10000):
251
- db.add_files(page)
252
-
253
- def ls(
254
- self,
255
- path: Dir | Remote | str,
256
- max_depth: int | None = None,
257
- glob: str | None = None,
258
- order: Order = Order.NORMAL,
259
- listing_option: ListingOption = ListingOption.ALL,
260
- ) -> DirListing:
261
- """List files in the given path.
262
-
263
- Args:
264
- path: Remote path or Remote object to list
265
- max_depth: Maximum recursion depth (0 means no recursion)
266
-
267
- Returns:
268
- List of File objects found at the path
269
- """
270
-
271
- if isinstance(path, str):
272
- path = Dir(
273
- to_path(path, self)
274
- ) # assume it's a directory if ls is being called.
275
-
276
- cmd = ["lsjson"]
277
- if max_depth is not None:
278
- if max_depth < 0:
279
- cmd.append("--recursive")
280
- if max_depth > 0:
281
- cmd.append("--max-depth")
282
- cmd.append(str(max_depth))
283
- if listing_option != ListingOption.ALL:
284
- cmd.append(f"--{listing_option.value}")
285
-
286
- cmd.append(str(path))
287
- remote = path.remote if isinstance(path, Dir) else path
288
- assert isinstance(remote, Remote)
289
-
290
- cp = self._run(cmd, check=True)
291
- text = cp.stdout
292
- parent_path: str | None = None
293
- if isinstance(path, Dir):
294
- parent_path = path.path.path
295
- paths: list[RPath] = RPath.from_json_str(text, remote, parent_path=parent_path)
296
- # print(parent_path)
297
- for o in paths:
298
- o.set_rclone(self)
299
-
300
- # do we have a glob pattern?
301
- if glob is not None:
302
- paths = [p for p in paths if fnmatch(p.path, glob)]
303
-
304
- if order == Order.REVERSE:
305
- paths.reverse()
306
- elif order == Order.RANDOM:
307
- random.shuffle(paths)
308
- return DirListing(paths)
309
-
310
- def listremotes(self) -> list[Remote]:
311
- cmd = ["listremotes"]
312
- cp = self._run(cmd)
313
- text: str = cp.stdout
314
- tmp = text.splitlines()
315
- tmp = [t.strip() for t in tmp]
316
- # strip out ":" from the end
317
- tmp = [t.replace(":", "") for t in tmp]
318
- out = [Remote(name=t, rclone=self) for t in tmp]
319
- return out
320
-
321
- def diff(
322
- self,
323
- src: str,
324
- dst: str,
325
- min_size: (
326
- str | None
327
- ) = None, # e. g. "1MB" - see rclone documentation: https://rclone.org/commands/rclone_check/
328
- max_size: (
329
- str | None
330
- ) = None, # e. g. "1GB" - see rclone documentation: https://rclone.org/commands/rclone_check/
331
- diff_option: DiffOption = DiffOption.COMBINED,
332
- fast_list: bool = True,
333
- size_only: bool | None = None,
334
- checkers: int | None = None,
335
- other_args: list[str] | None = None,
336
- ) -> Generator[DiffItem, None, None]:
337
- """Be extra careful with the src and dst values. If you are off by one
338
- parent directory, you will get a huge amount of false diffs."""
339
- other_args = other_args or []
340
- if checkers is None or checkers < 1:
341
- checkers = 1000
342
- cmd = [
343
- "check",
344
- src,
345
- dst,
346
- "--checkers",
347
- str(checkers),
348
- "--log-level",
349
- "INFO",
350
- f"--{diff_option.value}",
351
- "-",
352
- ]
353
- if size_only is None:
354
- size_only = diff_option in [
355
- DiffOption.MISSING_ON_DST,
356
- DiffOption.MISSING_ON_SRC,
357
- ]
358
- if size_only:
359
- cmd += ["--size-only"]
360
- if fast_list:
361
- cmd += ["--fast-list"]
362
- if min_size:
363
- cmd += ["--min-size", min_size]
364
- if max_size:
365
- cmd += ["--max-size", max_size]
366
- if diff_option == DiffOption.MISSING_ON_DST:
367
- cmd += ["--one-way"]
368
- if other_args:
369
- cmd += other_args
370
- proc = self._launch_process(cmd, capture=True)
371
- item: DiffItem
372
- for item in diff_stream_from_running_process(
373
- running_process=proc, src_slug=src, dst_slug=dst, diff_option=diff_option
374
- ):
375
- if item is None:
376
- break
377
- yield item
378
-
379
- def walk(
380
- self,
381
- path: Dir | Remote | str,
382
- max_depth: int = -1,
383
- breadth_first: bool = True,
384
- order: Order = Order.NORMAL,
385
- ) -> Generator[DirListing, None, None]:
386
- """Walk through the given path recursively.
387
-
388
- Args:
389
- path: Remote path or Remote object to walk through
390
- max_depth: Maximum depth to traverse (-1 for unlimited)
391
-
392
- Yields:
393
- DirListing: Directory listing for each directory encountered
394
- """
395
- dir_obj: Dir
396
- if isinstance(path, Dir):
397
- # Create a Remote object for the path
398
- remote = path.remote
399
- rpath = RPath(
400
- remote=remote,
401
- path=path.path.path,
402
- name=path.path.name,
403
- size=0,
404
- mime_type="inode/directory",
405
- mod_time="",
406
- is_dir=True,
407
- )
408
- rpath.set_rclone(self)
409
- dir_obj = Dir(rpath)
410
- elif isinstance(path, str):
411
- dir_obj = Dir(to_path(path, self))
412
- elif isinstance(path, Remote):
413
- dir_obj = Dir(path)
414
- else:
415
- dir_obj = Dir(path) # shut up pyright
416
-             assert False, f"Invalid type for path: {type(path)}"
417
-
418
- yield from walk(
419
- dir_obj, max_depth=max_depth, breadth_first=breadth_first, order=order
420
- )
421
-
422
- def scan_missing_folders(
423
- self,
424
- src: Dir | Remote | str,
425
- dst: Dir | Remote | str,
426
- max_depth: int = -1,
427
- order: Order = Order.NORMAL,
428
- ) -> Generator[Dir, None, None]:
429
- """Walk through the given path recursively.
430
-
431
- WORK IN PROGRESS!!
432
-
433
- Args:
434
- src: Source directory or Remote to walk through
435
- dst: Destination directory or Remote to walk through
436
- max_depth: Maximum depth to traverse (-1 for unlimited)
437
-
438
- Yields:
439
- DirListing: Directory listing for each directory encountered
440
- """
441
- from rclone_api.scan_missing_folders import scan_missing_folders
442
-
443
- src_dir = Dir(to_path(src, self))
444
- dst_dir = Dir(to_path(dst, self))
445
- yield from scan_missing_folders(
446
- src=src_dir, dst=dst_dir, max_depth=max_depth, order=order
447
- )
448
-
449
- def cleanup(
450
- self, path: str, other_args: list[str] | None = None
451
- ) -> CompletedProcess:
452
- """Cleanup any resources used by the Rclone instance."""
453
- # rclone cleanup remote:path [flags]
454
- cmd = ["cleanup", path]
455
- if other_args:
456
- cmd += other_args
457
- out = self._run(cmd)
458
- return CompletedProcess.from_subprocess(out)
459
-
460
- def copy_to(
461
- self,
462
- src: File | str,
463
- dst: File | str,
464
- check: bool | None = None,
465
- verbose: bool | None = None,
466
- other_args: list[str] | None = None,
467
- ) -> CompletedProcess:
468
- """Copy one file from source to destination.
469
-
470
- Warning - slow.
471
-
472
- """
473
- check = get_check(check)
474
- verbose = get_verbose(verbose)
475
- src = src if isinstance(src, str) else str(src.path)
476
- dst = dst if isinstance(dst, str) else str(dst.path)
477
- cmd_list: list[str] = ["copyto", src, dst]
478
- if other_args is not None:
479
- cmd_list += other_args
480
- cp = self._run(cmd_list, check=check)
481
- return CompletedProcess.from_subprocess(cp)
482
-
483
- def copy_files(
484
- self,
485
- src: str,
486
- dst: str,
487
- files: list[str] | Path,
488
- check: bool | None = None,
489
- max_backlog: int | None = None,
490
- verbose: bool | None = None,
491
- checkers: int | None = None,
492
- transfers: int | None = None,
493
- low_level_retries: int | None = None,
494
- retries: int | None = None,
495
- retries_sleep: str | None = None,
496
- metadata: bool | None = None,
497
- timeout: str | None = None,
498
- max_partition_workers: int | None = None,
499
- multi_thread_streams: int | None = None,
500
- other_args: list[str] | None = None,
501
- ) -> list[CompletedProcess]:
502
- """Copy multiple files from source to destination.
503
-
504
- Args:
505
- payload: Dictionary of source and destination file paths
506
- """
507
- check = get_check(check)
508
- max_partition_workers = max_partition_workers or 1
509
- low_level_retries = low_level_retries or 10
510
- retries = retries or 3
511
- other_args = other_args or []
512
- checkers = checkers or 1000
513
- transfers = transfers or 32
514
- verbose = get_verbose(verbose)
515
- payload: list[str] = (
516
- files
517
- if isinstance(files, list)
518
- else [f.strip() for f in files.read_text().splitlines() if f.strip()]
519
- )
520
- if len(payload) == 0:
521
- return []
522
-
523
- for p in payload:
524
- if ":" in p:
525
- raise ValueError(
526
- f"Invalid file path, contains a remote, which is not allowed for copy_files: {p}"
527
- )
528
-
529
- using_fast_list = "--fast-list" in other_args
530
- if using_fast_list:
531
- warnings.warn(
532
- "It's not recommended to use --fast-list with copy_files as this will perform poorly on large repositories since the entire repository has to be scanned."
533
- )
534
-
535
- if max_partition_workers > 1:
536
- datalists: dict[str, list[str]] = group_files(
537
- payload, fully_qualified=False
538
- )
539
- else:
540
- datalists = {"": payload}
541
- # out: subprocess.CompletedProcess | None = None
542
- out: list[CompletedProcess] = []
543
-
544
- futures: list[Future] = []
545
-
546
- with ThreadPoolExecutor(max_workers=max_partition_workers) as executor:
547
- for common_prefix, files in datalists.items():
548
-
549
- def _task(
550
- files: list[str] | Path = files,
551
- ) -> subprocess.CompletedProcess:
552
- with TemporaryDirectory() as tmpdir:
553
- filelist: list[str] = []
554
- filepath: Path
555
- if isinstance(files, list):
556
- include_files_txt = Path(tmpdir) / "include_files.txt"
557
- include_files_txt.write_text(
558
- "\n".join(files), encoding="utf-8"
559
- )
560
- filelist = list(files)
561
- filepath = Path(include_files_txt)
562
- elif isinstance(files, Path):
563
- filelist = [
564
- f.strip()
565
- for f in files.read_text().splitlines()
566
- if f.strip()
567
- ]
568
- filepath = files
569
- if common_prefix:
570
- src_path = f"{src}/{common_prefix}"
571
- dst_path = f"{dst}/{common_prefix}"
572
- else:
573
- src_path = src
574
- dst_path = dst
575
-
576
- if verbose:
577
- nfiles = len(filelist)
578
- files_fqdn = [f" {src_path}/{f}" for f in filelist]
579
- print(f"Copying {nfiles} files:")
580
- chunk_size = 100
581
- for i in range(0, nfiles, chunk_size):
582
- chunk = files_fqdn[i : i + chunk_size]
583
- files_str = "\n".join(chunk)
584
- print(f"{files_str}")
585
- cmd_list: list[str] = [
586
- "copy",
587
- src_path,
588
- dst_path,
589
- "--files-from",
590
- str(filepath),
591
- "--checkers",
592
- str(checkers),
593
- "--transfers",
594
- str(transfers),
595
- "--low-level-retries",
596
- str(low_level_retries),
597
- "--retries",
598
- str(retries),
599
- ]
600
- if metadata:
601
- cmd_list.append("--metadata")
602
- if retries_sleep is not None:
603
- cmd_list += ["--retries-sleep", retries_sleep]
604
- if timeout is not None:
605
- cmd_list += ["--timeout", timeout]
606
- if max_backlog is not None:
607
- cmd_list += ["--max-backlog", str(max_backlog)]
608
- if multi_thread_streams is not None:
609
- cmd_list += [
610
- "--multi-thread-streams",
611
- str(multi_thread_streams),
612
- ]
613
- if verbose:
614
- if not any(["-v" in x for x in other_args]):
615
- cmd_list.append("-vvvv")
616
- if not any(["--progress" in x for x in other_args]):
617
- cmd_list.append("--progress")
618
- if other_args:
619
- cmd_list += other_args
620
- out = self._run(cmd_list, capture=not verbose)
621
- return out
622
-
623
- fut: Future = executor.submit(_task)
624
- futures.append(fut)
625
- for fut in futures:
626
- cp: subprocess.CompletedProcess = fut.result()
627
- assert cp is not None
628
- out.append(CompletedProcess.from_subprocess(cp))
629
- if cp.returncode != 0:
630
- if check:
631
-                     raise ValueError(f"Error copying files: {cp.stderr}")
632
- else:
633
-                     warnings.warn(f"Error copying files: {cp.stderr}")
634
- return out
635
-
636
- def copy(
637
- self,
638
- src: Dir | str,
639
- dst: Dir | str,
640
- check: bool | None = None,
641
- transfers: int | None = None,
642
- checkers: int | None = None,
643
- multi_thread_streams: int | None = None,
644
- low_level_retries: int | None = None,
645
- retries: int | None = None,
646
- other_args: list[str] | None = None,
647
- ) -> CompletedProcess:
648
- """Copy files from source to destination.
649
-
650
- Args:
651
- src: Source directory
652
- dst: Destination directory
653
- """
654
- # src_dir = src.path.path
655
- # dst_dir = dst.path.path
656
- src_dir = convert_to_str(src)
657
- dst_dir = convert_to_str(dst)
658
- check = get_check(check)
659
- checkers = checkers or 1000
660
- transfers = transfers or 32
661
- low_level_retries = low_level_retries or 10
662
- retries = retries or 3
663
- cmd_list: list[str] = ["copy", src_dir, dst_dir]
664
- cmd_list += ["--checkers", str(checkers)]
665
- cmd_list += ["--transfers", str(transfers)]
666
- cmd_list += ["--low-level-retries", str(low_level_retries)]
667
- if multi_thread_streams is not None:
668
- cmd_list += ["--multi-thread-streams", str(multi_thread_streams)]
669
- if other_args:
670
- cmd_list += other_args
671
- cp = self._run(cmd_list, check=check, capture=False)
672
- return CompletedProcess.from_subprocess(cp)
673
-
674
- def purge(self, path: Dir | str) -> CompletedProcess:
675
- """Purge a directory"""
676
- # path should always be a string
677
- path = path if isinstance(path, str) else str(path.path)
678
- cmd_list: list[str] = ["purge", str(path)]
679
- cp = self._run(cmd_list)
680
- return CompletedProcess.from_subprocess(cp)
681
-
682
- def delete_files(
683
- self,
684
- files: str | File | list[str] | list[File],
685
- check: bool | None = None,
686
- rmdirs=False,
687
- verbose: bool | None = None,
688
- max_partition_workers: int | None = None,
689
- other_args: list[str] | None = None,
690
- ) -> CompletedProcess:
691
- """Delete a directory"""
692
- check = get_check(check)
693
- verbose = get_verbose(verbose)
694
- payload: list[str] = convert_to_filestr_list(files)
695
- if len(payload) == 0:
696
- if verbose:
697
- print("No files to delete")
698
- cp = subprocess.CompletedProcess(
699
- args=["rclone", "delete", "--files-from", "[]"],
700
- returncode=0,
701
- stdout="",
702
- stderr="",
703
- )
704
- return CompletedProcess.from_subprocess(cp)
705
-
706
- datalists: dict[str, list[str]] = group_files(payload)
707
- completed_processes: list[subprocess.CompletedProcess] = []
708
-
709
- futures: list[Future] = []
710
-
711
- with ThreadPoolExecutor(max_workers=max_partition_workers) as executor:
712
-
713
- for remote, files in datalists.items():
714
-
715
- def _task(
716
- files=files, check=check, remote=remote
717
- ) -> subprocess.CompletedProcess:
718
- with TemporaryDirectory() as tmpdir:
719
- include_files_txt = Path(tmpdir) / "include_files.txt"
720
- include_files_txt.write_text("\n".join(files), encoding="utf-8")
721
-
722
- # print(include_files_txt)
723
- cmd_list: list[str] = [
724
- "delete",
725
- remote,
726
- "--files-from",
727
- str(include_files_txt),
728
- "--checkers",
729
- "1000",
730
- "--transfers",
731
- "1000",
732
- ]
733
- if verbose:
734
- cmd_list.append("-vvvv")
735
- if rmdirs:
736
- cmd_list.append("--rmdirs")
737
- if other_args:
738
- cmd_list += other_args
739
- out = self._run(cmd_list, check=check)
740
- if out.returncode != 0:
741
- if check:
742
- completed_processes.append(out)
743
- raise ValueError(f"Error deleting files: {out}")
744
- else:
745
- warnings.warn(f"Error deleting files: {out}")
746
- return out
747
-
748
- fut: Future = executor.submit(_task)
749
- futures.append(fut)
750
-
751
- for fut in futures:
752
- out = fut.result()
753
- assert out is not None
754
- completed_processes.append(out)
755
-
756
- return CompletedProcess(completed_processes)
757
-
758
- @deprecated("delete_files")
759
- def deletefiles(
760
- self, files: str | File | list[str] | list[File]
761
- ) -> CompletedProcess:
762
- out = self.delete_files(files)
763
- return out
764
-
765
- def exists(self, path: Dir | Remote | str | File) -> bool:
766
- """Check if a file or directory exists."""
767
- arg: str = convert_to_str(path)
768
- assert isinstance(arg, str)
769
- try:
770
- dir_listing = self.ls(arg)
771
- # print(dir_listing)
772
- return len(dir_listing.dirs) > 0 or len(dir_listing.files) > 0
773
- except subprocess.CalledProcessError:
774
- return False
775
-
776
- def is_synced(self, src: str | Dir, dst: str | Dir) -> bool:
777
- """Check if two directories are in sync."""
778
- src = convert_to_str(src)
779
- dst = convert_to_str(dst)
780
- cmd_list: list[str] = ["check", str(src), str(dst)]
781
- try:
782
- self._run(cmd_list, check=True)
783
- return True
784
- except subprocess.CalledProcessError:
785
- return False
786
-
787
- def copy_file_resumable_s3(
788
- self,
789
- src: str,
790
- dst: str,
791
- save_state_json: Path,
792
- chunk_size: SizeSuffix | None = None,
793
- read_threads: int = 8,
794
- write_threads: int = 8,
795
- retries: int = 3,
796
- verbose: bool | None = None,
797
- max_chunks_before_suspension: int | None = None,
798
- mount_log: Path | None = None,
799
- ) -> MultiUploadResult:
800
- """For massive files that rclone can't handle in one go, this function will copy the file in chunks to an S3 store"""
801
- from rclone_api.s3.api import S3Client
802
- from rclone_api.s3.create import S3Credentials
803
- from rclone_api.util import S3PathInfo, split_s3_path
804
-
805
- other_args: list[str] = ["--no-modtime", "--vfs-read-wait", "1s"]
806
- chunk_size = chunk_size or SizeSuffix("64M")
807
- unit_chunk_size = chunk_size / read_threads
808
- tmp_mount_dir = self._get_tmp_mount_dir()
809
- vfs_read_chunk_size = unit_chunk_size
810
- vfs_read_chunk_size_limit = chunk_size
811
- vfs_read_chunk_streams = read_threads
812
- vfs_disk_space_total_size = chunk_size
813
- assert (
814
- chunk_size.as_int() % vfs_read_chunk_size.as_int() == 0
815
- ), f"chunk_size {chunk_size} must be a multiple of vfs_read_chunk_size {vfs_read_chunk_size}"
816
- other_args += ["--vfs-read-chunk-size", vfs_read_chunk_size.as_str()]
817
- other_args += [
818
- "--vfs-read-chunk-size-limit",
819
- vfs_read_chunk_size_limit.as_str(),
820
- ]
821
- other_args += ["--vfs-read-chunk-streams", str(vfs_read_chunk_streams)]
822
- other_args += [
823
- "--vfs-disk-space-total-size",
824
- vfs_disk_space_total_size.as_str(),
825
- ]
826
- other_args += ["--read-only"]
827
- other_args += ["--direct-io"]
828
- # --vfs-cache-max-size
829
- other_args += ["--vfs-cache-max-size", vfs_disk_space_total_size.as_str()]
830
- mount_path = tmp_mount_dir / "RCLONE_API_DYNAMIC_MOUNT"
831
- src_path = Path(src)
832
- name = src_path.name
833
-
834
- src_parent_path = Path(src).parent.as_posix()
835
- size_result: SizeResult = self.size_files(src_parent_path, [name])
836
-
837
- target_size = SizeSuffix(size_result.total_size)
838
- if target_size < SizeSuffix("5M"):
839
- # fallback to normal copy
840
- completed_proc = self.copy_to(src, dst, check=True)
841
- if completed_proc.ok:
842
- return MultiUploadResult.UPLOADED_FRESH
843
- if size_result.total_size <= 0:
844
- raise ValueError(
845
- f"File {src} has size {size_result.total_size}, is this a directory?"
846
- )
847
-
848
- path_info: S3PathInfo = split_s3_path(dst)
849
- remote = path_info.remote
850
- bucket_name = path_info.bucket
851
- s3_key = path_info.key
852
- parsed: Parsed = self.config.parse()
853
- sections: dict[str, Section] = parsed.sections
854
- if remote not in sections:
855
- raise ValueError(
856
- f"Remote {remote} not found in rclone config, remotes are: {sections.keys()}"
857
- )
858
-
859
- section: Section = sections[remote]
860
- dst_type = section.type()
861
- if dst_type != "s3" and dst_type != "b2":
862
- raise ValueError(
863
- f"Remote {remote} is not an S3 remote, it is of type {dst_type}"
864
- )
865
-
866
- def get_provider_str(section=section) -> str | None:
867
- type: str = section.type()
868
- provider: str | None = section.provider()
869
- if provider is not None:
870
- return provider
871
- if type == "b2":
872
- return S3Provider.BACKBLAZE.value
873
- if type != "s3":
874
- raise ValueError(f"Remote {remote} is not an S3 remote")
875
- return S3Provider.S3.value
876
-
877
- provider: str
878
- if provided_provider_str := get_provider_str():
879
- if verbose:
880
- print(f"Using provided provider: {provided_provider_str}")
881
- provider = provided_provider_str
882
- else:
883
- if verbose:
884
- print(f"Using default provider: {S3Provider.S3.value}")
885
- provider = S3Provider.S3.value
886
- provider_enum = S3Provider.from_str(provider)
887
-
888
- s3_creds: S3Credentials = S3Credentials(
889
- provider=provider_enum,
890
- access_key_id=section.access_key_id(),
891
- secret_access_key=section.secret_access_key(),
892
- endpoint_url=section.endpoint(),
893
- )
894
-
895
- chunk_fetcher: MultiMountFileChunker = self.get_multi_mount_file_chunker(
896
- src=src_path.as_posix(),
897
- chunk_size=chunk_size,
898
- threads=read_threads,
899
- mount_log=mount_log,
900
- direct_io=True,
901
- )
902
-
903
- client = S3Client(s3_creds)
904
- upload_config: S3MutliPartUploadConfig = S3MutliPartUploadConfig(
905
- chunk_size=chunk_size.as_int(),
906
- chunk_fetcher=chunk_fetcher.fetch,
907
- max_write_threads=write_threads,
908
- retries=retries,
909
- resume_path_json=save_state_json,
910
- max_chunks_before_suspension=max_chunks_before_suspension,
911
- )
912
-
913
- src_file = mount_path / name
914
-
915
- print(f"Uploading {name} to {s3_key} in bucket {bucket_name}")
916
- print(f"Source: {src_path}")
917
- print(f"bucket_name: {bucket_name}")
918
- print(f"upload_config: {upload_config}")
919
-
920
- # get the file size
921
-
922
- upload_target = S3UploadTarget(
923
- src_file=src_file,
924
- src_file_size=size_result.total_size,
925
- bucket_name=bucket_name,
926
- s3_key=s3_key,
927
- )
928
-
929
- try:
930
- out: MultiUploadResult = client.upload_file_multipart(
931
- upload_target=upload_target,
932
- upload_config=upload_config,
933
- )
934
- return out
935
- except Exception as e:
936
- print(f"Error uploading file: {e}")
937
- traceback.print_exc()
938
- raise
939
- finally:
940
- chunk_fetcher.shutdown()
941
-
942
- def get_multi_mount_file_chunker(
943
- self,
944
- src: str,
945
- chunk_size: SizeSuffix,
946
- threads: int,
947
- mount_log: Path | None,
948
- direct_io: bool,
949
- ) -> MultiMountFileChunker:
950
- from rclone_api.util import random_str
951
-
952
- mounts: list[Mount] = []
953
- vfs_read_chunk_size = chunk_size
954
- vfs_read_chunk_size_limit = chunk_size
955
- vfs_read_chunk_streams = 0
956
- vfs_disk_space_total_size = chunk_size
957
- other_args: list[str] = []
958
- other_args += ["--no-modtime"]
959
- other_args += ["--vfs-read-chunk-size", vfs_read_chunk_size.as_str()]
960
- other_args += [
961
- "--vfs-read-chunk-size-limit",
962
- vfs_read_chunk_size_limit.as_str(),
963
- ]
964
- other_args += ["--vfs-read-chunk-streams", str(vfs_read_chunk_streams)]
965
- other_args += [
966
- "--vfs-disk-space-total-size",
967
- vfs_disk_space_total_size.as_str(),
968
- ]
969
- other_args += ["--read-only"]
970
- if direct_io:
971
- other_args += ["--direct-io"]
972
-
973
- base_mount_dir = self._get_tmp_mount_dir()
974
- base_cache_dir = self._get_cache_dir()
975
-
976
- filename = Path(src).name
977
- with ThreadPoolExecutor(max_workers=threads) as executor:
978
- futures: list[Future] = []
979
- try:
980
- for i in range(threads):
981
- tmp_mnts = base_mount_dir / random_str(12)
982
- verbose = mount_log is not None
983
-
984
- src_parent_path = Path(src).parent.as_posix()
985
- cache_dir = base_cache_dir / random_str(12)
986
-
987
- def task(
988
- src_parent_path=src_parent_path,
989
- tmp_mnts=tmp_mnts,
990
- cache_dir=cache_dir,
991
- ):
992
- clean_mount(tmp_mnts, verbose=verbose)
993
- prepare_mount(tmp_mnts, verbose=verbose)
994
- return self.mount(
995
- src=src_parent_path,
996
- outdir=tmp_mnts,
997
- allow_writes=False,
998
- use_links=True,
999
- vfs_cache_mode="minimal",
1000
- verbose=False,
1001
- cache_dir=cache_dir,
1002
- cache_dir_delete_on_exit=True,
1003
- log=mount_log,
1004
- other_args=other_args,
1005
- )
1006
-
1007
- futures.append(executor.submit(task))
1008
- mount_errors: list[Exception] = []
1009
- for fut in futures:
1010
- try:
1011
- mount = fut.result()
1012
- mounts.append(mount)
1013
- except Exception as er:
1014
- warnings.warn(f"Error mounting: {er}")
1015
- mount_errors.append(er)
1016
- if mount_errors:
1017
- warnings.warn(f"Error mounting: {mount_errors}")
1018
- raise Exception(mount_errors)
1019
- except Exception:
1020
- for mount in mounts:
1021
- mount.close()
1022
- raise
1023
-
1024
- src_path: Path = Path(src)
1025
- src_parent_path = src_path.parent.as_posix()
1026
- name = src_path.name
1027
- size_result: SizeResult = self.size_files(src_parent_path, [name])
1028
- filesize = size_result.total_size
1029
-
1030
- executor = ThreadPoolExecutor(max_workers=threads)
1031
- filechunker: MultiMountFileChunker = MultiMountFileChunker(
1032
- filename=filename,
1033
- filesize=filesize,
1034
- mounts=mounts,
1035
- executor=executor,
1036
- verbose=mount_log is not None,
1037
- )
1038
- return filechunker
1039
-
1040
- def copy_bytes(
1041
- self,
1042
- src: str,
1043
- offset: int | SizeSuffix,
1044
- length: int | SizeSuffix,
1045
- outfile: Path,
1046
- other_args: list[str] | None = None,
1047
- ) -> Exception | None:
1048
- """Copy a slice of bytes from the src file to dst."""
1049
- offset = SizeSuffix(offset).as_int()
1050
- length = SizeSuffix(length).as_int()
1051
- cmd_list: list[str] = [
1052
- "cat",
1053
- "--offset",
1054
- str(offset),
1055
- "--count",
1056
- str(length),
1057
- src,
1058
- ]
1059
- if other_args:
1060
- cmd_list.extend(other_args)
1061
- try:
1062
- cp = self._run(cmd_list, capture=outfile)
1063
- if cp.returncode == 0:
1064
- return None
1065
- return Exception(cp.stderr)
1066
- except subprocess.CalledProcessError as e:
1067
- return e
1068
-
1069
- def copy_bytes_mount(
1070
- self,
1071
- src: str,
1072
- offset: int | SizeSuffix,
1073
- length: int | SizeSuffix,
1074
- chunk_size: SizeSuffix,
1075
- max_threads: int = 1,
1076
- # If outfile is supplied then bytes are written to this file and success returns bytes(0)
1077
- outfile: Path | None = None,
1078
- mount_log: Path | None = None,
1079
- direct_io: bool = True,
1080
- ) -> bytes | Exception:
1081
- """Copy a slice of bytes from the src file to dst. Parallelism is achieved through multiple mounted files."""
1082
- from rclone_api.types import FilePart
1083
-
1084
- offset = SizeSuffix(offset).as_int()
1085
- length = SizeSuffix(length).as_int()
1086
- # determine number of threads from chunk size
1087
- threads = max(1, min(max_threads, length // chunk_size.as_int()))
1088
- # todo - implement max threads.
1089
- filechunker = self.get_multi_mount_file_chunker(
1090
- src=src,
1091
- chunk_size=chunk_size,
1092
- threads=threads,
1093
- mount_log=mount_log,
1094
- direct_io=direct_io,
1095
- )
1096
- try:
1097
- fut = filechunker.fetch(offset, length, extra=None)
1098
- fp: FilePart = fut.result()
1099
- payload = fp.payload
1100
- if isinstance(payload, Exception):
1101
- return payload
1102
- try:
1103
- if outfile is None:
1104
- return payload.read_bytes()
1105
- shutil.move(payload, outfile)
1106
- return bytes(0)
1107
- finally:
1108
- fp.close()
1109
-
1110
- except Exception as e:
1111
- warnings.warn(f"Error copying bytes: {e}")
1112
- return e
1113
- finally:
1114
- try:
1115
- filechunker.shutdown()
1116
- except Exception as e:
1117
- warnings.warn(f"Error closing filechunker: {e}")
1118
-
1119
- def copy_dir(
1120
- self, src: str | Dir, dst: str | Dir, args: list[str] | None = None
1121
- ) -> CompletedProcess:
1122
- """Copy a directory from source to destination."""
1123
- # convert src to str, also dst
1124
- src = convert_to_str(src)
1125
- dst = convert_to_str(dst)
1126
- cmd_list: list[str] = ["copy", src, dst]
1127
- if args is not None:
1128
- cmd_list += args
1129
- cp = self._run(cmd_list)
1130
- return CompletedProcess.from_subprocess(cp)
1131
-
1132
- def copy_remote(
1133
- self, src: Remote, dst: Remote, args: list[str] | None = None
1134
- ) -> CompletedProcess:
1135
- """Copy a remote to another remote."""
1136
- cmd_list: list[str] = ["copy", str(src), str(dst)]
1137
- if args is not None:
1138
- cmd_list += args
1139
- # return self._run(cmd_list)
1140
- cp = self._run(cmd_list)
1141
- return CompletedProcess.from_subprocess(cp)
1142
-
1143
- def mount(
1144
- self,
1145
- src: Remote | Dir | str,
1146
- outdir: Path,
1147
- allow_writes: bool | None = False,
1148
- use_links: bool | None = None,
1149
- vfs_cache_mode: str | None = None,
1150
- verbose: bool | None = None,
1151
- cache_dir: Path | None = None,
1152
- cache_dir_delete_on_exit: bool | None = None,
1153
- log: Path | None = None,
1154
- other_args: list[str] | None = None,
1155
- ) -> Mount:
1156
- """Mount a remote or directory to a local path.
1157
-
1158
- Args:
1159
- src: Remote or directory to mount
1160
- outdir: Local path to mount to
1161
-
1162
- Returns:
1163
- CompletedProcess from the mount command execution
1164
-
1165
- Raises:
1166
- subprocess.CalledProcessError: If the mount operation fails
1167
- """
1168
-
1169
- allow_writes = allow_writes or False
1170
-         use_links = use_links if use_links is not None else True
1171
- verbose = get_verbose(verbose) or (log is not None)
1172
- vfs_cache_mode = vfs_cache_mode or "full"
1173
- clean_mount(outdir, verbose=verbose)
1174
- prepare_mount(outdir, verbose=verbose)
1175
- debug_fuse = log is not None
1176
- src_str = convert_to_str(src)
1177
- cmd_list: list[str] = ["mount", src_str, str(outdir)]
1178
- if not allow_writes:
1179
- cmd_list.append("--read-only")
1180
- if use_links:
1181
- cmd_list.append("--links")
1182
- if vfs_cache_mode:
1183
- cmd_list.append("--vfs-cache-mode")
1184
- cmd_list.append(vfs_cache_mode)
1185
- if cache_dir:
1186
- cmd_list.append("--cache-dir")
1187
- cmd_list.append(str(cache_dir.absolute()))
1188
- if debug_fuse:
1189
- cmd_list.append("--debug-fuse")
1190
- if verbose:
1191
- cmd_list.append("-vvvv")
1192
- if other_args:
1193
- cmd_list += other_args
1194
- proc = self._launch_process(cmd_list, log=log)
1195
- mount_read_only = not allow_writes
1196
- mount: Mount = Mount(
1197
- src=src_str,
1198
- mount_path=outdir,
1199
- process=proc,
1200
- read_only=mount_read_only,
1201
- cache_dir=cache_dir,
1202
- cache_dir_delete_on_exit=cache_dir_delete_on_exit,
1203
- )
1204
- return mount
1205
-
1206
- @contextmanager
1207
- def scoped_mount(
1208
- self,
1209
- src: Remote | Dir | str,
1210
- outdir: Path,
1211
- allow_writes: bool | None = None,
1212
- use_links: bool | None = None,
1213
- vfs_cache_mode: str | None = None,
1214
- verbose: bool | None = None,
1215
- log: Path | None = None,
1216
- cache_dir: Path | None = None,
1217
- cache_dir_delete_on_exit: bool | None = None,
1218
- other_args: list[str] | None = None,
1219
- ) -> Generator[Mount, None, None]:
1220
- """Like mount, but can be used in a context manager."""
1221
- error_happened = False
1222
- mount: Mount = self.mount(
1223
- src,
1224
- outdir,
1225
- allow_writes=allow_writes,
1226
- use_links=use_links,
1227
- vfs_cache_mode=vfs_cache_mode,
1228
- verbose=verbose,
1229
- cache_dir=cache_dir,
1230
- cache_dir_delete_on_exit=cache_dir_delete_on_exit,
1231
- log=log,
1232
- other_args=other_args,
1233
- )
1234
- try:
1235
- yield mount
1236
- except Exception as e:
1237
- error_happened = True
1238
- stack_trace = traceback.format_exc()
1239
- warnings.warn(f"Error in scoped_mount: {e}\n\nStack Trace:\n{stack_trace}")
1240
- raise
1241
- finally:
1242
- if not error_happened or (not allow_writes):
1243
- mount.close()
1244
-
1245
- # Settings optimized for s3.
1246
- def mount_s3(
1247
- self,
1248
- url: str,
1249
- outdir: Path,
1250
- allow_writes=False,
1251
- vfs_cache_mode="full",
1252
- dir_cache_time: str | None = "1h",
1253
- attribute_timeout: str | None = "1h",
1254
- vfs_disk_space_total_size: str | None = "100M",
1255
- transfers: int | None = 128,
1256
- modtime_strategy: (
1257
- ModTimeStrategy | None
1258
- ) = ModTimeStrategy.USE_SERVER_MODTIME, # speeds up S3 operations
1259
- vfs_read_chunk_streams: int | None = 16,
1260
- vfs_read_chunk_size: str | None = "4M",
1261
- vfs_fast_fingerprint: bool = True,
1262
- # vfs-refresh
1263
- vfs_refresh: bool = True,
1264
- other_args: list[str] | None = None,
1265
- ) -> Mount:
1266
- """Mount a remote or directory to a local path.
1267
-
1268
- Args:
1269
- src: Remote or directory to mount
1270
- outdir: Local path to mount to
1271
- """
1272
- other_args = other_args or []
1273
- if modtime_strategy is not None:
1274
- other_args.append(f"--{modtime_strategy.value}")
1275
- if (vfs_cache_mode == "full" or vfs_cache_mode == "writes") and (
1276
- transfers is not None and "--transfers" not in other_args
1277
- ):
1278
- other_args.append("--transfers")
1279
- other_args.append(str(transfers))
1280
- if dir_cache_time is not None and "--dir-cache-time" not in other_args:
1281
- other_args.append("--dir-cache-time")
1282
- other_args.append(dir_cache_time)
1283
- if (
1284
- vfs_disk_space_total_size is not None
1285
- and "--vfs-cache-max-size" not in other_args
1286
- ):
1287
- other_args.append("--vfs-cache-max-size")
1288
- other_args.append(vfs_disk_space_total_size)
1289
- if vfs_refresh and "--vfs-refresh" not in other_args:
1290
- other_args.append("--vfs-refresh")
1291
- if attribute_timeout is not None and "--attr-timeout" not in other_args:
1292
- other_args.append("--attr-timeout")
1293
- other_args.append(attribute_timeout)
1294
- if vfs_read_chunk_streams:
1295
- other_args.append("--vfs-read-chunk-streams")
1296
- other_args.append(str(vfs_read_chunk_streams))
1297
- if vfs_read_chunk_size:
1298
- other_args.append("--vfs-read-chunk-size")
1299
- other_args.append(vfs_read_chunk_size)
1300
- if vfs_fast_fingerprint:
1301
- other_args.append("--vfs-fast-fingerprint")
1302
-
1303
- other_args = other_args if other_args else None
1304
- return self.mount(
1305
- url,
1306
- outdir,
1307
- allow_writes=allow_writes,
1308
- vfs_cache_mode=vfs_cache_mode,
1309
- other_args=other_args,
1310
- )
1311
-
1312
- def serve_webdav(
1313
- self,
1314
- src: Remote | Dir | str,
1315
- user: str,
1316
- password: str,
1317
- addr: str = "localhost:2049",
1318
- allow_other: bool = False,
1319
- other_args: list[str] | None = None,
1320
- ) -> Process:
1321
-         """Serve a remote or directory via WebDAV.
1322
-
1323
- Args:
1324
- src: Remote or directory to serve
1325
- addr: Network address and port to serve on (default: localhost:2049)
1326
- allow_other: Allow other users to access the share
1327
-
1328
- Returns:
1329
-             Process: The running WebDAV server process
1330
-
1331
- Raises:
1332
-             ValueError: If the WebDAV server fails to start
1333
- """
1334
- src_str = convert_to_str(src)
1335
- cmd_list: list[str] = ["serve", "webdav", "--addr", addr, src_str]
1336
- cmd_list.extend(["--user", user, "--pass", password])
1337
- if allow_other:
1338
- cmd_list.append("--allow-other")
1339
- if other_args:
1340
- cmd_list += other_args
1341
- proc = self._launch_process(cmd_list)
1342
- time.sleep(2) # give it a moment to start
1343
- if proc.poll() is not None:
1344
-             raise ValueError("WebDAV serve process failed to start")
1345
- return proc
1346
-
1347
- def size_files(
1348
- self,
1349
- src: str,
1350
- files: list[str],
1351
- fast_list: bool = False, # Recommend that this is False
1352
- other_args: list[str] | None = None,
1353
- check: bool | None = False,
1354
- verbose: bool | None = None,
1355
- ) -> SizeResult:
1356
- """Get the size of a list of files. Example of files items: "remote:bucket/to/file"."""
1357
- verbose = get_verbose(verbose)
1358
- check = get_check(check)
1359
- if fast_list or (other_args and "--fast-list" in other_args):
1360
- warnings.warn(
1361
- "It's not recommended to use --fast-list with size_files as this will perform poorly on large repositories since the entire repository has to be scanned."
1362
- )
1363
- files = list(files)
1364
- all_files: list[File] = []
1365
- # prefix, files = group_under_one_prefix(src, files)
1366
- cmd = ["lsjson", src, "--files-only", "-R"]
1367
- with TemporaryDirectory() as tmpdir:
1368
- # print("files: " + ",".join(files))
1369
- include_files_txt = Path(tmpdir) / "include_files.txt"
1370
- include_files_txt.write_text("\n".join(files), encoding="utf-8")
1371
- cmd += ["--files-from", str(include_files_txt)]
1372
- if fast_list:
1373
- cmd.append("--fast-list")
1374
- if other_args:
1375
- cmd += other_args
1376
- cp = self._run(cmd, check=check)
1377
-
1378
- if cp.returncode != 0:
1379
- if check:
1380
- raise ValueError(f"Error getting file sizes: {cp.stderr}")
1381
- else:
1382
- warnings.warn(f"Error getting file sizes: {cp.stderr}")
1383
- stdout = cp.stdout
1384
- pieces = src.split(":", 1)
1385
- remote_name = pieces[0]
1386
- parent_path: str | None
1387
- if len(pieces) > 1:
1388
- parent_path = pieces[1]
1389
- else:
1390
- parent_path = None
1391
- remote = Remote(name=remote_name, rclone=self)
1392
- paths: list[RPath] = RPath.from_json_str(
1393
- stdout, remote, parent_path=parent_path
1394
- )
1395
- # print(paths)
1396
- all_files += [File(p) for p in paths]
1397
- file_sizes: dict[str, int] = {}
1398
- f: File
1399
- for f in all_files:
1400
- p = f.to_string(include_remote=True)
1401
- if p in file_sizes:
1402
- warnings.warn(f"Duplicate file found: {p}")
1403
- continue
1404
- size = f.size
1405
- if size == 0:
1406
- warnings.warn(f"File size is 0: {p}")
1407
- file_sizes[p] = f.size
1408
- total_size = sum(file_sizes.values())
1409
- file_sizes_path_corrected: dict[str, int] = {}
1410
- for path, size in file_sizes.items():
1411
- # remove the prefix
1412
- path_path = Path(path)
1413
- path_str = path_path.relative_to(src).as_posix()
1414
- file_sizes_path_corrected[path_str] = size
1415
- out: SizeResult = SizeResult(
1416
- prefix=src, total_size=total_size, file_sizes=file_sizes_path_corrected
1417
- )
1418
- return out
1
+ """
2
+ Rclone API wrapper around the rclone executable.
3
+ """
4
+
5
+ import os
6
+ import random
7
+ import shutil
8
+ import subprocess
9
+ import time
10
+ import traceback
11
+ import warnings
12
+ from concurrent.futures import Future, ThreadPoolExecutor
13
+ from contextlib import contextmanager
14
+ from fnmatch import fnmatch
15
+ from pathlib import Path
16
+ from tempfile import TemporaryDirectory
17
+ from typing import Generator
18
+
19
+ from rclone_api import Dir
20
+ from rclone_api.completed_process import CompletedProcess
21
+ from rclone_api.config import Config, Parsed, Section
22
+ from rclone_api.convert import convert_to_filestr_list, convert_to_str
23
+ from rclone_api.deprecated import deprecated
24
+ from rclone_api.diff import DiffItem, DiffOption, diff_stream_from_running_process
25
+ from rclone_api.dir_listing import DirListing
26
+ from rclone_api.exec import RcloneExec
27
+ from rclone_api.file import File, FileItem
28
+ from rclone_api.group_files import group_files
29
+ from rclone_api.mount import Mount, clean_mount, prepare_mount
30
+ from rclone_api.mount_read_chunker import MultiMountFileChunker
31
+ from rclone_api.process import Process
32
+ from rclone_api.remote import Remote
33
+ from rclone_api.rpath import RPath
34
+ from rclone_api.s3.types import (
35
+ MultiUploadResult,
36
+ S3MutliPartUploadConfig,
37
+ S3Provider,
38
+ S3UploadTarget,
39
+ )
40
+ from rclone_api.types import (
41
+ ListingOption,
42
+ ModTimeStrategy,
43
+ Order,
44
+ SizeResult,
45
+ SizeSuffix,
46
+ )
47
+ from rclone_api.util import (
48
+ get_check,
49
+ get_rclone_exe,
50
+ get_verbose,
51
+ to_path,
52
+ )
53
+ from rclone_api.walk import walk
54
+
55
+
56
+ def rclone_verbose(verbose: bool | None) -> bool:
57
+ if verbose is not None:
58
+ os.environ["RCLONE_API_VERBOSE"] = "1" if verbose else "0"
59
+ return bool(int(os.getenv("RCLONE_API_VERBOSE", "0")))
60
+
61
+
62
+ def _to_rclone_conf(config: Config | Path) -> Config:
63
+ if isinstance(config, Path):
64
+ content = config.read_text(encoding="utf-8")
65
+ return Config(content)
66
+ else:
67
+ return config
68
+
69
+
70
+ class FilesStream:
71
+
72
+ def __init__(self, path: str, process: Process) -> None:
73
+ self.path = path
74
+ self.process = process
75
+
76
+ def __enter__(self) -> "FilesStream":
77
+ self.process.__enter__()
78
+ return self
79
+
80
+ def __exit__(self, *exc_info):
81
+ self.process.__exit__(*exc_info)
82
+
83
+ def files(self) -> Generator[FileItem, None, None]:
84
+ line: bytes
85
+ for line in self.process.stdout:
86
+ linestr: str = line.decode("utf-8").strip()
87
+ if linestr.startswith("["):
88
+ continue
89
+ if linestr.endswith(","):
90
+ linestr = linestr[:-1]
91
+ if linestr.endswith("]"):
92
+ continue
93
+ fileitem: FileItem | None = FileItem.from_json_str(self.path, linestr)
94
+ if fileitem is None:
95
+ continue
96
+ yield fileitem
97
+
98
+ def files_paged(
99
+ self, page_size: int = 1000
100
+ ) -> Generator[list[FileItem], None, None]:
101
+ page: list[FileItem] = []
102
+ for fileitem in self.files():
103
+ page.append(fileitem)
104
+ if len(page) >= page_size:
105
+ yield page
106
+ page = []
107
+ if len(page) > 0:
108
+ yield page
109
+
110
+ def __iter__(self) -> Generator[FileItem, None, None]:
111
+ return self.files()
112
+
113
+
114
+ class Rclone:
115
+ def __init__(
116
+ self, rclone_conf: Path | Config, rclone_exe: Path | None = None
117
+ ) -> None:
118
+ if isinstance(rclone_conf, Path):
119
+ if not rclone_conf.exists():
120
+ raise ValueError(f"Rclone config file not found: {rclone_conf}")
121
+ self._exec = RcloneExec(rclone_conf, get_rclone_exe(rclone_exe))
122
+ self.config: Config = _to_rclone_conf(rclone_conf)
123
+
124
+ def _run(
125
+ self, cmd: list[str], check: bool = False, capture: bool | Path | None = None
126
+ ) -> subprocess.CompletedProcess:
127
+ return self._exec.execute(cmd, check=check, capture=capture)
128
+
129
+ def _launch_process(
130
+ self, cmd: list[str], capture: bool | None = None, log: Path | None = None
131
+ ) -> Process:
132
+ return self._exec.launch_process(cmd, capture=capture, log=log)
133
+
134
+ def _get_tmp_mount_dir(self) -> Path:
135
+ return Path("tmp_mnts")
136
+
137
+ def _get_cache_dir(self) -> Path:
138
+ return Path("cache")
139
+
140
+ def webgui(self, other_args: list[str] | None = None) -> Process:
141
+ """Launch the Rclone web GUI."""
142
+ cmd = ["rcd", "--rc-web-gui"]
143
+ if other_args:
144
+ cmd += other_args
145
+ return self._launch_process(cmd, capture=False)
146
+
147
+ def launch_server(
148
+ self,
149
+ addr: str,
150
+ user: str | None = None,
151
+ password: str | None = None,
152
+ other_args: list[str] | None = None,
153
+ ) -> Process:
154
+ """Launch the Rclone server so it can receive commands"""
155
+ cmd = ["rcd"]
156
+ if addr is not None:
157
+ cmd += ["--rc-addr", addr]
158
+ if user is not None:
159
+ cmd += ["--rc-user", user]
160
+ if password is not None:
161
+ cmd += ["--rc-pass", password]
162
+ if other_args:
163
+ cmd += other_args
164
+ out = self._launch_process(cmd, capture=False)
165
+ time.sleep(1) # Give it some time to launch
166
+ return out
167
+
168
+ def remote_control(
169
+ self,
170
+ addr: str,
171
+ user: str | None = None,
172
+ password: str | None = None,
173
+ capture: bool | None = None,
174
+ other_args: list[str] | None = None,
175
+ ) -> CompletedProcess:
176
+ cmd = ["rc"]
177
+ if addr:
178
+ cmd += ["--rc-addr", addr]
179
+ if user is not None:
180
+ cmd += ["--rc-user", user]
181
+ if password is not None:
182
+ cmd += ["--rc-pass", password]
183
+ if other_args:
184
+ cmd += other_args
185
+ cp = self._run(cmd, capture=capture)
186
+ return CompletedProcess.from_subprocess(cp)
187
+
188
+ def obscure(self, password: str) -> str:
189
+ """Obscure a password for use in rclone config files."""
190
+ cmd_list: list[str] = ["obscure", password]
191
+ cp = self._run(cmd_list)
192
+ return cp.stdout.strip()
193
+
194
+ def ls_stream(
195
+ self,
196
+ path: str,
197
+ max_depth: int = -1,
198
+ fast_list: bool = False,
199
+ ) -> FilesStream:
200
+ """
201
+ List files in the given path
202
+
203
+ Args:
204
+             path: Remote path to list
205
+ max_depth: Maximum recursion depth (-1 for unlimited)
206
+             fast_list: Use fast list (only use when listing the entire data repository from the root/bucket, or when the listing is small)
207
+ """
208
+ cmd = ["lsjson", path, "--files-only"]
209
+ recurse = max_depth < 0 or max_depth > 1
210
+ if recurse:
211
+ cmd.append("-R")
212
+ if max_depth > 1:
213
+ cmd += ["--max-depth", str(max_depth)]
214
+ if fast_list:
215
+ cmd.append("--fast-list")
216
+ streamer = FilesStream(path, self._launch_process(cmd, capture=True))
217
+ return streamer
218
+
219
+ def save_to_db(
220
+ self,
221
+ src: str,
222
+ db_url: str,
223
+ max_depth: int = -1,
224
+ fast_list: bool = False,
225
+ ) -> None:
226
+ """
227
+ Save files to a database (sqlite, mysql, postgres)
228
+
229
+ Args:
230
+             src: Remote path to list; this populates an entire table, so always use the root-most path.
231
+ db_url: Database URL, like sqlite:///data.db or mysql://user:pass@localhost/db or postgres://user:pass@localhost/db
232
+ max_depth: Maximum depth to traverse (-1 for unlimited)
233
+             fast_list: Use fast list (only use when listing the entire data repository from the root/bucket)
234
+
235
+ """
236
+ from rclone_api.db import DB
237
+
238
+ db = DB(db_url)
239
+ with self.ls_stream(src, max_depth, fast_list) as stream:
240
+ for page in stream.files_paged(page_size=10000):
241
+ db.add_files(page)
242
+
243
+ def ls(
244
+ self,
245
+ path: Dir | Remote | str,
246
+ max_depth: int | None = None,
247
+ glob: str | None = None,
248
+ order: Order = Order.NORMAL,
249
+ listing_option: ListingOption = ListingOption.ALL,
250
+ ) -> DirListing:
251
+ """List files in the given path.
252
+
253
+ Args:
254
+ path: Remote path or Remote object to list
255
+ max_depth: Maximum recursion depth (0 means no recursion)
256
+
257
+ Returns:
258
+ List of File objects found at the path
259
+ """
260
+
261
+ if isinstance(path, str):
262
+ path = Dir(
263
+ to_path(path, self)
264
+ ) # assume it's a directory if ls is being called.
265
+
266
+ cmd = ["lsjson"]
267
+ if max_depth is not None:
268
+ if max_depth < 0:
269
+ cmd.append("--recursive")
270
+ if max_depth > 0:
271
+ cmd.append("--max-depth")
272
+ cmd.append(str(max_depth))
273
+ if listing_option != ListingOption.ALL:
274
+ cmd.append(f"--{listing_option.value}")
275
+
276
+ cmd.append(str(path))
277
+ remote = path.remote if isinstance(path, Dir) else path
278
+ assert isinstance(remote, Remote)
279
+
280
+ cp = self._run(cmd, check=True)
281
+ text = cp.stdout
282
+ parent_path: str | None = None
283
+ if isinstance(path, Dir):
284
+ parent_path = path.path.path
285
+ paths: list[RPath] = RPath.from_json_str(text, remote, parent_path=parent_path)
286
+ # print(parent_path)
287
+ for o in paths:
288
+ o.set_rclone(self)
289
+
290
+ # do we have a glob pattern?
291
+ if glob is not None:
292
+ paths = [p for p in paths if fnmatch(p.path, glob)]
293
+
294
+ if order == Order.REVERSE:
295
+ paths.reverse()
296
+ elif order == Order.RANDOM:
297
+ random.shuffle(paths)
298
+ return DirListing(paths)
299
+
300
+ def listremotes(self) -> list[Remote]:
301
+ cmd = ["listremotes"]
302
+ cp = self._run(cmd)
303
+ text: str = cp.stdout
304
+ tmp = text.splitlines()
305
+ tmp = [t.strip() for t in tmp]
306
+ # strip out ":" from the end
307
+ tmp = [t.replace(":", "") for t in tmp]
308
+ out = [Remote(name=t, rclone=self) for t in tmp]
309
+ return out
310
+
311
+ def diff(
312
+ self,
313
+ src: str,
314
+ dst: str,
315
+ min_size: (
316
+ str | None
317
+ ) = None, # e. g. "1MB" - see rclone documentation: https://rclone.org/commands/rclone_check/
318
+ max_size: (
319
+ str | None
320
+ ) = None, # e. g. "1GB" - see rclone documentation: https://rclone.org/commands/rclone_check/
321
+ diff_option: DiffOption = DiffOption.COMBINED,
322
+ fast_list: bool = True,
323
+ size_only: bool | None = None,
324
+ checkers: int | None = None,
325
+ other_args: list[str] | None = None,
326
+ ) -> Generator[DiffItem, None, None]:
327
+ """Be extra careful with the src and dst values. If you are off by one
328
+ parent directory, you will get a huge amount of false diffs."""
329
+ other_args = other_args or []
330
+ if checkers is None or checkers < 1:
331
+ checkers = 1000
332
+ cmd = [
333
+ "check",
334
+ src,
335
+ dst,
336
+ "--checkers",
337
+ str(checkers),
338
+ "--log-level",
339
+ "INFO",
340
+ f"--{diff_option.value}",
341
+ "-",
342
+ ]
343
+ if size_only is None:
344
+ size_only = diff_option in [
345
+ DiffOption.MISSING_ON_DST,
346
+ DiffOption.MISSING_ON_SRC,
347
+ ]
348
+ if size_only:
349
+ cmd += ["--size-only"]
350
+ if fast_list:
351
+ cmd += ["--fast-list"]
352
+ if min_size:
353
+ cmd += ["--min-size", min_size]
354
+ if max_size:
355
+ cmd += ["--max-size", max_size]
356
+ if diff_option == DiffOption.MISSING_ON_DST:
357
+ cmd += ["--one-way"]
358
+ if other_args:
359
+ cmd += other_args
360
+ proc = self._launch_process(cmd, capture=True)
361
+ item: DiffItem
362
+ for item in diff_stream_from_running_process(
363
+ running_process=proc, src_slug=src, dst_slug=dst, diff_option=diff_option
364
+ ):
365
+ if item is None:
366
+ break
367
+ yield item
368
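+
+ # Example usage (sketch): stream differences between two prefixes, assuming the
+ # remotes "src:" and "dst:" are configured and hold comparable trees.
+ #
+ #   for item in rclone.diff("src:bucket/data", "dst:bucket/data"):
+ #       print(item)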
+
369
+ def walk(
370
+ self,
371
+ path: Dir | Remote | str,
372
+ max_depth: int = -1,
373
+ breadth_first: bool = True,
374
+ order: Order = Order.NORMAL,
375
+ ) -> Generator[DirListing, None, None]:
376
+ """Walk through the given path recursively.
377
+
378
+ Args:
379
+ path: Remote path or Remote object to walk through
380
+ max_depth: Maximum depth to traverse (-1 for unlimited)
381
+
382
+ Yields:
383
+ DirListing: Directory listing for each directory encountered
384
+ """
385
+ dir_obj: Dir
386
+ if isinstance(path, Dir):
387
+ # Create a Remote object for the path
388
+ remote = path.remote
389
+ rpath = RPath(
390
+ remote=remote,
391
+ path=path.path.path,
392
+ name=path.path.name,
393
+ size=0,
394
+ mime_type="inode/directory",
395
+ mod_time="",
396
+ is_dir=True,
397
+ )
398
+ rpath.set_rclone(self)
399
+ dir_obj = Dir(rpath)
400
+ elif isinstance(path, str):
401
+ dir_obj = Dir(to_path(path, self))
402
+ elif isinstance(path, Remote):
403
+ dir_obj = Dir(path)
404
+ else:
405
+ # unreachable for supported types; raise instead of a no-op assert
406
+ raise ValueError(f"Invalid type for path: {type(path)}")
407
+
408
+ yield from walk(
409
+ dir_obj, max_depth=max_depth, breadth_first=breadth_first, order=order
410
+ )
411
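+
+ # Example usage (sketch): breadth-first walk of a remote path, assuming a configured
+ # remote named "remote:".
+ #
+ #   for listing in rclone.walk("remote:bucket", max_depth=2):
+ #       for f in listing.files:
+ #           print(f)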
+
412
+ def scan_missing_folders(
413
+ self,
414
+ src: Dir | Remote | str,
415
+ dst: Dir | Remote | str,
416
+ max_depth: int = -1,
417
+ order: Order = Order.NORMAL,
418
+ ) -> Generator[Dir, None, None]:
419
+ """Walk through the given path recursively.
420
+
421
+ WORK IN PROGRESS!!
422
+
423
+ Args:
424
+ src: Source directory or Remote to walk through
425
+ dst: Destination directory or Remote to walk through
426
+ max_depth: Maximum depth to traverse (-1 for unlimited)
427
+
428
+ Yields:
429
+ Dir: Each directory present in src but missing from dst
430
+ """
431
+ from rclone_api.scan_missing_folders import scan_missing_folders
432
+
433
+ src_dir = Dir(to_path(src, self))
434
+ dst_dir = Dir(to_path(dst, self))
435
+ yield from scan_missing_folders(
436
+ src=src_dir, dst=dst_dir, max_depth=max_depth, order=order
437
+ )
438
+
439
+ def cleanup(
440
+ self, path: str, other_args: list[str] | None = None
441
+ ) -> CompletedProcess:
442
+ """Cleanup any resources used by the Rclone instance."""
443
+ # rclone cleanup remote:path [flags]
444
+ cmd = ["cleanup", path]
445
+ if other_args:
446
+ cmd += other_args
447
+ out = self._run(cmd)
448
+ return CompletedProcess.from_subprocess(out)
449
+
450
+ def copy_to(
451
+ self,
452
+ src: File | str,
453
+ dst: File | str,
454
+ check: bool | None = None,
455
+ verbose: bool | None = None,
456
+ other_args: list[str] | None = None,
457
+ ) -> CompletedProcess:
458
+ """Copy one file from source to destination.
459
+
460
+ Warning - slow.
461
+
462
+ """
463
+ check = get_check(check)
464
+ verbose = get_verbose(verbose)
465
+ src = src if isinstance(src, str) else str(src.path)
466
+ dst = dst if isinstance(dst, str) else str(dst.path)
467
+ cmd_list: list[str] = ["copyto", src, dst]
468
+ if other_args is not None:
469
+ cmd_list += other_args
470
+ cp = self._run(cmd_list, check=check)
471
+ return CompletedProcess.from_subprocess(cp)
472
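+
+ # Example usage (sketch): copy a single file between two configured remotes.
+ #
+ #   rclone.copy_to("src:bucket/a/file.bin", "dst:bucket/a/file.bin", check=True)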
+
473
+ def copy_files(
474
+ self,
475
+ src: str,
476
+ dst: str,
477
+ files: list[str] | Path,
478
+ check: bool | None = None,
479
+ max_backlog: int | None = None,
480
+ verbose: bool | None = None,
481
+ checkers: int | None = None,
482
+ transfers: int | None = None,
483
+ low_level_retries: int | None = None,
484
+ retries: int | None = None,
485
+ retries_sleep: str | None = None,
486
+ metadata: bool | None = None,
487
+ timeout: str | None = None,
488
+ max_partition_workers: int | None = None,
489
+ multi_thread_streams: int | None = None,
490
+ other_args: list[str] | None = None,
491
+ ) -> list[CompletedProcess]:
492
+ """Copy multiple files from source to destination.
493
+
494
+ Args:
495
+ src: Remote prefix the relative paths are copied from
+ dst: Remote prefix the relative paths are copied to
+ files: List of paths relative to src, or a Path to a text file
+ containing one relative path per line
496
+ """
497
+ check = get_check(check)
498
+ max_partition_workers = max_partition_workers or 1
499
+ low_level_retries = low_level_retries or 10
500
+ retries = retries or 3
501
+ other_args = other_args or []
502
+ checkers = checkers or 1000
503
+ transfers = transfers or 32
504
+ verbose = get_verbose(verbose)
505
+ payload: list[str] = (
506
+ files
507
+ if isinstance(files, list)
508
+ else [f.strip() for f in files.read_text().splitlines() if f.strip()]
509
+ )
510
+ if len(payload) == 0:
511
+ return []
512
+
513
+ for p in payload:
514
+ if ":" in p:
515
+ raise ValueError(
516
+ f"Invalid file path, contains a remote, which is not allowed for copy_files: {p}"
517
+ )
518
+
519
+ using_fast_list = "--fast-list" in other_args
520
+ if using_fast_list:
521
+ warnings.warn(
522
+ "It's not recommended to use --fast-list with copy_files as this will perform poorly on large repositories since the entire repository has to be scanned."
523
+ )
524
+
525
+ if max_partition_workers > 1:
526
+ datalists: dict[str, list[str]] = group_files(
527
+ payload, fully_qualified=False
528
+ )
529
+ else:
530
+ datalists = {"": payload}
531
+ # out: subprocess.CompletedProcess | None = None
532
+ out: list[CompletedProcess] = []
533
+
534
+ futures: list[Future] = []
535
+
536
+ with ThreadPoolExecutor(max_workers=max_partition_workers) as executor:
537
+ for common_prefix, files in datalists.items():
538
+
539
+ def _task(
540
+ files: list[str] | Path = files,
541
+ ) -> subprocess.CompletedProcess:
542
+ with TemporaryDirectory() as tmpdir:
543
+ filelist: list[str] = []
544
+ filepath: Path
545
+ if isinstance(files, list):
546
+ include_files_txt = Path(tmpdir) / "include_files.txt"
547
+ include_files_txt.write_text(
548
+ "\n".join(files), encoding="utf-8"
549
+ )
550
+ filelist = list(files)
551
+ filepath = Path(include_files_txt)
552
+ elif isinstance(files, Path):
553
+ filelist = [
554
+ f.strip()
555
+ for f in files.read_text().splitlines()
556
+ if f.strip()
557
+ ]
558
+ filepath = files
559
+ if common_prefix:
560
+ src_path = f"{src}/{common_prefix}"
561
+ dst_path = f"{dst}/{common_prefix}"
562
+ else:
563
+ src_path = src
564
+ dst_path = dst
565
+
566
+ if verbose:
567
+ nfiles = len(filelist)
568
+ files_fqdn = [f" {src_path}/{f}" for f in filelist]
569
+ print(f"Copying {nfiles} files:")
570
+ chunk_size = 100
571
+ for i in range(0, nfiles, chunk_size):
572
+ chunk = files_fqdn[i : i + chunk_size]
573
+ files_str = "\n".join(chunk)
574
+ print(f"{files_str}")
575
+ cmd_list: list[str] = [
576
+ "copy",
577
+ src_path,
578
+ dst_path,
579
+ "--files-from",
580
+ str(filepath),
581
+ "--checkers",
582
+ str(checkers),
583
+ "--transfers",
584
+ str(transfers),
585
+ "--low-level-retries",
586
+ str(low_level_retries),
587
+ "--retries",
588
+ str(retries),
589
+ ]
590
+ if metadata:
591
+ cmd_list.append("--metadata")
592
+ if retries_sleep is not None:
593
+ cmd_list += ["--retries-sleep", retries_sleep]
594
+ if timeout is not None:
595
+ cmd_list += ["--timeout", timeout]
596
+ if max_backlog is not None:
597
+ cmd_list += ["--max-backlog", str(max_backlog)]
598
+ if multi_thread_streams is not None:
599
+ cmd_list += [
600
+ "--multi-thread-streams",
601
+ str(multi_thread_streams),
602
+ ]
603
+ if verbose:
604
+ if not any(["-v" in x for x in other_args]):
605
+ cmd_list.append("-vvvv")
606
+ if not any(["--progress" in x for x in other_args]):
607
+ cmd_list.append("--progress")
608
+ if other_args:
609
+ cmd_list += other_args
610
+ out = self._run(cmd_list, capture=not verbose)
611
+ return out
612
+
613
+ fut: Future = executor.submit(_task)
614
+ futures.append(fut)
615
+ for fut in futures:
616
+ cp: subprocess.CompletedProcess = fut.result()
617
+ assert cp is not None
618
+ out.append(CompletedProcess.from_subprocess(cp))
619
+ if cp.returncode != 0:
620
+ if check:
621
+ raise ValueError(f"Error deleting files: {cp.stderr}")
622
+ else:
623
+ warnings.warn(f"Error deleting files: {cp.stderr}")
624
+ return out
625
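+
+ # Example usage (sketch): copy a specific set of files given as paths relative to
+ # src and dst, assuming the remotes "src:" and "dst:" are configured.
+ #
+ #   results = rclone.copy_files(
+ #       "src:bucket",
+ #       "dst:bucket",
+ #       files=["a/one.bin", "a/two.bin", "b/three.bin"],
+ #       transfers=16,
+ #   )
+ #   assert all(r.ok for r in results)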
+
626
+ def copy(
627
+ self,
628
+ src: Dir | str,
629
+ dst: Dir | str,
630
+ check: bool | None = None,
631
+ transfers: int | None = None,
632
+ checkers: int | None = None,
633
+ multi_thread_streams: int | None = None,
634
+ low_level_retries: int | None = None,
635
+ retries: int | None = None,
636
+ other_args: list[str] | None = None,
637
+ ) -> CompletedProcess:
638
+ """Copy files from source to destination.
639
+
640
+ Args:
641
+ src: Source directory
642
+ dst: Destination directory
643
+ """
644
+ # src_dir = src.path.path
645
+ # dst_dir = dst.path.path
646
+ src_dir = convert_to_str(src)
647
+ dst_dir = convert_to_str(dst)
648
+ check = get_check(check)
649
+ checkers = checkers or 1000
650
+ transfers = transfers or 32
651
+ low_level_retries = low_level_retries or 10
652
+ retries = retries or 3
653
+ cmd_list: list[str] = ["copy", src_dir, dst_dir]
654
+ cmd_list += ["--checkers", str(checkers)]
655
+ cmd_list += ["--transfers", str(transfers)]
656
+ cmd_list += ["--low-level-retries", str(low_level_retries)]
657
+ if multi_thread_streams is not None:
658
+ cmd_list += ["--multi-thread-streams", str(multi_thread_streams)]
659
+ if other_args:
660
+ cmd_list += other_args
661
+ cp = self._run(cmd_list, check=check, capture=False)
662
+ return CompletedProcess.from_subprocess(cp)
663
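+
+ # Example usage (sketch): copy an entire prefix between configured remotes.
+ #
+ #   rclone.copy("src:bucket/data", "dst:bucket/data", transfers=32, checkers=64)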
+
664
+ def purge(self, path: Dir | str) -> CompletedProcess:
665
+ """Purge a directory"""
666
+ # path should always be a string
667
+ path = path if isinstance(path, str) else str(path.path)
668
+ cmd_list: list[str] = ["purge", str(path)]
669
+ cp = self._run(cmd_list)
670
+ return CompletedProcess.from_subprocess(cp)
671
+
672
+ def delete_files(
673
+ self,
674
+ files: str | File | list[str] | list[File],
675
+ check: bool | None = None,
676
+ rmdirs=False,
677
+ verbose: bool | None = None,
678
+ max_partition_workers: int | None = None,
679
+ other_args: list[str] | None = None,
680
+ ) -> CompletedProcess:
681
+ """Delete a directory"""
682
+ check = get_check(check)
683
+ verbose = get_verbose(verbose)
684
+ payload: list[str] = convert_to_filestr_list(files)
685
+ if len(payload) == 0:
686
+ if verbose:
687
+ print("No files to delete")
688
+ cp = subprocess.CompletedProcess(
689
+ args=["rclone", "delete", "--files-from", "[]"],
690
+ returncode=0,
691
+ stdout="",
692
+ stderr="",
693
+ )
694
+ return CompletedProcess.from_subprocess(cp)
695
+
696
+ datalists: dict[str, list[str]] = group_files(payload)
697
+ completed_processes: list[subprocess.CompletedProcess] = []
698
+
699
+ futures: list[Future] = []
700
+
701
+ with ThreadPoolExecutor(max_workers=max_partition_workers) as executor:
702
+
703
+ for remote, files in datalists.items():
704
+
705
+ def _task(
706
+ files=files, check=check, remote=remote
707
+ ) -> subprocess.CompletedProcess:
708
+ with TemporaryDirectory() as tmpdir:
709
+ include_files_txt = Path(tmpdir) / "include_files.txt"
710
+ include_files_txt.write_text("\n".join(files), encoding="utf-8")
711
+
712
+ # print(include_files_txt)
713
+ cmd_list: list[str] = [
714
+ "delete",
715
+ remote,
716
+ "--files-from",
717
+ str(include_files_txt),
718
+ "--checkers",
719
+ "1000",
720
+ "--transfers",
721
+ "1000",
722
+ ]
723
+ if verbose:
724
+ cmd_list.append("-vvvv")
725
+ if rmdirs:
726
+ cmd_list.append("--rmdirs")
727
+ if other_args:
728
+ cmd_list += other_args
729
+ out = self._run(cmd_list, check=check)
730
+ if out.returncode != 0:
731
+ if check:
732
+ completed_processes.append(out)
733
+ raise ValueError(f"Error deleting files: {out}")
734
+ else:
735
+ warnings.warn(f"Error deleting files: {out}")
736
+ return out
737
+
738
+ fut: Future = executor.submit(_task)
739
+ futures.append(fut)
740
+
741
+ for fut in futures:
742
+ out = fut.result()
743
+ assert out is not None
744
+ completed_processes.append(out)
745
+
746
+ return CompletedProcess(completed_processes)
747
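+
+ # Example usage (sketch): delete a handful of fully qualified files, assuming a
+ # configured remote named "remote:".
+ #
+ #   rclone.delete_files(
+ #       ["remote:bucket/tmp/one.bin", "remote:bucket/tmp/two.bin"],
+ #       rmdirs=True,
+ #   )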
+
748
+ @deprecated("delete_files")
749
+ def deletefiles(
750
+ self, files: str | File | list[str] | list[File]
751
+ ) -> CompletedProcess:
752
+ out = self.delete_files(files)
753
+ return out
754
+
755
+ def exists(self, path: Dir | Remote | str | File) -> bool:
756
+ """Check if a file or directory exists."""
757
+ arg: str = convert_to_str(path)
758
+ assert isinstance(arg, str)
759
+ try:
760
+ dir_listing = self.ls(arg)
761
+ # print(dir_listing)
762
+ return len(dir_listing.dirs) > 0 or len(dir_listing.files) > 0
763
+ except subprocess.CalledProcessError:
764
+ return False
765
+
766
+ def is_synced(self, src: str | Dir, dst: str | Dir) -> bool:
767
+ """Check if two directories are in sync."""
768
+ src = convert_to_str(src)
769
+ dst = convert_to_str(dst)
770
+ cmd_list: list[str] = ["check", str(src), str(dst)]
771
+ try:
772
+ self._run(cmd_list, check=True)
773
+ return True
774
+ except subprocess.CalledProcessError:
775
+ return False
776
+
777
+ def copy_file_resumable_s3(
778
+ self,
779
+ src: str,
780
+ dst: str,
781
+ save_state_json: Path,
782
+ chunk_size: SizeSuffix | None = None,
783
+ read_threads: int = 8,
784
+ write_threads: int = 8,
785
+ retries: int = 3,
786
+ verbose: bool | None = None,
787
+ max_chunks_before_suspension: int | None = None,
788
+ mount_log: Path | None = None,
789
+ ) -> MultiUploadResult:
790
+ """For massive files that rclone can't handle in one go, this function will copy the file in chunks to an S3 store"""
791
+ from rclone_api.s3.api import S3Client
792
+ from rclone_api.s3.create import S3Credentials
793
+ from rclone_api.util import S3PathInfo, split_s3_path
794
+
795
+ other_args: list[str] = ["--no-modtime", "--vfs-read-wait", "1s"]
796
+ chunk_size = chunk_size or SizeSuffix("64M")
797
+ unit_chunk_size = chunk_size / read_threads
798
+ tmp_mount_dir = self._get_tmp_mount_dir()
799
+ vfs_read_chunk_size = unit_chunk_size
800
+ vfs_read_chunk_size_limit = chunk_size
801
+ vfs_read_chunk_streams = read_threads
802
+ vfs_disk_space_total_size = chunk_size
803
+ assert (
804
+ chunk_size.as_int() % vfs_read_chunk_size.as_int() == 0
805
+ ), f"chunk_size {chunk_size} must be a multiple of vfs_read_chunk_size {vfs_read_chunk_size}"
806
+ other_args += ["--vfs-read-chunk-size", vfs_read_chunk_size.as_str()]
807
+ other_args += [
808
+ "--vfs-read-chunk-size-limit",
809
+ vfs_read_chunk_size_limit.as_str(),
810
+ ]
811
+ other_args += ["--vfs-read-chunk-streams", str(vfs_read_chunk_streams)]
812
+ other_args += [
813
+ "--vfs-disk-space-total-size",
814
+ vfs_disk_space_total_size.as_str(),
815
+ ]
816
+ other_args += ["--read-only"]
817
+ other_args += ["--direct-io"]
818
+ # --vfs-cache-max-size
819
+ other_args += ["--vfs-cache-max-size", vfs_disk_space_total_size.as_str()]
820
+ mount_path = tmp_mount_dir / "RCLONE_API_DYNAMIC_MOUNT"
821
+ src_path = Path(src)
822
+ name = src_path.name
823
+
824
+ src_parent_path = Path(src).parent.as_posix()
825
+ size_result: SizeResult = self.size_files(src_parent_path, [name])
826
+
827
+ target_size = SizeSuffix(size_result.total_size)
828
+ if target_size < SizeSuffix("5M"):
829
+ # fallback to normal copy
830
+ completed_proc = self.copy_to(src, dst, check=True)
831
+ if completed_proc.ok:
832
+ return MultiUploadResult.UPLOADED_FRESH
833
+ if size_result.total_size <= 0:
834
+ raise ValueError(
835
+ f"File {src} has size {size_result.total_size}, is this a directory?"
836
+ )
837
+
838
+ path_info: S3PathInfo = split_s3_path(dst)
839
+ remote = path_info.remote
840
+ bucket_name = path_info.bucket
841
+ s3_key = path_info.key
842
+ parsed: Parsed = self.config.parse()
843
+ sections: dict[str, Section] = parsed.sections
844
+ if remote not in sections:
845
+ raise ValueError(
846
+ f"Remote {remote} not found in rclone config, remotes are: {sections.keys()}"
847
+ )
848
+
849
+ section: Section = sections[remote]
850
+ dst_type = section.type()
851
+ if dst_type != "s3" and dst_type != "b2":
852
+ raise ValueError(
853
+ f"Remote {remote} is not an S3 remote, it is of type {dst_type}"
854
+ )
855
+
856
+ def get_provider_str(section=section) -> str | None:
857
+ type: str = section.type()
858
+ provider: str | None = section.provider()
859
+ if provider is not None:
860
+ return provider
861
+ if type == "b2":
862
+ return S3Provider.BACKBLAZE.value
863
+ if type != "s3":
864
+ raise ValueError(f"Remote {remote} is not an S3 remote")
865
+ return S3Provider.S3.value
866
+
867
+ provider: str
868
+ if provided_provider_str := get_provider_str():
869
+ if verbose:
870
+ print(f"Using provided provider: {provided_provider_str}")
871
+ provider = provided_provider_str
872
+ else:
873
+ if verbose:
874
+ print(f"Using default provider: {S3Provider.S3.value}")
875
+ provider = S3Provider.S3.value
876
+ provider_enum = S3Provider.from_str(provider)
877
+
878
+ s3_creds: S3Credentials = S3Credentials(
879
+ provider=provider_enum,
880
+ access_key_id=section.access_key_id(),
881
+ secret_access_key=section.secret_access_key(),
882
+ endpoint_url=section.endpoint(),
883
+ )
884
+
885
+ chunk_fetcher: MultiMountFileChunker = self.get_multi_mount_file_chunker(
886
+ src=src_path.as_posix(),
887
+ chunk_size=chunk_size,
888
+ threads=read_threads,
889
+ mount_log=mount_log,
890
+ direct_io=True,
891
+ )
892
+
893
+ client = S3Client(s3_creds)
894
+ upload_config: S3MutliPartUploadConfig = S3MutliPartUploadConfig(
895
+ chunk_size=chunk_size.as_int(),
896
+ chunk_fetcher=chunk_fetcher.fetch,
897
+ max_write_threads=write_threads,
898
+ retries=retries,
899
+ resume_path_json=save_state_json,
900
+ max_chunks_before_suspension=max_chunks_before_suspension,
901
+ )
902
+
903
+ src_file = mount_path / name
904
+
905
+ print(f"Uploading {name} to {s3_key} in bucket {bucket_name}")
906
+ print(f"Source: {src_path}")
907
+ print(f"bucket_name: {bucket_name}")
908
+ print(f"upload_config: {upload_config}")
909
+
910
+ # get the file size
911
+
912
+ upload_target = S3UploadTarget(
913
+ src_file=src_file,
914
+ src_file_size=size_result.total_size,
915
+ bucket_name=bucket_name,
916
+ s3_key=s3_key,
917
+ )
918
+
919
+ try:
920
+ out: MultiUploadResult = client.upload_file_multipart(
921
+ upload_target=upload_target,
922
+ upload_config=upload_config,
923
+ )
924
+ return out
925
+ except Exception as e:
926
+ print(f"Error uploading file: {e}")
927
+ traceback.print_exc()
928
+ raise
929
+ finally:
930
+ chunk_fetcher.shutdown()
931
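+
+ # Example usage (sketch): resumable chunked upload of a very large file, assuming
+ # "src:" is a configured remote and "s3dst:" is a configured remote of type s3 or b2.
+ #
+ #   result = rclone.copy_file_resumable_s3(
+ #       "src:bucket/huge.tar",
+ #       "s3dst:bucket/huge.tar",
+ #       save_state_json=Path("resume_state.json"),
+ #       chunk_size=SizeSuffix("128M"),
+ #   )
+ #   print(result)  # a MultiUploadResult value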
+
932
+ def get_multi_mount_file_chunker(
933
+ self,
934
+ src: str,
935
+ chunk_size: SizeSuffix,
936
+ threads: int,
937
+ mount_log: Path | None,
938
+ direct_io: bool,
939
+ ) -> MultiMountFileChunker:
940
+ from rclone_api.util import random_str
941
+
942
+ mounts: list[Mount] = []
943
+ vfs_read_chunk_size = chunk_size
944
+ vfs_read_chunk_size_limit = chunk_size
945
+ vfs_read_chunk_streams = 0
946
+ vfs_disk_space_total_size = chunk_size
947
+ other_args: list[str] = []
948
+ other_args += ["--no-modtime"]
949
+ other_args += ["--vfs-read-chunk-size", vfs_read_chunk_size.as_str()]
950
+ other_args += [
951
+ "--vfs-read-chunk-size-limit",
952
+ vfs_read_chunk_size_limit.as_str(),
953
+ ]
954
+ other_args += ["--vfs-read-chunk-streams", str(vfs_read_chunk_streams)]
955
+ other_args += [
956
+ "--vfs-disk-space-total-size",
957
+ vfs_disk_space_total_size.as_str(),
958
+ ]
959
+ other_args += ["--read-only"]
960
+ if direct_io:
961
+ other_args += ["--direct-io"]
962
+
963
+ base_mount_dir = self._get_tmp_mount_dir()
964
+ base_cache_dir = self._get_cache_dir()
965
+
966
+ filename = Path(src).name
967
+ with ThreadPoolExecutor(max_workers=threads) as executor:
968
+ futures: list[Future] = []
969
+ try:
970
+ for i in range(threads):
971
+ tmp_mnts = base_mount_dir / random_str(12)
972
+ verbose = mount_log is not None
973
+
974
+ src_parent_path = Path(src).parent.as_posix()
975
+ cache_dir = base_cache_dir / random_str(12)
976
+
977
+ def task(
978
+ src_parent_path=src_parent_path,
979
+ tmp_mnts=tmp_mnts,
980
+ cache_dir=cache_dir,
981
+ ):
982
+ clean_mount(tmp_mnts, verbose=verbose)
983
+ prepare_mount(tmp_mnts, verbose=verbose)
984
+ return self.mount(
985
+ src=src_parent_path,
986
+ outdir=tmp_mnts,
987
+ allow_writes=False,
988
+ use_links=True,
989
+ vfs_cache_mode="minimal",
990
+ verbose=False,
991
+ cache_dir=cache_dir,
992
+ cache_dir_delete_on_exit=True,
993
+ log=mount_log,
994
+ other_args=other_args,
995
+ )
996
+
997
+ futures.append(executor.submit(task))
998
+ mount_errors: list[Exception] = []
999
+ for fut in futures:
1000
+ try:
1001
+ mount = fut.result()
1002
+ mounts.append(mount)
1003
+ except Exception as er:
1004
+ warnings.warn(f"Error mounting: {er}")
1005
+ mount_errors.append(er)
1006
+ if mount_errors:
1007
+ warnings.warn(f"Error mounting: {mount_errors}")
1008
+ raise Exception(mount_errors)
1009
+ except Exception:
1010
+ for mount in mounts:
1011
+ mount.close()
1012
+ raise
1013
+
1014
+ src_path: Path = Path(src)
1015
+ src_parent_path = src_path.parent.as_posix()
1016
+ name = src_path.name
1017
+ size_result: SizeResult = self.size_files(src_parent_path, [name])
1018
+ filesize = size_result.total_size
1019
+
1020
+ executor = ThreadPoolExecutor(max_workers=threads)
1021
+ filechunker: MultiMountFileChunker = MultiMountFileChunker(
1022
+ filename=filename,
1023
+ filesize=filesize,
1024
+ mounts=mounts,
1025
+ executor=executor,
1026
+ verbose=mount_log is not None,
1027
+ )
1028
+ return filechunker
1029
+
1030
+ def copy_bytes(
1031
+ self,
1032
+ src: str,
1033
+ offset: int | SizeSuffix,
1034
+ length: int | SizeSuffix,
1035
+ outfile: Path,
1036
+ other_args: list[str] | None = None,
1037
+ ) -> Exception | None:
1038
+ """Copy a slice of bytes from the src file to dst."""
1039
+ offset = SizeSuffix(offset).as_int()
1040
+ length = SizeSuffix(length).as_int()
1041
+ cmd_list: list[str] = [
1042
+ "cat",
1043
+ "--offset",
1044
+ str(offset),
1045
+ "--count",
1046
+ str(length),
1047
+ src,
1048
+ ]
1049
+ if other_args:
1050
+ cmd_list.extend(other_args)
1051
+ try:
1052
+ cp = self._run(cmd_list, capture=outfile)
1053
+ if cp.returncode == 0:
1054
+ return None
1055
+ return Exception(cp.stderr)
1056
+ except subprocess.CalledProcessError as e:
1057
+ return e
1058
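+
+ # Example usage (sketch): fetch a 1 MiB slice starting 16 MiB into a remote file and
+ # write it to a local file, assuming a configured remote named "remote:".
+ #
+ #   err = rclone.copy_bytes(
+ #       "remote:bucket/huge.bin",
+ #       offset=SizeSuffix("16M"),
+ #       length=SizeSuffix("1M"),
+ #       outfile=Path("slice.bin"),
+ #   )
+ #   assert err is None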
+
1059
+ def copy_bytes_mount(
1060
+ self,
1061
+ src: str,
1062
+ offset: int | SizeSuffix,
1063
+ length: int | SizeSuffix,
1064
+ chunk_size: SizeSuffix,
1065
+ max_threads: int = 1,
1066
+ # If outfile is supplied then bytes are written to this file and success returns bytes(0)
1067
+ outfile: Path | None = None,
1068
+ mount_log: Path | None = None,
1069
+ direct_io: bool = True,
1070
+ ) -> bytes | Exception:
1071
+ """Copy a slice of bytes from the src file to dst. Parallelism is achieved through multiple mounted files."""
1072
+ from rclone_api.types import FilePart
1073
+
1074
+ offset = SizeSuffix(offset).as_int()
1075
+ length = SizeSuffix(length).as_int()
1076
+ # determine number of threads from chunk size
1077
+ threads = max(1, min(max_threads, length // chunk_size.as_int()))
1078
+ # todo - implement max threads.
1079
+ filechunker = self.get_multi_mount_file_chunker(
1080
+ src=src,
1081
+ chunk_size=chunk_size,
1082
+ threads=threads,
1083
+ mount_log=mount_log,
1084
+ direct_io=direct_io,
1085
+ )
1086
+ try:
1087
+ fut = filechunker.fetch(offset, length, extra=None)
1088
+ fp: FilePart = fut.result()
1089
+ payload = fp.payload
1090
+ if isinstance(payload, Exception):
1091
+ return payload
1092
+ try:
1093
+ if outfile is None:
1094
+ return payload.read_bytes()
1095
+ shutil.move(payload, outfile)
1096
+ return bytes(0)
1097
+ finally:
1098
+ fp.close()
1099
+
1100
+ except Exception as e:
1101
+ warnings.warn(f"Error copying bytes: {e}")
1102
+ return e
1103
+ finally:
1104
+ try:
1105
+ filechunker.shutdown()
1106
+ except Exception as e:
1107
+ warnings.warn(f"Error closing filechunker: {e}")
1108
+
1109
+ def copy_dir(
1110
+ self, src: str | Dir, dst: str | Dir, args: list[str] | None = None
1111
+ ) -> CompletedProcess:
1112
+ """Copy a directory from source to destination."""
1113
+ # convert src to str, also dst
1114
+ src = convert_to_str(src)
1115
+ dst = convert_to_str(dst)
1116
+ cmd_list: list[str] = ["copy", src, dst]
1117
+ if args is not None:
1118
+ cmd_list += args
1119
+ cp = self._run(cmd_list)
1120
+ return CompletedProcess.from_subprocess(cp)
1121
+
1122
+ def copy_remote(
1123
+ self, src: Remote, dst: Remote, args: list[str] | None = None
1124
+ ) -> CompletedProcess:
1125
+ """Copy a remote to another remote."""
1126
+ cmd_list: list[str] = ["copy", str(src), str(dst)]
1127
+ if args is not None:
1128
+ cmd_list += args
1129
+ # return self._run(cmd_list)
1130
+ cp = self._run(cmd_list)
1131
+ return CompletedProcess.from_subprocess(cp)
1132
+
1133
+ def mount(
1134
+ self,
1135
+ src: Remote | Dir | str,
1136
+ outdir: Path,
1137
+ allow_writes: bool | None = False,
1138
+ use_links: bool | None = None,
1139
+ vfs_cache_mode: str | None = None,
1140
+ verbose: bool | None = None,
1141
+ cache_dir: Path | None = None,
1142
+ cache_dir_delete_on_exit: bool | None = None,
1143
+ log: Path | None = None,
1144
+ other_args: list[str] | None = None,
1145
+ ) -> Mount:
1146
+ """Mount a remote or directory to a local path.
1147
+
1148
+ Args:
1149
+ src: Remote or directory to mount
1150
+ outdir: Local path to mount to
1151
+
1152
+ Returns:
1153
+ Mount: Handle for the running mount process (call close() to unmount)
1154
+
1155
+ Raises:
1156
+ subprocess.CalledProcessError: If the mount operation fails
1157
+ """
1158
+
1159
+ allow_writes = allow_writes or False
1160
+ use_links = True if use_links is None else use_links
1161
+ verbose = get_verbose(verbose) or (log is not None)
1162
+ vfs_cache_mode = vfs_cache_mode or "full"
1163
+ clean_mount(outdir, verbose=verbose)
1164
+ prepare_mount(outdir, verbose=verbose)
1165
+ debug_fuse = log is not None
1166
+ src_str = convert_to_str(src)
1167
+ cmd_list: list[str] = ["mount", src_str, str(outdir)]
1168
+ if not allow_writes:
1169
+ cmd_list.append("--read-only")
1170
+ if use_links:
1171
+ cmd_list.append("--links")
1172
+ if vfs_cache_mode:
1173
+ cmd_list.append("--vfs-cache-mode")
1174
+ cmd_list.append(vfs_cache_mode)
1175
+ if cache_dir:
1176
+ cmd_list.append("--cache-dir")
1177
+ cmd_list.append(str(cache_dir.absolute()))
1178
+ if debug_fuse:
1179
+ cmd_list.append("--debug-fuse")
1180
+ if verbose:
1181
+ cmd_list.append("-vvvv")
1182
+ if other_args:
1183
+ cmd_list += other_args
1184
+ proc = self._launch_process(cmd_list, log=log)
1185
+ mount_read_only = not allow_writes
1186
+ mount: Mount = Mount(
1187
+ src=src_str,
1188
+ mount_path=outdir,
1189
+ process=proc,
1190
+ read_only=mount_read_only,
1191
+ cache_dir=cache_dir,
1192
+ cache_dir_delete_on_exit=cache_dir_delete_on_exit,
1193
+ )
1194
+ return mount
1195
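+
+ # Example usage (sketch): mount a remote read-only into an empty local directory,
+ # assuming a configured remote named "remote:".
+ #
+ #   mount = rclone.mount("remote:bucket", Path("mnt"), allow_writes=False)
+ #   try:
+ #       ...  # read files under ./mnt
+ #   finally:
+ #       mount.close()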
+
1196
+ @contextmanager
1197
+ def scoped_mount(
1198
+ self,
1199
+ src: Remote | Dir | str,
1200
+ outdir: Path,
1201
+ allow_writes: bool | None = None,
1202
+ use_links: bool | None = None,
1203
+ vfs_cache_mode: str | None = None,
1204
+ verbose: bool | None = None,
1205
+ log: Path | None = None,
1206
+ cache_dir: Path | None = None,
1207
+ cache_dir_delete_on_exit: bool | None = None,
1208
+ other_args: list[str] | None = None,
1209
+ ) -> Generator[Mount, None, None]:
1210
+ """Like mount, but can be used in a context manager."""
1211
+ error_happened = False
1212
+ mount: Mount = self.mount(
1213
+ src,
1214
+ outdir,
1215
+ allow_writes=allow_writes,
1216
+ use_links=use_links,
1217
+ vfs_cache_mode=vfs_cache_mode,
1218
+ verbose=verbose,
1219
+ cache_dir=cache_dir,
1220
+ cache_dir_delete_on_exit=cache_dir_delete_on_exit,
1221
+ log=log,
1222
+ other_args=other_args,
1223
+ )
1224
+ try:
1225
+ yield mount
1226
+ except Exception as e:
1227
+ error_happened = True
1228
+ stack_trace = traceback.format_exc()
1229
+ warnings.warn(f"Error in scoped_mount: {e}\n\nStack Trace:\n{stack_trace}")
1230
+ raise
1231
+ finally:
1232
+ if not error_happened or (not allow_writes):
1233
+ mount.close()
1234
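+
+ # Example usage (sketch): same as mount, but torn down automatically when the block
+ # exits, assuming a configured remote named "remote:".
+ #
+ #   with rclone.scoped_mount("remote:bucket", Path("mnt")) as mount:
+ #       ...  # read files under ./mnt while the mount is active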
+
1235
+ # Settings optimized for s3.
1236
+ def mount_s3(
1237
+ self,
1238
+ url: str,
1239
+ outdir: Path,
1240
+ allow_writes=False,
1241
+ vfs_cache_mode="full",
1242
+ dir_cache_time: str | None = "1h",
1243
+ attribute_timeout: str | None = "1h",
1244
+ vfs_disk_space_total_size: str | None = "100M",
1245
+ transfers: int | None = 128,
1246
+ modtime_strategy: (
1247
+ ModTimeStrategy | None
1248
+ ) = ModTimeStrategy.USE_SERVER_MODTIME, # speeds up S3 operations
1249
+ vfs_read_chunk_streams: int | None = 16,
1250
+ vfs_read_chunk_size: str | None = "4M",
1251
+ vfs_fast_fingerprint: bool = True,
1252
+ # vfs-refresh
1253
+ vfs_refresh: bool = True,
1254
+ other_args: list[str] | None = None,
1255
+ ) -> Mount:
1256
+ """Mount a remote or directory to a local path.
1257
+
1258
+ Args:
1259
+ url: Remote path (e.g. "remote:bucket") to mount
1260
+ outdir: Local path to mount to
1261
+ """
1262
+ other_args = other_args or []
1263
+ if modtime_strategy is not None:
1264
+ other_args.append(f"--{modtime_strategy.value}")
1265
+ if (vfs_cache_mode == "full" or vfs_cache_mode == "writes") and (
1266
+ transfers is not None and "--transfers" not in other_args
1267
+ ):
1268
+ other_args.append("--transfers")
1269
+ other_args.append(str(transfers))
1270
+ if dir_cache_time is not None and "--dir-cache-time" not in other_args:
1271
+ other_args.append("--dir-cache-time")
1272
+ other_args.append(dir_cache_time)
1273
+ if (
1274
+ vfs_disk_space_total_size is not None
1275
+ and "--vfs-cache-max-size" not in other_args
1276
+ ):
1277
+ other_args.append("--vfs-cache-max-size")
1278
+ other_args.append(vfs_disk_space_total_size)
1279
+ if vfs_refresh and "--vfs-refresh" not in other_args:
1280
+ other_args.append("--vfs-refresh")
1281
+ if attribute_timeout is not None and "--attr-timeout" not in other_args:
1282
+ other_args.append("--attr-timeout")
1283
+ other_args.append(attribute_timeout)
1284
+ if vfs_read_chunk_streams:
1285
+ other_args.append("--vfs-read-chunk-streams")
1286
+ other_args.append(str(vfs_read_chunk_streams))
1287
+ if vfs_read_chunk_size:
1288
+ other_args.append("--vfs-read-chunk-size")
1289
+ other_args.append(vfs_read_chunk_size)
1290
+ if vfs_fast_fingerprint:
1291
+ other_args.append("--vfs-fast-fingerprint")
1292
+
1293
+ other_args = other_args if other_args else None
1294
+ return self.mount(
1295
+ url,
1296
+ outdir,
1297
+ allow_writes=allow_writes,
1298
+ vfs_cache_mode=vfs_cache_mode,
1299
+ other_args=other_args,
1300
+ )
1301
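+
+ # Example usage (sketch): mount an S3-backed remote with the tuned defaults above,
+ # assuming a configured S3 (or B2) remote named "s3:".
+ #
+ #   mount = rclone.mount_s3("s3:bucket", Path("mnt"))
+ #   try:
+ #       ...  # read files under ./mnt
+ #   finally:
+ #       mount.close()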
+
1302
+ def serve_webdav(
1303
+ self,
1304
+ src: Remote | Dir | str,
1305
+ user: str,
1306
+ password: str,
1307
+ addr: str = "localhost:2049",
1308
+ allow_other: bool = False,
1309
+ other_args: list[str] | None = None,
1310
+ ) -> Process:
1311
+ """Serve a remote or directory via NFS.
1312
+
1313
+ Args:
1314
+ src: Remote or directory to serve
1315
+ addr: Network address and port to serve on (default: localhost:2049)
1316
+ allow_other: Allow other users to access the share
1317
+
1318
+ Returns:
1319
+ Process: The running WebDAV server process
1320
+
1321
+ Raises:
1322
+ ValueError: If the WebDAV server fails to start
1323
+ """
1324
+ src_str = convert_to_str(src)
1325
+ cmd_list: list[str] = ["serve", "webdav", "--addr", addr, src_str]
1326
+ cmd_list.extend(["--user", user, "--pass", password])
1327
+ if allow_other:
1328
+ cmd_list.append("--allow-other")
1329
+ if other_args:
1330
+ cmd_list += other_args
1331
+ proc = self._launch_process(cmd_list)
1332
+ time.sleep(2) # give it a moment to start
1333
+ if proc.poll() is not None:
1334
+ raise ValueError("NFS serve process failed to start")
1335
+ return proc
1336
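+
+ # Example usage (sketch): serve a remote over WebDAV on the default address,
+ # assuming a configured remote named "remote:".
+ #
+ #   proc = rclone.serve_webdav("remote:bucket", user="user", password="secret")
+ #   assert proc.poll() is None  # server is running; stop the process when done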
+
1337
+ def size_files(
1338
+ self,
1339
+ src: str,
1340
+ files: list[str],
1341
+ fast_list: bool = False, # Recommend that this is False
1342
+ other_args: list[str] | None = None,
1343
+ check: bool | None = False,
1344
+ verbose: bool | None = None,
1345
+ ) -> SizeResult:
1346
+ """Get the size of a list of files. Example of files items: "remote:bucket/to/file"."""
1347
+ verbose = get_verbose(verbose)
1348
+ check = get_check(check)
1349
+ if fast_list or (other_args and "--fast-list" in other_args):
1350
+ warnings.warn(
1351
+ "It's not recommended to use --fast-list with size_files as this will perform poorly on large repositories since the entire repository has to be scanned."
1352
+ )
1353
+ files = list(files)
1354
+ all_files: list[File] = []
1355
+ # prefix, files = group_under_one_prefix(src, files)
1356
+ cmd = ["lsjson", src, "--files-only", "-R"]
1357
+ with TemporaryDirectory() as tmpdir:
1358
+ # print("files: " + ",".join(files))
1359
+ include_files_txt = Path(tmpdir) / "include_files.txt"
1360
+ include_files_txt.write_text("\n".join(files), encoding="utf-8")
1361
+ cmd += ["--files-from", str(include_files_txt)]
1362
+ if fast_list:
1363
+ cmd.append("--fast-list")
1364
+ if other_args:
1365
+ cmd += other_args
1366
+ cp = self._run(cmd, check=check)
1367
+
1368
+ if cp.returncode != 0:
1369
+ if check:
1370
+ raise ValueError(f"Error getting file sizes: {cp.stderr}")
1371
+ else:
1372
+ warnings.warn(f"Error getting file sizes: {cp.stderr}")
1373
+ stdout = cp.stdout
1374
+ pieces = src.split(":", 1)
1375
+ remote_name = pieces[0]
1376
+ parent_path: str | None
1377
+ if len(pieces) > 1:
1378
+ parent_path = pieces[1]
1379
+ else:
1380
+ parent_path = None
1381
+ remote = Remote(name=remote_name, rclone=self)
1382
+ paths: list[RPath] = RPath.from_json_str(
1383
+ stdout, remote, parent_path=parent_path
1384
+ )
1385
+ # print(paths)
1386
+ all_files += [File(p) for p in paths]
1387
+ file_sizes: dict[str, int] = {}
1388
+ f: File
1389
+ for f in all_files:
1390
+ p = f.to_string(include_remote=True)
1391
+ if p in file_sizes:
1392
+ warnings.warn(f"Duplicate file found: {p}")
1393
+ continue
1394
+ size = f.size
1395
+ if size == 0:
1396
+ warnings.warn(f"File size is 0: {p}")
1397
+ file_sizes[p] = f.size
1398
+ total_size = sum(file_sizes.values())
1399
+ file_sizes_path_corrected: dict[str, int] = {}
1400
+ for path, size in file_sizes.items():
1401
+ # remove the prefix
1402
+ path_path = Path(path)
1403
+ path_str = path_path.relative_to(src).as_posix()
1404
+ file_sizes_path_corrected[path_str] = size
1405
+ out: SizeResult = SizeResult(
1406
+ prefix=src, total_size=total_size, file_sizes=file_sizes_path_corrected
1407
+ )
1408
+ return out
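+
+ # Example usage (sketch): total the size of a specific set of files given as paths
+ # relative to src, assuming a configured remote named "remote:".
+ #
+ #   result = rclone.size_files("remote:bucket", ["a/one.bin", "b/two.bin"])
+ #   print(result.total_size)
+ #   print(result.file_sizes)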