rclone-api 1.5.40__py3-none-any.whl → 1.5.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rclone_api/rclone_impl.py CHANGED
@@ -1,1360 +1,1360 @@
- """
- Unit test file.
- """
-
- import os
- import random
- import subprocess
- import time
- import tracemalloc
- import warnings
- from concurrent.futures import Future, ThreadPoolExecutor
- from datetime import datetime
- from fnmatch import fnmatch
- from pathlib import Path
- from tempfile import TemporaryDirectory
- from typing import Generator
-
- from rclone_api import Dir
- from rclone_api.completed_process import CompletedProcess
- from rclone_api.config import Config, Parsed, Section
- from rclone_api.convert import convert_to_filestr_list, convert_to_str
- from rclone_api.deprecated import deprecated
- from rclone_api.detail.walk import walk
- from rclone_api.diff import DiffItem, DiffOption, diff_stream_from_running_process
- from rclone_api.dir_listing import DirListing
- from rclone_api.exec import RcloneExec
- from rclone_api.file import File
- from rclone_api.file_stream import FilesStream
- from rclone_api.fs import FSPath, RemoteFS
- from rclone_api.group_files import group_files
- from rclone_api.http_server import HttpServer
- from rclone_api.mount import Mount
- from rclone_api.process import Process
- from rclone_api.remote import Remote
- from rclone_api.rpath import RPath
- from rclone_api.s3.create import S3Credentials
- from rclone_api.s3.types import (
-     S3Provider,
- )
- from rclone_api.types import (
-     ListingOption,
-     ModTimeStrategy,
-     Order,
-     PartInfo,
-     SizeResult,
-     SizeSuffix,
- )
- from rclone_api.util import (
-     find_free_port,
-     get_check,
-     get_rclone_exe,
-     get_verbose,
-     to_path,
- )
-
- # Enable tracing memory usage always
- tracemalloc.start()
-
-
- def rclone_verbose(verbose: bool | None) -> bool:
-     if verbose is not None:
-         os.environ["RCLONE_API_VERBOSE"] = "1" if verbose else "0"
-     return bool(int(os.getenv("RCLONE_API_VERBOSE", "0")))
-
-
- def _to_rclone_conf(config: Config | Path) -> Config:
-     if isinstance(config, Path):
-         content = config.read_text(encoding="utf-8")
-         return Config(content)
-     else:
-         return config
-
-
- def _parse_paths(src: str) -> list[Path] | Exception:
-     # Config file: C:\Users\niteris\AppData\Roaming\rclone\rclone.conf
-     # Cache dir: C:\Users\niteris\AppData\Local\rclone
-     # Temp dir: C:\Users\niteris\AppData\Local\Temp
-     lines = src.splitlines()
-     paths: list[Path] = []
-     for line in lines:
-         try:
-             parts = line.split(":")
-             if len(parts) != 2:
-                 continue
-             path = Path(parts[1].strip())
-             paths.append(path)
-         except Exception as e:
-             return e
-     return paths
-
-
- class RcloneImpl:
-     def __init__(
-         self, rclone_conf: Path | Config | None, rclone_exe: Path | None = None
-     ) -> None:
-         if isinstance(rclone_conf, Path):
-             if not rclone_conf.exists():
-                 raise ValueError(f"Rclone config file not found: {rclone_conf}")
-         if rclone_conf is None:
-             from rclone_api.config import find_conf_file
-
-             maybe_path = find_conf_file(self)
-             if not isinstance(maybe_path, Path):
-                 raise ValueError("Rclone config file not found")
-             rclone_conf = _to_rclone_conf(maybe_path)
-         self._exec = RcloneExec(rclone_conf, get_rclone_exe(rclone_exe))
-         self.config: Config = _to_rclone_conf(rclone_conf)
-
-     def _run(
-         self, cmd: list[str], check: bool = False, capture: bool | Path | None = None
-     ) -> subprocess.CompletedProcess:
-         return self._exec.execute(cmd, check=check, capture=capture)
-
-     def _launch_process(
-         self, cmd: list[str], capture: bool | None = None, log: Path | None = None
-     ) -> Process:
-         return self._exec.launch_process(cmd, capture=capture, log=log)
-
-     def _get_tmp_mount_dir(self) -> Path:
-         return Path("tmp_mnts")
-
-     def _get_cache_dir(self) -> Path:
-         return Path("cache")
-
-     def webgui(self, other_args: list[str] | None = None) -> Process:
-         """Launch the Rclone web GUI."""
-         cmd = ["rcd", "--rc-web-gui"]
-         if other_args:
-             cmd += other_args
-         return self._launch_process(cmd, capture=False)
-
-     def filesystem(self, src: str) -> RemoteFS:
-         return RemoteFS(self.config, src)
-
-     def cwd(self, src: str) -> FSPath:
-         return self.filesystem(src).cwd()
-
-     def launch_server(
-         self,
-         addr: str,
-         user: str | None = None,
-         password: str | None = None,
-         other_args: list[str] | None = None,
-     ) -> Process:
-         """Launch the Rclone server so it can receive commands"""
-         cmd = ["rcd"]
-         if addr is not None:
-             cmd += ["--rc-addr", addr]
-         if user is not None:
-             cmd += ["--rc-user", user]
-         if password is not None:
-             cmd += ["--rc-pass", password]
-         if other_args:
-             cmd += other_args
-         out = self._launch_process(cmd, capture=False)
-         time.sleep(1)  # Give it some time to launch
-         return out
-
-     def remote_control(
-         self,
-         addr: str,
-         user: str | None = None,
-         password: str | None = None,
-         capture: bool | None = None,
-         other_args: list[str] | None = None,
-     ) -> CompletedProcess:
-         cmd = ["rc"]
-         if addr:
-             cmd += ["--rc-addr", addr]
-         if user is not None:
-             cmd += ["--rc-user", user]
-         if password is not None:
-             cmd += ["--rc-pass", password]
-         if other_args:
-             cmd += other_args
-         cp = self._run(cmd, capture=capture)
-         return CompletedProcess.from_subprocess(cp)
-
-     def obscure(self, password: str) -> str:
-         """Obscure a password for use in rclone config files."""
-         cmd_list: list[str] = ["obscure", password]
-         cp = self._run(cmd_list)
-         return cp.stdout.strip()
-
-     def ls_stream(
-         self,
-         src: str,
-         max_depth: int = -1,
-         fast_list: bool = False,
-     ) -> FilesStream:
-         """
-         List files in the given path
-
-         Args:
-             src: Remote path to list
-             max_depth: Maximum recursion depth (-1 for unlimited)
-             fast_list: Use fast list (only use when getting THE entire data repository from the root/bucket, or it's small)
-         """
-         cmd = ["lsjson", src, "--files-only"]
-         recurse = max_depth < 0 or max_depth > 1
-         if recurse:
-             cmd.append("-R")
-             if max_depth > 1:
-                 cmd += ["--max-depth", str(max_depth)]
-         if fast_list:
-             cmd.append("--fast-list")
-         streamer = FilesStream(src, self._launch_process(cmd, capture=True))
-         return streamer
-
-     def save_to_db(
-         self,
-         src: str,
-         db_url: str,
-         max_depth: int = -1,
-         fast_list: bool = False,
-     ) -> None:
-         """
-         Save files to a database (sqlite, mysql, postgres)
-
-         Args:
-             src: Remote path to list, this will be used to populate an entire table, so always use the root-most path.
-             db_url: Database URL, like sqlite:///data.db or mysql://user:pass@localhost/db or postgres://user:pass@localhost/db
-             max_depth: Maximum depth to traverse (-1 for unlimited)
-             fast_list: Use fast list (only use when getting THE entire data repository from the root/bucket)
-
-         """
-         from rclone_api.db import DB
-
-         db = DB(db_url)
-         with self.ls_stream(src, max_depth, fast_list) as stream:
-             for page in stream.files_paged(page_size=10000):
-                 db.add_files(page)
-
-     def ls(
-         self,
-         src: Dir | Remote | str | None = None,
-         max_depth: int | None = None,
-         glob: str | None = None,
-         order: Order = Order.NORMAL,
-         listing_option: ListingOption = ListingOption.ALL,
-     ) -> DirListing:
-         """List files in the given path.
-
-         Args:
-             src: Remote path or Remote object to list
-             max_depth: Maximum recursion depth (0 means no recursion)
-
-         Returns:
-             List of File objects found at the path
-         """
-
-         if src is None:
-             # list remotes instead
-             list_remotes: list[Remote] = self.listremotes()
-             dirs: list[Dir] = [Dir(remote) for remote in list_remotes]
-             for d in dirs:
-                 d.path.path = ""
-             rpaths = [d.path for d in dirs]
-             return DirListing(rpaths)
-
-         if isinstance(src, str):
-             src = Dir(
-                 to_path(src, self)
-             )  # assume it's a directory if ls is being called.
-
-         cmd = ["lsjson"]
-         if max_depth is not None:
-             if max_depth < 0:
-                 cmd.append("--recursive")
-             if max_depth > 0:
-                 cmd.append("--max-depth")
-                 cmd.append(str(max_depth))
-         if listing_option != ListingOption.ALL:
-             cmd.append(f"--{listing_option.value}")
-
-         cmd.append(str(src))
-         remote = src.remote if isinstance(src, Dir) else src
-         assert isinstance(remote, Remote)
-
-         cp = self._run(cmd, check=True)
-         text = cp.stdout
-         parent_path: str | None = None
-         if isinstance(src, Dir):
-             parent_path = src.path.path
-         paths: list[RPath] = RPath.from_json_str(text, remote, parent_path=parent_path)
-         # print(parent_path)
-         for o in paths:
-             o.set_rclone(self)
-
-         # do we have a glob pattern?
-         if glob is not None:
-             paths = [p for p in paths if fnmatch(p.path, glob)]
-
-         if order == Order.REVERSE:
-             paths.reverse()
-         elif order == Order.RANDOM:
-             random.shuffle(paths)
-         return DirListing(paths)
-
-     def print(self, src: str) -> Exception | None:
-         """Print the contents of a file."""
-         try:
-             text_or_err = self.read_text(src)
-             if isinstance(text_or_err, Exception):
-                 return text_or_err
-             print(text_or_err)
-         except Exception as e:
-             return e
-         return None
-
-     def stat(self, src: str) -> File | Exception:
-         """Get the status of a file or directory."""
-         dirlist: DirListing = self.ls(src)
-         if len(dirlist.files) == 0:
-             # raise FileNotFoundError(f"File not found: {src}")
-             return FileNotFoundError(f"File not found: {src}")
-         try:
-             file: File = dirlist.files[0]
-             return file
-         except Exception as e:
-             return e
-
-     def modtime(self, src: str) -> str | Exception:
-         """Get the modification time of a file or directory."""
-         try:
-             file: File | Exception = self.stat(src)
-             if isinstance(file, Exception):
-                 return file
-             return file.mod_time()
-         except Exception as e:
-             return e
-
-     def modtime_dt(self, src: str) -> datetime | Exception:
-         """Get the modification time of a file or directory."""
-         modtime: str | Exception = self.modtime(src)
-         if isinstance(modtime, Exception):
-             return modtime
-         return datetime.fromisoformat(modtime)
-
-     def listremotes(self) -> list[Remote]:
-         cmd = ["listremotes"]
-         cp = self._run(cmd)
-         text: str = cp.stdout
-         tmp = text.splitlines()
-         tmp = [t.strip() for t in tmp]
-         # strip out ":" from the end
-         tmp = [t.replace(":", "") for t in tmp]
-         out = [Remote(name=t, rclone=self) for t in tmp]
-         return out
-
-     def diff(
-         self,
-         src: str,
-         dst: str,
-         min_size: (
-             str | None
-         ) = None,  # e. g. "1MB" - see rclone documentation: https://rclone.org/commands/rclone_check/
-         max_size: (
-             str | None
-         ) = None,  # e. g. "1GB" - see rclone documentation: https://rclone.org/commands/rclone_check/
-         diff_option: DiffOption = DiffOption.COMBINED,
-         fast_list: bool = True,
-         size_only: bool | None = None,
-         checkers: int | None = None,
-         other_args: list[str] | None = None,
-     ) -> Generator[DiffItem, None, None]:
-         """Be extra careful with the src and dst values. If you are off by one
-         parent directory, you will get a huge amount of false diffs."""
-         other_args = other_args or []
-         if checkers is None or checkers < 1:
-             checkers = 1000
-         cmd = [
-             "check",
-             src,
-             dst,
-             "--checkers",
-             str(checkers),
-             "--log-level",
-             "INFO",
-             f"--{diff_option.value}",
-             "-",
-         ]
-         if size_only is None:
-             size_only = diff_option in [
-                 DiffOption.MISSING_ON_DST,
-                 DiffOption.MISSING_ON_SRC,
-             ]
-         if size_only:
-             cmd += ["--size-only"]
-         if fast_list:
-             cmd += ["--fast-list"]
-         if min_size:
-             cmd += ["--min-size", min_size]
-         if max_size:
-             cmd += ["--max-size", max_size]
-         if diff_option == DiffOption.MISSING_ON_DST:
-             cmd += ["--one-way"]
-         if other_args:
-             cmd += other_args
-         proc = self._launch_process(cmd, capture=True)
-         item: DiffItem
-         for item in diff_stream_from_running_process(
-             running_process=proc, src_slug=src, dst_slug=dst, diff_option=diff_option
-         ):
-             if item is None:
-                 break
-             yield item
-
-     def walk(
-         self,
-         src: Dir | Remote | str,
-         max_depth: int = -1,
-         breadth_first: bool = True,
-         order: Order = Order.NORMAL,
-     ) -> Generator[DirListing, None, None]:
-         """Walk through the given path recursively.
-
-         Args:
-             src: Remote path or Remote object to walk through
-             max_depth: Maximum depth to traverse (-1 for unlimited)
-
-         Yields:
-             DirListing: Directory listing for each directory encountered
-         """
-         dir_obj: Dir
-         if isinstance(src, Dir):
-             # Create a Remote object for the path
-             remote = src.remote
-             rpath = RPath(
-                 remote=remote,
-                 path=src.path.path,
-                 name=src.path.name,
-                 size=0,
-                 mime_type="inode/directory",
-                 mod_time="",
-                 is_dir=True,
-             )
-             rpath.set_rclone(self)
-             dir_obj = Dir(rpath)
-         elif isinstance(src, str):
-             dir_obj = Dir(to_path(src, self))
-         elif isinstance(src, Remote):
-             dir_obj = Dir(src)
-         else:
-             dir_obj = Dir(src)  # shut up pyright
-             assert f"Invalid type for path: {type(src)}"
-
-         yield from walk(
-             dir_obj, max_depth=max_depth, breadth_first=breadth_first, order=order
-         )
-
-     def scan_missing_folders(
-         self,
-         src: Dir | Remote | str,
-         dst: Dir | Remote | str,
-         max_depth: int = -1,
-         order: Order = Order.NORMAL,
-     ) -> Generator[Dir, None, None]:
-         """Walk through the given path recursively.
-
-         WORK IN PROGRESS!!
-
-         Args:
-             src: Source directory or Remote to walk through
-             dst: Destination directory or Remote to walk through
-             max_depth: Maximum depth to traverse (-1 for unlimited)
-
-         Yields:
-             DirListing: Directory listing for each directory encountered
-         """
-         from rclone_api.scan_missing_folders import scan_missing_folders
-
-         src_dir = Dir(to_path(src, self))
-         dst_dir = Dir(to_path(dst, self))
-         yield from scan_missing_folders(
-             src=src_dir, dst=dst_dir, max_depth=max_depth, order=order
-         )
-
-     def cleanup(
-         self, src: str, other_args: list[str] | None = None
-     ) -> CompletedProcess:
-         """Cleanup any resources used by the Rclone instance."""
-         # rclone cleanup remote:path [flags]
-         cmd = ["cleanup", src]
-         if other_args:
-             cmd += other_args
-         out = self._run(cmd)
-         return CompletedProcess.from_subprocess(out)
-
-     def get_verbose(self) -> bool:
-         return get_verbose(None)
-
-     def copy_to(
-         self,
-         src: File | str,
-         dst: File | str,
-         check: bool | None = None,
-         verbose: bool | None = None,
-         other_args: list[str] | None = None,
-     ) -> CompletedProcess:
-         """Copy one file from source to destination.
-
-         Warning - slow.
-
-         """
-         check = get_check(check)
-         verbose = get_verbose(verbose)
-         src = src if isinstance(src, str) else str(src.path)
-         dst = dst if isinstance(dst, str) else str(dst.path)
-         cmd_list: list[str] = ["copyto", src, dst, "--s3-no-check-bucket"]
-         if other_args is not None:
-             cmd_list += other_args
-         cp = self._run(cmd_list, check=check)
-         return CompletedProcess.from_subprocess(cp)
-
-     def copy_files(
-         self,
-         src: str,
-         dst: str,
-         files: list[str] | Path,
-         check: bool | None = None,
-         max_backlog: int | None = None,
-         verbose: bool | None = None,
-         checkers: int | None = None,
-         transfers: int | None = None,
-         low_level_retries: int | None = None,
-         retries: int | None = None,
-         retries_sleep: str | None = None,
-         metadata: bool | None = None,
-         timeout: str | None = None,
-         max_partition_workers: int | None = None,
-         multi_thread_streams: int | None = None,
-         other_args: list[str] | None = None,
-     ) -> list[CompletedProcess]:
-         """Copy multiple files from source to destination.
-
-         Args:
-             payload: Dictionary of source and destination file paths
-         """
-         check = get_check(check)
-         max_partition_workers = max_partition_workers or 1
-         low_level_retries = low_level_retries or 10
-         retries = retries or 3
-         other_args = other_args or []
-         other_args.append("--s3-no-check-bucket")
-         checkers = checkers or 1000
-         transfers = transfers or 32
-         verbose = get_verbose(verbose)
-         payload: list[str] = (
-             files
-             if isinstance(files, list)
-             else [f.strip() for f in files.read_text().splitlines() if f.strip()]
-         )
-         if len(payload) == 0:
-             return []
-
-         for p in payload:
-             if ":" in p:
-                 raise ValueError(
-                     f"Invalid file path, contains a remote, which is not allowed for copy_files: {p}"
-                 )
-
-         using_fast_list = "--fast-list" in other_args
-         if using_fast_list:
-             warnings.warn(
-                 "It's not recommended to use --fast-list with copy_files as this will perform poorly on large repositories since the entire repository has to be scanned."
-             )
-
-         if max_partition_workers > 1:
-             datalists: dict[str, list[str]] = group_files(
-                 payload, fully_qualified=False
-             )
-         else:
-             datalists = {"": payload}
-         # out: subprocess.CompletedProcess | None = None
-         out: list[CompletedProcess] = []
-
-         futures: list[Future] = []
-
-         with ThreadPoolExecutor(max_workers=max_partition_workers) as executor:
-             for common_prefix, files in datalists.items():
-
-                 def _task(
-                     files: list[str] | Path = files,
-                 ) -> subprocess.CompletedProcess:
-                     with TemporaryDirectory() as tmpdir:
-                         filelist: list[str] = []
-                         filepath: Path
-                         if isinstance(files, list):
-                             include_files_txt = Path(tmpdir) / "include_files.txt"
-                             include_files_txt.write_text(
-                                 "\n".join(files), encoding="utf-8"
-                             )
-                             filelist = list(files)
-                             filepath = Path(include_files_txt)
-                         elif isinstance(files, Path):
-                             filelist = [
-                                 f.strip()
-                                 for f in files.read_text().splitlines()
-                                 if f.strip()
-                             ]
-                             filepath = files
-                         if common_prefix:
-                             src_path = f"{src}/{common_prefix}"
-                             dst_path = f"{dst}/{common_prefix}"
-                         else:
-                             src_path = src
-                             dst_path = dst
-
-                         if verbose:
-                             nfiles = len(filelist)
-                             files_fqdn = [f" {src_path}/{f}" for f in filelist]
-                             print(f"Copying {nfiles} files:")
-                             chunk_size = 100
-                             for i in range(0, nfiles, chunk_size):
-                                 chunk = files_fqdn[i : i + chunk_size]
-                                 files_str = "\n".join(chunk)
-                                 print(f"{files_str}")
-                         cmd_list: list[str] = [
-                             "copy",
-                             src_path,
-                             dst_path,
-                             "--files-from",
-                             str(filepath),
-                             "--checkers",
-                             str(checkers),
-                             "--transfers",
-                             str(transfers),
-                             "--low-level-retries",
-                             str(low_level_retries),
-                             "--retries",
-                             str(retries),
-                         ]
-                         if metadata:
-                             cmd_list.append("--metadata")
-                         if retries_sleep is not None:
-                             cmd_list += ["--retries-sleep", retries_sleep]
-                         if timeout is not None:
-                             cmd_list += ["--timeout", timeout]
-                         if max_backlog is not None:
-                             cmd_list += ["--max-backlog", str(max_backlog)]
-                         if multi_thread_streams is not None:
-                             cmd_list += [
-                                 "--multi-thread-streams",
-                                 str(multi_thread_streams),
-                             ]
-                         if verbose:
-                             if not any(["-v" in x for x in other_args]):
-                                 cmd_list.append("-vvvv")
-                             if not any(["--progress" in x for x in other_args]):
-                                 cmd_list.append("--progress")
-                         if other_args:
-                             cmd_list += other_args
-                         out = self._run(cmd_list, capture=not verbose)
-                         return out
-
-                 fut: Future = executor.submit(_task)
-                 futures.append(fut)
-         for fut in futures:
-             cp: subprocess.CompletedProcess = fut.result()
-             assert cp is not None
-             out.append(CompletedProcess.from_subprocess(cp))
-             if cp.returncode != 0:
-                 if check:
-                     raise ValueError(f"Error deleting files: {cp.stderr}")
-                 else:
-                     warnings.warn(f"Error deleting files: {cp.stderr}")
-         return out
-
-     def copy(
-         self,
-         src: Dir | str,
-         dst: Dir | str,
-         check: bool | None = None,
-         transfers: int | None = None,
-         checkers: int | None = None,
-         multi_thread_streams: int | None = None,
-         low_level_retries: int | None = None,
-         retries: int | None = None,
-         other_args: list[str] | None = None,
-     ) -> CompletedProcess:
-         """Copy files from source to destination.
-
-         Args:
-             src: Source directory
-             dst: Destination directory
-         """
-         # src_dir = src.path.path
-         # dst_dir = dst.path.path
-         src_dir = convert_to_str(src)
-         dst_dir = convert_to_str(dst)
-         check = get_check(check)
-         checkers = checkers or 1000
-         transfers = transfers or 32
-         low_level_retries = low_level_retries or 10
-         retries = retries or 3
-         cmd_list: list[str] = ["copy", src_dir, dst_dir]
-         cmd_list += ["--checkers", str(checkers)]
-         cmd_list += ["--transfers", str(transfers)]
-         cmd_list += ["--low-level-retries", str(low_level_retries)]
-         cmd_list.append("--s3-no-check-bucket")
-         if multi_thread_streams is not None:
-             cmd_list += ["--multi-thread-streams", str(multi_thread_streams)]
-         if other_args:
-             cmd_list += other_args
-         cp = self._run(cmd_list, check=check, capture=False)
-         return CompletedProcess.from_subprocess(cp)
-
-     def purge(self, src: Dir | str) -> CompletedProcess:
-         """Purge a directory"""
-         # path should always be a string
-         src = src if isinstance(src, str) else str(src.path)
-         cmd_list: list[str] = ["purge", str(src)]
-         cp = self._run(cmd_list)
-         return CompletedProcess.from_subprocess(cp)
-
-     def delete_files(
-         self,
-         files: str | File | list[str] | list[File],
-         check: bool | None = None,
-         rmdirs=False,
-         verbose: bool | None = None,
-         max_partition_workers: int | None = None,
-         other_args: list[str] | None = None,
-     ) -> CompletedProcess:
-         """Delete a directory"""
-         check = get_check(check)
-         verbose = get_verbose(verbose)
-         payload: list[str] = convert_to_filestr_list(files)
-         if len(payload) == 0:
-             if verbose:
-                 print("No files to delete")
-             cp = subprocess.CompletedProcess(
-                 args=["rclone", "delete", "--files-from", "[]"],
-                 returncode=0,
-                 stdout="",
-                 stderr="",
-             )
-             return CompletedProcess.from_subprocess(cp)
-
-         datalists: dict[str, list[str]] = group_files(payload)
-         completed_processes: list[subprocess.CompletedProcess] = []
-
-         futures: list[Future] = []
-
-         with ThreadPoolExecutor(max_workers=max_partition_workers) as executor:
-
-             for remote, files in datalists.items():
-
-                 def _task(
-                     files=files, check=check, remote=remote
-                 ) -> subprocess.CompletedProcess:
-                     with TemporaryDirectory() as tmpdir:
-                         include_files_txt = Path(tmpdir) / "include_files.txt"
-                         include_files_txt.write_text("\n".join(files), encoding="utf-8")
-
-                         # print(include_files_txt)
-                         cmd_list: list[str] = [
-                             "delete",
-                             remote,
-                             "--files-from",
-                             str(include_files_txt),
-                             "--checkers",
-                             "1000",
-                             "--transfers",
-                             "1000",
-                         ]
-                         if verbose:
-                             cmd_list.append("-vvvv")
-                         if rmdirs:
-                             cmd_list.append("--rmdirs")
-                         if other_args:
-                             cmd_list += other_args
-                         out = self._run(cmd_list, check=check)
-                         if out.returncode != 0:
-                             if check:
-                                 completed_processes.append(out)
-                                 raise ValueError(f"Error deleting files: {out}")
-                             else:
-                                 warnings.warn(f"Error deleting files: {out}")
-                         return out
-
-                 fut: Future = executor.submit(_task)
-                 futures.append(fut)
-
-             for fut in futures:
-                 out = fut.result()
-                 assert out is not None
-                 completed_processes.append(out)
-
-         return CompletedProcess(completed_processes)
-
-     @deprecated("delete_files")
-     def deletefiles(
-         self, files: str | File | list[str] | list[File]
-     ) -> CompletedProcess:
-         out = self.delete_files(files)
-         return out
-
-     def exists(self, src: Dir | Remote | str | File) -> bool:
-         """Check if a file or directory exists."""
-         arg: str = convert_to_str(src)
-         assert isinstance(arg, str)
-         try:
-             dir_listing = self.ls(arg)
-             # print(dir_listing)
-             return len(dir_listing.dirs) > 0 or len(dir_listing.files) > 0
-         except subprocess.CalledProcessError:
-             return False
-
-     def is_synced(self, src: str | Dir, dst: str | Dir) -> bool:
-         """Check if two directories are in sync."""
-         src = convert_to_str(src)
-         dst = convert_to_str(dst)
-         cmd_list: list[str] = ["check", str(src), str(dst)]
-         try:
-             self._run(cmd_list, check=True)
-             return True
-         except subprocess.CalledProcessError:
-             return False
-
-     def copy_file_s3_resumable(
-         self,
-         src: str,  # src:/Bucket/path/myfile.large.zst
-         dst: str,  # dst:/Bucket/path/myfile.large
-         part_infos: list[PartInfo] | None = None,
-         upload_threads: int = 8,
-         merge_threads: int = 4,
-     ) -> Exception | None:
-         """Copy parts of a file from source to destination."""
-         from rclone_api.detail.copy_file_parts_resumable import (
-             copy_file_parts_resumable,
-         )
-
-         if dst.endswith("/"):
-             dst = dst[:-1]
-         dst_dir = f"{dst}-parts"
-
-         out = copy_file_parts_resumable(
-             self=self,
-             src=src,
-             dst_dir=dst_dir,
-             part_infos=part_infos,
-             upload_threads=upload_threads,
-             merge_threads=merge_threads,
-         )
-         return out
-
-     def write_text(
-         self,
-         dst: str,
-         text: str,
-     ) -> Exception | None:
-         """Write text to a file."""
-         data = text.encode("utf-8")
-         return self.write_bytes(dst=dst, data=data)
-
-     def write_bytes(
-         self,
-         dst: str,
-         data: bytes,
-     ) -> Exception | None:
-         """Write bytes to a file."""
-         with TemporaryDirectory() as tmpdir:
-             tmpfile = Path(tmpdir) / "file.bin"
-             tmpfile.write_bytes(data)
-             completed_proc = self.copy_to(str(tmpfile), dst, check=True)
-             if completed_proc.returncode != 0:
-                 return Exception(f"Failed to write bytes to {dst}", completed_proc)
-         return None
-
-     def read_bytes(self, src: str) -> bytes | Exception:
-         """Read bytes from a file."""
-         with TemporaryDirectory() as tmpdir:
-             tmpfile = Path(tmpdir) / "file.bin"
-             completed_proc = self.copy_to(src, str(tmpfile), check=True)
-             if completed_proc.returncode != 0:
-                 return Exception(f"Failed to read bytes from {src}", completed_proc)
-
-             if not tmpfile.exists():
-                 return Exception(f"Failed to read bytes from {src}, file not found")
-             try:
-                 return tmpfile.read_bytes()
-             except Exception as e:
-                 return Exception(f"Failed to read bytes from {src}", e)
-
-     def read_text(self, src: str) -> str | Exception:
-         """Read text from a file."""
-         data = self.read_bytes(src)
-         if isinstance(data, Exception):
-             return data
-         try:
-             return data.decode("utf-8")
-         except UnicodeDecodeError as e:
-             return Exception(f"Failed to decode text from {src}", e)
-
-     def size_file(self, src: str) -> SizeSuffix | Exception:
-         """Get the size of a file or directory."""
-         # src_parent = os.path.dirname(src)
-         # src_name = os.path.basename(src)
-         # can't use this because it's only one file.
-         # out: SizeResult = self.size_files(src_parent, [src_name])
-         # one_file = len(out.file_sizes) == 1
-         # if not one_file:
-         #     return Exception(
-         #         f"More than one result returned, is this is a directory? {out}"
-         #     )
-         # return SizeSuffix(out.total_size)
-         dirlist: DirListing = self.ls(
-             src, listing_option=ListingOption.FILES_ONLY, max_depth=0
-         )
-         if len(dirlist.files) == 0:
-             return FileNotFoundError(f"File not found: {src}")
-         if len(dirlist.files) > 1:
-             return Exception(f"More than one file found: {src}")
-         file: File = dirlist.files[0]
-         return SizeSuffix(file.size)
-
-     def get_s3_credentials(
-         self, remote: str, verbose: bool | None = None
-     ) -> S3Credentials:
-         from rclone_api.util import S3PathInfo, split_s3_path
-
-         verbose = get_verbose(verbose)
-         path_info: S3PathInfo = split_s3_path(remote)
-
-         # path_info: S3PathInfo = split_s3_path(remote)
-         remote = path_info.remote
-         bucket_name = path_info.bucket
-
-         remote = path_info.remote
-         parsed: Parsed = self.config.parse()
-         sections: dict[str, Section] = parsed.sections
-         if remote not in sections:
-             raise ValueError(
-                 f"Remote {remote} not found in rclone config, remotes are: {sections.keys()}"
-             )
-
-         section: Section = sections[remote]
-         dst_type = section.type()
-         if dst_type != "s3" and dst_type != "b2":
-             raise ValueError(
-                 f"Remote {remote} is not an S3 remote, it is of type {dst_type}"
-             )
-
-         def get_provider_str(section=section) -> str | None:
-             type: str = section.type()
-             provider: str | None = section.provider()
-             if provider is not None:
-                 return provider
-             if type == "b2":
-                 return S3Provider.BACKBLAZE.value
-             if type != "s3":
-                 raise ValueError(f"Remote {remote} is not an S3 remote")
-             return S3Provider.S3.value
-
-         provider: str
-         if provided_provider_str := get_provider_str():
-             if verbose:
-                 print(f"Using provided provider: {provided_provider_str}")
-             provider = provided_provider_str
-         else:
-             if verbose:
-                 print(f"Using default provider: {S3Provider.S3.value}")
-             provider = S3Provider.S3.value
-         provider_enum = S3Provider.from_str(provider)
-
-         s3_creds: S3Credentials = S3Credentials(
-             bucket_name=bucket_name,
-             provider=provider_enum,
-             access_key_id=section.access_key_id(),
-             secret_access_key=section.secret_access_key(),
-             endpoint_url=section.endpoint(),
-         )
-         return s3_creds
-
-     def copy_bytes(
-         self,
-         src: str,
-         offset: int | SizeSuffix,
-         length: int | SizeSuffix,
-         outfile: Path,
-         other_args: list[str] | None = None,
-     ) -> Exception | None:
-         """Copy a slice of bytes from the src file to dst."""
-         offset = SizeSuffix(offset).as_int()
-         length = SizeSuffix(length).as_int()
-         cmd_list: list[str] = [
-             "cat",
-             "--offset",
-             str(offset),
-             "--count",
-             str(length),
-             src,
-         ]
-         if other_args:
-             cmd_list.extend(other_args)
-         try:
-             cp = self._run(cmd_list, capture=outfile)
-             if cp.returncode == 0:
-                 return None
-             return Exception(cp.stderr)
-         except subprocess.CalledProcessError as e:
-             return e
-
-     def copy_dir(
-         self, src: str | Dir, dst: str | Dir, args: list[str] | None = None
-     ) -> CompletedProcess:
-         """Copy a directory from source to destination."""
-         # convert src to str, also dst
-         src = convert_to_str(src)
-         dst = convert_to_str(dst)
-         cmd_list: list[str] = ["copy", src, dst, "--s3-no-check-bucket"]
-         if args is not None:
-             cmd_list += args
-         cp = self._run(cmd_list)
-         return CompletedProcess.from_subprocess(cp)
-
-     def copy_remote(
-         self, src: Remote, dst: Remote, args: list[str] | None = None
-     ) -> CompletedProcess:
-         """Copy a remote to another remote."""
-         cmd_list: list[str] = ["copy", str(src), str(dst), "--s3-no-check-bucket"]
-         if args is not None:
-             cmd_list += args
-         # return self._run(cmd_list)
-         cp = self._run(cmd_list)
-         return CompletedProcess.from_subprocess(cp)
-
-     def mount(
-         self,
-         src: Remote | Dir | str,
-         outdir: Path,
-         allow_writes: bool | None = False,
-         transfers: int | None = None,
-         use_links: bool | None = None,
-         vfs_cache_mode: str | None = None,
-         verbose: bool | None = None,
-         cache_dir: Path | None = None,
-         cache_dir_delete_on_exit: bool | None = None,
-         log: Path | None = None,
-         other_args: list[str] | None = None,
-     ) -> Mount:
-         """Mount a remote or directory to a local path.
-
-         Args:
-             src: Remote or directory to mount
-             outdir: Local path to mount to
-
-         Returns:
-             CompletedProcess from the mount command execution
-
-         Raises:
-             subprocess.CalledProcessError: If the mount operation fails
-         """
-         from rclone_api.mount_util import clean_mount, prepare_mount
-
-         allow_writes = allow_writes or False
-         use_links = use_links or True
-         verbose = get_verbose(verbose) or (log is not None)
-         vfs_cache_mode = vfs_cache_mode or "full"
-         clean_mount(outdir, verbose=verbose)
-         prepare_mount(outdir, verbose=verbose)
-         debug_fuse = log is not None
-         src_str = convert_to_str(src)
-         cmd_list: list[str] = ["mount", src_str, str(outdir)]
-         if not allow_writes:
-             cmd_list.append("--read-only")
-         if use_links:
-             cmd_list.append("--links")
-         if vfs_cache_mode:
-             cmd_list.append("--vfs-cache-mode")
-             cmd_list.append(vfs_cache_mode)
-         if cache_dir:
-             cmd_list.append("--cache-dir")
-             cmd_list.append(str(cache_dir.absolute()))
-         if transfers is not None:
-             cmd_list.append("--transfers")
-             cmd_list.append(str(transfers))
-         if debug_fuse:
-             cmd_list.append("--debug-fuse")
-         if verbose:
-             cmd_list.append("-vvvv")
-         if other_args:
-             cmd_list += other_args
-         proc = self._launch_process(cmd_list, log=log)
-         mount_read_only = not allow_writes
-         mount: Mount = Mount(
-             src=src_str,
-             mount_path=outdir,
-             process=proc,
-             read_only=mount_read_only,
-             cache_dir=cache_dir,
-             cache_dir_delete_on_exit=cache_dir_delete_on_exit,
-         )
-         return mount
-
-     # Settings optimized for s3.
-     def mount_s3(
-         self,
-         url: str,
-         outdir: Path,
-         allow_writes=False,
-         vfs_cache_mode="full",
-         dir_cache_time: str | None = "1h",
-         attribute_timeout: str | None = "1h",
-         vfs_disk_space_total_size: str | None = "100M",
-         transfers: int | None = 128,
-         modtime_strategy: (
-             ModTimeStrategy | None
-         ) = ModTimeStrategy.USE_SERVER_MODTIME,  # speeds up S3 operations
-         vfs_read_chunk_streams: int | None = 16,
-         vfs_read_chunk_size: str | None = "4M",
-         vfs_fast_fingerprint: bool = True,
-         # vfs-refresh
-         vfs_refresh: bool = True,
-         other_args: list[str] | None = None,
-     ) -> Mount:
-         """Mount a remote or directory to a local path.
-
-         Args:
-             src: Remote or directory to mount
-             outdir: Local path to mount to
-         """
-         other_args = other_args or []
-         if modtime_strategy is not None:
-             other_args.append(f"--{modtime_strategy.value}")
-         if (vfs_cache_mode == "full" or vfs_cache_mode == "writes") and (
-             transfers is not None and "--transfers" not in other_args
-         ):
-             other_args.append("--transfers")
-             other_args.append(str(transfers))
-         if dir_cache_time is not None and "--dir-cache-time" not in other_args:
-             other_args.append("--dir-cache-time")
-             other_args.append(dir_cache_time)
-         if (
-             vfs_disk_space_total_size is not None
-             and "--vfs-cache-max-size" not in other_args
-         ):
-             other_args.append("--vfs-cache-max-size")
-             other_args.append(vfs_disk_space_total_size)
-         if vfs_refresh and "--vfs-refresh" not in other_args:
-             other_args.append("--vfs-refresh")
-         if attribute_timeout is not None and "--attr-timeout" not in other_args:
-             other_args.append("--attr-timeout")
-             other_args.append(attribute_timeout)
-         if vfs_read_chunk_streams:
-             other_args.append("--vfs-read-chunk-streams")
-             other_args.append(str(vfs_read_chunk_streams))
-         if vfs_read_chunk_size:
-             other_args.append("--vfs-read-chunk-size")
-             other_args.append(vfs_read_chunk_size)
-         if vfs_fast_fingerprint:
-             other_args.append("--vfs-fast-fingerprint")
-
-         other_args = other_args if other_args else None
-         return self.mount(
-             url,
-             outdir,
-             allow_writes=allow_writes,
-             vfs_cache_mode=vfs_cache_mode,
-             other_args=other_args,
-         )
-
-     def serve_webdav(
-         self,
-         src: Remote | Dir | str,
-         user: str,
-         password: str,
-         addr: str = "localhost:2049",
-         allow_other: bool = False,
-         other_args: list[str] | None = None,
-     ) -> Process:
-         """Serve a remote or directory via NFS.
-
-         Args:
-             src: Remote or directory to serve
-             addr: Network address and port to serve on (default: localhost:2049)
-             allow_other: Allow other users to access the share
-
-         Returns:
-             Process: The running webdev server process
-
-         Raises:
-             ValueError: If the NFS server fails to start
-         """
-         src_str = convert_to_str(src)
-         cmd_list: list[str] = ["serve", "webdav", "--addr", addr, src_str]
-         cmd_list.extend(["--user", user, "--pass", password])
-         if allow_other:
-             cmd_list.append("--allow-other")
-         if other_args:
-             cmd_list += other_args
-         proc = self._launch_process(cmd_list)
-         time.sleep(2)  # give it a moment to start
-         if proc.poll() is not None:
-             raise ValueError("NFS serve process failed to start")
-         return proc
-
-     def serve_http(
-         self,
-         src: str,
-         cache_mode: str | None,
-         addr: str | None = None,
-         serve_http_log: Path | None = None,
-         other_args: list[str] | None = None,
-     ) -> HttpServer:
-         """Serve a remote or directory via HTTP.
-
-         Args:
-             src: Remote or directory to serve
-             addr: Network address and port to serve on (default: localhost:8080)
-         """
-         addr = addr or f"localhost:{find_free_port()}"
-         _, subpath = src.split(":", 1)  # might not work on local paths.
-         cmd_list: list[str] = [
-             "serve",
-             "http",
-             "--addr",
-             addr,
-             src,
-             "--vfs-disk-space-total-size",
-             "0",
-             "--vfs-read-chunk-size-limit",
-             "512M",
-         ]
-
-         if cache_mode:
-             cmd_list += [
-                 "--vfs-cache-mode",
-                 cache_mode,
-             ]
-         if serve_http_log:
-             cmd_list += ["--log-file", str(serve_http_log)]
-             cmd_list += ["-vvvv"]
-         if other_args:
-             cmd_list += other_args
-         proc = self._launch_process(cmd_list, log=serve_http_log)
-         time.sleep(2)
-         if proc.poll() is not None:
-             raise ValueError("HTTP serve process failed to start")
-         out: HttpServer = HttpServer(
-             url=f"http://{addr}", subpath=subpath, process=proc
-         )
-         return out
-
-     def config_paths(
-         self, remote: str | None = None, obscure: bool = False, no_obscure: bool = False
-     ) -> list[Path] | Exception:
-         """Show the current configuration.
-
-         Args:
-             remote: Optional remote name to show configuration for
-             obscure: Show obscured passwords
-             no_obscure: Show passwords in plain text
-
-         Returns:
-             Configuration as text or an Exception if an error occurred
-         """
-         cmd_list: list[str] = ["config", "show"]
-
-         if remote is not None:
-             cmd_list.append(remote)
-
-         if obscure:
-             cmd_list.append("--obscure")
-
-         if no_obscure:
-             cmd_list.append("--no-obscure")
-
-         try:
-             cp = self._run(cmd_list, capture=True, check=True)
-             stdout: str | bytes = cp.stdout
-             if isinstance(stdout, bytes):
-                 stdout = stdout.decode("utf-8")
-             out = _parse_paths(stdout)
-             return out
-         except subprocess.CalledProcessError as e:
-             return e
-
-     def size_files(
-         self,
-         src: str,
-         files: list[str],
-         fast_list: bool = False,  # Recommend that this is False
-         other_args: list[str] | None = None,
-         check: bool | None = False,
-         verbose: bool | None = None,
-     ) -> SizeResult | Exception:
-         """Get the size of a list of files. Example of files items: "remote:bucket/to/file"."""
-         verbose = get_verbose(verbose)
-         check = get_check(check)
-         if len(files) < 2:
-             tmp = self.size_file(files[0])
-             if isinstance(tmp, Exception):
-                 return tmp
-             assert isinstance(tmp, SizeSuffix)
-             return SizeResult(
-                 prefix=src, total_size=tmp.as_int(), file_sizes={files[0]: tmp.as_int()}
-             )
-         if fast_list or (other_args and "--fast-list" in other_args):
-             warnings.warn(
-                 "It's not recommended to use --fast-list with size_files as this will perform poorly on large repositories since the entire repository has to be scanned."
-             )
-         files = list(files)
-         all_files: list[File] = []
-         # prefix, files = group_under_one_prefix(src, files)
-         cmd = ["lsjson", src, "--files-only", "-R"]
-         with TemporaryDirectory() as tmpdir:
-             # print("files: " + ",".join(files))
-             include_files_txt = Path(tmpdir) / "include_files.txt"
-             include_files_txt.write_text("\n".join(files), encoding="utf-8")
-             cmd += ["--files-from", str(include_files_txt)]
-             if fast_list:
-                 cmd.append("--fast-list")
-             if other_args:
-                 cmd += other_args
-             cp = self._run(cmd, check=check)
-
-             if cp.returncode != 0:
-                 if check:
-                     raise ValueError(f"Error getting file sizes: {cp.stderr}")
-                 else:
-                     warnings.warn(f"Error getting file sizes: {cp.stderr}")
-             stdout = cp.stdout
-             pieces = src.split(":", 1)
-             remote_name = pieces[0]
-             parent_path: str | None
-             if len(pieces) > 1:
-                 parent_path = pieces[1]
-             else:
-                 parent_path = None
-             remote = Remote(name=remote_name, rclone=self)
-             paths: list[RPath] = RPath.from_json_str(
-                 stdout, remote, parent_path=parent_path
-             )
-             # print(paths)
-             all_files += [File(p) for p in paths]
-         file_sizes: dict[str, int] = {}
-         f: File
-         for f in all_files:
-             p = f.to_string(include_remote=True)
-             if p in file_sizes:
-                 warnings.warn(f"Duplicate file found: {p}")
-                 continue
-             size = f.size
-             if size == 0:
-                 warnings.warn(f"File size is 0: {p}")
-             file_sizes[p] = f.size
-         total_size = sum(file_sizes.values())
-         file_sizes_path_corrected: dict[str, int] = {}
-         for path, size in file_sizes.items():
-             # remove the prefix
-             path_path = Path(path)
-             path_str = path_path.relative_to(src).as_posix()
-             file_sizes_path_corrected[path_str] = size
-         out: SizeResult = SizeResult(
-             prefix=src, total_size=total_size, file_sizes=file_sizes_path_corrected
-         )
-         return out
1
+ """
2
+ Unit test file.
3
+ """
4
+
5
+ import os
6
+ import random
7
+ import subprocess
8
+ import time
9
+ import tracemalloc
10
+ import warnings
11
+ from concurrent.futures import Future, ThreadPoolExecutor
12
+ from datetime import datetime
13
+ from fnmatch import fnmatch
14
+ from pathlib import Path
15
+ from tempfile import TemporaryDirectory
16
+ from typing import Generator
17
+
18
+ from rclone_api import Dir
19
+ from rclone_api.completed_process import CompletedProcess
20
+ from rclone_api.config import Config, Parsed, Section
21
+ from rclone_api.convert import convert_to_filestr_list, convert_to_str
22
+ from rclone_api.deprecated import deprecated
23
+ from rclone_api.detail.walk import walk
24
+ from rclone_api.diff import DiffItem, DiffOption, diff_stream_from_running_process
25
+ from rclone_api.dir_listing import DirListing
26
+ from rclone_api.exec import RcloneExec
27
+ from rclone_api.file import File
28
+ from rclone_api.file_stream import FilesStream
29
+ from rclone_api.fs import FSPath, RemoteFS
30
+ from rclone_api.group_files import group_files
31
+ from rclone_api.http_server import HttpServer
32
+ from rclone_api.mount import Mount
33
+ from rclone_api.process import Process
34
+ from rclone_api.remote import Remote
35
+ from rclone_api.rpath import RPath
36
+ from rclone_api.s3.create import S3Credentials
37
+ from rclone_api.s3.types import (
38
+ S3Provider,
39
+ )
40
+ from rclone_api.types import (
41
+ ListingOption,
42
+ ModTimeStrategy,
43
+ Order,
44
+ PartInfo,
45
+ SizeResult,
46
+ SizeSuffix,
47
+ )
48
+ from rclone_api.util import (
49
+ find_free_port,
50
+ get_check,
51
+ get_rclone_exe,
52
+ get_verbose,
53
+ to_path,
54
+ )
55
+
56
+ # Enable tracing memory usage always
57
+ tracemalloc.start()
58
+
59
+
60
+ def rclone_verbose(verbose: bool | None) -> bool:
61
+ if verbose is not None:
62
+ os.environ["RCLONE_API_VERBOSE"] = "1" if verbose else "0"
63
+ return bool(int(os.getenv("RCLONE_API_VERBOSE", "0")))
64
+
65
+
66
+ def _to_rclone_conf(config: Config | Path) -> Config:
67
+ if isinstance(config, Path):
68
+ content = config.read_text(encoding="utf-8")
69
+ return Config(content)
70
+ else:
71
+ return config
72
+
73
+
74
+ def _parse_paths(src: str) -> list[Path] | Exception:
75
+ # Config file: C:\Users\niteris\AppData\Roaming\rclone\rclone.conf
76
+ # Cache dir: C:\Users\niteris\AppData\Local\rclone
77
+ # Temp dir: C:\Users\niteris\AppData\Local\Temp
78
+ lines = src.splitlines()
79
+ paths: list[Path] = []
80
+ for line in lines:
81
+ try:
82
+ parts = line.split(":")
83
+ if len(parts) != 2:
84
+ continue
85
+ path = Path(parts[1].strip())
86
+ paths.append(path)
87
+ except Exception as e:
88
+ return e
89
+ return paths
90
+
91
+
92
+ class RcloneImpl:
93
+ def __init__(
94
+ self, rclone_conf: Path | Config | None, rclone_exe: Path | None = None
95
+ ) -> None:
96
+ if isinstance(rclone_conf, Path):
97
+ if not rclone_conf.exists():
98
+ raise ValueError(f"Rclone config file not found: {rclone_conf}")
99
+ if rclone_conf is None:
100
+ from rclone_api.config import find_conf_file
101
+
102
+ maybe_path = find_conf_file(self)
103
+ if not isinstance(maybe_path, Path):
104
+ raise ValueError("Rclone config file not found")
105
+ rclone_conf = _to_rclone_conf(maybe_path)
106
+ self._exec = RcloneExec(rclone_conf, get_rclone_exe(rclone_exe))
107
+ self.config: Config = _to_rclone_conf(rclone_conf)
108
+
109
+ def _run(
110
+ self, cmd: list[str], check: bool = False, capture: bool | Path | None = None
111
+ ) -> subprocess.CompletedProcess:
112
+ return self._exec.execute(cmd, check=check, capture=capture)
113
+
114
+ def _launch_process(
115
+ self, cmd: list[str], capture: bool | None = None, log: Path | None = None
116
+ ) -> Process:
117
+ return self._exec.launch_process(cmd, capture=capture, log=log)
118
+
119
+ def _get_tmp_mount_dir(self) -> Path:
120
+ return Path("tmp_mnts")
121
+
122
+ def _get_cache_dir(self) -> Path:
123
+ return Path("cache")
124
+
125
+ def webgui(self, other_args: list[str] | None = None) -> Process:
126
+ """Launch the Rclone web GUI."""
127
+ cmd = ["rcd", "--rc-web-gui"]
128
+ if other_args:
129
+ cmd += other_args
130
+ return self._launch_process(cmd, capture=False)
131
+
132
+ def filesystem(self, src: str) -> RemoteFS:
133
+ return RemoteFS(self.config, src)
134
+
135
+ def cwd(self, src: str) -> FSPath:
136
+ return self.filesystem(src).cwd()
137
+
138
+ def launch_server(
139
+ self,
140
+ addr: str,
141
+ user: str | None = None,
142
+ password: str | None = None,
143
+ other_args: list[str] | None = None,
144
+ ) -> Process:
145
+ """Launch the Rclone server so it can receive commands"""
146
+ cmd = ["rcd"]
147
+ if addr is not None:
148
+ cmd += ["--rc-addr", addr]
149
+ if user is not None:
150
+ cmd += ["--rc-user", user]
151
+ if password is not None:
152
+ cmd += ["--rc-pass", password]
153
+ if other_args:
154
+ cmd += other_args
155
+ out = self._launch_process(cmd, capture=False)
156
+ time.sleep(1) # Give it some time to launch
157
+ return out
158
+
159
+ def remote_control(
160
+ self,
161
+ addr: str,
162
+ user: str | None = None,
163
+ password: str | None = None,
164
+ capture: bool | None = None,
165
+ other_args: list[str] | None = None,
166
+ ) -> CompletedProcess:
167
+ cmd = ["rc"]
168
+ if addr:
169
+ cmd += ["--rc-addr", addr]
170
+ if user is not None:
171
+ cmd += ["--rc-user", user]
172
+ if password is not None:
173
+ cmd += ["--rc-pass", password]
174
+ if other_args:
175
+ cmd += other_args
176
+ cp = self._run(cmd, capture=capture)
177
+ return CompletedProcess.from_subprocess(cp)
178
+
179
+ def obscure(self, password: str) -> str:
180
+ """Obscure a password for use in rclone config files."""
181
+ cmd_list: list[str] = ["obscure", password]
182
+ cp = self._run(cmd_list)
183
+ return cp.stdout.strip()
184
+
185
+ def ls_stream(
186
+ self,
187
+ src: str,
188
+ max_depth: int = -1,
189
+ fast_list: bool = False,
190
+ ) -> FilesStream:
191
+ """
192
+ List files in the given path
193
+
194
+ Args:
195
+ src: Remote path to list
196
+ max_depth: Maximum recursion depth (-1 for unlimited)
197
+ fast_list: Use fast list (only use when getting THE entire data repository from the root/bucket, or it's small)
198
+ """
199
+ cmd = ["lsjson", src, "--files-only"]
200
+ recurse = max_depth < 0 or max_depth > 1
201
+ if recurse:
202
+ cmd.append("-R")
203
+ if max_depth > 1:
204
+ cmd += ["--max-depth", str(max_depth)]
205
+ if fast_list:
206
+ cmd.append("--fast-list")
207
+ streamer = FilesStream(src, self._launch_process(cmd, capture=True))
208
+ return streamer
209
+
210
+ def save_to_db(
211
+ self,
212
+ src: str,
213
+ db_url: str,
214
+ max_depth: int = -1,
215
+ fast_list: bool = False,
216
+ ) -> None:
217
+ """
218
+ Save files to a database (sqlite, mysql, postgres)
219
+
220
+ Args:
221
+ src: Remote path to list, this will be used to populate an entire table, so always use the root-most path.
222
+ db_url: Database URL, like sqlite:///data.db or mysql://user:pass@localhost/db or postgres://user:pass@localhost/db
223
+ max_depth: Maximum depth to traverse (-1 for unlimited)
224
+ fast_list: Use fast list (only use when getting THE entire data repository from the root/bucket)
225
+
226
+ """
227
+ from rclone_api.db import DB
228
+
229
+ db = DB(db_url)
230
+ with self.ls_stream(src, max_depth, fast_list) as stream:
231
+ for page in stream.files_paged(page_size=10000):
232
+ db.add_files(page)
233
+
234
+ def ls(
235
+ self,
236
+ src: Dir | Remote | str | None = None,
237
+ max_depth: int | None = None,
238
+ glob: str | None = None,
239
+ order: Order = Order.NORMAL,
240
+ listing_option: ListingOption = ListingOption.ALL,
241
+ ) -> DirListing:
242
+ """List files in the given path.
243
+
244
+ Args:
245
+ src: Remote path or Remote object to list
246
+ max_depth: Maximum recursion depth (0 means no recursion)
247
+
248
+ Returns:
249
+ List of File objects found at the path
250
+ """
251
+
252
+ if src is None:
253
+ # list remotes instead
254
+ list_remotes: list[Remote] = self.listremotes()
255
+ dirs: list[Dir] = [Dir(remote) for remote in list_remotes]
256
+ for d in dirs:
257
+ d.path.path = ""
258
+ rpaths = [d.path for d in dirs]
259
+ return DirListing(rpaths)
260
+
261
+ if isinstance(src, str):
262
+ src = Dir(
263
+ to_path(src, self)
264
+ ) # assume it's a directory if ls is being called.
265
+
266
+ cmd = ["lsjson"]
267
+ if max_depth is not None:
268
+ if max_depth < 0:
269
+ cmd.append("--recursive")
270
+ if max_depth > 0:
271
+ cmd.append("--max-depth")
272
+ cmd.append(str(max_depth))
273
+ if listing_option != ListingOption.ALL:
274
+ cmd.append(f"--{listing_option.value}")
275
+
276
+ cmd.append(str(src))
277
+ remote = src.remote if isinstance(src, Dir) else src
278
+ assert isinstance(remote, Remote)
279
+
280
+ cp = self._run(cmd, check=True)
281
+ text = cp.stdout
282
+ parent_path: str | None = None
283
+ if isinstance(src, Dir):
284
+ parent_path = src.path.path
285
+ paths: list[RPath] = RPath.from_json_str(text, remote, parent_path=parent_path)
286
+ # print(parent_path)
287
+ for o in paths:
288
+ o.set_rclone(self)
289
+
290
+ # do we have a glob pattern?
291
+ if glob is not None:
292
+ paths = [p for p in paths if fnmatch(p.path, glob)]
293
+
294
+ if order == Order.REVERSE:
295
+ paths.reverse()
296
+ elif order == Order.RANDOM:
297
+ random.shuffle(paths)
298
+ return DirListing(paths)
299
+
300
+ def print(self, src: str) -> Exception | None:
301
+ """Print the contents of a file."""
302
+ try:
303
+ text_or_err = self.read_text(src)
304
+ if isinstance(text_or_err, Exception):
305
+ return text_or_err
306
+ print(text_or_err)
307
+ except Exception as e:
308
+ return e
309
+ return None
310
+
311
+ def stat(self, src: str) -> File | Exception:
312
+ """Get the status of a file or directory."""
313
+ dirlist: DirListing = self.ls(src)
314
+ if len(dirlist.files) == 0:
315
+ # raise FileNotFoundError(f"File not found: {src}")
316
+ return FileNotFoundError(f"File not found: {src}")
317
+ try:
318
+ file: File = dirlist.files[0]
319
+ return file
320
+ except Exception as e:
321
+ return e
322
+
323
+ def modtime(self, src: str) -> str | Exception:
324
+ """Get the modification time of a file or directory."""
325
+ try:
326
+ file: File | Exception = self.stat(src)
327
+ if isinstance(file, Exception):
328
+ return file
329
+ return file.mod_time()
330
+ except Exception as e:
331
+ return e
332
+
333
+ def modtime_dt(self, src: str) -> datetime | Exception:
334
+ """Get the modification time of a file or directory."""
335
+ modtime: str | Exception = self.modtime(src)
336
+ if isinstance(modtime, Exception):
337
+ return modtime
338
+ return datetime.fromisoformat(modtime)
339
+
340
+ def listremotes(self) -> list[Remote]:
341
+ cmd = ["listremotes"]
342
+ cp = self._run(cmd)
343
+ text: str = cp.stdout
344
+ tmp = text.splitlines()
345
+ tmp = [t.strip() for t in tmp]
346
+ # strip out ":" from the end
347
+ tmp = [t.replace(":", "") for t in tmp]
348
+ out = [Remote(name=t, rclone=self) for t in tmp]
349
+ return out
350
+
351
+    def diff(
+        self,
+        src: str,
+        dst: str,
+        min_size: (
+            str | None
+        ) = None,  # e.g. "1MB" - see rclone documentation: https://rclone.org/commands/rclone_check/
+        max_size: (
+            str | None
+        ) = None,  # e.g. "1GB" - see rclone documentation: https://rclone.org/commands/rclone_check/
+        diff_option: DiffOption = DiffOption.COMBINED,
+        fast_list: bool = True,
+        size_only: bool | None = None,
+        checkers: int | None = None,
+        other_args: list[str] | None = None,
+    ) -> Generator[DiffItem, None, None]:
+        """Be extra careful with the src and dst values. If you are off by one
+        parent directory, you will get a huge number of false diffs."""
+        other_args = other_args or []
+        if checkers is None or checkers < 1:
+            checkers = 1000
+        cmd = [
+            "check",
+            src,
+            dst,
+            "--checkers",
+            str(checkers),
+            "--log-level",
+            "INFO",
+            f"--{diff_option.value}",
+            "-",
+        ]
+        if size_only is None:
+            size_only = diff_option in [
+                DiffOption.MISSING_ON_DST,
+                DiffOption.MISSING_ON_SRC,
+            ]
+        if size_only:
+            cmd += ["--size-only"]
+        if fast_list:
+            cmd += ["--fast-list"]
+        if min_size:
+            cmd += ["--min-size", min_size]
+        if max_size:
+            cmd += ["--max-size", max_size]
+        if diff_option == DiffOption.MISSING_ON_DST:
+            cmd += ["--one-way"]
+        if other_args:
+            cmd += other_args
+        proc = self._launch_process(cmd, capture=True)
+        item: DiffItem
+        for item in diff_stream_from_running_process(
+            running_process=proc, src_slug=src, dst_slug=dst, diff_option=diff_option
+        ):
+            if item is None:
+                break
+            yield item
+
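+    # Usage sketch (editor's illustration; the remotes are hypothetical). The
+    # generator streams DiffItems while `rclone check` is still running:
+    #
+    #     for item in rclone.diff("src:bucket/path", "dst:bucket/path"):
+    #         print(item)
+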
+    def walk(
+        self,
+        src: Dir | Remote | str,
+        max_depth: int = -1,
+        breadth_first: bool = True,
+        order: Order = Order.NORMAL,
+    ) -> Generator[DirListing, None, None]:
+        """Walk through the given path recursively.
+
+        Args:
+            src: Remote path or Remote object to walk through
+            max_depth: Maximum depth to traverse (-1 for unlimited)
+            breadth_first: Traverse breadth-first when True, depth-first otherwise
+            order: Order in which directory entries are visited
+
+        Yields:
+            DirListing: Directory listing for each directory encountered
+        """
+        dir_obj: Dir
+        if isinstance(src, Dir):
+            # Create a Remote object for the path
+            remote = src.remote
+            rpath = RPath(
+                remote=remote,
+                path=src.path.path,
+                name=src.path.name,
+                size=0,
+                mime_type="inode/directory",
+                mod_time="",
+                is_dir=True,
+            )
+            rpath.set_rclone(self)
+            dir_obj = Dir(rpath)
+        elif isinstance(src, str):
+            dir_obj = Dir(to_path(src, self))
+        elif isinstance(src, Remote):
+            dir_obj = Dir(src)
+        else:
+            dir_obj = Dir(src)  # shut up pyright
+            assert False, f"Invalid type for path: {type(src)}"
+
+        yield from walk(
+            dir_obj, max_depth=max_depth, breadth_first=breadth_first, order=order
+        )
+
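+    # Usage sketch (editor's illustration; the remote is hypothetical):
+    #
+    #     for listing in rclone.walk("remote:bucket", max_depth=2):
+    #         for d in listing.dirs:
+    #             print(d.path)
+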
+    def scan_missing_folders(
+        self,
+        src: Dir | Remote | str,
+        dst: Dir | Remote | str,
+        max_depth: int = -1,
+        order: Order = Order.NORMAL,
+    ) -> Generator[Dir, None, None]:
+        """Scan for folders that exist in src but are missing in dst.
+
+        WORK IN PROGRESS!!
+
+        Args:
+            src: Source directory or Remote to walk through
+            dst: Destination directory or Remote to walk through
+            max_depth: Maximum depth to traverse (-1 for unlimited)
+
+        Yields:
+            Dir: Directory that exists in src but is missing in dst
+        """
+        from rclone_api.scan_missing_folders import scan_missing_folders
+
+        src_dir = Dir(to_path(src, self))
+        dst_dir = Dir(to_path(dst, self))
+        yield from scan_missing_folders(
+            src=src_dir, dst=dst_dir, max_depth=max_depth, order=order
+        )
+
+    def cleanup(
+        self, src: str, other_args: list[str] | None = None
+    ) -> CompletedProcess:
+        """Clean up the remote if possible, e.g. emptying trash or deleting
+        unfinished uploads. Wraps `rclone cleanup remote:path [flags]`."""
+        cmd = ["cleanup", src]
+        if other_args:
+            cmd += other_args
+        out = self._run(cmd)
+        return CompletedProcess.from_subprocess(out)
+
+    def get_verbose(self) -> bool:
+        return get_verbose(None)
+
+    def copy_to(
+        self,
+        src: File | str,
+        dst: File | str,
+        check: bool | None = None,
+        verbose: bool | None = None,
+        other_args: list[str] | None = None,
+    ) -> CompletedProcess:
+        """Copy one file from source to destination.
+
+        Warning - slow.
+        """
+        check = get_check(check)
+        verbose = get_verbose(verbose)
+        src = src if isinstance(src, str) else str(src.path)
+        dst = dst if isinstance(dst, str) else str(dst.path)
+        cmd_list: list[str] = ["copyto", src, dst, "--s3-no-check-bucket"]
+        if other_args is not None:
+            cmd_list += other_args
+        cp = self._run(cmd_list, check=check)
+        return CompletedProcess.from_subprocess(cp)
+
+    def copy_files(
+        self,
+        src: str,
+        dst: str,
+        files: list[str] | Path,
+        check: bool | None = None,
+        max_backlog: int | None = None,
+        verbose: bool | None = None,
+        checkers: int | None = None,
+        transfers: int | None = None,
+        low_level_retries: int | None = None,
+        retries: int | None = None,
+        retries_sleep: str | None = None,
+        metadata: bool | None = None,
+        timeout: str | None = None,
+        max_partition_workers: int | None = None,
+        multi_thread_streams: int | None = None,
+        other_args: list[str] | None = None,
+    ) -> list[CompletedProcess]:
+        """Copy multiple files from source to destination.
+
+        Args:
+            src: Root of the source paths
+            dst: Root of the destination paths
+            files: Relative file paths, or a Path to a text file listing one per line
+        """
+        check = get_check(check)
+        max_partition_workers = max_partition_workers or 1
+        low_level_retries = low_level_retries or 10
+        retries = retries or 3
+        other_args = other_args or []
+        other_args.append("--s3-no-check-bucket")
+        checkers = checkers or 1000
+        transfers = transfers or 32
+        verbose = get_verbose(verbose)
+        payload: list[str] = (
+            files
+            if isinstance(files, list)
+            else [f.strip() for f in files.read_text().splitlines() if f.strip()]
+        )
+        if len(payload) == 0:
+            return []
+
+        for p in payload:
+            if ":" in p:
+                raise ValueError(
+                    f"Invalid file path, contains a remote, which is not allowed for copy_files: {p}"
+                )
+
+        using_fast_list = "--fast-list" in other_args
+        if using_fast_list:
+            warnings.warn(
+                "It's not recommended to use --fast-list with copy_files as this will perform poorly on large repositories since the entire repository has to be scanned."
+            )
+
+        if max_partition_workers > 1:
+            datalists: dict[str, list[str]] = group_files(
+                payload, fully_qualified=False
+            )
+        else:
+            datalists = {"": payload}
+        # out: subprocess.CompletedProcess | None = None
+        out: list[CompletedProcess] = []
+
+        futures: list[Future] = []
+
+        with ThreadPoolExecutor(max_workers=max_partition_workers) as executor:
+            for common_prefix, files in datalists.items():
+
+                def _task(
+                    files: list[str] | Path = files,
+                ) -> subprocess.CompletedProcess:
+                    with TemporaryDirectory() as tmpdir:
+                        filelist: list[str] = []
+                        filepath: Path
+                        if isinstance(files, list):
+                            include_files_txt = Path(tmpdir) / "include_files.txt"
+                            include_files_txt.write_text(
+                                "\n".join(files), encoding="utf-8"
+                            )
+                            filelist = list(files)
+                            filepath = Path(include_files_txt)
+                        elif isinstance(files, Path):
+                            filelist = [
+                                f.strip()
+                                for f in files.read_text().splitlines()
+                                if f.strip()
+                            ]
+                            filepath = files
+                        if common_prefix:
+                            src_path = f"{src}/{common_prefix}"
+                            dst_path = f"{dst}/{common_prefix}"
+                        else:
+                            src_path = src
+                            dst_path = dst
+
+                        if verbose:
+                            nfiles = len(filelist)
+                            files_fqdn = [f" {src_path}/{f}" for f in filelist]
+                            print(f"Copying {nfiles} files:")
+                            chunk_size = 100
+                            for i in range(0, nfiles, chunk_size):
+                                chunk = files_fqdn[i : i + chunk_size]
+                                files_str = "\n".join(chunk)
+                                print(f"{files_str}")
+                        cmd_list: list[str] = [
+                            "copy",
+                            src_path,
+                            dst_path,
+                            "--files-from",
+                            str(filepath),
+                            "--checkers",
+                            str(checkers),
+                            "--transfers",
+                            str(transfers),
+                            "--low-level-retries",
+                            str(low_level_retries),
+                            "--retries",
+                            str(retries),
+                        ]
+                        if metadata:
+                            cmd_list.append("--metadata")
+                        if retries_sleep is not None:
+                            cmd_list += ["--retries-sleep", retries_sleep]
+                        if timeout is not None:
+                            cmd_list += ["--timeout", timeout]
+                        if max_backlog is not None:
+                            cmd_list += ["--max-backlog", str(max_backlog)]
+                        if multi_thread_streams is not None:
+                            cmd_list += [
+                                "--multi-thread-streams",
+                                str(multi_thread_streams),
+                            ]
+                        if verbose:
+                            if not any("-v" in x for x in other_args):
+                                cmd_list.append("-vvvv")
+                            if not any("--progress" in x for x in other_args):
+                                cmd_list.append("--progress")
+                        if other_args:
+                            cmd_list += other_args
+                        return self._run(cmd_list, capture=not verbose)
+
+                fut: Future = executor.submit(_task)
+                futures.append(fut)
+        for fut in futures:
+            cp: subprocess.CompletedProcess = fut.result()
+            assert cp is not None
+            out.append(CompletedProcess.from_subprocess(cp))
+            if cp.returncode != 0:
+                if check:
+                    raise ValueError(f"Error copying files: {cp.stderr}")
+                else:
+                    warnings.warn(f"Error copying files: {cp.stderr}")
+        return out
+
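+    # Usage sketch (editor's illustration; remotes and file names are
+    # hypothetical). Paths must be relative to src/dst, so no "remote:"
+    # prefixes are allowed in the list:
+    #
+    #     results = rclone.copy_files(
+    #         src="src:bucket/data",
+    #         dst="dst:bucket/data",
+    #         files=["a/1.bin", "a/2.bin", "b/3.bin"],
+    #     )
+    #     assert all(cp.returncode == 0 for cp in results)
+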
+    def copy(
+        self,
+        src: Dir | str,
+        dst: Dir | str,
+        check: bool | None = None,
+        transfers: int | None = None,
+        checkers: int | None = None,
+        multi_thread_streams: int | None = None,
+        low_level_retries: int | None = None,
+        retries: int | None = None,
+        other_args: list[str] | None = None,
+    ) -> CompletedProcess:
+        """Copy files from source to destination.
+
+        Args:
+            src: Source directory
+            dst: Destination directory
+        """
+        src_dir = convert_to_str(src)
+        dst_dir = convert_to_str(dst)
+        check = get_check(check)
+        checkers = checkers or 1000
+        transfers = transfers or 32
+        low_level_retries = low_level_retries or 10
+        retries = retries or 3
+        cmd_list: list[str] = ["copy", src_dir, dst_dir]
+        cmd_list += ["--checkers", str(checkers)]
+        cmd_list += ["--transfers", str(transfers)]
+        cmd_list += ["--low-level-retries", str(low_level_retries)]
+        cmd_list += ["--retries", str(retries)]
+        cmd_list.append("--s3-no-check-bucket")
+        if multi_thread_streams is not None:
+            cmd_list += ["--multi-thread-streams", str(multi_thread_streams)]
+        if other_args:
+            cmd_list += other_args
+        cp = self._run(cmd_list, check=check, capture=False)
+        return CompletedProcess.from_subprocess(cp)
+
+    def purge(self, src: Dir | str) -> CompletedProcess:
+        """Purge a directory"""
+        # path should always be a string
+        src = src if isinstance(src, str) else str(src.path)
+        cmd_list: list[str] = ["purge", str(src)]
+        cp = self._run(cmd_list)
+        return CompletedProcess.from_subprocess(cp)
+
+    def delete_files(
+        self,
+        files: str | File | list[str] | list[File],
+        check: bool | None = None,
+        rmdirs: bool = False,
+        verbose: bool | None = None,
+        max_partition_workers: int | None = None,
+        other_args: list[str] | None = None,
+    ) -> CompletedProcess:
+        """Delete files, optionally removing emptied directories."""
+        check = get_check(check)
+        verbose = get_verbose(verbose)
+        payload: list[str] = convert_to_filestr_list(files)
+        if len(payload) == 0:
+            if verbose:
+                print("No files to delete")
+            cp = subprocess.CompletedProcess(
+                args=["rclone", "delete", "--files-from", "[]"],
+                returncode=0,
+                stdout="",
+                stderr="",
+            )
+            return CompletedProcess.from_subprocess(cp)
+
+        datalists: dict[str, list[str]] = group_files(payload)
+        completed_processes: list[subprocess.CompletedProcess] = []
+
+        futures: list[Future] = []
+
+        with ThreadPoolExecutor(max_workers=max_partition_workers) as executor:
+
+            for remote, files in datalists.items():
+
+                def _task(
+                    files=files, check=check, remote=remote
+                ) -> subprocess.CompletedProcess:
+                    with TemporaryDirectory() as tmpdir:
+                        include_files_txt = Path(tmpdir) / "include_files.txt"
+                        include_files_txt.write_text("\n".join(files), encoding="utf-8")
+
+                        # print(include_files_txt)
+                        cmd_list: list[str] = [
+                            "delete",
+                            remote,
+                            "--files-from",
+                            str(include_files_txt),
+                            "--checkers",
+                            "1000",
+                            "--transfers",
+                            "1000",
+                        ]
+                        if verbose:
+                            cmd_list.append("-vvvv")
+                        if rmdirs:
+                            cmd_list.append("--rmdirs")
+                        if other_args:
+                            cmd_list += other_args
+                        out = self._run(cmd_list, check=check)
+                        if out.returncode != 0:
+                            if check:
+                                completed_processes.append(out)
+                                raise ValueError(f"Error deleting files: {out}")
+                            else:
+                                warnings.warn(f"Error deleting files: {out}")
+                        return out
+
+                fut: Future = executor.submit(_task)
+                futures.append(fut)
+
+            for fut in futures:
+                out = fut.result()
+                assert out is not None
+                completed_processes.append(out)
+
+        return CompletedProcess(completed_processes)
+
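+    # Usage sketch (editor's illustration; the paths are hypothetical). Unlike
+    # copy_files, fully qualified paths (remote included) are expected here:
+    #
+    #     cp = rclone.delete_files(
+    #         ["dst:bucket/tmp/a.bin", "dst:bucket/tmp/b.bin"], rmdirs=True
+    #     )
+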
+    @deprecated("delete_files")
+    def deletefiles(
+        self, files: str | File | list[str] | list[File]
+    ) -> CompletedProcess:
+        out = self.delete_files(files)
+        return out
+
+    def exists(self, src: Dir | Remote | str | File) -> bool:
+        """Check if a file or directory exists."""
+        arg: str = convert_to_str(src)
+        assert isinstance(arg, str)
+        try:
+            dir_listing = self.ls(arg)
+            # print(dir_listing)
+            return len(dir_listing.dirs) > 0 or len(dir_listing.files) > 0
+        except subprocess.CalledProcessError:
+            return False
+
+    def is_synced(self, src: str | Dir, dst: str | Dir) -> bool:
+        """Check if two directories are in sync."""
+        src = convert_to_str(src)
+        dst = convert_to_str(dst)
+        cmd_list: list[str] = ["check", str(src), str(dst)]
+        try:
+            self._run(cmd_list, check=True)
+            return True
+        except subprocess.CalledProcessError:
+            return False
+
+    def copy_file_s3_resumable(
+        self,
+        src: str,  # src:/Bucket/path/myfile.large.zst
+        dst: str,  # dst:/Bucket/path/myfile.large
+        part_infos: list[PartInfo] | None = None,
+        upload_threads: int = 8,
+        merge_threads: int = 4,
+    ) -> Exception | None:
+        """Copy parts of a file from source to destination."""
+        from rclone_api.detail.copy_file_parts_resumable import (
+            copy_file_parts_resumable,
+        )
+
+        if dst.endswith("/"):
+            dst = dst[:-1]
+        dst_dir = f"{dst}-parts"
+
+        out = copy_file_parts_resumable(
+            self=self,
+            src=src,
+            dst_dir=dst_dir,
+            part_infos=part_infos,
+            upload_threads=upload_threads,
+            merge_threads=merge_threads,
+        )
+        return out
+
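+    # Usage sketch (editor's illustration; bucket and paths are hypothetical).
+    # Parts are staged under a "<dst>-parts" directory so an interrupted
+    # transfer can be resumed:
+    #
+    #     err = rclone.copy_file_s3_resumable(
+    #         src="src:Bucket/path/myfile.large.zst",
+    #         dst="dst:Bucket/path/myfile.large",
+    #         upload_threads=8,
+    #     )
+    #     assert err is None
+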
+    def write_text(
+        self,
+        dst: str,
+        text: str,
+    ) -> Exception | None:
+        """Write text to a file."""
+        data = text.encode("utf-8")
+        return self.write_bytes(dst=dst, data=data)
+
+    def write_bytes(
+        self,
+        dst: str,
+        data: bytes,
+    ) -> Exception | None:
+        """Write bytes to a file."""
+        with TemporaryDirectory() as tmpdir:
+            tmpfile = Path(tmpdir) / "file.bin"
+            tmpfile.write_bytes(data)
+            completed_proc = self.copy_to(str(tmpfile), dst, check=True)
+            if completed_proc.returncode != 0:
+                return Exception(f"Failed to write bytes to {dst}", completed_proc)
+        return None
+
+    def read_bytes(self, src: str) -> bytes | Exception:
+        """Read bytes from a file."""
+        with TemporaryDirectory() as tmpdir:
+            tmpfile = Path(tmpdir) / "file.bin"
+            completed_proc = self.copy_to(src, str(tmpfile), check=True)
+            if completed_proc.returncode != 0:
+                return Exception(f"Failed to read bytes from {src}", completed_proc)
+
+            if not tmpfile.exists():
+                return Exception(f"Failed to read bytes from {src}, file not found")
+            try:
+                return tmpfile.read_bytes()
+            except Exception as e:
+                return Exception(f"Failed to read bytes from {src}", e)
+
+    def read_text(self, src: str) -> str | Exception:
+        """Read text from a file."""
+        data = self.read_bytes(src)
+        if isinstance(data, Exception):
+            return data
+        try:
+            return data.decode("utf-8")
+        except UnicodeDecodeError as e:
+            return Exception(f"Failed to decode text from {src}", e)
+
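+    # Usage sketch (editor's illustration; the destination is hypothetical).
+    # write_text/read_text round-trip UTF-8 through a temporary local file:
+    #
+    #     err = rclone.write_text("dst:bucket/notes.txt", "hello")
+    #     assert err is None
+    #     text = rclone.read_text("dst:bucket/notes.txt")
+    #     assert text == "hello"
+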
+    def size_file(self, src: str) -> SizeSuffix | Exception:
+        """Get the size of a file or directory."""
+        # src_parent = os.path.dirname(src)
+        # src_name = os.path.basename(src)
+        # can't use this because it's only one file.
+        # out: SizeResult = self.size_files(src_parent, [src_name])
+        # one_file = len(out.file_sizes) == 1
+        # if not one_file:
+        #     return Exception(
+        #         f"More than one result returned, is this a directory? {out}"
+        #     )
+        # return SizeSuffix(out.total_size)
+        dirlist: DirListing = self.ls(
+            src, listing_option=ListingOption.FILES_ONLY, max_depth=0
+        )
+        if len(dirlist.files) == 0:
+            return FileNotFoundError(f"File not found: {src}")
+        if len(dirlist.files) > 1:
+            return Exception(f"More than one file found: {src}")
+        file: File = dirlist.files[0]
+        return SizeSuffix(file.size)
+
+    def get_s3_credentials(
+        self, remote: str, verbose: bool | None = None
+    ) -> S3Credentials:
+        from rclone_api.util import S3PathInfo, split_s3_path
+
+        verbose = get_verbose(verbose)
+        path_info: S3PathInfo = split_s3_path(remote)
+        remote = path_info.remote
+        bucket_name = path_info.bucket
+
+        parsed: Parsed = self.config.parse()
+        sections: dict[str, Section] = parsed.sections
+        if remote not in sections:
+            raise ValueError(
+                f"Remote {remote} not found in rclone config, remotes are: {sections.keys()}"
+            )
+
+        section: Section = sections[remote]
+        dst_type = section.type()
+        if dst_type != "s3" and dst_type != "b2":
+            raise ValueError(
+                f"Remote {remote} is not an S3 remote, it is of type {dst_type}"
+            )
+
+        def get_provider_str(section=section) -> str | None:
+            section_type: str = section.type()
+            provider: str | None = section.provider()
+            if provider is not None:
+                return provider
+            if section_type == "b2":
+                return S3Provider.BACKBLAZE.value
+            if section_type != "s3":
+                raise ValueError(f"Remote {remote} is not an S3 remote")
+            return S3Provider.S3.value
+
+        provider: str
+        if provided_provider_str := get_provider_str():
+            if verbose:
+                print(f"Using provided provider: {provided_provider_str}")
+            provider = provided_provider_str
+        else:
+            if verbose:
+                print(f"Using default provider: {S3Provider.S3.value}")
+            provider = S3Provider.S3.value
+        provider_enum = S3Provider.from_str(provider)
+
+        s3_creds: S3Credentials = S3Credentials(
+            bucket_name=bucket_name,
+            provider=provider_enum,
+            access_key_id=section.access_key_id(),
+            secret_access_key=section.secret_access_key(),
+            endpoint_url=section.endpoint(),
+        )
+        return s3_creds
+
+    def copy_bytes(
+        self,
+        src: str,
+        offset: int | SizeSuffix,
+        length: int | SizeSuffix,
+        outfile: Path,
+        other_args: list[str] | None = None,
+    ) -> Exception | None:
+        """Copy a slice of bytes from the src file to outfile."""
+        offset = SizeSuffix(offset).as_int()
+        length = SizeSuffix(length).as_int()
+        cmd_list: list[str] = [
+            "cat",
+            "--offset",
+            str(offset),
+            "--count",
+            str(length),
+            src,
+        ]
+        if other_args:
+            cmd_list.extend(other_args)
+        try:
+            cp = self._run(cmd_list, capture=outfile)
+            if cp.returncode == 0:
+                return None
+            return Exception(cp.stderr)
+        except subprocess.CalledProcessError as e:
+            return e
+
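+    # Usage sketch (editor's illustration; the source path is hypothetical).
+    # Reads a 1 MiB slice starting at a 4 MiB offset via `rclone cat`:
+    #
+    #     err = rclone.copy_bytes(
+    #         src="src:bucket/big.iso",
+    #         offset=4 * 1024 * 1024,
+    #         length=1024 * 1024,
+    #         outfile=Path("slice.bin"),
+    #     )
+    #     assert err is None
+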
+    def copy_dir(
+        self, src: str | Dir, dst: str | Dir, args: list[str] | None = None
+    ) -> CompletedProcess:
+        """Copy a directory from source to destination."""
+        # convert src to str, also dst
+        src = convert_to_str(src)
+        dst = convert_to_str(dst)
+        cmd_list: list[str] = ["copy", src, dst, "--s3-no-check-bucket"]
+        if args is not None:
+            cmd_list += args
+        cp = self._run(cmd_list)
+        return CompletedProcess.from_subprocess(cp)
+
+    def copy_remote(
+        self, src: Remote, dst: Remote, args: list[str] | None = None
+    ) -> CompletedProcess:
+        """Copy a remote to another remote."""
+        cmd_list: list[str] = ["copy", str(src), str(dst), "--s3-no-check-bucket"]
+        if args is not None:
+            cmd_list += args
+        cp = self._run(cmd_list)
+        return CompletedProcess.from_subprocess(cp)
+
+    def mount(
+        self,
+        src: Remote | Dir | str,
+        outdir: Path,
+        allow_writes: bool | None = False,
+        transfers: int | None = None,
+        use_links: bool | None = None,
+        vfs_cache_mode: str | None = None,
+        verbose: bool | None = None,
+        cache_dir: Path | None = None,
+        cache_dir_delete_on_exit: bool | None = None,
+        log: Path | None = None,
+        other_args: list[str] | None = None,
+    ) -> Mount:
+        """Mount a remote or directory to a local path.
+
+        Args:
+            src: Remote or directory to mount
+            outdir: Local path to mount to
+
+        Returns:
+            Mount: Handle to the running mount process
+
+        Raises:
+            subprocess.CalledProcessError: If the mount operation fails
+        """
+        from rclone_api.mount_util import clean_mount, prepare_mount
+
+        allow_writes = allow_writes or False
+        use_links = True if use_links is None else use_links
+        verbose = get_verbose(verbose) or (log is not None)
+        vfs_cache_mode = vfs_cache_mode or "full"
+        clean_mount(outdir, verbose=verbose)
+        prepare_mount(outdir, verbose=verbose)
+        debug_fuse = log is not None
+        src_str = convert_to_str(src)
+        cmd_list: list[str] = ["mount", src_str, str(outdir)]
+        if not allow_writes:
+            cmd_list.append("--read-only")
+        if use_links:
+            cmd_list.append("--links")
+        if vfs_cache_mode:
+            cmd_list.append("--vfs-cache-mode")
+            cmd_list.append(vfs_cache_mode)
+        if cache_dir:
+            cmd_list.append("--cache-dir")
+            cmd_list.append(str(cache_dir.absolute()))
+        if transfers is not None:
+            cmd_list.append("--transfers")
+            cmd_list.append(str(transfers))
+        if debug_fuse:
+            cmd_list.append("--debug-fuse")
+        if verbose:
+            cmd_list.append("-vvvv")
+        if other_args:
+            cmd_list += other_args
+        proc = self._launch_process(cmd_list, log=log)
+        mount_read_only = not allow_writes
+        mount: Mount = Mount(
+            src=src_str,
+            mount_path=outdir,
+            process=proc,
+            read_only=mount_read_only,
+            cache_dir=cache_dir,
+            cache_dir_delete_on_exit=cache_dir_delete_on_exit,
+        )
+        return mount
+
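+    # Usage sketch (editor's illustration; remote and mount point are
+    # hypothetical). The mount defaults to read-only with a full VFS cache:
+    #
+    #     m = rclone.mount("remote:bucket", Path("/mnt/bucket"))
+    #     # ... read files under /mnt/bucket ...
+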
+    # Settings optimized for s3.
+    def mount_s3(
+        self,
+        url: str,
+        outdir: Path,
+        allow_writes=False,
+        vfs_cache_mode="full",
+        dir_cache_time: str | None = "1h",
+        attribute_timeout: str | None = "1h",
+        vfs_disk_space_total_size: str | None = "100M",
+        transfers: int | None = 128,
+        modtime_strategy: (
+            ModTimeStrategy | None
+        ) = ModTimeStrategy.USE_SERVER_MODTIME,  # speeds up S3 operations
+        vfs_read_chunk_streams: int | None = 16,
+        vfs_read_chunk_size: str | None = "4M",
+        vfs_fast_fingerprint: bool = True,
+        # vfs-refresh
+        vfs_refresh: bool = True,
+        other_args: list[str] | None = None,
+    ) -> Mount:
+        """Mount a remote or directory to a local path with S3-tuned defaults.
+
+        Args:
+            url: Remote or directory to mount
+            outdir: Local path to mount to
+        """
+        other_args = other_args or []
+        if modtime_strategy is not None:
+            other_args.append(f"--{modtime_strategy.value}")
+        if (vfs_cache_mode == "full" or vfs_cache_mode == "writes") and (
+            transfers is not None and "--transfers" not in other_args
+        ):
+            other_args.append("--transfers")
+            other_args.append(str(transfers))
+        if dir_cache_time is not None and "--dir-cache-time" not in other_args:
+            other_args.append("--dir-cache-time")
+            other_args.append(dir_cache_time)
+        if (
+            vfs_disk_space_total_size is not None
+            and "--vfs-cache-max-size" not in other_args
+        ):
+            other_args.append("--vfs-cache-max-size")
+            other_args.append(vfs_disk_space_total_size)
+        if vfs_refresh and "--vfs-refresh" not in other_args:
+            other_args.append("--vfs-refresh")
+        if attribute_timeout is not None and "--attr-timeout" not in other_args:
+            other_args.append("--attr-timeout")
+            other_args.append(attribute_timeout)
+        if vfs_read_chunk_streams:
+            other_args.append("--vfs-read-chunk-streams")
+            other_args.append(str(vfs_read_chunk_streams))
+        if vfs_read_chunk_size:
+            other_args.append("--vfs-read-chunk-size")
+            other_args.append(vfs_read_chunk_size)
+        if vfs_fast_fingerprint:
+            other_args.append("--vfs-fast-fingerprint")
+
+        other_args = other_args if other_args else None
+        return self.mount(
+            url,
+            outdir,
+            allow_writes=allow_writes,
+            vfs_cache_mode=vfs_cache_mode,
+            other_args=other_args,
+        )
+
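+    # Usage sketch (editor's illustration; url and outdir are hypothetical).
+    # mount_s3 layers S3-friendly defaults (server modtime, chunked reads,
+    # capped VFS cache) on top of mount():
+    #
+    #     m = rclone.mount_s3("remote:bucket", Path("/mnt/s3"), transfers=64)
+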
+    def serve_webdav(
+        self,
+        src: Remote | Dir | str,
+        user: str,
+        password: str,
+        addr: str = "localhost:2049",
+        allow_other: bool = False,
+        other_args: list[str] | None = None,
+    ) -> Process:
+        """Serve a remote or directory via WebDAV.
+
+        Args:
+            src: Remote or directory to serve
+            addr: Network address and port to serve on (default: localhost:2049)
+            allow_other: Allow other users to access the share
+
+        Returns:
+            Process: The running WebDAV server process
+
+        Raises:
+            ValueError: If the WebDAV server fails to start
+        """
+        src_str = convert_to_str(src)
+        cmd_list: list[str] = ["serve", "webdav", "--addr", addr, src_str]
+        cmd_list.extend(["--user", user, "--pass", password])
+        if allow_other:
+            cmd_list.append("--allow-other")
+        if other_args:
+            cmd_list += other_args
+        proc = self._launch_process(cmd_list)
+        time.sleep(2)  # give it a moment to start
+        if proc.poll() is not None:
+            raise ValueError("WebDAV serve process failed to start")
+        return proc
+
+    def serve_http(
+        self,
+        src: str,
+        cache_mode: str | None,
+        addr: str | None = None,
+        serve_http_log: Path | None = None,
+        other_args: list[str] | None = None,
+    ) -> HttpServer:
+        """Serve a remote or directory via HTTP.
+
+        Args:
+            src: Remote or directory to serve
+            addr: Network address and port to serve on (default: localhost on a free port)
+        """
+        addr = addr or f"localhost:{find_free_port()}"
+        # local paths have no "remote:" prefix, so fall back to the path itself
+        subpath = src.split(":", 1)[1] if ":" in src else src
+        cmd_list: list[str] = [
+            "serve",
+            "http",
+            "--addr",
+            addr,
+            src,
+            "--vfs-disk-space-total-size",
+            "0",
+            "--vfs-read-chunk-size-limit",
+            "512M",
+        ]
+
+        if cache_mode:
+            cmd_list += [
+                "--vfs-cache-mode",
+                cache_mode,
+            ]
+        if serve_http_log:
+            cmd_list += ["--log-file", str(serve_http_log)]
+            cmd_list += ["-vvvv"]
+        if other_args:
+            cmd_list += other_args
+        proc = self._launch_process(cmd_list, log=serve_http_log)
+        time.sleep(2)
+        if proc.poll() is not None:
+            raise ValueError("HTTP serve process failed to start")
+        out: HttpServer = HttpServer(
+            url=f"http://{addr}", subpath=subpath, process=proc
+        )
+        return out
+
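+    # Usage sketch (editor's illustration; the remote is hypothetical). A free
+    # local port is chosen when addr is omitted:
+    #
+    #     server = rclone.serve_http("remote:bucket", cache_mode="minimal")
+    #     print(server.url)  # e.g. http://localhost:<free port>
+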
+    def config_paths(
+        self, remote: str | None = None, obscure: bool = False, no_obscure: bool = False
+    ) -> list[Path] | Exception:
+        """Return filesystem paths parsed from the current configuration output.
+
+        Args:
+            remote: Optional remote name to show configuration for
+            obscure: Show obscured passwords
+            no_obscure: Show passwords in plain text
+
+        Returns:
+            List of parsed paths or an Exception if an error occurred
+        """
+        cmd_list: list[str] = ["config", "show"]
+
+        if remote is not None:
+            cmd_list.append(remote)
+
+        if obscure:
+            cmd_list.append("--obscure")
+
+        if no_obscure:
+            cmd_list.append("--no-obscure")
+
+        try:
+            cp = self._run(cmd_list, capture=True, check=True)
+            stdout: str | bytes = cp.stdout
+            if isinstance(stdout, bytes):
+                stdout = stdout.decode("utf-8")
+            out = _parse_paths(stdout)
+            return out
+        except subprocess.CalledProcessError as e:
+            return e
+
+    def size_files(
+        self,
+        src: str,
+        files: list[str],
+        fast_list: bool = False,  # Recommend that this is False
+        other_args: list[str] | None = None,
+        check: bool | None = False,
+        verbose: bool | None = None,
+    ) -> SizeResult | Exception:
+        """Get the size of a list of files. Example of files items: "remote:bucket/to/file"."""
+        verbose = get_verbose(verbose)
+        check = get_check(check)
+        if len(files) == 0:
+            return SizeResult(prefix=src, total_size=0, file_sizes={})
+        if len(files) == 1:
+            tmp = self.size_file(files[0])
+            if isinstance(tmp, Exception):
+                return tmp
+            assert isinstance(tmp, SizeSuffix)
+            return SizeResult(
+                prefix=src, total_size=tmp.as_int(), file_sizes={files[0]: tmp.as_int()}
+            )
+        if fast_list or (other_args and "--fast-list" in other_args):
+            warnings.warn(
+                "It's not recommended to use --fast-list with size_files as this will perform poorly on large repositories since the entire repository has to be scanned."
+            )
+        files = list(files)
+        all_files: list[File] = []
+        # prefix, files = group_under_one_prefix(src, files)
+        cmd = ["lsjson", src, "--files-only", "-R"]
+        with TemporaryDirectory() as tmpdir:
+            # print("files: " + ",".join(files))
+            include_files_txt = Path(tmpdir) / "include_files.txt"
+            include_files_txt.write_text("\n".join(files), encoding="utf-8")
+            cmd += ["--files-from", str(include_files_txt)]
+            if fast_list:
+                cmd.append("--fast-list")
+            if other_args:
+                cmd += other_args
+            cp = self._run(cmd, check=check)
+
+            if cp.returncode != 0:
+                if check:
+                    raise ValueError(f"Error getting file sizes: {cp.stderr}")
+                else:
+                    warnings.warn(f"Error getting file sizes: {cp.stderr}")
+            stdout = cp.stdout
+            pieces = src.split(":", 1)
+            remote_name = pieces[0]
+            parent_path: str | None
+            if len(pieces) > 1:
+                parent_path = pieces[1]
+            else:
+                parent_path = None
+            remote = Remote(name=remote_name, rclone=self)
+            paths: list[RPath] = RPath.from_json_str(
+                stdout, remote, parent_path=parent_path
+            )
+            # print(paths)
+            all_files += [File(p) for p in paths]
+        file_sizes: dict[str, int] = {}
+        f: File
+        for f in all_files:
+            p = f.to_string(include_remote=True)
+            if p in file_sizes:
+                warnings.warn(f"Duplicate file found: {p}")
+                continue
+            size = f.size
+            if size == 0:
+                warnings.warn(f"File size is 0: {p}")
+            file_sizes[p] = f.size
+        total_size = sum(file_sizes.values())
+        file_sizes_path_corrected: dict[str, int] = {}
+        for path, size in file_sizes.items():
+            # remove the prefix
+            path_path = Path(path)
+            path_str = path_path.relative_to(src).as_posix()
+            file_sizes_path_corrected[path_str] = size
+        out: SizeResult = SizeResult(
+            prefix=src, total_size=total_size, file_sizes=file_sizes_path_corrected
+        )
+        return out
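+
+    # Usage sketch (editor's illustration; remote and files are hypothetical).
+    # For the multi-file path, entries are fed to --files-from relative to the
+    # src prefix, and the returned file_sizes keys are likewise relative:
+    #
+    #     result = rclone.size_files("dst:bucket/data", ["a/1.bin", "b/2.bin"])
+    #     if not isinstance(result, Exception):
+    #         print(result.total_size, result.file_sizes)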