rclone-api 1.5.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. rclone_api/__init__.py +951 -0
  2. rclone_api/assets/example.txt +1 -0
  3. rclone_api/cli.py +15 -0
  4. rclone_api/cmd/analyze.py +51 -0
  5. rclone_api/cmd/copy_large_s3.py +111 -0
  6. rclone_api/cmd/copy_large_s3_finish.py +81 -0
  7. rclone_api/cmd/list_files.py +27 -0
  8. rclone_api/cmd/save_to_db.py +77 -0
  9. rclone_api/completed_process.py +60 -0
  10. rclone_api/config.py +87 -0
  11. rclone_api/convert.py +31 -0
  12. rclone_api/db/__init__.py +3 -0
  13. rclone_api/db/db.py +277 -0
  14. rclone_api/db/models.py +57 -0
  15. rclone_api/deprecated.py +24 -0
  16. rclone_api/detail/copy_file_parts_resumable.py +42 -0
  17. rclone_api/detail/walk.py +116 -0
  18. rclone_api/diff.py +164 -0
  19. rclone_api/dir.py +113 -0
  20. rclone_api/dir_listing.py +66 -0
  21. rclone_api/exec.py +40 -0
  22. rclone_api/experimental/flags.py +89 -0
  23. rclone_api/experimental/flags_base.py +58 -0
  24. rclone_api/file.py +205 -0
  25. rclone_api/file_item.py +68 -0
  26. rclone_api/file_part.py +198 -0
  27. rclone_api/file_stream.py +52 -0
  28. rclone_api/filelist.py +30 -0
  29. rclone_api/group_files.py +256 -0
  30. rclone_api/http_server.py +244 -0
  31. rclone_api/install.py +95 -0
  32. rclone_api/log.py +44 -0
  33. rclone_api/mount.py +55 -0
  34. rclone_api/mount_util.py +247 -0
  35. rclone_api/process.py +187 -0
  36. rclone_api/rclone_impl.py +1285 -0
  37. rclone_api/remote.py +21 -0
  38. rclone_api/rpath.py +102 -0
  39. rclone_api/s3/api.py +109 -0
  40. rclone_api/s3/basic_ops.py +61 -0
  41. rclone_api/s3/chunk_task.py +187 -0
  42. rclone_api/s3/create.py +107 -0
  43. rclone_api/s3/multipart/file_info.py +7 -0
  44. rclone_api/s3/multipart/finished_piece.py +69 -0
  45. rclone_api/s3/multipart/info_json.py +239 -0
  46. rclone_api/s3/multipart/merge_state.py +147 -0
  47. rclone_api/s3/multipart/upload_info.py +62 -0
  48. rclone_api/s3/multipart/upload_parts_inline.py +356 -0
  49. rclone_api/s3/multipart/upload_parts_resumable.py +304 -0
  50. rclone_api/s3/multipart/upload_parts_server_side_merge.py +546 -0
  51. rclone_api/s3/multipart/upload_state.py +165 -0
  52. rclone_api/s3/types.py +67 -0
  53. rclone_api/scan_missing_folders.py +153 -0
  54. rclone_api/types.py +402 -0
  55. rclone_api/util.py +324 -0
  56. rclone_api-1.5.8.dist-info/LICENSE +21 -0
  57. rclone_api-1.5.8.dist-info/METADATA +969 -0
  58. rclone_api-1.5.8.dist-info/RECORD +61 -0
  59. rclone_api-1.5.8.dist-info/WHEEL +5 -0
  60. rclone_api-1.5.8.dist-info/entry_points.txt +5 -0
  61. rclone_api-1.5.8.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1285 @@
"""
Rclone API implementation (rclone_impl.py).
"""

import os
import random
import subprocess
import time
import warnings
from concurrent.futures import Future, ThreadPoolExecutor
from datetime import datetime
from fnmatch import fnmatch
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Generator

from rclone_api import Dir
from rclone_api.completed_process import CompletedProcess
from rclone_api.config import Config, Parsed, Section
from rclone_api.convert import convert_to_filestr_list, convert_to_str
from rclone_api.deprecated import deprecated
from rclone_api.detail.walk import walk
from rclone_api.diff import DiffItem, DiffOption, diff_stream_from_running_process
from rclone_api.dir_listing import DirListing
from rclone_api.exec import RcloneExec
from rclone_api.file import File
from rclone_api.file_stream import FilesStream
from rclone_api.group_files import group_files
from rclone_api.http_server import HttpServer
from rclone_api.mount import Mount
from rclone_api.process import Process
from rclone_api.remote import Remote
from rclone_api.rpath import RPath
from rclone_api.s3.create import S3Credentials
from rclone_api.s3.types import (
    S3Provider,
)
from rclone_api.types import (
    ListingOption,
    ModTimeStrategy,
    Order,
    PartInfo,
    SizeResult,
    SizeSuffix,
)
from rclone_api.util import (
    find_free_port,
    get_check,
    get_rclone_exe,
    get_verbose,
    to_path,
)


def rclone_verbose(verbose: bool | None) -> bool:
    if verbose is not None:
        os.environ["RCLONE_API_VERBOSE"] = "1" if verbose else "0"
    return bool(int(os.getenv("RCLONE_API_VERBOSE", "0")))


def _to_rclone_conf(config: Config | Path) -> Config:
    if isinstance(config, Path):
        content = config.read_text(encoding="utf-8")
        return Config(content)
    else:
        return config


class RcloneImpl:
    def __init__(
        self, rclone_conf: Path | Config, rclone_exe: Path | None = None
    ) -> None:
        if isinstance(rclone_conf, Path):
            if not rclone_conf.exists():
                raise ValueError(f"Rclone config file not found: {rclone_conf}")
        self._exec = RcloneExec(rclone_conf, get_rclone_exe(rclone_exe))
        self.config: Config = _to_rclone_conf(rclone_conf)

    def _run(
        self, cmd: list[str], check: bool = False, capture: bool | Path | None = None
    ) -> subprocess.CompletedProcess:
        return self._exec.execute(cmd, check=check, capture=capture)

    def _launch_process(
        self, cmd: list[str], capture: bool | None = None, log: Path | None = None
    ) -> Process:
        return self._exec.launch_process(cmd, capture=capture, log=log)

    def _get_tmp_mount_dir(self) -> Path:
        return Path("tmp_mnts")

    def _get_cache_dir(self) -> Path:
        return Path("cache")

    def webgui(self, other_args: list[str] | None = None) -> Process:
        """Launch the Rclone web GUI."""
        cmd = ["rcd", "--rc-web-gui"]
        if other_args:
            cmd += other_args
        return self._launch_process(cmd, capture=False)

    def launch_server(
        self,
        addr: str,
        user: str | None = None,
        password: str | None = None,
        other_args: list[str] | None = None,
    ) -> Process:
        """Launch the Rclone server so it can receive commands."""
        cmd = ["rcd"]
        if addr is not None:
            cmd += ["--rc-addr", addr]
        if user is not None:
            cmd += ["--rc-user", user]
        if password is not None:
            cmd += ["--rc-pass", password]
        if other_args:
            cmd += other_args
        out = self._launch_process(cmd, capture=False)
        time.sleep(1)  # Give it some time to launch.
        return out

    def remote_control(
        self,
        addr: str,
        user: str | None = None,
        password: str | None = None,
        capture: bool | None = None,
        other_args: list[str] | None = None,
    ) -> CompletedProcess:
        cmd = ["rc"]
        if addr:
            cmd += ["--rc-addr", addr]
        if user is not None:
            cmd += ["--rc-user", user]
        if password is not None:
            cmd += ["--rc-pass", password]
        if other_args:
            cmd += other_args
        cp = self._run(cmd, capture=capture)
        return CompletedProcess.from_subprocess(cp)

    def obscure(self, password: str) -> str:
        """Obscure a password for use in rclone config files."""
        cmd_list: list[str] = ["obscure", password]
        cp = self._run(cmd_list)
        return cp.stdout.strip()

    def ls_stream(
        self,
        src: str,
        max_depth: int = -1,
        fast_list: bool = False,
    ) -> FilesStream:
        """
        List files in the given path as a stream.

        Args:
            src: Remote path to list
            max_depth: Maximum recursion depth (-1 for unlimited)
            fast_list: Use fast list (only use when listing the entire
                repository from the root/bucket, or when the tree is small)
        """
        cmd = ["lsjson", src, "--files-only"]
        recurse = max_depth < 0 or max_depth > 1
        if recurse:
            cmd.append("-R")
        if max_depth > 1:
            cmd += ["--max-depth", str(max_depth)]
        if fast_list:
            cmd.append("--fast-list")
        streamer = FilesStream(src, self._launch_process(cmd, capture=True))
        return streamer
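
    # A usage sketch, not part of the original module ("remote:bucket" and the
    # rclone.conf path are hypothetical). The stream is a context manager and
    # yields pages of files, mirroring how save_to_db consumes it below:
    #
    #   rclone = RcloneImpl(Path("rclone.conf"))
    #   with rclone.ls_stream("remote:bucket", max_depth=-1) as stream:
    #       for page in stream.files_paged(page_size=1000):
    #           for file in page:
    #               print(file)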

    def save_to_db(
        self,
        src: str,
        db_url: str,
        max_depth: int = -1,
        fast_list: bool = False,
    ) -> None:
        """
        Save a file listing to a database (sqlite, mysql, postgres).

        Args:
            src: Remote path to list; this populates an entire table, so
                always use the root-most path.
            db_url: Database URL, like sqlite:///data.db or
                mysql://user:pass@localhost/db or postgres://user:pass@localhost/db
            max_depth: Maximum depth to traverse (-1 for unlimited)
            fast_list: Use fast list (only use when listing the entire
                repository from the root/bucket)
        """
        from rclone_api.db import DB

        db = DB(db_url)
        with self.ls_stream(src, max_depth, fast_list) as stream:
            for page in stream.files_paged(page_size=10000):
                db.add_files(page)
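
    # A usage sketch (hypothetical paths; not part of the original module):
    #
    #   rclone = RcloneImpl(Path("rclone.conf"))
    #   rclone.save_to_db("remote:bucket", "sqlite:///files.db", max_depth=-1)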

    def ls(
        self,
        src: Dir | Remote | str | None = None,
        max_depth: int | None = None,
        glob: str | None = None,
        order: Order = Order.NORMAL,
        listing_option: ListingOption = ListingOption.ALL,
    ) -> DirListing:
        """List files in the given path.

        Args:
            src: Remote path or Remote object to list (None lists the remotes themselves)
            max_depth: Maximum recursion depth (0 means no recursion)
            glob: Optional glob pattern to filter the results
            order: Ordering of the results (normal, reverse, or random)
            listing_option: Restrict the listing to files only, dirs only, or all

        Returns:
            DirListing of the paths found at the given location
        """

        if src is None:
            # List remotes instead.
            list_remotes: list[Remote] = self.listremotes()
            dirs: list[Dir] = [Dir(remote) for remote in list_remotes]
            for d in dirs:
                d.path.path = ""
            rpaths = [d.path for d in dirs]
            return DirListing(rpaths)

        if isinstance(src, str):
            src = Dir(
                to_path(src, self)
            )  # Assume it's a directory if ls is being called.

        cmd = ["lsjson"]
        if max_depth is not None:
            if max_depth < 0:
                cmd.append("--recursive")
            if max_depth > 0:
                cmd.append("--max-depth")
                cmd.append(str(max_depth))
        if listing_option != ListingOption.ALL:
            cmd.append(f"--{listing_option.value}")

        cmd.append(str(src))
        remote = src.remote if isinstance(src, Dir) else src
        assert isinstance(remote, Remote)

        cp = self._run(cmd, check=True)
        text = cp.stdout
        parent_path: str | None = None
        if isinstance(src, Dir):
            parent_path = src.path.path
        paths: list[RPath] = RPath.from_json_str(text, remote, parent_path=parent_path)
        for o in paths:
            o.set_rclone(self)

        # Do we have a glob pattern?
        if glob is not None:
            paths = [p for p in paths if fnmatch(p.path, glob)]

        if order == Order.REVERSE:
            paths.reverse()
        elif order == Order.RANDOM:
            random.shuffle(paths)
        return DirListing(paths)
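
    # A usage sketch (hypothetical remote name; not part of the original module):
    #
    #   rclone = RcloneImpl(Path("rclone.conf"))
    #   listing = rclone.ls("remote:bucket/path", max_depth=0, glob="*.txt")
    #   for file in listing.files:
    #       print(file)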

    def print(self, src: str) -> Exception | None:
        """Print the contents of a file."""
        try:
            text_or_err = self.read_text(src)
            if isinstance(text_or_err, Exception):
                return text_or_err
            print(text_or_err)
        except Exception as e:
            return e
        return None

    def stat(self, src: str) -> File | Exception:
        """Get the status of a file or directory."""
        dirlist: DirListing = self.ls(src)
        if len(dirlist.files) == 0:
            return FileNotFoundError(f"File not found: {src}")
        try:
            file: File = dirlist.files[0]
            return file
        except Exception as e:
            return e

    def modtime(self, src: str) -> str | Exception:
        """Get the modification time of a file or directory."""
        try:
            file: File | Exception = self.stat(src)
            if isinstance(file, Exception):
                return file
            return file.mod_time()
        except Exception as e:
            return e

    def modtime_dt(self, src: str) -> datetime | Exception:
        """Get the modification time of a file or directory as a datetime."""
        modtime: str | Exception = self.modtime(src)
        if isinstance(modtime, Exception):
            return modtime
        return datetime.fromisoformat(modtime)

    def listremotes(self) -> list[Remote]:
        cmd = ["listremotes"]
        cp = self._run(cmd)
        text: str = cp.stdout
        tmp = text.splitlines()
        tmp = [t.strip() for t in tmp]
        # Strip the trailing ":" from each remote name.
        tmp = [t.replace(":", "") for t in tmp]
        out = [Remote(name=t, rclone=self) for t in tmp]
        return out

    def diff(
        self,
        src: str,
        dst: str,
        min_size: (
            str | None
        ) = None,  # e.g. "1MB" - see rclone documentation: https://rclone.org/commands/rclone_check/
        max_size: (
            str | None
        ) = None,  # e.g. "1GB" - see rclone documentation: https://rclone.org/commands/rclone_check/
        diff_option: DiffOption = DiffOption.COMBINED,
        fast_list: bool = True,
        size_only: bool | None = None,
        checkers: int | None = None,
        other_args: list[str] | None = None,
    ) -> Generator[DiffItem, None, None]:
        """Be extra careful with the src and dst values. If you are off by one
        parent directory, you will get a huge number of false diffs."""
        other_args = other_args or []
        if checkers is None or checkers < 1:
            checkers = 1000
        cmd = [
            "check",
            src,
            dst,
            "--checkers",
            str(checkers),
            "--log-level",
            "INFO",
            f"--{diff_option.value}",
            "-",
        ]
        if size_only is None:
            size_only = diff_option in [
                DiffOption.MISSING_ON_DST,
                DiffOption.MISSING_ON_SRC,
            ]
        if size_only:
            cmd += ["--size-only"]
        if fast_list:
            cmd += ["--fast-list"]
        if min_size:
            cmd += ["--min-size", min_size]
        if max_size:
            cmd += ["--max-size", max_size]
        if diff_option == DiffOption.MISSING_ON_DST:
            cmd += ["--one-way"]
        if other_args:
            cmd += other_args
        proc = self._launch_process(cmd, capture=True)
        item: DiffItem
        for item in diff_stream_from_running_process(
            running_process=proc, src_slug=src, dst_slug=dst, diff_option=diff_option
        ):
            if item is None:
                break
            yield item
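
    # A usage sketch (hypothetical remotes; not part of the original module).
    # Streams DiffItem results from a running `rclone check`:
    #
    #   rclone = RcloneImpl(Path("rclone.conf"))
    #   for item in rclone.diff("src:bucket/data", "dst:bucket/data"):
    #       print(item)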

    def walk(
        self,
        src: Dir | Remote | str,
        max_depth: int = -1,
        breadth_first: bool = True,
        order: Order = Order.NORMAL,
    ) -> Generator[DirListing, None, None]:
        """Walk through the given path recursively.

        Args:
            src: Remote path or Remote object to walk through
            max_depth: Maximum depth to traverse (-1 for unlimited)

        Yields:
            DirListing: Directory listing for each directory encountered
        """
        dir_obj: Dir
        if isinstance(src, Dir):
            # Create a Dir object rooted at the same remote and path.
            remote = src.remote
            rpath = RPath(
                remote=remote,
                path=src.path.path,
                name=src.path.name,
                size=0,
                mime_type="inode/directory",
                mod_time="",
                is_dir=True,
            )
            rpath.set_rclone(self)
            dir_obj = Dir(rpath)
        elif isinstance(src, str):
            dir_obj = Dir(to_path(src, self))
        elif isinstance(src, Remote):
            dir_obj = Dir(src)
        else:
            raise TypeError(f"Invalid type for path: {type(src)}")

        yield from walk(
            dir_obj, max_depth=max_depth, breadth_first=breadth_first, order=order
        )
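
    # A usage sketch (hypothetical remote; not part of the original module):
    #
    #   rclone = RcloneImpl(Path("rclone.conf"))
    #   for dirlisting in rclone.walk("remote:bucket", max_depth=2):
    #       for file in dirlisting.files:
    #           print(file)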

    def scan_missing_folders(
        self,
        src: Dir | Remote | str,
        dst: Dir | Remote | str,
        max_depth: int = -1,
        order: Order = Order.NORMAL,
    ) -> Generator[Dir, None, None]:
        """Scan for folders present in src but missing in dst.

        WORK IN PROGRESS!!

        Args:
            src: Source directory or Remote to walk through
            dst: Destination directory or Remote to walk through
            max_depth: Maximum depth to traverse (-1 for unlimited)

        Yields:
            Dir: Each directory found to be missing on the destination
        """
        from rclone_api.scan_missing_folders import scan_missing_folders

        src_dir = Dir(to_path(src, self))
        dst_dir = Dir(to_path(dst, self))
        yield from scan_missing_folders(
            src=src_dir, dst=dst_dir, max_depth=max_depth, order=order
        )

    def cleanup(
        self, src: str, other_args: list[str] | None = None
    ) -> CompletedProcess:
        """Clean up the remote if possible (runs `rclone cleanup remote:path`)."""
        cmd = ["cleanup", src]
        if other_args:
            cmd += other_args
        out = self._run(cmd)
        return CompletedProcess.from_subprocess(out)

    def get_verbose(self) -> bool:
        return get_verbose(None)

    def copy_to(
        self,
        src: File | str,
        dst: File | str,
        check: bool | None = None,
        verbose: bool | None = None,
        other_args: list[str] | None = None,
    ) -> CompletedProcess:
        """Copy one file from source to destination.

        Warning - slow.
        """
        check = get_check(check)
        verbose = get_verbose(verbose)
        src = src if isinstance(src, str) else str(src.path)
        dst = dst if isinstance(dst, str) else str(dst.path)
        cmd_list: list[str] = ["copyto", src, dst, "--s3-no-check-bucket"]
        if other_args is not None:
            cmd_list += other_args
        cp = self._run(cmd_list, check=check)
        return CompletedProcess.from_subprocess(cp)

    def copy_files(
        self,
        src: str,
        dst: str,
        files: list[str] | Path,
        check: bool | None = None,
        max_backlog: int | None = None,
        verbose: bool | None = None,
        checkers: int | None = None,
        transfers: int | None = None,
        low_level_retries: int | None = None,
        retries: int | None = None,
        retries_sleep: str | None = None,
        metadata: bool | None = None,
        timeout: str | None = None,
        max_partition_workers: int | None = None,
        multi_thread_streams: int | None = None,
        other_args: list[str] | None = None,
    ) -> list[CompletedProcess]:
        """Copy multiple files from source to destination.

        Args:
            src: Source prefix (remote:path)
            dst: Destination prefix (remote:path)
            files: List of file paths relative to src, or a Path to a file
                containing one such path per line
        """
        check = get_check(check)
        max_partition_workers = max_partition_workers or 1
        low_level_retries = low_level_retries or 10
        retries = retries or 3
        other_args = other_args or []
        other_args.append("--s3-no-check-bucket")
        checkers = checkers or 1000
        transfers = transfers or 32
        verbose = get_verbose(verbose)
        payload: list[str] = (
            files
            if isinstance(files, list)
            else [f.strip() for f in files.read_text().splitlines() if f.strip()]
        )
        if len(payload) == 0:
            return []

        for p in payload:
            if ":" in p:
                raise ValueError(
                    f"Invalid file path, contains a remote, which is not allowed for copy_files: {p}"
                )

        using_fast_list = "--fast-list" in other_args
        if using_fast_list:
            warnings.warn(
                "It's not recommended to use --fast-list with copy_files as this will perform poorly on large repositories since the entire repository has to be scanned."
            )

        if max_partition_workers > 1:
            datalists: dict[str, list[str]] = group_files(
                payload, fully_qualified=False
            )
        else:
            datalists = {"": payload}
        out: list[CompletedProcess] = []

        futures: list[Future] = []

        with ThreadPoolExecutor(max_workers=max_partition_workers) as executor:
            for common_prefix, files in datalists.items():

                def _task(
                    files: list[str] | Path = files,
                ) -> subprocess.CompletedProcess:
                    with TemporaryDirectory() as tmpdir:
                        filelist: list[str] = []
                        filepath: Path
                        if isinstance(files, list):
                            include_files_txt = Path(tmpdir) / "include_files.txt"
                            include_files_txt.write_text(
                                "\n".join(files), encoding="utf-8"
                            )
                            filelist = list(files)
                            filepath = Path(include_files_txt)
                        elif isinstance(files, Path):
                            filelist = [
                                f.strip()
                                for f in files.read_text().splitlines()
                                if f.strip()
                            ]
                            filepath = files
                        if common_prefix:
                            src_path = f"{src}/{common_prefix}"
                            dst_path = f"{dst}/{common_prefix}"
                        else:
                            src_path = src
                            dst_path = dst

                        if verbose:
                            nfiles = len(filelist)
                            files_fqdn = [f" {src_path}/{f}" for f in filelist]
                            print(f"Copying {nfiles} files:")
                            chunk_size = 100
                            for i in range(0, nfiles, chunk_size):
                                chunk = files_fqdn[i : i + chunk_size]
                                files_str = "\n".join(chunk)
                                print(f"{files_str}")
                        cmd_list: list[str] = [
                            "copy",
                            src_path,
                            dst_path,
                            "--files-from",
                            str(filepath),
                            "--checkers",
                            str(checkers),
                            "--transfers",
                            str(transfers),
                            "--low-level-retries",
                            str(low_level_retries),
                            "--retries",
                            str(retries),
                        ]
                        if metadata:
                            cmd_list.append("--metadata")
                        if retries_sleep is not None:
                            cmd_list += ["--retries-sleep", retries_sleep]
                        if timeout is not None:
                            cmd_list += ["--timeout", timeout]
                        if max_backlog is not None:
                            cmd_list += ["--max-backlog", str(max_backlog)]
                        if multi_thread_streams is not None:
                            cmd_list += [
                                "--multi-thread-streams",
                                str(multi_thread_streams),
                            ]
                        if verbose:
                            if not any(["-v" in x for x in other_args]):
                                cmd_list.append("-vvvv")
                            if not any(["--progress" in x for x in other_args]):
                                cmd_list.append("--progress")
                        if other_args:
                            cmd_list += other_args
                        return self._run(cmd_list, capture=not verbose)

                fut: Future = executor.submit(_task)
                futures.append(fut)
        for fut in futures:
            cp: subprocess.CompletedProcess = fut.result()
            assert cp is not None
            out.append(CompletedProcess.from_subprocess(cp))
            if cp.returncode != 0:
                if check:
                    raise ValueError(f"Error copying files: {cp.stderr}")
                else:
                    warnings.warn(f"Error copying files: {cp.stderr}")
        return out
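
    # A usage sketch (hypothetical remote and file names; not part of the original module):
    #
    #   rclone = RcloneImpl(Path("rclone.conf"))
    #   rclone.copy_files(
    #       src="remote:bucket/src",
    #       dst="remote:bucket/dst",
    #       files=["a.txt", "sub/b.txt"],  # paths relative to src, no remote prefix
    #   )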

    def copy(
        self,
        src: Dir | str,
        dst: Dir | str,
        check: bool | None = None,
        transfers: int | None = None,
        checkers: int | None = None,
        multi_thread_streams: int | None = None,
        low_level_retries: int | None = None,
        retries: int | None = None,
        other_args: list[str] | None = None,
    ) -> CompletedProcess:
        """Copy files from source to destination.

        Args:
            src: Source directory
            dst: Destination directory
        """
        src_dir = convert_to_str(src)
        dst_dir = convert_to_str(dst)
        check = get_check(check)
        checkers = checkers or 1000
        transfers = transfers or 32
        low_level_retries = low_level_retries or 10
        retries = retries or 3
        cmd_list: list[str] = ["copy", src_dir, dst_dir]
        cmd_list += ["--checkers", str(checkers)]
        cmd_list += ["--transfers", str(transfers)]
        cmd_list += ["--low-level-retries", str(low_level_retries)]
        cmd_list += ["--retries", str(retries)]
        cmd_list.append("--s3-no-check-bucket")
        if multi_thread_streams is not None:
            cmd_list += ["--multi-thread-streams", str(multi_thread_streams)]
        if other_args:
            cmd_list += other_args
        cp = self._run(cmd_list, check=check, capture=False)
        return CompletedProcess.from_subprocess(cp)

    def purge(self, src: Dir | str) -> CompletedProcess:
        """Purge a directory."""
        # The path should always be a string.
        src = src if isinstance(src, str) else str(src.path)
        cmd_list: list[str] = ["purge", str(src)]
        cp = self._run(cmd_list)
        return CompletedProcess.from_subprocess(cp)

    def delete_files(
        self,
        files: str | File | list[str] | list[File],
        check: bool | None = None,
        rmdirs=False,
        verbose: bool | None = None,
        max_partition_workers: int | None = None,
        other_args: list[str] | None = None,
    ) -> CompletedProcess:
        """Delete files, optionally removing directories they leave empty."""
        check = get_check(check)
        verbose = get_verbose(verbose)
        payload: list[str] = convert_to_filestr_list(files)
        if len(payload) == 0:
            if verbose:
                print("No files to delete")
            cp = subprocess.CompletedProcess(
                args=["rclone", "delete", "--files-from", "[]"],
                returncode=0,
                stdout="",
                stderr="",
            )
            return CompletedProcess.from_subprocess(cp)

        datalists: dict[str, list[str]] = group_files(payload)
        completed_processes: list[subprocess.CompletedProcess] = []

        futures: list[Future] = []

        with ThreadPoolExecutor(max_workers=max_partition_workers) as executor:

            for remote, files in datalists.items():

                def _task(
                    files=files, check=check, remote=remote
                ) -> subprocess.CompletedProcess:
                    with TemporaryDirectory() as tmpdir:
                        include_files_txt = Path(tmpdir) / "include_files.txt"
                        include_files_txt.write_text("\n".join(files), encoding="utf-8")

                        cmd_list: list[str] = [
                            "delete",
                            remote,
                            "--files-from",
                            str(include_files_txt),
                            "--checkers",
                            "1000",
                            "--transfers",
                            "1000",
                        ]
                        if verbose:
                            cmd_list.append("-vvvv")
                        if rmdirs:
                            cmd_list.append("--rmdirs")
                        if other_args:
                            cmd_list += other_args
                        out = self._run(cmd_list, check=check)
                        if out.returncode != 0:
                            if check:
                                completed_processes.append(out)
                                raise ValueError(f"Error deleting files: {out}")
                            else:
                                warnings.warn(f"Error deleting files: {out}")
                        return out

                fut: Future = executor.submit(_task)
                futures.append(fut)

            for fut in futures:
                out = fut.result()
                assert out is not None
                completed_processes.append(out)

        return CompletedProcess(completed_processes)
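
    # A usage sketch (hypothetical remote; not part of the original module):
    #
    #   rclone = RcloneImpl(Path("rclone.conf"))
    #   rclone.delete_files(
    #       ["remote:bucket/old/a.txt", "remote:bucket/old/b.txt"],
    #       rmdirs=True,
    #   )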

    @deprecated("delete_files")
    def deletefiles(
        self, files: str | File | list[str] | list[File]
    ) -> CompletedProcess:
        out = self.delete_files(files)
        return out

    def exists(self, src: Dir | Remote | str | File) -> bool:
        """Check if a file or directory exists."""
        arg: str = convert_to_str(src)
        assert isinstance(arg, str)
        try:
            dir_listing = self.ls(arg)
            return len(dir_listing.dirs) > 0 or len(dir_listing.files) > 0
        except subprocess.CalledProcessError:
            return False

    def is_synced(self, src: str | Dir, dst: str | Dir) -> bool:
        """Check if two directories are in sync."""
        src = convert_to_str(src)
        dst = convert_to_str(dst)
        cmd_list: list[str] = ["check", str(src), str(dst)]
        try:
            self._run(cmd_list, check=True)
            return True
        except subprocess.CalledProcessError:
            return False

    def copy_file_s3_resumable(
        self,
        src: str,  # e.g. src:/Bucket/path/myfile.large.zst
        dst: str,  # e.g. dst:/Bucket/path/myfile.large
        part_infos: list[PartInfo] | None = None,
        upload_threads: int = 8,
        merge_threads: int = 4,
    ) -> Exception | None:
        """Copy parts of a file from source to destination."""
        from rclone_api.detail.copy_file_parts_resumable import (
            copy_file_parts_resumable,
        )

        if dst.endswith("/"):
            dst = dst[:-1]
        dst_dir = f"{dst}-parts"

        out = copy_file_parts_resumable(
            self=self,
            src=src,
            dst_dir=dst_dir,
            part_infos=part_infos,
            upload_threads=upload_threads,
            merge_threads=merge_threads,
        )
        return out
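
    # A usage sketch (hypothetical remotes; not part of the original module).
    # Parts are staged under "<dst>-parts", which is what makes an interrupted
    # copy resumable:
    #
    #   rclone = RcloneImpl(Path("rclone.conf"))
    #   err = rclone.copy_file_s3_resumable(
    #       src="src:Bucket/path/myfile.large.zst",
    #       dst="dst:Bucket/path/myfile.large",
    #   )
    #   if err is not None:
    #       raise err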

    def write_text(
        self,
        dst: str,
        text: str,
    ) -> Exception | None:
        """Write text to a file."""
        data = text.encode("utf-8")
        return self.write_bytes(dst=dst, data=data)

    def write_bytes(
        self,
        dst: str,
        data: bytes,
    ) -> Exception | None:
        """Write bytes to a file."""
        with TemporaryDirectory() as tmpdir:
            tmpfile = Path(tmpdir) / "file.bin"
            tmpfile.write_bytes(data)
            completed_proc = self.copy_to(str(tmpfile), dst, check=True)
            if completed_proc.returncode != 0:
                return Exception(f"Failed to write bytes to {dst}", completed_proc)
        return None

    def read_bytes(self, src: str) -> bytes | Exception:
        """Read bytes from a file."""
        with TemporaryDirectory() as tmpdir:
            tmpfile = Path(tmpdir) / "file.bin"
            completed_proc = self.copy_to(src, str(tmpfile), check=True)
            if completed_proc.returncode != 0:
                return Exception(f"Failed to read bytes from {src}", completed_proc)

            if not tmpfile.exists():
                return Exception(f"Failed to read bytes from {src}, file not found")
            try:
                return tmpfile.read_bytes()
            except Exception as e:
                return Exception(f"Failed to read bytes from {src}", e)

    def read_text(self, src: str) -> str | Exception:
        """Read text from a file."""
        data = self.read_bytes(src)
        if isinstance(data, Exception):
            return data
        try:
            return data.decode("utf-8")
        except UnicodeDecodeError as e:
            return Exception(f"Failed to decode text from {src}", e)
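
    # A usage sketch (hypothetical remote path; not part of the original module).
    # These helpers round-trip small payloads through a temporary local file:
    #
    #   rclone = RcloneImpl(Path("rclone.conf"))
    #   err = rclone.write_text("remote:bucket/notes.txt", "hello")
    #   assert err is None
    #   text = rclone.read_text("remote:bucket/notes.txt")
    #   if isinstance(text, Exception):
    #       raise text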

    def size_file(self, src: str) -> SizeSuffix | Exception:
        """Get the size of a single file."""
        # Note: size_files() isn't used here because it can't be limited to
        # exactly one file.
        dirlist: DirListing = self.ls(
            src, listing_option=ListingOption.FILES_ONLY, max_depth=0
        )
        if len(dirlist.files) == 0:
            return FileNotFoundError(f"File not found: {src}")
        if len(dirlist.files) > 1:
            return Exception(f"More than one file found: {src}")
        file: File = dirlist.files[0]
        return SizeSuffix(file.size)

    def get_s3_credentials(
        self, remote: str, verbose: bool | None = None
    ) -> S3Credentials:
        from rclone_api.util import S3PathInfo, split_s3_path

        verbose = get_verbose(verbose)
        path_info: S3PathInfo = split_s3_path(remote)
        remote = path_info.remote
        bucket_name = path_info.bucket

        parsed: Parsed = self.config.parse()
        sections: dict[str, Section] = parsed.sections
        if remote not in sections:
            raise ValueError(
                f"Remote {remote} not found in rclone config, remotes are: {sections.keys()}"
            )

        section: Section = sections[remote]
        dst_type = section.type()
        if dst_type != "s3" and dst_type != "b2":
            raise ValueError(
                f"Remote {remote} is not an S3 remote, it is of type {dst_type}"
            )

        def get_provider_str(section=section) -> str | None:
            type: str = section.type()
            provider: str | None = section.provider()
            if provider is not None:
                return provider
            if type == "b2":
                return S3Provider.BACKBLAZE.value
            if type != "s3":
                raise ValueError(f"Remote {remote} is not an S3 remote")
            return S3Provider.S3.value

        provider: str
        if provided_provider_str := get_provider_str():
            if verbose:
                print(f"Using provided provider: {provided_provider_str}")
            provider = provided_provider_str
        else:
            if verbose:
                print(f"Using default provider: {S3Provider.S3.value}")
            provider = S3Provider.S3.value
        provider_enum = S3Provider.from_str(provider)

        s3_creds: S3Credentials = S3Credentials(
            bucket_name=bucket_name,
            provider=provider_enum,
            access_key_id=section.access_key_id(),
            secret_access_key=section.secret_access_key(),
            endpoint_url=section.endpoint(),
        )
        return s3_creds

    def copy_bytes(
        self,
        src: str,
        offset: int | SizeSuffix,
        length: int | SizeSuffix,
        outfile: Path,
        other_args: list[str] | None = None,
    ) -> Exception | None:
        """Copy a slice of bytes from the src file to outfile."""
        offset = SizeSuffix(offset).as_int()
        length = SizeSuffix(length).as_int()
        cmd_list: list[str] = [
            "cat",
            "--offset",
            str(offset),
            "--count",
            str(length),
            src,
        ]
        if other_args:
            cmd_list.extend(other_args)
        try:
            cp = self._run(cmd_list, capture=outfile)
            if cp.returncode == 0:
                return None
            return Exception(cp.stderr)
        except subprocess.CalledProcessError as e:
            return e
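
    # A usage sketch (hypothetical remote path; not part of the original module).
    # Fetches the first mebibyte of a remote object into a local file via
    # `rclone cat --offset/--count`:
    #
    #   rclone = RcloneImpl(Path("rclone.conf"))
    #   err = rclone.copy_bytes(
    #       src="remote:bucket/big.bin",
    #       offset=0,
    #       length=1024 * 1024,
    #       outfile=Path("slice.bin"),
    #   )
    #   assert err is None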

    def copy_dir(
        self, src: str | Dir, dst: str | Dir, args: list[str] | None = None
    ) -> CompletedProcess:
        """Copy a directory from source to destination."""
        # Convert src and dst to strings.
        src = convert_to_str(src)
        dst = convert_to_str(dst)
        cmd_list: list[str] = ["copy", src, dst, "--s3-no-check-bucket"]
        if args is not None:
            cmd_list += args
        cp = self._run(cmd_list)
        return CompletedProcess.from_subprocess(cp)

    def copy_remote(
        self, src: Remote, dst: Remote, args: list[str] | None = None
    ) -> CompletedProcess:
        """Copy a remote to another remote."""
        cmd_list: list[str] = ["copy", str(src), str(dst), "--s3-no-check-bucket"]
        if args is not None:
            cmd_list += args
        cp = self._run(cmd_list)
        return CompletedProcess.from_subprocess(cp)

    def mount(
        self,
        src: Remote | Dir | str,
        outdir: Path,
        allow_writes: bool | None = False,
        transfers: int | None = None,
        use_links: bool | None = None,
        vfs_cache_mode: str | None = None,
        verbose: bool | None = None,
        cache_dir: Path | None = None,
        cache_dir_delete_on_exit: bool | None = None,
        log: Path | None = None,
        other_args: list[str] | None = None,
    ) -> Mount:
        """Mount a remote or directory to a local path.

        Args:
            src: Remote or directory to mount
            outdir: Local path to mount to

        Returns:
            Mount object wrapping the running mount process

        Raises:
            subprocess.CalledProcessError: If the mount operation fails
        """
        from rclone_api.mount_util import clean_mount, prepare_mount

        allow_writes = allow_writes or False
        use_links = True if use_links is None else use_links
        verbose = get_verbose(verbose) or (log is not None)
        vfs_cache_mode = vfs_cache_mode or "full"
        clean_mount(outdir, verbose=verbose)
        prepare_mount(outdir, verbose=verbose)
        debug_fuse = log is not None
        src_str = convert_to_str(src)
        cmd_list: list[str] = ["mount", src_str, str(outdir)]
        if not allow_writes:
            cmd_list.append("--read-only")
        if use_links:
            cmd_list.append("--links")
        if vfs_cache_mode:
            cmd_list.append("--vfs-cache-mode")
            cmd_list.append(vfs_cache_mode)
        if cache_dir:
            cmd_list.append("--cache-dir")
            cmd_list.append(str(cache_dir.absolute()))
        if transfers is not None:
            cmd_list.append("--transfers")
            cmd_list.append(str(transfers))
        if debug_fuse:
            cmd_list.append("--debug-fuse")
        if verbose:
            cmd_list.append("-vvvv")
        if other_args:
            cmd_list += other_args
        proc = self._launch_process(cmd_list, log=log)
        mount_read_only = not allow_writes
        mount: Mount = Mount(
            src=src_str,
            mount_path=outdir,
            process=proc,
            read_only=mount_read_only,
            cache_dir=cache_dir,
            cache_dir_delete_on_exit=cache_dir_delete_on_exit,
        )
        return mount
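
    # A usage sketch (hypothetical remote and mount point; not part of the original module):
    #
    #   rclone = RcloneImpl(Path("rclone.conf"))
    #   mount = rclone.mount("remote:bucket", Path("./mnt"), allow_writes=False)
    #   ...  # read files under ./mnt
    #   mount.close()  # hypothetical teardown; see the Mount class for the actual API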

    # Settings optimized for S3.
    def mount_s3(
        self,
        url: str,
        outdir: Path,
        allow_writes=False,
        vfs_cache_mode="full",
        dir_cache_time: str | None = "1h",
        attribute_timeout: str | None = "1h",
        vfs_disk_space_total_size: str | None = "100M",
        transfers: int | None = 128,
        modtime_strategy: (
            ModTimeStrategy | None
        ) = ModTimeStrategy.USE_SERVER_MODTIME,  # speeds up S3 operations
        vfs_read_chunk_streams: int | None = 16,
        vfs_read_chunk_size: str | None = "4M",
        vfs_fast_fingerprint: bool = True,
        vfs_refresh: bool = True,
        other_args: list[str] | None = None,
    ) -> Mount:
        """Mount an S3 remote to a local path with S3-friendly defaults.

        Args:
            url: Remote or directory to mount
            outdir: Local path to mount to
        """
        other_args = other_args or []
        if modtime_strategy is not None:
            other_args.append(f"--{modtime_strategy.value}")
        if (vfs_cache_mode == "full" or vfs_cache_mode == "writes") and (
            transfers is not None and "--transfers" not in other_args
        ):
            other_args.append("--transfers")
            other_args.append(str(transfers))
        if dir_cache_time is not None and "--dir-cache-time" not in other_args:
            other_args.append("--dir-cache-time")
            other_args.append(dir_cache_time)
        if (
            vfs_disk_space_total_size is not None
            and "--vfs-cache-max-size" not in other_args
        ):
            other_args.append("--vfs-cache-max-size")
            other_args.append(vfs_disk_space_total_size)
        if vfs_refresh and "--vfs-refresh" not in other_args:
            other_args.append("--vfs-refresh")
        if attribute_timeout is not None and "--attr-timeout" not in other_args:
            other_args.append("--attr-timeout")
            other_args.append(attribute_timeout)
        if vfs_read_chunk_streams:
            other_args.append("--vfs-read-chunk-streams")
            other_args.append(str(vfs_read_chunk_streams))
        if vfs_read_chunk_size:
            other_args.append("--vfs-read-chunk-size")
            other_args.append(vfs_read_chunk_size)
        if vfs_fast_fingerprint:
            other_args.append("--vfs-fast-fingerprint")

        other_args = other_args if other_args else None
        return self.mount(
            url,
            outdir,
            allow_writes=allow_writes,
            vfs_cache_mode=vfs_cache_mode,
            other_args=other_args,
        )

    def serve_webdav(
        self,
        src: Remote | Dir | str,
        user: str,
        password: str,
        addr: str = "localhost:2049",
        allow_other: bool = False,
        other_args: list[str] | None = None,
    ) -> Process:
        """Serve a remote or directory via WebDAV.

        Args:
            src: Remote or directory to serve
            addr: Network address and port to serve on (default: localhost:2049)
            allow_other: Allow other users to access the share

        Returns:
            Process: The running WebDAV server process

        Raises:
            ValueError: If the WebDAV server fails to start
        """
        src_str = convert_to_str(src)
        cmd_list: list[str] = ["serve", "webdav", "--addr", addr, src_str]
        cmd_list.extend(["--user", user, "--pass", password])
        if allow_other:
            cmd_list.append("--allow-other")
        if other_args:
            cmd_list += other_args
        proc = self._launch_process(cmd_list)
        time.sleep(2)  # Give it a moment to start.
        if proc.poll() is not None:
            raise ValueError("WebDAV serve process failed to start")
        return proc

    def serve_http(
        self,
        src: str,
        addr: str | None = None,
        serve_http_log: Path | None = None,
        other_args: list[str] | None = None,
    ) -> HttpServer:
        """Serve a remote or directory via HTTP.

        Args:
            src: Remote or directory to serve
            addr: Network address and port to serve on (default: localhost with a free port)
        """
        addr = addr or f"localhost:{find_free_port()}"
        _, subpath = src.split(":", 1)  # May not work on local paths.
        cmd_list: list[str] = [
            "serve",
            "http",
            "--addr",
            addr,
            src,
            "--vfs-disk-space-total-size",
            "0",
            "--vfs-read-chunk-size-limit",
            "512M",
            "--vfs-cache-mode",
            "off",
        ]
        if serve_http_log:
            cmd_list += ["--log-file", str(serve_http_log)]
            cmd_list += ["-vvvv"]
        if other_args:
            cmd_list += other_args
        proc = self._launch_process(cmd_list, log=serve_http_log)
        time.sleep(2)
        if proc.poll() is not None:
            raise ValueError("HTTP serve process failed to start")
        out: HttpServer = HttpServer(
            url=f"http://{addr}", subpath=subpath, process=proc
        )
        return out
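
    # A usage sketch (hypothetical remote; not part of the original module):
    #
    #   rclone = RcloneImpl(Path("rclone.conf"))
    #   server = rclone.serve_http("remote:bucket")
    #   print(server.url)  # e.g. http://localhost:<free port>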

    def size_files(
        self,
        src: str,
        files: list[str],
        fast_list: bool = False,  # Recommend that this is False.
        other_args: list[str] | None = None,
        check: bool | None = False,
        verbose: bool | None = None,
    ) -> SizeResult | Exception:
        """Get the size of a list of files. Example of files items: "remote:bucket/to/file"."""
        verbose = get_verbose(verbose)
        check = get_check(check)
        if len(files) == 0:
            return Exception("No files were given")
        if len(files) == 1:
            tmp = self.size_file(files[0])
            if isinstance(tmp, Exception):
                return tmp
            assert isinstance(tmp, SizeSuffix)
            return SizeResult(
                prefix=src, total_size=tmp.as_int(), file_sizes={files[0]: tmp.as_int()}
            )
        if fast_list or (other_args and "--fast-list" in other_args):
            warnings.warn(
                "It's not recommended to use --fast-list with size_files as this will perform poorly on large repositories since the entire repository has to be scanned."
            )
        files = list(files)
        all_files: list[File] = []
        cmd = ["lsjson", src, "--files-only", "-R"]
        with TemporaryDirectory() as tmpdir:
            include_files_txt = Path(tmpdir) / "include_files.txt"
            include_files_txt.write_text("\n".join(files), encoding="utf-8")
            cmd += ["--files-from", str(include_files_txt)]
            if fast_list:
                cmd.append("--fast-list")
            if other_args:
                cmd += other_args
            cp = self._run(cmd, check=check)

        if cp.returncode != 0:
            if check:
                raise ValueError(f"Error getting file sizes: {cp.stderr}")
            else:
                warnings.warn(f"Error getting file sizes: {cp.stderr}")
        stdout = cp.stdout
        pieces = src.split(":", 1)
        remote_name = pieces[0]
        parent_path: str | None
        if len(pieces) > 1:
            parent_path = pieces[1]
        else:
            parent_path = None
        remote = Remote(name=remote_name, rclone=self)
        paths: list[RPath] = RPath.from_json_str(
            stdout, remote, parent_path=parent_path
        )
        all_files += [File(p) for p in paths]
        file_sizes: dict[str, int] = {}
        f: File
        for f in all_files:
            p = f.to_string(include_remote=True)
            if p in file_sizes:
                warnings.warn(f"Duplicate file found: {p}")
                continue
            size = f.size
            if size == 0:
                warnings.warn(f"File size is 0: {p}")
            file_sizes[p] = f.size
        total_size = sum(file_sizes.values())
        file_sizes_path_corrected: dict[str, int] = {}
        for path, size in file_sizes.items():
            # Remove the src prefix from each path.
            path_path = Path(path)
            path_str = path_path.relative_to(src).as_posix()
            file_sizes_path_corrected[path_str] = size
        out: SizeResult = SizeResult(
            prefix=src, total_size=total_size, file_sizes=file_sizes_path_corrected
        )
        return out
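
    # A usage sketch (hypothetical remote and paths; not part of the original module):
    #
    #   rclone = RcloneImpl(Path("rclone.conf"))
    #   result = rclone.size_files("remote:bucket", ["path/a.txt", "path/b.txt"])
    #   if isinstance(result, Exception):
    #       raise result
    #   print(result.total_size, result.file_sizes)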