rclone-api 1.5.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rclone_api/__init__.py +951 -0
- rclone_api/assets/example.txt +1 -0
- rclone_api/cli.py +15 -0
- rclone_api/cmd/analyze.py +51 -0
- rclone_api/cmd/copy_large_s3.py +111 -0
- rclone_api/cmd/copy_large_s3_finish.py +81 -0
- rclone_api/cmd/list_files.py +27 -0
- rclone_api/cmd/save_to_db.py +77 -0
- rclone_api/completed_process.py +60 -0
- rclone_api/config.py +87 -0
- rclone_api/convert.py +31 -0
- rclone_api/db/__init__.py +3 -0
- rclone_api/db/db.py +277 -0
- rclone_api/db/models.py +57 -0
- rclone_api/deprecated.py +24 -0
- rclone_api/detail/copy_file_parts_resumable.py +42 -0
- rclone_api/detail/walk.py +116 -0
- rclone_api/diff.py +164 -0
- rclone_api/dir.py +113 -0
- rclone_api/dir_listing.py +66 -0
- rclone_api/exec.py +40 -0
- rclone_api/experimental/flags.py +89 -0
- rclone_api/experimental/flags_base.py +58 -0
- rclone_api/file.py +205 -0
- rclone_api/file_item.py +68 -0
- rclone_api/file_part.py +198 -0
- rclone_api/file_stream.py +52 -0
- rclone_api/filelist.py +30 -0
- rclone_api/group_files.py +256 -0
- rclone_api/http_server.py +244 -0
- rclone_api/install.py +95 -0
- rclone_api/log.py +44 -0
- rclone_api/mount.py +55 -0
- rclone_api/mount_util.py +247 -0
- rclone_api/process.py +187 -0
- rclone_api/rclone_impl.py +1285 -0
- rclone_api/remote.py +21 -0
- rclone_api/rpath.py +102 -0
- rclone_api/s3/api.py +109 -0
- rclone_api/s3/basic_ops.py +61 -0
- rclone_api/s3/chunk_task.py +187 -0
- rclone_api/s3/create.py +107 -0
- rclone_api/s3/multipart/file_info.py +7 -0
- rclone_api/s3/multipart/finished_piece.py +69 -0
- rclone_api/s3/multipart/info_json.py +239 -0
- rclone_api/s3/multipart/merge_state.py +147 -0
- rclone_api/s3/multipart/upload_info.py +62 -0
- rclone_api/s3/multipart/upload_parts_inline.py +356 -0
- rclone_api/s3/multipart/upload_parts_resumable.py +304 -0
- rclone_api/s3/multipart/upload_parts_server_side_merge.py +546 -0
- rclone_api/s3/multipart/upload_state.py +165 -0
- rclone_api/s3/types.py +67 -0
- rclone_api/scan_missing_folders.py +153 -0
- rclone_api/types.py +402 -0
- rclone_api/util.py +324 -0
- rclone_api-1.5.8.dist-info/LICENSE +21 -0
- rclone_api-1.5.8.dist-info/METADATA +969 -0
- rclone_api-1.5.8.dist-info/RECORD +61 -0
- rclone_api-1.5.8.dist-info/WHEEL +5 -0
- rclone_api-1.5.8.dist-info/entry_points.txt +5 -0
- rclone_api-1.5.8.dist-info/top_level.txt +1 -0
rclone_api/file_part.py
ADDED
@@ -0,0 +1,198 @@
|
|
1
|
+
import atexit
|
2
|
+
import os
|
3
|
+
import time
|
4
|
+
import warnings
|
5
|
+
from pathlib import Path
|
6
|
+
from threading import Lock
|
7
|
+
from typing import Any
|
8
|
+
|
9
|
+
_TMP_DIR_ACCESS_LOCK = Lock()
|
10
|
+
|
11
|
+
|
12
|
+
def _clean_old_files(out: Path) -> None:
    """Remove files older than one day under *out*, then prune empty dirs.

    Args:
        out: Root of the chunk temp directory to clean.
    """
    from rclone_api.util import locked_print

    now = time.time()
    one_day_secs = 60 * 60 * 24
    # Walk bottom-up so that a directory which becomes empty only after its
    # children are removed is itself pruned in the same pass. (A top-down
    # two-pass walk misses such parents: they are inspected before their
    # now-empty subdirectories are deleted.)
    for root, dirs, files in os.walk(out, topdown=False):
        root_path = Path(root)
        for name in files:
            f = root_path / name
            age_days = (now - f.stat().st_mtime) / one_day_secs
            if age_days > 1:
                locked_print(f"Removing old file: {f}")
                f.unlink()
        for name in dirs:
            d = root_path / name
            if not any(d.iterdir()):
                locked_print(f"Removing empty directory: {d}")
                d.rmdir()
|
34
|
+
|
35
|
+
|
36
|
+
def get_chunk_tmpdir() -> Path:
    """Return the shared temp dir for chunk files, creating it on first use.

    The validated Path is cached on the function object so the cleanup of
    stale files runs at most once per process.
    """
    with _TMP_DIR_ACCESS_LOCK:
        cache = get_chunk_tmpdir.__dict__
        cached = cache.get("out")
        if cached is not None:
            # Folder was already validated by an earlier call.
            return cached
        tmpdir = Path("chunk_store")
        if tmpdir.exists():
            # First access in this process: purge stale leftovers.
            _clean_old_files(tmpdir)
        tmpdir.mkdir(exist_ok=True, parents=True)
        cache["out"] = tmpdir
        return tmpdir
|
48
|
+
|
49
|
+
|
50
|
+
# Paths queued for best-effort deletion when the interpreter exits.
_CLEANUP_LIST: list[Path] = []


def _add_for_cleanup(path: Path) -> None:
    """Schedule *path* for deletion at interpreter shutdown."""
    _CLEANUP_LIST.append(path)


def _on_exit_cleanup() -> None:
    """Best-effort removal of every scheduled path; never raises."""
    for p in tuple(_CLEANUP_LIST):
        try:
            if p.exists():
                p.unlink()
        except Exception as e:
            warnings.warn(f"Cannot cleanup {p}: {e}")


atexit.register(_on_exit_cleanup)
|
68
|
+
|
69
|
+
|
70
|
+
# Registry of live FilePart instances, used to debug leaked parts.
_FILEPARTS: list["FilePart"] = []

_FILEPARTS_LOCK = Lock()


def _add_filepart(part: "FilePart") -> None:
    """Register *part* unless it is already tracked."""
    with _FILEPARTS_LOCK:
        if part not in _FILEPARTS:
            _FILEPARTS.append(part)


def _remove_filepart(part: "FilePart") -> None:
    """Forget *part* if it is currently tracked."""
    with _FILEPARTS_LOCK:
        if part in _FILEPARTS:
            _FILEPARTS.remove(part)
|
85
|
+
|
86
|
+
|
87
|
+
def run_debug_parts():
    """Debug helper: print every live FilePart once a minute, forever.

    Intended to be run on a daemon thread (see the commented-out thread
    startup below); never returns.
    """
    while True:
        print("\nAlive file parts:")
        snapshot = list(_FILEPARTS)
        for part in snapshot:
            print(part)
            # print(part.stacktrace)
        print("\n\n")
        time.sleep(60)
|
95
|
+
|
96
|
+
|
97
|
+
# dbg_thread = threading.Thread(target=run_debug_parts)
|
98
|
+
# dbg_thread.start()
|
99
|
+
|
100
|
+
|
101
|
+
class FilePart:
    """A chunk of file data spilled to disk, or an Exception marking failure.

    ``bytes`` payloads are written to a temp file under the chunk dir; ``Path``
    payloads are adopted as-is. Either way the backing file is scheduled for
    best-effort deletion at interpreter exit, and ``dispose()`` deletes it
    eagerly.
    """

    def __init__(self, payload: Path | bytes | Exception, extra: Any) -> None:
        import traceback

        # Capture the creation stack to help track down leaked parts.
        self.stacktrace = "".join(traceback.format_stack())
        # Set early so __del__ is safe even if the rest of __init__ fails.
        self._disposed = False
        _add_filepart(self)

        self.extra = extra
        self._lock = Lock()
        self.payload: Path | Exception
        if isinstance(payload, Exception):
            self.payload = payload
            return

        from rclone_api.util import random_str

        if isinstance(payload, bytes):
            print(f"Creating file part with payload: {len(payload)}")
            self.payload = get_chunk_tmpdir() / f"{random_str(12)}.chunk"
            with _TMP_DIR_ACCESS_LOCK:
                if not self.payload.parent.exists():
                    self.payload.parent.mkdir(parents=True, exist_ok=True)
            self.payload.write_bytes(payload)
            _add_for_cleanup(self.payload)
        if isinstance(payload, Path):
            print("Adopting payload: ", payload)
            self.payload = payload
            _add_for_cleanup(self.payload)

    def get_file(self) -> Path | Exception:
        """Return the backing Path, or the Exception if this part failed."""
        return self.payload

    @property
    def size(self) -> int:
        """Size of the backing file in bytes, or -1 for error payloads."""
        # Delegates to n_bytes() — the two were previously duplicated.
        return self.n_bytes()

    def n_bytes(self) -> int:
        """Size of the backing file in bytes, or -1 for error payloads."""
        with self._lock:
            if isinstance(self.payload, Path):
                return self.payload.stat().st_size
            return -1

    def load(self) -> bytes:
        """Read and return the chunk contents.

        Raises:
            ValueError: if the payload is an Exception.
        """
        with self._lock:
            if isinstance(self.payload, Path):
                with open(self.payload, "rb") as f:
                    return f.read()
            raise ValueError("Cannot load from error")

    def __post_init__(self):
        # NOTE(review): this is never invoked automatically — FilePart is not
        # a dataclass. Kept for backward compatibility with manual callers.
        if isinstance(self.payload, Path):
            assert self.payload.exists(), f"File part {self.payload} does not exist"
            assert self.payload.is_file(), f"File part {self.payload} is not a file"
            assert self.payload.stat().st_size > 0, f"File part {self.payload} is empty"
        elif isinstance(self.payload, Exception):
            warnings.warn(f"File part error: {self.payload}")
        print(f"File part created with payload: {self.payload}")

    def is_error(self) -> bool:
        """True when this part represents a failure instead of data."""
        return isinstance(self.payload, Exception)

    def dispose(self) -> None:
        """Delete the backing file and unregister this part. Idempotent.

        Bug fix: disposal was not idempotent, so an explicit dispose()
        followed by garbage collection (__del__) warned that the file
        "does not exist". A flag now makes repeat calls a no-op.
        """
        if self._disposed:
            return
        self._disposed = True
        _remove_filepart(self)
        print("Disposing file part")
        with self._lock:
            if isinstance(self.payload, Exception):
                warnings.warn(
                    f"Cannot close file part because the payload represents an error: {self.payload}"
                )
                print("Cannot close file part because the payload represents an error")
                return
            if self.payload.exists():
                print(f"File part {self.payload} exists")
                try:
                    print(f"Unlinking file part {self.payload}")
                    self.payload.unlink()
                    print(f"File part {self.payload} deleted")
                except Exception as e:
                    warnings.warn(f"Cannot close file part because of error: {e}")
            else:
                warnings.warn(
                    f"Cannot close file part because it does not exist: {self.payload}"
                )

    def __del__(self):
        # Guard: __init__ may have raised before _disposed was assigned.
        if getattr(self, "_disposed", True) is False:
            self.dispose()

    def __repr__(self):
        from rclone_api.types import SizeSuffix

        payload_str = "err" if self.is_error() else f"{SizeSuffix(self.n_bytes())}"
        return f"FilePart(payload={payload_str}, extra={self.extra})"
|
@@ -0,0 +1,52 @@
|
|
1
|
+
"""
Streaming file listing: wraps a running rclone process and yields FileItem
objects parsed from its JSON output.
"""
|
4
|
+
|
5
|
+
from typing import Generator
|
6
|
+
|
7
|
+
from rclone_api.file import FileItem
|
8
|
+
from rclone_api.process import Process
|
9
|
+
|
10
|
+
|
11
|
+
class FilesStream:
    """Iterates FileItem objects parsed line-by-line from a process's stdout.

    The process is expected to emit a JSON array, one object per line; the
    bracket lines and trailing commas are stripped during parsing.
    """

    def __init__(self, path: str, process: Process) -> None:
        self.path = path
        self.process = process

    def __enter__(self) -> "FilesStream":
        self.process.__enter__()
        return self

    def __exit__(self, *exc_info):
        self.process.__exit__(*exc_info)

    def files(self) -> Generator[FileItem, None, None]:
        """Yield one FileItem per JSON object line; skip array delimiters."""
        raw: bytes
        for raw in self.process.stdout:
            text = raw.decode("utf-8").strip()
            if text.startswith("["):
                # Opening bracket of the JSON array — no payload.
                continue
            text = text.removesuffix(",")
            if text.endswith("]"):
                # Closing bracket of the JSON array.
                continue
            item: FileItem | None = FileItem.from_json_str(self.path, text)
            if item is not None:
                yield item

    def files_paged(
        self, page_size: int = 1000
    ) -> Generator[list[FileItem], None, None]:
        """Yield lists of at most *page_size* FileItems each."""
        batch: list[FileItem] = []
        for item in self.files():
            batch.append(item)
            if len(batch) >= page_size:
                yield batch
                batch = []
        if batch:
            yield batch

    def __iter__(self) -> Generator[FileItem, None, None]:
        return self.files()
|
rclone_api/filelist.py
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
from dataclasses import dataclass
|
2
|
+
|
3
|
+
from rclone_api.dir import Dir
|
4
|
+
from rclone_api.dir_listing import DirListing
|
5
|
+
from rclone_api.file import File
|
6
|
+
from rclone_api.rpath import RPath
|
7
|
+
|
8
|
+
|
9
|
+
@dataclass
class FileList:
    """Directories and files returned from a remote listing."""

    dirs: list[Dir]
    files: list[File]

    def _to_dir_list(self) -> list[RPath]:
        # Directories first, then files — the order DirListing displays them.
        combined: list[RPath] = [d.path for d in self.dirs]
        combined.extend(f.path for f in self.files)
        return combined

    def __str__(self) -> str:
        return str(DirListing(self._to_dir_list()))

    def __repr__(self) -> str:
        return repr(DirListing(self._to_dir_list()))
|
@@ -0,0 +1,256 @@
|
|
1
|
+
from dataclasses import dataclass
|
2
|
+
from pathlib import Path
|
3
|
+
|
4
|
+
|
5
|
+
@dataclass
class PrefixResult:
    """Result of factoring a common prefix out of a list of paths."""

    # Common path prefix shared by every entry in `files`.
    prefix: str
    # File paths relative to `prefix`.
    files: list[str]
|
9
|
+
|
10
|
+
|
11
|
+
@dataclass
class FilePathParts:
    """Decomposed rclone path of the form ``remote:parent/.../name``."""

    remote: str  # remote name, without the trailing ":"
    parents: list[str]  # directory components; parents[0] is the bucket
    name: str  # final file name

    def to_string(self, include_remote: bool, include_bucket: bool) -> str:
        """Recombine the parts into a path string.

        Raises IndexError when include_bucket is False and there are no
        parent components to drop.
        """
        parents = list(self.parents)
        if not include_bucket:
            parents.pop(0)
        path = "/".join(parents)
        if path:
            path += "/"
        path += self.name
        if include_remote:
            # Bug fix: the ":" separator was previously omitted, yielding
            # e.g. "dsta/b.txt" instead of the rclone form "dst:a/b.txt".
            return f"{self.remote}:{path}"
        return path


def parse_file(file_path: str) -> FilePathParts:
    """Parse ``remote:dir/.../name`` into its parts.

    Bug fix: only the FIRST ":" separates the remote — a later colon inside
    the path is preserved (the old unbounded split discarded it).
    """
    assert not file_path.endswith("/"), "This looks like a directory path"
    parts = file_path.split(":", 1)
    remote = parts[0]
    path = parts[1]
    if path.startswith("/"):
        path = path[1:]
    parents = path.split("/")
    name = parents.pop()
    return FilePathParts(remote=remote, parents=parents, name=name)
|
46
|
+
|
47
|
+
|
48
|
+
class TreeNode:
|
49
|
+
def __init__(
|
50
|
+
self,
|
51
|
+
name: str,
|
52
|
+
child_nodes: dict[str, "TreeNode"] | None = None,
|
53
|
+
files: list[str] | None = None,
|
54
|
+
parent: "TreeNode | None" = None,
|
55
|
+
):
|
56
|
+
self.name = name
|
57
|
+
self.child_nodes = child_nodes or {}
|
58
|
+
self.files = files or []
|
59
|
+
self.count = 0
|
60
|
+
self.parent = parent
|
61
|
+
|
62
|
+
def add_count_bubble_up(self):
|
63
|
+
self.count += 1
|
64
|
+
if self.parent:
|
65
|
+
self.parent.add_count_bubble_up()
|
66
|
+
|
67
|
+
def get_path(self) -> str:
|
68
|
+
paths_reversed: list[str] = [self.name]
|
69
|
+
node: TreeNode | None = self
|
70
|
+
assert node is not None
|
71
|
+
while True:
|
72
|
+
node = node.parent
|
73
|
+
if node is None:
|
74
|
+
break
|
75
|
+
paths_reversed.append(node.name)
|
76
|
+
return "/".join(reversed(paths_reversed))
|
77
|
+
|
78
|
+
def get_child_subpaths(self, parent_path: str | None = None) -> list[str]:
|
79
|
+
paths: list[str] = []
|
80
|
+
for child in self.child_nodes.values():
|
81
|
+
child_paths = child.get_child_subpaths(parent_path=child.name)
|
82
|
+
paths.extend(child_paths)
|
83
|
+
for file in self.files:
|
84
|
+
if parent_path:
|
85
|
+
file = f"{parent_path}/{file}"
|
86
|
+
paths.append(file)
|
87
|
+
return paths
|
88
|
+
|
89
|
+
def __repr__(self, indent: int = 0) -> str:
|
90
|
+
# return f"{self.name}: {self.count}, {len(self.children)}"
|
91
|
+
leftpad = " " * indent
|
92
|
+
msg = f"{leftpad}{self.name}: {self.count}"
|
93
|
+
if self.child_nodes:
|
94
|
+
# msg += f"\n {len(self.children)} children"
|
95
|
+
msg += "\n"
|
96
|
+
for child in self.child_nodes.values():
|
97
|
+
if isinstance(child, TreeNode):
|
98
|
+
msg += child.__repr__(indent + 2)
|
99
|
+
else:
|
100
|
+
msg += f"{leftpad} {child}\n"
|
101
|
+
return msg
|
102
|
+
|
103
|
+
|
104
|
+
def _merge(node: TreeNode, parent_path: str, out: dict[str, list[str]]) -> None:
    """Recursively group the tree's files into *out*, keyed by directory path.

    Descends while a node has no files and few (<= 2) children; otherwise
    collapses the whole subtree under the current path. Mutates *out* in place.
    """
    parent_path = parent_path + "/" + node.name
    if not node.child_nodes and not node.files:
        return  # empty subtree — nothing to record
    if node.files:
        # This node holds files directly, so don't try to go any deeper:
        # everything below it is grouped under this path.
        filelist = out.setdefault(parent_path, [])
        paths = node.get_child_subpaths()
        for path in paths:
            filelist.append(path)
        out[parent_path] = filelist
        return

    n_child_nodes = len(node.child_nodes)

    # Few children: keep splitting so each branch gets its own group.
    if n_child_nodes <= 2:
        for child in node.child_nodes.values():
            _merge(child, parent_path, out)
        return

    # Many children: collapse the whole subtree under this path.
    filelist = out.setdefault(parent_path, [])
    paths = node.get_child_subpaths()
    for path in paths:
        filelist.append(path)
    out[parent_path] = filelist
    return
|
130
|
+
|
131
|
+
|
132
|
+
def _make_tree(files: list[str]) -> dict[str, TreeNode]:
    """Build one TreeNode tree per remote from fully-qualified file paths.

    Bug fix: the leaf test previously compared ``parent == parts.parents[-1]``,
    which misfires when a directory name repeats along the path (e.g.
    "r:a/b/a/f" attached the file — and bumped counts — at the first "a").
    Positions are compared instead.
    """
    tree: dict[str, TreeNode] = {}
    for file in files:
        parts = parse_file(file)
        node: TreeNode = tree.setdefault(parts.remote, TreeNode(parts.remote))
        if not parts.parents:
            # File sits directly under the remote root.
            node.files.append(parts.name)
            node.add_count_bubble_up()
            continue
        last_idx = len(parts.parents) - 1
        for i, parent in enumerate(parts.parents):
            node = node.child_nodes.setdefault(parent, TreeNode(parent, parent=node))
            if i == last_idx:
                node.files.append(parts.name)
                node.add_count_bubble_up()
    return tree
|
152
|
+
|
153
|
+
|
154
|
+
#
|
155
|
+
def _fixup_rclone_paths(outpaths: dict[str, list[str]]) -> dict[str, list[str]]:
    """Convert "/remote/bucket/..." keys into rclone "remote:bucket/..." form."""
    fixed: dict[str, list[str]] = {}
    for path, files in outpaths.items():
        assert path.startswith("/"), "Path should start with /"
        # Drop the leading slash, then turn the first "/" into ":".
        fixed[path[1:].replace("/", ":", 1)] = files
    return fixed
|
165
|
+
|
166
|
+
|
167
|
+
def group_files(files: list[str], fully_qualified: bool = True) -> dict[str, list[str]]:
    """Group file paths under shared parent-directory prefixes.

    Args:
        files: File paths; "remote:path" form when *fully_qualified* is True.
        fully_qualified: When False, paths have no remote prefix.

    Returns:
        Mapping of "remote:dir" prefixes to the file paths beneath them.
    """
    if fully_qualified is False:
        # Synthesize a fake remote so unqualified paths parse uniformly.
        # Bug fix: build a copy instead of mutating the caller's list.
        files = ["root:" + file for file in files]
    tree: dict[str, TreeNode] = _make_tree(files)
    outpaths: dict[str, list[str]] = {}
    for node in tree.values():
        _merge(node, "", outpaths)
    tmp: dict[str, list[str]] = _fixup_rclone_paths(outpaths=outpaths)
    if fully_qualified:
        return tmp
    # Strip the synthetic "root" remote back off the keys and values.
    out: dict[str, list[str]] = {}
    for path, grouped in tmp.items():
        if path.startswith("root"):
            path = path.replace("root", "")
            if path.startswith(":"):
                path = path[1:]
        out[path] = [f.replace("/root/", "") for f in grouped]
    return out
|
189
|
+
|
190
|
+
|
191
|
+
def group_under_remote_bucket(
    files: list[str], fully_qualified: bool = True
) -> dict[str, list[str]]:
    """Group fully-qualified paths by their "remote:bucket" prefix.

    Values are the paths relative to the bucket (no remote, no bucket).
    """
    assert fully_qualified is True, "Not implemented for fully_qualified=False"
    grouped: dict[str, list[str]] = {}
    for file in files:
        parsed = parse_file(file)
        # parents[0] is the bucket component of the path.
        key = f"{parsed.remote}:{parsed.parents[0]}"
        grouped.setdefault(key, []).append(
            parsed.to_string(include_remote=False, include_bucket=False)
        )
    return grouped
|
206
|
+
|
207
|
+
|
208
|
+
def _get_prefix(path: str) -> tuple[str, str] | None:
    """Split off the first path component.

    Returns (first_component, rest) or None when there is only one component.
    """
    components = Path(path).parts
    if len(components) == 1:
        return None
    return components[0], "/".join(components[1:])
|
214
|
+
|
215
|
+
|
216
|
+
def _common_prefix(prefix: str, files: list[str]) -> PrefixResult:
    """Extend *prefix* by the longest directory prefix shared by all *files*.

    Repeatedly strips one leading component while every file agrees on it,
    returning the grown prefix and the files relative to it.

    NOTE(review): if the files have mixed depths (some with no prefix, some
    with a shared one), the first loop can break early with a one-element
    prefix_set and the second loop's assert may then fire — verify callers
    only pass uniformly-shaped lists.
    """
    if not files:
        return PrefixResult(prefix=prefix, files=[])
    prefix = prefix  # no-op; kept as-is
    tmp: list[str] = list(files)
    while True:
        if not tmp:
            break
        # Collect the leading component of every remaining path.
        prefix_set: set[str | None] = set()
        for file in tmp:
            pair = _get_prefix(file)
            if pair is None:
                break
            _prefix, _ = pair
            prefix_set.add(_prefix)
        # Stop unless every file shares exactly one leading component.
        if len(prefix_set) > 1 or len(prefix_set) == 0:
            break
        next_prefix: str | None = prefix_set.pop()
        if next_prefix is None:
            break
        prefix += f"/{next_prefix}"
        # Strip the consumed component from every path.
        new_tmp: list[str] = []
        for file in tmp:
            pair = _get_prefix(file)
            assert pair is not None
            _, path = pair
            new_tmp.append(path)
        tmp = new_tmp
    return PrefixResult(prefix=prefix, files=tmp)
|
245
|
+
|
246
|
+
|
247
|
+
def group_under_one_prefix(prefix: str, files: list[str]) -> tuple[str, list[str]]:
    """Group *files* under their longest common prefix, starting at *prefix*.

    Returns the combined prefix (in rclone "remote:path" form) and the files
    relative to it.
    """
    if not files:
        return prefix, []
    result = _common_prefix(prefix, files)
    # Normalize "remote:/bucket" back to rclone's "remote:bucket" form.
    return result.prefix.replace(":/", ":"), result.files
|
254
|
+
|
255
|
+
|
256
|
+
# Public API of this module.
__all__ = ["group_files", "group_under_remote_bucket", "group_under_one_prefix"]
|