rclone-api 1.5.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rclone_api/__init__.py +951 -0
- rclone_api/assets/example.txt +1 -0
- rclone_api/cli.py +15 -0
- rclone_api/cmd/analyze.py +51 -0
- rclone_api/cmd/copy_large_s3.py +111 -0
- rclone_api/cmd/copy_large_s3_finish.py +81 -0
- rclone_api/cmd/list_files.py +27 -0
- rclone_api/cmd/save_to_db.py +77 -0
- rclone_api/completed_process.py +60 -0
- rclone_api/config.py +87 -0
- rclone_api/convert.py +31 -0
- rclone_api/db/__init__.py +3 -0
- rclone_api/db/db.py +277 -0
- rclone_api/db/models.py +57 -0
- rclone_api/deprecated.py +24 -0
- rclone_api/detail/copy_file_parts_resumable.py +42 -0
- rclone_api/detail/walk.py +116 -0
- rclone_api/diff.py +164 -0
- rclone_api/dir.py +113 -0
- rclone_api/dir_listing.py +66 -0
- rclone_api/exec.py +40 -0
- rclone_api/experimental/flags.py +89 -0
- rclone_api/experimental/flags_base.py +58 -0
- rclone_api/file.py +205 -0
- rclone_api/file_item.py +68 -0
- rclone_api/file_part.py +198 -0
- rclone_api/file_stream.py +52 -0
- rclone_api/filelist.py +30 -0
- rclone_api/group_files.py +256 -0
- rclone_api/http_server.py +244 -0
- rclone_api/install.py +95 -0
- rclone_api/log.py +44 -0
- rclone_api/mount.py +55 -0
- rclone_api/mount_util.py +247 -0
- rclone_api/process.py +187 -0
- rclone_api/rclone_impl.py +1285 -0
- rclone_api/remote.py +21 -0
- rclone_api/rpath.py +102 -0
- rclone_api/s3/api.py +109 -0
- rclone_api/s3/basic_ops.py +61 -0
- rclone_api/s3/chunk_task.py +187 -0
- rclone_api/s3/create.py +107 -0
- rclone_api/s3/multipart/file_info.py +7 -0
- rclone_api/s3/multipart/finished_piece.py +69 -0
- rclone_api/s3/multipart/info_json.py +239 -0
- rclone_api/s3/multipart/merge_state.py +147 -0
- rclone_api/s3/multipart/upload_info.py +62 -0
- rclone_api/s3/multipart/upload_parts_inline.py +356 -0
- rclone_api/s3/multipart/upload_parts_resumable.py +304 -0
- rclone_api/s3/multipart/upload_parts_server_side_merge.py +546 -0
- rclone_api/s3/multipart/upload_state.py +165 -0
- rclone_api/s3/types.py +67 -0
- rclone_api/scan_missing_folders.py +153 -0
- rclone_api/types.py +402 -0
- rclone_api/util.py +324 -0
- rclone_api-1.5.8.dist-info/LICENSE +21 -0
- rclone_api-1.5.8.dist-info/METADATA +969 -0
- rclone_api-1.5.8.dist-info/RECORD +61 -0
- rclone_api-1.5.8.dist-info/WHEEL +5 -0
- rclone_api-1.5.8.dist-info/entry_points.txt +5 -0
- rclone_api-1.5.8.dist-info/top_level.txt +1 -0
rclone_api/db/db.py
ADDED
@@ -0,0 +1,277 @@
```python
"""
Database module for rclone_api.
"""

import os
from threading import Lock
from typing import Optional

from sqlmodel import Session, SQLModel, create_engine, select

from rclone_api.db.models import RepositoryMeta, create_file_entry_model
from rclone_api.file import FileItem


def _to_table_name(remote_name: str) -> str:
    return (
        "files_"
        + remote_name.replace(":", "_").replace(" ", "_").replace("/", "_").lower()
    )


class DB:
    """Database class for rclone_api."""

    def __init__(self, db_path_url: str):
        """Initialize the database.

        Args:
            db_path_url: Database URL, e.g. "sqlite:///rclone.db"
        """
        self.db_path_url = db_path_url

        # When running multiple commands in parallel, the database connection may
        # fail once while the database is first being populated, so retry.
        retries = 2
        for _ in range(retries):
            try:
                self.engine = create_engine(db_path_url)
                SQLModel.metadata.create_all(self.engine)
                break
            except Exception as e:
                print(f"Failed to connect to database. Retrying... {e}")
        else:
            raise Exception("Failed to connect to database.")
        self._cache: dict[str, DBRepo] = {}
        self._cache_lock = Lock()

    def drop_all(self) -> None:
        """Drop all tables in the database."""
        SQLModel.metadata.drop_all(self.engine)

    def close(self) -> None:
        """Close the database connection and release resources."""
        if hasattr(self, "engine") and self.engine is not None:
            self.engine.dispose()

    def add_files(self, files: list[FileItem]) -> None:
        """Add files to the database, grouped by remote.

        Args:
            files: List of file entries
        """
        partition: dict[str, list[FileItem]] = {}
        for file in files:
            partition.setdefault(file.remote, []).append(file)

        for remote_name, remote_files in partition.items():
            repo = self.get_or_create_repo(remote_name)
            repo.insert_files(remote_files)

    def query_all_files(self, remote_name: str) -> list[FileItem]:
        """Query all files for a remote from the database.

        Args:
            remote_name: Name of the remote
        """
        repo = self.get_or_create_repo(remote_name)
        return list(repo.get_all_files())

    def get_or_create_repo(self, remote_name: str) -> "DBRepo":
        """Get (or lazily create) the table section for a remote.

        Args:
            remote_name: Name of the remote

        Returns:
            DBRepo: A table section for the remote
        """
        with self._cache_lock:
            if remote_name in self._cache:
                return self._cache[remote_name]
            table_name = _to_table_name(remote_name)
            out = DBRepo(self.engine, remote_name, table_name)
            self._cache[remote_name] = out
            return out


class DBRepo:
    """Table repository for a single remote."""

    def __init__(self, engine, remote_name: str, table_name: Optional[str] = None):
        """Initialize a table section.

        Args:
            engine: SQLAlchemy engine
            remote_name: Name of the remote
            table_name: Optional table name, will be derived from remote_name if not provided
        """
        self.engine = engine
        self.remote_name = remote_name

        # If table_name is not provided, derive one from the remote name.
        if table_name is None:
            table_name = _to_table_name(remote_name)
        self.table_name = table_name

        # Check if the repository exists in RepositoryMeta; if not, create a new entry.
        with Session(self.engine) as session:
            existing_repo = session.exec(
                select(RepositoryMeta).where(
                    RepositoryMeta.repo_name == self.remote_name
                )
            ).first()
            if not existing_repo:
                repo_meta = RepositoryMeta(
                    repo_name=self.remote_name, file_table_name=self.table_name
                )
                session.add(repo_meta)
                session.commit()

        # Dynamically create the file entry model and its table.
        self.FileEntryModel = create_file_entry_model(self.table_name)
        SQLModel.metadata.create_all(self.engine, tables=[self.FileEntryModel.__table__])  # type: ignore

    def insert_file(self, file: FileItem) -> None:
        """Insert a single file entry into the table.

        Args:
            file: File entry
        """
        return self.insert_files([file])

    def insert_files(self, files: list[FileItem]) -> None:
        """
        Insert multiple file entries into the table.

        Three bulk operations are performed:
          1. Select: Determine which files already exist.
          2. Insert: Bulk-insert new file entries.
          3. Update: Bulk-update existing file entries.

        The FileEntryModel must define a unique constraint on (path, name) and have a primary key "id".
        """
        # Step 1: Bulk select existing records.
        # get_exists() returns the subset of `files` (matched on path_no_remote)
        # that already exist in the table.
        existing_files = self.get_exists(files)

        # Determine which files need to be updated vs. inserted.
        needs_update = existing_files
        is_new = set(files) - existing_files

        # Step 2: Bulk insert new rows.
        new_values = [
            {
                "path": file.path_no_remote,
                "name": file.name,
                "size": file.size,
                "mime_type": file.mime_type,
                "mod_time": file.mod_time,
                "suffix": file.real_suffix,
            }
            for file in is_new
        ]
        with Session(self.engine) as session:
            if new_values:
                session.bulk_insert_mappings(self.FileEntryModel, new_values)  # type: ignore
                session.commit()

        # Step 3: Bulk update existing rows.
        # First, query the database for the primary keys of rows that match the
        # unique keys in needs_update.
        with Session(self.engine) as session:
            # Collect all unique paths from files needing update.
            update_paths = [file.path_no_remote for file in needs_update]
            # Query for existing rows matching any of these paths.
            db_entries = session.exec(
                select(self.FileEntryModel).where(
                    self.FileEntryModel.path.in_(update_paths)  # type: ignore
                )
            ).all()

            # Build a mapping from the unique key (path, name) to the primary key (id).
            id_map = {(entry.path, entry.name): entry.id for entry in db_entries}

            # Prepare bulk update mappings.
            update_values = []
            for file in needs_update:
                key = (file.path_no_remote, file.name)
                if key in id_map:
                    update_values.append(
                        {
                            "id": id_map[key],
                            "size": file.size,
                            "mime_type": file.mime_type,
                            "mod_time": file.mod_time,
                            "suffix": file.real_suffix,
                        }
                    )
            if update_values:
                session.bulk_update_mappings(self.FileEntryModel, update_values)  # type: ignore
                session.commit()

    def get_exists(self, files: list[FileItem]) -> set[FileItem]:
        """Get the subset of the given files that already exist in the table.

        Args:
            files: List of file entries

        Returns:
            Set of FileItem instances whose 'path_no_remote' exists in the table.
        """
        # Extract unique paths from the input files.
        paths = {file.path_no_remote for file in files}

        with Session(self.engine) as session:
            # Execute a single query to fetch all file paths in the table that
            # match the input paths.
            result = session.exec(
                select(self.FileEntryModel.path).where(
                    self.FileEntryModel.path.in_(paths)  # type: ignore
                )
            ).all()
            # Convert the result to a set for fast membership tests.
            existing_paths = set(result)

        # Return the set of FileItem objects whose path is in existing_paths.
        return {file for file in files if file.path_no_remote in existing_paths}

    def get_all_files(self) -> list[FileItem]:
        """Get all files in the table.

        Returns:
            list: List of file entries
        """
        out: list[FileItem] = []
        with Session(self.engine) as session:
            query = session.exec(select(self.FileEntryModel)).all()
            for item in query:
                path = item.path  # type: ignore
                parent = os.path.dirname(path)
                if parent == "/" or parent == ".":
                    parent = ""
                o = FileItem(
                    remote=self.remote_name,
                    parent=parent,
                    name=item.name,  # type: ignore
                    size=item.size,  # type: ignore
                    mime_type=item.mime_type,  # type: ignore
                    mod_time=item.mod_time,  # type: ignore
                )
                out.append(o)
        return out
```
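A minimal usage sketch of the `DB` facade above, assuming a local SQLite URL; the remote name and file metadata are illustrative, and the `FileItem` keyword arguments follow the constructor call visible in `DBRepo.get_all_files()`:

```python
# Sketch only: exercises DB.add_files / DB.query_all_files as defined above.
# The sqlite URL, remote name, and file metadata are made-up examples.
from rclone_api.db.db import DB
from rclone_api.file import FileItem

db = DB("sqlite:///rclone_files.db")
files = [
    FileItem(
        remote="dst:Bucket",  # hypothetical remote
        parent="videos",
        name="clip.mp4",
        size=1024,
        mime_type="video/mp4",
        mod_time="2025-02-17T14:43:38",
    )
]
db.add_files(files)  # Partitions by remote, then bulk-upserts into that remote's table.
print(db.query_all_files("dst:Bucket"))
db.close()
```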
rclone_api/db/models.py
ADDED
@@ -0,0 +1,57 @@
```python
"""
Database models for rclone_api.
"""

from abc import ABC, abstractmethod
from typing import Optional, Type

from sqlalchemy import BigInteger, Column
from sqlmodel import Field, SQLModel


# Meta table that indexes all repositories
class RepositoryMeta(SQLModel, table=True):
    """Repository metadata table."""

    id: Optional[int] = Field(default=None, primary_key=True)
    repo_name: str
    file_table_name: str  # The dedicated table name for file entries


# Base FileEntry model that will be extended
class FileEntry(SQLModel, ABC):
    """Base file entry model with common fields."""

    id: Optional[int] = Field(default=None, primary_key=True)
    path: str = Field(index=True, unique=True)
    suffix: str = Field(index=True)
    name: str
    size: int = Field(sa_column=Column(BigInteger))
    mime_type: str
    mod_time: str
    hash: Optional[str] = Field(default=None)

    @abstractmethod
    def table_name(self) -> str:
        """Return the table name for this file entry model."""
        pass


# Factory to dynamically create a FileEntry model with a given table name
def create_file_entry_model(_table_name: str) -> Type[FileEntry]:
    """Create a file entry model with a given table name.

    Args:
        _table_name: Table name

    Returns:
        Type[FileEntry]: File entry model class with the specified table name
    """

    class FileEntryConcrete(FileEntry, table=True):
        __tablename__ = _table_name  # type: ignore # dynamically set table name

        def table_name(self) -> str:
            return _table_name

    return FileEntryConcrete
```
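The factory exists because SQLModel binds one class to one table, while this package needs one file table per remote. A sketch of creating two independently-tabled models, with illustrative table names:

```python
# Sketch only: two concrete models bound to distinct tables via the factory
# above. Table names and the sqlite URL are illustrative.
from sqlmodel import SQLModel, create_engine

from rclone_api.db.models import create_file_entry_model

FilesA = create_file_entry_model("files_remote_a")
FilesB = create_file_entry_model("files_remote_b")

engine = create_engine("sqlite:///example.db")
# Create just the two dynamic tables, mirroring what DBRepo.__init__ does.
SQLModel.metadata.create_all(
    engine, tables=[FilesA.__table__, FilesB.__table__]  # type: ignore
)
```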
rclone_api/deprecated.py
ADDED
@@ -0,0 +1,24 @@
```python
import functools
import warnings


def deprecated(new_func_name: str):
    """Decorator to mark functions as deprecated.

    Args:
        new_func_name: The name of the function that should be used instead.
    """

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            warnings.warn(
                f"{func.__name__}() is deprecated; use {new_func_name}() instead.",
                DeprecationWarning,
                stacklevel=2,
            )
            return func(*args, **kwargs)

        return wrapper

    return decorator
```
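A small sketch of the decorator in use, with a hypothetical `copy_old()`/`copy()` pair:

```python
# Sketch only: copy() and copy_old() are hypothetical names for illustration.
from rclone_api.deprecated import deprecated


def copy(src: str, dst: str) -> None:
    print(f"copying {src} -> {dst}")


@deprecated("copy")
def copy_old(src: str, dst: str) -> None:
    return copy(src, dst)


copy_old("a.txt", "b.txt")  # Emits: copy_old() is deprecated; use copy() instead.
```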
rclone_api/detail/copy_file_parts_resumable.py
ADDED
@@ -0,0 +1,42 @@
```python
from rclone_api.rclone_impl import RcloneImpl
from rclone_api.types import (
    PartInfo,
)


def copy_file_parts_resumable(
    self: RcloneImpl,
    src: str,  # src:/Bucket/path/myfile.large.zst
    dst_dir: str,  # dst:/Bucket/path/myfile.large.zst-parts/
    part_infos: list[PartInfo] | None = None,
    upload_threads: int = 10,
    merge_threads: int = 5,
    verbose: bool | None = None,
) -> Exception | None:
    # Imports deferred to function scope (e.g., to avoid import cycles).
    from rclone_api.s3.multipart.upload_parts_resumable import upload_parts_resumable
    from rclone_api.s3.multipart.upload_parts_server_side_merge import (
        s3_server_side_multi_part_merge,
    )

    if verbose is None:
        verbose = self.get_verbose()

    err: Exception | None = upload_parts_resumable(
        self=self,
        src=src,
        dst_dir=dst_dir,
        part_infos=part_infos,
        threads=upload_threads,
    )
    if isinstance(err, Exception):
        return err
    if dst_dir.endswith("/"):
        dst_dir = dst_dir[:-1]
    dst_info = f"{dst_dir}/info.json"
    err = s3_server_side_multi_part_merge(
        rclone=self, info_path=dst_info, max_workers=merge_threads, verbose=verbose
    )
    if isinstance(err, Exception):
        return err
    return None
```
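A sketch of driving the two-phase copy above (resumable part upload, then server-side merge from the written `info.json`); the `RcloneImpl` instance is taken as a parameter since its construction lives in `rclone_impl.py`, and the bucket paths are illustrative:

```python
# Sketch only: the caller supplies an already-constructed RcloneImpl; paths
# are made-up examples matching the signature comments above.
from rclone_api.detail.copy_file_parts_resumable import copy_file_parts_resumable
from rclone_api.rclone_impl import RcloneImpl


def copy_large_file(rclone: RcloneImpl) -> None:
    err = copy_file_parts_resumable(
        self=rclone,
        src="src:Bucket/path/myfile.large.zst",
        dst_dir="dst:Bucket/path/myfile.large.zst-parts/",
        upload_threads=10,
        merge_threads=5,
    )
    if err is not None:
        raise err
```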
rclone_api/detail/walk.py
ADDED
@@ -0,0 +1,116 @@
```python
import random
from queue import Queue
from threading import Thread
from typing import Generator

from rclone_api import Dir
from rclone_api.dir_listing import DirListing
from rclone_api.remote import Remote
from rclone_api.types import Order

_MAX_OUT_QUEUE_SIZE = 50


def walk_runner_breadth_first(
    dir: Dir,
    max_depth: int,
    out_queue: Queue[DirListing | None],
    order: Order = Order.NORMAL,
) -> None:
    # Each queue entry carries its own remaining depth, so the limit is applied
    # per level rather than decremented once per directory processed.
    queue: Queue[tuple[Dir, int]] = Queue()
    queue.put((dir, max_depth))
    try:
        while not queue.empty():
            current_dir, depth = queue.get()
            dirlisting = current_dir.ls(max_depth=0, order=order)
            out_queue.put(dirlisting)
            if depth != 0:  # depth < 0 means unlimited
                next_depth = depth - 1 if depth > 0 else depth
                for child in dirlisting.dirs:
                    queue.put((child, next_depth))
        out_queue.put(None)
    except KeyboardInterrupt:
        import _thread

        out_queue.put(None)
        _thread.interrupt_main()


def walk_runner_depth_first(
    dir: Dir,
    max_depth: int,
    out_queue: Queue[DirListing | None],
    order: Order = Order.NORMAL,
) -> None:
    def _walk(current_dir: Dir, depth: int) -> None:
        dirlisting = current_dir.ls()
        if order == Order.REVERSE:
            dirlisting.dirs.reverse()
        if order == Order.RANDOM:
            random.shuffle(dirlisting.dirs)
        if depth != 0:  # Process deeper directories first; depth < 0 means unlimited.
            next_depth = depth - 1 if depth > 0 else depth
            for subdir in dirlisting.dirs:
                _walk(subdir, next_depth)
        out_queue.put(dirlisting)

    try:
        _walk(dir, max_depth)
        # The None sentinel is emitted exactly once, after the whole traversal,
        # so recursion cannot terminate the consumer early.
        out_queue.put(None)
    except KeyboardInterrupt:
        import _thread

        out_queue.put(None)
        _thread.interrupt_main()


def walk(
    dir: Dir | Remote,
    breadth_first: bool,
    max_depth: int = -1,
    order: Order = Order.NORMAL,
) -> Generator[DirListing, None, None]:
    """Walk through the given directory recursively.

    Args:
        dir: Directory or Remote to walk through
        breadth_first: Traverse breadth-first if True, depth-first otherwise
        max_depth: Maximum depth to traverse (-1 for unlimited)
        order: Traversal order (normal, reverse, or random)

    Yields:
        DirListing: Directory listing for each directory encountered
    """
    try:
        # Convert Remote to Dir if needed
        if isinstance(dir, Remote):
            dir = Dir(dir)
        out_queue: Queue[DirListing | None] = Queue(maxsize=_MAX_OUT_QUEUE_SIZE)

        def _task() -> None:
            if breadth_first:
                walk_runner_breadth_first(dir, max_depth, out_queue, order)
            else:
                walk_runner_depth_first(dir, max_depth, out_queue, order)

        # Start worker thread
        worker = Thread(
            target=_task,
            daemon=True,
        )
        worker.start()

        # Drain the queue until the None sentinel; the explicit check avoids
        # treating a falsy-but-valid DirListing as end-of-stream.
        while True:
            dirlisting = out_queue.get()
            if dirlisting is None:
                break
            yield dirlisting

        worker.join()
    except KeyboardInterrupt:
        pass
```
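A sketch of consuming `walk()`; the root `Dir`/`Remote` is taken as a parameter since its construction is defined elsewhere in the package:

```python
# Sketch only: prints each DirListing from a bounded breadth-first traversal.
from rclone_api import Dir
from rclone_api.detail.walk import walk
from rclone_api.remote import Remote


def print_tree(root: Dir | Remote) -> None:
    # Listings stream from a worker thread; iteration ends at the sentinel.
    for dirlisting in walk(root, breadth_first=True, max_depth=2):
        print(dirlisting)
```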
rclone_api/diff.py
ADDED
@@ -0,0 +1,164 @@
```python
from dataclasses import dataclass
from enum import Enum
from queue import Queue
from threading import Thread
from typing import Generator

from rclone_api.process import Process


class DiffType(Enum):
    EQUAL = "="
    MISSING_ON_SRC = "-"  # Path was missing on the source, so it exists only on the destination.
    MISSING_ON_DST = "+"  # Path was missing on the destination, so it exists only on the source.
    DIFFERENT = "*"  # Path was present on source and destination but differs.
    ERROR = "!"  # There was an error.


class DiffOption(Enum):
    COMBINED = "combined"
    MISSING_ON_SRC = "missing-on-src"
    MISSING_ON_DST = "missing-on-dst"
    DIFFER = "differ"
    MATCH = "match"
    ERROR = "error"


@dataclass
class DiffItem:
    type: DiffType
    path: str
    src_prefix: str
    dst_prefix: str

    def __str__(self) -> str:
        return f"{self.type.value} {self.path}"

    def __repr__(self) -> str:
        return f"{self.type.name} {self.path}"

    def full_str(self) -> str:
        return f"{self.type.name} {self.src_prefix}/{self.path} {self.dst_prefix}/{self.path}"

    def dst_path(self) -> str:
        return f"{self.dst_prefix}/{self.path}"

    def src_path(self) -> str:
        return f"{self.src_prefix}/{self.path}"


def _parse_missing_on_src_dst(line: str) -> str | None:
    if line.endswith("does-not-exist"):
        # Example:
        # 2025/02/17 14:43:38 ERROR : zachs_video/breaking_ai_mind.mp4: file not in S3 bucket rclone-api-unit-test path does-not-exist
        parts = line.split(" : ", 1)
        if len(parts) < 2:
            return None
        right = parts[1]
        file_path = right.split(":", 1)[0]
        return file_path.strip()
    return None


def _classify_diff(
    line: str, src_slug: str, dst_slug: str, diff_option: DiffOption
) -> DiffItem | None:
    def _new(type: DiffType, path: str) -> DiffItem:
        return DiffItem(type, path, src_prefix=src_slug, dst_prefix=dst_slug)

    if diff_option == DiffOption.COMBINED:
        suffix = line[1:].strip() if len(line) > 0 else ""
        if line.startswith(DiffType.EQUAL.value):
            return _new(DiffType.EQUAL, suffix)
        if line.startswith(DiffType.MISSING_ON_SRC.value):
            return _new(DiffType.MISSING_ON_SRC, suffix)
        if line.startswith(DiffType.MISSING_ON_DST.value):
            return _new(DiffType.MISSING_ON_DST, suffix)
        if line.startswith(DiffType.DIFFERENT.value):
            return _new(DiffType.DIFFERENT, suffix)
        if line.startswith(DiffType.ERROR.value):
            return _new(DiffType.ERROR, suffix)
        return None
    if diff_option == DiffOption.MISSING_ON_SRC:
        filename_src: str | None = _parse_missing_on_src_dst(line)
        if filename_src is not None:
            return _new(DiffType.MISSING_ON_SRC, filename_src)
        return None
    if diff_option == DiffOption.MISSING_ON_DST:
        filename_dst: str | None = _parse_missing_on_src_dst(line)
        if filename_dst is not None:
            return _new(DiffType.MISSING_ON_DST, filename_dst)
        return None
    raise ValueError(f"Unsupported diff_option: {diff_option}")


def _async_diff_stream_from_running_process(
    running_process: Process,
    src_slug: str,
    dst_slug: str,
    diff_option: DiffOption,
    output: Queue[DiffItem | None],
) -> None:
    count = 0
    first_few_lines: list[str] = []
    try:
        assert running_process.stdout is not None
        n_max = 10
        for line in iter(running_process.stdout.readline, b""):
            try:
                line_str = line.decode("utf-8").strip()
                if len(first_few_lines) < n_max:
                    first_few_lines.append(line_str)
                diff_item: DiffItem | None = _classify_diff(
                    line_str, src_slug, dst_slug, diff_option
                )
                if diff_item is None:
                    # Some other output that we don't care about, debug print etc.
                    continue
                output.put(diff_item)
                count += 1
            except UnicodeDecodeError:
                print("UnicodeDecodeError")
                continue
    except KeyboardInterrupt:
        import _thread

        print("KeyboardInterrupt")
        _thread.interrupt_main()
    except Exception as e:
        import _thread

        print(f"Error: {e}")
        _thread.interrupt_main()
    finally:
        output.put(None)


def diff_stream_from_running_process(
    running_process: Process,
    src_slug: str,
    dst_slug: str,
    diff_option: DiffOption,
) -> Generator[DiffItem, None, None]:
    output: Queue[DiffItem | None] = Queue()

    def _task() -> None:
        _async_diff_stream_from_running_process(
            running_process, src_slug, dst_slug, diff_option, output
        )

    thread = Thread(target=_task, daemon=True)
    thread.start()
    while True:
        item = output.get()
        if item is None:
            break
        yield item
    thread.join(timeout=5)
```
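A sketch of consuming the stream, assuming a `Process` already running something like `rclone check --combined -` (how the process is launched is defined in `process.py`); the slugs are illustrative:

```python
# Sketch only: filters the streamed diff for paths that exist on src but not
# on dst. The Process is supplied by the caller; slugs are made-up examples.
from rclone_api.diff import DiffOption, DiffType, diff_stream_from_running_process
from rclone_api.process import Process


def print_missing_on_dst(proc: Process) -> None:
    for item in diff_stream_from_running_process(
        proc,
        src_slug="src:Bucket",
        dst_slug="dst:Bucket",
        diff_option=DiffOption.COMBINED,
    ):
        if item.type == DiffType.MISSING_ON_DST:
            print(item.src_path())  # exists on src only; a candidate to copy
```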