rclone-api 1.3.6__py2.py3-none-any.whl → 1.3.8__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rclone_api/__init__.py +2 -1
- rclone_api/cmd/analyze.py +51 -0
- rclone_api/cmd/fill_db.py +68 -0
- rclone_api/db/__init__.py +3 -0
- rclone_api/db/db.py +268 -0
- rclone_api/db/models.py +57 -0
- rclone_api/file.py +81 -0
- rclone_api/file_item.py +68 -0
- rclone_api/process.py +8 -0
- rclone_api/rclone.py +104 -1
- {rclone_api-1.3.6.dist-info → rclone_api-1.3.8.dist-info}/METADATA +3 -1
- {rclone_api-1.3.6.dist-info → rclone_api-1.3.8.dist-info}/RECORD +16 -10
- {rclone_api-1.3.6.dist-info → rclone_api-1.3.8.dist-info}/LICENSE +0 -0
- {rclone_api-1.3.6.dist-info → rclone_api-1.3.8.dist-info}/WHEEL +0 -0
- {rclone_api-1.3.6.dist-info → rclone_api-1.3.8.dist-info}/entry_points.txt +0 -0
- {rclone_api-1.3.6.dist-info → rclone_api-1.3.8.dist-info}/top_level.txt +0 -0
rclone_api/__init__.py
CHANGED
|
@@ -7,7 +7,7 @@ from .config import Config, Parsed, Section
|
|
|
7
7
|
from .diff import DiffItem, DiffOption, DiffType
|
|
8
8
|
from .dir import Dir
|
|
9
9
|
from .dir_listing import DirListing
|
|
10
|
-
from .file import File
|
|
10
|
+
from .file import File, FileItem
|
|
11
11
|
from .filelist import FileList
|
|
12
12
|
|
|
13
13
|
# Import the configure_logging function to make it available at package level
|
|
@@ -28,6 +28,7 @@ __all__ = [
|
|
|
28
28
|
"RPath",
|
|
29
29
|
"DirListing",
|
|
30
30
|
"FileList",
|
|
31
|
+
"FileItem",
|
|
31
32
|
"Process",
|
|
32
33
|
"DiffItem",
|
|
33
34
|
"DiffType",
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import argparse
from dataclasses import dataclass
from pathlib import Path

from rclone_api import Rclone


@dataclass
class Args:
    """Validated command-line arguments for the analyze tool."""

    config: Path
    path: str

    def __post_init__(self):
        # Fail fast when the rclone config file does not exist.
        if not self.config.exists():
            raise FileNotFoundError(f"Config file not found: {self.config}")


def list_files(rclone: Rclone, path: str):
    """List files in a remote path."""

    with rclone.ls_stream(path, fast_list=True) as files:
        for file_item in files:
            print(file_item.path, "", file_item.size, file_item.mod_time)


def _parse_args() -> Args:
    """Parse command-line options into a validated Args instance."""
    parser = argparse.ArgumentParser(description="List files in a remote path.")
    parser.add_argument(
        "--config", help="Path to rclone config file", type=Path, default="rclone.conf"
    )
    parser.add_argument("path", help="Remote path to list")
    namespace = parser.parse_args()
    return Args(config=namespace.config, path=namespace.path)


def main() -> int:
    """Main entry point."""
    parsed = _parse_args()
    client = Rclone(Path(parsed.config))
    list_files(client, parsed.path)
    return 0


if __name__ == "__main__":
    import sys

    cwd = Path(".").absolute()
    print(f"cwd: {cwd}")
    sys.argv.append("dst:TorrentBooks")
    main()
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import argparse
import os
from dataclasses import dataclass
from pathlib import Path

from dotenv import load_dotenv

from rclone_api import Rclone


def _db_url_from_env_or_raise() -> str:
    """Load .env and return DB_URL, raising if it is not configured."""
    load_dotenv()
    url = os.getenv("DB_URL")
    if url is None:
        raise ValueError("DB_URL not set")
    return url


@dataclass
class Args:
    """Validated command-line arguments for the fill-db tool."""

    config: Path
    path: str

    def __post_init__(self):
        # Fail fast when the rclone config file does not exist.
        if not self.config.exists():
            raise FileNotFoundError(f"Config file not found: {self.config}")


def fill_db(rclone: Rclone, path: str):
    """List files in a remote path."""
    rclone.save_to_db(src=path, db_url=_db_url_from_env_or_raise(), fast_list=True)


def _parse_args() -> Args:
    """Parse command-line options into a validated Args instance."""
    parser = argparse.ArgumentParser(description="List files in a remote path.")
    parser.add_argument(
        "--config", help="Path to rclone config file", type=Path, default="rclone.conf"
    )
    parser.add_argument("path", help="Remote path to list")
    namespace = parser.parse_args()
    return Args(config=namespace.config, path=namespace.path)


def main() -> int:
    """Main entry point."""
    parsed = _parse_args()
    client = Rclone(Path(parsed.config))
    fill_db(client, parsed.path)
    return 0


if __name__ == "__main__":
    import sys

    cwd = Path(".").absolute()
    print(f"cwd: {cwd}")
    sys.argv.append("dst:TorrentBooks/meta")
    main()
|
rclone_api/db/db.py
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
"""
Database module for rclone_api.
"""

import os
from threading import Lock
from typing import Optional

from sqlmodel import Session, SQLModel, create_engine, select

from rclone_api.db.models import RepositoryMeta, create_file_entry_model
from rclone_api.file import FileItem


def _to_table_name(remote_name: str) -> str:
    """Derive a safe SQL table name from an rclone remote name."""
    sanitized = remote_name.replace(":", "_").replace(" ", "_").replace("/", "_")
    return "files_" + sanitized.lower()


class DB:
    """Database class for rclone_api."""

    def __init__(self, db_path_url: str):
        """Initialize the database.

        Args:
            db_path_url: SQLAlchemy database URL (sqlite, mysql, postgres, ...)
        """
        self.db_path_url = db_path_url
        self.engine = create_engine(db_path_url)

        # Create the meta table (and any models already registered).
        SQLModel.metadata.create_all(self.engine)
        self._cache: dict[str, DBRepo] = {}
        self._cache_lock = Lock()

    def drop_all(self) -> None:
        """Drop all tables in the database."""
        SQLModel.metadata.drop_all(self.engine)

    def close(self) -> None:
        """Close the database connection and release resources."""
        if getattr(self, "engine", None) is not None:
            self.engine.dispose()

    def add_files(self, files: list[FileItem]) -> None:
        """Add files to the database.

        Args:
            files: List of file entries (may span several remotes)
        """
        # Group the incoming files by remote so each batch lands in its
        # remote's dedicated table.
        partition: dict[str, list[FileItem]] = {}
        for file in files:
            partition.setdefault(file.remote, []).append(file)

        # BUGFIX: the loop variable previously shadowed the `files` parameter.
        for remote_name, remote_files in partition.items():
            repo = self.get_or_create_repo(remote_name)
            repo.insert_files(remote_files)

    def query_files(self, remote_name: str) -> list[FileItem]:
        """Query files from the database.

        Args:
            remote_name: Name of the remote

        Returns:
            All file entries stored for the remote.
        """
        repo = self.get_or_create_repo(remote_name)
        return list(repo.get_files())

    def get_or_create_repo(self, remote_name: str) -> "DBRepo":
        """Get a table section for a remote.

        Args:
            remote_name: Name of the remote

        Returns:
            DBRepo: A table section for the remote
        """
        with self._cache_lock:
            repo = self._cache.get(remote_name)
            if repo is None:
                repo = DBRepo(self.engine, remote_name, _to_table_name(remote_name))
                self._cache[remote_name] = repo
            return repo


class DBRepo:
    """Table repo for a single remote."""

    def __init__(self, engine, remote_name: str, table_name: Optional[str] = None):
        """Initialize a table section.

        Args:
            engine: SQLAlchemy engine
            remote_name: Name of the remote
            table_name: Optional table name, derived from remote_name if omitted
        """
        self.engine = engine
        self.remote_name = remote_name
        # Derive a table name from the remote when none is supplied.
        self.table_name = (
            table_name if table_name is not None else _to_table_name(remote_name)
        )

        # Register this repo in RepositoryMeta the first time it is seen.
        with Session(self.engine) as session:
            existing_repo = session.exec(
                select(RepositoryMeta).where(
                    RepositoryMeta.repo_name == self.remote_name
                )
            ).first()
            if not existing_repo:
                session.add(
                    RepositoryMeta(
                        repo_name=self.remote_name, file_table_name=self.table_name
                    )
                )
                session.commit()

        # Dynamically create the file entry model and its table.
        self.FileEntryModel = create_file_entry_model(self.table_name)
        SQLModel.metadata.create_all(self.engine, tables=[self.FileEntryModel.__table__])  # type: ignore

    def insert_file(self, file: FileItem) -> None:
        """Insert a single file entry into the table.

        Args:
            file: File entry
        """
        self.insert_files([file])

    def insert_files(self, files: list[FileItem]) -> None:
        """
        Insert multiple file entries into the table (upsert semantics).

        Three bulk operations are performed:
          1. Select: determine which files already exist.
          2. Insert: bulk-insert new file entries.
          3. Update: bulk-update existing file entries.

        The FileEntryModel has a unique index on `path` and primary key `id`
        (see rclone_api.db.models.FileEntry).
        """
        # Step 1: bulk select existing records (matched on path_no_remote).
        existing_files = self.get_exists(files)

        # Split input into rows to update vs. rows to insert.
        needs_update = existing_files
        is_new = set(files) - existing_files

        # Step 2: bulk insert the new rows.
        new_values = [
            {
                "path": file.path_no_remote,
                "name": file.name,
                "size": file.size,
                "mime_type": file.mime_type,
                "mod_time": file.mod_time,
                "suffix": file.suffix,
            }
            for file in is_new
        ]
        with Session(self.engine) as session:
            if new_values:
                session.bulk_insert_mappings(self.FileEntryModel, new_values)  # type: ignore
                session.commit()

        # Step 3: bulk update the existing rows. First map each unique key
        # (path, name) to its primary key so bulk_update_mappings can be used.
        with Session(self.engine) as session:
            update_paths = [file.path_no_remote for file in needs_update]
            db_entries = session.exec(
                select(self.FileEntryModel).where(
                    self.FileEntryModel.path.in_(update_paths)  # type: ignore
                )
            ).all()

            id_map = {(entry.path, entry.name): entry.id for entry in db_entries}

            update_values = []
            for file in needs_update:
                key = (file.path_no_remote, file.name)
                if key in id_map:
                    update_values.append(
                        {
                            "id": id_map[key],
                            "size": file.size,
                            "mime_type": file.mime_type,
                            "mod_time": file.mod_time,
                            "suffix": file.suffix,
                        }
                    )
            if update_values:
                session.bulk_update_mappings(self.FileEntryModel, update_values)  # type: ignore
                session.commit()

    def get_exists(self, files: list[FileItem]) -> set[FileItem]:
        """Get file entries from the table that exist among the given files.

        Args:
            files: List of file entries

        Returns:
            Set of FileItem instances whose 'path_no_remote' exists in the table.
        """
        # Extract the unique candidate paths from the input.
        paths = {file.path_no_remote for file in files}

        with Session(self.engine) as session:
            # One query fetches every stored path that matches a candidate.
            result = session.exec(
                select(self.FileEntryModel.path).where(
                    self.FileEntryModel.path.in_(paths)  # type: ignore
                )
            ).all()
            existing_paths = set(result)

        return {file for file in files if file.path_no_remote in existing_paths}

    def get_files(self) -> list[FileItem]:
        """Get all files in the table.

        Returns:
            list: List of file entries
        """
        out: list[FileItem] = []
        with Session(self.engine) as session:
            for item in session.exec(select(self.FileEntryModel)).all():
                path = item.path  # type: ignore
                parent = os.path.dirname(path)
                # BUGFIX: normalize the root to "." (not "") so that
                # FileItem.path does not render as "remote//name" —
                # FileItem special-cases "." as the remote root.
                if parent in ("/", ".", ""):
                    parent = "."
                out.append(
                    FileItem(
                        remote=self.remote_name,
                        parent=parent,
                        name=item.name,  # type: ignore
                        size=item.size,  # type: ignore
                        mime_type=item.mime_type,  # type: ignore
                        mod_time=item.mod_time,  # type: ignore
                    )
                )
        return out
|
rclone_api/db/models.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""
Database models for rclone_api.
"""

from abc import ABC, abstractmethod
from typing import Optional, Type

from sqlalchemy import BigInteger
from sqlmodel import Field, SQLModel


# Meta table that indexes all repositories
class RepositoryMeta(SQLModel, table=True):
    """Repository metadata table."""

    id: Optional[int] = Field(default=None, primary_key=True)
    repo_name: str
    file_table_name: str  # The dedicated table name for file entries


# Base FileEntry model that will be extended
class FileEntry(SQLModel, ABC):
    """Base file entry model with common fields."""

    id: Optional[int] = Field(default=None, primary_key=True)
    path: str = Field(index=True, unique=True)
    suffix: str = Field(index=True)
    name: str
    # BUGFIX: use sa_type instead of a shared sa_column=Column(BigInteger).
    # A single SQLAlchemy Column instance cannot be attached to more than one
    # Table, and create_file_entry_model() is called once per remote — the
    # second call would fail with the shared Column. sa_type lets each
    # concrete model build its own Column.
    size: int = Field(sa_type=BigInteger)  # type: ignore[call-overload]
    mime_type: str
    mod_time: str
    hash: Optional[str] = Field(default=None)

    @abstractmethod
    def table_name(self) -> str:
        """Return the table name for this file entry model."""
        pass


# Factory to dynamically create a FileEntry model with a given table name
def create_file_entry_model(_table_name: str) -> Type[FileEntry]:
    """Create a file entry model with a given table name.

    Args:
        _table_name: Table name

    Returns:
        Type[FileEntry]: File entry model class bound to the given table name
    """

    class FileEntryConcrete(FileEntry, table=True):
        __tablename__ = _table_name  # type: ignore # dynamically set table name

        def table_name(self) -> str:
            return _table_name

    return FileEntryConcrete
|
rclone_api/file.py
CHANGED
|
@@ -1,8 +1,89 @@
|
|
|
1
1
|
import json
|
|
2
|
+
import warnings
|
|
3
|
+
from dataclasses import dataclass
|
|
2
4
|
from pathlib import Path
|
|
3
5
|
|
|
4
6
|
from rclone_api.rpath import RPath
|
|
5
7
|
|
|
8
|
+
_STRING_INTERNER: dict[str, str] = {}
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _intern(s: str) -> str:
|
|
12
|
+
return _STRING_INTERNER.setdefault(s, s)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# File is too complex, this is a simple dataclass that can be streamed out.
|
|
16
|
+
@dataclass
|
|
17
|
+
class FileItem:
|
|
18
|
+
"""Remote file dataclass."""
|
|
19
|
+
|
|
20
|
+
remote: str
|
|
21
|
+
parent: str
|
|
22
|
+
name: str
|
|
23
|
+
size: int
|
|
24
|
+
mime_type: str
|
|
25
|
+
mod_time: str
|
|
26
|
+
hash: str | None = None
|
|
27
|
+
|
|
28
|
+
@property
|
|
29
|
+
def path(self) -> str:
|
|
30
|
+
if self.parent == ".":
|
|
31
|
+
return f"{self.remote}/{self.name}"
|
|
32
|
+
else:
|
|
33
|
+
return f"{self.remote}/{self.parent}/{self.name}"
|
|
34
|
+
|
|
35
|
+
@property
|
|
36
|
+
def path_no_remote(self) -> str:
|
|
37
|
+
if self.parent == ".":
|
|
38
|
+
return f"{self.name}"
|
|
39
|
+
else:
|
|
40
|
+
return f"{self.parent}/{self.name}"
|
|
41
|
+
|
|
42
|
+
@property
|
|
43
|
+
def suffix(self) -> str:
|
|
44
|
+
return self._suffix
|
|
45
|
+
|
|
46
|
+
def __post_init__(self):
|
|
47
|
+
self.parent = _intern(self.parent)
|
|
48
|
+
self.mime_type = _intern(self.mime_type)
|
|
49
|
+
self.remote = _intern(self.remote)
|
|
50
|
+
self._suffix = _intern(Path(self.name).suffix)
|
|
51
|
+
|
|
52
|
+
@staticmethod
|
|
53
|
+
def from_json(remote: str, data: dict) -> "FileItem | None":
|
|
54
|
+
try:
|
|
55
|
+
path_str: str = data["Path"]
|
|
56
|
+
parent_path = Path(path_str).parent.as_posix()
|
|
57
|
+
name = data["Name"]
|
|
58
|
+
size = data["Size"]
|
|
59
|
+
mime_type = data["MimeType"]
|
|
60
|
+
mod_time = data["ModTime"]
|
|
61
|
+
|
|
62
|
+
return FileItem(
|
|
63
|
+
remote=remote,
|
|
64
|
+
parent=parent_path,
|
|
65
|
+
name=name,
|
|
66
|
+
size=size,
|
|
67
|
+
mime_type=mime_type,
|
|
68
|
+
mod_time=mod_time,
|
|
69
|
+
)
|
|
70
|
+
except KeyError:
|
|
71
|
+
warnings.warn(f"Invalid data: {data}")
|
|
72
|
+
return None
|
|
73
|
+
|
|
74
|
+
@staticmethod
|
|
75
|
+
def from_json_str(remote: str, data: str) -> "FileItem | None":
|
|
76
|
+
try:
|
|
77
|
+
data_dict = json.loads(data)
|
|
78
|
+
return FileItem.from_json(remote, data_dict)
|
|
79
|
+
except json.JSONDecodeError:
|
|
80
|
+
warnings.warn(f"Invalid JSON data: {data}")
|
|
81
|
+
return None
|
|
82
|
+
|
|
83
|
+
# hasher for set membership
|
|
84
|
+
def __hash__(self) -> int:
|
|
85
|
+
return hash(self.path_no_remote)
|
|
86
|
+
|
|
6
87
|
|
|
7
88
|
class File:
|
|
8
89
|
"""Remote file dataclass."""
|
rclone_api/file_item.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import json
import warnings
from dataclasses import dataclass
from pathlib import Path

# Shared pool so duplicate strings collapse to a single object.
_STRING_INTERNER: dict[str, str] = {}


def _intern(s: str) -> str:
    """Return the canonical interned copy of *s*."""
    return _STRING_INTERNER.setdefault(s, s)


# File is too complex, this is a simple dataclass that can be streamed out.
@dataclass
class FileItem:
    """Remote file dataclass."""

    remote: str
    parent: str
    name: str
    size: int
    mime_type: str
    mod_time: str

    @property
    def path(self) -> str:
        """Full path: remote/parent/name."""
        return f"{self.remote}/{self.parent}/{self.name}"

    @property
    def suffix(self) -> str:
        """Interned file extension of *name* (including the dot)."""
        return self._suffix

    def __post_init__(self):
        # Intern the high-duplication fields to save memory on big listings.
        self.parent = _intern(self.parent)
        self.mime_type = _intern(self.mime_type)
        self._suffix = _intern(Path(self.name).suffix)

    @staticmethod
    def from_json(data: dict) -> "FileItem | None":
        """Build a FileItem from one rclone `lsjson` record, or None if keys are missing."""
        try:
            parent_dir = Path(data["Path"]).parent.as_posix()
            return FileItem(
                remote="DUMMY",
                parent=parent_dir,
                name=data["Name"],
                size=data["Size"],
                mime_type=data["MimeType"],
                mod_time=data["ModTime"],
            )
        except KeyError:
            warnings.warn(f"Invalid data: {data}")
            return None

    @staticmethod
    def from_json_str(data: str) -> "FileItem | None":
        """Parse a JSON string into a FileItem, or None on invalid JSON."""
        try:
            return FileItem.from_json(json.loads(data))
        except json.JSONDecodeError:
            warnings.warn(f"Invalid JSON data: {data}")
            return None
|
rclone_api/process.py
CHANGED
|
@@ -72,6 +72,14 @@ class Process:
|
|
|
72
72
|
|
|
73
73
|
atexit.register(exit_cleanup)
|
|
74
74
|
|
|
75
|
+
def __enter__(self) -> "Process":
|
|
76
|
+
return self
|
|
77
|
+
|
|
78
|
+
def __exit__(self, exc_type, exc_val, exc_tb) -> None:
|
|
79
|
+
self.terminate()
|
|
80
|
+
self.wait()
|
|
81
|
+
self.cleanup()
|
|
82
|
+
|
|
75
83
|
def cleanup(self) -> None:
|
|
76
84
|
if self.tempdir and self.needs_cleanup:
|
|
77
85
|
try:
|
rclone_api/rclone.py
CHANGED
|
@@ -24,7 +24,7 @@ from rclone_api.deprecated import deprecated
|
|
|
24
24
|
from rclone_api.diff import DiffItem, DiffOption, diff_stream_from_running_process
|
|
25
25
|
from rclone_api.dir_listing import DirListing
|
|
26
26
|
from rclone_api.exec import RcloneExec
|
|
27
|
-
from rclone_api.file import File
|
|
27
|
+
from rclone_api.file import File, FileItem
|
|
28
28
|
from rclone_api.group_files import group_files
|
|
29
29
|
from rclone_api.mount import Mount, clean_mount, prepare_mount
|
|
30
30
|
from rclone_api.mount_read_chunker import MultiMountFileChunker
|
|
@@ -67,6 +67,77 @@ def _to_rclone_conf(config: Config | Path) -> Config:
|
|
|
67
67
|
return config
|
|
68
68
|
|
|
69
69
|
|
|
70
|
+
# class closing(AbstractContextManager):
|
|
71
|
+
# """Context to automatically close something at the end of a block.
|
|
72
|
+
|
|
73
|
+
# Code like this:
|
|
74
|
+
|
|
75
|
+
# with closing(<module>.open(<arguments>)) as f:
|
|
76
|
+
# <block>
|
|
77
|
+
|
|
78
|
+
# is equivalent to this:
|
|
79
|
+
|
|
80
|
+
# f = <module>.open(<arguments>)
|
|
81
|
+
# try:
|
|
82
|
+
# <block>
|
|
83
|
+
# finally:
|
|
84
|
+
# f.close()
|
|
85
|
+
|
|
86
|
+
# """
|
|
87
|
+
# def __init__(self, thing):
|
|
88
|
+
# self.thing = thing
|
|
89
|
+
# def __enter__(self):
|
|
90
|
+
# return self.thing
|
|
91
|
+
# def __exit__(self, *exc_info):
|
|
92
|
+
# self.thing.close()
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# Process
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class FilesStream:
    """Streams FileItem objects parsed from a running `rclone lsjson` process."""

    def __init__(self, path: str, process: Process) -> None:
        self.path = path
        self.process = process

    def __enter__(self) -> "FilesStream":
        self.process.__enter__()
        return self

    def __exit__(self, *exc_info):
        self.process.__exit__(*exc_info)

    def files(self) -> Generator[FileItem, None, None]:
        """Yield FileItem objects as lines arrive on the process stdout."""
        for raw in self.process.stdout:
            text = raw.decode("utf-8").strip()
            # lsjson emits a JSON array: skip the opening-bracket line.
            if text.startswith("["):
                continue
            # Entry lines end with a comma except the last one.
            if text.endswith(","):
                text = text[:-1]
            # Skip the closing-bracket line.
            if text.endswith("]"):
                continue
            item: FileItem | None = FileItem.from_json_str(self.path, text)
            if item is not None:
                yield item

    def files_paged(
        self, page_size: int = 1000
    ) -> Generator[list[FileItem], None, None]:
        """Yield lists of FileItem, each holding up to *page_size* entries."""
        batch: list[FileItem] = []
        for item in self.files():
            batch.append(item)
            if len(batch) >= page_size:
                yield batch
                batch = []
        if batch:
            yield batch

    def __iter__(self) -> Generator[FileItem, None, None]:
        return self.files()
|
|
139
|
+
|
|
140
|
+
|
|
70
141
|
class Rclone:
|
|
71
142
|
def __init__(
|
|
72
143
|
self, rclone_conf: Path | Config, rclone_exe: Path | None = None
|
|
@@ -147,6 +218,38 @@ class Rclone:
|
|
|
147
218
|
cp = self._run(cmd_list)
|
|
148
219
|
return cp.stdout.strip()
|
|
149
220
|
|
|
221
|
+
def ls_stream(
|
|
222
|
+
self,
|
|
223
|
+
path: str,
|
|
224
|
+
max_depth: int = -1,
|
|
225
|
+
fast_list: bool = False,
|
|
226
|
+
) -> FilesStream:
|
|
227
|
+
"""List files in the given path"""
|
|
228
|
+
cmd = ["lsjson", path]
|
|
229
|
+
if max_depth < 0:
|
|
230
|
+
cmd.append("--recursive")
|
|
231
|
+
elif max_depth > 0:
|
|
232
|
+
cmd += ["--max-depth", str(max_depth)]
|
|
233
|
+
if fast_list:
|
|
234
|
+
cmd.append("--fast-list")
|
|
235
|
+
streamer = FilesStream(path, self._launch_process(cmd, capture=True))
|
|
236
|
+
return streamer
|
|
237
|
+
|
|
238
|
+
def save_to_db(
|
|
239
|
+
self,
|
|
240
|
+
src: str,
|
|
241
|
+
db_url: str,
|
|
242
|
+
max_depth: int = -1,
|
|
243
|
+
fast_list: bool = False,
|
|
244
|
+
) -> None:
|
|
245
|
+
"""Save files to a database (sqlite, mysql, postgres)"""
|
|
246
|
+
from rclone_api.db import DB
|
|
247
|
+
|
|
248
|
+
db = DB(db_url)
|
|
249
|
+
with self.ls_stream(src, max_depth, fast_list) as stream:
|
|
250
|
+
for page in stream.files_paged(page_size=10000):
|
|
251
|
+
db.add_files(page)
|
|
252
|
+
|
|
150
253
|
def ls(
|
|
151
254
|
self,
|
|
152
255
|
path: Dir | Remote | str,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: rclone_api
|
|
3
|
-
Version: 1.3.
|
|
3
|
+
Version: 1.3.8
|
|
4
4
|
Summary: rclone api in python
|
|
5
5
|
Home-page: https://github.com/zackees/rclone-api
|
|
6
6
|
License: BSD 3-Clause License
|
|
@@ -14,6 +14,8 @@ Requires-Dist: python-dotenv>=1.0.0
|
|
|
14
14
|
Requires-Dist: certifi>=2025.1.31
|
|
15
15
|
Requires-Dist: psutil
|
|
16
16
|
Requires-Dist: boto3<=1.35.99,>=1.20.1
|
|
17
|
+
Requires-Dist: sqlmodel>=0.0.23
|
|
18
|
+
Requires-Dist: psycopg2-binary>=2.9.10
|
|
17
19
|
Dynamic: home-page
|
|
18
20
|
|
|
19
21
|
# rclone-api
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
rclone_api/__init__.py,sha256=
|
|
1
|
+
rclone_api/__init__.py,sha256=bJ6x-7ySj1kC7xjQJqEEA-0cr46RUh_tvIZsebGcyu4,1224
|
|
2
2
|
rclone_api/cli.py,sha256=dibfAZIh0kXWsBbfp3onKLjyZXo54mTzDjUdzJlDlWo,231
|
|
3
3
|
rclone_api/completed_process.py,sha256=_IZ8IWK7DM1_tsbDEkH6wPZ-bbcrgf7A7smls854pmg,1775
|
|
4
4
|
rclone_api/config.py,sha256=f6jEAxVorGFr31oHfcsu5AJTtOJj2wR5tTSsbGGZuIw,2558
|
|
@@ -8,14 +8,15 @@ rclone_api/diff.py,sha256=tMoJMAGmLSE6Q_7QhPf6PnCzb840djxMZtDmhc2GlGQ,5227
|
|
|
8
8
|
rclone_api/dir.py,sha256=i4h7LX5hB_WmVixxDRWL_l1nifvscrdWct_8Wx7wHZc,3540
|
|
9
9
|
rclone_api/dir_listing.py,sha256=GoziW8Sne6FY90MLNcb2aO3aaa3jphB6H8ExYrV0Ryo,1882
|
|
10
10
|
rclone_api/exec.py,sha256=Bq0gkyZ10mEY0FRyzNZgdN4FaWP9vpeCk1kjpg-gN_8,1083
|
|
11
|
-
rclone_api/file.py,sha256=
|
|
11
|
+
rclone_api/file.py,sha256=PfjKphl_wMAzXzSShuLGTm4EW134NKPfcYDCuxYn5GY,4057
|
|
12
|
+
rclone_api/file_item.py,sha256=cH-AQYsxedhNPp4c8NHY1ad4Z7St4yf_VGbmiGD59no,1770
|
|
12
13
|
rclone_api/filelist.py,sha256=xbiusvNgaB_b_kQOZoHMJJxn6TWGtPrWd2J042BI28o,767
|
|
13
14
|
rclone_api/group_files.py,sha256=H92xPW9lQnbNw5KbtZCl00bD6iRh9yRbCuxku4j_3dg,8036
|
|
14
15
|
rclone_api/log.py,sha256=VZHM7pNSXip2ZLBKMP7M1u-rp_F7zoafFDuR8CPUoKI,1271
|
|
15
16
|
rclone_api/mount.py,sha256=TE_VIBMW7J1UkF_6HRCt8oi_jGdMov4S51bm2OgxFAM,10045
|
|
16
17
|
rclone_api/mount_read_chunker.py,sha256=bi4N-VkyPdM4RWUYhIqJ71lQrXanck4PpF3pY8k2xnQ,4722
|
|
17
|
-
rclone_api/process.py,sha256=
|
|
18
|
-
rclone_api/rclone.py,sha256=
|
|
18
|
+
rclone_api/process.py,sha256=BGXJTZVT__jeaDyjN8_kRycliOhkBErMPdHO1hKRvJE,5271
|
|
19
|
+
rclone_api/rclone.py,sha256=359ARr5FutcEzzO7vSAwnCvWzdLcN_nR6PW_Eov1BO8,52096
|
|
19
20
|
rclone_api/remote.py,sha256=O9WDUFQy9f6oT1HdUbTixK2eg0xtBBm8k4Xl6aa6K00,431
|
|
20
21
|
rclone_api/rpath.py,sha256=8ZA_1wxWtskwcy0I8V2VbjKDmzPkiWd8Q2JQSvh-sYE,2586
|
|
21
22
|
rclone_api/scan_missing_folders.py,sha256=Kulca2Q6WZodt00ATFHkmqqInuoPvBkhTcS9703y6po,4740
|
|
@@ -23,8 +24,13 @@ rclone_api/types.py,sha256=XiiWoGXAzNxTEfRcszw3BeRF7ZATXHIAPFg2-aJzUfo,9926
|
|
|
23
24
|
rclone_api/util.py,sha256=F9Q3zbWRsgPF4NG6OWB63cZ7GVq82lsraP47gmmDohU,5416
|
|
24
25
|
rclone_api/walk.py,sha256=-54NVE8EJcCstwDoaC_UtHm73R2HrZwVwQmsnv55xNU,3369
|
|
25
26
|
rclone_api/assets/example.txt,sha256=lTBovRjiz0_TgtAtbA1C5hNi2ffbqnNPqkKg6UiKCT8,54
|
|
27
|
+
rclone_api/cmd/analyze.py,sha256=RHbvk1G5ZUc3qLqlm1AZEyQzd_W_ZjcbCNDvW4YpTKQ,1252
|
|
26
28
|
rclone_api/cmd/copy_large_s3.py,sha256=nOpAUAQN1mJnf4EIZCh4OVCW7Q4_EXJeLFVe6r_9rZA,3369
|
|
29
|
+
rclone_api/cmd/fill_db.py,sha256=vCnfbwkPdxeR-c8khyUtAJZs6LmRrlfW0x8_ZqiDKj8,1548
|
|
27
30
|
rclone_api/cmd/list_files.py,sha256=x8FHODEilwKqwdiU1jdkeJbLwOqUkUQuDWPo2u_zpf0,741
|
|
31
|
+
rclone_api/db/__init__.py,sha256=OSRUdnSWUlDTOHmjdjVmxYTUNpTbtaJ5Ll9sl-PfZg0,40
|
|
32
|
+
rclone_api/db/db.py,sha256=j39iDI6kiI3X0TwQwWkLeo3fnTVNlwuc6pdg6L7Nnq0,9932
|
|
33
|
+
rclone_api/db/models.py,sha256=unKEiu8l4R4UAEoncEbOHEda227DpXm-cWwRV6vwJXE,1657
|
|
28
34
|
rclone_api/experimental/flags.py,sha256=qCVD--fSTmzlk9hloRLr0q9elzAOFzPsvVpKM3aB1Mk,2739
|
|
29
35
|
rclone_api/experimental/flags_base.py,sha256=ajU_czkTcAxXYU-SlmiCfHY7aCQGHvpCLqJ-Z8uZLk0,2102
|
|
30
36
|
rclone_api/profile/mount_copy_bytes.py,sha256=nZtqMukLhSzHq64Pn1I8pXwjoraqWjCKey3WLAeubx0,9069
|
|
@@ -35,9 +41,9 @@ rclone_api/s3/chunk_types.py,sha256=oSWv8No9V3BeM7IcGnowyR2a7YrszdAXzEJlxaeZcp0,
|
|
|
35
41
|
rclone_api/s3/create.py,sha256=wgfkapv_j904CfKuWyiBIWJVxfAx_ftemFSUV14aT68,3149
|
|
36
42
|
rclone_api/s3/types.py,sha256=Elmh__gvZJyJyElYwMmvYZIBIunDJiTRAbEg21GmsRU,1604
|
|
37
43
|
rclone_api/s3/upload_file_multipart.py,sha256=d8ZWqO8n9wsqRF6JjmvAFmG1aCkFqdSB1L8yxe_5qiY,11669
|
|
38
|
-
rclone_api-1.3.
|
|
39
|
-
rclone_api-1.3.
|
|
40
|
-
rclone_api-1.3.
|
|
41
|
-
rclone_api-1.3.
|
|
42
|
-
rclone_api-1.3.
|
|
43
|
-
rclone_api-1.3.
|
|
44
|
+
rclone_api-1.3.8.dist-info/LICENSE,sha256=b6pOoifSXiUaz_lDS84vWlG3fr4yUKwB8fzkrH9R8bQ,1064
|
|
45
|
+
rclone_api-1.3.8.dist-info/METADATA,sha256=TrFUV6iwuk5auxFRQFShVRQXhSYIaAZhv3a2MMF82Zs,4609
|
|
46
|
+
rclone_api-1.3.8.dist-info/WHEEL,sha256=rF4EZyR2XVS6irmOHQIJx2SUqXLZKRMUrjsg8UwN-XQ,109
|
|
47
|
+
rclone_api-1.3.8.dist-info/entry_points.txt,sha256=TV8kwP3FRzYwUEr0RLC7aJh0W03SAefIJNXTJ-FdMIQ,200
|
|
48
|
+
rclone_api-1.3.8.dist-info/top_level.txt,sha256=EvZ7uuruUpe9RiUyEp25d1Keq7PWYNT0O_-mr8FCG5g,11
|
|
49
|
+
rclone_api-1.3.8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|