rclone-api 1.3.18__tar.gz → 1.3.20__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rclone_api-1.3.18 → rclone_api-1.3.20}/PKG-INFO +1 -1
- {rclone_api-1.3.18 → rclone_api-1.3.20}/pyproject.toml +1 -1
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/__init__.py +4 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/db/db.py +277 -277
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/db/models.py +57 -57
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/file.py +1 -0
- rclone_api-1.3.20/src/rclone_api/http_server.py +233 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/mount_read_chunker.py +2 -1
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/profile/mount_copy_bytes.py +1 -1
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/rclone.py +101 -18
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/remote.py +3 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/s3/chunk_task.py +3 -1
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/s3/upload_file_multipart.py +32 -20
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/types.py +36 -8
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api.egg-info/PKG-INFO +1 -1
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api.egg-info/SOURCES.txt +2 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tests/test_copy_file_resumable_s3.py +1 -1
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tests/test_file_item.py +12 -0
- rclone_api-1.3.20/tests/test_serve_http.py +255 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/.aiderignore +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/.github/workflows/lint.yml +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/.github/workflows/push_macos.yml +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/.github/workflows/push_ubuntu.yml +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/.github/workflows/push_win.yml +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/.gitignore +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/.pylintrc +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/.vscode/launch.json +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/.vscode/settings.json +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/.vscode/tasks.json +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/LICENSE +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/MANIFEST.in +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/README.md +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/clean +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/install +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/lint +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/requirements.testing.txt +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/setup.cfg +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/setup.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/assets/example.txt +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/cli.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/cmd/analyze.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/cmd/copy_large_s3.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/cmd/list_files.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/cmd/save_to_db.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/completed_process.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/config.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/convert.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/db/__init__.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/deprecated.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/diff.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/dir.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/dir_listing.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/exec.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/experimental/flags.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/experimental/flags_base.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/file_item.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/filelist.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/group_files.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/log.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/mount.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/process.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/rpath.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/s3/api.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/s3/basic_ops.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/s3/chunk_types.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/s3/create.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/s3/types.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/scan_missing_folders.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/util.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api/walk.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api.egg-info/dependency_links.txt +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api.egg-info/entry_points.txt +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api.egg-info/requires.txt +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/src/rclone_api.egg-info/top_level.txt +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/test +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tests/archive/test_paramiko.py.disabled +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tests/test_cmd_list_files.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tests/test_copy.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tests/test_copy_bytes.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tests/test_copy_files.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tests/test_db.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tests/test_diff.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tests/test_group_files.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tests/test_is_synced.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tests/test_ls.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tests/test_ls_stream_files.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tests/test_mount.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tests/test_mount_s3.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tests/test_obscure.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tests/test_rclone_config.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tests/test_remote_control.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tests/test_remotes.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tests/test_s3.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tests/test_scan_missing_folders.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tests/test_size_files.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tests/test_size_suffix.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tests/test_walk.py +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/tox.ini +0 -0
- {rclone_api-1.3.18 → rclone_api-1.3.20}/upload_package.sh +0 -0
|
@@ -9,6 +9,7 @@ from .dir import Dir
|
|
|
9
9
|
from .dir_listing import DirListing
|
|
10
10
|
from .file import File, FileItem
|
|
11
11
|
from .filelist import FileList
|
|
12
|
+
from .http_server import HttpFetcher, HttpServer, Range
|
|
12
13
|
|
|
13
14
|
# Import the configure_logging function to make it available at package level
|
|
14
15
|
from .log import configure_logging, setup_default_logging
|
|
@@ -45,6 +46,9 @@ __all__ = [
|
|
|
45
46
|
"SizeSuffix",
|
|
46
47
|
"configure_logging",
|
|
47
48
|
"log",
|
|
49
|
+
"HttpServer",
|
|
50
|
+
"Range",
|
|
51
|
+
"HttpFetcher",
|
|
48
52
|
]
|
|
49
53
|
|
|
50
54
|
setup_default_logging()
|
|
@@ -1,277 +1,277 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Database module for rclone_api.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
import os
|
|
6
|
-
from threading import Lock
|
|
7
|
-
from typing import Optional
|
|
8
|
-
|
|
9
|
-
from sqlmodel import Session, SQLModel, create_engine, select
|
|
10
|
-
|
|
11
|
-
from rclone_api.db.models import RepositoryMeta, create_file_entry_model
|
|
12
|
-
from rclone_api.file import FileItem
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def _to_table_name(remote_name: str) -> str:
|
|
16
|
-
return (
|
|
17
|
-
"files_"
|
|
18
|
-
+ remote_name.replace(":", "_").replace(" ", "_").replace("/", "_").lower()
|
|
19
|
-
)
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class DB:
|
|
23
|
-
"""Database class for rclone_api."""
|
|
24
|
-
|
|
25
|
-
def __init__(self, db_path_url: str):
|
|
26
|
-
"""Initialize the database.
|
|
27
|
-
|
|
28
|
-
Args:
|
|
29
|
-
db_path: Path to the database file
|
|
30
|
-
"""
|
|
31
|
-
self.db_path_url = db_path_url
|
|
32
|
-
|
|
33
|
-
# When running multiple commands in parallel, the database connection may fail once
|
|
34
|
-
# when the database is first populated.
|
|
35
|
-
retries = 2
|
|
36
|
-
for _ in range(retries):
|
|
37
|
-
try:
|
|
38
|
-
self.engine = create_engine(db_path_url)
|
|
39
|
-
SQLModel.metadata.create_all(self.engine)
|
|
40
|
-
break
|
|
41
|
-
except Exception as e:
|
|
42
|
-
print(f"Failed to connect to database. Retrying... {e}")
|
|
43
|
-
else:
|
|
44
|
-
raise Exception("Failed to connect to database.")
|
|
45
|
-
self._cache: dict[str, DBRepo] = {}
|
|
46
|
-
self._cache_lock = Lock()
|
|
47
|
-
|
|
48
|
-
def drop_all(self) -> None:
|
|
49
|
-
"""Drop all tables in the database."""
|
|
50
|
-
SQLModel.metadata.drop_all(self.engine)
|
|
51
|
-
|
|
52
|
-
def close(self) -> None:
|
|
53
|
-
"""Close the database connection and release resources."""
|
|
54
|
-
if hasattr(self, "engine") and self.engine is not None:
|
|
55
|
-
self.engine.dispose()
|
|
56
|
-
|
|
57
|
-
def add_files(self, files: list[FileItem]) -> None:
|
|
58
|
-
"""Add files to the database.
|
|
59
|
-
|
|
60
|
-
Args:
|
|
61
|
-
remote_name: Name of the remote
|
|
62
|
-
files: List of file entries
|
|
63
|
-
"""
|
|
64
|
-
|
|
65
|
-
partition: dict[str, list[FileItem]] = {}
|
|
66
|
-
for file in files:
|
|
67
|
-
partition.setdefault(file.remote, []).append(file)
|
|
68
|
-
|
|
69
|
-
for remote_name, files in partition.items():
|
|
70
|
-
repo = self.get_or_create_repo(remote_name)
|
|
71
|
-
repo.insert_files(files)
|
|
72
|
-
|
|
73
|
-
def query_files(self, remote_name: str) -> list[FileItem]:
|
|
74
|
-
"""Query files from the database.
|
|
75
|
-
|
|
76
|
-
Args:
|
|
77
|
-
remote_name: Name of the remote
|
|
78
|
-
"""
|
|
79
|
-
repo = self.get_or_create_repo(remote_name)
|
|
80
|
-
files = repo.get_files()
|
|
81
|
-
out: list[FileItem] = []
|
|
82
|
-
for file in files:
|
|
83
|
-
out.append(file)
|
|
84
|
-
return out
|
|
85
|
-
|
|
86
|
-
def get_or_create_repo(self, remote_name: str) -> "DBRepo":
|
|
87
|
-
"""Get a table section for a remote.
|
|
88
|
-
|
|
89
|
-
Args:
|
|
90
|
-
remote_name: Name of the remote
|
|
91
|
-
table_name: Optional table name, will be derived from remote_name if not provided
|
|
92
|
-
|
|
93
|
-
Returns:
|
|
94
|
-
DBRepo: A table section for the remote
|
|
95
|
-
"""
|
|
96
|
-
with self._cache_lock:
|
|
97
|
-
if remote_name in self._cache:
|
|
98
|
-
return self._cache[remote_name]
|
|
99
|
-
table_name = _to_table_name(remote_name)
|
|
100
|
-
out = DBRepo(self.engine, remote_name, table_name)
|
|
101
|
-
self._cache[remote_name] = out
|
|
102
|
-
return out
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
class DBRepo:
|
|
106
|
-
"""Table repo remote."""
|
|
107
|
-
|
|
108
|
-
def __init__(self, engine, remote_name: str, table_name: Optional[str] = None):
|
|
109
|
-
"""Initialize a table section.
|
|
110
|
-
|
|
111
|
-
Args:
|
|
112
|
-
engine: SQLAlchemy engine
|
|
113
|
-
remote_name: Name of the remote
|
|
114
|
-
table_name: Optional table name, will be derived from remote_name if not provided
|
|
115
|
-
"""
|
|
116
|
-
self.engine = engine
|
|
117
|
-
self.remote_name = remote_name
|
|
118
|
-
|
|
119
|
-
# If table_name is not provided, derive one from the remote name.
|
|
120
|
-
if table_name is None:
|
|
121
|
-
# table_name = (
|
|
122
|
-
# "file_entries_"
|
|
123
|
-
# + remote_name.replace(":", "_").replace(" ", "_").replace("/", "_").lower()
|
|
124
|
-
# )
|
|
125
|
-
table_name = _to_table_name(remote_name)
|
|
126
|
-
self.table_name = table_name
|
|
127
|
-
|
|
128
|
-
# Check if repository exists in RepositoryMeta; if not, create a new entry.
|
|
129
|
-
with Session(self.engine) as session:
|
|
130
|
-
existing_repo = session.exec(
|
|
131
|
-
select(RepositoryMeta).where(
|
|
132
|
-
RepositoryMeta.repo_name == self.remote_name
|
|
133
|
-
)
|
|
134
|
-
).first()
|
|
135
|
-
if not existing_repo:
|
|
136
|
-
repo_meta = RepositoryMeta(
|
|
137
|
-
repo_name=self.remote_name, file_table_name=self.table_name
|
|
138
|
-
)
|
|
139
|
-
session.add(repo_meta)
|
|
140
|
-
session.commit()
|
|
141
|
-
|
|
142
|
-
# Dynamically create the file entry model and its table.
|
|
143
|
-
self.FileEntryModel = create_file_entry_model(self.table_name)
|
|
144
|
-
SQLModel.metadata.create_all(self.engine, tables=[self.FileEntryModel.__table__]) # type: ignore
|
|
145
|
-
|
|
146
|
-
def insert_file(self, file: FileItem) -> None:
|
|
147
|
-
"""Insert a file entry into the table.
|
|
148
|
-
|
|
149
|
-
Args:
|
|
150
|
-
file: File entry
|
|
151
|
-
"""
|
|
152
|
-
return self.insert_files([file])
|
|
153
|
-
|
|
154
|
-
def insert_files(self, files: list[FileItem]) -> None:
|
|
155
|
-
"""
|
|
156
|
-
Insert multiple file entries into the table.
|
|
157
|
-
|
|
158
|
-
Three bulk operations are performed:
|
|
159
|
-
1. Select: Determine which files already exist.
|
|
160
|
-
2. Insert: Bulk-insert new file entries.
|
|
161
|
-
3. Update: Bulk-update existing file entries.
|
|
162
|
-
|
|
163
|
-
The FileEntryModel must define a unique constraint on (path, name) and have a primary key "id".
|
|
164
|
-
"""
|
|
165
|
-
# Step 1: Bulk select existing records.
|
|
166
|
-
# get_exists() returns a set of FileItem objects (based on path_no_remote and name) that already exist.
|
|
167
|
-
existing_files = self.get_exists(files)
|
|
168
|
-
|
|
169
|
-
# Determine which files need to be updated vs. inserted.
|
|
170
|
-
needs_update = existing_files
|
|
171
|
-
is_new = set(files) - existing_files
|
|
172
|
-
|
|
173
|
-
# Step 2: Bulk insert new rows.
|
|
174
|
-
new_values = [
|
|
175
|
-
{
|
|
176
|
-
"path": file.path_no_remote,
|
|
177
|
-
"name": file.name,
|
|
178
|
-
"size": file.size,
|
|
179
|
-
"mime_type": file.mime_type,
|
|
180
|
-
"mod_time": file.mod_time,
|
|
181
|
-
"suffix": file.real_suffix,
|
|
182
|
-
}
|
|
183
|
-
for file in is_new
|
|
184
|
-
]
|
|
185
|
-
with Session(self.engine) as session:
|
|
186
|
-
if new_values:
|
|
187
|
-
session.bulk_insert_mappings(self.FileEntryModel, new_values) # type: ignore
|
|
188
|
-
session.commit()
|
|
189
|
-
|
|
190
|
-
# Step 3: Bulk update existing rows.
|
|
191
|
-
# First, query the database for the primary keys of rows that match the unique keys in needs_update.
|
|
192
|
-
with Session(self.engine) as session:
|
|
193
|
-
# Collect all unique paths from files needing update.
|
|
194
|
-
update_paths = [file.path_no_remote for file in needs_update]
|
|
195
|
-
# Query for existing rows matching any of these paths.
|
|
196
|
-
db_entries = session.exec(
|
|
197
|
-
select(self.FileEntryModel).where(
|
|
198
|
-
self.FileEntryModel.path.in_(update_paths) # type: ignore
|
|
199
|
-
)
|
|
200
|
-
).all()
|
|
201
|
-
|
|
202
|
-
# Build a mapping from the unique key (path, name) to the primary key (id).
|
|
203
|
-
id_map = {(entry.path, entry.name): entry.id for entry in db_entries}
|
|
204
|
-
|
|
205
|
-
# Prepare bulk update mappings.
|
|
206
|
-
update_values = []
|
|
207
|
-
for file in needs_update:
|
|
208
|
-
key = (file.path_no_remote, file.name)
|
|
209
|
-
if key in id_map:
|
|
210
|
-
update_values.append(
|
|
211
|
-
{
|
|
212
|
-
"id": id_map[key],
|
|
213
|
-
"size": file.size,
|
|
214
|
-
"mime_type": file.mime_type,
|
|
215
|
-
"mod_time": file.mod_time,
|
|
216
|
-
"suffix": file.real_suffix,
|
|
217
|
-
}
|
|
218
|
-
)
|
|
219
|
-
if update_values:
|
|
220
|
-
session.bulk_update_mappings(self.FileEntryModel, update_values) # type: ignore
|
|
221
|
-
session.commit()
|
|
222
|
-
|
|
223
|
-
def get_exists(self, files: list[FileItem]) -> set[FileItem]:
|
|
224
|
-
"""Get file entries from the table that exist among the given files.
|
|
225
|
-
|
|
226
|
-
Args:
|
|
227
|
-
files: List of file entries
|
|
228
|
-
|
|
229
|
-
Returns:
|
|
230
|
-
Set of FileItem instances whose 'path_no_remote' exists in the table.
|
|
231
|
-
"""
|
|
232
|
-
# Extract unique paths from the input files.
|
|
233
|
-
paths = {file.path_no_remote for file in files}
|
|
234
|
-
|
|
235
|
-
with Session(self.engine) as session:
|
|
236
|
-
# Execute a single query to fetch all file paths in the table that match the input paths.
|
|
237
|
-
result = session.exec(
|
|
238
|
-
select(self.FileEntryModel.path).where(
|
|
239
|
-
self.FileEntryModel.path.in_(paths) # type: ignore
|
|
240
|
-
)
|
|
241
|
-
).all()
|
|
242
|
-
# Convert the result to a set for fast membership tests.
|
|
243
|
-
existing_paths = set(result)
|
|
244
|
-
|
|
245
|
-
# Return the set of FileItem objects that have a path in the existing_paths.
|
|
246
|
-
return {file for file in files if file.path_no_remote in existing_paths}
|
|
247
|
-
|
|
248
|
-
def get_files(self) -> list[FileItem]:
|
|
249
|
-
"""Get all files in the table.
|
|
250
|
-
|
|
251
|
-
Returns:
|
|
252
|
-
list: List of file entries
|
|
253
|
-
"""
|
|
254
|
-
# with Session(self.engine) as session:
|
|
255
|
-
# return session.exec(select(self.FileEntryModel)).all()
|
|
256
|
-
out: list[FileItem] = []
|
|
257
|
-
with Session(self.engine) as session:
|
|
258
|
-
query = session.exec(select(self.FileEntryModel)).all()
|
|
259
|
-
for item in query:
|
|
260
|
-
name = item.name # type: ignore
|
|
261
|
-
size = item.size # type: ignore
|
|
262
|
-
mime_type = item.mime_type # type: ignore
|
|
263
|
-
mod_time = item.mod_time # type: ignore
|
|
264
|
-
path = item.path # type: ignore
|
|
265
|
-
parent = os.path.dirname(path)
|
|
266
|
-
if parent == "/" or parent == ".":
|
|
267
|
-
parent = ""
|
|
268
|
-
o = FileItem(
|
|
269
|
-
remote=self.remote_name,
|
|
270
|
-
parent=parent,
|
|
271
|
-
name=name,
|
|
272
|
-
size=size,
|
|
273
|
-
mime_type=mime_type,
|
|
274
|
-
mod_time=mod_time,
|
|
275
|
-
)
|
|
276
|
-
out.append(o)
|
|
277
|
-
return out
|
|
1
|
+
"""
|
|
2
|
+
Database module for rclone_api.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from threading import Lock
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
from sqlmodel import Session, SQLModel, create_engine, select
|
|
10
|
+
|
|
11
|
+
from rclone_api.db.models import RepositoryMeta, create_file_entry_model
|
|
12
|
+
from rclone_api.file import FileItem
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _to_table_name(remote_name: str) -> str:
|
|
16
|
+
return (
|
|
17
|
+
"files_"
|
|
18
|
+
+ remote_name.replace(":", "_").replace(" ", "_").replace("/", "_").lower()
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class DB:
    """Database class for rclone_api.

    Owns the engine for one database URL and hands out one ``DBRepo`` per
    remote name, caching the repos so each remote is only set up once per
    ``DB`` instance.
    """

    def __init__(self, db_path_url: str):
        """Initialize the database.

        Args:
            db_path_url: Database URL passed to ``create_engine``.

        Raises:
            Exception: If the engine could not be created or the tables could
                not be created after all attempts. The error of the last
                attempt is attached as ``__cause__``.
        """
        self.db_path_url = db_path_url

        # When running multiple commands in parallel, the database connection may fail once
        # when the database is first populated.
        retries = 2
        last_error: Optional[Exception] = None
        for _ in range(retries):
            try:
                self.engine = create_engine(db_path_url)
                SQLModel.metadata.create_all(self.engine)
                break
            except Exception as e:
                # Keep the error so the final failure can report its root cause.
                last_error = e
                print(f"Failed to connect to database. Retrying... {e}")
        else:
            # Every attempt failed: chain the last error instead of dropping it.
            raise Exception("Failed to connect to database.") from last_error
        self._cache: dict[str, DBRepo] = {}
        self._cache_lock = Lock()

    def drop_all(self) -> None:
        """Drop all tables in the database."""
        SQLModel.metadata.drop_all(self.engine)

    def close(self) -> None:
        """Close the database connection and release resources."""
        # ``engine`` may be missing if __init__ failed before assigning it.
        if hasattr(self, "engine") and self.engine is not None:
            self.engine.dispose()

    def add_files(self, files: list[FileItem]) -> None:
        """Add files to the database.

        The files are grouped by their ``remote`` attribute and each group is
        inserted through the repo of that remote.

        Args:
            files: List of file entries; they may belong to different remotes.
        """
        partition: dict[str, list[FileItem]] = {}
        for file in files:
            partition.setdefault(file.remote, []).append(file)

        # A distinct loop name keeps the ``files`` parameter intact.
        for remote_name, remote_files in partition.items():
            repo = self.get_or_create_repo(remote_name)
            repo.insert_files(remote_files)

    def query_files(self, remote_name: str) -> list[FileItem]:
        """Query files from the database.

        Args:
            remote_name: Name of the remote

        Returns:
            A new list with every file stored for the remote.
        """
        repo = self.get_or_create_repo(remote_name)
        return list(repo.get_files())

    def get_or_create_repo(self, remote_name: str) -> "DBRepo":
        """Get the repo for a remote, creating and caching it on first use.

        The table name of a newly created repo is derived from ``remote_name``.

        Args:
            remote_name: Name of the remote

        Returns:
            DBRepo: A table section for the remote
        """
        # Lookup and creation happen under the lock so that concurrent callers
        # share a single DBRepo per remote.
        with self._cache_lock:
            if remote_name in self._cache:
                return self._cache[remote_name]
            table_name = _to_table_name(remote_name)
            out = DBRepo(self.engine, remote_name, table_name)
            self._cache[remote_name] = out
            return out
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class DBRepo:
    """Table repo for one remote.

    Each repo owns a dynamically created file-entry table and registers itself
    in ``RepositoryMeta`` the first time it is created for a remote name.
    """

    # Upper bound on the number of values bound into one SQL ``IN (...)``
    # clause. SQLite builds older than 3.32 reject statements with more than
    # 999 bound parameters, so path lookups are split into batches below that.
    _IN_CLAUSE_BATCH_SIZE = 500

    def __init__(self, engine, remote_name: str, table_name: Optional[str] = None):
        """Initialize a table section.

        Args:
            engine: SQLAlchemy engine
            remote_name: Name of the remote
            table_name: Optional table name, will be derived from remote_name if not provided
        """
        self.engine = engine
        self.remote_name = remote_name

        # If table_name is not provided, derive one from the remote name.
        if table_name is None:
            table_name = _to_table_name(remote_name)
        self.table_name = table_name

        # Check if repository exists in RepositoryMeta; if not, create a new entry.
        with Session(self.engine) as session:
            existing_repo = session.exec(
                select(RepositoryMeta).where(
                    RepositoryMeta.repo_name == self.remote_name
                )
            ).first()
            if not existing_repo:
                repo_meta = RepositoryMeta(
                    repo_name=self.remote_name, file_table_name=self.table_name
                )
                session.add(repo_meta)
                session.commit()

        # Dynamically create the file entry model and its table.
        self.FileEntryModel = create_file_entry_model(self.table_name)
        SQLModel.metadata.create_all(self.engine, tables=[self.FileEntryModel.__table__])  # type: ignore

    @classmethod
    def _batched(cls, values: list[str]) -> list[list[str]]:
        """Split ``values`` into consecutive slices of at most the batch size."""
        step = cls._IN_CLAUSE_BATCH_SIZE
        return [values[start : start + step] for start in range(0, len(values), step)]

    def insert_file(self, file: FileItem) -> None:
        """Insert a file entry into the table.

        Args:
            file: File entry
        """
        self.insert_files([file])

    def insert_files(self, files: list[FileItem]) -> None:
        """
        Insert multiple file entries into the table.

        Three bulk operations are performed:
        1. Select: Determine which files already exist (matched on path).
        2. Insert: Bulk-insert new file entries.
        3. Update: Bulk-update existing file entries.

        The FileEntryModel is expected to define a unique constraint on
        (path, name) and have a primary key "id".

        Args:
            files: File entries to store; an empty list is a no-op.
        """
        if not files:
            # Nothing to store; avoid opening sessions for empty work.
            return

        # Step 1: Bulk select existing records.
        # get_exists() returns the FileItem objects whose path_no_remote is
        # already present in the table.
        existing_files = self.get_exists(files)

        # Determine which files need to be updated vs. inserted.
        needs_update = existing_files
        is_new = set(files) - existing_files

        # Step 2: Bulk insert new rows.
        new_values = [
            {
                "path": file.path_no_remote,
                "name": file.name,
                "size": file.size,
                "mime_type": file.mime_type,
                "mod_time": file.mod_time,
                "suffix": file.real_suffix,
            }
            for file in is_new
        ]
        if new_values:
            with Session(self.engine) as session:
                session.bulk_insert_mappings(self.FileEntryModel, new_values)  # type: ignore
                session.commit()

        # Step 3: Bulk update existing rows.
        if not needs_update:
            return

        update_paths = [file.path_no_remote for file in needs_update]
        with Session(self.engine) as session:
            # Build a mapping from the unique key (path, name) to the primary
            # key (id), querying in batches to stay under the bound-parameter
            # limit of the database.
            id_map = {}
            for batch in self._batched(update_paths):
                db_entries = session.exec(
                    select(self.FileEntryModel).where(
                        self.FileEntryModel.path.in_(batch)  # type: ignore
                    )
                ).all()
                for entry in db_entries:
                    id_map[(entry.path, entry.name)] = entry.id

            # Prepare bulk update mappings; rows whose (path, name) key was
            # not found are left alone.
            update_values = []
            for file in needs_update:
                key = (file.path_no_remote, file.name)
                if key in id_map:
                    update_values.append(
                        {
                            "id": id_map[key],
                            "size": file.size,
                            "mime_type": file.mime_type,
                            "mod_time": file.mod_time,
                            "suffix": file.real_suffix,
                        }
                    )
            if update_values:
                session.bulk_update_mappings(self.FileEntryModel, update_values)  # type: ignore
                session.commit()

    def get_exists(self, files: list[FileItem]) -> set[FileItem]:
        """Get file entries from the table that exist among the given files.

        Args:
            files: List of file entries

        Returns:
            Set of FileItem instances whose 'path_no_remote' exists in the table.
        """
        if not files:
            return set()

        # Extract unique paths from the input files.
        paths = list({file.path_no_remote for file in files})

        existing_paths: set[str] = set()
        with Session(self.engine) as session:
            # Fetch the matching paths batch by batch so that a large input
            # does not exceed the bound-parameter limit of the database.
            for batch in self._batched(paths):
                result = session.exec(
                    select(self.FileEntryModel.path).where(
                        self.FileEntryModel.path.in_(batch)  # type: ignore
                    )
                ).all()
                existing_paths.update(result)

        # Return the set of FileItem objects that have a path in the existing_paths.
        return {file for file in files if file.path_no_remote in existing_paths}

    def get_files(self) -> list[FileItem]:
        """Get all files in the table.

        Returns:
            list: List of file entries
        """
        out: list[FileItem] = []
        with Session(self.engine) as session:
            rows = session.exec(select(self.FileEntryModel)).all()
            for item in rows:
                # The parent directory is rebuilt from the stored path; a
                # root-level parent is normalized to the empty string.
                parent = os.path.dirname(item.path)  # type: ignore
                if parent == "/" or parent == ".":
                    parent = ""
                out.append(
                    FileItem(
                        remote=self.remote_name,
                        parent=parent,
                        name=item.name,  # type: ignore
                        size=item.size,  # type: ignore
                        mime_type=item.mime_type,  # type: ignore
                        mod_time=item.mod_time,  # type: ignore
                    )
                )
        return out
|