rclone-api 1.3.6__tar.gz → 1.3.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rclone_api-1.3.6 → rclone_api-1.3.8}/.gitignore +3 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/PKG-INFO +3 -1
- {rclone_api-1.3.6 → rclone_api-1.3.8}/pyproject.toml +4 -3
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/__init__.py +2 -1
- rclone_api-1.3.8/src/rclone_api/cmd/analyze.py +51 -0
- rclone_api-1.3.8/src/rclone_api/cmd/fill_db.py +68 -0
- rclone_api-1.3.8/src/rclone_api/db/__init__.py +3 -0
- rclone_api-1.3.8/src/rclone_api/db/db.py +268 -0
- rclone_api-1.3.8/src/rclone_api/db/models.py +57 -0
- rclone_api-1.3.8/src/rclone_api/file.py +149 -0
- rclone_api-1.3.8/src/rclone_api/file_item.py +68 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/process.py +8 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/rclone.py +104 -1
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api.egg-info/PKG-INFO +3 -1
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api.egg-info/SOURCES.txt +8 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api.egg-info/requires.txt +2 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/test +1 -1
- {rclone_api-1.3.6 → rclone_api-1.3.8}/tests/test_copy_bytes.py +3 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/tests/test_copy_file_resumable_s3.py +1 -1
- rclone_api-1.3.8/tests/test_db.py +83 -0
- rclone_api-1.3.8/tests/test_ls_stream_files.py +90 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/tests/test_s3.py +1 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/tests/test_scan_missing_folders.py +1 -0
- rclone_api-1.3.6/src/rclone_api/file.py +0 -68
- {rclone_api-1.3.6 → rclone_api-1.3.8}/.aiderignore +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/.github/workflows/lint.yml +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/.github/workflows/push_macos.yml +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/.github/workflows/push_ubuntu.yml +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/.github/workflows/push_win.yml +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/.pylintrc +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/.vscode/launch.json +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/.vscode/settings.json +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/.vscode/tasks.json +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/LICENSE +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/MANIFEST.in +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/README.md +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/clean +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/install +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/lint +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/requirements.testing.txt +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/setup.cfg +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/setup.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/assets/example.txt +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/cli.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/cmd/copy_large_s3.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/cmd/list_files.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/completed_process.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/config.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/convert.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/deprecated.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/diff.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/dir.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/dir_listing.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/exec.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/experimental/flags.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/experimental/flags_base.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/filelist.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/group_files.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/log.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/mount.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/mount_read_chunker.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/profile/mount_copy_bytes.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/remote.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/rpath.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/s3/api.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/s3/basic_ops.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/s3/chunk_task.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/s3/chunk_types.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/s3/create.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/s3/types.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/s3/upload_file_multipart.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/scan_missing_folders.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/types.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/util.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api/walk.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api.egg-info/dependency_links.txt +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api.egg-info/entry_points.txt +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/src/rclone_api.egg-info/top_level.txt +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/tests/archive/test_paramiko.py.disabled +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/tests/test_cmd_list_files.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/tests/test_copy.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/tests/test_copy_files.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/tests/test_diff.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/tests/test_group_files.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/tests/test_is_synced.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/tests/test_ls.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/tests/test_mount.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/tests/test_mount_s3.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/tests/test_obscure.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/tests/test_rclone_config.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/tests/test_remote_control.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/tests/test_remotes.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/tests/test_size_files.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/tests/test_size_suffix.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/tests/test_walk.py +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/tox.ini +0 -0
- {rclone_api-1.3.6 → rclone_api-1.3.8}/upload_package.sh +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: rclone_api
|
|
3
|
-
Version: 1.3.
|
|
3
|
+
Version: 1.3.8
|
|
4
4
|
Summary: rclone api in python
|
|
5
5
|
Home-page: https://github.com/zackees/rclone-api
|
|
6
6
|
License: BSD 3-Clause License
|
|
@@ -14,6 +14,8 @@ Requires-Dist: python-dotenv>=1.0.0
|
|
|
14
14
|
Requires-Dist: certifi>=2025.1.31
|
|
15
15
|
Requires-Dist: psutil
|
|
16
16
|
Requires-Dist: boto3<=1.35.99,>=1.20.1
|
|
17
|
+
Requires-Dist: sqlmodel>=0.0.23
|
|
18
|
+
Requires-Dist: psycopg2-binary>=2.9.10
|
|
17
19
|
Dynamic: home-page
|
|
18
20
|
|
|
19
21
|
# rclone-api
|
|
@@ -15,15 +15,16 @@ dependencies = [
|
|
|
15
15
|
"python-dotenv>=1.0.0",
|
|
16
16
|
"certifi>=2025.1.31",
|
|
17
17
|
"psutil",
|
|
18
|
-
|
|
19
18
|
# BOTO3 Library needs to be pinned to a specific version
|
|
20
19
|
# BackBlaze S3 fails with checksum header which it doesn't support after 1.35.99
|
|
21
20
|
# The 1.20.1 was the earliest one I checked that worked and is not the true lower bound.
|
|
22
21
|
"boto3>=1.20.1,<=1.35.99",
|
|
22
|
+
"sqlmodel>=0.0.23",
|
|
23
|
+
"psycopg2-binary>=2.9.10",
|
|
23
24
|
]
|
|
24
25
|
|
|
25
26
|
# Change this with the version number bump.
|
|
26
|
-
version = "1.3.
|
|
27
|
+
version = "1.3.8"
|
|
27
28
|
|
|
28
29
|
[tool.setuptools]
|
|
29
30
|
package-dir = {"" = "src"}
|
|
@@ -55,4 +56,4 @@ disable_error_code = ["import-untyped"]
|
|
|
55
56
|
[project.scripts]
|
|
56
57
|
rclone-api-listfiles = "rclone_api.cmd.list_files:main"
|
|
57
58
|
rclone-api-copylarge-s3 = "rclone_api.cmd.copy_large_s3:main"
|
|
58
|
-
rclone-api-profile-mount = "rclone_api.profile.mount_copy_bytes:main"
|
|
59
|
+
rclone-api-profile-mount = "rclone_api.profile.mount_copy_bytes:main"
|
|
@@ -7,7 +7,7 @@ from .config import Config, Parsed, Section
|
|
|
7
7
|
from .diff import DiffItem, DiffOption, DiffType
|
|
8
8
|
from .dir import Dir
|
|
9
9
|
from .dir_listing import DirListing
|
|
10
|
-
from .file import File
|
|
10
|
+
from .file import File, FileItem
|
|
11
11
|
from .filelist import FileList
|
|
12
12
|
|
|
13
13
|
# Import the configure_logging function to make it available at package level
|
|
@@ -28,6 +28,7 @@ __all__ = [
|
|
|
28
28
|
"RPath",
|
|
29
29
|
"DirListing",
|
|
30
30
|
"FileList",
|
|
31
|
+
"FileItem",
|
|
31
32
|
"Process",
|
|
32
33
|
"DiffItem",
|
|
33
34
|
"DiffType",
|
|
import argparse
from dataclasses import dataclass
from pathlib import Path

from rclone_api import Rclone


@dataclass
class Args:
    """Parsed command line arguments for the analyze tool."""

    config: Path  # rclone configuration file
    path: str  # remote path to enumerate

    def __post_init__(self):
        # Fail fast when the config file does not exist.
        if not self.config.exists():
            raise FileNotFoundError(f"Config file not found: {self.config}")


def list_files(rclone: Rclone, path: str):
    """List files in a remote path."""

    with rclone.ls_stream(path, fast_list=True) as stream:
        for item in stream:
            print(item.path, "", item.size, item.mod_time)


def _parse_args() -> Args:
    """Build the CLI parser and return validated arguments."""
    parser = argparse.ArgumentParser(description="List files in a remote path.")
    parser.add_argument(
        "--config", help="Path to rclone config file", type=Path, default="rclone.conf"
    )
    parser.add_argument("path", help="Remote path to list")
    namespace = parser.parse_args()
    return Args(config=namespace.config, path=namespace.path)


def main() -> int:
    """Main entry point."""
    parsed = _parse_args()
    rclone = Rclone(Path(parsed.config))
    list_files(rclone, parsed.path)
    return 0


if __name__ == "__main__":
    import sys

    cwd = Path(".").absolute()
    print(f"cwd: {cwd}")
    sys.argv.append("dst:TorrentBooks")
    main()
import argparse
import os
from dataclasses import dataclass
from pathlib import Path

from dotenv import load_dotenv

from rclone_api import Rclone


def _db_url_from_env_or_raise() -> str:
    """Return the database URL from the DB_URL environment variable.

    A local .env file is loaded first so DB_URL may be supplied either
    via the environment or via .env.

    Raises:
        ValueError: If DB_URL is not set.
    """
    load_dotenv()
    db_url = os.getenv("DB_URL")
    if db_url is None:
        raise ValueError("DB_URL not set")
    return db_url


@dataclass
class Args:
    """Parsed command line arguments for the fill-db tool."""

    config: Path  # rclone configuration file
    path: str  # remote path whose listing is written to the database

    def __post_init__(self):
        # Fail fast when the config file does not exist.
        if not self.config.exists():
            raise FileNotFoundError(f"Config file not found: {self.config}")


def fill_db(rclone: Rclone, path: str) -> None:
    """Stream the file listing of *path* into the database at DB_URL."""
    db_url = _db_url_from_env_or_raise()
    rclone.save_to_db(src=path, db_url=db_url, fast_list=True)


def _parse_args() -> Args:
    """Build the CLI parser and return validated arguments."""
    # Fixed copy-paste: this tool fills a database, it does not just list.
    parser = argparse.ArgumentParser(
        description="Fill a database with the file listing of a remote path."
    )
    parser.add_argument(
        "--config", help="Path to rclone config file", type=Path, default="rclone.conf"
    )
    parser.add_argument("path", help="Remote path to list")
    tmp = parser.parse_args()
    return Args(config=tmp.config, path=tmp.path)


def main() -> int:
    """Main entry point."""
    args = _parse_args()
    rclone = Rclone(Path(args.config))
    fill_db(rclone, args.path)
    return 0


if __name__ == "__main__":
    import sys

    cwd = Path(".").absolute()
    print(f"cwd: {cwd}")
    sys.argv.append("dst:TorrentBooks/meta")
    main()
@@ -0,0 +1,268 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Database module for rclone_api.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from threading import Lock
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
from sqlmodel import Session, SQLModel, create_engine, select
|
|
10
|
+
|
|
11
|
+
from rclone_api.db.models import RepositoryMeta, create_file_entry_model
|
|
12
|
+
from rclone_api.file import FileItem
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _to_table_name(remote_name: str) -> str:
|
|
16
|
+
return (
|
|
17
|
+
"files_"
|
|
18
|
+
+ remote_name.replace(":", "_").replace(" ", "_").replace("/", "_").lower()
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class DB:
    """Database facade for rclone_api.

    Owns the SQLAlchemy engine and hands out one DBRepo per remote,
    caching repos so each per-remote table is only set up once.
    """

    def __init__(self, db_path_url: str):
        """Initialize the database.

        Args:
            db_path_url: SQLAlchemy database URL (e.g. "sqlite:///data.db")
        """
        self.db_path_url = db_path_url
        self.engine = create_engine(db_path_url)

        # Create the meta table
        SQLModel.metadata.create_all(self.engine)
        self._cache: dict[str, DBRepo] = {}
        self._cache_lock = Lock()

    def drop_all(self) -> None:
        """Drop all tables in the database."""
        SQLModel.metadata.drop_all(self.engine)

    def close(self) -> None:
        """Close the database connection and release resources."""
        if hasattr(self, "engine") and self.engine is not None:
            self.engine.dispose()

    def add_files(self, files: list[FileItem]) -> None:
        """Add files to the database.

        Args:
            files: File entries; they may span multiple remotes.
        """
        # Partition by remote so each batch lands in its own table.
        # (Renamed the loop variable below: it previously shadowed `files`.)
        partition: dict[str, list[FileItem]] = {}
        for file in files:
            partition.setdefault(file.remote, []).append(file)

        for remote_name, remote_files in partition.items():
            repo = self.get_or_create_repo(remote_name)
            repo.insert_files(remote_files)

    def query_files(self, remote_name: str) -> list[FileItem]:
        """Query files from the database.

        Args:
            remote_name: Name of the remote

        Returns:
            All file entries recorded for the remote.
        """
        repo = self.get_or_create_repo(remote_name)
        return list(repo.get_files())

    def get_or_create_repo(self, remote_name: str) -> "DBRepo":
        """Get (or lazily create) the table repo for a remote.

        Args:
            remote_name: Name of the remote

        Returns:
            DBRepo: The repo managing this remote's file table.
        """
        with self._cache_lock:
            if remote_name in self._cache:
                return self._cache[remote_name]
            table_name = _to_table_name(remote_name)
            out = DBRepo(self.engine, remote_name, table_name)
            self._cache[remote_name] = out
            return out
|
|
96
|
+
class DBRepo:
    """Repository for the file-entry table of a single remote."""

    def __init__(self, engine, remote_name: str, table_name: Optional[str] = None):
        """Initialize a table section.

        Registers the remote in RepositoryMeta (if new) and creates the
        remote's dedicated file-entry table.

        Args:
            engine: SQLAlchemy engine
            remote_name: Name of the remote
            table_name: Optional table name, will be derived from remote_name if not provided
        """
        self.engine = engine
        self.remote_name = remote_name

        # If table_name is not provided, derive one from the remote name.
        if table_name is None:
            table_name = _to_table_name(remote_name)
        self.table_name = table_name

        # Check if repository exists in RepositoryMeta; if not, create a new entry.
        with Session(self.engine) as session:
            existing_repo = session.exec(
                select(RepositoryMeta).where(
                    RepositoryMeta.repo_name == self.remote_name
                )
            ).first()
            if not existing_repo:
                repo_meta = RepositoryMeta(
                    repo_name=self.remote_name, file_table_name=self.table_name
                )
                session.add(repo_meta)
                session.commit()

        # Dynamically create the file entry model and its table.
        self.FileEntryModel = create_file_entry_model(self.table_name)
        SQLModel.metadata.create_all(self.engine, tables=[self.FileEntryModel.__table__])  # type: ignore

    def insert_file(self, file: FileItem) -> None:
        """Insert a single file entry into the table.

        Args:
            file: File entry
        """
        return self.insert_files([file])

    def insert_files(self, files: list[FileItem]) -> None:
        """
        Insert multiple file entries into the table.

        Three bulk operations are performed:
        1. Select: Determine which files already exist.
        2. Insert: Bulk-insert new file entries.
        3. Update: Bulk-update existing file entries.

        The FileEntryModel must define a unique constraint on (path, name) and have a primary key "id".
        """
        # Step 1: Bulk select existing records.
        # get_exists() returns the subset of `files` whose path_no_remote is already stored.
        existing_files = self.get_exists(files)

        # Determine which files need to be updated vs. inserted.
        needs_update = existing_files
        is_new = set(files) - existing_files

        # Step 2: Bulk insert new rows.
        # NOTE(review): the optional `hash` field is not written here — confirm
        # that is intentional.
        new_values = [
            {
                "path": file.path_no_remote,
                "name": file.name,
                "size": file.size,
                "mime_type": file.mime_type,
                "mod_time": file.mod_time,
                "suffix": file.suffix,
            }
            for file in is_new
        ]
        with Session(self.engine) as session:
            if new_values:
                session.bulk_insert_mappings(self.FileEntryModel, new_values)  # type: ignore
                session.commit()

        # Step 3: Bulk update existing rows.
        # First, query the database for the primary keys of rows that match the unique keys in needs_update.
        with Session(self.engine) as session:
            # Collect all unique paths from files needing update.
            update_paths = [file.path_no_remote for file in needs_update]
            # Query for existing rows matching any of these paths.
            db_entries = session.exec(
                select(self.FileEntryModel).where(
                    self.FileEntryModel.path.in_(update_paths)  # type: ignore
                )
            ).all()

            # Build a mapping from the unique key (path, name) to the primary key (id).
            id_map = {(entry.path, entry.name): entry.id for entry in db_entries}

            # Prepare bulk update mappings.
            update_values = []
            for file in needs_update:
                key = (file.path_no_remote, file.name)
                if key in id_map:
                    update_values.append(
                        {
                            "id": id_map[key],
                            "size": file.size,
                            "mime_type": file.mime_type,
                            "mod_time": file.mod_time,
                            "suffix": file.suffix,
                        }
                    )
            if update_values:
                session.bulk_update_mappings(self.FileEntryModel, update_values)  # type: ignore
                session.commit()

    def get_exists(self, files: list[FileItem]) -> set[FileItem]:
        """Get file entries from the table that exist among the given files.

        Args:
            files: List of file entries

        Returns:
            Set of FileItem instances whose 'path_no_remote' exists in the table.
        """
        # Extract unique paths from the input files.
        paths = {file.path_no_remote for file in files}

        with Session(self.engine) as session:
            # Execute a single query to fetch all file paths in the table that match the input paths.
            result = session.exec(
                select(self.FileEntryModel.path).where(
                    self.FileEntryModel.path.in_(paths)  # type: ignore
                )
            ).all()
            # Convert the result to a set for fast membership tests.
            existing_paths = set(result)

        # Return the set of FileItem objects that have a path in the existing_paths.
        return {file for file in files if file.path_no_remote in existing_paths}

    def get_files(self) -> list[FileItem]:
        """Get all files in the table.

        Returns:
            list: List of FileItem objects reconstructed from stored rows
        """
        out: list[FileItem] = []
        with Session(self.engine) as session:
            query = session.exec(select(self.FileEntryModel)).all()
            for item in query:
                name = item.name  # type: ignore
                size = item.size  # type: ignore
                mime_type = item.mime_type  # type: ignore
                mod_time = item.mod_time  # type: ignore
                path = item.path  # type: ignore
                parent = os.path.dirname(path)
                # NOTE(review): root-level parents are normalized to "" here,
                # while FileItem.from_json uses "." for the root — confirm both
                # spellings produce the same path_no_remote downstream.
                if parent == "/" or parent == ".":
                    parent = ""
                o = FileItem(
                    remote=self.remote_name,
                    parent=parent,
                    name=name,
                    size=size,
                    mime_type=mime_type,
                    mod_time=mod_time,
                )
                out.append(o)
        return out
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Database models for rclone_api.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from typing import Optional, Type
|
|
7
|
+
|
|
8
|
+
from sqlalchemy import BigInteger, Column
|
|
9
|
+
from sqlmodel import Field, SQLModel
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Meta table that indexes all repositories
class RepositoryMeta(SQLModel, table=True):
    """Repository metadata table.

    One row per known remote, recording which dedicated table holds that
    remote's file entries.
    """

    id: Optional[int] = Field(default=None, primary_key=True)
    repo_name: str  # name of the remote this row describes
    file_table_name: str  # The dedicated table name for file entries
|
|
21
|
+
# Base FileEntry model that will be extended
class FileEntry(SQLModel, ABC):
    """Base file entry model with common fields.

    Abstract: concrete, table-backed subclasses are produced by
    create_file_entry_model(); this base declares no table itself.
    """

    id: Optional[int] = Field(default=None, primary_key=True)
    path: str = Field(index=True, unique=True)  # path relative to the remote root
    suffix: str = Field(index=True)  # file extension, indexed for per-type queries
    name: str  # base file name
    size: int = Field(sa_column=Column(BigInteger))  # BigInteger: file sizes can exceed 32 bits
    mime_type: str
    mod_time: str  # modification time in the backend's string form
    hash: Optional[str] = Field(default=None)  # optional content hash

    @abstractmethod
    def table_name(self) -> str:
        """Return the table name for this file entry model."""
        pass
|
|
40
|
+
# Factory to dynamically create a FileEntry model with a given table name
def create_file_entry_model(_table_name: str) -> Type[FileEntry]:
    """Create a file entry model bound to a given table name.

    Args:
        _table_name: Table name to bind the concrete model to

    Returns:
        Type[FileEntry]: File entry model class with the specified table name

    NOTE(review): each call registers a table in SQLModel.metadata — confirm
    callers cache the returned class rather than calling this twice with the
    same table name.
    """

    class FileEntryConcrete(FileEntry, table=True):
        __tablename__ = _table_name  # type: ignore # dynamically set table name

        def table_name(self) -> str:
            """Return the table name for this file entry model."""
            return _table_name

    return FileEntryConcrete
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import warnings
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from rclone_api.rpath import RPath
|
|
7
|
+
|
|
8
|
+
_STRING_INTERNER: dict[str, str] = {}
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _intern(s: str) -> str:
|
|
12
|
+
return _STRING_INTERNER.setdefault(s, s)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# File is too complex, this is a simple dataclass that can be streamed out.
|
|
16
|
+
@dataclass
|
|
17
|
+
class FileItem:
|
|
18
|
+
"""Remote file dataclass."""
|
|
19
|
+
|
|
20
|
+
remote: str
|
|
21
|
+
parent: str
|
|
22
|
+
name: str
|
|
23
|
+
size: int
|
|
24
|
+
mime_type: str
|
|
25
|
+
mod_time: str
|
|
26
|
+
hash: str | None = None
|
|
27
|
+
|
|
28
|
+
@property
|
|
29
|
+
def path(self) -> str:
|
|
30
|
+
if self.parent == ".":
|
|
31
|
+
return f"{self.remote}/{self.name}"
|
|
32
|
+
else:
|
|
33
|
+
return f"{self.remote}/{self.parent}/{self.name}"
|
|
34
|
+
|
|
35
|
+
@property
|
|
36
|
+
def path_no_remote(self) -> str:
|
|
37
|
+
if self.parent == ".":
|
|
38
|
+
return f"{self.name}"
|
|
39
|
+
else:
|
|
40
|
+
return f"{self.parent}/{self.name}"
|
|
41
|
+
|
|
42
|
+
@property
|
|
43
|
+
def suffix(self) -> str:
|
|
44
|
+
return self._suffix
|
|
45
|
+
|
|
46
|
+
def __post_init__(self):
|
|
47
|
+
self.parent = _intern(self.parent)
|
|
48
|
+
self.mime_type = _intern(self.mime_type)
|
|
49
|
+
self.remote = _intern(self.remote)
|
|
50
|
+
self._suffix = _intern(Path(self.name).suffix)
|
|
51
|
+
|
|
52
|
+
@staticmethod
|
|
53
|
+
def from_json(remote: str, data: dict) -> "FileItem | None":
|
|
54
|
+
try:
|
|
55
|
+
path_str: str = data["Path"]
|
|
56
|
+
parent_path = Path(path_str).parent.as_posix()
|
|
57
|
+
name = data["Name"]
|
|
58
|
+
size = data["Size"]
|
|
59
|
+
mime_type = data["MimeType"]
|
|
60
|
+
mod_time = data["ModTime"]
|
|
61
|
+
|
|
62
|
+
return FileItem(
|
|
63
|
+
remote=remote,
|
|
64
|
+
parent=parent_path,
|
|
65
|
+
name=name,
|
|
66
|
+
size=size,
|
|
67
|
+
mime_type=mime_type,
|
|
68
|
+
mod_time=mod_time,
|
|
69
|
+
)
|
|
70
|
+
except KeyError:
|
|
71
|
+
warnings.warn(f"Invalid data: {data}")
|
|
72
|
+
return None
|
|
73
|
+
|
|
74
|
+
@staticmethod
|
|
75
|
+
def from_json_str(remote: str, data: str) -> "FileItem | None":
|
|
76
|
+
try:
|
|
77
|
+
data_dict = json.loads(data)
|
|
78
|
+
return FileItem.from_json(remote, data_dict)
|
|
79
|
+
except json.JSONDecodeError:
|
|
80
|
+
warnings.warn(f"Invalid JSON data: {data}")
|
|
81
|
+
return None
|
|
82
|
+
|
|
83
|
+
# hasher for set membership
|
|
84
|
+
def __hash__(self) -> int:
|
|
85
|
+
return hash(self.path_no_remote)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class File:
    """A remote file backed by an RPath."""

    def __init__(
        self,
        path: RPath,
    ) -> None:
        # The RPath carries the remote, the path string and the rclone handle.
        self.path = path

    @property
    def name(self) -> str:
        """Base name of the file."""
        return self.path.name

    def read_text(self) -> str:
        """Read the file contents as bytes.

        Returns:
            bytes: The file contents

        Raises:
            RuntimeError: If no rclone instance is associated with this file
            RuntimeError: If the path represents a directory
        """
        rclone = self.path.rclone
        if rclone is None:
            raise RuntimeError("No rclone instance associated with this file")
        if self.path.is_dir:
            raise RuntimeError("Cannot read a directory as bytes")
        return rclone._run(["cat", self.path.path], check=True).stdout

    def to_json(self) -> dict:
        """Convert the File to a JSON serializable dictionary."""
        return self.path.to_json()

    def to_string(self, include_remote: bool = True) -> str:
        """Render the file as "remote:path", or just the path."""
        if not include_remote:
            return self.path.path
        return f"{self.path.remote.name}:{self.path.path}"

    def relative_to(self, prefix: str) -> str:
        """Return this file's path relative to *prefix* (POSIX form)."""
        return Path(str(self)).relative_to(prefix).as_posix()

    @property
    def size(self) -> int:
        """Size of the file in bytes."""
        return self.path.size

    def __str__(self) -> str:
        return str(self.path)

    def __repr__(self) -> str:
        return json.dumps(self.path.to_json())
|