rclone-api 1.3.7__tar.gz → 1.3.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. {rclone_api-1.3.7 → rclone_api-1.3.9}/.gitignore +3 -0
  2. {rclone_api-1.3.7 → rclone_api-1.3.9}/PKG-INFO +3 -1
  3. {rclone_api-1.3.7 → rclone_api-1.3.9}/pyproject.toml +5 -3
  4. rclone_api-1.3.9/src/rclone_api/cmd/analyze.py +51 -0
  5. rclone_api-1.3.9/src/rclone_api/cmd/save_to_db.py +68 -0
  6. rclone_api-1.3.9/src/rclone_api/db/__init__.py +3 -0
  7. rclone_api-1.3.9/src/rclone_api/db/db.py +268 -0
  8. rclone_api-1.3.9/src/rclone_api/db/models.py +57 -0
  9. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/file.py +53 -9
  10. rclone_api-1.3.9/src/rclone_api/file_item.py +68 -0
  11. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/process.py +2 -0
  12. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/rclone.py +90 -15
  13. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api.egg-info/PKG-INFO +3 -1
  14. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api.egg-info/SOURCES.txt +7 -0
  15. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api.egg-info/entry_points.txt +1 -0
  16. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api.egg-info/requires.txt +2 -0
  17. {rclone_api-1.3.7 → rclone_api-1.3.9}/test +1 -1
  18. {rclone_api-1.3.7 → rclone_api-1.3.9}/tests/test_copy_bytes.py +3 -0
  19. {rclone_api-1.3.7 → rclone_api-1.3.9}/tests/test_copy_file_resumable_s3.py +1 -1
  20. rclone_api-1.3.9/tests/test_db.py +83 -0
  21. {rclone_api-1.3.7 → rclone_api-1.3.9}/tests/test_ls_stream_files.py +5 -2
  22. {rclone_api-1.3.7 → rclone_api-1.3.9}/tests/test_s3.py +1 -0
  23. {rclone_api-1.3.7 → rclone_api-1.3.9}/tests/test_scan_missing_folders.py +1 -0
  24. {rclone_api-1.3.7 → rclone_api-1.3.9}/.aiderignore +0 -0
  25. {rclone_api-1.3.7 → rclone_api-1.3.9}/.github/workflows/lint.yml +0 -0
  26. {rclone_api-1.3.7 → rclone_api-1.3.9}/.github/workflows/push_macos.yml +0 -0
  27. {rclone_api-1.3.7 → rclone_api-1.3.9}/.github/workflows/push_ubuntu.yml +0 -0
  28. {rclone_api-1.3.7 → rclone_api-1.3.9}/.github/workflows/push_win.yml +0 -0
  29. {rclone_api-1.3.7 → rclone_api-1.3.9}/.pylintrc +0 -0
  30. {rclone_api-1.3.7 → rclone_api-1.3.9}/.vscode/launch.json +0 -0
  31. {rclone_api-1.3.7 → rclone_api-1.3.9}/.vscode/settings.json +0 -0
  32. {rclone_api-1.3.7 → rclone_api-1.3.9}/.vscode/tasks.json +0 -0
  33. {rclone_api-1.3.7 → rclone_api-1.3.9}/LICENSE +0 -0
  34. {rclone_api-1.3.7 → rclone_api-1.3.9}/MANIFEST.in +0 -0
  35. {rclone_api-1.3.7 → rclone_api-1.3.9}/README.md +0 -0
  36. {rclone_api-1.3.7 → rclone_api-1.3.9}/clean +0 -0
  37. {rclone_api-1.3.7 → rclone_api-1.3.9}/install +0 -0
  38. {rclone_api-1.3.7 → rclone_api-1.3.9}/lint +0 -0
  39. {rclone_api-1.3.7 → rclone_api-1.3.9}/requirements.testing.txt +0 -0
  40. {rclone_api-1.3.7 → rclone_api-1.3.9}/setup.cfg +0 -0
  41. {rclone_api-1.3.7 → rclone_api-1.3.9}/setup.py +0 -0
  42. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/__init__.py +0 -0
  43. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/assets/example.txt +0 -0
  44. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/cli.py +0 -0
  45. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/cmd/copy_large_s3.py +0 -0
  46. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/cmd/list_files.py +0 -0
  47. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/completed_process.py +0 -0
  48. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/config.py +0 -0
  49. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/convert.py +0 -0
  50. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/deprecated.py +0 -0
  51. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/diff.py +0 -0
  52. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/dir.py +0 -0
  53. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/dir_listing.py +0 -0
  54. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/exec.py +0 -0
  55. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/experimental/flags.py +0 -0
  56. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/experimental/flags_base.py +0 -0
  57. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/filelist.py +0 -0
  58. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/group_files.py +0 -0
  59. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/log.py +0 -0
  60. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/mount.py +0 -0
  61. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/mount_read_chunker.py +0 -0
  62. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/profile/mount_copy_bytes.py +0 -0
  63. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/remote.py +0 -0
  64. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/rpath.py +0 -0
  65. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/s3/api.py +0 -0
  66. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/s3/basic_ops.py +0 -0
  67. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/s3/chunk_task.py +0 -0
  68. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/s3/chunk_types.py +0 -0
  69. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/s3/create.py +0 -0
  70. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/s3/types.py +0 -0
  71. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/s3/upload_file_multipart.py +0 -0
  72. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/scan_missing_folders.py +0 -0
  73. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/types.py +0 -0
  74. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/util.py +0 -0
  75. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api/walk.py +0 -0
  76. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api.egg-info/dependency_links.txt +0 -0
  77. {rclone_api-1.3.7 → rclone_api-1.3.9}/src/rclone_api.egg-info/top_level.txt +0 -0
  78. {rclone_api-1.3.7 → rclone_api-1.3.9}/tests/archive/test_paramiko.py.disabled +0 -0
  79. {rclone_api-1.3.7 → rclone_api-1.3.9}/tests/test_cmd_list_files.py +0 -0
  80. {rclone_api-1.3.7 → rclone_api-1.3.9}/tests/test_copy.py +0 -0
  81. {rclone_api-1.3.7 → rclone_api-1.3.9}/tests/test_copy_files.py +0 -0
  82. {rclone_api-1.3.7 → rclone_api-1.3.9}/tests/test_diff.py +0 -0
  83. {rclone_api-1.3.7 → rclone_api-1.3.9}/tests/test_group_files.py +0 -0
  84. {rclone_api-1.3.7 → rclone_api-1.3.9}/tests/test_is_synced.py +0 -0
  85. {rclone_api-1.3.7 → rclone_api-1.3.9}/tests/test_ls.py +0 -0
  86. {rclone_api-1.3.7 → rclone_api-1.3.9}/tests/test_mount.py +0 -0
  87. {rclone_api-1.3.7 → rclone_api-1.3.9}/tests/test_mount_s3.py +0 -0
  88. {rclone_api-1.3.7 → rclone_api-1.3.9}/tests/test_obscure.py +0 -0
  89. {rclone_api-1.3.7 → rclone_api-1.3.9}/tests/test_rclone_config.py +0 -0
  90. {rclone_api-1.3.7 → rclone_api-1.3.9}/tests/test_remote_control.py +0 -0
  91. {rclone_api-1.3.7 → rclone_api-1.3.9}/tests/test_remotes.py +0 -0
  92. {rclone_api-1.3.7 → rclone_api-1.3.9}/tests/test_size_files.py +0 -0
  93. {rclone_api-1.3.7 → rclone_api-1.3.9}/tests/test_size_suffix.py +0 -0
  94. {rclone_api-1.3.7 → rclone_api-1.3.9}/tests/test_walk.py +0 -0
  95. {rclone_api-1.3.7 → rclone_api-1.3.9}/tox.ini +0 -0
  96. {rclone_api-1.3.7 → rclone_api-1.3.9}/upload_package.sh +0 -0
@@ -151,3 +151,6 @@ mount
151
151
  t2.py
152
152
  chunk_store
153
153
  state.json
154
+ database.db
155
+ data.db
156
+ test.db
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rclone_api
3
- Version: 1.3.7
3
+ Version: 1.3.9
4
4
  Summary: rclone api in python
5
5
  Home-page: https://github.com/zackees/rclone-api
6
6
  License: BSD 3-Clause License
@@ -14,6 +14,8 @@ Requires-Dist: python-dotenv>=1.0.0
14
14
  Requires-Dist: certifi>=2025.1.31
15
15
  Requires-Dist: psutil
16
16
  Requires-Dist: boto3<=1.35.99,>=1.20.1
17
+ Requires-Dist: sqlmodel>=0.0.23
18
+ Requires-Dist: psycopg2-binary>=2.9.10
17
19
  Dynamic: home-page
18
20
 
19
21
  # rclone-api
@@ -15,15 +15,16 @@ dependencies = [
15
15
  "python-dotenv>=1.0.0",
16
16
  "certifi>=2025.1.31",
17
17
  "psutil",
18
-
19
18
  # BOTO3 Library needs to be pinned to a specific version
20
19
  # BackBlaze S3 fails with checksum header which it doesn't support after 1.35.99
21
20
  # The 1.20.1 was the earliest one I checked that worked and is not the true lower bound.
22
21
  "boto3>=1.20.1,<=1.35.99",
22
+ "sqlmodel>=0.0.23",
23
+ "psycopg2-binary>=2.9.10",
23
24
  ]
24
25
 
25
26
  # Change this with the version number bump.
26
- version = "1.3.7"
27
+ version = "1.3.9"
27
28
 
28
29
  [tool.setuptools]
29
30
  package-dir = {"" = "src"}
@@ -55,4 +56,5 @@ disable_error_code = ["import-untyped"]
55
56
  [project.scripts]
56
57
  rclone-api-listfiles = "rclone_api.cmd.list_files:main"
57
58
  rclone-api-copylarge-s3 = "rclone_api.cmd.copy_large_s3:main"
58
- rclone-api-profile-mount = "rclone_api.profile.mount_copy_bytes:main"
59
+ rclone-api-profile-mount = "rclone_api.profile.mount_copy_bytes:main"
60
+ rclone-api-save-to-db = "rclone_api.cmd.save_to_db:main"
@@ -0,0 +1,51 @@
1
+ import argparse
2
+ from dataclasses import dataclass
3
+ from pathlib import Path
4
+
5
+ from rclone_api import Rclone
6
+
7
+
8
+ @dataclass
9
+ class Args:
10
+ config: Path
11
+ path: str
12
+
13
+ def __post_init__(self):
14
+ if not self.config.exists():
15
+ raise FileNotFoundError(f"Config file not found: {self.config}")
16
+
17
+
18
+ def list_files(rclone: Rclone, path: str):
19
+ """List files in a remote path."""
20
+
21
+ with rclone.ls_stream(path, fast_list=True) as files:
22
+ for file_item in files:
23
+ print(file_item.path, "", file_item.size, file_item.mod_time)
24
+
25
+
26
+ def _parse_args() -> Args:
27
+ parser = argparse.ArgumentParser(description="List files in a remote path.")
28
+ parser.add_argument(
29
+ "--config", help="Path to rclone config file", type=Path, default="rclone.conf"
30
+ )
31
+ parser.add_argument("path", help="Remote path to list")
32
+ tmp = parser.parse_args()
33
+ return Args(config=tmp.config, path=tmp.path)
34
+
35
+
36
+ def main() -> int:
37
+ """Main entry point."""
38
+ args = _parse_args()
39
+ path = args.path
40
+ rclone = Rclone(Path(args.config))
41
+ list_files(rclone, path)
42
+ return 0
43
+
44
+
45
+ if __name__ == "__main__":
46
+ import sys
47
+
48
+ cwd = Path(".").absolute()
49
+ print(f"cwd: {cwd}")
50
+ sys.argv.append("dst:TorrentBooks")
51
+ main()
@@ -0,0 +1,68 @@
1
+ import argparse
2
+ import os
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+
6
+ from dotenv import load_dotenv
7
+
8
+ from rclone_api import Rclone
9
+
10
+ # load_dotenv()
11
+
12
+
13
+ # DB_URL = "sqlite:///data.db"
14
+
15
+ # os.environ["DB_URL"] = "sqlite:///data.db"
16
+
17
+
18
+ def _db_url_from_env_or_raise() -> str:
19
+ load_dotenv()
20
+ db_url = os.getenv("DB_URL")
21
+ if db_url is None:
22
+ raise ValueError("DB_URL not set")
23
+ return db_url
24
+
25
+
26
+ @dataclass
27
+ class Args:
28
+ config: Path
29
+ path: str
30
+
31
+ def __post_init__(self):
32
+ if not self.config.exists():
33
+ raise FileNotFoundError(f"Config file not found: {self.config}")
34
+
35
+
36
+ def fill_db(rclone: Rclone, path: str):
37
+ """Save the file listing of a remote path into the database."""
38
+ # db = DB(_db_url_from_env_or_raise())
39
+ db_url = _db_url_from_env_or_raise()
40
+ rclone.save_to_db(src=path, db_url=db_url, fast_list=True)
41
+
42
+
43
+ def _parse_args() -> Args:
44
+ parser = argparse.ArgumentParser(description="List files in a remote path.")
45
+ parser.add_argument(
46
+ "--config", help="Path to rclone config file", type=Path, default="rclone.conf"
47
+ )
48
+ parser.add_argument("path", help="Remote path to list")
49
+ tmp = parser.parse_args()
50
+ return Args(config=tmp.config, path=tmp.path)
51
+
52
+
53
+ def main() -> int:
54
+ """Main entry point."""
55
+ args = _parse_args()
56
+ path = args.path
57
+ rclone = Rclone(Path(args.config))
58
+ fill_db(rclone, path)
59
+ return 0
60
+
61
+
62
+ if __name__ == "__main__":
63
+ import sys
64
+
65
+ cwd = Path(".").absolute()
66
+ print(f"cwd: {cwd}")
67
+ sys.argv.append("dst:TorrentBooks/meta")
68
+ main()
@@ -0,0 +1,3 @@
1
+ from .db import DB
2
+
3
+ __all__ = ["DB"]
@@ -0,0 +1,268 @@
1
+ """
2
+ Database module for rclone_api.
3
+ """
4
+
5
+ import os
6
+ from threading import Lock
7
+ from typing import Optional
8
+
9
+ from sqlmodel import Session, SQLModel, create_engine, select
10
+
11
+ from rclone_api.db.models import RepositoryMeta, create_file_entry_model
12
+ from rclone_api.file import FileItem
13
+
14
+
15
+ def _to_table_name(remote_name: str) -> str:
16
+ return (
17
+ "files_"
18
+ + remote_name.replace(":", "_").replace(" ", "_").replace("/", "_").lower()
19
+ )
20
+
21
+
22
+ class DB:
23
+ """Database class for rclone_api."""
24
+
25
+ def __init__(self, db_path_url: str):
26
+ """Initialize the database.
27
+
28
+ Args:
29
+ db_path_url: Database URL, e.g. "sqlite:///data.db"
30
+ """
31
+ self.db_path_url = db_path_url
32
+ self.engine = create_engine(db_path_url)
33
+
34
+ # Create the meta table
35
+ SQLModel.metadata.create_all(self.engine)
36
+ self._cache: dict[str, DBRepo] = {}
37
+ self._cache_lock = Lock()
38
+
39
+ def drop_all(self) -> None:
40
+ """Drop all tables in the database."""
41
+ SQLModel.metadata.drop_all(self.engine)
42
+
43
+ def close(self) -> None:
44
+ """Close the database connection and release resources."""
45
+ if hasattr(self, "engine") and self.engine is not None:
46
+ self.engine.dispose()
47
+
48
+ def add_files(self, files: list[FileItem]) -> None:
49
+ """Add files to the database.
50
+
51
+ Args:
52
+ files: List of file entries; entries are partitioned
53
+ by remote and inserted into each remote's table
54
+ """
55
+
56
+ partition: dict[str, list[FileItem]] = {}
57
+ for file in files:
58
+ partition.setdefault(file.remote, []).append(file)
59
+
60
+ for remote_name, files in partition.items():
61
+ repo = self.get_or_create_repo(remote_name)
62
+ repo.insert_files(files)
63
+
64
+ def query_files(self, remote_name: str) -> list[FileItem]:
65
+ """Query files from the database.
66
+
67
+ Args:
68
+ remote_name: Name of the remote
69
+ """
70
+ repo = self.get_or_create_repo(remote_name)
71
+ files = repo.get_files()
72
+ out: list[FileItem] = []
73
+ for file in files:
74
+ out.append(file)
75
+ return out
76
+
77
+ def get_or_create_repo(self, remote_name: str) -> "DBRepo":
78
+ """Get a table section for a remote.
79
+
80
+ Args:
81
+ remote_name: Name of the remote
82
+ (the table name is derived automatically from remote_name)
83
+
84
+ Returns:
85
+ DBRepo: A table section for the remote
86
+ """
87
+ with self._cache_lock:
88
+ if remote_name in self._cache:
89
+ return self._cache[remote_name]
90
+ table_name = _to_table_name(remote_name)
91
+ out = DBRepo(self.engine, remote_name, table_name)
92
+ self._cache[remote_name] = out
93
+ return out
94
+
95
+
96
+ class DBRepo:
97
+ """Table repository for a single remote."""
98
+
99
+ def __init__(self, engine, remote_name: str, table_name: Optional[str] = None):
100
+ """Initialize a table section.
101
+
102
+ Args:
103
+ engine: SQLAlchemy engine
104
+ remote_name: Name of the remote
105
+ table_name: Optional table name, will be derived from remote_name if not provided
106
+ """
107
+ self.engine = engine
108
+ self.remote_name = remote_name
109
+
110
+ # If table_name is not provided, derive one from the remote name.
111
+ if table_name is None:
112
+ # table_name = (
113
+ # "file_entries_"
114
+ # + remote_name.replace(":", "_").replace(" ", "_").replace("/", "_").lower()
115
+ # )
116
+ table_name = _to_table_name(remote_name)
117
+ self.table_name = table_name
118
+
119
+ # Check if repository exists in RepositoryMeta; if not, create a new entry.
120
+ with Session(self.engine) as session:
121
+ existing_repo = session.exec(
122
+ select(RepositoryMeta).where(
123
+ RepositoryMeta.repo_name == self.remote_name
124
+ )
125
+ ).first()
126
+ if not existing_repo:
127
+ repo_meta = RepositoryMeta(
128
+ repo_name=self.remote_name, file_table_name=self.table_name
129
+ )
130
+ session.add(repo_meta)
131
+ session.commit()
132
+
133
+ # Dynamically create the file entry model and its table.
134
+ self.FileEntryModel = create_file_entry_model(self.table_name)
135
+ SQLModel.metadata.create_all(self.engine, tables=[self.FileEntryModel.__table__]) # type: ignore
136
+
137
+ def insert_file(self, file: FileItem) -> None:
138
+ """Insert a file entry into the table.
139
+
140
+ Args:
141
+ file: File entry
142
+ """
143
+ return self.insert_files([file])
144
+
145
+ def insert_files(self, files: list[FileItem]) -> None:
146
+ """
147
+ Insert multiple file entries into the table.
148
+
149
+ Three bulk operations are performed:
150
+ 1. Select: Determine which files already exist.
151
+ 2. Insert: Bulk-insert new file entries.
152
+ 3. Update: Bulk-update existing file entries.
153
+
154
+ The FileEntryModel must define a unique constraint on (path, name) and have a primary key "id".
155
+ """
156
+ # Step 1: Bulk select existing records.
157
+ # get_exists() returns a set of FileItem objects (based on path_no_remote and name) that already exist.
158
+ existing_files = self.get_exists(files)
159
+
160
+ # Determine which files need to be updated vs. inserted.
161
+ needs_update = existing_files
162
+ is_new = set(files) - existing_files
163
+
164
+ # Step 2: Bulk insert new rows.
165
+ new_values = [
166
+ {
167
+ "path": file.path_no_remote,
168
+ "name": file.name,
169
+ "size": file.size,
170
+ "mime_type": file.mime_type,
171
+ "mod_time": file.mod_time,
172
+ "suffix": file.suffix,
173
+ }
174
+ for file in is_new
175
+ ]
176
+ with Session(self.engine) as session:
177
+ if new_values:
178
+ session.bulk_insert_mappings(self.FileEntryModel, new_values) # type: ignore
179
+ session.commit()
180
+
181
+ # Step 3: Bulk update existing rows.
182
+ # First, query the database for the primary keys of rows that match the unique keys in needs_update.
183
+ with Session(self.engine) as session:
184
+ # Collect all unique paths from files needing update.
185
+ update_paths = [file.path_no_remote for file in needs_update]
186
+ # Query for existing rows matching any of these paths.
187
+ db_entries = session.exec(
188
+ select(self.FileEntryModel).where(
189
+ self.FileEntryModel.path.in_(update_paths) # type: ignore
190
+ )
191
+ ).all()
192
+
193
+ # Build a mapping from the unique key (path, name) to the primary key (id).
194
+ id_map = {(entry.path, entry.name): entry.id for entry in db_entries}
195
+
196
+ # Prepare bulk update mappings.
197
+ update_values = []
198
+ for file in needs_update:
199
+ key = (file.path_no_remote, file.name)
200
+ if key in id_map:
201
+ update_values.append(
202
+ {
203
+ "id": id_map[key],
204
+ "size": file.size,
205
+ "mime_type": file.mime_type,
206
+ "mod_time": file.mod_time,
207
+ "suffix": file.suffix,
208
+ }
209
+ )
210
+ if update_values:
211
+ session.bulk_update_mappings(self.FileEntryModel, update_values) # type: ignore
212
+ session.commit()
213
+
214
+ def get_exists(self, files: list[FileItem]) -> set[FileItem]:
215
+ """Get file entries from the table that exist among the given files.
216
+
217
+ Args:
218
+ files: List of file entries
219
+
220
+ Returns:
221
+ Set of FileItem instances whose 'path_no_remote' exists in the table.
222
+ """
223
+ # Extract unique paths from the input files.
224
+ paths = {file.path_no_remote for file in files}
225
+
226
+ with Session(self.engine) as session:
227
+ # Execute a single query to fetch all file paths in the table that match the input paths.
228
+ result = session.exec(
229
+ select(self.FileEntryModel.path).where(
230
+ self.FileEntryModel.path.in_(paths) # type: ignore
231
+ )
232
+ ).all()
233
+ # Convert the result to a set for fast membership tests.
234
+ existing_paths = set(result)
235
+
236
+ # Return the set of FileItem objects that have a path in the existing_paths.
237
+ return {file for file in files if file.path_no_remote in existing_paths}
238
+
239
+ def get_files(self) -> list[FileItem]:
240
+ """Get all files in the table.
241
+
242
+ Returns:
243
+ list: List of file entries
244
+ """
245
+ # with Session(self.engine) as session:
246
+ # return session.exec(select(self.FileEntryModel)).all()
247
+ out: list[FileItem] = []
248
+ with Session(self.engine) as session:
249
+ query = session.exec(select(self.FileEntryModel)).all()
250
+ for item in query:
251
+ name = item.name # type: ignore
252
+ size = item.size # type: ignore
253
+ mime_type = item.mime_type # type: ignore
254
+ mod_time = item.mod_time # type: ignore
255
+ path = item.path # type: ignore
256
+ parent = os.path.dirname(path)
257
+ if parent == "/" or parent == ".":
258
+ parent = ""
259
+ o = FileItem(
260
+ remote=self.remote_name,
261
+ parent=parent,
262
+ name=name,
263
+ size=size,
264
+ mime_type=mime_type,
265
+ mod_time=mod_time,
266
+ )
267
+ out.append(o)
268
+ return out
@@ -0,0 +1,57 @@
1
+ """
2
+ Database models for rclone_api.
3
+ """
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import Optional, Type
7
+
8
+ from sqlalchemy import BigInteger, Column
9
+ from sqlmodel import Field, SQLModel
10
+
11
+
12
+ # Meta table that indexes all repositories
13
+ class RepositoryMeta(SQLModel, table=True):
14
+ """Repository metadata table."""
15
+
16
+ id: Optional[int] = Field(default=None, primary_key=True)
17
+ repo_name: str
18
+ file_table_name: str # The dedicated table name for file entries
19
+
20
+
21
+ # Base FileEntry model that will be extended
22
+ class FileEntry(SQLModel, ABC):
23
+ """Base file entry model with common fields."""
24
+
25
+ id: Optional[int] = Field(default=None, primary_key=True)
26
+ path: str = Field(index=True, unique=True)
27
+ suffix: str = Field(index=True)
28
+ name: str
29
+ size: int = Field(sa_column=Column(BigInteger))
30
+ mime_type: str
31
+ mod_time: str
32
+ hash: Optional[str] = Field(default=None)
33
+
34
+ @abstractmethod
35
+ def table_name(self) -> str:
36
+ """Return the table name for this file entry model."""
37
+ pass
38
+
39
+
40
+ # Factory to dynamically create a FileEntry model with a given table name
41
+ def create_file_entry_model(_table_name: str) -> Type[FileEntry]:
42
+ """Create a file entry model with a given table name.
43
+
44
+ Args:
45
+ table_name: Table name
46
+
47
+ Returns:
48
+ Type[FileEntryBase]: File entry model class with specified table name
49
+ """
50
+
51
+ class FileEntryConcrete(FileEntry, table=True):
52
+ __tablename__ = _table_name # type: ignore # dynamically set table name
53
+
54
+ def table_name(self) -> str:
55
+ return _table_name
56
+
57
+ return FileEntryConcrete
@@ -5,41 +5,85 @@ from pathlib import Path
5
5
 
6
6
  from rclone_api.rpath import RPath
7
7
 
8
+ _STRING_INTERNER: dict[str, str] = {}
9
+
10
+
11
+ def _intern(s: str) -> str:
12
+ return _STRING_INTERNER.setdefault(s, s)
13
+
8
14
 
9
15
  # File is too complex, this is a simple dataclass that can be streamed out.
10
16
  @dataclass
11
17
  class FileItem:
12
18
  """Remote file dataclass."""
13
19
 
14
- path: str
20
+ remote: str
21
+ parent: str
15
22
  name: str
16
23
  size: int
17
24
  mime_type: str
18
25
  mod_time: str
26
+ hash: str | None = None
27
+
28
+ @property
29
+ def path(self) -> str:
30
+ if self.parent == ".":
31
+ return f"{self.remote}/{self.name}"
32
+ else:
33
+ return f"{self.remote}/{self.parent}/{self.name}"
34
+
35
+ @property
36
+ def path_no_remote(self) -> str:
37
+ if self.parent == ".":
38
+ return f"{self.name}"
39
+ else:
40
+ return f"{self.parent}/{self.name}"
41
+
42
+ @property
43
+ def suffix(self) -> str:
44
+ return self._suffix
45
+
46
+ def __post_init__(self):
47
+ self.parent = _intern(self.parent)
48
+ self.mime_type = _intern(self.mime_type)
49
+ self.remote = _intern(self.remote)
50
+ self._suffix = _intern(Path(self.name).suffix)
19
51
 
20
52
  @staticmethod
21
- def from_json(data: dict) -> "FileItem | None":
53
+ def from_json(remote: str, data: dict) -> "FileItem | None":
22
54
  try:
55
+ path_str: str = data["Path"]
56
+ parent_path = Path(path_str).parent.as_posix()
57
+ name = data["Name"]
58
+ size = data["Size"]
59
+ mime_type = data["MimeType"]
60
+ mod_time = data["ModTime"]
61
+
23
62
  return FileItem(
24
- data["Path"],
25
- data["Name"],
26
- data["Size"],
27
- data["MimeType"],
28
- data["ModTime"],
63
+ remote=remote,
64
+ parent=parent_path,
65
+ name=name,
66
+ size=size,
67
+ mime_type=mime_type,
68
+ mod_time=mod_time,
29
69
  )
30
70
  except KeyError:
31
71
  warnings.warn(f"Invalid data: {data}")
32
72
  return None
33
73
 
34
74
  @staticmethod
35
- def from_json_str(data: str) -> "FileItem | None":
75
+ def from_json_str(remote: str, data: str) -> "FileItem | None":
36
76
  try:
37
77
  data_dict = json.loads(data)
38
- return FileItem.from_json(data_dict)
78
+ return FileItem.from_json(remote, data_dict)
39
79
  except json.JSONDecodeError:
40
80
  warnings.warn(f"Invalid JSON data: {data}")
41
81
  return None
42
82
 
83
+ # hasher for set membership
84
+ def __hash__(self) -> int:
85
+ return hash(self.path_no_remote)
86
+
43
87
 
44
88
  class File:
45
89
  """Remote file dataclass."""
@@ -0,0 +1,68 @@
1
+ import json
2
+ import warnings
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+
6
+ _STRING_INTERNER: dict[str, str] = {}
7
+
8
+
9
+ def _intern(s: str) -> str:
10
+ return _STRING_INTERNER.setdefault(s, s)
11
+
12
+
13
+ # File is too complex, this is a simple dataclass that can be streamed out.
14
+ @dataclass
15
+ class FileItem:
16
+ """Remote file dataclass."""
17
+
18
+ remote: str
19
+ parent: str
20
+ name: str
21
+ size: int
22
+ mime_type: str
23
+ mod_time: str
24
+
25
+ @property
26
+ def path(self) -> str:
27
+ return f"{self.remote}/{self.parent}/{self.name}"
28
+
29
+ @property
30
+ def suffix(self) -> str:
31
+ return self._suffix
32
+
33
+ def __post_init__(self):
34
+ self.parent = _intern(self.parent)
35
+ self.mime_type = _intern(self.mime_type)
36
+ suffix = Path(self.name).suffix
37
+ self._suffix = _intern(suffix)
38
+
39
+ @staticmethod
40
+ def from_json(data: dict) -> "FileItem | None":
41
+ try:
42
+ path_str: str = data["Path"]
43
+ parent_path = Path(path_str).parent.as_posix()
44
+ name = data["Name"]
45
+ size = data["Size"]
46
+ mime_type = data["MimeType"]
47
+ mod_time = data["ModTime"]
48
+
49
+ return FileItem(
50
+ remote="DUMMY",
51
+ parent=parent_path,
52
+ name=name,
53
+ size=size,
54
+ mime_type=mime_type,
55
+ mod_time=mod_time,
56
+ )
57
+ except KeyError:
58
+ warnings.warn(f"Invalid data: {data}")
59
+ return None
60
+
61
+ @staticmethod
62
+ def from_json_str(data: str) -> "FileItem | None":
63
+ try:
64
+ data_dict = json.loads(data)
65
+ return FileItem.from_json(data_dict)
66
+ except json.JSONDecodeError:
67
+ warnings.warn(f"Invalid JSON data: {data}")
68
+ return None
@@ -76,6 +76,8 @@ class Process:
76
76
  return self
77
77
 
78
78
  def __exit__(self, exc_type, exc_val, exc_tb) -> None:
79
+ self.terminate()
80
+ self.wait()
79
81
  self.cleanup()
80
82
 
81
83
  def cleanup(self) -> None: