rclone-api 1.5.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. rclone_api/__init__.py +951 -0
  2. rclone_api/assets/example.txt +1 -0
  3. rclone_api/cli.py +15 -0
  4. rclone_api/cmd/analyze.py +51 -0
  5. rclone_api/cmd/copy_large_s3.py +111 -0
  6. rclone_api/cmd/copy_large_s3_finish.py +81 -0
  7. rclone_api/cmd/list_files.py +27 -0
  8. rclone_api/cmd/save_to_db.py +77 -0
  9. rclone_api/completed_process.py +60 -0
  10. rclone_api/config.py +87 -0
  11. rclone_api/convert.py +31 -0
  12. rclone_api/db/__init__.py +3 -0
  13. rclone_api/db/db.py +277 -0
  14. rclone_api/db/models.py +57 -0
  15. rclone_api/deprecated.py +24 -0
  16. rclone_api/detail/copy_file_parts_resumable.py +42 -0
  17. rclone_api/detail/walk.py +116 -0
  18. rclone_api/diff.py +164 -0
  19. rclone_api/dir.py +113 -0
  20. rclone_api/dir_listing.py +66 -0
  21. rclone_api/exec.py +40 -0
  22. rclone_api/experimental/flags.py +89 -0
  23. rclone_api/experimental/flags_base.py +58 -0
  24. rclone_api/file.py +205 -0
  25. rclone_api/file_item.py +68 -0
  26. rclone_api/file_part.py +198 -0
  27. rclone_api/file_stream.py +52 -0
  28. rclone_api/filelist.py +30 -0
  29. rclone_api/group_files.py +256 -0
  30. rclone_api/http_server.py +244 -0
  31. rclone_api/install.py +95 -0
  32. rclone_api/log.py +44 -0
  33. rclone_api/mount.py +55 -0
  34. rclone_api/mount_util.py +247 -0
  35. rclone_api/process.py +187 -0
  36. rclone_api/rclone_impl.py +1285 -0
  37. rclone_api/remote.py +21 -0
  38. rclone_api/rpath.py +102 -0
  39. rclone_api/s3/api.py +109 -0
  40. rclone_api/s3/basic_ops.py +61 -0
  41. rclone_api/s3/chunk_task.py +187 -0
  42. rclone_api/s3/create.py +107 -0
  43. rclone_api/s3/multipart/file_info.py +7 -0
  44. rclone_api/s3/multipart/finished_piece.py +69 -0
  45. rclone_api/s3/multipart/info_json.py +239 -0
  46. rclone_api/s3/multipart/merge_state.py +147 -0
  47. rclone_api/s3/multipart/upload_info.py +62 -0
  48. rclone_api/s3/multipart/upload_parts_inline.py +356 -0
  49. rclone_api/s3/multipart/upload_parts_resumable.py +304 -0
  50. rclone_api/s3/multipart/upload_parts_server_side_merge.py +546 -0
  51. rclone_api/s3/multipart/upload_state.py +165 -0
  52. rclone_api/s3/types.py +67 -0
  53. rclone_api/scan_missing_folders.py +153 -0
  54. rclone_api/types.py +402 -0
  55. rclone_api/util.py +324 -0
  56. rclone_api-1.5.8.dist-info/LICENSE +21 -0
  57. rclone_api-1.5.8.dist-info/METADATA +969 -0
  58. rclone_api-1.5.8.dist-info/RECORD +61 -0
  59. rclone_api-1.5.8.dist-info/WHEEL +5 -0
  60. rclone_api-1.5.8.dist-info/entry_points.txt +5 -0
  61. rclone_api-1.5.8.dist-info/top_level.txt +1 -0
@@ -0,0 +1,239 @@
1
+ import hashlib
2
+ import json
3
+ import os
4
+ import warnings
5
+ from datetime import datetime
6
+
7
+ from rclone_api.dir_listing import DirListing
8
+ from rclone_api.rclone_impl import RcloneImpl
9
+ from rclone_api.types import (
10
+ PartInfo,
11
+ SizeSuffix,
12
+ )
13
+
14
+
15
def _fetch_all_names(
    self: RcloneImpl,
    src: str,
) -> list[str]:
    """Return the names of entries under *src* that look like part files.

    Only names beginning with the ``part.`` prefix are kept.
    """
    listing: DirListing = self.ls(src)
    return [entry.name for entry in listing.files if entry.name.startswith("part.")]
24
+
25
+
26
def _get_info_json(self: RcloneImpl, src: str | None, src_info: str) -> dict:
    """Load the resumable-upload info JSON stored at *src_info*.

    If *src* is None the info file must already exist and is simply loaded.
    Otherwise a fresh template dict (marked ``"new": True``) is returned
    whenever the info file is missing or unparsable.

    Raises:
        FileNotFoundError: if *src* is None and *src_info* cannot be read,
            or if *src* is given but cannot be stat'ed.
    """
    from rclone_api.file import File

    if src is None:
        # No source file given: just try to load the existing info file.
        text_or_err = self.read_text(src_info)
        if isinstance(text_or_err, Exception):
            raise FileNotFoundError(f"Could not load {src_info}: {text_or_err}")
        assert isinstance(text_or_err, str)
        return json.loads(text_or_err)

    src_stat: File | Exception = self.stat(src)
    if isinstance(src_stat, Exception):
        raise FileNotFoundError(f"Failed to stat {src}: {src_stat}")

    # Template returned whenever the existing info file is missing or corrupt.
    now: datetime = datetime.now()
    new_data = {
        "new": True,
        "created": now.isoformat(),
        "src": src,
        "src_modtime": src_stat.mod_time(),
        "size": src_stat.size,
        "chunksize": None,
        "chunksize_int": None,
        "first_part": None,
        "last_part": None,
        "hash": None,
    }

    # NOTE: the original also kept an `err` variable and re-checked it after
    # this early return — that branch was unreachable dead code and is removed.
    text_or_err = self.read_text(src_info)
    if isinstance(text_or_err, Exception):
        warnings.warn(f"Failed to read {src_info}: {text_or_err}")
        return new_data
    assert isinstance(text_or_err, str)

    try:
        return json.loads(text_or_err)
    except Exception as e:
        warnings.warn(f"Failed to parse JSON: {e} at {src_info}")
        return new_data
77
+
78
+
79
def _save_info_json(self: RcloneImpl, src: str, data: dict) -> None:
    """Persist *data* as JSON at *src*, stamping it with an identity hash.

    The hash covers the fields that identify the upload (source path,
    source modtime, size and chunk size) so a reloaded info file can be
    matched against the upload it describes.
    """
    payload = dict(data)
    payload["new"] = False
    # Fingerprint the identifying fields (md5 used as an identity check,
    # not for security).
    fingerprint_source = "".join(
        str(payload.get(key))
        for key in ("src", "src_modtime", "size", "chunksize_int")
    )
    digest = hashlib.md5(fingerprint_source.encode("utf-8"))
    payload["hash"] = digest.hexdigest()
    self.write_text(dst=src, text=json.dumps(payload, indent=0))
97
+
98
+
99
class InfoJson:
    """State file for a resumable, chunked copy.

    Wraps the ``info.json`` stored alongside the uploaded ``part.*`` files
    and exposes helpers for computing and tracking which parts are done.
    """

    def __init__(self, rclone: RcloneImpl, src: str | None, src_info: str) -> None:
        self.rclone = rclone
        self.src = src
        self.src_info = src_info
        self.data: dict = {}

    def load(self) -> bool:
        """Returns True if the file exists and is now loaded."""
        self.data = _get_info_json(self.rclone, self.src, self.src_info)
        return not self.data.get("new", False)

    def save(self) -> None:
        """Write the current state back to the remote info file."""
        _save_info_json(self.rclone, self.src_info, self.data)

    def print(self) -> None:
        """Print the remote info file's contents."""
        self.rclone.print(self.src_info)

    def fetch_all_finished(self) -> list[str]:
        """Return names of finished ``part.*`` files next to the info file."""
        parent_path = os.path.dirname(self.src_info)
        return _fetch_all_names(self.rclone, parent_path)

    def fetch_all_finished_part_numbers(self) -> list[int]:
        """Return the part numbers parsed from the finished part file names."""
        names = self.fetch_all_finished()
        # Names look like "part.<number>_..." — extract <number>.
        return [int(name.split("_")[0].split(".")[1]) for name in names]

    @property
    def parts_dir(self) -> str:
        """Directory holding the part files (no trailing slash)."""
        parts_dir = os.path.dirname(self.src_info)
        if parts_dir.endswith("/"):
            parts_dir = parts_dir[:-1]
        return parts_dir

    @property
    def dst(self) -> str:
        """Final destination path, derived by stripping the "-parts" suffix."""
        parts_dir = self.parts_dir
        assert parts_dir.endswith("-parts")
        return parts_dir[: -len("-parts")]

    @property
    def dst_name(self) -> str:
        """Base name of the final destination file."""
        return os.path.basename(self.dst)

    def compute_all_parts(self) -> list[PartInfo] | Exception:
        """Compute the PartInfo slice covering first_part..last_part.

        Returns the Exception instead of raising if anything goes wrong
        (e.g. chunksize/size missing from the loaded data).
        """
        try:
            src_size = self.size
            chunk_size = self.chunksize
            assert isinstance(src_size, SizeSuffix)
            assert isinstance(chunk_size, SizeSuffix)
            first_part = self.data["first_part"]
            last_part = self.data["last_part"]
            full_part_infos: list[PartInfo] = PartInfo.split_parts(src_size, chunk_size)
            return full_part_infos[first_part : last_part + 1]
        except Exception as e:
            return e

    def compute_all_part_numbers(self) -> list[int] | Exception:
        """Return every expected part number, or the Exception on failure.

        Fix: previously this *raised* the Exception even though the
        signature and the caller (fetch_remaining_part_numbers) expect
        it to be returned.
        """
        all_parts: list[PartInfo] | Exception = self.compute_all_parts()
        if isinstance(all_parts, Exception):
            return all_parts
        return [p.part_number for p in all_parts]

    def fetch_remaining_part_numbers(self) -> list[int] | Exception:
        """Return the sorted part numbers still missing from the parts dir."""
        all_part_nums: list[int] | Exception = self.compute_all_part_numbers()
        if isinstance(all_part_nums, Exception):
            return all_part_nums
        finished_part_nums: list[int] = self.fetch_all_finished_part_numbers()
        remaining_part_nums: list[int] = list(
            set(all_part_nums) - set(finished_part_nums)
        )
        return sorted(remaining_part_nums)

    def fetch_is_done(self) -> bool:
        """True when every expected part has been uploaded."""
        remaining_part_nums: list[int] | Exception = self.fetch_remaining_part_numbers()
        if isinstance(remaining_part_nums, Exception):
            return False
        return len(remaining_part_nums) == 0

    @property
    def new(self) -> bool:
        """True when the data is a fresh template (info file didn't exist)."""
        return self.data.get("new", False)

    @property
    def chunksize(self) -> SizeSuffix | None:
        """Chunk size of the upload, or None if not recorded yet."""
        chunksize_int: int | None = self.data.get("chunksize_int")
        if chunksize_int is None:
            return None
        return SizeSuffix(chunksize_int)

    @chunksize.setter
    def chunksize(self, value: SizeSuffix) -> None:
        # Store both the human-readable form and the raw integer.
        self.data["chunksize"] = str(value)
        self.data["chunksize_int"] = value.as_int()

    @property
    def src_modtime(self) -> datetime:
        """Source file modification time (stored as an ISO-8601 string)."""
        return datetime.fromisoformat(self.data["src_modtime"])

    @src_modtime.setter
    def src_modtime(self, value: datetime) -> None:
        self.data["src_modtime"] = value.isoformat()

    @property
    def size(self) -> SizeSuffix:
        """Size of the source file."""
        return SizeSuffix(self.data["size"])

    def _get_first_part(self) -> int | None:
        return self.data.get("first_part")

    def _set_first_part(self, value: int) -> None:
        self.data["first_part"] = value

    def _get_last_part(self) -> int | None:
        return self.data.get("last_part")

    def _set_last_part(self, value: int) -> None:
        self.data["last_part"] = value

    # Read/write properties backed by the underlying data dict.
    first_part: int | None = property(_get_first_part, _set_first_part)  # type: ignore
    last_part: int | None = property(_get_last_part, _set_last_part)  # type: ignore

    @property
    def hash(self) -> str | None:
        """Identity hash written by save(); None until first saved."""
        return self.data.get("hash")

    def to_json_str(self) -> str:
        """Compact JSON dump of the underlying data."""
        return json.dumps(self.data)

    def __repr__(self):
        return f"InfoJson({self.src}, {self.src_info}, {self.data})"

    def __str__(self):
        return self.to_json_str()
@@ -0,0 +1,147 @@
1
+ """
2
+ https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/client/upload_part_copy.html
3
+ * client.upload_part_copy
4
+
5
+ This module provides functionality for S3 multipart uploads, including copying parts
6
+ from existing S3 objects using upload_part_copy.
7
+ """
8
+
9
+ import json
10
+ from dataclasses import dataclass
11
+ from typing import Any
12
+
13
+ from rclone_api.rclone_impl import RcloneImpl
14
+ from rclone_api.s3.multipart.finished_piece import FinishedPiece
15
+
16
+
17
+ @dataclass
18
+ class Part:
19
+ part_number: int
20
+ s3_key: str
21
+
22
+ def to_json(self) -> dict:
23
+ return {"part_number": self.part_number, "s3_key": self.s3_key}
24
+
25
+ @staticmethod
26
+ def from_json(json_dict: dict) -> "Part | Exception":
27
+ part_number = json_dict.get("part_number")
28
+ s3_key = json_dict.get("s3_key")
29
+ if part_number is None or s3_key is None:
30
+ return Exception(f"Invalid JSON: {json_dict}")
31
+ return Part(part_number=part_number, s3_key=s3_key)
32
+
33
+ @staticmethod
34
+ def from_json_array(json_array: list[dict]) -> list["Part"] | Exception:
35
+ try:
36
+ out: list[Part] = []
37
+ for j in json_array:
38
+ ok_or_err = Part.from_json(j)
39
+ if isinstance(ok_or_err, Exception):
40
+ return ok_or_err
41
+ else:
42
+ out.append(ok_or_err)
43
+ return out
44
+ except Exception as e:
45
+ return e
46
+
47
+
48
class MergeState:
    """Serializable state of a server-side multipart merge.

    Tracks the multipart upload identity (bucket/key/upload id), the full
    list of source parts, and the pieces already merged, so an interrupted
    merge can be resumed.
    """

    def __init__(
        self,
        rclone_impl: RcloneImpl,
        merge_path: str,
        upload_id: str,
        bucket: str,
        dst_key: str,
        finished: list[FinishedPiece],
        all_parts: list[Part],
    ) -> None:
        self.rclone_impl: RcloneImpl = rclone_impl
        self.merge_path: str = merge_path
        self.merge_parts_path: str = f"{merge_path}/merge"  # future use?
        self.upload_id: str = upload_id
        self.bucket: str = bucket
        self.dst_key: str = dst_key
        # Defensive copies so callers' lists are not mutated by on_finished().
        self.finished: list[FinishedPiece] = list(finished)
        self.all_parts: list[Part] = list(all_parts)

    def on_finished(self, finished_piece: FinishedPiece) -> None:
        """Record one more piece as merged."""
        self.finished.append(finished_piece)

    def remaining_parts(self) -> list[Part]:
        """Return the parts not yet merged, in their original order."""
        finished_numbers: set[int] = {p.part_number for p in self.finished}
        return [p for p in self.all_parts if p.part_number not in finished_numbers]

    @staticmethod
    def from_json(rclone_impl: RcloneImpl, json: dict) -> "MergeState | Exception":
        """Reconstruct a MergeState from its to_json() dict; never raises."""
        try:
            merge_path = json["merge_path"]
            bucket = json["bucket"]
            dst_key = json["dst_key"]
            finished = FinishedPiece.from_json_array(json["finished"])
            # FinishedPiece.from_json_array reports failure by returning an
            # Exception (see read()); previously this went unchecked and the
            # Exception could be stored as self.finished.
            if isinstance(finished, Exception):
                return finished
            all_parts: list[Part | Exception] = [Part.from_json(j) for j in json["all"]]
            errs: list[Exception] = [p for p in all_parts if isinstance(p, Exception)]
            if len(errs):
                return Exception(f"Errors in parts: {errs}")
            all_parts_no_err: list[Part] = [
                p for p in all_parts if not isinstance(p, Exception)
            ]
            upload_id: str = json["upload_id"]
            return MergeState(
                rclone_impl=rclone_impl,
                merge_path=merge_path,
                upload_id=upload_id,
                bucket=bucket,
                dst_key=dst_key,
                finished=finished,
                all_parts=all_parts_no_err,
            )
        except Exception as e:
            return e

    def to_json(self) -> dict:
        """Serialize to a JSON-compatible dict (inverse of from_json)."""
        return {
            "merge_path": self.merge_path,
            "bucket": self.bucket,
            "dst_key": self.dst_key,
            "upload_id": self.upload_id,
            "finished": FinishedPiece.to_json_array(self.finished),
            "all": [part.to_json() for part in self.all_parts],
        }

    def to_json_str(self) -> str:
        """Pretty-printed JSON form of this state."""
        return json.dumps(self.to_json(), indent=2)

    def __str__(self):
        return self.to_json_str()

    def __repr__(self):
        return self.to_json_str()

    def write(self, rclone_impl: Any, dst: str) -> None:
        """Write this state as JSON to *dst* via the given rclone implementation."""
        from rclone_api.rclone_impl import RcloneImpl

        assert isinstance(rclone_impl, RcloneImpl)
        rclone_impl.write_text(dst, self.to_json_str())

    def read(self, rclone_impl: Any, src: str) -> None:
        """Reload the finished-piece list from the JSON state stored at *src*.

        Raises the read or parse error if the file cannot be loaded.
        """
        from rclone_api.rclone_impl import RcloneImpl

        assert isinstance(rclone_impl, RcloneImpl)
        json_str = rclone_impl.read_text(src)
        if isinstance(json_str, Exception):
            raise json_str
        json_dict = json.loads(json_str)
        finished_or_err = FinishedPiece.from_json_array(json_dict["finished"])
        if isinstance(finished_or_err, Exception):
            raise finished_or_err
        self.finished = finished_or_err
@@ -0,0 +1,62 @@
1
+ import hashlib
2
+ from dataclasses import dataclass, fields
3
+ from pathlib import Path
4
+
5
+ from botocore.client import BaseClient
6
+
7
+
8
@dataclass
class UploadInfo:
    """Identity and parameters of one in-progress S3 multipart upload."""

    s3_client: BaseClient
    bucket_name: str
    object_name: str
    src_file_path: Path
    upload_id: str
    retries: int
    chunk_size: int
    file_size: int
    _total_chunks: int | None = None

    def total_chunks(self) -> int:
        """Number of chunks needed to cover file_size (ceiling division)."""
        full, remainder = divmod(self.file_size, self.chunk_size)
        return full + 1 if remainder else full

    def __post_init__(self):
        # Cache the chunk count once, unless it was supplied explicitly.
        if self._total_chunks is None:
            self._total_chunks = self.total_chunks()

    def fingerprint(self) -> str:
        """Return a sha256 hex digest over the attributes that identify
        the upload geometry: file size, chunk size and chunk count."""
        hasher = hashlib.sha256()
        for piece in (self.file_size, self.chunk_size, self._total_chunks):
            hasher.update(str(piece).encode("utf-8"))
        return hasher.hexdigest()

    def to_json(self) -> dict:
        """Serialize all fields except the non-serializable s3_client."""
        out: dict = {}
        for f in fields(self):
            if f.name == "s3_client":
                continue
            value = getattr(self, f.name)
            # Paths become plain strings for JSON round-tripping.
            out[f.name] = str(value) if isinstance(value, Path) else value
        return out

    @staticmethod
    def from_json(s3_client: BaseClient, json_dict: dict) -> "UploadInfo":
        """Rebuild an UploadInfo from to_json() output, attaching a live client."""
        json_dict.pop("s3_client", None)
        return UploadInfo(s3_client=s3_client, **json_dict)