rclone-api 1.1.4__py2.py3-none-any.whl → 1.1.5__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rclone_api/mount.py ADDED
@@ -0,0 +1,102 @@
1
"""Best-effort cleanup of a leftover mount point on Linux, macOS and Windows."""

import platform
import subprocess
import time
import warnings
from pathlib import Path
from typing import Sequence, Union

_SYSTEM = platform.system()  # "Linux", "Darwin", "Windows", etc.


def run_command(cmd: Union[str, Sequence[str]], verbose: bool) -> int:
    """Run a command and return its exit code.

    Args:
        cmd: Either a shell command line (``str``, executed through the shell
            exactly as before) or an argument vector (sequence of ``str``,
            executed directly without a shell so that arguments containing
            spaces or shell metacharacters are passed through verbatim).
        verbose: When True, echo the command and, on failure, its stderr.

    Returns:
        The process exit code; 127 if an argument-vector executable could not
        be found (mirroring the shell convention); -1 if the command could not
        be run for any other reason.
    """
    use_shell = isinstance(cmd, str)
    shown = cmd if use_shell else " ".join(cmd)
    if verbose:
        print(f"Executing: {shown}")
    try:
        result = subprocess.run(
            cmd, shell=use_shell, capture_output=True, text=True, check=False
        )
    except FileNotFoundError as e:
        # Without a shell a missing executable raises instead of returning
        # 127; treat it as an ordinary command failure.
        if verbose:
            print(f"Command failed: {shown}\nStdErr: {e}")
        return 127
    except Exception as e:
        warnings.warn(f"Error running command '{shown}': {e}")
        return -1
    if result.returncode != 0 and verbose:
        print(f"Command failed: {shown}\nStdErr: {result.stderr.strip()}")
    return result.returncode


def _exists_or_assume(mount_path: Path, action: str) -> bool:
    """Return whether mount_path exists, assuming it does if the check errors."""
    try:
        return mount_path.exists()
    except OSError as e:
        warnings.warn(f"Error {action} {mount_path}: {e}")
        return True


def clean_mount(mount_path: Path, verbose: bool = False) -> None:
    """
    Clean up a mount path across Linux, macOS, and Windows.

    The function attempts to unmount the mount at mount_path, then, if the
    directory is empty, removes it. On Linux it uses 'fusermount -u' (for FUSE
    mounts) and 'umount'. On macOS it uses 'umount', while on Windows it
    attempts to remove the mount point via 'mountvol /D' and then removes the
    directory. Every step is best effort: failures are reported through
    warnings (or printed when verbose) and never raised.

    Args:
        mount_path: The mount point to clean up.
        verbose: When True, print progress and command failures.
    """
    not_found_msg = f"{mount_path} does not exist; nothing to clean up."

    if not _exists_or_assume(mount_path, "checking"):
        if verbose:
            print(not_found_msg)
        return

    # Give the system a moment (an unmount may still be in progress), then
    # look again so the decision is based on the state after the grace period.
    time.sleep(2)
    if not _exists_or_assume(mount_path, "checking"):
        if verbose:
            print(not_found_msg)
        return

    if verbose:
        print(f"{mount_path} still exists, attempting to unmount and remove.")

    # Commands are passed as argument vectors so the path is never parsed by
    # a shell.
    target = str(mount_path)
    if _SYSTEM == "Linux":
        # Try FUSE unmount first (if applicable), then the regular umount.
        run_command(["fusermount", "-u", target], verbose)
        run_command(["umount", target], verbose)
    elif _SYSTEM == "Darwin":
        # 'diskutil unmount' is an alternative for stubborn mounts.
        run_command(["umount", target], verbose)
    elif _SYSTEM == "Windows":
        # On Windows, remove the mount point using mountvol.
        run_command(["mountvol", target, "/D"], verbose)
        # If that does not work, try to remove the directory directly.
        try:
            mount_path.rmdir()
            if verbose:
                print(f"Successfully removed mount directory {mount_path}")
        except Exception as e:
            warnings.warn(f"Failed to remove mount {mount_path}: {e}")
    else:
        warnings.warn(f"Unsupported platform: {_SYSTEM}")

    # Allow some time for the unmount commands to take effect.
    time.sleep(2)

    if not _exists_or_assume(mount_path, "re-checking"):
        if verbose:
            print(f"{mount_path} successfully cleaned up.")
        return

    if verbose:
        print(f"{mount_path} still exists after unmount attempt.")
    try:
        # Only remove if the directory is empty.
        if any(mount_path.iterdir()):
            warnings.warn(f"{mount_path} is not empty; cannot remove.")
        else:
            mount_path.rmdir()
            if verbose:
                print(f"Removed empty mount directory {mount_path}")
    except Exception as e:
        warnings.warn(f"Failed during cleanup of {mount_path}: {e}")
rclone_api/rclone.py CHANGED
@@ -914,38 +914,10 @@ class Rclone:
914
914
  if proc.poll() is None:
915
915
  proc.terminate()
916
916
  proc.wait()
917
- if not error_happened and outdir.exists():
918
- time.sleep(2)
919
- if outdir.exists():
920
- print(f"{outdir} mount still exists, attempting to remove")
921
- if not _IS_WINDOWS:
922
-
923
- def exec(cmd: str) -> int:
924
- if verbose:
925
- print(f"Executing: {cmd}")
926
- rtn = os.system(cmd)
927
- if rtn != 0 and verbose:
928
- print(f"Failed to execute: {cmd}")
929
- return rtn
930
-
931
- exec(f"fusermount -u {outdir}")
932
- exec(f"umount {outdir}")
933
- time.sleep(2)
934
- if outdir.exists():
935
- is_empty = True
936
- try:
937
- is_empty = not list(outdir.iterdir())
938
- if not is_empty:
939
- warnings.warn(f"Failed to unmount {outdir}")
940
- else:
941
- try:
942
- outdir.rmdir()
943
- except Exception as e:
944
- warnings.warn(f"Failed to remove {outdir}: {e}")
945
- except Exception as e:
946
- warnings.warn(
947
- f"Failed during mount cleanup of {outdir}: because {e}"
948
- )
917
+ if not error_happened:
918
+ from rclone_api.mount import clean_mount
919
+
920
+ clean_mount(outdir, verbose=verbose)
949
921
 
950
922
  @deprecated("mount")
951
923
  def mount_webdav(
@@ -0,0 +1,102 @@
1
+ import time
2
+ import warnings
3
+ from pathlib import Path
4
+ from queue import Queue
5
+
6
+ from rclone_api.s3.chunk_types import FileChunk, UploadState
7
+ from rclone_api.util import locked_print
8
+
9
+
10
+ def _get_file_size(file_path: Path, timeout: int = 60) -> int:
11
+ sleep_time = timeout / 60 if timeout > 0 else 1
12
+ start = time.time()
13
+ while True:
14
+ expired = time.time() - start > timeout
15
+ try:
16
+ time.sleep(sleep_time)
17
+ if file_path.exists():
18
+ return file_path.stat().st_size
19
+ except FileNotFoundError as e:
20
+ if expired:
21
+ print(f"File not found: {file_path}, exception is {e}")
22
+ raise
23
+ if expired:
24
+ raise TimeoutError(f"File {file_path} not found after {timeout} seconds")
25
+
26
+
27
def file_chunker(
    upload_state: UploadState, max_chunks: int | None, output: Queue[FileChunk | None]
) -> None:
    """Read the not-yet-uploaded parts of the source file and queue them.

    Each part whose number is not already recorded in ``upload_state.parts``
    is read from the source file and put on ``output`` as a ``FileChunk``.
    A terminating ``None`` is always put on ``output`` last, including when an
    error occurs; errors are reported as warnings rather than raised.

    Args:
        upload_state: Upload description plus the parts already finished.
        max_chunks: Upper bound on the number of loop iterations, or None for
            no bound.
        output: Queue receiving the chunks followed by a single ``None``.
    """
    upload_info = upload_state.upload_info
    file_path = upload_info.src_file_path
    chunk_size = upload_info.chunk_size
    src = Path(file_path)
    count = 0

    def should_stop() -> bool:
        nonlocal count
        # NOTE(review): with max_chunks=None this returns before the periodic
        # size re-check below, so the re-check never runs for unbounded
        # uploads -- confirm whether that is intended.
        if max_chunks is None:
            return False
        if count >= max_chunks:
            return True
        count += 1
        if count > 10 and count % 10 == 0:
            # recheck that the file size has not changed
            file_size = _get_file_size(upload_state.upload_info.src_file_path)
            if file_size != upload_state.upload_info.file_size:
                locked_print(
                    f"File size changed, cannot resume, expected {upload_state.upload_info.file_size}, got {file_size}"
                )
                raise ValueError("File size changed, cannot resume")
        return False

    try:
        # Mounted files may take a while to appear, so keep retrying.
        file_size = _get_file_size(src, timeout=60)
        part_number = 1
        done_part_numbers: set[int] = {
            p.part_number for p in upload_state.parts if p is not None
        }
        num_parts = upload_info.total_chunks()

        def next_part_number() -> int | None:
            """Advance past finished parts; None once every part is done."""
            nonlocal part_number
            while part_number in done_part_numbers:
                part_number += 1
            if part_number > num_parts:
                return None
            return part_number

        while not should_stop():
            curr_part_num = next_part_number()
            if curr_part_num is None:
                locked_print(f"File {file_path} has completed chunking all parts")
                break
            offset = (curr_part_num - 1) * chunk_size

            # Explicit check instead of ``assert`` so it still runs under -O;
            # like any other error here it is reported by the handler below.
            if offset >= file_size:
                raise ValueError(f"Offset {offset} is greater than file size")

            # Open the file, seek, read the chunk, and close immediately.
            with open(file_path, "rb") as f:
                f.seek(offset)
                data = f.read(chunk_size)

            if not data:
                warnings.warn(f"Empty data for part {curr_part_num} of {file_path}")

            file_chunk = FileChunk(
                src,
                upload_id=upload_info.upload_id,
                part_number=curr_part_num,
                data=data,  # After this, data should not be reused.
            )
            done_part_numbers.add(curr_part_num)
            output.put(file_chunk)
            part_number = curr_part_num + 1
    except Exception as e:
        warnings.warn(f"Error reading file: {e}")
    finally:
        # Always signal end-of-stream to the consumer.
        output.put(None)
@@ -0,0 +1,254 @@
1
+ import json
2
+ import os
3
+ import time
4
+ from dataclasses import dataclass, field, fields
5
+ from pathlib import Path
6
+ from threading import Lock
7
+
8
+ from botocore.client import BaseClient
9
+
10
+ from rclone_api.util import locked_print
11
+
12
+ _MIN_UPLOAD_CHUNK_SIZE = 5 * 1024 * 1024 # 5MB
13
+ _SAVE_STATE_LOCK = Lock()
14
+
15
+ _TMP_DIR_ACCESS_LOCK = Lock()
16
+
17
+
18
+ def _clean_old_files(out: Path) -> None:
19
+ # clean up files older than 1 day
20
+
21
+ now = time.time()
22
+ # Erase all stale files and then purge empty directories.
23
+ for root, dirs, files in os.walk(out):
24
+ for name in files:
25
+ f = Path(root) / name
26
+ filemod = f.stat().st_mtime
27
+ diff_secs = now - filemod
28
+ diff_days = diff_secs / (60 * 60 * 24)
29
+ if diff_days > 1:
30
+ locked_print(f"Removing old file: {f}")
31
+ f.unlink()
32
+
33
+ for root, dirs, _ in os.walk(out):
34
+ for dir in dirs:
35
+ d = Path(root) / dir
36
+ if not list(d.iterdir()):
37
+ locked_print(f"Removing empty directory: {d}")
38
+ d.rmdir()
39
+
40
+
41
def _get_chunk_tmpdir() -> Path:
    """Return the ``chunk_store`` directory, preparing it on first use.

    The first call cleans an already existing directory via
    ``_clean_old_files``, creates the directory if needed and remembers the
    path on the function object; later calls return the remembered path.
    """
    with _TMP_DIR_ACCESS_LOCK:
        cache = _get_chunk_tmpdir.__dict__
        try:
            return cache["out"]  # Folder already validated.
        except KeyError:
            pass
        store = Path("chunk_store")
        if store.exists():
            # first access, clean up directory
            _clean_old_files(store)
        store.mkdir(exist_ok=True, parents=True)
        cache["out"] = store
        return store
53
+
54
+
55
class FileChunk:
    """One part of a source file, spooled to a temporary file on disk.

    The bytes handed to the constructor are written to a part file inside the
    chunk directory; ``data`` reads them back and ``close`` deletes the file.
    """

    def __init__(self, src: Path, upload_id: str, part_number: int, data: bytes):
        assert data is not None, f"{src}: Data must not be None"
        self.upload_id = upload_id
        self.src = src
        self.part_number = part_number
        name = src.name
        self.tmpdir = _get_chunk_tmpdir()
        self.filepart = self.tmpdir / f"{name}_{upload_id}.part_{part_number}.tmp"
        self.filepart.write_bytes(data)
        del data  # drop this frame's reference to the payload

    @property
    def data(self) -> bytes:
        """The chunk's bytes, read back from the part file."""
        assert self.filepart is not None
        with open(self.filepart, "rb") as f:
            return f.read()

    def close(self):
        """Delete the part file if it exists; safe to call repeatedly."""
        # __init__ may have failed before ``filepart`` was assigned, and
        # __del__ still runs in that case.
        filepart = getattr(self, "filepart", None)
        if filepart is None:
            return
        try:
            filepart.unlink()
        except FileNotFoundError:
            pass  # already removed

    def __del__(self):
        self.close()
80
+
81
+
82
@dataclass
class UploadInfo:
    """Static description of one multipart upload."""

    s3_client: BaseClient
    bucket_name: str
    object_name: str
    src_file_path: Path
    upload_id: str
    retries: int
    chunk_size: int
    file_size: int
    # Filled in by __post_init__ when not supplied.
    _total_chunks: int | None = None

    def total_chunks(self) -> int:
        """Number of chunks needed to cover file_size (ceiling division)."""
        full, remainder = divmod(self.file_size, self.chunk_size)
        return full + 1 if remainder else full

    def __post_init__(self):
        if self._total_chunks is not None:
            return
        self._total_chunks = self.total_chunks()

    def to_json(self) -> dict:
        """Return a JSON-serialisable dict of all fields.

        ``s3_client`` is replaced by a placeholder string and ``Path`` values
        are converted to ``str``.
        """
        json_dict = {}
        for f in fields(self):
            value = getattr(self, f.name)
            if f.name == "s3_client":
                # The client is a runtime object and cannot be serialised.
                json_dict[f.name] = "RUNTIME OBJECT"
            else:
                if isinstance(value, Path):
                    value = str(value)
                json_dict[f.name] = value
        return json_dict

    @staticmethod
    def from_json(s3_client: BaseClient, json_dict: dict) -> "UploadInfo":
        """Rebuild an UploadInfo from ``to_json`` output plus a live client.

        The caller's dict is left untouched.
        """
        kwargs = dict(json_dict)
        kwargs.pop("s3_client", None)  # Remove the placeholder string
        src = kwargs.get("src_file_path")
        if src is not None and not isinstance(src, Path):
            # to_json stringified the path; restore the declared type.
            kwargs["src_file_path"] = Path(src)
        return UploadInfo(s3_client=s3_client, **kwargs)
122
+
123
+
124
+ @dataclass
125
+ class FinishedPiece:
126
+ part_number: int
127
+ etag: str
128
+
129
+ def to_json(self) -> dict:
130
+ return {"part_number": self.part_number, "etag": self.etag}
131
+
132
+ def to_json_str(self) -> str:
133
+ return json.dumps(self.to_json(), indent=0)
134
+
135
+ @staticmethod
136
+ def to_json_array(parts: list["FinishedPiece | None"]) -> list[dict | None]:
137
+ non_none: list[FinishedPiece] = [p for p in parts if p is not None]
138
+ non_none.sort(key=lambda x: x.part_number)
139
+ all_nones: list[None] = [None for p in parts if p is None]
140
+ assert len(all_nones) <= 1, "Only one None should be present"
141
+ return [p.to_json() for p in non_none]
142
+
143
+ @staticmethod
144
+ def from_json(json: dict | None) -> "FinishedPiece | None":
145
+ if json is None:
146
+ return None
147
+ return FinishedPiece(**json)
148
+
149
+
150
@dataclass
class UploadState:
    """Progress of one multipart upload, persisted as a JSON file."""

    upload_info: UploadInfo
    # Path of the JSON state file; derived in __post_init__ when None.
    peristant: Path | None
    # One lock per instance (a plain ``Lock()`` default would be a single
    # object shared by every instance). Excluded from comparison so equality
    # keeps depending only on the data fields.
    lock: Lock = field(default_factory=Lock, compare=False, repr=False)
    # Finished parts; may contain a None entry.
    parts: list[FinishedPiece | None] = field(default_factory=list)

    def update_source_file(self, src_file: Path) -> None:
        """Point the upload at ``src_file`` and save.

        Raises:
            ValueError: If the new file's size differs from the recorded one.
        """
        new_file_size = os.path.getsize(src_file)
        if new_file_size != self.upload_info.file_size:
            raise ValueError("File size changed, cannot resume")
        self.upload_info.src_file_path = src_file
        self.save()

    def is_done(self) -> bool:
        """True when no parts remain."""
        return self.remaining() == 0

    def count(self) -> tuple[int, int]:  # count, num_chunks
        """Return (number of finished parts, total number of parts)."""
        num_chunks = self.upload_info.total_chunks()
        count = sum(1 for p in self.parts if p is not None)
        return count, num_chunks

    def finished(self) -> int:
        """Number of finished parts."""
        count, _ = self.count()
        return count

    def remaining(self) -> int:
        """Number of parts still to be finished."""
        count, num_chunks = self.count()
        assert (
            count <= num_chunks
        ), f"Count {count} is greater than num_chunks {num_chunks}"
        return num_chunks - count

    def add_finished(self, part: FinishedPiece | None) -> None:
        """Record a finished part and persist the new state."""
        with self.lock:
            self.parts.append(part)
            # Hold the same lock save()/load() use so state-file reads and
            # writes never overlap.
            with _SAVE_STATE_LOCK:
                self._save_no_lock()

    def __post_init__(self):
        if self.peristant is None:
            object_name = self.upload_info.object_name
            chunk_size = self.upload_info.chunk_size
            parent = _get_chunk_tmpdir()
            self.peristant = parent / f"{object_name}_chunk_size_{chunk_size}_.json"

    def save(self) -> None:
        """Write the state file while holding the module-wide save lock."""
        with _SAVE_STATE_LOCK:
            self._save_no_lock()

    def _save_no_lock(self) -> None:
        """Write the state file; the caller must hold ``_SAVE_STATE_LOCK``."""
        assert self.peristant is not None, "No path to save to"
        # The derived file name embeds object_name, which may contain "/" and
        # therefore name a sub-directory that does not exist yet.
        self.peristant.parent.mkdir(parents=True, exist_ok=True)
        self.peristant.write_text(self.to_json_str(), encoding="utf-8")

    @staticmethod
    def load(s3_client: BaseClient, path: Path) -> "UploadState":
        """Read a state file while holding the module-wide save lock."""
        with _SAVE_STATE_LOCK:
            return UploadState.from_json(s3_client, path)

    def to_json(self) -> dict:
        """Return the state as a JSON-serialisable dict."""
        # Snapshot the list before serialising it.
        parts_json = FinishedPiece.to_json_array(list(self.parts))
        finished_count, total = self.count()
        return {
            "upload_info": self.upload_info.to_json(),
            "finished_parts": parts_json,
            "is_done": self.is_done(),
            "finished_count": finished_count,
            "total_parts": total,
        }

    def to_json_str(self) -> str:
        """Return the state as an indented JSON string."""
        return json.dumps(self.to_json(), indent=4)

    @staticmethod
    def from_json(s3_client: BaseClient, json_file: Path) -> "UploadState":
        """Rebuild a state object from ``json_file`` plus a live client."""
        data = json.loads(json_file.read_text(encoding="utf-8"))
        upload_info = UploadInfo.from_json(s3_client, data["upload_info"])
        finished_parts = [FinishedPiece.from_json(p) for p in data["finished_parts"]]
        return UploadState(
            peristant=json_file, upload_info=upload_info, parts=finished_parts
        )
@@ -1,359 +1,19 @@
1
1
  import _thread
2
- import json
3
2
  import os
4
- import time
5
3
  import warnings
6
4
  from concurrent.futures import ThreadPoolExecutor
7
- from dataclasses import dataclass, field, fields
8
5
  from pathlib import Path
9
6
  from queue import Queue
10
- from threading import Lock, Thread
7
+ from threading import Thread
11
8
 
12
9
  from botocore.client import BaseClient
13
10
 
11
+ from rclone_api.s3.chunk_file import file_chunker
12
+ from rclone_api.s3.chunk_types import FileChunk, FinishedPiece, UploadInfo, UploadState
14
13
  from rclone_api.s3.types import MultiUploadResult
14
+ from rclone_api.util import locked_print
15
15
 
16
16
  _MIN_UPLOAD_CHUNK_SIZE = 5 * 1024 * 1024 # 5MB
17
- _SAVE_STATE_LOCK = Lock()
18
-
19
- _PRINT_LOCK = Lock()
20
-
21
-
22
- def locked_print(*args, **kwargs):
23
- with _PRINT_LOCK:
24
- print(*args, **kwargs)
25
-
26
-
27
- class FileChunk:
28
- def __init__(self, src: Path, upload_id: str, part_number: int, data: bytes):
29
- assert data is not None, f"{src}: Data must not be None"
30
- self.upload_id = upload_id
31
- self.src = src
32
- self.part_number = part_number
33
- name = src.name
34
- self.tmpdir = _get_chunk_tmpdir()
35
- self.filepart = self.tmpdir / f"{name}_{upload_id}.part_{part_number}.tmp"
36
- self.filepart.write_bytes(data)
37
- del data # free up memory
38
-
39
- @property
40
- def data(self) -> bytes:
41
- assert self.filepart is not None
42
- with open(self.filepart, "rb") as f:
43
- return f.read()
44
- return b""
45
-
46
- def close(self):
47
- if self.filepart.exists():
48
- self.filepart.unlink()
49
-
50
- def __del__(self):
51
- self.close()
52
-
53
-
54
- @dataclass
55
- class UploadInfo:
56
- s3_client: BaseClient
57
- bucket_name: str
58
- object_name: str
59
- src_file_path: Path
60
- upload_id: str
61
- retries: int
62
- chunk_size: int
63
- file_size: int
64
- _total_chunks: int | None = None
65
-
66
- def total_chunks(self) -> int:
67
- out = self.file_size // self.chunk_size
68
- if self.file_size % self.chunk_size:
69
- return out + 1
70
- return out
71
-
72
- def __post_init__(self):
73
- if self._total_chunks is not None:
74
- return
75
- self._total_chunks = self.total_chunks()
76
-
77
- def to_json(self) -> dict:
78
- json_dict = {}
79
- for f in fields(self):
80
- value = getattr(self, f.name)
81
- # Convert non-serializable objects (like s3_client) to a string representation.
82
- if f.name == "s3_client":
83
- json_dict[f.name] = "RUNTIME OBJECT"
84
- else:
85
- if isinstance(value, Path):
86
- value = str(value)
87
- json_dict[f.name] = value
88
- return json_dict
89
-
90
- @staticmethod
91
- def from_json(s3_client: BaseClient, json_dict: dict) -> "UploadInfo":
92
- json_dict.pop("s3_client") # Remove the placeholder string
93
- return UploadInfo(s3_client=s3_client, **json_dict)
94
-
95
-
96
- @dataclass
97
- class FinishedPiece:
98
- part_number: int
99
- etag: str
100
-
101
- def to_json(self) -> dict:
102
- return {"part_number": self.part_number, "etag": self.etag}
103
-
104
- def to_json_str(self) -> str:
105
- return json.dumps(self.to_json(), indent=0)
106
-
107
- @staticmethod
108
- def to_json_array(parts: list["FinishedPiece | None"]) -> list[dict | None]:
109
- non_none: list[FinishedPiece] = [p for p in parts if p is not None]
110
- non_none.sort(key=lambda x: x.part_number)
111
- all_nones: list[None] = [None for p in parts if p is None]
112
- assert len(all_nones) <= 1, "Only one None should be present"
113
- return [p.to_json() for p in non_none]
114
-
115
- @staticmethod
116
- def from_json(json: dict | None) -> "FinishedPiece | None":
117
- if json is None:
118
- return None
119
- return FinishedPiece(**json)
120
-
121
-
122
- @dataclass
123
- class UploadState:
124
- upload_info: UploadInfo
125
- # finished_parts: Queue[FinishedPiece | None]
126
- peristant: Path | None
127
- lock: Lock = Lock()
128
- parts: list[FinishedPiece | None] = field(default_factory=list)
129
-
130
- def update_source_file(self, src_file: Path) -> None:
131
- new_file_size = os.path.getsize(src_file)
132
- if new_file_size != self.upload_info.file_size:
133
- raise ValueError("File size changed, cannot resume")
134
- self.upload_info.src_file_path = src_file
135
- self.save()
136
-
137
- def is_done(self) -> bool:
138
- return self.remaining() == 0
139
-
140
- def count(self) -> tuple[int, int]: # count, num_chunks
141
- num_chunks = self.upload_info.total_chunks()
142
- count = 0
143
- for p in self.parts:
144
- if p is not None:
145
- count += 1
146
- return count, num_chunks
147
-
148
- def finished(self) -> int:
149
- count, _ = self.count()
150
- return count
151
-
152
- def remaining(self) -> int:
153
- count, num_chunks = self.count()
154
- assert (
155
- count <= num_chunks
156
- ), f"Count {count} is greater than num_chunks {num_chunks}"
157
- return num_chunks - count
158
-
159
- def add_finished(self, part: FinishedPiece | None) -> None:
160
- with self.lock:
161
- self.parts.append(part)
162
- self._save_no_lock()
163
-
164
- def __post_init__(self):
165
- if self.peristant is None:
166
- # upload_id = self.upload_info.upload_id
167
- object_name = self.upload_info.object_name
168
- chunk_size = self.upload_info.chunk_size
169
- parent = _get_chunk_tmpdir()
170
- self.peristant = parent / f"{object_name}_chunk_size_{chunk_size}_.json"
171
-
172
- def save(self) -> None:
173
- with _SAVE_STATE_LOCK:
174
- self._save_no_lock()
175
-
176
- def _save_no_lock(self) -> None:
177
- assert self.peristant is not None, "No path to save to"
178
- self.peristant.write_text(self.to_json_str(), encoding="utf-8")
179
-
180
- @staticmethod
181
- def load(s3_client: BaseClient, path: Path) -> "UploadState":
182
- with _SAVE_STATE_LOCK:
183
- return UploadState.from_json(s3_client, path)
184
-
185
- def to_json(self) -> dict:
186
- # queue -> list
187
- # parts: list[dict] = [f.to_json() for f in self.parts]
188
- parts: list[FinishedPiece | None] = list(self.parts)
189
-
190
- parts_json = FinishedPiece.to_json_array(parts)
191
- is_done = self.is_done()
192
- count_non_none: int = 0
193
- for p in parts:
194
- if p is not None:
195
- count_non_none += 1
196
-
197
- # self.count()
198
- finished_count, total = self.count()
199
-
200
- # parts.sort(key=lambda x: x.part_number) # Some backends need this.
201
- out_json = {
202
- "upload_info": self.upload_info.to_json(),
203
- "finished_parts": parts_json,
204
- "is_done": is_done,
205
- "finished_count": finished_count,
206
- "total_parts": total,
207
- }
208
-
209
- # check that we can sererialize
210
- # json.dumps(out_json)
211
- return out_json
212
-
213
- def to_json_str(self) -> str:
214
- return json.dumps(self.to_json(), indent=4)
215
-
216
- @staticmethod
217
- def from_json(s3_client: BaseClient, json_file: Path) -> "UploadState":
218
- json_str = json_file.read_text(encoding="utf-8")
219
- data = json.loads(json_str)
220
- upload_info_json = data["upload_info"]
221
- finished_parts_json = data["finished_parts"]
222
- upload_info = UploadInfo.from_json(s3_client, upload_info_json)
223
- finished_parts = [FinishedPiece.from_json(p) for p in finished_parts_json]
224
- return UploadState(
225
- peristant=json_file, upload_info=upload_info, parts=finished_parts
226
- )
227
-
228
-
229
- # lock
230
-
231
- _TMP_DIR_ACCESS_LOCK = Lock()
232
-
233
-
234
- def clean_old_files(out: Path) -> None:
235
- # clean up files older than 1 day
236
-
237
- now = time.time()
238
- # Erase all stale files and then purge empty directories.
239
- for root, dirs, files in os.walk(out):
240
- for name in files:
241
- f = Path(root) / name
242
- filemod = f.stat().st_mtime
243
- diff_secs = now - filemod
244
- diff_days = diff_secs / (60 * 60 * 24)
245
- if diff_days > 1:
246
- locked_print(f"Removing old file: {f}")
247
- f.unlink()
248
-
249
- for root, dirs, _ in os.walk(out):
250
- for dir in dirs:
251
- d = Path(root) / dir
252
- if not list(d.iterdir()):
253
- locked_print(f"Removing empty directory: {d}")
254
- d.rmdir()
255
-
256
-
257
- def _get_chunk_tmpdir() -> Path:
258
- with _TMP_DIR_ACCESS_LOCK:
259
- dat = _get_chunk_tmpdir.__dict__
260
- if "out" in dat:
261
- return dat["out"] # Folder already validated.
262
- out = Path("chunk_store")
263
- if out.exists():
264
- # first access, clean up directory
265
- clean_old_files(out)
266
- out.mkdir(exist_ok=True, parents=True)
267
- dat["out"] = out
268
- return out
269
-
270
-
271
- def _get_file_size(file_path: Path, timeout: int = 60) -> int:
272
- sleep_time = timeout / 60 if timeout > 0 else 1
273
- start = time.time()
274
- while True:
275
- expired = time.time() - start > timeout
276
- try:
277
- time.sleep(sleep_time)
278
- if file_path.exists():
279
- return file_path.stat().st_size
280
- except FileNotFoundError as e:
281
- if expired:
282
- print(f"File not found: {file_path}, exception is {e}")
283
- raise
284
- if expired:
285
- raise TimeoutError(f"File {file_path} not found after {timeout} seconds")
286
-
287
-
288
- def file_chunker(
289
- upload_state: UploadState, max_chunks: int | None, output: Queue[FileChunk | None]
290
- ) -> None:
291
-
292
- count = 0
293
-
294
- def should_stop() -> bool:
295
- nonlocal count
296
- if max_chunks is None:
297
- return False
298
- if count >= max_chunks:
299
- return True
300
- count += 1
301
- return False
302
-
303
- upload_info = upload_state.upload_info
304
- file_path = upload_info.src_file_path
305
- chunk_size = upload_info.chunk_size
306
- src = Path(file_path)
307
- # Mounted files may take a while to appear, so keep retrying.
308
-
309
- try:
310
- file_size = _get_file_size(src, timeout=60)
311
- part_number = 1
312
- done_part_numbers: set[int] = {
313
- p.part_number for p in upload_state.parts if p is not None
314
- }
315
- num_parts = upload_info.total_chunks()
316
-
317
- def next_part_number() -> int | None:
318
- nonlocal part_number
319
- while part_number in done_part_numbers:
320
- part_number += 1
321
- if part_number > num_parts:
322
- return None
323
- return part_number
324
-
325
- while not should_stop():
326
- curr_parth_num = next_part_number()
327
- if curr_parth_num is None:
328
- locked_print(f"File {file_path} has completed chunking all parts")
329
- break
330
- assert curr_parth_num is not None
331
- offset = (curr_parth_num - 1) * chunk_size
332
-
333
- assert offset < file_size, f"Offset {offset} is greater than file size"
334
-
335
- # Open the file, seek, read the chunk, and close immediately.
336
- with open(file_path, "rb") as f:
337
- f.seek(offset)
338
- data = f.read(chunk_size)
339
-
340
- if not data:
341
- warnings.warn(f"Empty data for part {part_number} of {file_path}")
342
-
343
- file_chunk = FileChunk(
344
- src,
345
- upload_id=upload_info.upload_id,
346
- part_number=part_number,
347
- data=data, # After this, data should not be reused.
348
- )
349
- done_part_numbers.add(part_number)
350
- output.put(file_chunk)
351
- part_number += 1
352
- except Exception as e:
353
-
354
- warnings.warn(f"Error reading file: {e}")
355
- finally:
356
- output.put(None)
357
17
 
358
18
 
359
19
  def upload_task(
rclone_api/util.py CHANGED
@@ -5,6 +5,7 @@ import time
5
5
  import warnings
6
6
  from pathlib import Path
7
7
  from tempfile import TemporaryDirectory
8
+ from threading import Lock
8
9
  from typing import Any
9
10
 
10
11
  from rclone_api.config import Config
@@ -15,6 +16,13 @@ from rclone_api.types import S3PathInfo
15
16
 
16
17
  # from .rclone import Rclone
17
18
 
19
_PRINT_LOCK = Lock()


def locked_print(*args, **kwargs):
    """Forward all arguments to ``print`` while holding ``_PRINT_LOCK``.

    Calls made through this helper are serialised against each other.
    """
    _PRINT_LOCK.acquire()
    try:
        print(*args, **kwargs)
    finally:
        _PRINT_LOCK.release()
25
+
18
26
 
19
27
  def to_path(item: Dir | Remote | str, rclone: Any) -> RPath:
20
28
  from rclone_api.rclone import Rclone
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rclone_api
3
- Version: 1.1.4
3
+ Version: 1.1.5
4
4
  Summary: rclone api in python
5
5
  Home-page: https://github.com/zackees/rclone-api
6
6
  License: BSD 3-Clause License
@@ -11,13 +11,14 @@ rclone_api/exec.py,sha256=1ovvaMXDEfLiT7BrYZyE85u_yFhEUwUNW3jPOzqknR8,1023
11
11
  rclone_api/file.py,sha256=EP5yT2dZ0H2p7CY5n0y5k5pHhIliV25pm8KOwBklUTk,1863
12
12
  rclone_api/filelist.py,sha256=xbiusvNgaB_b_kQOZoHMJJxn6TWGtPrWd2J042BI28o,767
13
13
  rclone_api/group_files.py,sha256=H92xPW9lQnbNw5KbtZCl00bD6iRh9yRbCuxku4j_3dg,8036
14
+ rclone_api/mount.py,sha256=OJGiZLhtvBJ8CTHSYUoKnYblCff3cD8_q4JTgTIVJHQ,3839
14
15
  rclone_api/process.py,sha256=RrMfTe0bndmJ6gBK67ioqNvCstJ8aTC8RlGX1XBLlcw,4191
15
- rclone_api/rclone.py,sha256=_CejQQ8jF-VYc_rO2u5i9qr8jmyksxyuoO4ZdHLW6tg,43540
16
+ rclone_api/rclone.py,sha256=TQuZiUGjVLr8cYVEZQ1m9fyujbASvNs5XFB9EBJOpuc,42115
16
17
  rclone_api/remote.py,sha256=O9WDUFQy9f6oT1HdUbTixK2eg0xtBBm8k4Xl6aa6K00,431
17
18
  rclone_api/rpath.py,sha256=8ZA_1wxWtskwcy0I8V2VbjKDmzPkiWd8Q2JQSvh-sYE,2586
18
19
  rclone_api/scan_missing_folders.py,sha256=Kulca2Q6WZodt00ATFHkmqqInuoPvBkhTcS9703y6po,4740
19
20
  rclone_api/types.py,sha256=NC3e78aXCx-sEQ-FqEaC9KzaJDdJhJrKa4Nwum_-Db0,563
20
- rclone_api/util.py,sha256=efck9W0rw5wfeRI35iiEz4dy2cMkNpVXrQ9zzynkBks,5185
21
+ rclone_api/util.py,sha256=ujinqW4xUkZAHBCL1VMhGu88LMdUFIu1ApF8rZEH8rQ,5324
21
22
  rclone_api/walk.py,sha256=-54NVE8EJcCstwDoaC_UtHm73R2HrZwVwQmsnv55xNU,3369
22
23
  rclone_api/assets/example.txt,sha256=lTBovRjiz0_TgtAtbA1C5hNi2ffbqnNPqkKg6UiKCT8,54
23
24
  rclone_api/cmd/copy_large_s3.py,sha256=33KFvCrh5uk-rdRtkREdEs2WNwxGgTdCAWDLCE4dm0A,2855
@@ -26,12 +27,14 @@ rclone_api/experimental/flags.py,sha256=0-mtXg9J4MoMm2uBKbsMLj4pSGRLQUAqNRDJWGtt
26
27
  rclone_api/experimental/flags_base.py,sha256=ajU_czkTcAxXYU-SlmiCfHY7aCQGHvpCLqJ-Z8uZLk0,2102
27
28
  rclone_api/s3/api.py,sha256=VstlaEnBjO2JDQuCRLdTfUGvQLbfshlXXhAzimFv4Vc,3763
28
29
  rclone_api/s3/basic_ops.py,sha256=hK3366xhVEzEcjz9Gk_8lFx6MRceAk72cax6mUrr6ko,2104
29
- rclone_api/s3/chunk_uploader.py,sha256=k7491vxvCX2KDz-nMQ45PDeHpaKPKmDtxl-URynLdeo,19736
30
+ rclone_api/s3/chunk_file.py,sha256=XPoDl7DJMJIGBMRoPO2wqwqCMT7ZrIsEkDqlbMH8jzs,3506
31
+ rclone_api/s3/chunk_types.py,sha256=Fq0IlhZ0IftuFQFkbICmmrOonII0BNzuY4CIKNC4wB0,8006
32
+ rclone_api/s3/chunk_uploader.py,sha256=KO8834Gery9HKWSqjQTNW0pbBbVoGrza9gj-1OaNLQQ,9130
30
33
  rclone_api/s3/create.py,sha256=SK3IGHZwsSkoG4Zb4NCphcVg9_f7VifDKng-tExMS2s,3088
31
34
  rclone_api/s3/types.py,sha256=81_3jwg6MGIxC-GxL-6zANzKO6au9C0BWvAqRyODxOM,1361
32
- rclone_api-1.1.4.dist-info/LICENSE,sha256=b6pOoifSXiUaz_lDS84vWlG3fr4yUKwB8fzkrH9R8bQ,1064
33
- rclone_api-1.1.4.dist-info/METADATA,sha256=DTOOicQKR2vGSu2zZCKLOwj8uR1u7jh8sst9VOevGQ0,4478
34
- rclone_api-1.1.4.dist-info/WHEEL,sha256=rF4EZyR2XVS6irmOHQIJx2SUqXLZKRMUrjsg8UwN-XQ,109
35
- rclone_api-1.1.4.dist-info/entry_points.txt,sha256=6eNqTRXKhVf8CpWNjXiOa_0Du9tHiW_HD2iQSXRsUg8,132
36
- rclone_api-1.1.4.dist-info/top_level.txt,sha256=EvZ7uuruUpe9RiUyEp25d1Keq7PWYNT0O_-mr8FCG5g,11
37
- rclone_api-1.1.4.dist-info/RECORD,,
35
+ rclone_api-1.1.5.dist-info/LICENSE,sha256=b6pOoifSXiUaz_lDS84vWlG3fr4yUKwB8fzkrH9R8bQ,1064
36
+ rclone_api-1.1.5.dist-info/METADATA,sha256=5gWOijGTvfWbWh8q_PkuzrRXqqA-hFK-hTpq-rYoDNE,4478
37
+ rclone_api-1.1.5.dist-info/WHEEL,sha256=rF4EZyR2XVS6irmOHQIJx2SUqXLZKRMUrjsg8UwN-XQ,109
38
+ rclone_api-1.1.5.dist-info/entry_points.txt,sha256=6eNqTRXKhVf8CpWNjXiOa_0Du9tHiW_HD2iQSXRsUg8,132
39
+ rclone_api-1.1.5.dist-info/top_level.txt,sha256=EvZ7uuruUpe9RiUyEp25d1Keq7PWYNT0O_-mr8FCG5g,11
40
+ rclone_api-1.1.5.dist-info/RECORD,,