rclone-api 1.1.4__tar.gz → 1.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rclone_api-1.1.4 → rclone_api-1.1.5}/PKG-INFO +1 -1
- {rclone_api-1.1.4 → rclone_api-1.1.5}/pyproject.toml +1 -1
- rclone_api-1.1.5/src/rclone_api/mount.py +102 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/rclone.py +4 -32
- rclone_api-1.1.5/src/rclone_api/s3/chunk_file.py +102 -0
- rclone_api-1.1.5/src/rclone_api/s3/chunk_types.py +254 -0
- rclone_api-1.1.5/src/rclone_api/s3/chunk_uploader.py +262 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/util.py +8 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api.egg-info/PKG-INFO +1 -1
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api.egg-info/SOURCES.txt +3 -0
- rclone_api-1.1.4/src/rclone_api/s3/chunk_uploader.py +0 -602
- {rclone_api-1.1.4 → rclone_api-1.1.5}/.aiderignore +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/.github/workflows/lint.yml +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/.github/workflows/push_macos.yml +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/.github/workflows/push_ubuntu.yml +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/.github/workflows/push_win.yml +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/.gitignore +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/.pylintrc +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/.vscode/launch.json +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/.vscode/settings.json +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/.vscode/tasks.json +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/LICENSE +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/MANIFEST.in +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/README.md +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/clean +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/install +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/lint +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/requirements.testing.txt +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/setup.cfg +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/setup.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/__init__.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/assets/example.txt +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/cli.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/cmd/copy_large_s3.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/cmd/list_files.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/completed_process.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/config.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/convert.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/deprecated.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/diff.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/dir.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/dir_listing.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/exec.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/experimental/flags.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/experimental/flags_base.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/file.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/filelist.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/group_files.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/process.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/remote.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/rpath.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/s3/api.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/s3/basic_ops.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/s3/create.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/s3/types.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/scan_missing_folders.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/types.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api/walk.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api.egg-info/dependency_links.txt +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api.egg-info/entry_points.txt +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api.egg-info/requires.txt +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/src/rclone_api.egg-info/top_level.txt +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/test +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/tests/archive/test_paramiko.py.disabled +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/tests/test_cmd_list_files.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/tests/test_copy.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/tests/test_copy_files.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/tests/test_diff.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/tests/test_group_files.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/tests/test_is_synced.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/tests/test_ls.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/tests/test_mount.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/tests/test_mount_s3.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/tests/test_mount_webdav.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/tests/test_mounted_ranged_download.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/tests/test_obscure.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/tests/test_rclone_config.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/tests/test_remote_control.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/tests/test_remotes.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/tests/test_s3.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/tests/test_scan_missing_folders.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/tests/test_size_files.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/tests/test_walk.py +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/tox.ini +0 -0
- {rclone_api-1.1.4 → rclone_api-1.1.5}/upload_package.sh +0 -0
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import platform
|
|
2
|
+
import subprocess
|
|
3
|
+
import time
|
|
4
|
+
import warnings
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
_SYSTEM = platform.system() # "Linux", "Darwin", "Windows", etc.
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def run_command(cmd: str, verbose: bool) -> int:
    """Execute *cmd* through the shell and return its exit code.

    Stdout/stderr are captured rather than inherited. When *verbose* is
    True the command line (and any failure details) are printed. If the
    command cannot be launched at all, a warning is issued and -1 is
    returned instead of raising.
    """
    if verbose:
        print(f"Executing: {cmd}")
    try:
        proc = subprocess.run(
            cmd, shell=True, capture_output=True, text=True, check=False
        )
    except Exception as e:
        warnings.warn(f"Error running command '{cmd}': {e}")
        return -1
    rc = proc.returncode
    if rc != 0 and verbose:
        print(f"Command failed: {cmd}\nStdErr: {proc.stderr.strip()}")
    return rc
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def clean_mount(mount_path: Path, verbose: bool = False) -> None:
    """
    Clean up a mount path across Linux, macOS, and Windows.

    The function attempts to unmount the mount at mount_path, then, if the
    directory is empty, removes it. On Linux it uses 'fusermount -u' (for FUSE mounts)
    and 'umount'. On macOS it uses 'umount' (and optionally 'diskutil unmount'),
    while on Windows it attempts to remove the mount point via 'mountvol /D'.

    All steps are best-effort: failures are reported via warnings.warn and
    never raised, so callers can invoke this unconditionally during teardown.

    Args:
        mount_path: Directory where the filesystem is (or was) mounted.
        verbose: When True, print progress for each step.
    """
    # Check if the mount path exists; if an OSError occurs, assume it exists.
    # (A stale FUSE mount can make .exists() itself raise, e.g. ENOTCONN.)
    try:
        mount_exists = mount_path.exists()
    except OSError as e:
        warnings.warn(f"Error checking {mount_path}: {e}")
        mount_exists = True

    # Give the system a moment (if unmount is in progress, etc.)
    # NOTE(review): this sleeps even when the path does not exist — presumably
    # to let an in-flight unmount settle before the early-return below.
    time.sleep(2)

    if not mount_exists:
        if verbose:
            print(f"{mount_path} does not exist; nothing to clean up.")
        return

    if verbose:
        print(f"{mount_path} still exists, attempting to unmount and remove.")

    # Platform-specific unmount procedures
    if _SYSTEM == "Linux":
        # Try FUSE unmount first (if applicable), then the regular umount.
        # Both are attempted; at most one is expected to succeed.
        run_command(f"fusermount -u {mount_path}", verbose)
        run_command(f"umount {mount_path}", verbose)
    elif _SYSTEM == "Darwin":
        # On macOS, use umount; optionally try diskutil for stubborn mounts.
        run_command(f"umount {mount_path}", verbose)
        # Optionally: uncomment the next line if diskutil unmount is preferred.
        # run_command(f"diskutil unmount {mount_path}", verbose)
    elif _SYSTEM == "Windows":
        # On Windows, remove the mount point using mountvol.
        run_command(f"mountvol {mount_path} /D", verbose)
        # If that does not work, try to remove the directory directly.
        try:
            mount_path.rmdir()
            if verbose:
                print(f"Successfully removed mount directory {mount_path}")
        except Exception as e:
            warnings.warn(f"Failed to remove mount {mount_path}: {e}")
    else:
        warnings.warn(f"Unsupported platform: {_SYSTEM}")

    # Allow some time for the unmount commands to take effect.
    time.sleep(2)

    # Re-check if the mount path still exists.
    try:
        still_exists = mount_path.exists()
    except OSError as e:
        warnings.warn(f"Error re-checking {mount_path}: {e}")
        still_exists = True

    if still_exists:
        if verbose:
            print(f"{mount_path} still exists after unmount attempt.")
        # Attempt to remove the directory if it is empty.
        try:
            # Only remove if the directory is empty.
            if not any(mount_path.iterdir()):
                mount_path.rmdir()
                if verbose:
                    print(f"Removed empty mount directory {mount_path}")
            else:
                warnings.warn(f"{mount_path} is not empty; cannot remove.")
        except Exception as e:
            warnings.warn(f"Failed during cleanup of {mount_path}: {e}")
    else:
        if verbose:
            print(f"{mount_path} successfully cleaned up.")
|
|
@@ -914,38 +914,10 @@ class Rclone:
|
|
|
914
914
|
if proc.poll() is None:
|
|
915
915
|
proc.terminate()
|
|
916
916
|
proc.wait()
|
|
917
|
-
if not error_happened
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
if not _IS_WINDOWS:
|
|
922
|
-
|
|
923
|
-
def exec(cmd: str) -> int:
|
|
924
|
-
if verbose:
|
|
925
|
-
print(f"Executing: {cmd}")
|
|
926
|
-
rtn = os.system(cmd)
|
|
927
|
-
if rtn != 0 and verbose:
|
|
928
|
-
print(f"Failed to execute: {cmd}")
|
|
929
|
-
return rtn
|
|
930
|
-
|
|
931
|
-
exec(f"fusermount -u {outdir}")
|
|
932
|
-
exec(f"umount {outdir}")
|
|
933
|
-
time.sleep(2)
|
|
934
|
-
if outdir.exists():
|
|
935
|
-
is_empty = True
|
|
936
|
-
try:
|
|
937
|
-
is_empty = not list(outdir.iterdir())
|
|
938
|
-
if not is_empty:
|
|
939
|
-
warnings.warn(f"Failed to unmount {outdir}")
|
|
940
|
-
else:
|
|
941
|
-
try:
|
|
942
|
-
outdir.rmdir()
|
|
943
|
-
except Exception as e:
|
|
944
|
-
warnings.warn(f"Failed to remove {outdir}: {e}")
|
|
945
|
-
except Exception as e:
|
|
946
|
-
warnings.warn(
|
|
947
|
-
f"Failed during mount cleanup of {outdir}: because {e}"
|
|
948
|
-
)
|
|
917
|
+
if not error_happened:
|
|
918
|
+
from rclone_api.mount import clean_mount
|
|
919
|
+
|
|
920
|
+
clean_mount(outdir, verbose=verbose)
|
|
949
921
|
|
|
950
922
|
@deprecated("mount")
|
|
951
923
|
def mount_webdav(
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import time
|
|
2
|
+
import warnings
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from queue import Queue
|
|
5
|
+
|
|
6
|
+
from rclone_api.s3.chunk_types import FileChunk, UploadState
|
|
7
|
+
from rclone_api.util import locked_print
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _get_file_size(file_path: Path, timeout: int = 60) -> int:
|
|
11
|
+
sleep_time = timeout / 60 if timeout > 0 else 1
|
|
12
|
+
start = time.time()
|
|
13
|
+
while True:
|
|
14
|
+
expired = time.time() - start > timeout
|
|
15
|
+
try:
|
|
16
|
+
time.sleep(sleep_time)
|
|
17
|
+
if file_path.exists():
|
|
18
|
+
return file_path.stat().st_size
|
|
19
|
+
except FileNotFoundError as e:
|
|
20
|
+
if expired:
|
|
21
|
+
print(f"File not found: {file_path}, exception is {e}")
|
|
22
|
+
raise
|
|
23
|
+
if expired:
|
|
24
|
+
raise TimeoutError(f"File {file_path} not found after {timeout} seconds")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def file_chunker(
    upload_state: UploadState, max_chunks: int | None, output: Queue[FileChunk | None]
) -> None:
    """Producer: slice the source file into FileChunks and feed *output*.

    Intended to run on its own thread. Chunks already recorded as finished
    in *upload_state* are skipped, so interrupted uploads resume where they
    left off. A None sentinel is ALWAYS pushed onto *output* on exit
    (success, error, or max_chunks reached) so consumers can terminate.

    Args:
        upload_state: Resumable upload bookkeeping; supplies source path,
            chunk size, file size and already-finished part numbers.
        max_chunks: Stop after emitting this many chunks; None means no limit.
        output: Queue receiving FileChunk items, terminated by None.
    """
    count = 0

    def should_stop() -> bool:
        # Returns True once max_chunks chunks have been emitted. Also
        # counts calls so the source file size can be re-validated
        # periodically (every 10th chunk after the first 10).
        nonlocal count
        if max_chunks is None:
            return False
        if count >= max_chunks:
            return True
        count += 1
        if count > 10 and count % 10 == 0:
            # recheck that the file size has not changed mid-upload;
            # resuming against a modified file would corrupt the object.
            file_size = _get_file_size(upload_state.upload_info.src_file_path)
            if file_size != upload_state.upload_info.file_size:
                locked_print(
                    f"File size changed, cannot resume, expected {upload_state.upload_info.file_size}, got {file_size}"
                )
                raise ValueError("File size changed, cannot resume")
        return False

    upload_info = upload_state.upload_info
    file_path = upload_info.src_file_path
    chunk_size = upload_info.chunk_size
    src = Path(file_path)
    # Mounted files may take a while to appear, so keep retrying.

    try:
        file_size = _get_file_size(src, timeout=60)
        part_number = 1
        # Parts completed in a previous run; these are skipped below.
        done_part_numbers: set[int] = {
            p.part_number for p in upload_state.parts if p is not None
        }
        num_parts = upload_info.total_chunks()

        def next_part_number() -> int | None:
            # Advance past finished parts; None means every part is done.
            # Part numbers are 1-based, matching S3 multipart semantics.
            nonlocal part_number
            while part_number in done_part_numbers:
                part_number += 1
            if part_number > num_parts:
                return None
            return part_number

        while not should_stop():
            curr_parth_num = next_part_number()
            if curr_parth_num is None:
                locked_print(f"File {file_path} has completed chunking all parts")
                break
            assert curr_parth_num is not None
            offset = (curr_parth_num - 1) * chunk_size

            assert offset < file_size, f"Offset {offset} is greater than file size"

            # Open the file, seek, read the chunk, and close immediately.
            # Reopening per chunk avoids holding a handle on a (possibly
            # mounted) file between reads.
            with open(file_path, "rb") as f:
                f.seek(offset)
                data = f.read(chunk_size)

            if not data:
                warnings.warn(f"Empty data for part {part_number} of {file_path}")

            file_chunk = FileChunk(
                src,
                upload_id=upload_info.upload_id,
                part_number=part_number,
                data=data,  # After this, data should not be reused.
            )
            done_part_numbers.add(part_number)
            output.put(file_chunk)
            part_number += 1
    except Exception as e:
        # Best-effort: report and fall through so the sentinel still goes out.
        warnings.warn(f"Error reading file: {e}")
    finally:
        # Always signal end-of-stream, even on error or early stop.
        output.put(None)
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
import time
|
|
4
|
+
from dataclasses import dataclass, field, fields
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from threading import Lock
|
|
7
|
+
|
|
8
|
+
from botocore.client import BaseClient
|
|
9
|
+
|
|
10
|
+
from rclone_api.util import locked_print
|
|
11
|
+
|
|
12
|
+
_MIN_UPLOAD_CHUNK_SIZE = 5 * 1024 * 1024 # 5MB
|
|
13
|
+
_SAVE_STATE_LOCK = Lock()
|
|
14
|
+
|
|
15
|
+
_TMP_DIR_ACCESS_LOCK = Lock()
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _clean_old_files(out: Path) -> None:
|
|
19
|
+
# clean up files older than 1 day
|
|
20
|
+
|
|
21
|
+
now = time.time()
|
|
22
|
+
# Erase all stale files and then purge empty directories.
|
|
23
|
+
for root, dirs, files in os.walk(out):
|
|
24
|
+
for name in files:
|
|
25
|
+
f = Path(root) / name
|
|
26
|
+
filemod = f.stat().st_mtime
|
|
27
|
+
diff_secs = now - filemod
|
|
28
|
+
diff_days = diff_secs / (60 * 60 * 24)
|
|
29
|
+
if diff_days > 1:
|
|
30
|
+
locked_print(f"Removing old file: {f}")
|
|
31
|
+
f.unlink()
|
|
32
|
+
|
|
33
|
+
for root, dirs, _ in os.walk(out):
|
|
34
|
+
for dir in dirs:
|
|
35
|
+
d = Path(root) / dir
|
|
36
|
+
if not list(d.iterdir()):
|
|
37
|
+
locked_print(f"Removing empty directory: {d}")
|
|
38
|
+
d.rmdir()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _get_chunk_tmpdir() -> Path:
    """Return the on-disk chunk cache directory ("chunk_store"), creating it
    on first use.

    The resolved Path is memoized on the function object itself; when an
    existing directory is seen for the first time, day-old leftovers are
    purged before it is reused. Thread-safe via _TMP_DIR_ACCESS_LOCK.
    """
    with _TMP_DIR_ACCESS_LOCK:
        cache = _get_chunk_tmpdir.__dict__
        cached = cache.get("out")
        if cached is not None:
            # Already created and validated on a previous call.
            return cached
        chunk_dir = Path("chunk_store")
        if chunk_dir.exists():
            # First access this run: evict stale chunk files.
            _clean_old_files(chunk_dir)
        chunk_dir.mkdir(exist_ok=True, parents=True)
        cache["out"] = chunk_dir
        return chunk_dir
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class FileChunk:
    """One part of a multipart upload, spilled to disk to bound memory use.

    The chunk bytes are written to a temp file under the chunk store as soon
    as the object is constructed; the `data` property reads them back on
    demand and `close()` deletes the backing file.
    """

    def __init__(self, src: Path, upload_id: str, part_number: int, data: bytes):
        assert data is not None, f"{src}: Data must not be None"
        self.upload_id = upload_id
        self.src = src
        self.part_number = part_number
        name = src.name
        self.tmpdir = _get_chunk_tmpdir()
        self.filepart = self.tmpdir / f"{name}_{upload_id}.part_{part_number}.tmp"
        self.filepart.write_bytes(data)
        del data  # Free the in-memory copy; the bytes now live on disk.

    @property
    def data(self) -> bytes:
        """Read the chunk bytes back from the backing temp file."""
        assert self.filepart is not None
        # (Removed an unreachable `return b""` that followed this block.)
        with open(self.filepart, "rb") as f:
            return f.read()

    def close(self) -> None:
        """Delete the backing temp file, if it still exists. Idempotent."""
        if self.filepart.exists():
            self.filepart.unlink()

    def __del__(self):
        # __init__ may have failed before self.filepart was assigned; guard
        # so garbage collection does not log a spurious AttributeError.
        if getattr(self, "filepart", None) is not None:
            self.close()
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@dataclass
class UploadInfo:
    """Immutable description of one in-flight S3 multipart upload."""

    s3_client: BaseClient  # live boto3 client; never serialized
    bucket_name: str
    object_name: str
    src_file_path: Path
    upload_id: str
    retries: int
    chunk_size: int
    file_size: int
    _total_chunks: int | None = None

    def total_chunks(self) -> int:
        """Number of parts needed to cover file_size at chunk_size each."""
        full, remainder = divmod(self.file_size, self.chunk_size)
        return full + 1 if remainder else full

    def __post_init__(self):
        # Cache the chunk count once, unless it was supplied explicitly.
        if self._total_chunks is None:
            self._total_chunks = self.total_chunks()

    def to_json(self) -> dict:
        """Serialize to a JSON-safe dict; the client becomes a placeholder."""
        out: dict = {}
        for f in fields(self):
            if f.name == "s3_client":
                # Runtime object; replaced by a marker string.
                out[f.name] = "RUNTIME OBJECT"
                continue
            value = getattr(self, f.name)
            out[f.name] = str(value) if isinstance(value, Path) else value
        return out

    @staticmethod
    def from_json(s3_client: BaseClient, json_dict: dict) -> "UploadInfo":
        """Rebuild from to_json() output, substituting a live client."""
        json_dict.pop("s3_client")  # Drop the placeholder string.
        return UploadInfo(s3_client=s3_client, **json_dict)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@dataclass
|
|
125
|
+
class FinishedPiece:
|
|
126
|
+
part_number: int
|
|
127
|
+
etag: str
|
|
128
|
+
|
|
129
|
+
def to_json(self) -> dict:
|
|
130
|
+
return {"part_number": self.part_number, "etag": self.etag}
|
|
131
|
+
|
|
132
|
+
def to_json_str(self) -> str:
|
|
133
|
+
return json.dumps(self.to_json(), indent=0)
|
|
134
|
+
|
|
135
|
+
@staticmethod
|
|
136
|
+
def to_json_array(parts: list["FinishedPiece | None"]) -> list[dict | None]:
|
|
137
|
+
non_none: list[FinishedPiece] = [p for p in parts if p is not None]
|
|
138
|
+
non_none.sort(key=lambda x: x.part_number)
|
|
139
|
+
all_nones: list[None] = [None for p in parts if p is None]
|
|
140
|
+
assert len(all_nones) <= 1, "Only one None should be present"
|
|
141
|
+
return [p.to_json() for p in non_none]
|
|
142
|
+
|
|
143
|
+
@staticmethod
|
|
144
|
+
def from_json(json: dict | None) -> "FinishedPiece | None":
|
|
145
|
+
if json is None:
|
|
146
|
+
return None
|
|
147
|
+
return FinishedPiece(**json)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
@dataclass
class UploadState:
    """Resumable bookkeeping for one multipart upload.

    Tracks which parts have finished and persists itself as JSON so an
    interrupted upload can resume. NOTE: the field name `peristant` (sic)
    is kept as-is for backward compatibility with existing callers and
    previously written state files.
    """

    upload_info: UploadInfo
    # JSON state-file path; derived from the object name in __post_init__
    # when None is passed.
    peristant: Path | None
    # default_factory so each instance gets its OWN lock; the previous
    # `lock: Lock = Lock()` default was a single lock shared by every
    # UploadState instance.
    lock: Lock = field(default_factory=Lock)
    parts: list[FinishedPiece | None] = field(default_factory=list)

    def update_source_file(self, src_file: Path) -> None:
        """Re-point at a new path for the same file; sizes must match."""
        new_file_size = os.path.getsize(src_file)
        if new_file_size != self.upload_info.file_size:
            raise ValueError("File size changed, cannot resume")
        self.upload_info.src_file_path = src_file
        self.save()

    def is_done(self) -> bool:
        """True once every part has been recorded as finished."""
        return self.remaining() == 0

    def count(self) -> tuple[int, int]:
        """Return (finished_parts, total_parts)."""
        num_chunks = self.upload_info.total_chunks()
        finished = sum(1 for p in self.parts if p is not None)
        return finished, num_chunks

    def finished(self) -> int:
        """Number of parts recorded as finished."""
        count, _ = self.count()
        return count

    def remaining(self) -> int:
        """Number of parts still to upload."""
        count, num_chunks = self.count()
        assert (
            count <= num_chunks
        ), f"Count {count} is greater than num_chunks {num_chunks}"
        return num_chunks - count

    def add_finished(self, part: FinishedPiece | None) -> None:
        """Record a finished part (or the None end sentinel) and persist."""
        with self.lock:
            self.parts.append(part)
            self._save_no_lock()

    def __post_init__(self):
        # Derive a default state-file path keyed on object name and chunk
        # size, so a restart with the same parameters finds its state.
        if self.peristant is None:
            object_name = self.upload_info.object_name
            chunk_size = self.upload_info.chunk_size
            parent = _get_chunk_tmpdir()
            self.peristant = parent / f"{object_name}_chunk_size_{chunk_size}_.json"

    def save(self) -> None:
        """Persist the current state to disk (process-wide serialized)."""
        with _SAVE_STATE_LOCK:
            self._save_no_lock()

    def _save_no_lock(self) -> None:
        # Caller must hold _SAVE_STATE_LOCK or self.lock.
        assert self.peristant is not None, "No path to save to"
        self.peristant.write_text(self.to_json_str(), encoding="utf-8")

    @staticmethod
    def load(s3_client: BaseClient, path: Path) -> "UploadState":
        """Load a previously saved state file, attaching a live client."""
        with _SAVE_STATE_LOCK:
            return UploadState.from_json(s3_client, path)

    def to_json(self) -> dict:
        """Serialize to a JSON-safe dict (parts sorted, Nones dropped)."""
        parts: list[FinishedPiece | None] = list(self.parts)
        parts_json = FinishedPiece.to_json_array(parts)
        finished_count, total = self.count()
        return {
            "upload_info": self.upload_info.to_json(),
            "finished_parts": parts_json,
            "is_done": self.is_done(),
            "finished_count": finished_count,
            "total_parts": total,
        }

    def to_json_str(self) -> str:
        """Pretty-printed JSON form, as written to the state file."""
        return json.dumps(self.to_json(), indent=4)

    @staticmethod
    def from_json(s3_client: BaseClient, json_file: Path) -> "UploadState":
        """Rebuild an UploadState from a state file written by save()."""
        json_str = json_file.read_text(encoding="utf-8")
        data = json.loads(json_str)
        upload_info = UploadInfo.from_json(s3_client, data["upload_info"])
        finished_parts = [FinishedPiece.from_json(p) for p in data["finished_parts"]]
        return UploadState(
            peristant=json_file, upload_info=upload_info, parts=finished_parts
        )
|