rclone-api 1.4.15__py2.py3-none-any.whl → 1.4.19__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,24 +1,17 @@
1
1
  import argparse
2
- import os
3
2
  from dataclasses import dataclass
4
3
  from pathlib import Path
5
4
 
6
5
  from rclone_api import Rclone
7
- from rclone_api.detail.copy_file_parts import InfoJson
8
6
  from rclone_api.s3.s3_multipart_uploader_by_copy import (
9
- finish_multipart_upload_from_keys,
7
+ s3_server_side_multi_part_merge,
10
8
  )
11
- from rclone_api.types import SizeSuffix
12
-
13
- _TIMEOUT_READ = 900
14
- _TIMEOUT_CONNECTION = 900
15
9
 
16
10
 
17
11
  @dataclass
18
12
  class Args:
19
13
  config_path: Path
20
14
  src: str # like dst:TorrentBooks/aa_misc_data/aa_misc_data/world_lending_library_2024_11.tar.zst-parts/ (info.json will be located here)
21
- dst: str # like dst:TorrentBooks/aa_misc_data/aa_misc_data/world_lending_library_2024_11.tar.zst
22
15
  verbose: bool
23
16
 
24
17
 
@@ -31,8 +24,7 @@ def list_files(rclone: Rclone, path: str):
31
24
 
32
25
  def _parse_args() -> Args:
33
26
  parser = argparse.ArgumentParser(description="List files in a remote path.")
34
- parser.add_argument("src", help="File to copy")
35
- parser.add_argument("dst", help="Destination file")
27
+ parser.add_argument("src", help="Directory that holds the info.json file")
36
28
  parser.add_argument("-v", "--verbose", help="Verbose output", action="store_true")
37
29
  parser.add_argument(
38
30
  "--config", help="Path to rclone config file", type=Path, required=False
@@ -47,112 +39,26 @@ def _parse_args() -> Args:
47
39
  out = Args(
48
40
  config_path=config,
49
41
  src=args.src,
50
- dst=args.dst,
51
42
  verbose=args.verbose,
52
43
  )
53
44
  return out
54
45
 
55
46
 
56
- def do_finish_part(rclone: Rclone, info: InfoJson, dst: str) -> None:
57
- from rclone_api.s3.create import (
58
- BaseClient,
59
- S3Config,
60
- S3Credentials,
61
- create_s3_client,
62
- )
63
-
64
- s3_config = S3Config(
65
- verbose=False,
66
- timeout_read=_TIMEOUT_READ,
67
- timeout_connection=_TIMEOUT_CONNECTION,
68
- )
69
-
70
- s3_creds: S3Credentials = rclone.impl.get_s3_credentials(remote=dst)
71
- s3_client: BaseClient = create_s3_client(s3_creds=s3_creds, s3_config=s3_config)
72
- s3_bucket = s3_creds.bucket_name
73
- is_done = info.fetch_is_done()
74
- assert is_done, f"Upload is not done: {info}"
75
-
76
- parts_dir = info.parts_dir
77
- if parts_dir.endswith("/"):
78
- parts_dir = parts_dir[:-1]
79
- source_keys = info.fetch_all_finished()
80
- # print(parts_dir)
81
- # print(source_keys)
82
-
83
- parts_path = parts_dir.split(s3_bucket)[1]
84
- if parts_path.startswith("/"):
85
- parts_path = parts_path[1:]
86
-
87
- first_part: int | None = info.first_part
88
- last_part: int | None = info.last_part
89
- size: SizeSuffix | None = info.size
90
-
91
- assert first_part is not None
92
- assert last_part is not None
93
- assert size is not None
94
-
95
- def _to_s3_key(name: str | None) -> str:
96
- if name:
97
- out = f"{parts_path}/{name}"
98
- return out
99
- out = f"{parts_path}"
100
- return out
101
-
102
- parts: list[tuple[int, str]] = []
103
- part_num = 1
104
- for part_key in source_keys:
105
- s3_key = _to_s3_key(name=part_key)
106
- parts.append((part_num, s3_key))
107
- part_num += 1
108
-
109
- chunksize = info.chunksize
110
- assert chunksize is not None
111
-
112
- dst_name = info.dst_name
113
- dst_dir = os.path.dirname(parts_path)
114
- # dst_key =
115
- dst_key = f"{dst_dir}/{dst_name}"
116
-
117
- finish_multipart_upload_from_keys(
118
- s3_client=s3_client,
119
- source_bucket=s3_creds.bucket_name,
120
- parts=parts,
121
- destination_bucket=s3_creds.bucket_name,
122
- destination_key=dst_key,
123
- chunk_size=chunksize.as_int(),
124
- final_size=size.as_int(),
125
- max_workers=10,
126
- )
127
-
128
- # now check if the dst now exists, if so, delete the parts folder.
129
- # if rclone.exists(dst):
130
- # rclone.purge(parts_dir)
131
-
132
- if not rclone.exists(dst):
133
- raise FileNotFoundError(f"Destination file not found: {dst}")
134
-
135
- write_size = rclone.size_file(dst)
136
- if write_size != size:
137
- raise ValueError(f"Size mismatch: {write_size} != {size}")
138
-
139
- print(f"Upload complete: {dst}")
140
- rclone.purge(parts_dir)
47
+ def _get_info_path(src: str) -> str:
48
+ if src.endswith("/"):
49
+ src = src[:-1]
50
+ info_path = f"{src}/info.json"
51
+ return info_path
141
52
 
142
53
 
143
54
  def main() -> int:
144
55
  """Main entry point."""
145
56
  args = _parse_args()
146
57
  rclone = Rclone(rclone_conf=args.config_path)
147
- info_json = f"{args.src}/info.json".replace("//", "/")
148
- info = InfoJson(rclone.impl, src=None, src_info=info_json)
149
- loaded = info.load()
150
- if not loaded:
151
- raise FileNotFoundError(
152
- f"Info file not found, has the upload finished? {info_json}"
153
- )
154
- print(info)
155
- do_finish_part(rclone=rclone, info=info, dst=args.dst)
58
+ info_path = _get_info_path(src=args.src)
59
+ s3_server_side_multi_part_merge(
60
+ rclone=rclone.impl, info_path=info_path, max_workers=1
61
+ )
156
62
  return 0
157
63
 
158
64
 
@@ -164,7 +70,4 @@ if __name__ == "__main__":
164
70
  sys.argv.append(
165
71
  "dst:TorrentBooks/aa_misc_data/aa_misc_data/world_lending_library_2024_11.tar.zst-parts/"
166
72
  )
167
- sys.argv.append(
168
- "dst:TorrentBooks/aa_misc_data/aa_misc_data/world_lending_library_2024_11.tar.zst"
169
- )
170
73
  main()
@@ -209,7 +209,10 @@ class InfoJson:
209
209
 
210
210
  @property
211
211
  def parts_dir(self) -> str:
212
- return os.path.dirname(self.src_info)
212
+ parts_dir = os.path.dirname(self.src_info)
213
+ if parts_dir.endswith("/"):
214
+ parts_dir = parts_dir[:-1]
215
+ return parts_dir
213
216
 
214
217
  @property
215
218
  def dst(self) -> str:
rclone_api/process.py CHANGED
@@ -1,12 +1,13 @@
1
1
  import atexit
2
2
  import subprocess
3
3
  import threading
4
- import time
5
4
  import weakref
6
5
  from dataclasses import dataclass
7
6
  from pathlib import Path
8
7
  from typing import Any
9
8
 
9
+ import psutil
10
+
10
11
  from rclone_api.config import Config
11
12
  from rclone_api.util import clear_temp_config_file, get_verbose, make_temp_config_file
12
13
 
@@ -24,20 +25,25 @@ class ProcessArgs:
24
25
 
25
26
  class Process:
26
27
  def __init__(self, args: ProcessArgs) -> None:
27
- assert args.rclone_exe.exists()
28
+ assert (
29
+ args.rclone_exe.exists()
30
+ ), f"rclone executable not found: {args.rclone_exe}"
28
31
  self.args = args
29
32
  self.log = args.log
30
33
  self.tempfile: Path | None = None
34
+
31
35
  verbose = get_verbose(args.verbose)
36
+ # Create a temporary config file if needed.
32
37
  if isinstance(args.rclone_conf, Config):
33
- self.tmpfile = make_temp_config_file()
34
- self.tmpfile.write_text(args.rclone_conf.text, encoding="utf-8")
35
- rclone_conf = self.tmpfile
38
+ self.tempfile = make_temp_config_file()
39
+ self.tempfile.write_text(args.rclone_conf.text, encoding="utf-8")
40
+ rclone_conf = self.tempfile
36
41
  else:
37
42
  rclone_conf = args.rclone_conf
38
43
 
39
- assert rclone_conf.exists()
44
+ assert rclone_conf.exists(), f"rclone config not found: {rclone_conf}"
40
45
 
46
+ # Build the command.
41
47
  self.cmd = (
42
48
  [str(args.rclone_exe.resolve())]
43
49
  + ["--config", str(rclone_conf.resolve())]
@@ -49,16 +55,14 @@ class Process:
49
55
  if verbose:
50
56
  cmd_str = subprocess.list2cmdline(self.cmd)
51
57
  print(f"Running: {cmd_str}")
52
- kwargs: dict = {}
53
- kwargs["shell"] = False
58
+ kwargs: dict = {"shell": False}
54
59
  if args.capture_stdout:
55
60
  kwargs["stdout"] = subprocess.PIPE
56
61
  kwargs["stderr"] = subprocess.STDOUT
57
62
 
58
63
  self.process = subprocess.Popen(self.cmd, **kwargs) # type: ignore
59
64
 
60
- # Register an atexit callback using a weak reference to avoid
61
- # keeping the Process instance alive solely due to the callback.
65
+ # Register an atexit callback using a weak reference to avoid keeping the Process instance alive.
62
66
  self_ref = weakref.ref(self)
63
67
 
64
68
  def exit_cleanup():
@@ -77,39 +81,60 @@ class Process:
77
81
  self.cleanup()
78
82
 
79
83
  def cleanup(self) -> None:
80
- clear_temp_config_file(self.tempfile)
84
+ if self.tempfile:
85
+ clear_temp_config_file(self.tempfile)
81
86
 
82
- def _atexit_terminate(self) -> None:
87
+ def _kill_process_tree(self) -> None:
83
88
  """
84
- Registered via atexit, this method attempts to gracefully terminate the process.
85
- If the process does not exit within a short timeout, it is aggressively killed.
89
+ Use psutil to recursively terminate the main process and all its child processes.
86
90
  """
87
- if self.process.poll() is None: # Process is still running
88
-
89
- def terminate_sequence():
91
+ try:
92
+ parent = psutil.Process(self.process.pid)
93
+ except psutil.NoSuchProcess:
94
+ return
95
+
96
+ # Terminate child processes.
97
+ children = parent.children(recursive=True)
98
+ if children:
99
+ print(f"Terminating {len(children)} child processes...")
100
+ for child in children:
90
101
  try:
91
- # Try to terminate gracefully.
92
- self.process.terminate()
102
+ child.terminate()
93
103
  except Exception as e:
94
- print(f"Error calling terminate on process {self.process.pid}: {e}")
95
- # Allow time for graceful shutdown.
96
- timeout = 2 # seconds
97
- start = time.time()
98
- while self.process.poll() is None and (time.time() - start) < timeout:
99
- time.sleep(0.1)
100
- # If still running, kill aggressively.
101
- if self.process.poll() is None:
104
+ print(f"Error terminating child process {child.pid}: {e}")
105
+ psutil.wait_procs(children, timeout=2)
106
+ # Kill any that remain.
107
+ for child in children:
108
+ if child.is_running():
102
109
  try:
103
- self.process.kill()
110
+ child.kill()
104
111
  except Exception as e:
105
- print(f"Error calling kill on process {self.process.pid}: {e}")
106
- # Optionally wait briefly for termination.
112
+ print(f"Error killing child process {child.pid}: {e}")
113
+
114
+ # Terminate the parent process.
115
+ if parent.is_running():
116
+ try:
117
+ parent.terminate()
118
+ except Exception as e:
119
+ print(f"Error terminating process {parent.pid}: {e}")
120
+ try:
121
+ parent.wait(timeout=3)
122
+ except psutil.TimeoutExpired:
107
123
  try:
108
- self.process.wait(timeout=1)
109
- except Exception:
110
- pass
124
+ parent.kill()
125
+ except Exception as e:
126
+ print(f"Error killing process {parent.pid}: {e}")
127
+
128
+ def _atexit_terminate(self) -> None:
129
+ """
130
+ This method is registered via atexit and uses psutil to clean up the process tree.
131
+ It runs in a daemon thread so that termination happens without blocking interpreter shutdown.
132
+ """
133
+ if self.process.poll() is None: # Process is still running.
134
+
135
+ def terminate_sequence():
136
+ self._kill_process_tree()
111
137
 
112
- # Run the termination sequence in a separate daemon thread.
113
138
  t = threading.Thread(target=terminate_sequence, daemon=True)
114
139
  t.start()
115
140
  t.join(timeout=3)
@@ -122,12 +147,12 @@ class Process:
122
147
  self.cleanup()
123
148
 
124
149
  def kill(self) -> None:
125
- self.cleanup()
126
- return self.process.kill()
150
+ """Forcefully kill the process tree."""
151
+ self._kill_process_tree()
127
152
 
128
153
  def terminate(self) -> None:
129
- self.cleanup()
130
- return self.process.terminate()
154
+ """Gracefully terminate the process tree."""
155
+ self._kill_process_tree()
131
156
 
132
157
  @property
133
158
  def returncode(self) -> int | None:
@@ -147,8 +172,8 @@ class Process:
147
172
  def wait(self) -> int:
148
173
  return self.process.wait()
149
174
 
150
- def send_signal(self, signal: int) -> None:
151
- return self.process.send_signal(signal)
175
+ def send_signal(self, sig: int) -> None:
176
+ self.process.send_signal(sig)
152
177
 
153
178
  def __str__(self) -> str:
154
179
  state = ""
@@ -0,0 +1,147 @@
1
+ """
2
+ https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/client/upload_part_copy.html
3
+ * client.upload_part_copy
4
+
5
+ This module provides functionality for S3 multipart uploads, including copying parts
6
+ from existing S3 objects using upload_part_copy.
7
+ """
8
+
9
+ import json
10
+ from dataclasses import dataclass
11
+ from typing import Any
12
+
13
+ from rclone_api.rclone_impl import RcloneImpl
14
+ from rclone_api.s3.multipart.finished_piece import FinishedPiece
15
+
16
+
17
@dataclass
class Part:
    """One entry of a multipart merge: a part number and the S3 key it refers to."""

    part_number: int
    s3_key: str

    def to_json(self) -> dict:
        """Return the part as a plain dict with ``part_number`` and ``s3_key`` keys."""
        return {"part_number": self.part_number, "s3_key": self.s3_key}

    @staticmethod
    def from_json(json_dict: dict) -> "Part | Exception":
        """Build a ``Part`` from a dict produced by ``to_json``.

        Errors are returned as values, never raised: the result is an
        ``Exception`` when a required key is missing (or ``None``) or when
        *json_dict* is not dict-like at all.
        """
        try:
            part_number = json_dict.get("part_number")
            s3_key = json_dict.get("s3_key")
        except AttributeError:
            # Not a mapping (e.g. None or a string): report it like any other
            # malformed entry instead of letting the AttributeError escape.
            return ValueError(f"Invalid JSON: {json_dict}")
        if part_number is None or s3_key is None:
            return ValueError(f"Invalid JSON: {json_dict}")
        return Part(part_number=part_number, s3_key=s3_key)

    @staticmethod
    def from_json_array(json_array: list[dict]) -> "list[Part] | Exception":
        """Parse a list of part dicts, stopping at the first invalid entry.

        Returns the parsed parts in input order, or the first error found.
        Nothing is raised; a failure while iterating *json_array* is returned
        as the exception object as well.
        """
        try:
            parts: list[Part] = []
            for entry in json_array:
                part = Part.from_json(entry)
                if isinstance(part, Exception):
                    return part
                parts.append(part)
            return parts
        except Exception as e:
            # Iterating an arbitrary input may fail (e.g. json_array is None);
            # keep the "errors as values" contract of this method.
            return e
46
+
47
+
48
class MergeState:
    """State of one multipart merge: target, upload id, all parts, finished pieces.

    The state can be converted to and from JSON, and written to or partially
    reloaded from a path through an ``RcloneImpl``.
    """

    def __init__(
        self,
        rclone_impl: RcloneImpl,
        merge_path: str,
        upload_id: str,
        bucket: str,
        dst_key: str,
        finished: list[FinishedPiece],
        all_parts: list[Part],
    ) -> None:
        """Store the merge parameters; the two lists are copied, not aliased."""
        self.rclone_impl: RcloneImpl = rclone_impl
        self.upload_id: str = upload_id
        self.bucket: str = bucket
        self.dst_key: str = dst_key
        self.merge_path: str = merge_path
        self.merge_parts_path: str = f"{merge_path}/merge"  # future use?
        # Private copies so later appends do not touch the caller's lists.
        self.finished: list[FinishedPiece] = list(finished)
        self.all_parts: list[Part] = list(all_parts)

    def on_finished(self, finished_piece: FinishedPiece) -> None:
        """Record *finished_piece* as completed."""
        self.finished.append(finished_piece)

    def remaining_parts(self) -> list[Part]:
        """Return the parts whose part number has no finished piece yet."""
        done: set[int] = {piece.part_number for piece in self.finished}
        return [part for part in self.all_parts if part.part_number not in done]

    @staticmethod
    def from_json(rclone_impl: RcloneImpl, json: dict) -> "MergeState | Exception":
        """Rebuild a ``MergeState`` from the dict layout produced by ``to_json``.

        Nothing is raised: a missing key, a failing piece parser or an
        invalid part entry all result in an ``Exception`` being returned.
        """
        try:
            merge_path = json["merge_path"]
            bucket = json["bucket"]
            dst_key = json["dst_key"]
            finished: list[FinishedPiece] = FinishedPiece.from_json_array(
                json["finished"]
            )
            parsed: list[Part | Exception] = [
                Part.from_json(entry) for entry in json["all"]
            ]
            upload_id: str = json["upload_id"]

            # Split the parse results into valid parts and errors in one pass.
            good: list[Part] = []
            bad: list[Exception] = []
            for item in parsed:
                if isinstance(item, Exception):
                    bad.append(item)
                else:
                    good.append(item)
            if bad:
                return Exception(f"Errors in parts: {bad}")

            return MergeState(
                rclone_impl=rclone_impl,
                merge_path=merge_path,
                upload_id=upload_id,
                bucket=bucket,
                dst_key=dst_key,
                finished=finished,
                all_parts=good,
            )
        except Exception as e:
            return e

    def to_json(self) -> dict:
        """Return the state as a JSON-serializable dict."""
        finished_snapshot = list(self.finished)
        parts_snapshot = list(self.all_parts)
        return {
            "merge_path": self.merge_path,
            "bucket": self.bucket,
            "dst_key": self.dst_key,
            "upload_id": self.upload_id,
            "finished": FinishedPiece.to_json_array(finished_snapshot),
            "all": [part.to_json() for part in parts_snapshot],
        }

    def to_json_str(self) -> str:
        """Return ``to_json()`` rendered as JSON text with a 2-space indent."""
        return json.dumps(self.to_json(), indent=2)

    def __str__(self):
        return self.to_json_str()

    def __repr__(self):
        return self.to_json_str()

    def write(self, rclone_impl: Any, dst: str) -> None:
        """Write the JSON form of this state to *dst* via ``rclone_impl.write_text``."""
        from rclone_api.rclone_impl import RcloneImpl

        assert isinstance(rclone_impl, RcloneImpl)
        rclone_impl.write_text(dst, self.to_json_str())

    def read(self, rclone_impl: Any, src: str) -> None:
        """Reload only the ``finished`` list from the JSON stored at *src*.

        Raises whatever exception object ``rclone_impl.read_text`` returns.
        """
        from rclone_api.rclone_impl import RcloneImpl

        assert isinstance(rclone_impl, RcloneImpl)
        text = rclone_impl.read_text(src)
        if isinstance(text, Exception):
            raise text
        payload = json.loads(text)
        pieces = FinishedPiece.from_json_array(payload["finished"])
        if isinstance(pieces, Exception):
            raise pieces
        self.finished = pieces
@@ -1,4 +1,3 @@
1
- import json
2
1
  import warnings
3
2
  from dataclasses import dataclass
4
3
 
@@ -13,11 +12,10 @@ class FinishedPiece:
13
12
  def to_json(self) -> dict:
14
13
  return {"part_number": self.part_number, "etag": self.etag}
15
14
 
16
- def to_json_str(self) -> str:
17
- return json.dumps(self.to_json(), indent=0)
18
-
19
15
  @staticmethod
20
- def to_json_array(parts: list["FinishedPiece | EndOfStream"]) -> list[dict]:
16
+ def to_json_array(
17
+ parts: list["FinishedPiece | EndOfStream"] | list["FinishedPiece"],
18
+ ) -> list[dict]:
21
19
  non_none: list[FinishedPiece] = []
22
20
  for p in parts:
23
21
  if not isinstance(p, EndOfStream):
@@ -39,3 +37,15 @@ class FinishedPiece:
39
37
  if json is None:
40
38
  return EndOfStream()
41
39
  return FinishedPiece(**json)
40
+
41
@staticmethod
def from_json_array(json: list[dict | None]) -> list["FinishedPiece"]:
    """Parse a JSON array into the ``FinishedPiece`` objects it contains.

    Each element is passed to ``FinishedPiece.from_json``; any result that is
    not a ``FinishedPiece`` (such as the ``EndOfStream`` returned for a
    ``None`` element) is left out of the returned list.
    """
    parsed = [FinishedPiece.from_json(entry) for entry in json]
    return [piece for piece in parsed if isinstance(piece, FinishedPiece)]
49
+
50
def __hash__(self) -> int:
    """Return a hash derived from ``part_number`` only."""
    return hash(self.part_number)