rclone-api 1.3.27__py2.py3-none-any.whl → 1.4.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. rclone_api/__init__.py +491 -4
  2. rclone_api/cmd/copy_large_s3.py +17 -10
  3. rclone_api/db/db.py +3 -3
  4. rclone_api/detail/copy_file_parts.py +382 -0
  5. rclone_api/dir.py +1 -1
  6. rclone_api/dir_listing.py +1 -1
  7. rclone_api/file.py +8 -0
  8. rclone_api/file_part.py +198 -0
  9. rclone_api/file_stream.py +52 -0
  10. rclone_api/http_server.py +15 -21
  11. rclone_api/{rclone.py → rclone_impl.py} +153 -321
  12. rclone_api/remote.py +3 -3
  13. rclone_api/rpath.py +11 -4
  14. rclone_api/s3/chunk_task.py +3 -19
  15. rclone_api/s3/multipart/file_info.py +7 -0
  16. rclone_api/s3/multipart/finished_piece.py +38 -0
  17. rclone_api/s3/multipart/upload_info.py +62 -0
  18. rclone_api/s3/{chunk_types.py → multipart/upload_state.py} +3 -99
  19. rclone_api/s3/s3_multipart_uploader.py +138 -0
  20. rclone_api/s3/types.py +1 -1
  21. rclone_api/s3/upload_file_multipart.py +14 -14
  22. rclone_api/scan_missing_folders.py +1 -1
  23. rclone_api/types.py +136 -165
  24. rclone_api/util.py +22 -2
  25. {rclone_api-1.3.27.dist-info → rclone_api-1.4.1.dist-info}/METADATA +1 -1
  26. rclone_api-1.4.1.dist-info/RECORD +55 -0
  27. rclone_api/mount_read_chunker.py +0 -130
  28. rclone_api/profile/mount_copy_bytes.py +0 -311
  29. rclone_api-1.3.27.dist-info/RECORD +0 -50
  30. /rclone_api/{walk.py → detail/walk.py} +0 -0
  31. {rclone_api-1.3.27.dist-info → rclone_api-1.4.1.dist-info}/LICENSE +0 -0
  32. {rclone_api-1.3.27.dist-info → rclone_api-1.4.1.dist-info}/WHEEL +0 -0
  33. {rclone_api-1.3.27.dist-info → rclone_api-1.4.1.dist-info}/entry_points.txt +0 -0
  34. {rclone_api-1.3.27.dist-info → rclone_api-1.4.1.dist-info}/top_level.txt +0 -0
rclone_api/s3/chunk_task.py CHANGED
@@ -6,30 +6,14 @@ from queue import Queue
  from threading import Event, Lock
  from typing import Any, Callable

- from rclone_api.mount_read_chunker import FilePart
- from rclone_api.s3.chunk_types import S3FileInfo, UploadState
+ from rclone_api.file_part import FilePart
+ from rclone_api.s3.multipart.file_info import S3FileInfo
+ from rclone_api.s3.multipart.upload_state import UploadState
  from rclone_api.types import EndOfStream

  logger = logging.getLogger(__name__) # noqa


- # def _get_file_size(file_path: Path, timeout: int = 60) -> int:
- #     sleep_time = timeout / 60 if timeout > 0 else 1
- #     start = time.time()
- #     while True:
- #         expired = time.time() - start > timeout
- #         try:
- #             time.sleep(sleep_time)
- #             if file_path.exists():
- #                 return file_path.stat().st_size
- #         except FileNotFoundError as e:
- #             if expired:
- #                 print(f"File not found: {file_path}, exception is {e}")
- #                 raise
- #         if expired:
- #             raise TimeoutError(f"File {file_path} not found after {timeout} seconds")
-
-
  class _ShouldStopChecker:
      def __init__(self, max_chunks: int | None) -> None:
          self.count = 0
rclone_api/s3/multipart/file_info.py ADDED
@@ -0,0 +1,7 @@
+ from dataclasses import dataclass
+
+
+ @dataclass
+ class S3FileInfo:
+     upload_id: str
+     part_number: int
rclone_api/s3/multipart/finished_piece.py ADDED
@@ -0,0 +1,38 @@
+ import json
+ from dataclasses import dataclass
+
+ from rclone_api.types import EndOfStream
+
+
+ @dataclass
+ class FinishedPiece:
+     part_number: int
+     etag: str
+
+     def to_json(self) -> dict:
+         return {"part_number": self.part_number, "etag": self.etag}
+
+     def to_json_str(self) -> str:
+         return json.dumps(self.to_json(), indent=0)
+
+     @staticmethod
+     def to_json_array(parts: list["FinishedPiece | EndOfStream"]) -> list[dict]:
+         non_none: list[FinishedPiece] = []
+         for p in parts:
+             if not isinstance(p, EndOfStream):
+                 non_none.append(p)
+         non_none.sort(key=lambda x: x.part_number)
+         # all_nones: list[None] = [None for p in parts if p is None]
+         # assert len(all_nones) <= 1, "Only one None should be present"
+         count_eos = 0
+         for p in parts:
+             if p is EndOfStream:
+                 count_eos += 1
+         assert count_eos <= 1, "Only one EndOfStream should be present"
+         return [p.to_json() for p in non_none]
+
+     @staticmethod
+     def from_json(json: dict | None) -> "FinishedPiece | EndOfStream":
+         if json is None:
+             return EndOfStream()
+         return FinishedPiece(**json)
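For orientation, a minimal sketch (not part of the package) of how the new FinishedPiece helpers round-trip through JSON; the part numbers and etags below are made up:

    from rclone_api.s3.multipart.finished_piece import FinishedPiece
    from rclone_api.types import EndOfStream

    # Pieces may finish out of order, and the stream is terminated by an EndOfStream sentinel.
    pieces = [
        FinishedPiece(part_number=2, etag='"bbb"'),
        FinishedPiece(part_number=1, etag='"aaa"'),
        EndOfStream(),
    ]

    # EndOfStream is dropped and the remaining pieces are sorted by part number.
    as_json = FinishedPiece.to_json_array(pieces)
    # [{'part_number': 1, 'etag': '"aaa"'}, {'part_number': 2, 'etag': '"bbb"'}]

    # from_json(None) maps back to the sentinel; a dict maps back to a FinishedPiece.
    assert isinstance(FinishedPiece.from_json(None), EndOfStream)
    restored = FinishedPiece.from_json(as_json[0])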
rclone_api/s3/multipart/upload_info.py ADDED
@@ -0,0 +1,62 @@
+ import hashlib
+ from dataclasses import dataclass, fields
+ from pathlib import Path
+
+ from botocore.client import BaseClient
+
+
+ @dataclass
+ class UploadInfo:
+     s3_client: BaseClient
+     bucket_name: str
+     object_name: str
+     src_file_path: Path
+     upload_id: str
+     retries: int
+     chunk_size: int
+     file_size: int
+     _total_chunks: int | None = None
+
+     def total_chunks(self) -> int:
+         out = self.file_size // self.chunk_size
+         if self.file_size % self.chunk_size:
+             return out + 1
+         return out
+
+     def __post_init__(self):
+         if self._total_chunks is not None:
+             return
+         self._total_chunks = self.total_chunks()
+
+     def fingerprint(self) -> str:
+         # hash the attributes that are used to identify the upload
+         hasher = hashlib.sha256()
+         # first is file size
+         hasher.update(str(self.file_size).encode("utf-8"))
+         # next is chunk size
+         hasher.update(str(self.chunk_size).encode("utf-8"))
+         # next is the number of parts
+         hasher.update(str(self._total_chunks).encode("utf-8"))
+         return hasher.hexdigest()
+
+     def to_json(self) -> dict:
+         json_dict = {}
+         for f in fields(self):
+             value = getattr(self, f.name)
+             # Convert non-serializable objects (like s3_client) to a string representation.
+             if f.name == "s3_client":
+                 continue
+             else:
+                 if isinstance(value, Path):
+                     value = str(value)
+                 json_dict[f.name] = value
+
+         return json_dict
+
+     @staticmethod
+     def from_json(s3_client: BaseClient, json_dict: dict) -> "UploadInfo":
+         # json_dict.pop("s3_client") # Remove the placeholder string
+         if "s3_client" in json_dict:
+             json_dict.pop("s3_client")
+
+         return UploadInfo(s3_client=s3_client, **json_dict)
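As a rough usage sketch (the bucket, key, sizes, and upload id below are illustrative, not taken from the package), the resume metadata survives a to_json/from_json round trip while the boto3 client itself is skipped on save and re-injected on load:

    from pathlib import Path

    import boto3

    from rclone_api.s3.multipart.upload_info import UploadInfo

    s3 = boto3.client("s3")  # placeholder client for the sketch

    info = UploadInfo(
        s3_client=s3,
        bucket_name="my-bucket",
        object_name="backups/big.bin",
        src_file_path=Path("big.bin"),
        upload_id="example-upload-id",
        retries=3,
        chunk_size=64 * 1024 * 1024,   # 64 MiB parts
        file_size=200 * 1024 * 1024,   # 200 MiB file
    )

    info.total_chunks()   # 4 -- 200 MiB / 64 MiB, rounded up
    info.fingerprint()    # stable sha256 over file size, chunk size, and part count

    state = info.to_json()                     # JSON-safe dict; s3_client omitted, Path -> str
    resumed = UploadInfo.from_json(s3, state)  # client re-injected when resuming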
rclone_api/s3/{chunk_types.py → multipart/upload_state.py} RENAMED
@@ -1,12 +1,13 @@
- import hashlib
  import json
  import os
- from dataclasses import dataclass, field, fields
+ from dataclasses import dataclass, field
  from pathlib import Path
  from threading import Lock

  from botocore.client import BaseClient

+ from rclone_api.s3.multipart.finished_piece import FinishedPiece
+ from rclone_api.s3.multipart.upload_info import UploadInfo
  from rclone_api.types import EndOfStream, SizeSuffix
  from rclone_api.util import locked_print

@@ -14,103 +15,6 @@ from rclone_api.util import locked_print
  _SAVE_STATE_LOCK = Lock()


- @dataclass
- class S3FileInfo:
-     upload_id: str
-     part_number: int
-
-
- @dataclass
- class UploadInfo:
-     s3_client: BaseClient
-     bucket_name: str
-     object_name: str
-     src_file_path: Path
-     upload_id: str
-     retries: int
-     chunk_size: int
-     file_size: int
-     _total_chunks: int | None = None
-
-     def total_chunks(self) -> int:
-         out = self.file_size // self.chunk_size
-         if self.file_size % self.chunk_size:
-             return out + 1
-         return out
-
-     def __post_init__(self):
-         if self._total_chunks is not None:
-             return
-         self._total_chunks = self.total_chunks()
-
-     def fingerprint(self) -> str:
-         # hash the attributes that are used to identify the upload
-         hasher = hashlib.sha256()
-         # first is file size
-         hasher.update(str(self.file_size).encode("utf-8"))
-         # next is chunk size
-         hasher.update(str(self.chunk_size).encode("utf-8"))
-         # next is the number of parts
-         hasher.update(str(self._total_chunks).encode("utf-8"))
-         return hasher.hexdigest()
-
-     def to_json(self) -> dict:
-         json_dict = {}
-         for f in fields(self):
-             value = getattr(self, f.name)
-             # Convert non-serializable objects (like s3_client) to a string representation.
-             if f.name == "s3_client":
-                 continue
-             else:
-                 if isinstance(value, Path):
-                     value = str(value)
-                 json_dict[f.name] = value
-
-         return json_dict
-
-     @staticmethod
-     def from_json(s3_client: BaseClient, json_dict: dict) -> "UploadInfo":
-         # json_dict.pop("s3_client") # Remove the placeholder string
-         if "s3_client" in json_dict:
-             json_dict.pop("s3_client")
-
-         return UploadInfo(s3_client=s3_client, **json_dict)
-
-
- @dataclass
- class FinishedPiece:
-     part_number: int
-     etag: str
-
-     def to_json(self) -> dict:
-         return {"part_number": self.part_number, "etag": self.etag}
-
-     def to_json_str(self) -> str:
-         return json.dumps(self.to_json(), indent=0)
-
-     @staticmethod
-     def to_json_array(parts: list["FinishedPiece | EndOfStream"]) -> list[dict]:
-         non_none: list[FinishedPiece] = []
-         for p in parts:
-             if not isinstance(p, EndOfStream):
-                 non_none.append(p)
-         non_none.sort(key=lambda x: x.part_number)
-         # all_nones: list[None] = [None for p in parts if p is None]
-         # assert len(all_nones) <= 1, "Only one None should be present"
-         count_eos = 0
-         for p in parts:
-             if p is EndOfStream:
-                 count_eos += 1
-         assert count_eos <= 1, "Only one EndOfStream should be present"
-         return [p.to_json() for p in non_none]
-
-     @staticmethod
-     def from_json(json: dict | None) -> "FinishedPiece | EndOfStream":
-         if json is None:
-             return EndOfStream()
-         return FinishedPiece(**json)
-
-
  @dataclass
  class UploadState:
      upload_info: UploadInfo
rclone_api/s3/s3_multipart_uploader.py ADDED
@@ -0,0 +1,138 @@
+ """
+ https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/client/upload_part_copy.html
+ * client.upload_part_copy
+
+
+ """
+
+ # import _thread
+ # import os
+ # import traceback
+ # import warnings
+ # from concurrent.futures import Future, ThreadPoolExecutor
+ # from pathlib import Path
+ # from queue import Queue
+ # from threading import Event, Thread
+ # from typing import Any, Callable
+
+ # from botocore.client import BaseClient
+
+ # from rclone_api.mount_read_chunker import FilePart
+ # from rclone_api.s3.chunk_task import S3FileInfo, file_chunker
+ # from rclone_api.s3.chunk_types import (
+ #     FinishedPiece,
+ #     UploadInfo,
+ #     UploadState,
+ # )
+ # from rclone_api.s3.types import MultiUploadResult
+ # from rclone_api.types import EndOfStream
+ # from rclone_api.util import locked_print
+
+
+ # This is how you upload large parts through multi part upload, then the final call
+ # is to assemble the parts that have already been uploaded through a multi part uploader
+ # and then call complete_multipart_upload to finish the upload
+ # response = (
+ #     client.upload_part_copy(
+ #         Bucket='string',
+ #         CopySource='string' or {'Bucket': 'string', 'Key': 'string', 'VersionId': 'string'},
+ #         CopySourceIfMatch='string',
+ #         CopySourceIfModifiedSince=datetime(2015, 1, 1),
+ #         CopySourceIfNoneMatch='string',
+ #         CopySourceIfUnmodifiedSince=datetime(2015, 1, 1),
+ #         CopySourceRange='string',
+ #         Key='string',
+ #         PartNumber=123,
+ #         UploadId='string',
+ #         SSECustomerAlgorithm='string',
+ #         SSECustomerKey='string',
+ #         CopySourceSSECustomerAlgorithm='string',
+ #         CopySourceSSECustomerKey='string',
+ #         RequestPayer='requester',
+ #         ExpectedBucketOwner='string',
+ #         ExpectedSourceBucketOwner='string'
+ #     )
+
+
+ # def upload_task(
+ #     info: UploadInfo,
+ #     chunk: FilePart,
+ #     part_number: int,
+ #     retries: int,
+ # ) -> FinishedPiece:
+ #     file_or_err: Path | Exception = chunk.get_file()
+ #     if isinstance(file_or_err, Exception):
+ #         raise file_or_err
+ #     file: Path = file_or_err
+ #     size = os.path.getsize(file)
+ #     retries = retries + 1 # Add one for the initial attempt
+ #     for retry in range(retries):
+ #         try:
+ #             if retry > 0:
+ #                 locked_print(f"Retrying part {part_number} for {info.src_file_path}")
+ #             locked_print(
+ #                 f"Uploading part {part_number} for {info.src_file_path} of size {size}"
+ #             )
+
+ #             with open(file, "rb") as f:
+ #                 part = info.s3_client.upload_part(
+ #                     Bucket=info.bucket_name,
+ #                     Key=info.object_name,
+ #                     PartNumber=part_number,
+ #                     UploadId=info.upload_id,
+ #                     Body=f,
+ #                 )
+ #             out: FinishedPiece = FinishedPiece(
+ #                 etag=part["ETag"], part_number=part_number
+ #             )
+ #             chunk.dispose()
+ #             return out
+ #         except Exception as e:
+ #             if retry == retries - 1:
+ #                 locked_print(f"Error uploading part {part_number}: {e}")
+ #                 chunk.dispose()
+ #                 raise e
+ #             else:
+ #                 locked_print(f"Error uploading part {part_number}: {e}, retrying")
+ #                 continue
+ #     raise Exception("Should not reach here")
+
+
+ # def prepare_upload_file_multipart(
+ #     s3_client: BaseClient,
+ #     bucket_name: str,
+ #     file_path: Path,
+ #     file_size: int | None,
+ #     object_name: str,
+ #     chunk_size: int,
+ #     retries: int,
+ # ) -> UploadInfo:
+ #     """Upload a file to the bucket using multipart upload with customizable chunk size."""
+
+ #     # Initiate multipart upload
+ #     locked_print(
+ #         f"Creating multipart upload for {file_path} to {bucket_name}/{object_name}"
+ #     )
+ #     mpu = s3_client.create_multipart_upload(Bucket=bucket_name, Key=object_name)
+ #     upload_id = mpu["UploadId"]
+
+ #     file_size = file_size if file_size is not None else os.path.getsize(file_path)
+
+ #     upload_info: UploadInfo = UploadInfo(
+ #         s3_client=s3_client,
+ #         bucket_name=bucket_name,
+ #         object_name=object_name,
+ #         src_file_path=file_path,
+ #         upload_id=upload_id,
+ #         retries=retries,
+ #         chunk_size=chunk_size,
+ #         file_size=file_size,
+ #     )
+ #     return upload_info
+
+ # class S3MultiPartUploader:
+ #     def __init__(self, s3_client: BaseClient, verbose: bool) -> None:
+ #         self.s3_client = s3_client
+ #         self.verbose = verbose
+
+ #     def prepare(self) -> UploadInfo:
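The new module ships entirely commented out; its docstring and comments describe assembling a destination object server-side from parts that already exist in S3 via upload_part_copy, then sealing it with complete_multipart_upload. A minimal, hypothetical boto3 sketch of that flow (the bucket and key names are placeholders, not taken from the package):

    import boto3

    s3 = boto3.client("s3")

    # Start a multipart upload for the destination object.
    mpu = s3.create_multipart_upload(Bucket="dst-bucket", Key="assembled/object.bin")
    upload_id = mpu["UploadId"]

    # Copy each already-uploaded source object in as one part, server-side (no local download).
    parts = []
    for part_number, src_key in enumerate(["parts/000.bin", "parts/001.bin"], start=1):
        resp = s3.upload_part_copy(
            Bucket="dst-bucket",
            Key="assembled/object.bin",
            PartNumber=part_number,
            UploadId=upload_id,
            CopySource={"Bucket": "src-bucket", "Key": src_key},
        )
        parts.append({"ETag": resp["CopyPartResult"]["ETag"], "PartNumber": part_number})

    # The final call stitches the copied parts into the destination object.
    s3.complete_multipart_upload(
        Bucket="dst-bucket",
        Key="assembled/object.bin",
        UploadId=upload_id,
        MultipartUpload={"Parts": parts},
    )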
rclone_api/s3/types.py CHANGED
@@ -4,7 +4,7 @@ from enum import Enum
  from pathlib import Path
  from typing import Any, Callable

- from rclone_api.mount_read_chunker import FilePart
+ from rclone_api.file_part import FilePart


  class S3Provider(Enum):
rclone_api/s3/upload_file_multipart.py CHANGED
@@ -10,13 +10,12 @@ from typing import Any, Callable

  from botocore.client import BaseClient

- from rclone_api.mount_read_chunker import FilePart
- from rclone_api.s3.chunk_task import S3FileInfo, file_chunker
- from rclone_api.s3.chunk_types import (
-     FinishedPiece,
-     UploadInfo,
-     UploadState,
- )
+ from rclone_api.file_part import FilePart
+ from rclone_api.s3.chunk_task import file_chunker
+ from rclone_api.s3.multipart.file_info import S3FileInfo
+ from rclone_api.s3.multipart.finished_piece import FinishedPiece
+ from rclone_api.s3.multipart.upload_info import UploadInfo
+ from rclone_api.s3.multipart.upload_state import UploadState
  from rclone_api.s3.types import MultiUploadResult
  from rclone_api.types import EndOfStream
  from rclone_api.util import locked_print
@@ -149,6 +148,10 @@ def upload_runner(
      queue_upload: Queue[FilePart | EndOfStream],
      cancel_chunker_event: Event,
  ) -> None:
+     # import semaphre
+     import threading
+
+     semaphore = threading.Semaphore(upload_threads)
      with ThreadPoolExecutor(max_workers=upload_threads) as executor:
          try:
              while True:
@@ -159,9 +162,12 @@
                  def task(upload_info=upload_info, file_chunk=file_chunk):
                      return handle_upload(upload_info, file_chunk)

+                 semaphore.acquire()
+
                  fut = executor.submit(task)

                  def done_cb(fut=fut):
+                     semaphore.release()
                      result = fut.result()
                      if isinstance(result, Exception):
                          warnings.warn(f"Error uploading part: {result}, skipping")
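The added semaphore caps how many parts are handed to the executor at once, so the chunker cannot race ahead of the uploads and pin an unbounded number of file parts in memory. A standalone sketch of the same bounded-submission pattern (the names here are illustrative, not from the package):

    import threading
    from concurrent.futures import ThreadPoolExecutor

    def bounded_submit(items, handler, max_workers: int):
        # At most `max_workers` tasks are outstanding; submission blocks until a slot frees.
        gate = threading.Semaphore(max_workers)
        futures = []
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            for item in items:
                gate.acquire()                                     # wait for a free slot
                fut = executor.submit(handler, item)
                fut.add_done_callback(lambda _f: gate.release())   # free the slot when done
                futures.append(fut)
        return [f.result() for f in futures]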
@@ -192,12 +198,6 @@ def upload_file_multipart(
  ) -> MultiUploadResult:
      """Upload a file to the bucket using multipart upload with customizable chunk size."""
      file_size = file_size if file_size is not None else os.path.getsize(str(file_path))
-     # if chunk_size > file_size:
-     #     warnings.warn(
-     #         f"Chunk size {chunk_size} is greater than file size {file_size}, using file size"
-     #     )
-     #     chunk_size = file_size
-
      if chunk_size < _MIN_UPLOAD_CHUNK_SIZE:
          raise ValueError(
              f"Chunk size {chunk_size} is less than minimum upload chunk size {_MIN_UPLOAD_CHUNK_SIZE}"
@@ -233,7 +233,7 @@
         )
         return upload_state

-     work_que_max = upload_threads // 2 + 2
+     work_que_max = 1

      new_state = make_new_state()
      loaded_state = get_upload_state()
rclone_api/scan_missing_folders.py CHANGED
@@ -6,9 +6,9 @@ from threading import Thread
  from typing import Generator

  from rclone_api import Dir
+ from rclone_api.detail.walk import walk_runner_depth_first
  from rclone_api.dir_listing import DirListing
  from rclone_api.types import ListingOption, Order
- from rclone_api.walk import walk_runner_depth_first

  _MAX_OUT_QUEUE_SIZE = 50