elaunira_r2index-0.1.0-py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries; it is provided for informational purposes only.
@@ -0,0 +1,156 @@
+ """Asynchronous R2 uploader using aioboto3."""
+
+ from collections.abc import Callable
+ from pathlib import Path
+
+ import aioboto3
+ from aiobotocore.config import AioConfig
+ from boto3.s3.transfer import TransferConfig
+ from botocore.exceptions import ClientError
+
+ from .exceptions import UploadError
+ from .uploader import R2Config
+
+ # 100MB threshold and part size for multipart uploads
+ MULTIPART_THRESHOLD = 100 * 1024 * 1024
+ MULTIPART_PART_SIZE = 100 * 1024 * 1024
+
+
+ class AsyncR2Uploader:
+     """Asynchronous R2 uploader using aioboto3."""
+
+     def __init__(self, config: R2Config) -> None:
+         """
+         Initialize the async R2 uploader.
+
+         Args:
+             config: R2 configuration with credentials and endpoint.
+         """
+         self.config = config
+         self._session = aioboto3.Session()
+
+     async def upload_file(
+         self,
+         file_path: str | Path,
+         object_key: str,
+         content_type: str | None = None,
+         progress_callback: Callable[[int], None] | None = None,
+     ) -> str:
+         """
+         Upload a file to R2 asynchronously.
+
+         Uses multipart upload for files larger than 100MB.
+
+         Args:
+             file_path: Path to the file to upload.
+             object_key: The key (path) to store the object under in R2.
+             content_type: Optional content type for the object.
+             progress_callback: Optional callback called with bytes uploaded so far.
+
+         Returns:
+             The object key of the uploaded file.
+
+         Raises:
+             UploadError: If the upload fails.
+         """
+         file_path = Path(file_path)
+
+         if not file_path.exists():
+             raise UploadError(f"File not found: {file_path}")
+
+         client_config = AioConfig(
+             max_pool_connections=10,
+         )
+
+         extra_args = {}
+         if content_type:
+             extra_args["ContentType"] = content_type
+
+         try:
+             async with self._session.client(
+                 "s3",
+                 aws_access_key_id=self.config.access_key_id,
+                 aws_secret_access_key=self.config.secret_access_key,
+                 endpoint_url=self.config.endpoint_url,
+                 region_name=self.config.region,
+                 config=client_config,
+             ) as client:
+                 callback = None
+                 if progress_callback:
+                     callback = _AsyncProgressCallback(progress_callback)
+
+                 await client.upload_file(
+                     str(file_path),
+                     self.config.bucket,
+                     object_key,
+                     ExtraArgs=extra_args if extra_args else None,
+                     Callback=callback,
+                     # Switch to multipart upload above MULTIPART_THRESHOLD
+                     Config=TransferConfig(
+                         multipart_threshold=MULTIPART_THRESHOLD,
+                         multipart_chunksize=MULTIPART_PART_SIZE,
+                     ),
+                 )
+         except Exception as e:
+             raise UploadError(f"Failed to upload file to R2: {e}") from e
+
+         return object_key
+
+     async def delete_object(self, object_key: str) -> None:
+         """
+         Delete an object from R2 asynchronously.
+
+         Args:
+             object_key: The key of the object to delete.
+
+         Raises:
+             UploadError: If the deletion fails.
+         """
+         try:
+             async with self._session.client(
+                 "s3",
+                 aws_access_key_id=self.config.access_key_id,
+                 aws_secret_access_key=self.config.secret_access_key,
+                 endpoint_url=self.config.endpoint_url,
+                 region_name=self.config.region,
+             ) as client:
+                 await client.delete_object(Bucket=self.config.bucket, Key=object_key)
+         except Exception as e:
+             raise UploadError(f"Failed to delete object from R2: {e}") from e
+
+     async def object_exists(self, object_key: str) -> bool:
+         """
+         Check if an object exists in R2 asynchronously.
+
+         Args:
+             object_key: The key of the object to check.
+
+         Returns:
+             True if the object exists, False otherwise.
+         """
+         try:
+             async with self._session.client(
+                 "s3",
+                 aws_access_key_id=self.config.access_key_id,
+                 aws_secret_access_key=self.config.secret_access_key,
+                 endpoint_url=self.config.endpoint_url,
+                 region_name=self.config.region,
+             ) as client:
+                 await client.head_object(Bucket=self.config.bucket, Key=object_key)
+                 return True
+         except ClientError as e:
+             if e.response["Error"]["Code"] == "404":
+                 return False
+             raise UploadError(f"Failed to check object existence: {e}") from e
+
+
+ class _AsyncProgressCallback:
+     """Wrapper to track cumulative progress for aioboto3 callback."""
+
+     def __init__(self, callback: Callable[[int], None]) -> None:
+         self._callback = callback
+         self._bytes_transferred = 0
+
+     def __call__(self, bytes_amount: int) -> None:
+         self._bytes_transferred += bytes_amount
+         self._callback(self._bytes_transferred)
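
For reference, a minimal usage sketch of the uploader above. The import paths and the R2Config constructor arguments are assumptions (the diff does not show the wheel's module layout), and all credential and bucket values are placeholders:

    # Hypothetical import paths; the module names are not visible in this diff.
    import asyncio

    from elaunira_r2index.uploader import R2Config
    from elaunira_r2index.async_uploader import AsyncR2Uploader


    async def main() -> None:
        # Placeholder credentials; the field names match those the uploader reads.
        config = R2Config(
            access_key_id="<access-key-id>",
            secret_access_key="<secret-access-key>",
            endpoint_url="https://<account-id>.r2.cloudflarestorage.com",
            region="auto",
            bucket="my-bucket",
        )
        uploader = AsyncR2Uploader(config)

        # progress_callback receives the cumulative byte count,
        # as tracked by _AsyncProgressCallback.
        key = await uploader.upload_file(
            "dist/archive.tar.gz",
            "releases/archive.tar.gz",
            content_type="application/gzip",
            progress_callback=lambda done: print(f"{done} bytes uploaded"),
        )
        print(await uploader.object_exists(key))  # True on success


    asyncio.run(main())

Each method opens and closes its own client context, so a single AsyncR2Uploader instance can be shared across calls without explicit cleanup.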
@@ -0,0 +1,127 @@
+ """Streaming checksum computation for large files."""
+
+ import hashlib
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import BinaryIO
+
+ # 8MB chunk size for memory-efficient processing of large files
+ CHUNK_SIZE = 8 * 1024 * 1024
+
+
+ @dataclass
+ class ChecksumResult:
+     """Result of checksum computation."""
+
+     md5: str
+     sha1: str
+     sha256: str
+     sha512: str
+     size: int
+
+
+ def compute_checksums(file_path: str | Path) -> ChecksumResult:
+     """
+     Compute MD5, SHA1, SHA256, and SHA512 checksums for a file.
+
+     Reads the file in chunks for memory-efficient processing of large files.
+     All checksums are computed in a single pass through the file.
+
+     Args:
+         file_path: Path to the file to compute checksums for.
+
+     Returns:
+         ChecksumResult containing all checksums and file size.
+     """
+     file_path = Path(file_path)
+
+     md5_hash = hashlib.md5()
+     sha1_hash = hashlib.sha1()
+     sha256_hash = hashlib.sha256()
+     sha512_hash = hashlib.sha512()
+
+     size = 0
+
+     with open(file_path, "rb") as f:
+         size = _compute_from_file_object(f, md5_hash, sha1_hash, sha256_hash, sha512_hash)
+
+     return ChecksumResult(
+         md5=md5_hash.hexdigest(),
+         sha1=sha1_hash.hexdigest(),
+         sha256=sha256_hash.hexdigest(),
+         sha512=sha512_hash.hexdigest(),
+         size=size,
+     )
+
+
+ def compute_checksums_from_file_object(file_obj: BinaryIO) -> ChecksumResult:
+     """
+     Compute checksums from a file-like object.
+
+     Args:
+         file_obj: Binary file-like object to read from.
+
+     Returns:
+         ChecksumResult containing all checksums and total bytes read.
+     """
+     md5_hash = hashlib.md5()
+     sha1_hash = hashlib.sha1()
+     sha256_hash = hashlib.sha256()
+     sha512_hash = hashlib.sha512()
+
+     size = _compute_from_file_object(file_obj, md5_hash, sha1_hash, sha256_hash, sha512_hash)
+
+     return ChecksumResult(
+         md5=md5_hash.hexdigest(),
+         sha1=sha1_hash.hexdigest(),
+         sha256=sha256_hash.hexdigest(),
+         sha512=sha512_hash.hexdigest(),
+         size=size,
+     )
+
+
+ def _compute_from_file_object(
+     file_obj: BinaryIO,
+     md5_hash: "hashlib._Hash",
+     sha1_hash: "hashlib._Hash",
+     sha256_hash: "hashlib._Hash",
+     sha512_hash: "hashlib._Hash",
+ ) -> int:
+     """
+     Internal helper to compute checksums from a file object.
+
+     Returns the total number of bytes read.
+     """
+     size = 0
+
+     while True:
+         chunk = file_obj.read(CHUNK_SIZE)
+         if not chunk:
+             break
+
+         size += len(chunk)
+         md5_hash.update(chunk)
+         sha1_hash.update(chunk)
+         sha256_hash.update(chunk)
+         sha512_hash.update(chunk)
+
+     return size
+
110
+
111
+ async def compute_checksums_async(file_path: str | Path) -> ChecksumResult:
112
+ """
113
+ Compute checksums asynchronously.
114
+
115
+ Note: This uses synchronous file I/O in a way that doesn't block the event loop
116
+ for too long by processing in chunks. For truly async file I/O, consider using
117
+ aiofiles, but for CPU-bound hashing, the benefit is minimal.
118
+
119
+ Args:
120
+ file_path: Path to the file to compute checksums for.
121
+
122
+ Returns:
123
+ ChecksumResult containing all checksums and file size.
124
+ """
125
+ import asyncio
126
+
127
+ return await asyncio.to_thread(compute_checksums, file_path)
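
Similarly, a usage sketch for the checksum helpers, again with an assumed import path:

    # Hypothetical import path; the module name is not visible in this diff.
    import asyncio

    from elaunira_r2index.checksums import compute_checksums, compute_checksums_async

    # A single pass over the file produces all four digests plus the byte count.
    result = compute_checksums("dist/archive.tar.gz")
    print(result.sha256, result.size)

    # The async wrapper runs the same computation in a worker thread.
    result = asyncio.run(compute_checksums_async("dist/archive.tar.gz"))
    print(result.md5)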