s3ui-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
s3ui/core/s3_client.py ADDED
@@ -0,0 +1,358 @@
+ """Instrumented S3 client wrapping boto3."""
+
+ from __future__ import annotations
+
+ import logging
+ from typing import TYPE_CHECKING
+
+ import boto3
+
+ from s3ui.core.errors import translate_error
+ from s3ui.models.s3_objects import S3Item
+
+ if TYPE_CHECKING:
+     from s3ui.core.cost import CostTracker
+     from s3ui.core.credentials import Profile
+
+ logger = logging.getLogger("s3ui.s3_client")
+
+
+ class S3ClientError(Exception):
+     """Wraps an S3 error with user-facing message and raw detail."""
+
+     def __init__(self, user_message: str, detail: str) -> None:
+         super().__init__(user_message)
+         self.user_message = user_message
+         self.detail = detail
+
+
+ class S3Client:
+     """Wraps boto3 S3 client with cost tracking, error translation, and logging."""
+
+     def __init__(
+         self,
+         profile: Profile,
+         cost_tracker: CostTracker | None = None,
+     ) -> None:
+         endpoint = profile.endpoint_url or None
+         if profile.is_aws_profile:
+             session = boto3.Session(profile_name=profile.name)
+             self._client = session.client(
+                 "s3",
+                 region_name=profile.region or None,
+                 endpoint_url=endpoint,
+             )
+         else:
+             self._client = boto3.client(
+                 "s3",
+                 aws_access_key_id=profile.access_key_id,
+                 aws_secret_access_key=profile.secret_access_key,
+                 region_name=profile.region,
+                 endpoint_url=endpoint,
+             )
+         self._cost = cost_tracker
+         self._profile_name = profile.name
+         logger.info(
+             "S3Client created for profile '%s' region '%s' endpoint='%s' (aws_profile=%s)",
+             profile.name,
+             profile.region,
+             profile.endpoint_url,
+             profile.is_aws_profile,
+         )
+
+     def set_cost_tracker(self, tracker: CostTracker | None) -> None:
+         """Attach or replace the cost tracker (e.g. after bucket selection)."""
+         self._cost = tracker
+
+     def _record(self, request_type: str, count: int = 1) -> None:
+         if self._cost:
+             self._cost.record_request(request_type, count)
+
+     def _record_upload_bytes(self, size: int) -> None:
+         if self._cost:
+             self._cost.record_upload_bytes(size)
+
+     def _record_download_bytes(self, size: int) -> None:
+         if self._cost:
+             self._cost.record_download_bytes(size)
+
+     def _handle_error(self, exc: Exception, operation: str) -> None:
+         user_msg, detail = translate_error(exc)
+         logger.error("S3 operation '%s' failed: %s", operation, detail)
+         raise S3ClientError(user_msg, detail) from exc
+
+     # --- Bucket operations ---
+
+     def list_buckets(self) -> list[str]:
+         """Return a list of bucket names."""
+         try:
+             logger.debug("list_buckets")
+             self._record("list")
+             response = self._client.list_buckets()
+             return [b["Name"] for b in response.get("Buckets", [])]
+         except Exception as e:
+             self._handle_error(e, "list_buckets")
+
+     # --- Listing ---
+
+     def list_objects(
+         self, bucket: str, prefix: str = "", delimiter: str = "/"
+     ) -> tuple[list[S3Item], list[str]]:
+         """List objects and common prefixes under a prefix.
+
+         Returns (objects, common_prefixes). Handles pagination internally.
+         """
+         try:
+             logger.debug("list_objects bucket=%s prefix='%s'", bucket, prefix)
+             objects: list[S3Item] = []
+             prefixes: list[str] = []
+
+             paginator = self._client.get_paginator("list_objects_v2")
+             pages = paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter=delimiter)
+
+             page_count = 0
+             for page in pages:
+                 page_count += 1
+                 self._record("list")
+
+                 for obj in page.get("Contents", []):
+                     key = obj["Key"]
+                     # Skip the prefix itself (S3 may return the prefix as an object)
+                     if key == prefix:
+                         continue
+                     name = key[len(prefix) :] if prefix else key
+                     objects.append(
+                         S3Item(
+                             name=name,
+                             key=key,
+                             is_prefix=False,
+                             size=obj.get("Size"),
+                             last_modified=obj.get("LastModified"),
+                             storage_class=obj.get("StorageClass"),
+                             etag=obj.get("ETag"),
+                         )
+                     )
+
+                 for cp in page.get("CommonPrefixes", []):
+                     p = cp["Prefix"]
+                     name = p[len(prefix) :].rstrip("/") if prefix else p.rstrip("/")
+                     prefixes.append(p)
+                     objects.append(S3Item(name=name, key=p, is_prefix=True))
+
+             logger.debug(
+                 "list_objects returned %d items, %d prefixes across %d pages",
+                 len(objects),
+                 len(prefixes),
+                 page_count,
+             )
+             return objects, prefixes
+         except Exception as e:
+             self._handle_error(e, "list_objects")
+
+     def head_object(self, bucket: str, key: str) -> S3Item:
+         """Get full metadata for a single object."""
+         try:
+             logger.debug("head_object bucket=%s key='%s'", bucket, key)
+             self._record("head")
+             resp = self._client.head_object(Bucket=bucket, Key=key)
+             name = key.rsplit("/", 1)[-1] if "/" in key else key
+             return S3Item(
+                 name=name,
+                 key=key,
+                 is_prefix=False,
+                 size=resp.get("ContentLength"),
+                 last_modified=resp.get("LastModified"),
+                 storage_class=resp.get("StorageClass"),
+                 etag=resp.get("ETag"),
+             )
+         except Exception as e:
+             self._handle_error(e, "head_object")
+
+     # --- Single object operations ---
+
+     def put_object(self, bucket: str, key: str, body: bytes) -> None:
+         """Upload a small object in a single request."""
+         try:
+             logger.debug("put_object bucket=%s key='%s' size=%d", bucket, key, len(body))
+             self._record("put")
+             self._client.put_object(Bucket=bucket, Key=key, Body=body)
+             self._record_upload_bytes(len(body))
+         except Exception as e:
+             self._handle_error(e, "put_object")
+
+     def get_object(self, bucket: str, key: str, range_header: str | None = None):
+         """Download an object (or a byte range). Returns the streaming body."""
+         try:
+             logger.debug("get_object bucket=%s key='%s' range=%s", bucket, key, range_header)
+             self._record("get")
+             kwargs = {"Bucket": bucket, "Key": key}
+             if range_header:
+                 kwargs["Range"] = range_header
+             return self._client.get_object(**kwargs)["Body"]
+         except Exception as e:
+             self._handle_error(e, "get_object")
+
+     def delete_object(self, bucket: str, key: str) -> None:
+         """Delete a single object."""
+         try:
+             logger.debug("delete_object bucket=%s key='%s'", bucket, key)
+             self._record("delete")
+             self._client.delete_object(Bucket=bucket, Key=key)
+         except Exception as e:
+             self._handle_error(e, "delete_object")
+
+     def delete_objects(self, bucket: str, keys: list[str]) -> list[str]:
+         """Batch delete up to 1000 objects. Returns list of keys that failed."""
+         try:
+             logger.debug("delete_objects bucket=%s count=%d", bucket, len(keys))
+             self._record("delete", len(keys))
+             response = self._client.delete_objects(
+                 Bucket=bucket,
+                 Delete={"Objects": [{"Key": k} for k in keys], "Quiet": True},
+             )
+             errors = response.get("Errors", [])
+             if errors:
+                 failed = [e["Key"] for e in errors]
+                 logger.warning("delete_objects partial failure: %d failed", len(failed))
+                 return failed
+             return []
+         except Exception as e:
+             self._handle_error(e, "delete_objects")
+
+     def copy_object(self, src_bucket: str, src_key: str, dst_bucket: str, dst_key: str) -> None:
+         """Server-side copy with metadata preservation."""
+         try:
+             logger.debug("copy_object %s/%s -> %s/%s", src_bucket, src_key, dst_bucket, dst_key)
+             self._record("copy")
+             self._client.copy_object(
+                 Bucket=dst_bucket,
+                 Key=dst_key,
+                 CopySource={"Bucket": src_bucket, "Key": src_key},
+                 MetadataDirective="COPY",
+             )
+         except Exception as e:
+             self._handle_error(e, "copy_object")
+
+     # --- Multipart upload ---
+
+     def create_multipart_upload(self, bucket: str, key: str) -> str:
+         """Initiate a multipart upload. Returns the upload_id."""
+         try:
+             logger.debug("create_multipart_upload bucket=%s key='%s'", bucket, key)
+             self._record("put")
+             response = self._client.create_multipart_upload(Bucket=bucket, Key=key)
+             upload_id = response["UploadId"]
+             logger.debug("Multipart upload initiated: upload_id=%s", upload_id)
+             return upload_id
+         except Exception as e:
+             self._handle_error(e, "create_multipart_upload")
+
+     def upload_part(
+         self, bucket: str, key: str, upload_id: str, part_number: int, body: bytes
+     ) -> str:
+         """Upload a single part. Returns the ETag."""
+         try:
+             logger.debug(
+                 "upload_part bucket=%s key='%s' part=%d size=%d",
+                 bucket,
+                 key,
+                 part_number,
+                 len(body),
+             )
+             self._record("put")
+             response = self._client.upload_part(
+                 Bucket=bucket,
+                 Key=key,
+                 UploadId=upload_id,
+                 PartNumber=part_number,
+                 Body=body,
+             )
+             self._record_upload_bytes(len(body))
+             return response["ETag"]
+         except Exception as e:
+             self._handle_error(e, "upload_part")
+
+     def complete_multipart_upload(
+         self, bucket: str, key: str, upload_id: str, parts: list[dict]
+     ) -> None:
+         """Complete a multipart upload. parts is a list of {'ETag': ..., 'PartNumber': ...}."""
+         try:
+             logger.debug(
+                 "complete_multipart_upload bucket=%s key='%s' parts=%d",
+                 bucket,
+                 key,
+                 len(parts),
+             )
+             self._record("put")
+             self._client.complete_multipart_upload(
+                 Bucket=bucket,
+                 Key=key,
+                 UploadId=upload_id,
+                 MultipartUpload={"Parts": parts},
+             )
+         except Exception as e:
+             self._handle_error(e, "complete_multipart_upload")
+
+     def abort_multipart_upload(self, bucket: str, key: str, upload_id: str) -> None:
+         """Abort a multipart upload and clean up parts."""
+         try:
+             logger.debug(
+                 "abort_multipart_upload bucket=%s key='%s' upload_id=%s",
+                 bucket,
+                 key,
+                 upload_id,
+             )
+             self._client.abort_multipart_upload(Bucket=bucket, Key=key, UploadId=upload_id)
+         except Exception as e:
+             self._handle_error(e, "abort_multipart_upload")
+
+     def list_parts(self, bucket: str, key: str, upload_id: str) -> list[dict]:
+         """List uploaded parts for a multipart upload."""
+         try:
+             logger.debug("list_parts bucket=%s key='%s' upload_id=%s", bucket, key, upload_id)
+             self._record("list")
+             parts = []
+             kwargs = {"Bucket": bucket, "Key": key, "UploadId": upload_id}
+             while True:
+                 response = self._client.list_parts(**kwargs)
+                 for p in response.get("Parts", []):
+                     parts.append(
+                         {
+                             "PartNumber": p["PartNumber"],
+                             "ETag": p["ETag"],
+                             "Size": p["Size"],
+                         }
+                     )
+                 if response.get("IsTruncated"):
+                     kwargs["PartNumberMarker"] = response["NextPartNumberMarker"]
+                 else:
+                     break
+             return parts
+         except Exception as e:
+             self._handle_error(e, "list_parts")
+
+     def list_multipart_uploads(self, bucket: str) -> list[dict]:
+         """List in-progress multipart uploads for orphan cleanup."""
+         try:
+             logger.debug("list_multipart_uploads bucket=%s", bucket)
+             self._record("list")
+             uploads = []
+             kwargs = {"Bucket": bucket}
+             while True:
+                 response = self._client.list_multipart_uploads(**kwargs)
+                 for u in response.get("Uploads", []):
+                     uploads.append(
+                         {
+                             "Key": u["Key"],
+                             "UploadId": u["UploadId"],
+                             "Initiated": u["Initiated"],
+                         }
+                     )
+                 if response.get("IsTruncated"):
+                     kwargs["KeyMarker"] = response["NextKeyMarker"]
+                     kwargs["UploadIdMarker"] = response["NextUploadIdMarker"]
+                 else:
+                     break
+             return uploads
+         except Exception as e:
+             self._handle_error(e, "list_multipart_uploads")
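For orientation, a minimal usage sketch of this wrapper (not part of the package): the Profile field names match what __init__ reads above, but the keyword-argument constructor, the endpoint, the bucket and key names, and the 8 MiB part size are assumptions; only the 5 MiB S3 minimum part size is a hard constraint.

# Hypothetical usage sketch; only the S3Client methods shown in the diff above are relied on.
from s3ui.core.credentials import Profile
from s3ui.core.s3_client import S3Client, S3ClientError

profile = Profile(
    name="minio-local",  # assumed constructor; field names taken from S3Client.__init__
    region="us-east-1",
    endpoint_url="http://localhost:9000",
    access_key_id="minioadmin",
    secret_access_key="minioadmin",
    is_aws_profile=False,
)
client = S3Client(profile)

try:
    print(client.list_buckets())
    objects, prefixes = client.list_objects("my-bucket", prefix="photos/")

    # Multipart flow: initiate, upload parts, complete; abort on any failure.
    upload_id = client.create_multipart_upload("my-bucket", "backups/large.bin")
    parts = []
    try:
        with open("large.bin", "rb") as fh:
            part_number = 1
            while chunk := fh.read(8 * 1024 * 1024):  # 8 MiB parts (>= 5 MiB S3 minimum)
                etag = client.upload_part("my-bucket", "backups/large.bin", upload_id, part_number, chunk)
                parts.append({"ETag": etag, "PartNumber": part_number})
                part_number += 1
        client.complete_multipart_upload("my-bucket", "backups/large.bin", upload_id, parts)
    except Exception:
        client.abort_multipart_upload("my-bucket", "backups/large.bin", upload_id)
        raise
except S3ClientError as err:
    print(err.user_message, "--", err.detail)

Every call raises S3ClientError on failure (via _handle_error), so a single except clause covers the whole flow, and the abort keeps orphaned parts from accumulating.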
s3ui/core/stats.py ADDED
@@ -0,0 +1,128 @@
+ """Bucket statistics collector."""
+
+ from __future__ import annotations
+
+ import logging
+ import threading
+ from dataclasses import dataclass, field
+ from typing import TYPE_CHECKING
+
+ from PyQt6.QtCore import QObject, QThread, pyqtSignal
+
+ if TYPE_CHECKING:
+     from s3ui.core.s3_client import S3Client
+     from s3ui.db.database import Database
+
+ logger = logging.getLogger("s3ui.stats")
+
+
+ @dataclass
+ class BucketSnapshot:
+     """Results of a bucket scan."""
+
+     bucket: str
+     total_count: int = 0
+     total_bytes: int = 0
+     bytes_by_class: dict[str, int] = field(default_factory=dict)
+     count_by_class: dict[str, int] = field(default_factory=dict)
+     top_largest: list[dict] = field(default_factory=list)  # [{key, size}]
+
+
+ class _ScanSignals(QObject):
+     progress = pyqtSignal(int)  # objects_counted
+     complete = pyqtSignal(BucketSnapshot)
+     error = pyqtSignal(str)
+
+
+ class StatsCollector(QThread):
+     """Background thread that scans a bucket for statistics."""
+
+     def __init__(
+         self,
+         s3_client: S3Client,
+         bucket: str,
+         db: Database | None = None,
+         parent: QObject | None = None,
+     ) -> None:
+         super().__init__(parent)
+         self.signals = _ScanSignals()
+         self._s3 = s3_client
+         self._bucket = bucket
+         self._db = db
+         self._cancel = threading.Event()
+
+     def cancel(self) -> None:
+         self._cancel.set()
+
+     def run(self) -> None:
+         try:
+             snapshot = BucketSnapshot(bucket=self._bucket)
+             top_heap: list[tuple[int, str]] = []  # (size, key)
+
+             paginator = self._s3._client.get_paginator("list_objects_v2")
+             pages = paginator.paginate(Bucket=self._bucket)
+
+             for page in pages:
+                 if self._cancel.is_set():
+                     return
+
+                 for obj in page.get("Contents", []):
+                     size = obj.get("Size", 0)
+                     storage_class = obj.get("StorageClass", "STANDARD")
+
+                     snapshot.total_count += 1
+                     snapshot.total_bytes += size
+                     snapshot.bytes_by_class[storage_class] = (
+                         snapshot.bytes_by_class.get(storage_class, 0) + size
+                     )
+                     snapshot.count_by_class[storage_class] = (
+                         snapshot.count_by_class.get(storage_class, 0) + 1
+                     )
+
+                     # Track top 10 largest
+                     import heapq
+
+                     if len(top_heap) < 10:
+                         heapq.heappush(top_heap, (size, obj["Key"]))
+                     elif size > top_heap[0][0]:
+                         heapq.heapreplace(top_heap, (size, obj["Key"]))
+
+                 self.signals.progress.emit(snapshot.total_count)
+
+             snapshot.top_largest = [
+                 {"key": key, "size": size} for size, key in sorted(top_heap, reverse=True)
+             ]
+
+             # Save to database
+             if self._db:
+                 import json
+
+                 self._db.execute(
+                     "INSERT INTO bucket_snapshots "
+                     "(bucket_id, total_objects, total_bytes, breakdown_json) "
+                     "VALUES ("
+                     "(SELECT id FROM buckets WHERE name = ? LIMIT 1), ?, ?, ?)",
+                     (
+                         self._bucket,
+                         snapshot.total_count,
+                         snapshot.total_bytes,
+                         json.dumps(
+                             {
+                                 "bytes_by_class": snapshot.bytes_by_class,
+                                 "count_by_class": snapshot.count_by_class,
+                                 "top_largest": snapshot.top_largest,
+                             }
+                         ),
+                     ),
+                 )
+
+             self.signals.complete.emit(snapshot)
+             logger.info(
+                 "Scan complete for '%s': %d objects, %d bytes",
+                 self._bucket,
+                 snapshot.total_count,
+                 snapshot.total_bytes,
+             )
+         except Exception as e:
+             logger.error("Scan failed for '%s': %s", self._bucket, e)
+             self.signals.error.emit(str(e))
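And a hedged sketch of how this collector might be driven from a PyQt6 application (not from the package): the signal names come from _ScanSignals above, while the bucket name, the slot bodies, and the S3Client instance (`client`, built as in the earlier sketch) are assumptions.

# Hypothetical wiring sketch; 'client' is an S3Client built as in the previous example.
import sys

from PyQt6.QtWidgets import QApplication

from s3ui.core.stats import BucketSnapshot, StatsCollector

app = QApplication(sys.argv)

collector = StatsCollector(s3_client=client, bucket="my-bucket", db=None)
collector.signals.progress.connect(lambda n: print(f"scanned {n} objects..."))
collector.signals.error.connect(lambda msg: print(f"scan failed: {msg}"))

def on_complete(snapshot: BucketSnapshot) -> None:
    print(f"{snapshot.total_count} objects, {snapshot.total_bytes} bytes")
    for entry in snapshot.top_largest:
        print(f"  {entry['size']:>14,}  {entry['key']}")

collector.signals.complete.connect(on_complete)
collector.finished.connect(app.quit)  # QThread.finished fires after run() returns
collector.start()                     # run() executes the scan off the GUI thread
sys.exit(app.exec())

Because StatsCollector is a QThread, start() runs the scan in the background and the per-page progress signal keeps a GUI responsive; cancel() sets the threading.Event that run() checks between pages.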