s3ui-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
s3ui/core/transfers.py ADDED
@@ -0,0 +1,281 @@
+ """Transfer engine — manages queue of uploads and downloads."""
+
+ from __future__ import annotations
+
+ import datetime
+ import logging
+ import threading
+ from typing import TYPE_CHECKING
+
+ from PyQt6.QtCore import QObject, QThreadPool, pyqtSignal
+
+ from s3ui.core.download_worker import DownloadWorker
+ from s3ui.core.upload_worker import UploadWorker
+
+ if TYPE_CHECKING:
+     from s3ui.core.s3_client import S3Client
+     from s3ui.db.database import Database
+
+ logger = logging.getLogger("s3ui.transfers")
+
+
+ class TransferEngine(QObject):
+     """Manages the transfer queue and worker pool."""
+
+     transfer_progress = pyqtSignal(int, int, int)  # transfer_id, bytes_done, total
+     transfer_speed = pyqtSignal(int, float)  # transfer_id, bytes_per_sec
+     transfer_status_changed = pyqtSignal(int, str)  # transfer_id, new_status
+     transfer_error = pyqtSignal(int, str, str)  # transfer_id, user_msg, detail
+     transfer_finished = pyqtSignal(int)  # transfer_id
+
+     def __init__(
+         self,
+         s3_client: S3Client,
+         db: Database,
+         bucket: str,
+         max_workers: int = 4,
+     ) -> None:
+         super().__init__()
+         self._s3 = s3_client
+         self._db = db
+         self._bucket = bucket
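+         # Look up the bucket's database id once so queue queries can be scoped to it.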
+         row = self._db.fetchone(
+             "SELECT id FROM buckets WHERE name = ? ORDER BY id DESC LIMIT 1",
+             (bucket,),
+         )
+         self._bucket_id = row["id"] if row else None
+         self._pool = QThreadPool()
+         self._pool.setMaxThreadCount(max_workers)
+
+         # Per-transfer control events
+         self._pause_events: dict[int, threading.Event] = {}
+         self._cancel_events: dict[int, threading.Event] = {}
+         self._active: set[int] = set()
+         self._paused_global = False
+
+     def enqueue(self, transfer_id: int) -> None:
+         """Submit a transfer to the worker pool."""
+         row = self._db.fetchone("SELECT * FROM transfers WHERE id = ?", (transfer_id,))
+         if not row:
+             logger.warning("Cannot enqueue transfer %d: not found", transfer_id)
+             return
+
+         pause_evt = threading.Event()
+         cancel_evt = threading.Event()
+         self._pause_events[transfer_id] = pause_evt
+         self._cancel_events[transfer_id] = cancel_evt
+         self._active.add(transfer_id)
+
+         if row["direction"] == "upload":
+             worker = UploadWorker(
+                 transfer_id,
+                 self._s3,
+                 self._db,
+                 self._bucket,
+                 pause_evt,
+                 cancel_evt,
+             )
+         else:
+             worker = DownloadWorker(
+                 transfer_id,
+                 self._s3,
+                 self._db,
+                 self._bucket,
+                 pause_evt,
+                 cancel_evt,
+             )
+
+         # Connect signals
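+         # Workers emit these from pool threads; Qt's default AutoConnection
+         # delivers them queued back to the thread that owns this engine.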
+         worker.signals.progress.connect(self._on_progress)
+         worker.signals.speed.connect(self._on_speed)
+         worker.signals.finished.connect(self._on_finished)
+         worker.signals.failed.connect(self._on_failed)
+
+         self._pool.start(worker)
+         self.transfer_status_changed.emit(transfer_id, "in_progress")
+         logger.info("Enqueued transfer %d (%s)", transfer_id, row["direction"])
+
+     def pause(self, transfer_id: int) -> None:
+         """Pause a running transfer."""
+         evt = self._pause_events.get(transfer_id)
+         if evt:
+             evt.set()
+             self.transfer_status_changed.emit(transfer_id, "paused")
+
+     def resume(self, transfer_id: int) -> None:
+         """Resume a paused transfer by re-enqueuing it."""
+         self._db.execute(
+             "UPDATE transfers SET status = 'queued', updated_at = datetime('now') WHERE id = ?",
+             (transfer_id,),
+         )
+         self.enqueue(transfer_id)
+
+     def cancel(self, transfer_id: int) -> None:
+         """Cancel a transfer."""
+         evt = self._cancel_events.get(transfer_id)
+         if evt:
+             evt.set()
+             self.transfer_status_changed.emit(transfer_id, "cancelled")
+
+     def pause_all(self) -> None:
+         """Pause all active transfers."""
+         self._paused_global = True
+         for tid in list(self._active):
+             self.pause(tid)
+
+     def resume_all(self) -> None:
+         """Resume all paused transfers."""
+         self._paused_global = False
+         rows = self._db.fetchall(
+             "SELECT id FROM transfers WHERE status = 'paused' AND bucket_id = "
+             "(SELECT id FROM buckets WHERE name = ? LIMIT 1)",
+             (self._bucket,),
+         )
+         for row in rows:
+             self.resume(row["id"])
+
+     def retry(self, transfer_id: int) -> None:
+         """Retry a failed transfer."""
+         self._db.execute(
+             "UPDATE transfers SET status = 'queued', retry_count = 0, "
+             "error_message = NULL, updated_at = datetime('now') WHERE id = ?",
+             (transfer_id,),
+         )
+         self.enqueue(transfer_id)
+
+     def restore_pending(self) -> None:
+         """Restore transfers that were interrupted by an app shutdown."""
+         from pathlib import Path
+
+         rows = self._db.fetchall(
+             "SELECT id, direction, local_path, status FROM transfers "
+             "WHERE status IN ('queued', 'in_progress', 'paused')"
+         )
+         for row in rows:
+             local = Path(row["local_path"])
+
+             if row["direction"] == "upload" and not local.exists():
+                 self._db.execute(
+                     "UPDATE transfers SET status = 'failed', "
+                     "error_message = 'Source file no longer exists.', "
+                     "updated_at = datetime('now') WHERE id = ?",
+                     (row["id"],),
+                 )
+                 logger.warning("Transfer %d: source file missing: %s", row["id"], local)
+                 continue
+
+             if row["direction"] == "download" and not local.parent.exists():
+                 self._db.execute(
+                     "UPDATE transfers SET status = 'failed', "
+                     "error_message = 'Destination directory no longer exists.', "
+                     "updated_at = datetime('now') WHERE id = ?",
+                     (row["id"],),
+                 )
+                 logger.warning("Transfer %d: dest dir missing: %s", row["id"], local.parent)
+                 continue
+
+             # Reset in_progress to queued
+             if row["status"] == "in_progress":
+                 self._db.execute(
+                     "UPDATE transfers SET status = 'queued', "
+                     "updated_at = datetime('now') WHERE id = ?",
+                     (row["id"],),
+                 )
+
+             self.enqueue(row["id"])
+             logger.info("Restored transfer %d", row["id"])
+
+     # --- Signal handlers ---
+
+     def _on_progress(self, transfer_id: int, bytes_done: int, total: int) -> None:
+         self.transfer_progress.emit(transfer_id, bytes_done, total)
+
+     def _on_speed(self, transfer_id: int, bps: float) -> None:
+         self.transfer_speed.emit(transfer_id, bps)
+
+     def _on_finished(self, transfer_id: int) -> None:
+         self._cleanup(transfer_id)
+         self.transfer_finished.emit(transfer_id)
+         self._pick_next()
+
+     def _on_failed(self, transfer_id: int, user_msg: str, detail: str) -> None:
+         self._cleanup(transfer_id)
+         self.transfer_error.emit(transfer_id, user_msg, detail)
+         self._pick_next()
+
+     def _cleanup(self, transfer_id: int) -> None:
+         self._active.discard(transfer_id)
+         self._pause_events.pop(transfer_id, None)
+         self._cancel_events.pop(transfer_id, None)
+
+     def _pick_next(self) -> None:
+         """Start the next queued transfer if a slot is available."""
+         if self._paused_global:
+             return
+         if self._bucket_id is None:
+             row = self._db.fetchone(
+                 "SELECT id FROM transfers WHERE status = 'queued' ORDER BY created_at ASC LIMIT 1"
+             )
+         else:
+             row = self._db.fetchone(
+                 "SELECT id FROM transfers WHERE status = 'queued' AND bucket_id = ? "
+                 "ORDER BY created_at ASC LIMIT 1",
+                 (self._bucket_id,),
+             )
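+         # Don't double-start a transfer that is already running in the pool.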
+         if row and row["id"] not in self._active:
+             self.enqueue(row["id"])
+
+     def cleanup_orphaned_uploads(self) -> int:
+         """Abort orphaned multipart uploads on S3 not tracked in the database.
+
+         Only aborts uploads older than 24 hours to avoid interfering with
+         uploads started by other tools. Returns the number of aborted uploads.
+         """
+         try:
+             s3_uploads = self._s3.list_multipart_uploads(self._bucket)
+         except Exception:
+             logger.warning("Failed to list multipart uploads for orphan cleanup")
+             return 0
+
+         # Collect known upload IDs from our database
+         rows = self._db.fetchall("SELECT upload_id FROM transfers WHERE upload_id IS NOT NULL")
+         known_ids = {r["upload_id"] for r in rows}
+
+         cutoff = datetime.datetime.now(datetime.UTC) - datetime.timedelta(hours=24)
+         aborted = 0
+
+         for upload in s3_uploads:
+             uid = upload["UploadId"]
+             if uid in known_ids:
+                 continue  # We own this one
+
+             initiated = upload["Initiated"]
+             # Make sure initiated is offset-aware for comparison
+             if initiated.tzinfo is None:
+                 initiated = initiated.replace(tzinfo=datetime.UTC)
+
+             if initiated < cutoff:
+                 try:
+                     self._s3.abort_multipart_upload(self._bucket, upload["Key"], uid)
+                     aborted += 1
+                     logger.info(
+                         "Aborted orphaned multipart upload: key=%s upload_id=%s",
+                         upload["Key"],
+                         uid,
+                     )
+                 except Exception:
+                     logger.warning(
+                         "Failed to abort orphaned upload: key=%s upload_id=%s",
+                         upload["Key"],
+                         uid,
+                     )
+             else:
+                 logger.debug(
+                     "Skipping recent orphaned upload: key=%s upload_id=%s (< 24h old)",
+                     upload["Key"],
+                     uid,
+                 )
+
+         if aborted:
+             logger.info("Orphan cleanup: aborted %d uploads", aborted)
+         return aborted
s3ui/core/upload_worker.py ADDED
@@ -0,0 +1,311 @@
+ """Upload worker — handles single and multipart uploads as a QRunnable."""
+
+ from __future__ import annotations
+
+ import logging
+ import math
+ import random
+ import threading
+ import time
+ from typing import TYPE_CHECKING
+
+ from PyQt6.QtCore import QObject, QRunnable, pyqtSignal
+
+ from s3ui.constants import (
+     DEFAULT_PART_SIZE,
+     HUGE_PART_SIZE,
+     LARGE_PART_SIZE,
+     MAX_RETRY_ATTEMPTS,
+     MULTIPART_THRESHOLD,
+ )
+
+ if TYPE_CHECKING:
+     from s3ui.core.s3_client import S3Client
+     from s3ui.db.database import Database
+
+ logger = logging.getLogger("s3ui.upload_worker")
+
+
+ def select_part_size(file_size: int) -> int:
+     """Choose part size based on file size to stay under 10,000 parts."""
+     if file_size <= 50 * 1024**3:  # ≤50 GB
+         return DEFAULT_PART_SIZE
+     elif file_size <= 500 * 1024**3:  # ≤500 GB
+         return LARGE_PART_SIZE
+     else:
+         return HUGE_PART_SIZE
+
+
+ class UploadWorkerSignals(QObject):
+     progress = pyqtSignal(int, int, int)  # transfer_id, bytes_done, total
+     speed = pyqtSignal(int, float)  # transfer_id, bytes_per_sec
+     finished = pyqtSignal(int)  # transfer_id
+     failed = pyqtSignal(int, str, str)  # transfer_id, user_msg, detail
+
+
+ class UploadWorker(QRunnable):
+     """Uploads a file to S3, with multipart support and resume."""
+
+     def __init__(
+         self,
+         transfer_id: int,
+         s3_client: S3Client,
+         db: Database,
+         bucket: str,
+         pause_event: threading.Event,
+         cancel_event: threading.Event,
+     ) -> None:
+         super().__init__()
+         self.setAutoDelete(True)
+         self.signals = UploadWorkerSignals()
+         self.transfer_id = transfer_id
+         self._s3 = s3_client
+         self._db = db
+         self._bucket = bucket
+         self._pause = pause_event
+         self._cancel = cancel_event
+
+         # Speed tracking
+         self._speed_window: list[tuple[float, int]] = []
+         self._last_speed_emit = 0.0
+
+     def run(self) -> None:
+         try:
+             self._do_upload()
+         except Exception as e:
+             import traceback
+
+             logger.error("Upload %d failed: %s", self.transfer_id, e)
+             try:
+                 self._mark_failed(str(e))
+             except Exception:
+                 logger.exception("Failed to mark upload %d as failed", self.transfer_id)
+             self.signals.failed.emit(self.transfer_id, str(e), traceback.format_exc())
+
+     def _do_upload(self) -> None:
+         row = self._db.fetchone("SELECT * FROM transfers WHERE id = ?", (self.transfer_id,))
+         if not row:
+             self.signals.failed.emit(self.transfer_id, "Transfer record not found.", "")
+             return
+
+         from pathlib import Path
+
+         local_path = Path(row["local_path"])
+         if not local_path.exists():
+             self._mark_failed("Source file no longer exists.")
+             self.signals.failed.emit(
+                 self.transfer_id,
+                 "Source file no longer exists.",
+                 str(local_path),
+             )
+             return
+
+         object_key = row["object_key"]
+         file_size = local_path.stat().st_size
+
+         # Update total_bytes if not set
+         if row["total_bytes"] is None or row["total_bytes"] != file_size:
+             self._db.execute(
+                 "UPDATE transfers SET total_bytes = ? WHERE id = ?",
+                 (file_size, self.transfer_id),
+             )
+
+         self._db.execute(
+             "UPDATE transfers SET status = 'in_progress', updated_at = datetime('now') "
+             "WHERE id = ?",
+             (self.transfer_id,),
+         )
+
+         if file_size < MULTIPART_THRESHOLD:
+             self._single_upload(local_path, object_key, file_size)
+         else:
+             self._multipart_upload(local_path, object_key, file_size, row)
+
+     def _single_upload(self, local_path, object_key: str, file_size: int) -> None:
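+         # Below the multipart threshold the whole file is read into memory and sent as one PUT.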
+         data = local_path.read_bytes()
+         self._s3.put_object(self._bucket, object_key, data)
+         self._complete(file_size)
+
+     def _multipart_upload(self, local_path, object_key: str, file_size: int, row) -> None:
+         part_size = select_part_size(file_size)
+         num_parts = math.ceil(file_size / part_size)
+         upload_id = row["upload_id"]
+
+         # Initiate or resume
+         if not upload_id:
+             upload_id = self._s3.create_multipart_upload(self._bucket, object_key)
+             self._db.execute(
+                 "UPDATE transfers SET upload_id = ? WHERE id = ?",
+                 (upload_id, self.transfer_id),
+             )
+             # Create part records
+             for i in range(num_parts):
+                 offset = i * part_size
+                 size = min(part_size, file_size - offset)
+                 self._db.execute(
+                     "INSERT OR IGNORE INTO transfer_parts "
+                     "(transfer_id, part_number, offset, size) VALUES (?, ?, ?, ?)",
+                     (self.transfer_id, i + 1, offset, size),
+                 )
+         else:
+             # Resuming: reconcile with S3 so already-uploaded parts are not sent again.
+             # Record each confirmed part's ETag; it is needed to complete the upload.
+             s3_parts = self._s3.list_parts(self._bucket, object_key, upload_id)
+             for part in s3_parts:
+                 self._db.execute(
+                     "UPDATE transfer_parts SET status = 'completed', etag = ? "
+                     "WHERE transfer_id = ? AND part_number = ?",
+                     (part["ETag"], self.transfer_id, part["PartNumber"]),
+                 )
+
+         # Upload pending parts
+         pending = self._db.fetchall(
+             "SELECT * FROM transfer_parts WHERE transfer_id = ? AND status != 'completed' "
+             "ORDER BY part_number",
+             (self.transfer_id,),
+         )
+
+         bytes_done = self._get_transferred()
+         parts_for_complete = self._get_completed_parts()
+
+         with open(local_path, "rb") as f:
+             for part_row in pending:
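+                 # Check the control events between parts so pause/cancel take effect at part boundaries.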
+                 if self._cancel.is_set():
+                     self._do_cancel(object_key, upload_id)
+                     return
+                 if self._pause.is_set():
+                     self._do_pause()
+                     return
+
+                 part_num = part_row["part_number"]
+                 offset = part_row["offset"]
+                 size = part_row["size"]
+
+                 f.seek(offset)
+                 data = f.read(size)
+                 etag = self._upload_part_with_retry(object_key, upload_id, part_num, data)
+                 if etag is None:
+                     return  # failed signal already emitted
+
+                 self._db.execute(
+                     "UPDATE transfer_parts SET status = 'completed', etag = ? "
+                     "WHERE transfer_id = ? AND part_number = ?",
+                     (etag, self.transfer_id, part_num),
+                 )
+
+                 bytes_done += size
+                 self._db.execute(
+                     "UPDATE transfers SET transferred = ?, updated_at = datetime('now') "
+                     "WHERE id = ?",
+                     (bytes_done, self.transfer_id),
+                 )
+                 self.signals.progress.emit(self.transfer_id, bytes_done, file_size)
+                 self._update_speed(size)
+                 parts_for_complete.append({"ETag": etag, "PartNumber": part_num})
+
+         # Complete
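+         # The part list is rebuilt from the DB so parts completed in earlier runs are included.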
+         all_parts = sorted(self._get_all_completed_parts(), key=lambda p: p["PartNumber"])
+         self._s3.complete_multipart_upload(self._bucket, object_key, upload_id, all_parts)
+         self._complete(file_size)
+
+     def _upload_part_with_retry(
+         self, key: str, upload_id: str, part_num: int, data: bytes
+     ) -> str | None:
+         for attempt in range(MAX_RETRY_ATTEMPTS):
+             try:
+                 return self._s3.upload_part(self._bucket, key, upload_id, part_num, data)
+             except Exception as e:
+                 if attempt < MAX_RETRY_ATTEMPTS - 1:
+                     delay = _backoff_delay(attempt)
+                     logger.warning(
+                         "Upload part %d attempt %d failed, retrying in %.1fs: %s",
+                         part_num,
+                         attempt + 1,
+                         delay,
+                         e,
+                     )
+                     time.sleep(delay)
+                 else:
+                     self._mark_failed(str(e))
+                     self.signals.failed.emit(
+                         self.transfer_id,
+                         f"Upload failed after {MAX_RETRY_ATTEMPTS} attempts.",
+                         str(e),
+                     )
+                     return None
+
+     def _complete(self, total: int) -> None:
+         self._db.execute(
+             "UPDATE transfers SET status = 'completed', transferred = ?, "
+             "updated_at = datetime('now') WHERE id = ?",
+             (total, self.transfer_id),
+         )
+         self.signals.progress.emit(self.transfer_id, total, total)
+         self.signals.finished.emit(self.transfer_id)
+         logger.info("Upload %d completed", self.transfer_id)
+
+     def _mark_failed(self, msg: str) -> None:
+         self._db.execute(
+             "UPDATE transfers SET status = 'failed', error_message = ?, "
+             "updated_at = datetime('now') WHERE id = ?",
+             (msg, self.transfer_id),
+         )
+
+     def _do_cancel(self, key: str, upload_id: str) -> None:
+         import contextlib
+
+         with contextlib.suppress(Exception):
+             self._s3.abort_multipart_upload(self._bucket, key, upload_id)
+         self._db.execute(
+             "UPDATE transfers SET status = 'cancelled', updated_at = datetime('now') WHERE id = ?",
+             (self.transfer_id,),
+         )
+         logger.info("Upload %d cancelled", self.transfer_id)
+
+     def _do_pause(self) -> None:
+         self._db.execute(
+             "UPDATE transfers SET status = 'paused', updated_at = datetime('now') WHERE id = ?",
+             (self.transfer_id,),
+         )
+         logger.info("Upload %d paused", self.transfer_id)
+
+     def _get_transferred(self) -> int:
+         row = self._db.fetchone(
+             "SELECT COALESCE(SUM(size), 0) as done FROM transfer_parts "
+             "WHERE transfer_id = ? AND status = 'completed'",
+             (self.transfer_id,),
+         )
+         return row["done"]
+
+     def _get_completed_parts(self) -> list[dict]:
+         rows = self._db.fetchall(
+             "SELECT part_number, etag FROM transfer_parts "
+             "WHERE transfer_id = ? AND status = 'completed'",
+             (self.transfer_id,),
+         )
+         return [{"ETag": r["etag"], "PartNumber": r["part_number"]} for r in rows]
+
+     def _get_all_completed_parts(self) -> list[dict]:
+         return self._get_completed_parts()
+
+     def _update_speed(self, chunk_bytes: int) -> None:
+         now = time.monotonic()
+         self._speed_window.append((now, chunk_bytes))
+         # Keep 3-second window
+         self._speed_window = [(t, b) for t, b in self._speed_window if now - t <= 3.0]
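+         # Emit at most about twice per second, averaging bytes over the window.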
+         if now - self._last_speed_emit >= 0.5 and self._speed_window:
+             window_time = now - self._speed_window[0][0]
+             if window_time > 0:
+                 total_bytes = sum(b for _, b in self._speed_window)
+                 bps = total_bytes / window_time
+                 self.signals.speed.emit(self.transfer_id, bps)
+                 self._last_speed_emit = now
+
+
+ def _backoff_delay(attempt: int) -> float:
+     """Exponential backoff with jitter: 0s, ~1s, ~4s."""
+     if attempt == 0:
+         return 0.0
+     base = 4 ** (attempt - 1)  # 1, 4
+     jitter_max = base * 0.5
+     return base + random.uniform(0, jitter_max)
s3ui/db/__init__.py ADDED
File without changes