msaas-resumable-upload 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- msaas_resumable_upload-0.1.0/.gitignore +23 -0
- msaas_resumable_upload-0.1.0/PKG-INFO +16 -0
- msaas_resumable_upload-0.1.0/pyproject.toml +41 -0
- msaas_resumable_upload-0.1.0/src/resumable_upload/__init__.py +30 -0
- msaas_resumable_upload-0.1.0/src/resumable_upload/config.py +31 -0
- msaas_resumable_upload-0.1.0/src/resumable_upload/models.py +89 -0
- msaas_resumable_upload-0.1.0/src/resumable_upload/router.py +7 -0
- msaas_resumable_upload-0.1.0/src/resumable_upload/service.py +186 -0
- msaas_resumable_upload-0.1.0/src/resumable_upload/storage.py +107 -0
- msaas_resumable_upload-0.1.0/tests/__init__.py +0 -0
- msaas_resumable_upload-0.1.0/tests/test_service.py +104 -0
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
node_modules/
|
|
2
|
+
dist/
|
|
3
|
+
.next/
|
|
4
|
+
.turbo/
|
|
5
|
+
*.pyc
|
|
6
|
+
__pycache__/
|
|
7
|
+
.venv/
|
|
8
|
+
*.egg-info/
|
|
9
|
+
.pytest_cache/
|
|
10
|
+
.ruff_cache/
|
|
11
|
+
.env
|
|
12
|
+
.env.*
|
|
13
|
+
!.env.example
|
|
14
|
+
!.env.*.example
|
|
15
|
+
!.env.*.template
|
|
16
|
+
.DS_Store
|
|
17
|
+
coverage/
|
|
18
|
+
|
|
19
|
+
# Runtime artifacts
|
|
20
|
+
logs_llm/
|
|
21
|
+
vectors.db
|
|
22
|
+
vectors.db-shm
|
|
23
|
+
vectors.db-wal
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: msaas-resumable-upload
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Chunked resumable upload sessions with S3 multipart and progress tracking
|
|
5
|
+
Requires-Python: >=3.12
|
|
6
|
+
Requires-Dist: msaas-api-core
|
|
7
|
+
Requires-Dist: msaas-errors
|
|
8
|
+
Requires-Dist: pydantic>=2.0
|
|
9
|
+
Provides-Extra: all
|
|
10
|
+
Requires-Dist: boto3>=1.34; extra == 'all'
|
|
11
|
+
Provides-Extra: dev
|
|
12
|
+
Requires-Dist: httpx>=0.27.0; extra == 'dev'
|
|
13
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
|
|
14
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
15
|
+
Provides-Extra: s3
|
|
16
|
+
Requires-Dist: boto3>=1.34; extra == 's3'
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "msaas-resumable-upload"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Chunked resumable upload sessions with S3 multipart and progress tracking"
|
|
9
|
+
requires-python = ">=3.12"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"msaas-api-core",
|
|
12
|
+
"msaas-errors",
|
|
13
|
+
"pydantic>=2.0",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[project.optional-dependencies]
|
|
17
|
+
s3 = ["boto3>=1.34"]
|
|
18
|
+
all = ["boto3>=1.34"]
|
|
19
|
+
dev = [
|
|
20
|
+
"pytest>=8.0",
|
|
21
|
+
"pytest-asyncio>=0.24",
|
|
22
|
+
"httpx>=0.27.0",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
[tool.hatch.build.targets.wheel]
|
|
26
|
+
packages = ["src/resumable_upload"]
|
|
27
|
+
|
|
28
|
+
[tool.pytest.ini_options]
|
|
29
|
+
testpaths = ["tests"]
|
|
30
|
+
asyncio_mode = "auto"
|
|
31
|
+
|
|
32
|
+
[tool.ruff]
|
|
33
|
+
target-version = "py312"
|
|
34
|
+
line-length = 100
|
|
35
|
+
|
|
36
|
+
[tool.ruff.lint]
|
|
37
|
+
select = ["E", "F", "I", "N", "W", "UP", "B", "SIM", "TCH"]
|
|
38
|
+
|
|
39
|
+
[tool.uv.sources]
|
|
40
|
+
msaas-api-core = { workspace = true }
|
|
41
|
+
msaas-errors = { workspace = true }
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Willian Resumable Upload -- Chunked resumable upload sessions with S3 multipart support."""
|
|
2
|
+
|
|
3
|
+
from resumable_upload.config import ResumableUploadConfig, get_config, init_resumable_upload
|
|
4
|
+
from resumable_upload.models import (
|
|
5
|
+
ChunkInfo,
|
|
6
|
+
ChunkStatus,
|
|
7
|
+
UploadSession,
|
|
8
|
+
UploadSessionCreate,
|
|
9
|
+
UploadSessionStatus,
|
|
10
|
+
UploadSessionSummary,
|
|
11
|
+
)
|
|
12
|
+
from resumable_upload.router import router
|
|
13
|
+
from resumable_upload.service import ResumableUploadService
|
|
14
|
+
from resumable_upload.storage import S3MultipartStorage, StorageBackend
|
|
15
|
+
|
|
16
|
+
# Names re-exported at package level; each one is imported above from a
# submodule of ``resumable_upload``.
__all__ = [
    "ChunkInfo",
    "ChunkStatus",
    "ResumableUploadConfig",
    "ResumableUploadService",
    "S3MultipartStorage",
    "StorageBackend",
    "UploadSession",
    "UploadSessionCreate",
    "UploadSessionStatus",
    "UploadSessionSummary",
    "get_config",
    "init_resumable_upload",
    "router",
]
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Resumable upload configuration."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ResumableUploadConfig(BaseModel):
    """Tunable limits for resumable upload sessions.

    The three byte limits must be strictly positive. The service divides the
    file size by the (clamped) chunk size, so a zero ``min_chunk_size`` would
    otherwise surface later as a ``ZeroDivisionError`` instead of a
    validation error at configuration time.
    """

    # Lower clamp for a session's chunk size; also used when the caller does
    # not request a chunk size.
    min_chunk_size: int = Field(
        default=5 * 1024 * 1024, gt=0, description="Min chunk size in bytes (5MB)"
    )
    # Upper clamp for a session's chunk size.
    max_chunk_size: int = Field(
        default=100 * 1024 * 1024, gt=0, description="Max chunk size (100MB)"
    )
    # Largest file size a session may declare.
    max_file_size: int = Field(
        default=5 * 1024 * 1024 * 1024, gt=0, description="Max file size (5GB)"
    )
    # Hours added to the creation time to compute a session's expiry.
    session_ttl_hours: int = Field(default=24)
    # NOTE(review): no code visible in this package reads this value —
    # confirm where (or whether) the limit is enforced.
    max_concurrent_sessions: int = Field(default=10)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Module-wide configuration singleton; stays ``None`` until it is initialised
# explicitly or lazily by ``get_config``.
_config: ResumableUploadConfig | None = None


def init_resumable_upload(config: ResumableUploadConfig | None = None) -> ResumableUploadConfig:
    """Install the module-level configuration and return it.

    Args:
        config: Configuration to install. When omitted, a configuration built
            from the field defaults is installed instead.

    Returns:
        The configuration object that is now active.
    """
    global _config
    if not config:
        config = ResumableUploadConfig()
    _config = config
    return _config
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def get_config() -> ResumableUploadConfig:
    """Return the active configuration, creating a default one on first use."""
    global _config
    active = _config
    if active is None:
        # Nothing was installed yet: fall back to the field defaults and
        # remember the instance so later calls return the same object.
        active = ResumableUploadConfig()
        _config = active
    return active
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""Resumable upload data models."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import UTC, datetime
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _utcnow() -> datetime:
|
|
9
|
+
return datetime.now(UTC)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
from enum import Enum
|
|
13
|
+
from uuid import uuid4
|
|
14
|
+
|
|
15
|
+
from pydantic import BaseModel, Field
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class UploadSessionStatus(str, Enum):
    """Lifecycle states of an upload session.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values.
    """

    # Default state of a newly created session.
    INITIALIZED = "initialized"
    # Set by the service as soon as a chunk upload starts.
    IN_PROGRESS = "in_progress"
    # Set by the service while the backend finalises the multipart upload.
    COMPLETING = "completing"
    # Set by the service after the backend finalised the upload.
    COMPLETED = "completed"
    # Set by the service when a session is aborted.
    FAILED = "failed"
    # Set by the service's expiry cleanup.
    EXPIRED = "expired"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class ChunkStatus(str, Enum):
    """Upload states of a single chunk.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values.
    """

    # Default state of a chunk that has not been sent yet.
    PENDING = "pending"
    # Set by the service while the part is being handed to the backend.
    UPLOADING = "uploading"
    # Set by the service once the backend returned an ETag for the part.
    UPLOADED = "uploaded"
    # Presumably marks a part whose upload did not succeed — confirm against
    # the service code.
    FAILED = "failed"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ChunkInfo(BaseModel):
    """Bookkeeping record for one chunk of an upload session."""

    # 1-based position of the chunk within the file.
    chunk_number: int = Field(ge=1)
    # Chunk length in bytes.
    size: int = Field(ge=0)
    # Byte offset of the chunk within the file.
    offset: int = Field(ge=0)
    status: ChunkStatus = Field(default=ChunkStatus.PENDING)
    # ETag returned by the storage backend; ``None`` until the chunk is uploaded.
    etag: str | None = Field(default=None)
    checksum: str | None = Field(default=None)
    uploaded_at: datetime | None = Field(default=None)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class UploadSessionCreate(BaseModel):
    """Parameters a caller supplies to open a new upload session."""

    filename: str
    # Total size of the file in bytes; must be at least 1.
    file_size: int = Field(ge=1)
    content_type: str = Field(default="application/octet-stream")
    # Requested chunk size; when omitted the service falls back to the
    # configured minimum chunk size.
    chunk_size: int | None = Field(default=None)
    metadata: dict[str, str] = Field(default_factory=dict)
    # Target key in the storage backend; when omitted the service derives one
    # from the filename.
    storage_path: str | None = Field(default=None)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class UploadSession(BaseModel):
    """State of one resumable upload, including its per-chunk records."""

    # Random UUID4 string generated per instance.
    id: str = Field(default_factory=lambda: str(uuid4()))
    filename: str
    file_size: int
    content_type: str = Field(default="application/octet-stream")
    chunk_size: int = Field(default=5 * 1024 * 1024)
    total_chunks: int = Field(default=0)
    uploaded_chunks: int = Field(default=0)
    status: UploadSessionStatus = Field(default=UploadSessionStatus.INITIALIZED)
    storage_path: str = Field(default="")
    # Upload id handed out by the storage backend for the multipart upload.
    multipart_upload_id: str | None = Field(default=None)
    chunks: list[ChunkInfo] = Field(default_factory=list)
    metadata: dict[str, str] = Field(default_factory=dict)
    created_at: datetime = Field(default_factory=_utcnow)
    updated_at: datetime = Field(default_factory=_utcnow)
    expires_at: datetime | None = Field(default=None)

    @property
    def progress(self) -> float:
        """Percentage of chunks uploaded, rounded to two decimals (0.0 when there are no chunks)."""
        total = self.total_chunks
        return round(self.uploaded_chunks / total * 100, 2) if total != 0 else 0.0

    @property
    def is_complete(self) -> bool:
        """Whether the session has at least one chunk and every chunk is uploaded."""
        total = self.total_chunks
        return total > 0 and self.uploaded_chunks == total
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class UploadSessionSummary(BaseModel):
    """Condensed view of an upload session without the per-chunk records."""

    # Identity of the session and of the file being uploaded.
    id: str
    filename: str
    file_size: int

    # Current state and how far the upload has advanced.
    status: UploadSessionStatus
    progress: float
    uploaded_chunks: int
    total_chunks: int

    created_at: datetime
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""Resumable upload service."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import math
|
|
6
|
+
from datetime import UTC, datetime, timedelta
|
|
7
|
+
|
|
8
|
+
from resumable_upload.config import get_config
|
|
9
|
+
from resumable_upload.models import (
|
|
10
|
+
ChunkInfo,
|
|
11
|
+
ChunkStatus,
|
|
12
|
+
UploadSession,
|
|
13
|
+
UploadSessionCreate,
|
|
14
|
+
UploadSessionStatus,
|
|
15
|
+
UploadSessionSummary,
|
|
16
|
+
)
|
|
17
|
+
from resumable_upload.storage import StorageBackend
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ResumableUploadService:
    """Manages resumable upload sessions with chunked uploads.

    Sessions live in a dictionary on the service instance, keyed by session
    id; all byte transfer is delegated to the injected storage backend.
    """

    # Session states in which no further chunk may be uploaded: the backend
    # upload is either finished, being finalised, or already aborted.
    _REJECTS_UPLOADS = frozenset(
        {
            UploadSessionStatus.COMPLETING,
            UploadSessionStatus.COMPLETED,
            UploadSessionStatus.FAILED,
            UploadSessionStatus.EXPIRED,
        }
    )

    def __init__(self, storage: StorageBackend) -> None:
        """Create a service that stores parts through ``storage``."""
        self._storage = storage
        self._sessions: dict[str, UploadSession] = {}

    async def create_session(self, params: UploadSessionCreate) -> UploadSession:
        """Open a new upload session and start the backend multipart upload.

        Args:
            params: File description and optional chunk size / storage path.

        Returns:
            The stored session, with one pending ``ChunkInfo`` per chunk.

        Raises:
            ValidationError: If the declared file size exceeds the configured
                maximum.
        """
        config = get_config()

        if params.file_size > config.max_file_size:
            from errors import ValidationError

            raise ValidationError(
                message=f"File size exceeds maximum ({config.max_file_size} bytes)",
                field="file_size",
            )

        # Fall back to the configured minimum, then clamp into [min, max].
        chunk_size = params.chunk_size or config.min_chunk_size
        chunk_size = max(config.min_chunk_size, min(chunk_size, config.max_chunk_size))
        total_chunks = math.ceil(params.file_size / chunk_size)

        # NOTE(review): the default key depends only on the filename, so two
        # sessions for the same filename target the same key — confirm this is
        # intended before relying on the default.
        storage_path = params.storage_path or f"uploads/{params.filename}"
        upload_id = await self._storage.initiate_multipart(storage_path, params.content_type)

        chunks = [
            ChunkInfo(
                chunk_number=index + 1,
                # The last chunk only carries the remaining bytes.
                size=min(chunk_size, params.file_size - index * chunk_size),
                offset=index * chunk_size,
            )
            for index in range(total_chunks)
        ]

        session = UploadSession(
            filename=params.filename,
            file_size=params.file_size,
            content_type=params.content_type,
            chunk_size=chunk_size,
            total_chunks=total_chunks,
            storage_path=storage_path,
            multipart_upload_id=upload_id,
            chunks=chunks,
            metadata=params.metadata,
            expires_at=datetime.now(UTC) + timedelta(hours=config.session_ttl_hours),
        )

        self._sessions[session.id] = session
        return session

    async def upload_chunk(self, session_id: str, chunk_number: int, data: bytes) -> ChunkInfo:
        """Send one chunk to the backend and record the result.

        Args:
            session_id: Id of the session the chunk belongs to.
            chunk_number: 1-based chunk number.
            data: Chunk payload.

        Returns:
            The updated chunk record.

        Raises:
            NotFoundError: If the session does not exist.
            BusinessLogicError: If the session no longer accepts chunks
                (completing, completed, aborted or expired).
            ValidationError: If ``chunk_number`` is outside ``1..total_chunks``.
        """
        session = self._get_session(session_id)

        if session.status in self._REJECTS_UPLOADS:
            from errors import BusinessLogicError

            raise BusinessLogicError(message=f"Session is {session.status.value}")

        if chunk_number < 1 or chunk_number > session.total_chunks:
            from errors import ValidationError

            raise ValidationError(
                message=f"Invalid chunk number {chunk_number} (1-{session.total_chunks})",
                field="chunk_number",
            )

        chunk = session.chunks[chunk_number - 1]
        chunk.status = ChunkStatus.UPLOADING
        session.status = UploadSessionStatus.IN_PROGRESS

        try:
            etag = await self._storage.upload_part(
                session.storage_path,
                session.multipart_upload_id,
                chunk_number,
                data,
            )
        except BaseException:
            # Also covers task cancellation. Without this the chunk would stay
            # UPLOADING forever and never be offered for retry.
            chunk.status = ChunkStatus.FAILED
            chunk.etag = None
            session.uploaded_chunks = self._count_uploaded(session)
            session.updated_at = datetime.now(UTC)
            raise

        chunk.etag = etag
        chunk.status = ChunkStatus.UPLOADED
        chunk.uploaded_at = datetime.now(UTC)
        session.uploaded_chunks = self._count_uploaded(session)
        session.updated_at = datetime.now(UTC)

        return chunk

    async def complete_session(self, session_id: str) -> UploadSession:
        """Finalise the multipart upload once every chunk is uploaded.

        Calling this again on an already completed session returns the session
        unchanged instead of asking the backend to complete a second time.

        Raises:
            NotFoundError: If the session does not exist.
            BusinessLogicError: If the session is aborted, expired, currently
                completing, or still has chunks that are not uploaded.
        """
        session = self._get_session(session_id)

        if session.status == UploadSessionStatus.COMPLETED:
            return session

        if session.status in (
            UploadSessionStatus.COMPLETING,
            UploadSessionStatus.FAILED,
            UploadSessionStatus.EXPIRED,
        ):
            from errors import BusinessLogicError

            raise BusinessLogicError(message=f"Session is {session.status.value}")

        if not session.is_complete:
            from errors import BusinessLogicError

            missing = session.total_chunks - session.uploaded_chunks
            raise BusinessLogicError(message=f"Cannot complete: {missing} chunks still pending")

        previous_status = session.status
        session.status = UploadSessionStatus.COMPLETING

        parts = [
            {"part_number": c.chunk_number, "etag": c.etag}
            for c in session.chunks
            if c.etag is not None
        ]

        try:
            await self._storage.complete_multipart(
                session.storage_path, session.multipart_upload_id, parts
            )
        except BaseException:
            # Roll back so the caller can retry instead of being stuck in
            # COMPLETING.
            session.status = previous_status
            session.updated_at = datetime.now(UTC)
            raise

        session.status = UploadSessionStatus.COMPLETED
        session.updated_at = datetime.now(UTC)
        return session

    async def abort_session(self, session_id: str) -> None:
        """Abort the backend multipart upload and mark the session failed.

        Aborting a session that is already failed or expired is a no-op.

        Raises:
            NotFoundError: If the session does not exist.
            BusinessLogicError: If the session is already completed.
        """
        session = self._get_session(session_id)

        if session.status == UploadSessionStatus.COMPLETED:
            from errors import BusinessLogicError

            raise BusinessLogicError(message=f"Session is {session.status.value}")

        if session.status in (UploadSessionStatus.FAILED, UploadSessionStatus.EXPIRED):
            # The backend upload was already aborted for these states.
            return

        if session.multipart_upload_id:
            await self._storage.abort_multipart(session.storage_path, session.multipart_upload_id)

        session.status = UploadSessionStatus.FAILED
        session.updated_at = datetime.now(UTC)

    def get_session(self, session_id: str) -> UploadSession:
        """Return the session with the given id or raise ``NotFoundError``."""
        return self._get_session(session_id)

    def get_session_summary(self, session_id: str) -> UploadSessionSummary:
        """Return a condensed view of the session without chunk records.

        Raises:
            NotFoundError: If the session does not exist.
        """
        session = self._get_session(session_id)
        return UploadSessionSummary(
            id=session.id,
            filename=session.filename,
            file_size=session.file_size,
            status=session.status,
            progress=session.progress,
            uploaded_chunks=session.uploaded_chunks,
            total_chunks=session.total_chunks,
            created_at=session.created_at,
        )

    def get_pending_chunks(self, session_id: str) -> list[ChunkInfo]:
        """Return the chunks that still need to be uploaded.

        This covers chunks never attempted (``PENDING``) and chunks whose last
        attempt failed (``FAILED``), so a client can resume after an error.

        Raises:
            NotFoundError: If the session does not exist.
        """
        session = self._get_session(session_id)
        retryable = (ChunkStatus.PENDING, ChunkStatus.FAILED)
        return [c for c in session.chunks if c.status in retryable]

    async def cleanup_expired(self) -> int:
        """Expire every unfinished session whose ``expires_at`` has passed.

        The backend multipart upload is aborted on a best-effort basis: a
        backend error is logged and does not stop the cleanup.

        Returns:
            The number of sessions marked as expired.
        """
        import logging

        logger = logging.getLogger(__name__)
        now = datetime.now(UTC)
        expired_ids = [
            sid
            for sid, s in self._sessions.items()
            if s.expires_at
            and s.expires_at < now
            and s.status not in (UploadSessionStatus.COMPLETED, UploadSessionStatus.EXPIRED)
        ]
        for sid in expired_ids:
            session = self._sessions[sid]
            # FAILED sessions were already aborted at the backend.
            already_aborted = session.status == UploadSessionStatus.FAILED
            if session.multipart_upload_id and not already_aborted:
                try:
                    await self._storage.abort_multipart(
                        session.storage_path, session.multipart_upload_id
                    )
                except Exception:
                    logger.warning(
                        "Failed to abort multipart upload for expired session %s",
                        sid,
                        exc_info=True,
                    )
            session.status = UploadSessionStatus.EXPIRED
            session.updated_at = now
        return len(expired_ids)

    @staticmethod
    def _count_uploaded(session: UploadSession) -> int:
        """Count the chunks of ``session`` that are currently marked uploaded."""
        return sum(1 for c in session.chunks if c.status == ChunkStatus.UPLOADED)

    def _get_session(self, session_id: str) -> UploadSession:
        """Look up a session by id.

        Raises:
            NotFoundError: If no session with that id is stored.
        """
        session = self._sessions.get(session_id)
        if session is None:
            from errors import NotFoundError

            raise NotFoundError(message=f"Upload session {session_id} not found")
        return session
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""Storage backends for resumable uploads."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class StorageBackend(ABC):
|
|
10
|
+
"""Abstract storage backend for multipart uploads."""
|
|
11
|
+
|
|
12
|
+
@abstractmethod
|
|
13
|
+
async def initiate_multipart(self, key: str, content_type: str) -> str:
|
|
14
|
+
"""Start multipart upload, return upload ID."""
|
|
15
|
+
|
|
16
|
+
@abstractmethod
|
|
17
|
+
async def upload_part(self, key: str, upload_id: str, part_number: int, data: bytes) -> str:
|
|
18
|
+
"""Upload a part, return ETag."""
|
|
19
|
+
|
|
20
|
+
@abstractmethod
|
|
21
|
+
async def complete_multipart(
|
|
22
|
+
self, key: str, upload_id: str, parts: list[dict[str, str | int]]
|
|
23
|
+
) -> str:
|
|
24
|
+
"""Complete multipart upload, return final key/URL."""
|
|
25
|
+
|
|
26
|
+
@abstractmethod
|
|
27
|
+
async def abort_multipart(self, key: str, upload_id: str) -> None:
|
|
28
|
+
"""Abort multipart upload."""
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class S3MultipartStorage(StorageBackend):
    """AWS S3 multipart upload backend.

    The boto3 client is synchronous, so every call is run in a worker thread
    to keep the event loop responsive while S3 is being contacted.
    """

    def __init__(self, bucket: str, region: str = "us-east-1", endpoint_url: str | None = None):
        """Create an S3 client for ``bucket``.

        Args:
            bucket: Name of the bucket all keys are stored in.
            region: Region passed to the boto3 client.
            endpoint_url: Optional custom endpoint passed to the boto3 client.

        Raises:
            ImportError: If boto3 is not installed.
        """
        try:
            import boto3
        except ImportError as exc:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(
                "boto3 is required for S3 storage. "
                "Install with: pip install msaas-resumable-upload[s3]"
            ) from exc
        self.bucket = bucket
        self._client = boto3.client("s3", region_name=region, endpoint_url=endpoint_url)

    async def _run_blocking(self, func, /, **kwargs):
        """Run a blocking boto3 client call in a worker thread and return its result."""
        import asyncio

        return await asyncio.to_thread(func, **kwargs)

    async def initiate_multipart(self, key: str, content_type: str) -> str:
        """Start a multipart upload for ``key`` and return the S3 upload id."""
        response = await self._run_blocking(
            self._client.create_multipart_upload,
            Bucket=self.bucket,
            Key=key,
            ContentType=content_type,
        )
        return response["UploadId"]

    async def upload_part(self, key: str, upload_id: str, part_number: int, data: bytes) -> str:
        """Upload one part and return the ETag reported by S3."""
        response = await self._run_blocking(
            self._client.upload_part,
            Bucket=self.bucket,
            Key=key,
            UploadId=upload_id,
            PartNumber=part_number,
            Body=data,
        )
        return response["ETag"]

    async def complete_multipart(
        self, key: str, upload_id: str, parts: list[dict[str, str | int]]
    ) -> str:
        """Complete the multipart upload and return the object's ``s3://`` URL.

        Args:
            key: Object key of the upload.
            upload_id: Upload id returned by ``initiate_multipart``.
            parts: Dicts with ``part_number`` and ``etag`` entries, in any order.
        """
        # S3 requires the part list in ascending part-number order.
        ordered = sorted(parts, key=lambda part: part["part_number"])
        await self._run_blocking(
            self._client.complete_multipart_upload,
            Bucket=self.bucket,
            Key=key,
            UploadId=upload_id,
            MultipartUpload={
                "Parts": [{"PartNumber": p["part_number"], "ETag": p["etag"]} for p in ordered]
            },
        )
        return f"s3://{self.bucket}/{key}"

    async def abort_multipart(self, key: str, upload_id: str) -> None:
        """Abort the multipart upload identified by ``upload_id``."""
        await self._run_blocking(
            self._client.abort_multipart_upload,
            Bucket=self.bucket,
            Key=key,
            UploadId=upload_id,
        )
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class InMemoryStorage(StorageBackend):
    """Storage backend that keeps everything in dictionaries, for use in tests."""

    def __init__(self) -> None:
        # upload id -> {part number -> payload} for uploads still in flight
        self._uploads: dict[str, dict[int, bytes]] = {}
        # key -> concatenated payload of completed uploads
        self._completed: dict[str, bytes] = {}
        # source of sequential upload ids
        self._counter = 0

    async def initiate_multipart(self, key: str, content_type: str) -> str:
        """Register an empty upload and return its generated id."""
        self._counter = self._counter + 1
        new_id = f"mem-upload-{self._counter}"
        self._uploads[new_id] = {}
        return new_id

    async def upload_part(self, key: str, upload_id: str, part_number: int, data: bytes) -> str:
        """Remember the part's payload and return its MD5 hex digest as the ETag."""
        stored_parts = self._uploads[upload_id]
        stored_parts[part_number] = data
        digest = hashlib.md5(data)
        return digest.hexdigest()

    async def complete_multipart(
        self, key: str, upload_id: str, parts: list[dict[str, str | int]]
    ) -> str:
        """Join the listed parts in part-number order and store them under ``key``."""
        stored_parts = self._uploads.pop(upload_id, {})
        ordered = sorted(parts, key=lambda part: part["part_number"])
        payloads = [stored_parts[part["part_number"]] for part in ordered]
        self._completed[key] = b"".join(payloads)
        return f"mem://{key}"

    async def abort_multipart(self, key: str, upload_id: str) -> None:
        """Forget the upload; unknown upload ids are ignored."""
        self._uploads.pop(upload_id, None)
|
|
File without changes
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Tests for resumable upload service."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from resumable_upload.models import (
|
|
8
|
+
ChunkStatus,
|
|
9
|
+
UploadSessionCreate,
|
|
10
|
+
UploadSessionStatus,
|
|
11
|
+
)
|
|
12
|
+
from resumable_upload.service import ResumableUploadService
|
|
13
|
+
from resumable_upload.storage import InMemoryStorage
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@pytest.fixture
def service() -> ResumableUploadService:
    """Provide a service wired to a fresh in-memory storage backend."""
    backend = InMemoryStorage()
    return ResumableUploadService(storage=backend)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@pytest.fixture
def create_params() -> UploadSessionCreate:
    """Provide parameters for a 15 MiB PDF uploaded in 5 MiB chunks."""
    mebibyte = 1024 * 1024
    return UploadSessionCreate(
        filename="test.pdf",
        file_size=15 * mebibyte,
        content_type="application/pdf",
        chunk_size=5 * mebibyte,
    )
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class TestResumableUploadService:
    """Exercises the service against the in-memory storage backend."""

    @pytest.mark.asyncio
    async def test_create_session(
        self, service: ResumableUploadService, create_params: UploadSessionCreate
    ) -> None:
        """A new session starts initialized, with three chunks and no progress."""
        created = await service.create_session(create_params)
        assert created.status == UploadSessionStatus.INITIALIZED
        assert created.total_chunks == 3
        assert created.uploaded_chunks == 0
        assert created.progress == 0.0
        assert len(created.chunks) == 3

    @pytest.mark.asyncio
    async def test_upload_chunk(
        self, service: ResumableUploadService, create_params: UploadSessionCreate
    ) -> None:
        """Uploading one chunk marks it uploaded and moves the session to in-progress."""
        created = await service.create_session(create_params)
        payload = b"x" * (5 * 1024 * 1024)
        uploaded = await service.upload_chunk(created.id, 1, payload)
        assert uploaded.status == ChunkStatus.UPLOADED
        assert uploaded.etag is not None

        refreshed = service.get_session(created.id)
        assert refreshed.uploaded_chunks == 1
        assert refreshed.status == UploadSessionStatus.IN_PROGRESS

    @pytest.mark.asyncio
    async def test_complete_session(
        self, service: ResumableUploadService, create_params: UploadSessionCreate
    ) -> None:
        """A session completes once every chunk has been uploaded."""
        created = await service.create_session(create_params)
        for number, chunk in enumerate(created.chunks, start=1):
            await service.upload_chunk(created.id, number, b"x" * chunk.size)

        finished = await service.complete_session(created.id)
        assert finished.status == UploadSessionStatus.COMPLETED
        assert finished.progress == 100.0

    @pytest.mark.asyncio
    async def test_abort_session(
        self, service: ResumableUploadService, create_params: UploadSessionCreate
    ) -> None:
        """Aborting a session leaves it in the failed state."""
        created = await service.create_session(create_params)
        await service.abort_session(created.id)
        after_abort = service.get_session(created.id)
        assert after_abort.status == UploadSessionStatus.FAILED

    @pytest.mark.asyncio
    async def test_get_pending_chunks(
        self, service: ResumableUploadService, create_params: UploadSessionCreate
    ) -> None:
        """After one of three chunks is uploaded, two remain pending."""
        created = await service.create_session(create_params)
        await service.upload_chunk(created.id, 1, b"x" * (5 * 1024 * 1024))
        remaining = service.get_pending_chunks(created.id)
        assert len(remaining) == 2

    @pytest.mark.asyncio
    async def test_session_summary(
        self, service: ResumableUploadService, create_params: UploadSessionCreate
    ) -> None:
        """The summary mirrors the filename, progress and chunk count of the session."""
        created = await service.create_session(create_params)
        overview = service.get_session_summary(created.id)
        assert overview.filename == "test.pdf"
        assert overview.progress == 0.0
        assert overview.total_chunks == 3

    @pytest.mark.asyncio
    async def test_cannot_complete_incomplete(
        self, service: ResumableUploadService, create_params: UploadSessionCreate
    ) -> None:
        """Completing a session with missing chunks is rejected."""
        created = await service.create_session(create_params)
        with pytest.raises(Exception, match="chunks still pending"):
            await service.complete_session(created.id)
|