msaas-resumable-upload 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,23 @@
1
+ node_modules/
2
+ dist/
3
+ .next/
4
+ .turbo/
5
+ *.pyc
6
+ __pycache__/
7
+ .venv/
8
+ *.egg-info/
9
+ .pytest_cache/
10
+ .ruff_cache/
11
+ .env
12
+ .env.*
13
+ !.env.example
14
+ !.env.*.example
15
+ !.env.*.template
16
+ .DS_Store
17
+ coverage/
18
+
19
+ # Runtime artifacts
20
+ logs_llm/
21
+ vectors.db
22
+ vectors.db-shm
23
+ vectors.db-wal
@@ -0,0 +1,16 @@
1
+ Metadata-Version: 2.4
2
+ Name: msaas-resumable-upload
3
+ Version: 0.1.0
4
+ Summary: Chunked resumable upload sessions with S3 multipart and progress tracking
5
+ Requires-Python: >=3.12
6
+ Requires-Dist: msaas-api-core
7
+ Requires-Dist: msaas-errors
8
+ Requires-Dist: pydantic>=2.0
9
+ Provides-Extra: all
10
+ Requires-Dist: boto3>=1.34; extra == 'all'
11
+ Provides-Extra: dev
12
+ Requires-Dist: httpx>=0.27.0; extra == 'dev'
13
+ Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
14
+ Requires-Dist: pytest>=8.0; extra == 'dev'
15
+ Provides-Extra: s3
16
+ Requires-Dist: boto3>=1.34; extra == 's3'
@@ -0,0 +1,41 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "msaas-resumable-upload"
7
+ version = "0.1.0"
8
+ description = "Chunked resumable upload sessions with S3 multipart and progress tracking"
9
+ requires-python = ">=3.12"
10
+ dependencies = [
11
+ "msaas-api-core",
12
+ "msaas-errors",
13
+ "pydantic>=2.0",
14
+ ]
15
+
16
+ [project.optional-dependencies]
17
+ s3 = ["boto3>=1.34"]
18
+ all = ["boto3>=1.34"]
19
+ dev = [
20
+ "pytest>=8.0",
21
+ "pytest-asyncio>=0.24",
22
+ "httpx>=0.27.0",
23
+ ]
24
+
25
+ [tool.hatch.build.targets.wheel]
26
+ packages = ["src/resumable_upload"]
27
+
28
+ [tool.pytest.ini_options]
29
+ testpaths = ["tests"]
30
+ asyncio_mode = "auto"
31
+
32
+ [tool.ruff]
33
+ target-version = "py312"
34
+ line-length = 100
35
+
36
+ [tool.ruff.lint]
37
+ select = ["E", "F", "I", "N", "W", "UP", "B", "SIM", "TCH"]
38
+
39
+ [tool.uv.sources]
40
+ msaas-api-core = { workspace = true }
41
+ msaas-errors = { workspace = true }
@@ -0,0 +1,30 @@
1
+ """Willian Resumable Upload -- Chunked resumable upload sessions with S3 multipart support."""
2
+
3
+ from resumable_upload.config import ResumableUploadConfig, get_config, init_resumable_upload
4
+ from resumable_upload.models import (
5
+ ChunkInfo,
6
+ ChunkStatus,
7
+ UploadSession,
8
+ UploadSessionCreate,
9
+ UploadSessionStatus,
10
+ UploadSessionSummary,
11
+ )
12
+ from resumable_upload.router import router
13
+ from resumable_upload.service import ResumableUploadService
14
+ from resumable_upload.storage import S3MultipartStorage, StorageBackend
15
+
16
+ __all__ = [
17
+ "ChunkInfo",
18
+ "ChunkStatus",
19
+ "ResumableUploadConfig",
20
+ "ResumableUploadService",
21
+ "S3MultipartStorage",
22
+ "StorageBackend",
23
+ "UploadSession",
24
+ "UploadSessionCreate",
25
+ "UploadSessionStatus",
26
+ "UploadSessionSummary",
27
+ "get_config",
28
+ "init_resumable_upload",
29
+ "router",
30
+ ]
@@ -0,0 +1,31 @@
1
+ """Resumable upload configuration."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pydantic import BaseModel, Field
6
+
7
+
8
class ResumableUploadConfig(BaseModel):
    """Tunable limits for resumable upload sessions.

    All size fields are expressed in bytes.
    """

    # Lower bound applied to a session's chunk size.
    min_chunk_size: int = Field(default=5 << 20, description="Min chunk size in bytes (5MB)")
    # Upper bound applied to a session's chunk size.
    max_chunk_size: int = Field(default=100 << 20, description="Max chunk size (100MB)")
    # Largest total file size a session may declare.
    max_file_size: int = Field(default=5 << 30, description="Max file size (5GB)")
    # Number of hours after creation at which a session expires.
    session_ttl_hours: int = 24
    # NOTE(review): presumably a cap on simultaneously open sessions -- confirm where
    # it is enforced.
    max_concurrent_sessions: int = 10
16
+
17
+
18
# Module-level configuration singleton; populated on first use.
_config: ResumableUploadConfig | None = None


def init_resumable_upload(config: ResumableUploadConfig | None = None) -> ResumableUploadConfig:
    """Install the module-level configuration and return it.

    Args:
        config: Configuration to install. When omitted, a
            ``ResumableUploadConfig`` built from its defaults is used.

    Returns:
        The configuration that is now active.
    """
    global _config
    _config = config if config else ResumableUploadConfig()
    return _config


def get_config() -> ResumableUploadConfig:
    """Return the active configuration, creating a default one on first access."""
    global _config
    if _config is not None:
        return _config
    _config = ResumableUploadConfig()
    return _config
@@ -0,0 +1,89 @@
1
+ """Resumable upload data models."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import UTC, datetime
6
+
7
+
8
+ def _utcnow() -> datetime:
9
+ return datetime.now(UTC)
10
+
11
+
12
+ from enum import Enum
13
+ from uuid import uuid4
14
+
15
+ from pydantic import BaseModel, Field
16
+
17
+
18
class UploadSessionStatus(str, Enum):
    """Lifecycle states of an upload session."""

    # Default state of a newly created session.
    INITIALIZED = "initialized"
    # Set once a chunk upload has been started.
    IN_PROGRESS = "in_progress"
    # Set while the multipart upload is being finalized.
    COMPLETING = "completing"
    # Set after the multipart upload was finalized.
    COMPLETED = "completed"
    # Set when a session is aborted.
    FAILED = "failed"
    # Set by the expiry cleanup for sessions past their expiry time.
    EXPIRED = "expired"
25
+
26
+
27
class ChunkStatus(str, Enum):
    """Upload states of a single chunk."""

    # Default state: nothing has been uploaded for this chunk yet.
    PENDING = "pending"
    # The chunk is currently being sent to the storage backend.
    UPLOADING = "uploading"
    # The storage backend accepted the chunk.
    UPLOADED = "uploaded"
    # NOTE(review): presumably marks a chunk whose upload did not succeed -- confirm usage.
    FAILED = "failed"
32
+
33
+
34
class ChunkInfo(BaseModel):
    """Bookkeeping record for one chunk (part) of an upload session."""

    # 1-based chunk index.
    chunk_number: int = Field(ge=1)
    # Chunk length in bytes.
    size: int = Field(ge=0)
    # Byte offset of the chunk within the file.
    offset: int = Field(ge=0)
    status: ChunkStatus = Field(default=ChunkStatus.PENDING)
    # ETag reported by the storage backend for the uploaded part.
    etag: str | None = Field(default=None)
    # NOTE(review): presumably a client-supplied content checksum -- confirm who sets it.
    checksum: str | None = Field(default=None)
    # Time at which the chunk was stored; None until then.
    uploaded_at: datetime | None = Field(default=None)
42
+
43
+
44
class UploadSessionCreate(BaseModel):
    """Parameters supplied by a caller to open a new upload session."""

    filename: str
    # Total size of the file in bytes; must be at least 1.
    file_size: int = Field(ge=1)
    content_type: str = Field(default="application/octet-stream")
    # Requested chunk size in bytes; None leaves the choice to the service.
    chunk_size: int | None = Field(default=None)
    metadata: dict[str, str] = Field(default_factory=dict)
    # Explicit storage key; None lets the service derive one.
    storage_path: str | None = Field(default=None)
51
+
52
+
53
class UploadSession(BaseModel):
    """Record of one resumable upload together with the state of its chunks."""

    # Random UUID4 string generated per session.
    id: str = Field(default_factory=lambda: str(uuid4()))
    filename: str
    file_size: int
    content_type: str = "application/octet-stream"
    chunk_size: int = 5 << 20  # 5 MiB
    total_chunks: int = 0
    uploaded_chunks: int = 0
    status: UploadSessionStatus = UploadSessionStatus.INITIALIZED
    storage_path: str = ""
    # Upload id handed out by the storage backend, if any.
    multipart_upload_id: str | None = None
    chunks: list[ChunkInfo] = Field(default_factory=list)
    metadata: dict[str, str] = Field(default_factory=dict)
    created_at: datetime = Field(default_factory=_utcnow)
    updated_at: datetime = Field(default_factory=_utcnow)
    expires_at: datetime | None = None

    @property
    def progress(self) -> float:
        """Percentage of chunks uploaded, rounded to two decimals (0.0 when there are none)."""
        total = self.total_chunks
        if not total:
            return 0.0
        percentage = self.uploaded_chunks / total * 100
        return round(percentage, 2)

    @property
    def is_complete(self) -> bool:
        """True when the session has at least one chunk and all of them are uploaded."""
        return self.total_chunks > 0 and self.uploaded_chunks == self.total_chunks
79
+
80
+
81
class UploadSessionSummary(BaseModel):
    """Condensed view of an upload session, without per-chunk details."""

    id: str
    filename: str
    # Total file size in bytes.
    file_size: int
    status: UploadSessionStatus
    # Upload progress as a percentage.
    progress: float
    uploaded_chunks: int
    total_chunks: int
    created_at: datetime
@@ -0,0 +1,7 @@
1
+ """Resumable upload FastAPI router."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from fastapi import APIRouter
6
+
7
# Router exposed by the package; this module registers no routes on it.
# NOTE(review): fastapi is not among this package's declared dependencies --
# presumably it is pulled in through msaas-api-core; confirm.
router = APIRouter(
    prefix="/uploads",
    tags=["resumable-upload"],
)
@@ -0,0 +1,186 @@
1
+ """Resumable upload service."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import math
6
+ from datetime import UTC, datetime, timedelta
7
+
8
+ from resumable_upload.config import get_config
9
+ from resumable_upload.models import (
10
+ ChunkInfo,
11
+ ChunkStatus,
12
+ UploadSession,
13
+ UploadSessionCreate,
14
+ UploadSessionStatus,
15
+ UploadSessionSummary,
16
+ )
17
+ from resumable_upload.storage import StorageBackend
18
+
19
+
20
class ResumableUploadService:
    """Manages resumable upload sessions with chunked uploads.

    Sessions are held in an in-process dictionary keyed by session id; the
    injected storage backend performs the actual multipart operations.
    """

    # Statuses in which a session must not accept further chunks: it is either
    # finished, aborted, expired, or currently being finalized.
    _CLOSED_STATUSES = frozenset(
        {
            UploadSessionStatus.COMPLETING,
            UploadSessionStatus.COMPLETED,
            UploadSessionStatus.FAILED,
            UploadSessionStatus.EXPIRED,
        }
    )

    def __init__(self, storage: StorageBackend) -> None:
        """Create a service that stores chunks through *storage*."""
        self._storage = storage
        self._sessions: dict[str, UploadSession] = {}

    async def create_session(self, params: UploadSessionCreate) -> UploadSession:
        """Open a new upload session and start a multipart upload for it.

        The chunk size is the requested size (or the configured minimum when
        none is given) clamped to the configured min/max chunk size.

        Args:
            params: Description of the file to upload.

        Returns:
            The newly registered session, including its chunk layout.

        Raises:
            ValidationError: If the file size exceeds the configured maximum.
        """
        config = get_config()

        if params.file_size > config.max_file_size:
            from errors import ValidationError

            raise ValidationError(
                message=f"File size exceeds maximum ({config.max_file_size} bytes)",
                field="file_size",
            )

        requested = params.chunk_size or config.min_chunk_size
        chunk_size = max(config.min_chunk_size, min(requested, config.max_chunk_size))
        total_chunks = math.ceil(params.file_size / chunk_size)

        # The last chunk carries whatever remains after the full-sized ones.
        chunks = [
            ChunkInfo(
                chunk_number=i + 1,
                size=min(chunk_size, params.file_size - i * chunk_size),
                offset=i * chunk_size,
            )
            for i in range(total_chunks)
        ]

        storage_path = params.storage_path or f"uploads/{params.filename}"
        upload_id = await self._storage.initiate_multipart(storage_path, params.content_type)

        session = UploadSession(
            filename=params.filename,
            file_size=params.file_size,
            content_type=params.content_type,
            chunk_size=chunk_size,
            total_chunks=total_chunks,
            storage_path=storage_path,
            multipart_upload_id=upload_id,
            chunks=chunks,
            metadata=params.metadata,
            expires_at=datetime.now(UTC) + timedelta(hours=config.session_ttl_hours),
        )

        self._sessions[session.id] = session
        return session

    async def upload_chunk(self, session_id: str, chunk_number: int, data: bytes) -> ChunkInfo:
        """Store one chunk of a session and record its ETag.

        Args:
            session_id: Id of the session the chunk belongs to.
            chunk_number: 1-based chunk index.
            data: Raw chunk bytes.

        Returns:
            The updated chunk record.

        Raises:
            NotFoundError: If the session does not exist.
            BusinessLogicError: If the session is completing, completed, failed
                or expired.
            ValidationError: If *chunk_number* is outside ``1..total_chunks``.
        """
        session = self._get_session(session_id)

        if session.status in self._CLOSED_STATUSES:
            from errors import BusinessLogicError

            raise BusinessLogicError(message=f"Session is {session.status.value}")

        if not 1 <= chunk_number <= session.total_chunks:
            from errors import ValidationError

            raise ValidationError(
                message=f"Invalid chunk number {chunk_number} (1-{session.total_chunks})",
                field="chunk_number",
            )

        chunk = session.chunks[chunk_number - 1]
        previous_status = chunk.status
        chunk.status = ChunkStatus.UPLOADING
        session.status = UploadSessionStatus.IN_PROGRESS

        try:
            etag = await self._storage.upload_part(
                session.storage_path,
                session.multipart_upload_id,
                chunk_number,
                data,
            )
        except BaseException:
            # Also covers task cancellation. Never leave the chunk stuck in
            # UPLOADING: a previously stored part stays valid, anything else is
            # marked FAILED so that it can be retried.
            if previous_status == ChunkStatus.UPLOADED:
                chunk.status = ChunkStatus.UPLOADED
            else:
                chunk.status = ChunkStatus.FAILED
            session.updated_at = datetime.now(UTC)
            raise

        chunk.etag = etag
        chunk.status = ChunkStatus.UPLOADED
        chunk.uploaded_at = datetime.now(UTC)
        session.uploaded_chunks = sum(1 for c in session.chunks if c.status == ChunkStatus.UPLOADED)
        session.updated_at = datetime.now(UTC)

        return chunk

    async def complete_session(self, session_id: str) -> UploadSession:
        """Finalize the multipart upload once every chunk is uploaded.

        Calling this on an already completed session returns it unchanged.

        Raises:
            NotFoundError: If the session does not exist.
            BusinessLogicError: If chunks are still missing, or the session is
                failed, expired or already being completed.
        """
        session = self._get_session(session_id)

        if session.status == UploadSessionStatus.COMPLETED:
            # Idempotent: the backend upload must not be finalized twice.
            return session

        if session.status in self._CLOSED_STATUSES:
            from errors import BusinessLogicError

            raise BusinessLogicError(message=f"Session is {session.status.value}")

        if not session.is_complete:
            from errors import BusinessLogicError

            missing = session.total_chunks - session.uploaded_chunks
            raise BusinessLogicError(message=f"Cannot complete: {missing} chunks still pending")

        previous_status = session.status
        session.status = UploadSessionStatus.COMPLETING

        parts = [
            {"part_number": c.chunk_number, "etag": c.etag}
            for c in session.chunks
            if c.etag is not None
        ]

        try:
            await self._storage.complete_multipart(
                session.storage_path, session.multipart_upload_id, parts
            )
        except BaseException:
            # Roll back so that completion can be retried instead of leaving the
            # session stuck in COMPLETING.
            session.status = previous_status
            session.updated_at = datetime.now(UTC)
            raise

        session.status = UploadSessionStatus.COMPLETED
        session.updated_at = datetime.now(UTC)
        return session

    async def abort_session(self, session_id: str) -> None:
        """Abort the multipart upload of a session and mark the session failed.

        Raises:
            NotFoundError: If the session does not exist.
            BusinessLogicError: If the session is already completed.
        """
        session = self._get_session(session_id)

        if session.status == UploadSessionStatus.COMPLETED:
            from errors import BusinessLogicError

            raise BusinessLogicError(message=f"Session is {session.status.value}")

        if session.multipart_upload_id:
            await self._storage.abort_multipart(session.storage_path, session.multipart_upload_id)

        session.status = UploadSessionStatus.FAILED
        session.updated_at = datetime.now(UTC)

    def get_session(self, session_id: str) -> UploadSession:
        """Return the session with the given id, raising NotFoundError if unknown."""
        return self._get_session(session_id)

    def get_session_summary(self, session_id: str) -> UploadSessionSummary:
        """Return a summary (no per-chunk details) of the given session."""
        session = self._get_session(session_id)
        return UploadSessionSummary(
            id=session.id,
            filename=session.filename,
            file_size=session.file_size,
            status=session.status,
            progress=session.progress,
            uploaded_chunks=session.uploaded_chunks,
            total_chunks=session.total_chunks,
            created_at=session.created_at,
        )

    def get_pending_chunks(self, session_id: str) -> list[ChunkInfo]:
        """Return the chunks that still have to be uploaded.

        This covers chunks never attempted (PENDING) as well as chunks whose
        last upload attempt failed (FAILED).
        """
        session = self._get_session(session_id)
        outstanding = (ChunkStatus.PENDING, ChunkStatus.FAILED)
        return [c for c in session.chunks if c.status in outstanding]

    async def cleanup_expired(self) -> int:
        """Expire every unfinished session whose expiry time has passed.

        The backend multipart upload is aborted on a best-effort basis; a
        failing abort is logged and does not stop the cleanup.

        Returns:
            The number of sessions marked as expired.
        """
        import logging

        logger = logging.getLogger(__name__)
        now = datetime.now(UTC)
        expired_ids = [
            sid
            for sid, s in self._sessions.items()
            if s.expires_at
            and s.expires_at < now
            and s.status not in (UploadSessionStatus.COMPLETED, UploadSessionStatus.EXPIRED)
        ]
        for sid in expired_ids:
            session = self._sessions[sid]
            if session.multipart_upload_id:
                try:
                    await self._storage.abort_multipart(
                        session.storage_path, session.multipart_upload_id
                    )
                except Exception:
                    logger.warning(
                        "Failed to abort multipart upload for expired session %s",
                        sid,
                        exc_info=True,
                    )
            session.status = UploadSessionStatus.EXPIRED
        return len(expired_ids)

    def _get_session(self, session_id: str) -> UploadSession:
        """Look up a session by id, raising NotFoundError when it is unknown."""
        session = self._sessions.get(session_id)
        if session is None:
            from errors import NotFoundError

            raise NotFoundError(message=f"Upload session {session_id} not found")
        return session
@@ -0,0 +1,107 @@
1
+ """Storage backends for resumable uploads."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ from abc import ABC, abstractmethod
7
+
8
+
9
class StorageBackend(ABC):
    """Interface every multipart-capable storage backend has to implement."""

    @abstractmethod
    async def initiate_multipart(self, key: str, content_type: str) -> str:
        """Begin a multipart upload for *key* and return its upload ID."""

    @abstractmethod
    async def upload_part(self, key: str, upload_id: str, part_number: int, data: bytes) -> str:
        """Store one part of an upload and return the part's ETag."""

    @abstractmethod
    async def complete_multipart(
        self,
        key: str,
        upload_id: str,
        parts: list[dict[str, str | int]],
    ) -> str:
        """Finish the multipart upload and return the final key/URL."""

    @abstractmethod
    async def abort_multipart(self, key: str, upload_id: str) -> None:
        """Cancel the multipart upload identified by *upload_id*."""
29
+
30
+
31
class S3MultipartStorage(StorageBackend):
    """AWS S3 multipart upload backend.

    The boto3 client is synchronous, so every request is executed in a worker
    thread to keep the event loop responsive while the request is in flight.
    """

    def __init__(
        self, bucket: str, region: str = "us-east-1", endpoint_url: str | None = None
    ) -> None:
        """Create an S3 client for *bucket*.

        Args:
            bucket: Name of the target bucket.
            region: AWS region passed to the client.
            endpoint_url: Optional custom endpoint passed to the client.

        Raises:
            ImportError: If boto3 is not installed.
        """
        try:
            import boto3
        except ImportError as exc:
            raise ImportError(
                "boto3 is required for S3 storage. "
                "Install with: pip install msaas-resumable-upload[s3]"
            ) from exc
        self.bucket = bucket
        self._client = boto3.client("s3", region_name=region, endpoint_url=endpoint_url)

    async def _call(self, operation, /, **kwargs):
        """Run a blocking client *operation* in a worker thread and return its result."""
        import asyncio

        return await asyncio.to_thread(operation, **kwargs)

    async def initiate_multipart(self, key: str, content_type: str) -> str:
        """Start a multipart upload for *key* and return its upload ID."""
        response = await self._call(
            self._client.create_multipart_upload,
            Bucket=self.bucket,
            Key=key,
            ContentType=content_type,
        )
        return response["UploadId"]

    async def upload_part(self, key: str, upload_id: str, part_number: int, data: bytes) -> str:
        """Upload one part and return the ETag reported by S3."""
        response = await self._call(
            self._client.upload_part,
            Bucket=self.bucket,
            Key=key,
            UploadId=upload_id,
            PartNumber=part_number,
            Body=data,
        )
        return response["ETag"]

    async def complete_multipart(
        self, key: str, upload_id: str, parts: list[dict[str, str | int]]
    ) -> str:
        """Complete the multipart upload and return the object's ``s3://`` URL."""
        # The part list is sent in ascending part-number order.
        ordered = sorted(parts, key=lambda p: p["part_number"])
        await self._call(
            self._client.complete_multipart_upload,
            Bucket=self.bucket,
            Key=key,
            UploadId=upload_id,
            MultipartUpload={
                "Parts": [{"PartNumber": p["part_number"], "ETag": p["etag"]} for p in ordered]
            },
        )
        return f"s3://{self.bucket}/{key}"

    async def abort_multipart(self, key: str, upload_id: str) -> None:
        """Abort the multipart upload identified by *upload_id*."""
        await self._call(
            self._client.abort_multipart_upload,
            Bucket=self.bucket,
            Key=key,
            UploadId=upload_id,
        )
76
+
77
+
78
class InMemoryStorage(StorageBackend):
    """Storage backend that keeps all parts in process memory; meant for tests."""

    def __init__(self) -> None:
        # Running number used to mint upload IDs.
        self._counter = 0
        # upload ID -> {part number -> part bytes} for uploads still open.
        self._uploads: dict[str, dict[int, bytes]] = {}
        # key -> assembled content of completed uploads.
        self._completed: dict[str, bytes] = {}

    async def initiate_multipart(self, key: str, content_type: str) -> str:
        """Register a new, empty upload and return its generated ID."""
        self._counter += 1
        new_id = f"mem-upload-{self._counter}"
        self._uploads[new_id] = {}
        return new_id

    async def upload_part(self, key: str, upload_id: str, part_number: int, data: bytes) -> str:
        """Remember the part's bytes and return their MD5 hex digest as ETag."""
        open_upload = self._uploads[upload_id]
        open_upload[part_number] = data
        digest = hashlib.md5(data)
        return digest.hexdigest()

    async def complete_multipart(
        self, key: str, upload_id: str, parts: list[dict[str, str | int]]
    ) -> str:
        """Join the listed parts in part-number order and store them under *key*."""
        stored = self._uploads.pop(upload_id, {})
        ordered = sorted(parts, key=lambda part: part["part_number"])
        payload = b"".join(stored[part["part_number"]] for part in ordered)
        self._completed[key] = payload
        return f"mem://{key}"

    async def abort_multipart(self, key: str, upload_id: str) -> None:
        """Discard the parts of an open upload; unknown IDs are ignored."""
        self._uploads.pop(upload_id, None)
File without changes
@@ -0,0 +1,104 @@
1
+ """Tests for resumable upload service."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import pytest
6
+
7
+ from resumable_upload.models import (
8
+ ChunkStatus,
9
+ UploadSessionCreate,
10
+ UploadSessionStatus,
11
+ )
12
+ from resumable_upload.service import ResumableUploadService
13
+ from resumable_upload.storage import InMemoryStorage
14
+
15
+
16
@pytest.fixture
def service() -> ResumableUploadService:
    """Provide a service backed by a fresh in-memory storage backend."""
    backend = InMemoryStorage()
    return ResumableUploadService(storage=backend)
19
+
20
+
21
@pytest.fixture
def create_params() -> UploadSessionCreate:
    """Describe a 15 MiB PDF upload that is split into 5 MiB chunks."""
    mib = 1024 * 1024
    return UploadSessionCreate(
        filename="test.pdf",
        file_size=15 * mib,
        content_type="application/pdf",
        chunk_size=5 * mib,
    )
29
+
30
+
31
class TestResumableUploadService:
    """Tests for ResumableUploadService running against the in-memory backend."""

    # Size in bytes of one full chunk produced by the ``create_params`` fixture.
    CHUNK_BYTES = 5 * 1024 * 1024

    @pytest.mark.asyncio
    async def test_create_session(
        self, service: ResumableUploadService, create_params: UploadSessionCreate
    ) -> None:
        """A new session starts initialized with three untouched chunks."""
        created = await service.create_session(create_params)
        assert created.status == UploadSessionStatus.INITIALIZED
        assert created.total_chunks == 3
        assert created.uploaded_chunks == 0
        assert created.progress == 0.0
        assert len(created.chunks) == 3

    @pytest.mark.asyncio
    async def test_upload_chunk(
        self, service: ResumableUploadService, create_params: UploadSessionCreate
    ) -> None:
        """Uploading a chunk records its ETag and moves the session to in-progress."""
        created = await service.create_session(create_params)
        payload = b"x" * self.CHUNK_BYTES
        stored = await service.upload_chunk(created.id, 1, payload)
        assert stored.status == ChunkStatus.UPLOADED
        assert stored.etag is not None

        refreshed = service.get_session(created.id)
        assert refreshed.uploaded_chunks == 1
        assert refreshed.status == UploadSessionStatus.IN_PROGRESS

    @pytest.mark.asyncio
    async def test_complete_session(
        self, service: ResumableUploadService, create_params: UploadSessionCreate
    ) -> None:
        """A session with every chunk uploaded can be completed."""
        created = await service.create_session(create_params)
        for index in range(created.total_chunks):
            payload = b"x" * created.chunks[index].size
            await service.upload_chunk(created.id, index + 1, payload)

        finished = await service.complete_session(created.id)
        assert finished.status == UploadSessionStatus.COMPLETED
        assert finished.progress == 100.0

    @pytest.mark.asyncio
    async def test_abort_session(
        self, service: ResumableUploadService, create_params: UploadSessionCreate
    ) -> None:
        """Aborting a session marks it as failed."""
        created = await service.create_session(create_params)
        await service.abort_session(created.id)
        assert service.get_session(created.id).status == UploadSessionStatus.FAILED

    @pytest.mark.asyncio
    async def test_get_pending_chunks(
        self, service: ResumableUploadService, create_params: UploadSessionCreate
    ) -> None:
        """After one of three chunks is uploaded, two remain pending."""
        created = await service.create_session(create_params)
        await service.upload_chunk(created.id, 1, b"x" * self.CHUNK_BYTES)
        remaining = service.get_pending_chunks(created.id)
        assert len(remaining) == 2

    @pytest.mark.asyncio
    async def test_session_summary(
        self, service: ResumableUploadService, create_params: UploadSessionCreate
    ) -> None:
        """The summary mirrors the filename, progress and chunk count of the session."""
        created = await service.create_session(create_params)
        overview = service.get_session_summary(created.id)
        assert overview.filename == "test.pdf"
        assert overview.progress == 0.0
        assert overview.total_chunks == 3

    @pytest.mark.asyncio
    async def test_cannot_complete_incomplete(
        self, service: ResumableUploadService, create_params: UploadSessionCreate
    ) -> None:
        """Completing a session with missing chunks is rejected."""
        created = await service.create_session(create_params)
        with pytest.raises(Exception, match="chunks still pending"):
            await service.complete_session(created.id)