hawkapi-storage 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hawkapi_storage/__init__.py +43 -0
- hawkapi_storage/_azure.py +195 -0
- hawkapi_storage/_base.py +88 -0
- hawkapi_storage/_gcs.py +154 -0
- hawkapi_storage/_local.py +185 -0
- hawkapi_storage/_plugin.py +49 -0
- hawkapi_storage/_s3.py +180 -0
- hawkapi_storage/py.typed +0 -0
- hawkapi_storage-0.1.0.dist-info/METADATA +167 -0
- hawkapi_storage-0.1.0.dist-info/RECORD +12 -0
- hawkapi_storage-0.1.0.dist-info/WHEEL +4 -0
- hawkapi_storage-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""hawkapi-storage — pluggable file storage for HawkAPI.
|
|
2
|
+
|
|
3
|
+
Backends: local filesystem, AWS S3 (extras ``[s3]``), Google Cloud Storage
|
|
4
|
+
(extras ``[gcs]``), Azure Blob Storage (extras ``[azure]``). Single
|
|
5
|
+
:class:`Storage` protocol — swap backends freely.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from ._azure import AzureConfig, AzureStorage
|
|
11
|
+
from ._base import (
|
|
12
|
+
NotFoundError,
|
|
13
|
+
Storage,
|
|
14
|
+
StorageError,
|
|
15
|
+
StoredObject,
|
|
16
|
+
guess_content_type,
|
|
17
|
+
)
|
|
18
|
+
from ._gcs import GCSConfig, GCSStorage
|
|
19
|
+
from ._local import LocalConfig, LocalStorage
|
|
20
|
+
from ._plugin import get_storage, init_storage, resolve_storage
|
|
21
|
+
from ._s3 import S3Config, S3Storage
|
|
22
|
+
|
|
23
|
+
__version__ = "0.1.0"
|
|
24
|
+
|
|
25
|
+
__all__ = [
|
|
26
|
+
"AzureConfig",
|
|
27
|
+
"AzureStorage",
|
|
28
|
+
"GCSConfig",
|
|
29
|
+
"GCSStorage",
|
|
30
|
+
"LocalConfig",
|
|
31
|
+
"LocalStorage",
|
|
32
|
+
"NotFoundError",
|
|
33
|
+
"S3Config",
|
|
34
|
+
"S3Storage",
|
|
35
|
+
"Storage",
|
|
36
|
+
"StorageError",
|
|
37
|
+
"StoredObject",
|
|
38
|
+
"__version__",
|
|
39
|
+
"get_storage",
|
|
40
|
+
"guess_content_type",
|
|
41
|
+
"init_storage",
|
|
42
|
+
"resolve_storage",
|
|
43
|
+
]
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
"""Azure Blob Storage backend."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
from collections.abc import AsyncIterator
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from datetime import UTC, datetime, timedelta
|
|
9
|
+
from typing import Any, BinaryIO
|
|
10
|
+
|
|
11
|
+
from ._base import NotFoundError, StorageError, StoredObject, guess_content_type, to_bytes
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(slots=True)
class AzureConfig:
    """Settings for :class:`AzureStorage`.

    Authenticate either with ``connection_string`` alone, or with
    ``account_url`` + ``account_key`` (shared-key credential).
    """

    container: str  # blob container name
    connection_string: str = ""  # full Azure connection string; takes precedence when set
    account_url: str = ""
    """e.g. https://myaccount.blob.core.windows.net — used with ``credential``."""

    # NOTE(review): account_name is never read by AzureStorage (signed_url takes
    # the account name from the container client) — confirm it is still needed.
    account_name: str = ""
    account_key: str = ""  # shared key; also used for SAS token generation
|
|
24
|
+
|
|
25
|
+
@dataclass
class AzureStorage:
    """Azure Blob Storage backend.

    The synchronous ``azure-storage-blob`` SDK is imported lazily (so the
    extra stays optional) and every blocking call is offloaded with
    ``asyncio.to_thread``. The service/container clients are built once and
    cached on the instance.
    """

    config: AzureConfig
    name: str = "azure"  # backend identifier exposed via the Storage protocol
    _service: Any = field(default=None, init=False)  # cached BlobServiceClient
    _container: Any = field(default=None, init=False)  # cached ContainerClient

    def _get_container(self) -> Any:
        """Lazily build and cache the container client.

        Raises StorageError when the SDK is missing or the config specifies
        neither a connection string nor account_url+account_key.
        """
        if self._container is not None:
            return self._container
        try:
            from azure.storage.blob import BlobServiceClient  # type: ignore[import-not-found]
        except ImportError as exc:  # pragma: no cover
            raise StorageError(
                "azure-storage-blob not installed; pip install 'hawkapi-storage[azure]'"
            ) from exc
        # connection_string wins over account_url+account_key when both are set.
        if self.config.connection_string:
            self._service = BlobServiceClient.from_connection_string(self.config.connection_string)
        elif self.config.account_url and self.config.account_key:
            self._service = BlobServiceClient(
                account_url=self.config.account_url, credential=self.config.account_key
            )
        else:
            raise StorageError("AzureConfig requires connection_string or account_url+account_key")
        self._container = self._service.get_container_client(self.config.container)
        return self._container

    async def put(
        self,
        key: str,
        data: bytes | BinaryIO | AsyncIterator[bytes],
        *,
        content_type: str | None = None,
        metadata: dict[str, str] | None = None,
    ) -> StoredObject:
        """Upload *data* under *key*, overwriting any existing blob.

        Async iterators are drained fully into memory before uploading;
        metadata values are stringified.
        """
        if not isinstance(data, (bytes, bytearray)) and not hasattr(data, "read"):
            # Neither bytes-like nor file-like: treat as an async byte iterator.
            chunks: list[bytes] = []
            async for chunk in data:
                chunks.append(chunk)
            data = b"".join(chunks)
        body = to_bytes(data)  # type: ignore[arg-type]
        container = self._get_container()

        def _upload() -> None:
            # Imported here so the module is importable without the extra.
            from azure.storage.blob import ContentSettings  # type: ignore[import-not-found]

            blob = container.get_blob_client(key)
            blob.upload_blob(
                body,
                overwrite=True,
                content_settings=ContentSettings(
                    content_type=content_type or guess_content_type(key)
                ),
                # Empty metadata becomes None so no metadata header is sent.
                metadata={k: str(v) for k, v in (metadata or {}).items()} or None,
            )

        await asyncio.to_thread(_upload)
        return StoredObject(
            key=key,
            size=len(body),
            content_type=content_type or guess_content_type(key),
            metadata=dict(metadata or {}),
        )

    async def get(self, key: str) -> bytes:
        """Return the full blob payload; raises NotFoundError when absent.

        NOTE(review): any download failure (auth, network) is also reported
        as NotFoundError — confirm that coarse mapping is intended.
        """
        container = self._get_container()

        def _download() -> bytes:
            blob = container.get_blob_client(key)
            try:
                stream = blob.download_blob()
            except Exception as exc:
                raise NotFoundError(key) from exc
            return stream.readall()

        return await asyncio.to_thread(_download)

    async def stream(self, key: str, *, chunk_size: int = 65536) -> AsyncIterator[bytes]:
        """Yield the payload in *chunk_size* pieces (downloads fully first)."""
        data = await self.get(key)
        for i in range(0, len(data), chunk_size):
            yield data[i : i + chunk_size]

    async def exists(self, key: str) -> bool:
        """True when the blob exists, via the SDK's exists() probe."""
        container = self._get_container()
        return await asyncio.to_thread(container.get_blob_client(key).exists)

    async def delete(self, key: str) -> None:
        """Best-effort delete of *key*.

        NOTE(review): every failure is swallowed here, not just not-found —
        confirm transient errors should also be silent.
        """
        container = self._get_container()
        try:
            await asyncio.to_thread(container.delete_blob, key)
        except Exception:
            pass

    async def head(self, key: str) -> StoredObject:
        """Return blob metadata without the payload; raises NotFoundError."""
        container = self._get_container()

        def _props() -> StoredObject:
            blob = container.get_blob_client(key)
            try:
                props = blob.get_blob_properties()
            except Exception as exc:
                raise NotFoundError(key) from exc
            return StoredObject(
                key=key,
                size=props.size or 0,
                content_type=(
                    props.content_settings.content_type if props.content_settings else None
                )
                or guess_content_type(key),
                last_modified=props.last_modified,
                etag=str(props.etag or "").strip('"'),
                metadata=dict(props.metadata or {}),
            )

        return await asyncio.to_thread(_props)

    async def list(self, prefix: str = "", *, limit: int = 1000) -> AsyncIterator[StoredObject]:
        """Yield up to *limit* blobs whose names start with *prefix*.

        The full listing is materialised in a thread; *limit* is enforced
        client-side while yielding (no etag is populated here).
        """
        container = self._get_container()

        def _list() -> list[Any]:
            return list(
                container.list_blobs(name_starts_with=prefix or None, results_per_page=limit)
            )

        emitted = 0
        for item in await asyncio.to_thread(_list):
            yield StoredObject(
                key=item.name,
                size=item.size or 0,
                content_type=(item.content_settings.content_type if item.content_settings else None)
                or guess_content_type(item.name),
                last_modified=item.last_modified,
            )
            emitted += 1
            if emitted >= limit:
                return

    async def signed_url(
        self,
        key: str,
        *,
        expires_in: int = 3600,
        method: str = "GET",
        content_type: str | None = None,
    ) -> str:
        """Return a SAS URL for *key* valid for *expires_in* seconds.

        *content_type* is accepted for protocol parity but unused.
        NOTE(review): with connection-string auth, config.account_key may be
        empty and ``account_key=None`` is passed to generate_blob_sas —
        confirm SAS generation works in that configuration.
        """
        _ = content_type
        try:
            from azure.storage.blob import (  # type: ignore[import-not-found]
                BlobSasPermissions,
                generate_blob_sas,
            )
        except ImportError as exc:  # pragma: no cover
            raise StorageError("azure-storage-blob not installed") from exc
        container = self._get_container()
        account_name = container.account_name
        # Map the HTTP verb onto SAS permissions: GET -> read, PUT -> write.
        permissions = BlobSasPermissions(
            read=method.upper() == "GET", write=method.upper() == "PUT"
        )
        token = await asyncio.to_thread(
            generate_blob_sas,
            account_name=account_name,
            container_name=self.config.container,
            blob_name=key,
            account_key=self.config.account_key or None,
            permission=permissions,
            expiry=datetime.now(UTC) + timedelta(seconds=expires_in),
        )
        return f"{container.url}/{key}?{token}"
|
193
|
+
|
|
194
|
+
|
|
195
|
+
__all__ = ["AzureConfig", "AzureStorage"]
|
hawkapi_storage/_base.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""Storage backend abstraction."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import io
|
|
6
|
+
import mimetypes
|
|
7
|
+
from collections.abc import AsyncIterator
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from typing import BinaryIO, Protocol
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(slots=True)
class StoredObject:
    """Metadata describing a single stored object, as returned by backends."""

    key: str  # object key / relative path within the backend
    size: int = 0  # payload size in bytes (0 when unknown)
    content_type: str = "application/octet-stream"  # MIME type, best-effort
    last_modified: datetime | None = None  # backend timestamp, None when unknown
    etag: str = ""  # entity tag with surrounding quotes stripped, "" when unknown
    metadata: dict[str, str] = field(default_factory=dict)  # user-supplied metadata
|
|
22
|
+
|
|
23
|
+
class StorageError(Exception):
    """Base error raised by every backend when a storage primitive fails.

    Also raised when an optional backend SDK is not installed or a backend
    is misconfigured.
    """
|
|
26
|
+
|
|
27
|
+
class NotFoundError(StorageError):
    """Raised when a key does not exist in the backend."""
|
|
30
|
+
|
|
31
|
+
class Storage(Protocol):
    """The minimal contract every backend implements.

    Structural (duck-typed) protocol: any object with these async methods
    and a ``name`` attribute is a valid backend.
    """

    name: str  # short backend identifier, e.g. "local", "s3", "gcs", "azure"

    async def put(
        self,
        key: str,
        data: bytes | BinaryIO | AsyncIterator[bytes],
        *,
        content_type: str | None = None,
        metadata: dict[str, str] | None = None,
    ) -> StoredObject:
        """Store *data* under *key* and return the resulting object metadata."""
        ...

    async def get(self, key: str) -> bytes:
        """Return the full payload for *key*; raises NotFoundError when absent."""
        ...

    async def stream(self, key: str, *, chunk_size: int = 65536) -> AsyncIterator[bytes]:
        """Yield the payload for *key* in chunks of at most *chunk_size* bytes."""
        ...

    async def exists(self, key: str) -> bool:
        """Return True when *key* is present in the backend."""
        ...

    async def delete(self, key: str) -> None:
        """Remove the object stored at *key*."""
        ...

    async def head(self, key: str) -> StoredObject:
        """Return metadata for *key* without the payload; raises NotFoundError."""
        ...

    async def list(self, prefix: str = "", *, limit: int = 1000) -> AsyncIterator[StoredObject]:
        """Yield up to *limit* objects whose keys start with *prefix*."""
        ...

    async def signed_url(
        self,
        key: str,
        *,
        expires_in: int = 3600,
        method: str = "GET",
        content_type: str | None = None,
    ) -> str:
        """Return a pre-signed URL for *key*, valid for *expires_in* seconds."""
        ...
|
+
|
|
66
|
+
|
|
67
|
+
def guess_content_type(key: str) -> str:
    """Best-effort MIME guess from the key/filename.

    Falls back to ``application/octet-stream`` when the extension is
    unknown or the key has no extension.
    """
    guessed, _encoding = mimetypes.guess_type(key)
    return guessed if guessed else "application/octet-stream"
|
|
71
|
+
|
|
72
|
+
def to_bytes(data: bytes | BinaryIO) -> bytes:
|
|
73
|
+
"""Read a bytes/file-like into memory. Used by backends that need a single buffer."""
|
|
74
|
+
if isinstance(data, bytes):
|
|
75
|
+
return data
|
|
76
|
+
if isinstance(data, io.IOBase) or hasattr(data, "read"):
|
|
77
|
+
return data.read()
|
|
78
|
+
raise TypeError(f"unsupported data type: {type(data).__name__}")
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
__all__ = [
|
|
82
|
+
"NotFoundError",
|
|
83
|
+
"Storage",
|
|
84
|
+
"StorageError",
|
|
85
|
+
"StoredObject",
|
|
86
|
+
"guess_content_type",
|
|
87
|
+
"to_bytes",
|
|
88
|
+
]
|
hawkapi_storage/_gcs.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
"""Google Cloud Storage backend (sync API offloaded to a thread)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
from collections.abc import AsyncIterator
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from datetime import timedelta
|
|
9
|
+
from typing import Any, BinaryIO
|
|
10
|
+
|
|
11
|
+
from ._base import NotFoundError, StorageError, StoredObject, guess_content_type, to_bytes
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(slots=True)
class GCSConfig:
    """Settings for :class:`GCSStorage`."""

    bucket: str  # GCS bucket name
    project: str = ""  # optional GCP project; empty lets the client decide
    credentials_path: str = ""  # service-account JSON path; empty uses the SDK's default credentials
|
20
|
+
|
|
21
|
+
@dataclass
class GCSStorage:
    """Google Cloud Storage backend (sync API offloaded to a thread).

    The ``google-cloud-storage`` SDK is imported lazily so the extra stays
    optional; the client and bucket handles are built once and cached.
    """

    config: GCSConfig
    name: str = "gcs"  # backend identifier exposed via the Storage protocol
    _client: Any = field(default=None, init=False)  # cached storage.Client
    _bucket: Any = field(default=None, init=False)  # cached bucket handle

    def _get_bucket(self) -> Any:
        """Lazily build and cache the bucket handle.

        Raises StorageError when the SDK is not installed.
        """
        if self._bucket is not None:
            return self._bucket
        try:
            from google.cloud import storage  # type: ignore[import-not-found]
        except ImportError as exc:  # pragma: no cover
            raise StorageError(
                "google-cloud-storage not installed; pip install 'hawkapi-storage[gcs]'"
            ) from exc
        # Explicit service-account file wins; otherwise the SDK's default
        # credential lookup is used.
        if self.config.credentials_path:
            self._client = storage.Client.from_service_account_json(self.config.credentials_path)
        else:
            self._client = storage.Client(project=self.config.project or None)
        self._bucket = self._client.bucket(self.config.bucket)
        return self._bucket

    async def put(
        self,
        key: str,
        data: bytes | BinaryIO | AsyncIterator[bytes],
        *,
        content_type: str | None = None,
        metadata: dict[str, str] | None = None,
    ) -> StoredObject:
        """Upload *data* under *key*.

        Async iterators are drained fully into memory before uploading;
        metadata values are stringified and attached to the blob.
        """
        if not isinstance(data, (bytes, bytearray)) and not hasattr(data, "read"):
            # Neither bytes-like nor file-like: treat as an async byte iterator.
            chunks: list[bytes] = []
            async for chunk in data:
                chunks.append(chunk)
            data = b"".join(chunks)
        body = to_bytes(data)  # type: ignore[arg-type]
        bucket = self._get_bucket()
        blob = bucket.blob(key)
        if metadata:
            blob.metadata = {k: str(v) for k, v in metadata.items()}

        def _upload() -> None:
            blob.upload_from_string(body, content_type=content_type or guess_content_type(key))

        await asyncio.to_thread(_upload)
        return StoredObject(
            key=key,
            size=len(body),
            content_type=content_type or guess_content_type(key),
            metadata=dict(metadata or {}),
        )

    async def get(self, key: str) -> bytes:
        """Return the full blob payload; raises NotFoundError when absent."""
        bucket = self._get_bucket()
        blob = bucket.blob(key)

        def _download() -> bytes:
            if not blob.exists():
                raise NotFoundError(key)
            return blob.download_as_bytes()

        return await asyncio.to_thread(_download)

    async def stream(self, key: str, *, chunk_size: int = 65536) -> AsyncIterator[bytes]:
        # The GCS SDK does not expose a true streaming reader for blobs; we
        # download once and re-yield in chunks, which is fine for the typical
        # API-served-file size and keeps memory bounded for callers.
        data = await self.get(key)
        for i in range(0, len(data), chunk_size):
            yield data[i : i + chunk_size]

    async def exists(self, key: str) -> bool:
        """True when the blob exists, via the SDK's exists() probe."""
        bucket = self._get_bucket()
        return await asyncio.to_thread(bucket.blob(key).exists)

    async def delete(self, key: str) -> None:
        """Remove *key*.

        NOTE(review): unlike the other backends, a missing key here
        propagates the SDK's NotFound exception rather than being swallowed
        or mapped to NotFoundError — confirm that asymmetry is intended.
        """
        bucket = self._get_bucket()
        await asyncio.to_thread(bucket.blob(key).delete)

    async def head(self, key: str) -> StoredObject:
        """Return blob metadata without the payload; raises NotFoundError."""
        bucket = self._get_bucket()
        blob = bucket.blob(key)

        def _reload() -> StoredObject:
            if not blob.exists():
                raise NotFoundError(key)
            # reload() populates size/content_type/updated/etag/metadata.
            blob.reload()
            return StoredObject(
                key=key,
                size=blob.size or 0,
                content_type=blob.content_type or guess_content_type(key),
                last_modified=blob.updated,
                etag=str(blob.etag or "").strip('"'),
                metadata=dict(blob.metadata or {}),
            )

        return await asyncio.to_thread(_reload)

    async def list(self, prefix: str = "", *, limit: int = 1000) -> AsyncIterator[StoredObject]:
        """Yield up to *limit* blobs whose names start with *prefix*.

        The listing is materialised in a thread (max_results caps it
        server-side) and then re-yielded here.
        """
        bucket = self._get_bucket()

        def _enumerate() -> list[Any]:
            return list(bucket.list_blobs(prefix=prefix or None, max_results=limit))

        for blob in await asyncio.to_thread(_enumerate):
            yield StoredObject(
                key=blob.name,
                size=blob.size or 0,
                content_type=blob.content_type or guess_content_type(blob.name),
                last_modified=blob.updated,
                etag=str(blob.etag or "").strip('"'),
            )

    async def signed_url(
        self,
        key: str,
        *,
        expires_in: int = 3600,
        method: str = "GET",
        content_type: str | None = None,
    ) -> str:
        """Return a v4 signed URL for *key*, valid for *expires_in* seconds."""
        bucket = self._get_bucket()
        blob = bucket.blob(key)
        return await asyncio.to_thread(
            blob.generate_signed_url,
            expiration=timedelta(seconds=expires_in),
            method=method.upper(),
            content_type=content_type,
            version="v4",
        )
|
152
|
+
|
|
153
|
+
|
|
154
|
+
__all__ = ["GCSConfig", "GCSStorage"]
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""Local filesystem backend — useful for dev + tests."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import base64
|
|
7
|
+
import hashlib
|
|
8
|
+
import hmac
|
|
9
|
+
import os
|
|
10
|
+
import shutil
|
|
11
|
+
import time
|
|
12
|
+
from collections.abc import AsyncIterator
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from datetime import UTC, datetime
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import BinaryIO
|
|
17
|
+
from urllib.parse import quote, urlencode
|
|
18
|
+
|
|
19
|
+
from ._base import (
|
|
20
|
+
NotFoundError,
|
|
21
|
+
StorageError,
|
|
22
|
+
StoredObject,
|
|
23
|
+
guess_content_type,
|
|
24
|
+
to_bytes,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass(slots=True)
class LocalConfig:
    """Settings for :class:`LocalStorage`."""

    root: str
    """Filesystem directory that holds the objects."""

    base_url: str = ""
    """Public base URL prefix used by :meth:`signed_url`. Leave empty to return a
    ``file://`` URL (only meaningful for tests)."""

    signing_secret: str = ""
    """HMAC secret for short-lived download URLs. Generated lazily if unset."""
|
40
|
+
|
|
41
|
+
@dataclass
class LocalStorage:
    """Local filesystem backend — useful for dev + tests.

    Objects are plain files under ``config.root``; blocking filesystem calls
    are offloaded with ``asyncio.to_thread``. Signed URLs are HMAC-SHA256
    tokens verifiable via :meth:`verify_signed_url`.
    """

    config: LocalConfig
    name: str = "local"  # backend identifier exposed via the Storage protocol
    _secret: str = field(default="", init=False)  # effective HMAC secret (see __post_init__)

    def __post_init__(self) -> None:
        # Ensure the root directory exists and pick a signing secret:
        # either the configured one or a random per-instance secret
        # (random secrets invalidate URLs across restarts).
        os.makedirs(self.config.root, exist_ok=True)
        self._secret = (
            self.config.signing_secret or base64.urlsafe_b64encode(os.urandom(32)).decode()
        )

    def _path(self, key: str) -> Path:
        """Map *key* to a path under root, rejecting '..' traversal."""
        safe = key.lstrip("/")
        if ".." in Path(safe).parts:
            raise StorageError("invalid key (path traversal)")
        return Path(self.config.root, safe)

    async def put(
        self,
        key: str,
        data: bytes | BinaryIO | AsyncIterator[bytes],
        *,
        content_type: str | None = None,
        metadata: dict[str, str] | None = None,
    ) -> StoredObject:
        """Write *data* to the file for *key*, creating parent directories.

        *metadata* is echoed back in the result but not persisted to disk.
        """
        path = self._path(key)
        await asyncio.to_thread(path.parent.mkdir, parents=True, exist_ok=True)
        if isinstance(data, (bytes, bytearray)) or hasattr(data, "read"):
            buf = to_bytes(data)  # type: ignore[arg-type]
            await asyncio.to_thread(path.write_bytes, buf)
        else:
            # Async byte iterator: drain fully, then write once.
            chunks: list[bytes] = []
            async for chunk in data:
                chunks.append(chunk)
            await asyncio.to_thread(path.write_bytes, b"".join(chunks))
        stat = await asyncio.to_thread(path.stat)
        return StoredObject(
            key=key,
            size=stat.st_size,
            content_type=content_type or guess_content_type(key),
            last_modified=datetime.fromtimestamp(stat.st_mtime, tz=UTC),
            metadata=dict(metadata or {}),
        )

    async def get(self, key: str) -> bytes:
        """Return the file contents; raises NotFoundError when absent."""
        path = self._path(key)
        if not path.exists():
            raise NotFoundError(key)
        return await asyncio.to_thread(path.read_bytes)

    async def stream(self, key: str, *, chunk_size: int = 65536) -> AsyncIterator[bytes]:
        """Yield the file in *chunk_size* pieces.

        The whole file is read into a chunk list inside one thread hop
        before yielding (not a true incremental stream).
        """
        path = self._path(key)
        if not path.exists():
            raise NotFoundError(key)

        def _chunks() -> list[bytes]:
            out: list[bytes] = []
            with path.open("rb") as fh:
                while True:
                    chunk = fh.read(chunk_size)
                    if not chunk:
                        break
                    out.append(chunk)
            return out

        for chunk in await asyncio.to_thread(_chunks):
            yield chunk

    async def exists(self, key: str) -> bool:
        """True when a file (or directory) exists at the key's path."""
        return self._path(key).exists()

    async def delete(self, key: str) -> None:
        """Remove the file at *key* (or a whole directory tree); missing keys are a no-op."""
        path = self._path(key)
        if path.is_dir():
            await asyncio.to_thread(shutil.rmtree, str(path))
        elif path.exists():
            await asyncio.to_thread(path.unlink)

    async def head(self, key: str) -> StoredObject:
        """Return file metadata without the payload; raises NotFoundError.

        Note: no etag/metadata is produced by this backend.
        """
        path = self._path(key)
        if not path.exists():
            raise NotFoundError(key)
        stat = await asyncio.to_thread(path.stat)
        return StoredObject(
            key=key,
            size=stat.st_size,
            content_type=guess_content_type(key),
            last_modified=datetime.fromtimestamp(stat.st_mtime, tz=UTC),
        )

    async def list(self, prefix: str = "", *, limit: int = 1000) -> AsyncIterator[StoredObject]:
        """Yield up to *limit* files under root whose relative key starts with *prefix*.

        Walks the whole tree (sorted for deterministic order) and filters by
        string prefix, matching the S3-style prefix semantics.
        """
        root = Path(self.config.root)
        target = root / prefix.lstrip("/") if prefix else root
        if not root.exists():
            return
        count = 0
        for path in sorted(root.rglob("*")):
            if not path.is_file():
                continue
            rel = path.relative_to(root).as_posix()
            if prefix and not rel.startswith(prefix.lstrip("/")):
                continue
            # NOTE(review): this second check looks redundant with the
            # startswith filter above — confirm it can be dropped.
            if not str(path).startswith(str(target)) and prefix:
                continue
            stat = await asyncio.to_thread(path.stat)
            yield StoredObject(
                key=rel,
                size=stat.st_size,
                content_type=guess_content_type(rel),
                last_modified=datetime.fromtimestamp(stat.st_mtime, tz=UTC),
            )
            count += 1
            if count >= limit:
                return

    async def signed_url(
        self,
        key: str,
        *,
        expires_in: int = 3600,
        method: str = "GET",
        content_type: str | None = None,
    ) -> str:
        """Return an HMAC-signed URL for *key* (GET or PUT only).

        *content_type* is accepted for protocol parity but unused. The
        signature covers method, key and expiry; see verify_signed_url.
        """
        _ = content_type
        if method.upper() not in {"GET", "PUT"}:
            raise StorageError("LocalStorage only signs GET / PUT")
        expires = int(time.time()) + max(1, expires_in)
        msg = f"{method.upper()}:{key}:{expires}".encode()
        sig = hmac.new(self._secret.encode(), msg, hashlib.sha256).hexdigest()
        query = urlencode({"expires": expires, "sig": sig, "method": method.upper()})
        if self.config.base_url:
            base = self.config.base_url.rstrip("/")
            return f"{base}/{quote(key)}?{query}"
        return f"file://{self._path(key)}?{query}"

    def verify_signed_url(self, key: str, expires: int, sig: str, *, method: str = "GET") -> bool:
        """Validate a signature produced by :meth:`signed_url` (constant-time compare)."""
        if expires < int(time.time()):
            return False
        msg = f"{method.upper()}:{key}:{expires}".encode()
        expected = hmac.new(self._secret.encode(), msg, hashlib.sha256).hexdigest()
        return hmac.compare_digest(expected, sig)
|
183
|
+
|
|
184
|
+
|
|
185
|
+
__all__ = ["LocalConfig", "LocalStorage"]
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""Plugin entry point + DI helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from hawkapi import HTTPException, Request
|
|
8
|
+
|
|
9
|
+
from ._base import Storage
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class _StateNamespace:
    """Minimal stand-in for an app's ``state`` container.

    Used by init_storage() when the host app object has no ``state``
    attribute; only ``storage`` is ever assigned onto it.
    """

    storage: Any
|
+
|
|
15
|
+
|
|
16
|
+
_ACTIVE: dict[int, Storage] = {}
|
|
17
|
+
_LAST: list[Storage | None] = [None]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def init_storage(app: Any, *, storage: Storage) -> Storage:
    """Attach a :class:`Storage` to ``app.state.storage`` and register it for DI lookup.

    Also records the instance in the module-level registries so that
    :func:`resolve_storage` can find it by app identity or as a global
    last-resort fallback.
    """
    state = getattr(app, "state", None)
    if state is None:
        # Tolerate bare app objects that carry no state container yet.
        state = _StateNamespace()
        app.state = state
    state.storage = storage
    _ACTIVE[id(app)] = storage
    _LAST[0] = storage
    return storage
|
+
|
|
29
|
+
|
|
30
|
+
def resolve_storage(app: Any) -> Storage | None:
    """Return the Storage associated with *app*, or None when nothing is registered.

    Lookup order: (1) the registry keyed by app identity, (2) an existing
    ``app.state.storage`` attribute, (3) the last storage initialised
    anywhere in the process. ``app=None`` short-circuits to (3).
    """
    if app is None:
        return _LAST[0]
    registered = _ACTIVE.get(id(app))
    if registered is not None:
        return registered
    state = getattr(app, "state", None)
    if state is not None and hasattr(state, "storage"):
        return state.storage  # type: ignore[no-any-return]
    return _LAST[0]
|
+
|
|
41
|
+
|
|
42
|
+
def get_storage(request: Request) -> Storage:
    """DI helper: fetch the configured Storage for the request's app.

    Raises HTTPException(500) when no storage has been registered.
    """
    storage = resolve_storage(request.scope.get("app"))
    if storage is None:
        raise HTTPException(500, detail="Storage not configured — call init_storage(app, ...)")
    return storage
|
+
|
|
48
|
+
|
|
49
|
+
__all__ = ["get_storage", "init_storage", "resolve_storage"]
|
hawkapi_storage/_s3.py
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
"""AWS S3 backend (boto3, sync API offloaded to a thread)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
from collections.abc import AsyncIterator
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from typing import Any, BinaryIO
|
|
9
|
+
|
|
10
|
+
from ._base import NotFoundError, StorageError, StoredObject, guess_content_type, to_bytes
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(slots=True)
class S3Config:
    """Settings for :class:`S3Storage`."""

    bucket: str  # S3 bucket name
    region: str = "us-east-1"  # AWS region passed to the client
    aws_access_key_id: str = ""  # explicit credentials; empty defers to boto3's lookup
    aws_secret_access_key: str = ""
    endpoint_url: str = ""
    """Set to a non-AWS S3-compatible endpoint (MinIO, Wasabi, …)."""

    use_path_style: bool = False
    """Path-style addressing for MinIO and friends."""
|
|
25
|
+
|
|
26
|
+
@dataclass
|
|
27
|
+
class S3Storage:
|
|
28
|
+
config: S3Config
|
|
29
|
+
name: str = "s3"
|
|
30
|
+
_client: Any = field(default=None, init=False)
|
|
31
|
+
|
|
32
|
+
    def _get_client(self) -> Any:
        """Lazily build and cache the boto3 S3 client.

        boto3 is imported here so the [s3] extra stays optional; raises
        StorageError when it is not installed.
        """
        if self._client is not None:
            return self._client
        try:
            import boto3
            from botocore.config import Config
        except ImportError as exc:  # pragma: no cover
            raise StorageError("boto3 not installed; pip install 'hawkapi-storage[s3]'") from exc
        kwargs: dict[str, Any] = {"region_name": self.config.region}
        if self.config.aws_access_key_id:
            # Explicit credentials; otherwise boto3's normal lookup applies.
            kwargs["aws_access_key_id"] = self.config.aws_access_key_id
            kwargs["aws_secret_access_key"] = self.config.aws_secret_access_key
        if self.config.endpoint_url:
            kwargs["endpoint_url"] = self.config.endpoint_url
        if self.config.use_path_style:
            # MinIO and similar S3-compatibles typically require path-style URLs.
            kwargs["config"] = Config(s3={"addressing_style": "path"})
        self._client = boto3.client("s3", **kwargs)
        return self._client
|
+
|
|
51
|
+
    async def put(
        self,
        key: str,
        data: bytes | BinaryIO | AsyncIterator[bytes],
        *,
        content_type: str | None = None,
        metadata: dict[str, str] | None = None,
    ) -> StoredObject:
        """Upload *data* to ``s3://<bucket>/<key>`` via a worker thread.

        Async iterators are drained fully into memory first; metadata
        values are stringified as S3 requires.
        """
        if not isinstance(data, (bytes, bytearray)) and not hasattr(data, "read"):
            # Neither bytes-like nor file-like: treat as an async byte iterator.
            chunks: list[bytes] = []
            async for chunk in data:
                chunks.append(chunk)
            data = b"".join(chunks)
        body = to_bytes(data)  # type: ignore[arg-type]
        client = self._get_client()
        kwargs: dict[str, Any] = {
            "Bucket": self.config.bucket,
            "Key": key,
            "Body": body,
            "ContentType": content_type or guess_content_type(key),
        }
        if metadata:
            kwargs["Metadata"] = {k: str(v) for k, v in metadata.items()}
        await asyncio.to_thread(client.put_object, **kwargs)
        return StoredObject(
            key=key,
            size=len(body),
            content_type=kwargs["ContentType"],
            metadata=dict(metadata or {}),
        )
|
+
|
|
82
|
+
async def get(self, key: str) -> bytes:
|
|
83
|
+
client = self._get_client()
|
|
84
|
+
try:
|
|
85
|
+
obj = await asyncio.to_thread(client.get_object, Bucket=self.config.bucket, Key=key)
|
|
86
|
+
except Exception as exc:
|
|
87
|
+
raise NotFoundError(key) from exc
|
|
88
|
+
return await asyncio.to_thread(obj["Body"].read)
|
|
89
|
+
|
|
90
|
+
    async def stream(self, key: str, *, chunk_size: int = 65536) -> AsyncIterator[bytes]:
        """Yield the object body in *chunk_size* pieces, reading in a thread.

        Raises NotFoundError when the GetObject call fails.
        """
        client = self._get_client()
        try:
            obj = await asyncio.to_thread(client.get_object, Bucket=self.config.bucket, Key=key)
        except Exception as exc:
            raise NotFoundError(key) from exc
        stream = obj["Body"]
        while True:
            chunk = await asyncio.to_thread(stream.read, chunk_size)
            if not chunk:
                break
            yield chunk
|
+
|
|
103
|
+
async def exists(self, key: str) -> bool:
|
|
104
|
+
client = self._get_client()
|
|
105
|
+
try:
|
|
106
|
+
await asyncio.to_thread(client.head_object, Bucket=self.config.bucket, Key=key)
|
|
107
|
+
return True
|
|
108
|
+
except Exception:
|
|
109
|
+
return False
|
|
110
|
+
|
|
111
|
+
async def delete(self, key: str) -> None:
|
|
112
|
+
client = self._get_client()
|
|
113
|
+
await asyncio.to_thread(client.delete_object, Bucket=self.config.bucket, Key=key)
|
|
114
|
+
|
|
115
|
+
async def head(self, key: str) -> StoredObject:
    """Return metadata for *key* without downloading the body.

    Raises:
        NotFoundError: if no object exists at *key*.
    """
    client = self._get_client()
    try:
        meta = await asyncio.to_thread(client.head_object, Bucket=self.config.bucket, Key=key)
    except Exception as exc:
        # Only a missing key becomes NotFoundError; auth and network
        # failures propagate unchanged.
        code = str(getattr(exc, "response", {}).get("Error", {}).get("Code", ""))
        if code in ("404", "NoSuchKey", "NotFound"):
            raise NotFoundError(key) from exc
        raise
    return StoredObject(
        key=key,
        size=meta.get("ContentLength", 0),
        content_type=meta.get("ContentType", guess_content_type(key)),
        last_modified=meta.get("LastModified"),
        # S3 wraps the ETag in literal double quotes; strip them.
        etag=str(meta.get("ETag", "")).strip('"'),
        metadata=dict(meta.get("Metadata", {})),
    )
|
|
129
|
+
|
|
130
|
+
async def list(self, prefix: str = "", *, limit: int = 1000) -> AsyncIterator[StoredObject]:
    """Yield up to *limit* objects whose keys start with *prefix*.

    Pages through ``list_objects_v2`` transparently; stops as soon as
    *limit* objects have been yielded or the listing is exhausted.
    """
    # Guard: without this, limit <= 0 would send an invalid MaxKeys <= 0
    # to S3 instead of simply yielding nothing.
    if limit <= 0:
        return
    client = self._get_client()
    token: str | None = None
    emitted = 0
    while True:
        kwargs: dict[str, Any] = {
            "Bucket": self.config.bucket,
            "Prefix": prefix,
            # S3 caps a single page at 1000 keys.
            "MaxKeys": min(limit - emitted, 1000),
        }
        if token:
            kwargs["ContinuationToken"] = token
        resp = await asyncio.to_thread(client.list_objects_v2, **kwargs)
        for obj in resp.get("Contents", []):
            yield StoredObject(
                key=obj["Key"],
                size=obj.get("Size", 0),
                # The listing response carries no ContentType; infer from key.
                content_type=guess_content_type(obj["Key"]),
                last_modified=obj.get("LastModified"),
                etag=str(obj.get("ETag", "")).strip('"'),
            )
            emitted += 1
            if emitted >= limit:
                return
        if not resp.get("IsTruncated"):
            return
        token = resp.get("NextContinuationToken")
|
|
157
|
+
|
|
158
|
+
async def signed_url(
|
|
159
|
+
self,
|
|
160
|
+
key: str,
|
|
161
|
+
*,
|
|
162
|
+
expires_in: int = 3600,
|
|
163
|
+
method: str = "GET",
|
|
164
|
+
content_type: str | None = None,
|
|
165
|
+
) -> str:
|
|
166
|
+
client = self._get_client()
|
|
167
|
+
op = {"GET": "get_object", "PUT": "put_object", "DELETE": "delete_object"}.get(
|
|
168
|
+
method.upper()
|
|
169
|
+
)
|
|
170
|
+
if op is None:
|
|
171
|
+
raise StorageError(f"unsupported signed URL method {method!r}")
|
|
172
|
+
params: dict[str, Any] = {"Bucket": self.config.bucket, "Key": key}
|
|
173
|
+
if method.upper() == "PUT" and content_type:
|
|
174
|
+
params["ContentType"] = content_type
|
|
175
|
+
return await asyncio.to_thread(
|
|
176
|
+
client.generate_presigned_url, op, Params=params, ExpiresIn=expires_in
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
__all__ = ["S3Config", "S3Storage"]
|
hawkapi_storage/py.typed
ADDED
|
File without changes
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hawkapi-storage
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: File storage for HawkAPI — local, S3, GCS, Azure backends, pre-signed URLs, streaming uploads
|
|
5
|
+
Project-URL: Homepage, https://pypi.org/project/hawkapi-storage/
|
|
6
|
+
Project-URL: Repository, https://github.com/ashimov/hawkapi-storage
|
|
7
|
+
Project-URL: Issues, https://github.com/ashimov/hawkapi-storage/issues
|
|
8
|
+
Author-email: HawkAPI Contributors <hawkapi@users.noreply.github.com>
|
|
9
|
+
License: MIT License
|
|
10
|
+
|
|
11
|
+
Copyright (c) 2026 HawkAPI Contributors
|
|
12
|
+
|
|
13
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
14
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
15
|
+
in the Software without restriction, including without limitation the rights
|
|
16
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
17
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
18
|
+
furnished to do so, subject to the following conditions:
|
|
19
|
+
|
|
20
|
+
The above copyright notice and this permission notice shall be included in all
|
|
21
|
+
copies or substantial portions of the Software.
|
|
22
|
+
|
|
23
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
24
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
25
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
26
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
27
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
28
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
29
|
+
SOFTWARE.
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Keywords: azure,files,gcs,hawkapi,s3,storage,upload
|
|
32
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
33
|
+
Classifier: Framework :: AsyncIO
|
|
34
|
+
Classifier: Intended Audience :: Developers
|
|
35
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
36
|
+
Classifier: Programming Language :: Python :: 3
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
39
|
+
Classifier: Topic :: System :: Filesystems
|
|
40
|
+
Classifier: Typing :: Typed
|
|
41
|
+
Requires-Python: >=3.12
|
|
42
|
+
Requires-Dist: hawkapi>=0.1.7
|
|
43
|
+
Provides-Extra: azure
|
|
44
|
+
Requires-Dist: azure-storage-blob>=12.19; extra == 'azure'
|
|
45
|
+
Provides-Extra: dev
|
|
46
|
+
Requires-Dist: boto3>=1.34; extra == 'dev'
|
|
47
|
+
Requires-Dist: pyright>=1.1; extra == 'dev'
|
|
48
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
|
|
49
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
50
|
+
Requires-Dist: ruff>=0.8; extra == 'dev'
|
|
51
|
+
Provides-Extra: gcs
|
|
52
|
+
Requires-Dist: google-cloud-storage>=2.14; extra == 'gcs'
|
|
53
|
+
Provides-Extra: s3
|
|
54
|
+
Requires-Dist: boto3>=1.34; extra == 's3'
|
|
55
|
+
Description-Content-Type: text/markdown
|
|
56
|
+
|
|
57
|
+
# hawkapi-storage
|
|
58
|
+
|
|
59
|
+
Pluggable file storage for [HawkAPI](https://github.com/ashimov/HawkAPI). One `Storage` protocol, four backends: local filesystem, AWS S3 (and S3-compatible — MinIO, Wasabi, R2), Google Cloud Storage, Azure Blob Storage. Pre-signed URLs and streaming on all of them.
|
|
60
|
+
|
|
61
|
+
## Install
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
pip install hawkapi-storage # local filesystem only
|
|
65
|
+
pip install 'hawkapi-storage[s3]' # + AWS S3
|
|
66
|
+
pip install 'hawkapi-storage[gcs]' # + Google Cloud Storage
|
|
67
|
+
pip install 'hawkapi-storage[azure]' # + Azure Blob Storage
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Quickstart
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
from hawkapi import Depends, HawkAPI
|
|
74
|
+
from hawkapi_storage import LocalConfig, LocalStorage, Storage, get_storage, init_storage
|
|
75
|
+
|
|
76
|
+
app = HawkAPI()
|
|
77
|
+
init_storage(app, storage=LocalStorage(LocalConfig(root="/var/data", base_url="https://cdn.example")))
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@app.put("/files/{key}")
|
|
81
|
+
async def upload(key: str, body: bytes, s: Storage = Depends(get_storage)):
|
|
82
|
+
obj = await s.put(key, body, content_type="application/octet-stream")
|
|
83
|
+
return {"key": obj.key, "size": obj.size}
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@app.get("/files/{key}/url")
|
|
87
|
+
async def signed(key: str, s: Storage = Depends(get_storage)):
|
|
88
|
+
return {"url": await s.signed_url(key, expires_in=300)}
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Swap `LocalStorage` for any other backend — every primitive is identical.
|
|
92
|
+
|
|
93
|
+
## Backends
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
from hawkapi_storage import (
|
|
97
|
+
LocalStorage, LocalConfig,
|
|
98
|
+
S3Storage, S3Config, # extras: [s3]
|
|
99
|
+
GCSStorage, GCSConfig, # extras: [gcs]
|
|
100
|
+
AzureStorage, AzureConfig, # extras: [azure]
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
local = LocalStorage(LocalConfig(root="/var/data"))
|
|
104
|
+
s3 = S3Storage(S3Config(bucket="my-bucket", region="eu-west-1"))
|
|
105
|
+
minio = S3Storage(S3Config(bucket="mb", endpoint_url="https://minio.example", use_path_style=True))
|
|
106
|
+
gcs = GCSStorage(GCSConfig(bucket="my-bucket", project="my-project"))
|
|
107
|
+
azure = AzureStorage(AzureConfig(container="files", connection_string="..."))
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## The `Storage` protocol
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
class Storage(Protocol):
|
|
114
|
+
name: str
|
|
115
|
+
|
|
116
|
+
async def put(self, key, data, *, content_type=None, metadata=None) -> StoredObject: ...
|
|
117
|
+
async def get(self, key) -> bytes: ...
|
|
118
|
+
async def stream(self, key, *, chunk_size=65536) -> AsyncIterator[bytes]: ...
|
|
119
|
+
async def exists(self, key) -> bool: ...
|
|
120
|
+
async def delete(self, key) -> None: ...
|
|
121
|
+
async def head(self, key) -> StoredObject: ...
|
|
122
|
+
async def list(self, prefix="", *, limit=1000) -> AsyncIterator[StoredObject]: ...
|
|
123
|
+
async def signed_url(self, key, *, expires_in=3600, method="GET", content_type=None) -> str: ...
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
`put()` accepts `bytes`, a file-like object, or an `AsyncIterator[bytes]` (for streaming uploads).
|
|
127
|
+
|
|
128
|
+
## Streaming downloads
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
@app.get("/download/{key}")
|
|
132
|
+
async def download(key: str, s: Storage = Depends(get_storage)):
|
|
133
|
+
return StreamingResponse(s.stream(key, chunk_size=65536),
|
|
134
|
+
media_type=(await s.head(key)).content_type)
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## Pre-signed URLs
|
|
138
|
+
|
|
139
|
+
Every backend supports `signed_url(key, expires_in=..., method="GET" | "PUT")`. For PUT/upload pre-signs, pass `content_type=` so the client must send the matching `Content-Type` header.
|
|
140
|
+
|
|
141
|
+
`LocalStorage` produces HMAC-signed URLs that you verify on download with `local.verify_signed_url(key, expires, sig, method="GET")` — useful when serving downloads through your own handler.
|
|
142
|
+
|
|
143
|
+
## Local filesystem details
|
|
144
|
+
|
|
145
|
+
- Path traversal (`..`) is rejected at `put`/`get` time.
|
|
146
|
+
- `LocalConfig(base_url=...)` sets the prefix used by `signed_url()` — pair it with a Nginx alias or a HawkAPI download handler.
|
|
147
|
+
- `LocalConfig(signing_secret=...)` lets you pin the HMAC secret (otherwise generated once at startup).
|
|
148
|
+
|
|
149
|
+
## Errors
|
|
150
|
+
|
|
151
|
+
- `StorageError` — base class.
|
|
152
|
+
- `NotFoundError(key)` — `get` / `head` / `stream` on a missing key.
|
|
153
|
+
|
|
154
|
+
## Development
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
git clone https://github.com/ashimov/hawkapi-storage.git
|
|
158
|
+
cd hawkapi-storage
|
|
159
|
+
uv sync --extra dev
|
|
160
|
+
uv run pytest -q
|
|
161
|
+
uv run ruff check . && uv run ruff format --check .
|
|
162
|
+
uv run pyright src/
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
## License
|
|
166
|
+
|
|
167
|
+
MIT.
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
hawkapi_storage/__init__.py,sha256=ukibo0at4kQOe4Y-cdXPZDW8iHuAh0TB1DBt9p3ClEU,1011
|
|
2
|
+
hawkapi_storage/_azure.py,sha256=TEfDNCUyhxAV01LlsKujehxOoXOldSI72c5DTOjFjs0,6895
|
|
3
|
+
hawkapi_storage/_base.py,sha256=5K5ss1NnT0msyT1ZAFFKifzee83omhv7EDoa1dG583k,2248
|
|
4
|
+
hawkapi_storage/_gcs.py,sha256=3t135cpqZ0bQsSX9ojNT4vBAQnQKwflxiNeQ2JQRk60,5245
|
|
5
|
+
hawkapi_storage/_local.py,sha256=iZRUvWVNw7SFgEDWgY6IKqsMsAO4ZCfpDGKx6165rcA,6209
|
|
6
|
+
hawkapi_storage/_plugin.py,sha256=R4bUKGd5qRDyi1LSkkn7FPG-rm6Q7Cu_qWe2y2jLE1U,1279
|
|
7
|
+
hawkapi_storage/_s3.py,sha256=ArPoI30R9XCFhTEUDnMFZSGPQvWT21R_NF7M16PhB0w,6590
|
|
8
|
+
hawkapi_storage/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
+
hawkapi_storage-0.1.0.dist-info/METADATA,sha256=5K6Ig2cT1iD8yV8b-r_c5m4ZHLpBRsSnqOjwFU6UO7g,6713
|
|
10
|
+
hawkapi_storage-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
11
|
+
hawkapi_storage-0.1.0.dist-info/licenses/LICENSE,sha256=_RpjhvsfLqqeG_gv2cRatjIxCTGXTpXhKU9jqLZXYa4,1077
|
|
12
|
+
hawkapi_storage-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 HawkAPI Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|