hawkapi-storage 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,43 @@
1
+ """hawkapi-storage — pluggable file storage for HawkAPI.
2
+
3
+ Backends: local filesystem, AWS S3 (extras ``[s3]``), Google Cloud Storage
4
+ (extras ``[gcs]``), Azure Blob Storage (extras ``[azure]``). Single
5
+ :class:`Storage` protocol — swap backends freely.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from ._azure import AzureConfig, AzureStorage
11
+ from ._base import (
12
+ NotFoundError,
13
+ Storage,
14
+ StorageError,
15
+ StoredObject,
16
+ guess_content_type,
17
+ )
18
+ from ._gcs import GCSConfig, GCSStorage
19
+ from ._local import LocalConfig, LocalStorage
20
+ from ._plugin import get_storage, init_storage, resolve_storage
21
+ from ._s3 import S3Config, S3Storage
22
+
23
+ __version__ = "0.1.0"
24
+
25
+ __all__ = [
26
+ "AzureConfig",
27
+ "AzureStorage",
28
+ "GCSConfig",
29
+ "GCSStorage",
30
+ "LocalConfig",
31
+ "LocalStorage",
32
+ "NotFoundError",
33
+ "S3Config",
34
+ "S3Storage",
35
+ "Storage",
36
+ "StorageError",
37
+ "StoredObject",
38
+ "__version__",
39
+ "get_storage",
40
+ "guess_content_type",
41
+ "init_storage",
42
+ "resolve_storage",
43
+ ]
@@ -0,0 +1,195 @@
1
+ """Azure Blob Storage backend."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ from collections.abc import AsyncIterator
7
+ from dataclasses import dataclass, field
8
+ from datetime import UTC, datetime, timedelta
9
+ from typing import Any, BinaryIO
10
+
11
+ from ._base import NotFoundError, StorageError, StoredObject, guess_content_type, to_bytes
12
+
13
+
14
@dataclass(slots=True)
class AzureConfig:
    """Connection settings for :class:`AzureStorage`.

    Exactly one authentication path must be configured: either
    ``connection_string``, or ``account_url`` together with ``account_key``.
    """

    container: str  # blob container name; assumed to already exist
    connection_string: str = ""  # full Azure connection string (takes precedence)
    account_url: str = ""
    """e.g. https://myaccount.blob.core.windows.net — used with ``credential``."""

    account_name: str = ""  # NOTE(review): not read by AzureStorage — confirm whether it is needed
    account_key: str = ""  # shared key; also required by signed_url() for SAS generation
23
+
24
+
25
@dataclass
class AzureStorage:
    """Azure Blob Storage backend.

    Uses the synchronous azure-storage-blob SDK; every blocking call is
    offloaded with :func:`asyncio.to_thread`. The service/container clients
    are created lazily on first use and cached, so construction is cheap and
    does no I/O.
    """

    config: AzureConfig
    name: str = "azure"
    _service: Any = field(default=None, init=False)
    _container: Any = field(default=None, init=False)

    def _get_container(self) -> Any:
        """Return the cached container client, creating it on first use.

        Raises:
            StorageError: if azure-storage-blob is not installed, or the
                config provides neither a connection string nor
                account_url + account_key.
        """
        if self._container is not None:
            return self._container
        try:
            from azure.storage.blob import BlobServiceClient  # type: ignore[import-not-found]
        except ImportError as exc:  # pragma: no cover
            raise StorageError(
                "azure-storage-blob not installed; pip install 'hawkapi-storage[azure]'"
            ) from exc
        if self.config.connection_string:
            self._service = BlobServiceClient.from_connection_string(self.config.connection_string)
        elif self.config.account_url and self.config.account_key:
            self._service = BlobServiceClient(
                account_url=self.config.account_url, credential=self.config.account_key
            )
        else:
            raise StorageError("AzureConfig requires connection_string or account_url+account_key")
        self._container = self._service.get_container_client(self.config.container)
        return self._container

    async def put(
        self,
        key: str,
        data: bytes | BinaryIO | AsyncIterator[bytes],
        *,
        content_type: str | None = None,
        metadata: dict[str, str] | None = None,
    ) -> StoredObject:
        """Upload *data* under *key*, overwriting any existing blob."""
        if not isinstance(data, (bytes, bytearray)) and not hasattr(data, "read"):
            # Async iterator: drain it fully — upload_blob needs one buffer.
            chunks: list[bytes] = []
            async for chunk in data:
                chunks.append(chunk)
            data = b"".join(chunks)
        body = to_bytes(data)  # type: ignore[arg-type]
        resolved_type = content_type or guess_content_type(key)
        container = self._get_container()

        def _upload() -> None:
            from azure.storage.blob import ContentSettings  # type: ignore[import-not-found]

            blob = container.get_blob_client(key)
            blob.upload_blob(
                body,
                overwrite=True,
                content_settings=ContentSettings(content_type=resolved_type),
                # Azure rejects non-string metadata values; empty dict -> None.
                metadata={k: str(v) for k, v in (metadata or {}).items()} or None,
            )

        await asyncio.to_thread(_upload)
        return StoredObject(
            key=key,
            size=len(body),
            content_type=resolved_type,
            metadata=dict(metadata or {}),
        )

    async def get(self, key: str) -> bytes:
        """Download the whole blob.

        Raises:
            NotFoundError: when the blob is missing (any SDK download error
                is mapped to NotFoundError, mirroring the original behavior).
        """
        container = self._get_container()

        def _download() -> bytes:
            blob = container.get_blob_client(key)
            try:
                stream = blob.download_blob()
            except Exception as exc:
                raise NotFoundError(key) from exc
            return stream.readall()

        return await asyncio.to_thread(_download)

    async def stream(self, key: str, *, chunk_size: int = 65536) -> AsyncIterator[bytes]:
        """Yield the blob's bytes in *chunk_size* pieces.

        NOTE: the blob is downloaded once up front; chunking bounds what each
        consumer iteration sees, not this method's peak memory.
        """
        data = await self.get(key)
        for i in range(0, len(data), chunk_size):
            yield data[i : i + chunk_size]

    async def exists(self, key: str) -> bool:
        """True when the blob exists."""
        container = self._get_container()
        return await asyncio.to_thread(container.get_blob_client(key).exists)

    async def delete(self, key: str) -> None:
        """Best-effort delete: a missing blob (or any SDK error) is ignored,
        matching the delete semantics of the other backends."""
        container = self._get_container()
        try:
            await asyncio.to_thread(container.delete_blob, key)
        except Exception:
            pass

    async def head(self, key: str) -> StoredObject:
        """Fetch blob properties without the body.

        Raises:
            NotFoundError: when the blob does not exist.
        """
        container = self._get_container()

        def _props() -> StoredObject:
            blob = container.get_blob_client(key)
            try:
                props = blob.get_blob_properties()
            except Exception as exc:
                raise NotFoundError(key) from exc
            return StoredObject(
                key=key,
                size=props.size or 0,
                content_type=(
                    props.content_settings.content_type if props.content_settings else None
                )
                or guess_content_type(key),
                last_modified=props.last_modified,
                etag=str(props.etag or "").strip('"'),
                metadata=dict(props.metadata or {}),
            )

        return await asyncio.to_thread(_props)

    async def list(self, prefix: str = "", *, limit: int = 1000) -> AsyncIterator[StoredObject]:
        """Yield up to *limit* blobs whose name starts with *prefix*."""
        if limit <= 0:
            return
        container = self._get_container()

        def _list() -> list[Any]:
            return list(
                container.list_blobs(name_starts_with=prefix or None, results_per_page=limit)
            )

        emitted = 0
        for item in await asyncio.to_thread(_list):
            yield StoredObject(
                key=item.name,
                size=item.size or 0,
                content_type=(item.content_settings.content_type if item.content_settings else None)
                or guess_content_type(item.name),
                last_modified=item.last_modified,
            )
            emitted += 1
            if emitted >= limit:
                return

    async def signed_url(
        self,
        key: str,
        *,
        expires_in: int = 3600,
        method: str = "GET",
        content_type: str | None = None,
    ) -> str:
        """Return a SAS URL for *key* valid for *expires_in* seconds.

        ``content_type`` is accepted for interface parity but is not encoded
        into the SAS token.

        Raises:
            StorageError: if azure-storage-blob is missing, or no
                ``account_key`` is configured — SAS tokens are signed with the
                shared account key, so one is mandatory here even when the
                service client itself was built from a connection string.
        """
        _ = content_type
        try:
            from azure.storage.blob import (  # type: ignore[import-not-found]
                BlobSasPermissions,
                generate_blob_sas,
            )
        except ImportError as exc:  # pragma: no cover
            raise StorageError("azure-storage-blob not installed") from exc
        if not self.config.account_key:
            # generate_blob_sas would otherwise fail deep inside the SDK with
            # an opaque ValueError; fail fast with an actionable message.
            raise StorageError("signed_url requires AzureConfig.account_key")
        container = self._get_container()
        account_name = container.account_name
        verb = method.upper()
        permissions = BlobSasPermissions(
            read=verb == "GET", write=verb == "PUT", delete=verb == "DELETE"
        )
        token = await asyncio.to_thread(
            generate_blob_sas,
            account_name=account_name,
            container_name=self.config.container,
            blob_name=key,
            account_key=self.config.account_key,
            permission=permissions,
            expiry=datetime.now(UTC) + timedelta(seconds=expires_in),
        )
        return f"{container.url}/{key}?{token}"
193
+
194
+
195
+ __all__ = ["AzureConfig", "AzureStorage"]
@@ -0,0 +1,88 @@
1
+ """Storage backend abstraction."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import io
6
+ import mimetypes
7
+ from collections.abc import AsyncIterator
8
+ from dataclasses import dataclass, field
9
+ from datetime import datetime
10
+ from typing import BinaryIO, Protocol
11
+
12
+
13
@dataclass(slots=True)
class StoredObject:
    """Metadata record for one stored object, returned by put/head/list."""

    key: str  # backend key / relative path of the object
    size: int = 0  # object size in bytes
    content_type: str = "application/octet-stream"  # MIME type, best-effort
    last_modified: datetime | None = None  # backend timestamp when available
    etag: str = ""  # backend entity tag, surrounding quotes stripped
    metadata: dict[str, str] = field(default_factory=dict)  # user-supplied key/value metadata
+
22
+
23
class StorageError(Exception):
    """Base error for this package; raised by every backend when a primitive fails."""
25
+
26
+
27
class NotFoundError(StorageError):
    """Raised when a key does not exist in the backend."""
29
+
30
+
31
class Storage(Protocol):
    """The minimal contract every backend implements.

    All methods are coroutines; ``stream`` and ``list`` are async generators.
    Backends signal failure via :class:`StorageError` or its subclass
    :class:`NotFoundError`.
    """

    # Short backend identifier, e.g. "local", "s3", "gcs", "azure".
    name: str

    # Upload data (bytes, file-like, or async byte iterator) under key,
    # replacing any existing object; returns the resulting metadata record.
    async def put(
        self,
        key: str,
        data: bytes | BinaryIO | AsyncIterator[bytes],
        *,
        content_type: str | None = None,
        metadata: dict[str, str] | None = None,
    ) -> StoredObject: ...

    # Download the whole object; raises NotFoundError when missing.
    async def get(self, key: str) -> bytes: ...

    # Yield the object's bytes in chunk_size pieces.
    async def stream(self, key: str, *, chunk_size: int = 65536) -> AsyncIterator[bytes]: ...

    # True when the key exists.
    async def exists(self, key: str) -> bool: ...

    # Remove the key; missing-key handling is backend-specific (best-effort vs raise).
    async def delete(self, key: str) -> None: ...

    # Object metadata without the body; raises NotFoundError when missing.
    async def head(self, key: str) -> StoredObject: ...

    # Yield up to limit objects whose key starts with prefix.
    async def list(self, prefix: str = "", *, limit: int = 1000) -> AsyncIterator[StoredObject]: ...

    # URL granting temporary access to key for expires_in seconds.
    async def signed_url(
        self,
        key: str,
        *,
        expires_in: int = 3600,
        method: str = "GET",
        content_type: str | None = None,
    ) -> str: ...
65
+
66
+
67
def guess_content_type(key: str) -> str:
    """Best-effort MIME type for *key*, falling back to octet-stream."""
    guessed, _encoding = mimetypes.guess_type(key)
    return guessed if guessed else "application/octet-stream"
70
+
71
+
72
def to_bytes(data: bytes | BinaryIO) -> bytes:
    """Read a bytes-like or file-like object into a single ``bytes`` buffer.

    Used by backends that need one in-memory body. Accepts ``bytes``,
    ``bytearray``, ``memoryview``, or anything with a ``.read()`` method.

    Raises:
        TypeError: if *data* is neither bytes-like nor file-like.
    """
    if isinstance(data, bytes):
        return data
    if isinstance(data, (bytearray, memoryview)):
        # Backends' put() explicitly accepts bytearray via their isinstance
        # checks; normalise mutable buffers to immutable bytes here instead
        # of raising TypeError on them.
        return bytes(data)
    if isinstance(data, io.IOBase) or hasattr(data, "read"):
        return data.read()
    raise TypeError(f"unsupported data type: {type(data).__name__}")
79
+
80
+
81
+ __all__ = [
82
+ "NotFoundError",
83
+ "Storage",
84
+ "StorageError",
85
+ "StoredObject",
86
+ "guess_content_type",
87
+ "to_bytes",
88
+ ]
@@ -0,0 +1,154 @@
1
+ """Google Cloud Storage backend (sync API offloaded to a thread)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ from collections.abc import AsyncIterator
7
+ from dataclasses import dataclass, field
8
+ from datetime import timedelta
9
+ from typing import Any, BinaryIO
10
+
11
+ from ._base import NotFoundError, StorageError, StoredObject, guess_content_type, to_bytes
12
+
13
+
14
@dataclass(slots=True)
class GCSConfig:
    """Connection settings for :class:`GCSStorage`.

    When ``credentials_path`` is set, a service-account JSON key file is
    used; otherwise the client's default credential discovery applies.
    """

    bucket: str  # target GCS bucket name
    project: str = ""  # GCP project; empty -> client default
    credentials_path: str = ""  # path to a service-account JSON key file
19
+
20
+
21
@dataclass
class GCSStorage:
    """Google Cloud Storage backend (sync API offloaded to a thread).

    The client and bucket handle are created lazily on first use and cached
    on the instance, so construction is cheap and does no I/O.
    """

    config: GCSConfig
    name: str = "gcs"
    _client: Any = field(default=None, init=False)
    _bucket: Any = field(default=None, init=False)

    def _get_bucket(self) -> Any:
        """Return the cached bucket handle, creating the client on first use.

        Raises:
            StorageError: if google-cloud-storage is not installed.
        """
        if self._bucket is not None:
            return self._bucket
        try:
            from google.cloud import storage  # type: ignore[import-not-found]
        except ImportError as exc:  # pragma: no cover
            raise StorageError(
                "google-cloud-storage not installed; pip install 'hawkapi-storage[gcs]'"
            ) from exc
        if self.config.credentials_path:
            self._client = storage.Client.from_service_account_json(self.config.credentials_path)
        else:
            self._client = storage.Client(project=self.config.project or None)
        self._bucket = self._client.bucket(self.config.bucket)
        return self._bucket

    async def put(
        self,
        key: str,
        data: bytes | BinaryIO | AsyncIterator[bytes],
        *,
        content_type: str | None = None,
        metadata: dict[str, str] | None = None,
    ) -> StoredObject:
        """Upload *data* under *key*, replacing any existing object."""
        if not isinstance(data, (bytes, bytearray)) and not hasattr(data, "read"):
            # Async iterator: drain it fully — upload_from_string needs one buffer.
            chunks: list[bytes] = []
            async for chunk in data:
                chunks.append(chunk)
            data = b"".join(chunks)
        body = to_bytes(data)  # type: ignore[arg-type]
        resolved_type = content_type or guess_content_type(key)
        bucket = self._get_bucket()
        blob = bucket.blob(key)
        if metadata:
            # GCS custom metadata values must be strings.
            blob.metadata = {k: str(v) for k, v in metadata.items()}

        def _upload() -> None:
            blob.upload_from_string(body, content_type=resolved_type)

        await asyncio.to_thread(_upload)
        return StoredObject(
            key=key,
            size=len(body),
            content_type=resolved_type,
            metadata=dict(metadata or {}),
        )

    async def get(self, key: str) -> bytes:
        """Download the whole object.

        Raises:
            NotFoundError: when the key does not exist.
        """
        bucket = self._get_bucket()
        blob = bucket.blob(key)

        def _download() -> bytes:
            if not blob.exists():
                raise NotFoundError(key)
            return blob.download_as_bytes()

        return await asyncio.to_thread(_download)

    async def stream(self, key: str, *, chunk_size: int = 65536) -> AsyncIterator[bytes]:
        """Yield the object's bytes in *chunk_size* pieces.

        The GCS SDK does not expose a true streaming reader for blobs; we
        download once and re-yield in chunks, which is fine for the typical
        API-served-file size and keeps memory bounded for callers.
        """
        data = await self.get(key)
        for i in range(0, len(data), chunk_size):
            yield data[i : i + chunk_size]

    async def exists(self, key: str) -> bool:
        """True when the object exists."""
        bucket = self._get_bucket()
        return await asyncio.to_thread(bucket.blob(key).exists)

    async def delete(self, key: str) -> None:
        """Best-effort delete: a missing object is not an error.

        The SDK raises ``google.api_core.exceptions.NotFound`` for missing
        blobs; swallowing it matches the delete semantics of the other
        backends. Any other failure still propagates.
        """
        bucket = self._get_bucket()

        def _delete() -> None:
            try:
                bucket.blob(key).delete()
            except Exception as exc:
                # Matched by class name so google.api_core need not be
                # imported here; "already gone" counts as success.
                if type(exc).__name__ != "NotFound":
                    raise

        await asyncio.to_thread(_delete)

    async def head(self, key: str) -> StoredObject:
        """Fetch object metadata without the body.

        Raises:
            NotFoundError: when the key does not exist.
        """
        bucket = self._get_bucket()
        blob = bucket.blob(key)

        def _reload() -> StoredObject:
            if not blob.exists():
                raise NotFoundError(key)
            blob.reload()
            return StoredObject(
                key=key,
                size=blob.size or 0,
                content_type=blob.content_type or guess_content_type(key),
                last_modified=blob.updated,
                etag=str(blob.etag or "").strip('"'),
                metadata=dict(blob.metadata or {}),
            )

        return await asyncio.to_thread(_reload)

    async def list(self, prefix: str = "", *, limit: int = 1000) -> AsyncIterator[StoredObject]:
        """Yield up to *limit* objects whose key starts with *prefix*."""
        if limit <= 0:
            return
        bucket = self._get_bucket()

        def _enumerate() -> list[Any]:
            return list(bucket.list_blobs(prefix=prefix or None, max_results=limit))

        for blob in await asyncio.to_thread(_enumerate):
            yield StoredObject(
                key=blob.name,
                size=blob.size or 0,
                content_type=blob.content_type or guess_content_type(blob.name),
                last_modified=blob.updated,
                etag=str(blob.etag or "").strip('"'),
            )

    async def signed_url(
        self,
        key: str,
        *,
        expires_in: int = 3600,
        method: str = "GET",
        content_type: str | None = None,
    ) -> str:
        """Return a V4 signed URL for *key* valid for *expires_in* seconds.

        NOTE(review): V4 URL signing requires signing-capable credentials
        (e.g. a service-account key) — confirm against deployment setup.
        """
        bucket = self._get_bucket()
        blob = bucket.blob(key)
        return await asyncio.to_thread(
            blob.generate_signed_url,
            expiration=timedelta(seconds=expires_in),
            method=method.upper(),
            content_type=content_type,
            version="v4",
        )
152
+
153
+
154
+ __all__ = ["GCSConfig", "GCSStorage"]
@@ -0,0 +1,185 @@
1
+ """Local filesystem backend — useful for dev + tests."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import base64
7
+ import hashlib
8
+ import hmac
9
+ import os
10
+ import shutil
11
+ import time
12
+ from collections.abc import AsyncIterator
13
+ from dataclasses import dataclass, field
14
+ from datetime import UTC, datetime
15
+ from pathlib import Path
16
+ from typing import BinaryIO
17
+ from urllib.parse import quote, urlencode
18
+
19
+ from ._base import (
20
+ NotFoundError,
21
+ StorageError,
22
+ StoredObject,
23
+ guess_content_type,
24
+ to_bytes,
25
+ )
26
+
27
+
28
@dataclass(slots=True)
class LocalConfig:
    """Settings for :class:`LocalStorage`."""

    root: str
    """Filesystem directory that holds the objects."""

    base_url: str = ""
    """Public base URL prefix used by :meth:`signed_url`. Leave empty to return a
    ``file://`` URL (only meaningful for tests)."""

    signing_secret: str = ""
    """HMAC secret for short-lived download URLs. Generated lazily if unset."""
39
+
40
+
41
+ @dataclass
42
+ class LocalStorage:
43
+ config: LocalConfig
44
+ name: str = "local"
45
+ _secret: str = field(default="", init=False)
46
+
47
+ def __post_init__(self) -> None:
48
+ os.makedirs(self.config.root, exist_ok=True)
49
+ self._secret = (
50
+ self.config.signing_secret or base64.urlsafe_b64encode(os.urandom(32)).decode()
51
+ )
52
+
53
+ def _path(self, key: str) -> Path:
54
+ safe = key.lstrip("/")
55
+ if ".." in Path(safe).parts:
56
+ raise StorageError("invalid key (path traversal)")
57
+ return Path(self.config.root, safe)
58
+
59
+ async def put(
60
+ self,
61
+ key: str,
62
+ data: bytes | BinaryIO | AsyncIterator[bytes],
63
+ *,
64
+ content_type: str | None = None,
65
+ metadata: dict[str, str] | None = None,
66
+ ) -> StoredObject:
67
+ path = self._path(key)
68
+ await asyncio.to_thread(path.parent.mkdir, parents=True, exist_ok=True)
69
+ if isinstance(data, (bytes, bytearray)) or hasattr(data, "read"):
70
+ buf = to_bytes(data) # type: ignore[arg-type]
71
+ await asyncio.to_thread(path.write_bytes, buf)
72
+ else:
73
+ chunks: list[bytes] = []
74
+ async for chunk in data:
75
+ chunks.append(chunk)
76
+ await asyncio.to_thread(path.write_bytes, b"".join(chunks))
77
+ stat = await asyncio.to_thread(path.stat)
78
+ return StoredObject(
79
+ key=key,
80
+ size=stat.st_size,
81
+ content_type=content_type or guess_content_type(key),
82
+ last_modified=datetime.fromtimestamp(stat.st_mtime, tz=UTC),
83
+ metadata=dict(metadata or {}),
84
+ )
85
+
86
+ async def get(self, key: str) -> bytes:
87
+ path = self._path(key)
88
+ if not path.exists():
89
+ raise NotFoundError(key)
90
+ return await asyncio.to_thread(path.read_bytes)
91
+
92
+ async def stream(self, key: str, *, chunk_size: int = 65536) -> AsyncIterator[bytes]:
93
+ path = self._path(key)
94
+ if not path.exists():
95
+ raise NotFoundError(key)
96
+
97
+ def _chunks() -> list[bytes]:
98
+ out: list[bytes] = []
99
+ with path.open("rb") as fh:
100
+ while True:
101
+ chunk = fh.read(chunk_size)
102
+ if not chunk:
103
+ break
104
+ out.append(chunk)
105
+ return out
106
+
107
+ for chunk in await asyncio.to_thread(_chunks):
108
+ yield chunk
109
+
110
+ async def exists(self, key: str) -> bool:
111
+ return self._path(key).exists()
112
+
113
+ async def delete(self, key: str) -> None:
114
+ path = self._path(key)
115
+ if path.is_dir():
116
+ await asyncio.to_thread(shutil.rmtree, str(path))
117
+ elif path.exists():
118
+ await asyncio.to_thread(path.unlink)
119
+
120
+ async def head(self, key: str) -> StoredObject:
121
+ path = self._path(key)
122
+ if not path.exists():
123
+ raise NotFoundError(key)
124
+ stat = await asyncio.to_thread(path.stat)
125
+ return StoredObject(
126
+ key=key,
127
+ size=stat.st_size,
128
+ content_type=guess_content_type(key),
129
+ last_modified=datetime.fromtimestamp(stat.st_mtime, tz=UTC),
130
+ )
131
+
132
+ async def list(self, prefix: str = "", *, limit: int = 1000) -> AsyncIterator[StoredObject]:
133
+ root = Path(self.config.root)
134
+ target = root / prefix.lstrip("/") if prefix else root
135
+ if not root.exists():
136
+ return
137
+ count = 0
138
+ for path in sorted(root.rglob("*")):
139
+ if not path.is_file():
140
+ continue
141
+ rel = path.relative_to(root).as_posix()
142
+ if prefix and not rel.startswith(prefix.lstrip("/")):
143
+ continue
144
+ if not str(path).startswith(str(target)) and prefix:
145
+ continue
146
+ stat = await asyncio.to_thread(path.stat)
147
+ yield StoredObject(
148
+ key=rel,
149
+ size=stat.st_size,
150
+ content_type=guess_content_type(rel),
151
+ last_modified=datetime.fromtimestamp(stat.st_mtime, tz=UTC),
152
+ )
153
+ count += 1
154
+ if count >= limit:
155
+ return
156
+
157
+ async def signed_url(
158
+ self,
159
+ key: str,
160
+ *,
161
+ expires_in: int = 3600,
162
+ method: str = "GET",
163
+ content_type: str | None = None,
164
+ ) -> str:
165
+ _ = content_type
166
+ if method.upper() not in {"GET", "PUT"}:
167
+ raise StorageError("LocalStorage only signs GET / PUT")
168
+ expires = int(time.time()) + max(1, expires_in)
169
+ msg = f"{method.upper()}:{key}:{expires}".encode()
170
+ sig = hmac.new(self._secret.encode(), msg, hashlib.sha256).hexdigest()
171
+ query = urlencode({"expires": expires, "sig": sig, "method": method.upper()})
172
+ if self.config.base_url:
173
+ base = self.config.base_url.rstrip("/")
174
+ return f"{base}/{quote(key)}?{query}"
175
+ return f"file://{self._path(key)}?{query}"
176
+
177
+ def verify_signed_url(self, key: str, expires: int, sig: str, *, method: str = "GET") -> bool:
178
+ if expires < int(time.time()):
179
+ return False
180
+ msg = f"{method.upper()}:{key}:{expires}".encode()
181
+ expected = hmac.new(self._secret.encode(), msg, hashlib.sha256).hexdigest()
182
+ return hmac.compare_digest(expected, sig)
183
+
184
+
185
+ __all__ = ["LocalConfig", "LocalStorage"]
@@ -0,0 +1,49 @@
1
+ """Plugin entry point + DI helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from hawkapi import HTTPException, Request
8
+
9
+ from ._base import Storage
10
+
11
+
12
class _StateNamespace:
    """Fallback ``app.state`` namespace for host apps that lack one."""

    storage: Any
14
+
15
+
16
+ _ACTIVE: dict[int, Storage] = {}
17
+ _LAST: list[Storage | None] = [None]
18
+
19
+
20
def init_storage(app: Any, *, storage: Storage) -> Storage:
    """Attach a :class:`Storage` to ``app.state.storage`` and register it for DI lookup."""
    state = getattr(app, "state", None)
    if state is None:
        # Host app exposes no state object — give it a minimal one.
        state = _StateNamespace()
        app.state = state
    state.storage = storage
    _ACTIVE[id(app)] = storage  # per-app registry keyed by identity
    _LAST[0] = storage  # fallback used by resolve_storage(None)
    return storage
28
+
29
+
30
def resolve_storage(app: Any) -> Storage | None:
    """Best-effort lookup of the storage bound to *app*.

    Resolution order: identity registry, then ``app.state.storage`` (if the
    attribute is present at all — even if set to ``None``), then the most
    recently initialised storage. Returns ``None`` when nothing matches.
    """
    if app is None:
        return _LAST[0]
    registered = _ACTIVE.get(id(app))
    if registered is not None:
        return registered
    state = getattr(app, "state", None)
    if state is not None and hasattr(state, "storage"):
        return state.storage  # type: ignore[no-any-return]
    return _LAST[0]
40
+
41
+
42
def get_storage(request: Request) -> Storage:
    """DI helper: return the storage for the request's app, or raise HTTP 500."""
    storage = resolve_storage(request.scope.get("app"))
    if storage is None:
        raise HTTPException(500, detail="Storage not configured — call init_storage(app, ...)")
    return storage
47
+
48
+
49
+ __all__ = ["get_storage", "init_storage", "resolve_storage"]
hawkapi_storage/_s3.py ADDED
@@ -0,0 +1,180 @@
1
+ """AWS S3 backend (boto3, sync API offloaded to a thread)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ from collections.abc import AsyncIterator
7
+ from dataclasses import dataclass, field
8
+ from typing import Any, BinaryIO
9
+
10
+ from ._base import NotFoundError, StorageError, StoredObject, guess_content_type, to_bytes
11
+
12
+
13
@dataclass(slots=True)
class S3Config:
    """Connection settings for :class:`S3Storage`.

    When ``aws_access_key_id`` is empty, explicit credentials are omitted
    and the boto3 client performs its own credential resolution.
    """

    bucket: str  # target S3 bucket name
    region: str = "us-east-1"  # AWS region passed as region_name
    aws_access_key_id: str = ""  # explicit credentials (optional)
    aws_secret_access_key: str = ""  # only used when aws_access_key_id is set
    endpoint_url: str = ""
    """Set to a non-AWS S3-compatible endpoint (MinIO, Wasabi, …)."""

    use_path_style: bool = False
    """Path-style addressing for MinIO and friends."""
24
+
25
+
26
@dataclass
class S3Storage:
    """AWS S3 backend (boto3, sync API offloaded to a thread).

    The client is created lazily on first use and cached, so construction is
    cheap and does no I/O. S3 errors for missing keys are mapped to
    :class:`NotFoundError`; other failures raise :class:`StorageError`.
    """

    config: S3Config
    name: str = "s3"
    _client: Any = field(default=None, init=False)

    def _get_client(self) -> Any:
        """Return the cached boto3 S3 client, creating it on first use.

        Raises:
            StorageError: if boto3 is not installed.
        """
        if self._client is not None:
            return self._client
        try:
            import boto3
            from botocore.config import Config
        except ImportError as exc:  # pragma: no cover
            raise StorageError("boto3 not installed; pip install 'hawkapi-storage[s3]'") from exc
        kwargs: dict[str, Any] = {"region_name": self.config.region}
        if self.config.aws_access_key_id:
            kwargs["aws_access_key_id"] = self.config.aws_access_key_id
            kwargs["aws_secret_access_key"] = self.config.aws_secret_access_key
        if self.config.endpoint_url:
            kwargs["endpoint_url"] = self.config.endpoint_url
        if self.config.use_path_style:
            kwargs["config"] = Config(s3={"addressing_style": "path"})
        self._client = boto3.client("s3", **kwargs)
        return self._client

    @staticmethod
    def _is_not_found(exc: Exception) -> bool:
        """True when *exc* looks like a botocore ClientError for a missing key.

        Inspected structurally via ``exc.response`` so botocore need not be
        importable here; anything else (auth, throttling, network) is NOT
        treated as "not found".
        """
        resp = getattr(exc, "response", None)
        if not isinstance(resp, dict):
            return False
        code = str(resp.get("Error", {}).get("Code", ""))
        status = resp.get("ResponseMetadata", {}).get("HTTPStatusCode")
        return code in {"404", "NoSuchKey", "NotFound"} or status == 404

    async def put(
        self,
        key: str,
        data: bytes | BinaryIO | AsyncIterator[bytes],
        *,
        content_type: str | None = None,
        metadata: dict[str, str] | None = None,
    ) -> StoredObject:
        """Upload *data* under *key*, replacing any existing object."""
        if not isinstance(data, (bytes, bytearray)) and not hasattr(data, "read"):
            # Async iterator: drain it fully — put_object needs one buffer.
            chunks: list[bytes] = []
            async for chunk in data:
                chunks.append(chunk)
            data = b"".join(chunks)
        body = to_bytes(data)  # type: ignore[arg-type]
        client = self._get_client()
        kwargs: dict[str, Any] = {
            "Bucket": self.config.bucket,
            "Key": key,
            "Body": body,
            "ContentType": content_type or guess_content_type(key),
        }
        if metadata:
            # S3 user metadata values must be strings.
            kwargs["Metadata"] = {k: str(v) for k, v in metadata.items()}
        await asyncio.to_thread(client.put_object, **kwargs)
        return StoredObject(
            key=key,
            size=len(body),
            content_type=kwargs["ContentType"],
            metadata=dict(metadata or {}),
        )

    async def get(self, key: str) -> bytes:
        """Download the whole object.

        Raises:
            NotFoundError: when the key does not exist.
            StorageError: for any other S3 failure (previously every error
                was mislabelled as NotFoundError).
        """
        client = self._get_client()
        try:
            obj = await asyncio.to_thread(client.get_object, Bucket=self.config.bucket, Key=key)
        except Exception as exc:
            if self._is_not_found(exc):
                raise NotFoundError(key) from exc
            raise StorageError(f"s3 get_object failed for {key!r}") from exc
        return await asyncio.to_thread(obj["Body"].read)

    async def stream(self, key: str, *, chunk_size: int = 65536) -> AsyncIterator[bytes]:
        """Yield the object's bytes in *chunk_size* pieces without buffering
        the whole body, reading from the boto3 streaming body in a thread.

        Raises:
            NotFoundError: when the key does not exist.
            StorageError: for any other S3 failure.
        """
        client = self._get_client()
        try:
            obj = await asyncio.to_thread(client.get_object, Bucket=self.config.bucket, Key=key)
        except Exception as exc:
            if self._is_not_found(exc):
                raise NotFoundError(key) from exc
            raise StorageError(f"s3 get_object failed for {key!r}") from exc
        stream = obj["Body"]
        while True:
            chunk = await asyncio.to_thread(stream.read, chunk_size)
            if not chunk:
                break
            yield chunk

    async def exists(self, key: str) -> bool:
        """True when the key exists.

        NOTE: any head_object failure (including auth errors) reports False —
        kept deliberately best-effort for interface parity.
        """
        client = self._get_client()
        try:
            await asyncio.to_thread(client.head_object, Bucket=self.config.bucket, Key=key)
            return True
        except Exception:
            return False

    async def delete(self, key: str) -> None:
        """Delete *key*; S3's delete_object succeeds even for missing keys."""
        client = self._get_client()
        await asyncio.to_thread(client.delete_object, Bucket=self.config.bucket, Key=key)

    async def head(self, key: str) -> StoredObject:
        """Fetch object metadata without the body.

        Raises:
            NotFoundError: when the key does not exist.
            StorageError: for any other S3 failure.
        """
        client = self._get_client()
        try:
            meta = await asyncio.to_thread(client.head_object, Bucket=self.config.bucket, Key=key)
        except Exception as exc:
            if self._is_not_found(exc):
                raise NotFoundError(key) from exc
            raise StorageError(f"s3 head_object failed for {key!r}") from exc
        return StoredObject(
            key=key,
            size=meta.get("ContentLength", 0),
            content_type=meta.get("ContentType", guess_content_type(key)),
            last_modified=meta.get("LastModified"),
            etag=str(meta.get("ETag", "")).strip('"'),
            metadata=dict(meta.get("Metadata", {})),
        )

    async def list(self, prefix: str = "", *, limit: int = 1000) -> AsyncIterator[StoredObject]:
        """Yield up to *limit* objects whose key starts with *prefix*,
        following S3 continuation tokens across pages as needed."""
        if limit <= 0:
            return
        client = self._get_client()
        token: str | None = None
        emitted = 0
        while True:
            kwargs: dict[str, Any] = {
                "Bucket": self.config.bucket,
                "Prefix": prefix,
                # S3 caps MaxKeys at 1000 per page.
                "MaxKeys": min(limit - emitted, 1000),
            }
            if token:
                kwargs["ContinuationToken"] = token
            resp = await asyncio.to_thread(client.list_objects_v2, **kwargs)
            for obj in resp.get("Contents", []):
                yield StoredObject(
                    key=obj["Key"],
                    size=obj.get("Size", 0),
                    # list_objects_v2 omits ContentType; guess from the key.
                    content_type=guess_content_type(obj["Key"]),
                    last_modified=obj.get("LastModified"),
                    etag=str(obj.get("ETag", "")).strip('"'),
                )
                emitted += 1
                if emitted >= limit:
                    return
            if not resp.get("IsTruncated"):
                return
            token = resp.get("NextContinuationToken")

    async def signed_url(
        self,
        key: str,
        *,
        expires_in: int = 3600,
        method: str = "GET",
        content_type: str | None = None,
    ) -> str:
        """Return a presigned URL for *key* valid for *expires_in* seconds.

        ``content_type`` is only included in the signature for PUT URLs.

        Raises:
            StorageError: for methods other than GET/PUT/DELETE.
        """
        client = self._get_client()
        op = {"GET": "get_object", "PUT": "put_object", "DELETE": "delete_object"}.get(
            method.upper()
        )
        if op is None:
            raise StorageError(f"unsupported signed URL method {method!r}")
        params: dict[str, Any] = {"Bucket": self.config.bucket, "Key": key}
        if method.upper() == "PUT" and content_type:
            params["ContentType"] = content_type
        return await asyncio.to_thread(
            client.generate_presigned_url, op, Params=params, ExpiresIn=expires_in
        )
178
+
179
+
180
+ __all__ = ["S3Config", "S3Storage"]
File without changes
@@ -0,0 +1,167 @@
1
+ Metadata-Version: 2.4
2
+ Name: hawkapi-storage
3
+ Version: 0.1.0
4
+ Summary: File storage for HawkAPI — local, S3, GCS, Azure backends, pre-signed URLs, streaming uploads
5
+ Project-URL: Homepage, https://pypi.org/project/hawkapi-storage/
6
+ Project-URL: Repository, https://github.com/ashimov/hawkapi-storage
7
+ Project-URL: Issues, https://github.com/ashimov/hawkapi-storage/issues
8
+ Author-email: HawkAPI Contributors <hawkapi@users.noreply.github.com>
9
+ License: MIT License
10
+
11
+ Copyright (c) 2026 HawkAPI Contributors
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+ License-File: LICENSE
31
+ Keywords: azure,files,gcs,hawkapi,s3,storage,upload
32
+ Classifier: Development Status :: 5 - Production/Stable
33
+ Classifier: Framework :: AsyncIO
34
+ Classifier: Intended Audience :: Developers
35
+ Classifier: License :: OSI Approved :: MIT License
36
+ Classifier: Programming Language :: Python :: 3
37
+ Classifier: Programming Language :: Python :: 3.12
38
+ Classifier: Programming Language :: Python :: 3.13
39
+ Classifier: Topic :: System :: Filesystems
40
+ Classifier: Typing :: Typed
41
+ Requires-Python: >=3.12
42
+ Requires-Dist: hawkapi>=0.1.7
43
+ Provides-Extra: azure
44
+ Requires-Dist: azure-storage-blob>=12.19; extra == 'azure'
45
+ Provides-Extra: dev
46
+ Requires-Dist: boto3>=1.34; extra == 'dev'
47
+ Requires-Dist: pyright>=1.1; extra == 'dev'
48
+ Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
49
+ Requires-Dist: pytest>=8.0; extra == 'dev'
50
+ Requires-Dist: ruff>=0.8; extra == 'dev'
51
+ Provides-Extra: gcs
52
+ Requires-Dist: google-cloud-storage>=2.14; extra == 'gcs'
53
+ Provides-Extra: s3
54
+ Requires-Dist: boto3>=1.34; extra == 's3'
55
+ Description-Content-Type: text/markdown
56
+
57
+ # hawkapi-storage
58
+
59
+ Pluggable file storage for [HawkAPI](https://github.com/ashimov/HawkAPI). One `Storage` protocol, four backends: local filesystem, AWS S3 (and S3-compatible — MinIO, Wasabi, R2), Google Cloud Storage, Azure Blob Storage. Pre-signed URLs and streaming on all of them.
60
+
61
+ ## Install
62
+
63
+ ```bash
64
+ pip install hawkapi-storage # local filesystem only
65
+ pip install 'hawkapi-storage[s3]' # + AWS S3
66
+ pip install 'hawkapi-storage[gcs]' # + Google Cloud Storage
67
+ pip install 'hawkapi-storage[azure]' # + Azure Blob Storage
68
+ ```
69
+
70
+ ## Quickstart
71
+
72
+ ```python
73
+ from hawkapi import Depends, HawkAPI
74
+ from hawkapi_storage import LocalConfig, LocalStorage, Storage, get_storage, init_storage
75
+
76
+ app = HawkAPI()
77
+ init_storage(app, storage=LocalStorage(LocalConfig(root="/var/data", base_url="https://cdn.example")))
78
+
79
+
80
+ @app.put("/files/{key}")
81
+ async def upload(key: str, body: bytes, s: Storage = Depends(get_storage)):
82
+ obj = await s.put(key, body, content_type="application/octet-stream")
83
+ return {"key": obj.key, "size": obj.size}
84
+
85
+
86
+ @app.get("/files/{key}/url")
87
+ async def signed(key: str, s: Storage = Depends(get_storage)):
88
+ return {"url": await s.signed_url(key, expires_in=300)}
89
+ ```
90
+
91
+ Swap `LocalStorage` for any other backend — every primitive is identical.
92
+
93
+ ## Backends
94
+
95
+ ```python
96
+ from hawkapi_storage import (
97
+ LocalStorage, LocalConfig,
98
+ S3Storage, S3Config, # extras: [s3]
99
+ GCSStorage, GCSConfig, # extras: [gcs]
100
+ AzureStorage, AzureConfig, # extras: [azure]
101
+ )
102
+
103
+ local = LocalStorage(LocalConfig(root="/var/data"))
104
+ s3 = S3Storage(S3Config(bucket="my-bucket", region="eu-west-1"))
105
+ minio = S3Storage(S3Config(bucket="mb", endpoint_url="https://minio.example", use_path_style=True))
106
+ gcs = GCSStorage(GCSConfig(bucket="my-bucket", project="my-project"))
107
+ azure = AzureStorage(AzureConfig(container="files", connection_string="..."))
108
+ ```
109
+
110
+ ## The `Storage` protocol
111
+
112
+ ```python
113
+ class Storage(Protocol):
114
+ name: str
115
+
116
+ async def put(self, key, data, *, content_type=None, metadata=None) -> StoredObject: ...
117
+ async def get(self, key) -> bytes: ...
118
+ async def stream(self, key, *, chunk_size=65536) -> AsyncIterator[bytes]: ...
119
+ async def exists(self, key) -> bool: ...
120
+ async def delete(self, key) -> None: ...
121
+ async def head(self, key) -> StoredObject: ...
122
+ async def list(self, prefix="", *, limit=1000) -> AsyncIterator[StoredObject]: ...
123
+ async def signed_url(self, key, *, expires_in=3600, method="GET", content_type=None) -> str: ...
124
+ ```
125
+
126
+ `put()` accepts `bytes`, a file-like object, or an `AsyncIterator[bytes]` (for streaming uploads).
127
+
128
+ ## Streaming downloads
129
+
130
+ ```python
131
+ @app.get("/download/{key}")
132
+ async def download(key: str, s: Storage = Depends(get_storage)):
133
+ return StreamingResponse(s.stream(key, chunk_size=65536),
134
+ media_type=(await s.head(key)).content_type)
135
+ ```
136
+
137
+ ## Pre-signed URLs
138
+
139
+ Every backend supports `signed_url(key, expires_in=..., method="GET" | "PUT")`. For PUT/upload pre-signs, pass `content_type=` so the client must send the matching `Content-Type` header.
140
+
141
+ `LocalStorage` produces HMAC-signed URLs that you verify on download with `local.verify_signed_url(key, expires, sig, method="GET")` — useful when serving downloads through your own handler.
142
+
143
+ ## Local filesystem details
144
+
145
+ - Path traversal (`..`) is rejected at `put`/`get` time.
146
+ - `LocalConfig(base_url=...)` sets the prefix used by `signed_url()` — pair it with a Nginx alias or a HawkAPI download handler.
147
+ - `LocalConfig(signing_secret=...)` lets you pin the HMAC secret (otherwise generated once at startup).
148
+
149
+ ## Errors
150
+
151
+ - `StorageError` — base class.
152
+ - `NotFoundError(key)` — `get` / `head` / `stream` on a missing key.
153
+
154
+ ## Development
155
+
156
+ ```bash
157
+ git clone https://github.com/ashimov/hawkapi-storage.git
158
+ cd hawkapi-storage
159
+ uv sync --extra dev
160
+ uv run pytest -q
161
+ uv run ruff check . && uv run ruff format --check .
162
+ uv run pyright src/
163
+ ```
164
+
165
+ ## License
166
+
167
+ MIT.
@@ -0,0 +1,12 @@
1
+ hawkapi_storage/__init__.py,sha256=ukibo0at4kQOe4Y-cdXPZDW8iHuAh0TB1DBt9p3ClEU,1011
2
+ hawkapi_storage/_azure.py,sha256=TEfDNCUyhxAV01LlsKujehxOoXOldSI72c5DTOjFjs0,6895
3
+ hawkapi_storage/_base.py,sha256=5K5ss1NnT0msyT1ZAFFKifzee83omhv7EDoa1dG583k,2248
4
+ hawkapi_storage/_gcs.py,sha256=3t135cpqZ0bQsSX9ojNT4vBAQnQKwflxiNeQ2JQRk60,5245
5
+ hawkapi_storage/_local.py,sha256=iZRUvWVNw7SFgEDWgY6IKqsMsAO4ZCfpDGKx6165rcA,6209
6
+ hawkapi_storage/_plugin.py,sha256=R4bUKGd5qRDyi1LSkkn7FPG-rm6Q7Cu_qWe2y2jLE1U,1279
7
+ hawkapi_storage/_s3.py,sha256=ArPoI30R9XCFhTEUDnMFZSGPQvWT21R_NF7M16PhB0w,6590
8
+ hawkapi_storage/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ hawkapi_storage-0.1.0.dist-info/METADATA,sha256=5K6Ig2cT1iD8yV8b-r_c5m4ZHLpBRsSnqOjwFU6UO7g,6713
10
+ hawkapi_storage-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
11
+ hawkapi_storage-0.1.0.dist-info/licenses/LICENSE,sha256=_RpjhvsfLqqeG_gv2cRatjIxCTGXTpXhKU9jqLZXYa4,1077
12
+ hawkapi_storage-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 HawkAPI Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.