chuk-artifacts 0.1.0 (chuk_artifacts-0.1.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chuk_artifacts/__init__.py +149 -0
- chuk_artifacts/admin.py +79 -0
- chuk_artifacts/base.py +75 -0
- chuk_artifacts/batch.py +115 -0
- chuk_artifacts/config.py +338 -0
- chuk_artifacts/core.py +215 -0
- chuk_artifacts/exceptions.py +37 -0
- chuk_artifacts/metadata.py +286 -0
- chuk_artifacts/models.py +23 -0
- chuk_artifacts/presigned.py +267 -0
- chuk_artifacts/provider_factory.py +84 -0
- chuk_artifacts/providers/__init__.py +10 -0
- chuk_artifacts/providers/filesystem.py +453 -0
- chuk_artifacts/providers/ibm_cos.py +121 -0
- chuk_artifacts/providers/ibm_cos_iam.py +82 -0
- chuk_artifacts/providers/memory.py +315 -0
- chuk_artifacts/providers/s3.py +90 -0
- chuk_artifacts/store.py +383 -0
- chuk_artifacts-0.1.0.dist-info/METADATA +519 -0
- chuk_artifacts-0.1.0.dist-info/RECORD +23 -0
- chuk_artifacts-0.1.0.dist-info/WHEEL +5 -0
- chuk_artifacts-0.1.0.dist-info/licenses/LICENSE +21 -0
- chuk_artifacts-0.1.0.dist-info/top_level.txt +1 -0
chuk_artifacts/metadata.py
ADDED
@@ -0,0 +1,286 @@
# -*- coding: utf-8 -*-
# chuk_artifacts/metadata.py
"""
Metadata operations: exists, metadata retrieval, and deletion.
"""

from __future__ import annotations

import logging, json
from datetime import datetime
from typing import Any, Dict, List

from .base import BaseOperations
from .exceptions import (
    ArtifactStoreError, ArtifactNotFoundError, ArtifactExpiredError,
    ProviderError
)

logger = logging.getLogger(__name__)


class MetadataOperations(BaseOperations):
    """Handles metadata-related operations."""

    async def metadata(self, artifact_id: str) -> Dict[str, Any]:
        """
        Get artifact metadata.

        Parameters
        ----------
        artifact_id : str
            The artifact identifier

        Returns
        -------
        dict
            Artifact metadata

        Raises
        ------
        ArtifactNotFoundError
            If artifact doesn't exist or has expired
        """
        return await self._get_record(artifact_id)

    async def exists(self, artifact_id: str) -> bool:
        """
        Check if artifact exists and hasn't expired.

        Parameters
        ----------
        artifact_id : str
            The artifact identifier

        Returns
        -------
        bool
            True if artifact exists, False otherwise
        """
        try:
            await self._get_record(artifact_id)
            return True
        except (ArtifactNotFoundError, ArtifactExpiredError):
            return False

    async def delete(self, artifact_id: str) -> bool:
        """
        Delete artifact and its metadata.

        Parameters
        ----------
        artifact_id : str
            The artifact identifier

        Returns
        -------
        bool
            True if deleted, False if not found

        Raises
        ------
        ProviderError
            If deletion fails
        """
        self._check_closed()

        try:
            record = await self._get_record(artifact_id)

            # Delete from object storage
            storage_ctx_mgr = self.s3_factory()
            async with storage_ctx_mgr as s3:
                await s3.delete_object(Bucket=self.bucket, Key=record["key"])

            # Delete metadata from session store
            session_ctx_mgr = self.session_factory()
            async with session_ctx_mgr as session:
                if hasattr(session, 'delete'):
                    await session.delete(artifact_id)
                else:
                    logger.warning(
                        "Session provider doesn't support delete operation",
                        extra={"artifact_id": artifact_id, "provider": self.session_provider_name}
                    )

            logger.info("Artifact deleted", extra={"artifact_id": artifact_id})
            return True

        except (ArtifactNotFoundError, ArtifactExpiredError):
            logger.warning("Attempted to delete non-existent artifact", extra={"artifact_id": artifact_id})
            return False
        except Exception as e:
            logger.error(
                "Artifact deletion failed",
                extra={"artifact_id": artifact_id, "error": str(e)}
            )
            raise ProviderError(f"Deletion failed: {e}") from e

    async def update_metadata(
        self,
        artifact_id: str,
        *,
        summary: str = None,
        meta: Dict[str, Any] = None,
        filename: str = None,
        ttl: int = None
    ) -> Dict[str, Any]:
        """
        Update artifact metadata without changing the stored data.

        Parameters
        ----------
        artifact_id : str
            The artifact identifier
        summary : str, optional
            New summary description
        meta : dict, optional
            New or additional metadata fields
        filename : str, optional
            New filename
        ttl : int, optional
            New TTL for metadata

        Returns
        -------
        dict
            Updated metadata record

        Raises
        ------
        ArtifactNotFoundError
            If artifact doesn't exist
        ProviderError
            If update fails
        """
        self._check_closed()

        try:
            # Get existing record
            record = await self._get_record(artifact_id)

            # Update fields if provided
            if summary is not None:
                record["summary"] = summary
            if meta is not None:
                # Merge with existing meta, allowing overwrites
                existing_meta = record.get("meta", {})
                existing_meta.update(meta)
                record["meta"] = existing_meta
            if filename is not None:
                record["filename"] = filename
            if ttl is not None:
                record["ttl"] = ttl

            # Update stored metadata
            record["updated_at"] = datetime.utcnow().isoformat(timespec="seconds") + "Z"

            session_ctx_mgr = self.session_factory()
            async with session_ctx_mgr as session:
                final_ttl = ttl or record.get("ttl", 900)  # Use provided TTL or existing/default
                await session.setex(artifact_id, final_ttl, json.dumps(record))

            logger.info(
                "Artifact metadata updated",
                extra={"artifact_id": artifact_id, "updated_fields": list([
                    k for k, v in [
                        ("summary", summary), ("meta", meta),
                        ("filename", filename), ("ttl", ttl)
                    ] if v is not None
                ])}
            )

            return record

        except (ArtifactNotFoundError, ArtifactExpiredError):
            raise
        except Exception as e:
            logger.error(
                "Metadata update failed",
                extra={"artifact_id": artifact_id, "error": str(e)}
            )
            raise ProviderError(f"Metadata update failed: {e}") from e

    async def extend_ttl(self, artifact_id: str, additional_seconds: int) -> Dict[str, Any]:
        """
        Extend the TTL of an artifact's metadata.

        Parameters
        ----------
        artifact_id : str
            The artifact identifier
        additional_seconds : int
            Additional seconds to add to the current TTL

        Returns
        -------
        dict
            Updated metadata record

        Raises
        ------
        ArtifactNotFoundError
            If artifact doesn't exist
        ProviderError
            If TTL extension fails
        """
        self._check_closed()

        try:
            # Get current record to find existing TTL
            record = await self._get_record(artifact_id)
            current_ttl = record.get("ttl", 900)
            new_ttl = current_ttl + additional_seconds

            # Update with extended TTL
            return await self.update_metadata(artifact_id, ttl=new_ttl)

        except (ArtifactNotFoundError, ArtifactExpiredError):
            raise
        except Exception as e:
            logger.error(
                "TTL extension failed",
                extra={
                    "artifact_id": artifact_id,
                    "additional_seconds": additional_seconds,
                    "error": str(e)
                }
            )
            raise ProviderError(f"TTL extension failed: {e}") from e

    async def list_by_session(self, session_id: str, limit: int = 100) -> List[Dict[str, Any]]:
        """
        List artifacts for a specific session.

        Note: This is a basic implementation that would need to be enhanced
        with proper indexing for production use. Currently, this method
        cannot be efficiently implemented with the session provider abstraction
        since we don't have a way to query by session_id patterns.

        Parameters
        ----------
        session_id : str
            Session identifier to search for
        limit : int, optional
            Maximum number of artifacts to return

        Returns
        -------
        list
            List of metadata records for artifacts in the session

        Raises
        ------
        NotImplementedError
            This method requires additional indexing infrastructure
        """
        # This would require either:
        # 1. A separate index of session_id -> artifact_ids
        # 2. Storage provider support for prefix queries
        # 3. Enhanced session provider with query capabilities

        raise NotImplementedError(
            "list_by_session requires additional indexing infrastructure. "
            "Consider implementing session-based indexing or using storage "
            "provider list operations if available."
        )
chuk_artifacts/models.py
ADDED
@@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
# chuk_artifacts/models.py
from typing import Any, Dict
from pydantic import BaseModel, Field


class ArtifactEnvelope(BaseModel):
    """
    A tiny, model-friendly wrapper describing a stored artefact.

    The *bytes*, *mime_type*, etc. let the UI reason about the file
    without ever uploading the raw payload into the chat context.
    """

    success: bool = True
    artifact_id: str  # opaque handle for look-ups
    mime_type: str    # e.g. "image/png", "text/csv"
    bytes: int        # size on disk
    summary: str      # human-readable description / alt
    meta: Dict[str, Any] = Field(default_factory=dict)

    class Config:
        extra = "allow"  # future-proof: lets tools add keys
chuk_artifacts/presigned.py
ADDED
@@ -0,0 +1,267 @@
# -*- coding: utf-8 -*-
# chuk_artifacts/presigned.py
"""
Presigned URL operations: download URLs, upload URLs, and upload registration.
"""

from __future__ import annotations

import uuid, time, logging, json
from datetime import datetime
from typing import Any, Dict, Optional

from .base import BaseOperations
from .exceptions import (
    ArtifactStoreError, ArtifactNotFoundError, ArtifactExpiredError,
    ProviderError, SessionError
)

logger = logging.getLogger(__name__)

_ANON_PREFIX = "anon"
_DEFAULT_TTL = 900
_DEFAULT_PRESIGN_EXPIRES = 3600


class PresignedURLOperations(BaseOperations):
    """Handles all presigned URL operations."""

    async def presign(self, artifact_id: str, expires: int = _DEFAULT_PRESIGN_EXPIRES) -> str:
        """Generate a presigned URL for artifact download."""
        self._check_closed()

        start_time = time.time()

        try:
            record = await self._get_record(artifact_id)

            storage_ctx_mgr = self.s3_factory()
            async with storage_ctx_mgr as s3:
                url = await s3.generate_presigned_url(
                    "get_object",
                    Params={"Bucket": self.bucket, "Key": record["key"]},
                    ExpiresIn=expires,
                )

            duration_ms = int((time.time() - start_time) * 1000)
            logger.info(
                "Presigned URL generated",
                extra={
                    "artifact_id": artifact_id,
                    "expires_in": expires,
                    "duration_ms": duration_ms,
                }
            )

            return url

        except (ArtifactNotFoundError, ArtifactExpiredError):
            raise
        except Exception as e:
            duration_ms = int((time.time() - start_time) * 1000)
            logger.error(
                "Presigned URL generation failed",
                extra={
                    "artifact_id": artifact_id,
                    "error": str(e),
                    "duration_ms": duration_ms,
                }
            )

            if "oauth" in str(e).lower() or "credential" in str(e).lower():
                raise NotImplementedError(
                    "This provider cannot generate presigned URLs with the "
                    "current credential type (e.g. OAuth). Use HMAC creds instead."
                ) from e
            else:
                raise ProviderError(f"Presigned URL generation failed: {e}") from e

    async def presign_short(self, artifact_id: str) -> str:
        """Generate a short-lived presigned URL (15 minutes)."""
        return await self.presign(artifact_id, expires=900)

    async def presign_medium(self, artifact_id: str) -> str:
        """Generate a medium-lived presigned URL (1 hour)."""
        return await self.presign(artifact_id, expires=3600)

    async def presign_long(self, artifact_id: str) -> str:
        """Generate a long-lived presigned URL (24 hours)."""
        return await self.presign(artifact_id, expires=86400)

    async def presign_upload(
        self,
        session_id: str | None = None,
        filename: str | None = None,
        mime_type: str = "application/octet-stream",
        expires: int = _DEFAULT_PRESIGN_EXPIRES
    ) -> tuple[str, str]:
        """Generate a presigned URL for uploading a new artifact."""
        self._check_closed()

        start_time = time.time()

        # Generate artifact ID and key path
        artifact_id = uuid.uuid4().hex
        scope = session_id or f"{_ANON_PREFIX}_{artifact_id}"
        key = f"sess/{scope}/{artifact_id}"

        try:
            storage_ctx_mgr = self.s3_factory()
            async with storage_ctx_mgr as s3:
                url = await s3.generate_presigned_url(
                    "put_object",
                    Params={
                        "Bucket": self.bucket,
                        "Key": key,
                        "ContentType": mime_type
                    },
                    ExpiresIn=expires,
                )

            duration_ms = int((time.time() - start_time) * 1000)
            logger.info(
                "Upload presigned URL generated",
                extra={
                    "artifact_id": artifact_id,
                    "key": key,
                    "mime_type": mime_type,
                    "expires_in": expires,
                    "duration_ms": duration_ms,
                }
            )

            return url, artifact_id

        except Exception as e:
            duration_ms = int((time.time() - start_time) * 1000)
            logger.error(
                "Upload presigned URL generation failed",
                extra={
                    "artifact_id": artifact_id,
                    "error": str(e),
                    "duration_ms": duration_ms,
                }
            )

            if "oauth" in str(e).lower() or "credential" in str(e).lower():
                raise NotImplementedError(
                    "This provider cannot generate presigned URLs with the "
                    "current credential type (e.g. OAuth). Use HMAC creds instead."
                ) from e
            else:
                raise ProviderError(f"Upload presigned URL generation failed: {e}") from e

    async def register_uploaded_artifact(
        self,
        artifact_id: str,
        *,
        mime: str,
        summary: str,
        meta: Dict[str, Any] | None = None,
        filename: str | None = None,
        session_id: str | None = None,
        ttl: int = _DEFAULT_TTL,
    ) -> bool:
        """Register metadata for an artifact uploaded via presigned URL."""
        self._check_closed()

        start_time = time.time()

        # Reconstruct the key path
        scope = session_id or f"{_ANON_PREFIX}_{artifact_id}"
        key = f"sess/{scope}/{artifact_id}"

        try:
            # Verify the object exists and get its size
            storage_ctx_mgr = self.s3_factory()
            async with storage_ctx_mgr as s3:
                try:
                    response = await s3.head_object(Bucket=self.bucket, Key=key)
                    file_size = response.get('ContentLength', 0)
                except Exception:
                    logger.warning(f"Artifact {artifact_id} not found in storage")
                    return False

            # Build metadata record
            record = {
                "scope": scope,
                "key": key,
                "mime": mime,
                "summary": summary,
                "meta": meta or {},
                "filename": filename,
                "bytes": file_size,
                "sha256": None,  # We don't have the hash since we didn't upload it directly
                "stored_at": datetime.utcnow().isoformat(timespec="seconds") + "Z",
                "ttl": ttl,
                "storage_provider": self.storage_provider_name,
                "session_provider": self.session_provider_name,
                "uploaded_via_presigned": True,  # Flag to indicate upload method
            }

            # Cache metadata using session provider
            session_ctx_mgr = self.session_factory()
            async with session_ctx_mgr as session:
                await session.setex(artifact_id, ttl, json.dumps(record))

            duration_ms = int((time.time() - start_time) * 1000)
            logger.info(
                "Artifact metadata registered after presigned upload",
                extra={
                    "artifact_id": artifact_id,
                    "bytes": file_size,
                    "mime": mime,
                    "duration_ms": duration_ms,
                }
            )

            return True

        except Exception as e:
            duration_ms = int((time.time() - start_time) * 1000)
            logger.error(
                "Artifact metadata registration failed",
                extra={
                    "artifact_id": artifact_id,
                    "error": str(e),
                    "duration_ms": duration_ms,
                }
            )

            if "session" in str(e).lower() or "redis" in str(e).lower():
                raise SessionError(f"Metadata registration failed: {e}") from e
            else:
                raise ProviderError(f"Metadata registration failed: {e}") from e

    async def presign_upload_and_register(
        self,
        *,
        mime: str,
        summary: str,
        meta: Dict[str, Any] | None = None,
        filename: str | None = None,
        session_id: str | None = None,
        ttl: int = _DEFAULT_TTL,
        expires: int = _DEFAULT_PRESIGN_EXPIRES
    ) -> tuple[str, str]:
        """Convenience method combining presign_upload and pre-register metadata."""
        # Generate presigned URL
        upload_url, artifact_id = await self.presign_upload(
            session_id=session_id,
            filename=filename,
            mime_type=mime,
            expires=expires
        )

        # Pre-register metadata (with unknown file size)
        await self.register_uploaded_artifact(
            artifact_id,
            mime=mime,
            summary=summary,
            meta=meta,
            filename=filename,
            session_id=session_id,
            ttl=ttl
        )

        return upload_url, artifact_id
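The three-step client flow these methods enable, as a hedged sketch. It assumes `ArtifactStore` forwards these presigned-URL methods (inferred from the module layout, not shown in this diff), and httpx is an arbitrary HTTP client choice, not a dependency of this package:

```python
# Hedged sketch, not part of the wheel. ASSUMPTIONS: ArtifactStore
# re-exports the PresignedURLOperations methods; httpx is only an
# example client. Method signatures match the module above.
import asyncio
import httpx
from chuk_artifacts import ArtifactStore  # assumed export

async def main() -> None:
    store = ArtifactStore()

    # 1) Mint a PUT URL plus a fresh artifact_id (default expiry 3600 s).
    upload_url, artifact_id = await store.presign_upload(
        session_id="sess-123", mime_type="image/png"
    )

    # 2) The client ships bytes straight to object storage; the
    #    Content-Type must match the ContentType baked into the URL.
    async with httpx.AsyncClient() as client:
        resp = await client.put(
            upload_url,
            content=b"\x89PNG\r\n\x1a\n",  # payload elided
            headers={"Content-Type": "image/png"},
        )
        resp.raise_for_status()

    # 3) Register metadata; head_object sizes the stored object, so this
    #    step only succeeds after the upload has landed.
    ok = await store.register_uploaded_artifact(
        artifact_id, mime="image/png", summary="logo", session_id="sess-123"
    )
    print("registered:", ok)

asyncio.run(main())
```

One wrinkle worth noting: `presign_upload_and_register` invokes `register_uploaded_artifact` before the client has uploaded anything, and that method returns `False` when `head_object` finds no object, a return value the convenience wrapper discards. Callers who need the metadata record can re-register after the PUT completes, as the manual flow above does.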
chuk_artifacts/provider_factory.py
ADDED
@@ -0,0 +1,84 @@
# -*- coding: utf-8 -*-
# chuk_artifacts/provider_factory.py
"""
Resolve the storage back-end requested via **ARTIFACT_PROVIDER**.

Built-in providers
──────────────────
• **memory** (default) - in-process, non-persistent store (unit tests, demos)
• **fs**, **filesystem** - local filesystem rooted at `$ARTIFACT_FS_ROOT`
• **s3** - plain AWS or any S3-compatible endpoint
• **ibm_cos** - IBM COS, HMAC credentials (Signature V2)
• **ibm_cos_iam** - IBM COS, IAM API-key / OAuth signature

Any other value is resolved dynamically as
`chuk_artifacts.providers.<name>.factory()`.
"""

from __future__ import annotations

import os
from importlib import import_module
from typing import Callable, AsyncContextManager

__all__ = ["factory_for_env"]


# ──────────────────────────────────────────────────────────────────
# Public factory selector
# ──────────────────────────────────────────────────────────────────

def factory_for_env() -> Callable[[], AsyncContextManager]:
    """Return a provider-specific factory based on `$ARTIFACT_PROVIDER`."""

    provider = os.getenv("ARTIFACT_PROVIDER", "memory").lower().strip()

    # Fast paths for the built-ins ------------------------------------------------
    # Memory first as it's the default
    if provider in ("memory", "mem", "inmemory"):
        from .providers import memory
        return memory.factory()

    if provider in ("fs", "filesystem"):
        from .providers import filesystem
        return filesystem.factory()

    if provider == "s3":
        from .providers import s3
        return s3.factory()

    if provider == "ibm_cos":
        from .providers import ibm_cos
        return ibm_cos.factory()  # returns the zero-arg factory callable

    if provider == "ibm_cos_iam":
        from .providers import ibm_cos_iam
        return ibm_cos_iam.factory  # note: function itself is already the factory

    # ---------------------------------------------------------------------------
    # Fallback: dynamic lookup – allows user-supplied provider implementations.
    # ---------------------------------------------------------------------------
    try:
        mod = import_module(f"chuk_artifacts.providers.{provider}")
    except ModuleNotFoundError as exc:
        # Provide helpful error message with available providers
        available = ["memory", "filesystem", "s3", "ibm_cos", "ibm_cos_iam"]
        raise ValueError(
            f"Unknown storage provider '{provider}'. "
            f"Available providers: {', '.join(available)}"
        ) from exc

    if not hasattr(mod, "factory"):
        raise AttributeError(
            f"Provider '{provider}' lacks a factory() function"
        )
    # For dynamic providers, call factory() to get the actual factory function
    factory_func = mod.factory
    if callable(factory_func):
        # If it's a function that returns a factory, call it
        try:
            return factory_func()
        except TypeError:
            # If it's already the factory function, return it directly
            return factory_func
    return factory_func
chuk_artifacts/providers/__init__.py
ADDED
@@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
# chuk_artifacts/providers/__init__.py
"""
Convenience re-exports so caller code can do:

    from chuk_artifacts.providers import s3, ibm_cos, memory, filesystem
"""
from . import s3, ibm_cos, memory, filesystem, ibm_cos_iam

__all__ = ["s3", "ibm_cos", "memory", "filesystem", "ibm_cos_iam"]