chuk-artifacts 0.1.0__py3-none-any.whl
- chuk_artifacts/__init__.py +149 -0
- chuk_artifacts/admin.py +79 -0
- chuk_artifacts/base.py +75 -0
- chuk_artifacts/batch.py +115 -0
- chuk_artifacts/config.py +338 -0
- chuk_artifacts/core.py +215 -0
- chuk_artifacts/exceptions.py +37 -0
- chuk_artifacts/metadata.py +286 -0
- chuk_artifacts/models.py +23 -0
- chuk_artifacts/presigned.py +267 -0
- chuk_artifacts/provider_factory.py +84 -0
- chuk_artifacts/providers/__init__.py +10 -0
- chuk_artifacts/providers/filesystem.py +453 -0
- chuk_artifacts/providers/ibm_cos.py +121 -0
- chuk_artifacts/providers/ibm_cos_iam.py +82 -0
- chuk_artifacts/providers/memory.py +315 -0
- chuk_artifacts/providers/s3.py +90 -0
- chuk_artifacts/store.py +383 -0
- chuk_artifacts-0.1.0.dist-info/METADATA +519 -0
- chuk_artifacts-0.1.0.dist-info/RECORD +23 -0
- chuk_artifacts-0.1.0.dist-info/WHEEL +5 -0
- chuk_artifacts-0.1.0.dist-info/licenses/LICENSE +21 -0
- chuk_artifacts-0.1.0.dist-info/top_level.txt +1 -0
chuk_artifacts/store.py
ADDED
@@ -0,0 +1,383 @@
# -*- coding: utf-8 -*-
# chuk_artifacts/store.py
"""
Asynchronous, object-store-backed artefact manager with proper modularization.
"""

from __future__ import annotations

import os, logging
from typing import Any, Dict, List, Callable, AsyncContextManager, Optional

try:
    import aioboto3
except ImportError as e:
    raise ImportError(f"Required dependency missing: {e}. Install with: pip install aioboto3") from e

# Auto-load .env files if python-dotenv is available
try:
    from dotenv import load_dotenv
    load_dotenv()
    logger = logging.getLogger(__name__)
    logger.debug("Loaded environment variables from .env file")
except ImportError:
    logger = logging.getLogger(__name__)
    logger.debug("python-dotenv not available, skipping .env file loading")

# Import exceptions
from .exceptions import ArtifactStoreError

# Configure structured logging
logger = logging.getLogger(__name__)

_DEFAULT_TTL = 900  # seconds (15 minutes for metadata)
_DEFAULT_PRESIGN_EXPIRES = 3600  # seconds (1 hour for presigned URLs)

# ─────────────────────────────────────────────────────────────────────
# Default factories
# ─────────────────────────────────────────────────────────────────────
def _default_storage_factory() -> Callable[[], AsyncContextManager]:
    """Return a zero-arg callable that yields an async ctx-mgr S3 client."""
    from .provider_factory import factory_for_env
    return factory_for_env()  # Defaults to memory provider


def _default_session_factory() -> Callable[[], AsyncContextManager]:
    """Return a zero-arg callable that yields an async ctx-mgr session store."""
    from chuk_sessions.provider_factory import factory_for_env
    return factory_for_env()  # Defaults to memory provider

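# ---------------------------------------------------------------------
# Editor's note (illustrative sketch, not part of the released source):
# the two default factories above, and the provider modules' factory()
# helpers loaded later in this file, follow the same contract: a
# zero-argument callable that returns an async context manager yielding
# a client. A hand-rolled aioboto3 factory for an S3-compatible endpoint
# could look like the following; the endpoint URL and credentials are
# placeholder assumptions, not values shipped with the package.
def _example_custom_s3_factory():
    session = aioboto3.Session()
    # Returns an async context manager; callers enter it with `async with`.
    return session.client(
        "s3",
        endpoint_url="http://localhost:9000",    # assumed S3-compatible endpoint
        aws_access_key_id="EXAMPLE_KEY",         # assumed credential
        aws_secret_access_key="EXAMPLE_SECRET",  # assumed credential
    )
# ---------------------------------------------------------------------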
# ─────────────────────────────────────────────────────────────────────
class ArtifactStore:
    """
    FINAL FIXED: Asynchronous artifact storage with modularized operations.

    The circular reference issue has been resolved by fixing BaseOperations.
    Now properly delegates operations to specialized modules:
    - CoreStorageOperations: store() and retrieve()
    - PresignedURLOperations: presign*() methods
    - MetadataOperations: metadata(), exists(), delete()
    - BatchOperations: store_batch()
    - AdminOperations: validate_configuration(), get_stats()
    """

    def __init__(
        self,
        *,
        bucket: Optional[str] = None,
        s3_factory: Optional[Callable[[], AsyncContextManager]] = None,
        storage_provider: Optional[str] = None,
        session_factory: Optional[Callable[[], AsyncContextManager]] = None,
        session_provider: Optional[str] = None,
        max_retries: int = 3,
        # Backward compatibility - deprecated but still supported
        redis_url: Optional[str] = None,
        provider: Optional[str] = None,
    ):
        # Read from environment variables with memory as defaults
        bucket = bucket or os.getenv("ARTIFACT_BUCKET", "mcp-bucket")
        storage_provider = storage_provider or os.getenv("ARTIFACT_PROVIDER", "memory")
        session_provider = session_provider or os.getenv("SESSION_PROVIDER", "memory")

        # Handle backward compatibility
        if redis_url is not None:
            import warnings
            warnings.warn(
                "redis_url parameter is deprecated. Use session_provider='redis' "
                "and set SESSION_REDIS_URL environment variable instead.",
                DeprecationWarning,
                stacklevel=2
            )
            os.environ["SESSION_REDIS_URL"] = redis_url
            session_provider = "redis"

        if provider is not None:
            import warnings
            warnings.warn(
                "provider parameter is deprecated. Use storage_provider instead.",
                DeprecationWarning,
                stacklevel=2
            )
            storage_provider = provider

        # Validate factory/provider combinations
        if s3_factory and storage_provider:
            raise ValueError("Specify either s3_factory or storage_provider—not both")
        if session_factory and session_provider:
            raise ValueError("Specify either session_factory or session_provider—not both")

        # Initialize storage factory
        if s3_factory:
            self._s3_factory = s3_factory
        elif storage_provider:
            self._s3_factory = self._load_storage_provider(storage_provider)
        else:
            self._s3_factory = _default_storage_factory()

        # Initialize session factory
        if session_factory:
            self._session_factory = session_factory
        elif session_provider:
            self._session_factory = self._load_session_provider(session_provider)
        else:
            self._session_factory = _default_session_factory()

        self.bucket = bucket
        self.max_retries = max_retries
        self._storage_provider_name = storage_provider or "memory"
        self._session_provider_name = session_provider or "memory"
        self._closed = False

        # Initialize operation modules (import here to avoid circular dependencies)
        # FIXED: Now works correctly with the fixed BaseOperations
        from .core import CoreStorageOperations
        from .presigned import PresignedURLOperations
        from .metadata import MetadataOperations
        from .batch import BatchOperations
        from .admin import AdminOperations

        self._core = CoreStorageOperations(self)
        self._presigned = PresignedURLOperations(self)
        self._metadata = MetadataOperations(self)
        self._batch = BatchOperations(self)
        self._admin = AdminOperations(self)

        logger.info(
            "ArtifactStore initialized with fixed modular operations",
            extra={
                "bucket": bucket,
                "storage_provider": self._storage_provider_name,
                "session_provider": self._session_provider_name,
            }
        )

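    # -----------------------------------------------------------------
    # Editor's note (illustrative, not part of the released source):
    # the constructor above is usually driven by environment variables
    # rather than arguments. The values below are placeholder
    # assumptions; the variable names come from the os.getenv() calls
    # in __init__ and the provider list in _load_storage_provider():
    #
    #     export ARTIFACT_BUCKET=my-artifacts
    #     export ARTIFACT_PROVIDER=s3     # memory | filesystem | s3 | ibm_cos | ibm_cos_iam
    #     export SESSION_PROVIDER=memory
    #
    # after which a bare `ArtifactStore()` picks everything up.
    # -----------------------------------------------------------------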
    # ─────────────────────────────────────────────────────────────────
    # Core storage operations (delegated to CoreStorageOperations)
    # ─────────────────────────────────────────────────────────────────

    async def store(
        self,
        data: bytes,
        *,
        mime: str,
        summary: str,
        meta: Dict[str, Any] | None = None,
        filename: str | None = None,
        session_id: str | None = None,
        ttl: int = _DEFAULT_TTL,
    ) -> str:
        """Store artifact data with metadata."""
        return await self._core.store(
            data,
            mime=mime,
            summary=summary,
            meta=meta,
            filename=filename,
            session_id=session_id,
            ttl=ttl,
        )

    async def retrieve(self, artifact_id: str) -> bytes:
        """Retrieve artifact data directly."""
        return await self._core.retrieve(artifact_id)

    # ─────────────────────────────────────────────────────────────────
    # Presigned URL operations (delegated to PresignedURLOperations)
    # ─────────────────────────────────────────────────────────────────

    async def presign(self, artifact_id: str, expires: int = _DEFAULT_PRESIGN_EXPIRES) -> str:
        """Generate a presigned URL for artifact download."""
        return await self._presigned.presign(artifact_id, expires)

    async def presign_short(self, artifact_id: str) -> str:
        """Generate a short-lived presigned URL (15 minutes)."""
        return await self._presigned.presign_short(artifact_id)

    async def presign_medium(self, artifact_id: str) -> str:
        """Generate a medium-lived presigned URL (1 hour)."""
        return await self._presigned.presign_medium(artifact_id)

    async def presign_long(self, artifact_id: str) -> str:
        """Generate a long-lived presigned URL (24 hours)."""
        return await self._presigned.presign_long(artifact_id)

    async def presign_upload(
        self,
        session_id: str | None = None,
        filename: str | None = None,
        mime_type: str = "application/octet-stream",
        expires: int = _DEFAULT_PRESIGN_EXPIRES
    ) -> tuple[str, str]:
        """Generate a presigned URL for uploading a new artifact."""
        return await self._presigned.presign_upload(
            session_id=session_id,
            filename=filename,
            mime_type=mime_type,
            expires=expires
        )

    async def register_uploaded_artifact(
        self,
        artifact_id: str,
        *,
        mime: str,
        summary: str,
        meta: Dict[str, Any] | None = None,
        filename: str | None = None,
        session_id: str | None = None,
        ttl: int = _DEFAULT_TTL,
    ) -> bool:
        """Register metadata for an artifact uploaded via presigned URL."""
        return await self._presigned.register_uploaded_artifact(
            artifact_id,
            mime=mime,
            summary=summary,
            meta=meta,
            filename=filename,
            session_id=session_id,
            ttl=ttl,
        )

    async def presign_upload_and_register(
        self,
        *,
        mime: str,
        summary: str,
        meta: Dict[str, Any] | None = None,
        filename: str | None = None,
        session_id: str | None = None,
        ttl: int = _DEFAULT_TTL,
        expires: int = _DEFAULT_PRESIGN_EXPIRES
    ) -> tuple[str, str]:
        """Convenience method combining presign_upload and pre-register metadata."""
        return await self._presigned.presign_upload_and_register(
            mime=mime,
            summary=summary,
            meta=meta,
            filename=filename,
            session_id=session_id,
            ttl=ttl,
            expires=expires
        )

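    # -----------------------------------------------------------------
    # Editor's note (illustrative, not part of the released source):
    # a typical client-upload flow with the methods above, assuming the
    # returned tuple is (upload_url, artifact_id); consult
    # chuk_artifacts/presigned.py in this release for the actual order:
    #
    #     upload_url, artifact_id = await store.presign_upload_and_register(
    #         mime="image/png",
    #         summary="user-uploaded screenshot",   # placeholder metadata
    #         filename="shot.png",
    #     )
    #     # The client PUTs the bytes to upload_url; afterwards the artifact
    #     # can be fetched with retrieve(artifact_id) or presigned for download.
    # -----------------------------------------------------------------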
    # ─────────────────────────────────────────────────────────────────
    # Metadata operations (delegated to MetadataOperations)
    # ─────────────────────────────────────────────────────────────────

    async def metadata(self, artifact_id: str) -> Dict[str, Any]:
        """Get artifact metadata."""
        return await self._metadata.metadata(artifact_id)

    async def exists(self, artifact_id: str) -> bool:
        """Check if artifact exists and hasn't expired."""
        return await self._metadata.exists(artifact_id)

    async def delete(self, artifact_id: str) -> bool:
        """Delete artifact and its metadata."""
        return await self._metadata.delete(artifact_id)

    async def update_metadata(
        self,
        artifact_id: str,
        *,
        summary: str = None,
        meta: Dict[str, Any] = None,
        filename: str = None,
        ttl: int = None
    ) -> Dict[str, Any]:
        """Update artifact metadata without changing the stored data."""
        return await self._metadata.update_metadata(
            artifact_id,
            summary=summary,
            meta=meta,
            filename=filename,
            ttl=ttl
        )

    async def extend_ttl(self, artifact_id: str, additional_seconds: int) -> Dict[str, Any]:
        """Extend the TTL of an artifact's metadata."""
        return await self._metadata.extend_ttl(artifact_id, additional_seconds)

    async def list_by_session(self, session_id: str, limit: int = 100) -> List[Dict[str, Any]]:
        """List artifacts for a specific session."""
        return await self._metadata.list_by_session(session_id, limit)

    # ─────────────────────────────────────────────────────────────────
    # Batch operations (delegated to BatchOperations)
    # ─────────────────────────────────────────────────────────────────

    async def store_batch(
        self,
        items: List[Dict[str, Any]],
        session_id: str | None = None,
        ttl: int = _DEFAULT_TTL,
    ) -> List[str]:
        """Store multiple artifacts in a batch operation."""
        return await self._batch.store_batch(items, session_id, ttl)

    # ─────────────────────────────────────────────────────────────────
    # Administrative operations (delegated to AdminOperations)
    # ─────────────────────────────────────────────────────────────────

    async def validate_configuration(self) -> Dict[str, Any]:
        """Validate store configuration and connectivity."""
        return await self._admin.validate_configuration()

    async def get_stats(self) -> Dict[str, Any]:
        """Get storage statistics."""
        return await self._admin.get_stats()

    # ─────────────────────────────────────────────────────────────────
    # Resource management
    # ─────────────────────────────────────────────────────────────────

    async def close(self):
        """Mark store as closed."""
        if not self._closed:
            self._closed = True
            logger.info("ArtifactStore closed")

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()

    # ─────────────────────────────────────────────────────────────────
    # Helper functions (still needed for provider loading)
    # ─────────────────────────────────────────────────────────────────

    def _load_storage_provider(self, name: str) -> Callable[[], AsyncContextManager]:
        """Load storage provider by name."""
        from importlib import import_module

        try:
            mod = import_module(f"chuk_artifacts.providers.{name}")
        except ModuleNotFoundError as exc:
            available = ["memory", "filesystem", "s3", "ibm_cos", "ibm_cos_iam"]
            raise ValueError(
                f"Unknown storage provider '{name}'. "
                f"Available providers: {', '.join(available)}"
            ) from exc

        if not hasattr(mod, "factory"):
            raise AttributeError(f"Storage provider '{name}' lacks factory()")

        logger.info(f"Loaded storage provider: {name}")
        return mod.factory()

    def _load_session_provider(self, name: str) -> Callable[[], AsyncContextManager]:
        """Load session provider by name."""
        from importlib import import_module

        try:
            mod = import_module(f"chuk_sessions.providers.{name}")
        except ModuleNotFoundError as exc:
            raise ValueError(f"Unknown session provider '{name}'") from exc

        if not hasattr(mod, "factory"):
            raise AttributeError(f"Session provider '{name}' lacks factory()")

        logger.info(f"Loaded session provider: {name}")
        return mod.factory()
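
For orientation, here is a minimal usage sketch built only from the constructor defaults and the delegating methods shown above. It assumes chuk_artifacts/__init__.py re-exports ArtifactStore and that the default in-memory providers accept an arbitrary session_id; the payload, filename, and session id are placeholders, and the snippet has not been verified against the released operation modules.

# --- editor's usage sketch (not part of chuk_artifacts/store.py) ---
import asyncio

from chuk_artifacts import ArtifactStore  # assumed re-export from __init__.py


async def main() -> None:
    # Memory storage and session providers are the defaults, so no bucket,
    # credentials, or environment variables are required for a smoke test.
    async with ArtifactStore() as store:
        artifact_id = await store.store(
            b"hello, artifact",
            mime="text/plain",
            summary="demo payload",        # placeholder metadata
            filename="hello.txt",
            session_id="demo-session",     # placeholder session id
        )

        print(await store.exists(artifact_id))              # True until the TTL lapses
        print(await store.metadata(artifact_id))            # metadata dict for the artifact
        print(await store.retrieve(artifact_id))            # b"hello, artifact"
        print(await store.list_by_session("demo-session"))  # artifacts in this session

        await store.delete(artifact_id)


asyncio.run(main())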