chuk-artifacts 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,383 @@
+ # -*- coding: utf-8 -*-
+ # chuk_artifacts/store.py
+ """
+ Asynchronous, object-store-backed artefact manager with proper modularization.
+ """
+
+ from __future__ import annotations
+
+ import logging
+ import os
+ from typing import Any, Dict, List, Callable, AsyncContextManager, Optional
+
+ try:
+     import aioboto3
+ except ImportError as e:
+     raise ImportError(f"Required dependency missing: {e}. Install with: pip install aioboto3") from e
+
+ # Configure structured logging
+ logger = logging.getLogger(__name__)
+
+ # Auto-load .env files if python-dotenv is available
+ try:
+     from dotenv import load_dotenv
+     load_dotenv()
+     logger.debug("Loaded environment variables from .env file")
+ except ImportError:
+     logger.debug("python-dotenv not available, skipping .env file loading")
+
+ # Import exceptions
+ from .exceptions import ArtifactStoreError
+
+ _DEFAULT_TTL = 900  # seconds (15 minutes for metadata)
+ _DEFAULT_PRESIGN_EXPIRES = 3600  # seconds (1 hour for presigned URLs)
+
+ # ─────────────────────────────────────────────────────────────────────
+ # Default factories
+ # ─────────────────────────────────────────────────────────────────────
+ def _default_storage_factory() -> Callable[[], AsyncContextManager]:
+     """Return a zero-arg callable that yields an async ctx-mgr S3 client."""
+     from .provider_factory import factory_for_env
+     return factory_for_env()  # Defaults to memory provider
+
+
+ def _default_session_factory() -> Callable[[], AsyncContextManager]:
+     """Return a zero-arg callable that yields an async ctx-mgr session store."""
+     from chuk_sessions.provider_factory import factory_for_env
+     return factory_for_env()  # Defaults to memory provider
+
+
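+ # Example (illustrative): the zero-arg callables returned above are meant to be
+ # consumed by the operation modules roughly as sketched below. The put_object
+ # call is only an assumption about how an S3-style client would be used, and
+ # key/data stand in for arbitrary values; the actual calls live in the
+ # operation modules, not in this file.
+ #
+ #     s3_factory = _default_storage_factory()
+ #     async with s3_factory() as s3:
+ #         await s3.put_object(Bucket="mcp-bucket", Key=key, Body=data)
+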
+ # ─────────────────────────────────────────────────────────────────────
+ class ArtifactStore:
+     """
+     Asynchronous artifact storage with modularized operations.
+
+     Operations are delegated to specialized modules:
+     - CoreStorageOperations: store() and retrieve()
+     - PresignedURLOperations: presign*() methods
+     - MetadataOperations: metadata(), exists(), delete()
+     - BatchOperations: store_batch()
+     - AdminOperations: validate_configuration(), get_stats()
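+
+     Example (assuming the default in-memory storage and session providers;
+     data and metadata values are illustrative)::
+
+         async with ArtifactStore() as store:
+             artifact_id = await store.store(
+                 b"hello world",
+                 mime="text/plain",
+                 summary="greeting",
+                 filename="hello.txt",
+             )
+             data = await store.retrieve(artifact_id)
+             url = await store.presign_short(artifact_id)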
+     """
+
+     def __init__(
+         self,
+         *,
+         bucket: Optional[str] = None,
+         s3_factory: Optional[Callable[[], AsyncContextManager]] = None,
+         storage_provider: Optional[str] = None,
+         session_factory: Optional[Callable[[], AsyncContextManager]] = None,
+         session_provider: Optional[str] = None,
+         max_retries: int = 3,
+         # Backward compatibility - deprecated but still supported
+         redis_url: Optional[str] = None,
+         provider: Optional[str] = None,
+     ):
+         # Handle backward compatibility
+         if redis_url is not None:
+             import warnings
+             warnings.warn(
+                 "redis_url parameter is deprecated. Use session_provider='redis' "
+                 "and set SESSION_REDIS_URL environment variable instead.",
+                 DeprecationWarning,
+                 stacklevel=2
+             )
+             os.environ["SESSION_REDIS_URL"] = redis_url
+             session_provider = "redis"
+
+         if provider is not None:
+             import warnings
+             warnings.warn(
+                 "provider parameter is deprecated. Use storage_provider instead.",
+                 DeprecationWarning,
+                 stacklevel=2
+             )
+             storage_provider = provider
+
+         # Validate factory/provider combinations on the explicitly supplied
+         # arguments, before environment defaults are applied below
+         if s3_factory and storage_provider:
+             raise ValueError("Specify either s3_factory or storage_provider—not both")
+         if session_factory and session_provider:
+             raise ValueError("Specify either session_factory or session_provider—not both")
+
+         # Read from environment variables with memory as defaults
+         bucket = bucket or os.getenv("ARTIFACT_BUCKET", "mcp-bucket")
+         storage_provider = storage_provider or os.getenv("ARTIFACT_PROVIDER", "memory")
+         session_provider = session_provider or os.getenv("SESSION_PROVIDER", "memory")
+
+         # Initialize storage factory
+         if s3_factory:
+             self._s3_factory = s3_factory
+         elif storage_provider:
+             self._s3_factory = self._load_storage_provider(storage_provider)
+         else:
+             self._s3_factory = _default_storage_factory()
+
+         # Initialize session factory
+         if session_factory:
+             self._session_factory = session_factory
+         elif session_provider:
+             self._session_factory = self._load_session_provider(session_provider)
+         else:
+             self._session_factory = _default_session_factory()
+
+         self.bucket = bucket
+         self.max_retries = max_retries
+         self._storage_provider_name = storage_provider or "memory"
+         self._session_provider_name = session_provider or "memory"
+         self._closed = False
+
+         # Initialize operation modules (import here to avoid circular dependencies)
+         from .core import CoreStorageOperations
+         from .presigned import PresignedURLOperations
+         from .metadata import MetadataOperations
+         from .batch import BatchOperations
+         from .admin import AdminOperations
+
+         self._core = CoreStorageOperations(self)
+         self._presigned = PresignedURLOperations(self)
+         self._metadata = MetadataOperations(self)
+         self._batch = BatchOperations(self)
+         self._admin = AdminOperations(self)
+
+         logger.info(
+             "ArtifactStore initialized with modular operations",
+             extra={
+                 "bucket": bucket,
+                 "storage_provider": self._storage_provider_name,
+                 "session_provider": self._session_provider_name,
+             }
+         )
+
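+     # Example (illustrative): explicit construction for a non-default setup.
+     # "s3" is one of the provider names listed in _load_storage_provider below,
+     # and "redis" is the session provider referenced by the redis_url
+     # deprecation path above; any provider-specific credentials or environment
+     # variables are handled by the provider modules, not shown in this file.
+     #
+     #     store = ArtifactStore(
+     #         bucket="my-artifacts",
+     #         storage_provider="s3",
+     #         session_provider="redis",
+     #     )
+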
+     # ─────────────────────────────────────────────────────────────────
+     # Core storage operations (delegated to CoreStorageOperations)
+     # ─────────────────────────────────────────────────────────────────
+
+     async def store(
+         self,
+         data: bytes,
+         *,
+         mime: str,
+         summary: str,
+         meta: Dict[str, Any] | None = None,
+         filename: str | None = None,
+         session_id: str | None = None,
+         ttl: int = _DEFAULT_TTL,
+     ) -> str:
+         """Store artifact data with metadata."""
+         return await self._core.store(
+             data,
+             mime=mime,
+             summary=summary,
+             meta=meta,
+             filename=filename,
+             session_id=session_id,
+             ttl=ttl,
+         )
+
+     async def retrieve(self, artifact_id: str) -> bytes:
+         """Retrieve artifact data directly."""
+         return await self._core.retrieve(artifact_id)
+
+     # ─────────────────────────────────────────────────────────────────
+     # Presigned URL operations (delegated to PresignedURLOperations)
+     # ─────────────────────────────────────────────────────────────────
+
+     async def presign(self, artifact_id: str, expires: int = _DEFAULT_PRESIGN_EXPIRES) -> str:
+         """Generate a presigned URL for artifact download."""
+         return await self._presigned.presign(artifact_id, expires)
+
+     async def presign_short(self, artifact_id: str) -> str:
+         """Generate a short-lived presigned URL (15 minutes)."""
+         return await self._presigned.presign_short(artifact_id)
+
+     async def presign_medium(self, artifact_id: str) -> str:
+         """Generate a medium-lived presigned URL (1 hour)."""
+         return await self._presigned.presign_medium(artifact_id)
+
+     async def presign_long(self, artifact_id: str) -> str:
+         """Generate a long-lived presigned URL (24 hours)."""
+         return await self._presigned.presign_long(artifact_id)
+
+     async def presign_upload(
+         self,
+         session_id: str | None = None,
+         filename: str | None = None,
+         mime_type: str = "application/octet-stream",
+         expires: int = _DEFAULT_PRESIGN_EXPIRES
+     ) -> tuple[str, str]:
+         """Generate a presigned URL for uploading a new artifact."""
+         return await self._presigned.presign_upload(
+             session_id=session_id,
+             filename=filename,
+             mime_type=mime_type,
+             expires=expires
+         )
+
+     async def register_uploaded_artifact(
+         self,
+         artifact_id: str,
+         *,
+         mime: str,
+         summary: str,
+         meta: Dict[str, Any] | None = None,
+         filename: str | None = None,
+         session_id: str | None = None,
+         ttl: int = _DEFAULT_TTL,
+     ) -> bool:
+         """Register metadata for an artifact uploaded via presigned URL."""
+         return await self._presigned.register_uploaded_artifact(
+             artifact_id,
+             mime=mime,
+             summary=summary,
+             meta=meta,
+             filename=filename,
+             session_id=session_id,
+             ttl=ttl,
+         )
+
+     async def presign_upload_and_register(
+         self,
+         *,
+         mime: str,
+         summary: str,
+         meta: Dict[str, Any] | None = None,
+         filename: str | None = None,
+         session_id: str | None = None,
+         ttl: int = _DEFAULT_TTL,
+         expires: int = _DEFAULT_PRESIGN_EXPIRES
+     ) -> tuple[str, str]:
+         """Convenience method combining presign_upload and metadata pre-registration."""
+         return await self._presigned.presign_upload_and_register(
+             mime=mime,
+             summary=summary,
+             meta=meta,
+             filename=filename,
+             session_id=session_id,
+             ttl=ttl,
+             expires=expires
+         )
+
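+     # Example (illustrative): a client-upload flow built from the methods above.
+     # The tuple unpacking assumes presign_upload() returns (upload_url,
+     # artifact_id); the tuple[str, str] return type implies a pair, but the
+     # ordering is defined in PresignedURLOperations, not in this file.
+     #
+     #     upload_url, artifact_id = await store.presign_upload(
+     #         filename="report.pdf", mime_type="application/pdf"
+     #     )
+     #     # ... the client PUTs the bytes to upload_url ...
+     #     await store.register_uploaded_artifact(
+     #         artifact_id, mime="application/pdf", summary="Quarterly report"
+     #     )
+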
+     # ─────────────────────────────────────────────────────────────────
+     # Metadata operations (delegated to MetadataOperations)
+     # ─────────────────────────────────────────────────────────────────
+
+     async def metadata(self, artifact_id: str) -> Dict[str, Any]:
+         """Get artifact metadata."""
+         return await self._metadata.metadata(artifact_id)
+
+     async def exists(self, artifact_id: str) -> bool:
+         """Check if artifact exists and hasn't expired."""
+         return await self._metadata.exists(artifact_id)
+
+     async def delete(self, artifact_id: str) -> bool:
+         """Delete artifact and its metadata."""
+         return await self._metadata.delete(artifact_id)
+
+     async def update_metadata(
+         self,
+         artifact_id: str,
+         *,
+         summary: str | None = None,
+         meta: Dict[str, Any] | None = None,
+         filename: str | None = None,
+         ttl: int | None = None
+     ) -> Dict[str, Any]:
+         """Update artifact metadata without changing the stored data."""
+         return await self._metadata.update_metadata(
+             artifact_id,
+             summary=summary,
+             meta=meta,
+             filename=filename,
+             ttl=ttl
+         )
+
+     async def extend_ttl(self, artifact_id: str, additional_seconds: int) -> Dict[str, Any]:
+         """Extend the TTL of an artifact's metadata."""
+         return await self._metadata.extend_ttl(artifact_id, additional_seconds)
+
+     async def list_by_session(self, session_id: str, limit: int = 100) -> List[Dict[str, Any]]:
+         """List artifacts for a specific session."""
+         return await self._metadata.list_by_session(session_id, limit)
+
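+     # Example (illustrative): refreshing an existing artifact's metadata
+     # without re-uploading its data:
+     #
+     #     await store.update_metadata(artifact_id, summary="Updated summary")
+     #     await store.extend_ttl(artifact_id, 3600)  # keep metadata one hour longer
+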
+     # ─────────────────────────────────────────────────────────────────
+     # Batch operations (delegated to BatchOperations)
+     # ─────────────────────────────────────────────────────────────────
+
+     async def store_batch(
+         self,
+         items: List[Dict[str, Any]],
+         session_id: str | None = None,
+         ttl: int = _DEFAULT_TTL,
+     ) -> List[str]:
+         """Store multiple artifacts in a batch operation."""
+         return await self._batch.store_batch(items, session_id, ttl)
+
+     # ─────────────────────────────────────────────────────────────────
+     # Administrative operations (delegated to AdminOperations)
+     # ─────────────────────────────────────────────────────────────────
+
+     async def validate_configuration(self) -> Dict[str, Any]:
+         """Validate store configuration and connectivity."""
+         return await self._admin.validate_configuration()
+
+     async def get_stats(self) -> Dict[str, Any]:
+         """Get storage statistics."""
+         return await self._admin.get_stats()
+
+     # ─────────────────────────────────────────────────────────────────
+     # Resource management
+     # ─────────────────────────────────────────────────────────────────
+
+     async def close(self):
+         """Mark store as closed."""
+         if not self._closed:
+             self._closed = True
+             logger.info("ArtifactStore closed")
+
+     async def __aenter__(self):
+         return self
+
+     async def __aexit__(self, exc_type, exc_val, exc_tb):
+         await self.close()
+
+     # ─────────────────────────────────────────────────────────────────
+     # Helper functions (still needed for provider loading)
+     # ─────────────────────────────────────────────────────────────────
+
+     def _load_storage_provider(self, name: str) -> Callable[[], AsyncContextManager]:
+         """Load storage provider by name."""
+         from importlib import import_module
+
+         try:
+             mod = import_module(f"chuk_artifacts.providers.{name}")
+         except ModuleNotFoundError as exc:
+             available = ["memory", "filesystem", "s3", "ibm_cos", "ibm_cos_iam"]
+             raise ValueError(
+                 f"Unknown storage provider '{name}'. "
+                 f"Available providers: {', '.join(available)}"
+             ) from exc
+
+         if not hasattr(mod, "factory"):
+             raise AttributeError(f"Storage provider '{name}' lacks factory()")
+
+         logger.info(f"Loaded storage provider: {name}")
+         return mod.factory()
+
+     def _load_session_provider(self, name: str) -> Callable[[], AsyncContextManager]:
+         """Load session provider by name."""
+         from importlib import import_module
+
+         try:
+             mod = import_module(f"chuk_sessions.providers.{name}")
+         except ModuleNotFoundError as exc:
+             raise ValueError(f"Unknown session provider '{name}'") from exc
+
+         if not hasattr(mod, "factory"):
+             raise AttributeError(f"Session provider '{name}' lacks factory()")
+
+         logger.info(f"Loaded session provider: {name}")
+         return mod.factory()
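+
+ # Example (illustrative): the provider contract implied by the loaders above.
+ # A provider module only needs a module-level factory() that returns a zero-arg
+ # callable yielding an async context manager. SomeAsyncS3Client is a
+ # hypothetical placeholder, not part of chuk_artifacts:
+ #
+ #     from contextlib import asynccontextmanager
+ #
+ #     def factory():
+ #         client = SomeAsyncS3Client()  # hypothetical client object
+ #
+ #         @asynccontextmanager
+ #         async def _ctx():
+ #             yield client
+ #
+ #         return _ctx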