chuk-artifacts 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,315 @@
1
+ # ===========================================================================
2
+ # chuk_artifacts/providers/memory.py
3
+ # ===========================================================================
4
+ """In-memory, process-local artefact store (non-persistent).
5
+
6
+ Intended for unit tests and ephemeral fixtures. Fully async-friendly, no
7
+ external deps. Safe for concurrent asyncio tasks (not OS threads), with per-instance isolation.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import asyncio, time, uuid, weakref
12
+ from contextlib import asynccontextmanager
13
+ from typing import Any, Dict, Callable, AsyncContextManager, Optional
14
+
15
+
16
class _MemoryS3Client:
    """
    Very small subset of aioboto3's S3Client surface used by ArtifactStore.

    Objects live in a plain dict keyed by ``"<Bucket>/<Key>"``. By default
    each instance owns a private dict, so separate stores / test cases do
    not see each other's data. A caller may instead pass ``shared_store``
    to let several clients operate on the same dict.

    Every operation raises ``RuntimeError("Client has been closed")`` once
    :meth:`close` has run. Access to the dict is serialised through a
    per-instance ``asyncio.Lock``; note that clients sharing one dict each
    hold their *own* lock.
    """

    # Class-level registry for debugging/testing purposes (optional).
    # Weak references only, so registration never keeps a client alive.
    _instances: weakref.WeakSet = weakref.WeakSet()

    def __init__(self, shared_store: Optional[Dict[str, Dict[str, Any]]] = None):
        """
        Initialize memory client.

        Parameters
        ----------
        shared_store : dict, optional
            If provided, use this dict as storage backend.
            Useful for sharing state between multiple clients.
            If None, creates isolated per-instance storage.
        """
        # Remember who owns the backing dict: a caller-supplied dict must
        # survive this client's close(), a private one is wiped on close().
        self._owns_store: bool = shared_store is None
        self._store: Dict[str, Dict[str, Any]] = shared_store if shared_store is not None else {}
        self._lock = asyncio.Lock()
        self._closed = False

        # Register for debugging
        _MemoryS3Client._instances.add(self)

    # ------------------------------------------------------------
    async def put_object(
        self,
        *,
        Bucket: str,  # noqa: N803 – AWS naming convention
        Key: str,  # noqa: N803
        Body: bytes,  # noqa: N803
        ContentType: str,
        Metadata: Dict[str, str],  # noqa: N803
    ):
        """Store object in memory with metadata.

        Overwrites any existing entry for ``Bucket``/``Key`` and returns a
        minimal response dict containing a fake ``ETag``.

        Raises
        ------
        RuntimeError
            If the client has been closed.
        """
        if self._closed:
            raise RuntimeError("Client has been closed")

        full_key = f"{Bucket}/{Key}"

        async with self._lock:
            self._store[full_key] = {
                "data": Body,
                "content_type": ContentType,
                "metadata": Metadata,
                "timestamp": time.time(),
                "size": len(Body),
            }

        return {
            "ResponseMetadata": {"HTTPStatusCode": 200},
            "ETag": f'"{hash(Body) & 0x7FFFFFFF:08x}"',  # Fake ETag
        }

    async def get_object(
        self,
        *,
        Bucket: str,  # noqa: N803
        Key: str,  # noqa: N803
    ):
        """Retrieve object from memory (for testing purposes).

        Returns a dict with ``Body`` (the stored value itself, not a
        stream), ``ContentType``, ``Metadata``, ``ContentLength`` and
        ``LastModified`` (the ``time.time()`` float recorded on put).

        Raises
        ------
        RuntimeError
            If the client has been closed.
        Exception
            With a ``NoSuchKey: ...`` message when the key is absent.
        """
        if self._closed:
            raise RuntimeError("Client has been closed")

        full_key = f"{Bucket}/{Key}"

        async with self._lock:
            if full_key not in self._store:
                # Mimic AWS S3 NoSuchKey error
                error = {
                    "Error": {
                        "Code": "NoSuchKey",
                        "Message": "The specified key does not exist.",
                        "Key": Key,
                        "BucketName": Bucket,
                    }
                }
                raise Exception(f"NoSuchKey: {error}")

            obj = self._store[full_key]
            return {
                "Body": obj["data"],
                "ContentType": obj["content_type"],
                "Metadata": obj["metadata"],
                "ContentLength": obj["size"],
                "LastModified": obj["timestamp"],
            }

    async def head_object(
        self,
        *,
        Bucket: str,  # noqa: N803
        Key: str,  # noqa: N803
    ):
        """Get object metadata without body.

        Raises
        ------
        RuntimeError
            If the client has been closed.
        Exception
            With a ``NoSuchKey: <Key>`` message when the key is absent.
        """
        if self._closed:
            raise RuntimeError("Client has been closed")

        full_key = f"{Bucket}/{Key}"

        async with self._lock:
            if full_key not in self._store:
                raise Exception(f"NoSuchKey: {Key}")

            obj = self._store[full_key]
            return {
                "ContentType": obj["content_type"],
                "Metadata": obj["metadata"],
                "ContentLength": obj["size"],
                "LastModified": obj["timestamp"],
            }

    async def head_bucket(self, *, Bucket: str):  # noqa: N803
        """Check if bucket exists (always returns success in memory mode)."""
        if self._closed:
            raise RuntimeError("Client has been closed")
        return {"ResponseMetadata": {"HTTPStatusCode": 200}}

    async def generate_presigned_url(
        self,
        operation: str,
        *,
        Params: Dict[str, str],  # noqa: N803
        ExpiresIn: int,  # noqa: N803
    ) -> str:
        """
        Generate fake presigned URLs for testing.

        URLs use memory:// scheme and include object validation.

        Raises
        ------
        RuntimeError
            If the client has been closed.
        FileNotFoundError
            If ``Params["Bucket"]``/``Params["Key"]`` is not stored.
        """
        if self._closed:
            raise RuntimeError("Client has been closed")

        bucket, key = Params["Bucket"], Params["Key"]
        full_key = f"{bucket}/{key}"

        async with self._lock:
            if full_key not in self._store:
                raise FileNotFoundError(f"Object not found: {full_key}")

            # Include object hash for validation
            obj = self._store[full_key]
            obj_hash = hash(obj["data"]) & 0x7FFFFFFF

        return (
            f"memory://{full_key}"
            f"?operation={operation}"
            f"&token={uuid.uuid4().hex}"
            f"&expires={int(time.time()) + ExpiresIn}"
            f"&hash={obj_hash:08x}"
        )

    async def list_objects_v2(
        self,
        *,
        Bucket: str,  # noqa: N803
        Prefix: str = "",  # noqa: N803
        MaxKeys: int = 1000,  # noqa: N803
    ):
        """List objects with optional prefix filtering.

        Returns at most ``MaxKeys`` entries. ``IsTruncated`` is always
        ``False`` because pagination is not implemented, even when the
        result was cut off by ``MaxKeys``.

        Raises
        ------
        RuntimeError
            If the client has been closed.
        """
        if self._closed:
            raise RuntimeError("Client has been closed")

        bucket_prefix = f"{Bucket}/"
        search_prefix = f"{bucket_prefix}{Prefix}"

        async with self._lock:
            matching_keys = [
                key for key in self._store.keys()
                if key.startswith(search_prefix)
            ]

            # Limit results
            matching_keys = matching_keys[:MaxKeys]

            contents = []
            for full_key in matching_keys:
                obj = self._store[full_key]
                # Remove bucket prefix to get just the key
                key = full_key[len(bucket_prefix):]
                contents.append({
                    "Key": key,
                    "Size": obj["size"],
                    "LastModified": obj["timestamp"],
                    "ETag": f'"{hash(obj["data"]) & 0x7FFFFFFF:08x}"',
                })

            return {
                "Contents": contents,
                "KeyCount": len(contents),
                "IsTruncated": False,  # We don't implement pagination
            }

    async def delete_object(
        self,
        *,
        Bucket: str,  # noqa: N803
        Key: str,  # noqa: N803
    ):
        """Delete object from memory store.

        Deleting a missing key is not an error.

        Raises
        ------
        RuntimeError
            If the client has been closed.
        """
        if self._closed:
            raise RuntimeError("Client has been closed")

        full_key = f"{Bucket}/{Key}"

        async with self._lock:
            self._store.pop(full_key, None)  # Don't error if key doesn't exist

        return {"ResponseMetadata": {"HTTPStatusCode": 204}}

    async def close(self):
        """Clean up resources and mark client as closed.

        A privately owned store is emptied. A ``shared_store`` supplied by
        the caller is left untouched: other clients (and the caller) still
        hold it, and wiping it here would destroy their data every time a
        single client goes out of scope. Calling ``close`` again is a no-op.
        """
        if not self._closed:
            if self._owns_store:
                async with self._lock:
                    self._store.clear()
            self._closed = True

    # ------------------------------------------------------------
    # Debug/testing utilities
    # ------------------------------------------------------------

    async def _debug_list_all_keys(self) -> list[str]:
        """List all keys in storage (for debugging)."""
        async with self._lock:
            return list(self._store.keys())

    async def _debug_get_stats(self) -> Dict[str, Any]:
        """Get storage statistics (for debugging)."""
        async with self._lock:
            total_objects = len(self._store)
            total_bytes = sum(obj["size"] for obj in self._store.values())
            return {
                "total_objects": total_objects,
                "total_bytes": total_bytes,
                "closed": self._closed,
            }

    @classmethod
    def _debug_instance_count(cls) -> int:
        """Get count of active instances (for debugging)."""
        return len(cls._instances)
262
+
263
+
264
+ # ---- public factory -------------------------------------------------------
265
+
266
def factory(shared_store: Optional[Dict[str, Dict[str, Any]]] = None) -> Callable[[], AsyncContextManager]:
    """
    Build a **zero-arg** callable producing an async-context memory client.

    Each call of the returned callable creates a fresh ``_MemoryS3Client``;
    the client is closed when the ``async with`` block exits, whether
    normally or through an exception.

    Parameters
    ----------
    shared_store : dict, optional
        If provided, all clients created by this factory will share
        the same storage dict. Useful for testing scenarios where
        you need multiple clients to see the same data.
    """

    async def _ctx():
        memory_client = _MemoryS3Client(shared_store=shared_store)
        try:
            yield memory_client
        finally:
            # Always release the client, even if the body raised.
            await memory_client.close()

    return asynccontextmanager(_ctx)
287
+
288
+
289
+ # ---- convenience functions for testing ------------------------------------
290
+
291
def create_shared_memory_factory() -> tuple[Callable[[], AsyncContextManager], Dict[str, Dict[str, Any]]]:
    """
    Build a memory-client factory backed by one new dict and hand back both.

    The dict is returned alongside the factory so that tests can inspect
    what the clients stored.

    Returns
    -------
    tuple
        (factory_function, shared_storage_dict)
    """
    backing: Dict[str, Dict[str, Any]] = {}
    client_factory = factory(backing)
    return client_factory, backing
303
+
304
+
305
async def clear_all_memory_stores():
    """
    Emergency cleanup function that closes every memory client still
    registered in ``_MemoryS3Client._instances``.
    Useful for test teardown.

    Cleanup is best effort: a client whose ``close()`` raises is skipped so
    the remaining clients are still closed. The failure is logged at DEBUG
    level rather than silently discarded.
    """
    import logging  # local import: leaves the module's import block untouched

    logger = logging.getLogger(__name__)

    # Iterate over a snapshot rather than the live WeakSet across awaits.
    for instance in list(_MemoryS3Client._instances):
        try:
            await instance.close()
        except Exception:
            # Best effort cleanup - record the problem, keep going.
            logger.debug(
                "Ignoring error while closing %r during cleanup",
                instance,
                exc_info=True,
            )
@@ -0,0 +1,90 @@
1
+ # -*- coding: utf-8 -*-
2
+ # chuk_artifacts/providers/s3.py
3
+ """
4
+ AWS S3 provider for artifact storage.
5
+
6
+ Uses aioboto3 to provide S3-compatible storage with full async support.
7
+ Supports standard AWS credentials and S3-compatible endpoints.
8
+ """
9
+
10
+ from __future__ import annotations
11
+ import os, aioboto3
12
+ from contextlib import asynccontextmanager
13
+ from typing import Optional, Callable, AsyncContextManager
14
+
15
+
16
def factory(
    *,
    endpoint_url: Optional[str] = None,
    region: Optional[str] = None,
    access_key: Optional[str] = None,
    secret_key: Optional[str] = None,
) -> Callable[[], AsyncContextManager]:
    """
    Create an S3 client factory.

    Configuration is resolved once, when ``factory`` is called; every
    client later produced by the returned callable uses those values.

    Parameters
    ----------
    endpoint_url : str, optional
        Custom S3 endpoint (for MinIO, DigitalOcean Spaces, etc.);
        falls back to the ``S3_ENDPOINT_URL`` environment variable.
    region : str, optional
        AWS region. An explicit value wins; otherwise the ``AWS_REGION``
        environment variable is used, and finally ``us-east-1``.
    access_key : str, optional
        AWS access key ID (falls back to ``AWS_ACCESS_KEY_ID``)
    secret_key : str, optional
        AWS secret access key (falls back to ``AWS_SECRET_ACCESS_KEY``)

    Returns
    -------
    Callable[[], AsyncContextManager]
        Factory function that returns S3 client context managers

    Raises
    ------
    RuntimeError
        If no access key or no secret key can be resolved.
    """
    # Get configuration from parameters or environment.
    # ``region`` defaults to None (not "us-east-1") so that the AWS_REGION
    # fallback below is actually reachable when the caller passes nothing.
    endpoint_url = endpoint_url or os.getenv("S3_ENDPOINT_URL")
    region = region or os.getenv("AWS_REGION", "us-east-1")
    access_key = access_key or os.getenv("AWS_ACCESS_KEY_ID")
    secret_key = secret_key or os.getenv("AWS_SECRET_ACCESS_KEY")

    if not (access_key and secret_key):
        raise RuntimeError(
            "AWS credentials missing. Set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY "
            "environment variables or pass them as parameters."
        )

    @asynccontextmanager
    async def _ctx():
        # A new session per context; the client is released when the
        # surrounding ``async with`` exits.
        session = aioboto3.Session()
        async with session.client(
            "s3",
            endpoint_url=endpoint_url,
            region_name=region,
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
        ) as client:
            yield client

    return _ctx
67
+
68
+
69
+ # Backward compatibility - direct client function
70
def client(
    *,
    endpoint_url: Optional[str] = None,
    region: Optional[str] = None,
    access_key: Optional[str] = None,
    secret_key: Optional[str] = None,
):
    """
    Return an aioboto3 S3 client context manager.

    Each argument that is missing (or falsy) is filled from the
    environment: ``S3_ENDPOINT_URL``, ``AWS_REGION`` (defaulting to
    ``us-east-1``), ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY``.

    This is a convenience function for direct usage.
    The factory() function is preferred for use with ArtifactStore.
    """
    session = aioboto3.Session()

    # Explicit arguments take precedence over environment variables.
    options = {
        "endpoint_url": endpoint_url or os.getenv("S3_ENDPOINT_URL"),
        "region_name": region or os.getenv("AWS_REGION", "us-east-1"),
        "aws_access_key_id": access_key or os.getenv("AWS_ACCESS_KEY_ID"),
        "aws_secret_access_key": secret_key or os.getenv("AWS_SECRET_ACCESS_KEY"),
    }
    return session.client("s3", **options)