chuk-artifacts 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chuk_artifacts/__init__.py +149 -0
- chuk_artifacts/admin.py +79 -0
- chuk_artifacts/base.py +75 -0
- chuk_artifacts/batch.py +115 -0
- chuk_artifacts/config.py +338 -0
- chuk_artifacts/core.py +215 -0
- chuk_artifacts/exceptions.py +37 -0
- chuk_artifacts/metadata.py +286 -0
- chuk_artifacts/models.py +23 -0
- chuk_artifacts/presigned.py +267 -0
- chuk_artifacts/provider_factory.py +84 -0
- chuk_artifacts/providers/__init__.py +10 -0
- chuk_artifacts/providers/filesystem.py +453 -0
- chuk_artifacts/providers/ibm_cos.py +121 -0
- chuk_artifacts/providers/ibm_cos_iam.py +82 -0
- chuk_artifacts/providers/memory.py +315 -0
- chuk_artifacts/providers/s3.py +90 -0
- chuk_artifacts/store.py +383 -0
- chuk_artifacts-0.1.0.dist-info/METADATA +519 -0
- chuk_artifacts-0.1.0.dist-info/RECORD +23 -0
- chuk_artifacts-0.1.0.dist-info/WHEEL +5 -0
- chuk_artifacts-0.1.0.dist-info/licenses/LICENSE +21 -0
- chuk_artifacts-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,315 @@
|
|
1
|
+
# ===========================================================================
|
2
|
+
# chuk_artifacts/providers/memory.py
|
3
|
+
# ===========================================================================
|
4
|
+
"""In-memory, process-local artefact store (non-persistent).
|
5
|
+
|
6
|
+
Intended for unit tests and ephemeral fixtures. Fully async-friendly, no
|
7
|
+
external deps.  Coroutine-safe (guarded by an asyncio lock) with per-instance storage by default.
|
8
|
+
"""
|
9
|
+
from __future__ import annotations
|
10
|
+
|
11
|
+
import asyncio, time, uuid, weakref
|
12
|
+
from contextlib import asynccontextmanager
|
13
|
+
from typing import Any, Dict, Callable, AsyncContextManager, Optional
|
14
|
+
|
15
|
+
|
16
|
+
class _MemoryS3Client:
|
17
|
+
"""
|
18
|
+
Very small subset of aioboto3's S3Client surface used by ArtifactStore.
|
19
|
+
|
20
|
+
Each instance maintains its own storage to avoid cross-contamination
|
21
|
+
between different stores or test cases.
|
22
|
+
"""
|
23
|
+
|
24
|
+
# Class-level registry for debugging/testing purposes (optional)
|
25
|
+
_instances: weakref.WeakSet = weakref.WeakSet()
|
26
|
+
|
27
|
+
def __init__(self, shared_store: Optional[Dict[str, Dict[str, Any]]] = None):
|
28
|
+
"""
|
29
|
+
Initialize memory client.
|
30
|
+
|
31
|
+
Parameters
|
32
|
+
----------
|
33
|
+
shared_store : dict, optional
|
34
|
+
If provided, use this dict as storage backend.
|
35
|
+
Useful for sharing state between multiple clients.
|
36
|
+
If None, creates isolated per-instance storage.
|
37
|
+
"""
|
38
|
+
self._store: Dict[str, Dict[str, Any]] = shared_store if shared_store is not None else {}
|
39
|
+
self._lock = asyncio.Lock()
|
40
|
+
self._closed = False
|
41
|
+
|
42
|
+
# Register for debugging
|
43
|
+
_MemoryS3Client._instances.add(self)
|
44
|
+
|
45
|
+
# ------------------------------------------------------------
|
46
|
+
async def put_object(
|
47
|
+
self,
|
48
|
+
*,
|
49
|
+
Bucket: str, # noqa: N803 – AWS naming convention
|
50
|
+
Key: str, # noqa: N803
|
51
|
+
Body: bytes, # noqa: N803
|
52
|
+
ContentType: str,
|
53
|
+
Metadata: Dict[str, str] # noqa: N803
|
54
|
+
):
|
55
|
+
"""Store object in memory with metadata."""
|
56
|
+
if self._closed:
|
57
|
+
raise RuntimeError("Client has been closed")
|
58
|
+
|
59
|
+
full_key = f"{Bucket}/{Key}"
|
60
|
+
|
61
|
+
async with self._lock:
|
62
|
+
self._store[full_key] = {
|
63
|
+
"data": Body,
|
64
|
+
"content_type": ContentType,
|
65
|
+
"metadata": Metadata,
|
66
|
+
"timestamp": time.time(),
|
67
|
+
"size": len(Body),
|
68
|
+
}
|
69
|
+
|
70
|
+
return {
|
71
|
+
"ResponseMetadata": {"HTTPStatusCode": 200},
|
72
|
+
"ETag": f'"{hash(Body) & 0x7FFFFFFF:08x}"' # Fake ETag
|
73
|
+
}
|
74
|
+
|
75
|
+
async def get_object(
|
76
|
+
self,
|
77
|
+
*,
|
78
|
+
Bucket: str, # noqa: N803
|
79
|
+
Key: str # noqa: N803
|
80
|
+
):
|
81
|
+
"""Retrieve object from memory (for testing purposes)."""
|
82
|
+
if self._closed:
|
83
|
+
raise RuntimeError("Client has been closed")
|
84
|
+
|
85
|
+
full_key = f"{Bucket}/{Key}"
|
86
|
+
|
87
|
+
async with self._lock:
|
88
|
+
if full_key not in self._store:
|
89
|
+
# Mimic AWS S3 NoSuchKey error
|
90
|
+
error = {
|
91
|
+
"Error": {
|
92
|
+
"Code": "NoSuchKey",
|
93
|
+
"Message": "The specified key does not exist.",
|
94
|
+
"Key": Key,
|
95
|
+
"BucketName": Bucket,
|
96
|
+
}
|
97
|
+
}
|
98
|
+
raise Exception(f"NoSuchKey: {error}")
|
99
|
+
|
100
|
+
obj = self._store[full_key]
|
101
|
+
return {
|
102
|
+
"Body": obj["data"],
|
103
|
+
"ContentType": obj["content_type"],
|
104
|
+
"Metadata": obj["metadata"],
|
105
|
+
"ContentLength": obj["size"],
|
106
|
+
"LastModified": obj["timestamp"],
|
107
|
+
}
|
108
|
+
|
109
|
+
async def head_object(
|
110
|
+
self,
|
111
|
+
*,
|
112
|
+
Bucket: str, # noqa: N803
|
113
|
+
Key: str # noqa: N803
|
114
|
+
):
|
115
|
+
"""Get object metadata without body."""
|
116
|
+
if self._closed:
|
117
|
+
raise RuntimeError("Client has been closed")
|
118
|
+
|
119
|
+
full_key = f"{Bucket}/{Key}"
|
120
|
+
|
121
|
+
async with self._lock:
|
122
|
+
if full_key not in self._store:
|
123
|
+
raise Exception(f"NoSuchKey: {Key}")
|
124
|
+
|
125
|
+
obj = self._store[full_key]
|
126
|
+
return {
|
127
|
+
"ContentType": obj["content_type"],
|
128
|
+
"Metadata": obj["metadata"],
|
129
|
+
"ContentLength": obj["size"],
|
130
|
+
"LastModified": obj["timestamp"],
|
131
|
+
}
|
132
|
+
|
133
|
+
async def head_bucket(self, *, Bucket: str): # noqa: N803
|
134
|
+
"""Check if bucket exists (always returns success in memory mode)."""
|
135
|
+
if self._closed:
|
136
|
+
raise RuntimeError("Client has been closed")
|
137
|
+
return {"ResponseMetadata": {"HTTPStatusCode": 200}}
|
138
|
+
|
139
|
+
async def generate_presigned_url(
|
140
|
+
self,
|
141
|
+
operation: str,
|
142
|
+
*,
|
143
|
+
Params: Dict[str, str], # noqa: N803
|
144
|
+
ExpiresIn: int # noqa: N803
|
145
|
+
) -> str:
|
146
|
+
"""
|
147
|
+
Generate fake presigned URLs for testing.
|
148
|
+
|
149
|
+
URLs use memory:// scheme and include object validation.
|
150
|
+
"""
|
151
|
+
if self._closed:
|
152
|
+
raise RuntimeError("Client has been closed")
|
153
|
+
|
154
|
+
bucket, key = Params["Bucket"], Params["Key"]
|
155
|
+
full_key = f"{bucket}/{key}"
|
156
|
+
|
157
|
+
async with self._lock:
|
158
|
+
if full_key not in self._store:
|
159
|
+
raise FileNotFoundError(f"Object not found: {full_key}")
|
160
|
+
|
161
|
+
# Include object hash for validation
|
162
|
+
obj = self._store[full_key]
|
163
|
+
obj_hash = hash(obj["data"]) & 0x7FFFFFFF
|
164
|
+
|
165
|
+
return (
|
166
|
+
f"memory://{full_key}"
|
167
|
+
f"?operation={operation}"
|
168
|
+
f"&token={uuid.uuid4().hex}"
|
169
|
+
f"&expires={int(time.time()) + ExpiresIn}"
|
170
|
+
f"&hash={obj_hash:08x}"
|
171
|
+
)
|
172
|
+
|
173
|
+
async def list_objects_v2(
|
174
|
+
self,
|
175
|
+
*,
|
176
|
+
Bucket: str, # noqa: N803
|
177
|
+
Prefix: str = "", # noqa: N803
|
178
|
+
MaxKeys: int = 1000 # noqa: N803
|
179
|
+
):
|
180
|
+
"""List objects with optional prefix filtering."""
|
181
|
+
if self._closed:
|
182
|
+
raise RuntimeError("Client has been closed")
|
183
|
+
|
184
|
+
bucket_prefix = f"{Bucket}/"
|
185
|
+
search_prefix = f"{bucket_prefix}{Prefix}"
|
186
|
+
|
187
|
+
async with self._lock:
|
188
|
+
matching_keys = [
|
189
|
+
key for key in self._store.keys()
|
190
|
+
if key.startswith(search_prefix)
|
191
|
+
]
|
192
|
+
|
193
|
+
# Limit results
|
194
|
+
matching_keys = matching_keys[:MaxKeys]
|
195
|
+
|
196
|
+
contents = []
|
197
|
+
for full_key in matching_keys:
|
198
|
+
obj = self._store[full_key]
|
199
|
+
# Remove bucket prefix to get just the key
|
200
|
+
key = full_key[len(bucket_prefix):]
|
201
|
+
contents.append({
|
202
|
+
"Key": key,
|
203
|
+
"Size": obj["size"],
|
204
|
+
"LastModified": obj["timestamp"],
|
205
|
+
"ETag": f'"{hash(obj["data"]) & 0x7FFFFFFF:08x}"'
|
206
|
+
})
|
207
|
+
|
208
|
+
return {
|
209
|
+
"Contents": contents,
|
210
|
+
"KeyCount": len(contents),
|
211
|
+
"IsTruncated": False, # We don't implement pagination
|
212
|
+
}
|
213
|
+
|
214
|
+
async def delete_object(
|
215
|
+
self,
|
216
|
+
*,
|
217
|
+
Bucket: str, # noqa: N803
|
218
|
+
Key: str # noqa: N803
|
219
|
+
):
|
220
|
+
"""Delete object from memory store."""
|
221
|
+
if self._closed:
|
222
|
+
raise RuntimeError("Client has been closed")
|
223
|
+
|
224
|
+
full_key = f"{Bucket}/{Key}"
|
225
|
+
|
226
|
+
async with self._lock:
|
227
|
+
self._store.pop(full_key, None) # Don't error if key doesn't exist
|
228
|
+
|
229
|
+
return {"ResponseMetadata": {"HTTPStatusCode": 204}}
|
230
|
+
|
231
|
+
async def close(self):
|
232
|
+
"""Clean up resources and mark client as closed."""
|
233
|
+
if not self._closed:
|
234
|
+
async with self._lock:
|
235
|
+
self._store.clear()
|
236
|
+
self._closed = True
|
237
|
+
|
238
|
+
# ------------------------------------------------------------
|
239
|
+
# Debug/testing utilities
|
240
|
+
# ------------------------------------------------------------
|
241
|
+
|
242
|
+
async def _debug_list_all_keys(self) -> list[str]:
|
243
|
+
"""List all keys in storage (for debugging)."""
|
244
|
+
async with self._lock:
|
245
|
+
return list(self._store.keys())
|
246
|
+
|
247
|
+
async def _debug_get_stats(self) -> Dict[str, Any]:
|
248
|
+
"""Get storage statistics (for debugging)."""
|
249
|
+
async with self._lock:
|
250
|
+
total_objects = len(self._store)
|
251
|
+
total_bytes = sum(obj["size"] for obj in self._store.values())
|
252
|
+
return {
|
253
|
+
"total_objects": total_objects,
|
254
|
+
"total_bytes": total_bytes,
|
255
|
+
"closed": self._closed,
|
256
|
+
}
|
257
|
+
|
258
|
+
@classmethod
|
259
|
+
def _debug_instance_count(cls) -> int:
|
260
|
+
"""Get count of active instances (for debugging)."""
|
261
|
+
return len(cls._instances)
|
262
|
+
|
263
|
+
|
264
|
+
# ---- public factory -------------------------------------------------------
|
265
|
+
|
266
|
+
def factory(shared_store: Optional[Dict[str, Dict[str, Any]]] = None) -> Callable[[], AsyncContextManager]:
    """
    Build a **zero-arg** callable that produces async-context clients.

    Each call of the returned callable gives a fresh async context manager.
    Entering it creates a new ``_MemoryS3Client``; leaving it (normally or
    through an exception) closes that client.

    Parameters
    ----------
    shared_store : dict, optional
        Storage dict handed to every client this factory creates, so that
        multiple clients can see the same data.  When omitted, each client
        is created without a shared store.
    """

    @asynccontextmanager
    async def _ctx():
        memory_client = _MemoryS3Client(shared_store=shared_store)
        try:
            yield memory_client
        finally:
            # Always close, even if the body of the ``async with`` raised.
            await memory_client.close()

    return _ctx
|
287
|
+
|
288
|
+
|
289
|
+
# ---- convenience functions for testing ------------------------------------
|
290
|
+
|
291
|
+
def create_shared_memory_factory() -> tuple[Callable[[], AsyncContextManager], Dict[str, Dict[str, Any]]]:
    """
    Build a shared-storage factory together with its backing dict.

    A new empty dict is created and passed to ``factory`` as the shared
    store; the same dict object is handed back so callers can inspect it.

    Returns
    -------
    tuple
        (factory_function, shared_storage_dict)
    """
    storage: Dict[str, Dict[str, Any]] = {}
    shared_factory = factory(storage)
    return shared_factory, storage
|
303
|
+
|
304
|
+
|
305
|
+
async def clear_all_memory_stores():
    """
    Emergency cleanup function that clears all active memory stores.
    Useful for test teardown.

    Every client still present in the ``_MemoryS3Client`` registry that is
    not yet closed has its storage emptied and is then closed.  Cleanup is
    best effort: a failure on one instance is logged at debug level and the
    remaining instances are still processed.
    """
    # Imported locally so the module's import block stays untouched.
    import logging

    log = logging.getLogger(__name__)

    # Iterate over a snapshot: the registry is a WeakSet and may change
    # size while we are suspended in an await.
    for instance in list(_MemoryS3Client._instances):
        try:
            if not instance._closed:
                # Empty the backing dict here rather than relying on
                # close() to do it, so the "clears all stores" guarantee
                # holds whatever close() does with caller-supplied dicts.
                async with instance._lock:
                    instance._store.clear()
            await instance.close()
        except Exception:
            # Best effort cleanup: record the problem instead of hiding it.
            log.debug("Failed to clean up memory store client %r", instance, exc_info=True)
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# chuk_artifacts/providers/s3.py
|
3
|
+
"""
|
4
|
+
AWS S3 provider for artifact storage.
|
5
|
+
|
6
|
+
Uses aioboto3 to provide S3-compatible storage with full async support.
|
7
|
+
Supports standard AWS credentials and S3-compatible endpoints.
|
8
|
+
"""
|
9
|
+
|
10
|
+
from __future__ import annotations
|
11
|
+
import os, aioboto3
|
12
|
+
from contextlib import asynccontextmanager
|
13
|
+
from typing import Optional, Callable, AsyncContextManager
|
14
|
+
|
15
|
+
|
16
|
+
def factory(
    *,
    endpoint_url: Optional[str] = None,
    region: Optional[str] = None,
    access_key: Optional[str] = None,
    secret_key: Optional[str] = None,
) -> Callable[[], AsyncContextManager]:
    """
    Create an S3 client factory.

    Configuration is resolved once, when this function is called; the
    returned callable reuses those values for every client it opens.

    Parameters
    ----------
    endpoint_url : str, optional
        Custom S3 endpoint (for MinIO, DigitalOcean Spaces, etc.).
        Falls back to the ``S3_ENDPOINT_URL`` environment variable.
    region : str, optional
        AWS region.  Falls back to the ``AWS_REGION`` environment variable
        and finally to ``us-east-1``.
    access_key : str, optional
        AWS access key ID (falls back to ``AWS_ACCESS_KEY_ID``)
    secret_key : str, optional
        AWS secret access key (falls back to ``AWS_SECRET_ACCESS_KEY``)

    Returns
    -------
    Callable[[], AsyncContextManager]
        Factory function that returns S3 client context managers

    Raises
    ------
    RuntimeError
        If either credential is missing from both the arguments and the
        environment.
    """
    # Get configuration from parameters or environment.
    # ``region`` defaults to None (not "us-east-1") so that an unset
    # argument really does fall through to AWS_REGION.
    endpoint_url = endpoint_url or os.getenv("S3_ENDPOINT_URL")
    region = region or os.getenv("AWS_REGION", "us-east-1")
    access_key = access_key or os.getenv("AWS_ACCESS_KEY_ID")
    secret_key = secret_key or os.getenv("AWS_SECRET_ACCESS_KEY")

    if not (access_key and secret_key):
        raise RuntimeError(
            "AWS credentials missing. Set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY "
            "environment variables or pass them as parameters."
        )

    @asynccontextmanager
    async def _ctx():
        # A new session per context; the client is closed by aioboto3's own
        # context manager when the ``async with`` block exits.
        session = aioboto3.Session()
        async with session.client(
            "s3",
            endpoint_url=endpoint_url,
            region_name=region,
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
        ) as client:
            yield client

    return _ctx
|
67
|
+
|
68
|
+
|
69
|
+
# Backward compatibility - direct client function
|
70
|
+
def client(
    *,
    endpoint_url: Optional[str] = None,
    region: Optional[str] = None,
    access_key: Optional[str] = None,
    secret_key: Optional[str] = None,
):
    """
    Return an aioboto3 S3 client context manager.

    Each setting is taken from the argument when it is truthy, otherwise
    from the environment: ``S3_ENDPOINT_URL``, ``AWS_REGION`` (defaulting
    to ``us-east-1``), ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY``.
    No check is made here that credentials were actually found.

    This is a convenience function for direct usage.
    The factory() function is preferred for use with ArtifactStore.
    """
    settings = {
        "endpoint_url": endpoint_url or os.getenv("S3_ENDPOINT_URL"),
        "region_name": region or os.getenv("AWS_REGION", "us-east-1"),
        "aws_access_key_id": access_key or os.getenv("AWS_ACCESS_KEY_ID"),
        "aws_secret_access_key": secret_key or os.getenv("AWS_SECRET_ACCESS_KEY"),
    }
    return aioboto3.Session().client("s3", **settings)
|