chuk-artifacts 0.1.0 (chuk_artifacts-0.1.0-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,453 @@
+ # ===========================================================================
+ # chuk_artifacts/providers/filesystem.py
+ # ===========================================================================
+ """Local-filesystem artefact store.
+
+ Objects are written relative to $ARTIFACT_FS_ROOT (default ./artifacts).
+ Presigned URLs use the *file://* scheme so callers can still download.
+ Includes comprehensive S3-compatible methods and proper error handling.
+ """
+ from __future__ import annotations
+
+ import os, json, asyncio, time, uuid, stat, hashlib
+ from pathlib import Path
+ from contextlib import asynccontextmanager
+ from typing import Any, Dict, Callable, AsyncContextManager, List, Optional
+ from datetime import datetime
+
+ _ROOT = Path(os.getenv("ARTIFACT_FS_ROOT", "./artifacts")).expanduser()
+
+
+ class _FilesystemClient:
+     """Mimics the S3 surface ArtifactStore depends on with filesystem backend."""
+
+     def __init__(self, root: Path = _ROOT):
+         self._root = Path(root).expanduser().resolve()
+         self._closed = False
+         self._lock = asyncio.Lock()
+
+         # Ensure root directory exists
+         self._root.mkdir(parents=True, exist_ok=True)
+
+     def _get_object_path(self, bucket: str, key: str) -> Path:
+         """Get the filesystem path for an object, incorporating bucket as subdirectory."""
+         # Use bucket as a subdirectory to maintain some S3-like organization
+         return self._root / bucket / key
+
+     def _get_metadata_path(self, object_path: Path) -> Path:
+         """Get the metadata file path for an object."""
+         return object_path.with_suffix(object_path.suffix + ".meta.json")
+
+     async def _ensure_parent_dir(self, path: Path):
+         """Ensure parent directory exists."""
+         await asyncio.to_thread(path.parent.mkdir, parents=True, exist_ok=True)
+
+     async def _write_metadata(self, meta_path: Path, content_type: str, metadata: Dict[str, str], size: int, etag: str):
+         """Write metadata file."""
+         meta_data = {
+             "content_type": content_type,
+             "metadata": metadata,
+             "size": size,
+             "etag": etag,
+             "last_modified": datetime.utcnow().isoformat() + "Z",
+             "created_at": datetime.utcnow().isoformat() + "Z"
+         }
+         meta_json = json.dumps(meta_data, indent=2)
+         await asyncio.to_thread(meta_path.write_text, meta_json, encoding='utf-8')
+
+     async def _read_metadata(self, meta_path: Path) -> Dict[str, Any]:
+         """Read metadata file."""
+         try:
+             content = await asyncio.to_thread(meta_path.read_text, encoding='utf-8')
+             return json.loads(content)
+         except (FileNotFoundError, json.JSONDecodeError):
+             return {}
+
+     # ------------------------------------------------------------
+     # Core S3-compatible methods
+     # ------------------------------------------------------------
+
+     async def put_object(
+         self,
+         *,
+         Bucket: str,  # noqa: N803
+         Key: str,  # noqa: N803
+         Body: bytes,  # noqa: N803
+         ContentType: str,
+         Metadata: Dict[str, str]  # noqa: N803
+     ):
+         """Store object in filesystem with metadata."""
+         if self._closed:
+             raise RuntimeError("Client has been closed")
+
+         object_path = self._get_object_path(Bucket, Key)
+         meta_path = self._get_metadata_path(object_path)
+
+         # Generate ETag (MD5 hash like S3)
+         etag = hashlib.md5(Body).hexdigest()
+
+         async with self._lock:
+             await self._ensure_parent_dir(object_path)
+             await asyncio.to_thread(object_path.write_bytes, Body)
+             await self._write_metadata(meta_path, ContentType, Metadata, len(Body), etag)
+
+         return {
+             "ResponseMetadata": {"HTTPStatusCode": 200},
+             "ETag": f'"{etag}"'
+         }
+
+     async def get_object(
+         self,
+         *,
+         Bucket: str,  # noqa: N803
+         Key: str  # noqa: N803
+     ):
+         """Retrieve object from filesystem."""
+         if self._closed:
+             raise RuntimeError("Client has been closed")
+
+         object_path = self._get_object_path(Bucket, Key)
+         meta_path = self._get_metadata_path(object_path)
+
+         if not object_path.exists():
+             raise FileNotFoundError(f"NoSuchKey: {Key}")
+
+         async with self._lock:
+             body = await asyncio.to_thread(object_path.read_bytes)
+             metadata = await self._read_metadata(meta_path)
+
+         # Get file stats
+         stat_info = await asyncio.to_thread(object_path.stat)
+
+         return {
+             "Body": body,
+             "ContentType": metadata.get("content_type", "application/octet-stream"),
+             "Metadata": metadata.get("metadata", {}),
+             "ContentLength": len(body),
+             "LastModified": datetime.fromtimestamp(stat_info.st_mtime),
+             "ETag": metadata.get("etag", ""),
+         }
+
+     async def head_object(
+         self,
+         *,
+         Bucket: str,  # noqa: N803
+         Key: str  # noqa: N803
+     ):
+         """Get object metadata without body."""
+         if self._closed:
+             raise RuntimeError("Client has been closed")
+
+         object_path = self._get_object_path(Bucket, Key)
+         meta_path = self._get_metadata_path(object_path)
+
+         if not object_path.exists():
+             raise FileNotFoundError(f"NoSuchKey: {Key}")
+
+         async with self._lock:
+             metadata = await self._read_metadata(meta_path)
+             stat_info = await asyncio.to_thread(object_path.stat)
+
+         return {
+             "ContentType": metadata.get("content_type", "application/octet-stream"),
+             "Metadata": metadata.get("metadata", {}),
+             "ContentLength": stat_info.st_size,
+             "LastModified": datetime.fromtimestamp(stat_info.st_mtime),
+             "ETag": metadata.get("etag", ""),
+         }
+
+     async def head_bucket(self, *, Bucket: str):  # noqa: N803
+         """Check if bucket (directory) exists."""
+         if self._closed:
+             raise RuntimeError("Client has been closed")
+
+         bucket_path = self._root / Bucket
+         if not bucket_path.exists():
+             raise FileNotFoundError(f"NoSuchBucket: {Bucket}")
+
+         return {"ResponseMetadata": {"HTTPStatusCode": 200}}
+
+     async def generate_presigned_url(
+         self,
+         operation: str,
+         *,
+         Params: Dict[str, str],  # noqa: N803
+         ExpiresIn: int  # noqa: N803
+     ) -> str:
+         """
+         Generate file:// URLs for filesystem objects.
+
+         Note: file:// URLs don't have real expiration, but we include
+         expiry info for compatibility.
+         """
+         if self._closed:
+             raise RuntimeError("Client has been closed")
+
+         bucket, key = Params["Bucket"], Params["Key"]
+         object_path = self._get_object_path(bucket, key)
+
+         if not object_path.exists():
+             raise FileNotFoundError(f"Object not found: {bucket}/{key}")
+
+         # Create file:// URL with query parameters for compatibility
+         return (
+             f"file://{object_path.as_posix()}"
+             f"?operation={operation}"
+             f"&token={uuid.uuid4().hex}"
+             f"&expires={int(time.time()) + ExpiresIn}"
+         )
+
+     async def list_objects_v2(
+         self,
+         *,
+         Bucket: str,  # noqa: N803
+         Prefix: str = "",  # noqa: N803
+         MaxKeys: int = 1000,  # noqa: N803
+         ContinuationToken: Optional[str] = None  # noqa: N803
+     ):
+         """List objects in bucket with optional prefix filtering."""
+         if self._closed:
+             raise RuntimeError("Client has been closed")
+
+         bucket_path = self._root / Bucket
+         if not bucket_path.exists():
+             return {
+                 "Contents": [],
+                 "KeyCount": 0,
+                 "IsTruncated": False,
+             }
+
+         contents = []
+         total_found = 0
+
+         async with self._lock:
+             # Walk the directory tree
+             for item in bucket_path.rglob("*"):
+                 if item.is_file() and not item.name.endswith(".meta.json"):
+                     # Get relative path from bucket root as the key
+                     relative_path = item.relative_to(bucket_path)
+                     key = relative_path.as_posix()
+
+                     # Apply prefix filter
+                     if not key.startswith(Prefix):
+                         continue
+
+                     total_found += 1
+
+                     # Apply pagination
+                     if len(contents) >= MaxKeys:
+                         break
+
+                     # Get file stats and metadata
+                     stat_info = await asyncio.to_thread(item.stat)
+                     meta_path = self._get_metadata_path(item)
+                     metadata = await self._read_metadata(meta_path)
+
+                     contents.append({
+                         "Key": key,
+                         "Size": stat_info.st_size,
+                         "LastModified": datetime.fromtimestamp(stat_info.st_mtime),
+                         "ETag": f'"{metadata.get("etag", "")}"',
+                         "StorageClass": "STANDARD"
+                     })
+
+         return {
+             "Contents": contents,
+             "KeyCount": len(contents),
+             "IsTruncated": total_found > MaxKeys,
+             "MaxKeys": MaxKeys,
+             "Prefix": Prefix,
+         }
+
+     async def delete_object(
+         self,
+         *,
+         Bucket: str,  # noqa: N803
+         Key: str  # noqa: N803
+     ):
+         """Delete object and its metadata from filesystem."""
+         if self._closed:
+             raise RuntimeError("Client has been closed")
+
+         object_path = self._get_object_path(Bucket, Key)
+         meta_path = self._get_metadata_path(object_path)
+
+         async with self._lock:
+             # Remove object file
+             try:
+                 await asyncio.to_thread(object_path.unlink)
+             except FileNotFoundError:
+                 pass  # S3 doesn't error if object doesn't exist
+
+             # Remove metadata file
+             try:
+                 await asyncio.to_thread(meta_path.unlink)
+             except FileNotFoundError:
+                 pass
+
+             # Clean up empty directories
+             try:
+                 await asyncio.to_thread(object_path.parent.rmdir)
+             except OSError:
+                 pass  # Directory not empty or other issue
+
+         return {"ResponseMetadata": {"HTTPStatusCode": 204}}
+
+     async def delete_objects(
+         self,
+         *,
+         Bucket: str,  # noqa: N803
+         Delete: Dict[str, List[Dict[str, str]]]  # noqa: N803
+     ):
+         """Delete multiple objects (batch operation)."""
+         if self._closed:
+             raise RuntimeError("Client has been closed")
+
+         deleted = []
+         errors = []
+
+         for obj in Delete.get("Objects", []):
+             key = obj["Key"]
+             try:
+                 await self.delete_object(Bucket=Bucket, Key=key)
+                 deleted.append({"Key": key})
+             except Exception as e:
+                 errors.append({
+                     "Key": key,
+                     "Code": "InternalError",
+                     "Message": str(e)
+                 })
+
+         return {
+             "Deleted": deleted,
+             "Errors": errors,
+         }
+
+     async def copy_object(
+         self,
+         *,
+         Bucket: str,  # noqa: N803
+         Key: str,  # noqa: N803
+         CopySource: Dict[str, str]  # noqa: N803
+     ):
+         """Copy object within filesystem."""
+         if self._closed:
+             raise RuntimeError("Client has been closed")
+
+         source_bucket = CopySource["Bucket"]
+         source_key = CopySource["Key"]
+
+         # Read source object
+         source_obj = await self.get_object(Bucket=source_bucket, Key=source_key)
+
+         # Write to destination
+         result = await self.put_object(
+             Bucket=Bucket,
+             Key=Key,
+             Body=source_obj["Body"],
+             ContentType=source_obj["ContentType"],
+             Metadata=source_obj["Metadata"]
+         )
+
+         return {
+             "CopyObjectResult": {
+                 "ETag": result["ETag"],
+                 "LastModified": datetime.utcnow()
+             }
+         }
+
+     async def close(self):
+         """Mark client as closed."""
+         self._closed = True
+
+     # ------------------------------------------------------------
+     # Utility/debugging methods
+     # ------------------------------------------------------------
+
+     async def _debug_get_stats(self) -> Dict[str, Any]:
+         """Get storage statistics."""
+         if self._closed:
+             return {"error": "Client closed"}
+
+         total_objects = 0
+         total_bytes = 0
+
+         for item in self._root.rglob("*"):
+             if item.is_file() and not item.name.endswith(".meta.json"):
+                 total_objects += 1
+                 total_bytes += item.stat().st_size
+
+         return {
+             "root_path": str(self._root),
+             "total_objects": total_objects,
+             "total_bytes": total_bytes,
+             "closed": self._closed,
+         }
+
+     async def _debug_cleanup_empty_dirs(self):
+         """Remove empty directories (cleanup utility)."""
+         async with self._lock:
+             for item in reversed(sorted(self._root.rglob("*"))):
+                 if item.is_dir():
+                     try:
+                         await asyncio.to_thread(item.rmdir)
+                     except OSError:
+                         pass  # Directory not empty
+
+
+ # ---- public factory -------------------------------------------------------
+
+ def factory(root: Optional[Path] = None) -> Callable[[], AsyncContextManager]:
+     """
+     Create a filesystem client factory.
+
+     Parameters
+     ----------
+     root : Path, optional
+         Root directory for storage. If None, uses $ARTIFACT_FS_ROOT or ./artifacts
+     """
+     if root is None:
+         root = _ROOT
+     else:
+         root = Path(root).expanduser().resolve()
+     root.mkdir(parents=True, exist_ok=True)
+
+     @asynccontextmanager
+     async def _ctx():
+         client = _FilesystemClient(root)
+         try:
+             yield client
+         finally:
+             await client.close()
+
+     return _ctx
+
+
+ # ---- convenience functions ------------------------------------------------
+
+ def create_temp_filesystem_factory() -> tuple[Callable[[], AsyncContextManager], Path]:
+     """
+     Create a factory using a temporary directory.
+
+     Returns
+     -------
+     tuple
+         (factory_function, temp_directory_path)
+     """
+     import tempfile
+     temp_dir = Path(tempfile.mkdtemp(prefix="artifacts_"))
+     return factory(temp_dir), temp_dir
+
+
+ async def cleanup_filesystem_store(root: Path):
+     """
+     Clean up a filesystem store directory.
+
+     Parameters
+     ----------
+     root : Path
+         Directory to clean up
+     """
+     import shutil
+     if root.exists():
+         await asyncio.to_thread(shutil.rmtree, root)
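The class above mirrors the keyword-only, S3-style call surface that ArtifactStore expects, so it can be exercised directly. Below is a minimal usage sketch, not part of the package: it assumes the module is importable as chuk_artifacts.providers.filesystem (per the file's header comment) and uses an illustrative bucket name, key, and payload.

import asyncio

# Hypothetical demo script exercising the filesystem provider shown above.
from chuk_artifacts.providers.filesystem import (
    cleanup_filesystem_store,
    create_temp_filesystem_factory,
)

async def main() -> None:
    # Build a factory rooted in a throwaway temp directory.
    factory_fn, temp_root = create_temp_filesystem_factory()
    async with factory_fn() as client:
        await client.put_object(
            Bucket="demo-bucket",          # illustrative bucket name
            Key="notes/hello.txt",
            Body=b"hello world",
            ContentType="text/plain",
            Metadata={"owner": "example"},
        )
        obj = await client.get_object(Bucket="demo-bucket", Key="notes/hello.txt")
        print(obj["ContentType"], obj["ContentLength"])
        # file:// URL carrying operation/token/expires query parameters.
        url = await client.generate_presigned_url(
            "get_object",
            Params={"Bucket": "demo-bucket", "Key": "notes/hello.txt"},
            ExpiresIn=3600,
        )
        print(url)
    await cleanup_filesystem_store(temp_root)

if __name__ == "__main__":
    asyncio.run(main())

Each stored object gets a sibling *.meta.json sidecar holding content type, user metadata, size, and ETag; that sidecar is what get_object, head_object, and list_objects_v2 read back.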
@@ -0,0 +1,121 @@
+ # -*- coding: utf-8 -*-
+ # chuk_artifacts/providers/ibm_cos.py
+ """
+ Factory for an aioboto3 client wired for IBM Cloud Object Storage (COS).
+ Supports both IAM and HMAC auth.
+
+ aioboto3 ≥ 12 returns an *async-context* client, so we expose
+     • factory()       - preferred, used by provider_factory (HMAC credentials)
+     • _build_client() - internal helper that can also wire IAM auth
+ """
+
+ from __future__ import annotations
+ import os, aioboto3
+ from aioboto3.session import AioConfig  # ✅ CRITICAL: Import AioConfig
+ from typing import Optional, Callable, AsyncContextManager
+
+ # ──────────────────────────────────────────────────────────────────
+ # internal helper that actually builds the client
+ # ──────────────────────────────────────────────────────────────────
+ def _build_client(
+     *,
+     endpoint_url: str,
+     region: str,
+     ibm_api_key: Optional[str],
+     ibm_instance_crn: Optional[str],
+     access_key: Optional[str],
+     secret_key: Optional[str],
+ ):
+     session = aioboto3.Session()
+
+     # IAM auth (preferred)
+     if not access_key and not secret_key:
+         return session.client(
+             "s3",
+             endpoint_url=endpoint_url,
+             region_name=region,
+             ibm_api_key_id=ibm_api_key,
+             ibm_service_instance_id=ibm_instance_crn,
+             # ✅ Use SigV2 for IBM COS IAM + path style
+             config=AioConfig(
+                 signature_version='s3',
+                 s3={'addressing_style': 'path'}
+             )
+         )
+
+     # HMAC auth
+     return session.client(
+         "s3",
+         endpoint_url=endpoint_url,
+         region_name=region,
+         aws_access_key_id=access_key,
+         aws_secret_access_key=secret_key,
+         # ✅ Use SigV2 for IBM COS HMAC + path style
+         config=AioConfig(
+             signature_version='s3',
+             s3={'addressing_style': 'path'}
+         )
+     )
+
+
+ # ──────────────────────────────────────────────────────────────────
+ # public factory (provider_factory expects this)
+ # ──────────────────────────────────────────────────────────────────
+ def factory(
+     *,
+     endpoint_url: Optional[str] = None,
+     region: str = "us-south",
+     access_key: Optional[str] = None,
+     secret_key: Optional[str] = None,
+ ):
+     """
+     Return an async-context S3 client for IBM COS (HMAC only).
+     """
+     endpoint_url = endpoint_url or os.getenv(
+         "IBM_COS_ENDPOINT",
+         "https://s3.us-south.cloud-object-storage.appdomain.cloud",
+     )
+     access_key = access_key or os.getenv("AWS_ACCESS_KEY_ID")
+     secret_key = secret_key or os.getenv("AWS_SECRET_ACCESS_KEY")
+
+     # ✅ Extract region from endpoint to ensure they match
+     if endpoint_url:
+         if "us-south" in endpoint_url:
+             region = "us-south"
+         elif "us-east" in endpoint_url:
+             region = "us-east-1"
+         elif "eu-gb" in endpoint_url:
+             region = "eu-gb"
+         elif "eu-de" in endpoint_url:
+             region = "eu-de"
+
+     # Check AWS_REGION environment variable as override
+     env_region = os.getenv('AWS_REGION')
+     if env_region:
+         region = env_region
+
+     if not (access_key and secret_key):
+         raise RuntimeError(
+             "HMAC credentials missing. "
+             "Set AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY "
+             "or generate an HMAC key for your COS instance."
+         )
+
+     def _make() -> AsyncContextManager:
+         session = aioboto3.Session()
+         return session.client(
+             "s3",
+             endpoint_url=endpoint_url,
+             region_name=region,
+             aws_access_key_id=access_key,
+             aws_secret_access_key=secret_key,
+             # ✅ CRITICAL: IBM COS requires Signature Version 2 for writes AND presigned URLs
+             config=AioConfig(
+                 signature_version='s3',
+                 s3={
+                     'addressing_style': 'path'  # Also ensure path-style addressing
+                 }
+             )
+         )
+
+     return _make
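Here factory() returns a zero-argument callable; each call produces a fresh aioboto3 async-context client configured with the SigV2-style signing and path-style addressing the comments above flag as required for IBM COS. A minimal sketch, not part of the package: it assumes AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY (HMAC keys) and IBM_COS_ENDPOINT are set in the environment, and the bucket name is hypothetical.

import asyncio

# Hypothetical demo script using the HMAC factory shown above.
from chuk_artifacts.providers.ibm_cos import factory

async def main() -> None:
    make_client = factory()  # reads env vars, derives region from the endpoint
    async with make_client() as s3:
        await s3.put_object(
            Bucket="my-cos-bucket",        # hypothetical bucket
            Key="demo/hello.txt",
            Body=b"hello from ibm cos",
            ContentType="text/plain",
            Metadata={"owner": "example"},
        )
        url = await s3.generate_presigned_url(
            "get_object",
            Params={"Bucket": "my-cos-bucket", "Key": "demo/hello.txt"},
            ExpiresIn=3600,
        )
        print(url)

asyncio.run(main())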
@@ -0,0 +1,82 @@
+ # -*- coding: utf-8 -*-
+ # chuk_artifacts/providers/ibm_cos_iam.py
+ """
+ Async wrapper for IBM Cloud Object Storage using IAM API-key (oauth).
+
+ ✓ Fits the aioboto3-style interface that ArtifactStore expects:
+     • async put_object(...)
+     • async generate_presigned_url(...)
+ ✓ No HMAC keys required - just IBM_COS_APIKEY + IBM_COS_INSTANCE_CRN.
+
+ Env vars
+ --------
+ IBM_COS_APIKEY        - value of "apikey" field
+ IBM_COS_INSTANCE_CRN  - value of "resource_instance_id"
+ IBM_COS_ENDPOINT      - regional data endpoint, e.g.
+                         https://s3.us-south.cloud-object-storage.appdomain.cloud
+ """
+
+ from __future__ import annotations
+ import os, asyncio
+ from contextlib import asynccontextmanager
+ from typing import AsyncContextManager, Any, Dict, Callable
+
+ import ibm_boto3
+ from ibm_botocore.client import Config
+
+
+ # ─────────────────────────────────────────────────────────────────────
+ def _sync_client():
+     endpoint = os.getenv(
+         "IBM_COS_ENDPOINT",
+         "https://s3.us-south.cloud-object-storage.appdomain.cloud",
+     )
+     api_key = os.getenv("IBM_COS_APIKEY")
+     instance = os.getenv("IBM_COS_INSTANCE_CRN")
+     if not (api_key and instance):
+         raise RuntimeError(
+             "Set IBM_COS_APIKEY, IBM_COS_INSTANCE_CRN, IBM_COS_ENDPOINT "
+             "for ibm_cos_iam provider."
+         )
+     return ibm_boto3.client(
+         "s3",
+         ibm_api_key_id=api_key,
+         ibm_service_instance_id=instance,
+         config=Config(signature_version="oauth"),
+         endpoint_url=endpoint,
+     )
+
+
+ # ─────────────────────────────────────────────────────────────────────
+ class _AsyncIBMClient:
+     """Minimal async façade over synchronous ibm_boto3 S3 client."""
+     def __init__(self, sync_client):
+         self._c = sync_client
+
+     # ---- methods used by ArtifactStore -------------------------------------
+     async def put_object(self, **kw) -> Dict[str, Any]:
+         return await asyncio.to_thread(self._c.put_object, **kw)
+
+     async def generate_presigned_url(self, *a, **kw) -> str:
+         return await asyncio.to_thread(self._c.generate_presigned_url, *a, **kw)
+
+     # ---- cleanup -----------------------------------------------------------
+     async def close(self):
+         await asyncio.to_thread(self._c.close)
+
+
+ # ─────────────────────────────────────────────────────────────────────
+ def factory() -> Callable[[], AsyncContextManager]:
+     """
+     Return a zero-arg callable that yields an async-context-manager.
+     """
+
+     @asynccontextmanager
+     async def _ctx():
+         sync_client = _sync_client()
+         try:
+             yield _AsyncIBMClient(sync_client)
+         finally:
+             await asyncio.to_thread(sync_client.close)
+
+     return _ctx  # Return the function, not the result of calling it
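The IAM variant wraps a synchronous ibm_boto3 client in a thread-offloading façade that implements only the calls ArtifactStore uses (put_object, generate_presigned_url, close). A minimal sketch, not part of the package: it assumes IBM_COS_APIKEY, IBM_COS_INSTANCE_CRN, and IBM_COS_ENDPOINT are exported, and the bucket name is hypothetical.

import asyncio

# Hypothetical demo script using the IAM provider shown above.
from chuk_artifacts.providers.ibm_cos_iam import factory

async def main() -> None:
    open_client = factory()
    async with open_client() as cos:
        await cos.put_object(
            Bucket="my-cos-bucket",        # hypothetical bucket
            Key="demo/report.json",
            Body=b'{"ok": true}',
            ContentType="application/json",
        )
        # generate_presigned_url is forwarded to the sync client the same way.

asyncio.run(main())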