chuk-artifacts 0.1.0 (py3-none-any.whl)
- chuk_artifacts/__init__.py +149 -0
- chuk_artifacts/admin.py +79 -0
- chuk_artifacts/base.py +75 -0
- chuk_artifacts/batch.py +115 -0
- chuk_artifacts/config.py +338 -0
- chuk_artifacts/core.py +215 -0
- chuk_artifacts/exceptions.py +37 -0
- chuk_artifacts/metadata.py +286 -0
- chuk_artifacts/models.py +23 -0
- chuk_artifacts/presigned.py +267 -0
- chuk_artifacts/provider_factory.py +84 -0
- chuk_artifacts/providers/__init__.py +10 -0
- chuk_artifacts/providers/filesystem.py +453 -0
- chuk_artifacts/providers/ibm_cos.py +121 -0
- chuk_artifacts/providers/ibm_cos_iam.py +82 -0
- chuk_artifacts/providers/memory.py +315 -0
- chuk_artifacts/providers/s3.py +90 -0
- chuk_artifacts/store.py +383 -0
- chuk_artifacts-0.1.0.dist-info/METADATA +519 -0
- chuk_artifacts-0.1.0.dist-info/RECORD +23 -0
- chuk_artifacts-0.1.0.dist-info/WHEEL +5 -0
- chuk_artifacts-0.1.0.dist-info/licenses/LICENSE +21 -0
- chuk_artifacts-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,453 @@
# ===========================================================================
# chuk_artifacts/providers/filesystem.py
# ===========================================================================
"""Local-filesystem artefact store.

Objects are written relative to $ARTIFACT_FS_ROOT (default ./artifacts).
Presigned URLs use the *file://* scheme so callers can still download.
Includes comprehensive S3-compatible methods and proper error handling.
"""
from __future__ import annotations

import os, json, asyncio, time, uuid, stat, hashlib
from pathlib import Path
from contextlib import asynccontextmanager
from typing import Any, Dict, Callable, AsyncContextManager, List, Optional
from datetime import datetime

_ROOT = Path(os.getenv("ARTIFACT_FS_ROOT", "./artifacts")).expanduser()


class _FilesystemClient:
    """Mimics the S3 surface ArtifactStore depends on with filesystem backend."""

    def __init__(self, root: Path = _ROOT):
        self._root = Path(root).expanduser().resolve()
        self._closed = False
        self._lock = asyncio.Lock()

        # Ensure root directory exists
        self._root.mkdir(parents=True, exist_ok=True)

    def _get_object_path(self, bucket: str, key: str) -> Path:
        """Get the filesystem path for an object, incorporating bucket as subdirectory."""
        # Use bucket as a subdirectory to maintain some S3-like organization
        return self._root / bucket / key

    def _get_metadata_path(self, object_path: Path) -> Path:
        """Get the metadata file path for an object."""
        return object_path.with_suffix(object_path.suffix + ".meta.json")

    async def _ensure_parent_dir(self, path: Path):
        """Ensure parent directory exists."""
        await asyncio.to_thread(path.parent.mkdir, parents=True, exist_ok=True)

    async def _write_metadata(self, meta_path: Path, content_type: str, metadata: Dict[str, str], size: int, etag: str):
        """Write metadata file."""
        meta_data = {
            "content_type": content_type,
            "metadata": metadata,
            "size": size,
            "etag": etag,
            "last_modified": datetime.utcnow().isoformat() + "Z",
            "created_at": datetime.utcnow().isoformat() + "Z"
        }
        meta_json = json.dumps(meta_data, indent=2)
        await asyncio.to_thread(meta_path.write_text, meta_json, encoding='utf-8')

    async def _read_metadata(self, meta_path: Path) -> Dict[str, Any]:
        """Read metadata file."""
        try:
            content = await asyncio.to_thread(meta_path.read_text, encoding='utf-8')
            return json.loads(content)
        except (FileNotFoundError, json.JSONDecodeError):
            return {}

    # ------------------------------------------------------------
    # Core S3-compatible methods
    # ------------------------------------------------------------

    async def put_object(
        self,
        *,
        Bucket: str,              # noqa: N803
        Key: str,                 # noqa: N803
        Body: bytes,              # noqa: N803
        ContentType: str,
        Metadata: Dict[str, str]  # noqa: N803
    ):
        """Store object in filesystem with metadata."""
        if self._closed:
            raise RuntimeError("Client has been closed")

        object_path = self._get_object_path(Bucket, Key)
        meta_path = self._get_metadata_path(object_path)

        # Generate ETag (MD5 hash like S3)
        etag = hashlib.md5(Body).hexdigest()

        async with self._lock:
            await self._ensure_parent_dir(object_path)
            await asyncio.to_thread(object_path.write_bytes, Body)
            await self._write_metadata(meta_path, ContentType, Metadata, len(Body), etag)

        return {
            "ResponseMetadata": {"HTTPStatusCode": 200},
            "ETag": f'"{etag}"'
        }

    async def get_object(
        self,
        *,
        Bucket: str,  # noqa: N803
        Key: str      # noqa: N803
    ):
        """Retrieve object from filesystem."""
        if self._closed:
            raise RuntimeError("Client has been closed")

        object_path = self._get_object_path(Bucket, Key)
        meta_path = self._get_metadata_path(object_path)

        if not object_path.exists():
            raise FileNotFoundError(f"NoSuchKey: {Key}")

        async with self._lock:
            body = await asyncio.to_thread(object_path.read_bytes)
            metadata = await self._read_metadata(meta_path)

            # Get file stats
            stat_info = await asyncio.to_thread(object_path.stat)

        return {
            "Body": body,
            "ContentType": metadata.get("content_type", "application/octet-stream"),
            "Metadata": metadata.get("metadata", {}),
            "ContentLength": len(body),
            "LastModified": datetime.fromtimestamp(stat_info.st_mtime),
            "ETag": metadata.get("etag", ""),
        }

    async def head_object(
        self,
        *,
        Bucket: str,  # noqa: N803
        Key: str      # noqa: N803
    ):
        """Get object metadata without body."""
        if self._closed:
            raise RuntimeError("Client has been closed")

        object_path = self._get_object_path(Bucket, Key)
        meta_path = self._get_metadata_path(object_path)

        if not object_path.exists():
            raise FileNotFoundError(f"NoSuchKey: {Key}")

        async with self._lock:
            metadata = await self._read_metadata(meta_path)
            stat_info = await asyncio.to_thread(object_path.stat)

        return {
            "ContentType": metadata.get("content_type", "application/octet-stream"),
            "Metadata": metadata.get("metadata", {}),
            "ContentLength": stat_info.st_size,
            "LastModified": datetime.fromtimestamp(stat_info.st_mtime),
            "ETag": metadata.get("etag", ""),
        }

    async def head_bucket(self, *, Bucket: str):  # noqa: N803
        """Check if bucket (directory) exists."""
        if self._closed:
            raise RuntimeError("Client has been closed")

        bucket_path = self._root / Bucket
        if not bucket_path.exists():
            raise FileNotFoundError(f"NoSuchBucket: {Bucket}")

        return {"ResponseMetadata": {"HTTPStatusCode": 200}}

    async def generate_presigned_url(
        self,
        operation: str,
        *,
        Params: Dict[str, str],  # noqa: N803
        ExpiresIn: int           # noqa: N803
    ) -> str:
        """
        Generate file:// URLs for filesystem objects.

        Note: file:// URLs don't have real expiration, but we include
        expiry info for compatibility.
        """
        if self._closed:
            raise RuntimeError("Client has been closed")

        bucket, key = Params["Bucket"], Params["Key"]
        object_path = self._get_object_path(bucket, key)

        if not object_path.exists():
            raise FileNotFoundError(f"Object not found: {bucket}/{key}")

        # Create file:// URL with query parameters for compatibility
        return (
            f"file://{object_path.as_posix()}"
            f"?operation={operation}"
            f"&token={uuid.uuid4().hex}"
            f"&expires={int(time.time()) + ExpiresIn}"
        )

    async def list_objects_v2(
        self,
        *,
        Bucket: str,                              # noqa: N803
        Prefix: str = "",                         # noqa: N803
        MaxKeys: int = 1000,                      # noqa: N803
        ContinuationToken: Optional[str] = None   # noqa: N803
    ):
        """List objects in bucket with optional prefix filtering."""
        if self._closed:
            raise RuntimeError("Client has been closed")

        bucket_path = self._root / Bucket
        if not bucket_path.exists():
            return {
                "Contents": [],
                "KeyCount": 0,
                "IsTruncated": False,
            }

        contents = []
        total_found = 0

        async with self._lock:
            # Walk the directory tree
            for item in bucket_path.rglob("*"):
                if item.is_file() and not item.name.endswith(".meta.json"):
                    # Get relative path from bucket root as the key
                    relative_path = item.relative_to(bucket_path)
                    key = relative_path.as_posix()

                    # Apply prefix filter
                    if not key.startswith(Prefix):
                        continue

                    total_found += 1

                    # Apply pagination
                    if len(contents) >= MaxKeys:
                        break

                    # Get file stats and metadata
                    stat_info = await asyncio.to_thread(item.stat)
                    meta_path = self._get_metadata_path(item)
                    metadata = await self._read_metadata(meta_path)

                    contents.append({
                        "Key": key,
                        "Size": stat_info.st_size,
                        "LastModified": datetime.fromtimestamp(stat_info.st_mtime),
                        "ETag": f'"{metadata.get("etag", "")}"',
                        "StorageClass": "STANDARD"
                    })

        return {
            "Contents": contents,
            "KeyCount": len(contents),
            "IsTruncated": total_found > MaxKeys,
            "MaxKeys": MaxKeys,
            "Prefix": Prefix,
        }

    async def delete_object(
        self,
        *,
        Bucket: str,  # noqa: N803
        Key: str      # noqa: N803
    ):
        """Delete object and its metadata from filesystem."""
        if self._closed:
            raise RuntimeError("Client has been closed")

        object_path = self._get_object_path(Bucket, Key)
        meta_path = self._get_metadata_path(object_path)

        async with self._lock:
            # Remove object file
            try:
                await asyncio.to_thread(object_path.unlink)
            except FileNotFoundError:
                pass  # S3 doesn't error if object doesn't exist

            # Remove metadata file
            try:
                await asyncio.to_thread(meta_path.unlink)
            except FileNotFoundError:
                pass

            # Clean up empty directories
            try:
                await asyncio.to_thread(object_path.parent.rmdir)
            except OSError:
                pass  # Directory not empty or other issue

        return {"ResponseMetadata": {"HTTPStatusCode": 204}}

    async def delete_objects(
        self,
        *,
        Bucket: str,                              # noqa: N803
        Delete: Dict[str, List[Dict[str, str]]]   # noqa: N803
    ):
        """Delete multiple objects (batch operation)."""
        if self._closed:
            raise RuntimeError("Client has been closed")

        deleted = []
        errors = []

        for obj in Delete.get("Objects", []):
            key = obj["Key"]
            try:
                await self.delete_object(Bucket=Bucket, Key=key)
                deleted.append({"Key": key})
            except Exception as e:
                errors.append({
                    "Key": key,
                    "Code": "InternalError",
                    "Message": str(e)
                })

        return {
            "Deleted": deleted,
            "Errors": errors,
        }

    async def copy_object(
        self,
        *,
        Bucket: str,                 # noqa: N803
        Key: str,                    # noqa: N803
        CopySource: Dict[str, str]   # noqa: N803
    ):
        """Copy object within filesystem."""
        if self._closed:
            raise RuntimeError("Client has been closed")

        source_bucket = CopySource["Bucket"]
        source_key = CopySource["Key"]

        # Read source object
        source_obj = await self.get_object(Bucket=source_bucket, Key=source_key)

        # Write to destination
        result = await self.put_object(
            Bucket=Bucket,
            Key=Key,
            Body=source_obj["Body"],
            ContentType=source_obj["ContentType"],
            Metadata=source_obj["Metadata"]
        )

        return {
            "CopyObjectResult": {
                "ETag": result["ETag"],
                "LastModified": datetime.utcnow()
            }
        }

    async def close(self):
        """Mark client as closed."""
        self._closed = True

    # ------------------------------------------------------------
    # Utility/debugging methods
    # ------------------------------------------------------------

    async def _debug_get_stats(self) -> Dict[str, Any]:
        """Get storage statistics."""
        if self._closed:
            return {"error": "Client closed"}

        total_objects = 0
        total_bytes = 0

        for item in self._root.rglob("*"):
            if item.is_file() and not item.name.endswith(".meta.json"):
                total_objects += 1
                total_bytes += item.stat().st_size

        return {
            "root_path": str(self._root),
            "total_objects": total_objects,
            "total_bytes": total_bytes,
            "closed": self._closed,
        }

    async def _debug_cleanup_empty_dirs(self):
        """Remove empty directories (cleanup utility)."""
        async with self._lock:
            for item in reversed(sorted(self._root.rglob("*"))):
                if item.is_dir():
                    try:
                        await asyncio.to_thread(item.rmdir)
                    except OSError:
                        pass  # Directory not empty


# ---- public factory -------------------------------------------------------

def factory(root: Optional[Path] = None) -> Callable[[], AsyncContextManager]:
    """
    Create a filesystem client factory.

    Parameters
    ----------
    root : Path, optional
        Root directory for storage. If None, uses $ARTIFACT_FS_ROOT or ./artifacts
    """
    if root is None:
        root = _ROOT
    else:
        root = Path(root).expanduser().resolve()
    root.mkdir(parents=True, exist_ok=True)

    @asynccontextmanager
    async def _ctx():
        client = _FilesystemClient(root)
        try:
            yield client
        finally:
            await client.close()

    return _ctx


# ---- convenience functions ------------------------------------------------

def create_temp_filesystem_factory() -> tuple[Callable[[], AsyncContextManager], Path]:
    """
    Create a factory using a temporary directory.

    Returns
    -------
    tuple
        (factory_function, temp_directory_path)
    """
    import tempfile
    temp_dir = Path(tempfile.mkdtemp(prefix="artifacts_"))
    return factory(temp_dir), temp_dir


async def cleanup_filesystem_store(root: Path):
    """
    Clean up a filesystem store directory.

    Parameters
    ----------
    root : Path
        Directory to clean up
    """
    import shutil
    if root.exists():
        await asyncio.to_thread(shutil.rmtree, root)
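
The factory above returns a zero-argument callable that opens an async context manager around a _FilesystemClient, mirroring the aioboto3 client contract used elsewhere in the package. The following is a minimal usage sketch, not taken from the package docs; the bucket name, key, payload, and metadata values are invented for illustration.

# Usage sketch (assumed example): store and read back one object via the filesystem provider
import asyncio
from chuk_artifacts.providers import filesystem

async def main():
    make_client = filesystem.factory()   # honours $ARTIFACT_FS_ROOT, defaults to ./artifacts
    async with make_client() as client:
        await client.put_object(
            Bucket="demo",                       # hypothetical bucket name
            Key="notes/hello.txt",               # hypothetical key
            Body=b"hello world",
            ContentType="text/plain",
            Metadata={"owner": "example"},
        )
        obj = await client.get_object(Bucket="demo", Key="notes/hello.txt")
        print(obj["ContentType"], obj["ContentLength"])

asyncio.run(main())

Because each object is mirrored by a sibling *.meta.json file, the content type and custom metadata survive across processes without any external database.
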
@@ -0,0 +1,121 @@
# -*- coding: utf-8 -*-
# chuk_artifacts/providers/ibm_cos.py
"""
Factory for an aioboto3 client wired for IBM Cloud Object Storage (COS).
Supports both IAM and HMAC auth.

aioboto3 ≥ 12 returns an *async-context* client, so we expose
    • factory() - preferred, used by provider_factory
    • client()  - retained for backward-compat tests/manual use
"""

from __future__ import annotations
import os, aioboto3
from aioboto3.session import AioConfig  # ✅ CRITICAL: Import AioConfig
from typing import Optional, Callable, AsyncContextManager

# ──────────────────────────────────────────────────────────────────
# internal helper that actually builds the client
# ──────────────────────────────────────────────────────────────────
def _build_client(
    *,
    endpoint_url: str,
    region: str,
    ibm_api_key: Optional[str],
    ibm_instance_crn: Optional[str],
    access_key: Optional[str],
    secret_key: Optional[str],
):
    session = aioboto3.Session()

    # IAM auth (preferred)
    if not access_key and not secret_key:
        return session.client(
            "s3",
            endpoint_url=endpoint_url,
            region_name=region,
            ibm_api_key_id=ibm_api_key,
            ibm_service_instance_id=ibm_instance_crn,
            # ✅ Use SigV2 for IBM COS IAM + path style
            config=AioConfig(
                signature_version='s3',
                s3={'addressing_style': 'path'}
            )
        )

    # HMAC auth
    return session.client(
        "s3",
        endpoint_url=endpoint_url,
        region_name=region,
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
        # ✅ Use SigV2 for IBM COS HMAC + path style
        config=AioConfig(
            signature_version='s3',
            s3={'addressing_style': 'path'}
        )
    )


# ──────────────────────────────────────────────────────────────────
# public factory (provider_factory expects this)
# ──────────────────────────────────────────────────────────────────
def factory(
    *,
    endpoint_url: Optional[str] = None,
    region: str = "us-south",
    access_key: Optional[str] = None,
    secret_key: Optional[str] = None,
):
    """
    Return an async-context S3 client for IBM COS (HMAC only).
    """
    endpoint_url = endpoint_url or os.getenv(
        "IBM_COS_ENDPOINT",
        "https://s3.us-south.cloud-object-storage.appdomain.cloud",
    )
    access_key = access_key or os.getenv("AWS_ACCESS_KEY_ID")
    secret_key = secret_key or os.getenv("AWS_SECRET_ACCESS_KEY")

    # ✅ Extract region from endpoint to ensure they match
    if endpoint_url:
        if "us-south" in endpoint_url:
            region = "us-south"
        elif "us-east" in endpoint_url:
            region = "us-east-1"
        elif "eu-gb" in endpoint_url:
            region = "eu-gb"
        elif "eu-de" in endpoint_url:
            region = "eu-de"

    # Check AWS_REGION environment variable as override
    env_region = os.getenv('AWS_REGION')
    if env_region:
        region = env_region

    if not (access_key and secret_key):
        raise RuntimeError(
            "HMAC credentials missing. "
            "Set AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY "
            "or generate an HMAC key for your COS instance."
        )

    def _make() -> AsyncContextManager:
        session = aioboto3.Session()
        return session.client(
            "s3",
            endpoint_url=endpoint_url,
            region_name=region,
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
            # ✅ CRITICAL: IBM COS requires Signature Version 2 for writes AND presigned URLs
            config=AioConfig(
                signature_version='s3',
                s3={
                    'addressing_style': 'path'  # Also ensure path-style addressing
                }
            )
        )

    return _make
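
For the HMAC path, factory() resolves credentials and region, then returns a zero-argument callable whose result is itself used as an async context manager (the aioboto3 client). Below is a hedged usage sketch, not from the package docs: it assumes AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY are set in the environment, and the bucket and key are placeholders.

# Usage sketch (assumed example): presign a GET against IBM COS via the HMAC factory
import asyncio
from chuk_artifacts.providers import ibm_cos

async def main():
    make_client = ibm_cos.factory()      # reads IBM_COS_ENDPOINT and HMAC keys from env
    async with make_client() as s3:
        url = await s3.generate_presigned_url(
            "get_object",
            Params={"Bucket": "my-bucket", "Key": "some/key"},   # placeholder bucket/key
            ExpiresIn=3600,
        )
        print(url)

asyncio.run(main())

The SigV2 plus path-style configuration baked into the factory is what makes such presigned URLs acceptable to COS, per the comments in the module itself.
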
@@ -0,0 +1,82 @@
# -*- coding: utf-8 -*-
# chuk_artifacts/providers/ibm_cos_iam.py
"""
Async wrapper for IBM Cloud Object Storage using IAM API-key (oauth).

✓ Fits the aioboto3-style interface that ArtifactStore expects:
    • async put_object(...)
    • async generate_presigned_url(...)
✓ No HMAC keys required - just IBM_COS_APIKEY + IBM_COS_INSTANCE_CRN.

Env vars
--------
IBM_COS_APIKEY        - value of "apikey" field
IBM_COS_INSTANCE_CRN  - value of "resource_instance_id"
IBM_COS_ENDPOINT      - regional data endpoint, e.g.
                        https://s3.us-south.cloud-object-storage.appdomain.cloud
"""

from __future__ import annotations
import os, asyncio
from contextlib import asynccontextmanager
from typing import AsyncContextManager, Any, Dict, Callable

import ibm_boto3
from ibm_botocore.client import Config


# ─────────────────────────────────────────────────────────────────────
def _sync_client():
    endpoint = os.getenv(
        "IBM_COS_ENDPOINT",
        "https://s3.us-south.cloud-object-storage.appdomain.cloud",
    )
    api_key = os.getenv("IBM_COS_APIKEY")
    instance = os.getenv("IBM_COS_INSTANCE_CRN")
    if not (api_key and instance):
        raise RuntimeError(
            "Set IBM_COS_APIKEY, IBM_COS_INSTANCE_CRN, IBM_COS_ENDPOINT "
            "for ibm_cos_iam provider."
        )
    return ibm_boto3.client(
        "s3",
        ibm_api_key_id=api_key,
        ibm_service_instance_id=instance,
        config=Config(signature_version="oauth"),
        endpoint_url=endpoint,
    )


# ─────────────────────────────────────────────────────────────────────
class _AsyncIBMClient:
    """Minimal async façade over synchronous ibm_boto3 S3 client."""
    def __init__(self, sync_client):
        self._c = sync_client

    # ---- methods used by ArtifactStore -------------------------------------
    async def put_object(self, **kw) -> Dict[str, Any]:
        return await asyncio.to_thread(self._c.put_object, **kw)

    async def generate_presigned_url(self, *a, **kw) -> str:
        return await asyncio.to_thread(self._c.generate_presigned_url, *a, **kw)

    # ---- cleanup -----------------------------------------------------------
    async def close(self):
        await asyncio.to_thread(self._c.close)


# ─────────────────────────────────────────────────────────────────────
def factory() -> Callable[[], AsyncContextManager]:
    """
    Return a zero-arg callable that yields an async-context-manager.
    """

    @asynccontextmanager
    async def _ctx():
        sync_client = _sync_client()
        try:
            yield _AsyncIBMClient(sync_client)
        finally:
            await asyncio.to_thread(sync_client.close)

    return _ctx  # Return the function, not the result of calling it
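
The IAM variant wraps a synchronous ibm_boto3 client in a thin async façade (calls are pushed to a thread via asyncio.to_thread) and exposes the same factory shape as the other providers. A minimal usage sketch follows; it assumes IBM_COS_APIKEY, IBM_COS_INSTANCE_CRN, and IBM_COS_ENDPOINT are exported, and the bucket, key, and payload are placeholders rather than values from the package.

# Usage sketch (assumed example): upload through the oauth-signed IAM client
import asyncio
from chuk_artifacts.providers import ibm_cos_iam

async def main():
    make_client = ibm_cos_iam.factory()
    async with make_client() as cos:
        await cos.put_object(
            Bucket="my-bucket",          # placeholder bucket
            Key="reports/q1.txt",        # placeholder key
            Body=b"quarterly numbers",
            ContentType="text/plain",
            Metadata={},
        )

asyncio.run(main())

Keeping the blocking ibm_boto3 calls behind asyncio.to_thread lets this provider slot into the same async ArtifactStore code path as the aioboto3-based providers without a native async SDK.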