chuk-artifacts 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chuk_artifacts/__init__.py +149 -0
- chuk_artifacts/admin.py +79 -0
- chuk_artifacts/base.py +75 -0
- chuk_artifacts/batch.py +115 -0
- chuk_artifacts/config.py +338 -0
- chuk_artifacts/core.py +215 -0
- chuk_artifacts/exceptions.py +37 -0
- chuk_artifacts/metadata.py +286 -0
- chuk_artifacts/models.py +23 -0
- chuk_artifacts/presigned.py +267 -0
- chuk_artifacts/provider_factory.py +84 -0
- chuk_artifacts/providers/__init__.py +10 -0
- chuk_artifacts/providers/filesystem.py +453 -0
- chuk_artifacts/providers/ibm_cos.py +121 -0
- chuk_artifacts/providers/ibm_cos_iam.py +82 -0
- chuk_artifacts/providers/memory.py +315 -0
- chuk_artifacts/providers/s3.py +90 -0
- chuk_artifacts/store.py +383 -0
- chuk_artifacts-0.1.0.dist-info/METADATA +519 -0
- chuk_artifacts-0.1.0.dist-info/RECORD +23 -0
- chuk_artifacts-0.1.0.dist-info/WHEEL +5 -0
- chuk_artifacts-0.1.0.dist-info/licenses/LICENSE +21 -0
- chuk_artifacts-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,149 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# chuk_artifacts/__init__.py
|
3
|
+
"""
|
4
|
+
Asynchronous, object-store-backed artifact manager.
|
5
|
+
|
6
|
+
This package provides a high-level interface for storing and retrieving
|
7
|
+
artifacts across multiple storage backends (S3, IBM COS, filesystem, memory)
|
8
|
+
with metadata caching and presigned URL support.
|
9
|
+
"""
|
10
|
+
from __future__ import annotations
|
11
|
+
from dotenv import load_dotenv
|
12
|
+
|
13
|
+
# Core classes
|
14
|
+
from .store import ArtifactStore
|
15
|
+
|
16
|
+
# Exception classes
|
17
|
+
from .exceptions import (
|
18
|
+
ArtifactStoreError,
|
19
|
+
ArtifactNotFoundError,
|
20
|
+
ArtifactExpiredError,
|
21
|
+
ArtifactCorruptedError,
|
22
|
+
ProviderError,
|
23
|
+
SessionError,
|
24
|
+
)
|
25
|
+
|
26
|
+
# Operation modules (for advanced usage)
|
27
|
+
from .core import CoreStorageOperations
|
28
|
+
from .presigned import PresignedURLOperations
|
29
|
+
from .metadata import MetadataOperations
|
30
|
+
from .batch import BatchOperations
|
31
|
+
from .admin import AdminOperations
|
32
|
+
from .store import _DEFAULT_TTL, _DEFAULT_PRESIGN_EXPIRES
|
33
|
+
|
34
|
+
|
35
|
+
# Load environment variables from a local .env file (if present) at import
# time, so storage/session provider credentials and settings can be supplied
# without exporting them in the shell.
load_dotenv()
|
37
|
+
|
38
|
+
# version
|
39
|
+
__version__ = "1.0.0"
|
40
|
+
|
41
|
+
# Public API of the package. Note: the private constants are re-exported
# deliberately for advanced callers; the convenience helpers defined later in
# this module (create_store, quick_store, configure_logging) are included so
# `from chuk_artifacts import *` exposes them too.
__all__ = [
    # Main class
    "ArtifactStore",

    # Exceptions
    "ArtifactStoreError",
    "ArtifactNotFoundError",
    "ArtifactExpiredError",
    "ArtifactCorruptedError",
    "ProviderError",
    "SessionError",

    # Operation modules (advanced usage)
    "CoreStorageOperations",
    "PresignedURLOperations",
    "MetadataOperations",
    "BatchOperations",
    "AdminOperations",

    # Convenience helpers defined in this module
    "create_store",
    "quick_store",
    "configure_logging",

    # Constants
    "_DEFAULT_TTL",
    "_DEFAULT_PRESIGN_EXPIRES",
]
|
64
|
+
|
65
|
+
# Convenience aliases for common operations
|
66
|
+
def create_store(**kwargs) -> ArtifactStore:
    """
    Build an :class:`ArtifactStore` with sensible defaults.

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to the ``ArtifactStore`` constructor.

    Returns
    -------
    ArtifactStore
        Configured artifact store.

    Examples
    --------
    >>> store = create_store()  # Memory-based
    >>> store = create_store(storage_provider="ibm_cos", bucket="my-bucket")
    """
    return ArtifactStore(**kwargs)
|
86
|
+
|
87
|
+
|
88
|
+
async def quick_store(
    data: bytes,
    *,
    mime: str = "application/octet-stream",
    summary: str = "Quick upload",
    **store_kwargs
) -> tuple[ArtifactStore, str]:
    """
    One-shot helper: build a store and immediately persist *data* in it.

    Parameters
    ----------
    data : bytes
        Payload to persist.
    mime : str, optional
        MIME type recorded with the artifact.
    summary : str, optional
        Human-readable description of the artifact.
    **store_kwargs
        Forwarded unchanged to the ``ArtifactStore`` constructor.

    Returns
    -------
    tuple
        ``(store_instance, artifact_id)`` — the store is returned so the
        caller can perform follow-up operations (e.g. presigning).

    Examples
    --------
    >>> store, artifact_id = await quick_store(
    ...     b"Hello world",
    ...     mime="text/plain",
    ...     storage_provider="filesystem"
    ... )
    >>> url = await store.presign(artifact_id)
    """
    new_store = ArtifactStore(**store_kwargs)
    new_id = await new_store.store(data, mime=mime, summary=summary)
    return new_store, new_id
|
126
|
+
|
127
|
+
|
128
|
+
# Module-level configuration helper
|
129
|
+
def configure_logging(level: str = "INFO"):
    """
    Configure logging for the artifacts package.

    Sets the level on the package-wide ``chuk_artifacts`` logger and, on the
    first call only, attaches a stream handler with a timestamped format.
    Subsequent calls adjust the level without adding duplicate handlers.

    Parameters
    ----------
    level : str
        Logging level name (DEBUG, INFO, WARNING, ERROR); case-insensitive.
    """
    import logging

    pkg_logger = logging.getLogger("chuk_artifacts")
    pkg_logger.setLevel(getattr(logging, level.upper()))

    # Guard clause: a handler is already attached — nothing more to do.
    if pkg_logger.handlers:
        return

    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(
        logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    )
    pkg_logger.addHandler(stream_handler)
|
chuk_artifacts/admin.py
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# chuk_artifacts/admin.py
|
3
|
+
"""
|
4
|
+
Administrative and debugging operations
|
5
|
+
"""
|
6
|
+
|
7
|
+
from __future__ import annotations
|
8
|
+
|
9
|
+
import uuid, logging
|
10
|
+
from datetime import datetime
|
11
|
+
from typing import Any, Dict
|
12
|
+
|
13
|
+
from .base import BaseOperations
|
14
|
+
|
15
|
+
logger = logging.getLogger(__name__)
|
16
|
+
|
17
|
+
|
18
|
+
class AdminOperations(BaseOperations):
    """Administrative and debugging operations (connectivity checks, stats)."""

    async def validate_configuration(self) -> Dict[str, Any]:
        """Validate store configuration and connectivity.

        Probes the session provider with a short-lived write/read round-trip
        and the storage provider with a bucket existence check, capturing the
        outcome of each probe independently.
        """
        # NOTE(review): utcnow() is deprecated in newer Pythons; kept for
        # byte-identical timestamps with the rest of the package.
        report: Dict[str, Any] = {"timestamp": datetime.utcnow().isoformat() + "Z"}

        # --- session provider probe -------------------------------------
        try:
            async with self.session_factory() as session:
                probe_key = f"test_{uuid.uuid4().hex}"
                await session.setex(probe_key, 10, "test_value")
                stored = await session.get(probe_key)

                if stored == "test_value":
                    report["session"] = {
                        "status": "ok",
                        "provider": self.session_provider_name
                    }
                else:
                    report["session"] = {
                        "status": "error",
                        "message": "Session store test failed",
                        "provider": self.session_provider_name
                    }
        except Exception as exc:
            report["session"] = {
                "status": "error",
                "message": str(exc),
                "provider": self.session_provider_name
            }

        # --- storage provider probe -------------------------------------
        try:
            async with self.s3_factory() as s3:
                await s3.head_bucket(Bucket=self.bucket)
                report["storage"] = {
                    "status": "ok",
                    "bucket": self.bucket,
                    "provider": self.storage_provider_name
                }
        except Exception as exc:
            report["storage"] = {
                "status": "error",
                "message": str(exc),
                "provider": self.storage_provider_name
            }

        return report

    async def get_stats(self) -> Dict[str, Any]:
        """Return a snapshot of the store's configuration and closed state."""
        return {
            "storage_provider": self.storage_provider_name,
            "session_provider": self.session_provider_name,
            "bucket": self.bucket,
            "max_retries": self.max_retries,
            "closed": self._artifact_store._closed,
        }
|
chuk_artifacts/base.py
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# chuk_artifacts/base.py
|
3
|
+
"""
|
4
|
+
base class for operation modules
|
5
|
+
"""
|
6
|
+
|
7
|
+
from __future__ import annotations
|
8
|
+
import json, logging
|
9
|
+
from typing import Dict, Any, TYPE_CHECKING
|
10
|
+
|
11
|
+
if TYPE_CHECKING:
|
12
|
+
from .store import ArtifactStore
|
13
|
+
|
14
|
+
from .exceptions import ArtifactNotFoundError, ArtifactCorruptedError, SessionError, ArtifactStoreError
|
15
|
+
|
16
|
+
logger = logging.getLogger(__name__)
|
17
|
+
|
18
|
+
|
19
|
+
class BaseOperations:
    """Shared base for operation modules.

    Holds a reference to the owning ArtifactStore and exposes its
    configuration through read-only properties, so subclasses never touch
    the store's private attributes directly.
    """

    def __init__(self, store: 'ArtifactStore'):
        # Private name (not ``self.store``) so attribute access can never
        # collide with operation method names defined by subclasses.
        self._artifact_store = store

    @property
    def bucket(self) -> str:
        """Bucket name configured on the parent store."""
        return self._artifact_store.bucket

    @property
    def s3_factory(self):
        """Async context-manager factory for object-storage clients."""
        return self._artifact_store._s3_factory

    @property
    def session_factory(self):
        """Async context-manager factory for session-provider clients."""
        return self._artifact_store._session_factory

    @property
    def storage_provider_name(self) -> str:
        """Name of the configured storage provider."""
        return self._artifact_store._storage_provider_name

    @property
    def session_provider_name(self) -> str:
        """Name of the configured session provider."""
        return self._artifact_store._session_provider_name

    @property
    def max_retries(self) -> int:
        """Maximum storage retry attempts configured on the parent store."""
        return self._artifact_store.max_retries

    def _check_closed(self):
        """Raise ArtifactStoreError when the parent store has been closed."""
        if self._artifact_store._closed:
            raise ArtifactStoreError("Store has been closed")

    async def _get_record(self, artifact_id: str) -> Dict[str, Any]:
        """Fetch and decode the metadata record for *artifact_id*.

        Shared helper used by several operation modules. Raises SessionError
        on provider failures, ArtifactNotFoundError when the key is missing
        or expired, and ArtifactCorruptedError on undecodable JSON.
        """
        try:
            async with self.session_factory() as session:
                raw = await session.get(artifact_id)
        except Exception as exc:
            raise SessionError(f"Session provider error retrieving {artifact_id}: {exc}") from exc

        if raw is None:
            raise ArtifactNotFoundError(f"Artifact {artifact_id} not found or expired")

        try:
            return json.loads(raw)
        except json.JSONDecodeError as exc:
            logger.error(f"Corrupted metadata for artifact {artifact_id}: {exc}")
            raise ArtifactCorruptedError(f"Corrupted metadata for artifact {artifact_id}") from exc
|
chuk_artifacts/batch.py
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
# ===========================================================================
|
2
|
+
# chuk_artifacts/batch.py - Batch operations
|
3
|
+
# ===========================================================================
|
4
|
+
"""
|
5
|
+
Batch operations for multiple artifacts.
|
6
|
+
"""
|
7
|
+
|
8
|
+
from __future__ import annotations
|
9
|
+
|
10
|
+
import uuid, hashlib, json, logging
|
11
|
+
from datetime import datetime
|
12
|
+
from typing import Any, Dict, List, Optional
|
13
|
+
|
14
|
+
from .base import BaseOperations
|
15
|
+
from .exceptions import ArtifactStoreError
|
16
|
+
|
17
|
+
logger = logging.getLogger(__name__)
|
18
|
+
|
19
|
+
_ANON_PREFIX = "anon"
|
20
|
+
_DEFAULT_TTL = 900
|
21
|
+
|
22
|
+
|
23
|
+
class BatchOperations(BaseOperations):
    """Batch storage of multiple artifacts in one call.

    Failures are isolated per item: a failed item is logged and its slot in
    the returned id list is set to ``None`` instead of aborting the batch.
    """

    async def store_batch(
        self,
        items: List[Dict[str, Any]],
        session_id: str | None = None,
        ttl: int = _DEFAULT_TTL,
    ) -> List[str]:
        """Store multiple artifacts in a batch operation.

        Parameters
        ----------
        items : List[Dict[str, Any]]
            One dict per artifact. Required keys (read below): ``data``
            (bytes), ``mime`` and ``summary``; optional keys: ``filename``
            and ``meta``.
        session_id : str, optional
            Session scope shared by all items; when omitted each artifact
            gets its own anonymous scope.
        ttl : int, optional
            Metadata time-to-live in seconds (default 900).

        Returns
        -------
        List[str]
            Artifact ids in input order. Entries for failed items are
            ``None`` (see the except branch), so the result always has the
            same length as ``items``.
        """
        self._check_closed()

        artifact_ids = []
        failed_items = []

        for i, item in enumerate(items):
            try:
                artifact_id = uuid.uuid4().hex
                # Anonymous per-artifact scope when no session id was given.
                scope = session_id or f"{_ANON_PREFIX}_{artifact_id}"
                key = f"sess/{scope}/{artifact_id}"

                # Store payload in object storage (with retry/backoff).
                await self._store_with_retry(
                    item["data"], key, item["mime"],
                    item.get("filename"), scope
                )

                # Prepare metadata record describing the stored object.
                record = {
                    "scope": scope,
                    "key": key,
                    "mime": item["mime"],
                    "summary": item["summary"],
                    "meta": item.get("meta", {}),
                    "filename": item.get("filename"),
                    "bytes": len(item["data"]),
                    "sha256": hashlib.sha256(item["data"]).hexdigest(),
                    "stored_at": datetime.utcnow().isoformat(timespec="seconds") + "Z",
                    "ttl": ttl,
                    "storage_provider": self.storage_provider_name,
                    "session_provider": self.session_provider_name,
                }

                # Store metadata via session provider; it expires with ttl.
                session_ctx_mgr = self.session_factory()
                async with session_ctx_mgr as session:
                    await session.setex(artifact_id, ttl, json.dumps(record))

                artifact_ids.append(artifact_id)

            except Exception as e:
                # Item-level failure: record the index and keep the slot so
                # callers can correlate ids with input positions.
                logger.error(f"Batch item {i} failed: {e}")
                failed_items.append(i)
                artifact_ids.append(None)  # Placeholder preserves alignment

        if failed_items:
            logger.warning(f"Batch operation completed with {len(failed_items)} failures")

        return artifact_ids

    async def _store_with_retry(self, data: bytes, key: str, mime: str, filename: str, scope: str):
        """Store data with retry logic (copied from core for batch operations).

        Retries up to ``self.max_retries`` times with exponential backoff
        (1s, 2s, 4s, ...) and re-raises the last exception when all attempts
        fail. NOTE(review): if ``max_retries`` is 0 the loop never runs and
        ``raise last_exception`` raises ``None`` (TypeError) — confirm the
        store always configures at least one attempt.
        """
        import asyncio

        last_exception = None

        for attempt in range(self.max_retries):
            try:
                storage_ctx_mgr = self.s3_factory()
                async with storage_ctx_mgr as s3:
                    await s3.put_object(
                        Bucket=self.bucket,
                        Key=key,
                        Body=data,
                        ContentType=mime,
                        # filename may be None for unnamed items; stored as ""
                        Metadata={"filename": filename or "", "scope": scope},
                    )
                    return  # Success

            except Exception as e:
                last_exception = e
                if attempt < self.max_retries - 1:
                    wait_time = 2 ** attempt  # Exponential backoff
                    logger.warning(
                        f"Batch storage attempt {attempt + 1} failed, retrying in {wait_time}s",
                        extra={"error": str(e), "attempt": attempt + 1}
                    )
                    await asyncio.sleep(wait_time)
                else:
                    logger.error(f"All {self.max_retries} batch storage attempts failed")

        raise last_exception
|
115
|
+
|