chuk-artifacts 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,149 @@
1
+ # -*- coding: utf-8 -*-
2
+ # chuk_artifacts/__init__.py
3
+ """
4
+ Asynchronous, object-store-backed artifact manager.
5
+
6
+ This package provides a high-level interface for storing and retrieving
7
+ artifacts across multiple storage backends (S3, IBM COS, filesystem, memory)
8
+ with metadata caching and presigned URL support.
9
+ """
10
+ from __future__ import annotations
11
+ from dotenv import load_dotenv
12
+
13
+ # Core classes
14
+ from .store import ArtifactStore
15
+
16
+ # Exception classes
17
+ from .exceptions import (
18
+ ArtifactStoreError,
19
+ ArtifactNotFoundError,
20
+ ArtifactExpiredError,
21
+ ArtifactCorruptedError,
22
+ ProviderError,
23
+ SessionError,
24
+ )
25
+
26
+ # Operation modules (for advanced usage)
27
+ from .core import CoreStorageOperations
28
+ from .presigned import PresignedURLOperations
29
+ from .metadata import MetadataOperations
30
+ from .batch import BatchOperations
31
+ from .admin import AdminOperations
32
+ from .store import _DEFAULT_TTL, _DEFAULT_PRESIGN_EXPIRES
33
+
34
+
35
# Load environment variables from a local .env file (if present) so provider
# configuration (credentials, endpoints) can be supplied without exporting
# variables manually. Runs at import time as a deliberate side effect.
load_dotenv()

# Package version.
# NOTE(review): the distribution metadata for this release says 0.1.0 while
# this string says 1.0.0 — confirm which is authoritative and align them.
__version__ = "1.0.0"

# Explicit public API of the package.
__all__ = [
    # Main class
    "ArtifactStore",

    # Exceptions
    "ArtifactStoreError",
    "ArtifactNotFoundError",
    "ArtifactExpiredError",
    "ArtifactCorruptedError",
    "ProviderError",
    "SessionError",

    # Operation modules (advanced usage)
    "CoreStorageOperations",
    "PresignedURLOperations",
    "MetadataOperations",
    "BatchOperations",
    "AdminOperations",

    # Constants
    # NOTE(review): leading-underscore names are conventionally private;
    # exporting them here appears deliberate (advanced tuning) — confirm.
    "_DEFAULT_TTL",
    "_DEFAULT_PRESIGN_EXPIRES",
]
64
+
65
# Convenience aliases for common operations
def create_store(**kwargs) -> ArtifactStore:
    """
    Build an ArtifactStore with sensible defaults.

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to the ArtifactStore constructor.

    Returns
    -------
    ArtifactStore
        Configured artifact store

    Examples
    --------
    >>> store = create_store()  # Memory-based
    >>> store = create_store(storage_provider="ibm_cos", bucket="my-bucket")
    """
    # Thin wrapper: exists purely so callers have a discoverable entry point.
    configured_store = ArtifactStore(**kwargs)
    return configured_store
86
+
87
+
88
async def quick_store(
    data: bytes,
    *,
    mime: str = "application/octet-stream",
    summary: str = "Quick upload",
    **store_kwargs
) -> tuple[ArtifactStore, str]:
    """
    One-shot helper: create a store and immediately persist *data* in it.

    Parameters
    ----------
    data : bytes
        Data to store
    mime : str, optional
        MIME type
    summary : str, optional
        Description
    **store_kwargs
        Forwarded unchanged to the ArtifactStore constructor.

    Returns
    -------
    tuple
        (store_instance, artifact_id) — the store is returned so the caller
        can perform follow-up operations (e.g. presigning) on the same
        artifact.

    Examples
    --------
    >>> store, artifact_id = await quick_store(
    ...     b"Hello world",
    ...     mime="text/plain",
    ...     storage_provider="filesystem"
    ... )
    >>> url = await store.presign(artifact_id)
    """
    artifact_store = ArtifactStore(**store_kwargs)
    new_artifact_id = await artifact_store.store(data, mime=mime, summary=summary)
    return artifact_store, new_artifact_id
126
+
127
+
128
# Module-level configuration helper
def configure_logging(level: str = "INFO"):
    """
    Configure logging for the artifacts package.

    Parameters
    ----------
    level : str
        Logging level name (DEBUG, INFO, WARNING, ERROR). Case-insensitive.

    Raises
    ------
    ValueError
        If *level* is not a recognised logging level name.
    """
    import logging

    # FIX: a bare getattr(logging, level.upper()) raised a cryptic
    # AttributeError on typos and could resolve non-level module attributes
    # (e.g. "BASIC_FORMAT") to a nonsense level. Resolve defensively and
    # fail with a clear error instead (pattern from the logging HOWTO).
    numeric_level = getattr(logging, level.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError(f"Invalid logging level: {level!r}")

    logger = logging.getLogger("chuk_artifacts")
    logger.setLevel(numeric_level)

    # Attach a stream handler only once so repeated calls do not duplicate
    # log output.
    if not logger.handlers:
        handler = logging.StreamHandler()
        formatter = logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
        )
        handler.setFormatter(formatter)
        logger.addHandler(handler)
@@ -0,0 +1,79 @@
1
+ # -*- coding: utf-8 -*-
2
+ # chuk_artifacts/admin.py
3
+ """
4
+ Administrative and debugging operations
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import uuid, logging
10
+ from datetime import datetime
11
+ from typing import Any, Dict
12
+
13
+ from .base import BaseOperations
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class AdminOperations(BaseOperations):
    """Administrative and debugging helpers for an ArtifactStore."""

    async def validate_configuration(self) -> Dict[str, Any]:
        """Probe the session and storage providers for connectivity.

        Returns a dict with a UTC timestamp and a per-provider status entry;
        provider failures are captured in the report rather than raised.
        """
        report: Dict[str, Any] = {"timestamp": datetime.utcnow().isoformat() + "Z"}

        # --- session provider: round-trip a short-lived probe key -----------
        try:
            async with self.session_factory() as session:
                probe_key = f"test_{uuid.uuid4().hex}"
                await session.setex(probe_key, 10, "test_value")
                echoed = await session.get(probe_key)

                if echoed == "test_value":
                    report["session"] = {
                        "status": "ok",
                        "provider": self.session_provider_name
                    }
                else:
                    report["session"] = {
                        "status": "error",
                        "message": "Session store test failed",
                        "provider": self.session_provider_name
                    }
        except Exception as exc:
            report["session"] = {
                "status": "error",
                "message": str(exc),
                "provider": self.session_provider_name
            }

        # --- storage provider: check the configured bucket is reachable -----
        try:
            async with self.s3_factory() as s3:
                await s3.head_bucket(Bucket=self.bucket)
                report["storage"] = {
                    "status": "ok",
                    "bucket": self.bucket,
                    "provider": self.storage_provider_name
                }
        except Exception as exc:
            report["storage"] = {
                "status": "error",
                "message": str(exc),
                "provider": self.storage_provider_name
            }

        return report

    async def get_stats(self) -> Dict[str, Any]:
        """Return a snapshot of the store's configuration and open/closed state."""
        return {
            "storage_provider": self.storage_provider_name,
            "session_provider": self.session_provider_name,
            "bucket": self.bucket,
            "max_retries": self.max_retries,
            "closed": self._artifact_store._closed,
        }
chuk_artifacts/base.py ADDED
@@ -0,0 +1,75 @@
1
+ # -*- coding: utf-8 -*-
2
+ # chuk_artifacts/base.py
3
+ """
4
+ base class for operation modules
5
+ """
6
+
7
+ from __future__ import annotations
8
+ import json, logging
9
+ from typing import Dict, Any, TYPE_CHECKING
10
+
11
+ if TYPE_CHECKING:
12
+ from .store import ArtifactStore
13
+
14
+ from .exceptions import ArtifactNotFoundError, ArtifactCorruptedError, SessionError, ArtifactStoreError
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class BaseOperations:
    """Shared plumbing for operation modules.

    Keeps a reference to the owning ArtifactStore and re-exposes its
    configuration through read-only properties.
    """

    def __init__(self, store: 'ArtifactStore'):
        # Deliberately verbose attribute name: a plain ``self.store`` would
        # collide with operation methods that are themselves named ``store``.
        self._artifact_store = store

    @property
    def bucket(self) -> str:
        """Bucket name configured on the owning store."""
        return self._artifact_store.bucket

    @property
    def s3_factory(self):
        """Factory producing async context managers for storage clients."""
        return self._artifact_store._s3_factory

    @property
    def session_factory(self):
        """Factory producing async context managers for session clients."""
        return self._artifact_store._session_factory

    @property
    def storage_provider_name(self) -> str:
        """Name of the configured storage provider."""
        return self._artifact_store._storage_provider_name

    @property
    def session_provider_name(self) -> str:
        """Name of the configured session (metadata) provider."""
        return self._artifact_store._session_provider_name

    @property
    def max_retries(self) -> int:
        """Maximum number of storage retry attempts."""
        return self._artifact_store.max_retries

    def _check_closed(self):
        """Raise ArtifactStoreError if the owning store has been closed."""
        if self._artifact_store._closed:
            raise ArtifactStoreError("Store has been closed")

    async def _get_record(self, artifact_id: str) -> Dict[str, Any]:
        """
        Retrieve and decode the metadata record for *artifact_id*.

        Shared helper used by multiple operation modules. Raises SessionError
        on provider failure, ArtifactNotFoundError when the key is missing or
        expired, and ArtifactCorruptedError on undecodable JSON.
        """
        try:
            async with self.session_factory() as session:
                payload = await session.get(artifact_id)
        except Exception as e:
            raise SessionError(f"Session provider error retrieving {artifact_id}: {e}") from e

        if payload is None:
            raise ArtifactNotFoundError(f"Artifact {artifact_id} not found or expired")

        try:
            return json.loads(payload)
        except json.JSONDecodeError as e:
            logger.error(f"Corrupted metadata for artifact {artifact_id}: {e}")
            raise ArtifactCorruptedError(f"Corrupted metadata for artifact {artifact_id}") from e
@@ -0,0 +1,115 @@
1
+ # ===========================================================================
2
+ # chuk_artifacts/batch.py - Batch operations
3
+ # ===========================================================================
4
+ """
5
+ Batch operations for multiple artifacts.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import uuid, hashlib, json, logging
11
+ from datetime import datetime
12
+ from typing import Any, Dict, List, Optional
13
+
14
+ from .base import BaseOperations
15
+ from .exceptions import ArtifactStoreError
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ _ANON_PREFIX = "anon"
20
+ _DEFAULT_TTL = 900
21
+
22
+
23
class BatchOperations(BaseOperations):
    """Handles batch operations for multiple artifacts."""

    async def store_batch(
        self,
        items: List[Dict[str, Any]],
        session_id: str | None = None,
        ttl: int = _DEFAULT_TTL,
    ) -> List[Optional[str]]:
        """Store multiple artifacts in a batch operation.

        Parameters
        ----------
        items : list of dict
            Each item must provide ``data`` (bytes), ``mime`` (str) and
            ``summary`` (str); ``filename`` and ``meta`` are optional.
        session_id : str, optional
            Scope shared by every item; when omitted each artifact gets its
            own anonymous scope.
        ttl : int, optional
            Metadata time-to-live in seconds.

        Returns
        -------
        List[Optional[str]]
            Artifact IDs in input order. A failed item yields ``None`` in
            its slot rather than aborting the whole batch.
            (FIX: annotation corrected — the previous ``List[str]`` hid the
            ``None`` placeholders.)
        """
        self._check_closed()

        artifact_ids = []
        failed_items = []

        for i, item in enumerate(items):
            try:
                artifact_id = uuid.uuid4().hex
                scope = session_id or f"{_ANON_PREFIX}_{artifact_id}"
                key = f"sess/{scope}/{artifact_id}"

                # Store payload in object storage (with retry).
                await self._store_with_retry(
                    item["data"], key, item["mime"],
                    item.get("filename"), scope
                )

                # Metadata record mirroring what core storage writes.
                record = {
                    "scope": scope,
                    "key": key,
                    "mime": item["mime"],
                    "summary": item["summary"],
                    "meta": item.get("meta", {}),
                    "filename": item.get("filename"),
                    "bytes": len(item["data"]),
                    "sha256": hashlib.sha256(item["data"]).hexdigest(),
                    "stored_at": datetime.utcnow().isoformat(timespec="seconds") + "Z",
                    "ttl": ttl,
                    "storage_provider": self.storage_provider_name,
                    "session_provider": self.session_provider_name,
                }

                # Persist metadata via the session provider with the same TTL.
                session_ctx_mgr = self.session_factory()
                async with session_ctx_mgr as session:
                    await session.setex(artifact_id, ttl, json.dumps(record))

                artifact_ids.append(artifact_id)

            except Exception as e:
                logger.error(f"Batch item {i} failed: {e}")
                failed_items.append(i)
                artifact_ids.append(None)  # placeholder keeps positions aligned

        if failed_items:
            logger.warning(f"Batch operation completed with {len(failed_items)} failures")

        return artifact_ids

    async def _store_with_retry(self, data: bytes, key: str, mime: str, filename: str, scope: str):
        """Store *data* in object storage with exponential-backoff retries.

        Re-raises the last storage exception once ``max_retries`` attempts
        are exhausted.
        """
        import asyncio  # local import: only needed for backoff sleeps

        last_exception = None

        for attempt in range(self.max_retries):
            try:
                storage_ctx_mgr = self.s3_factory()
                async with storage_ctx_mgr as s3:
                    await s3.put_object(
                        Bucket=self.bucket,
                        Key=key,
                        Body=data,
                        ContentType=mime,
                        Metadata={"filename": filename or "", "scope": scope},
                    )
                return  # Success

            except Exception as e:
                last_exception = e
                if attempt < self.max_retries - 1:
                    wait_time = 2 ** attempt  # Exponential backoff
                    logger.warning(
                        f"Batch storage attempt {attempt + 1} failed, retrying in {wait_time}s",
                        extra={"error": str(e), "attempt": attempt + 1}
                    )
                    await asyncio.sleep(wait_time)
                else:
                    logger.error(f"All {self.max_retries} batch storage attempts failed")

        # FIX: with max_retries < 1 the loop never runs, last_exception stays
        # None, and the old bare ``raise last_exception`` produced a confusing
        # "exceptions must derive from BaseException" TypeError. Fail loudly
        # with a real error instead.
        if last_exception is None:
            raise ArtifactStoreError(
                f"Storage of {key} never attempted (max_retries={self.max_retries})"
            )
        raise last_exception
115
+