chuk-artifacts 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- chuk_artifacts/admin.py +24 -18
- chuk_artifacts/core.py +94 -120
- chuk_artifacts/metadata.py +139 -240
- chuk_artifacts/presigned.py +59 -23
- chuk_artifacts/session/__init__.py +0 -0
- chuk_artifacts/session/session_manager.py +196 -0
- chuk_artifacts/{session_operations.py → session/session_operations.py} +46 -125
- chuk_artifacts/store.py +353 -267
- {chuk_artifacts-0.1.1.dist-info → chuk_artifacts-0.1.3.dist-info}/METADATA +200 -191
- {chuk_artifacts-0.1.1.dist-info → chuk_artifacts-0.1.3.dist-info}/RECORD +13 -11
- {chuk_artifacts-0.1.1.dist-info → chuk_artifacts-0.1.3.dist-info}/WHEEL +0 -0
- {chuk_artifacts-0.1.1.dist-info → chuk_artifacts-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {chuk_artifacts-0.1.1.dist-info → chuk_artifacts-0.1.3.dist-info}/top_level.txt +0 -0
chuk_artifacts/admin.py
CHANGED
@@ -8,15 +8,19 @@ from __future__ import annotations
|
|
8
8
|
|
9
9
|
import uuid, logging
|
10
10
|
from datetime import datetime
|
11
|
-
from typing import Any, Dict
|
11
|
+
from typing import Any, Dict, TYPE_CHECKING
|
12
12
|
|
13
|
-
|
13
|
+
if TYPE_CHECKING:
|
14
|
+
from .store import ArtifactStore
|
14
15
|
|
15
16
|
logger = logging.getLogger(__name__)
|
16
17
|
|
17
18
|
|
18
|
-
class AdminOperations(BaseOperations):
|
19
|
-
"""
|
19
|
+
class AdminOperations:
|
20
|
+
"""Handles administrative and debugging operations."""
|
21
|
+
|
22
|
+
def __init__(self, artifact_store: 'ArtifactStore'):
|
23
|
+
self.store = artifact_store
|
20
24
|
|
21
25
|
async def validate_configuration(self) -> Dict[str, Any]:
|
22
26
|
"""Validate store configuration and connectivity."""
|
@@ -24,7 +28,7 @@ class AdminOperations(BaseOperations):
|
|
24
28
|
|
25
29
|
# Test session provider
|
26
30
|
try:
|
27
|
-
session_ctx_mgr = self.
|
31
|
+
session_ctx_mgr = self.store._session_factory()
|
28
32
|
async with session_ctx_mgr as session:
|
29
33
|
# Test basic operations
|
30
34
|
test_key = f"test_{uuid.uuid4().hex}"
|
@@ -34,36 +38,36 @@ class AdminOperations(BaseOperations):
|
|
34
38
|
if value == "test_value":
|
35
39
|
results["session"] = {
|
36
40
|
"status": "ok",
|
37
|
-
"provider": self.
|
41
|
+
"provider": self.store._session_provider_name
|
38
42
|
}
|
39
43
|
else:
|
40
44
|
results["session"] = {
|
41
45
|
"status": "error",
|
42
46
|
"message": "Session store test failed",
|
43
|
-
"provider": self.
|
47
|
+
"provider": self.store._session_provider_name
|
44
48
|
}
|
45
49
|
except Exception as e:
|
46
50
|
results["session"] = {
|
47
51
|
"status": "error",
|
48
52
|
"message": str(e),
|
49
|
-
"provider": self.
|
53
|
+
"provider": self.store._session_provider_name
|
50
54
|
}
|
51
55
|
|
52
56
|
# Test storage provider
|
53
57
|
try:
|
54
|
-
storage_ctx_mgr = self.
|
58
|
+
storage_ctx_mgr = self.store._s3_factory()
|
55
59
|
async with storage_ctx_mgr as s3:
|
56
|
-
await s3.head_bucket(Bucket=self.bucket)
|
60
|
+
await s3.head_bucket(Bucket=self.store.bucket)
|
57
61
|
results["storage"] = {
|
58
62
|
"status": "ok",
|
59
|
-
"bucket": self.bucket,
|
60
|
-
"provider": self.
|
63
|
+
"bucket": self.store.bucket,
|
64
|
+
"provider": self.store._storage_provider_name
|
61
65
|
}
|
62
66
|
except Exception as e:
|
63
67
|
results["storage"] = {
|
64
68
|
"status": "error",
|
65
69
|
"message": str(e),
|
66
|
-
"provider": self.
|
70
|
+
"provider": self.store._storage_provider_name
|
67
71
|
}
|
68
72
|
|
69
73
|
return results
|
@@ -71,9 +75,11 @@ class AdminOperations(BaseOperations):
|
|
71
75
|
async def get_stats(self) -> Dict[str, Any]:
|
72
76
|
"""Get storage statistics."""
|
73
77
|
return {
|
74
|
-
"storage_provider": self.
|
75
|
-
"session_provider": self.
|
76
|
-
"bucket": self.bucket,
|
77
|
-
"max_retries": self.max_retries,
|
78
|
-
"closed": self.
|
78
|
+
"storage_provider": self.store._storage_provider_name,
|
79
|
+
"session_provider": self.store._session_provider_name,
|
80
|
+
"bucket": self.store.bucket,
|
81
|
+
"max_retries": self.store.max_retries,
|
82
|
+
"closed": self.store._closed,
|
83
|
+
"sandbox_id": self.store.sandbox_id,
|
84
|
+
"session_ttl_hours": self.store.session_ttl_hours,
|
79
85
|
}
|
chuk_artifacts/core.py
CHANGED
@@ -1,35 +1,35 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
# chuk_artifacts/core.py
|
3
3
|
"""
|
4
|
-
core storage operations.
|
4
|
+
Clean core storage operations - grid architecture only.
|
5
5
|
"""
|
6
6
|
|
7
7
|
from __future__ import annotations
|
8
8
|
|
9
|
-
import uuid
|
9
|
+
import uuid
|
10
|
+
import hashlib
|
11
|
+
import time
|
12
|
+
import asyncio
|
13
|
+
import json
|
14
|
+
import logging
|
10
15
|
from datetime import datetime
|
11
16
|
from typing import Any, Dict, Optional, TYPE_CHECKING
|
12
17
|
|
13
18
|
if TYPE_CHECKING:
|
14
19
|
from .store import ArtifactStore
|
15
20
|
|
16
|
-
from .exceptions import
|
17
|
-
ArtifactStoreError, ArtifactNotFoundError, ArtifactExpiredError,
|
18
|
-
ArtifactCorruptedError, ProviderError, SessionError
|
19
|
-
)
|
21
|
+
from .exceptions import ArtifactStoreError, ProviderError, SessionError
|
20
22
|
|
21
23
|
logger = logging.getLogger(__name__)
|
22
24
|
|
23
|
-
_ANON_PREFIX = "anon"
|
24
25
|
_DEFAULT_TTL = 900
|
25
26
|
|
26
27
|
|
27
28
|
class CoreStorageOperations:
|
28
|
-
"""core storage operations
|
29
|
+
"""Clean core storage operations with grid architecture."""
|
29
30
|
|
30
31
|
def __init__(self, artifact_store: 'ArtifactStore'):
|
31
|
-
self.artifact_store = artifact_store
|
32
|
-
logger.info(f"CoreStorageOperations initialized with store: {type(artifact_store)}")
|
32
|
+
self.store = artifact_store
|
33
33
|
|
34
34
|
async def store(
|
35
35
|
self,
|
@@ -39,26 +39,28 @@ class CoreStorageOperations:
|
|
39
39
|
summary: str,
|
40
40
|
meta: Dict[str, Any] | None = None,
|
41
41
|
filename: str | None = None,
|
42
|
-
session_id: str
|
42
|
+
session_id: str, # Required - no more optional sessions
|
43
43
|
ttl: int = _DEFAULT_TTL,
|
44
44
|
) -> str:
|
45
|
-
"""Store artifact
|
46
|
-
if self.
|
47
|
-
raise ArtifactStoreError("Store
|
45
|
+
"""Store artifact with grid key generation."""
|
46
|
+
if self.store._closed:
|
47
|
+
raise ArtifactStoreError("Store is closed")
|
48
48
|
|
49
49
|
start_time = time.time()
|
50
50
|
artifact_id = uuid.uuid4().hex
|
51
51
|
|
52
|
-
|
53
|
-
key =
|
54
|
-
|
52
|
+
# Generate grid key
|
53
|
+
key = self.store.generate_artifact_key(session_id, artifact_id)
|
54
|
+
|
55
55
|
try:
|
56
|
-
# Store in object storage
|
57
|
-
await self._store_with_retry(data, key, mime, filename,
|
58
|
-
|
56
|
+
# Store in object storage
|
57
|
+
await self._store_with_retry(data, key, mime, filename, session_id)
|
58
|
+
|
59
59
|
# Build metadata record
|
60
60
|
record = {
|
61
|
-
"
|
61
|
+
"artifact_id": artifact_id,
|
62
|
+
"session_id": session_id,
|
63
|
+
"sandbox_id": self.store.sandbox_id,
|
62
64
|
"key": key,
|
63
65
|
"mime": mime,
|
64
66
|
"summary": summary,
|
@@ -66,95 +68,54 @@ class CoreStorageOperations:
|
|
66
68
|
"filename": filename,
|
67
69
|
"bytes": len(data),
|
68
70
|
"sha256": hashlib.sha256(data).hexdigest(),
|
69
|
-
"stored_at": datetime.utcnow().isoformat(
|
71
|
+
"stored_at": datetime.utcnow().isoformat() + "Z",
|
70
72
|
"ttl": ttl,
|
71
|
-
"storage_provider": self.
|
72
|
-
"session_provider": self.
|
73
|
+
"storage_provider": self.store._storage_provider_name,
|
74
|
+
"session_provider": self.store._session_provider_name,
|
73
75
|
}
|
74
|
-
|
75
|
-
#
|
76
|
-
session_ctx_mgr = self.
|
76
|
+
|
77
|
+
# Store metadata
|
78
|
+
session_ctx_mgr = self.store._session_factory()
|
77
79
|
async with session_ctx_mgr as session:
|
78
80
|
await session.setex(artifact_id, ttl, json.dumps(record))
|
79
|
-
|
81
|
+
|
80
82
|
duration_ms = int((time.time() - start_time) * 1000)
|
81
83
|
logger.info(
|
82
|
-
"Artifact stored
|
84
|
+
"Artifact stored",
|
83
85
|
extra={
|
84
86
|
"artifact_id": artifact_id,
|
87
|
+
"session_id": session_id,
|
88
|
+
"key": key,
|
85
89
|
"bytes": len(data),
|
86
|
-
"mime": mime,
|
87
90
|
"duration_ms": duration_ms,
|
88
|
-
"storage_provider": self.artifact_store._storage_provider_name,
|
89
91
|
}
|
90
92
|
)
|
91
|
-
|
93
|
+
|
92
94
|
return artifact_id
|
93
|
-
|
94
|
-
except Exception as e:
|
95
|
-
duration_ms = int((time.time() - start_time) * 1000)
|
96
|
-
logger.error(
|
97
|
-
"Artifact storage failed",
|
98
|
-
extra={
|
99
|
-
"artifact_id": artifact_id,
|
100
|
-
"error": str(e),
|
101
|
-
"duration_ms": duration_ms,
|
102
|
-
"storage_provider": self.artifact_store._storage_provider_name,
|
103
|
-
},
|
104
|
-
exc_info=True
|
105
|
-
)
|
106
95
|
|
107
|
-
|
108
|
-
|
96
|
+
except Exception as e:
|
97
|
+
logger.error(f"Storage failed for {artifact_id}: {e}")
|
98
|
+
if "session" in str(e).lower():
|
99
|
+
raise SessionError(f"Metadata storage failed: {e}") from e
|
109
100
|
else:
|
110
|
-
raise ProviderError(f"Storage
|
111
|
-
|
112
|
-
async def _store_with_retry(self, data: bytes, key: str, mime: str, filename: str, scope: str):
|
113
|
-
"""Store data with retry logic."""
|
114
|
-
last_exception = None
|
115
|
-
|
116
|
-
for attempt in range(self.artifact_store.max_retries):
|
117
|
-
try:
|
118
|
-
storage_ctx_mgr = self.artifact_store._s3_factory()
|
119
|
-
async with storage_ctx_mgr as s3:
|
120
|
-
await s3.put_object(
|
121
|
-
Bucket=self.artifact_store.bucket,
|
122
|
-
Key=key,
|
123
|
-
Body=data,
|
124
|
-
ContentType=mime,
|
125
|
-
Metadata={"filename": filename or "", "scope": scope},
|
126
|
-
)
|
127
|
-
return # Success
|
128
|
-
|
129
|
-
except Exception as e:
|
130
|
-
last_exception = e
|
131
|
-
if attempt < self.artifact_store.max_retries - 1:
|
132
|
-
wait_time = 2 ** attempt # Exponential backoff
|
133
|
-
logger.warning(
|
134
|
-
f"Storage attempt {attempt + 1} failed, retrying in {wait_time}s",
|
135
|
-
extra={"error": str(e), "attempt": attempt + 1}
|
136
|
-
)
|
137
|
-
await asyncio.sleep(wait_time)
|
138
|
-
else:
|
139
|
-
logger.error(f"All {self.artifact_store.max_retries} storage attempts failed")
|
140
|
-
|
141
|
-
raise last_exception
|
101
|
+
raise ProviderError(f"Storage failed: {e}") from e
|
142
102
|
|
143
103
|
async def retrieve(self, artifact_id: str) -> bytes:
|
144
|
-
"""Retrieve artifact data
|
145
|
-
if self.
|
146
|
-
raise ArtifactStoreError("Store
|
147
|
-
|
148
|
-
start_time = time.time()
|
104
|
+
"""Retrieve artifact data."""
|
105
|
+
if self.store._closed:
|
106
|
+
raise ArtifactStoreError("Store is closed")
|
149
107
|
|
150
108
|
try:
|
151
109
|
record = await self._get_record(artifact_id)
|
152
110
|
|
153
|
-
storage_ctx_mgr = self.
|
111
|
+
storage_ctx_mgr = self.store._s3_factory()
|
154
112
|
async with storage_ctx_mgr as s3:
|
155
|
-
response = await s3.get_object(
|
113
|
+
response = await s3.get_object(
|
114
|
+
Bucket=self.store.bucket,
|
115
|
+
Key=record["key"]
|
116
|
+
)
|
156
117
|
|
157
|
-
# Handle different response formats
|
118
|
+
# Handle different response formats
|
158
119
|
if hasattr(response["Body"], "read"):
|
159
120
|
data = await response["Body"].read()
|
160
121
|
elif isinstance(response["Body"], bytes):
|
@@ -162,54 +123,67 @@ class CoreStorageOperations:
|
|
162
123
|
else:
|
163
124
|
data = bytes(response["Body"])
|
164
125
|
|
165
|
-
# Verify integrity
|
126
|
+
# Verify integrity
|
166
127
|
if "sha256" in record and record["sha256"]:
|
167
|
-
|
168
|
-
if
|
169
|
-
raise
|
170
|
-
f"SHA256 mismatch: expected {record['sha256']}, got {computed_hash}"
|
171
|
-
)
|
172
|
-
|
173
|
-
duration_ms = int((time.time() - start_time) * 1000)
|
174
|
-
logger.info(
|
175
|
-
"Artifact retrieved successfully",
|
176
|
-
extra={
|
177
|
-
"artifact_id": artifact_id,
|
178
|
-
"bytes": len(data),
|
179
|
-
"duration_ms": duration_ms,
|
180
|
-
}
|
181
|
-
)
|
128
|
+
computed = hashlib.sha256(data).hexdigest()
|
129
|
+
if computed != record["sha256"]:
|
130
|
+
raise ProviderError(f"SHA256 mismatch: {record['sha256']} != {computed}")
|
182
131
|
|
183
132
|
return data
|
184
133
|
|
185
|
-
except (ArtifactNotFoundError, ArtifactExpiredError, ArtifactCorruptedError):
|
186
|
-
raise
|
187
134
|
except Exception as e:
|
188
|
-
|
189
|
-
logger.error(
|
190
|
-
"Artifact retrieval failed",
|
191
|
-
extra={
|
192
|
-
"artifact_id": artifact_id,
|
193
|
-
"error": str(e),
|
194
|
-
"duration_ms": duration_ms,
|
195
|
-
}
|
196
|
-
)
|
135
|
+
logger.error(f"Retrieval failed for {artifact_id}: {e}")
|
197
136
|
raise ProviderError(f"Retrieval failed: {e}") from e
|
198
137
|
|
138
|
+
async def _store_with_retry(
|
139
|
+
self,
|
140
|
+
data: bytes,
|
141
|
+
key: str,
|
142
|
+
mime: str,
|
143
|
+
filename: str,
|
144
|
+
session_id: str
|
145
|
+
):
|
146
|
+
"""Store with retry logic."""
|
147
|
+
last_exception = None
|
148
|
+
|
149
|
+
for attempt in range(self.store.max_retries):
|
150
|
+
try:
|
151
|
+
storage_ctx_mgr = self.store._s3_factory()
|
152
|
+
async with storage_ctx_mgr as s3:
|
153
|
+
await s3.put_object(
|
154
|
+
Bucket=self.store.bucket,
|
155
|
+
Key=key,
|
156
|
+
Body=data,
|
157
|
+
ContentType=mime,
|
158
|
+
Metadata={
|
159
|
+
"filename": filename or "",
|
160
|
+
"session_id": session_id,
|
161
|
+
"sandbox_id": self.store.sandbox_id,
|
162
|
+
},
|
163
|
+
)
|
164
|
+
return # Success
|
165
|
+
|
166
|
+
except Exception as e:
|
167
|
+
last_exception = e
|
168
|
+
if attempt < self.store.max_retries - 1:
|
169
|
+
wait_time = 2 ** attempt
|
170
|
+
await asyncio.sleep(wait_time)
|
171
|
+
|
172
|
+
raise last_exception
|
173
|
+
|
199
174
|
async def _get_record(self, artifact_id: str) -> Dict[str, Any]:
|
200
|
-
"""
|
175
|
+
"""Get artifact metadata."""
|
201
176
|
try:
|
202
|
-
session_ctx_mgr = self.
|
177
|
+
session_ctx_mgr = self.store._session_factory()
|
203
178
|
async with session_ctx_mgr as session:
|
204
179
|
raw = await session.get(artifact_id)
|
205
180
|
except Exception as e:
|
206
|
-
raise SessionError(f"Session
|
181
|
+
raise SessionError(f"Session error for {artifact_id}: {e}") from e
|
207
182
|
|
208
183
|
if raw is None:
|
209
|
-
raise
|
184
|
+
raise ProviderError(f"Artifact {artifact_id} not found")
|
210
185
|
|
211
186
|
try:
|
212
187
|
return json.loads(raw)
|
213
188
|
except json.JSONDecodeError as e:
|
214
|
-
|
215
|
-
raise ArtifactCorruptedError(f"Corrupted metadata for artifact {artifact_id}") from e
|
189
|
+
raise ProviderError(f"Corrupted metadata for {artifact_id}") from e
|