chuk-artifacts 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chuk_artifacts/admin.py +97 -19
- chuk_artifacts/base.py +9 -3
- chuk_artifacts/batch.py +44 -29
- chuk_artifacts/core.py +88 -113
- chuk_artifacts/metadata.py +141 -240
- chuk_artifacts/presigned.py +60 -23
- chuk_artifacts/store.py +393 -267
- {chuk_artifacts-0.1.2.dist-info → chuk_artifacts-0.1.4.dist-info}/METADATA +201 -192
- {chuk_artifacts-0.1.2.dist-info → chuk_artifacts-0.1.4.dist-info}/RECORD +12 -13
- chuk_artifacts/session_operations.py +0 -367
- {chuk_artifacts-0.1.2.dist-info → chuk_artifacts-0.1.4.dist-info}/WHEEL +0 -0
- {chuk_artifacts-0.1.2.dist-info → chuk_artifacts-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {chuk_artifacts-0.1.2.dist-info → chuk_artifacts-0.1.4.dist-info}/top_level.txt +0 -0
chuk_artifacts/admin.py
CHANGED
@@ -1,22 +1,27 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
# chuk_artifacts/admin.py
|
3
3
|
"""
|
4
|
-
Administrative and debugging operations
|
4
|
+
Administrative and debugging operations.
|
5
|
+
Now includes chuk_sessions integration.
|
5
6
|
"""
|
6
7
|
|
7
8
|
from __future__ import annotations
|
8
9
|
|
9
10
|
import uuid, logging
|
10
11
|
from datetime import datetime
|
11
|
-
from typing import Any, Dict
|
12
|
+
from typing import Any, Dict, TYPE_CHECKING
|
12
13
|
|
13
|
-
|
14
|
+
if TYPE_CHECKING:
|
15
|
+
from .store import ArtifactStore
|
14
16
|
|
15
17
|
logger = logging.getLogger(__name__)
|
16
18
|
|
17
19
|
|
18
|
-
class AdminOperations
|
19
|
-
"""
|
20
|
+
class AdminOperations:
|
21
|
+
"""Handles administrative and debugging operations."""
|
22
|
+
|
23
|
+
def __init__(self, artifact_store: 'ArtifactStore'):
|
24
|
+
self.artifact_store = artifact_store
|
20
25
|
|
21
26
|
async def validate_configuration(self) -> Dict[str, Any]:
|
22
27
|
"""Validate store configuration and connectivity."""
|
@@ -24,7 +29,7 @@ class AdminOperations(BaseOperations):
|
|
24
29
|
|
25
30
|
# Test session provider
|
26
31
|
try:
|
27
|
-
session_ctx_mgr = self.
|
32
|
+
session_ctx_mgr = self.store._session_factory()
|
28
33
|
async with session_ctx_mgr as session:
|
29
34
|
# Test basic operations
|
30
35
|
test_key = f"test_{uuid.uuid4().hex}"
|
@@ -34,46 +39,119 @@ class AdminOperations(BaseOperations):
|
|
34
39
|
if value == "test_value":
|
35
40
|
results["session"] = {
|
36
41
|
"status": "ok",
|
37
|
-
"provider": self.
|
42
|
+
"provider": self.store._session_provider_name
|
38
43
|
}
|
39
44
|
else:
|
40
45
|
results["session"] = {
|
41
46
|
"status": "error",
|
42
47
|
"message": "Session store test failed",
|
43
|
-
"provider": self.
|
48
|
+
"provider": self.store._session_provider_name
|
44
49
|
}
|
45
50
|
except Exception as e:
|
46
51
|
results["session"] = {
|
47
52
|
"status": "error",
|
48
53
|
"message": str(e),
|
49
|
-
"provider": self.
|
54
|
+
"provider": self.store._session_provider_name
|
50
55
|
}
|
51
56
|
|
52
57
|
# Test storage provider
|
53
58
|
try:
|
54
|
-
storage_ctx_mgr = self.
|
59
|
+
storage_ctx_mgr = self.store._s3_factory()
|
55
60
|
async with storage_ctx_mgr as s3:
|
56
|
-
await s3.head_bucket(Bucket=self.bucket)
|
61
|
+
await s3.head_bucket(Bucket=self.store.bucket)
|
57
62
|
results["storage"] = {
|
58
63
|
"status": "ok",
|
59
|
-
"bucket": self.bucket,
|
60
|
-
"provider": self.
|
64
|
+
"bucket": self.store.bucket,
|
65
|
+
"provider": self.store._storage_provider_name
|
61
66
|
}
|
62
67
|
except Exception as e:
|
63
68
|
results["storage"] = {
|
64
69
|
"status": "error",
|
65
70
|
"message": str(e),
|
66
|
-
"provider": self.
|
71
|
+
"provider": self.store._storage_provider_name
|
72
|
+
}
|
73
|
+
|
74
|
+
# Test session manager (chuk_sessions)
|
75
|
+
try:
|
76
|
+
# Try to allocate a test session
|
77
|
+
test_session = await self.store._session_manager.allocate_session(
|
78
|
+
user_id="test_admin_user"
|
79
|
+
)
|
80
|
+
# Validate it
|
81
|
+
is_valid = await self.store._session_manager.validate_session(test_session)
|
82
|
+
# Clean up
|
83
|
+
await self.store._session_manager.delete_session(test_session)
|
84
|
+
|
85
|
+
if is_valid:
|
86
|
+
results["session_manager"] = {
|
87
|
+
"status": "ok",
|
88
|
+
"sandbox_id": self.store.sandbox_id,
|
89
|
+
"test_session": test_session
|
90
|
+
}
|
91
|
+
else:
|
92
|
+
results["session_manager"] = {
|
93
|
+
"status": "error",
|
94
|
+
"message": "Session validation failed"
|
95
|
+
}
|
96
|
+
except Exception as e:
|
97
|
+
results["session_manager"] = {
|
98
|
+
"status": "error",
|
99
|
+
"message": str(e)
|
67
100
|
}
|
68
101
|
|
69
102
|
return results
|
70
103
|
|
71
104
|
async def get_stats(self) -> Dict[str, Any]:
|
72
105
|
"""Get storage statistics."""
|
106
|
+
base_stats = {
|
107
|
+
"storage_provider": self.store._storage_provider_name,
|
108
|
+
"session_provider": self.store._session_provider_name,
|
109
|
+
"bucket": self.store.bucket,
|
110
|
+
"max_retries": self.store.max_retries,
|
111
|
+
"closed": self.store._closed,
|
112
|
+
"sandbox_id": self.store.sandbox_id,
|
113
|
+
"session_ttl_hours": self.store.session_ttl_hours,
|
114
|
+
}
|
115
|
+
|
116
|
+
# Add session manager stats from chuk_sessions
|
117
|
+
try:
|
118
|
+
session_stats = self.store._session_manager.get_cache_stats()
|
119
|
+
base_stats["session_manager"] = session_stats
|
120
|
+
except Exception as e:
|
121
|
+
base_stats["session_manager"] = {
|
122
|
+
"error": str(e),
|
123
|
+
"status": "unavailable"
|
124
|
+
}
|
125
|
+
|
126
|
+
return base_stats
|
127
|
+
|
128
|
+
async def cleanup_all_expired(self) -> Dict[str, int]:
|
129
|
+
"""Clean up all expired resources."""
|
130
|
+
results = {"timestamp": datetime.utcnow().isoformat() + "Z"}
|
131
|
+
|
132
|
+
# Clean up expired sessions using chuk_sessions
|
133
|
+
try:
|
134
|
+
expired_sessions = await self.store._session_manager.cleanup_expired_sessions()
|
135
|
+
results["expired_sessions_cleaned"] = expired_sessions
|
136
|
+
except Exception as e:
|
137
|
+
results["session_cleanup_error"] = str(e)
|
138
|
+
results["expired_sessions_cleaned"] = 0
|
139
|
+
|
140
|
+
# TODO: Add artifact cleanup based on TTL
|
141
|
+
# This would require scanning metadata to find expired artifacts
|
142
|
+
results["expired_artifacts_cleaned"] = 0 # Placeholder
|
143
|
+
|
144
|
+
return results
|
145
|
+
|
146
|
+
async def get_sandbox_info(self) -> Dict[str, Any]:
|
147
|
+
"""Get information about the current sandbox."""
|
73
148
|
return {
|
74
|
-
"
|
75
|
-
"
|
76
|
-
"
|
77
|
-
|
78
|
-
|
149
|
+
"sandbox_id": self.store.sandbox_id,
|
150
|
+
"session_prefix_pattern": self.store.get_session_prefix_pattern(),
|
151
|
+
"grid_architecture": {
|
152
|
+
"enabled": True,
|
153
|
+
"pattern": "grid/{sandbox_id}/{session_id}/{artifact_id}",
|
154
|
+
"mandatory_sessions": True,
|
155
|
+
"federation_ready": True
|
156
|
+
}
|
79
157
|
}
|
chuk_artifacts/base.py
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
# chuk_artifacts/base.py
|
3
3
|
"""
|
4
|
-
|
4
|
+
Base class for operation modules.
|
5
|
+
Updated to work with chuk_sessions integration.
|
5
6
|
"""
|
6
7
|
|
7
8
|
from __future__ import annotations
|
@@ -17,10 +18,10 @@ logger = logging.getLogger(__name__)
|
|
17
18
|
|
18
19
|
|
19
20
|
class BaseOperations:
|
20
|
-
"""
|
21
|
+
"""Base class for all operation modules."""
|
21
22
|
|
22
23
|
def __init__(self, store: 'ArtifactStore'):
|
23
|
-
#
|
24
|
+
# Store reference to artifact store
|
24
25
|
self._artifact_store = store
|
25
26
|
|
26
27
|
@property
|
@@ -47,6 +48,11 @@ class BaseOperations:
|
|
47
48
|
def max_retries(self) -> int:
|
48
49
|
return self._artifact_store.max_retries
|
49
50
|
|
51
|
+
@property
|
52
|
+
def session_manager(self):
|
53
|
+
"""Access to chuk_sessions SessionManager."""
|
54
|
+
return self._artifact_store._session_manager
|
55
|
+
|
50
56
|
def _check_closed(self):
|
51
57
|
"""Check if store is closed and raise error if so."""
|
52
58
|
if self._artifact_store._closed:
|
chuk_artifacts/batch.py
CHANGED
@@ -1,28 +1,32 @@
|
|
1
|
-
#
|
2
|
-
# chuk_artifacts/batch.py
|
3
|
-
# ===========================================================================
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# chuk_artifacts/batch.py
|
4
3
|
"""
|
5
4
|
Batch operations for multiple artifacts.
|
5
|
+
Now uses chuk_sessions for session management.
|
6
6
|
"""
|
7
7
|
|
8
8
|
from __future__ import annotations
|
9
9
|
|
10
|
-
import uuid, hashlib, json, logging
|
10
|
+
import uuid, hashlib, json, logging, asyncio
|
11
11
|
from datetime import datetime
|
12
|
-
from typing import Any, Dict, List, Optional
|
12
|
+
from typing import Any, Dict, List, Optional, TYPE_CHECKING
|
13
13
|
|
14
|
-
|
15
|
-
from .
|
14
|
+
if TYPE_CHECKING:
|
15
|
+
from .store import ArtifactStore
|
16
|
+
|
17
|
+
from .exceptions import ArtifactStoreError, ProviderError, SessionError
|
16
18
|
|
17
19
|
logger = logging.getLogger(__name__)
|
18
20
|
|
19
|
-
_ANON_PREFIX = "anon"
|
20
21
|
_DEFAULT_TTL = 900
|
21
22
|
|
22
23
|
|
23
|
-
class BatchOperations
|
24
|
+
class BatchOperations:
|
24
25
|
"""Handles batch operations for multiple artifacts."""
|
25
26
|
|
27
|
+
def __init__(self, artifact_store: 'ArtifactStore'):
|
28
|
+
self.artifact_store = artifact_store
|
29
|
+
|
26
30
|
async def store_batch(
|
27
31
|
self,
|
28
32
|
items: List[Dict[str, Any]],
|
@@ -30,7 +34,14 @@ class BatchOperations(BaseOperations):
|
|
30
34
|
ttl: int = _DEFAULT_TTL,
|
31
35
|
) -> List[str]:
|
32
36
|
"""Store multiple artifacts in a batch operation."""
|
33
|
-
self.
|
37
|
+
if self.artifact_store._closed:
|
38
|
+
raise ArtifactStoreError("Store is closed")
|
39
|
+
|
40
|
+
# Ensure session is allocated using chuk_sessions
|
41
|
+
if session_id is None:
|
42
|
+
session_id = await self.artifact_store._session_manager.allocate_session()
|
43
|
+
else:
|
44
|
+
session_id = await self.artifact_store._session_manager.allocate_session(session_id=session_id)
|
34
45
|
|
35
46
|
artifact_ids = []
|
36
47
|
failed_items = []
|
@@ -38,18 +49,19 @@ class BatchOperations(BaseOperations):
|
|
38
49
|
for i, item in enumerate(items):
|
39
50
|
try:
|
40
51
|
artifact_id = uuid.uuid4().hex
|
41
|
-
|
42
|
-
key = f"sess/{scope}/{artifact_id}"
|
52
|
+
key = self.artifact_store.generate_artifact_key(session_id, artifact_id)
|
43
53
|
|
44
54
|
# Store in object storage
|
45
55
|
await self._store_with_retry(
|
46
56
|
item["data"], key, item["mime"],
|
47
|
-
item.get("filename"),
|
57
|
+
item.get("filename"), session_id
|
48
58
|
)
|
49
59
|
|
50
60
|
# Prepare metadata record
|
51
61
|
record = {
|
52
|
-
"
|
62
|
+
"artifact_id": artifact_id,
|
63
|
+
"session_id": session_id,
|
64
|
+
"sandbox_id": self.artifact_store.sandbox_id,
|
53
65
|
"key": key,
|
54
66
|
"mime": item["mime"],
|
55
67
|
"summary": item["summary"],
|
@@ -57,14 +69,16 @@ class BatchOperations(BaseOperations):
|
|
57
69
|
"filename": item.get("filename"),
|
58
70
|
"bytes": len(item["data"]),
|
59
71
|
"sha256": hashlib.sha256(item["data"]).hexdigest(),
|
60
|
-
"stored_at": datetime.utcnow().isoformat(
|
72
|
+
"stored_at": datetime.utcnow().isoformat() + "Z",
|
61
73
|
"ttl": ttl,
|
62
|
-
"storage_provider": self.
|
63
|
-
"session_provider": self.
|
74
|
+
"storage_provider": self.artifact_store._storage_provider_name,
|
75
|
+
"session_provider": self.artifact_store._session_provider_name,
|
76
|
+
"batch_operation": True,
|
77
|
+
"batch_index": i,
|
64
78
|
}
|
65
79
|
|
66
80
|
# Store metadata via session provider
|
67
|
-
session_ctx_mgr = self.
|
81
|
+
session_ctx_mgr = self.artifact_store._session_factory()
|
68
82
|
async with session_ctx_mgr as session:
|
69
83
|
await session.setex(artifact_id, ttl, json.dumps(record))
|
70
84
|
|
@@ -80,28 +94,30 @@ class BatchOperations(BaseOperations):
|
|
80
94
|
|
81
95
|
return artifact_ids
|
82
96
|
|
83
|
-
async def _store_with_retry(self, data: bytes, key: str, mime: str, filename: str,
|
97
|
+
async def _store_with_retry(self, data: bytes, key: str, mime: str, filename: str, session_id: str):
|
84
98
|
"""Store data with retry logic (copied from core for batch operations)."""
|
85
|
-
import asyncio
|
86
|
-
|
87
99
|
last_exception = None
|
88
100
|
|
89
|
-
for attempt in range(self.max_retries):
|
101
|
+
for attempt in range(self.artifact_store.max_retries):
|
90
102
|
try:
|
91
|
-
storage_ctx_mgr = self.
|
103
|
+
storage_ctx_mgr = self.artifact_store._s3_factory()
|
92
104
|
async with storage_ctx_mgr as s3:
|
93
105
|
await s3.put_object(
|
94
|
-
Bucket=self.bucket,
|
106
|
+
Bucket=self.artifact_store.bucket,
|
95
107
|
Key=key,
|
96
108
|
Body=data,
|
97
109
|
ContentType=mime,
|
98
|
-
Metadata={
|
110
|
+
Metadata={
|
111
|
+
"filename": filename or "",
|
112
|
+
"session_id": session_id,
|
113
|
+
"sandbox_id": self.artifact_store.sandbox_id,
|
114
|
+
},
|
99
115
|
)
|
100
116
|
return # Success
|
101
117
|
|
102
118
|
except Exception as e:
|
103
119
|
last_exception = e
|
104
|
-
if attempt < self.max_retries - 1:
|
120
|
+
if attempt < self.artifact_store.max_retries - 1:
|
105
121
|
wait_time = 2 ** attempt # Exponential backoff
|
106
122
|
logger.warning(
|
107
123
|
f"Batch storage attempt {attempt + 1} failed, retrying in {wait_time}s",
|
@@ -109,7 +125,6 @@ class BatchOperations(BaseOperations):
|
|
109
125
|
)
|
110
126
|
await asyncio.sleep(wait_time)
|
111
127
|
else:
|
112
|
-
logger.error(f"All {self.max_retries} batch storage attempts failed")
|
128
|
+
logger.error(f"All {self.artifact_store.max_retries} batch storage attempts failed")
|
113
129
|
|
114
|
-
raise last_exception
|
115
|
-
|
130
|
+
raise last_exception
|
chuk_artifacts/core.py
CHANGED
@@ -1,35 +1,36 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
# chuk_artifacts/core.py
|
3
3
|
"""
|
4
|
-
core storage operations.
|
4
|
+
Clean core storage operations - grid architecture only.
|
5
|
+
Now uses chuk_sessions for session management.
|
5
6
|
"""
|
6
7
|
|
7
8
|
from __future__ import annotations
|
8
9
|
|
9
|
-
import uuid
|
10
|
+
import uuid
|
11
|
+
import hashlib
|
12
|
+
import time
|
13
|
+
import asyncio
|
14
|
+
import json
|
15
|
+
import logging
|
10
16
|
from datetime import datetime
|
11
17
|
from typing import Any, Dict, Optional, TYPE_CHECKING
|
12
18
|
|
13
19
|
if TYPE_CHECKING:
|
14
20
|
from .store import ArtifactStore
|
15
21
|
|
16
|
-
from .exceptions import
|
17
|
-
ArtifactStoreError, ArtifactNotFoundError, ArtifactExpiredError,
|
18
|
-
ArtifactCorruptedError, ProviderError, SessionError
|
19
|
-
)
|
22
|
+
from .exceptions import ArtifactStoreError, ProviderError, SessionError
|
20
23
|
|
21
24
|
logger = logging.getLogger(__name__)
|
22
25
|
|
23
|
-
_ANON_PREFIX = "anon"
|
24
26
|
_DEFAULT_TTL = 900
|
25
27
|
|
26
28
|
|
27
29
|
class CoreStorageOperations:
|
28
|
-
"""core storage operations
|
30
|
+
"""Clean core storage operations with grid architecture."""
|
29
31
|
|
30
32
|
def __init__(self, artifact_store: 'ArtifactStore'):
|
31
|
-
self.artifact_store = artifact_store
|
32
|
-
logger.info(f"CoreStorageOperations initialized with store: {type(artifact_store)}")
|
33
|
+
self.artifact_store = artifact_store
|
33
34
|
|
34
35
|
async def store(
|
35
36
|
self,
|
@@ -39,26 +40,28 @@ class CoreStorageOperations:
|
|
39
40
|
summary: str,
|
40
41
|
meta: Dict[str, Any] | None = None,
|
41
42
|
filename: str | None = None,
|
42
|
-
session_id: str
|
43
|
+
session_id: str, # Required - no more optional sessions
|
43
44
|
ttl: int = _DEFAULT_TTL,
|
44
45
|
) -> str:
|
45
|
-
"""Store artifact
|
46
|
+
"""Store artifact with grid key generation."""
|
46
47
|
if self.artifact_store._closed:
|
47
|
-
raise ArtifactStoreError("Store
|
48
|
+
raise ArtifactStoreError("Store is closed")
|
48
49
|
|
49
50
|
start_time = time.time()
|
50
51
|
artifact_id = uuid.uuid4().hex
|
51
52
|
|
52
|
-
|
53
|
-
key =
|
54
|
-
|
53
|
+
# Generate grid key using chuk_sessions
|
54
|
+
key = self.artifact_store.generate_artifact_key(session_id, artifact_id)
|
55
|
+
|
55
56
|
try:
|
56
|
-
# Store in object storage
|
57
|
-
await self._store_with_retry(data, key, mime, filename,
|
58
|
-
|
57
|
+
# Store in object storage
|
58
|
+
await self._store_with_retry(data, key, mime, filename, session_id)
|
59
|
+
|
59
60
|
# Build metadata record
|
60
61
|
record = {
|
61
|
-
"
|
62
|
+
"artifact_id": artifact_id,
|
63
|
+
"session_id": session_id,
|
64
|
+
"sandbox_id": self.artifact_store.sandbox_id,
|
62
65
|
"key": key,
|
63
66
|
"mime": mime,
|
64
67
|
"summary": summary,
|
@@ -66,95 +69,54 @@ class CoreStorageOperations:
|
|
66
69
|
"filename": filename,
|
67
70
|
"bytes": len(data),
|
68
71
|
"sha256": hashlib.sha256(data).hexdigest(),
|
69
|
-
"stored_at": datetime.utcnow().isoformat(
|
72
|
+
"stored_at": datetime.utcnow().isoformat() + "Z",
|
70
73
|
"ttl": ttl,
|
71
74
|
"storage_provider": self.artifact_store._storage_provider_name,
|
72
75
|
"session_provider": self.artifact_store._session_provider_name,
|
73
76
|
}
|
74
|
-
|
75
|
-
#
|
77
|
+
|
78
|
+
# Store metadata
|
76
79
|
session_ctx_mgr = self.artifact_store._session_factory()
|
77
80
|
async with session_ctx_mgr as session:
|
78
81
|
await session.setex(artifact_id, ttl, json.dumps(record))
|
79
|
-
|
82
|
+
|
80
83
|
duration_ms = int((time.time() - start_time) * 1000)
|
81
84
|
logger.info(
|
82
|
-
"Artifact stored
|
85
|
+
"Artifact stored",
|
83
86
|
extra={
|
84
87
|
"artifact_id": artifact_id,
|
88
|
+
"session_id": session_id,
|
89
|
+
"key": key,
|
85
90
|
"bytes": len(data),
|
86
|
-
"mime": mime,
|
87
91
|
"duration_ms": duration_ms,
|
88
|
-
"storage_provider": self.artifact_store._storage_provider_name,
|
89
92
|
}
|
90
93
|
)
|
91
|
-
|
94
|
+
|
92
95
|
return artifact_id
|
93
|
-
|
94
|
-
except Exception as e:
|
95
|
-
duration_ms = int((time.time() - start_time) * 1000)
|
96
|
-
logger.error(
|
97
|
-
"Artifact storage failed",
|
98
|
-
extra={
|
99
|
-
"artifact_id": artifact_id,
|
100
|
-
"error": str(e),
|
101
|
-
"duration_ms": duration_ms,
|
102
|
-
"storage_provider": self.artifact_store._storage_provider_name,
|
103
|
-
},
|
104
|
-
exc_info=True
|
105
|
-
)
|
106
96
|
|
107
|
-
|
108
|
-
|
97
|
+
except Exception as e:
|
98
|
+
logger.error(f"Storage failed for {artifact_id}: {e}")
|
99
|
+
if "session" in str(e).lower():
|
100
|
+
raise SessionError(f"Metadata storage failed: {e}") from e
|
109
101
|
else:
|
110
|
-
raise ProviderError(f"Storage
|
111
|
-
|
112
|
-
async def _store_with_retry(self, data: bytes, key: str, mime: str, filename: str, scope: str):
|
113
|
-
"""Store data with retry logic."""
|
114
|
-
last_exception = None
|
115
|
-
|
116
|
-
for attempt in range(self.artifact_store.max_retries):
|
117
|
-
try:
|
118
|
-
storage_ctx_mgr = self.artifact_store._s3_factory()
|
119
|
-
async with storage_ctx_mgr as s3:
|
120
|
-
await s3.put_object(
|
121
|
-
Bucket=self.artifact_store.bucket,
|
122
|
-
Key=key,
|
123
|
-
Body=data,
|
124
|
-
ContentType=mime,
|
125
|
-
Metadata={"filename": filename or "", "scope": scope},
|
126
|
-
)
|
127
|
-
return # Success
|
128
|
-
|
129
|
-
except Exception as e:
|
130
|
-
last_exception = e
|
131
|
-
if attempt < self.artifact_store.max_retries - 1:
|
132
|
-
wait_time = 2 ** attempt # Exponential backoff
|
133
|
-
logger.warning(
|
134
|
-
f"Storage attempt {attempt + 1} failed, retrying in {wait_time}s",
|
135
|
-
extra={"error": str(e), "attempt": attempt + 1}
|
136
|
-
)
|
137
|
-
await asyncio.sleep(wait_time)
|
138
|
-
else:
|
139
|
-
logger.error(f"All {self.artifact_store.max_retries} storage attempts failed")
|
140
|
-
|
141
|
-
raise last_exception
|
102
|
+
raise ProviderError(f"Storage failed: {e}") from e
|
142
103
|
|
143
104
|
async def retrieve(self, artifact_id: str) -> bytes:
|
144
|
-
"""Retrieve artifact data
|
105
|
+
"""Retrieve artifact data."""
|
145
106
|
if self.artifact_store._closed:
|
146
|
-
raise ArtifactStoreError("Store
|
147
|
-
|
148
|
-
start_time = time.time()
|
107
|
+
raise ArtifactStoreError("Store is closed")
|
149
108
|
|
150
109
|
try:
|
151
110
|
record = await self._get_record(artifact_id)
|
152
111
|
|
153
112
|
storage_ctx_mgr = self.artifact_store._s3_factory()
|
154
113
|
async with storage_ctx_mgr as s3:
|
155
|
-
response = await s3.get_object(
|
114
|
+
response = await s3.get_object(
|
115
|
+
Bucket=self.artifact_store.bucket,
|
116
|
+
Key=record["key"]
|
117
|
+
)
|
156
118
|
|
157
|
-
# Handle different response formats
|
119
|
+
# Handle different response formats
|
158
120
|
if hasattr(response["Body"], "read"):
|
159
121
|
data = await response["Body"].read()
|
160
122
|
elif isinstance(response["Body"], bytes):
|
@@ -162,54 +124,67 @@ class CoreStorageOperations:
|
|
162
124
|
else:
|
163
125
|
data = bytes(response["Body"])
|
164
126
|
|
165
|
-
# Verify integrity
|
127
|
+
# Verify integrity
|
166
128
|
if "sha256" in record and record["sha256"]:
|
167
|
-
|
168
|
-
if
|
169
|
-
raise
|
170
|
-
f"SHA256 mismatch: expected {record['sha256']}, got {computed_hash}"
|
171
|
-
)
|
172
|
-
|
173
|
-
duration_ms = int((time.time() - start_time) * 1000)
|
174
|
-
logger.info(
|
175
|
-
"Artifact retrieved successfully",
|
176
|
-
extra={
|
177
|
-
"artifact_id": artifact_id,
|
178
|
-
"bytes": len(data),
|
179
|
-
"duration_ms": duration_ms,
|
180
|
-
}
|
181
|
-
)
|
129
|
+
computed = hashlib.sha256(data).hexdigest()
|
130
|
+
if computed != record["sha256"]:
|
131
|
+
raise ProviderError(f"SHA256 mismatch: {record['sha256']} != {computed}")
|
182
132
|
|
183
133
|
return data
|
184
134
|
|
185
|
-
except (ArtifactNotFoundError, ArtifactExpiredError, ArtifactCorruptedError):
|
186
|
-
raise
|
187
135
|
except Exception as e:
|
188
|
-
|
189
|
-
logger.error(
|
190
|
-
"Artifact retrieval failed",
|
191
|
-
extra={
|
192
|
-
"artifact_id": artifact_id,
|
193
|
-
"error": str(e),
|
194
|
-
"duration_ms": duration_ms,
|
195
|
-
}
|
196
|
-
)
|
136
|
+
logger.error(f"Retrieval failed for {artifact_id}: {e}")
|
197
137
|
raise ProviderError(f"Retrieval failed: {e}") from e
|
198
138
|
|
139
|
+
async def _store_with_retry(
|
140
|
+
self,
|
141
|
+
data: bytes,
|
142
|
+
key: str,
|
143
|
+
mime: str,
|
144
|
+
filename: str,
|
145
|
+
session_id: str
|
146
|
+
):
|
147
|
+
"""Store with retry logic."""
|
148
|
+
last_exception = None
|
149
|
+
|
150
|
+
for attempt in range(self.artifact_store.max_retries):
|
151
|
+
try:
|
152
|
+
storage_ctx_mgr = self.artifact_store._s3_factory()
|
153
|
+
async with storage_ctx_mgr as s3:
|
154
|
+
await s3.put_object(
|
155
|
+
Bucket=self.artifact_store.bucket,
|
156
|
+
Key=key,
|
157
|
+
Body=data,
|
158
|
+
ContentType=mime,
|
159
|
+
Metadata={
|
160
|
+
"filename": filename or "",
|
161
|
+
"session_id": session_id,
|
162
|
+
"sandbox_id": self.artifact_store.sandbox_id,
|
163
|
+
},
|
164
|
+
)
|
165
|
+
return # Success
|
166
|
+
|
167
|
+
except Exception as e:
|
168
|
+
last_exception = e
|
169
|
+
if attempt < self.artifact_store.max_retries - 1:
|
170
|
+
wait_time = 2 ** attempt
|
171
|
+
await asyncio.sleep(wait_time)
|
172
|
+
|
173
|
+
raise last_exception
|
174
|
+
|
199
175
|
async def _get_record(self, artifact_id: str) -> Dict[str, Any]:
|
200
|
-
"""
|
176
|
+
"""Get artifact metadata."""
|
201
177
|
try:
|
202
178
|
session_ctx_mgr = self.artifact_store._session_factory()
|
203
179
|
async with session_ctx_mgr as session:
|
204
180
|
raw = await session.get(artifact_id)
|
205
181
|
except Exception as e:
|
206
|
-
raise SessionError(f"Session
|
182
|
+
raise SessionError(f"Session error for {artifact_id}: {e}") from e
|
207
183
|
|
208
184
|
if raw is None:
|
209
|
-
raise
|
185
|
+
raise ProviderError(f"Artifact {artifact_id} not found")
|
210
186
|
|
211
187
|
try:
|
212
188
|
return json.loads(raw)
|
213
189
|
except json.JSONDecodeError as e:
|
214
|
-
|
215
|
-
raise ArtifactCorruptedError(f"Corrupted metadata for artifact {artifact_id}") from e
|
190
|
+
raise ProviderError(f"Corrupted metadata for {artifact_id}") from e
|