chuk-artifacts 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,311 +1,212 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  # chuk_artifacts/metadata.py
3
3
  """
4
- Metadata operations: exists, metadata retrieval, deletion, and session-based operations.
5
- This is a WORKING implementation that actually implements the missing methods.
4
+ Clean metadata operations for grid architecture.
5
+ Now uses chuk_sessions for session management.
6
6
  """
7
7
 
8
8
  from __future__ import annotations
9
9
 
10
- import logging, json
11
- from datetime import datetime
12
- from typing import Any, Dict, List
10
+ import json
11
+ import logging
12
+ from typing import Any, Dict, List, Optional, TYPE_CHECKING
13
13
 
14
- from .base import BaseOperations
15
- from .exceptions import (
16
- ArtifactStoreError, ArtifactNotFoundError, ArtifactExpiredError,
17
- ProviderError
18
- )
14
+ if TYPE_CHECKING:
15
+ from .store import ArtifactStore
16
+
17
+ from .exceptions import ProviderError, SessionError, ArtifactNotFoundError
19
18
 
20
19
  logger = logging.getLogger(__name__)
21
20
 
22
21
 
23
- class MetadataOperations(BaseOperations):
24
- """Handles metadata-related operations with working session-based listing."""
22
+ class MetadataOperations:
23
+ """Clean metadata operations for grid architecture using chuk_sessions."""
24
+
25
+ def __init__(self, artifact_store: 'ArtifactStore'):
26
+ self.store = artifact_store
25
27
 
26
- async def metadata(self, artifact_id: str) -> Dict[str, Any]:
28
+ async def get_metadata(self, artifact_id: str) -> Dict[str, Any]:
27
29
  """Get artifact metadata."""
28
30
  return await self._get_record(artifact_id)
29
31
 
30
32
  async def exists(self, artifact_id: str) -> bool:
31
- """Check if artifact exists and hasn't expired."""
33
+ """Check if artifact exists."""
32
34
  try:
33
35
  await self._get_record(artifact_id)
34
36
  return True
35
- except (ArtifactNotFoundError, ArtifactExpiredError):
37
+ except Exception:
36
38
  return False
37
39
 
38
40
  async def delete(self, artifact_id: str) -> bool:
39
- """Delete artifact and its metadata."""
40
- self._check_closed()
41
-
41
+ """Delete artifact and metadata."""
42
42
  try:
43
43
  record = await self._get_record(artifact_id)
44
44
 
45
- # Delete from object storage
46
- storage_ctx_mgr = self.s3_factory()
45
+ # Delete from storage
46
+ storage_ctx_mgr = self.store._s3_factory()
47
47
  async with storage_ctx_mgr as s3:
48
- await s3.delete_object(Bucket=self.bucket, Key=record["key"])
48
+ await s3.delete_object(
49
+ Bucket=self.store.bucket,
50
+ Key=record["key"]
51
+ )
49
52
 
50
- # Delete metadata from session store
51
- session_ctx_mgr = self.session_factory()
53
+ # Delete metadata from session provider
54
+ session_ctx_mgr = self.store._session_factory()
52
55
  async with session_ctx_mgr as session:
53
56
  if hasattr(session, 'delete'):
54
57
  await session.delete(artifact_id)
55
- else:
56
- logger.warning(
57
- "Session provider doesn't support delete operation",
58
- extra={"artifact_id": artifact_id, "provider": self.session_provider_name}
59
- )
60
58
 
61
- logger.info("Artifact deleted", extra={"artifact_id": artifact_id})
59
+ logger.info(f"Deleted artifact: {artifact_id}")
62
60
  return True
63
61
 
64
- except (ArtifactNotFoundError, ArtifactExpiredError):
65
- logger.warning("Attempted to delete non-existent artifact", extra={"artifact_id": artifact_id})
62
+ except Exception as e:
63
+ logger.error(f"Delete failed for {artifact_id}: {e}")
66
64
  return False
65
+
66
+ async def list_by_session(self, session_id: str, limit: int = 100) -> List[Dict[str, Any]]:
67
+ """List artifacts in a session using grid prefix from chuk_sessions."""
68
+ try:
69
+ artifacts = []
70
+ # Use the session manager's canonical prefix instead of building our own
71
+ prefix = self.store._session_manager.get_canonical_prefix(session_id)
72
+
73
+ storage_ctx_mgr = self.store._s3_factory()
74
+ async with storage_ctx_mgr as s3:
75
+ if hasattr(s3, 'list_objects_v2'):
76
+ response = await s3.list_objects_v2(
77
+ Bucket=self.store.bucket,
78
+ Prefix=prefix,
79
+ MaxKeys=limit
80
+ )
81
+
82
+ for obj in response.get('Contents', []):
83
+ key = obj['Key']
84
+ # Parse the grid key using chuk_sessions
85
+ parsed = self.store._session_manager.parse_grid_key(key)
86
+ if parsed and parsed.get('artifact_id'):
87
+ artifact_id = parsed['artifact_id']
88
+ try:
89
+ record = await self._get_record(artifact_id)
90
+ artifacts.append(record)
91
+ except Exception:
92
+ continue # Skip if metadata missing
93
+
94
+ return artifacts[:limit]
95
+
96
+ logger.warning(f"Storage provider doesn't support listing")
97
+ return []
98
+
99
+ except Exception as e:
100
+ logger.error(f"Session listing failed for {session_id}: {e}")
101
+ return []
102
+
103
+ async def list_by_prefix(
104
+ self,
105
+ session_id: str,
106
+ prefix: str = "",
107
+ limit: int = 100
108
+ ) -> List[Dict[str, Any]]:
109
+ """List artifacts with filename prefix filtering."""
110
+ try:
111
+ all_files = await self.list_by_session(session_id, limit * 2)
112
+
113
+ if not prefix:
114
+ return all_files[:limit]
115
+
116
+ # Filter by filename prefix
117
+ filtered = []
118
+ for file_meta in all_files:
119
+ filename = file_meta.get("filename", "")
120
+ if filename.startswith(prefix):
121
+ filtered.append(file_meta)
122
+ if len(filtered) >= limit:
123
+ break
124
+
125
+ return filtered
126
+
67
127
  except Exception as e:
68
- logger.error(
69
- "Artifact deletion failed",
70
- extra={"artifact_id": artifact_id, "error": str(e)}
71
- )
72
- raise ProviderError(f"Deletion failed: {e}") from e
128
+ logger.error(f"Prefix listing failed for session {session_id}: {e}")
129
+ return []
73
130
 
74
131
  async def update_metadata(
75
- self,
76
- artifact_id: str,
132
+ self,
133
+ artifact_id: str,
77
134
  *,
78
135
  summary: str = None,
79
136
  meta: Dict[str, Any] = None,
80
- filename: str = None,
81
- ttl: int = None,
82
- # NEW: MCP-specific parameters
83
- new_meta: Dict[str, Any] = None,
84
- merge: bool = True
137
+ merge: bool = True,
138
+ **kwargs
85
139
  ) -> Dict[str, Any]:
86
- """
87
- Update artifact metadata with MCP server compatibility.
88
-
89
- Parameters
90
- ----------
91
- artifact_id : str
92
- The artifact identifier
93
- summary : str, optional
94
- New summary description
95
- meta : dict, optional
96
- New or additional metadata fields (legacy parameter)
97
- filename : str, optional
98
- New filename
99
- ttl : int, optional
100
- New TTL for metadata
101
- new_meta : dict, optional
102
- New metadata fields (MCP server parameter)
103
- merge : bool, optional
104
- Whether to merge with existing metadata (True) or replace (False)
105
-
106
- Returns
107
- -------
108
- dict
109
- Updated metadata record
110
- """
111
- self._check_closed()
112
-
140
+ """Update artifact metadata."""
113
141
  try:
114
- # Get existing record
142
+ # Get current record
115
143
  record = await self._get_record(artifact_id)
116
144
 
117
- # Handle MCP server compatibility
118
- metadata_update = new_meta or meta or {}
119
-
120
- # Update fields if provided
145
+ # Update fields
121
146
  if summary is not None:
122
147
  record["summary"] = summary
123
- if filename is not None:
124
- record["filename"] = filename
125
- if ttl is not None:
126
- record["ttl"] = ttl
127
148
 
128
- # Handle metadata updates
129
- if metadata_update:
130
- existing_meta = record.get("meta", {})
131
- if merge:
132
- # Merge with existing meta, allowing overwrites
133
- existing_meta.update(metadata_update)
134
- record["meta"] = existing_meta
149
+ if meta is not None:
150
+ if merge and "meta" in record:
151
+ record["meta"].update(meta)
135
152
  else:
136
- # Replace existing meta entirely
137
- record["meta"] = metadata_update
153
+ record["meta"] = meta
138
154
 
139
- # Update stored metadata
140
- record["updated_at"] = datetime.utcnow().isoformat(timespec="seconds") + "Z"
155
+ # Update any other fields
156
+ for key, value in kwargs.items():
157
+ if key not in ["summary", "meta"] and value is not None:
158
+ record[key] = value
141
159
 
142
- session_ctx_mgr = self.session_factory()
160
+ # Store updated record using session provider
161
+ session_ctx_mgr = self.store._session_factory()
143
162
  async with session_ctx_mgr as session:
144
- final_ttl = ttl or record.get("ttl", 900)
145
- await session.setex(artifact_id, final_ttl, json.dumps(record))
146
-
147
- logger.info(
148
- "Artifact metadata updated",
149
- extra={
150
- "artifact_id": artifact_id,
151
- "merge": merge,
152
- "updated_fields": list([
153
- k for k, v in [
154
- ("summary", summary), ("meta", metadata_update),
155
- ("filename", filename), ("ttl", ttl)
156
- ] if v is not None
157
- ])
158
- }
159
- )
163
+ await session.setex(artifact_id, record.get("ttl", 900), json.dumps(record))
160
164
 
161
165
  return record
162
166
 
163
- except (ArtifactNotFoundError, ArtifactExpiredError):
164
- raise
165
167
  except Exception as e:
166
- logger.error(
167
- "Metadata update failed",
168
- extra={"artifact_id": artifact_id, "error": str(e)}
169
- )
168
+ logger.error(f"Metadata update failed for {artifact_id}: {e}")
170
169
  raise ProviderError(f"Metadata update failed: {e}") from e
171
170
 
172
- async def extend_ttl(self, artifact_id: str, additional_seconds: int) -> Dict[str, Any]:
173
- """Extend the TTL of an artifact's metadata."""
174
- self._check_closed()
175
-
171
+ async def extend_ttl(
172
+ self,
173
+ artifact_id: str,
174
+ additional_seconds: int
175
+ ) -> Dict[str, Any]:
176
+ """Extend artifact TTL."""
176
177
  try:
178
+ # Get current record
177
179
  record = await self._get_record(artifact_id)
180
+
181
+ # Update TTL
178
182
  current_ttl = record.get("ttl", 900)
179
183
  new_ttl = current_ttl + additional_seconds
184
+ record["ttl"] = new_ttl
180
185
 
181
- return await self.update_metadata(artifact_id, ttl=new_ttl)
186
+ # Store updated record with new TTL using session provider
187
+ session_ctx_mgr = self.store._session_factory()
188
+ async with session_ctx_mgr as session:
189
+ await session.setex(artifact_id, new_ttl, json.dumps(record))
190
+
191
+ return record
182
192
 
183
- except (ArtifactNotFoundError, ArtifactExpiredError):
184
- raise
185
193
  except Exception as e:
186
- logger.error(
187
- "TTL extension failed",
188
- extra={
189
- "artifact_id": artifact_id,
190
- "additional_seconds": additional_seconds,
191
- "error": str(e)
192
- }
193
- )
194
+ logger.error(f"TTL extension failed for {artifact_id}: {e}")
194
195
  raise ProviderError(f"TTL extension failed: {e}") from e
195
196
 
196
- async def list_by_session(self, session_id: str, limit: int = 100) -> List[Dict[str, Any]]:
197
- """
198
- List artifacts for a specific session.
199
-
200
- WORKING IMPLEMENTATION: Uses storage provider listing when available,
201
- falls back to warning for providers that don't support it.
202
- """
203
- self._check_closed()
204
-
197
+ async def _get_record(self, artifact_id: str) -> Dict[str, Any]:
198
+ """Get artifact metadata record from session provider."""
205
199
  try:
206
- artifacts = []
207
-
208
- # Try to use storage provider listing capabilities
209
- storage_ctx_mgr = self.s3_factory()
210
- async with storage_ctx_mgr as s3:
211
- # Check if storage provider supports listing
212
- if hasattr(s3, 'list_objects_v2'):
213
- try:
214
- # List objects with session prefix
215
- prefix = f"sess/{session_id}/"
216
-
217
- response = await s3.list_objects_v2(
218
- Bucket=self.bucket,
219
- Prefix=prefix,
220
- MaxKeys=limit
221
- )
222
-
223
- # Extract artifact IDs from keys and get their metadata
224
- for obj in response.get('Contents', []):
225
- key = obj['Key']
226
- # Extract artifact ID from key pattern: sess/{session_id}/{artifact_id}
227
- parts = key.split('/')
228
- if len(parts) >= 3:
229
- artifact_id = parts[2]
230
- try:
231
- record = await self._get_record(artifact_id)
232
- artifacts.append(record)
233
- except (ArtifactNotFoundError, ArtifactExpiredError):
234
- continue # Skip expired/missing metadata
235
-
236
- logger.info(
237
- f"Successfully listed {len(artifacts)} artifacts for session {session_id}"
238
- )
239
- return artifacts[:limit]
240
-
241
- except Exception as list_error:
242
- logger.warning(
243
- f"Storage provider listing failed: {list_error}. "
244
- f"Provider: {self.storage_provider_name}"
245
- )
246
- # Fall through to empty result with warning
247
-
248
- else:
249
- logger.warning(
250
- f"Storage provider {self.storage_provider_name} doesn't support list_objects_v2"
251
- )
252
-
253
- # If we get here, listing isn't supported
254
- logger.warning(
255
- f"Session listing not fully supported with {self.storage_provider_name} provider. "
256
- f"Returning empty list. For full session listing, use filesystem or S3-compatible storage."
257
- )
258
- return []
259
-
200
+ session_ctx_mgr = self.store._session_factory()
201
+ async with session_ctx_mgr as session:
202
+ raw = await session.get(artifact_id)
260
203
  except Exception as e:
261
- logger.error(
262
- "Session artifact listing failed",
263
- extra={"session_id": session_id, "error": str(e)}
264
- )
265
- # Return empty list rather than failing completely
266
- logger.warning(f"Returning empty list due to error: {e}")
267
- return []
268
-
269
- async def list_by_prefix(
270
- self,
271
- session_id: str,
272
- prefix: str = "",
273
- limit: int = 100
274
- ) -> List[Dict[str, Any]]:
275
- """
276
- List artifacts in a session with filename prefix filtering.
204
+ raise SessionError(f"Session error for {artifact_id}: {e}") from e
205
+
206
+ if raw is None:
207
+ raise ArtifactNotFoundError(f"Artifact {artifact_id} not found")
277
208
 
278
- WORKING IMPLEMENTATION: Gets session artifacts and filters by filename prefix.
279
- """
280
209
  try:
281
- # Get all artifacts in the session first
282
- artifacts = await self.list_by_session(session_id, limit * 2) # Get more to filter
283
-
284
- if not prefix:
285
- return artifacts[:limit]
286
-
287
- # Filter by filename prefix
288
- filtered = []
289
- for artifact in artifacts:
290
- filename = artifact.get("filename", "")
291
- if filename.startswith(prefix):
292
- filtered.append(artifact)
293
- if len(filtered) >= limit:
294
- break
295
-
296
- logger.info(
297
- f"Filtered {len(filtered)} artifacts from {len(artifacts)} total with prefix '{prefix}'"
298
- )
299
- return filtered
300
-
301
- except Exception as e:
302
- logger.error(
303
- "Prefix-based listing failed",
304
- extra={
305
- "session_id": session_id,
306
- "prefix": prefix,
307
- "error": str(e)
308
- }
309
- )
310
- # Return empty list rather than failing
311
- return []
210
+ return json.loads(raw)
211
+ except json.JSONDecodeError as e:
212
+ raise ProviderError(f"Corrupted metadata for {artifact_id}") from e
@@ -2,15 +2,18 @@
2
2
  # chuk_artifacts/presigned.py
3
3
  """
4
4
  Presigned URL operations: download URLs, upload URLs, and upload registration.
5
+ Now uses chuk_sessions for session management.
5
6
  """
6
7
 
7
8
  from __future__ import annotations
8
9
 
9
10
  import uuid, time, logging, json
10
11
  from datetime import datetime
11
- from typing import Any, Dict, Optional
12
+ from typing import Any, Dict, Optional, TYPE_CHECKING
13
+
14
+ if TYPE_CHECKING:
15
+ from .store import ArtifactStore
12
16
 
13
- from .base import BaseOperations
14
17
  from .exceptions import (
15
18
  ArtifactStoreError, ArtifactNotFoundError, ArtifactExpiredError,
16
19
  ProviderError, SessionError
@@ -18,28 +21,31 @@ from .exceptions import (
18
21
 
19
22
  logger = logging.getLogger(__name__)
20
23
 
21
- _ANON_PREFIX = "anon"
22
24
  _DEFAULT_TTL = 900
23
25
  _DEFAULT_PRESIGN_EXPIRES = 3600
24
26
 
25
27
 
26
- class PresignedURLOperations(BaseOperations):
28
+ class PresignedURLOperations:
27
29
  """Handles all presigned URL operations."""
28
30
 
31
+ def __init__(self, artifact_store: 'ArtifactStore'):
32
+ self.artifact_store = artifact_store
33
+
29
34
  async def presign(self, artifact_id: str, expires: int = _DEFAULT_PRESIGN_EXPIRES) -> str:
30
35
  """Generate a presigned URL for artifact download."""
31
- self._check_closed()
36
+ if self.artifact_store._closed:
37
+ raise ArtifactStoreError("Store is closed")
32
38
 
33
39
  start_time = time.time()
34
40
 
35
41
  try:
36
42
  record = await self._get_record(artifact_id)
37
43
 
38
- storage_ctx_mgr = self.s3_factory()
44
+ storage_ctx_mgr = self.artifact_store._s3_factory()
39
45
  async with storage_ctx_mgr as s3:
40
46
  url = await s3.generate_presigned_url(
41
47
  "get_object",
42
- Params={"Bucket": self.bucket, "Key": record["key"]},
48
+ Params={"Bucket": self.artifact_store.bucket, "Key": record["key"]},
43
49
  ExpiresIn=expires,
44
50
  )
45
51
 
@@ -96,22 +102,28 @@ class PresignedURLOperations(BaseOperations):
96
102
  expires: int = _DEFAULT_PRESIGN_EXPIRES
97
103
  ) -> tuple[str, str]:
98
104
  """Generate a presigned URL for uploading a new artifact."""
99
- self._check_closed()
105
+ if self.artifact_store._closed:
106
+ raise ArtifactStoreError("Store is closed")
100
107
 
101
108
  start_time = time.time()
102
109
 
110
+ # Ensure session is allocated using chuk_sessions
111
+ if session_id is None:
112
+ session_id = await self.artifact_store._session_manager.allocate_session()
113
+ else:
114
+ session_id = await self.artifact_store._session_manager.allocate_session(session_id=session_id)
115
+
103
116
  # Generate artifact ID and key path
104
117
  artifact_id = uuid.uuid4().hex
105
- scope = session_id or f"{_ANON_PREFIX}_{artifact_id}"
106
- key = f"sess/{scope}/{artifact_id}"
118
+ key = self.artifact_store.generate_artifact_key(session_id, artifact_id)
107
119
 
108
120
  try:
109
- storage_ctx_mgr = self.s3_factory()
121
+ storage_ctx_mgr = self.artifact_store._s3_factory()
110
122
  async with storage_ctx_mgr as s3:
111
123
  url = await s3.generate_presigned_url(
112
124
  "put_object",
113
125
  Params={
114
- "Bucket": self.bucket,
126
+ "Bucket": self.artifact_store.bucket,
115
127
  "Key": key,
116
128
  "ContentType": mime_type
117
129
  },
@@ -163,20 +175,26 @@ class PresignedURLOperations(BaseOperations):
163
175
  ttl: int = _DEFAULT_TTL,
164
176
  ) -> bool:
165
177
  """Register metadata for an artifact uploaded via presigned URL."""
166
- self._check_closed()
178
+ if self.artifact_store._closed:
179
+ raise ArtifactStoreError("Store is closed")
167
180
 
168
181
  start_time = time.time()
169
182
 
183
+ # Ensure session is allocated using chuk_sessions
184
+ if session_id is None:
185
+ session_id = await self.artifact_store._session_manager.allocate_session()
186
+ else:
187
+ session_id = await self.artifact_store._session_manager.allocate_session(session_id=session_id)
188
+
170
189
  # Reconstruct the key path
171
- scope = session_id or f"{_ANON_PREFIX}_{artifact_id}"
172
- key = f"sess/{scope}/{artifact_id}"
190
+ key = self.artifact_store.generate_artifact_key(session_id, artifact_id)
173
191
 
174
192
  try:
175
193
  # Verify the object exists and get its size
176
- storage_ctx_mgr = self.s3_factory()
194
+ storage_ctx_mgr = self.artifact_store._s3_factory()
177
195
  async with storage_ctx_mgr as s3:
178
196
  try:
179
- response = await s3.head_object(Bucket=self.bucket, Key=key)
197
+ response = await s3.head_object(Bucket=self.artifact_store.bucket, Key=key)
180
198
  file_size = response.get('ContentLength', 0)
181
199
  except Exception:
182
200
  logger.warning(f"Artifact {artifact_id} not found in storage")
@@ -184,7 +202,9 @@ class PresignedURLOperations(BaseOperations):
184
202
 
185
203
  # Build metadata record
186
204
  record = {
187
- "scope": scope,
205
+ "artifact_id": artifact_id,
206
+ "session_id": session_id,
207
+ "sandbox_id": self.artifact_store.sandbox_id,
188
208
  "key": key,
189
209
  "mime": mime,
190
210
  "summary": summary,
@@ -192,15 +212,15 @@ class PresignedURLOperations(BaseOperations):
192
212
  "filename": filename,
193
213
  "bytes": file_size,
194
214
  "sha256": None, # We don't have the hash since we didn't upload it directly
195
- "stored_at": datetime.utcnow().isoformat(timespec="seconds") + "Z",
215
+ "stored_at": datetime.utcnow().isoformat() + "Z",
196
216
  "ttl": ttl,
197
- "storage_provider": self.storage_provider_name,
198
- "session_provider": self.session_provider_name,
217
+ "storage_provider": self.artifact_store._storage_provider_name,
218
+ "session_provider": self.artifact_store._session_provider_name,
199
219
  "uploaded_via_presigned": True, # Flag to indicate upload method
200
220
  }
201
221
 
202
222
  # Cache metadata using session provider
203
- session_ctx_mgr = self.session_factory()
223
+ session_ctx_mgr = self.artifact_store._session_factory()
204
224
  async with session_ctx_mgr as session:
205
225
  await session.setex(artifact_id, ttl, json.dumps(record))
206
226
 
@@ -264,4 +284,21 @@ class PresignedURLOperations(BaseOperations):
264
284
  ttl=ttl
265
285
  )
266
286
 
267
- return upload_url, artifact_id
287
+ return upload_url, artifact_id
288
+
289
+ async def _get_record(self, artifact_id: str) -> Dict[str, Any]:
290
+ """Get artifact metadata record."""
291
+ try:
292
+ session_ctx_mgr = self.artifact_store._session_factory()
293
+ async with session_ctx_mgr as session:
294
+ raw = await session.get(artifact_id)
295
+ except Exception as e:
296
+ raise SessionError(f"Session error for {artifact_id}: {e}") from e
297
+
298
+ if raw is None:
299
+ raise ArtifactNotFoundError(f"Artifact {artifact_id} not found")
300
+
301
+ try:
302
+ return json.loads(raw)
303
+ except json.JSONDecodeError as e:
304
+ raise ProviderError(f"Corrupted metadata for {artifact_id}") from e