chuk-artifacts 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,311 +1,210 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  # chuk_artifacts/metadata.py
3
3
  """
4
- Metadata operations: exists, metadata retrieval, deletion, and session-based operations.
5
- This is a WORKING implementation that actually implements the missing methods.
4
+ Clean metadata operations for grid architecture.
6
5
  """
7
6
 
8
7
  from __future__ import annotations
9
8
 
10
- import logging, json
11
- from datetime import datetime
12
- from typing import Any, Dict, List
9
+ import json
10
+ import logging
11
+ from typing import Any, Dict, List, Optional, TYPE_CHECKING
13
12
 
14
- from .base import BaseOperations
15
- from .exceptions import (
16
- ArtifactStoreError, ArtifactNotFoundError, ArtifactExpiredError,
17
- ProviderError
18
- )
13
+ if TYPE_CHECKING:
14
+ from .store import ArtifactStore
15
+
16
+ from .exceptions import ProviderError, SessionError
19
17
 
20
18
  logger = logging.getLogger(__name__)
21
19
 
22
20
 
23
- class MetadataOperations(BaseOperations):
24
- """Handles metadata-related operations with working session-based listing."""
21
+ class MetadataOperations:
22
+ """Clean metadata operations for grid architecture."""
23
+
24
+ def __init__(self, artifact_store: 'ArtifactStore'):
25
+ self.store = artifact_store
25
26
 
26
- async def metadata(self, artifact_id: str) -> Dict[str, Any]:
27
+ async def get_metadata(self, artifact_id: str) -> Dict[str, Any]:
27
28
  """Get artifact metadata."""
28
29
  return await self._get_record(artifact_id)
29
30
 
30
31
  async def exists(self, artifact_id: str) -> bool:
31
- """Check if artifact exists and hasn't expired."""
32
+ """Check if artifact exists."""
32
33
  try:
33
34
  await self._get_record(artifact_id)
34
35
  return True
35
- except (ArtifactNotFoundError, ArtifactExpiredError):
36
+ except Exception:
36
37
  return False
37
38
 
38
39
  async def delete(self, artifact_id: str) -> bool:
39
- """Delete artifact and its metadata."""
40
- self._check_closed()
41
-
40
+ """Delete artifact and metadata."""
42
41
  try:
43
42
  record = await self._get_record(artifact_id)
44
43
 
45
- # Delete from object storage
46
- storage_ctx_mgr = self.s3_factory()
44
+ # Delete from storage
45
+ storage_ctx_mgr = self.store._s3_factory()
47
46
  async with storage_ctx_mgr as s3:
48
- await s3.delete_object(Bucket=self.bucket, Key=record["key"])
47
+ await s3.delete_object(
48
+ Bucket=self.store.bucket,
49
+ Key=record["key"]
50
+ )
49
51
 
50
- # Delete metadata from session store
51
- session_ctx_mgr = self.session_factory()
52
+ # Delete metadata
53
+ session_ctx_mgr = self.store._session_factory()
52
54
  async with session_ctx_mgr as session:
53
55
  if hasattr(session, 'delete'):
54
56
  await session.delete(artifact_id)
55
- else:
56
- logger.warning(
57
- "Session provider doesn't support delete operation",
58
- extra={"artifact_id": artifact_id, "provider": self.session_provider_name}
59
- )
60
57
 
61
- logger.info("Artifact deleted", extra={"artifact_id": artifact_id})
58
+ logger.info(f"Deleted artifact: {artifact_id}")
62
59
  return True
63
60
 
64
- except (ArtifactNotFoundError, ArtifactExpiredError):
65
- logger.warning("Attempted to delete non-existent artifact", extra={"artifact_id": artifact_id})
61
+ except Exception as e:
62
+ logger.error(f"Delete failed for {artifact_id}: {e}")
66
63
  return False
64
+
65
+ async def list_by_session(self, session_id: str, limit: int = 100) -> List[Dict[str, Any]]:
66
+ """List artifacts in a session using grid prefix."""
67
+ try:
68
+ artifacts = []
69
+ prefix = f"grid/{self.store.sandbox_id}/{session_id}/"
70
+
71
+ storage_ctx_mgr = self.store._s3_factory()
72
+ async with storage_ctx_mgr as s3:
73
+ if hasattr(s3, 'list_objects_v2'):
74
+ response = await s3.list_objects_v2(
75
+ Bucket=self.store.bucket,
76
+ Prefix=prefix,
77
+ MaxKeys=limit
78
+ )
79
+
80
+ for obj in response.get('Contents', []):
81
+ key = obj['Key']
82
+ # Extract artifact ID from key
83
+ parts = key.split('/')
84
+ if len(parts) >= 4: # grid/sandbox/session/artifact_id
85
+ artifact_id = parts[3]
86
+ try:
87
+ record = await self._get_record(artifact_id)
88
+ artifacts.append(record)
89
+ except Exception:
90
+ continue # Skip if metadata missing
91
+
92
+ return artifacts[:limit]
93
+
94
+ logger.warning(f"Storage provider doesn't support listing")
95
+ return []
96
+
97
+ except Exception as e:
98
+ logger.error(f"Session listing failed for {session_id}: {e}")
99
+ return []
100
+
101
+ async def list_by_prefix(
102
+ self,
103
+ session_id: str,
104
+ prefix: str = "",
105
+ limit: int = 100
106
+ ) -> List[Dict[str, Any]]:
107
+ """List artifacts with filename prefix filtering."""
108
+ try:
109
+ all_files = await self.list_by_session(session_id, limit * 2)
110
+
111
+ if not prefix:
112
+ return all_files[:limit]
113
+
114
+ # Filter by filename prefix
115
+ filtered = []
116
+ for file_meta in all_files:
117
+ filename = file_meta.get("filename", "")
118
+ if filename.startswith(prefix):
119
+ filtered.append(file_meta)
120
+ if len(filtered) >= limit:
121
+ break
122
+
123
+ return filtered
124
+
67
125
  except Exception as e:
68
- logger.error(
69
- "Artifact deletion failed",
70
- extra={"artifact_id": artifact_id, "error": str(e)}
71
- )
72
- raise ProviderError(f"Deletion failed: {e}") from e
126
+ logger.error(f"Prefix listing failed for session {session_id}: {e}")
127
+ return []
73
128
 
74
129
  async def update_metadata(
75
- self,
76
- artifact_id: str,
130
+ self,
131
+ artifact_id: str,
77
132
  *,
78
133
  summary: str = None,
79
134
  meta: Dict[str, Any] = None,
80
- filename: str = None,
81
- ttl: int = None,
82
- # NEW: MCP-specific parameters
83
- new_meta: Dict[str, Any] = None,
84
- merge: bool = True
135
+ merge: bool = True,
136
+ **kwargs
85
137
  ) -> Dict[str, Any]:
86
- """
87
- Update artifact metadata with MCP server compatibility.
88
-
89
- Parameters
90
- ----------
91
- artifact_id : str
92
- The artifact identifier
93
- summary : str, optional
94
- New summary description
95
- meta : dict, optional
96
- New or additional metadata fields (legacy parameter)
97
- filename : str, optional
98
- New filename
99
- ttl : int, optional
100
- New TTL for metadata
101
- new_meta : dict, optional
102
- New metadata fields (MCP server parameter)
103
- merge : bool, optional
104
- Whether to merge with existing metadata (True) or replace (False)
105
-
106
- Returns
107
- -------
108
- dict
109
- Updated metadata record
110
- """
111
- self._check_closed()
112
-
138
+ """Update artifact metadata."""
113
139
  try:
114
- # Get existing record
140
+ # Get current record
115
141
  record = await self._get_record(artifact_id)
116
142
 
117
- # Handle MCP server compatibility
118
- metadata_update = new_meta or meta or {}
119
-
120
- # Update fields if provided
143
+ # Update fields
121
144
  if summary is not None:
122
145
  record["summary"] = summary
123
- if filename is not None:
124
- record["filename"] = filename
125
- if ttl is not None:
126
- record["ttl"] = ttl
127
146
 
128
- # Handle metadata updates
129
- if metadata_update:
130
- existing_meta = record.get("meta", {})
131
- if merge:
132
- # Merge with existing meta, allowing overwrites
133
- existing_meta.update(metadata_update)
134
- record["meta"] = existing_meta
147
+ if meta is not None:
148
+ if merge and "meta" in record:
149
+ record["meta"].update(meta)
135
150
  else:
136
- # Replace existing meta entirely
137
- record["meta"] = metadata_update
151
+ record["meta"] = meta
138
152
 
139
- # Update stored metadata
140
- record["updated_at"] = datetime.utcnow().isoformat(timespec="seconds") + "Z"
153
+ # Update any other fields
154
+ for key, value in kwargs.items():
155
+ if key not in ["summary", "meta"] and value is not None:
156
+ record[key] = value
141
157
 
142
- session_ctx_mgr = self.session_factory()
158
+ # Store updated record
159
+ session_ctx_mgr = self.store._session_factory()
143
160
  async with session_ctx_mgr as session:
144
- final_ttl = ttl or record.get("ttl", 900)
145
- await session.setex(artifact_id, final_ttl, json.dumps(record))
146
-
147
- logger.info(
148
- "Artifact metadata updated",
149
- extra={
150
- "artifact_id": artifact_id,
151
- "merge": merge,
152
- "updated_fields": list([
153
- k for k, v in [
154
- ("summary", summary), ("meta", metadata_update),
155
- ("filename", filename), ("ttl", ttl)
156
- ] if v is not None
157
- ])
158
- }
159
- )
161
+ await session.setex(artifact_id, record.get("ttl", 900), json.dumps(record))
160
162
 
161
163
  return record
162
164
 
163
- except (ArtifactNotFoundError, ArtifactExpiredError):
164
- raise
165
165
  except Exception as e:
166
- logger.error(
167
- "Metadata update failed",
168
- extra={"artifact_id": artifact_id, "error": str(e)}
169
- )
166
+ logger.error(f"Metadata update failed for {artifact_id}: {e}")
170
167
  raise ProviderError(f"Metadata update failed: {e}") from e
171
168
 
172
- async def extend_ttl(self, artifact_id: str, additional_seconds: int) -> Dict[str, Any]:
173
- """Extend the TTL of an artifact's metadata."""
174
- self._check_closed()
175
-
169
+ async def extend_ttl(
170
+ self,
171
+ artifact_id: str,
172
+ additional_seconds: int
173
+ ) -> Dict[str, Any]:
174
+ """Extend artifact TTL."""
176
175
  try:
176
+ # Get current record
177
177
  record = await self._get_record(artifact_id)
178
+
179
+ # Update TTL
178
180
  current_ttl = record.get("ttl", 900)
179
181
  new_ttl = current_ttl + additional_seconds
182
+ record["ttl"] = new_ttl
180
183
 
181
- return await self.update_metadata(artifact_id, ttl=new_ttl)
184
+ # Store updated record with new TTL
185
+ session_ctx_mgr = self.store._session_factory()
186
+ async with session_ctx_mgr as session:
187
+ await session.setex(artifact_id, new_ttl, json.dumps(record))
188
+
189
+ return record
182
190
 
183
- except (ArtifactNotFoundError, ArtifactExpiredError):
184
- raise
185
191
  except Exception as e:
186
- logger.error(
187
- "TTL extension failed",
188
- extra={
189
- "artifact_id": artifact_id,
190
- "additional_seconds": additional_seconds,
191
- "error": str(e)
192
- }
193
- )
192
+ logger.error(f"TTL extension failed for {artifact_id}: {e}")
194
193
  raise ProviderError(f"TTL extension failed: {e}") from e
195
194
 
196
- async def list_by_session(self, session_id: str, limit: int = 100) -> List[Dict[str, Any]]:
197
- """
198
- List artifacts for a specific session.
199
-
200
- WORKING IMPLEMENTATION: Uses storage provider listing when available,
201
- falls back to warning for providers that don't support it.
202
- """
203
- self._check_closed()
204
-
195
+ async def _get_record(self, artifact_id: str) -> Dict[str, Any]:
196
+ """Get artifact metadata record."""
205
197
  try:
206
- artifacts = []
207
-
208
- # Try to use storage provider listing capabilities
209
- storage_ctx_mgr = self.s3_factory()
210
- async with storage_ctx_mgr as s3:
211
- # Check if storage provider supports listing
212
- if hasattr(s3, 'list_objects_v2'):
213
- try:
214
- # List objects with session prefix
215
- prefix = f"sess/{session_id}/"
216
-
217
- response = await s3.list_objects_v2(
218
- Bucket=self.bucket,
219
- Prefix=prefix,
220
- MaxKeys=limit
221
- )
222
-
223
- # Extract artifact IDs from keys and get their metadata
224
- for obj in response.get('Contents', []):
225
- key = obj['Key']
226
- # Extract artifact ID from key pattern: sess/{session_id}/{artifact_id}
227
- parts = key.split('/')
228
- if len(parts) >= 3:
229
- artifact_id = parts[2]
230
- try:
231
- record = await self._get_record(artifact_id)
232
- artifacts.append(record)
233
- except (ArtifactNotFoundError, ArtifactExpiredError):
234
- continue # Skip expired/missing metadata
235
-
236
- logger.info(
237
- f"Successfully listed {len(artifacts)} artifacts for session {session_id}"
238
- )
239
- return artifacts[:limit]
240
-
241
- except Exception as list_error:
242
- logger.warning(
243
- f"Storage provider listing failed: {list_error}. "
244
- f"Provider: {self.storage_provider_name}"
245
- )
246
- # Fall through to empty result with warning
247
-
248
- else:
249
- logger.warning(
250
- f"Storage provider {self.storage_provider_name} doesn't support list_objects_v2"
251
- )
252
-
253
- # If we get here, listing isn't supported
254
- logger.warning(
255
- f"Session listing not fully supported with {self.storage_provider_name} provider. "
256
- f"Returning empty list. For full session listing, use filesystem or S3-compatible storage."
257
- )
258
- return []
259
-
198
+ session_ctx_mgr = self.store._session_factory()
199
+ async with session_ctx_mgr as session:
200
+ raw = await session.get(artifact_id)
260
201
  except Exception as e:
261
- logger.error(
262
- "Session artifact listing failed",
263
- extra={"session_id": session_id, "error": str(e)}
264
- )
265
- # Return empty list rather than failing completely
266
- logger.warning(f"Returning empty list due to error: {e}")
267
- return []
268
-
269
- async def list_by_prefix(
270
- self,
271
- session_id: str,
272
- prefix: str = "",
273
- limit: int = 100
274
- ) -> List[Dict[str, Any]]:
275
- """
276
- List artifacts in a session with filename prefix filtering.
202
+ raise SessionError(f"Session error for {artifact_id}: {e}") from e
203
+
204
+ if raw is None:
205
+ raise ProviderError(f"Artifact {artifact_id} not found")
277
206
 
278
- WORKING IMPLEMENTATION: Gets session artifacts and filters by filename prefix.
279
- """
280
207
  try:
281
- # Get all artifacts in the session first
282
- artifacts = await self.list_by_session(session_id, limit * 2) # Get more to filter
283
-
284
- if not prefix:
285
- return artifacts[:limit]
286
-
287
- # Filter by filename prefix
288
- filtered = []
289
- for artifact in artifacts:
290
- filename = artifact.get("filename", "")
291
- if filename.startswith(prefix):
292
- filtered.append(artifact)
293
- if len(filtered) >= limit:
294
- break
295
-
296
- logger.info(
297
- f"Filtered {len(filtered)} artifacts from {len(artifacts)} total with prefix '{prefix}'"
298
- )
299
- return filtered
300
-
301
- except Exception as e:
302
- logger.error(
303
- "Prefix-based listing failed",
304
- extra={
305
- "session_id": session_id,
306
- "prefix": prefix,
307
- "error": str(e)
308
- }
309
- )
310
- # Return empty list rather than failing
311
- return []
208
+ return json.loads(raw)
209
+ except json.JSONDecodeError as e:
210
+ raise ProviderError(f"Corrupted metadata for {artifact_id}") from e
@@ -8,9 +8,11 @@ from __future__ import annotations
8
8
 
9
9
  import uuid, time, logging, json
10
10
  from datetime import datetime
11
- from typing import Any, Dict, Optional
11
+ from typing import Any, Dict, Optional, TYPE_CHECKING
12
+
13
+ if TYPE_CHECKING:
14
+ from .store import ArtifactStore
12
15
 
13
- from .base import BaseOperations
14
16
  from .exceptions import (
15
17
  ArtifactStoreError, ArtifactNotFoundError, ArtifactExpiredError,
16
18
  ProviderError, SessionError
@@ -18,28 +20,31 @@ from .exceptions import (
18
20
 
19
21
  logger = logging.getLogger(__name__)
20
22
 
21
- _ANON_PREFIX = "anon"
22
23
  _DEFAULT_TTL = 900
23
24
  _DEFAULT_PRESIGN_EXPIRES = 3600
24
25
 
25
26
 
26
- class PresignedURLOperations(BaseOperations):
27
+ class PresignedURLOperations:
27
28
  """Handles all presigned URL operations."""
28
29
 
30
+ def __init__(self, artifact_store: 'ArtifactStore'):
31
+ self.store = artifact_store
32
+
29
33
  async def presign(self, artifact_id: str, expires: int = _DEFAULT_PRESIGN_EXPIRES) -> str:
30
34
  """Generate a presigned URL for artifact download."""
31
- self._check_closed()
35
+ if self.store._closed:
36
+ raise ArtifactStoreError("Store is closed")
32
37
 
33
38
  start_time = time.time()
34
39
 
35
40
  try:
36
41
  record = await self._get_record(artifact_id)
37
42
 
38
- storage_ctx_mgr = self.s3_factory()
43
+ storage_ctx_mgr = self.store._s3_factory()
39
44
  async with storage_ctx_mgr as s3:
40
45
  url = await s3.generate_presigned_url(
41
46
  "get_object",
42
- Params={"Bucket": self.bucket, "Key": record["key"]},
47
+ Params={"Bucket": self.store.bucket, "Key": record["key"]},
43
48
  ExpiresIn=expires,
44
49
  )
45
50
 
@@ -96,22 +101,28 @@ class PresignedURLOperations(BaseOperations):
96
101
  expires: int = _DEFAULT_PRESIGN_EXPIRES
97
102
  ) -> tuple[str, str]:
98
103
  """Generate a presigned URL for uploading a new artifact."""
99
- self._check_closed()
104
+ if self.store._closed:
105
+ raise ArtifactStoreError("Store is closed")
100
106
 
101
107
  start_time = time.time()
102
108
 
109
+ # Ensure session is allocated
110
+ if session_id is None:
111
+ session_id = await self.store._session_manager.allocate_session()
112
+ else:
113
+ session_id = await self.store._session_manager.allocate_session(session_id=session_id)
114
+
103
115
  # Generate artifact ID and key path
104
116
  artifact_id = uuid.uuid4().hex
105
- scope = session_id or f"{_ANON_PREFIX}_{artifact_id}"
106
- key = f"sess/{scope}/{artifact_id}"
117
+ key = self.store.generate_artifact_key(session_id, artifact_id)
107
118
 
108
119
  try:
109
- storage_ctx_mgr = self.s3_factory()
120
+ storage_ctx_mgr = self.store._s3_factory()
110
121
  async with storage_ctx_mgr as s3:
111
122
  url = await s3.generate_presigned_url(
112
123
  "put_object",
113
124
  Params={
114
- "Bucket": self.bucket,
125
+ "Bucket": self.store.bucket,
115
126
  "Key": key,
116
127
  "ContentType": mime_type
117
128
  },
@@ -163,20 +174,26 @@ class PresignedURLOperations(BaseOperations):
163
174
  ttl: int = _DEFAULT_TTL,
164
175
  ) -> bool:
165
176
  """Register metadata for an artifact uploaded via presigned URL."""
166
- self._check_closed()
177
+ if self.store._closed:
178
+ raise ArtifactStoreError("Store is closed")
167
179
 
168
180
  start_time = time.time()
169
181
 
182
+ # Ensure session is allocated
183
+ if session_id is None:
184
+ session_id = await self.store._session_manager.allocate_session()
185
+ else:
186
+ session_id = await self.store._session_manager.allocate_session(session_id=session_id)
187
+
170
188
  # Reconstruct the key path
171
- scope = session_id or f"{_ANON_PREFIX}_{artifact_id}"
172
- key = f"sess/{scope}/{artifact_id}"
189
+ key = self.store.generate_artifact_key(session_id, artifact_id)
173
190
 
174
191
  try:
175
192
  # Verify the object exists and get its size
176
- storage_ctx_mgr = self.s3_factory()
193
+ storage_ctx_mgr = self.store._s3_factory()
177
194
  async with storage_ctx_mgr as s3:
178
195
  try:
179
- response = await s3.head_object(Bucket=self.bucket, Key=key)
196
+ response = await s3.head_object(Bucket=self.store.bucket, Key=key)
180
197
  file_size = response.get('ContentLength', 0)
181
198
  except Exception:
182
199
  logger.warning(f"Artifact {artifact_id} not found in storage")
@@ -184,7 +201,9 @@ class PresignedURLOperations(BaseOperations):
184
201
 
185
202
  # Build metadata record
186
203
  record = {
187
- "scope": scope,
204
+ "artifact_id": artifact_id,
205
+ "session_id": session_id,
206
+ "sandbox_id": self.store.sandbox_id,
188
207
  "key": key,
189
208
  "mime": mime,
190
209
  "summary": summary,
@@ -192,15 +211,15 @@ class PresignedURLOperations(BaseOperations):
192
211
  "filename": filename,
193
212
  "bytes": file_size,
194
213
  "sha256": None, # We don't have the hash since we didn't upload it directly
195
- "stored_at": datetime.utcnow().isoformat(timespec="seconds") + "Z",
214
+ "stored_at": datetime.utcnow().isoformat() + "Z",
196
215
  "ttl": ttl,
197
- "storage_provider": self.storage_provider_name,
198
- "session_provider": self.session_provider_name,
216
+ "storage_provider": self.store._storage_provider_name,
217
+ "session_provider": self.store._session_provider_name,
199
218
  "uploaded_via_presigned": True, # Flag to indicate upload method
200
219
  }
201
220
 
202
221
  # Cache metadata using session provider
203
- session_ctx_mgr = self.session_factory()
222
+ session_ctx_mgr = self.store._session_factory()
204
223
  async with session_ctx_mgr as session:
205
224
  await session.setex(artifact_id, ttl, json.dumps(record))
206
225
 
@@ -264,4 +283,21 @@ class PresignedURLOperations(BaseOperations):
264
283
  ttl=ttl
265
284
  )
266
285
 
267
- return upload_url, artifact_id
286
+ return upload_url, artifact_id
287
+
288
+ async def _get_record(self, artifact_id: str) -> Dict[str, Any]:
289
+ """Get artifact metadata record."""
290
+ try:
291
+ session_ctx_mgr = self.store._session_factory()
292
+ async with session_ctx_mgr as session:
293
+ raw = await session.get(artifact_id)
294
+ except Exception as e:
295
+ raise SessionError(f"Session error for {artifact_id}: {e}") from e
296
+
297
+ if raw is None:
298
+ raise ArtifactNotFoundError(f"Artifact {artifact_id} not found")
299
+
300
+ try:
301
+ return json.loads(raw)
302
+ except json.JSONDecodeError as e:
303
+ raise ProviderError(f"Corrupted metadata for {artifact_id}") from e
File without changes