chuk-artifacts 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,286 @@
+ # -*- coding: utf-8 -*-
+ # chuk_artifacts/metadata.py
+ """
+ Metadata operations: exists, metadata retrieval, and deletion.
+ """
+
+ from __future__ import annotations
+
+ import logging, json
+ from datetime import datetime
+ from typing import Any, Dict, List
+
+ from .base import BaseOperations
+ from .exceptions import (
+     ArtifactStoreError, ArtifactNotFoundError, ArtifactExpiredError,
+     ProviderError
+ )
+
+ logger = logging.getLogger(__name__)
+
+
+ class MetadataOperations(BaseOperations):
+     """Handles metadata-related operations."""
+
+     async def metadata(self, artifact_id: str) -> Dict[str, Any]:
+         """
+         Get artifact metadata.
+
+         Parameters
+         ----------
+         artifact_id : str
+             The artifact identifier
+
+         Returns
+         -------
+         dict
+             Artifact metadata
+
+         Raises
+         ------
+         ArtifactNotFoundError
+             If artifact doesn't exist or has expired
+         """
+         return await self._get_record(artifact_id)
+
+     async def exists(self, artifact_id: str) -> bool:
+         """
+         Check if artifact exists and hasn't expired.
+
+         Parameters
+         ----------
+         artifact_id : str
+             The artifact identifier
+
+         Returns
+         -------
+         bool
+             True if artifact exists, False otherwise
+         """
+         try:
+             await self._get_record(artifact_id)
+             return True
+         except (ArtifactNotFoundError, ArtifactExpiredError):
+             return False
+
+     async def delete(self, artifact_id: str) -> bool:
+         """
+         Delete artifact and its metadata.
+
+         Parameters
+         ----------
+         artifact_id : str
+             The artifact identifier
+
+         Returns
+         -------
+         bool
+             True if deleted, False if not found
+
+         Raises
+         ------
+         ProviderError
+             If deletion fails
+         """
+         self._check_closed()
+
+         try:
+             record = await self._get_record(artifact_id)
+
+             # Delete from object storage
+             storage_ctx_mgr = self.s3_factory()
+             async with storage_ctx_mgr as s3:
+                 await s3.delete_object(Bucket=self.bucket, Key=record["key"])
+
+             # Delete metadata from session store
+             session_ctx_mgr = self.session_factory()
+             async with session_ctx_mgr as session:
+                 if hasattr(session, 'delete'):
+                     await session.delete(artifact_id)
+                 else:
+                     logger.warning(
+                         "Session provider doesn't support delete operation",
+                         extra={"artifact_id": artifact_id, "provider": self.session_provider_name}
+                     )
+
+             logger.info("Artifact deleted", extra={"artifact_id": artifact_id})
+             return True
+
+         except (ArtifactNotFoundError, ArtifactExpiredError):
+             logger.warning("Attempted to delete non-existent artifact", extra={"artifact_id": artifact_id})
+             return False
+         except Exception as e:
+             logger.error(
+                 "Artifact deletion failed",
+                 extra={"artifact_id": artifact_id, "error": str(e)}
+             )
+             raise ProviderError(f"Deletion failed: {e}") from e
+
+     async def update_metadata(
+         self,
+         artifact_id: str,
+         *,
+         summary: str | None = None,
+         meta: Dict[str, Any] | None = None,
+         filename: str | None = None,
+         ttl: int | None = None
+     ) -> Dict[str, Any]:
+         """
+         Update artifact metadata without changing the stored data.
+
+         Parameters
+         ----------
+         artifact_id : str
+             The artifact identifier
+         summary : str, optional
+             New summary description
+         meta : dict, optional
+             New or additional metadata fields
+         filename : str, optional
+             New filename
+         ttl : int, optional
+             New TTL for metadata
+
+         Returns
+         -------
+         dict
+             Updated metadata record
+
+         Raises
+         ------
+         ArtifactNotFoundError
+             If artifact doesn't exist
+         ProviderError
+             If update fails
+         """
+         self._check_closed()
+
+         try:
+             # Get existing record
+             record = await self._get_record(artifact_id)
+
+             # Update fields if provided
+             if summary is not None:
+                 record["summary"] = summary
+             if meta is not None:
+                 # Merge with existing meta, allowing overwrites
+                 existing_meta = record.get("meta", {})
+                 existing_meta.update(meta)
+                 record["meta"] = existing_meta
+             if filename is not None:
+                 record["filename"] = filename
+             if ttl is not None:
+                 record["ttl"] = ttl
+
+             # Update stored metadata
+             record["updated_at"] = datetime.utcnow().isoformat(timespec="seconds") + "Z"
+
+             session_ctx_mgr = self.session_factory()
+             async with session_ctx_mgr as session:
+                 final_ttl = ttl or record.get("ttl", 900)  # Use provided TTL or existing/default
+                 await session.setex(artifact_id, final_ttl, json.dumps(record))
+
+             logger.info(
+                 "Artifact metadata updated",
+                 extra={"artifact_id": artifact_id, "updated_fields": [
+                     k for k, v in [
+                         ("summary", summary), ("meta", meta),
+                         ("filename", filename), ("ttl", ttl)
+                     ] if v is not None
+                 ]}
+             )
+
+             return record
+
+         except (ArtifactNotFoundError, ArtifactExpiredError):
+             raise
+         except Exception as e:
+             logger.error(
+                 "Metadata update failed",
+                 extra={"artifact_id": artifact_id, "error": str(e)}
+             )
+             raise ProviderError(f"Metadata update failed: {e}") from e
+
+     async def extend_ttl(self, artifact_id: str, additional_seconds: int) -> Dict[str, Any]:
+         """
+         Extend the TTL of an artifact's metadata.
+
+         Parameters
+         ----------
+         artifact_id : str
+             The artifact identifier
+         additional_seconds : int
+             Additional seconds to add to the current TTL
+
+         Returns
+         -------
+         dict
+             Updated metadata record
+
+         Raises
+         ------
+         ArtifactNotFoundError
+             If artifact doesn't exist
+         ProviderError
+             If TTL extension fails
+         """
+         self._check_closed()
+
+         try:
+             # Get current record to find existing TTL
+             record = await self._get_record(artifact_id)
+             current_ttl = record.get("ttl", 900)
+             new_ttl = current_ttl + additional_seconds
+
+             # Update with extended TTL
+             return await self.update_metadata(artifact_id, ttl=new_ttl)
+
+         except (ArtifactNotFoundError, ArtifactExpiredError):
+             raise
+         except Exception as e:
+             logger.error(
+                 "TTL extension failed",
+                 extra={
+                     "artifact_id": artifact_id,
+                     "additional_seconds": additional_seconds,
+                     "error": str(e)
+                 }
+             )
+             raise ProviderError(f"TTL extension failed: {e}") from e
+
+     async def list_by_session(self, session_id: str, limit: int = 100) -> List[Dict[str, Any]]:
+         """
+         List artifacts for a specific session.
+
+         Note: this cannot be implemented efficiently with the current session
+         provider abstraction, since there is no way to query records by
+         session_id patterns. Proper indexing would be needed for production use.
+
+         Parameters
+         ----------
+         session_id : str
+             Session identifier to search for
+         limit : int, optional
+             Maximum number of artifacts to return
+
+         Returns
+         -------
+         list
+             List of metadata records for artifacts in the session
+
+         Raises
+         ------
+         NotImplementedError
+             This method requires additional indexing infrastructure
+         """
+         # This would require either:
+         # 1. A separate index of session_id -> artifact_ids
+         # 2. Storage provider support for prefix queries
+         # 3. Enhanced session provider with query capabilities
+
+         raise NotImplementedError(
+             "list_by_session requires additional indexing infrastructure. "
+             "Consider implementing session-based indexing or using storage "
+             "provider list operations if available."
+         )
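
A hedged usage sketch for these operations follows. The ArtifactStore facade and its default constructor are assumptions; only the method names and signatures come from metadata.py.

import asyncio
from chuk_artifacts import ArtifactStore  # assumed package-level entry point

async def main() -> None:
    store = ArtifactStore()   # assumed default construction
    artifact_id = "..."       # an id returned by an earlier store/upload call

    if await store.exists(artifact_id):
        record = await store.metadata(artifact_id)
        print(record["summary"], record.get("filename"))

        # Merge extra metadata fields and push the expiry out by ten minutes.
        await store.update_metadata(artifact_id, meta={"reviewed": True})
        await store.extend_ttl(artifact_id, 600)

    # delete() returns False rather than raising when the artifact is already gone.
    removed = await store.delete(artifact_id)
    print("deleted:", removed)

asyncio.run(main())
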
@@ -0,0 +1,23 @@
+ # -*- coding: utf-8 -*-
+ # chuk_artifacts/models.py
+ from typing import Any, Dict
+ from pydantic import BaseModel, Field
+
+
+ class ArtifactEnvelope(BaseModel):
+     """
+     A tiny, model-friendly wrapper describing a stored artefact.
+
+     The *bytes*, *mime_type*, etc. let the UI reason about the file
+     without ever uploading the raw payload into the chat context.
+     """
+
+     success: bool = True
+     artifact_id: str                    # opaque handle for look-ups
+     mime_type: str                      # e.g. "image/png", "text/csv"
+     bytes: int                          # size on disk
+     summary: str                        # human-readable description / alt
+     meta: Dict[str, Any] = Field(default_factory=dict)
+
+     class Config:
+         extra = "allow"  # future-proof: lets tools add keys
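
As a quick illustration, the envelope is built and serialised like any other pydantic model; the field values below are made up.

from chuk_artifacts.models import ArtifactEnvelope

env = ArtifactEnvelope(
    artifact_id="3f9a1c...",                 # opaque handle from the store
    mime_type="text/csv",
    bytes=48213,
    summary="Quarterly revenue export",
    meta={"rows": 1200, "delimiter": ","},
)

# extra keys are allowed, so downstream tools can annotate the envelope later
env.meta["reviewed"] = True
payload = env.dict()   # use env.model_dump() on pydantic v2
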
@@ -0,0 +1,267 @@
+ # -*- coding: utf-8 -*-
+ # chuk_artifacts/presigned.py
+ """
+ Presigned URL operations: download URLs, upload URLs, and upload registration.
+ """
+
+ from __future__ import annotations
+
+ import uuid, time, logging, json
+ from datetime import datetime
+ from typing import Any, Dict, Optional
+
+ from .base import BaseOperations
+ from .exceptions import (
+     ArtifactStoreError, ArtifactNotFoundError, ArtifactExpiredError,
+     ProviderError, SessionError
+ )
+
+ logger = logging.getLogger(__name__)
+
+ _ANON_PREFIX = "anon"
+ _DEFAULT_TTL = 900
+ _DEFAULT_PRESIGN_EXPIRES = 3600
+
+
+ class PresignedURLOperations(BaseOperations):
+     """Handles all presigned URL operations."""
+
+     async def presign(self, artifact_id: str, expires: int = _DEFAULT_PRESIGN_EXPIRES) -> str:
+         """Generate a presigned URL for artifact download."""
+         self._check_closed()
+
+         start_time = time.time()
+
+         try:
+             record = await self._get_record(artifact_id)
+
+             storage_ctx_mgr = self.s3_factory()
+             async with storage_ctx_mgr as s3:
+                 url = await s3.generate_presigned_url(
+                     "get_object",
+                     Params={"Bucket": self.bucket, "Key": record["key"]},
+                     ExpiresIn=expires,
+                 )
+
+             duration_ms = int((time.time() - start_time) * 1000)
+             logger.info(
+                 "Presigned URL generated",
+                 extra={
+                     "artifact_id": artifact_id,
+                     "expires_in": expires,
+                     "duration_ms": duration_ms,
+                 }
+             )
+
+             return url
+
+         except (ArtifactNotFoundError, ArtifactExpiredError):
+             raise
+         except Exception as e:
+             duration_ms = int((time.time() - start_time) * 1000)
+             logger.error(
+                 "Presigned URL generation failed",
+                 extra={
+                     "artifact_id": artifact_id,
+                     "error": str(e),
+                     "duration_ms": duration_ms,
+                 }
+             )
+
+             if "oauth" in str(e).lower() or "credential" in str(e).lower():
+                 raise NotImplementedError(
+                     "This provider cannot generate presigned URLs with the "
+                     "current credential type (e.g. OAuth). Use HMAC creds instead."
+                 ) from e
+             else:
+                 raise ProviderError(f"Presigned URL generation failed: {e}") from e
+
+     async def presign_short(self, artifact_id: str) -> str:
+         """Generate a short-lived presigned URL (15 minutes)."""
+         return await self.presign(artifact_id, expires=900)
+
+     async def presign_medium(self, artifact_id: str) -> str:
+         """Generate a medium-lived presigned URL (1 hour)."""
+         return await self.presign(artifact_id, expires=3600)
+
+     async def presign_long(self, artifact_id: str) -> str:
+         """Generate a long-lived presigned URL (24 hours)."""
+         return await self.presign(artifact_id, expires=86400)
+
+     async def presign_upload(
+         self,
+         session_id: str | None = None,
+         filename: str | None = None,
+         mime_type: str = "application/octet-stream",
+         expires: int = _DEFAULT_PRESIGN_EXPIRES
+     ) -> tuple[str, str]:
+         """Generate a presigned URL for uploading a new artifact."""
+         self._check_closed()
+
+         start_time = time.time()
+
+         # Generate artifact ID and key path
+         artifact_id = uuid.uuid4().hex
+         scope = session_id or f"{_ANON_PREFIX}_{artifact_id}"
+         key = f"sess/{scope}/{artifact_id}"
+
+         try:
+             storage_ctx_mgr = self.s3_factory()
+             async with storage_ctx_mgr as s3:
+                 url = await s3.generate_presigned_url(
+                     "put_object",
+                     Params={
+                         "Bucket": self.bucket,
+                         "Key": key,
+                         "ContentType": mime_type
+                     },
+                     ExpiresIn=expires,
+                 )
+
+             duration_ms = int((time.time() - start_time) * 1000)
+             logger.info(
+                 "Upload presigned URL generated",
+                 extra={
+                     "artifact_id": artifact_id,
+                     "key": key,
+                     "mime_type": mime_type,
+                     "expires_in": expires,
+                     "duration_ms": duration_ms,
+                 }
+             )
+
+             return url, artifact_id
+
+         except Exception as e:
+             duration_ms = int((time.time() - start_time) * 1000)
+             logger.error(
+                 "Upload presigned URL generation failed",
+                 extra={
+                     "artifact_id": artifact_id,
+                     "error": str(e),
+                     "duration_ms": duration_ms,
+                 }
+             )
+
+             if "oauth" in str(e).lower() or "credential" in str(e).lower():
+                 raise NotImplementedError(
+                     "This provider cannot generate presigned URLs with the "
+                     "current credential type (e.g. OAuth). Use HMAC creds instead."
+                 ) from e
+             else:
+                 raise ProviderError(f"Upload presigned URL generation failed: {e}") from e
+
+     async def register_uploaded_artifact(
+         self,
+         artifact_id: str,
+         *,
+         mime: str,
+         summary: str,
+         meta: Dict[str, Any] | None = None,
+         filename: str | None = None,
+         session_id: str | None = None,
+         ttl: int = _DEFAULT_TTL,
+     ) -> bool:
+         """Register metadata for an artifact uploaded via presigned URL."""
+         self._check_closed()
+
+         start_time = time.time()
+
+         # Reconstruct the key path
+         scope = session_id or f"{_ANON_PREFIX}_{artifact_id}"
+         key = f"sess/{scope}/{artifact_id}"
+
+         try:
+             # Verify the object exists and get its size
+             storage_ctx_mgr = self.s3_factory()
+             async with storage_ctx_mgr as s3:
+                 try:
+                     response = await s3.head_object(Bucket=self.bucket, Key=key)
+                     file_size = response.get('ContentLength', 0)
+                 except Exception:
+                     logger.warning(f"Artifact {artifact_id} not found in storage")
+                     return False
+
+             # Build metadata record
+             record = {
+                 "scope": scope,
+                 "key": key,
+                 "mime": mime,
+                 "summary": summary,
+                 "meta": meta or {},
+                 "filename": filename,
+                 "bytes": file_size,
+                 "sha256": None,  # We don't have the hash since we didn't upload it directly
+                 "stored_at": datetime.utcnow().isoformat(timespec="seconds") + "Z",
+                 "ttl": ttl,
+                 "storage_provider": self.storage_provider_name,
+                 "session_provider": self.session_provider_name,
+                 "uploaded_via_presigned": True,  # Flag to indicate upload method
+             }
+
+             # Cache metadata using session provider
+             session_ctx_mgr = self.session_factory()
+             async with session_ctx_mgr as session:
+                 await session.setex(artifact_id, ttl, json.dumps(record))
+
+             duration_ms = int((time.time() - start_time) * 1000)
+             logger.info(
+                 "Artifact metadata registered after presigned upload",
+                 extra={
+                     "artifact_id": artifact_id,
+                     "bytes": file_size,
+                     "mime": mime,
+                     "duration_ms": duration_ms,
+                 }
+             )
+
+             return True
+
+         except Exception as e:
+             duration_ms = int((time.time() - start_time) * 1000)
+             logger.error(
+                 "Artifact metadata registration failed",
+                 extra={
+                     "artifact_id": artifact_id,
+                     "error": str(e),
+                     "duration_ms": duration_ms,
+                 }
+             )
+
+             if "session" in str(e).lower() or "redis" in str(e).lower():
+                 raise SessionError(f"Metadata registration failed: {e}") from e
+             else:
+                 raise ProviderError(f"Metadata registration failed: {e}") from e
+
+     async def presign_upload_and_register(
+         self,
+         *,
+         mime: str,
+         summary: str,
+         meta: Dict[str, Any] | None = None,
+         filename: str | None = None,
+         session_id: str | None = None,
+         ttl: int = _DEFAULT_TTL,
+         expires: int = _DEFAULT_PRESIGN_EXPIRES
+     ) -> tuple[str, str]:
+         """Convenience wrapper: generate an upload URL and pre-register metadata."""
+         # Generate presigned URL
+         upload_url, artifact_id = await self.presign_upload(
+             session_id=session_id,
+             filename=filename,
+             mime_type=mime,
+             expires=expires
+         )
+
+         # Pre-register metadata (with unknown file size)
+         await self.register_uploaded_artifact(
+             artifact_id,
+             mime=mime,
+             summary=summary,
+             meta=meta,
+             filename=filename,
+             session_id=session_id,
+             ttl=ttl
+         )
+
+         return upload_url, artifact_id
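
Putting the pieces together, a hedged sketch of the client-side upload round-trip might look as follows. The ArtifactStore facade and the use of httpx are assumptions; presign_upload and register_uploaded_artifact are the methods defined above, and registration is done after the PUT because register_uploaded_artifact head-checks the object first.

from __future__ import annotations

import asyncio
import httpx
from chuk_artifacts import ArtifactStore  # assumed package-level entry point

async def upload_csv(data: bytes) -> str | None:
    store = ArtifactStore()  # assumed default construction

    # 1. Reserve an artifact id and get a time-limited PUT URL.
    url, artifact_id = await store.presign_upload(
        session_id="sess-123", mime_type="text/csv", expires=900
    )

    # 2. Upload the payload straight to object storage.
    async with httpx.AsyncClient() as client:
        resp = await client.put(url, content=data, headers={"Content-Type": "text/csv"})
        resp.raise_for_status()

    # 3. Register metadata so the artifact becomes visible to the metadata APIs.
    ok = await store.register_uploaded_artifact(
        artifact_id, mime="text/csv", summary="Quarterly revenue export",
        session_id="sess-123",
    )
    return artifact_id if ok else None

asyncio.run(upload_csv(b"a,b\n1,2\n"))
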
@@ -0,0 +1,84 @@
+ # -*- coding: utf-8 -*-
+ # chuk_artifacts/provider_factory.py
+ """
+ Resolve the storage back-end requested via **ARTIFACT_PROVIDER**.
+
+ Built-in providers
+ ──────────────────
+ • **memory** (default) - in-process, non-persistent store (unit tests, demos)
+ • **fs**, **filesystem** - local filesystem rooted at `$ARTIFACT_FS_ROOT`
+ • **s3** - plain AWS or any S3-compatible endpoint
+ • **ibm_cos** - IBM COS, HMAC credentials (Signature V2)
+ • **ibm_cos_iam** - IBM COS, IAM API-key / OAuth signature
+
+ Any other value is resolved dynamically as
+ `chuk_artifacts.providers.<name>.factory()`.
+ """
+
+ from __future__ import annotations
+
+ import os
+ from importlib import import_module
+ from typing import Callable, AsyncContextManager
+
+ __all__ = ["factory_for_env"]
+
+
+ # ──────────────────────────────────────────────────────────────────
+ # Public factory selector
+ # ──────────────────────────────────────────────────────────────────
+
+ def factory_for_env() -> Callable[[], AsyncContextManager]:
+     """Return a provider-specific factory based on `$ARTIFACT_PROVIDER`."""
+
+     provider = os.getenv("ARTIFACT_PROVIDER", "memory").lower().strip()
+
+     # Fast paths for the built-ins ------------------------------------------------
+     # Memory first as it's the default
+     if provider in ("memory", "mem", "inmemory"):
+         from .providers import memory
+         return memory.factory()
+
+     if provider in ("fs", "filesystem"):
+         from .providers import filesystem
+         return filesystem.factory()
+
+     if provider == "s3":
+         from .providers import s3
+         return s3.factory()
+
+     if provider == "ibm_cos":
+         from .providers import ibm_cos
+         return ibm_cos.factory()  # returns the zero-arg factory callable
+
+     if provider == "ibm_cos_iam":
+         from .providers import ibm_cos_iam
+         return ibm_cos_iam.factory  # note: function itself is already the factory
+
+     # ---------------------------------------------------------------------------
+     # Fallback: dynamic lookup – allows user-supplied provider implementations.
+     # ---------------------------------------------------------------------------
+     try:
+         mod = import_module(f"chuk_artifacts.providers.{provider}")
+     except ModuleNotFoundError as exc:
+         # Provide helpful error message with available providers
+         available = ["memory", "filesystem", "s3", "ibm_cos", "ibm_cos_iam"]
+         raise ValueError(
+             f"Unknown storage provider '{provider}'. "
+             f"Available providers: {', '.join(available)}"
+         ) from exc
+
+     if not hasattr(mod, "factory"):
+         raise AttributeError(
+             f"Provider '{provider}' lacks a factory() function"
+         )
+     # For dynamic providers, call factory() to get the actual factory function
+     factory_func = mod.factory
+     if callable(factory_func):
+         # If it's a function that returns a factory, call it
+         try:
+             return factory_func()
+         except TypeError:
+             # If it's already the factory function, return it directly
+             return factory_func
+     return factory_func
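
For reference, a minimal sketch of driving the selector. The head_object/put_object calls mirror the S3-style client methods used elsewhere in this package, but the memory provider's exact surface is an assumption.

import asyncio
import os
from chuk_artifacts.provider_factory import factory_for_env

async def main() -> None:
    os.environ.setdefault("ARTIFACT_PROVIDER", "memory")   # the default anyway

    s3_factory = factory_for_env()      # zero-arg factory
    async with s3_factory() as s3:      # async context manager, per the return type
        # put_object is assumed here as the counterpart of the "put_object" presign op
        await s3.put_object(Bucket="artifacts", Key="sess/demo/abc", Body=b"hello")
        head = await s3.head_object(Bucket="artifacts", Key="sess/demo/abc")
        print(head.get("ContentLength"))

asyncio.run(main())
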
@@ -0,0 +1,10 @@
+ # -*- coding: utf-8 -*-
+ # chuk_artifacts/providers/__init__.py
+ """
+ Convenience re-exports so caller code can do:
+
+     from chuk_artifacts.providers import s3, ibm_cos, memory, filesystem
+ """
+ from . import s3, ibm_cos, memory, filesystem, ibm_cos_iam
+
+ __all__ = ["s3", "ibm_cos", "memory", "filesystem", "ibm_cos_iam"]