slide_narrator-5.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,282 @@
+"""Thread storage implementation."""
+from typing import Optional, Dict, Any, List
+from ..models.thread import Thread
+from ..models.message import Message
+from ..utils.logging import get_logger
+from .storage_backend import MemoryBackend, SQLBackend
+
+logger = get_logger(__name__)
+
+class ThreadStore:
+    """
+    Thread storage implementation with pluggable backends.
+    Supports both in-memory and SQL (SQLite/PostgreSQL) storage.
+
+    Key characteristics:
+    - Unified interface for all storage types
+    - Memory backend for development/testing (default)
+    - SQLite for local persistence
+    - PostgreSQL for production
+    - Built-in connection pooling for SQLBackend
+
+    Usage:
+        # RECOMMENDED: factory pattern for immediate connection validation
+        from narrator import ThreadStore
+        store = await ThreadStore.create("postgresql+asyncpg://user:pass@localhost/dbname")
+
+        # Or for in-memory storage:
+        store = await ThreadStore.create()  # Uses memory backend
+
+        # Direct constructor (connects on first operation):
+        store = ThreadStore("postgresql+asyncpg://user:pass@localhost/dbname")
+
+    Connection pooling settings can be configured via environment variables:
+    - NARRATOR_DB_POOL_SIZE: Maximum number of connections to keep open (default: 5)
+    - NARRATOR_DB_MAX_OVERFLOW: Maximum number of connections to create above pool_size (default: 10)
+    - NARRATOR_DB_POOL_TIMEOUT: Seconds to wait for a connection from the pool (default: 30)
+    - NARRATOR_DB_POOL_RECYCLE: Seconds after which a connection is recycled (default: 300)
+    """
+
+    def __init__(self, database_url: Optional[str] = None):
+        """
+        Initialize the thread store with an optional database URL.
+        If no URL is provided, in-memory storage is used by default.
+        This constructor doesn't establish database connections - they happen on first use.
+
+        For immediate connection validation, use the async factory method:
+            store = await ThreadStore.create(database_url)
+
+        Args:
+            database_url: SQLAlchemy async database URL. Examples:
+                - "postgresql+asyncpg://user:pass@localhost/dbname"
+                - "sqlite+aiosqlite:///path/to/db.sqlite"
+                - None for in-memory storage
+        """
+        if database_url is None:
+            # Default to in-memory storage
+            logger.info("No database URL provided. Using in-memory storage.")
+            self._backend = MemoryBackend()
+        else:
+            # Use SQLBackend with the provided URL
+            logger.info(f"Using database URL: {database_url}")
+            self._backend = SQLBackend(database_url)
+
+        # Track whether the backend has been initialized
+        self._initialized = False
+
+    @classmethod
+    async def create(cls, database_url: Optional[str] = None) -> "ThreadStore":
+        """
+        Factory method to create and initialize a ThreadStore.
+        This method connects to the database immediately, allowing early validation
+        of connection parameters.
+
+        Args:
+            database_url: SQLAlchemy async database URL. Examples:
+                - "postgresql+asyncpg://user:pass@localhost/dbname"
+                - "sqlite+aiosqlite:///path/to/db.sqlite"
+                - None for in-memory storage
+
+        Returns:
+            Initialized ThreadStore instance
+
+        Raises:
+            Exception: If database connection fails
+        """
+        # Create the instance
+        store = cls(database_url)
+
+        # Initialize immediately
+        try:
+            await store.initialize()
+        except Exception as e:
+            # If a database URL was provided but initialization failed, raise the error
+            # instead of silently falling back to memory storage
+            if database_url is not None:
+                raise RuntimeError(f"Failed to initialize database with URL {database_url}: {str(e)}") from e
+            raise
+
+        return store
+
+    async def _ensure_initialized(self) -> None:
+        """Ensure the storage backend is initialized."""
+        if not self._initialized:
+            await self.initialize()
+
+    async def initialize(self) -> None:
+        """Initialize the storage backend."""
+        await self._backend.initialize()
+        self._initialized = True
+
+    async def save(self, thread: Thread) -> Thread:
+        """
+        Save a thread to storage, filtering out system messages.
+
+        System messages are not persisted to storage by design, but are kept
+        in the original Thread object in memory.
+
+        Args:
+            thread: The Thread object to save
+
+        Returns:
+            The original Thread object (with system messages intact)
+        """
+        await self._ensure_initialized()
+
+        # Create a filtered copy of the thread without system messages
+        filtered_thread = Thread(
+            id=thread.id,
+            title=thread.title,
+            created_at=thread.created_at,
+            updated_at=thread.updated_at,
+            attributes=thread.attributes.copy() if thread.attributes else {},
+            platforms=thread.platforms.copy() if thread.platforms else {}
+        )
+
+        # Copy only non-system messages into the filtered thread. Messages are
+        # shared by reference; the filtered copy exists solely for persistence.
+        for message in thread.messages:
+            if message.role != "system":
+                filtered_thread.messages.append(message)
+
+        # Save the filtered thread to storage
+        await self._backend.save(filtered_thread)
+
+        # Return the original thread (with system messages intact)
+        return thread
+
+    async def get(self, thread_id: str) -> Optional[Thread]:
+        """Get a thread by ID."""
+        await self._ensure_initialized()
+        return await self._backend.get(thread_id)
+
+    async def delete(self, thread_id: str) -> bool:
+        """Delete a thread by ID."""
+        await self._ensure_initialized()
+        return await self._backend.delete(thread_id)
+
+    async def list(self, limit: int = 100, offset: int = 0) -> List[Thread]:
+        """List threads with pagination."""
+        await self._ensure_initialized()
+        return await self._backend.list(limit, offset)
+
+    async def find_by_attributes(self, attributes: Dict[str, Any]) -> List[Thread]:
+        """Find threads by matching attributes."""
+        await self._ensure_initialized()
+        return await self._backend.find_by_attributes(attributes)
+
+    async def find_by_platform(self, platform_name: str, properties: Dict[str, Any]) -> List[Thread]:
+        """Find threads by platform name and properties."""
+        await self._ensure_initialized()
+        return await self._backend.find_by_platform(platform_name, properties)
+
+    async def list_recent(self, limit: Optional[int] = None) -> List[Thread]:
+        """List recent threads."""
+        await self._ensure_initialized()
+        return await self._backend.list_recent(limit)
+
+    async def find_messages_by_attribute(self, path: str, value: Any) -> List[Message]:
+        """
+        Find messages with a specific attribute at a given JSON path.
+        This is useful for finding messages with specific metadata (such as a Slack ts).
+
+        Args:
+            path: Dot-notation path to the attribute (e.g., "platforms.slack.ts")
+            value: The value to search for
+
+        Returns:
+            List of Message objects that match the criteria (possibly empty)
+        """
+        await self._ensure_initialized()
+        if hasattr(self._backend, 'find_messages_by_attribute'):
+            results = await self._backend.find_messages_by_attribute(path, value)
+
+            # Handle different return types from different backends
+            messages = []
+            for item in results:
+                if hasattr(item, 'model_dump'):  # A Message object (from MemoryBackend)
+                    messages.append(item)
+                elif hasattr(self._backend, '_create_message_from_record'):  # A MessageRecord (from SQLBackend)
+                    message = self._backend._create_message_from_record(item)
+                    messages.append(message)
+
+            return messages
+        else:
+            # Fallback implementation for backends that don't support this method.
+            # Less efficient, but provides compatibility.
+            messages = []
+            threads = await self._backend.list_recent(100)  # Get recent threads
+
+            # Check each thread's messages
+            for thread in threads:
+                for message in thread.messages:
+                    # Navigate the dot-notation path to get the value
+                    current = message
+                    parts = path.split('.')
+
+                    for part in parts:
+                        if isinstance(current, dict) and part in current:
+                            current = current[part]
+                        elif hasattr(current, part):
+                            current = getattr(current, part)
+                        else:
+                            current = None
+                            break
+
+                    # Check whether we found a match
+                    if current == value:
+                        messages.append(message)
+
+            return messages
+
+    # Properties that expose backend attributes
+    @property
+    def database_url(self):
+        return getattr(self._backend, "database_url", None)
+
+    @property
+    def engine(self):
+        return getattr(self._backend, "engine", None)
+
+    async def get_thread_by_message_id(self, message_id: str) -> Optional[Thread]:
+        """
+        Find the thread containing a specific message ID.
+
+        Args:
+            message_id: The ID of the message to find
+
+        Returns:
+            The Thread containing the message, or None if not found
+        """
+        await self._ensure_initialized()
+
+        # Use the backend's native implementation if it has one
+        if hasattr(self._backend, 'get_thread_by_message_id'):
+            return await self._backend.get_thread_by_message_id(message_id)
+
+        # Fallback implementation for backends that don't support this method
+        threads = await self._backend.list_recent(500)  # Get recent threads
+
+        # Check each thread's messages for the message ID
+        for thread in threads:
+            for message in thread.messages:
+                if message.id == message_id:
+                    return thread
+
+        return None
+
+# Optional PostgreSQL-specific implementation
+try:
+    import asyncpg  # noqa: F401 - availability check only
+
+    class SQLAlchemyThreadStore(ThreadStore):
+        """PostgreSQL-based thread storage for production use."""
+
+        def __init__(self, database_url):
+            if not database_url.startswith('postgresql+asyncpg://'):
+                database_url = database_url.replace('postgresql://', 'postgresql+asyncpg://')
+            super().__init__(database_url)
+
+except ImportError:
+    pass
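
For readers skimming the diff, here is a minimal usage sketch of the ThreadStore API above; it is not part of the package. It assumes the import path shown in the class docstring ("from narrator import ThreadStore") and that Thread and Message are exported alongside it; the Thread/Message constructor arguments and the pool-size value are illustrative assumptions, not package defaults.

import asyncio
import os

from narrator import ThreadStore, Thread, Message  # Thread/Message exports assumed

async def main():
    # Pool settings are read from the environment by SQLBackend (value here is arbitrary)
    os.environ.setdefault("NARRATOR_DB_POOL_SIZE", "5")

    # Factory pattern: with no URL this wires up the memory backend; with a
    # postgresql+asyncpg:// URL it would fail fast on bad connection parameters.
    store = await ThreadStore.create()

    # Constructor arguments are assumptions based on the fields used in save()
    thread = Thread(title="Demo thread")
    thread.messages.append(Message(role="user", content="hello"))
    await store.save(thread)  # system-role messages would be filtered out of storage

    fetched = await store.get(thread.id)
    # Dot-notation lookup into message metadata, e.g. a Slack timestamp:
    matches = await store.find_messages_by_attribute("platforms.slack.ts", "1700000000.000100")
    print(fetched, matches)

asyncio.run(main())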
@@ -0,0 +1,9 @@
+"""
+Models package for Tyler Stores
+"""
+
+from .thread import Thread
+from .message import Message
+from .attachment import Attachment
+
+__all__ = ["Thread", "Message", "Attachment"]
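
Assuming the top-level package is importable as narrator (as in the ThreadStore docstring), this re-export lets the model classes be pulled from one place:

from narrator.models import Thread, Message, Attachment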
@@ -0,0 +1,386 @@
+from typing import Dict, Optional, Any, Union, Literal
+from pydantic import BaseModel, computed_field
+import base64
+import io
+import filetype
+import mimetypes
+import logging
+from pathlib import Path
+from ..storage.file_store import FileStore
+import hashlib
+
+logger = logging.getLogger(__name__)
+
+class Attachment(BaseModel):
+    """Represents a file attached to a message"""
+    filename: str
+    content: Optional[Union[bytes, str]] = None  # Either raw bytes or a base64 string
+    mime_type: Optional[str] = None
+    attributes: Optional[Dict[str, Any]] = None  # Renamed from processed_content
+    file_id: Optional[str] = None  # Reference to stored file
+    storage_path: Optional[str] = None  # Path in storage backend
+    storage_backend: Optional[str] = None  # Storage backend type
+    status: Literal["pending", "stored", "failed"] = "pending"
+
+    @computed_field
+    @property
+    def id(self) -> str:
+        """Generate a unique ID based on a content hash"""
+        if self.content is None:
+            # With no content, hash the filename and MIME type instead
+            hash_input = f"{self.filename}{self.mime_type or ''}"
+            return hashlib.sha256(hash_input.encode()).hexdigest()[:16]
+
+        # Get content as bytes for hashing
+        if isinstance(self.content, bytes):
+            content_bytes = self.content
+        elif isinstance(self.content, str):
+            # Try to decode as base64 first
+            try:
+                content_bytes = base64.b64decode(self.content)
+            except Exception:
+                # Not base64; encode as UTF-8
+                content_bytes = self.content.encode('utf-8')
+        else:
+            # Fall back to a filename hash
+            return hashlib.sha256(self.filename.encode()).hexdigest()[:16]
+
+        # Hash filename + content
+        hash_input = self.filename.encode() + content_bytes
+        return hashlib.sha256(hash_input).hexdigest()[:16]
+
+    @classmethod
+    def from_file_path(cls, file_path: Union[str, Path]) -> 'Attachment':
+        """Create an attachment from a file path"""
+        file_path = Path(file_path)
+
+        if not file_path.exists():
+            raise FileNotFoundError(f"File not found: {file_path}")
+
+        # Read file content
+        content = file_path.read_bytes()
+
+        # Detect MIME type from the content
+        mime_type = filetype.guess_mime(content)
+
+        if not mime_type:
+            # Fallback: extension-based detection
+            mime_type, _ = mimetypes.guess_type(str(file_path))
+
+        if not mime_type:
+            # Default: binary
+            mime_type = 'application/octet-stream'
+
+        return cls(
+            filename=file_path.name,
+            content=content,
+            mime_type=mime_type
+        )
+
+    def detect_mime_type(self) -> None:
+        """Detect and set the MIME type from content"""
+        if self.content is None:
+            logger.warning(f"Cannot detect MIME type for {self.filename}: no content")
+            return
+
+        # Get content as bytes
+        if isinstance(self.content, bytes):
+            content_bytes = self.content
+        elif isinstance(self.content, str):
+            try:
+                content_bytes = base64.b64decode(self.content)
+            except Exception:
+                content_bytes = self.content.encode('utf-8')
+        else:
+            logger.warning(f"Cannot detect MIME type for {self.filename}: invalid content type")
+            return
+
+        # Detect MIME type from the content
+        detected_mime_type = filetype.guess_mime(content_bytes)
+
+        if not detected_mime_type:
+            # Fallback: extension-based detection
+            detected_mime_type, _ = mimetypes.guess_type(self.filename)
+
+        if not detected_mime_type:
+            # Default: binary
+            detected_mime_type = 'application/octet-stream'
+
+        if not self.mime_type:
+            self.mime_type = detected_mime_type
+            logger.debug(f"Detected MIME type for {self.filename}: {self.mime_type}")
+        else:
+            logger.debug(f"MIME type already set for {self.filename}: {self.mime_type}")
+
+    def model_dump(self, mode: str = "json") -> Dict[str, Any]:
+        """Convert attachment metadata to a dictionary suitable for JSON serialization.
+
+        The raw content field is intentionally excluded.
+
+        Args:
+            mode: Accepted for pydantic API compatibility. The serialized fields
+                contain no datetimes, so "json" and "python" produce the same output.
+        """
+        data = {
+            "filename": self.filename,
+            "mime_type": self.mime_type,
+            "attributes": self.attributes,
+            "file_id": self.file_id,
+            "storage_path": self.storage_path,
+            "storage_backend": self.storage_backend,
+            "status": self.status
+        }
+
+        return data
+
+    async def get_content_bytes(self, file_store: Optional[FileStore] = None) -> bytes:
+        """Get the content as bytes, converting from base64 if necessary.
+
+        If file_id is present, retrieves content from file storage.
+        Otherwise falls back to the content field.
+
+        Args:
+            file_store: FileStore instance to use for retrieving file content.
+                Required when file_id is present.
+        """
+        logger.debug(f"Getting content bytes for {self.filename}")
+
+        if self.file_id:
+            logger.debug(f"Retrieving content from file store for file_id: {self.file_id}")
+            if file_store is None:
+                raise ValueError("FileStore instance required to retrieve content for file_id")
+            if self.storage_path is None:
+                raise ValueError("storage_path required to retrieve content for file_id")
+            return await file_store.get(self.file_id, self.storage_path)
+
+        if isinstance(self.content, bytes):
+            logger.debug(f"Content is already in bytes format for {self.filename}")
+            return self.content
+        elif isinstance(self.content, str):
+            logger.debug(f"Converting string content for {self.filename}")
+            if self.content.startswith('data:'):
+                # Handle data URLs
+                logger.debug("Detected data URL format")
+                header, encoded = self.content.split(",", 1)
+                logger.debug(f"Data URL header: {header}")
+                try:
+                    decoded = base64.b64decode(encoded)
+                    logger.debug(f"Successfully decoded data URL content, size: {len(decoded)} bytes")
+                    return decoded
+                except Exception as e:
+                    logger.error(f"Failed to decode data URL content: {e}")
+                    raise
+            else:
+                try:
+                    # Try base64 decode
+                    logger.debug("Attempting base64 decode")
+                    decoded = base64.b64decode(self.content)
+                    logger.debug(f"Successfully decoded base64 content, size: {len(decoded)} bytes")
+                    return decoded
+                except Exception:
+                    # Not base64; encode as UTF-8 text
+                    logger.debug("Not base64, treating as UTF-8 text")
+                    return self.content.encode('utf-8')
+
+        raise ValueError("No content available - attachment has neither file_id nor content")
+
+    def update_attributes_with_url(self) -> None:
+        """Update attributes with a URL after storage_path is set."""
+        if self.storage_path:
+            if not self.attributes:
+                self.attributes = {}
+
+            try:
+                # Get the file URL from FileStore
+                self.attributes["url"] = FileStore.get_file_url(self.storage_path)
+                logger.debug(f"Updated attributes with URL: {self.attributes['url']}")
+            except Exception as e:
+                # Log the error but don't fail - a missing URL is better than crashing
+                logger.error(f"Failed to construct URL for attachment: {e}")
+                self.attributes["error"] = f"Failed to construct URL: {str(e)}"
+
+    async def process_and_store(self, file_store: FileStore, force: bool = False) -> None:
+        """Process the attachment content and store it in the file store.
+
+        Args:
+            file_store: FileStore instance to use for storing files
+            force: Whether to force processing even if already stored
+        """
+        logger.debug(f"Starting process_and_store for {self.filename} (force={force})")
+        logger.debug(f"Initial state - mime_type: {self.mime_type}, status: {self.status}, content type: {type(self.content)}")
+
+        if not force and self.status == "stored":
+            logger.info(f"Skipping process_and_store for {self.filename} - already stored")
+            return
+
+        if self.content is None:
+            logger.error(f"Cannot process attachment {self.filename}: no content provided")
+            self.status = "failed"
+            raise RuntimeError(f"Cannot process attachment {self.filename}: no content provided")
+
+        try:
+            # Get content as bytes first
+            logger.debug("Converting content to bytes")
+            content_bytes = await self.get_content_bytes(file_store=file_store)
+            logger.debug(f"Successfully converted content to bytes, size: {len(content_bytes)} bytes")
+
+            # Detect/verify MIME type
+            logger.debug("Detecting MIME type")
+            detected_mime_type = filetype.guess_mime(content_bytes)
+
+            if not detected_mime_type:
+                # Fallback: extension-based detection
+                detected_mime_type, _ = mimetypes.guess_type(self.filename)
+
+            if not detected_mime_type:
+                # Default: binary
+                detected_mime_type = 'application/octet-stream'
+
+            logger.debug(f"Detected MIME type: {detected_mime_type}")
+
+            if not self.mime_type:
+                self.mime_type = detected_mime_type
+                logger.debug(f"Set MIME type to detected type: {self.mime_type}")
+            elif self.mime_type != detected_mime_type:
+                logger.warning(f"Provided MIME type {self.mime_type} doesn't match detected type {detected_mime_type}")
+
+            # Initialize attributes
+            if not self.attributes:
+                self.attributes = {}
+
+            # Process content based on MIME type
+            logger.debug(f"Processing content based on MIME type: {self.mime_type}")
+
+            if self.mime_type.startswith('image/'):
+                logger.debug("Processing as image")
+                self.attributes.update({
+                    "type": "image",
+                    "description": f"Image file {self.filename}",
+                    "mime_type": self.mime_type
+                })
+
+            elif self.mime_type.startswith('audio/'):
+                logger.debug("Processing as audio")
+                self.attributes.update({
+                    "type": "audio",
+                    "description": f"Audio file {self.filename}",
+                    "mime_type": self.mime_type
+                })
+
+            elif self.mime_type == 'application/pdf':
+                logger.debug("Processing as PDF")
+                try:
+                    from pypdf import PdfReader
+                    reader = PdfReader(io.BytesIO(content_bytes))
+                    text = ""
+                    for page in reader.pages:
+                        try:
+                            extracted = page.extract_text()
+                            if extracted:
+                                text += extracted + "\n"
+                        except Exception as e:
+                            logger.warning(f"Error extracting text from PDF page: {e}")
+                            continue
+                    self.attributes.update({
+                        "type": "document",
+                        "text": text.strip(),
+                        "overview": f"Extracted text from {self.filename}",
+                        "mime_type": self.mime_type
+                    })
+                except ImportError:
+                    logger.warning("pypdf not available, skipping PDF text extraction")
+                    self.attributes.update({
+                        "type": "document",
+                        "description": f"PDF document {self.filename}",
+                        "mime_type": self.mime_type
+                    })
+
+            elif self.mime_type.startswith('text/'):
+                logger.debug("Processing as text")
+                try:
+                    text = content_bytes.decode('utf-8')
+                    self.attributes.update({
+                        "type": "text",
+                        "text": text,
+                        "mime_type": self.mime_type
+                    })
+                except UnicodeDecodeError:
+                    logger.warning("UTF-8 decode failed, trying alternative encodings")
+                    # Try alternative encodings. Note that latin-1 maps every byte,
+                    # so this loop always succeeds on its first iteration.
+                    for encoding in ['latin-1', 'cp1252', 'iso-8859-1']:
+                        try:
+                            text = content_bytes.decode(encoding)
+                            self.attributes.update({
+                                "type": "text",
+                                "text": text,
+                                "encoding": encoding,
+                                "mime_type": self.mime_type
+                            })
+                            logger.debug(f"Successfully decoded text using {encoding}")
+                            break
+                        except UnicodeDecodeError:
+                            continue
+
+            elif self.mime_type == 'application/json':
+                logger.debug("Processing as JSON")
+                import json
+                try:
+                    json_text = content_bytes.decode('utf-8')
+                    json_data = json.loads(json_text)
+                    self.attributes.update({
+                        "type": "json",
+                        "overview": "JSON data structure",
+                        "parsed_content": json_data,
+                        "mime_type": self.mime_type
+                    })
+                except Exception as e:
+                    logger.warning(f"Error parsing JSON content: {e}")
+                    self.attributes.update({
+                        "type": "json",
+                        "error": f"Failed to parse JSON: {str(e)}",
+                        "mime_type": self.mime_type
+                    })
+
+            else:
+                logger.debug(f"Processing as binary file with MIME type: {self.mime_type}")
+                self.attributes.update({
+                    "type": "binary",
+                    "description": f"Binary file {self.filename}",
+                    "mime_type": self.mime_type
+                })
+
+            # Store the file
+            logger.debug("Storing file in FileStore")
+
+            try:
+                logger.debug(f"Saving file to storage, content size: {len(content_bytes)} bytes")
+                result = await file_store.save(content_bytes, self.filename, self.mime_type)
+                logger.debug(f"Successfully saved file. Result: {result}")
+
+                self.file_id = result['id']
+                self.storage_backend = result['storage_backend']
+                self.storage_path = result['storage_path']
+                self.status = "stored"
+
+                # Update the filename to match the one created by the file store,
+                # extracting the actual filename from the storage path
+                new_filename = Path(self.storage_path).name
+                logger.debug(f"Updating attachment filename from {self.filename} to {new_filename}")
+                self.filename = new_filename
+
+                # Add storage info to attributes
+                self.attributes["storage_path"] = self.storage_path
+                self.update_attributes_with_url()
+
+                # Clear content after successful storage
+                self.content = None
+                logger.debug(f"Cleared content after successful storage for {self.filename}")
+
+                logger.debug(f"Successfully processed and stored attachment {self.filename}")
+
+            except Exception as e:
+                logger.error(f"Error processing attachment {self.filename}: {e}")
+                self.status = "failed"
+                raise
+
+        except Exception as e:
+            logger.error(f"Failed to process attachment {self.filename}: {str(e)}")
+            self.status = "failed"
+            raise RuntimeError(f"Failed to process attachment {self.filename}: {str(e)}") from e
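
And a companion sketch of the Attachment lifecycle defined above; again, not part of the package. The FileStore construction and the export paths are assumptions; the save()/get() calls mirror the signatures used by process_and_store() and get_content_bytes().

import asyncio

from narrator import Attachment  # export path assumed
from narrator.storage.file_store import FileStore  # module path per the relative import above

async def main():
    # Reads bytes and guesses the MIME type (content sniffing, then extension, then octet-stream)
    att = Attachment.from_file_path("notes.txt")
    print(att.id, att.mime_type)  # id is a 16-hex-char sha256 of filename + content

    file_store = FileStore()  # default constructor is an assumption
    await att.process_and_store(file_store)  # extracts attributes, persists bytes, clears content
    assert att.status == "stored" and att.content is None

    # Round-trips the bytes back out of storage via file_id/storage_path
    data = await att.get_content_bytes(file_store=file_store)
    print(len(data), att.attributes.get("type"))

asyncio.run(main())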