abstractcore 2.4.2__py3-none-any.whl → 2.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. abstractcore/apps/app_config_utils.py +19 -0
  2. abstractcore/apps/summarizer.py +85 -56
  3. abstractcore/architectures/detection.py +15 -4
  4. abstractcore/assets/architecture_formats.json +1 -1
  5. abstractcore/assets/model_capabilities.json +420 -11
  6. abstractcore/core/interface.py +2 -0
  7. abstractcore/core/session.py +4 -0
  8. abstractcore/embeddings/manager.py +54 -16
  9. abstractcore/media/__init__.py +116 -148
  10. abstractcore/media/auto_handler.py +363 -0
  11. abstractcore/media/base.py +456 -0
  12. abstractcore/media/capabilities.py +335 -0
  13. abstractcore/media/types.py +300 -0
  14. abstractcore/media/vision_fallback.py +260 -0
  15. abstractcore/providers/anthropic_provider.py +18 -1
  16. abstractcore/providers/base.py +187 -0
  17. abstractcore/providers/huggingface_provider.py +111 -12
  18. abstractcore/providers/lmstudio_provider.py +88 -5
  19. abstractcore/providers/mlx_provider.py +33 -1
  20. abstractcore/providers/ollama_provider.py +37 -3
  21. abstractcore/providers/openai_provider.py +18 -1
  22. abstractcore/server/app.py +1390 -104
  23. abstractcore/tools/common_tools.py +12 -8
  24. abstractcore/utils/__init__.py +9 -5
  25. abstractcore/utils/cli.py +199 -17
  26. abstractcore/utils/message_preprocessor.py +182 -0
  27. abstractcore/utils/structured_logging.py +117 -16
  28. abstractcore/utils/version.py +1 -1
  29. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/METADATA +214 -20
  30. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/RECORD +34 -27
  31. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/entry_points.txt +1 -0
  32. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/WHEEL +0 -0
  33. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/licenses/LICENSE +0 -0
  34. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,335 @@
1
+ """
2
+ Media capability detection and management for AbstractCore.
3
+
4
+ This module provides comprehensive capability detection for multimodal models,
5
+ leveraging the existing model_capabilities.json infrastructure to determine
6
+ what media types and formats each model supports.
7
+ """
8
+
9
+ from dataclasses import dataclass, field
10
+ from typing import Dict, List, Any, Optional, Union
11
+ from pathlib import Path
12
+ import logging
13
+
14
+ from ..architectures import get_model_capabilities
15
+ from .types import MediaType
16
+
17
+
18
@dataclass
class MediaCapabilities:
    """
    Comprehensive media capabilities for a specific model.

    This class aggregates all media-related capabilities from
    model_capabilities.json and provides convenient methods for checking
    media support, size/format limits, token estimation, and
    processing-strategy selection.
    """
    model_name: str

    # Core media support flags
    vision_support: bool = False
    audio_support: bool = False
    video_support: bool = False

    # Image capabilities
    max_images_per_message: int = 1
    supported_image_formats: List[str] = field(default_factory=lambda: ['jpg', 'jpeg', 'png'])
    image_resolutions: List[str] = field(default_factory=list)
    max_image_size_bytes: int = 5 * 1024 * 1024  # 5MB default

    # Document capabilities
    document_support: bool = True  # Most models can handle text documents
    max_document_size_bytes: int = 50 * 1024 * 1024  # 50MB default

    # Provider-specific features
    multimodal_message_support: bool = True
    text_embedding_preferred: bool = False  # For local models
    streaming_media_support: bool = False

    # Advanced features
    parallel_media_processing: bool = False
    media_token_estimation: bool = True

    @classmethod
    def from_model_capabilities(cls, model: str, provider: Optional[str] = None) -> 'MediaCapabilities':
        """
        Create MediaCapabilities from model_capabilities.json data.

        Args:
            model: Model name to look up capabilities for
            provider: Optional provider name for provider-specific adjustments

        Returns:
            MediaCapabilities instance with detected capabilities
        """
        # Missing/unknown models fall back to the dataclass defaults.
        caps = get_model_capabilities(model) or {}

        # Base capabilities from JSON
        instance = cls(
            model_name=model,
            vision_support=caps.get('vision_support', False),
            audio_support=caps.get('audio_support', False),
            video_support=caps.get('video_support', False),
            image_resolutions=caps.get('image_resolutions', [])
        )

        # Provider-specific adjustments
        if provider:
            instance._apply_provider_adjustments(provider, caps)

        # Model-specific adjustments based on model name patterns
        instance._apply_model_adjustments(caps)

        return instance

    def _apply_provider_adjustments(self, provider: str, caps: Dict[str, Any]) -> None:
        """Apply provider-specific capability adjustments (limits, formats, streaming)."""
        provider_lower = provider.lower()

        if provider_lower == "openai":
            self.max_images_per_message = 10 if "gpt-4o" in self.model_name.lower() else 1
            self.max_image_size_bytes = 20 * 1024 * 1024  # 20MB for OpenAI
            self.supported_image_formats = ['png', 'jpeg', 'jpg', 'gif', 'webp']
            self.streaming_media_support = True

        elif provider_lower == "anthropic":
            self.max_images_per_message = 20  # Claude supports up to 20 images
            self.max_image_size_bytes = 5 * 1024 * 1024  # 5MB for Anthropic
            self.supported_image_formats = ['png', 'jpeg', 'jpg', 'gif', 'webp']
            self.streaming_media_support = True

        elif provider_lower in ["ollama", "mlx", "lmstudio"]:
            self.text_embedding_preferred = True  # Local models often prefer text
            self.multimodal_message_support = True
            self.streaming_media_support = False
            self.max_image_size_bytes = 10 * 1024 * 1024  # 10MB for local

        elif provider_lower == "huggingface":
            self.streaming_media_support = False
            self.max_image_size_bytes = 15 * 1024 * 1024  # 15MB for HF

    def _apply_model_adjustments(self, caps: Dict[str, Any]) -> None:
        """Apply model-specific capability adjustments based on model name patterns."""
        model_lower = self.model_name.lower()

        # Vision model patterns
        if any(pattern in model_lower for pattern in ['vision', 'vl', 'visual']):
            self.vision_support = True
            if 'qwen' in model_lower:
                self.max_images_per_message = 5  # Qwen-VL supports multiple images

        # Multimodal model patterns
        if any(pattern in model_lower for pattern in ['4o', 'multimodal', 'omni']):
            self.vision_support = True
            # Bug fix: the capabilities JSON uses the 'audio_support' key (see
            # from_model_capabilities above), not 'audio'. The old check
            # ("'audio' not in caps") was always True, so audio support was
            # enabled even when the JSON explicitly set audio_support=False.
            if 'audio_support' not in caps or caps.get('audio_support'):
                self.audio_support = True

        # Local model adjustments
        if any(pattern in model_lower for pattern in ['llama', 'qwen', 'phi', 'gemma']):
            self.text_embedding_preferred = True

    def supports_media_type(self, media_type: MediaType) -> bool:
        """Check if the model supports a specific media type."""
        if media_type == MediaType.IMAGE:
            return self.vision_support
        elif media_type == MediaType.AUDIO:
            return self.audio_support
        elif media_type == MediaType.VIDEO:
            return self.video_support
        elif media_type in [MediaType.DOCUMENT, MediaType.TEXT]:
            return self.document_support
        return False

    def get_image_limits(self) -> Dict[str, Any]:
        """Get image-specific limits and capabilities."""
        return {
            'max_images_per_message': self.max_images_per_message,
            'supported_formats': self.supported_image_formats,
            'max_size_bytes': self.max_image_size_bytes,
            'supported_resolutions': self.image_resolutions,
            'vision_support': self.vision_support
        }

    def get_document_limits(self) -> Dict[str, Any]:
        """Get document-specific limits and capabilities."""
        return {
            'max_size_bytes': self.max_document_size_bytes,
            'document_support': self.document_support,
            'text_embedding_preferred': self.text_embedding_preferred
        }

    def estimate_media_tokens(self, media_type: MediaType, content_size: int = 0) -> int:
        """
        Estimate token usage for media content.

        Args:
            media_type: Type of media
            content_size: Size of content in bytes (optional)

        Returns:
            Estimated token count (0 when estimation is disabled or the
            media type is unsupported)
        """
        if not self.media_token_estimation:
            return 0

        if media_type == MediaType.IMAGE and self.vision_support:
            # Base token cost for images varies by model
            model_lower = self.model_name.lower()
            if 'gpt-4o' in model_lower:
                return 85 + (170 * 4)  # Simplified GPT-4o calculation
            elif 'claude' in model_lower:
                return 1600  # Anthropic standard
            else:
                return 512  # Conservative estimate for local models

        elif media_type in [MediaType.TEXT, MediaType.DOCUMENT]:
            # Text content token estimation
            if content_size > 0:
                return content_size // 4  # ~4 chars per token
            return 100  # Default estimate

        return 0

    def validate_media_content(self, media_type: MediaType, file_size: int = 0,
                               format: str = None) -> tuple[bool, Optional[str]]:
        """
        Validate if media content meets model requirements.

        Args:
            media_type: Type of media
            file_size: Size of file in bytes
            format: File format/extension

        Returns:
            Tuple of (is_valid, error_message); error_message is None on success
        """
        if not self.supports_media_type(media_type):
            return False, f"Model {self.model_name} does not support {media_type.value} content"

        if media_type == MediaType.IMAGE:
            if file_size > self.max_image_size_bytes:
                return False, f"Image size ({file_size} bytes) exceeds limit ({self.max_image_size_bytes} bytes)"

            if format and format.lower() not in [f.lower() for f in self.supported_image_formats]:
                return False, f"Image format '{format}' not supported. Supported: {self.supported_image_formats}"

        elif media_type in [MediaType.DOCUMENT, MediaType.TEXT]:
            if file_size > self.max_document_size_bytes:
                return False, f"Document size ({file_size} bytes) exceeds limit ({self.max_document_size_bytes} bytes)"

        return True, None

    def get_processing_strategy(self, media_type: MediaType) -> str:
        """
        Get the recommended processing strategy for this media type.

        Returns:
            Processing strategy: 'multimodal', 'text_embedding', or 'unsupported'
        """
        if not self.supports_media_type(media_type):
            return 'unsupported'

        if media_type == MediaType.IMAGE and self.vision_support:
            if self.text_embedding_preferred:
                return 'text_embedding'  # Local models often prefer text description
            else:
                return 'multimodal'

        elif media_type in [MediaType.DOCUMENT, MediaType.TEXT]:
            return 'text_embedding'  # Always embed documents as text

        return 'unsupported'

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for serialization."""
        return {
            'model_name': self.model_name,
            'vision_support': self.vision_support,
            'audio_support': self.audio_support,
            'video_support': self.video_support,
            'max_images_per_message': self.max_images_per_message,
            'supported_image_formats': self.supported_image_formats,
            'image_resolutions': self.image_resolutions,
            'max_image_size_bytes': self.max_image_size_bytes,
            'document_support': self.document_support,
            'max_document_size_bytes': self.max_document_size_bytes,
            'multimodal_message_support': self.multimodal_message_support,
            'text_embedding_preferred': self.text_embedding_preferred,
            'streaming_media_support': self.streaming_media_support,
            'parallel_media_processing': self.parallel_media_processing,
            'media_token_estimation': self.media_token_estimation
        }
263
+
264
+
265
def get_media_capabilities(model: str, provider: str = None) -> MediaCapabilities:
    """
    Build the full MediaCapabilities description for a model.

    Args:
        model: Model name.
        provider: Optional provider name used for provider-specific tuning.

    Returns:
        A MediaCapabilities instance for the given model/provider pair.
    """
    return MediaCapabilities.from_model_capabilities(model, provider)
277
+
278
+
279
def is_vision_model(model: str) -> bool:
    """Quick check if a model supports vision."""
    return get_media_capabilities(model).vision_support
283
+
284
+
285
def is_multimodal_model(model: str) -> bool:
    """Quick check if a model supports any multimodal (image/audio/video) content."""
    capabilities = get_media_capabilities(model)
    return any((
        capabilities.vision_support,
        capabilities.audio_support,
        capabilities.video_support,
    ))
289
+
290
+
291
def get_supported_media_types(model: str, provider: str = None) -> List[MediaType]:
    """
    Get list of supported media types for a model.

    Args:
        model: Model name
        provider: Optional provider name

    Returns:
        List of supported MediaType values (images first, then audio,
        video, and finally document/text when document handling is enabled)
    """
    capabilities = get_media_capabilities(model, provider)

    media_types: List[MediaType] = []
    # Keep the original ordering: image, audio, video, document+text.
    for enabled, kinds in (
        (capabilities.vision_support, [MediaType.IMAGE]),
        (capabilities.audio_support, [MediaType.AUDIO]),
        (capabilities.video_support, [MediaType.VIDEO]),
        (capabilities.document_support, [MediaType.DOCUMENT, MediaType.TEXT]),
    ):
        if enabled:
            media_types.extend(kinds)

    return media_types
315
+
316
+
317
# Convenience functions for common capability checks
def supports_images(model: str, provider: str = None) -> bool:
    """Check if model supports image processing."""
    capabilities = get_media_capabilities(model, provider)
    return capabilities.vision_support
321
+
322
+
323
def supports_documents(model: str, provider: str = None) -> bool:
    """Check if model supports document processing."""
    capabilities = get_media_capabilities(model, provider)
    return capabilities.document_support
326
+
327
+
328
def get_max_images(model: str, provider: str = None) -> int:
    """Get maximum images per message for model."""
    capabilities = get_media_capabilities(model, provider)
    return capabilities.max_images_per_message
331
+
332
+
333
def should_use_text_embedding(model: str, provider: str = None) -> bool:
    """Check if model prefers text embedding over multimodal messages."""
    capabilities = get_media_capabilities(model, provider)
    return capabilities.text_embedding_preferred
@@ -0,0 +1,300 @@
1
+ """
2
+ Core media types and models for AbstractCore multimodal support.
3
+
4
+ This module defines the fundamental data structures for handling various media types
5
+ across different LLM providers, following AbstractCore's unified interface patterns.
6
+ """
7
+
8
+ import base64
9
+ import mimetypes
10
+ from dataclasses import dataclass, field
11
+ from pathlib import Path
12
+ from typing import Union, Dict, Any, Optional, List, Literal
13
+ from enum import Enum
14
+
15
+ from pydantic import BaseModel, Field, validator
16
+
17
+
18
class MediaType(Enum):
    """Media categories that the multimodal pipeline can route and process."""

    IMAGE = "image"
    DOCUMENT = "document"
    AUDIO = "audio"
    VIDEO = "video"
    TEXT = "text"
25
+
26
+
27
class ContentFormat(Enum):
    """Wire representations a piece of media content may use."""

    BASE64 = "base64"
    URL = "url"
    FILE_PATH = "file_path"
    TEXT = "text"
    BINARY = "binary"
    AUTO = "auto"
35
+
36
+
37
@dataclass
class MediaContent:
    """
    A single piece of media content plus its metadata.

    This is the provider-agnostic core data structure for all media handling:
    the same object can describe an image, a document, audio, or video
    regardless of which LLM provider ultimately consumes it.
    """
    media_type: MediaType
    content: Union[str, bytes]
    content_format: ContentFormat
    mime_type: str
    file_path: Optional[str] = None
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Normalize MIME type and content encoding right after construction."""
        # Resolve an "auto" MIME type from the file extension when a path is known.
        if self.mime_type == "auto" and self.file_path:
            guessed, _ = mimetypes.guess_type(self.file_path)
            self.mime_type = guessed if guessed else "application/octet-stream"

        # Coerce raw bytes into the declared textual representation.
        if isinstance(self.content, bytes):
            if self.content_format == ContentFormat.BASE64:
                self.content = base64.b64encode(self.content).decode('utf-8')
            elif self.content_format == ContentFormat.TEXT:
                self.content = self.content.decode('utf-8')
65
+
66
+
67
class MultimodalMessage(BaseModel):
    """
    A chat message that can mix plain text with media payloads.

    Follows the shape of modern multimodal chat APIs: one message holds an
    ordered list of heterogeneous content parts (strings and media dicts).
    """
    role: str = Field(..., description="Message role (user, assistant, system)")
    content: List[Union[str, Dict[str, Any]]] = Field(
        default_factory=list,
        description="Mixed content list containing text strings and media objects"
    )
    metadata: Dict[str, Any] = Field(default_factory=dict)

    @validator('role')
    def validate_role(cls, v):
        # Reject anything outside the known chat roles.
        valid_roles = {'user', 'assistant', 'system', 'tool'}
        if v not in valid_roles:
            raise ValueError(f"Role must be one of {valid_roles}")
        return v

    def add_text(self, text: str) -> None:
        """Append a plain-text part to the message."""
        self.content.append(text)

    def add_media(self, media: MediaContent) -> None:
        """Append a MediaContent payload, serialized as a media dict entry."""
        entry: Dict[str, Any] = {
            "type": "media",
            "media_type": media.media_type.value,
            "content": media.content,
            "content_format": media.content_format.value,
            "mime_type": media.mime_type,
            "metadata": media.metadata,
        }
        # file_path is optional on MediaContent; only include it when set.
        if media.file_path:
            entry["file_path"] = media.file_path
        self.content.append(entry)

    def has_media(self) -> bool:
        """Return True if any content part is a media dict."""
        for part in self.content:
            if isinstance(part, dict) and part.get("type") == "media":
                return True
        return False

    def get_text_content(self) -> str:
        """Join every plain-text part with single spaces, in order."""
        return " ".join(part for part in self.content if isinstance(part, str))

    def get_media_content(self) -> List[Dict[str, Any]]:
        """Return every media dict in this message, preserving order."""
        media_parts: List[Dict[str, Any]] = []
        for part in self.content:
            if isinstance(part, dict) and part.get("type") == "media":
                media_parts.append(part)
        return media_parts
+ ]
128
+
129
+
130
@dataclass
class MediaCapabilities:
    """
    Media capabilities supported by a provider/model pair.

    Used for intelligent routing and validation of media content against the
    target provider's limits.

    NOTE(review): a richer class of the same name also lives in
    media/capabilities.py — confirm which one callers are meant to import.
    """
    vision_support: bool = False
    audio_support: bool = False
    video_support: bool = False
    document_support: bool = False

    # Image-specific capabilities
    max_image_resolution: Optional[str] = None
    supported_image_formats: List[str] = field(default_factory=lambda: ["jpg", "png"])

    # Document-specific capabilities
    supported_document_formats: List[str] = field(default_factory=lambda: ["pdf", "txt"])

    # Audio/Video capabilities
    max_audio_duration: Optional[int] = None  # in seconds
    max_video_duration: Optional[int] = None  # in seconds

    # Provider-specific limits
    max_file_size: Optional[int] = None  # in bytes
    max_concurrent_media: int = 1

    def supports_media_type(self, media_type: MediaType) -> bool:
        """Check if this provider supports the given media type."""
        if media_type == MediaType.TEXT:
            return True  # All providers support text
        if media_type == MediaType.IMAGE:
            return self.vision_support
        if media_type == MediaType.AUDIO:
            return self.audio_support
        if media_type == MediaType.VIDEO:
            return self.video_support
        if media_type == MediaType.DOCUMENT:
            return self.document_support
        return False

    def supports_format(self, media_type: MediaType, format_ext: str) -> bool:
        """Check if this provider supports the specific format."""
        ext = format_ext.lower().lstrip('.')

        if media_type == MediaType.IMAGE:
            return ext in self.supported_image_formats
        if media_type == MediaType.DOCUMENT:
            return ext in self.supported_document_formats
        if media_type in (MediaType.AUDIO, MediaType.VIDEO):
            # No per-format lists yet: assume basic support whenever the
            # modality itself is supported.
            return self.supports_media_type(media_type)
        return True
182
+
183
+
184
class MediaProcessingResult(BaseModel):
    """
    Outcome of processing one media file.

    Bundles the processed content with diagnostics (error message, timing,
    any text extracted along the way).
    """
    success: bool
    media_content: Optional[MediaContent] = None
    error_message: Optional[str] = None
    processing_time: Optional[float] = None
    extracted_text: Optional[str] = None
    metadata: Dict[str, Any] = Field(default_factory=dict)

    @property
    def failed(self) -> bool:
        """Convenience inverse of ``success``."""
        return not self.success
201
+
202
+
203
# File extension mappings for quick media type detection.
# Built from one (MediaType, extensions) row per modality for readability.
FILE_TYPE_MAPPINGS = {
    ext: kind
    for kind, extensions in (
        (MediaType.IMAGE, ('jpg', 'jpeg', 'png', 'gif', 'bmp', 'tif', 'tiff', 'webp', 'ico')),
        (MediaType.DOCUMENT, ('pdf', 'doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx', 'odt', 'rtf')),
        (MediaType.TEXT, ('txt', 'md', 'csv', 'tsv', 'json', 'xml', 'html', 'htm')),
        (MediaType.AUDIO, ('mp3', 'wav', 'm4a', 'ogg', 'flac', 'aac')),
        (MediaType.VIDEO, ('mp4', 'avi', 'mov', 'mkv', 'webm', 'wmv')),
    )
    for ext in extensions
}
228
+
229
+
230
def detect_media_type(file_path: Union[str, Path]) -> MediaType:
    """
    Infer a file's media type from its extension.

    Args:
        file_path: Path to the file

    Returns:
        The matching MediaType; unknown extensions default to DOCUMENT
    """
    ext = Path(file_path).suffix.lower().lstrip('.')
    return FILE_TYPE_MAPPINGS.get(ext, MediaType.DOCUMENT)
244
+
245
+
246
def create_media_content(
    file_path: Union[str, Path],
    content_format: ContentFormat = ContentFormat.AUTO,
    mime_type: str = "auto"
) -> MediaContent:
    """
    Build a MediaContent object from a file on disk.

    Args:
        file_path: Path to the media file
        content_format: Desired representation; AUTO picks BASE64 for
            image/audio/video and TEXT for everything else
        mime_type: MIME type of the content (auto-detected if "auto")

    Returns:
        A populated MediaContent object

    Raises:
        FileNotFoundError: If the file does not exist
    """
    path = Path(file_path)
    if not path.exists():
        raise FileNotFoundError(f"File not found: {file_path}")

    media_type = detect_media_type(path)

    # Resolve AUTO: binary media gets base64, everything else is read as text.
    if content_format == ContentFormat.AUTO:
        is_binary_media = media_type in (MediaType.IMAGE, MediaType.AUDIO, MediaType.VIDEO)
        content_format = ContentFormat.BASE64 if is_binary_media else ContentFormat.TEXT

    # Load the file according to the chosen representation.
    if content_format == ContentFormat.BASE64:
        content = base64.b64encode(path.read_bytes()).decode('utf-8')
    elif content_format == ContentFormat.TEXT:
        content = path.read_text(encoding='utf-8')
    elif content_format == ContentFormat.FILE_PATH:
        content = str(path)
    else:
        # Any remaining format (e.g. BINARY) carries the raw bytes.
        content = path.read_bytes()

    stats = path.stat()
    return MediaContent(
        media_type=media_type,
        content=content,
        content_format=content_format,
        mime_type=mime_type,
        file_path=str(path),
        metadata={
            'file_size': stats.st_size,
            'file_name': path.name,
            'file_extension': path.suffix
        }
    )