abstractcore-2.4.4-py3-none-any.whl → abstractcore-2.4.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. abstractcore/cli/__init__.py +9 -0
  2. abstractcore/cli/main.py +759 -0
  3. abstractcore/cli/vision_config.py +491 -0
  4. abstractcore/core/interface.py +7 -0
  5. abstractcore/core/session.py +27 -2
  6. abstractcore/media/handlers/__init__.py +16 -0
  7. abstractcore/media/handlers/anthropic_handler.py +326 -0
  8. abstractcore/media/handlers/local_handler.py +541 -0
  9. abstractcore/media/handlers/openai_handler.py +281 -0
  10. abstractcore/media/processors/__init__.py +13 -0
  11. abstractcore/media/processors/image_processor.py +610 -0
  12. abstractcore/media/processors/office_processor.py +490 -0
  13. abstractcore/media/processors/pdf_processor.py +485 -0
  14. abstractcore/media/processors/text_processor.py +557 -0
  15. abstractcore/media/utils/__init__.py +22 -0
  16. abstractcore/media/utils/image_scaler.py +306 -0
  17. abstractcore/providers/anthropic_provider.py +14 -2
  18. abstractcore/providers/base.py +24 -0
  19. abstractcore/providers/huggingface_provider.py +23 -9
  20. abstractcore/providers/lmstudio_provider.py +6 -1
  21. abstractcore/providers/mlx_provider.py +20 -7
  22. abstractcore/providers/ollama_provider.py +6 -1
  23. abstractcore/providers/openai_provider.py +6 -2
  24. abstractcore/tools/common_tools.py +651 -1
  25. abstractcore/utils/version.py +1 -1
  26. {abstractcore-2.4.4.dist-info → abstractcore-2.4.6.dist-info}/METADATA +59 -9
  27. {abstractcore-2.4.4.dist-info → abstractcore-2.4.6.dist-info}/RECORD +31 -17
  28. {abstractcore-2.4.4.dist-info → abstractcore-2.4.6.dist-info}/entry_points.txt +2 -0
  29. {abstractcore-2.4.4.dist-info → abstractcore-2.4.6.dist-info}/WHEEL +0 -0
  30. {abstractcore-2.4.4.dist-info → abstractcore-2.4.6.dist-info}/licenses/LICENSE +0 -0
  31. {abstractcore-2.4.4.dist-info → abstractcore-2.4.6.dist-info}/top_level.txt +0 -0
abstractcore/media/handlers/openai_handler.py
@@ -0,0 +1,281 @@
+"""
+OpenAI-specific media handler.
+
+This module provides media formatting capabilities specifically for OpenAI's API,
+including support for GPT-4 Vision, audio models, and document processing.
+"""
+
+from pathlib import Path
+from typing import Dict, Any, List, Optional, Union
+
+from ..base import BaseProviderMediaHandler, MediaProcessingError
+from ..types import MediaContent, MediaType, ContentFormat
+
+
+class OpenAIMediaHandler(BaseProviderMediaHandler):
+    """
+    Media handler for OpenAI API formatting.
+
+    Formats media content according to OpenAI's API specifications for
+    GPT-4 Vision, audio models, and other multimodal capabilities.
+    """
+
+    def __init__(self, model_capabilities: Optional[Dict[str, Any]] = None, **kwargs):
+        """
+        Initialize OpenAI media handler.
+
+        Args:
+            model_capabilities: Model capabilities from model_capabilities.json
+            **kwargs: Additional configuration
+        """
+        super().__init__("openai", model_capabilities, **kwargs)
+
+        # OpenAI-specific configuration
+        self.max_image_size = kwargs.get('max_image_size', 20 * 1024 * 1024)  # 20MB
+        self.supported_image_detail = kwargs.get('supported_image_detail', ['auto', 'low', 'high'])
+
+        self.logger.debug(f"Initialized OpenAI media handler with capabilities: {self.capabilities}")
+
+    def _process_internal(self, file_path: Path, media_type: MediaType, **kwargs) -> MediaContent:
+        """
+        Process file using appropriate processor and return OpenAI-formatted content.
+
+        Args:
+            file_path: Path to the file
+            media_type: Type of media
+            **kwargs: Processing options
+
+        Returns:
+            MediaContent formatted for OpenAI
+        """
+        # Use appropriate processor based on media type
+        if media_type == MediaType.IMAGE:
+            from ..processors import ImageProcessor
+            # Ensure maximum resolution for best OpenAI vision results
+            if 'prefer_max_resolution' not in kwargs:
+                kwargs['prefer_max_resolution'] = True
+            processor = ImageProcessor(**kwargs)
+        elif media_type == MediaType.DOCUMENT:
+            if file_path.suffix.lower() == '.pdf':
+                from ..processors import PDFProcessor
+                processor = PDFProcessor(**kwargs)
+            else:
+                from ..processors import TextProcessor
+                processor = TextProcessor(**kwargs)
+        else:
+            from ..processors import TextProcessor
+            processor = TextProcessor(**kwargs)
+
+        # Process the file
+        result = processor.process_file(file_path, **kwargs)
+
+        if not result.success:
+            raise MediaProcessingError(f"Failed to process {file_path}: {result.error_message}")
+
+        return result.media_content
+
+    def format_for_provider(self, media_content: MediaContent) -> Dict[str, Any]:
+        """
+        Format media content for OpenAI API.
+
+        Args:
+            media_content: MediaContent to format
+
+        Returns:
+            Dictionary formatted for OpenAI API
+        """
+        if media_content.media_type == MediaType.IMAGE:
+            return self._format_image_for_openai(media_content)
+        elif media_content.media_type in [MediaType.DOCUMENT, MediaType.TEXT]:
+            return self._format_text_for_openai(media_content)
+        else:
+            raise MediaProcessingError(f"Unsupported media type for OpenAI: {media_content.media_type}")
+
+    def _format_image_for_openai(self, media_content: MediaContent) -> Dict[str, Any]:
+        """
+        Format image content for OpenAI's image_url format.
+
+        Args:
+            media_content: Image MediaContent
+
+        Returns:
+            OpenAI-compatible image object
+        """
+        if media_content.content_format != ContentFormat.BASE64:
+            raise MediaProcessingError("OpenAI image formatting requires base64 content")
+
+        # Construct data URL
+        data_url = f"data:{media_content.mime_type};base64,{media_content.content}"
+
+        # Create OpenAI image object
+        image_obj = {
+            "type": "image_url",
+            "image_url": {
+                "url": data_url
+            }
+        }
+
+        # Add detail level if supported by model
+        if self.model_capabilities.get('vision_support'):
+            detail_level = media_content.metadata.get('detail_level', 'auto')
+            if detail_level in self.supported_image_detail:
+                image_obj["image_url"]["detail"] = detail_level
+
+        return image_obj
+
+    def _format_text_for_openai(self, media_content: MediaContent) -> Dict[str, Any]:
+        """
+        Format text/document content for OpenAI API.
+
+        Args:
+            media_content: Text/Document MediaContent
+
+        Returns:
+            OpenAI-compatible text object
+        """
+        if isinstance(media_content.content, bytes):
+            content = media_content.content.decode('utf-8')
+        else:
+            content = str(media_content.content)
+
+        return {
+            "type": "text",
+            "text": content
+        }
+
+    def create_multimodal_message(self, text: str, media_contents: List[MediaContent]) -> Dict[str, Any]:
+        """
+        Create a multimodal message for OpenAI API.
+
+        Args:
+            text: Text content
+            media_contents: List of media contents
+
+        Returns:
+            OpenAI-compatible message object
+        """
+        content = []
+
+        # Add text content
+        if text.strip():
+            content.append({
+                "type": "text",
+                "text": text
+            })
+
+        # Add media contents
+        for media_content in media_contents:
+            if self.can_handle_media(media_content):
+                formatted_content = self.format_for_provider(media_content)
+                content.append(formatted_content)
+            else:
+                self.logger.warning(f"Skipping unsupported media type: {media_content.media_type}")
+
+        return {
+            "role": "user",
+            "content": content
+        }
+
+    def validate_media_for_model(self, media_content: MediaContent, model: str) -> bool:
+        """
+        Validate if media content is compatible with specific OpenAI model.
+
+        Args:
+            media_content: MediaContent to validate
+            model: OpenAI model name
+
+        Returns:
+            True if compatible, False otherwise
+        """
+        model_lower = model.lower()
+
+        # Vision model validation
+        if media_content.media_type == MediaType.IMAGE:
+            # Check if model supports vision
+            if not self.model_capabilities.get('vision_support', False):
+                return False
+
+            # Model-specific checks
+            if 'gpt-4' in model_lower and 'vision' in model_lower:
+                return True
+            elif 'gpt-4o' in model_lower:
+                return True
+            elif 'gpt-4' in model_lower:
+                # Regular GPT-4 models don't support vision
+                return False
+
+        # Text/document validation
+        elif media_content.media_type in [MediaType.TEXT, MediaType.DOCUMENT]:
+            # All OpenAI models support text
+            return True
+
+        # Audio validation (future support)
+        elif media_content.media_type == MediaType.AUDIO:
+            return self.model_capabilities.get('audio_support', False)
+
+        return False
+
+    def estimate_tokens_for_media(self, media_content: MediaContent) -> int:
+        """
+        Estimate token usage for media content.
+
+        Args:
+            media_content: MediaContent to estimate
+
+        Returns:
+            Estimated token count
+        """
+        if media_content.media_type == MediaType.IMAGE:
+            # OpenAI image token estimation
+            # Base cost varies by detail level and image size
+            detail_level = media_content.metadata.get('detail_level', 'auto')
+
+            if detail_level == 'low':
+                return 85  # Low detail images use 85 tokens
+            else:
+                # High detail calculation based on image dimensions
+                width = media_content.metadata.get('final_size', [512, 512])[0]
+                height = media_content.metadata.get('final_size', [512, 512])[1]
+
+                # OpenAI's tile-based calculation (simplified)
+                tiles_width = (width + 511) // 512
+                tiles_height = (height + 511) // 512
+                total_tiles = tiles_width * tiles_height
+
+                return 85 + (170 * total_tiles)
+
+        elif media_content.media_type in [MediaType.TEXT, MediaType.DOCUMENT]:
+            # Rough estimation: 4 characters per token
+            content_length = len(str(media_content.content))
+            return content_length // 4
+
+        return 0
+
+    def get_model_media_limits(self, model: str) -> Dict[str, Any]:
+        """
+        Get media-specific limits for OpenAI model.
+
+        Args:
+            model: OpenAI model name
+
+        Returns:
+            Dictionary of limits
+        """
+        limits = {
+            'max_images_per_message': 1,
+            'max_image_size_bytes': self.max_image_size,
+            'supported_image_formats': ['png', 'jpeg', 'jpg', 'gif', 'webp'],
+            'max_detail_level': 'high'
+        }
+
+        model_lower = model.lower()
+
+        # Model-specific adjustments
+        if 'gpt-4o' in model_lower:
+            limits.update({
+                'max_images_per_message': 10,  # GPT-4o supports multiple images
+                'supports_audio': self.model_capabilities.get('audio_support', False),
+                'supports_video': False  # Not yet supported
+            })
+
+        return limits
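
For orientation, a minimal usage sketch of the new handler follows. It only calls methods defined in the hunk above; the import paths, the MediaContent constructor keywords, and the 'vision_support' capability key are assumptions inferred from that code, not confirmed abstractcore API.

# Hedged sketch based on the diffed openai_handler.py. Import paths,
# MediaContent constructor keywords, and capability keys are assumptions
# inferred from the code above, not confirmed abstractcore API.
from abstractcore.media.handlers.openai_handler import OpenAIMediaHandler
from abstractcore.media.types import MediaContent, MediaType, ContentFormat

handler = OpenAIMediaHandler(model_capabilities={'vision_support': True})

# Stand-in for real processor output: a tiny base64 payload marked low detail.
image = MediaContent(
    media_type=MediaType.IMAGE,
    content_format=ContentFormat.BASE64,
    content="iVBORw0KGgo=",  # placeholder base64, not a real image
    mime_type="image/png",
    metadata={'detail_level': 'low'},
)

print(handler.format_for_provider(image))        # OpenAI image_url block with detail="low"
print(handler.estimate_tokens_for_media(image))  # 85: flat cost for low-detail images
print(handler.get_model_media_limits("gpt-4o"))  # up to 10 images per message

The 'detail_level' metadata key drives both the "detail" field on the image_url object and the flat 85-token estimate shown above.
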
abstractcore/media/processors/__init__.py
@@ -0,0 +1,13 @@
+"""
+Media processors for different file types.
+
+This module contains concrete implementations of media processors
+for various file formats including images, documents, and text files.
+"""
+
+from .image_processor import ImageProcessor
+from .text_processor import TextProcessor
+from .pdf_processor import PDFProcessor
+from .office_processor import OfficeProcessor
+
+__all__ = ['ImageProcessor', 'TextProcessor', 'PDFProcessor', 'OfficeProcessor']
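
This processors package is what OpenAIMediaHandler._process_internal dispatches to. Below is a minimal sketch of that selection logic, reusing only the constructor keyword and process_file() result fields visible earlier in this diff; the suffix-based dispatch stands in for the library's own MediaType detection, which is not shown here, and the helper name is hypothetical.

# Hedged sketch; assumes abstractcore.media.processors imports as in the
# diffed __init__.py and that process_file() results expose success,
# error_message and media_content as used by OpenAIMediaHandler above.
from pathlib import Path
from abstractcore.media.processors import ImageProcessor, PDFProcessor, TextProcessor

def process_for_openai(file_path: Path):
    """Pick a processor roughly the way OpenAIMediaHandler._process_internal does."""
    suffix = file_path.suffix.lower()
    if suffix in {'.png', '.jpg', '.jpeg', '.gif', '.webp'}:
        # Mirrors the handler's prefer_max_resolution default for OpenAI vision.
        processor = ImageProcessor(prefer_max_resolution=True)
    elif suffix == '.pdf':
        processor = PDFProcessor()
    else:
        processor = TextProcessor()

    result = processor.process_file(file_path)
    if not result.success:
        raise RuntimeError(f"Failed to process {file_path}: {result.error_message}")
    return result.media_content
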