abstractcore 2.4.4__py3-none-any.whl → 2.4.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/cli/__init__.py +9 -0
- abstractcore/cli/main.py +759 -0
- abstractcore/cli/vision_config.py +491 -0
- abstractcore/core/interface.py +7 -0
- abstractcore/core/session.py +27 -2
- abstractcore/media/handlers/__init__.py +16 -0
- abstractcore/media/handlers/anthropic_handler.py +326 -0
- abstractcore/media/handlers/local_handler.py +541 -0
- abstractcore/media/handlers/openai_handler.py +281 -0
- abstractcore/media/processors/__init__.py +13 -0
- abstractcore/media/processors/image_processor.py +610 -0
- abstractcore/media/processors/office_processor.py +490 -0
- abstractcore/media/processors/pdf_processor.py +485 -0
- abstractcore/media/processors/text_processor.py +557 -0
- abstractcore/media/utils/__init__.py +22 -0
- abstractcore/media/utils/image_scaler.py +306 -0
- abstractcore/providers/anthropic_provider.py +14 -2
- abstractcore/providers/base.py +24 -0
- abstractcore/providers/huggingface_provider.py +23 -9
- abstractcore/providers/lmstudio_provider.py +6 -1
- abstractcore/providers/mlx_provider.py +20 -7
- abstractcore/providers/ollama_provider.py +6 -1
- abstractcore/providers/openai_provider.py +6 -2
- abstractcore/tools/common_tools.py +651 -1
- abstractcore/utils/version.py +1 -1
- {abstractcore-2.4.4.dist-info → abstractcore-2.4.6.dist-info}/METADATA +59 -9
- {abstractcore-2.4.4.dist-info → abstractcore-2.4.6.dist-info}/RECORD +31 -17
- {abstractcore-2.4.4.dist-info → abstractcore-2.4.6.dist-info}/entry_points.txt +2 -0
- {abstractcore-2.4.4.dist-info → abstractcore-2.4.6.dist-info}/WHEEL +0 -0
- {abstractcore-2.4.4.dist-info → abstractcore-2.4.6.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.4.4.dist-info → abstractcore-2.4.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
"""
|
|
2
|
+
OpenAI-specific media handler.
|
|
3
|
+
|
|
4
|
+
This module provides media formatting capabilities specifically for OpenAI's API,
|
|
5
|
+
including support for GPT-4 Vision, audio models, and document processing.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Dict, Any, List, Optional, Union
|
|
10
|
+
|
|
11
|
+
from ..base import BaseProviderMediaHandler, MediaProcessingError
|
|
12
|
+
from ..types import MediaContent, MediaType, ContentFormat
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class OpenAIMediaHandler(BaseProviderMediaHandler):
    """
    Media handler for OpenAI API formatting.

    Formats media content according to OpenAI's API specifications for
    GPT-4 Vision, audio models, and other multimodal capabilities.
    """

    def __init__(self, model_capabilities: Optional[Dict[str, Any]] = None, **kwargs):
        """
        Initialize OpenAI media handler.

        Args:
            model_capabilities: Model capabilities from model_capabilities.json
            **kwargs: Additional configuration; recognized keys are
                'max_image_size' (bytes) and 'supported_image_detail'
                (list of allowed detail-level strings).
        """
        super().__init__("openai", model_capabilities, **kwargs)

        # OpenAI-specific configuration
        self.max_image_size = kwargs.get('max_image_size', 20 * 1024 * 1024)  # 20MB
        self.supported_image_detail = kwargs.get('supported_image_detail', ['auto', 'low', 'high'])

        self.logger.debug(f"Initialized OpenAI media handler with capabilities: {self.capabilities}")

    def _process_internal(self, file_path: Path, media_type: MediaType, **kwargs) -> MediaContent:
        """
        Process file using appropriate processor and return OpenAI-formatted content.

        Processor selection: images -> ImageProcessor (with max resolution
        preferred for best vision results), PDF documents -> PDFProcessor,
        everything else -> TextProcessor.

        Args:
            file_path: Path to the file
            media_type: Type of media
            **kwargs: Processing options, forwarded to both the processor
                constructor and its process_file call

        Returns:
            MediaContent formatted for OpenAI

        Raises:
            MediaProcessingError: If the underlying processor reports failure.
        """
        # Use appropriate processor based on media type
        if media_type == MediaType.IMAGE:
            from ..processors import ImageProcessor
            # Ensure maximum resolution for best OpenAI vision results
            if 'prefer_max_resolution' not in kwargs:
                kwargs['prefer_max_resolution'] = True
            processor = ImageProcessor(**kwargs)
        elif media_type == MediaType.DOCUMENT:
            if file_path.suffix.lower() == '.pdf':
                from ..processors import PDFProcessor
                processor = PDFProcessor(**kwargs)
            else:
                from ..processors import TextProcessor
                processor = TextProcessor(**kwargs)
        else:
            from ..processors import TextProcessor
            processor = TextProcessor(**kwargs)

        # Process the file
        result = processor.process_file(file_path, **kwargs)

        if not result.success:
            raise MediaProcessingError(f"Failed to process {file_path}: {result.error_message}")

        return result.media_content

    def format_for_provider(self, media_content: MediaContent) -> Dict[str, Any]:
        """
        Format media content for OpenAI API.

        Args:
            media_content: MediaContent to format

        Returns:
            Dictionary formatted for OpenAI API

        Raises:
            MediaProcessingError: If the media type is not supported by OpenAI.
        """
        if media_content.media_type == MediaType.IMAGE:
            return self._format_image_for_openai(media_content)
        elif media_content.media_type in [MediaType.DOCUMENT, MediaType.TEXT]:
            return self._format_text_for_openai(media_content)
        else:
            raise MediaProcessingError(f"Unsupported media type for OpenAI: {media_content.media_type}")

    def _format_image_for_openai(self, media_content: MediaContent) -> Dict[str, Any]:
        """
        Format image content for OpenAI's image_url format.

        Args:
            media_content: Image MediaContent (must carry base64 content)

        Returns:
            OpenAI-compatible image object ({"type": "image_url", ...})

        Raises:
            MediaProcessingError: If the content is not base64-encoded.
        """
        if media_content.content_format != ContentFormat.BASE64:
            raise MediaProcessingError("OpenAI image formatting requires base64 content")

        # Construct data URL
        data_url = f"data:{media_content.mime_type};base64,{media_content.content}"

        # Create OpenAI image object
        image_obj = {
            "type": "image_url",
            "image_url": {
                "url": data_url
            }
        }

        # Add detail level if supported by model
        if self.model_capabilities.get('vision_support'):
            detail_level = media_content.metadata.get('detail_level', 'auto')
            if detail_level in self.supported_image_detail:
                image_obj["image_url"]["detail"] = detail_level

        return image_obj

    def _format_text_for_openai(self, media_content: MediaContent) -> Dict[str, Any]:
        """
        Format text/document content for OpenAI API.

        Args:
            media_content: Text/Document MediaContent

        Returns:
            OpenAI-compatible text object ({"type": "text", "text": ...})
        """
        if isinstance(media_content.content, bytes):
            # Document extraction can yield bytes that are not strictly valid
            # UTF-8; decode with replacement so formatting never hard-crashes.
            content = media_content.content.decode('utf-8', errors='replace')
        else:
            content = str(media_content.content)

        return {
            "type": "text",
            "text": content
        }

    def create_multimodal_message(self, text: str, media_contents: List[MediaContent]) -> Dict[str, Any]:
        """
        Create a multimodal message for OpenAI API.

        Unsupported media items are skipped with a warning rather than
        raising, so one bad attachment does not abort the whole message.

        Args:
            text: Text content (skipped if blank)
            media_contents: List of media contents

        Returns:
            OpenAI-compatible message object with role "user"
        """
        content = []

        # Add text content
        if text.strip():
            content.append({
                "type": "text",
                "text": text
            })

        # Add media contents
        for media_content in media_contents:
            if self.can_handle_media(media_content):
                formatted_content = self.format_for_provider(media_content)
                content.append(formatted_content)
            else:
                self.logger.warning(f"Skipping unsupported media type: {media_content.media_type}")

        return {
            "role": "user",
            "content": content
        }

    def validate_media_for_model(self, media_content: MediaContent, model: str) -> bool:
        """
        Validate if media content is compatible with specific OpenAI model.

        Args:
            media_content: MediaContent to validate
            model: OpenAI model name

        Returns:
            True if compatible, False otherwise
        """
        model_lower = model.lower()

        # Vision model validation
        if media_content.media_type == MediaType.IMAGE:
            # Check if model supports vision
            if not self.model_capabilities.get('vision_support', False):
                return False

            # Model-specific checks; unrecognized model names fall through
            # to the final `return False` below.
            if 'gpt-4' in model_lower and 'vision' in model_lower:
                return True
            elif 'gpt-4o' in model_lower:
                return True
            elif 'gpt-4' in model_lower:
                # Regular GPT-4 models don't support vision
                return False

        # Text/document validation
        elif media_content.media_type in [MediaType.TEXT, MediaType.DOCUMENT]:
            # All OpenAI models support text
            return True

        # Audio validation (future support)
        elif media_content.media_type == MediaType.AUDIO:
            return self.model_capabilities.get('audio_support', False)

        return False

    def estimate_tokens_for_media(self, media_content: MediaContent) -> int:
        """
        Estimate token usage for media content.

        Images use OpenAI's tile-based pricing model (85 base tokens plus
        170 per 512x512 tile at high detail, flat 85 at low detail); text
        uses a rough 4-characters-per-token heuristic.

        Args:
            media_content: MediaContent to estimate

        Returns:
            Estimated token count (0 for unsupported media types)
        """
        if media_content.media_type == MediaType.IMAGE:
            # OpenAI image token estimation
            # Base cost varies by detail level and image size
            detail_level = media_content.metadata.get('detail_level', 'auto')

            if detail_level == 'low':
                return 85  # Low detail images use 85 tokens
            else:
                # High detail calculation based on image dimensions;
                # fetch final_size once instead of twice.
                final_size = media_content.metadata.get('final_size', [512, 512])
                width, height = final_size[0], final_size[1]

                # OpenAI's tile-based calculation (simplified); ceil-divide
                # each dimension by the 512px tile size.
                tiles_width = (width + 511) // 512
                tiles_height = (height + 511) // 512
                total_tiles = tiles_width * tiles_height

                return 85 + (170 * total_tiles)

        elif media_content.media_type in [MediaType.TEXT, MediaType.DOCUMENT]:
            # Rough estimation: 4 characters per token
            content_length = len(str(media_content.content))
            return content_length // 4

        return 0

    def get_model_media_limits(self, model: str) -> Dict[str, Any]:
        """
        Get media-specific limits for OpenAI model.

        Args:
            model: OpenAI model name

        Returns:
            Dictionary of limits (image count/size/formats, detail level,
            plus audio/video support flags for GPT-4o models)
        """
        limits = {
            'max_images_per_message': 1,
            'max_image_size_bytes': self.max_image_size,
            'supported_image_formats': ['png', 'jpeg', 'jpg', 'gif', 'webp'],
            'max_detail_level': 'high'
        }

        model_lower = model.lower()

        # Model-specific adjustments
        if 'gpt-4o' in model_lower:
            limits.update({
                'max_images_per_message': 10,  # GPT-4o supports multiple images
                'supports_audio': self.model_capabilities.get('audio_support', False),
                'supports_video': False  # Not yet supported
            })

        return limits
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Media processors for different file types.
|
|
3
|
+
|
|
4
|
+
This module contains concrete implementations of media processors
|
|
5
|
+
for various file formats including images, documents, and text files.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .image_processor import ImageProcessor
|
|
9
|
+
from .text_processor import TextProcessor
|
|
10
|
+
from .pdf_processor import PDFProcessor
|
|
11
|
+
from .office_processor import OfficeProcessor
|
|
12
|
+
|
|
13
|
+
__all__ = ['ImageProcessor', 'TextProcessor', 'PDFProcessor', 'OfficeProcessor']
|