abstractcore 2.4.2__py3-none-any.whl → 2.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/apps/app_config_utils.py +19 -0
- abstractcore/apps/summarizer.py +85 -56
- abstractcore/architectures/detection.py +15 -4
- abstractcore/assets/architecture_formats.json +1 -1
- abstractcore/assets/model_capabilities.json +420 -11
- abstractcore/core/interface.py +2 -0
- abstractcore/core/session.py +4 -0
- abstractcore/embeddings/manager.py +54 -16
- abstractcore/media/__init__.py +116 -148
- abstractcore/media/auto_handler.py +363 -0
- abstractcore/media/base.py +456 -0
- abstractcore/media/capabilities.py +335 -0
- abstractcore/media/types.py +300 -0
- abstractcore/media/vision_fallback.py +260 -0
- abstractcore/providers/anthropic_provider.py +18 -1
- abstractcore/providers/base.py +187 -0
- abstractcore/providers/huggingface_provider.py +111 -12
- abstractcore/providers/lmstudio_provider.py +88 -5
- abstractcore/providers/mlx_provider.py +33 -1
- abstractcore/providers/ollama_provider.py +37 -3
- abstractcore/providers/openai_provider.py +18 -1
- abstractcore/server/app.py +1390 -104
- abstractcore/tools/common_tools.py +12 -8
- abstractcore/utils/__init__.py +9 -5
- abstractcore/utils/cli.py +199 -17
- abstractcore/utils/message_preprocessor.py +182 -0
- abstractcore/utils/structured_logging.py +117 -16
- abstractcore/utils/version.py +1 -1
- {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/METADATA +214 -20
- {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/RECORD +34 -27
- {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/entry_points.txt +1 -0
- {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/WHEEL +0 -0
- {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,456 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Base media handler following AbstractCore patterns.
|
|
3
|
+
|
|
4
|
+
This module defines the base class for all media processing operations,
|
|
5
|
+
providing a unified interface for handling different file types across providers.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import time
|
|
9
|
+
from abc import ABC, abstractmethod
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import List, Dict, Any, Optional, Union, Type
|
|
12
|
+
|
|
13
|
+
from .types import (
|
|
14
|
+
MediaContent, MediaType, ContentFormat, MediaCapabilities,
|
|
15
|
+
MediaProcessingResult, detect_media_type, FILE_TYPE_MAPPINGS
|
|
16
|
+
)
|
|
17
|
+
from ..utils.structured_logging import get_logger
|
|
18
|
+
from ..events import EventType, emit_global
|
|
19
|
+
from ..exceptions import AbstractCoreError
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class MediaProcessingError(AbstractCoreError):
    """Root error for the media subsystem: a media processing step failed."""
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class UnsupportedMediaTypeError(MediaProcessingError):
    """Raised when a handler is asked to process a media type or format it does not support."""
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class FileSizeExceededError(MediaProcessingError):
    """Raised when a file or its processed content is larger than the configured limit."""
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class BaseMediaHandler(ABC):
    """
    Base class for media handling operations.

    This class provides the fundamental interface and shared functionality
    for all media processors, following AbstractCore's architecture patterns.

    Subclasses implement ``_process_internal``. ``process_file`` wraps that
    hook with validation, timing, event emission and logging, and converts
    exceptions raised during validation/processing into a failed
    ``MediaProcessingResult``.
    """

    def __init__(self, **kwargs):
        """
        Initialize the base media handler.

        Args:
            **kwargs: Configuration parameters. Recognized keys:
                ``max_file_size`` (bytes, default 50MB),
                ``supported_formats`` (default ``[]``),
                ``temp_dir`` (default ``None``),
                ``enable_events`` (default ``True``).
        """
        # Setup structured logging
        self.logger = get_logger(self.__class__.__name__)

        # Configuration
        self.max_file_size = kwargs.get('max_file_size', 50 * 1024 * 1024)  # 50MB default
        self.supported_formats = kwargs.get('supported_formats', [])
        self.temp_dir = kwargs.get('temp_dir', None)
        self.enable_events = kwargs.get('enable_events', True)

        # Capabilities (to be set by subclasses)
        self.capabilities = MediaCapabilities()

        self.logger.debug(f"Initialized {self.__class__.__name__} with max_file_size={self.max_file_size}")

    def process_file(self, file_path: Union[str, Path], **kwargs) -> MediaProcessingResult:
        """
        Process a file and return media content.

        This is the main entry point for media processing, providing telemetry
        and error handling around the actual processing implementation.

        Args:
            file_path: Path to the file to process
            **kwargs: Additional processing parameters, forwarded to
                ``_process_internal``

        Returns:
            MediaProcessingResult with success/failure information. Any
            exception raised while validating or processing the file is
            captured in the result (``success=False``) rather than propagated.
        """
        start_time = time.time()
        file_path = Path(file_path)

        # Emit processing started event
        if self.enable_events:
            self._emit_processing_event(
                EventType.GENERATION_STARTED,  # Reuse generation events for media
                file_path=str(file_path),
                media_type=detect_media_type(file_path).value,
                processor=self.__class__.__name__
            )

        try:
            # Validate file exists
            if not file_path.exists():
                raise FileNotFoundError(f"File not found: {file_path}")

            # Validate file size
            file_size = file_path.stat().st_size
            if file_size > self.max_file_size:
                raise FileSizeExceededError(
                    f"File size {file_size} exceeds maximum {self.max_file_size} bytes"
                )

            # Detect media type
            media_type = detect_media_type(file_path)

            # Check if media type is supported
            if not self.supports_media_type(media_type):
                raise UnsupportedMediaTypeError(
                    f"Media type {media_type.value} not supported by {self.__class__.__name__}"
                )

            # Check format support (extension without the leading dot, lowercased)
            format_ext = file_path.suffix.lower().lstrip('.')
            if not self.supports_format(media_type, format_ext):
                raise UnsupportedMediaTypeError(
                    f"Format .{format_ext} not supported for {media_type.value}"
                )

            # Call the actual processing implementation
            media_content = self._process_internal(file_path, media_type, **kwargs)

            # Create successful result
            processing_time = time.time() - start_time
            result = MediaProcessingResult(
                success=True,
                media_content=media_content,
                processing_time=processing_time,
                metadata={
                    'file_size': file_size,
                    'file_name': file_path.name,
                    'processor': self.__class__.__name__
                }
            )

            # Track successful processing
            self._track_processing(file_path, result, start_time, success=True)

            return result

        except Exception as e:
            # Deliberate catch-all boundary: failures are reported through the
            # result object so callers can handle batches uniformly.
            processing_time = time.time() - start_time
            result = MediaProcessingResult(
                success=False,
                error_message=str(e),
                processing_time=processing_time,
                metadata={
                    'processor': self.__class__.__name__,
                    'error_type': type(e).__name__
                }
            )

            # Track failed processing
            self._track_processing(file_path, result, start_time, success=False, error=e)

            return result

    def process_multiple_files(self, file_paths: List[Union[str, Path]], **kwargs) -> List[MediaProcessingResult]:
        """
        Process multiple files sequentially.

        Args:
            file_paths: List of file paths to process
            **kwargs: Additional processing parameters

        Returns:
            List of MediaProcessingResult objects, one per input path, in
            input order
        """
        results = []

        for file_path in file_paths:
            try:
                result = self.process_file(file_path, **kwargs)
                results.append(result)
            except Exception as e:
                # process_file captures most failures itself; this guards the
                # steps it performs before entering its own try block.
                error_result = MediaProcessingResult(
                    success=False,
                    error_message=str(e),
                    metadata={
                        'file_path': str(file_path),
                        'processor': self.__class__.__name__,
                        'error_type': type(e).__name__
                    }
                )
                results.append(error_result)

        return results

    def supports_media_type(self, media_type: MediaType) -> bool:
        """
        Check if this handler supports the given media type.

        Args:
            media_type: MediaType to check

        Returns:
            True if supported, False otherwise
        """
        return self.capabilities.supports_media_type(media_type)

    def supports_format(self, media_type: MediaType, format_ext: str) -> bool:
        """
        Check if this handler supports the specific format.

        Args:
            media_type: MediaType of the content
            format_ext: File extension (without dot)

        Returns:
            True if supported, False otherwise
        """
        return self.capabilities.supports_format(media_type, format_ext)

    def get_capabilities(self) -> MediaCapabilities:
        """
        Get the capabilities of this media handler.

        Returns:
            MediaCapabilities object
        """
        return self.capabilities

    def get_supported_formats(self) -> Dict[str, List[str]]:
        """
        Get supported formats organized by media type.

        Returns:
            Dictionary mapping media type value to list of supported
            extensions. Media types with no supported extension are omitted.
        """
        result = {}
        for media_type in MediaType:
            if not self.supports_media_type(media_type):
                continue
            formats = [
                ext
                for ext, mt in FILE_TYPE_MAPPINGS.items()
                if mt == media_type and self.supports_format(media_type, ext)
            ]
            if formats:
                result[media_type.value] = formats
        return result

    @abstractmethod
    def _process_internal(self, file_path: Path, media_type: MediaType, **kwargs) -> MediaContent:
        """
        Internal processing method to be implemented by subclasses.

        This method contains the actual processing logic for the specific
        media type or file format.

        Args:
            file_path: Path to the file to process
            media_type: Detected media type
            **kwargs: Additional processing parameters

        Returns:
            MediaContent object with processed content

        Raises:
            MediaProcessingError: If processing fails
        """
        pass

    def _track_processing(self, file_path: Path, result: MediaProcessingResult,
                          start_time: float, success: bool = True,
                          error: Optional[Exception] = None):
        """
        Track media processing with telemetry and events.

        Args:
            file_path: Path to the processed file
            result: Processing result
            start_time: Processing start time
            success: Whether processing succeeded
            error: Error if failed
        """
        duration_ms = (time.time() - start_time) * 1000

        # Measure the payload once. len(str(b"...")) would report the length
        # of the bytes *repr* (prefix, quotes, escapes) and build a full copy,
        # so text and binary payloads are measured directly with len().
        content_size = None
        content_unit = "chars"
        if result.media_content:
            payload = result.media_content.content
            if isinstance(payload, (bytes, bytearray)):
                content_size = len(payload)
                content_unit = "bytes"
            elif isinstance(payload, str):
                content_size = len(payload)
            else:
                # Unknown payload type: fall back to its string form.
                content_size = len(str(payload))

        # Emit processing completed event
        if self.enable_events:
            event_data = {
                "file_path": str(file_path),
                "file_name": file_path.name,
                "file_size": file_path.stat().st_size if file_path.exists() else 0,
                "media_type": detect_media_type(file_path).value,
                "processor": self.__class__.__name__,
                "success": success,
                "duration_ms": duration_ms,
                "error": str(error) if error else None
            }

            if result.media_content:
                event_data.update({
                    "content_format": result.media_content.content_format.value,
                    "mime_type": result.media_content.mime_type,
                    "content_size": content_size
                })

            self._emit_processing_event(
                EventType.GENERATION_COMPLETED,
                **event_data
            )

        # Log processing result
        if error:
            self.logger.error(
                f"Media processing failed for {file_path.name}: {error} "
                f"(duration: {duration_ms:.2f}ms)"
            )
        else:
            content_info = ""
            if result.media_content:
                content_info = f" (content size: {content_size} {content_unit})"

            self.logger.info(
                f"Media processing completed for {file_path.name}: "
                f"{duration_ms:.2f}ms{content_info}"
            )

    def _emit_processing_event(self, event_type: EventType, **event_data):
        """
        Emit a media processing event.

        Does nothing when ``enable_events`` is false.

        Args:
            event_type: Type of event to emit
            **event_data: Event data
        """
        if self.enable_events:
            emit_global(event_type, event_data, source=self.__class__.__name__)

    def _validate_content_size(self, content: Union[str, bytes], max_size: Optional[int] = None) -> None:
        """
        Validate that content size doesn't exceed limits.

        Args:
            content: Content to validate; text is measured as UTF-8 bytes
            max_size: Maximum allowed size in bytes (defaults to
                ``self.max_file_size``)

        Raises:
            FileSizeExceededError: If content exceeds size limit
        """
        if max_size is None:
            max_size = self.max_file_size

        content_size = len(content.encode('utf-8') if isinstance(content, str) else content)
        if content_size > max_size:
            raise FileSizeExceededError(
                f"Processed content size {content_size} exceeds maximum {max_size} bytes"
            )

    def _create_media_content(self, content: Union[str, bytes], file_path: Path,
                              media_type: MediaType, content_format: ContentFormat,
                              mime_type: str = "auto", **metadata) -> MediaContent:
        """
        Create a MediaContent object with consistent metadata.

        Args:
            content: Processed content
            file_path: Original file path
            media_type: Type of media content
            content_format: Format of the content
            mime_type: MIME type of the content
            **metadata: Additional metadata; overrides the base keys on clash

        Returns:
            MediaContent object

        Raises:
            FileSizeExceededError: If the processed content exceeds
                ``self.max_file_size``
        """
        # Validate content size
        self._validate_content_size(content)

        # Create base metadata
        base_metadata = {
            'file_size': file_path.stat().st_size,
            'file_name': file_path.name,
            'file_extension': file_path.suffix,
            'processor': self.__class__.__name__,
            'processing_timestamp': time.time()
        }
        base_metadata.update(metadata)

        return MediaContent(
            media_type=media_type,
            content=content,
            content_format=content_format,
            mime_type=mime_type,
            file_path=str(file_path),
            metadata=base_metadata
        )
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
class BaseProviderMediaHandler(BaseMediaHandler):
    """
    Base class for provider-specific media handlers.

    This class extends BaseMediaHandler to provide provider-specific
    media formatting capabilities. Subclasses implement
    ``format_for_provider``.
    """

    def __init__(self, provider_name: str, model_capabilities: Optional[Dict[str, Any]] = None, **kwargs):
        """
        Initialize provider media handler.

        Args:
            provider_name: Name of the provider (e.g., "openai", "anthropic")
            model_capabilities: Model capabilities from model_capabilities.json
            **kwargs: Additional configuration, forwarded to BaseMediaHandler
        """
        super().__init__(**kwargs)

        self.provider_name = provider_name
        self.model_capabilities = model_capabilities or {}

        # Set capabilities based on model capabilities
        self._initialize_capabilities_from_model()

        self.logger.debug(f"Initialized provider media handler for {provider_name}")

    def _initialize_capabilities_from_model(self):
        """
        Initialize capabilities based on model capabilities.

        Leaves the default capabilities untouched when no model capabilities
        were supplied.
        """
        if not self.model_capabilities:
            return

        caps = self.model_capabilities

        # A present-but-empty (or None) 'image_resolutions' entry must not
        # crash initialization; treat it the same as a missing key.
        resolutions = caps.get('image_resolutions') or [None]

        self.capabilities = MediaCapabilities(
            vision_support=caps.get('vision_support', False),
            audio_support=caps.get('audio_support', False),
            video_support=caps.get('video_support', False),
            document_support=True,  # Assume document support for all providers
            max_image_resolution=resolutions[0],
            supported_image_formats=['jpg', 'png', 'gif', 'webp'] if caps.get('vision_support') else [],
            supported_document_formats=['pdf', 'txt', 'md', 'csv', 'tsv'],
            max_file_size=self.max_file_size
        )

    @abstractmethod
    def format_for_provider(self, media_content: MediaContent) -> Dict[str, Any]:
        """
        Format media content for the specific provider's API.

        Args:
            media_content: MediaContent to format

        Returns:
            Dictionary formatted for provider's API
        """
        pass

    def can_handle_media(self, media_content: MediaContent) -> bool:
        """
        Check if this provider can handle the given media content.

        Args:
            media_content: MediaContent to check

        Returns:
            True if the content's media type is supported by this handler
        """
        return self.supports_media_type(media_content.media_type)
|