abstractcore 2.4.2__py3-none-any.whl → 2.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. abstractcore/apps/app_config_utils.py +19 -0
  2. abstractcore/apps/summarizer.py +85 -56
  3. abstractcore/architectures/detection.py +15 -4
  4. abstractcore/assets/architecture_formats.json +1 -1
  5. abstractcore/assets/model_capabilities.json +420 -11
  6. abstractcore/core/interface.py +2 -0
  7. abstractcore/core/session.py +4 -0
  8. abstractcore/embeddings/manager.py +54 -16
  9. abstractcore/media/__init__.py +116 -148
  10. abstractcore/media/auto_handler.py +363 -0
  11. abstractcore/media/base.py +456 -0
  12. abstractcore/media/capabilities.py +335 -0
  13. abstractcore/media/types.py +300 -0
  14. abstractcore/media/vision_fallback.py +260 -0
  15. abstractcore/providers/anthropic_provider.py +18 -1
  16. abstractcore/providers/base.py +187 -0
  17. abstractcore/providers/huggingface_provider.py +111 -12
  18. abstractcore/providers/lmstudio_provider.py +88 -5
  19. abstractcore/providers/mlx_provider.py +33 -1
  20. abstractcore/providers/ollama_provider.py +37 -3
  21. abstractcore/providers/openai_provider.py +18 -1
  22. abstractcore/server/app.py +1390 -104
  23. abstractcore/tools/common_tools.py +12 -8
  24. abstractcore/utils/__init__.py +9 -5
  25. abstractcore/utils/cli.py +199 -17
  26. abstractcore/utils/message_preprocessor.py +182 -0
  27. abstractcore/utils/structured_logging.py +117 -16
  28. abstractcore/utils/version.py +1 -1
  29. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/METADATA +214 -20
  30. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/RECORD +34 -27
  31. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/entry_points.txt +1 -0
  32. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/WHEEL +0 -0
  33. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/licenses/LICENSE +0 -0
  34. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,456 @@
1
+ """
2
+ Base media handler following AbstractCore patterns.
3
+
4
+ This module defines the base class for all media processing operations,
5
+ providing a unified interface for handling different file types across providers.
6
+ """
7
+
8
+ import time
9
+ from abc import ABC, abstractmethod
10
+ from pathlib import Path
11
+ from typing import List, Dict, Any, Optional, Union, Type
12
+
13
+ from .types import (
14
+ MediaContent, MediaType, ContentFormat, MediaCapabilities,
15
+ MediaProcessingResult, detect_media_type, FILE_TYPE_MAPPINGS
16
+ )
17
+ from ..utils.structured_logging import get_logger
18
+ from ..events import EventType, emit_global
19
+ from ..exceptions import AbstractCoreError
20
+
21
+
22
class MediaProcessingError(AbstractCoreError):
    """Root of the media-handling exception hierarchy; signals that processing failed."""
25
+
26
+
27
class UnsupportedMediaTypeError(MediaProcessingError):
    """Signals that a media type, or a specific file format of that type, is not supported."""
30
+
31
+
32
class FileSizeExceededError(MediaProcessingError):
    """Signals that a file, or its processed content, is larger than the configured limit."""
35
+
36
+
37
class BaseMediaHandler(ABC):
    """
    Base class for media handling operations.

    This class provides the fundamental interface and shared functionality
    for all media processors, following AbstractCore's architecture patterns.

    Subclasses implement ``_process_internal``; ``process_file`` wraps that
    call with validation, timing, event emission and logging.
    """

    def __init__(self, **kwargs):
        """
        Initialize the base media handler.

        Args:
            **kwargs: Configuration parameters. Recognized keys are
                ``max_file_size`` (bytes, default 50MB),
                ``supported_formats`` (default empty list),
                ``temp_dir`` (default None) and
                ``enable_events`` (default True).
        """
        # Setup structured logging
        self.logger = get_logger(self.__class__.__name__)

        # Configuration
        self.max_file_size = kwargs.get('max_file_size', 50 * 1024 * 1024)  # 50MB default
        self.supported_formats = kwargs.get('supported_formats', [])
        self.temp_dir = kwargs.get('temp_dir', None)
        self.enable_events = kwargs.get('enable_events', True)

        # Capabilities (to be set by subclasses)
        self.capabilities = MediaCapabilities()

        self.logger.debug(f"Initialized {self.__class__.__name__} with max_file_size={self.max_file_size}")

    def process_file(self, file_path: Union[str, Path], **kwargs) -> MediaProcessingResult:
        """
        Process a file and return media content.

        This is the main entry point for media processing, providing telemetry
        and error handling around the actual processing implementation.

        Any exception raised during validation or by ``_process_internal`` is
        converted into a failed ``MediaProcessingResult``. The "started" event
        (including its media type detection) is emitted before that guarded
        section, so an exception raised there propagates to the caller.

        Args:
            file_path: Path to the file to process
            **kwargs: Additional processing parameters, forwarded to
                ``_process_internal``

        Returns:
            MediaProcessingResult with success/failure information
        """
        start_time = time.time()
        file_path = Path(file_path)

        # Emit processing started event
        if self.enable_events:
            self._emit_processing_event(
                EventType.GENERATION_STARTED,  # Reuse generation events for media
                file_path=str(file_path),
                media_type=detect_media_type(file_path).value,
                processor=self.__class__.__name__
            )

        try:
            # Validate file exists
            if not file_path.exists():
                raise FileNotFoundError(f"File not found: {file_path}")

            # Validate file size
            file_size = file_path.stat().st_size
            if file_size > self.max_file_size:
                raise FileSizeExceededError(
                    f"File size {file_size} exceeds maximum {self.max_file_size} bytes"
                )

            # Detect media type
            media_type = detect_media_type(file_path)

            # Check if media type is supported
            if not self.supports_media_type(media_type):
                raise UnsupportedMediaTypeError(
                    f"Media type {media_type.value} not supported by {self.__class__.__name__}"
                )

            # Check format support
            format_ext = file_path.suffix.lower().lstrip('.')
            if not self.supports_format(media_type, format_ext):
                raise UnsupportedMediaTypeError(
                    f"Format .{format_ext} not supported for {media_type.value}"
                )

            # Call the actual processing implementation
            media_content = self._process_internal(file_path, media_type, **kwargs)

            # Create successful result
            processing_time = time.time() - start_time
            result = MediaProcessingResult(
                success=True,
                media_content=media_content,
                processing_time=processing_time,
                metadata={
                    'file_size': file_size,
                    'file_name': file_path.name,
                    'processor': self.__class__.__name__
                }
            )

            # Track successful processing
            self._track_processing(file_path, result, start_time, success=True)

            return result

        except Exception as e:
            # Boundary handler: every failure is reported through the result
            # object instead of being raised.
            processing_time = time.time() - start_time
            result = MediaProcessingResult(
                success=False,
                error_message=str(e),
                processing_time=processing_time,
                metadata={
                    # Same key as the failure result in process_multiple_files,
                    # so a failed result always identifies its file.
                    'file_path': str(file_path),
                    'processor': self.__class__.__name__,
                    'error_type': type(e).__name__
                }
            )

            # Track failed processing
            self._track_processing(file_path, result, start_time, success=False, error=e)

            return result

    def process_multiple_files(self, file_paths: List[Union[str, Path]], **kwargs) -> List[MediaProcessingResult]:
        """
        Process multiple files one after another.

        Args:
            file_paths: List of file paths to process
            **kwargs: Additional processing parameters, forwarded to
                ``process_file`` for every path

        Returns:
            List of MediaProcessingResult objects, one per input path and in
            the same order
        """
        results = []

        for file_path in file_paths:
            try:
                results.append(self.process_file(file_path, **kwargs))
            except Exception as e:
                # process_file can still raise from the code that runs outside
                # its own guarded section; record that as a failed result so
                # one bad file does not abort the whole batch.
                results.append(MediaProcessingResult(
                    success=False,
                    error_message=str(e),
                    metadata={
                        'file_path': str(file_path),
                        'processor': self.__class__.__name__,
                        'error_type': type(e).__name__
                    }
                ))

        return results

    def supports_media_type(self, media_type: MediaType) -> bool:
        """
        Check if this handler supports the given media type.

        Delegates to ``self.capabilities``.

        Args:
            media_type: MediaType to check

        Returns:
            True if supported, False otherwise
        """
        return self.capabilities.supports_media_type(media_type)

    def supports_format(self, media_type: MediaType, format_ext: str) -> bool:
        """
        Check if this handler supports the specific format.

        Delegates to ``self.capabilities``.

        Args:
            media_type: MediaType of the content
            format_ext: File extension (without dot)

        Returns:
            True if supported, False otherwise
        """
        return self.capabilities.supports_format(media_type, format_ext)

    def get_capabilities(self) -> MediaCapabilities:
        """
        Get the capabilities of this media handler.

        Returns:
            MediaCapabilities object
        """
        return self.capabilities

    def get_supported_formats(self) -> Dict[str, List[str]]:
        """
        Get supported formats organized by media type.

        Returns:
            Dictionary mapping media type value to the list of extensions
            from FILE_TYPE_MAPPINGS that this handler supports. Media types
            with no supported extension are omitted.
        """
        result = {}
        for media_type in MediaType:
            if not self.supports_media_type(media_type):
                continue
            formats = [
                ext
                for ext, mapped_type in FILE_TYPE_MAPPINGS.items()
                if mapped_type == media_type and self.supports_format(media_type, ext)
            ]
            if formats:
                result[media_type.value] = formats
        return result

    @abstractmethod
    def _process_internal(self, file_path: Path, media_type: MediaType, **kwargs) -> MediaContent:
        """
        Internal processing method to be implemented by subclasses.

        This method contains the actual processing logic for the specific
        media type or file format. ``process_file`` calls it only after the
        existence, size, media type and format checks have passed.

        Args:
            file_path: Path to the file to process
            media_type: Detected media type
            **kwargs: Additional processing parameters

        Returns:
            MediaContent object with processed content

        Raises:
            MediaProcessingError: If processing fails
        """
        pass

    @staticmethod
    def _content_length(content: Any) -> int:
        """
        Return the size of processed content for telemetry.

        For str and bytes-like content this is ``len(content)``. Calling
        ``str()`` on bytes yields its ``b'...'`` repr, which overstates the
        size and allocates a large temporary string. Any other object falls
        back to the length of its string form.
        """
        if isinstance(content, (str, bytes, bytearray)):
            return len(content)
        return len(str(content))

    def _track_processing(self, file_path: Path, result: MediaProcessingResult,
                          start_time: float, success: bool = True,
                          error: Optional[Exception] = None):
        """
        Track media processing with telemetry and events.

        Emits a completion event when events are enabled and always writes a
        log line (error level when ``error`` is given, info level otherwise).

        Args:
            file_path: Path to the processed file
            result: Processing result
            start_time: Processing start time
            success: Whether processing succeeded
            error: Error if failed
        """
        duration_ms = (time.time() - start_time) * 1000

        # Measure the content once; used by both the event and the log line.
        content_size = None
        if result.media_content:
            content_size = self._content_length(result.media_content.content)

        # Emit processing completed event
        if self.enable_events:
            # A single stat() avoids the exists()/stat() race: the file may be
            # missing or vanish, and telemetry must not raise because of it.
            try:
                file_size = file_path.stat().st_size
            except OSError:
                file_size = 0

            event_data = {
                "file_path": str(file_path),
                "file_name": file_path.name,
                "file_size": file_size,
                "media_type": detect_media_type(file_path).value,
                "processor": self.__class__.__name__,
                "success": success,
                "duration_ms": duration_ms,
                "error": str(error) if error else None
            }

            if result.media_content:
                event_data.update({
                    "content_format": result.media_content.content_format.value,
                    "mime_type": result.media_content.mime_type,
                    "content_size": content_size
                })

            self._emit_processing_event(
                EventType.GENERATION_COMPLETED,
                **event_data
            )

        # Log processing result
        if error:
            self.logger.error(
                f"Media processing failed for {file_path.name}: {error} "
                f"(duration: {duration_ms:.2f}ms)"
            )
        else:
            content_info = ""
            if result.media_content:
                content_info = f" (content size: {content_size} chars)"

            self.logger.info(
                f"Media processing completed for {file_path.name}: "
                f"{duration_ms:.2f}ms{content_info}"
            )

    def _emit_processing_event(self, event_type: EventType, **event_data):
        """
        Emit a media processing event.

        Does nothing when ``enable_events`` is false. The handler's class name
        is passed as the event source.

        Args:
            event_type: Type of event to emit
            **event_data: Event data
        """
        if self.enable_events:
            emit_global(event_type, event_data, source=self.__class__.__name__)

    def _validate_content_size(self, content: Union[str, bytes], max_size: Optional[int] = None) -> None:
        """
        Validate that content size doesn't exceed limits.

        String content is measured by its UTF-8 encoded length; bytes content
        by its length.

        Args:
            content: Content to validate
            max_size: Maximum allowed size in bytes; defaults to
                ``self.max_file_size`` when None

        Raises:
            FileSizeExceededError: If content exceeds size limit
        """
        if max_size is None:
            max_size = self.max_file_size

        content_size = len(content.encode('utf-8') if isinstance(content, str) else content)
        if content_size > max_size:
            raise FileSizeExceededError(
                f"Processed content size {content_size} exceeds maximum {max_size} bytes"
            )

    def _create_media_content(self, content: Union[str, bytes], file_path: Path,
                              media_type: MediaType, content_format: ContentFormat,
                              mime_type: str = "auto", **metadata) -> MediaContent:
        """
        Create a MediaContent object with consistent metadata.

        Args:
            content: Processed content
            file_path: Original file path; it is stat()-ed for the recorded
                file size
            media_type: Type of media content
            content_format: Format of the content
            mime_type: MIME type of the content
            **metadata: Additional metadata; entries override the base
                metadata keys of the same name

        Returns:
            MediaContent object

        Raises:
            FileSizeExceededError: If the content exceeds ``max_file_size``
        """
        # Validate content size
        self._validate_content_size(content)

        # Create base metadata
        base_metadata = {
            'file_size': file_path.stat().st_size,
            'file_name': file_path.name,
            'file_extension': file_path.suffix,
            'processor': self.__class__.__name__,
            'processing_timestamp': time.time()
        }
        base_metadata.update(metadata)

        return MediaContent(
            media_type=media_type,
            content=content,
            content_format=content_format,
            mime_type=mime_type,
            file_path=str(file_path),
            metadata=base_metadata
        )
390
+
391
+
392
class BaseProviderMediaHandler(BaseMediaHandler):
    """
    Base class for provider-specific media handlers.

    This class extends BaseMediaHandler to provide provider-specific
    media formatting capabilities.
    """

    def __init__(self, provider_name: str, model_capabilities: Optional[Dict[str, Any]] = None, **kwargs):
        """
        Initialize provider media handler.

        Args:
            provider_name: Name of the provider (e.g., "openai", "anthropic")
            model_capabilities: Model capabilities from model_capabilities.json;
                None is treated as an empty dict
            **kwargs: Additional configuration, forwarded to BaseMediaHandler
        """
        super().__init__(**kwargs)

        self.provider_name = provider_name
        self.model_capabilities = model_capabilities or {}

        # Set capabilities based on model capabilities
        self._initialize_capabilities_from_model()

        self.logger.debug(f"Initialized provider media handler for {provider_name}")

    def _initialize_capabilities_from_model(self):
        """
        Initialize capabilities based on model capabilities.

        When ``self.model_capabilities`` is empty, the capabilities set by the
        base class are left untouched.
        """
        if not self.model_capabilities:
            return

        model_caps = self.model_capabilities

        # `or [None]` also covers an empty list and an explicit None value,
        # which would otherwise raise IndexError / TypeError when indexed.
        resolutions = model_caps.get('image_resolutions') or [None]

        # NOTE(review): 'jpeg' is not in the image list; whether ".jpeg" files
        # are accepted depends on MediaCapabilities.supports_format - confirm.
        image_formats = ['jpg', 'png', 'gif', 'webp'] if model_caps.get('vision_support') else []

        self.capabilities = MediaCapabilities(
            vision_support=model_caps.get('vision_support', False),
            audio_support=model_caps.get('audio_support', False),
            video_support=model_caps.get('video_support', False),
            document_support=True,  # Assume document support for all providers
            max_image_resolution=resolutions[0],
            supported_image_formats=image_formats,
            supported_document_formats=['pdf', 'txt', 'md', 'csv', 'tsv'],
            max_file_size=self.max_file_size
        )

    @abstractmethod
    def format_for_provider(self, media_content: MediaContent) -> Dict[str, Any]:
        """
        Format media content for the specific provider's API.

        Args:
            media_content: MediaContent to format

        Returns:
            Dictionary formatted for provider's API
        """
        pass

    def can_handle_media(self, media_content: MediaContent) -> bool:
        """
        Check if this provider can handle the given media content.

        Only the content's media type is checked, via ``supports_media_type``.

        Args:
            media_content: MediaContent to check

        Returns:
            True if provider can handle this content
        """
        return self.supports_media_type(media_content.media_type)