abstractcore: 2.5.2-py3-none-any.whl → 2.6.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/__init__.py +19 -1
- abstractcore/architectures/detection.py +252 -6
- abstractcore/assets/architecture_formats.json +14 -1
- abstractcore/assets/model_capabilities.json +533 -10
- abstractcore/compression/__init__.py +29 -0
- abstractcore/compression/analytics.py +420 -0
- abstractcore/compression/cache.py +250 -0
- abstractcore/compression/config.py +279 -0
- abstractcore/compression/exceptions.py +30 -0
- abstractcore/compression/glyph_processor.py +381 -0
- abstractcore/compression/optimizer.py +388 -0
- abstractcore/compression/orchestrator.py +380 -0
- abstractcore/compression/pil_text_renderer.py +818 -0
- abstractcore/compression/quality.py +226 -0
- abstractcore/compression/text_formatter.py +666 -0
- abstractcore/compression/vision_compressor.py +371 -0
- abstractcore/config/main.py +64 -0
- abstractcore/config/manager.py +100 -5
- abstractcore/core/retry.py +2 -2
- abstractcore/core/session.py +193 -7
- abstractcore/download.py +253 -0
- abstractcore/embeddings/manager.py +2 -2
- abstractcore/events/__init__.py +113 -2
- abstractcore/exceptions/__init__.py +49 -2
- abstractcore/media/auto_handler.py +312 -18
- abstractcore/media/handlers/local_handler.py +14 -2
- abstractcore/media/handlers/openai_handler.py +62 -3
- abstractcore/media/processors/__init__.py +11 -1
- abstractcore/media/processors/direct_pdf_processor.py +210 -0
- abstractcore/media/processors/glyph_pdf_processor.py +227 -0
- abstractcore/media/processors/image_processor.py +7 -1
- abstractcore/media/processors/office_processor.py +2 -2
- abstractcore/media/processors/text_processor.py +18 -3
- abstractcore/media/types.py +164 -7
- abstractcore/media/utils/image_scaler.py +2 -2
- abstractcore/media/vision_fallback.py +2 -2
- abstractcore/providers/__init__.py +18 -0
- abstractcore/providers/anthropic_provider.py +228 -8
- abstractcore/providers/base.py +378 -11
- abstractcore/providers/huggingface_provider.py +563 -23
- abstractcore/providers/lmstudio_provider.py +284 -4
- abstractcore/providers/mlx_provider.py +27 -2
- abstractcore/providers/model_capabilities.py +352 -0
- abstractcore/providers/ollama_provider.py +282 -6
- abstractcore/providers/openai_provider.py +286 -8
- abstractcore/providers/registry.py +85 -13
- abstractcore/providers/streaming.py +2 -2
- abstractcore/server/app.py +91 -81
- abstractcore/tools/common_tools.py +2 -2
- abstractcore/tools/handler.py +2 -2
- abstractcore/tools/parser.py +2 -2
- abstractcore/tools/registry.py +2 -2
- abstractcore/tools/syntax_rewriter.py +2 -2
- abstractcore/tools/tag_rewriter.py +3 -3
- abstractcore/utils/__init__.py +4 -1
- abstractcore/utils/self_fixes.py +2 -2
- abstractcore/utils/trace_export.py +287 -0
- abstractcore/utils/version.py +1 -1
- abstractcore/utils/vlm_token_calculator.py +655 -0
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/METADATA +207 -8
- abstractcore-2.6.0.dist-info/RECORD +108 -0
- abstractcore-2.5.2.dist-info/RECORD +0 -90
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/WHEEL +0 -0
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/entry_points.txt +0 -0
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/top_level.txt +0 -0
abstractcore/exceptions/__init__.py

@@ -106,10 +106,55 @@ def format_model_error(provider: str, invalid_model: str, available_models: list
     return message.rstrip()


+def format_auth_error(provider: str, reason: str = None) -> str:
+    """
+    Format actionable authentication error with setup instructions.
+
+    Args:
+        provider: Provider name (e.g., "openai", "anthropic")
+        reason: Optional reason for auth failure
+
+    Returns:
+        Formatted error message with fix instructions
+    """
+    urls = {
+        "openai": "https://platform.openai.com/api-keys",
+        "anthropic": "https://console.anthropic.com/settings/keys",
+    }
+    msg = f"{provider.upper()} authentication failed"
+    if reason:
+        msg += f": {reason}"
+    msg += f"\nFix: abstractcore --set-api-key {provider} YOUR_KEY"
+    if provider.lower() in urls:
+        msg += f"\nGet key: {urls[provider.lower()]}"
+    return msg
+
+
+def format_provider_error(provider: str, reason: str) -> str:
+    """
+    Format actionable provider unavailability error with setup instructions.
+
+    Args:
+        provider: Provider name (e.g., "ollama", "lmstudio")
+        reason: Reason for unavailability (e.g., "Connection refused")
+
+    Returns:
+        Formatted error message with setup instructions
+    """
+    instructions = {
+        "ollama": "Install: https://ollama.com/download\nStart: ollama serve",
+        "lmstudio": "Install: https://lmstudio.ai/\nEnable API in settings",
+    }
+    msg = f"Provider '{provider}' unavailable: {reason}"
+    if provider.lower() in instructions:
+        msg += f"\n{instructions[provider.lower()]}"
+    return msg
+
+
 # Export all exceptions for easy importing
 __all__ = [
     'AbstractCoreError',
-    'ProviderError',
+    'ProviderError',
     'ProviderAPIError',
     'AuthenticationError',
     'Authentication',  # Backward compatibility alias

@@ -121,5 +166,7 @@ __all__ = [
     'SessionError',
     'ConfigurationError',
     'ModelNotFoundError',
-    'format_model_error'
+    'format_model_error',
+    'format_auth_error',
+    'format_provider_error'
 ]
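For reference, a quick sketch of the messages these new helpers produce, derived directly from the string-building logic in the hunk above:

```python
from abstractcore.exceptions import format_auth_error, format_provider_error

print(format_auth_error("openai", "invalid API key"))
# OPENAI authentication failed: invalid API key
# Fix: abstractcore --set-api-key openai YOUR_KEY
# Get key: https://platform.openai.com/api-keys

print(format_provider_error("ollama", "Connection refused"))
# Provider 'ollama' unavailable: Connection refused
# Install: https://ollama.com/download
# Start: ollama serve
```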
abstractcore/media/auto_handler.py

@@ -13,6 +13,25 @@ from typing import Dict, Any, Optional, List
 from .base import BaseMediaHandler
 from .types import MediaContent, MediaType, ContentFormat, detect_media_type
 from .processors import ImageProcessor, TextProcessor, PDFProcessor, OfficeProcessor
+from ..exceptions import UnsupportedFeatureError
+
+# Import Glyph compression support
+try:
+    from ..compression.orchestrator import CompressionOrchestrator
+    from ..compression.config import GlyphConfig
+    GLYPH_AVAILABLE = True
+except ImportError:
+    CompressionOrchestrator = None
+    GlyphConfig = None
+    GLYPH_AVAILABLE = False
+
+# Import vision detection
+try:
+    from ..architectures.detection import supports_vision
+    VISION_DETECTION_AVAILABLE = True
+except ImportError:
+    supports_vision = None
+    VISION_DETECTION_AVAILABLE = False


 class AutoMediaHandler(BaseMediaHandler):
@@ -41,6 +60,11 @@ class AutoMediaHandler(BaseMediaHandler):
         self._text_processor = None
         self._pdf_processor = None
         self._office_processor = None
+
+        # Initialize Glyph compression support
+        self._compression_orchestrator = None
+        self.glyph_config = kwargs.get('glyph_config')
+        self.enable_compression = kwargs.get('enable_glyph_compression', GLYPH_AVAILABLE)

         # Track which processors are available
         self._available_processors = self._check_processor_availability()
@@ -74,6 +98,20 @@ class AutoMediaHandler(BaseMediaHandler):
             availability['office'] = True
         except ImportError:
             availability['office'] = False
+
+        # GlyphProcessor (requires reportlab and pdf2image)
+        glyph_deps_available = True
+        if GLYPH_AVAILABLE and self.enable_compression:
+            # Check actual dependencies
+            try:
+                import reportlab
+                import pdf2image
+            except ImportError:
+                glyph_deps_available = False
+        else:
+            glyph_deps_available = False
+
+        availability['glyph'] = glyph_deps_available

         return availability
@@ -100,6 +138,13 @@ class AutoMediaHandler(BaseMediaHandler):
         if self._office_processor is None:
             self._office_processor = OfficeProcessor(**self.processor_config)
         return self._office_processor
+
+    def _get_compression_orchestrator(self) -> 'CompressionOrchestrator':
+        """Get or create CompressionOrchestrator instance."""
+        if self._compression_orchestrator is None and GLYPH_AVAILABLE:
+            config = self.glyph_config or GlyphConfig.from_abstractcore_config()
+            self._compression_orchestrator = CompressionOrchestrator(config)
+        return self._compression_orchestrator

     def _select_processor(self, file_path: Path, media_type: MediaType) -> Optional[BaseMediaHandler]:
         """
@@ -167,6 +212,20 @@ class AutoMediaHandler(BaseMediaHandler):
         Returns:
             MediaContent object with processed content
         """
+        # Check if Glyph compression should be applied
+        provider = kwargs.get('provider')
+        model = kwargs.get('model')
+        glyph_compression = kwargs.get('glyph_compression', 'auto')
+
+        if self._should_apply_compression(file_path, media_type, provider, model, glyph_compression):
+            try:
+                # Remove provider and model from kwargs to avoid duplicate arguments
+                compression_kwargs = {k: v for k, v in kwargs.items() if k not in ['provider', 'model']}
+                return self._apply_compression(file_path, provider, model, **compression_kwargs)
+            except Exception as e:
+                self.logger.warning(f"Glyph compression failed, falling back to standard processing: {e}")
+                # Continue with standard processing
+
         # Select the appropriate processor
         processor = self._select_processor(file_path, media_type)
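The compression gate reads `provider`, `model`, and `glyph_compression` out of `**kwargs`, so callers opt in per call. A minimal usage sketch; the public entry-point name `process()` is an assumption, only the keyword arguments are confirmed by this diff:

```python
from pathlib import Path
from abstractcore.media.auto_handler import AutoMediaHandler

handler = AutoMediaHandler(enable_glyph_compression=True)

# provider/model/glyph_compression are consumed by the gate above;
# 'auto' defers the final decision to the CompressionOrchestrator.
result = handler.process(  # hypothetical method name
    Path("report.pdf"),
    provider="openai",
    model="gpt-4o-mini",
    glyph_compression="auto",
)
```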
@@ -218,6 +277,221 @@ class AutoMediaHandler(BaseMediaHandler):
             fallback_processing=True,
             available_processors=list(self._available_processors.keys())
         )
+
+    def _should_apply_compression(self, file_path: Path, media_type: MediaType,
+                                  provider: str, model: str, glyph_compression: str) -> bool:
+        """
+        Check if Glyph compression should be applied.
+
+        ⚠️ EXPERIMENTAL FEATURE: Glyph compression requires vision-capable models.
+
+        Raises:
+            UnsupportedFeatureError: When glyph_compression="always" but model lacks vision support
+        """
+        # Check if Glyph is available
+        if not self._available_processors.get('glyph', False):
+            if glyph_compression == "always":
+                # User explicitly requested compression but it's not available
+                self._log_compression_unavailable_warning()
+            return False
+
+        if glyph_compression == "never":
+            return False
+
+        # Check vision support for compression
+        model_supports_vision = self._check_vision_support(model)
+
+        if glyph_compression == "always":
+            # Explicit compression request - enforce vision requirement
+            if not model_supports_vision:
+                raise UnsupportedFeatureError(
+                    f"Glyph compression requires a vision-capable model. "
+                    f"Model '{model}' does not support vision. "
+                    f"Vision-capable models include: gpt-4o, gpt-4o-mini, claude-3-5-sonnet, "
+                    f"llama3.2-vision, qwen2-vl, gemini-1.5-pro, gemini-1.5-flash, etc."
+                )
+            return True
+
+        # Auto-decision logic
+        if not provider or not model:
+            return False
+
+        # Only compress text-based content
+        if media_type not in [MediaType.TEXT, MediaType.DOCUMENT]:
+            return False
+
+        # Auto mode: check vision support and warn if not supported
+        if not model_supports_vision:
+            self.logger.warning(
+                f"Glyph compression skipped: model '{model}' does not support vision. "
+                f"Use a vision-capable model to enable compression."
+            )
+            return False
+
+        try:
+            orchestrator = self._get_compression_orchestrator()
+            if orchestrator:
+                return orchestrator.should_compress(file_path, provider, model, glyph_compression)
+        except Exception as e:
+            self.logger.debug(f"Compression decision failed: {e}")
+
+        return False
+
+    def _check_vision_support(self, model: str) -> bool:
+        """
+        Check if the model supports vision capabilities.
+
+        Args:
+            model: Model name to check
+
+        Returns:
+            True if model supports vision, False otherwise
+        """
+        if not model or not VISION_DETECTION_AVAILABLE:
+            # Conservative approach: assume no vision if detection unavailable
+            return False
+
+        try:
+            return supports_vision(model)
+        except Exception as e:
+            self.logger.debug(f"Failed to check vision support for model '{model}': {e}")
+            return False
+
+    def _log_compression_unavailable_warning(self):
+        """Log detailed warning about why Glyph compression is unavailable."""
+        self.logger.warning("Glyph compression requested but not available")
+
+        # Check specific reasons
+        if not GLYPH_AVAILABLE:
+            self.logger.warning("Glyph compression modules could not be imported")
+
+        # Check dependencies
+        missing_deps = []
+        try:
+            import reportlab
+        except ImportError:
+            missing_deps.append("reportlab")
+
+        try:
+            import pdf2image
+        except ImportError:
+            missing_deps.append("pdf2image")
+
+        if missing_deps:
+            deps_str = ", ".join(missing_deps)
+            self.logger.warning(f"Missing Glyph dependencies: {deps_str}")
+            self.logger.warning(f"Install with: pip install {' '.join(missing_deps)}")
+
+        if not self.enable_compression:
+            self.logger.warning("Glyph compression is disabled in AutoMediaHandler configuration")
+
+    def _apply_compression(self, file_path: Path, provider: str, model: str, **kwargs) -> MediaContent:
+        """Apply Glyph compression to the file."""
+        media_type = detect_media_type(file_path)
+
+        # For PDF files, use direct PDF-to-image conversion (no text extraction!)
+        if media_type == MediaType.DOCUMENT and file_path.suffix.lower() == '.pdf':
+            try:
+                from .processors.direct_pdf_processor import DirectPDFProcessor
+
+                # Configure for optimal compression (2 pages per image)
+                direct_processor = DirectPDFProcessor(
+                    pages_per_image=2,    # 16 pages → 8 images
+                    dpi=150,              # Good quality for VLM processing
+                    layout='horizontal',  # Side-by-side like open book
+                    gap=20,               # Small gap between pages
+                    **kwargs
+                )
+
+                # Get all combined images
+                combined_images = direct_processor.get_combined_image_paths(file_path)
+
+                # Get session info for metadata from DirectPDFProcessor
+                from ..config import get_config_manager
+                import hashlib
+                config_manager = get_config_manager()
+                glyph_cache_base = Path(config_manager.config.cache.glyph_cache_dir).expanduser()
+                pdf_hash = hashlib.md5(str(file_path).encode()).hexdigest()[:8]
+                session_id = f"pdf_{pdf_hash}_{len(combined_images)}pages"
+
+                # Create MediaContent objects for each combined image
+                media_contents = []
+                for i, img_path in enumerate(combined_images):
+                    with open(img_path, 'rb') as f:
+                        image_data = f.read()
+
+                    import base64
+                    encoded_data = base64.b64encode(image_data).decode('utf-8')
+
+                    media_content = MediaContent(
+                        media_type=MediaType.IMAGE,
+                        content=encoded_data,
+                        content_format=ContentFormat.BASE64,
+                        mime_type="image/png",
+                        metadata={
+                            'compression_used': True,
+                            'compression_method': 'direct_pdf_conversion',
+                            'pages_per_image': 2,
+                            'image_index': i,
+                            'total_images': len(combined_images),
+                            'original_file': str(file_path),
+                            'glyph_session_id': session_id,
+                            'glyph_cache_dir': str(glyph_cache_base / session_id),
+                            'processing_method': 'direct_pdf_conversion'  # For compatibility with test script
+                        }
+                    )
+                    media_contents.append(media_content)
+
+                self.logger.info(f"Direct PDF conversion: {len(combined_images)} combined images created")
+
+                # Return first image (in full implementation, would handle multiple)
+                if media_contents:
+                    return media_contents[0]
+                else:
+                    raise Exception("No combined images created")
+
+            except Exception as e:
+                self.logger.warning(f"DirectPDFProcessor failed: {e}, falling back to text extraction")
+                # Fall back to text extraction method
+                pass
+
+        # Fallback: text extraction method (for non-PDF or if direct method fails)
+        orchestrator = self._get_compression_orchestrator()
+        if not orchestrator:
+            raise Exception("Compression orchestrator not available")
+
+        if media_type == MediaType.DOCUMENT and file_path.suffix.lower() == '.pdf':
+            processor = self._get_pdf_processor()
+        elif media_type == MediaType.DOCUMENT:
+            processor = self._get_office_processor()
+        else:
+            processor = self._get_text_processor()
+
+        # Extract text content
+        extracted_content = processor._process_internal(file_path, media_type, **kwargs)
+        text_content = extracted_content.content
+
+        # Compress the extracted text content
+        glyph_compression = kwargs.get('glyph_compression', 'auto')
+        compressed_content = orchestrator.compress_content(text_content, provider, model, glyph_compression)
+
+        if compressed_content and len(compressed_content) > 0:
+            # Return first compressed image as primary content
+            # Additional images can be accessed through metadata
+            primary_content = compressed_content[0]
+
+            # Add information about additional images
+            if len(compressed_content) > 1:
+                primary_content.metadata['additional_images'] = len(compressed_content) - 1
+                primary_content.metadata['total_compressed_images'] = len(compressed_content)
+
+            # Add compression metadata
+            primary_content.metadata['compression_used'] = True
+            primary_content.metadata['original_file'] = str(file_path)
+
+            return primary_content
+        else:
+            raise Exception("No compressed content generated")

     def supports_media_type(self, media_type: MediaType) -> bool:
         """
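A short sketch of the mode contract encoded in `_should_apply_compression`, continuing the hypothetical `handler`/`process()` names from the earlier sketch:

```python
from pathlib import Path
from abstractcore.exceptions import UnsupportedFeatureError

# 'never'  -> always skip compression.
# 'auto'   -> skip (with a warning) if the model lacks vision support,
#             otherwise defer to CompressionOrchestrator.should_compress().
# 'always' -> raise if the model lacks vision support.
try:
    handler.process(Path("notes.txt"), provider="ollama",
                    model="llama3.1", glyph_compression="always")
except UnsupportedFeatureError as exc:
    print(exc)  # "Glyph compression requires a vision-capable model. ..."
```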
@@ -259,9 +533,9 @@ class AutoMediaHandler(BaseMediaHandler):
             return format_ext.lower() in image_formats

         elif media_type == MediaType.TEXT:
-            #
-
-            return
+            # TextProcessor can handle ANY text file through its plain text fallback
+            # This is always available and supports all text-based files
+            return True

         elif media_type == MediaType.DOCUMENT:
             # PDF support
@@ -272,9 +546,9 @@ class AutoMediaHandler(BaseMediaHandler):
             if format_ext.lower() in {'docx', 'xlsx', 'pptx'}:
                 return self._available_processors.get('office', False) or True  # Fallback to text

-            #
-
-            return
+            # Any other document type can be handled by text processor as fallback
+            # This allows processing of unknown document formats
+            return True

         return False
@@ -282,27 +556,47 @@ class AutoMediaHandler(BaseMediaHandler):
         """
         Get supported formats organized by media type.

+        Returns comprehensive list of all supported file extensions.
+        Note: TEXT type supports ANY text-based file through content detection
+        and fallback processing, not just the listed extensions.
+
         Returns:
             Dictionary mapping media type to list of supported extensions
+
+        Example:
+            >>> handler = AutoMediaHandler()
+            >>> formats = handler.get_supported_formats()
+            >>> len(formats['text'])  # 70+ text extensions
+            70+
+            >>> 'r' in formats['text']  # R scripts supported
+            True
         """
-
+        from .types import get_all_supported_extensions

-        #
-
-        formats['image'] = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tiff', 'webp']
+        # Get comprehensive list from FILE_TYPE_MAPPINGS
+        all_formats = get_all_supported_extensions()

-        #
-
+        # Filter based on available processors
+        result = {}

-
-
+        # Image formats (requires PIL)
+        if self._available_processors.get('image', False):
+            result['image'] = all_formats.get('image', [])

-
-
+        # Text formats (always available - TextProcessor has built-in fallback)
+        # Note: This includes 70+ extensions + unknown text files via content detection
+        result['text'] = all_formats.get('text', [])
+
+        # Document formats (includes PDFs, Office docs, and text fallbacks)
+        result['document'] = all_formats.get('document', [])

-
+        # Audio/Video (not yet implemented but listed for completeness)
+        if 'audio' in all_formats:
+            result['audio'] = all_formats['audio']
+        if 'video' in all_formats:
+            result['video'] = all_formats['video']

-        return
+        return result

     def get_processor_info(self) -> Dict[str, Any]:
         """
abstractcore/media/handlers/local_handler.py

@@ -412,12 +412,24 @@ class LocalMediaHandler(BaseProviderMediaHandler):
         if media_content.media_type == MediaType.IMAGE and self.can_handle_media(media_content):
             if media_content.content_format == ContentFormat.BASE64:
                 data_url = f"data:{media_content.mime_type};base64,{media_content.content}"
-
+                image_obj = {
                     "type": "image_url",
                     "image_url": {
                         "url": data_url
                     }
-                }
+                }
+
+                # Add detail level if specified in metadata (for Qwen models)
+                detail_level = media_content.metadata.get('detail_level', 'auto')
+                self.logger.debug(f"MediaContent metadata: {media_content.metadata}")
+                self.logger.debug(f"Found detail_level: {detail_level}")
+                if detail_level in ['low', 'high', 'auto']:
+                    image_obj["image_url"]["detail"] = detail_level
+                    self.logger.info(f"Setting detail level to '{detail_level}' for LMStudio image")
+                else:
+                    self.logger.warning(f"Invalid detail level '{detail_level}', skipping")
+
+                content.append(image_obj)
             else:
                 self.logger.warning(f"LMStudio requires base64 image format, got {media_content.content_format}")
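When a valid `detail_level` is present in the metadata, the appended entry takes the OpenAI-compatible shape below (values illustrative):

```python
image_obj = {
    "type": "image_url",
    "image_url": {
        "url": "data:image/png;base64,iVBORw0KGgo...",  # data URL built above
        "detail": "low",  # injected from media_content.metadata['detail_level']
    },
}
```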
abstractcore/media/handlers/openai_handler.py

@@ -30,6 +30,9 @@ class OpenAIMediaHandler(BaseProviderMediaHandler):
         """
         super().__init__("openai", model_capabilities, **kwargs)

+        # Store model name for Qwen-specific optimizations
+        self.model_name = kwargs.get('model_name', '')
+
         # OpenAI-specific configuration
         self.max_image_size = kwargs.get('max_image_size', 20 * 1024 * 1024)  # 20MB
         self.supported_image_detail = kwargs.get('supported_image_detail', ['auto', 'low', 'high'])
@@ -118,11 +121,64 @@ class OpenAIMediaHandler(BaseProviderMediaHandler):
         # Add detail level if supported by model
         if self.model_capabilities.get('vision_support'):
             detail_level = media_content.metadata.get('detail_level', 'auto')
+            self.logger.debug(f"OpenAI Handler - MediaContent metadata: {media_content.metadata}")
+            self.logger.debug(f"OpenAI Handler - Found detail_level: {detail_level}")
+
+            # Auto-adjust detail level for Qwen models to prevent context overflow
+            if self._is_qwen_model() and detail_level == 'auto':
+                detail_level = self._get_optimal_detail_for_qwen(media_content)
+                self.logger.debug(f"OpenAI Handler - Qwen auto-adjusted detail_level: {detail_level}")
+
             if detail_level in self.supported_image_detail:
                 image_obj["image_url"]["detail"] = detail_level
+                self.logger.info(f"OpenAI Handler - Setting detail level to '{detail_level}' for image")
+            else:
+                self.logger.warning(f"OpenAI Handler - Invalid detail level '{detail_level}', supported: {self.supported_image_detail}")

         return image_obj

+    def _is_qwen_model(self) -> bool:
+        """Check if the current model is a Qwen vision model."""
+        if not hasattr(self, 'model_name') or not self.model_name:
+            return False
+
+        model_name_lower = self.model_name.lower()
+        return any(qwen_variant in model_name_lower for qwen_variant in [
+            'qwen3-vl', 'qwen2.5-vl', 'qwen-vl', 'qwen/qwen3-vl', 'qwen/qwen2.5-vl'
+        ])
+
+    def _get_optimal_detail_for_qwen(self, media_content: MediaContent) -> str:
+        """
+        Determine optimal detail level for Qwen models based on context constraints.
+
+        According to SiliconFlow documentation:
+        - detail=low: 256 tokens per image (448x448 resize)
+        - detail=high: Variable tokens based on resolution (can be 24,576+ tokens)
+
+        For Qwen3-VL-30B with 131,072 token context limit, we should use detail=low
+        when processing multiple images to avoid context overflow.
+        """
+        # Get model context limit
+        max_tokens = self.model_capabilities.get('max_tokens', 32768)
+        max_image_tokens = self.model_capabilities.get('max_image_tokens', 24576)
+
+        # Estimate how many images we might be processing
+        # This is a heuristic - in practice we'd need the full batch context
+        estimated_images = getattr(self, '_estimated_image_count', 1)
+
+        # Calculate potential token usage with high detail
+        high_detail_tokens = estimated_images * max_image_tokens
+
+        # Use low detail if high detail would consume >60% of context
+        context_threshold = max_tokens * 0.6
+
+        if high_detail_tokens > context_threshold:
+            self.logger.info(f"Using detail=low for Qwen model: {estimated_images} images would consume "
+                             f"{high_detail_tokens:,} tokens (>{context_threshold:,} threshold)")
+            return 'low'
+        else:
+            return 'high'
+
     def _format_text_for_openai(self, media_content: MediaContent) -> Dict[str, Any]:
         """
         Format text/document content for OpenAI API.
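To make the 60% threshold concrete: with a 131,072-token Qwen3-VL context and 24,576 tokens per high-detail image, the cutoff is 78,643 tokens, so one image stays at high detail while four images force low detail. (With the fallback defaults above, max_tokens=32768, even a single high-detail image exceeds the threshold.) A minimal sketch of the same arithmetic:

```python
def qwen_detail(estimated_images: int, max_tokens: int = 131_072,
                max_image_tokens: int = 24_576) -> str:
    # Mirrors _get_optimal_detail_for_qwen: fall back to low detail when
    # high-detail images would consume more than 60% of the context window.
    return 'low' if estimated_images * max_image_tokens > max_tokens * 0.6 else 'high'

print(qwen_detail(1))  # 'high' (24,576 <= 78,643.2)
print(qwen_detail(4))  # 'low'  (98,304 >  78,643.2)
```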
@@ -226,12 +282,15 @@ class OpenAIMediaHandler(BaseProviderMediaHandler):
             Estimated token count
         """
         if media_content.media_type == MediaType.IMAGE:
-            #
-            # Base cost varies by detail level and image size
+            # Image token estimation varies by model
             detail_level = media_content.metadata.get('detail_level', 'auto')

             if detail_level == 'low':
-
+                # Qwen models use 256 tokens for low detail, OpenAI uses 85
+                if self._is_qwen_model():
+                    return 256  # Qwen low detail token count
+                else:
+                    return 85  # OpenAI low detail token count
             else:
                 # High detail calculation based on image dimensions
                 width = media_content.metadata.get('final_size', [512, 512])[0]
abstractcore/media/processors/__init__.py

@@ -10,4 +10,14 @@ from .text_processor import TextProcessor
 from .pdf_processor import PDFProcessor
 from .office_processor import OfficeProcessor

-
+# Import Glyph processor if available
+try:
+    from ...compression.glyph_processor import GlyphProcessor
+    GLYPH_AVAILABLE = True
+except ImportError:
+    GlyphProcessor = None
+    GLYPH_AVAILABLE = False
+
+__all__ = ['ImageProcessor', 'TextProcessor', 'PDFProcessor', 'OfficeProcessor']
+if GLYPH_AVAILABLE:
+    __all__.append('GlyphProcessor')