abstractcore-2.9.1-py3-none-any.whl → abstractcore-2.11.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/__init__.py +7 -27
- abstractcore/apps/deepsearch.py +9 -4
- abstractcore/apps/extractor.py +33 -100
- abstractcore/apps/intent.py +19 -0
- abstractcore/apps/judge.py +20 -1
- abstractcore/apps/summarizer.py +20 -1
- abstractcore/architectures/detection.py +34 -1
- abstractcore/architectures/response_postprocessing.py +313 -0
- abstractcore/assets/architecture_formats.json +38 -8
- abstractcore/assets/model_capabilities.json +882 -160
- abstractcore/compression/__init__.py +1 -2
- abstractcore/compression/glyph_processor.py +6 -4
- abstractcore/config/main.py +52 -20
- abstractcore/config/manager.py +390 -12
- abstractcore/config/vision_config.py +5 -5
- abstractcore/core/interface.py +151 -3
- abstractcore/core/session.py +16 -10
- abstractcore/download.py +1 -1
- abstractcore/embeddings/manager.py +20 -6
- abstractcore/endpoint/__init__.py +2 -0
- abstractcore/endpoint/app.py +458 -0
- abstractcore/mcp/client.py +3 -1
- abstractcore/media/__init__.py +52 -17
- abstractcore/media/auto_handler.py +42 -22
- abstractcore/media/base.py +44 -1
- abstractcore/media/capabilities.py +12 -33
- abstractcore/media/enrichment.py +105 -0
- abstractcore/media/handlers/anthropic_handler.py +19 -28
- abstractcore/media/handlers/local_handler.py +124 -70
- abstractcore/media/handlers/openai_handler.py +19 -31
- abstractcore/media/processors/__init__.py +4 -2
- abstractcore/media/processors/audio_processor.py +57 -0
- abstractcore/media/processors/office_processor.py +8 -3
- abstractcore/media/processors/pdf_processor.py +46 -3
- abstractcore/media/processors/text_processor.py +22 -24
- abstractcore/media/processors/video_processor.py +58 -0
- abstractcore/media/types.py +97 -4
- abstractcore/media/utils/image_scaler.py +20 -2
- abstractcore/media/utils/video_frames.py +219 -0
- abstractcore/media/vision_fallback.py +136 -22
- abstractcore/processing/__init__.py +32 -3
- abstractcore/processing/basic_deepsearch.py +15 -10
- abstractcore/processing/basic_intent.py +3 -2
- abstractcore/processing/basic_judge.py +3 -2
- abstractcore/processing/basic_summarizer.py +1 -1
- abstractcore/providers/__init__.py +3 -1
- abstractcore/providers/anthropic_provider.py +95 -8
- abstractcore/providers/base.py +1516 -81
- abstractcore/providers/huggingface_provider.py +546 -69
- abstractcore/providers/lmstudio_provider.py +30 -916
- abstractcore/providers/mlx_provider.py +382 -35
- abstractcore/providers/model_capabilities.py +5 -1
- abstractcore/providers/ollama_provider.py +99 -15
- abstractcore/providers/openai_compatible_provider.py +406 -180
- abstractcore/providers/openai_provider.py +188 -44
- abstractcore/providers/openrouter_provider.py +76 -0
- abstractcore/providers/registry.py +61 -5
- abstractcore/providers/streaming.py +138 -33
- abstractcore/providers/vllm_provider.py +92 -817
- abstractcore/server/app.py +478 -28
- abstractcore/server/audio_endpoints.py +139 -0
- abstractcore/server/vision_endpoints.py +1319 -0
- abstractcore/structured/handler.py +316 -41
- abstractcore/tools/common_tools.py +5501 -2012
- abstractcore/tools/comms_tools.py +1641 -0
- abstractcore/tools/core.py +37 -7
- abstractcore/tools/handler.py +4 -9
- abstractcore/tools/parser.py +49 -2
- abstractcore/tools/tag_rewriter.py +2 -1
- abstractcore/tools/telegram_tdlib.py +407 -0
- abstractcore/tools/telegram_tools.py +261 -0
- abstractcore/utils/cli.py +1085 -72
- abstractcore/utils/structured_logging.py +29 -8
- abstractcore/utils/token_utils.py +2 -0
- abstractcore/utils/truncation.py +29 -0
- abstractcore/utils/version.py +3 -4
- abstractcore/utils/vlm_token_calculator.py +12 -2
- abstractcore-2.11.4.dist-info/METADATA +562 -0
- abstractcore-2.11.4.dist-info/RECORD +133 -0
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.4.dist-info}/WHEEL +1 -1
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.4.dist-info}/entry_points.txt +1 -0
- abstractcore-2.9.1.dist-info/METADATA +0 -1190
- abstractcore-2.9.1.dist-info/RECORD +0 -119
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.4.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.4.dist-info}/top_level.txt +0 -0
abstractcore/media/auto_handler.py
CHANGED

@@ -12,7 +12,7 @@ from typing import Dict, Any, Optional, List
 
 from .base import BaseMediaHandler
 from .types import MediaContent, MediaType, ContentFormat, detect_media_type
-from .processors import ImageProcessor, TextProcessor, PDFProcessor, OfficeProcessor
+from .processors import ImageProcessor, TextProcessor, PDFProcessor, OfficeProcessor, AudioProcessor, VideoProcessor
 from ..exceptions import UnsupportedFeatureError
 
 # Import Glyph compression support
@@ -60,6 +60,7 @@ class AutoMediaHandler(BaseMediaHandler):
         self._text_processor = None
         self._pdf_processor = None
         self._office_processor = None
+        self._video_processor = None
 
         # Initialize Glyph compression support
         self._compression_orchestrator = None
@@ -98,20 +99,17 @@ class AutoMediaHandler(BaseMediaHandler):
             availability['office'] = True
         except ImportError:
            availability['office'] = False
+
+        # AudioProcessor (dependency-free)
+        availability['audio'] = True
+
+        # VideoProcessor (dependency-free)
+        availability['video'] = True
 
-        # GlyphProcessor (
-
-
-
-        try:
-            import reportlab
-            import pdf2image
-        except ImportError:
-            glyph_deps_available = False
-        else:
-            glyph_deps_available = False
-
-        availability['glyph'] = glyph_deps_available
+        # GlyphProcessor (PIL renderer). Requires Pillow at runtime.
+        availability['glyph'] = bool(
+            GLYPH_AVAILABLE and self.enable_compression and availability.get('image', False)
+        )
 
         return availability
 
@@ -138,6 +136,12 @@ class AutoMediaHandler(BaseMediaHandler):
         if self._office_processor is None:
             self._office_processor = OfficeProcessor(**self.processor_config)
         return self._office_processor
+
+    def _get_video_processor(self) -> VideoProcessor:
+        """Get or create VideoProcessor instance."""
+        if self._video_processor is None:
+            self._video_processor = VideoProcessor(**self.processor_config)
+        return self._video_processor
 
     def _get_compression_orchestrator(self) -> 'CompressionOrchestrator':
         """Get or create CompressionOrchestrator instance."""
@@ -195,7 +199,14 @@ class AutoMediaHandler(BaseMediaHandler):
             else:
                 return self._get_text_processor()
 
-        # Handle
+        # Handle audio
+        elif media_type == MediaType.AUDIO:
+            return AudioProcessor(**self.processor_config)
+        # Handle video
+        elif media_type == MediaType.VIDEO:
+            if self._available_processors.get('video', False):
+                return self._get_video_processor()
+            return None
        else:
            self.logger.warning(f"Media type {media_type.value} not yet supported")
            return None
@@ -368,19 +379,21 @@ class AutoMediaHandler(BaseMediaHandler):
         # Check dependencies
         missing_deps = []
         try:
-            import
+            from PIL import Image  # noqa: F401
         except ImportError:
-            missing_deps.append("
+            missing_deps.append("Pillow")
 
         try:
             import pdf2image
         except ImportError:
-
+            # Only required for the experimental direct PDF→image path.
+            missing_deps.append("pdf2image (optional)")
 
         if missing_deps:
             deps_str = ", ".join(missing_deps)
             self.logger.warning(f"Missing Glyph dependencies: {deps_str}")
-            self.logger.warning(
+            self.logger.warning("Install with: pip install \"abstractcore[compression]\" (Pillow renderer)")
+            self.logger.warning("Optional (PDF→image): pip install pdf2image (+ Poppler installed on your system)")
 
         if not self.enable_compression:
             self.logger.warning("Glyph compression is disabled in AutoMediaHandler configuration")
@@ -510,9 +523,9 @@ class AutoMediaHandler(BaseMediaHandler):
         elif media_type == MediaType.DOCUMENT:
             return True  # Always supported via text processor at minimum
         elif media_type == MediaType.AUDIO:
-            return False
+            return self._available_processors.get('audio', False)
         elif media_type == MediaType.VIDEO:
-            return False
+            return self._available_processors.get('video', False)
         return False
 
     def supports_format(self, media_type: MediaType, format_ext: str) -> bool:
@@ -550,6 +563,13 @@ class AutoMediaHandler(BaseMediaHandler):
             # This allows processing of unknown document formats
             return True
 
+        elif media_type == MediaType.AUDIO:
+            # AudioProcessor is dependency-free in v0; accept common audio containers.
+            return format_ext.lower() in {'mp3', 'wav', 'm4a', 'ogg', 'flac', 'aac', 'webm'}
+        elif media_type == MediaType.VIDEO:
+            # VideoProcessor is dependency-free in v0; frame extraction fallback may require ffmpeg.
+            return format_ext.lower() in {'mp4', 'mov', 'mkv', 'webm', 'avi', 'wmv', 'm4v'}
+
         return False
 
     def get_supported_formats(self) -> Dict[str, List[str]]:
@@ -654,4 +674,4 @@ class AutoMediaHandler(BaseMediaHandler):
         else:
             # Basic estimation based on file size
             file_size_mb = file_path.stat().st_size / (1024 * 1024)
-            return max(0.1, file_size_mb / 10.0)  # ~10MB/second processing rate
+            return max(0.1, file_size_mb / 10.0)  # ~10MB/second processing rate
abstractcore/media/base.py
CHANGED
@@ -453,4 +453,47 @@ class BaseProviderMediaHandler(BaseMediaHandler):
         Returns:
             True if provider can handle this content
         """
-        return self.supports_media_type(media_content.media_type)
+        return self.supports_media_type(media_content.media_type)
+
+    def estimate_tokens_for_media(self, media_content: MediaContent) -> int:
+        """
+        Estimate token usage for media content.
+
+        Base implementation that uses pre-computed estimates when available.
+        Subclasses can override _estimate_image_tokens() for provider-specific
+        image token calculations.
+
+        Args:
+            media_content: MediaContent to estimate
+
+        Returns:
+            Estimated token count
+        """
+        if media_content.media_type == MediaType.IMAGE:
+            return self._estimate_image_tokens(media_content)
+
+        elif media_content.media_type in [MediaType.TEXT, MediaType.DOCUMENT]:
+            # Use pre-computed estimate from processor if available (uses TokenUtils)
+            if 'estimated_tokens' in media_content.metadata:
+                return media_content.metadata['estimated_tokens']
+            # Fallback: rough estimation (~4 chars per token)
+            content_length = len(str(media_content.content))
+            return content_length // 4
+
+        return 0
+
+    def _estimate_image_tokens(self, media_content: MediaContent) -> int:
+        """
+        Estimate token usage for image content.
+
+        Override in subclasses for provider-specific image token calculations.
+        Default implementation returns a conservative estimate.
+
+        Args:
+            media_content: Image MediaContent to estimate
+
+        Returns:
+            Estimated token count for the image
+        """
+        # Conservative default - subclasses should override with provider-specific logic
+        return 512
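The template-method split above (a generic `estimate_tokens_for_media()` plus an overridable `_estimate_image_tokens()`) is the hook the provider handlers plug into. A minimal sketch of a custom subclass; the class name and the flat per-image rate are illustrative assumptions, and constructor details are omitted:

```python
from abstractcore.media.base import BaseProviderMediaHandler
from abstractcore.media.types import MediaContent

class MyProviderMediaHandler(BaseProviderMediaHandler):  # hypothetical subclass
    def _estimate_image_tokens(self, media_content: MediaContent) -> int:
        # Flat per-image cost, mirroring how AnthropicMediaHandler
        # returns 1600 further down in this diff.
        return 1024  # illustrative rate, not a documented value
```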
abstractcore/media/capabilities.py
CHANGED

@@ -66,11 +66,18 @@ class MediaCapabilities:
         caps = {}
 
         # Base capabilities from JSON
+        video_mode = caps.get('video_input_mode')
+        if isinstance(video_mode, str):
+            vm = video_mode.strip().lower()
+            video_support = vm in {'frames', 'native'}
+        else:
+            video_support = caps.get('video_support', False)
+
         instance = cls(
             model_name=model,
             vision_support=caps.get('vision_support', False),
             audio_support=caps.get('audio_support', False),
-            video_support=
+            video_support=video_support,
             image_resolutions=caps.get('image_resolutions', [])
         )
 
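The new string-valued `video_input_mode` field takes precedence over the legacy boolean `video_support` flag. A self-contained restatement of that precedence, with illustrative capability dicts (not copied from the shipped `model_capabilities.json`):

```python
# Illustrative capability entries, not actual asset data.
caps_frames = {'video_input_mode': 'frames'}    # -> True
caps_native = {'video_input_mode': ' Native '}  # -> True (trimmed, case-insensitive)
caps_legacy = {'video_support': True}           # -> True via the boolean fallback
caps_none   = {'video_input_mode': 'disabled'}  # -> False

def derive_video_support(caps: dict) -> bool:
    # Same precedence as the hunk above: a string mode wins over the legacy flag.
    video_mode = caps.get('video_input_mode')
    if isinstance(video_mode, str):
        return video_mode.strip().lower() in {'frames', 'native'}
    return caps.get('video_support', False)

assert derive_video_support(caps_frames) and derive_video_support(caps_native)
assert derive_video_support(caps_legacy) and not derive_video_support(caps_none)
```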
@@ -159,37 +166,9 @@ class MediaCapabilities:
             'text_embedding_preferred': self.text_embedding_preferred
         }
 
-
-
-
-
-        Args:
-            media_type: Type of media
-            content_size: Size of content in bytes (optional)
-
-        Returns:
-            Estimated token count
-        """
-        if not self.media_token_estimation:
-            return 0
-
-        if media_type == MediaType.IMAGE and self.vision_support:
-            # Base token cost for images varies by model
-            model_lower = self.model_name.lower()
-            if 'gpt-4o' in model_lower:
-                return 85 + (170 * 4)  # Simplified GPT-4o calculation
-            elif 'claude' in model_lower:
-                return 1600  # Anthropic standard
-            else:
-                return 512  # Conservative estimate for local models
-
-        elif media_type in [MediaType.TEXT, MediaType.DOCUMENT]:
-            # Text content token estimation
-            if content_size > 0:
-                return content_size // 4  # ~4 chars per token
-            return 100  # Default estimate
-
-        return 0
+        # Note: Token estimation is now handled by processors (TextProcessor, PDFProcessor, etc.)
+        # which add 'estimated_tokens' to MediaContent.metadata using TokenUtils.
+        # Handlers use BaseProviderMediaHandler.estimate_tokens_for_media() to retrieve it.
 
     def validate_media_content(self, media_type: MediaType, file_size: int = 0,
                                format: str = None) -> tuple[bool, Optional[str]]:
@@ -332,4 +311,4 @@ def get_max_images(model: str, provider: str = None) -> int:
 
 def should_use_text_embedding(model: str, provider: str = None) -> bool:
     """Check if model prefers text embedding over multimodal messages."""
-    return get_media_capabilities(model, provider).text_embedding_preferred
+    return get_media_capabilities(model, provider).text_embedding_preferred
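A short usage sketch of the module-level helpers touched above. The model and provider names are hypothetical; only the helper signatures are taken from the diff:

```python
from abstractcore.media.capabilities import (
    get_media_capabilities,
    should_use_text_embedding,
)

caps = get_media_capabilities("claude-3-5-sonnet", provider="anthropic")  # illustrative names
if should_use_text_embedding("claude-3-5-sonnet", provider="anthropic"):
    # Prefer embedding extracted text over sending multimodal message parts.
    ...
```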
abstractcore/media/enrichment.py
ADDED

@@ -0,0 +1,105 @@
+"""
+Media enrichment metadata contract (v0).
+
+This module defines a provider-agnostic, response-level transparency hook for
+"input enrichment fallbacks" where non-text inputs (image/audio/video) are
+converted into short grounded text context (caption/transcript/frames) so a
+text-only model can proceed.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Dict, List, Literal, Optional, TypedDict
+
+
+MEDIA_ENRICHMENT_KEY = "media_enrichment"
+
+EnrichmentStatus = Literal["used", "skipped", "error"]
+BackendKind = Literal["llm", "plugin", "local_model", "unknown"]
+
+
+class EnrichmentBackend(TypedDict, total=False):
+    kind: BackendKind
+    provider: str
+    model: str
+    backend_id: str
+    source: str
+
+
+class MediaEnrichmentItem(TypedDict, total=False):
+    status: EnrichmentStatus
+    input_modality: str  # image|audio|video (string for forward-compat)
+    input_index: int
+    input_name: str
+    policy: str
+    summary_kind: str  # caption|transcript|frames (string for forward-compat)
+    backend: EnrichmentBackend
+    injected_text: str
+    injected_chars: int
+    artifact: Dict[str, Any]
+    error: str
+
+
+def build_enrichment_item(
+    *,
+    status: EnrichmentStatus,
+    input_modality: str,
+    summary_kind: str,
+    policy: Optional[str] = None,
+    backend: Optional[Dict[str, Any]] = None,
+    input_index: Optional[int] = None,
+    input_name: Optional[str] = None,
+    injected_text: Optional[str] = None,
+    artifact: Optional[Dict[str, Any]] = None,
+    error: Optional[str] = None,
+) -> MediaEnrichmentItem:
+    item: MediaEnrichmentItem = {
+        "status": status,
+        "input_modality": str(input_modality),
+        "summary_kind": str(summary_kind),
+    }
+    if isinstance(policy, str) and policy.strip():
+        item["policy"] = policy.strip()
+    if isinstance(backend, dict) and backend:
+        # Preserve only JSON-serializable primitives; callers should keep this small.
+        cleaned: Dict[str, Any] = {}
+        for k in ("kind", "provider", "model", "backend_id", "source"):
+            v = backend.get(k)
+            if isinstance(v, str) and v.strip():
+                cleaned[str(k)] = v.strip()
+        if cleaned:
+            item["backend"] = cleaned  # type: ignore[assignment]
+    if isinstance(input_index, int):
+        item["input_index"] = int(input_index)
+    if isinstance(input_name, str) and input_name.strip():
+        item["input_name"] = input_name.strip()
+    if isinstance(injected_text, str) and injected_text.strip():
+        txt = injected_text.strip()
+        item["injected_text"] = txt
+        item["injected_chars"] = len(txt)
+    if isinstance(artifact, dict) and artifact:
+        item["artifact"] = artifact
+    if isinstance(error, str) and error.strip():
+        item["error"] = error.strip()
+    return item
+
+
+def merge_enrichment_metadata(
+    metadata: Optional[Dict[str, Any]],
+    enrichments: Optional[List[Dict[str, Any]]],
+) -> Dict[str, Any]:
+    out: Dict[str, Any] = metadata if isinstance(metadata, dict) else {}
+    if not enrichments:
+        return out
+
+    existing = out.get(MEDIA_ENRICHMENT_KEY)
+    if not isinstance(existing, list):
+        existing = []
+    out[MEDIA_ENRICHMENT_KEY] = existing
+
+    for item in enrichments:
+        if isinstance(item, dict) and item:
+            existing.append(item)
+
+    return out
+
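Since this module is new in 2.11.4, a usage sketch of the contract it defines. The caption text, policy name, and backend fields below are invented for illustration; the function signatures come from the file above:

```python
from abstractcore.media.enrichment import (
    MEDIA_ENRICHMENT_KEY,
    build_enrichment_item,
    merge_enrichment_metadata,
)

# Record that an image was captioned so a text-only model could proceed.
item = build_enrichment_item(
    status="used",
    input_modality="image",
    summary_kind="caption",
    policy="auto",  # illustrative policy name
    backend={"kind": "llm", "provider": "ollama", "model": "llava"},  # illustrative
    input_index=0,
    injected_text="A bar chart comparing Q3 revenue across regions.",
)

# Attach the transparency record to response-level metadata.
metadata = merge_enrichment_metadata({}, [item])
assert metadata[MEDIA_ENRICHMENT_KEY][0]["injected_chars"] > 0
```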
abstractcore/media/handlers/anthropic_handler.py
CHANGED

@@ -192,7 +192,16 @@ class AnthropicMediaHandler(BaseProviderMediaHandler):
         if media_content.media_type == MediaType.IMAGE:
             # Check if model supports vision
             if not self.model_capabilities.get('vision_support', False):
-
+                # Fallback: consult the centralized media capability database.
+                # This keeps validation in sync with `abstractcore.media.capabilities.is_vision_model`
+                # and avoids hard-coding model-name heuristics here.
+                try:
+                    from ..capabilities import is_vision_model
+
+                    if not is_vision_model(model):
+                        return False
+                except Exception:
+                    return False
 
             # Check image size
             if hasattr(media_content, 'metadata'):
@@ -200,13 +209,9 @@ class AnthropicMediaHandler(BaseProviderMediaHandler):
                 if file_size > self.max_image_size:
                     return False
 
-            #
-
-
-            elif 'claude-3.5' in model_lower:
-                return True  # All Claude 3.5 models support vision
-            elif 'claude-4' in model_lower:
-                return True  # Future Claude 4 models
+            # If vision is supported (either by injected model capabilities or the centralized lookup),
+            # accept the image.
+            return True
 
         # Text/document validation
         elif media_content.media_type in [MediaType.TEXT, MediaType.DOCUMENT]:
@@ -219,28 +224,14 @@ class AnthropicMediaHandler(BaseProviderMediaHandler):
 
         return False
 
-    def
+    def _estimate_image_tokens(self, media_content: MediaContent) -> int:
         """
-
+        Anthropic-specific image token estimation.
 
-
-
-
-        Returns:
-            Estimated token count
+        Anthropic uses roughly ~1600 tokens per image for most cases.
+        This varies based on image content and complexity.
         """
-
-        # Anthropic image token estimation
-        # Roughly ~1600 tokens per image for most cases
-        # This varies based on image content and complexity
-        return 1600
-
-        elif media_content.media_type in [MediaType.TEXT, MediaType.DOCUMENT]:
-            # Rough estimation: 3.5 characters per token (slightly better than GPT)
-            content_length = len(str(media_content.content))
-            return int(content_length / 3.5)
-
-        return 0
+        return 1600
@@ -323,4 +314,4 @@ class AnthropicMediaHandler(BaseProviderMediaHandler):
 3. Any notable patterns, relationships, or conclusions
 4. Suggestions for how this information might be used or what actions might be taken
 
-Be thorough but concise in your analysis."""
+Be thorough but concise in your analysis."""
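A sketch of the validation order the first hunk implements. `is_vision_model` is the centralized lookup named in the diff; the standalone wrapper function here is hypothetical:

```python
from abstractcore.media.capabilities import is_vision_model

def accepts_image(model: str, injected_caps: dict) -> bool:
    if injected_caps.get("vision_support", False):
        return True  # capabilities injected at handler construction win
    try:
        return is_vision_model(model)  # fall back to the capability database
    except Exception:
        return False  # fail closed, matching the handler's except branch
```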