abstractcore 2.5.2__py3-none-any.whl → 2.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/__init__.py +19 -1
- abstractcore/architectures/detection.py +252 -6
- abstractcore/assets/architecture_formats.json +14 -1
- abstractcore/assets/model_capabilities.json +533 -10
- abstractcore/compression/__init__.py +29 -0
- abstractcore/compression/analytics.py +420 -0
- abstractcore/compression/cache.py +250 -0
- abstractcore/compression/config.py +279 -0
- abstractcore/compression/exceptions.py +30 -0
- abstractcore/compression/glyph_processor.py +381 -0
- abstractcore/compression/optimizer.py +388 -0
- abstractcore/compression/orchestrator.py +380 -0
- abstractcore/compression/pil_text_renderer.py +818 -0
- abstractcore/compression/quality.py +226 -0
- abstractcore/compression/text_formatter.py +666 -0
- abstractcore/compression/vision_compressor.py +371 -0
- abstractcore/config/main.py +64 -0
- abstractcore/config/manager.py +100 -5
- abstractcore/core/retry.py +2 -2
- abstractcore/core/session.py +193 -7
- abstractcore/download.py +253 -0
- abstractcore/embeddings/manager.py +2 -2
- abstractcore/events/__init__.py +113 -2
- abstractcore/exceptions/__init__.py +49 -2
- abstractcore/media/auto_handler.py +312 -18
- abstractcore/media/handlers/local_handler.py +14 -2
- abstractcore/media/handlers/openai_handler.py +62 -3
- abstractcore/media/processors/__init__.py +11 -1
- abstractcore/media/processors/direct_pdf_processor.py +210 -0
- abstractcore/media/processors/glyph_pdf_processor.py +227 -0
- abstractcore/media/processors/image_processor.py +7 -1
- abstractcore/media/processors/office_processor.py +2 -2
- abstractcore/media/processors/text_processor.py +18 -3
- abstractcore/media/types.py +164 -7
- abstractcore/media/utils/image_scaler.py +2 -2
- abstractcore/media/vision_fallback.py +2 -2
- abstractcore/providers/__init__.py +18 -0
- abstractcore/providers/anthropic_provider.py +228 -8
- abstractcore/providers/base.py +378 -11
- abstractcore/providers/huggingface_provider.py +563 -23
- abstractcore/providers/lmstudio_provider.py +284 -4
- abstractcore/providers/mlx_provider.py +27 -2
- abstractcore/providers/model_capabilities.py +352 -0
- abstractcore/providers/ollama_provider.py +282 -6
- abstractcore/providers/openai_provider.py +286 -8
- abstractcore/providers/registry.py +85 -13
- abstractcore/providers/streaming.py +2 -2
- abstractcore/server/app.py +91 -81
- abstractcore/tools/common_tools.py +2 -2
- abstractcore/tools/handler.py +2 -2
- abstractcore/tools/parser.py +2 -2
- abstractcore/tools/registry.py +2 -2
- abstractcore/tools/syntax_rewriter.py +2 -2
- abstractcore/tools/tag_rewriter.py +3 -3
- abstractcore/utils/__init__.py +4 -1
- abstractcore/utils/self_fixes.py +2 -2
- abstractcore/utils/trace_export.py +287 -0
- abstractcore/utils/version.py +1 -1
- abstractcore/utils/vlm_token_calculator.py +655 -0
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/METADATA +207 -8
- abstractcore-2.6.0.dist-info/RECORD +108 -0
- abstractcore-2.5.2.dist-info/RECORD +0 -90
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/WHEEL +0 -0
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/entry_points.txt +0 -0
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/top_level.txt +0 -0
abstractcore/media/types.py
CHANGED
|
@@ -207,15 +207,51 @@ FILE_TYPE_MAPPINGS = {
|
|
|
207
207
|
'gif': MediaType.IMAGE, 'bmp': MediaType.IMAGE, 'tif': MediaType.IMAGE,
|
|
208
208
|
'tiff': MediaType.IMAGE, 'webp': MediaType.IMAGE, 'ico': MediaType.IMAGE,
|
|
209
209
|
|
|
210
|
-
# Documents
|
|
210
|
+
# Documents (binary formats that need special processing)
|
|
211
211
|
'pdf': MediaType.DOCUMENT, 'doc': MediaType.DOCUMENT, 'docx': MediaType.DOCUMENT,
|
|
212
212
|
'xls': MediaType.DOCUMENT, 'xlsx': MediaType.DOCUMENT, 'ppt': MediaType.DOCUMENT,
|
|
213
213
|
'pptx': MediaType.DOCUMENT, 'odt': MediaType.DOCUMENT, 'rtf': MediaType.DOCUMENT,
|
|
214
214
|
|
|
215
|
-
# Text formats
|
|
216
|
-
'txt': MediaType.TEXT, 'md': MediaType.TEXT, '
|
|
217
|
-
'
|
|
218
|
-
'
|
|
215
|
+
# Text formats - Common markup and data formats
|
|
216
|
+
'txt': MediaType.TEXT, 'md': MediaType.TEXT, 'markdown': MediaType.TEXT,
|
|
217
|
+
'csv': MediaType.TEXT, 'tsv': MediaType.TEXT,
|
|
218
|
+
'json': MediaType.TEXT, 'jsonl': MediaType.TEXT, 'ndjson': MediaType.TEXT,
|
|
219
|
+
'xml': MediaType.TEXT, 'html': MediaType.TEXT, 'htm': MediaType.TEXT,
|
|
220
|
+
'yaml': MediaType.TEXT, 'yml': MediaType.TEXT, 'toml': MediaType.TEXT,
|
|
221
|
+
'ini': MediaType.TEXT, 'cfg': MediaType.TEXT, 'conf': MediaType.TEXT,
|
|
222
|
+
|
|
223
|
+
# Text formats - Programming and scripting languages
|
|
224
|
+
'py': MediaType.TEXT, 'pyw': MediaType.TEXT, 'pyx': MediaType.TEXT,
|
|
225
|
+
'js': MediaType.TEXT, 'jsx': MediaType.TEXT, 'ts': MediaType.TEXT, 'tsx': MediaType.TEXT,
|
|
226
|
+
'java': MediaType.TEXT, 'kt': MediaType.TEXT, 'scala': MediaType.TEXT,
|
|
227
|
+
'c': MediaType.TEXT, 'cpp': MediaType.TEXT, 'cc': MediaType.TEXT, 'cxx': MediaType.TEXT,
|
|
228
|
+
'h': MediaType.TEXT, 'hpp': MediaType.TEXT, 'hxx': MediaType.TEXT,
|
|
229
|
+
'cs': MediaType.TEXT, 'go': MediaType.TEXT, 'rs': MediaType.TEXT, 'swift': MediaType.TEXT,
|
|
230
|
+
'rb': MediaType.TEXT, 'php': MediaType.TEXT, 'pl': MediaType.TEXT, 'pm': MediaType.TEXT,
|
|
231
|
+
'sh': MediaType.TEXT, 'bash': MediaType.TEXT, 'zsh': MediaType.TEXT, 'fish': MediaType.TEXT,
|
|
232
|
+
'r': MediaType.TEXT, 'R': MediaType.TEXT, 'rmd': MediaType.TEXT, 'Rmd': MediaType.TEXT,
|
|
233
|
+
'jl': MediaType.TEXT, 'matlab': MediaType.TEXT, 'm': MediaType.TEXT,
|
|
234
|
+
'sql': MediaType.TEXT, 'lua': MediaType.TEXT, 'vim': MediaType.TEXT,
|
|
235
|
+
'dart': MediaType.TEXT, 'ex': MediaType.TEXT, 'exs': MediaType.TEXT,
|
|
236
|
+
'erl': MediaType.TEXT, 'hrl': MediaType.TEXT, 'clj': MediaType.TEXT, 'cljs': MediaType.TEXT,
|
|
237
|
+
|
|
238
|
+
# Text formats - Notebooks and documentation
|
|
239
|
+
'ipynb': MediaType.TEXT, 'qmd': MediaType.TEXT, 'rst': MediaType.TEXT,
|
|
240
|
+
'tex': MediaType.TEXT, 'latex': MediaType.TEXT, 'bib': MediaType.TEXT,
|
|
241
|
+
'org': MediaType.TEXT, 'adoc': MediaType.TEXT, 'asciidoc': MediaType.TEXT,
|
|
242
|
+
|
|
243
|
+
# Text formats - Web and styles
|
|
244
|
+
'css': MediaType.TEXT, 'scss': MediaType.TEXT, 'sass': MediaType.TEXT, 'less': MediaType.TEXT,
|
|
245
|
+
'vue': MediaType.TEXT, 'svelte': MediaType.TEXT,
|
|
246
|
+
|
|
247
|
+
# Text formats - Build and config files
|
|
248
|
+
'gradle': MediaType.TEXT, 'cmake': MediaType.TEXT, 'make': MediaType.TEXT,
|
|
249
|
+
'dockerfile': MediaType.TEXT, 'containerfile': MediaType.TEXT,
|
|
250
|
+
'gitignore': MediaType.TEXT, 'gitattributes': MediaType.TEXT,
|
|
251
|
+
'env': MediaType.TEXT, 'properties': MediaType.TEXT,
|
|
252
|
+
|
|
253
|
+
# Text formats - Log and output files
|
|
254
|
+
'log': MediaType.TEXT, 'out': MediaType.TEXT, 'err': MediaType.TEXT,
|
|
219
255
|
|
|
220
256
|
# Audio
|
|
221
257
|
'mp3': MediaType.AUDIO, 'wav': MediaType.AUDIO, 'm4a': MediaType.AUDIO,
|
|
@@ -227,9 +263,66 @@ FILE_TYPE_MAPPINGS = {
|
|
|
227
263
|
}
|
|
228
264
|
|
|
229
265
|
|
|
266
|
+
def is_text_file(file_path: Union[str, Path]) -> bool:
    """
    Detect if a file is text-based by sampling its first bytes.

    This is a heuristic check. At most the first 8 KB of the file are read
    and classified as follows:

    * an empty file counts as text;
    * any NUL byte marks the file as binary;
    * a sample that is valid UTF-8 is text (a multi-byte character cut off
      by the end of the sample is tolerated);
    * otherwise the sample is treated as a legacy single-byte encoding
      (latin-1 / cp1252 style) and counts as text only when it contains
      few control characters.

    Args:
        file_path: Path to the file

    Returns:
        True if file appears to be text-based, False otherwise. Missing
        paths, directories and unreadable files yield False.
    """
    import codecs

    sample_limit = 8192
    # Share of control bytes above which non-UTF-8 content is considered binary.
    max_control_ratio = 0.05

    path = Path(file_path)

    try:
        # is_file() also rejects directories, which exists() would let through.
        if not path.is_file():
            return False

        file_size = path.stat().st_size
        if file_size == 0:
            return True  # Empty files are text

        with open(path, 'rb') as f:
            sample = f.read(sample_limit)
    except (OSError, ValueError):
        # On any I/O problem, assume it's not text
        return False

    if not sample:
        # File was truncated between stat() and read(); nothing to inspect.
        return True

    # Check for null bytes (strong indicator of binary)
    if b'\x00' in sample:
        return False

    # Try to decode as UTF-8. When the sample is only a prefix of the file,
    # an incremental decoder is used in non-final mode so that a multi-byte
    # character split at the sample boundary is not reported as an error.
    sample_is_prefix = file_size > len(sample)
    try:
        codecs.getincrementaldecoder('utf-8')().decode(
            sample, final=not sample_is_prefix
        )
        return True
    except UnicodeDecodeError:
        pass

    # Single-byte encodings such as latin-1 can decode *any* byte string, so
    # "does it decode" carries no information. Instead, measure how many
    # bytes are control characters that ordinary text does not contain.
    text_bytes = bytes(
        {7, 8, 9, 10, 11, 12, 13, 27}
        | set(range(0x20, 0x7F))
        | set(range(0x80, 0x100))
    )
    control_count = len(sample.translate(None, text_bytes))
    return control_count / len(sample) <= max_control_ratio
|
|
321
|
+
|
|
322
|
+
|
|
230
323
|
def detect_media_type(file_path: Union[str, Path]) -> MediaType:
|
|
231
324
|
"""
|
|
232
|
-
Detect the media type of a file based on its extension.
|
|
325
|
+
Detect the media type of a file based on its extension and content.
|
|
233
326
|
|
|
234
327
|
Args:
|
|
235
328
|
file_path: Path to the file
|
|
@@ -240,7 +333,71 @@ def detect_media_type(file_path: Union[str, Path]) -> MediaType:
|
|
|
240
333
|
path = Path(file_path)
|
|
241
334
|
extension = path.suffix.lower().lstrip('.')
|
|
242
335
|
|
|
243
|
-
|
|
336
|
+
# First check the known extension mappings
|
|
337
|
+
if extension in FILE_TYPE_MAPPINGS:
|
|
338
|
+
return FILE_TYPE_MAPPINGS[extension]
|
|
339
|
+
|
|
340
|
+
# For unknown extensions, try to detect if it's a text file
|
|
341
|
+
# This handles cases like .R, .Rmd, .ipynb, and any other text-based files
|
|
342
|
+
if is_text_file(path):
|
|
343
|
+
return MediaType.TEXT
|
|
344
|
+
|
|
345
|
+
# Fall back to DOCUMENT for binary files with unknown extensions
|
|
346
|
+
return MediaType.DOCUMENT
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def get_all_supported_extensions() -> Dict[str, List[str]]:
    """
    Get all supported file extensions organized by media type.

    This function provides programmatic access to every extension listed
    in FILE_TYPE_MAPPINGS, grouped under the ``value`` of its MediaType.

    Returns:
        Dictionary mapping media type names to lists of supported
        extensions (without dots). Each list is sorted with ``list.sort()``,
        so the order is alphabetical, not the declaration order of
        FILE_TYPE_MAPPINGS.

    Example:
        >>> from abstractcore.media.types import get_all_supported_extensions
        >>> formats = get_all_supported_extensions()
        >>> 'md' in formats['text']
        True
        >>> formats['text'] == sorted(formats['text'])
        True
    """
    grouped: Dict[str, List[str]] = {}
    for ext, media_type in FILE_TYPE_MAPPINGS.items():
        grouped.setdefault(media_type.value, []).append(ext)

    # Sort extensions within each type for consistency
    for extensions in grouped.values():
        extensions.sort()

    return grouped
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
def get_supported_extensions_by_type(media_type: MediaType) -> List[str]:
    """
    Return the extensions that FILE_TYPE_MAPPINGS assigns to one media type.

    Args:
        media_type: The MediaType to query

    Returns:
        Sorted list of file extensions (without dots) mapped to this type;
        empty when no extension maps to it.

    Example:
        >>> from abstractcore.media.types import get_supported_extensions_by_type, MediaType
        >>> text_exts = get_supported_extensions_by_type(MediaType.TEXT)
        >>> 'r' in text_exts  # R scripts
        True
        >>> 'ipynb' in text_exts  # Jupyter notebooks
        True
    """
    matching = []
    for ext, mapped_type in FILE_TYPE_MAPPINGS.items():
        if mapped_type == media_type:
            matching.append(ext)
    matching.sort()
    return matching
|
|
244
401
|
|
|
245
402
|
|
|
246
403
|
def create_media_content(
|
|
@@ -8,7 +8,6 @@ and capabilities for vision models.
|
|
|
8
8
|
from typing import Tuple, Optional, Union, Dict, Any
|
|
9
9
|
from enum import Enum
|
|
10
10
|
from pathlib import Path
|
|
11
|
-
import logging
|
|
12
11
|
|
|
13
12
|
try:
|
|
14
13
|
from PIL import Image, ImageOps
|
|
@@ -17,6 +16,7 @@ except ImportError:
|
|
|
17
16
|
PIL_AVAILABLE = False
|
|
18
17
|
|
|
19
18
|
from ..base import MediaProcessingError
|
|
19
|
+
from ...utils.structured_logging import get_logger
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
class ScalingMode(Enum):
|
|
@@ -36,7 +36,7 @@ class ModelOptimizedScaler:
|
|
|
36
36
|
"""
|
|
37
37
|
|
|
38
38
|
def __init__(self):
|
|
39
|
-
self.logger =
|
|
39
|
+
self.logger = get_logger(__name__)
|
|
40
40
|
|
|
41
41
|
if not PIL_AVAILABLE:
|
|
42
42
|
raise MediaProcessingError("PIL (Pillow) is required for image scaling")
|
|
@@ -5,11 +5,11 @@ Implements two-stage pipeline: vision model → description → text-only model
|
|
|
5
5
|
Uses unified AbstractCore configuration system.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
import logging
|
|
9
8
|
from pathlib import Path
|
|
10
9
|
from typing import Optional, Dict, Any
|
|
10
|
+
from ..utils.structured_logging import get_logger
|
|
11
11
|
|
|
12
|
-
logger =
|
|
12
|
+
logger = get_logger(__name__)
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class VisionNotConfiguredError(Exception):
|
|
@@ -22,6 +22,16 @@ from .registry import (
|
|
|
22
22
|
get_available_models_for_provider
|
|
23
23
|
)
|
|
24
24
|
|
|
25
|
+
# Model capability filtering (new system)
|
|
26
|
+
from .model_capabilities import (
|
|
27
|
+
ModelInputCapability,
|
|
28
|
+
ModelOutputCapability,
|
|
29
|
+
get_model_input_capabilities,
|
|
30
|
+
get_model_output_capabilities,
|
|
31
|
+
filter_models_by_capabilities,
|
|
32
|
+
get_capability_summary
|
|
33
|
+
)
|
|
34
|
+
|
|
25
35
|
__all__ = [
|
|
26
36
|
# Provider classes
|
|
27
37
|
'BaseProvider',
|
|
@@ -43,4 +53,12 @@ __all__ = [
|
|
|
43
53
|
'get_all_providers_status',
|
|
44
54
|
'create_provider',
|
|
45
55
|
'get_available_models_for_provider',
|
|
56
|
+
|
|
57
|
+
# Model capability filtering (new system)
|
|
58
|
+
'ModelInputCapability',
|
|
59
|
+
'ModelOutputCapability',
|
|
60
|
+
'get_model_input_capabilities',
|
|
61
|
+
'get_model_output_capabilities',
|
|
62
|
+
'filter_models_by_capabilities',
|
|
63
|
+
'get_capability_summary',
|
|
46
64
|
]
|
|
@@ -5,7 +5,7 @@ Anthropic provider implementation.
|
|
|
5
5
|
import os
|
|
6
6
|
import json
|
|
7
7
|
import time
|
|
8
|
-
from typing import List, Dict, Any, Optional, Union, Iterator, Type
|
|
8
|
+
from typing import List, Dict, Any, Optional, Union, Iterator, AsyncIterator, Type
|
|
9
9
|
|
|
10
10
|
try:
|
|
11
11
|
from pydantic import BaseModel
|
|
@@ -16,7 +16,7 @@ except ImportError:
|
|
|
16
16
|
from .base import BaseProvider
|
|
17
17
|
from ..core.types import GenerateResponse
|
|
18
18
|
from ..media import MediaHandler
|
|
19
|
-
from ..exceptions import AuthenticationError, ProviderAPIError, ModelNotFoundError, format_model_error
|
|
19
|
+
from ..exceptions import AuthenticationError, ProviderAPIError, ModelNotFoundError, format_model_error, format_auth_error
|
|
20
20
|
from ..tools import UniversalToolHandler, execute_tools
|
|
21
21
|
from ..events import EventType
|
|
22
22
|
|
|
@@ -30,7 +30,8 @@ except ImportError:
|
|
|
30
30
|
class AnthropicProvider(BaseProvider):
|
|
31
31
|
"""Anthropic Claude API provider with full integration"""
|
|
32
32
|
|
|
33
|
-
def __init__(self, model: str = "claude-3-haiku-20240307", api_key: Optional[str] = None,
|
|
33
|
+
def __init__(self, model: str = "claude-3-haiku-20240307", api_key: Optional[str] = None,
|
|
34
|
+
base_url: Optional[str] = None, **kwargs):
|
|
34
35
|
super().__init__(model, **kwargs)
|
|
35
36
|
self.provider = "anthropic"
|
|
36
37
|
|
|
@@ -42,8 +43,15 @@ class AnthropicProvider(BaseProvider):
|
|
|
42
43
|
if not self.api_key:
|
|
43
44
|
raise ValueError("Anthropic API key required. Set ANTHROPIC_API_KEY environment variable.")
|
|
44
45
|
|
|
45
|
-
#
|
|
46
|
-
self.
|
|
46
|
+
# Get base URL from param or environment
|
|
47
|
+
self.base_url = base_url or os.getenv("ANTHROPIC_BASE_URL")
|
|
48
|
+
|
|
49
|
+
# Initialize client with timeout and optional base_url
|
|
50
|
+
client_kwargs = {"api_key": self.api_key, "timeout": self._timeout}
|
|
51
|
+
if self.base_url:
|
|
52
|
+
client_kwargs["base_url"] = self.base_url
|
|
53
|
+
self.client = anthropic.Anthropic(**client_kwargs)
|
|
54
|
+
self._async_client = None # Lazy-loaded async client
|
|
47
55
|
|
|
48
56
|
# Initialize tool handler
|
|
49
57
|
self.tool_handler = UniversalToolHandler(model)
|
|
@@ -56,6 +64,16 @@ class AnthropicProvider(BaseProvider):
|
|
|
56
64
|
"""Public generate method that includes telemetry"""
|
|
57
65
|
return self.generate_with_telemetry(*args, **kwargs)
|
|
58
66
|
|
|
67
|
+
@property
def async_client(self):
    """Return the AsyncAnthropic client, creating it on first access.

    The client is built with the same api_key, timeout and (when set)
    base_url as the provider, then cached in ``_async_client``.
    """
    if self._async_client is not None:
        return self._async_client

    options = {"api_key": self.api_key, "timeout": self._timeout}
    if self.base_url:
        options["base_url"] = self.base_url

    self._async_client = anthropic.AsyncAnthropic(**options)
    return self._async_client
|
|
76
|
+
|
|
59
77
|
def _generate_internal(self,
|
|
60
78
|
prompt: str,
|
|
61
79
|
messages: Optional[List[Dict[str, str]]] = None,
|
|
@@ -207,7 +225,7 @@ class AnthropicProvider(BaseProvider):
|
|
|
207
225
|
error_str = str(e).lower()
|
|
208
226
|
|
|
209
227
|
if 'api_key' in error_str or 'authentication' in error_str:
|
|
210
|
-
raise AuthenticationError(
|
|
228
|
+
raise AuthenticationError(format_auth_error("anthropic", str(e)))
|
|
211
229
|
elif ('not_found_error' in error_str and 'model:' in error_str) or '404' in error_str:
|
|
212
230
|
# Model not found - show available models
|
|
213
231
|
available_models = self.list_available_models(api_key=self.api_key)
|
|
@@ -216,6 +234,182 @@ class AnthropicProvider(BaseProvider):
|
|
|
216
234
|
else:
|
|
217
235
|
raise ProviderAPIError(f"Anthropic API error: {str(e)}")
|
|
218
236
|
|
|
237
|
+
async def _agenerate_internal(self,
                              prompt: str,
                              messages: Optional[List[Dict[str, str]]] = None,
                              system_prompt: Optional[str] = None,
                              tools: Optional[List[Dict[str, Any]]] = None,
                              media: Optional[List['MediaContent']] = None,
                              stream: bool = False,
                              response_model: Optional[Type[BaseModel]] = None,
                              **kwargs) -> Union[GenerateResponse, AsyncIterator[GenerateResponse]]:
    """Native async generation using the AsyncAnthropic client.

    Args:
        prompt: Current user prompt. It is appended as a user message unless
            an identical content string is already present in ``messages``.
        messages: Prior conversation. System messages are skipped; every
            non-assistant role is sent as ``user``.
        system_prompt: Sent through Anthropic's separate ``system`` parameter.
        tools: Tool definitions. Passed natively when the tool handler
            supports it, otherwise rendered into the system prompt.
        media: Optional media attached to the current prompt.
        stream: When True, an async iterator of responses is returned.
        response_model: Optional Pydantic model; enforced by adding a
            synthetic tool and forcing the model to call it.
        **kwargs: Generation overrides (temperature, top_p, top_k, seed,
            tool_choice, ...).

    Returns:
        A GenerateResponse, or an async iterator of GenerateResponse when
        ``stream`` is True.

    Raises:
        AuthenticationError: The error text mentions the API key or
            authentication.
        ModelNotFoundError: The error text indicates an unknown model / 404.
        ProviderAPIError: Any other failure while calling the API.
    """

    # Build messages array (same logic as sync)
    api_messages = []

    # Add conversation history
    if messages:
        for msg in messages:
            # Skip system messages as they're handled separately
            if msg.get("role") == "system":
                continue
            # Anthropic only accepts "user" and "assistant" roles here
            role = "assistant" if msg["role"] == "assistant" else "user"
            api_messages.append({"role": role, "content": msg["content"]})

    # Add current prompt as user message
    if prompt and prompt not in [msg.get("content") for msg in (messages or [])]:
        # Handle multimodal message with media content
        if media:
            try:
                from ..media.handlers import AnthropicMediaHandler
                media_handler = AnthropicMediaHandler(self.model_capabilities)

                # Create multimodal message combining text and media
                multimodal_message = media_handler.create_multimodal_message(prompt, media)
                api_messages.append(multimodal_message)
            except ImportError:
                self.logger.warning("Media processing not available. Install with: pip install abstractcore[media]")
                api_messages.append({"role": "user", "content": prompt})
            except Exception as e:
                # Best effort: fall back to a text-only message
                self.logger.warning(f"Failed to process media content: {e}")
                api_messages.append({"role": "user", "content": prompt})
        else:
            api_messages.append({"role": "user", "content": prompt})

    # Prepare API call parameters (same logic as sync)
    generation_kwargs = self._prepare_generation_kwargs(**kwargs)
    max_output_tokens = self._get_provider_max_tokens_param(generation_kwargs)

    call_params = {
        "model": self.model,
        "messages": api_messages,
        "max_tokens": max_output_tokens,
        "temperature": kwargs.get("temperature", self.temperature),
        "stream": stream
    }

    # Add system prompt if provided (Anthropic-specific: separate parameter)
    if system_prompt:
        call_params["system"] = system_prompt

    # Add top_p if specified
    if kwargs.get("top_p") or self.top_p < 1.0:
        call_params["top_p"] = kwargs.get("top_p", self.top_p)

    # Add top_k if specified
    if kwargs.get("top_k") or self.top_k:
        call_params["top_k"] = kwargs.get("top_k", self.top_k)

    # Handle seed parameter (Anthropic doesn't support seed natively)
    seed_value = kwargs.get("seed", self.seed)
    if seed_value is not None:
        import warnings
        warnings.warn(
            f"Seed parameter ({seed_value}) is not supported by Anthropic Claude API. "
            f"For deterministic outputs, use temperature=0.0 which may provide more consistent results, "
            f"though true determinism is not guaranteed.",
            UserWarning,
            stacklevel=3
        )
        self.logger.warning(f"Seed {seed_value} requested but not supported by Anthropic API")

    # Handle structured output using the "tool trick"
    structured_tool_name = None
    if response_model and PYDANTIC_AVAILABLE:
        structured_tool = self._create_structured_output_tool(response_model)

        if tools:
            tools = list(tools) + [structured_tool]
        else:
            tools = [structured_tool]

        structured_tool_name = structured_tool["name"]

        if api_messages and api_messages[-1]["role"] == "user":
            instruction = f"Please use the {structured_tool_name} tool to provide your response."
            last_content = api_messages[-1]["content"]
            if isinstance(last_content, str):
                api_messages[-1]["content"] = f"{last_content}\n\n{instruction}"
            elif isinstance(last_content, list):
                # Block-style content (presumably what the media handler
                # produces - verify). `list += str` would splice in single
                # characters, so add a text block on a copy instead.
                api_messages[-1]["content"] = list(last_content) + [
                    {"type": "text", "text": instruction}
                ]

    # Add tools if provided
    if tools:
        if self.tool_handler.supports_native:
            call_params["tools"] = self._format_tools_for_anthropic(tools)

            if structured_tool_name:
                # Force the synthetic tool so the reply matches response_model
                call_params["tool_choice"] = {"type": "tool", "name": structured_tool_name}
            elif kwargs.get("tool_choice"):
                call_params["tool_choice"] = {"type": kwargs.get("tool_choice", "auto")}
        else:
            # No native tool support: describe the tools in the system prompt
            tool_prompt = self.tool_handler.format_tools_prompt(tools)
            if call_params.get("system"):
                call_params["system"] += f"\n\n{tool_prompt}"
            else:
                call_params["system"] = tool_prompt

    # Make async API call
    try:
        if stream:
            return self._async_stream_response(call_params, tools)
        else:
            start_time = time.time()
            response = await self.async_client.messages.create(**call_params)
            gen_time = round((time.time() - start_time) * 1000, 1)  # milliseconds

            formatted = self._format_response(response)
            formatted.gen_time = gen_time

            if tools and (formatted.has_tool_calls() or
                          (self.tool_handler.supports_prompted and formatted.content)):
                formatted = self._handle_tool_execution(formatted, tools)

            return formatted
    except Exception as e:
        error_str = str(e).lower()

        if 'api_key' in error_str or 'authentication' in error_str:
            raise AuthenticationError(format_auth_error("anthropic", str(e))) from e
        elif ('not_found_error' in error_str and 'model:' in error_str) or '404' in error_str:
            available_models = self.list_available_models(api_key=self.api_key)
            error_message = format_model_error("Anthropic", self.model, available_models)
            raise ModelNotFoundError(error_message) from e
        else:
            raise ProviderAPIError(f"Anthropic API error: {str(e)}") from e
|
|
385
|
+
|
|
386
|
+
async def _async_stream_response(self, call_params: Dict[str, Any], tools: Optional[List[Dict[str, Any]]] = None) -> AsyncIterator[GenerateResponse]:
    """Native async streaming with Anthropic's context manager pattern.

    One GenerateResponse is yielded per stream event, with the raw event in
    ``raw_response``. Text is taken from ``content_block_delta`` events
    carrying a ``text_delta``, and the finish reason from the ``stop_reason``
    of ``message_delta`` events. Other events yield empty content.

    Args:
        call_params: Parameters prepared for ``messages.create``; the
            ``stream`` key is dropped before calling ``messages.stream``.
        tools: Accepted for signature parity; not used while streaming.

    Raises:
        ProviderAPIError: Any failure while opening or reading the stream.
    """
    stream_params = {k: v for k, v in call_params.items() if k != 'stream'}

    try:
        async with self.async_client.messages.stream(**stream_params) as stream:
            async for event in stream:
                event_type = getattr(event, 'type', None)
                delta = getattr(event, 'delta', None)

                # Fall back to the attributes read previously so any event
                # that does expose them keeps its old behaviour.
                text = getattr(event, 'content', '')
                finish_reason = getattr(event, 'finish_reason', None)

                if event_type == 'content_block_delta':
                    # Only text deltas carry user-visible text; tool-input
                    # JSON deltas are left to consumers of raw_response.
                    if getattr(delta, 'type', None) == 'text_delta':
                        text = getattr(delta, 'text', '') or ''
                elif event_type == 'message_delta':
                    finish_reason = getattr(delta, 'stop_reason', None) or finish_reason

                yield GenerateResponse(
                    content=text,
                    model=self.model,
                    finish_reason=finish_reason,
                    raw_response=event
                )
    except Exception as e:
        raise ProviderAPIError(f"Anthropic streaming error: {str(e)}") from e
|
|
401
|
+
|
|
402
|
+
def unload(self) -> None:
    """Close the async client if it was created.

    The cached client is detached before it is closed, so a later access to
    ``async_client`` builds a fresh client instead of returning a closed
    one, and calling ``unload`` again is a no-op.

    If an event loop is running in the current thread, the close is
    scheduled on it as a task; otherwise it is run to completion with
    ``asyncio.run``.
    """
    client = self._async_client
    if client is None:
        return
    self._async_client = None

    import asyncio
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No running loop in this thread: close synchronously.
        asyncio.run(client.close())
    else:
        # Keep a reference so the pending task is not garbage-collected
        # before it has run.
        self._async_close_task = loop.create_task(client.close())
|
|
412
|
+
|
|
219
413
|
def _format_tools_for_anthropic(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
220
414
|
"""Format tools for Anthropic API format"""
|
|
221
415
|
formatted_tools = []
|
|
@@ -455,9 +649,21 @@ class AnthropicProvider(BaseProvider):
|
|
|
455
649
|
# Create new client with updated timeout
|
|
456
650
|
self.client = anthropic.Anthropic(api_key=self.api_key, timeout=self._timeout)
|
|
457
651
|
def list_available_models(self, **kwargs) -> List[str]:
|
|
458
|
-
"""
|
|
652
|
+
"""
|
|
653
|
+
List available models from Anthropic API.
|
|
654
|
+
|
|
655
|
+
Args:
|
|
656
|
+
**kwargs: Optional parameters including:
|
|
657
|
+
- api_key: Anthropic API key
|
|
658
|
+
- input_capabilities: List of ModelInputCapability enums to filter by input capability
|
|
659
|
+
- output_capabilities: List of ModelOutputCapability enums to filter by output capability
|
|
660
|
+
|
|
661
|
+
Returns:
|
|
662
|
+
List of model names, optionally filtered by capabilities
|
|
663
|
+
"""
|
|
459
664
|
try:
|
|
460
665
|
import httpx
|
|
666
|
+
from .model_capabilities import filter_models_by_capabilities
|
|
461
667
|
|
|
462
668
|
# Use provided API key or instance API key
|
|
463
669
|
api_key = kwargs.get('api_key', self.api_key)
|
|
@@ -481,7 +687,21 @@ class AnthropicProvider(BaseProvider):
|
|
|
481
687
|
data = response.json()
|
|
482
688
|
models = [model["id"] for model in data.get("data", [])]
|
|
483
689
|
self.logger.debug(f"Retrieved {len(models)} models from Anthropic API")
|
|
484
|
-
|
|
690
|
+
models = sorted(models, reverse=True) # Latest models first
|
|
691
|
+
|
|
692
|
+
# Apply new capability filtering if provided
|
|
693
|
+
input_capabilities = kwargs.get('input_capabilities')
|
|
694
|
+
output_capabilities = kwargs.get('output_capabilities')
|
|
695
|
+
|
|
696
|
+
if input_capabilities or output_capabilities:
|
|
697
|
+
models = filter_models_by_capabilities(
|
|
698
|
+
models,
|
|
699
|
+
input_capabilities=input_capabilities,
|
|
700
|
+
output_capabilities=output_capabilities
|
|
701
|
+
)
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
return models
|
|
485
705
|
else:
|
|
486
706
|
self.logger.warning(f"Anthropic API returned status {response.status_code}")
|
|
487
707
|
return []
|