abstractcore-2.5.2-py3-none-any.whl → abstractcore-2.6.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. abstractcore/__init__.py +19 -1
  2. abstractcore/architectures/detection.py +252 -6
  3. abstractcore/assets/architecture_formats.json +14 -1
  4. abstractcore/assets/model_capabilities.json +533 -10
  5. abstractcore/compression/__init__.py +29 -0
  6. abstractcore/compression/analytics.py +420 -0
  7. abstractcore/compression/cache.py +250 -0
  8. abstractcore/compression/config.py +279 -0
  9. abstractcore/compression/exceptions.py +30 -0
  10. abstractcore/compression/glyph_processor.py +381 -0
  11. abstractcore/compression/optimizer.py +388 -0
  12. abstractcore/compression/orchestrator.py +380 -0
  13. abstractcore/compression/pil_text_renderer.py +818 -0
  14. abstractcore/compression/quality.py +226 -0
  15. abstractcore/compression/text_formatter.py +666 -0
  16. abstractcore/compression/vision_compressor.py +371 -0
  17. abstractcore/config/main.py +64 -0
  18. abstractcore/config/manager.py +100 -5
  19. abstractcore/core/retry.py +2 -2
  20. abstractcore/core/session.py +193 -7
  21. abstractcore/download.py +253 -0
  22. abstractcore/embeddings/manager.py +2 -2
  23. abstractcore/events/__init__.py +113 -2
  24. abstractcore/exceptions/__init__.py +49 -2
  25. abstractcore/media/auto_handler.py +312 -18
  26. abstractcore/media/handlers/local_handler.py +14 -2
  27. abstractcore/media/handlers/openai_handler.py +62 -3
  28. abstractcore/media/processors/__init__.py +11 -1
  29. abstractcore/media/processors/direct_pdf_processor.py +210 -0
  30. abstractcore/media/processors/glyph_pdf_processor.py +227 -0
  31. abstractcore/media/processors/image_processor.py +7 -1
  32. abstractcore/media/processors/office_processor.py +2 -2
  33. abstractcore/media/processors/text_processor.py +18 -3
  34. abstractcore/media/types.py +164 -7
  35. abstractcore/media/utils/image_scaler.py +2 -2
  36. abstractcore/media/vision_fallback.py +2 -2
  37. abstractcore/providers/__init__.py +18 -0
  38. abstractcore/providers/anthropic_provider.py +228 -8
  39. abstractcore/providers/base.py +378 -11
  40. abstractcore/providers/huggingface_provider.py +563 -23
  41. abstractcore/providers/lmstudio_provider.py +284 -4
  42. abstractcore/providers/mlx_provider.py +27 -2
  43. abstractcore/providers/model_capabilities.py +352 -0
  44. abstractcore/providers/ollama_provider.py +282 -6
  45. abstractcore/providers/openai_provider.py +286 -8
  46. abstractcore/providers/registry.py +85 -13
  47. abstractcore/providers/streaming.py +2 -2
  48. abstractcore/server/app.py +91 -81
  49. abstractcore/tools/common_tools.py +2 -2
  50. abstractcore/tools/handler.py +2 -2
  51. abstractcore/tools/parser.py +2 -2
  52. abstractcore/tools/registry.py +2 -2
  53. abstractcore/tools/syntax_rewriter.py +2 -2
  54. abstractcore/tools/tag_rewriter.py +3 -3
  55. abstractcore/utils/__init__.py +4 -1
  56. abstractcore/utils/self_fixes.py +2 -2
  57. abstractcore/utils/trace_export.py +287 -0
  58. abstractcore/utils/version.py +1 -1
  59. abstractcore/utils/vlm_token_calculator.py +655 -0
  60. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/METADATA +207 -8
  61. abstractcore-2.6.0.dist-info/RECORD +108 -0
  62. abstractcore-2.5.2.dist-info/RECORD +0 -90
  63. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/WHEEL +0 -0
  64. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/entry_points.txt +0 -0
  65. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/licenses/LICENSE +0 -0
  66. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/top_level.txt +0 -0

abstractcore/server/app.py

@@ -261,53 +261,43 @@ async def general_exception_handler(request: Request, exc: Exception):
  # Model Type Detection
  # ============================================================================

- class ModelType(str, Enum):
-     """Model type enumeration for filtering"""
-     TEXT_GENERATION = "text-generation"
-     TEXT_EMBEDDING = "text-embedding"
+ # Import the core capability enums directly
+ from ..providers.model_capabilities import ModelInputCapability, ModelOutputCapability

- def is_embedding_model(model_name: str) -> bool:
-     """
-     Detect if a model is an embedding model based on naming heuristics.
-
-     Args:
-         model_name: The model name to check
-
-     Returns:
-         True if the model appears to be an embedding model
-     """
-     model_lower = model_name.lower()
-
-     # Heuristics for embedding models
-     embedding_patterns = [
-         "embed",              # Most embedding models contain "embed"
-         "all-minilm",         # Sentence-transformers MiniLM models
-         "all-mpnet",          # Sentence-transformers MPNet models
-         "nomic-embed",        # Nomic embedding models
-         "bert-",              # BERT models (e.g., bert-base-uncased)
-         "-bert",              # BERT-based embedding models (e.g., nomic-bert-2048)
-         "bge-",               # BAAI BGE embedding models
-         "gte-",               # GTE embedding models
-         "e5-",                # E5 embedding models
-         "instructor-",        # Instructor embedding models
-         "granite-embedding",  # IBM Granite embedding models
-     ]
-
-     return any(pattern in model_lower for pattern in embedding_patterns)

  # ============================================================================
  # Provider Model Discovery (Using Centralized Registry)
  # ============================================================================

- def get_models_from_provider(provider_name: str) -> List[str]:
-     """Get available models from a specific provider using the centralized provider registry."""
+ def get_models_from_provider(
+     provider_name: str,
+     input_capabilities=None,
+     output_capabilities=None
+ ) -> List[str]:
+     """
+     Get available models from a specific provider using the centralized provider registry.
+
+     Args:
+         provider_name: Name of the provider
+         input_capabilities: Optional list of ModelInputCapability enums
+         output_capabilities: Optional list of ModelOutputCapability enums
+
+     Returns:
+         List of model names from the provider, optionally filtered
+     """
      try:
          from ..providers.registry import get_available_models_for_provider
-         return get_available_models_for_provider(provider_name)
+         return get_available_models_for_provider(
+             provider_name,
+             input_capabilities=input_capabilities,
+             output_capabilities=output_capabilities
+         )
      except Exception as e:
          logger.debug(f"Failed to get models from provider {provider_name}: {e}")
          return []

+
+
  # ============================================================================
  # OpenAI Responses API Models (100% Compatible)
  # ============================================================================
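
The heuristic is_embedding_model() name matching is replaced by capability metadata resolved through the provider registry. A minimal sketch of the new filtering path, assuming the enum members ModelInputCapability.IMAGE and ModelOutputCapability.EMBEDDINGS exist (only the string values 'image' and 'embeddings' appear in this diff); both keyword arguments are passed explicitly since their defaults are not shown:

    from abstractcore.providers.model_capabilities import (
        ModelInputCapability,
        ModelOutputCapability,
    )
    from abstractcore.providers.registry import get_available_models_for_provider

    # Ollama models that accept image input (vision models).
    vision_models = get_available_models_for_provider(
        "ollama",
        input_capabilities=[ModelInputCapability.IMAGE],  # assumed member name
        output_capabilities=None,
    )

    # Ollama models that produce embeddings.
    embedding_models = get_available_models_for_provider(
        "ollama",
        input_capabilities=None,
        output_capabilities=[ModelOutputCapability.EMBEDDINGS],  # assumed member name
    )

    print(vision_models, embedding_models)
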
@@ -994,43 +984,47 @@ async def list_models(
          description="Filter by provider (e.g., 'ollama', 'openai', 'anthropic', 'lmstudio')",
          example=""
      ),
-     type: Optional[ModelType] = Query(
+     input_type: Optional[ModelInputCapability] = Query(
          None,
-         description="Filter by model type: 'text-generation' for chat/completion models, 'text-embedding' for embedding models",
-         example="text-generation"
-     )
+         description="Filter by input capability: 'text', 'image', 'audio', 'video'"
+     ),
+     output_type: Optional[ModelOutputCapability] = Query(
+         None,
+         description="Filter by output capability: 'text', 'embeddings'"
+     ),
  ):
      """
      List available models from AbstractCore providers.
-
-     Returns a list of all available models, optionally filtered by provider and/or model type.
-
-     **Filters:**
-     - `provider`: Limit results to a specific provider
-     - `type`: Limit results to a specific model type (text-generation or text-embedding)
-
+
+     Returns a list of all available models, optionally filtered by provider and/or capabilities.
+
+     **Filtering System:**
+     - `input_type`: Filter by what INPUT the model can process (text, image, audio, video)
+     - `output_type`: Filter by what OUTPUT the model generates (text, embeddings)
+
      **Examples:**
      - `/v1/models` - All models from all providers
-     - `/v1/models?type=text-embedding` - Only embedding models
-     - `/v1/models?type=text-generation` - Only text generation models
-     - `/v1/models?provider=ollama` - Only Ollama models
-     - `/v1/models?provider=ollama&type=text-embedding` - Ollama embedding models only
+     - `/v1/models?output_type=embeddings` - Only embedding models
+     - `/v1/models?input_type=text&output_type=text` - Text-only models that generate text
+     - `/v1/models?input_type=image` - Models that can analyze images
+     - `/v1/models?provider=ollama&input_type=image` - Ollama vision models only
      """
      try:
          models_data = []

+         # Use the capability enums directly
+         input_capabilities = [input_type] if input_type else None
+         output_capabilities = [output_type] if output_type else None
+
+
          if provider:
-             # Get models from specific provider
-             models = get_models_from_provider(provider.lower())
+             # Get models from specific provider with optional filtering
+             models = get_models_from_provider(
+                 provider.lower(),
+                 input_capabilities=input_capabilities,
+                 output_capabilities=output_capabilities
+             )
              for model in models:
-                 # Apply type filter if specified
-                 if type:
-                     is_embedding = is_embedding_model(model)
-                     if type == ModelType.TEXT_EMBEDDING and not is_embedding:
-                         continue  # Skip non-embedding models
-                     if type == ModelType.TEXT_GENERATION and is_embedding:
-                         continue  # Skip embedding models
-
                  model_id = f"{provider.lower()}/{model}"
                  models_data.append({
                      "id": model_id,
@@ -1040,23 +1034,25 @@ async def list_models(
                      "permission": [{"allow_create_engine": False, "allow_sampling": True}]
                  })

-             filter_msg = f" (type={type.value})" if type else ""
+             filter_parts = []
+             if input_type:
+                 filter_parts.append(f"input_type={input_type.value}")
+             if output_type:
+                 filter_parts.append(f"output_type={output_type.value}")
+
+             filter_msg = f" ({', '.join(filter_parts)})" if filter_parts else ""
              logger.info(f"Listed {len(models_data)} models for provider {provider}{filter_msg}")
          else:
              # Get models from all providers using centralized registry
              from ..providers.registry import list_available_providers
              providers = list_available_providers()
              for prov in providers:
-                 models = get_models_from_provider(prov)
+                 models = get_models_from_provider(
+                     prov,
+                     input_capabilities=input_capabilities,
+                     output_capabilities=output_capabilities
+                 )
                  for model in models:
-                     # Apply type filter if specified
-                     if type:
-                         is_embedding = is_embedding_model(model)
-                         if type == ModelType.TEXT_EMBEDDING and not is_embedding:
-                             continue  # Skip non-embedding models
-                         if type == ModelType.TEXT_GENERATION and is_embedding:
-                             continue  # Skip embedding models
-
                      model_id = f"{prov}/{model}"
                      models_data.append({
                          "id": model_id,
@@ -1066,7 +1062,13 @@ async def list_models(
                          "permission": [{"allow_create_engine": False, "allow_sampling": True}]
                      })

-             filter_msg = f" (type={type.value})" if type else ""
+             filter_parts = []
+             if input_type:
+                 filter_parts.append(f"input_type={input_type.value}")
+             if output_type:
+                 filter_parts.append(f"output_type={output_type.value}")
+
+             filter_msg = f" ({', '.join(filter_parts)})" if filter_parts else ""
              logger.info(f"Listed {len(models_data)} models from all providers{filter_msg}")

          return {
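
Taken together, these hunks replace the single `type` query parameter on /v1/models with independent `input_type` and `output_type` filters. A hypothetical client-side sketch, assuming the AbstractCore server is running at http://localhost:8000 (host and port are not part of this diff):

    import requests

    BASE = "http://localhost:8000"  # assumed local server address

    # Only embedding models, across all providers (was ?type=text-embedding in 2.5.2).
    embedding_models = requests.get(f"{BASE}/v1/models", params={"output_type": "embeddings"})

    # Ollama models that can analyze images.
    ollama_vision = requests.get(
        f"{BASE}/v1/models",
        params={"provider": "ollama", "input_type": "image"},
    )

    print(embedding_models.json())
    print(ollama_vision.json())
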
@@ -1082,18 +1084,31 @@ async def list_models(
          }

  @app.get("/providers")
- async def list_providers():
+ async def list_providers(
+     include_models: bool = Query(
+         False,
+         description="Include model lists for each provider. Set to true for full information (slower)."
+     )
+ ):
      """
      List all available AbstractCore providers and their capabilities.

      Returns comprehensive information about all registered LLM providers, including:
      - Provider name, display name, and type
-     - Number of available models and sample models
+     - Number of available models and sample models (if include_models=True)
      - Current availability status and detailed error information
      - Provider description and supported features
      - Authentication requirements and installation instructions
      - Local vs. cloud provider designation

+     **Query Parameters:**
+     - `include_models` (bool, default=False): Include model lists for each provider.
+       Set to `true` for full information (slower).
+
+     **Performance:**
+     - `include_models=false`: Metadata only (very fast, ~15ms) - **DEFAULT**
+     - `include_models=true`: Full information including model lists (slower, ~800ms)
+
      **Supported Providers:**
      - **OpenAI**: Commercial API with GPT-4, GPT-3.5, and embedding models
      - **Anthropic**: Commercial API with Claude 3 family models
@@ -1103,24 +1118,19 @@ async def list_providers():
      - **HuggingFace**: Access to HuggingFace models (transformers and embeddings)

      **Use Cases:**
-     - Discover available providers before making requests
-     - Check provider availability and model counts
+     - Fast provider discovery: `GET /providers` (default, very fast)
+     - Full provider information: `GET /providers?include_models=true`
      - Build dynamic provider selection UIs
      - Monitor provider status and troubleshoot issues
      - Get installation instructions for missing dependencies

-     **Enhanced Information:**
-     This endpoint now uses the centralized provider registry to provide
-     comprehensive information including supported features, authentication
-     requirements, and detailed status information.
-
      **Returns:** A list of provider objects with comprehensive metadata.
      """
      try:
          from ..providers.registry import get_all_providers_with_models, get_all_providers_status

          # Get providers with models (available providers)
-         available_providers = get_all_providers_with_models()
+         available_providers = get_all_providers_with_models(include_models=include_models)

          # Optionally include all providers (even those with issues) for debugging
          # Uncomment the next line if you want to see providers with errors too:
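
A short sketch of the two retrieval modes the new include_models flag enables, again assuming a local server at http://localhost:8000:

    import requests

    BASE = "http://localhost:8000"  # assumed local server address

    # Default: fast, metadata-only listing (~15ms per the docstring above).
    providers_meta = requests.get(f"{BASE}/providers").json()

    # Full listing including per-provider model lists (~800ms per the docstring above).
    providers_full = requests.get(f"{BASE}/providers", params={"include_models": "true"}).json()
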

abstractcore/tools/common_tools.py

@@ -12,7 +12,6 @@ import subprocess
  import requests
  from pathlib import Path
  from typing import Optional, Dict, Any, Union
- import logging
  import platform
  import re
  import time
@@ -43,8 +42,9 @@

  # Import our enhanced tool decorator
  from abstractcore.tools.core import tool
+ from abstractcore.utils.structured_logging import get_logger

- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)

  # File Operations
  @tool(
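
The same two-line migration recurs across the tools and utils modules in the hunks below: the standard library logger is replaced by AbstractCore's structured logger, while existing call sites such as logger.debug(...) are left unchanged. A before/after sketch:

    # Before (2.5.2): stdlib logging
    import logging
    logger = logging.getLogger(__name__)

    # After (2.6.0): structured logging helper shipped with AbstractCore
    from abstractcore.utils.structured_logging import get_logger
    logger = get_logger(__name__)

    logger.debug("call sites keep the familiar logging-style API")
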

abstractcore/tools/handler.py

@@ -6,14 +6,14 @@ across all models, whether they have native tool APIs or require prompting.
  """

  import json
- import logging
  from typing import List, Dict, Any, Optional, Union, Callable

  from ..architectures import detect_architecture, get_model_capabilities, get_architecture_format
  from .core import ToolDefinition, ToolCall, ToolCallResponse, ToolResult
  from .parser import detect_tool_calls, parse_tool_calls, format_tool_prompt
+ from ..utils.structured_logging import get_logger

- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)


  class UniversalToolHandler:

abstractcore/tools/parser.py

@@ -7,14 +7,14 @@ responses based on their architecture.

  import re
  import json
- import logging
  from typing import List, Optional, Dict, Any
  from enum import Enum

  from .core import ToolCall, ToolDefinition
  from ..architectures import detect_architecture, get_architecture_format
+ from ..utils.structured_logging import get_logger

- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)


  class ToolFormat(Enum):

abstractcore/tools/registry.py

@@ -5,15 +5,15 @@ This module provides a centralized registry for managing available tools
  and executing them safely.
  """

- import logging
  import time
  from typing import Dict, List, Any, Callable, Optional, Union
  from functools import wraps

  from .core import ToolDefinition, ToolCall, ToolResult
  from ..events import EventType, emit_global, create_tool_event
+ from ..utils.structured_logging import get_logger

- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)


  class ToolRegistry:

abstractcore/tools/syntax_rewriter.py

@@ -8,15 +8,15 @@ Supports multiple target formats including OpenAI, Codex, and custom agent formats.
  import re
  import json
  import uuid
- import logging
  from typing import List, Dict, Any, Optional, Union
  from dataclasses import dataclass
  from enum import Enum

  from .core import ToolCall
  from .parser import parse_tool_calls
+ from ..utils.structured_logging import get_logger

- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)


  class SyntaxFormat(Enum):

abstractcore/tools/tag_rewriter.py

@@ -9,6 +9,9 @@ import re
  import json
  from typing import Dict, Any, Optional, Tuple, List
  from dataclasses import dataclass
+ from ..utils.structured_logging import get_logger
+
+ logger = get_logger(__name__)


  @dataclass
@@ -161,9 +164,6 @@ class ToolCallTagRewriter:
          Returns:
              Text with rewritten tool call tags
          """
-         import logging
-         logger = logging.getLogger(__name__)
-
          logger.debug(f"rewrite_text called with text: {text[:100] if text else None}")
          logger.debug(f"Target output tags: start='{self._output_start_tag}', end='{self._output_end_tag}'")


abstractcore/utils/__init__.py

@@ -13,6 +13,7 @@ from .token_utils import (
      ContentType
  )
  from .message_preprocessor import MessagePreprocessor, parse_files, has_files
+ from .trace_export import export_traces, summarize_traces

  __all__ = [
      'configure_logging',
@@ -27,5 +28,7 @@ __all__ = [
      'ContentType',
      'MessagePreprocessor',
      'parse_files',
-     'has_files'
+     'has_files',
+     'export_traces',
+     'summarize_traces'
  ]
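
Only the re-export is visible in this diff; the implementations live in the new abstractcore/utils/trace_export.py (file 57 above) and their signatures are not shown, so no call arguments are illustrated:

    # The two trace helpers become importable from the package's utils namespace.
    from abstractcore.utils import export_traces, summarize_traces
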

abstractcore/utils/self_fixes.py

@@ -8,9 +8,9 @@ before giving up on parsing.
  import json
  import re
  from typing import Optional
- import logging
+ from .structured_logging import get_logger

- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)


  def fix_json(text: str) -> Optional[str]:
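
The fix_json signature appears here only as context for the logging change. An illustrative call, with the input string made up for the example and the None-on-failure behaviour inferred from the Optional[str] return type and the module docstring ("before giving up on parsing"):

    from abstractcore.utils.self_fixes import fix_json

    # Hypothetical malformed JSON (trailing comma); fix_json is expected to return
    # a repaired string, or None if it cannot repair the input.
    repaired = fix_json('{"name": "demo", "tags": ["a", "b",]}')
    print(repaired)
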