abstractcore 2.5.2__py3-none-any.whl → 2.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50):
  1. abstractcore/__init__.py +12 -0
  2. abstractcore/architectures/detection.py +250 -4
  3. abstractcore/assets/architecture_formats.json +14 -1
  4. abstractcore/assets/model_capabilities.json +533 -10
  5. abstractcore/compression/__init__.py +29 -0
  6. abstractcore/compression/analytics.py +420 -0
  7. abstractcore/compression/cache.py +250 -0
  8. abstractcore/compression/config.py +279 -0
  9. abstractcore/compression/exceptions.py +30 -0
  10. abstractcore/compression/glyph_processor.py +381 -0
  11. abstractcore/compression/optimizer.py +388 -0
  12. abstractcore/compression/orchestrator.py +380 -0
  13. abstractcore/compression/pil_text_renderer.py +818 -0
  14. abstractcore/compression/quality.py +226 -0
  15. abstractcore/compression/text_formatter.py +666 -0
  16. abstractcore/compression/vision_compressor.py +371 -0
  17. abstractcore/config/main.py +64 -0
  18. abstractcore/config/manager.py +100 -5
  19. abstractcore/core/session.py +61 -6
  20. abstractcore/events/__init__.py +1 -1
  21. abstractcore/media/auto_handler.py +312 -18
  22. abstractcore/media/handlers/local_handler.py +14 -2
  23. abstractcore/media/handlers/openai_handler.py +62 -3
  24. abstractcore/media/processors/__init__.py +11 -1
  25. abstractcore/media/processors/direct_pdf_processor.py +210 -0
  26. abstractcore/media/processors/glyph_pdf_processor.py +227 -0
  27. abstractcore/media/processors/image_processor.py +7 -1
  28. abstractcore/media/processors/text_processor.py +18 -3
  29. abstractcore/media/types.py +164 -7
  30. abstractcore/providers/__init__.py +18 -0
  31. abstractcore/providers/anthropic_provider.py +28 -2
  32. abstractcore/providers/base.py +278 -6
  33. abstractcore/providers/huggingface_provider.py +563 -23
  34. abstractcore/providers/lmstudio_provider.py +38 -2
  35. abstractcore/providers/mlx_provider.py +27 -2
  36. abstractcore/providers/model_capabilities.py +352 -0
  37. abstractcore/providers/ollama_provider.py +38 -4
  38. abstractcore/providers/openai_provider.py +28 -2
  39. abstractcore/providers/registry.py +85 -13
  40. abstractcore/server/app.py +91 -81
  41. abstractcore/utils/__init__.py +4 -1
  42. abstractcore/utils/trace_export.py +287 -0
  43. abstractcore/utils/version.py +1 -1
  44. abstractcore/utils/vlm_token_calculator.py +655 -0
  45. {abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/METADATA +107 -6
  46. {abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/RECORD +50 -33
  47. {abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/WHEEL +0 -0
  48. {abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/entry_points.txt +0 -0
  49. {abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/licenses/LICENSE +0 -0
  50. {abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/top_level.txt +0 -0
@@ -202,10 +202,14 @@ class ProviderRegistry:
202
202
 
203
203
  Args:
204
204
  provider_name: Name of the provider
205
- **kwargs: Provider-specific parameters (e.g., api_key, base_url)
205
+ **kwargs: Provider-specific parameters including:
206
+ - api_key: API key for authentication (if required)
207
+ - base_url: Base URL for API endpoint (if applicable)
208
+ - input_capabilities: List of ModelInputCapability enums to filter by input capability
209
+ - output_capabilities: List of ModelOutputCapability enums to filter by output capability
206
210
 
207
211
  Returns:
208
- List of available model names
212
+ List of available model names, optionally filtered by capabilities
209
213
  """
210
214
  try:
211
215
  provider_class = self.get_provider_class(provider_name)
@@ -285,13 +289,64 @@ class ProviderRegistry:
285
289
  for provider_name in self.list_provider_names()
286
290
  ]
287
291
 
288
- def get_providers_with_models(self) -> List[Dict[str, Any]]:
289
- """Get only providers that have available models."""
290
- all_providers = self.get_all_providers_status()
291
- return [
292
- provider for provider in all_providers
293
- if provider.get("status") == "available" and provider.get("model_count", 0) > 0
294
- ]
292
+ def get_providers_with_models(self, include_models: bool = True) -> List[Dict[str, Any]]:
293
+ """
294
+ Get only providers that have available models.
295
+
296
+ Args:
297
+ include_models: If True, include actual model lists (slower).
298
+ If False, return metadata only (much faster). Default: True.
299
+ """
300
+ if include_models:
301
+ # Original behavior - get full status including model lists
302
+ all_providers = self.get_all_providers_status()
303
+ return [
304
+ provider for provider in all_providers
305
+ if provider.get("status") == "available" and provider.get("model_count", 0) > 0
306
+ ]
307
+ else:
308
+ # Fast path - get all provider metadata without model enumeration
309
+ # Note: We return all providers since we can't quickly determine which have models
310
+ return self.get_providers_metadata_only()
311
+
312
+ def get_providers_metadata_only(self) -> List[Dict[str, Any]]:
313
+ """
314
+ Get provider metadata without enumerating models (fast path).
315
+
316
+ This method returns provider information without making API calls
317
+ or scanning for models, making it extremely fast for UI discovery.
318
+ """
319
+ providers_metadata = []
320
+
321
+ for provider_name in self.list_provider_names():
322
+ provider_info = self.get_provider_info(provider_name)
323
+ if not provider_info:
324
+ continue
325
+
326
+ # Basic availability check without model enumeration
327
+ try:
328
+ provider_class = self.get_provider_class(provider_name)
329
+ status = "available" # Assume available if class can be imported
330
+ except Exception:
331
+ status = "error"
332
+
333
+ metadata = {
334
+ "name": provider_info.name,
335
+ "display_name": provider_info.display_name,
336
+ "type": provider_info.provider_type,
337
+ "model_count": "unknown", # Don't enumerate models
338
+ "status": status,
339
+ "description": provider_info.description,
340
+ "local_provider": provider_info.local_provider,
341
+ "authentication_required": provider_info.authentication_required,
342
+ "supported_features": provider_info.supported_features,
343
+ "installation_extras": provider_info.installation_extras,
344
+ "models": [] # Empty list for fast response
345
+ }
346
+
347
+ providers_metadata.append(metadata)
348
+
349
+ return providers_metadata
295
350
 
296
351
  def create_provider_instance(self, provider_name: str, model: Optional[str] = None, **kwargs):
297
352
  """
@@ -348,7 +403,7 @@ def is_provider_available(provider_name: str) -> bool:
348
403
  return get_provider_registry().is_provider_available(provider_name)
349
404
 
350
405
 
351
- def get_all_providers_with_models() -> List[Dict[str, Any]]:
406
+ def get_all_providers_with_models(include_models: bool = True) -> List[Dict[str, Any]]:
352
407
  """
353
408
  Get comprehensive information about all providers with available models.
354
409
 
@@ -356,14 +411,18 @@ def get_all_providers_with_models() -> List[Dict[str, Any]]:
356
411
  for provider discovery and information. It replaces the manual provider
357
412
  lists in factory.py and server/app.py.
358
413
 
414
+ Args:
415
+ include_models: If True, include actual model lists (slower).
416
+ If False, return metadata only (much faster). Default: True.
417
+
359
418
  Returns:
360
419
  List of provider dictionaries with comprehensive metadata including:
361
420
  - name, display_name, type, description
362
421
  - model_count, status, supported_features
363
422
  - local_provider, authentication_required
364
- - installation_extras, sample models
423
+ - installation_extras, sample models (if include_models=True)
365
424
  """
366
- return get_provider_registry().get_providers_with_models()
425
+ return get_provider_registry().get_providers_with_models(include_models=include_models)
367
426
 
368
427
 
369
428
  def get_all_providers_status() -> List[Dict[str, Any]]:
@@ -386,5 +445,18 @@ def create_provider(provider_name: str, model: Optional[str] = None, **kwargs):
386
445
 
387
446
 
388
447
  def get_available_models_for_provider(provider_name: str, **kwargs) -> List[str]:
389
- """Get available models for a specific provider."""
448
+ """
449
+ Get available models for a specific provider.
450
+
451
+ Args:
452
+ provider_name: Name of the provider
453
+ **kwargs: Provider-specific parameters including:
454
+ - api_key: API key for authentication (if required)
455
+ - base_url: Base URL for API endpoint (if applicable)
456
+ - input_capabilities: List of ModelInputCapability enums to filter by input capability
457
+ - output_capabilities: List of ModelOutputCapability enums to filter by output capability
458
+
459
+ Returns:
460
+ List of available model names, optionally filtered by capabilities
461
+ """
390
462
  return get_provider_registry().get_available_models(provider_name, **kwargs)
@@ -261,53 +261,43 @@ async def general_exception_handler(request: Request, exc: Exception):
261
261
  # Model Type Detection
262
262
  # ============================================================================
263
263
 
264
- class ModelType(str, Enum):
265
- """Model type enumeration for filtering"""
266
- TEXT_GENERATION = "text-generation"
267
- TEXT_EMBEDDING = "text-embedding"
264
+ # Import the core capability enums directly
265
+ from ..providers.model_capabilities import ModelInputCapability, ModelOutputCapability
268
266
 
269
- def is_embedding_model(model_name: str) -> bool:
270
- """
271
- Detect if a model is an embedding model based on naming heuristics.
272
-
273
- Args:
274
- model_name: The model name to check
275
-
276
- Returns:
277
- True if the model appears to be an embedding model
278
- """
279
- model_lower = model_name.lower()
280
-
281
- # Heuristics for embedding models
282
- embedding_patterns = [
283
- "embed", # Most embedding models contain "embed"
284
- "all-minilm", # Sentence-transformers MiniLM models
285
- "all-mpnet", # Sentence-transformers MPNet models
286
- "nomic-embed", # Nomic embedding models
287
- "bert-", # BERT models (e.g., bert-base-uncased)
288
- "-bert", # BERT-based embedding models (e.g., nomic-bert-2048)
289
- "bge-", # BAAI BGE embedding models
290
- "gte-", # GTE embedding models
291
- "e5-", # E5 embedding models
292
- "instructor-", # Instructor embedding models
293
- "granite-embedding", # IBM Granite embedding models
294
- ]
295
-
296
- return any(pattern in model_lower for pattern in embedding_patterns)
297
267
 
298
268
  # ============================================================================
299
269
  # Provider Model Discovery (Using Centralized Registry)
300
270
  # ============================================================================
301
271
 
302
- def get_models_from_provider(provider_name: str) -> List[str]:
303
- """Get available models from a specific provider using the centralized provider registry."""
272
+ def get_models_from_provider(
273
+ provider_name: str,
274
+ input_capabilities=None,
275
+ output_capabilities=None
276
+ ) -> List[str]:
277
+ """
278
+ Get available models from a specific provider using the centralized provider registry.
279
+
280
+ Args:
281
+ provider_name: Name of the provider
282
+ input_capabilities: Optional list of ModelInputCapability enums
283
+ output_capabilities: Optional list of ModelOutputCapability enums
284
+
285
+ Returns:
286
+ List of model names from the provider, optionally filtered
287
+ """
304
288
  try:
305
289
  from ..providers.registry import get_available_models_for_provider
306
- return get_available_models_for_provider(provider_name)
290
+ return get_available_models_for_provider(
291
+ provider_name,
292
+ input_capabilities=input_capabilities,
293
+ output_capabilities=output_capabilities
294
+ )
307
295
  except Exception as e:
308
296
  logger.debug(f"Failed to get models from provider {provider_name}: {e}")
309
297
  return []
310
298
 
299
+
300
+
311
301
  # ============================================================================
312
302
  # OpenAI Responses API Models (100% Compatible)
313
303
  # ============================================================================
@@ -994,43 +984,47 @@ async def list_models(
994
984
  description="Filter by provider (e.g., 'ollama', 'openai', 'anthropic', 'lmstudio')",
995
985
  example=""
996
986
  ),
997
- type: Optional[ModelType] = Query(
987
+ input_type: Optional[ModelInputCapability] = Query(
998
988
  None,
999
- description="Filter by model type: 'text-generation' for chat/completion models, 'text-embedding' for embedding models",
1000
- example="text-generation"
1001
- )
989
+ description="Filter by input capability: 'text', 'image', 'audio', 'video'"
990
+ ),
991
+ output_type: Optional[ModelOutputCapability] = Query(
992
+ None,
993
+ description="Filter by output capability: 'text', 'embeddings'"
994
+ ),
1002
995
  ):
1003
996
  """
1004
997
  List available models from AbstractCore providers.
1005
-
1006
- Returns a list of all available models, optionally filtered by provider and/or model type.
1007
-
1008
- **Filters:**
1009
- - `provider`: Limit results to a specific provider
1010
- - `type`: Limit results to a specific model type (text-generation or text-embedding)
1011
-
998
+
999
+ Returns a list of all available models, optionally filtered by provider and/or capabilities.
1000
+
1001
+ **Filtering System:**
1002
+ - `input_type`: Filter by what INPUT the model can process (text, image, audio, video)
1003
+ - `output_type`: Filter by what OUTPUT the model generates (text, embeddings)
1004
+
1012
1005
  **Examples:**
1013
1006
  - `/v1/models` - All models from all providers
1014
- - `/v1/models?type=text-embedding` - Only embedding models
1015
- - `/v1/models?type=text-generation` - Only text generation models
1016
- - `/v1/models?provider=ollama` - Only Ollama models
1017
- - `/v1/models?provider=ollama&type=text-embedding` - Ollama embedding models only
1007
+ - `/v1/models?output_type=embeddings` - Only embedding models
1008
+ - `/v1/models?input_type=text&output_type=text` - Text-only models that generate text
1009
+ - `/v1/models?input_type=image` - Models that can analyze images
1010
+ - `/v1/models?provider=ollama&input_type=image` - Ollama vision models only
1018
1011
  """
1019
1012
  try:
1020
1013
  models_data = []
1021
1014
 
1015
+ # Use the capability enums directly
1016
+ input_capabilities = [input_type] if input_type else None
1017
+ output_capabilities = [output_type] if output_type else None
1018
+
1019
+
1022
1020
  if provider:
1023
- # Get models from specific provider
1024
- models = get_models_from_provider(provider.lower())
1021
+ # Get models from specific provider with optional filtering
1022
+ models = get_models_from_provider(
1023
+ provider.lower(),
1024
+ input_capabilities=input_capabilities,
1025
+ output_capabilities=output_capabilities
1026
+ )
1025
1027
  for model in models:
1026
- # Apply type filter if specified
1027
- if type:
1028
- is_embedding = is_embedding_model(model)
1029
- if type == ModelType.TEXT_EMBEDDING and not is_embedding:
1030
- continue # Skip non-embedding models
1031
- if type == ModelType.TEXT_GENERATION and is_embedding:
1032
- continue # Skip embedding models
1033
-
1034
1028
  model_id = f"{provider.lower()}/{model}"
1035
1029
  models_data.append({
1036
1030
  "id": model_id,
@@ -1040,23 +1034,25 @@ async def list_models(
1040
1034
  "permission": [{"allow_create_engine": False, "allow_sampling": True}]
1041
1035
  })
1042
1036
 
1043
- filter_msg = f" (type={type.value})" if type else ""
1037
+ filter_parts = []
1038
+ if input_type:
1039
+ filter_parts.append(f"input_type={input_type.value}")
1040
+ if output_type:
1041
+ filter_parts.append(f"output_type={output_type.value}")
1042
+
1043
+ filter_msg = f" ({', '.join(filter_parts)})" if filter_parts else ""
1044
1044
  logger.info(f"Listed {len(models_data)} models for provider {provider}{filter_msg}")
1045
1045
  else:
1046
1046
  # Get models from all providers using centralized registry
1047
1047
  from ..providers.registry import list_available_providers
1048
1048
  providers = list_available_providers()
1049
1049
  for prov in providers:
1050
- models = get_models_from_provider(prov)
1050
+ models = get_models_from_provider(
1051
+ prov,
1052
+ input_capabilities=input_capabilities,
1053
+ output_capabilities=output_capabilities
1054
+ )
1051
1055
  for model in models:
1052
- # Apply type filter if specified
1053
- if type:
1054
- is_embedding = is_embedding_model(model)
1055
- if type == ModelType.TEXT_EMBEDDING and not is_embedding:
1056
- continue # Skip non-embedding models
1057
- if type == ModelType.TEXT_GENERATION and is_embedding:
1058
- continue # Skip embedding models
1059
-
1060
1056
  model_id = f"{prov}/{model}"
1061
1057
  models_data.append({
1062
1058
  "id": model_id,
@@ -1066,7 +1062,13 @@ async def list_models(
1066
1062
  "permission": [{"allow_create_engine": False, "allow_sampling": True}]
1067
1063
  })
1068
1064
 
1069
- filter_msg = f" (type={type.value})" if type else ""
1065
+ filter_parts = []
1066
+ if input_type:
1067
+ filter_parts.append(f"input_type={input_type.value}")
1068
+ if output_type:
1069
+ filter_parts.append(f"output_type={output_type.value}")
1070
+
1071
+ filter_msg = f" ({', '.join(filter_parts)})" if filter_parts else ""
1070
1072
  logger.info(f"Listed {len(models_data)} models from all providers{filter_msg}")
1071
1073
 
1072
1074
  return {
@@ -1082,18 +1084,31 @@ async def list_models(
1082
1084
  }
1083
1085
 
1084
1086
  @app.get("/providers")
1085
- async def list_providers():
1087
+ async def list_providers(
1088
+ include_models: bool = Query(
1089
+ False,
1090
+ description="Include model lists for each provider. Set to true for full information (slower)."
1091
+ )
1092
+ ):
1086
1093
  """
1087
1094
  List all available AbstractCore providers and their capabilities.
1088
1095
 
1089
1096
  Returns comprehensive information about all registered LLM providers, including:
1090
1097
  - Provider name, display name, and type
1091
- - Number of available models and sample models
1098
+ - Number of available models and sample models (if include_models=True)
1092
1099
  - Current availability status and detailed error information
1093
1100
  - Provider description and supported features
1094
1101
  - Authentication requirements and installation instructions
1095
1102
  - Local vs. cloud provider designation
1096
1103
 
1104
+ **Query Parameters:**
1105
+ - `include_models` (bool, default=False): Include model lists for each provider.
1106
+ Set to `true` for full information (slower).
1107
+
1108
+ **Performance:**
1109
+ - `include_models=false`: Metadata only (very fast, ~15ms) - **DEFAULT**
1110
+ - `include_models=true`: Full information including model lists (slower, ~800ms)
1111
+
1097
1112
  **Supported Providers:**
1098
1113
  - **OpenAI**: Commercial API with GPT-4, GPT-3.5, and embedding models
1099
1114
  - **Anthropic**: Commercial API with Claude 3 family models
@@ -1103,24 +1118,19 @@ async def list_providers():
1103
1118
  - **HuggingFace**: Access to HuggingFace models (transformers and embeddings)
1104
1119
 
1105
1120
  **Use Cases:**
1106
- - Discover available providers before making requests
1107
- - Check provider availability and model counts
1121
+ - Fast provider discovery: `GET /providers` (default, very fast)
1122
+ - Full provider information: `GET /providers?include_models=true`
1108
1123
  - Build dynamic provider selection UIs
1109
1124
  - Monitor provider status and troubleshoot issues
1110
1125
  - Get installation instructions for missing dependencies
1111
1126
 
1112
- **Enhanced Information:**
1113
- This endpoint now uses the centralized provider registry to provide
1114
- comprehensive information including supported features, authentication
1115
- requirements, and detailed status information.
1116
-
1117
1127
  **Returns:** A list of provider objects with comprehensive metadata.
1118
1128
  """
1119
1129
  try:
1120
1130
  from ..providers.registry import get_all_providers_with_models, get_all_providers_status
1121
1131
 
1122
1132
  # Get providers with models (available providers)
1123
- available_providers = get_all_providers_with_models()
1133
+ available_providers = get_all_providers_with_models(include_models=include_models)
1124
1134
 
1125
1135
  # Optionally include all providers (even those with issues) for debugging
1126
1136
  # Uncomment the next line if you want to see providers with errors too:
@@ -13,6 +13,7 @@ from .token_utils import (
13
13
  ContentType
14
14
  )
15
15
  from .message_preprocessor import MessagePreprocessor, parse_files, has_files
16
+ from .trace_export import export_traces, summarize_traces
16
17
 
17
18
  __all__ = [
18
19
  'configure_logging',
@@ -27,5 +28,7 @@ __all__ = [
27
28
  'ContentType',
28
29
  'MessagePreprocessor',
29
30
  'parse_files',
30
- 'has_files'
31
+ 'has_files',
32
+ 'export_traces',
33
+ 'summarize_traces'
31
34
  ]