abstractcore 2.5.0__py3-none-any.whl → 2.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. abstractcore/__init__.py +12 -0
  2. abstractcore/apps/__main__.py +8 -1
  3. abstractcore/apps/deepsearch.py +644 -0
  4. abstractcore/apps/intent.py +614 -0
  5. abstractcore/architectures/detection.py +250 -4
  6. abstractcore/assets/architecture_formats.json +14 -1
  7. abstractcore/assets/model_capabilities.json +583 -44
  8. abstractcore/compression/__init__.py +29 -0
  9. abstractcore/compression/analytics.py +420 -0
  10. abstractcore/compression/cache.py +250 -0
  11. abstractcore/compression/config.py +279 -0
  12. abstractcore/compression/exceptions.py +30 -0
  13. abstractcore/compression/glyph_processor.py +381 -0
  14. abstractcore/compression/optimizer.py +388 -0
  15. abstractcore/compression/orchestrator.py +380 -0
  16. abstractcore/compression/pil_text_renderer.py +818 -0
  17. abstractcore/compression/quality.py +226 -0
  18. abstractcore/compression/text_formatter.py +666 -0
  19. abstractcore/compression/vision_compressor.py +371 -0
  20. abstractcore/config/main.py +66 -1
  21. abstractcore/config/manager.py +111 -5
  22. abstractcore/core/session.py +105 -5
  23. abstractcore/events/__init__.py +1 -1
  24. abstractcore/media/auto_handler.py +312 -18
  25. abstractcore/media/handlers/local_handler.py +14 -2
  26. abstractcore/media/handlers/openai_handler.py +62 -3
  27. abstractcore/media/processors/__init__.py +11 -1
  28. abstractcore/media/processors/direct_pdf_processor.py +210 -0
  29. abstractcore/media/processors/glyph_pdf_processor.py +227 -0
  30. abstractcore/media/processors/image_processor.py +7 -1
  31. abstractcore/media/processors/text_processor.py +18 -3
  32. abstractcore/media/types.py +164 -7
  33. abstractcore/processing/__init__.py +5 -1
  34. abstractcore/processing/basic_deepsearch.py +2173 -0
  35. abstractcore/processing/basic_intent.py +690 -0
  36. abstractcore/providers/__init__.py +18 -0
  37. abstractcore/providers/anthropic_provider.py +29 -2
  38. abstractcore/providers/base.py +279 -6
  39. abstractcore/providers/huggingface_provider.py +658 -27
  40. abstractcore/providers/lmstudio_provider.py +52 -2
  41. abstractcore/providers/mlx_provider.py +103 -4
  42. abstractcore/providers/model_capabilities.py +352 -0
  43. abstractcore/providers/ollama_provider.py +44 -6
  44. abstractcore/providers/openai_provider.py +29 -2
  45. abstractcore/providers/registry.py +91 -19
  46. abstractcore/server/app.py +91 -81
  47. abstractcore/structured/handler.py +161 -1
  48. abstractcore/tools/common_tools.py +98 -3
  49. abstractcore/utils/__init__.py +4 -1
  50. abstractcore/utils/cli.py +114 -1
  51. abstractcore/utils/trace_export.py +287 -0
  52. abstractcore/utils/version.py +1 -1
  53. abstractcore/utils/vlm_token_calculator.py +655 -0
  54. {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/METADATA +140 -23
  55. abstractcore-2.5.3.dist-info/RECORD +107 -0
  56. {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/entry_points.txt +4 -0
  57. abstractcore-2.5.0.dist-info/RECORD +0 -86
  58. {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/WHEEL +0 -0
  59. {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/licenses/LICENSE +0 -0
  60. {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/top_level.txt +0 -0
@@ -25,6 +25,7 @@ class LMStudioProvider(BaseProvider):
 
     def __init__(self, model: str = "local-model", base_url: str = "http://localhost:1234/v1", **kwargs):
         super().__init__(model, **kwargs)
+        self.provider = "lmstudio"
 
         # Initialize tool handler
         self.tool_handler = UniversalToolHandler(model)
@@ -201,12 +202,34 @@ class LMStudioProvider(BaseProvider):
             "max_tokens": max_output_tokens,  # LMStudio uses max_tokens for output tokens
             "top_p": kwargs.get("top_p", 0.9),
         }
+
+        # Add additional generation parameters if provided (OpenAI-compatible)
+        if "frequency_penalty" in kwargs:
+            payload["frequency_penalty"] = kwargs["frequency_penalty"]
+        if "presence_penalty" in kwargs:
+            payload["presence_penalty"] = kwargs["presence_penalty"]
+        if "repetition_penalty" in kwargs:
+            # Some models support repetition_penalty directly
+            payload["repetition_penalty"] = kwargs["repetition_penalty"]
 
         # Add seed if provided (LMStudio supports seed via OpenAI-compatible API)
         seed_value = kwargs.get("seed", self.seed)
         if seed_value is not None:
             payload["seed"] = seed_value
 
+        # Add structured output support (OpenAI-compatible format)
+        # LMStudio supports native structured outputs using the response_format parameter
+        # This provides server-side guaranteed schema compliance
+        if response_model and PYDANTIC_AVAILABLE:
+            json_schema = response_model.model_json_schema()
+            payload["response_format"] = {
+                "type": "json_schema",
+                "json_schema": {
+                    "name": response_model.__name__,
+                    "schema": json_schema
+                }
+            }
+
         if stream:
             # Return streaming response - BaseProvider will handle tag rewriting via UnifiedStreamProcessor
             return self._stream_generate(payload)
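
For reference, the response_format block that this hunk builds from a Pydantic response_model can be reproduced standalone. The sketch below is not part of the package; it assumes only pydantic v2 is installed and uses an illustrative City model and placeholder payload values:

    from pydantic import BaseModel

    class City(BaseModel):
        name: str
        population: int

    # Mirrors the payload construction in the hunk above (illustrative values only).
    payload = {
        "model": "local-model",
        "response_format": {
            "type": "json_schema",
            "json_schema": {
                "name": City.__name__,               # "City"
                "schema": City.model_json_schema(),  # standard Pydantic v2 JSON schema
            },
        },
    }
    print(payload["response_format"]["json_schema"]["name"])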
@@ -412,8 +435,21 @@ class LMStudioProvider(BaseProvider):
         return handler
 
     def list_available_models(self, **kwargs) -> List[str]:
-        """List available models from LMStudio server."""
+        """
+        List available models from LMStudio server.
+
+        Args:
+            **kwargs: Optional parameters including:
+                - base_url: LMStudio server URL
+                - input_capabilities: List of ModelInputCapability enums to filter by input capability
+                - output_capabilities: List of ModelOutputCapability enums to filter by output capability
+
+        Returns:
+            List of model names, optionally filtered by capabilities
+        """
         try:
+            from .model_capabilities import filter_models_by_capabilities
+
             # Use provided base_url or fall back to instance base_url
             base_url = kwargs.get('base_url', self.base_url)
 
@@ -421,7 +457,21 @@ class LMStudioProvider(BaseProvider):
             if response.status_code == 200:
                 data = response.json()
                 models = [model["id"] for model in data.get("data", [])]
-                return sorted(models)
+                models = sorted(models)
+
+                # Apply new capability filtering if provided
+                input_capabilities = kwargs.get('input_capabilities')
+                output_capabilities = kwargs.get('output_capabilities')
+
+                if input_capabilities or output_capabilities:
+                    models = filter_models_by_capabilities(
+                        models,
+                        input_capabilities=input_capabilities,
+                        output_capabilities=output_capabilities
+                    )
+
+
+                return models
             else:
                 self.logger.warning(f"LMStudio API returned status {response.status_code}")
                 return []
@@ -11,6 +11,14 @@ try:
 except ImportError:
     PYDANTIC_AVAILABLE = False
     BaseModel = None
+
+# Try to import Outlines (native structured output for MLX models)
+try:
+    import outlines
+    OUTLINES_AVAILABLE = True
+except ImportError:
+    OUTLINES_AVAILABLE = False
+
 from .base import BaseProvider
 from ..core.types import GenerateResponse
 from ..exceptions import ProviderAPIError, ModelNotFoundError, format_model_error
@@ -21,12 +29,20 @@ from ..events import EventType
 class MLXProvider(BaseProvider):
     """MLX provider for Apple Silicon models with full integration"""
 
-    def __init__(self, model: str = "mlx-community/Mistral-7B-Instruct-v0.1-4bit", **kwargs):
+    def __init__(self, model: str = "mlx-community/Mistral-7B-Instruct-v0.1-4bit",
+                 structured_output_method: str = "auto", **kwargs):
         super().__init__(model, **kwargs)
+        self.provider = "mlx"
 
         # Handle timeout parameter for local models
         self._handle_timeout_parameter(kwargs)
 
+        # Structured output method: "auto", "native_outlines", "prompted"
+        # auto: Use Outlines if available, otherwise prompted (default)
+        # native_outlines: Force Outlines (error if unavailable)
+        # prompted: Always use prompted fallback (fastest, still 100% success)
+        self.structured_output_method = structured_output_method
+
         # Initialize tool handler
         self.tool_handler = UniversalToolHandler(model)
 
@@ -143,7 +159,7 @@ class MLXProvider(BaseProvider):
                            stream: bool = False,
                            response_model: Optional[Type[BaseModel]] = None,
                            **kwargs) -> Union[GenerateResponse, Iterator[GenerateResponse]]:
-        """Internal generation with MLX"""
+        """Internal generation with MLX and optional Outlines native structured output"""
 
         if not self.llm or not self.tokenizer:
             return GenerateResponse(
@@ -152,6 +168,64 @@ class MLXProvider(BaseProvider):
                 finish_reason="error"
             )
 
+        # Native structured output via Outlines (if configured and available)
+        should_use_outlines = (
+            response_model and
+            PYDANTIC_AVAILABLE and
+            not stream and
+            self.structured_output_method != "prompted"  # Skip if explicitly prompted
+        )
+
+        if should_use_outlines:
+            # Check if Outlines is required but unavailable
+            if self.structured_output_method == "native_outlines" and not OUTLINES_AVAILABLE:
+                return GenerateResponse(
+                    content="Error: structured_output_method='native_outlines' requires Outlines library. Install with: pip install abstractcore[mlx]",
+                    model=self.model,
+                    finish_reason="error"
+                )
+
+            # Try Outlines if available (auto or native_outlines mode)
+            if OUTLINES_AVAILABLE:
+                try:
+                    # Cache Outlines MLX model wrapper to avoid re-initialization
+                    if not hasattr(self, '_outlines_model') or self._outlines_model is None:
+                        self.logger.debug("Creating Outlines MLX model wrapper for native structured output")
+                        self._outlines_model = outlines.from_mlxlm(self.llm, self.tokenizer)
+
+                    # Build full prompt (same as normal generation)
+                    processed_prompt = prompt
+                    full_prompt = self._build_prompt(processed_prompt, messages, system_prompt, tools)
+
+                    # Create constrained generator with JSON schema
+                    self.logger.debug(f"Using Outlines native structured output for {response_model.__name__}")
+                    generator = self._outlines_model(
+                        full_prompt,
+                        outlines.json_schema(response_model),
+                        max_tokens=kwargs.get("max_tokens", self.max_tokens or 512)
+                    )
+
+                    # Validate and return
+                    validated_obj = response_model.model_validate(generator)
+
+                    return GenerateResponse(
+                        content=validated_obj.model_dump_json(),
+                        model=self.model,
+                        finish_reason="stop",
+                        validated_object=validated_obj
+                    )
+                except Exception as e:
+                    # If native_outlines was explicitly requested, don't fall back
+                    if self.structured_output_method == "native_outlines":
+                        return GenerateResponse(
+                            content=f"Error: Outlines native structured output failed: {str(e)}",
+                            model=self.model,
+                            finish_reason="error"
+                        )
+                    # Otherwise fall back to prompted approach
+                    self.logger.debug(f"Outlines generation failed, falling back to prompted: {e}")
+                    # Continue with normal generation below
+
         # Handle media content first if present
         processed_prompt = prompt
         if media:
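
A minimal sketch (not part of the diff) of how the three structured_output_method modes added above are selected; it assumes an Apple Silicon machine with abstractcore[mlx] installed and uses only the constructor signature shown in this diff:

    from abstractcore.providers.mlx_provider import MLXProvider

    # "auto" (default): use Outlines when importable, otherwise the prompted fallback.
    provider = MLXProvider(structured_output_method="auto")

    # "native_outlines": require Outlines; generation returns an error response
    # if the library is missing (install via: pip install abstractcore[mlx]).
    strict = MLXProvider(structured_output_method="native_outlines")

    # "prompted": always skip Outlines and rely on the prompted fallback.
    prompted = MLXProvider(structured_output_method="prompted")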
@@ -420,8 +494,19 @@ class MLXProvider(BaseProvider):
 
     @classmethod
     def list_available_models(cls, **kwargs) -> List[str]:
-        """List available MLX models from HuggingFace cache."""
+        """
+        List available MLX models from HuggingFace cache.
+
+        Args:
+            **kwargs: Optional parameters including:
+                - input_capabilities: List of ModelInputCapability enums to filter by input capability
+                - output_capabilities: List of ModelOutputCapability enums to filter by output capability
+
+        Returns:
+            List of model names, optionally filtered by capabilities
+        """
         from pathlib import Path
+        from .model_capabilities import filter_models_by_capabilities
 
         try:
             hf_cache = Path.home() / ".cache" / "huggingface" / "hub"
@@ -439,7 +524,21 @@ class MLXProvider(BaseProvider):
                 if "mlx" in model_name.lower():
                     models.append(model_name)
 
-            return sorted(models)
+            models = sorted(models)
+
+            # Apply new capability filtering if provided
+            input_capabilities = kwargs.get('input_capabilities')
+            output_capabilities = kwargs.get('output_capabilities')
+
+            if input_capabilities or output_capabilities:
+                models = filter_models_by_capabilities(
+                    models,
+                    input_capabilities=input_capabilities,
+                    output_capabilities=output_capabilities
+                )
+
+
+            return models
 
         except Exception:
             return []
@@ -0,0 +1,352 @@
+"""
+Model capability definitions for input and output filtering.
+
+This module provides clear enums for filtering models based on what types of
+input they can process and what types of output they can generate.
+
+Key Concepts:
+- Input Capabilities: What data types can the model accept and analyze?
+- Output Capabilities: What data types can the model generate?
+
+Examples:
+    >>> from abstractcore.providers.model_capabilities import ModelInputCapability, ModelOutputCapability
+    >>> from abstractcore.providers import OllamaProvider
+    >>>
+    >>> # Get models that can analyze images
+    >>> vision_models = OllamaProvider.list_available_models(
+    ...     input_capabilities=[ModelInputCapability.IMAGE]
+    ... )
+    >>>
+    >>> # Get embedding models
+    >>> embedding_models = OllamaProvider.list_available_models(
+    ...     output_capabilities=[ModelOutputCapability.EMBEDDINGS]
+    ... )
+    >>>
+    >>> # Get vision models that generate text (most common case)
+    >>> vision_text_models = OllamaProvider.list_available_models(
+    ...     input_capabilities=[ModelInputCapability.TEXT, ModelInputCapability.IMAGE],
+    ...     output_capabilities=[ModelOutputCapability.TEXT]
+    ... )
+"""
+
+from enum import Enum
+from typing import List, Set, Optional, Dict, Any
+from ..architectures.detection import get_model_capabilities
+
+
+class ModelInputCapability(Enum):
+    """
+    Enumeration of input data types that models can process and analyze.
+
+    These capabilities define what types of input data a model can accept
+    and understand. Most multimodal models support TEXT plus one or more
+    additional input types.
+
+    Values:
+        TEXT: Model can process text input (all models support this)
+        IMAGE: Model can analyze and understand images (vision models)
+        AUDIO: Model can process and analyze audio input
+        VIDEO: Model can analyze video content
+
+    Examples:
+        >>> # Text-only model
+        >>> text_only = [ModelInputCapability.TEXT]
+        >>>
+        >>> # Vision model (supports both text and images)
+        >>> vision_model = [ModelInputCapability.TEXT, ModelInputCapability.IMAGE]
+        >>>
+        >>> # Audio model (supports both text and audio)
+        >>> audio_model = [ModelInputCapability.TEXT, ModelInputCapability.AUDIO]
+    """
+
+    TEXT = "text"
+    """Model can process and understand text input (supported by all models)"""
+
+    IMAGE = "image"
+    """Model can analyze and understand image input (vision models)"""
+
+    AUDIO = "audio"
+    """Model can process and analyze audio input"""
+
+    VIDEO = "video"
+    """Model can analyze and understand video input"""
+
+
+class ModelOutputCapability(Enum):
+    """
+    Enumeration of output data types that models can generate.
+
+    These capabilities define what types of output a model can produce.
+    Currently, AbstractCore supports text generation and embedding generation.
+
+    Values:
+        TEXT: Model generates text responses (most common)
+        EMBEDDINGS: Model generates vector embeddings (embedding models)
+
+    Examples:
+        >>> # Regular chat/completion model
+        >>> text_model = [ModelOutputCapability.TEXT]
+        >>>
+        >>> # Embedding model
+        >>> embedding_model = [ModelOutputCapability.EMBEDDINGS]
+
+    Note:
+        Future versions may include IMAGE, AUDIO, VIDEO for generative models.
+    """
+
+    TEXT = "text"
+    """Model generates text responses (chat, completion, etc.)"""
+
+    EMBEDDINGS = "embeddings"
+    """Model generates vector embeddings for semantic search/similarity"""
+
+
+def get_model_input_capabilities(model_name: str) -> List[ModelInputCapability]:
+    """
+    Determine what input capabilities a model supports.
+
+    Args:
+        model_name: Name of the model to check
+
+    Returns:
+        List of input capabilities the model supports
+
+    Examples:
+        >>> caps = get_model_input_capabilities("gpt-4-vision-preview")
+        >>> print(caps)
+        [<ModelInputCapability.TEXT: 'text'>, <ModelInputCapability.IMAGE: 'image'>]
+
+        >>> caps = get_model_input_capabilities("gpt-4")
+        >>> print(caps)
+        [<ModelInputCapability.TEXT: 'text'>]
+    """
+    try:
+        capabilities = get_model_capabilities(model_name)
+    except Exception:
+        # If we can't get capabilities, assume text-only
+        return [ModelInputCapability.TEXT]
+
+    input_caps = [ModelInputCapability.TEXT]  # All models support text
+
+    if capabilities.get("vision_support", False):
+        input_caps.append(ModelInputCapability.IMAGE)
+
+    if capabilities.get("audio_support", False):
+        input_caps.append(ModelInputCapability.AUDIO)
+
+    if capabilities.get("video_support", False):
+        input_caps.append(ModelInputCapability.VIDEO)
+
+    return input_caps
+
+
+def get_model_output_capabilities(model_name: str) -> List[ModelOutputCapability]:
+    """
+    Determine what output capabilities a model supports.
+
+    Args:
+        model_name: Name of the model to check
+
+    Returns:
+        List of output capabilities the model supports
+
+    Examples:
+        >>> caps = get_model_output_capabilities("gpt-4")
+        >>> print(caps)
+        [<ModelOutputCapability.TEXT: 'text'>]
+
+        >>> caps = get_model_output_capabilities("text-embedding-3-small")
+        >>> print(caps)
+        [<ModelOutputCapability.EMBEDDINGS: 'embeddings'>]
+    """
+    try:
+        capabilities = get_model_capabilities(model_name)
+    except Exception:
+        # If we can't get capabilities, assume text generation
+        return [ModelOutputCapability.TEXT]
+
+    # Check if it's explicitly marked as an embedding model
+    if capabilities.get("model_type") == "embedding":
+        return [ModelOutputCapability.EMBEDDINGS]
+
+    # Check for embedding model name patterns
+    model_lower = model_name.lower()
+    embedding_patterns = [
+        "embedding", "embed", "embeddings",
+        "text-embedding", "sentence-transformer",
+        "all-minilm", "nomic-embed", "granite-embedding",
+        "qwen3-embedding", "embeddinggemma"
+    ]
+
+    if any(pattern in model_lower for pattern in embedding_patterns):
+        return [ModelOutputCapability.EMBEDDINGS]
+
+    # Default to text generation
+    return [ModelOutputCapability.TEXT]
+
+
+def model_matches_input_capabilities(
+    model_name: str,
+    required_capabilities: List[ModelInputCapability]
+) -> bool:
+    """
+    Check if a model supports all required input capabilities.
+
+    Args:
+        model_name: Name of the model to check
+        required_capabilities: List of required input capabilities
+
+    Returns:
+        True if model supports all required capabilities, False otherwise
+
+    Examples:
+        >>> # Check if model supports both text and image input
+        >>> required = [ModelInputCapability.TEXT, ModelInputCapability.IMAGE]
+        >>> model_matches_input_capabilities("gpt-4-vision-preview", required)
+        True
+
+        >>> model_matches_input_capabilities("gpt-4", required)
+        False
+    """
+    if not required_capabilities:
+        return True
+
+    model_caps = get_model_input_capabilities(model_name)
+    model_caps_set = set(model_caps)
+    required_set = set(required_capabilities)
+
+    return required_set.issubset(model_caps_set)
+
+
+def model_matches_output_capabilities(
+    model_name: str,
+    required_capabilities: List[ModelOutputCapability]
+) -> bool:
+    """
+    Check if a model supports all required output capabilities.
+
+    Args:
+        model_name: Name of the model to check
+        required_capabilities: List of required output capabilities
+
+    Returns:
+        True if model supports all required capabilities, False otherwise
+
+    Examples:
+        >>> # Check if model generates text
+        >>> required = [ModelOutputCapability.TEXT]
+        >>> model_matches_output_capabilities("gpt-4", required)
+        True
+
+        >>> # Check if model generates embeddings
+        >>> required = [ModelOutputCapability.EMBEDDINGS]
+        >>> model_matches_output_capabilities("text-embedding-3-small", required)
+        True
+        >>> model_matches_output_capabilities("gpt-4", required)
+        False
+    """
+    if not required_capabilities:
+        return True
+
+    model_caps = get_model_output_capabilities(model_name)
+    model_caps_set = set(model_caps)
+    required_set = set(required_capabilities)
+
+    return required_set.issubset(model_caps_set)
+
+
+def filter_models_by_capabilities(
+    models: List[str],
+    input_capabilities: Optional[List[ModelInputCapability]] = None,
+    output_capabilities: Optional[List[ModelOutputCapability]] = None
+) -> List[str]:
+    """
+    Filter a list of models based on input and output capability requirements.
+
+    Args:
+        models: List of model names to filter
+        input_capabilities: Required input capabilities (None = no filtering)
+        output_capabilities: Required output capabilities (None = no filtering)
+
+    Returns:
+        Filtered list of model names that match all requirements
+
+    Examples:
+        >>> models = ["gpt-4", "gpt-4-vision-preview", "text-embedding-3-small"]
+        >>>
+        >>> # Get vision models
+        >>> vision_models = filter_models_by_capabilities(
+        ...     models,
+        ...     input_capabilities=[ModelInputCapability.IMAGE]
+        ... )
+        >>> print(vision_models)
+        ['gpt-4-vision-preview']
+        >>>
+        >>> # Get embedding models
+        >>> embedding_models = filter_models_by_capabilities(
+        ...     models,
+        ...     output_capabilities=[ModelOutputCapability.EMBEDDINGS]
+        ... )
+        >>> print(embedding_models)
+        ['text-embedding-3-small']
+        >>>
+        >>> # Get text generation models
+        >>> text_models = filter_models_by_capabilities(
+        ...     models,
+        ...     output_capabilities=[ModelOutputCapability.TEXT]
+        ... )
+        >>> print(text_models)
+        ['gpt-4', 'gpt-4-vision-preview']
+    """
+    filtered_models = []
+
+    for model_name in models:
+        try:
+            # Check input capabilities
+            if input_capabilities and not model_matches_input_capabilities(model_name, input_capabilities):
+                continue
+
+            # Check output capabilities
+            if output_capabilities and not model_matches_output_capabilities(model_name, output_capabilities):
+                continue
+
+            filtered_models.append(model_name)
+        except Exception:
+            # If we can't get capabilities, skip this model
+            # (it likely doesn't have an entry in model_capabilities.json)
+            continue
+
+    return filtered_models
+
+
+def get_capability_summary(model_name: str) -> Dict[str, Any]:
+    """
+    Get a comprehensive summary of a model's input and output capabilities.
+
+    Args:
+        model_name: Name of the model to analyze
+
+    Returns:
+        Dictionary containing input and output capabilities
+
+    Examples:
+        >>> summary = get_capability_summary("gpt-4-vision-preview")
+        >>> print(summary)
+        {
+            'model_name': 'gpt-4-vision-preview',
+            'input_capabilities': ['text', 'image'],
+            'output_capabilities': ['text'],
+            'is_multimodal': True,
+            'is_embedding_model': False
+        }
+    """
+    input_caps = get_model_input_capabilities(model_name)
+    output_caps = get_model_output_capabilities(model_name)
+
+    return {
+        'model_name': model_name,
+        'input_capabilities': [cap.value for cap in input_caps],
+        'output_capabilities': [cap.value for cap in output_caps],
+        'is_multimodal': len(input_caps) > 1,
+        'is_embedding_model': ModelOutputCapability.EMBEDDINGS in output_caps
+    }
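
A short usage sketch for the new module (not part of the diff), assuming abstractcore 2.5.3 is installed and the example model names have entries in model_capabilities.json:

    from abstractcore.providers.model_capabilities import (
        ModelInputCapability,
        ModelOutputCapability,
        filter_models_by_capabilities,
        get_capability_summary,
    )

    candidates = ["gpt-4", "gpt-4-vision-preview", "text-embedding-3-small"]

    # Keep only models that accept image input.
    vision = filter_models_by_capabilities(
        candidates, input_capabilities=[ModelInputCapability.IMAGE]
    )

    # Keep only models that produce embeddings.
    embeddings = filter_models_by_capabilities(
        candidates, output_capabilities=[ModelOutputCapability.EMBEDDINGS]
    )

    print(vision)      # expected: ['gpt-4-vision-preview']
    print(embeddings)  # expected: ['text-embedding-3-small']
    print(get_capability_summary("gpt-4-vision-preview"))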