abstractcore 2.5.2__py3-none-any.whl → 2.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. abstractcore/__init__.py +12 -0
  2. abstractcore/architectures/detection.py +250 -4
  3. abstractcore/assets/architecture_formats.json +14 -1
  4. abstractcore/assets/model_capabilities.json +533 -10
  5. abstractcore/compression/__init__.py +29 -0
  6. abstractcore/compression/analytics.py +420 -0
  7. abstractcore/compression/cache.py +250 -0
  8. abstractcore/compression/config.py +279 -0
  9. abstractcore/compression/exceptions.py +30 -0
  10. abstractcore/compression/glyph_processor.py +381 -0
  11. abstractcore/compression/optimizer.py +388 -0
  12. abstractcore/compression/orchestrator.py +380 -0
  13. abstractcore/compression/pil_text_renderer.py +818 -0
  14. abstractcore/compression/quality.py +226 -0
  15. abstractcore/compression/text_formatter.py +666 -0
  16. abstractcore/compression/vision_compressor.py +371 -0
  17. abstractcore/config/main.py +64 -0
  18. abstractcore/config/manager.py +100 -5
  19. abstractcore/core/session.py +61 -6
  20. abstractcore/events/__init__.py +1 -1
  21. abstractcore/media/auto_handler.py +312 -18
  22. abstractcore/media/handlers/local_handler.py +14 -2
  23. abstractcore/media/handlers/openai_handler.py +62 -3
  24. abstractcore/media/processors/__init__.py +11 -1
  25. abstractcore/media/processors/direct_pdf_processor.py +210 -0
  26. abstractcore/media/processors/glyph_pdf_processor.py +227 -0
  27. abstractcore/media/processors/image_processor.py +7 -1
  28. abstractcore/media/processors/text_processor.py +18 -3
  29. abstractcore/media/types.py +164 -7
  30. abstractcore/providers/__init__.py +18 -0
  31. abstractcore/providers/anthropic_provider.py +28 -2
  32. abstractcore/providers/base.py +278 -6
  33. abstractcore/providers/huggingface_provider.py +563 -23
  34. abstractcore/providers/lmstudio_provider.py +38 -2
  35. abstractcore/providers/mlx_provider.py +27 -2
  36. abstractcore/providers/model_capabilities.py +352 -0
  37. abstractcore/providers/ollama_provider.py +38 -4
  38. abstractcore/providers/openai_provider.py +28 -2
  39. abstractcore/providers/registry.py +85 -13
  40. abstractcore/server/app.py +91 -81
  41. abstractcore/utils/__init__.py +4 -1
  42. abstractcore/utils/trace_export.py +287 -0
  43. abstractcore/utils/version.py +1 -1
  44. abstractcore/utils/vlm_token_calculator.py +655 -0
  45. {abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/METADATA +107 -6
  46. {abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/RECORD +50 -33
  47. {abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/WHEEL +0 -0
  48. {abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/entry_points.txt +0 -0
  49. {abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/licenses/LICENSE +0 -0
  50. {abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/top_level.txt +0 -0
@@ -207,15 +207,51 @@ FILE_TYPE_MAPPINGS = {
207
207
  'gif': MediaType.IMAGE, 'bmp': MediaType.IMAGE, 'tif': MediaType.IMAGE,
208
208
  'tiff': MediaType.IMAGE, 'webp': MediaType.IMAGE, 'ico': MediaType.IMAGE,
209
209
 
210
- # Documents
210
+ # Documents (binary formats that need special processing)
211
211
  'pdf': MediaType.DOCUMENT, 'doc': MediaType.DOCUMENT, 'docx': MediaType.DOCUMENT,
212
212
  'xls': MediaType.DOCUMENT, 'xlsx': MediaType.DOCUMENT, 'ppt': MediaType.DOCUMENT,
213
213
  'pptx': MediaType.DOCUMENT, 'odt': MediaType.DOCUMENT, 'rtf': MediaType.DOCUMENT,
214
214
 
215
- # Text formats
216
- 'txt': MediaType.TEXT, 'md': MediaType.TEXT, 'csv': MediaType.TEXT,
217
- 'tsv': MediaType.TEXT, 'json': MediaType.TEXT, 'xml': MediaType.TEXT,
218
- 'html': MediaType.TEXT, 'htm': MediaType.TEXT,
215
+ # Text formats - Common markup and data formats
216
+ 'txt': MediaType.TEXT, 'md': MediaType.TEXT, 'markdown': MediaType.TEXT,
217
+ 'csv': MediaType.TEXT, 'tsv': MediaType.TEXT,
218
+ 'json': MediaType.TEXT, 'jsonl': MediaType.TEXT, 'ndjson': MediaType.TEXT,
219
+ 'xml': MediaType.TEXT, 'html': MediaType.TEXT, 'htm': MediaType.TEXT,
220
+ 'yaml': MediaType.TEXT, 'yml': MediaType.TEXT, 'toml': MediaType.TEXT,
221
+ 'ini': MediaType.TEXT, 'cfg': MediaType.TEXT, 'conf': MediaType.TEXT,
222
+
223
+ # Text formats - Programming and scripting languages
224
+ 'py': MediaType.TEXT, 'pyw': MediaType.TEXT, 'pyx': MediaType.TEXT,
225
+ 'js': MediaType.TEXT, 'jsx': MediaType.TEXT, 'ts': MediaType.TEXT, 'tsx': MediaType.TEXT,
226
+ 'java': MediaType.TEXT, 'kt': MediaType.TEXT, 'scala': MediaType.TEXT,
227
+ 'c': MediaType.TEXT, 'cpp': MediaType.TEXT, 'cc': MediaType.TEXT, 'cxx': MediaType.TEXT,
228
+ 'h': MediaType.TEXT, 'hpp': MediaType.TEXT, 'hxx': MediaType.TEXT,
229
+ 'cs': MediaType.TEXT, 'go': MediaType.TEXT, 'rs': MediaType.TEXT, 'swift': MediaType.TEXT,
230
+ 'rb': MediaType.TEXT, 'php': MediaType.TEXT, 'pl': MediaType.TEXT, 'pm': MediaType.TEXT,
231
+ 'sh': MediaType.TEXT, 'bash': MediaType.TEXT, 'zsh': MediaType.TEXT, 'fish': MediaType.TEXT,
232
+ 'r': MediaType.TEXT, 'R': MediaType.TEXT, 'rmd': MediaType.TEXT, 'Rmd': MediaType.TEXT,
233
+ 'jl': MediaType.TEXT, 'matlab': MediaType.TEXT, 'm': MediaType.TEXT,
234
+ 'sql': MediaType.TEXT, 'lua': MediaType.TEXT, 'vim': MediaType.TEXT,
235
+ 'dart': MediaType.TEXT, 'ex': MediaType.TEXT, 'exs': MediaType.TEXT,
236
+ 'erl': MediaType.TEXT, 'hrl': MediaType.TEXT, 'clj': MediaType.TEXT, 'cljs': MediaType.TEXT,
237
+
238
+ # Text formats - Notebooks and documentation
239
+ 'ipynb': MediaType.TEXT, 'qmd': MediaType.TEXT, 'rst': MediaType.TEXT,
240
+ 'tex': MediaType.TEXT, 'latex': MediaType.TEXT, 'bib': MediaType.TEXT,
241
+ 'org': MediaType.TEXT, 'adoc': MediaType.TEXT, 'asciidoc': MediaType.TEXT,
242
+
243
+ # Text formats - Web and styles
244
+ 'css': MediaType.TEXT, 'scss': MediaType.TEXT, 'sass': MediaType.TEXT, 'less': MediaType.TEXT,
245
+ 'vue': MediaType.TEXT, 'svelte': MediaType.TEXT,
246
+
247
+ # Text formats - Build and config files
248
+ 'gradle': MediaType.TEXT, 'cmake': MediaType.TEXT, 'make': MediaType.TEXT,
249
+ 'dockerfile': MediaType.TEXT, 'containerfile': MediaType.TEXT,
250
+ 'gitignore': MediaType.TEXT, 'gitattributes': MediaType.TEXT,
251
+ 'env': MediaType.TEXT, 'properties': MediaType.TEXT,
252
+
253
+ # Text formats - Log and output files
254
+ 'log': MediaType.TEXT, 'out': MediaType.TEXT, 'err': MediaType.TEXT,
219
255
 
220
256
  # Audio
221
257
  'mp3': MediaType.AUDIO, 'wav': MediaType.AUDIO, 'm4a': MediaType.AUDIO,
@@ -227,9 +263,66 @@ FILE_TYPE_MAPPINGS = {
227
263
  }
228
264
 
229
265
 
266
def is_text_file(file_path: Union[str, Path]) -> bool:
    """
    Detect if a file is text-based by sampling its first bytes.

    This is a heuristic check: at most the first 8KB of the file is read,
    never the whole file.

    Decision order:
        1. A missing or unreadable path is not text.
        2. An empty file is text.
        3. Any NUL byte in the sample means binary.
        4. A sample that decodes as UTF-8 is text. A multi-byte character cut
           in half by the sample boundary is tolerated.
        5. Otherwise the sample is accepted as legacy single-byte text
           (latin-1, cp1252, ...) only when unusual control bytes are rare.

    Args:
        file_path: Path to the file

    Returns:
        True if file appears to be text-based, False otherwise
    """
    import codecs  # function-scope import keeps the block self-contained

    sample_limit = 8192
    # Heuristic threshold: above this share of unusual control bytes the
    # sample is considered binary.
    max_control_ratio = 0.30

    path = Path(file_path)
    if not path.exists():
        return False

    try:
        file_size = path.stat().st_size
        if file_size == 0:
            return True  # Empty files are text

        with open(path, 'rb') as f:
            sample = f.read(min(sample_limit, file_size))
    except (OSError, ValueError):
        # Unreadable (permissions, directory, vanished file, ...): not text
        return False

    if not sample:
        # File was emptied between stat() and read(); same answer as empty
        return True

    # NUL bytes are a strong indicator of binary content
    if b'\x00' in sample:
        return False

    # final=False lets a trailing, incomplete multi-byte sequence pass when
    # the sample stops before the end of the file; invalid bytes still raise.
    sample_is_truncated = file_size > len(sample)
    try:
        codecs.getincrementaldecoder('utf-8')().decode(sample, final=not sample_is_truncated)
        return True
    except UnicodeDecodeError:
        pass

    # Not UTF-8. Decoding as latin-1 succeeds for ANY byte string, so a
    # successful decode proves nothing. Instead, count control bytes that
    # practically never occur in text. Bytes >= 0x80 are not counted, so
    # legacy 8-bit encodings still pass.
    allowed_controls = {0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x1B}
    suspicious = bytes(b for b in range(0x01, 0x20) if b not in allowed_controls) + b'\x7f'
    suspicious_count = len(sample) - len(sample.translate(None, suspicious))

    return suspicious_count / len(sample) <= max_control_ratio
321
+
322
+
230
323
  def detect_media_type(file_path: Union[str, Path]) -> MediaType:
231
324
  """
232
- Detect the media type of a file based on its extension.
325
+ Detect the media type of a file based on its extension and content.
233
326
 
234
327
  Args:
235
328
  file_path: Path to the file
@@ -240,7 +333,71 @@ def detect_media_type(file_path: Union[str, Path]) -> MediaType:
240
333
  path = Path(file_path)
241
334
  extension = path.suffix.lower().lstrip('.')
242
335
 
243
- return FILE_TYPE_MAPPINGS.get(extension, MediaType.DOCUMENT)
336
+ # First check the known extension mappings
337
+ if extension in FILE_TYPE_MAPPINGS:
338
+ return FILE_TYPE_MAPPINGS[extension]
339
+
340
+ # For unknown extensions, try to detect if it's a text file
341
+ # This handles cases like .R, .Rmd, .ipynb, and any other text-based files
342
+ if is_text_file(path):
343
+ return MediaType.TEXT
344
+
345
+ # Fall back to DOCUMENT for binary files with unknown extensions
346
+ return MediaType.DOCUMENT
347
+
348
+
349
def get_all_supported_extensions() -> Dict[str, List[str]]:
    """
    Group every registered file extension under its media type name.

    The result is rebuilt from FILE_TYPE_MAPPINGS on each call, so callers
    receive a fresh dictionary they are free to modify.

    Returns:
        Dictionary keyed by media type value; each entry holds that type's
        extensions (without dots) in sorted order.

    Example:
        >>> from abstractcore.media.types import get_all_supported_extensions
        >>> formats = get_all_supported_extensions()
        >>> 'md' in formats['text']
        True
        >>> formats['text'] == sorted(formats['text'])
        True
    """
    grouped = {}
    for extension, kind in FILE_TYPE_MAPPINGS.items():
        grouped.setdefault(kind.value, []).append(extension)

    # Sorted lists keep the output stable regardless of mapping order
    return {type_name: sorted(exts) for type_name, exts in grouped.items()}
379
+
380
+
381
def get_supported_extensions_by_type(media_type: MediaType) -> List[str]:
    """
    List the file extensions registered for one media type.

    Args:
        media_type: The MediaType whose extensions are wanted

    Returns:
        Sorted list of extensions (without dots) that FILE_TYPE_MAPPINGS
        maps to media_type; empty when nothing maps to it

    Example:
        >>> from abstractcore.media.types import get_supported_extensions_by_type, MediaType
        >>> text_exts = get_supported_extensions_by_type(MediaType.TEXT)
        >>> 'r' in text_exts  # R scripts
        True
        >>> 'ipynb' in text_exts  # Jupyter notebooks
        True
    """
    return sorted(
        ext
        for ext, mapped_type in FILE_TYPE_MAPPINGS.items()
        if mapped_type == media_type
    )
244
401
 
245
402
 
246
403
  def create_media_content(
@@ -22,6 +22,16 @@ from .registry import (
22
22
  get_available_models_for_provider
23
23
  )
24
24
 
25
+ # Model capability filtering (new system)
26
+ from .model_capabilities import (
27
+ ModelInputCapability,
28
+ ModelOutputCapability,
29
+ get_model_input_capabilities,
30
+ get_model_output_capabilities,
31
+ filter_models_by_capabilities,
32
+ get_capability_summary
33
+ )
34
+
25
35
  __all__ = [
26
36
  # Provider classes
27
37
  'BaseProvider',
@@ -43,4 +53,12 @@ __all__ = [
43
53
  'get_all_providers_status',
44
54
  'create_provider',
45
55
  'get_available_models_for_provider',
56
+
57
+ # Model capability filtering (new system)
58
+ 'ModelInputCapability',
59
+ 'ModelOutputCapability',
60
+ 'get_model_input_capabilities',
61
+ 'get_model_output_capabilities',
62
+ 'filter_models_by_capabilities',
63
+ 'get_capability_summary',
46
64
  ]
@@ -455,9 +455,21 @@ class AnthropicProvider(BaseProvider):
455
455
  # Create new client with updated timeout
456
456
  self.client = anthropic.Anthropic(api_key=self.api_key, timeout=self._timeout)
457
457
  def list_available_models(self, **kwargs) -> List[str]:
458
- """List available models from Anthropic API."""
458
+ """
459
+ List available models from Anthropic API.
460
+
461
+ Args:
462
+ **kwargs: Optional parameters including:
463
+ - api_key: Anthropic API key
464
+ - input_capabilities: List of ModelInputCapability enums to filter by input capability
465
+ - output_capabilities: List of ModelOutputCapability enums to filter by output capability
466
+
467
+ Returns:
468
+ List of model names, optionally filtered by capabilities
469
+ """
459
470
  try:
460
471
  import httpx
472
+ from .model_capabilities import filter_models_by_capabilities
461
473
 
462
474
  # Use provided API key or instance API key
463
475
  api_key = kwargs.get('api_key', self.api_key)
@@ -481,7 +493,21 @@ class AnthropicProvider(BaseProvider):
481
493
  data = response.json()
482
494
  models = [model["id"] for model in data.get("data", [])]
483
495
  self.logger.debug(f"Retrieved {len(models)} models from Anthropic API")
484
- return sorted(models, reverse=True) # Latest models first
496
+ models = sorted(models, reverse=True) # Latest models first
497
+
498
+ # Apply new capability filtering if provided
499
+ input_capabilities = kwargs.get('input_capabilities')
500
+ output_capabilities = kwargs.get('output_capabilities')
501
+
502
+ if input_capabilities or output_capabilities:
503
+ models = filter_models_by_capabilities(
504
+ models,
505
+ input_capabilities=input_capabilities,
506
+ output_capabilities=output_capabilities
507
+ )
508
+
509
+
510
+ return models
485
511
  else:
486
512
  self.logger.warning(f"Anthropic API returned status {response.status_code}")
487
513
  return []
@@ -3,6 +3,8 @@ Base provider with integrated telemetry, events, and exception handling.
3
3
  """
4
4
 
5
5
  import time
6
+ import uuid
7
+ from collections import deque
6
8
  from typing import List, Dict, Any, Optional, Union, Iterator, Type
7
9
  from abc import ABC, abstractmethod
8
10
 
@@ -67,6 +69,13 @@ class BaseProvider(AbstractCoreInterface, ABC):
67
69
 
68
70
  # Create provider key for circuit breaker tracking
69
71
  self.provider_key = f"{self.__class__.__name__}:{self.model}"
72
+
73
+ # Setup Glyph compression configuration
74
+ self.glyph_config = kwargs.get('glyph_config', None)
75
+
76
+ # Setup interaction tracing
77
+ self.enable_tracing = kwargs.get('enable_tracing', False)
78
+ self._traces = deque(maxlen=kwargs.get('max_traces', 100)) # Ring buffer for memory efficiency
70
79
 
71
80
  # Provider created successfully - no event emission needed
72
81
  # (The simplified event system focuses on generation and tool events only)
@@ -173,6 +182,97 @@ class BaseProvider(AbstractCoreInterface, ABC):
173
182
  result_info = f" (result length: {len(str(result))})" if result else ""
174
183
  self.logger.info(f"Tool call completed: {tool_name}{result_info}")
175
184
 
185
def _capture_trace(self, prompt: str, messages: Optional[List[Dict[str, str]]],
                   system_prompt: Optional[str], tools: Optional[List[Dict[str, Any]]],
                   response: GenerateResponse, kwargs: Dict[str, Any]) -> str:
    """
    Record one prompt/response exchange in the provider's trace buffer.

    Args:
        prompt: Input prompt
        messages: Conversation history
        system_prompt: System prompt
        tools: Available tools
        response: Generated response
        kwargs: Additional generation parameters; per-call values override
            the provider's defaults, and 'trace_metadata' (if present) is
            stored with the trace

    Returns:
        Trace ID (UUID string) of the stored record
    """
    trace_id = str(uuid.uuid4())

    # Always-recorded parameters: the per-call value wins over the default
    parameters = {
        'temperature': kwargs.get('temperature', self.temperature),
        'max_tokens': kwargs.get('max_tokens', self.max_tokens),
        'max_output_tokens': kwargs.get('max_output_tokens', self.max_output_tokens),
    }

    # Optional parameters are recorded only when a value is available
    optional_values = (
        ('seed', kwargs.get('seed', self.seed)),
        ('top_p', kwargs.get('top_p', getattr(self, 'top_p', None))),
        ('top_k', kwargs.get('top_k', getattr(self, 'top_k', None))),
    )
    parameters.update(
        {name: value for name, value in optional_values if value is not None}
    )

    response_summary = {
        'content': response.content,
        'raw_response': None,  # Omit raw_response to save memory and avoid logging sensitive data
        'tool_calls': response.tool_calls,
        'finish_reason': response.finish_reason,
        'usage': response.usage,
        'generation_time_ms': response.gen_time,
    }

    # Appending to self._traces stores the record for later get_traces() calls
    self._traces.append({
        'trace_id': trace_id,
        'timestamp': datetime.now().isoformat(),
        'provider': self.__class__.__name__,
        'model': self.model,
        'system_prompt': system_prompt,
        'prompt': prompt,
        'messages': messages,
        'tools': tools,
        'parameters': parameters,
        'response': response_summary,
        'metadata': kwargs.get('trace_metadata', {}),
    })

    return trace_id
251
+
252
def get_traces(self, trace_id: Optional[str] = None, last_n: Optional[int] = None) -> Union[Optional[Dict[str, Any]], List[Dict[str, Any]]]:
    """
    Retrieve interaction traces.

    Args:
        trace_id: Optional specific trace ID to retrieve. Takes precedence
            over last_n. An empty string is treated as "not provided".
        last_n: Optional number of most recent traces to retrieve. Zero or a
            negative number yields an empty list.

    Returns:
        - trace_id provided: the matching trace dict, or None when no stored
          trace has that ID
        - last_n provided: list with at most last_n traces, in stored order
        - neither provided: list of all stored traces, in stored order
    """
    if trace_id:
        # Find specific trace by ID
        return next(
            (trace for trace in self._traces if trace['trace_id'] == trace_id),
            None,
        )

    traces = list(self._traces)

    # Compare against None so that last_n=0 is honoured instead of being
    # mistaken for "no limit"
    if last_n is None:
        return traces
    if last_n <= 0:
        return []

    # Slicing already copes with last_n larger than the number of traces
    return traces[-last_n:]
176
276
 
177
277
  def _handle_api_error(self, error: Exception) -> Exception:
178
278
  """
@@ -211,6 +311,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
211
311
  retry_strategy=None, # Custom retry strategy for structured output
212
312
  tool_call_tags: Optional[str] = None, # Tool call tag rewriting
213
313
  execute_tools: Optional[bool] = None, # Tool execution control
314
+ glyph_compression: Optional[str] = None, # Glyph compression preference
214
315
  **kwargs) -> Union[GenerateResponse, Iterator[GenerateResponse], BaseModel]:
215
316
  """
216
317
  Generate with integrated telemetry and error handling.
@@ -227,6 +328,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
227
328
  retry_strategy: Optional retry strategy for structured output validation
228
329
  tool_call_tags: Optional tool call tag format for rewriting
229
330
  execute_tools: Whether to execute tools automatically (True) or let agent handle execution (False)
331
+ glyph_compression: Glyph compression preference ("auto", "always", "never")
230
332
  """
231
333
  # Handle structured output request
232
334
  if response_model is not None:
@@ -269,8 +371,17 @@ class BaseProvider(AbstractCoreInterface, ABC):
269
371
 
270
372
  # Process media content if provided
271
373
  processed_media = None
374
+ media_metadata = None
272
375
  if media:
273
- processed_media = self._process_media_content(media)
376
+ compression_pref = glyph_compression or kwargs.get('glyph_compression', 'auto')
377
+ processed_media = self._process_media_content(media, compression_pref)
378
+
379
+ # Extract metadata from processed media for response
380
+ if processed_media:
381
+ media_metadata = []
382
+ for media_content in processed_media:
383
+ if hasattr(media_content, 'metadata') and media_content.metadata:
384
+ media_metadata.append(media_content.metadata)
274
385
 
275
386
  # Convert tools to ToolDefinition objects first (outside retry loop)
276
387
  converted_tools = None
@@ -327,6 +438,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
327
438
  stream=stream,
328
439
  execute_tools=should_execute_tools,
329
440
  tool_call_tags=tool_call_tags,
441
+ media_metadata=media_metadata,
330
442
  **kwargs
331
443
  )
332
444
 
@@ -380,6 +492,26 @@ class BaseProvider(AbstractCoreInterface, ABC):
380
492
  # Apply default qwen3 rewriting for non-streaming responses
381
493
  response = self._apply_non_streaming_tag_rewriting(response, tool_call_tags)
382
494
 
495
+ # Add visual token calculation if media metadata is available
496
+ if media_metadata and response:
497
+ self.logger.debug(f"Enhancing response with visual tokens from {len(media_metadata)} media items")
498
+ response = self._enhance_response_with_visual_tokens(response, media_metadata)
499
+
500
+ # Capture interaction trace if enabled
501
+ if self.enable_tracing and response:
502
+ trace_id = self._capture_trace(
503
+ prompt=prompt,
504
+ messages=messages,
505
+ system_prompt=system_prompt,
506
+ tools=converted_tools,
507
+ response=response,
508
+ kwargs=kwargs
509
+ )
510
+ # Attach trace_id to response metadata
511
+ if not response.metadata:
512
+ response.metadata = {}
513
+ response.metadata['trace_id'] = trace_id
514
+
383
515
  self._track_generation(prompt, response, start_time, success=True, stream=False)
384
516
  return response
385
517
 
@@ -411,6 +543,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
411
543
  stream: bool = False,
412
544
  response_model: Optional[Type[BaseModel]] = None,
413
545
  execute_tools: Optional[bool] = None,
546
+ media_metadata: Optional[List[Dict[str, Any]]] = None,
414
547
  **kwargs) -> Union[GenerateResponse, Iterator[GenerateResponse]]:
415
548
  """
416
549
  Internal generation method to be implemented by subclasses.
@@ -429,6 +562,102 @@ class BaseProvider(AbstractCoreInterface, ABC):
429
562
  """
430
563
  raise NotImplementedError("Subclasses must implement _generate_internal")
431
564
 
565
def _enhance_response_with_visual_tokens(self, response: GenerateResponse, media_metadata: List[Dict[str, Any]]) -> GenerateResponse:
    """
    Add visual token counts for processed media to a response.

    The count comes from self._calculate_visual_tokens. When it is positive
    it is written to response.metadata['visual_tokens'] and
    response.usage['visual_tokens'], and added to
    response.usage['total_tokens']. Any failure is logged as a warning and
    the response is returned anyway.

    Args:
        response: Response to update in place
        media_metadata: Metadata dicts collected from the processed media

    Returns:
        The same response object
    """
    try:
        # Prefer the configured provider name; otherwise derive it from the class name
        provider_name = self.provider or self.__class__.__name__.lower().replace('provider', '')
        self.logger.debug(f"Calculating visual tokens for provider={provider_name}, model={self.model}")

        token_count = self._calculate_visual_tokens(media_metadata, provider_name, self.model)
        self.logger.debug(f"Calculated visual tokens: {token_count}")

        if not token_count > 0:
            self.logger.debug("No visual tokens calculated - skipping enhancement")
            return response

        # Create the containers on demand before writing into them
        if not response.metadata:
            response.metadata = {}
        response.metadata['visual_tokens'] = token_count

        if not response.usage:
            response.usage = {}
        response.usage['visual_tokens'] = token_count

        # The reported total grows by the visual token count
        response.usage['total_tokens'] = response.usage.get('total_tokens', 0) + token_count

        self.logger.info(f"Enhanced response with {token_count} visual tokens (new total: {response.usage['total_tokens']})")

    except Exception as e:
        self.logger.warning(f"Failed to enhance response with visual tokens: {e}")

    return response
605
+
606
def _calculate_visual_tokens(self, media_metadata: List[Dict[str, Any]], provider: str, model: str) -> int:
    """
    Sum the visual tokens of rendered images described by media metadata.

    Only metadata items whose 'processing_method' is 'direct_pdf_conversion'
    and whose 'glyph_cache_dir' exists on disk contribute. If that directory
    holds files matching image_*.png they are passed to VLMTokenCalculator;
    otherwise the count is estimated as total_images times the provider's
    'base_tokens' setting (512 when the provider has none).

    Args:
        media_metadata: Metadata dicts collected from the processed media
        provider: Provider name used for the token calculation
        model: Model name used for the token calculation

    Returns:
        Total visual tokens, or 0 when nothing qualifies or any error occurs
        (errors are logged as warnings)
    """
    try:
        from ..utils.vlm_token_calculator import VLMTokenCalculator
        from pathlib import Path

        calculator = VLMTokenCalculator()
        running_total = 0

        self.logger.debug(f"Processing {len(media_metadata)} media metadata items")

        for index, item in enumerate(media_metadata):
            method = item.get('processing_method')
            self.logger.debug(f"Metadata {index}: processing_method={method}")

            # Only this processing method carries the cache directory info
            if method != 'direct_pdf_conversion':
                continue

            glyph_cache_dir = item.get('glyph_cache_dir')
            total_images = item.get('total_images', 0)
            self.logger.debug(f"Glyph metadata found: cache_dir={glyph_cache_dir}, total_images={total_images}")

            if not glyph_cache_dir or not Path(glyph_cache_dir).exists():
                self.logger.debug(f"Cache directory not found or doesn't exist: {glyph_cache_dir}")
                continue

            # Get actual image paths
            image_paths = list(Path(glyph_cache_dir).glob("image_*.png"))
            self.logger.debug(f"Found {len(image_paths)} images in cache directory")

            if not image_paths:
                # Fallback: estimate based on total_images
                base_tokens = calculator.PROVIDER_CONFIGS.get(provider, {}).get('base_tokens', 512)
                estimated_tokens = total_images * base_tokens
                running_total += estimated_tokens
                self.logger.debug(f"Estimated {estimated_tokens} visual tokens for {total_images} Glyph images (fallback)")
                continue

            # Calculate tokens for all images
            token_analysis = calculator.calculate_tokens_for_images(
                image_paths=image_paths,
                provider=provider,
                model=model
            )
            running_total += token_analysis['total_tokens']
            self.logger.debug(f"Calculated {token_analysis['total_tokens']} visual tokens for {len(image_paths)} Glyph images")

        self.logger.debug(f"Total visual tokens calculated: {running_total}")
        return running_total

    except Exception as e:
        self.logger.warning(f"Failed to calculate visual tokens: {e}")
        return 0
660
+
432
661
  def _initialize_token_limits(self):
433
662
  """Initialize default token limits based on model capabilities"""
434
663
  # Set default max_tokens if not provided
@@ -805,12 +1034,14 @@ class BaseProvider(AbstractCoreInterface, ABC):
805
1034
  """Rough estimation of token count for given text"""
806
1035
  return super().estimate_tokens(text)
807
1036
 
808
- def _process_media_content(self, media: List[Union[str, Dict[str, Any], 'MediaContent']]) -> List['MediaContent']:
1037
+ def _process_media_content(self, media: List[Union[str, Dict[str, Any], 'MediaContent']],
1038
+ glyph_compression: str = "auto") -> List['MediaContent']:
809
1039
  """
810
1040
  Process media content from various input formats into standardized MediaContent objects.
811
1041
 
812
1042
  Args:
813
1043
  media: List of media inputs (file paths, MediaContent objects, or dicts)
1044
+ glyph_compression: Glyph compression preference (auto, always, never)
814
1045
 
815
1046
  Returns:
816
1047
  List of processed MediaContent objects
@@ -838,8 +1069,16 @@ class BaseProvider(AbstractCoreInterface, ABC):
838
1069
  try:
839
1070
  if isinstance(media_item, str):
840
1071
  # File path - process with auto media handler
841
- handler = AutoMediaHandler()
842
- result = handler.process_file(media_item)
1072
+ handler = AutoMediaHandler(
1073
+ enable_glyph_compression=True,
1074
+ glyph_config=getattr(self, 'glyph_config', None)
1075
+ )
1076
+ result = handler.process_file(
1077
+ media_item,
1078
+ provider=self.provider,
1079
+ model=self.model,
1080
+ glyph_compression=glyph_compression
1081
+ )
843
1082
  if result.success:
844
1083
  processed_media.append(result.media_content)
845
1084
  else:
@@ -881,14 +1120,47 @@ class BaseProvider(AbstractCoreInterface, ABC):
881
1120
  The server will use this method to aggregate models across all providers.
882
1121
 
883
1122
  Args:
884
- **kwargs: Provider-specific parameters (e.g., api_key, base_url)
1123
+ **kwargs: Provider-specific parameters including:
1124
+ - api_key: API key for authentication (if required)
1125
+ - base_url: Base URL for API endpoint (if applicable)
1126
+ - input_capabilities: Optional list of ModelInputCapability enums to filter by input capability
1127
+ (e.g., [ModelInputCapability.IMAGE] for vision models)
1128
+ - output_capabilities: Optional list of ModelOutputCapability enums to filter by output capability
1129
+ (e.g., [ModelOutputCapability.EMBEDDINGS] for embedding models)
885
1130
 
886
1131
  Returns:
887
- List of model names available for this provider
1132
+ List of model names available for this provider, optionally filtered by capabilities
1133
+
1134
+ Examples:
1135
+ >>> from abstractcore.providers import OpenAIProvider
1136
+ >>> from abstractcore.providers.model_capabilities import ModelInputCapability, ModelOutputCapability
1137
+ >>>
1138
+ >>> # Get all models
1139
+ >>> all_models = OpenAIProvider.list_available_models(api_key="...")
1140
+ >>>
1141
+ >>> # Get models that can analyze images
1142
+ >>> vision_models = OpenAIProvider.list_available_models(
1143
+ ... api_key="...",
1144
+ ... input_capabilities=[ModelInputCapability.IMAGE]
1145
+ ... )
1146
+ >>>
1147
+ >>> # Get embedding models
1148
+ >>> embedding_models = OpenAIProvider.list_available_models(
1149
+ ... api_key="...",
1150
+ ... output_capabilities=[ModelOutputCapability.EMBEDDINGS]
1151
+ ... )
1152
+ >>>
1153
+ >>> # Get vision models that generate text (most common case)
1154
+ >>> vision_text_models = OpenAIProvider.list_available_models(
1155
+ ... api_key="...",
1156
+ ... input_capabilities=[ModelInputCapability.TEXT, ModelInputCapability.IMAGE],
1157
+ ... output_capabilities=[ModelOutputCapability.TEXT]
1158
+ ... )
888
1159
 
889
1160
  Note:
890
1161
  This is an abstract method that MUST be implemented by all provider subclasses.
891
1162
  Each provider should implement its own discovery logic (API calls, local scanning, etc.).
1163
+ Providers should apply the capability filters if provided in kwargs.
892
1164
  """
893
1165
  pass
894
1166