abstractcore-2.9.1-py3-none-any.whl → abstractcore-2.11.4-py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (85)
  1. abstractcore/__init__.py +7 -27
  2. abstractcore/apps/deepsearch.py +9 -4
  3. abstractcore/apps/extractor.py +33 -100
  4. abstractcore/apps/intent.py +19 -0
  5. abstractcore/apps/judge.py +20 -1
  6. abstractcore/apps/summarizer.py +20 -1
  7. abstractcore/architectures/detection.py +34 -1
  8. abstractcore/architectures/response_postprocessing.py +313 -0
  9. abstractcore/assets/architecture_formats.json +38 -8
  10. abstractcore/assets/model_capabilities.json +882 -160
  11. abstractcore/compression/__init__.py +1 -2
  12. abstractcore/compression/glyph_processor.py +6 -4
  13. abstractcore/config/main.py +52 -20
  14. abstractcore/config/manager.py +390 -12
  15. abstractcore/config/vision_config.py +5 -5
  16. abstractcore/core/interface.py +151 -3
  17. abstractcore/core/session.py +16 -10
  18. abstractcore/download.py +1 -1
  19. abstractcore/embeddings/manager.py +20 -6
  20. abstractcore/endpoint/__init__.py +2 -0
  21. abstractcore/endpoint/app.py +458 -0
  22. abstractcore/mcp/client.py +3 -1
  23. abstractcore/media/__init__.py +52 -17
  24. abstractcore/media/auto_handler.py +42 -22
  25. abstractcore/media/base.py +44 -1
  26. abstractcore/media/capabilities.py +12 -33
  27. abstractcore/media/enrichment.py +105 -0
  28. abstractcore/media/handlers/anthropic_handler.py +19 -28
  29. abstractcore/media/handlers/local_handler.py +124 -70
  30. abstractcore/media/handlers/openai_handler.py +19 -31
  31. abstractcore/media/processors/__init__.py +4 -2
  32. abstractcore/media/processors/audio_processor.py +57 -0
  33. abstractcore/media/processors/office_processor.py +8 -3
  34. abstractcore/media/processors/pdf_processor.py +46 -3
  35. abstractcore/media/processors/text_processor.py +22 -24
  36. abstractcore/media/processors/video_processor.py +58 -0
  37. abstractcore/media/types.py +97 -4
  38. abstractcore/media/utils/image_scaler.py +20 -2
  39. abstractcore/media/utils/video_frames.py +219 -0
  40. abstractcore/media/vision_fallback.py +136 -22
  41. abstractcore/processing/__init__.py +32 -3
  42. abstractcore/processing/basic_deepsearch.py +15 -10
  43. abstractcore/processing/basic_intent.py +3 -2
  44. abstractcore/processing/basic_judge.py +3 -2
  45. abstractcore/processing/basic_summarizer.py +1 -1
  46. abstractcore/providers/__init__.py +3 -1
  47. abstractcore/providers/anthropic_provider.py +95 -8
  48. abstractcore/providers/base.py +1516 -81
  49. abstractcore/providers/huggingface_provider.py +546 -69
  50. abstractcore/providers/lmstudio_provider.py +30 -916
  51. abstractcore/providers/mlx_provider.py +382 -35
  52. abstractcore/providers/model_capabilities.py +5 -1
  53. abstractcore/providers/ollama_provider.py +99 -15
  54. abstractcore/providers/openai_compatible_provider.py +406 -180
  55. abstractcore/providers/openai_provider.py +188 -44
  56. abstractcore/providers/openrouter_provider.py +76 -0
  57. abstractcore/providers/registry.py +61 -5
  58. abstractcore/providers/streaming.py +138 -33
  59. abstractcore/providers/vllm_provider.py +92 -817
  60. abstractcore/server/app.py +478 -28
  61. abstractcore/server/audio_endpoints.py +139 -0
  62. abstractcore/server/vision_endpoints.py +1319 -0
  63. abstractcore/structured/handler.py +316 -41
  64. abstractcore/tools/common_tools.py +5501 -2012
  65. abstractcore/tools/comms_tools.py +1641 -0
  66. abstractcore/tools/core.py +37 -7
  67. abstractcore/tools/handler.py +4 -9
  68. abstractcore/tools/parser.py +49 -2
  69. abstractcore/tools/tag_rewriter.py +2 -1
  70. abstractcore/tools/telegram_tdlib.py +407 -0
  71. abstractcore/tools/telegram_tools.py +261 -0
  72. abstractcore/utils/cli.py +1085 -72
  73. abstractcore/utils/structured_logging.py +29 -8
  74. abstractcore/utils/token_utils.py +2 -0
  75. abstractcore/utils/truncation.py +29 -0
  76. abstractcore/utils/version.py +3 -4
  77. abstractcore/utils/vlm_token_calculator.py +12 -2
  78. abstractcore-2.11.4.dist-info/METADATA +562 -0
  79. abstractcore-2.11.4.dist-info/RECORD +133 -0
  80. {abstractcore-2.9.1.dist-info → abstractcore-2.11.4.dist-info}/WHEEL +1 -1
  81. {abstractcore-2.9.1.dist-info → abstractcore-2.11.4.dist-info}/entry_points.txt +1 -0
  82. abstractcore-2.9.1.dist-info/METADATA +0 -1190
  83. abstractcore-2.9.1.dist-info/RECORD +0 -119
  84. {abstractcore-2.9.1.dist-info → abstractcore-2.11.4.dist-info}/licenses/LICENSE +0 -0
  85. {abstractcore-2.9.1.dist-info → abstractcore-2.11.4.dist-info}/top_level.txt +0 -0

abstractcore/media/auto_handler.py
@@ -12,7 +12,7 @@ from typing import Dict, Any, Optional, List
 
 from .base import BaseMediaHandler
 from .types import MediaContent, MediaType, ContentFormat, detect_media_type
-from .processors import ImageProcessor, TextProcessor, PDFProcessor, OfficeProcessor
+from .processors import ImageProcessor, TextProcessor, PDFProcessor, OfficeProcessor, AudioProcessor, VideoProcessor
 from ..exceptions import UnsupportedFeatureError
 
 # Import Glyph compression support
@@ -60,6 +60,7 @@ class AutoMediaHandler(BaseMediaHandler):
         self._text_processor = None
         self._pdf_processor = None
         self._office_processor = None
+        self._video_processor = None
 
         # Initialize Glyph compression support
         self._compression_orchestrator = None
@@ -98,20 +99,17 @@ class AutoMediaHandler(BaseMediaHandler):
             availability['office'] = True
         except ImportError:
             availability['office'] = False
+
+        # AudioProcessor (dependency-free)
+        availability['audio'] = True
+
+        # VideoProcessor (dependency-free)
+        availability['video'] = True
 
-        # GlyphProcessor (requires reportlab and pdf2image)
-        glyph_deps_available = True
-        if GLYPH_AVAILABLE and self.enable_compression:
-            # Check actual dependencies
-            try:
-                import reportlab
-                import pdf2image
-            except ImportError:
-                glyph_deps_available = False
-        else:
-            glyph_deps_available = False
-
-        availability['glyph'] = glyph_deps_available
+        # GlyphProcessor (PIL renderer). Requires Pillow at runtime.
+        availability['glyph'] = bool(
+            GLYPH_AVAILABLE and self.enable_compression and availability.get('image', False)
+        )
 
         return availability
 
@@ -138,6 +136,12 @@ class AutoMediaHandler(BaseMediaHandler):
         if self._office_processor is None:
             self._office_processor = OfficeProcessor(**self.processor_config)
         return self._office_processor
+
+    def _get_video_processor(self) -> VideoProcessor:
+        """Get or create VideoProcessor instance."""
+        if self._video_processor is None:
+            self._video_processor = VideoProcessor(**self.processor_config)
+        return self._video_processor
 
     def _get_compression_orchestrator(self) -> 'CompressionOrchestrator':
         """Get or create CompressionOrchestrator instance."""
@@ -195,7 +199,14 @@ class AutoMediaHandler(BaseMediaHandler):
             else:
                 return self._get_text_processor()
 
-        # Handle other media types (audio, video) - not yet implemented
+        # Handle audio
+        elif media_type == MediaType.AUDIO:
+            return AudioProcessor(**self.processor_config)
+        # Handle video
+        elif media_type == MediaType.VIDEO:
+            if self._available_processors.get('video', False):
+                return self._get_video_processor()
+            return None
         else:
             self.logger.warning(f"Media type {media_type.value} not yet supported")
             return None
@@ -368,19 +379,21 @@ class AutoMediaHandler(BaseMediaHandler):
         # Check dependencies
         missing_deps = []
         try:
-            import reportlab
+            from PIL import Image  # noqa: F401
         except ImportError:
-            missing_deps.append("reportlab")
+            missing_deps.append("Pillow")
 
         try:
             import pdf2image
         except ImportError:
-            missing_deps.append("pdf2image")
+            # Only required for the experimental direct PDF→image path.
+            missing_deps.append("pdf2image (optional)")
 
         if missing_deps:
             deps_str = ", ".join(missing_deps)
             self.logger.warning(f"Missing Glyph dependencies: {deps_str}")
-            self.logger.warning(f"Install with: pip install {' '.join(missing_deps)}")
+            self.logger.warning("Install with: pip install \"abstractcore[compression]\" (Pillow renderer)")
+            self.logger.warning("Optional (PDF→image): pip install pdf2image (+ Poppler installed on your system)")
 
         if not self.enable_compression:
             self.logger.warning("Glyph compression is disabled in AutoMediaHandler configuration")
@@ -510,9 +523,9 @@ class AutoMediaHandler(BaseMediaHandler):
         elif media_type == MediaType.DOCUMENT:
             return True  # Always supported via text processor at minimum
         elif media_type == MediaType.AUDIO:
-            return False  # Not yet implemented
+            return self._available_processors.get('audio', False)
         elif media_type == MediaType.VIDEO:
-            return False  # Not yet implemented
+            return self._available_processors.get('video', False)
         return False
 
     def supports_format(self, media_type: MediaType, format_ext: str) -> bool:
@@ -550,6 +563,13 @@ class AutoMediaHandler(BaseMediaHandler):
             # This allows processing of unknown document formats
             return True
 
+        elif media_type == MediaType.AUDIO:
+            # AudioProcessor is dependency-free in v0; accept common audio containers.
+            return format_ext.lower() in {'mp3', 'wav', 'm4a', 'ogg', 'flac', 'aac', 'webm'}
+        elif media_type == MediaType.VIDEO:
+            # VideoProcessor is dependency-free in v0; frame extraction fallback may require ffmpeg.
+            return format_ext.lower() in {'mp4', 'mov', 'mkv', 'webm', 'avi', 'wmv', 'm4v'}
+
         return False
 
     def get_supported_formats(self) -> Dict[str, List[str]]:
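
Taken together, audio/video support in AutoMediaHandler is now driven by the availability map and the format allow-lists above. A minimal usage sketch under stated assumptions (that AutoMediaHandler can be constructed with defaults; the calls are the public methods from these hunks):

    from abstractcore.media.auto_handler import AutoMediaHandler
    from abstractcore.media.types import MediaType

    handler = AutoMediaHandler()

    # Previously these returned False unconditionally; they now consult the
    # availability map built above (audio/video are dependency-free in v0).
    print(handler.supports_media_type(MediaType.AUDIO))      # True
    print(handler.supports_format(MediaType.VIDEO, "mp4"))   # True
    print(handler.supports_format(MediaType.VIDEO, "flv"))   # False: not in the v0 allow-list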
@@ -654,4 +674,4 @@ class AutoMediaHandler(BaseMediaHandler):
         else:
             # Basic estimation based on file size
             file_size_mb = file_path.stat().st_size / (1024 * 1024)
-            return max(0.1, file_size_mb / 10.0)  # ~10MB/second processing rate
\ No newline at end of file
+            return max(0.1, file_size_mb / 10.0)  # ~10MB/second processing rate

abstractcore/media/base.py
@@ -453,4 +453,47 @@ class BaseProviderMediaHandler(BaseMediaHandler):
         Returns:
             True if provider can handle this content
         """
-        return self.supports_media_type(media_content.media_type)
\ No newline at end of file
+        return self.supports_media_type(media_content.media_type)
+
+    def estimate_tokens_for_media(self, media_content: MediaContent) -> int:
+        """
+        Estimate token usage for media content.
+
+        Base implementation that uses pre-computed estimates when available.
+        Subclasses can override _estimate_image_tokens() for provider-specific
+        image token calculations.
+
+        Args:
+            media_content: MediaContent to estimate
+
+        Returns:
+            Estimated token count
+        """
+        if media_content.media_type == MediaType.IMAGE:
+            return self._estimate_image_tokens(media_content)
+
+        elif media_content.media_type in [MediaType.TEXT, MediaType.DOCUMENT]:
+            # Use pre-computed estimate from processor if available (uses TokenUtils)
+            if 'estimated_tokens' in media_content.metadata:
+                return media_content.metadata['estimated_tokens']
+            # Fallback: rough estimation (~4 chars per token)
+            content_length = len(str(media_content.content))
+            return content_length // 4
+
+        return 0
+
+    def _estimate_image_tokens(self, media_content: MediaContent) -> int:
+        """
+        Estimate token usage for image content.
+
+        Override in subclasses for provider-specific image token calculations.
+        Default implementation returns a conservative estimate.
+
+        Args:
+            media_content: Image MediaContent to estimate
+
+        Returns:
+            Estimated token count for the image
+        """
+        # Conservative default - subclasses should override with provider-specific logic
+        return 512
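
The split into estimate_tokens_for_media() plus an overridable _estimate_image_tokens() means provider handlers only customize the image path. A minimal sketch of a subclass override (the class name is illustrative; the flat 1600-token figure mirrors the Anthropic handler further below):

    from abstractcore.media.base import BaseProviderMediaHandler
    from abstractcore.media.types import MediaContent

    class ExampleProviderMediaHandler(BaseProviderMediaHandler):
        def _estimate_image_tokens(self, media_content: MediaContent) -> int:
            # Flat per-image estimate; the base-class default is 512.
            return 1600

Text/document estimation is inherited unchanged: it prefers the processor-computed metadata['estimated_tokens'] and only then falls back to the ~4-chars-per-token heuristic.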

abstractcore/media/capabilities.py
@@ -66,11 +66,18 @@ class MediaCapabilities:
             caps = {}
 
         # Base capabilities from JSON
+        video_mode = caps.get('video_input_mode')
+        if isinstance(video_mode, str):
+            vm = video_mode.strip().lower()
+            video_support = vm in {'frames', 'native'}
+        else:
+            video_support = caps.get('video_support', False)
+
         instance = cls(
             model_name=model,
             vision_support=caps.get('vision_support', False),
             audio_support=caps.get('audio_support', False),
-            video_support=caps.get('video_support', False),
+            video_support=video_support,
             image_resolutions=caps.get('image_resolutions', [])
        )
 
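
The new video_input_mode string takes precedence over the legacy video_support boolean. Equivalent standalone logic, for illustration (reading 'frames' as frame sampling and 'native' as true video input is an interpretation, not spelled out in this hunk):

    def derive_video_support(caps: dict) -> bool:
        video_mode = caps.get('video_input_mode')
        if isinstance(video_mode, str):
            # Any other string ('none', 'unsupported', typos, ...) disables video.
            return video_mode.strip().lower() in {'frames', 'native'}
        return bool(caps.get('video_support', False))

    assert derive_video_support({'video_input_mode': 'frames'}) is True
    assert derive_video_support({'video_input_mode': 'none'}) is False
    assert derive_video_support({'video_support': True}) is True  # legacy boolean path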
@@ -159,37 +166,9 @@ class MediaCapabilities:
             'text_embedding_preferred': self.text_embedding_preferred
         }
 
-    def estimate_media_tokens(self, media_type: MediaType, content_size: int = 0) -> int:
-        """
-        Estimate token usage for media content.
-
-        Args:
-            media_type: Type of media
-            content_size: Size of content in bytes (optional)
-
-        Returns:
-            Estimated token count
-        """
-        if not self.media_token_estimation:
-            return 0
-
-        if media_type == MediaType.IMAGE and self.vision_support:
-            # Base token cost for images varies by model
-            model_lower = self.model_name.lower()
-            if 'gpt-4o' in model_lower:
-                return 85 + (170 * 4)  # Simplified GPT-4o calculation
-            elif 'claude' in model_lower:
-                return 1600  # Anthropic standard
-            else:
-                return 512  # Conservative estimate for local models
-
-        elif media_type in [MediaType.TEXT, MediaType.DOCUMENT]:
-            # Text content token estimation
-            if content_size > 0:
-                return content_size // 4  # ~4 chars per token
-            return 100  # Default estimate
-
-        return 0
+    # Note: Token estimation is now handled by processors (TextProcessor, PDFProcessor, etc.)
+    # which add 'estimated_tokens' to MediaContent.metadata using TokenUtils.
+    # Handlers use BaseProviderMediaHandler.estimate_tokens_for_media() to retrieve it.
 
     def validate_media_content(self, media_type: MediaType, file_size: int = 0,
                                format: str = None) -> tuple[bool, Optional[str]]:
@@ -332,4 +311,4 @@ def get_max_images(model: str, provider: str = None) -> int:
 
 def should_use_text_embedding(model: str, provider: str = None) -> bool:
     """Check if model prefers text embedding over multimodal messages."""
-    return get_media_capabilities(model, provider).text_embedding_preferred
\ No newline at end of file
+    return get_media_capabilities(model, provider).text_embedding_preferred

abstractcore/media/enrichment.py (new file)
@@ -0,0 +1,105 @@
+"""
+Media enrichment metadata contract (v0).
+
+This module defines a provider-agnostic, response-level transparency hook for
+"input enrichment fallbacks" where non-text inputs (image/audio/video) are
+converted into short grounded text context (caption/transcript/frames) so a
+text-only model can proceed.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Dict, List, Literal, Optional, TypedDict
+
+
+MEDIA_ENRICHMENT_KEY = "media_enrichment"
+
+EnrichmentStatus = Literal["used", "skipped", "error"]
+BackendKind = Literal["llm", "plugin", "local_model", "unknown"]
+
+
+class EnrichmentBackend(TypedDict, total=False):
+    kind: BackendKind
+    provider: str
+    model: str
+    backend_id: str
+    source: str
+
+
+class MediaEnrichmentItem(TypedDict, total=False):
+    status: EnrichmentStatus
+    input_modality: str  # image|audio|video (string for forward-compat)
+    input_index: int
+    input_name: str
+    policy: str
+    summary_kind: str  # caption|transcript|frames (string for forward-compat)
+    backend: EnrichmentBackend
+    injected_text: str
+    injected_chars: int
+    artifact: Dict[str, Any]
+    error: str
+
+
+def build_enrichment_item(
+    *,
+    status: EnrichmentStatus,
+    input_modality: str,
+    summary_kind: str,
+    policy: Optional[str] = None,
+    backend: Optional[Dict[str, Any]] = None,
+    input_index: Optional[int] = None,
+    input_name: Optional[str] = None,
+    injected_text: Optional[str] = None,
+    artifact: Optional[Dict[str, Any]] = None,
+    error: Optional[str] = None,
+) -> MediaEnrichmentItem:
+    item: MediaEnrichmentItem = {
+        "status": status,
+        "input_modality": str(input_modality),
+        "summary_kind": str(summary_kind),
+    }
+    if isinstance(policy, str) and policy.strip():
+        item["policy"] = policy.strip()
+    if isinstance(backend, dict) and backend:
+        # Preserve only JSON-serializable primitives; callers should keep this small.
+        cleaned: Dict[str, Any] = {}
+        for k in ("kind", "provider", "model", "backend_id", "source"):
+            v = backend.get(k)
+            if isinstance(v, str) and v.strip():
+                cleaned[str(k)] = v.strip()
+        if cleaned:
+            item["backend"] = cleaned  # type: ignore[assignment]
+    if isinstance(input_index, int):
+        item["input_index"] = int(input_index)
+    if isinstance(input_name, str) and input_name.strip():
+        item["input_name"] = input_name.strip()
+    if isinstance(injected_text, str) and injected_text.strip():
+        txt = injected_text.strip()
+        item["injected_text"] = txt
+        item["injected_chars"] = len(txt)
+    if isinstance(artifact, dict) and artifact:
+        item["artifact"] = artifact
+    if isinstance(error, str) and error.strip():
+        item["error"] = error.strip()
+    return item
+
+
+def merge_enrichment_metadata(
+    metadata: Optional[Dict[str, Any]],
+    enrichments: Optional[List[Dict[str, Any]]],
+) -> Dict[str, Any]:
+    out: Dict[str, Any] = metadata if isinstance(metadata, dict) else {}
+    if not enrichments:
+        return out
+
+    existing = out.get(MEDIA_ENRICHMENT_KEY)
+    if not isinstance(existing, list):
+        existing = []
+    out[MEDIA_ENRICHMENT_KEY] = existing
+
+    for item in enrichments:
+        if isinstance(item, dict) and item:
+            existing.append(item)
+
+    return out
+
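
A short usage sketch of the new contract (the provider/model strings and the caption are placeholders):

    from abstractcore.media.enrichment import (
        MEDIA_ENRICHMENT_KEY,
        build_enrichment_item,
        merge_enrichment_metadata,
    )

    item = build_enrichment_item(
        status="used",
        input_modality="image",
        summary_kind="caption",
        backend={"kind": "llm", "provider": "ollama", "model": "example-vlm"},
        input_index=0,
        injected_text="A bar chart comparing Q3 revenue across regions.",
    )

    # Appends the item under metadata["media_enrichment"]; injected_chars is
    # computed from the stripped caption by build_enrichment_item().
    metadata = merge_enrichment_metadata({}, [item])
    assert metadata[MEDIA_ENRICHMENT_KEY][0]["injected_chars"] == len(item["injected_text"])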

abstractcore/media/handlers/anthropic_handler.py
@@ -192,7 +192,16 @@ class AnthropicMediaHandler(BaseProviderMediaHandler):
         if media_content.media_type == MediaType.IMAGE:
             # Check if model supports vision
             if not self.model_capabilities.get('vision_support', False):
-                return False
+                # Fallback: consult the centralized media capability database.
+                # This keeps validation in sync with `abstractcore.media.capabilities.is_vision_model`
+                # and avoids hard-coding model-name heuristics here.
+                try:
+                    from ..capabilities import is_vision_model
+
+                    if not is_vision_model(model):
+                        return False
+                except Exception:
+                    return False
 
             # Check image size
             if hasattr(media_content, 'metadata'):
@@ -200,13 +209,9 @@ class AnthropicMediaHandler(BaseProviderMediaHandler):
                 if file_size > self.max_image_size:
                     return False
 
-            # Model-specific checks
-            if 'claude-3' in model_lower:
-                return True  # All Claude 3 models support vision
-            elif 'claude-3.5' in model_lower:
-                return True  # All Claude 3.5 models support vision
-            elif 'claude-4' in model_lower:
-                return True  # Future Claude 4 models
+            # If vision is supported (either by injected model capabilities or the centralized lookup),
+            # accept the image.
+            return True
 
         # Text/document validation
         elif media_content.media_type in [MediaType.TEXT, MediaType.DOCUMENT]:
@@ -219,28 +224,14 @@ class AnthropicMediaHandler(BaseProviderMediaHandler):
 
         return False
 
-    def estimate_tokens_for_media(self, media_content: MediaContent) -> int:
+    def _estimate_image_tokens(self, media_content: MediaContent) -> int:
         """
-        Estimate token usage for media content.
+        Anthropic-specific image token estimation.
 
-        Args:
-            media_content: MediaContent to estimate
-
-        Returns:
-            Estimated token count
+        Anthropic uses roughly ~1600 tokens per image for most cases.
+        This varies based on image content and complexity.
         """
-        if media_content.media_type == MediaType.IMAGE:
-            # Anthropic image token estimation
-            # Roughly ~1600 tokens per image for most cases
-            # This varies based on image content and complexity
-            return 1600
-
-        elif media_content.media_type in [MediaType.TEXT, MediaType.DOCUMENT]:
-            # Rough estimation: 3.5 characters per token (slightly better than GPT)
-            content_length = len(str(media_content.content))
-            return int(content_length / 3.5)
-
-        return 0
+        return 1600
 
     def get_model_media_limits(self, model: str) -> Dict[str, Any]:
         """
@@ -323,4 +314,4 @@ class AnthropicMediaHandler(BaseProviderMediaHandler):
     3. Any notable patterns, relationships, or conclusions
     4. Suggestions for how this information might be used or what actions might be taken
 
-    Be thorough but concise in your analysis."""
\ No newline at end of file
+    Be thorough but concise in your analysis."""