abstractcore 2.5.2__py3-none-any.whl → 2.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. abstractcore/__init__.py +19 -1
  2. abstractcore/architectures/detection.py +252 -6
  3. abstractcore/assets/architecture_formats.json +14 -1
  4. abstractcore/assets/model_capabilities.json +533 -10
  5. abstractcore/compression/__init__.py +29 -0
  6. abstractcore/compression/analytics.py +420 -0
  7. abstractcore/compression/cache.py +250 -0
  8. abstractcore/compression/config.py +279 -0
  9. abstractcore/compression/exceptions.py +30 -0
  10. abstractcore/compression/glyph_processor.py +381 -0
  11. abstractcore/compression/optimizer.py +388 -0
  12. abstractcore/compression/orchestrator.py +380 -0
  13. abstractcore/compression/pil_text_renderer.py +818 -0
  14. abstractcore/compression/quality.py +226 -0
  15. abstractcore/compression/text_formatter.py +666 -0
  16. abstractcore/compression/vision_compressor.py +371 -0
  17. abstractcore/config/main.py +64 -0
  18. abstractcore/config/manager.py +100 -5
  19. abstractcore/core/retry.py +2 -2
  20. abstractcore/core/session.py +193 -7
  21. abstractcore/download.py +253 -0
  22. abstractcore/embeddings/manager.py +2 -2
  23. abstractcore/events/__init__.py +113 -2
  24. abstractcore/exceptions/__init__.py +49 -2
  25. abstractcore/media/auto_handler.py +312 -18
  26. abstractcore/media/handlers/local_handler.py +14 -2
  27. abstractcore/media/handlers/openai_handler.py +62 -3
  28. abstractcore/media/processors/__init__.py +11 -1
  29. abstractcore/media/processors/direct_pdf_processor.py +210 -0
  30. abstractcore/media/processors/glyph_pdf_processor.py +227 -0
  31. abstractcore/media/processors/image_processor.py +7 -1
  32. abstractcore/media/processors/office_processor.py +2 -2
  33. abstractcore/media/processors/text_processor.py +18 -3
  34. abstractcore/media/types.py +164 -7
  35. abstractcore/media/utils/image_scaler.py +2 -2
  36. abstractcore/media/vision_fallback.py +2 -2
  37. abstractcore/providers/__init__.py +18 -0
  38. abstractcore/providers/anthropic_provider.py +228 -8
  39. abstractcore/providers/base.py +378 -11
  40. abstractcore/providers/huggingface_provider.py +563 -23
  41. abstractcore/providers/lmstudio_provider.py +284 -4
  42. abstractcore/providers/mlx_provider.py +27 -2
  43. abstractcore/providers/model_capabilities.py +352 -0
  44. abstractcore/providers/ollama_provider.py +282 -6
  45. abstractcore/providers/openai_provider.py +286 -8
  46. abstractcore/providers/registry.py +85 -13
  47. abstractcore/providers/streaming.py +2 -2
  48. abstractcore/server/app.py +91 -81
  49. abstractcore/tools/common_tools.py +2 -2
  50. abstractcore/tools/handler.py +2 -2
  51. abstractcore/tools/parser.py +2 -2
  52. abstractcore/tools/registry.py +2 -2
  53. abstractcore/tools/syntax_rewriter.py +2 -2
  54. abstractcore/tools/tag_rewriter.py +3 -3
  55. abstractcore/utils/__init__.py +4 -1
  56. abstractcore/utils/self_fixes.py +2 -2
  57. abstractcore/utils/trace_export.py +287 -0
  58. abstractcore/utils/version.py +1 -1
  59. abstractcore/utils/vlm_token_calculator.py +655 -0
  60. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/METADATA +207 -8
  61. abstractcore-2.6.0.dist-info/RECORD +108 -0
  62. abstractcore-2.5.2.dist-info/RECORD +0 -90
  63. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/WHEEL +0 -0
  64. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/entry_points.txt +0 -0
  65. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/licenses/LICENSE +0 -0
  66. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,388 @@
1
+ """
2
+ Provider-specific optimization profiles for enhanced Glyph compression.
3
+ """
4
+
5
+ from typing import Dict, Any, Optional
6
+ from dataclasses import dataclass
7
+ import json
8
+ from pathlib import Path
9
+
10
+ from ..utils.structured_logging import get_logger
11
+ from .config import RenderingConfig
12
+
13
+
14
@dataclass
class OptimizationProfile:
    """Optimization profile for a specific provider/model combination.

    Bundles the empirically-tuned page-rendering parameters (DPI, font,
    layout) together with the compression/quality targets expected of them.
    """
    provider: str
    model: str
    dpi: int
    font_size: int
    line_height: int
    columns: int
    margin_x: int
    margin_y: int
    target_compression: float
    quality_threshold: float
    notes: str = ""

    def to_rendering_config(self) -> RenderingConfig:
        """Build a RenderingConfig mirroring this profile's layout settings."""
        layout = dict(
            dpi=self.dpi,
            font_size=self.font_size,
            line_height=self.line_height,
            columns=self.columns,
            margin_x=self.margin_x,
            margin_y=self.margin_y,
        )
        # Verdana and auto-cropping are fixed rendering choices shared by
        # every profile; only the layout numbers vary per provider/model.
        return RenderingConfig(font_path="Verdana", auto_crop=True, **layout)
41
+
42
+
43
class CompressionOptimizer:
    """
    Optimizes Glyph compression for different providers and models.

    Based on empirical testing, provides optimized rendering configurations
    to achieve maximum compression while maintaining quality.
    """

    # Empirically-tuned profile parameters, one row per provider/model.
    # Row layout: (provider, model, dpi, font_size, line_height, columns,
    #              margin_x, margin_y, target_compression, quality_threshold,
    #              notes).  The "default"/"default" row is the safe fallback
    # used when no provider-specific profile matches.
    _PROFILE_DATA = (
        # OpenAI models - optimized for GPT-4 vision
        ("openai", "gpt-4o", 72, 8, 9, 4, 3, 3, 3.5, 0.93,
         "Optimized for GPT-4o vision encoder"),
        ("openai", "gpt-4o-mini", 72, 7, 8, 5, 2, 2, 4.0, 0.90,
         "Aggressive compression for mini model"),
        # Anthropic models - optimized for Claude vision
        ("anthropic", "claude-3-5-sonnet", 96, 9, 10, 3, 4, 4, 3.0, 0.94,
         "Conservative for Claude's detail focus"),
        ("anthropic", "claude-3-5-haiku", 72, 7, 8, 4, 3, 3, 3.5, 0.91,
         "Balanced for Haiku efficiency"),
        # Ollama models - optimized for open source vision models
        ("ollama", "llama3.2-vision", 72, 6, 7, 6, 2, 2, 4.5, 0.88,
         "Maximum compression for Llama vision"),
        ("ollama", "qwen2.5-vision", 72, 7, 8, 5, 2, 2, 4.0, 0.89,
         "Optimized for Qwen2.5 vision"),
        # LMStudio models
        ("lmstudio", "default", 72, 8, 9, 4, 3, 3, 3.5, 0.90,
         "Generic LMStudio optimization"),
        # Default profile for unknown providers
        ("default", "default", 72, 8, 9, 4, 3, 3, 3.0, 0.92,
         "Safe default configuration"),
    )

    def __init__(self):
        """Initialize compression optimizer."""
        self.logger = get_logger(self.__class__.__name__)
        self.profiles = self._load_optimization_profiles()

    def _load_optimization_profiles(self) -> Dict[str, OptimizationProfile]:
        """Load provider-specific optimization profiles.

        Returns:
            Mapping of "provider/model" keys (plus the bare "default" key)
            to their OptimizationProfile.
        """
        profiles: Dict[str, OptimizationProfile] = {}
        for (provider, model, dpi, font_size, line_height, columns,
             margin_x, margin_y, target, quality, notes) in self._PROFILE_DATA:
            # The catch-all profile is registered under the bare key
            # "default" (not "default/default") so lookups can fall
            # through to it directly.
            key = "default" if provider == "default" else f"{provider}/{model}"
            profiles[key] = OptimizationProfile(
                provider=provider,
                model=model,
                dpi=dpi,
                font_size=font_size,
                line_height=line_height,
                columns=columns,
                margin_x=margin_x,
                margin_y=margin_y,
                target_compression=target,
                quality_threshold=quality,
                notes=notes,
            )
        return profiles

    def get_optimized_config(
        self,
        provider: str,
        model: str,
        aggressive: bool = False
    ) -> RenderingConfig:
        """
        Get optimized rendering configuration for provider/model.

        Args:
            provider: Provider name
            model: Model name
            aggressive: Use more aggressive compression

        Returns:
            Optimized RenderingConfig
        """
        # Resolution order: exact provider/model match, then the
        # provider-wide default, then the global default profile.
        key = f"{provider}/{model}"
        if key in self.profiles:
            profile = self.profiles[key]
        elif f"{provider}/default" in self.profiles:
            profile = self.profiles[f"{provider}/default"]
        else:
            profile = self.profiles["default"]

        self.logger.debug(f"Using optimization profile: {profile.provider}/{profile.model}")

        # Apply aggressive modifications if requested
        if aggressive:
            profile = self._apply_aggressive_settings(profile)

        return profile.to_rendering_config()

    def _apply_aggressive_settings(self, profile: OptimizationProfile) -> OptimizationProfile:
        """Return a copy of *profile* with more aggressive compression settings.

        Shrinks font/line/margins (clamped to readable minimums), adds
        columns (capped at 8), raises the compression target by 50% and
        relaxes the quality threshold by 5%.  The input profile is not
        mutated.
        """
        return OptimizationProfile(
            provider=profile.provider,
            model=profile.model,
            dpi=profile.dpi,
            font_size=max(5, profile.font_size - 1),   # smaller font
            line_height=max(6, profile.line_height - 1),
            columns=min(8, profile.columns + 2),        # more columns
            margin_x=max(1, profile.margin_x - 1),
            margin_y=max(1, profile.margin_y - 1),
            target_compression=profile.target_compression * 1.5,
            quality_threshold=profile.quality_threshold * 0.95,
            notes=f"{profile.notes} (aggressive mode)"
        )

    def analyze_compression_potential(
        self,
        text_length: int,
        provider: str,
        model: str
    ) -> Dict[str, Any]:
        """
        Analyze potential compression for given text.

        Args:
            text_length: Length of text in characters
            provider: Provider name
            model: Model name

        Returns:
            Analysis of compression potential
        """
        # Exact-key lookup only; unlike get_optimized_config() this does
        # not fall back to a provider-wide default before "default".
        key = f"{provider}/{model}"
        profile = self.profiles.get(key, self.profiles["default"])

        # Rough heuristic: ~4 characters per token.
        estimated_tokens = text_length // 4

        # Page capacity estimate: ~40 chars per column-line, ~60 lines/page.
        chars_per_page = (profile.columns * 40) * 60
        estimated_pages = text_length / chars_per_page
        estimated_images = max(1, int(estimated_pages / 2))  # 2 pages per image

        # Approximate vision-token cost per rendered image for most models.
        tokens_per_image = 1500
        compressed_tokens = estimated_images * tokens_per_image

        compression_ratio = estimated_tokens / compressed_tokens if compressed_tokens > 0 else 1.0

        return {
            "text_length": text_length,
            "estimated_tokens": estimated_tokens,
            "estimated_images": estimated_images,
            "compressed_tokens": compressed_tokens,
            "compression_ratio": compression_ratio,
            "profile_used": f"{profile.provider}/{profile.model}",
            "target_compression": profile.target_compression,
            # "achievable" allows a 20% shortfall against the target.
            "achievable": compression_ratio >= profile.target_compression * 0.8
        }

    def save_profiles(self, path: Path):
        """Save optimization profiles to JSON file.

        Args:
            path: Destination file path (overwritten if it exists).
        """
        # vars() serializes every dataclass field by name, so the JSON
        # stays in sync automatically if OptimizationProfile gains fields
        # (the previous hand-written field list could silently drift).
        profiles_dict = {
            key: dict(vars(profile)) for key, profile in self.profiles.items()
        }

        with open(path, 'w') as f:
            json.dump(profiles_dict, f, indent=2)

        self.logger.info(f"Saved {len(profiles_dict)} optimization profiles to {path}")

    def benchmark_profile(
        self,
        profile: OptimizationProfile,
        test_text: str
    ) -> Dict[str, Any]:
        """
        Benchmark a specific optimization profile.

        Args:
            profile: Profile to benchmark
            test_text: Text to test with

        Returns:
            Benchmark results
        """
        # Local imports avoid a circular dependency at module load time.
        from .glyph_processor import GlyphProcessor
        from .config import GlyphConfig
        import time

        # NOTE(review): only profile.provider/model are forwarded below;
        # the profile's rendering parameters (dpi, font, columns, ...) are
        # not applied to GlyphConfig — confirm whether that is intended.
        config = GlyphConfig()
        config.enabled = True
        config.min_token_threshold = 100

        processor = GlyphProcessor(config=config)

        start_time = time.time()

        try:
            results = processor.process_text(
                test_text,
                provider=profile.provider,
                model=profile.model,
                user_preference="always"
            )

            processing_time = time.time() - start_time

            from ..utils.token_utils import TokenUtils
            original_tokens = TokenUtils.estimate_tokens(test_text, profile.model)
            # Same per-image token approximation as analyze_compression_potential.
            compressed_tokens = len(results) * 1500
            actual_ratio = original_tokens / compressed_tokens if compressed_tokens > 0 else 1.0

            # Quality score comes from the first rendered image's metadata.
            quality = results[0].metadata.get("quality_score", 0.0) if results else 0.0

            return {
                "success": True,
                "profile": f"{profile.provider}/{profile.model}",
                "original_tokens": original_tokens,
                "compressed_tokens": compressed_tokens,
                "compression_ratio": actual_ratio,
                "target_ratio": profile.target_compression,
                "quality_score": quality,
                "quality_threshold": profile.quality_threshold,
                "processing_time": processing_time,
                "images_created": len(results),
                # Pass when within 10% of the profile's compression target.
                "meets_target": actual_ratio >= profile.target_compression * 0.9
            }

        except Exception as e:
            return {
                "success": False,
                "profile": f"{profile.provider}/{profile.model}",
                "error": str(e)
            }
373
+
374
+
375
def create_optimized_config(provider: str, model: str, aggressive: bool = False) -> RenderingConfig:
    """
    Convenience function to create optimized rendering configuration.

    Builds a throwaway CompressionOptimizer and delegates to its
    get_optimized_config() lookup.

    Args:
        provider: Provider name
        model: Model name
        aggressive: Use more aggressive compression

    Returns:
        Optimized RenderingConfig
    """
    return CompressionOptimizer().get_optimized_config(provider, model, aggressive)