abstractcore 2.5.2__py3-none-any.whl → 2.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/__init__.py +19 -1
- abstractcore/architectures/detection.py +252 -6
- abstractcore/assets/architecture_formats.json +14 -1
- abstractcore/assets/model_capabilities.json +533 -10
- abstractcore/compression/__init__.py +29 -0
- abstractcore/compression/analytics.py +420 -0
- abstractcore/compression/cache.py +250 -0
- abstractcore/compression/config.py +279 -0
- abstractcore/compression/exceptions.py +30 -0
- abstractcore/compression/glyph_processor.py +381 -0
- abstractcore/compression/optimizer.py +388 -0
- abstractcore/compression/orchestrator.py +380 -0
- abstractcore/compression/pil_text_renderer.py +818 -0
- abstractcore/compression/quality.py +226 -0
- abstractcore/compression/text_formatter.py +666 -0
- abstractcore/compression/vision_compressor.py +371 -0
- abstractcore/config/main.py +64 -0
- abstractcore/config/manager.py +100 -5
- abstractcore/core/retry.py +2 -2
- abstractcore/core/session.py +193 -7
- abstractcore/download.py +253 -0
- abstractcore/embeddings/manager.py +2 -2
- abstractcore/events/__init__.py +113 -2
- abstractcore/exceptions/__init__.py +49 -2
- abstractcore/media/auto_handler.py +312 -18
- abstractcore/media/handlers/local_handler.py +14 -2
- abstractcore/media/handlers/openai_handler.py +62 -3
- abstractcore/media/processors/__init__.py +11 -1
- abstractcore/media/processors/direct_pdf_processor.py +210 -0
- abstractcore/media/processors/glyph_pdf_processor.py +227 -0
- abstractcore/media/processors/image_processor.py +7 -1
- abstractcore/media/processors/office_processor.py +2 -2
- abstractcore/media/processors/text_processor.py +18 -3
- abstractcore/media/types.py +164 -7
- abstractcore/media/utils/image_scaler.py +2 -2
- abstractcore/media/vision_fallback.py +2 -2
- abstractcore/providers/__init__.py +18 -0
- abstractcore/providers/anthropic_provider.py +228 -8
- abstractcore/providers/base.py +378 -11
- abstractcore/providers/huggingface_provider.py +563 -23
- abstractcore/providers/lmstudio_provider.py +284 -4
- abstractcore/providers/mlx_provider.py +27 -2
- abstractcore/providers/model_capabilities.py +352 -0
- abstractcore/providers/ollama_provider.py +282 -6
- abstractcore/providers/openai_provider.py +286 -8
- abstractcore/providers/registry.py +85 -13
- abstractcore/providers/streaming.py +2 -2
- abstractcore/server/app.py +91 -81
- abstractcore/tools/common_tools.py +2 -2
- abstractcore/tools/handler.py +2 -2
- abstractcore/tools/parser.py +2 -2
- abstractcore/tools/registry.py +2 -2
- abstractcore/tools/syntax_rewriter.py +2 -2
- abstractcore/tools/tag_rewriter.py +3 -3
- abstractcore/utils/__init__.py +4 -1
- abstractcore/utils/self_fixes.py +2 -2
- abstractcore/utils/trace_export.py +287 -0
- abstractcore/utils/version.py +1 -1
- abstractcore/utils/vlm_token_calculator.py +655 -0
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/METADATA +207 -8
- abstractcore-2.6.0.dist-info/RECORD +108 -0
- abstractcore-2.5.2.dist-info/RECORD +0 -90
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/WHEEL +0 -0
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/entry_points.txt +0 -0
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,388 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Provider-specific optimization profiles for enhanced Glyph compression.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import json
from dataclasses import asdict, dataclass, replace
from pathlib import Path
from typing import Any, Dict, Optional

from ..utils.structured_logging import get_logger
from .config import RenderingConfig
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class OptimizationProfile:
    """Optimization profile for a specific provider/model combination."""
    provider: str
    model: str
    dpi: int
    font_size: int
    line_height: int
    columns: int
    margin_x: int
    margin_y: int
    target_compression: float
    quality_threshold: float
    notes: str = ""

    def to_rendering_config(self) -> RenderingConfig:
        """Convert to RenderingConfig."""
        # Layout fields carry over one-to-one from the profile.
        layout = {
            "dpi": self.dpi,
            "font_size": self.font_size,
            "line_height": self.line_height,
            "columns": self.columns,
            "margin_x": self.margin_x,
            "margin_y": self.margin_y,
        }
        # Verdana and auto-cropping are fixed choices shared by every profile.
        return RenderingConfig(**layout, font_path="Verdana", auto_crop=True)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class CompressionOptimizer:
    """
    Optimizes Glyph compression for different providers and models.

    Based on empirical testing, provides optimized rendering configurations
    to achieve maximum compression while maintaining quality.

    Profiles are keyed ``"provider/model"``; a provider-wide fallback may be
    stored as ``"provider/default"``, and the global fallback lives under the
    bare key ``"default"``.
    """

    # Empirically tuned settings, one row per provider/model:
    # (provider, model, dpi, font_size, line_height, columns,
    #  margin_x, margin_y, target_compression, quality_threshold, notes)
    _PROFILE_TABLE = (
        # OpenAI models - optimized for GPT-4 vision.
        # Lower DPI + multi-column layout favour compression density.
        ("openai", "gpt-4o", 72, 8, 9, 4, 3, 3, 3.5, 0.93,
         "Optimized for GPT-4o vision encoder"),
        ("openai", "gpt-4o-mini", 72, 7, 8, 5, 2, 2, 4.0, 0.90,
         "Aggressive compression for mini model"),
        # Anthropic models - optimized for Claude vision.
        # Higher DPI / fewer columns keeps detail for the sonnet model.
        ("anthropic", "claude-3-5-sonnet", 96, 9, 10, 3, 4, 4, 3.0, 0.94,
         "Conservative for Claude's detail focus"),
        ("anthropic", "claude-3-5-haiku", 72, 7, 8, 4, 3, 3, 3.5, 0.91,
         "Balanced for Haiku efficiency"),
        # Ollama models - optimized for open source vision models.
        ("ollama", "llama3.2-vision", 72, 6, 7, 6, 2, 2, 4.5, 0.88,
         "Maximum compression for Llama vision"),
        ("ollama", "qwen2.5-vision", 72, 7, 8, 5, 2, 2, 4.0, 0.89,
         "Optimized for Qwen2.5 vision"),
        # LMStudio models (provider-wide default).
        ("lmstudio", "default", 72, 8, 9, 4, 3, 3, 3.5, 0.90,
         "Generic LMStudio optimization"),
        # Safe fallback for unknown providers; stored under the bare key "default".
        ("default", "default", 72, 8, 9, 4, 3, 3, 3.0, 0.92,
         "Safe default configuration"),
    )

    def __init__(self):
        """Initialize compression optimizer and build the profile table."""
        self.logger = get_logger(self.__class__.__name__)
        self.profiles = self._load_optimization_profiles()

    def _load_optimization_profiles(self) -> Dict[str, OptimizationProfile]:
        """Load provider-specific optimization profiles keyed by "provider/model"."""
        profiles = {}
        for row in self._PROFILE_TABLE:
            profile = OptimizationProfile(*row)
            # The global fallback profile uses the bare key "default".
            if profile.provider == "default":
                key = "default"
            else:
                key = f"{profile.provider}/{profile.model}"
            profiles[key] = profile
        return profiles

    def _lookup_profile(self, provider: str, model: str) -> OptimizationProfile:
        """Resolve a profile: exact match, then provider default, then global default."""
        for key in (f"{provider}/{model}", f"{provider}/default"):
            if key in self.profiles:
                return self.profiles[key]
        return self.profiles["default"]

    def get_optimized_config(
        self,
        provider: str,
        model: str,
        aggressive: bool = False
    ) -> RenderingConfig:
        """
        Get optimized rendering configuration for provider/model.

        Args:
            provider: Provider name
            model: Model name
            aggressive: Use more aggressive compression

        Returns:
            Optimized RenderingConfig
        """
        profile = self._lookup_profile(provider, model)
        self.logger.debug(f"Using optimization profile: {profile.provider}/{profile.model}")

        # Apply aggressive modifications if requested.
        if aggressive:
            profile = self._apply_aggressive_settings(profile)

        return profile.to_rendering_config()

    def _apply_aggressive_settings(self, profile: OptimizationProfile) -> OptimizationProfile:
        """Return a copy of *profile* with more aggressive compression settings.

        Floors/ceilings keep the layout renderable (font >= 5pt, at most 8
        columns, margins >= 1); the quality threshold is relaxed by 5%.
        """
        return replace(
            profile,
            font_size=max(5, profile.font_size - 1),       # smaller font
            line_height=max(6, profile.line_height - 1),
            columns=min(8, profile.columns + 2),           # more columns
            margin_x=max(1, profile.margin_x - 1),
            margin_y=max(1, profile.margin_y - 1),
            target_compression=profile.target_compression * 1.5,
            quality_threshold=profile.quality_threshold * 0.95,
            notes=f"{profile.notes} (aggressive mode)",
        )

    def analyze_compression_potential(
        self,
        text_length: int,
        provider: str,
        model: str
    ) -> Dict[str, Any]:
        """
        Analyze potential compression for given text.

        Args:
            text_length: Length of text in characters
            provider: Provider name
            model: Model name

        Returns:
            Analysis of compression potential
        """
        # Use the same fallback chain as get_optimized_config (exact match,
        # provider default, then global default) for consistent estimates.
        profile = self._lookup_profile(provider, model)

        # Rough token estimate: ~4 characters per token.
        estimated_tokens = text_length // 4

        # Page capacity: ~40 characters per line per column, ~60 lines per page.
        chars_per_page = (profile.columns * 40) * 60
        estimated_pages = text_length / chars_per_page
        estimated_images = max(1, int(estimated_pages / 2))  # 2 pages per image

        # Approximate vision-token cost per rendered image for most models.
        tokens_per_image = 1500
        compressed_tokens = estimated_images * tokens_per_image

        compression_ratio = estimated_tokens / compressed_tokens if compressed_tokens > 0 else 1.0

        return {
            "text_length": text_length,
            "estimated_tokens": estimated_tokens,
            "estimated_images": estimated_images,
            "compressed_tokens": compressed_tokens,
            "compression_ratio": compression_ratio,
            "profile_used": f"{profile.provider}/{profile.model}",
            "target_compression": profile.target_compression,
            # Consider the target reachable if we get within 80% of it.
            "achievable": compression_ratio >= profile.target_compression * 0.8
        }

    def save_profiles(self, path: Path):
        """Serialize all optimization profiles to *path* as indented JSON."""
        # asdict() emits the dataclass fields in declaration order, matching
        # the on-disk schema (provider, model, dpi, ... notes).
        profiles_dict = {key: asdict(profile) for key, profile in self.profiles.items()}

        with open(path, 'w', encoding='utf-8') as f:
            json.dump(profiles_dict, f, indent=2)

        self.logger.info(f"Saved {len(profiles_dict)} optimization profiles to {path}")

    def benchmark_profile(
        self,
        profile: OptimizationProfile,
        test_text: str
    ) -> Dict[str, Any]:
        """
        Benchmark a specific optimization profile.

        Args:
            profile: Profile to benchmark
            test_text: Text to test with

        Returns:
            Benchmark results: on success, compression/quality metrics and
            timing; on failure, ``{"success": False, ..., "error": ...}``.
        """
        # Imported locally — presumably to avoid circular imports at module
        # load time (glyph_processor depends on this module's package).
        from .glyph_processor import GlyphProcessor
        from .config import GlyphConfig
        import time

        # Force compression on, with a low threshold so the test text qualifies.
        config = GlyphConfig()
        config.enabled = True
        config.min_token_threshold = 100

        processor = GlyphProcessor(config=config)

        start_time = time.time()

        try:
            results = processor.process_text(
                test_text,
                provider=profile.provider,
                model=profile.model,
                user_preference="always"
            )

            processing_time = time.time() - start_time

            # Compare estimated input tokens against the vision-token cost of
            # the rendered images (~1500 tokens per image).
            from ..utils.token_utils import TokenUtils
            original_tokens = TokenUtils.estimate_tokens(test_text, profile.model)
            compressed_tokens = len(results) * 1500
            actual_ratio = original_tokens / compressed_tokens if compressed_tokens > 0 else 1.0

            # Quality score is reported by the processor on the first image.
            quality = results[0].metadata.get("quality_score", 0.0) if results else 0.0

            return {
                "success": True,
                "profile": f"{profile.provider}/{profile.model}",
                "original_tokens": original_tokens,
                "compressed_tokens": compressed_tokens,
                "compression_ratio": actual_ratio,
                "target_ratio": profile.target_compression,
                "quality_score": quality,
                "quality_threshold": profile.quality_threshold,
                "processing_time": processing_time,
                "images_created": len(results),
                # Allow a 10% shortfall against the profile's target.
                "meets_target": actual_ratio >= profile.target_compression * 0.9
            }

        except Exception as e:
            # Best-effort benchmark: report the failure instead of raising.
            return {
                "success": False,
                "profile": f"{profile.provider}/{profile.model}",
                "error": str(e)
            }
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def create_optimized_config(provider: str, model: str, aggressive: bool = False) -> RenderingConfig:
    """
    Convenience function to create optimized rendering configuration.

    Builds a throwaway CompressionOptimizer and delegates to its
    get_optimized_config method.

    Args:
        provider: Provider name
        model: Model name
        aggressive: Use more aggressive compression

    Returns:
        Optimized RenderingConfig
    """
    return CompressionOptimizer().get_optimized_config(provider, model, aggressive)
|