abstractcore 2.5.0__py3-none-any.whl → 2.5.3__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- abstractcore/__init__.py +12 -0
- abstractcore/apps/__main__.py +8 -1
- abstractcore/apps/deepsearch.py +644 -0
- abstractcore/apps/intent.py +614 -0
- abstractcore/architectures/detection.py +250 -4
- abstractcore/assets/architecture_formats.json +14 -1
- abstractcore/assets/model_capabilities.json +583 -44
- abstractcore/compression/__init__.py +29 -0
- abstractcore/compression/analytics.py +420 -0
- abstractcore/compression/cache.py +250 -0
- abstractcore/compression/config.py +279 -0
- abstractcore/compression/exceptions.py +30 -0
- abstractcore/compression/glyph_processor.py +381 -0
- abstractcore/compression/optimizer.py +388 -0
- abstractcore/compression/orchestrator.py +380 -0
- abstractcore/compression/pil_text_renderer.py +818 -0
- abstractcore/compression/quality.py +226 -0
- abstractcore/compression/text_formatter.py +666 -0
- abstractcore/compression/vision_compressor.py +371 -0
- abstractcore/config/main.py +66 -1
- abstractcore/config/manager.py +111 -5
- abstractcore/core/session.py +105 -5
- abstractcore/events/__init__.py +1 -1
- abstractcore/media/auto_handler.py +312 -18
- abstractcore/media/handlers/local_handler.py +14 -2
- abstractcore/media/handlers/openai_handler.py +62 -3
- abstractcore/media/processors/__init__.py +11 -1
- abstractcore/media/processors/direct_pdf_processor.py +210 -0
- abstractcore/media/processors/glyph_pdf_processor.py +227 -0
- abstractcore/media/processors/image_processor.py +7 -1
- abstractcore/media/processors/text_processor.py +18 -3
- abstractcore/media/types.py +164 -7
- abstractcore/processing/__init__.py +5 -1
- abstractcore/processing/basic_deepsearch.py +2173 -0
- abstractcore/processing/basic_intent.py +690 -0
- abstractcore/providers/__init__.py +18 -0
- abstractcore/providers/anthropic_provider.py +29 -2
- abstractcore/providers/base.py +279 -6
- abstractcore/providers/huggingface_provider.py +658 -27
- abstractcore/providers/lmstudio_provider.py +52 -2
- abstractcore/providers/mlx_provider.py +103 -4
- abstractcore/providers/model_capabilities.py +352 -0
- abstractcore/providers/ollama_provider.py +44 -6
- abstractcore/providers/openai_provider.py +29 -2
- abstractcore/providers/registry.py +91 -19
- abstractcore/server/app.py +91 -81
- abstractcore/structured/handler.py +161 -1
- abstractcore/tools/common_tools.py +98 -3
- abstractcore/utils/__init__.py +4 -1
- abstractcore/utils/cli.py +114 -1
- abstractcore/utils/trace_export.py +287 -0
- abstractcore/utils/version.py +1 -1
- abstractcore/utils/vlm_token_calculator.py +655 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/METADATA +140 -23
- abstractcore-2.5.3.dist-info/RECORD +107 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/entry_points.txt +4 -0
- abstractcore-2.5.0.dist-info/RECORD +0 -86
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/WHEEL +0 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/top_level.txt +0 -0
|
@@ -17,6 +17,10 @@ logger = logging.getLogger(__name__)
|
|
|
17
17
|
_architecture_formats: Optional[Dict[str, Any]] = None
|
|
18
18
|
_model_capabilities: Optional[Dict[str, Any]] = None
|
|
19
19
|
|
|
20
|
+
# Cache for resolved model names and architectures to reduce redundant logging
|
|
21
|
+
_resolved_aliases_cache: Dict[str, str] = {}
|
|
22
|
+
_detected_architectures_cache: Dict[str, str] = {}
|
|
23
|
+
|
|
20
24
|
|
|
21
25
|
def _load_json_assets():
|
|
22
26
|
"""Load architecture formats and model capabilities from JSON files."""
|
|
@@ -58,9 +62,14 @@ def detect_architecture(model_name: str) -> str:
|
|
|
58
62
|
Returns:
|
|
59
63
|
Architecture name (e.g., 'qwen', 'llama', 'openai')
|
|
60
64
|
"""
|
|
65
|
+
# Check cache first to avoid redundant logging
|
|
66
|
+
if model_name in _detected_architectures_cache:
|
|
67
|
+
return _detected_architectures_cache[model_name]
|
|
68
|
+
|
|
61
69
|
_load_json_assets()
|
|
62
70
|
|
|
63
71
|
if not _architecture_formats or "architectures" not in _architecture_formats:
|
|
72
|
+
_detected_architectures_cache[model_name] = "generic"
|
|
64
73
|
return "generic"
|
|
65
74
|
|
|
66
75
|
model_lower = model_name.lower()
|
|
@@ -71,10 +80,12 @@ def detect_architecture(model_name: str) -> str:
|
|
|
71
80
|
for pattern in patterns:
|
|
72
81
|
if pattern.lower() in model_lower:
|
|
73
82
|
logger.debug(f"Detected architecture '{arch_name}' for model '{model_name}' (pattern: '{pattern}')")
|
|
83
|
+
_detected_architectures_cache[model_name] = arch_name
|
|
74
84
|
return arch_name
|
|
75
85
|
|
|
76
86
|
# Fallback to generic
|
|
77
87
|
logger.debug(f"No specific architecture detected for '{model_name}', using generic")
|
|
88
|
+
_detected_architectures_cache[model_name] = "generic"
|
|
78
89
|
return "generic"
|
|
79
90
|
|
|
80
91
|
|
|
@@ -101,6 +112,7 @@ def resolve_model_alias(model_name: str, models: Dict[str, Any]) -> str:
|
|
|
101
112
|
Resolve a model name to its canonical name by checking aliases.
|
|
102
113
|
|
|
103
114
|
Automatically converts "--" to "/" for HuggingFace cache format compatibility.
|
|
115
|
+
Normalizes Claude version numbers (e.g., "claude-3-5-sonnet" -> "claude-3.5-sonnet").
|
|
104
116
|
|
|
105
117
|
Args:
|
|
106
118
|
model_name: Model name that might be an alias
|
|
@@ -109,17 +121,35 @@ def resolve_model_alias(model_name: str, models: Dict[str, Any]) -> str:
|
|
|
109
121
|
Returns:
|
|
110
122
|
Canonical model name
|
|
111
123
|
"""
|
|
124
|
+
# Check cache first to avoid redundant logging
|
|
125
|
+
if model_name in _resolved_aliases_cache:
|
|
126
|
+
return _resolved_aliases_cache[model_name]
|
|
127
|
+
|
|
112
128
|
# First check if it's already a canonical name
|
|
113
129
|
if model_name in models:
|
|
130
|
+
_resolved_aliases_cache[model_name] = model_name
|
|
114
131
|
return model_name
|
|
115
132
|
|
|
133
|
+
# Normalize model name
|
|
134
|
+
normalized_model_name = model_name
|
|
135
|
+
|
|
116
136
|
# Convert "--" to "/" for HuggingFace cache format compatibility
|
|
117
|
-
normalized_model_name = model_name.replace("--", "/")
|
|
137
|
+
normalized_model_name = normalized_model_name.replace("--", "/")
|
|
138
|
+
|
|
139
|
+
# Normalize Claude version numbers: convert "-X-Y-" to "-X.Y-" or "-X-Y" to "-X.Y"
|
|
140
|
+
# Examples:
|
|
141
|
+
# "claude-3-5-sonnet" -> "claude-3.5-sonnet"
|
|
142
|
+
# "claude-4-1-opus" -> "claude-4.1-opus"
|
|
143
|
+
# "claude-3-5-sonnet-20241022" -> "claude-3.5-sonnet-20241022"
|
|
144
|
+
import re
|
|
145
|
+
normalized_model_name = re.sub(r'(claude-\d+)-(\d+)(?=-|$)', r'\1.\2', normalized_model_name)
|
|
146
|
+
|
|
118
147
|
if normalized_model_name != model_name:
|
|
119
148
|
logger.debug(f"Normalized model name '{model_name}' to '{normalized_model_name}'")
|
|
120
149
|
|
|
121
150
|
# Check if normalized name is a canonical name
|
|
122
151
|
if normalized_model_name in models:
|
|
152
|
+
_resolved_aliases_cache[model_name] = normalized_model_name
|
|
123
153
|
return normalized_model_name
|
|
124
154
|
|
|
125
155
|
# Check if it's an alias of any model (try both original and normalized)
|
|
@@ -127,9 +157,11 @@ def resolve_model_alias(model_name: str, models: Dict[str, Any]) -> str:
|
|
|
127
157
|
aliases = model_info.get("aliases", [])
|
|
128
158
|
if model_name in aliases or normalized_model_name in aliases:
|
|
129
159
|
logger.debug(f"Resolved alias '{model_name}' to canonical name '{canonical_name}'")
|
|
160
|
+
_resolved_aliases_cache[model_name] = canonical_name
|
|
130
161
|
return canonical_name
|
|
131
162
|
|
|
132
163
|
# Return normalized name if no alias found
|
|
164
|
+
_resolved_aliases_cache[model_name] = normalized_model_name
|
|
133
165
|
return normalized_model_name
|
|
134
166
|
|
|
135
167
|
|
|
@@ -165,9 +197,10 @@ def get_model_capabilities(model_name: str) -> Dict[str, Any]:
|
|
|
165
197
|
return capabilities
|
|
166
198
|
|
|
167
199
|
# Step 3: Try partial matches for common model naming patterns
|
|
168
|
-
|
|
200
|
+
# Use canonical_name (which has been normalized) for better matching
|
|
201
|
+
canonical_lower = canonical_name.lower()
|
|
169
202
|
for model_key, capabilities in models.items():
|
|
170
|
-
if model_key.lower() in
|
|
203
|
+
if model_key.lower() in canonical_lower or canonical_lower in model_key.lower():
|
|
171
204
|
result = capabilities.copy()
|
|
172
205
|
# Remove alias-specific fields
|
|
173
206
|
result.pop("canonical_name", None)
|
|
@@ -293,4 +326,217 @@ def detect_model_type(model_name: str) -> str:
|
|
|
293
326
|
elif any(x in model_lower for x in ["vision", "vl", "multimodal"]):
|
|
294
327
|
return "vision"
|
|
295
328
|
else:
|
|
296
|
-
return "base"
|
|
329
|
+
return "base"
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def get_vision_capabilities(model_name: str) -> Dict[str, Any]:
|
|
333
|
+
"""
|
|
334
|
+
Get vision-specific capabilities for a model with fallback to generic vision model.
|
|
335
|
+
|
|
336
|
+
Args:
|
|
337
|
+
model_name: Model name to get vision capabilities for
|
|
338
|
+
|
|
339
|
+
Returns:
|
|
340
|
+
Dictionary with vision capabilities, using generic fallback if model not found
|
|
341
|
+
"""
|
|
342
|
+
from ..utils.structured_logging import get_logger
|
|
343
|
+
logger = get_logger(__name__)
|
|
344
|
+
|
|
345
|
+
# Get model capabilities
|
|
346
|
+
capabilities = get_model_capabilities(model_name)
|
|
347
|
+
|
|
348
|
+
# Check if model has vision support
|
|
349
|
+
if not capabilities.get('vision_support', False):
|
|
350
|
+
logger.warning(f"Model '{model_name}' does not have vision support")
|
|
351
|
+
return {}
|
|
352
|
+
|
|
353
|
+
# Extract vision-specific fields
|
|
354
|
+
vision_fields = [
|
|
355
|
+
'image_resolutions', 'max_image_resolution', 'image_patch_size',
|
|
356
|
+
'max_image_tokens', 'image_tokenization_method', 'adaptive_resolution',
|
|
357
|
+
'vision_encoder', 'pixel_grouping', 'supported_resolutions',
|
|
358
|
+
'base_tokens_per_resolution', 'fixed_resolution', 'tokens_per_tile',
|
|
359
|
+
'tile_size', 'base_image_tokens', 'pixel_divisor', 'token_cap'
|
|
360
|
+
]
|
|
361
|
+
|
|
362
|
+
vision_capabilities = {}
|
|
363
|
+
for field in vision_fields:
|
|
364
|
+
if field in capabilities:
|
|
365
|
+
vision_capabilities[field] = capabilities[field]
|
|
366
|
+
|
|
367
|
+
# If we have minimal vision capabilities, use generic fallback
|
|
368
|
+
if not vision_capabilities or len(vision_capabilities) < 3:
|
|
369
|
+
logger.warning(
|
|
370
|
+
f"Model '{model_name}' has limited vision metadata, using generic vision model fallback",
|
|
371
|
+
model=model_name,
|
|
372
|
+
found_fields=list(vision_capabilities.keys())
|
|
373
|
+
)
|
|
374
|
+
|
|
375
|
+
# Get generic vision model capabilities
|
|
376
|
+
_load_json_assets()
|
|
377
|
+
if _model_capabilities and "generic_vision_model" in _model_capabilities:
|
|
378
|
+
generic_caps = _model_capabilities["generic_vision_model"]
|
|
379
|
+
for field in vision_fields:
|
|
380
|
+
if field in generic_caps:
|
|
381
|
+
vision_capabilities[field] = generic_caps[field]
|
|
382
|
+
|
|
383
|
+
return vision_capabilities
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def get_glyph_compression_capabilities(model_name: str) -> Dict[str, Any]:
|
|
387
|
+
"""
|
|
388
|
+
Get capabilities relevant for Glyph compression with intelligent fallbacks.
|
|
389
|
+
|
|
390
|
+
Args:
|
|
391
|
+
model_name: Model name to get Glyph capabilities for
|
|
392
|
+
|
|
393
|
+
Returns:
|
|
394
|
+
Dictionary with Glyph-relevant capabilities and recommendations
|
|
395
|
+
"""
|
|
396
|
+
from ..utils.structured_logging import get_logger
|
|
397
|
+
logger = get_logger(__name__)
|
|
398
|
+
|
|
399
|
+
capabilities = get_model_capabilities(model_name)
|
|
400
|
+
|
|
401
|
+
# Check if model supports vision (required for Glyph)
|
|
402
|
+
if not capabilities.get('vision_support', False):
|
|
403
|
+
logger.error(
|
|
404
|
+
f"Model '{model_name}' does not support vision, cannot use Glyph compression",
|
|
405
|
+
model=model_name
|
|
406
|
+
)
|
|
407
|
+
return {
|
|
408
|
+
'glyph_compatible': False,
|
|
409
|
+
'reason': 'no_vision_support'
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
# Get vision capabilities
|
|
413
|
+
vision_caps = get_vision_capabilities(model_name)
|
|
414
|
+
|
|
415
|
+
# Determine Glyph compatibility and optimal settings
|
|
416
|
+
glyph_caps = {
|
|
417
|
+
'glyph_compatible': True,
|
|
418
|
+
'model_name': model_name,
|
|
419
|
+
'vision_support': True
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
# Add vision-specific fields for token calculation
|
|
423
|
+
glyph_caps.update(vision_caps)
|
|
424
|
+
|
|
425
|
+
# Determine optimal compression settings based on model capabilities
|
|
426
|
+
max_image_tokens = vision_caps.get('max_image_tokens', 2048)
|
|
427
|
+
image_patch_size = vision_caps.get('image_patch_size', 16)
|
|
428
|
+
|
|
429
|
+
# Recommend compression parameters
|
|
430
|
+
if max_image_tokens >= 16000:
|
|
431
|
+
glyph_caps['recommended_pages_per_image'] = 2
|
|
432
|
+
glyph_caps['recommended_dpi'] = 150
|
|
433
|
+
elif max_image_tokens >= 8000:
|
|
434
|
+
glyph_caps['recommended_pages_per_image'] = 1
|
|
435
|
+
glyph_caps['recommended_dpi'] = 120
|
|
436
|
+
else:
|
|
437
|
+
glyph_caps['recommended_pages_per_image'] = 1
|
|
438
|
+
glyph_caps['recommended_dpi'] = 100
|
|
439
|
+
|
|
440
|
+
# Check for Glyph-optimized models
|
|
441
|
+
if capabilities.get('optimized_for_glyph', False):
|
|
442
|
+
glyph_caps['glyph_optimized'] = True
|
|
443
|
+
logger.info(f"Model '{model_name}' is optimized for Glyph compression")
|
|
444
|
+
|
|
445
|
+
return glyph_caps
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
def check_vision_model_compatibility(model_name: str, provider: str = None) -> Dict[str, Any]:
|
|
449
|
+
"""
|
|
450
|
+
Comprehensive check for vision model compatibility with detailed recommendations.
|
|
451
|
+
|
|
452
|
+
Args:
|
|
453
|
+
model_name: Model name to check
|
|
454
|
+
provider: Provider name (optional, for provider-specific checks)
|
|
455
|
+
|
|
456
|
+
Returns:
|
|
457
|
+
Dictionary with compatibility status and recommendations
|
|
458
|
+
"""
|
|
459
|
+
from ..utils.structured_logging import get_logger
|
|
460
|
+
logger = get_logger(__name__)
|
|
461
|
+
|
|
462
|
+
result = {
|
|
463
|
+
'model_name': model_name,
|
|
464
|
+
'provider': provider,
|
|
465
|
+
'compatible': False,
|
|
466
|
+
'vision_support': False,
|
|
467
|
+
'glyph_compatible': False,
|
|
468
|
+
'warnings': [],
|
|
469
|
+
'recommendations': [],
|
|
470
|
+
'capabilities': {}
|
|
471
|
+
}
|
|
472
|
+
|
|
473
|
+
# Get model capabilities
|
|
474
|
+
capabilities = get_model_capabilities(model_name)
|
|
475
|
+
|
|
476
|
+
# Check if this is an unknown model (architecture is 'generic' means it wasn't found in database)
|
|
477
|
+
is_unknown_model = capabilities.get('architecture') == 'generic' and not capabilities.get('vision_support', False)
|
|
478
|
+
|
|
479
|
+
if is_unknown_model:
|
|
480
|
+
result['warnings'].append(f"Model '{model_name}' not found in capabilities database")
|
|
481
|
+
result['recommendations'].append("Add model specifications to model_capabilities.json")
|
|
482
|
+
result['recommendations'].append("Using generic vision model fallback for VLM calculations")
|
|
483
|
+
|
|
484
|
+
# Use generic fallback - assume vision support for unknown models
|
|
485
|
+
_load_json_assets()
|
|
486
|
+
if _model_capabilities and "generic_vision_model" in _model_capabilities:
|
|
487
|
+
generic_caps = _model_capabilities["generic_vision_model"].copy()
|
|
488
|
+
result['compatible'] = True
|
|
489
|
+
result['vision_support'] = True
|
|
490
|
+
result['capabilities'] = generic_caps
|
|
491
|
+
|
|
492
|
+
# Also get vision capabilities using the generic model
|
|
493
|
+
vision_caps = generic_caps.copy()
|
|
494
|
+
result['vision_capabilities'] = vision_caps
|
|
495
|
+
|
|
496
|
+
# Check Glyph compatibility with generic model
|
|
497
|
+
glyph_caps = {
|
|
498
|
+
'glyph_compatible': True,
|
|
499
|
+
'model_name': model_name,
|
|
500
|
+
'vision_support': True,
|
|
501
|
+
'recommended_pages_per_image': 1,
|
|
502
|
+
'recommended_dpi': 100
|
|
503
|
+
}
|
|
504
|
+
glyph_caps.update(vision_caps)
|
|
505
|
+
result['glyph_compatible'] = True
|
|
506
|
+
result['glyph_capabilities'] = glyph_caps
|
|
507
|
+
|
|
508
|
+
logger.warning(f"Using generic vision model fallback for unknown model '{model_name}'")
|
|
509
|
+
|
|
510
|
+
return result
|
|
511
|
+
|
|
512
|
+
# Check vision support
|
|
513
|
+
vision_support = capabilities.get('vision_support', False)
|
|
514
|
+
result['vision_support'] = vision_support
|
|
515
|
+
result['capabilities'] = capabilities
|
|
516
|
+
|
|
517
|
+
if not vision_support:
|
|
518
|
+
result['warnings'].append(f"Model '{model_name}' does not support vision")
|
|
519
|
+
result['recommendations'].append("Use a vision-capable model for image processing")
|
|
520
|
+
return result
|
|
521
|
+
|
|
522
|
+
result['compatible'] = True
|
|
523
|
+
|
|
524
|
+
# Get vision-specific capabilities
|
|
525
|
+
vision_caps = get_vision_capabilities(model_name)
|
|
526
|
+
result['vision_capabilities'] = vision_caps
|
|
527
|
+
|
|
528
|
+
# Check Glyph compatibility
|
|
529
|
+
glyph_caps = get_glyph_compression_capabilities(model_name)
|
|
530
|
+
result['glyph_compatible'] = glyph_caps.get('glyph_compatible', False)
|
|
531
|
+
result['glyph_capabilities'] = glyph_caps
|
|
532
|
+
|
|
533
|
+
# Add specific recommendations based on capabilities
|
|
534
|
+
if not vision_caps.get('image_patch_size'):
|
|
535
|
+
result['warnings'].append("No image_patch_size specified, using generic fallback")
|
|
536
|
+
result['recommendations'].append("Add image_patch_size to model capabilities for better accuracy")
|
|
537
|
+
|
|
538
|
+
if not vision_caps.get('max_image_tokens'):
|
|
539
|
+
result['warnings'].append("No max_image_tokens specified")
|
|
540
|
+
result['recommendations'].append("Add max_image_tokens to model capabilities")
|
|
541
|
+
|
|
542
|
+
return result
|
|
@@ -308,6 +308,18 @@
|
|
|
308
308
|
"tool_prefix": "<|tool_call|>",
|
|
309
309
|
"patterns": ["glm-4.5", "glm-4.6", "glm-4.5-air"]
|
|
310
310
|
},
|
|
311
|
+
"glm4v": {
|
|
312
|
+
"description": "Zhipu AI's GLM-4V multimodal architecture (June 2024)",
|
|
313
|
+
"message_format": "glm_special_tokens",
|
|
314
|
+
"system_prefix": "<|system|>\n",
|
|
315
|
+
"system_suffix": "\n",
|
|
316
|
+
"user_prefix": "<|user|>\n",
|
|
317
|
+
"user_suffix": "\n",
|
|
318
|
+
"assistant_prefix": "<|assistant|>\n",
|
|
319
|
+
"assistant_suffix": "\n",
|
|
320
|
+
"tool_format": "json",
|
|
321
|
+
"patterns": ["glm-4v", "glm4v", "glyph", "zai-org/glyph", "glm-4.1v"]
|
|
322
|
+
},
|
|
311
323
|
"glm4": {
|
|
312
324
|
"description": "Zhipu AI's GLM-4 architecture (June 2024)",
|
|
313
325
|
"message_format": "im_start_end",
|
|
@@ -407,7 +419,8 @@
|
|
|
407
419
|
"basic": "Simple role: content format",
|
|
408
420
|
"human_assistant": "Human/Assistant format",
|
|
409
421
|
"openai_chat": "OpenAI chat completion format",
|
|
410
|
-
"llama3_header": "LLaMA 3+ format with <|start_header_id|> and <|eot_id|>"
|
|
422
|
+
"llama3_header": "LLaMA 3+ format with <|start_header_id|> and <|eot_id|>",
|
|
423
|
+
"glm_special_tokens": "GLM format with <|system|>, <|user|>, <|assistant|> tokens"
|
|
411
424
|
},
|
|
412
425
|
"tool_formats": {
|
|
413
426
|
"pythonic": "Python function call syntax: [func(arg=val)]",
|