abstractcore 2.5.2__py3-none-any.whl → 2.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. abstractcore/__init__.py +12 -0
  2. abstractcore/architectures/detection.py +250 -4
  3. abstractcore/assets/architecture_formats.json +14 -1
  4. abstractcore/assets/model_capabilities.json +533 -10
  5. abstractcore/compression/__init__.py +29 -0
  6. abstractcore/compression/analytics.py +420 -0
  7. abstractcore/compression/cache.py +250 -0
  8. abstractcore/compression/config.py +279 -0
  9. abstractcore/compression/exceptions.py +30 -0
  10. abstractcore/compression/glyph_processor.py +381 -0
  11. abstractcore/compression/optimizer.py +388 -0
  12. abstractcore/compression/orchestrator.py +380 -0
  13. abstractcore/compression/pil_text_renderer.py +818 -0
  14. abstractcore/compression/quality.py +226 -0
  15. abstractcore/compression/text_formatter.py +666 -0
  16. abstractcore/compression/vision_compressor.py +371 -0
  17. abstractcore/config/main.py +64 -0
  18. abstractcore/config/manager.py +100 -5
  19. abstractcore/core/session.py +61 -6
  20. abstractcore/events/__init__.py +1 -1
  21. abstractcore/media/auto_handler.py +312 -18
  22. abstractcore/media/handlers/local_handler.py +14 -2
  23. abstractcore/media/handlers/openai_handler.py +62 -3
  24. abstractcore/media/processors/__init__.py +11 -1
  25. abstractcore/media/processors/direct_pdf_processor.py +210 -0
  26. abstractcore/media/processors/glyph_pdf_processor.py +227 -0
  27. abstractcore/media/processors/image_processor.py +7 -1
  28. abstractcore/media/processors/text_processor.py +18 -3
  29. abstractcore/media/types.py +164 -7
  30. abstractcore/providers/__init__.py +18 -0
  31. abstractcore/providers/anthropic_provider.py +28 -2
  32. abstractcore/providers/base.py +278 -6
  33. abstractcore/providers/huggingface_provider.py +563 -23
  34. abstractcore/providers/lmstudio_provider.py +38 -2
  35. abstractcore/providers/mlx_provider.py +27 -2
  36. abstractcore/providers/model_capabilities.py +352 -0
  37. abstractcore/providers/ollama_provider.py +38 -4
  38. abstractcore/providers/openai_provider.py +28 -2
  39. abstractcore/providers/registry.py +85 -13
  40. abstractcore/server/app.py +91 -81
  41. abstractcore/utils/__init__.py +4 -1
  42. abstractcore/utils/trace_export.py +287 -0
  43. abstractcore/utils/version.py +1 -1
  44. abstractcore/utils/vlm_token_calculator.py +655 -0
  45. {abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/METADATA +107 -6
  46. {abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/RECORD +50 -33
  47. {abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/WHEEL +0 -0
  48. {abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/entry_points.txt +0 -0
  49. {abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/licenses/LICENSE +0 -0
  50. {abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/top_level.txt +0 -0
abstractcore/__init__.py CHANGED
@@ -2,6 +2,8 @@
2
2
  """
3
3
  AbstractCore - Unified interface to all LLM providers with essential infrastructure.
4
4
 
5
+ CRITICAL: Offline-first design - enforces offline mode for open source LLMs by default.
6
+
5
7
  Key Features:
6
8
  • Multi-provider support (OpenAI, Anthropic, Ollama, HuggingFace, MLX, LMStudio)
7
9
  • Unified token parameter vocabulary across all providers
@@ -47,6 +49,13 @@ _has_processing = True
47
49
  # Tools module (core functionality)
48
50
  from .tools import tool
49
51
 
52
+ # Compression module (optional import)
53
+ try:
54
+ from .compression import GlyphConfig, CompressionOrchestrator
55
+ _has_compression = True
56
+ except ImportError:
57
+ _has_compression = False
58
+
50
59
  __all__ = [
51
60
  'create_llm',
52
61
  'BasicSession',
@@ -64,5 +73,8 @@ __all__ = [
64
73
  if _has_embeddings:
65
74
  __all__.append('EmbeddingManager')
66
75
 
76
+ if _has_compression:
77
+ __all__.extend(['GlyphConfig', 'CompressionOrchestrator'])
78
+
67
79
  # Processing is core functionality
68
80
  __all__.extend(['BasicSummarizer', 'SummaryStyle', 'SummaryLength', 'BasicExtractor'])
abstractcore/architectures/detection.py CHANGED
@@ -17,6 +17,10 @@ logger = logging.getLogger(__name__)
17
17
  _architecture_formats: Optional[Dict[str, Any]] = None
18
18
  _model_capabilities: Optional[Dict[str, Any]] = None
19
19
 
20
+ # Cache for resolved model names and architectures to reduce redundant logging
21
+ _resolved_aliases_cache: Dict[str, str] = {}
22
+ _detected_architectures_cache: Dict[str, str] = {}
23
+
20
24
 
21
25
  def _load_json_assets():
22
26
  """Load architecture formats and model capabilities from JSON files."""
@@ -58,9 +62,14 @@ def detect_architecture(model_name: str) -> str:
58
62
  Returns:
59
63
  Architecture name (e.g., 'qwen', 'llama', 'openai')
60
64
  """
65
+ # Check cache first to avoid redundant logging
66
+ if model_name in _detected_architectures_cache:
67
+ return _detected_architectures_cache[model_name]
68
+
61
69
  _load_json_assets()
62
70
 
63
71
  if not _architecture_formats or "architectures" not in _architecture_formats:
72
+ _detected_architectures_cache[model_name] = "generic"
64
73
  return "generic"
65
74
 
66
75
  model_lower = model_name.lower()
@@ -71,10 +80,12 @@ def detect_architecture(model_name: str) -> str:
71
80
  for pattern in patterns:
72
81
  if pattern.lower() in model_lower:
73
82
  logger.debug(f"Detected architecture '{arch_name}' for model '{model_name}' (pattern: '{pattern}')")
83
+ _detected_architectures_cache[model_name] = arch_name
74
84
  return arch_name
75
85
 
76
86
  # Fallback to generic
77
87
  logger.debug(f"No specific architecture detected for '{model_name}', using generic")
88
+ _detected_architectures_cache[model_name] = "generic"
78
89
  return "generic"
79
90
 
80
91
 
@@ -101,6 +112,7 @@ def resolve_model_alias(model_name: str, models: Dict[str, Any]) -> str:
101
112
  Resolve a model name to its canonical name by checking aliases.
102
113
 
103
114
  Automatically converts "--" to "/" for HuggingFace cache format compatibility.
115
+ Normalizes Claude version numbers (e.g., "claude-3-5-sonnet" -> "claude-3.5-sonnet").
104
116
 
105
117
  Args:
106
118
  model_name: Model name that might be an alias
@@ -109,17 +121,35 @@ def resolve_model_alias(model_name: str, models: Dict[str, Any]) -> str:
109
121
  Returns:
110
122
  Canonical model name
111
123
  """
124
+ # Check cache first to avoid redundant logging
125
+ if model_name in _resolved_aliases_cache:
126
+ return _resolved_aliases_cache[model_name]
127
+
112
128
  # First check if it's already a canonical name
113
129
  if model_name in models:
130
+ _resolved_aliases_cache[model_name] = model_name
114
131
  return model_name
115
132
 
133
+ # Normalize model name
134
+ normalized_model_name = model_name
135
+
116
136
  # Convert "--" to "/" for HuggingFace cache format compatibility
117
- normalized_model_name = model_name.replace("--", "/")
137
+ normalized_model_name = normalized_model_name.replace("--", "/")
138
+
139
+ # Normalize Claude version numbers: convert "-X-Y-" to "-X.Y-" or "-X-Y" to "-X.Y"
140
+ # Examples:
141
+ # "claude-3-5-sonnet" -> "claude-3.5-sonnet"
142
+ # "claude-4-1-opus" -> "claude-4.1-opus"
143
+ # "claude-3-5-sonnet-20241022" -> "claude-3.5-sonnet-20241022"
144
+ import re
145
+ normalized_model_name = re.sub(r'(claude-\d+)-(\d+)(?=-|$)', r'\1.\2', normalized_model_name)
146
+
118
147
  if normalized_model_name != model_name:
119
148
  logger.debug(f"Normalized model name '{model_name}' to '{normalized_model_name}'")
120
149
 
121
150
  # Check if normalized name is a canonical name
122
151
  if normalized_model_name in models:
152
+ _resolved_aliases_cache[model_name] = normalized_model_name
123
153
  return normalized_model_name
124
154
 
125
155
  # Check if it's an alias of any model (try both original and normalized)
@@ -127,9 +157,11 @@ def resolve_model_alias(model_name: str, models: Dict[str, Any]) -> str:
127
157
  aliases = model_info.get("aliases", [])
128
158
  if model_name in aliases or normalized_model_name in aliases:
129
159
  logger.debug(f"Resolved alias '{model_name}' to canonical name '{canonical_name}'")
160
+ _resolved_aliases_cache[model_name] = canonical_name
130
161
  return canonical_name
131
162
 
132
163
  # Return normalized name if no alias found
164
+ _resolved_aliases_cache[model_name] = normalized_model_name
133
165
  return normalized_model_name
134
166
 
135
167
 
@@ -165,9 +197,10 @@ def get_model_capabilities(model_name: str) -> Dict[str, Any]:
165
197
  return capabilities
166
198
 
167
199
  # Step 3: Try partial matches for common model naming patterns
168
- model_lower = model_name.lower()
200
+ # Use canonical_name (which has been normalized) for better matching
201
+ canonical_lower = canonical_name.lower()
169
202
  for model_key, capabilities in models.items():
170
- if model_key.lower() in model_lower or model_lower in model_key.lower():
203
+ if model_key.lower() in canonical_lower or canonical_lower in model_key.lower():
171
204
  result = capabilities.copy()
172
205
  # Remove alias-specific fields
173
206
  result.pop("canonical_name", None)
@@ -293,4 +326,217 @@ def detect_model_type(model_name: str) -> str:
293
326
  elif any(x in model_lower for x in ["vision", "vl", "multimodal"]):
294
327
  return "vision"
295
328
  else:
296
- return "base"
329
+ return "base"
330
+
331
+
332
+ def get_vision_capabilities(model_name: str) -> Dict[str, Any]:
333
+ """
334
+ Get vision-specific capabilities for a model with fallback to generic vision model.
335
+
336
+ Args:
337
+ model_name: Model name to get vision capabilities for
338
+
339
+ Returns:
340
+ Dictionary with vision capabilities, using generic fallback if model not found
341
+ """
342
+ from ..utils.structured_logging import get_logger
343
+ logger = get_logger(__name__)
344
+
345
+ # Get model capabilities
346
+ capabilities = get_model_capabilities(model_name)
347
+
348
+ # Check if model has vision support
349
+ if not capabilities.get('vision_support', False):
350
+ logger.warning(f"Model '{model_name}' does not have vision support")
351
+ return {}
352
+
353
+ # Extract vision-specific fields
354
+ vision_fields = [
355
+ 'image_resolutions', 'max_image_resolution', 'image_patch_size',
356
+ 'max_image_tokens', 'image_tokenization_method', 'adaptive_resolution',
357
+ 'vision_encoder', 'pixel_grouping', 'supported_resolutions',
358
+ 'base_tokens_per_resolution', 'fixed_resolution', 'tokens_per_tile',
359
+ 'tile_size', 'base_image_tokens', 'pixel_divisor', 'token_cap'
360
+ ]
361
+
362
+ vision_capabilities = {}
363
+ for field in vision_fields:
364
+ if field in capabilities:
365
+ vision_capabilities[field] = capabilities[field]
366
+
367
+ # If we have minimal vision capabilities, use generic fallback
368
+ if not vision_capabilities or len(vision_capabilities) < 3:
369
+ logger.warning(
370
+ f"Model '{model_name}' has limited vision metadata, using generic vision model fallback",
371
+ model=model_name,
372
+ found_fields=list(vision_capabilities.keys())
373
+ )
374
+
375
+ # Get generic vision model capabilities
376
+ _load_json_assets()
377
+ if _model_capabilities and "generic_vision_model" in _model_capabilities:
378
+ generic_caps = _model_capabilities["generic_vision_model"]
379
+ for field in vision_fields:
380
+ if field in generic_caps:
381
+ vision_capabilities[field] = generic_caps[field]
382
+
383
+ return vision_capabilities
384
+
385
+
386
+ def get_glyph_compression_capabilities(model_name: str) -> Dict[str, Any]:
387
+ """
388
+ Get capabilities relevant for Glyph compression with intelligent fallbacks.
389
+
390
+ Args:
391
+ model_name: Model name to get Glyph capabilities for
392
+
393
+ Returns:
394
+ Dictionary with Glyph-relevant capabilities and recommendations
395
+ """
396
+ from ..utils.structured_logging import get_logger
397
+ logger = get_logger(__name__)
398
+
399
+ capabilities = get_model_capabilities(model_name)
400
+
401
+ # Check if model supports vision (required for Glyph)
402
+ if not capabilities.get('vision_support', False):
403
+ logger.error(
404
+ f"Model '{model_name}' does not support vision, cannot use Glyph compression",
405
+ model=model_name
406
+ )
407
+ return {
408
+ 'glyph_compatible': False,
409
+ 'reason': 'no_vision_support'
410
+ }
411
+
412
+ # Get vision capabilities
413
+ vision_caps = get_vision_capabilities(model_name)
414
+
415
+ # Determine Glyph compatibility and optimal settings
416
+ glyph_caps = {
417
+ 'glyph_compatible': True,
418
+ 'model_name': model_name,
419
+ 'vision_support': True
420
+ }
421
+
422
+ # Add vision-specific fields for token calculation
423
+ glyph_caps.update(vision_caps)
424
+
425
+ # Determine optimal compression settings based on model capabilities
426
+ max_image_tokens = vision_caps.get('max_image_tokens', 2048)
427
+ image_patch_size = vision_caps.get('image_patch_size', 16)
428
+
429
+ # Recommend compression parameters
430
+ if max_image_tokens >= 16000:
431
+ glyph_caps['recommended_pages_per_image'] = 2
432
+ glyph_caps['recommended_dpi'] = 150
433
+ elif max_image_tokens >= 8000:
434
+ glyph_caps['recommended_pages_per_image'] = 1
435
+ glyph_caps['recommended_dpi'] = 120
436
+ else:
437
+ glyph_caps['recommended_pages_per_image'] = 1
438
+ glyph_caps['recommended_dpi'] = 100
439
+
440
+ # Check for Glyph-optimized models
441
+ if capabilities.get('optimized_for_glyph', False):
442
+ glyph_caps['glyph_optimized'] = True
443
+ logger.info(f"Model '{model_name}' is optimized for Glyph compression")
444
+
445
+ return glyph_caps
446
+
447
+
448
+ def check_vision_model_compatibility(model_name: str, provider: str = None) -> Dict[str, Any]:
449
+ """
450
+ Comprehensive check for vision model compatibility with detailed recommendations.
451
+
452
+ Args:
453
+ model_name: Model name to check
454
+ provider: Provider name (optional, for provider-specific checks)
455
+
456
+ Returns:
457
+ Dictionary with compatibility status and recommendations
458
+ """
459
+ from ..utils.structured_logging import get_logger
460
+ logger = get_logger(__name__)
461
+
462
+ result = {
463
+ 'model_name': model_name,
464
+ 'provider': provider,
465
+ 'compatible': False,
466
+ 'vision_support': False,
467
+ 'glyph_compatible': False,
468
+ 'warnings': [],
469
+ 'recommendations': [],
470
+ 'capabilities': {}
471
+ }
472
+
473
+ # Get model capabilities
474
+ capabilities = get_model_capabilities(model_name)
475
+
476
+ # Check if this is an unknown model (architecture is 'generic' means it wasn't found in database)
477
+ is_unknown_model = capabilities.get('architecture') == 'generic' and not capabilities.get('vision_support', False)
478
+
479
+ if is_unknown_model:
480
+ result['warnings'].append(f"Model '{model_name}' not found in capabilities database")
481
+ result['recommendations'].append("Add model specifications to model_capabilities.json")
482
+ result['recommendations'].append("Using generic vision model fallback for VLM calculations")
483
+
484
+ # Use generic fallback - assume vision support for unknown models
485
+ _load_json_assets()
486
+ if _model_capabilities and "generic_vision_model" in _model_capabilities:
487
+ generic_caps = _model_capabilities["generic_vision_model"].copy()
488
+ result['compatible'] = True
489
+ result['vision_support'] = True
490
+ result['capabilities'] = generic_caps
491
+
492
+ # Also get vision capabilities using the generic model
493
+ vision_caps = generic_caps.copy()
494
+ result['vision_capabilities'] = vision_caps
495
+
496
+ # Check Glyph compatibility with generic model
497
+ glyph_caps = {
498
+ 'glyph_compatible': True,
499
+ 'model_name': model_name,
500
+ 'vision_support': True,
501
+ 'recommended_pages_per_image': 1,
502
+ 'recommended_dpi': 100
503
+ }
504
+ glyph_caps.update(vision_caps)
505
+ result['glyph_compatible'] = True
506
+ result['glyph_capabilities'] = glyph_caps
507
+
508
+ logger.warning(f"Using generic vision model fallback for unknown model '{model_name}'")
509
+
510
+ return result
511
+
512
+ # Check vision support
513
+ vision_support = capabilities.get('vision_support', False)
514
+ result['vision_support'] = vision_support
515
+ result['capabilities'] = capabilities
516
+
517
+ if not vision_support:
518
+ result['warnings'].append(f"Model '{model_name}' does not support vision")
519
+ result['recommendations'].append("Use a vision-capable model for image processing")
520
+ return result
521
+
522
+ result['compatible'] = True
523
+
524
+ # Get vision-specific capabilities
525
+ vision_caps = get_vision_capabilities(model_name)
526
+ result['vision_capabilities'] = vision_caps
527
+
528
+ # Check Glyph compatibility
529
+ glyph_caps = get_glyph_compression_capabilities(model_name)
530
+ result['glyph_compatible'] = glyph_caps.get('glyph_compatible', False)
531
+ result['glyph_capabilities'] = glyph_caps
532
+
533
+ # Add specific recommendations based on capabilities
534
+ if not vision_caps.get('image_patch_size'):
535
+ result['warnings'].append("No image_patch_size specified, using generic fallback")
536
+ result['recommendations'].append("Add image_patch_size to model capabilities for better accuracy")
537
+
538
+ if not vision_caps.get('max_image_tokens'):
539
+ result['warnings'].append("No max_image_tokens specified")
540
+ result['recommendations'].append("Add max_image_tokens to model capabilities")
541
+
542
+ return result
abstractcore/assets/architecture_formats.json CHANGED
@@ -308,6 +308,18 @@
308
308
  "tool_prefix": "<|tool_call|>",
309
309
  "patterns": ["glm-4.5", "glm-4.6", "glm-4.5-air"]
310
310
  },
311
+ "glm4v": {
312
+ "description": "Zhipu AI's GLM-4V multimodal architecture (June 2024)",
313
+ "message_format": "glm_special_tokens",
314
+ "system_prefix": "<|system|>\n",
315
+ "system_suffix": "\n",
316
+ "user_prefix": "<|user|>\n",
317
+ "user_suffix": "\n",
318
+ "assistant_prefix": "<|assistant|>\n",
319
+ "assistant_suffix": "\n",
320
+ "tool_format": "json",
321
+ "patterns": ["glm-4v", "glm4v", "glyph", "zai-org/glyph", "glm-4.1v"]
322
+ },
311
323
  "glm4": {
312
324
  "description": "Zhipu AI's GLM-4 architecture (June 2024)",
313
325
  "message_format": "im_start_end",
@@ -407,7 +419,8 @@
407
419
  "basic": "Simple role: content format",
408
420
  "human_assistant": "Human/Assistant format",
409
421
  "openai_chat": "OpenAI chat completion format",
410
- "llama3_header": "LLaMA 3+ format with <|start_header_id|> and <|eot_id|>"
422
+ "llama3_header": "LLaMA 3+ format with <|start_header_id|> and <|eot_id|>",
423
+ "glm_special_tokens": "GLM format with <|system|>, <|user|>, <|assistant|> tokens"
411
424
  },
412
425
  "tool_formats": {
413
426
  "pythonic": "Python function call syntax: [func(arg=val)]",