abstractcore-2.5.0-py3-none-any.whl → abstractcore-2.5.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/__init__.py +12 -0
- abstractcore/apps/__main__.py +8 -1
- abstractcore/apps/deepsearch.py +644 -0
- abstractcore/apps/intent.py +614 -0
- abstractcore/architectures/detection.py +250 -4
- abstractcore/assets/architecture_formats.json +14 -1
- abstractcore/assets/model_capabilities.json +583 -44
- abstractcore/compression/__init__.py +29 -0
- abstractcore/compression/analytics.py +420 -0
- abstractcore/compression/cache.py +250 -0
- abstractcore/compression/config.py +279 -0
- abstractcore/compression/exceptions.py +30 -0
- abstractcore/compression/glyph_processor.py +381 -0
- abstractcore/compression/optimizer.py +388 -0
- abstractcore/compression/orchestrator.py +380 -0
- abstractcore/compression/pil_text_renderer.py +818 -0
- abstractcore/compression/quality.py +226 -0
- abstractcore/compression/text_formatter.py +666 -0
- abstractcore/compression/vision_compressor.py +371 -0
- abstractcore/config/main.py +66 -1
- abstractcore/config/manager.py +111 -5
- abstractcore/core/session.py +105 -5
- abstractcore/events/__init__.py +1 -1
- abstractcore/media/auto_handler.py +312 -18
- abstractcore/media/handlers/local_handler.py +14 -2
- abstractcore/media/handlers/openai_handler.py +62 -3
- abstractcore/media/processors/__init__.py +11 -1
- abstractcore/media/processors/direct_pdf_processor.py +210 -0
- abstractcore/media/processors/glyph_pdf_processor.py +227 -0
- abstractcore/media/processors/image_processor.py +7 -1
- abstractcore/media/processors/text_processor.py +18 -3
- abstractcore/media/types.py +164 -7
- abstractcore/processing/__init__.py +5 -1
- abstractcore/processing/basic_deepsearch.py +2173 -0
- abstractcore/processing/basic_intent.py +690 -0
- abstractcore/providers/__init__.py +18 -0
- abstractcore/providers/anthropic_provider.py +29 -2
- abstractcore/providers/base.py +279 -6
- abstractcore/providers/huggingface_provider.py +658 -27
- abstractcore/providers/lmstudio_provider.py +52 -2
- abstractcore/providers/mlx_provider.py +103 -4
- abstractcore/providers/model_capabilities.py +352 -0
- abstractcore/providers/ollama_provider.py +44 -6
- abstractcore/providers/openai_provider.py +29 -2
- abstractcore/providers/registry.py +91 -19
- abstractcore/server/app.py +91 -81
- abstractcore/structured/handler.py +161 -1
- abstractcore/tools/common_tools.py +98 -3
- abstractcore/utils/__init__.py +4 -1
- abstractcore/utils/cli.py +114 -1
- abstractcore/utils/trace_export.py +287 -0
- abstractcore/utils/version.py +1 -1
- abstractcore/utils/vlm_token_calculator.py +655 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/METADATA +140 -23
- abstractcore-2.5.3.dist-info/RECORD +107 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/entry_points.txt +4 -0
- abstractcore-2.5.0.dist-info/RECORD +0 -86
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/WHEEL +0 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/top_level.txt +0 -0
abstractcore/structured/handler.py
CHANGED

@@ -6,6 +6,7 @@ import json
 import re
 import time
 from typing import Type, Dict, Any, Optional
+from enum import Enum
 from pydantic import BaseModel, ValidationError

 from .retry import FeedbackRetry
@@ -69,6 +70,9 @@ class StructuredOutputHandler:
                                          max_retries=self.retry_strategy.max_attempts)

         try:
+            # Store provider for schema generation
+            self.current_provider = provider
+
             # Strategy 1: Use native support if available
             if self._has_native_support(provider):
                 self.logger.debug("Using native structured output support",
@@ -125,12 +129,44 @@ class StructuredOutputHandler:
         """
         Check if provider has native structured output support.

+        Checks both provider type (Ollama, LMStudio, HuggingFace, MLX with Outlines)
+        and model capabilities configuration as fallback.
+
         Args:
             provider: The LLM provider instance

         Returns:
             True if provider supports native structured outputs
         """
+        # Ollama and LMStudio always support native structured outputs
+        # via the format and response_format parameters respectively
+        provider_name = provider.__class__.__name__
+        if provider_name in ['OllamaProvider', 'LMStudioProvider']:
+            return True
+
+        # HuggingFaceProvider supports native via GGUF or Transformers+Outlines
+        if provider_name == 'HuggingFaceProvider':
+            # Check if it's a GGUF model - these use llama-cpp-python which supports native structured outputs
+            if hasattr(provider, 'model_type') and provider.model_type == 'gguf':
+                return True
+
+            # Check if it's a Transformers model with Outlines available
+            if hasattr(provider, 'model_type') and provider.model_type == 'transformers':
+                try:
+                    import outlines
+                    return True
+                except ImportError:
+                    return False
+
+        # MLXProvider supports native via Outlines
+        if provider_name == 'MLXProvider':
+            try:
+                import outlines
+                return True
+            except ImportError:
+                return False
+
+        # For other providers, check model capabilities
         capabilities = getattr(provider, 'model_capabilities', {})
         return capabilities.get("structured_output") == "native"
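A minimal sketch of the capability fallback at the end of this hunk, using a hypothetical DummyProvider that only mimics the attributes the handler inspects (it is not part of abstractcore):

# Hypothetical provider whose capabilities mark structured output as prompted, not native.
class DummyProvider:
    model_capabilities = {"structured_output": "prompted"}

provider = DummyProvider()
provider_name = provider.__class__.__name__          # 'DummyProvider'
is_native = provider_name in ['OllamaProvider', 'LMStudioProvider']
if not is_native:
    capabilities = getattr(provider, 'model_capabilities', {})
    is_native = capabilities.get("structured_output") == "native"
print(is_native)  # False -> handler falls back to prompted structured output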
@@ -242,6 +278,9 @@ class StructuredOutputHandler:
             # Try parsing the extracted JSON
             try:
                 data = json.loads(json_content)
+                # Preprocess enum responses if we have mappings
+                if hasattr(self, '_enum_mappings') and self._enum_mappings:
+                    data = self._preprocess_enum_response(data, self._enum_mappings)
                 result = response_model.model_validate(data)
             except (json.JSONDecodeError, ValidationError) as parse_error:
                 # Try to fix the JSON
@@ -254,6 +293,9 @@ class StructuredOutputHandler:
             if fixed_json:
                 try:
                     data = json.loads(fixed_json)
+                    # Preprocess enum responses if we have mappings
+                    if hasattr(self, '_enum_mappings') and self._enum_mappings:
+                        data = self._preprocess_enum_response(data, self._enum_mappings)
                     result = response_model.model_validate(data)
                     self.logger.info("JSON self-fix successful", attempt=attempt + 1)
                 except (json.JSONDecodeError, ValidationError) as fix_error:
@@ -350,6 +392,14 @@ class StructuredOutputHandler:
             Enhanced prompt with schema information
         """
         schema = response_model.model_json_schema()
+
+        # For prompted providers, simplify enum schemas to avoid LLM confusion
+        # Store original enum mappings for response preprocessing
+        if hasattr(self, 'current_provider') and not self._has_native_support(self.current_provider):
+            schema, self._enum_mappings = self._simplify_enum_schemas(schema)
+        else:
+            self._enum_mappings = {}
+
         model_name = response_model.__name__

         # Create example from schema
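For context on why this simplification exists: pydantic v2 places enum fields behind $refs into $defs, and prompted models sometimes echo the Python enum notation instead of the plain string value. A small standalone illustration (the Sentiment/Review models are hypothetical, and the exact schema shape varies by pydantic version):

from enum import Enum
from pydantic import BaseModel

class Sentiment(str, Enum):
    positive = "positive"
    negative = "negative"

class Review(BaseModel):
    sentiment: Sentiment

schema = Review.model_json_schema()
print(schema["$defs"]["Sentiment"]["enum"])   # ['positive', 'negative']
print(schema["properties"]["sentiment"])      # a $ref into $defs, which _simplify_enum_schemas inlines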
@@ -432,4 +482,114 @@ Important: Return ONLY the JSON object, no additional text or formatting."""
             return match.group(0)

         # If nothing found, try the original content
-        return content
+        return content
+
+    def _simplify_enum_schemas(self, schema: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Dict[str, str]]]:
+        """
+        Simplify enum schemas for prompted providers while preserving enum mappings.
+
+        Args:
+            schema: Original JSON schema
+
+        Returns:
+            Tuple of (simplified_schema, enum_mappings)
+            enum_mappings maps field_paths to {enum_notation: enum_value}
+        """
+        if '$defs' not in schema:
+            return schema, {}
+
+        # Find enum definitions and build mappings
+        enum_mappings = {}
+        enum_refs_to_simplify = {}
+
+        for def_name, def_schema in schema['$defs'].items():
+            if def_schema.get('type') == 'string' and 'enum' in def_schema:
+                ref_key = f"#/$defs/{def_name}"
+                enum_values = def_schema['enum']
+
+                # Build mapping from Python enum notation to actual values
+                enum_class_name = def_name
+                field_mappings = {}
+                for value in enum_values:
+                    # Map both "EnumClass.VALUE_NAME" and "<EnumClass.VALUE_NAME: 'value'>" patterns
+                    enum_notation = f"{enum_class_name}.{value.upper().replace(' ', '_')}"
+                    field_mappings[enum_notation] = value
+                    # Also handle the repr format
+                    repr_notation = f"<{enum_class_name}.{value.upper().replace(' ', '_')}: '{value}'>"
+                    field_mappings[repr_notation] = value
+
+                enum_refs_to_simplify[ref_key] = {
+                    'type': 'string',
+                    'description': f"Use one of: {', '.join(enum_values)}. IMPORTANT: Use the exact string values, not Python enum notation.",
+                    'enum': enum_values
+                }
+
+                # Store mappings by reference for later use
+                enum_mappings[ref_key] = field_mappings
+
+        # Create simplified schema by replacing enum references
+        def replace_enum_refs(obj, path=""):
+            if isinstance(obj, dict):
+                if '$ref' in obj and obj['$ref'] in enum_refs_to_simplify:
+                    # Store the field path for this enum reference
+                    if path:
+                        enum_mappings[path] = enum_mappings[obj['$ref']]
+                    return enum_refs_to_simplify[obj['$ref']]
+                return {k: replace_enum_refs(v, f"{path}.{k}" if path else k) for k, v in obj.items()}
+            elif isinstance(obj, list):
+                return [replace_enum_refs(item, path) for item in obj]
+            return obj
+
+        simplified_schema = replace_enum_refs(schema)
+
+        # Remove the $defs section since we've inlined the enum definitions
+        if '$defs' in simplified_schema:
+            # Only remove enum definitions, keep other definitions
+            remaining_defs = {k: v for k, v in simplified_schema['$defs'].items()
+                              if not (v.get('type') == 'string' and 'enum' in v)}
+            if remaining_defs:
+                simplified_schema['$defs'] = remaining_defs
+            else:
+                del simplified_schema['$defs']
+
+        return simplified_schema, enum_mappings
+
+    def _preprocess_enum_response(self, data: Dict[str, Any], enum_mappings: Dict[str, Dict[str, str]]) -> Dict[str, Any]:
+        """
+        Preprocess LLM response to convert Python enum notation back to valid enum values.
+
+        Args:
+            data: Parsed JSON data from LLM
+            enum_mappings: Mappings from field paths to enum notation conversions
+
+        Returns:
+            Preprocessed data with enum notations converted to valid values
+        """
+        if not enum_mappings:
+            return data
+
+        def convert_enum_values(obj, path=""):
+            if isinstance(obj, dict):
+                result = {}
+                for key, value in obj.items():
+                    field_path = f"{path}.{key}" if path else key
+
+                    # Check if this field has enum mappings
+                    field_mappings = None
+                    for enum_path, mappings in enum_mappings.items():
+                        if field_path in enum_path or enum_path in field_path:
+                            field_mappings = mappings
+                            break
+
+                    if field_mappings and isinstance(value, str):
+                        # Try to convert enum notation to actual value
+                        converted_value = field_mappings.get(value, value)
+                        result[key] = converted_value
+                    else:
+                        result[key] = convert_enum_values(value, field_path)
+                return result
+            elif isinstance(obj, list):
+                return [convert_enum_values(item, path) for item in obj]
+            return obj
+
+        return convert_enum_values(data)
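A quick standalone illustration of the notation repair these two methods perform, with a hypothetical SentimentType enum (the mapping below mirrors what _simplify_enum_schemas builds):

# Both the dotted and the repr-style notations map back to the plain value.
field_mappings = {
    "SentimentType.POSITIVE": "positive",
    "<SentimentType.POSITIVE: 'positive'>": "positive",
}

raw = "SentimentType.POSITIVE"           # what a prompted model sometimes returns
fixed = field_mappings.get(raw, raw)     # -> "positive"
assert fixed == "positive"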
abstractcore/tools/common_tools.py
CHANGED

@@ -1277,17 +1277,23 @@ def _parse_content_by_type(content_bytes: bytes, content_type: str, url: str, ex
         # Final fallback with error replacement
         text_content = content_bytes.decode('utf-8', errors='replace')

-    # Parse based on content type
+    # Parse based on content type with fallback content detection
     if main_type.startswith('text/html') or main_type.startswith('application/xhtml'):
         return _parse_html_content(text_content, url, extract_links)

     elif main_type == 'application/json':
         return _parse_json_content(text_content)

-    elif main_type in ['application/xml', 'text/xml', 'application/rss+xml', 'application/atom+xml']:
+    elif main_type in ['application/xml', 'text/xml', 'application/rss+xml', 'application/atom+xml', 'application/soap+xml']:
         return _parse_xml_content(text_content)

     elif main_type.startswith('text/'):
+        # For generic text types, check if it's actually XML or JSON
+        if text_content and text_content.strip():
+            if _is_xml_content(text_content):
+                return _parse_xml_content(text_content)
+            elif _is_json_content(text_content):
+                return _parse_json_content(text_content)
         return _parse_text_content(text_content, main_type)

     elif main_type.startswith('image/'):
@@ -1305,18 +1311,107 @@ def _parse_content_by_type(content_bytes: bytes, content_type: str, url: str, ex
             f"Content size: {len(content_bytes):,} bytes"


+def _is_xml_content(content: str) -> bool:
+    """Detect if content is XML rather than HTML."""
+    if not content:
+        return False
+
+    content_lower = content.lower().strip()
+
+    # Check for XML declaration
+    if content_lower.startswith('<?xml'):
+        return True
+
+    # Check for common XML root elements without HTML indicators
+    xml_indicators = ['<rss', '<feed', '<urlset', '<sitemap', '<soap:', '<xml']
+    html_indicators = ['<!doctype html', '<html', '<head>', '<body>', '<div', '<span', '<p>', '<a ']
+
+    # Look at the first 1000 characters for indicators
+    sample = content_lower[:1000]
+
+    # If we find HTML indicators, it's likely HTML
+    if any(indicator in sample for indicator in html_indicators):
+        return False
+
+    # If we find XML indicators without HTML indicators, it's likely XML
+    if any(indicator in sample for indicator in xml_indicators):
+        return True
+
+    # Check if it starts with a root element that looks like XML
+    import re
+    root_match = re.search(r'<([^?\s/>]+)', content)
+    if root_match:
+        root_element = root_match.group(1).lower()
+        # Common XML root elements that are not HTML
+        xml_roots = ['rss', 'feed', 'urlset', 'sitemap', 'configuration', 'data', 'response']
+        if root_element in xml_roots:
+            return True
+
+    return False
+
+
+def _is_json_content(content: str) -> bool:
+    """Detect if content is JSON."""
+    if not content:
+        return False
+
+    content_stripped = content.strip()
+
+    # Quick check for JSON structure
+    if (content_stripped.startswith('{') and content_stripped.endswith('}')) or \
+       (content_stripped.startswith('[') and content_stripped.endswith(']')):
+        try:
+            import json
+            json.loads(content_stripped)
+            return True
+        except (json.JSONDecodeError, ValueError):
+            pass
+
+    return False
+
+
+def _get_appropriate_parser(content: str) -> str:
+    """Get the appropriate BeautifulSoup parser for the content."""
+    if not BS4_AVAILABLE:
+        return None
+
+    # If lxml is available and content looks like XML, use xml parser
+    if 'lxml' in BS4_PARSER and _is_xml_content(content):
+        try:
+            import lxml
+            return 'xml'
+        except ImportError:
+            pass
+
+    # Default to the configured parser (lxml or html.parser)
+    return BS4_PARSER
+
+
 def _parse_html_content(html_content: str, url: str, extract_links: bool = True) -> str:
     """Parse HTML content and extract meaningful information."""
     if not html_content:
         return "❌ No HTML content to parse"

+    # Detect if content is actually XML (fallback detection)
+    if _is_xml_content(html_content):
+        return _parse_xml_content(html_content)
+
     result_parts = []
     result_parts.append("🌐 HTML Document Analysis")

     # Use BeautifulSoup if available for better parsing
     if BS4_AVAILABLE:
         try:
-
+            # Choose appropriate parser based on content analysis
+            parser = _get_appropriate_parser(html_content)
+
+            # Suppress XML parsing warnings when using HTML parser on XML content
+            import warnings
+            from bs4 import XMLParsedAsHTMLWarning
+
+            with warnings.catch_warnings():
+                warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)
+                soup = BeautifulSoup(html_content, parser)

             # Extract title
             title = soup.find('title')
abstractcore/utils/__init__.py
CHANGED

@@ -13,6 +13,7 @@ from .token_utils import (
     ContentType
 )
 from .message_preprocessor import MessagePreprocessor, parse_files, has_files
+from .trace_export import export_traces, summarize_traces

 __all__ = [
     'configure_logging',
@@ -27,5 +28,7 @@ __all__ = [
     'ContentType',
     'MessagePreprocessor',
     'parse_files',
-    'has_files'
+    'has_files',
+    'export_traces',
+    'summarize_traces'
 ]
abstractcore/utils/cli.py
CHANGED

@@ -39,7 +39,7 @@ except ImportError:

 from .. import create_llm, BasicSession
 from ..tools.common_tools import list_files, read_file, write_file, execute_command, search_files
-from ..processing import BasicExtractor, BasicJudge
+from ..processing import BasicExtractor, BasicJudge, BasicIntentAnalyzer


 class SimpleCLI:
@@ -193,6 +193,10 @@ class SimpleCLI:
         print("     • /facts            - Display in chat")
         print("     • /facts data       - Save as data.jsonld")
         print("   /judge                Evaluate conversation quality")
+        print("   /intent [participant] Analyze intents behind conversation")
+        print("     • /intent           - Analyze all participants")
+        print("     • /intent user      - Focus on user intents")
+        print("     • /intent assistant - Focus on assistant intents")

         print("\n⚙️ CONFIGURATION")
         print("─" * 50)
@@ -317,6 +321,17 @@ class SimpleCLI:
         elif cmd == 'judge':
             self.handle_judge()

+        elif cmd.startswith('intent'):
+            # Parse /intent [participant] command
+            parts = cmd.split()
+            if len(parts) == 1:
+                # No participant specified - analyze all
+                self.handle_intent(None)
+            else:
+                # Participant specified
+                participant = parts[1]
+                self.handle_intent(participant)
+
         elif cmd.startswith('system'):
             # Parse /system [prompt] command
             if cmd == 'system':
@@ -623,6 +638,103 @@ class SimpleCLI:
             import traceback
             traceback.print_exc()

+    def handle_intent(self, focus_participant: str = None):
+        """Handle /intent [participant] command - analyze intents behind conversation"""
+        messages = self.session.get_messages()
+
+        if len(messages) <= 1:  # Only system message
+            print("📝 No conversation history to analyze intents from")
+            return
+
+        try:
+            if focus_participant:
+                print(f"🎯 Analyzing {focus_participant} intents in conversation...")
+            else:
+                print("🎯 Analyzing conversation intents for all participants...")
+
+            # Create intent analyzer using current provider for consistency
+            analyzer = BasicIntentAnalyzer(self.provider)
+
+            # Convert session messages to the format expected by intent analyzer
+            conversation_messages = [msg for msg in messages if msg.role != 'system']
+            message_dicts = [{"role": msg.role, "content": msg.content} for msg in conversation_messages]
+
+            if not message_dicts:
+                print("📝 No substantive conversation content found")
+                return
+
+            print(f"   Processing {len(message_dicts)} messages...")
+
+            start_time = time.time()
+
+            # Analyze conversation intents
+            from ..processing.basic_intent import IntentDepth
+            results = analyzer.analyze_conversation_intents(
+                messages=message_dicts,
+                focus_participant=focus_participant,
+                depth=IntentDepth.UNDERLYING
+            )
+
+            duration = time.time() - start_time
+            print(f"✅ Intent analysis completed in {duration:.1f}s")
+
+            if not results:
+                print("❌ No intents could be analyzed from the conversation")
+                return
+
+            # Display results in a conversational format
+            print("\n🎯 CONVERSATION INTENT ANALYSIS")
+            print("=" * 60)
+
+            for participant, analysis in results.items():
+                print(f"\n👤 {participant.upper()} INTENTS:")
+                print("─" * 40)
+
+                # Primary Intent
+                primary = analysis.primary_intent
+                print(f"🎯 Primary Intent: {primary.intent_type.value.replace('_', ' ').title()}")
+                print(f"   Description: {primary.description}")
+                print(f"   Underlying Goal: {primary.underlying_goal}")
+                print(f"   Emotional Undertone: {primary.emotional_undertone}")
+                print(f"   Confidence: {primary.confidence:.2f} | Urgency: {primary.urgency_level:.2f}")
+
+                # Secondary Intents (show top 2 for brevity)
+                if analysis.secondary_intents:
+                    print(f"\n🔄 Secondary Intents:")
+                    for i, intent in enumerate(analysis.secondary_intents[:2], 1):
+                        print(f"   {i}. {intent.intent_type.value.replace('_', ' ').title()}")
+                        print(f"      Goal: {intent.underlying_goal}")
+                        print(f"      Confidence: {intent.confidence:.2f}")
+
+                # Key contextual factors (show top 3)
+                if analysis.contextual_factors:
+                    print(f"\n🌍 Key Context Factors:")
+                    for factor in analysis.contextual_factors[:3]:
+                        print(f"   • {factor}")
+
+                # Response approach
+                print(f"\n💡 Suggested Response Approach:")
+                # Truncate long response approaches for readability
+                response_approach = analysis.suggested_response_approach
+                if len(response_approach) > 200:
+                    response_approach = response_approach[:197] + "..."
+                print(f"   {response_approach}")
+
+                # Analysis metadata
+                print(f"\n📊 Analysis: {analysis.word_count_analyzed} words | "
+                      f"Complexity: {analysis.intent_complexity:.2f} | "
+                      f"Confidence: {analysis.overall_confidence:.2f} | "
+                      f"Time: {duration:.1f}s")
+
+            print("\n" + "=" * 60)
+            print("💡 Note: This analysis identifies underlying motivations and goals behind communication")
+
+        except Exception as e:
+            print(f"❌ Intent analysis failed: {e}")
+            if self.debug_mode:
+                import traceback
+                traceback.print_exc()
+
     def _format_conversation_for_extraction(self, messages):
         """Format conversation messages for fact extraction"""
         formatted_lines = []
@@ -1337,6 +1449,7 @@ Key Commands:
   /compact [focus]   Compress chat history with optional focus
   /facts [file]      Extract knowledge facts
   /judge             Evaluate conversation quality
+  /intent [participant]  Analyze conversation intents and motivations
   /system [prompt]   View/change system prompt

 Tools: list_files, search_files, read_file, write_file, execute_command
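For reference, the same analysis can presumably be driven programmatically, mirroring the CLI handler above (the create_llm provider/model arguments and the sample messages are illustrative assumptions, not from this diff):

from abstractcore import create_llm
from abstractcore.processing import BasicIntentAnalyzer
from abstractcore.processing.basic_intent import IntentDepth

provider = create_llm("ollama", model="llama3")  # hypothetical provider/model choice
analyzer = BasicIntentAnalyzer(provider)

results = analyzer.analyze_conversation_intents(
    messages=[
        {"role": "user", "content": "Can you help me debug this stack trace?"},
        {"role": "assistant", "content": "Sure - paste the trace and the code."},
    ],
    focus_participant="user",        # or None to analyze all participants
    depth=IntentDepth.UNDERLYING,
)

for participant, analysis in results.items():
    print(participant, analysis.primary_intent.intent_type.value)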