PyPI - abstractcore - Versions diffs - 2.9.1__py3-none-any.whl → 2.11.2__py3-none-any.whl - Mend

abstractcore 2.9.1__py3-none-any.whl → 2.11.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (83)

abstractcore/__init__.py +7 -27
abstractcore/apps/extractor.py +33 -100
abstractcore/apps/intent.py +19 -0
abstractcore/apps/judge.py +20 -1
abstractcore/apps/summarizer.py +20 -1
abstractcore/architectures/detection.py +34 -1
abstractcore/architectures/response_postprocessing.py +313 -0
abstractcore/assets/architecture_formats.json +38 -8
abstractcore/assets/model_capabilities.json +781 -160
abstractcore/compression/__init__.py +1 -2
abstractcore/compression/glyph_processor.py +6 -4
abstractcore/config/main.py +31 -19
abstractcore/config/manager.py +389 -11
abstractcore/config/vision_config.py +5 -5
abstractcore/core/interface.py +151 -3
abstractcore/core/session.py +16 -10
abstractcore/download.py +1 -1
abstractcore/embeddings/manager.py +20 -6
abstractcore/endpoint/__init__.py +2 -0
abstractcore/endpoint/app.py +458 -0
abstractcore/mcp/client.py +3 -1
abstractcore/media/__init__.py +52 -17
abstractcore/media/auto_handler.py +42 -22
abstractcore/media/base.py +44 -1
abstractcore/media/capabilities.py +12 -33
abstractcore/media/enrichment.py +105 -0
abstractcore/media/handlers/anthropic_handler.py +19 -28
abstractcore/media/handlers/local_handler.py +124 -70
abstractcore/media/handlers/openai_handler.py +19 -31
abstractcore/media/processors/__init__.py +4 -2
abstractcore/media/processors/audio_processor.py +57 -0
abstractcore/media/processors/office_processor.py +8 -3
abstractcore/media/processors/pdf_processor.py +46 -3
abstractcore/media/processors/text_processor.py +22 -24
abstractcore/media/processors/video_processor.py +58 -0
abstractcore/media/types.py +97 -4
abstractcore/media/utils/image_scaler.py +20 -2
abstractcore/media/utils/video_frames.py +219 -0
abstractcore/media/vision_fallback.py +136 -22
abstractcore/processing/__init__.py +32 -3
abstractcore/processing/basic_deepsearch.py +15 -10
abstractcore/processing/basic_intent.py +3 -2
abstractcore/processing/basic_judge.py +3 -2
abstractcore/processing/basic_summarizer.py +1 -1
abstractcore/providers/__init__.py +3 -1
abstractcore/providers/anthropic_provider.py +95 -8
abstractcore/providers/base.py +1516 -81
abstractcore/providers/huggingface_provider.py +546 -69
abstractcore/providers/lmstudio_provider.py +35 -923
abstractcore/providers/mlx_provider.py +382 -35
abstractcore/providers/model_capabilities.py +5 -1
abstractcore/providers/ollama_provider.py +99 -15
abstractcore/providers/openai_compatible_provider.py +406 -180
abstractcore/providers/openai_provider.py +188 -44
abstractcore/providers/openrouter_provider.py +76 -0
abstractcore/providers/registry.py +61 -5
abstractcore/providers/streaming.py +138 -33
abstractcore/providers/vllm_provider.py +92 -817
abstractcore/server/app.py +461 -13
abstractcore/server/audio_endpoints.py +139 -0
abstractcore/server/vision_endpoints.py +1319 -0
abstractcore/structured/handler.py +316 -41
abstractcore/tools/common_tools.py +5501 -2012
abstractcore/tools/comms_tools.py +1641 -0
abstractcore/tools/core.py +37 -7
abstractcore/tools/handler.py +4 -9
abstractcore/tools/parser.py +49 -2
abstractcore/tools/tag_rewriter.py +2 -1
abstractcore/tools/telegram_tdlib.py +407 -0
abstractcore/tools/telegram_tools.py +261 -0
abstractcore/utils/cli.py +1085 -72
abstractcore/utils/token_utils.py +2 -0
abstractcore/utils/truncation.py +29 -0
abstractcore/utils/version.py +3 -4
abstractcore/utils/vlm_token_calculator.py +12 -2
abstractcore-2.11.2.dist-info/METADATA +562 -0
abstractcore-2.11.2.dist-info/RECORD +133 -0
{abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/WHEEL +1 -1
{abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/entry_points.txt +1 -0
abstractcore-2.9.1.dist-info/METADATA +0 -1190
abstractcore-2.9.1.dist-info/RECORD +0 -119
{abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/licenses/LICENSE +0 -0
{abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/top_level.txt +0 -0

abstractcore/structured/handler.py CHANGED Viewed

@@ -5,7 +5,7 @@ Structured output handler for managing schema-based LLM responses.
 import json
 import re
 import time
-from typing import Type, Dict, Any, Optional
+from typing import Type, Dict, Any, Optional, get_args, get_origin
 from enum import Enum
 from pydantic import BaseModel, ValidationError
@@ -15,6 +15,64 @@ from ..utils.self_fixes import fix_json
 from ..events import EventType, emit_global, create_structured_output_event
+def _coerce_single_list_wrapper(data: Any, *, response_model: Type[BaseModel]) -> Any:
+    """Repair common wrapper-shape drift for list-centric schemas.
+    Some servers/models will emit either:
+    - the list itself (instead of the object wrapper)
+    - a single list item (instead of the wrapper with a 1-element list)
+    When the response model is an object with exactly one list field, we can safely
+    coerce these shapes back into the expected wrapper.
+    """
+    try:
+        fields = getattr(response_model, "model_fields", None)
+        if not isinstance(fields, dict) or len(fields) != 1:
+            return data
+        (field_name, field_info), = list(fields.items())
+        if not isinstance(field_name, str) or not field_name:
+            return data
+        # If the model already returned the expected wrapper field, keep as-is.
+        if isinstance(data, dict) and field_name in data:
+            return data
+        annotation = getattr(field_info, "annotation", None)
+        origin = get_origin(annotation)
+        args = get_args(annotation)
+        if origin is not list or not args:
+            return data
+        item_type = args[0]
+        item_model_fields = getattr(item_type, "model_fields", None) if isinstance(item_type, type) else None
+        required: set[str] = set()
+        if isinstance(item_model_fields, dict) and item_model_fields:
+            for k, v in item_model_fields.items():
+                if isinstance(k, str) and k and hasattr(v, "is_required") and v.is_required():
+                    required.add(k)
+        # If we got a list of items, wrap it.
+        if isinstance(data, list):
+            return {field_name: data}
+        # If we got a single item-like object, wrap it.
+        if isinstance(data, dict):
+            # When the item type is a BaseModel, only coerce if required fields match.
+            if required:
+                if required.issubset(set(data.keys())):
+                    return {field_name: [data]}
+                return data
+            # Otherwise (e.g. list[dict]), we can't validate shape here; still, this is a
+            # safe coercion when the response model is a 1-field list wrapper.
+            return {field_name: [data]}
+    except Exception:
+        return data
+    return data
 class StructuredOutputHandler:
     """
     Handles structured output generation using two strategies:
@@ -189,41 +247,151 @@ class StructuredOutputHandler:
         Returns:
             Validated instance of response_model
         """
-        # The provider will handle structured output natively
-        # This is implemented in each provider's _generate_internal method
-        response = provider._generate_internal(
-            prompt=prompt,
-            response_model=response_model,
-            **kwargs
-        )
+        def _is_truncated(resp: Any) -> bool:
+            fr = getattr(resp, "finish_reason", None)
+            fr_str = str(fr or "").strip().lower()
+            return fr_str in {"length", "max_tokens", "max_output_tokens"}
+        def _bump_max_output_tokens(current_kwargs: dict) -> dict:
+            updated = dict(current_kwargs)
+            raw = updated.get("max_output_tokens")
+            if raw is None:
+                raw = updated.get("max_tokens")
+            cur = 0
+            if raw is not None and not isinstance(raw, bool):
+                try:
+                    cur = int(raw)
+                except Exception:
+                    cur = 0
+            if cur <= 0:
+                try:
+                    cur = int(getattr(provider, "max_output_tokens", 0) or 0)
+                except Exception:
+                    cur = 0
+                if cur <= 0:
+                    cur = 512
+            # Prefer geometric growth; also add a fixed floor so small values ramp quickly.
+            bumped = max(cur * 2, cur + 500)
+            cap = 0
+            try:
+                # Use model capabilities (not provider defaults) to avoid accidental hard caps.
+                from ..architectures.detection import get_context_limits
+                model_name = getattr(provider, "model", None)
+                limits = get_context_limits(str(model_name or ""))
+                cap = int(limits.get("max_output_tokens") or 0)
+            except Exception:
+                cap = 0
+            if cap <= 0:
+                cap = 1_000_000
+            updated["max_output_tokens"] = min(bumped, cap)
+            # Avoid ambiguity when both keys are present.
+            updated.pop("max_tokens", None)
+            return updated
+        last_error: Exception | None = None
+        attempt_kwargs = dict(kwargs)
+        def _coerce_boolish(value: Any) -> bool:
+            if isinstance(value, bool):
+                return bool(value)
+            if isinstance(value, (int, float)) and not isinstance(value, bool):
+                return float(value) != 0.0
+            if isinstance(value, str):
+                return value.strip().lower() in {"1", "true", "yes", "y", "on"}
+            return False
+        allow_truncation_raw = attempt_kwargs.pop("allow_truncation", None)
+        if allow_truncation_raw is None:
+            allow_truncation_raw = attempt_kwargs.pop("allow_truncated", None)
+        allow_truncation = _coerce_boolish(allow_truncation_raw) if allow_truncation_raw is not None else False
+        max_attempts = int(getattr(self.retry_strategy, "max_attempts", 3) or 3)
+        for attempt in range(1, max_attempts + 1):
+            response = provider._generate_internal(
+                prompt=prompt,
+                response_model=response_model,
+                **attempt_kwargs,
+            )
-        # For native support, the response content should already be structured
-        if isinstance(response.content, dict):
-            return response_model.model_validate(response.content)
-        else:
-            # Parse JSON string
             try:
+                if isinstance(response.content, dict):
+                    validated = response_model.model_validate(
+                        _coerce_single_list_wrapper(response.content, response_model=response_model)
+                    )
+                    if _is_truncated(response) and not allow_truncation:
+                        if attempt < max_attempts:
+                            bumped = _bump_max_output_tokens(attempt_kwargs)
+                            self.logger.warning(
+                                "Structured output truncated; retrying with higher max_output_tokens",
+                                finish_reason=str(getattr(response, "finish_reason", None)),
+                                attempt=attempt,
+                                max_output_tokens=attempt_kwargs.get("max_output_tokens") or attempt_kwargs.get("max_tokens"),
+                                next_max_output_tokens=bumped.get("max_output_tokens"),
+                            )
+                            attempt_kwargs = bumped
+                            continue
+                        raise RuntimeError("Structured output was truncated (finish_reason=length). Increase max_output_tokens.")
+                    return validated
                 data = json.loads(response.content)
-                return response_model.model_validate(data)
+                validated = response_model.model_validate(_coerce_single_list_wrapper(data, response_model=response_model))
+                if _is_truncated(response) and not allow_truncation:
+                    if attempt < max_attempts:
+                        bumped = _bump_max_output_tokens(attempt_kwargs)
+                        self.logger.warning(
+                            "Structured output truncated; retrying with higher max_output_tokens",
+                            finish_reason=str(getattr(response, "finish_reason", None)),
+                            attempt=attempt,
+                            max_output_tokens=attempt_kwargs.get("max_output_tokens") or attempt_kwargs.get("max_tokens"),
+                            next_max_output_tokens=bumped.get("max_output_tokens"),
+                        )
+                        attempt_kwargs = bumped
+                        continue
+                    raise RuntimeError("Structured output was truncated (finish_reason=length). Increase max_output_tokens.")
+                return validated
             except (json.JSONDecodeError, ValidationError) as e:
-                # Try to fix the JSON before falling back
-                self.logger.debug("Native JSON parsing failed, attempting self-fix",
-                                error=str(e),
-                                response_length=len(response.content))
-                fixed_json = fix_json(response.content)
+                last_error = e
+                if _is_truncated(response) and attempt < max_attempts:
+                    bumped = _bump_max_output_tokens(attempt_kwargs)
+                    self.logger.warning(
+                        "Structured output truncated; retrying with higher max_output_tokens",
+                        finish_reason=str(getattr(response, "finish_reason", None)),
+                        attempt=attempt,
+                        max_output_tokens=attempt_kwargs.get("max_output_tokens") or attempt_kwargs.get("max_tokens"),
+                        next_max_output_tokens=bumped.get("max_output_tokens"),
+                    )
+                    attempt_kwargs = bumped
+                    continue
+                fixed_json = None
+                try:
+                    fixed_json = fix_json(response.content)
+                except Exception:
+                    fixed_json = None
                 if fixed_json:
                     try:
                         data = json.loads(fixed_json)
-                        result = response_model.model_validate(data)
-                        self.logger.info("JSON self-fix successful for native response")
-                        return result
-                    except (json.JSONDecodeError, ValidationError) as fix_error:
-                        self.logger.debug("Self-fix failed", error=str(fix_error))
-                # Even native support can fail, fallback to prompted
+                        validated = response_model.model_validate(
+                            _coerce_single_list_wrapper(data, response_model=response_model)
+                        )
+                        if _is_truncated(response) and not allow_truncation:
+                            raise RuntimeError(
+                                "Structured output was truncated (finish_reason=length) and only repaired via JSON self-fix. "
+                                "Increase max_output_tokens."
+                            )
+                        return validated
+                    except (json.JSONDecodeError, ValidationError):
+                        pass
+                # Non-truncation failures can still happen even in native mode; fall back to prompted.
                 return self._generate_prompted(provider, prompt, response_model, **kwargs)
+        assert last_error is not None
+        raise last_error
     def _generate_prompted(
         self,
         provider,
@@ -253,6 +421,20 @@ class StructuredOutputHandler:
         last_error = None
         current_prompt = enhanced_prompt
+        current_kwargs = dict(kwargs)
+        def _coerce_boolish(value: Any) -> bool:
+            if isinstance(value, bool):
+                return bool(value)
+            if isinstance(value, (int, float)) and not isinstance(value, bool):
+                return float(value) != 0.0
+            if isinstance(value, str):
+                return value.strip().lower() in {"1", "true", "yes", "y", "on"}
+            return False
+        allow_truncation_raw = current_kwargs.pop("allow_truncation", None)
+        if allow_truncation_raw is None:
+            allow_truncation_raw = current_kwargs.pop("allow_truncated", None)
+        allow_truncation = _coerce_boolish(allow_truncation_raw) if allow_truncation_raw is not None else False
         for attempt in range(1, self.retry_strategy.max_attempts + 1):
             attempt_start_time = time.time()
@@ -269,7 +451,7 @@ class StructuredOutputHandler:
                 # Generate response
                 response = provider._generate_internal(
                     prompt=current_prompt,
-                    **kwargs
+                    **current_kwargs
                 )
                 # Extract and validate JSON
@@ -281,7 +463,7 @@ class StructuredOutputHandler:
                     # Preprocess enum responses if we have mappings
                     if hasattr(self, '_enum_mappings') and self._enum_mappings:
                         data = self._preprocess_enum_response(data, self._enum_mappings)
-                    result = response_model.model_validate(data)
+                    result = response_model.model_validate(_coerce_single_list_wrapper(data, response_model=response_model))
                 except (json.JSONDecodeError, ValidationError) as parse_error:
                     # Try to fix the JSON
                     self.logger.debug("JSON parsing failed, attempting self-fix",
@@ -296,7 +478,9 @@ class StructuredOutputHandler:
                             # Preprocess enum responses if we have mappings
                             if hasattr(self, '_enum_mappings') and self._enum_mappings:
                                 data = self._preprocess_enum_response(data, self._enum_mappings)
-                            result = response_model.model_validate(data)
+                            result = response_model.model_validate(
+                                _coerce_single_list_wrapper(data, response_model=response_model)
+                            )
                             self.logger.info("JSON self-fix successful", attempt=attempt + 1)
                         except (json.JSONDecodeError, ValidationError) as fix_error:
                             self.logger.debug("Self-fix failed", error=str(fix_error), attempt=attempt + 1)
@@ -309,6 +493,52 @@ class StructuredOutputHandler:
                 # Note: VALIDATION_SUCCEEDED event removed in simplification
                 # Success is indicated by successfully parsing the response
+                finish_reason = str(getattr(response, "finish_reason", "") or "").strip().lower()
+                is_truncated = finish_reason in {"length", "max_tokens", "max_output_tokens"}
+                if is_truncated and not allow_truncation:
+                    if attempt < self.retry_strategy.max_attempts:
+                        raw = current_kwargs.get("max_output_tokens")
+                        if raw is None:
+                            raw = current_kwargs.get("max_tokens")
+                        cur = 0
+                        if raw is not None and not isinstance(raw, bool):
+                            try:
+                                cur = int(raw)
+                            except Exception:
+                                cur = 0
+                        if cur <= 0:
+                            try:
+                                cur = int(getattr(provider, "max_output_tokens", 0) or 0)
+                            except Exception:
+                                cur = 0
+                            if cur <= 0:
+                                cur = 512
+                        bumped = max(cur * 2, cur + 500)
+                        cap = 0
+                        try:
+                            from ..architectures.detection import get_context_limits
+                            model_name = getattr(provider, "model", None)
+                            limits = get_context_limits(str(model_name or ""))
+                            cap = int(limits.get("max_output_tokens") or 0)
+                        except Exception:
+                            cap = 0
+                        if cap <= 0:
+                            cap = 1_000_000
+                        next_budget = min(bumped, cap)
+                        self.logger.warning(
+                            "Structured output truncated; retrying with higher max_output_tokens",
+                            finish_reason=finish_reason,
+                            attempt=attempt,
+                            max_output_tokens=current_kwargs.get("max_output_tokens") or current_kwargs.get("max_tokens"),
+                            next_max_output_tokens=next_budget,
+                        )
+                        current_kwargs["max_output_tokens"] = next_budget
+                        current_kwargs.pop("max_tokens", None)
+                        current_prompt = enhanced_prompt
+                        continue
+                    raise RuntimeError("Structured output was truncated (finish_reason=length). Increase max_output_tokens.")
                 # Log successful validation
                 self.logger.info("Validation attempt succeeded",
                                provider=provider_name,
@@ -352,6 +582,51 @@ class StructuredOutputHandler:
                                   validation_success=False)
                 # Check if we should retry
+                finish_reason = str(getattr(response, "finish_reason", "") or "").strip().lower()
+                is_truncated = finish_reason in {"length", "max_tokens", "max_output_tokens"}
+                if is_truncated and attempt < self.retry_strategy.max_attempts:
+                    raw = current_kwargs.get("max_output_tokens")
+                    if raw is None:
+                        raw = current_kwargs.get("max_tokens")
+                    cur = 0
+                    if raw is not None and not isinstance(raw, bool):
+                        try:
+                            cur = int(raw)
+                        except Exception:
+                            cur = 0
+                    if cur <= 0:
+                        try:
+                            cur = int(getattr(provider, "max_output_tokens", 0) or 0)
+                        except Exception:
+                            cur = 0
+                        if cur <= 0:
+                            cur = 512
+                    bumped = max(cur * 2, cur + 500)
+                    cap = 0
+                    try:
+                        from ..architectures.detection import get_context_limits
+                        model_name = getattr(provider, "model", None)
+                        limits = get_context_limits(str(model_name or ""))
+                        cap = int(limits.get("max_output_tokens") or 0)
+                    except Exception:
+                        cap = 0
+                    if cap <= 0:
+                        cap = 1_000_000
+                    next_budget = min(bumped, cap)
+                    self.logger.warning(
+                        "Structured output truncated; retrying with higher max_output_tokens",
+                        finish_reason=finish_reason,
+                        attempt=attempt,
+                        max_output_tokens=current_kwargs.get("max_output_tokens") or current_kwargs.get("max_tokens"),
+                        next_max_output_tokens=next_budget,
+                    )
+                    current_kwargs["max_output_tokens"] = next_budget
+                    current_kwargs.pop("max_tokens", None)
+                    # Keep the base prompt stable: appending more text makes truncation more likely.
+                    current_prompt = enhanced_prompt
+                    continue
                 if self.retry_strategy.should_retry(attempt, e):
                     # Note: RETRY_ATTEMPTED event removed in simplification
                     # Retry logic tracked through VALIDATION_FAILED event with attempt number
@@ -405,18 +680,18 @@ class StructuredOutputHandler:
         # Create example from schema
         example = self._create_example_from_schema(schema)
-        enhanced_prompt = f"""{prompt}
-Please respond with valid JSON that matches this exact schema for {model_name}:
-{json.dumps(schema, indent=2)}
-Example format:
-{json.dumps(example, indent=2)}
+        schema_block = (
+            f"Please respond with valid JSON that matches this exact schema for {model_name}:\n\n"
+            f"{json.dumps(schema, indent=2)}\n\n"
+            f"Example format:\n{json.dumps(example, indent=2)}\n\n"
+            "Important: Return ONLY the JSON object, no additional text or formatting."
+        )
-Important: Return ONLY the JSON object, no additional text or formatting."""
+        marker = "<<STRUCTURED_OUTPUT_SCHEMA>>"
+        if marker in prompt:
+            return prompt.replace(marker, schema_block, 1)
-        return enhanced_prompt
+        return f"{prompt}\n\n{schema_block}"
     def _create_example_from_schema(self, schema: Dict[str, Any]) -> Dict[str, Any]:
         """
@@ -592,4 +867,4 @@ Important: Return ONLY the JSON object, no additional text or formatting."""
                 return [convert_enum_values(item, path) for item in obj]
             return obj
-        return convert_enum_values(data)
+        return convert_enum_values(data)

abstractcore 2.9.1__py3-none-any.whl → 2.11.2__py3-none-any.whl

abstractcore 2.9.1__py3-none-any.whl → 2.11.2__py3-none-any.whl