abstractcore 2.6.8__py3-none-any.whl → 2.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. abstractcore/apps/summarizer.py +69 -27
  2. abstractcore/architectures/detection.py +190 -25
  3. abstractcore/assets/architecture_formats.json +129 -6
  4. abstractcore/assets/model_capabilities.json +789 -136
  5. abstractcore/config/main.py +2 -2
  6. abstractcore/config/manager.py +3 -1
  7. abstractcore/events/__init__.py +7 -1
  8. abstractcore/mcp/__init__.py +30 -0
  9. abstractcore/mcp/client.py +213 -0
  10. abstractcore/mcp/factory.py +64 -0
  11. abstractcore/mcp/naming.py +28 -0
  12. abstractcore/mcp/stdio_client.py +336 -0
  13. abstractcore/mcp/tool_source.py +164 -0
  14. abstractcore/processing/basic_deepsearch.py +1 -1
  15. abstractcore/processing/basic_summarizer.py +300 -83
  16. abstractcore/providers/anthropic_provider.py +91 -10
  17. abstractcore/providers/base.py +537 -16
  18. abstractcore/providers/huggingface_provider.py +17 -8
  19. abstractcore/providers/lmstudio_provider.py +170 -25
  20. abstractcore/providers/mlx_provider.py +13 -10
  21. abstractcore/providers/ollama_provider.py +42 -26
  22. abstractcore/providers/openai_compatible_provider.py +87 -22
  23. abstractcore/providers/openai_provider.py +12 -9
  24. abstractcore/providers/streaming.py +201 -39
  25. abstractcore/providers/vllm_provider.py +78 -21
  26. abstractcore/server/app.py +65 -28
  27. abstractcore/structured/retry.py +20 -7
  28. abstractcore/tools/__init__.py +5 -4
  29. abstractcore/tools/abstractignore.py +166 -0
  30. abstractcore/tools/arg_canonicalizer.py +61 -0
  31. abstractcore/tools/common_tools.py +2311 -772
  32. abstractcore/tools/core.py +109 -13
  33. abstractcore/tools/handler.py +17 -3
  34. abstractcore/tools/parser.py +798 -155
  35. abstractcore/tools/registry.py +107 -2
  36. abstractcore/tools/syntax_rewriter.py +68 -6
  37. abstractcore/tools/tag_rewriter.py +186 -1
  38. abstractcore/utils/jsonish.py +111 -0
  39. abstractcore/utils/version.py +1 -1
  40. {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/METADATA +11 -2
  41. {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/RECORD +45 -36
  42. {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/WHEEL +0 -0
  43. {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/entry_points.txt +0 -0
  44. {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/licenses/LICENSE +0 -0
  45. {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/top_level.txt +0 -0
@@ -6,8 +6,10 @@ text processing capabilities with minimal complexity.
  """

  from enum import Enum
- from typing import List, Optional
- from pydantic import BaseModel, Field
+ import json
+ import re
+ from typing import List, Optional, Tuple
+ from pydantic import BaseModel, Field, ValidationError

  from ..core.interface import AbstractCoreInterface
  from ..core.factory import create_llm
@@ -117,9 +119,10 @@ class BasicSummarizer:
  self,
  llm: Optional[AbstractCoreInterface] = None,
  max_chunk_size: int = 8000,
- max_tokens: int = 32000,
- max_output_tokens: int = 8000,
- timeout: Optional[float] = None
+ max_tokens: int = -1,
+ max_output_tokens: int = -1,
+ timeout: Optional[float] = None,
+ retry_strategy: Optional[FeedbackRetry] = None,
  ):
  """
  Initialize the summarizer
@@ -127,14 +130,26 @@ class BasicSummarizer:
  Args:
  llm: AbstractCore instance (any provider). If None, attempts to create ollama gemma3:1b-it-qat
  max_chunk_size: Maximum characters per chunk for long documents (default 8000)
- max_tokens: Maximum total tokens for LLM context (default 32000)
- max_output_tokens: Maximum tokens for LLM output generation (default 8000)
+ max_tokens: Maximum total tokens for LLM context (default -1 = AUTO).
+ - Use -1 (AUTO): Automatically uses model's context window capability
+ - Use specific value: Hard limit for deployment constraint (GPU/RAM limits)
+ Example: max_tokens=16000 limits to 16K even if model supports 128K
+ max_output_tokens: Maximum tokens for LLM output generation (default -1 = AUTO).
+ - Use -1 (AUTO): Automatically uses model's output capability
+ - Use specific value: Hard limit for output tokens
  timeout: HTTP request timeout in seconds. None for unlimited timeout (default None)
+ retry_strategy: Custom retry strategy for structured output. If None, uses default (3 attempts)
  """
  if llm is None:
  try:
  # Default to gemma3:1b-it-qat with configurable token limits
- self.llm = create_llm("ollama", model="gemma3:1b-it-qat", max_tokens=max_tokens, max_output_tokens=max_output_tokens, timeout=timeout)
+ # Only pass token limits if not using AUTO mode (-1)
+ llm_kwargs = {'timeout': timeout} if timeout is not None else {}
+ if max_tokens != -1:
+ llm_kwargs['max_tokens'] = max_tokens
+ if max_output_tokens != -1:
+ llm_kwargs['max_output_tokens'] = max_output_tokens
+ self.llm = create_llm("ollama", model="gemma3:1b-it-qat", **llm_kwargs)
  except Exception as e:
  error_msg = (
  f"❌ Failed to initialize default Ollama model 'gemma3:1b-it-qat': {e}\n\n"
@@ -162,9 +177,13 @@ class BasicSummarizer:
  else:
  self.llm = llm
  self.max_chunk_size = max_chunk_size
+ # Store token budgets. -1 means AUTO (use model's capability).
+ # In AbstractCore, `max_tokens` is the total (input + output) context budget.
+ self.max_tokens = max_tokens
+ self.max_output_tokens = max_output_tokens

- # Default retry strategy with 3 attempts
- self.retry_strategy = FeedbackRetry(max_attempts=3)
+ # Default retry strategy with 3 attempts (callers may override for latency-sensitive UX).
+ self.retry_strategy = retry_strategy or FeedbackRetry(max_attempts=3)

  def summarize(
  self,
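A minimal usage sketch of the new constructor defaults follows; the import paths are inferred from the file list above and the FeedbackRetry call shown in this diff, so treat them as assumptions rather than documented API.

# Sketch only: module paths and FeedbackRetry usage are inferred from this diff.
from abstractcore.processing.basic_summarizer import BasicSummarizer
from abstractcore.structured.retry import FeedbackRetry

# AUTO mode (new defaults): max_tokens=-1 and max_output_tokens=-1, so no token
# limits are passed to create_llm() and the model's own capabilities are used.
summarizer = BasicSummarizer()

# Deployment-constrained mode: cap the total context at 16K even if the model
# supports more, and keep structured-output retries short for latency-sensitive callers.
capped = BasicSummarizer(
    max_tokens=16000,
    max_output_tokens=2000,
    retry_strategy=FeedbackRetry(max_attempts=1),
)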
@@ -228,36 +247,29 @@ class BasicSummarizer:
  # Build the prompt based on parameters
  prompt = self._build_prompt(text, focus, style, length)

- # Use AbstractCore's structured output with retry strategy (no word counts in LLM response)
- response = self.llm.generate(prompt, response_model=LLMSummaryOutput, retry_strategy=self.retry_strategy)
-
- # Extract the structured output
- llm_result = None
- if isinstance(response, LLMSummaryOutput):
- # When structured output succeeds, response is the LLMSummaryOutput object directly
- llm_result = response
- elif hasattr(response, 'structured_output') and response.structured_output:
- # Fallback: check for structured_output attribute
- llm_result = response.structured_output
- else:
- # Debug information for troubleshooting
- error_msg = f"Failed to generate structured summary output. Response type: {type(response)}"
- if hasattr(response, 'content'):
- error_msg += f", Content: {response.content[:200]}..."
- if hasattr(response, 'structured_output'):
- error_msg += f", Structured output: {response.structured_output}"
- raise ValueError(error_msg)
+ llm_result: Optional[LLMSummaryOutput] = None
+ try:
+ # Use AbstractCore's structured output with retry strategy (no word counts in LLM response)
+ response = self.llm.generate(prompt, response_model=LLMSummaryOutput, retry_strategy=self.retry_strategy)
+ llm_result = self._extract_summary_structured_output(response, context="summary")
+ except (json.JSONDecodeError, ValidationError) as e:
+ logger.warning(
+ "Structured summary output failed; falling back to marker format",
+ error_type=type(e).__name__,
+ error=str(e),
+ )
+ llm_result = self._summarize_fallback(text=text, focus=focus, style=style, length=length)

  # Compute word counts ourselves (reliable, client-side calculation)
  actual_original_words = len(text.split())
- actual_summary_words = len(llm_result.summary.split())
+ actual_summary_words = len((llm_result.summary if llm_result else "").split())

  # Create complete result with computed word counts
  return SummaryOutput(
- summary=llm_result.summary,
- key_points=llm_result.key_points,
- confidence=llm_result.confidence,
- focus_alignment=llm_result.focus_alignment,
+ summary=(llm_result.summary if llm_result else ""),
+ key_points=(llm_result.key_points if llm_result else []),
+ confidence=(llm_result.confidence if llm_result else 0.5),
+ focus_alignment=(llm_result.focus_alignment if llm_result else 0.5),
  word_count_original=actual_original_words,
  word_count_summary=actual_summary_words
  )
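With the try/except above, summarize() degrades to the marker-format fallback (added later in this diff) instead of raising, and still returns a complete SummaryOutput with client-computed word counts. Continuing the sketch above (the positional text argument is an assumption):

text = "..."  # any long document
result = summarizer.summarize(text)
print(result.word_count_original, result.word_count_summary)  # always computed from the actual strings
print(result.confidence)  # 0.5 only in the defensive case where neither path produced a result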
@@ -297,22 +309,31 @@ class BasicSummarizer:
  summary: str
  key_points: List[str] = Field(max_length=5)

- response = self.llm.generate(chunk_prompt, response_model=ChunkSummary, retry_strategy=self.retry_strategy)
- if isinstance(response, ChunkSummary):
- # When structured output succeeds, response is the ChunkSummary object directly
- chunk_summaries.append(response)
- elif hasattr(response, 'structured_output') and response.structured_output:
- # Fallback: check for structured_output attribute
- chunk_summaries.append(response.structured_output)
- else:
- # If chunk processing fails, create a fallback summary
- logger.warning("Chunk processing failed, creating fallback",
- chunk_number=i+1,
- total_chunks=len(chunks))
- chunk_summaries.append(ChunkSummary(
- summary=f"Section {i+1} content summary unavailable",
- key_points=["Content processing failed"]
- ))
+ try:
+ response = self.llm.generate(chunk_prompt, response_model=ChunkSummary, retry_strategy=self.retry_strategy)
+ if isinstance(response, ChunkSummary):
+ # When structured output succeeds, response is the ChunkSummary object directly
+ chunk_summaries.append(response)
+ elif hasattr(response, 'structured_output') and response.structured_output:
+ # Fallback: check for structured_output attribute
+ chunk_summaries.append(response.structured_output)
+ else:
+ raise ValueError(f"Unexpected chunk response type: {type(response)}")
+ except (json.JSONDecodeError, ValidationError, ValueError) as e:
+ # If chunk processing fails, create a minimal placeholder (do not fail the whole summary).
+ logger.warning(
+ "Chunk processing failed, creating fallback",
+ chunk_number=i + 1,
+ total_chunks=len(chunks),
+ error_type=type(e).__name__,
+ error=str(e),
+ )
+ chunk_summaries.append(
+ ChunkSummary(
+ summary=f"Section {i+1} content summary unavailable",
+ key_points=["Content processing failed"],
+ )
+ )

  # Step 2: Combine chunk summaries (Reduce phase)
  combined_text = "\n\n".join([
@@ -323,35 +344,28 @@ class BasicSummarizer:
  # Generate final summary from combined summaries
  final_prompt = self._build_final_combination_prompt(combined_text, focus, style, length, len(text))

- response = self.llm.generate(final_prompt, response_model=LLMSummaryOutput, retry_strategy=self.retry_strategy)
-
- # Extract the structured output
- llm_result = None
- if isinstance(response, LLMSummaryOutput):
- # When structured output succeeds, response is the LLMSummaryOutput object directly
- llm_result = response
- elif hasattr(response, 'structured_output') and response.structured_output:
- # Fallback: check for structured_output attribute
- llm_result = response.structured_output
- else:
- # Debug information for troubleshooting
- error_msg = f"Failed to generate final structured summary output. Response type: {type(response)}"
- if hasattr(response, 'content'):
- error_msg += f", Content: {response.content[:200]}..."
- if hasattr(response, 'structured_output'):
- error_msg += f", Structured output: {response.structured_output}"
- raise ValueError(error_msg)
+ llm_result: Optional[LLMSummaryOutput] = None
+ try:
+ response = self.llm.generate(final_prompt, response_model=LLMSummaryOutput, retry_strategy=self.retry_strategy)
+ llm_result = self._extract_summary_structured_output(response, context="final_summary")
+ except (json.JSONDecodeError, ValidationError) as e:
+ logger.warning(
+ "Structured final summary output failed; falling back to marker format",
+ error_type=type(e).__name__,
+ error=str(e),
+ )
+ llm_result = self._summarize_fallback(text=combined_text, focus=focus, style=style, length=length)

  # Compute word counts ourselves (reliable, client-side calculation)
  actual_original_words = len(text.split())
- actual_summary_words = len(llm_result.summary.split())
+ actual_summary_words = len((llm_result.summary if llm_result else "").split())

  # Create complete result with computed word counts
  return SummaryOutput(
- summary=llm_result.summary,
- key_points=llm_result.key_points,
- confidence=llm_result.confidence,
- focus_alignment=llm_result.focus_alignment,
+ summary=(llm_result.summary if llm_result else ""),
+ key_points=(llm_result.key_points if llm_result else []),
+ confidence=(llm_result.confidence if llm_result else 0.5),
+ focus_alignment=(llm_result.focus_alignment if llm_result else 0.5),
  word_count_original=actual_original_words,
  word_count_summary=actual_summary_words
  )
@@ -360,6 +374,13 @@ class BasicSummarizer:
  """
  Determine if text should be chunked based on token count.

+ Token budget logic:
+ - max_tokens = -1 (AUTO): Uses model's full context window capability
+ - max_tokens = N: Hard limit (deployment constraint for GPU/RAM)
+
+ This ensures we don't exceed GPU memory constraints even when the model
+ theoretically supports larger contexts.
+
  Uses centralized TokenUtils for accurate token estimation.
  Falls back to character count if model information unavailable.
  """
@@ -370,18 +391,214 @@ class BasicSummarizer:
  if self.llm and hasattr(self.llm, 'model'):
  model_name = self.llm.model

- # Estimate tokens using centralized utility
- estimated_tokens = TokenUtils.estimate_tokens(text, model_name)
+ # Estimate tokens using centralized utility. If estimation fails for any reason,
+ # fall back to character chunking (conservative).
+ try:
+ estimated_tokens = TokenUtils.estimate_tokens(text, model_name)
+ except Exception:
+ return len(text) > self.max_chunk_size

- # Use a conservative token limit (leaving room for prompt overhead)
- # Most models have 32k+ context nowadays, so 8k tokens for input text is safe
- token_limit = 8000
+ # Determine the effective token budget
+ # Get provider's capabilities
+ provider_max_input = getattr(self.llm, "max_input_tokens", None) if self.llm else None
+ if provider_max_input is None:
+ provider_total = getattr(self.llm, "max_tokens", None) if self.llm else None
+ provider_output = getattr(self.llm, "max_output_tokens", None) if self.llm else None
+ if provider_total is not None and provider_output is not None:
+ try:
+ provider_max_input = int(provider_total) - int(provider_output)
+ except Exception:
+ provider_max_input = None

- if estimated_tokens > token_limit:
- return True
+ # Determine effective max_input_tokens based on configuration
+ if self.max_tokens == -1:
+ # AUTO mode: Use model's capability
+ if provider_max_input is not None:
+ max_input_tokens = provider_max_input
+ else:
+ # Fallback to safe default if model info unavailable
+ max_input_tokens = 24000 # Conservative default
+ else:
+ # User-specified limit (deployment constraint)
+ user_max_output = self.max_output_tokens if self.max_output_tokens != -1 else 8000
+ user_max_input = self.max_tokens - user_max_output

- # Fallback to character-based check for very long texts
- return len(text) > self.max_chunk_size
+
+ if provider_max_input is not None:
+ # Respect BOTH user limit AND model capability (take minimum)
+ max_input_tokens = min(provider_max_input, user_max_input)
+ else:
+ # No model info, use user limit
+ max_input_tokens = user_max_input
+
+ # Reserve prompt/formatting overhead (structured output schemas + instructions).
+ # Keep the historical safety floor (8000) for small-context models.
+ try:
+ token_limit = max(8000, int(max_input_tokens) - 1200)
+ except Exception:
+ token_limit = 8000
+
+ logger.debug(
+ "Chunking decision",
+ estimated_tokens=estimated_tokens,
+ token_limit=token_limit,
+ max_tokens_config=self.max_tokens,
+ is_auto_mode=(self.max_tokens == -1),
+ will_chunk=(estimated_tokens > token_limit)
+ )
+
+ return estimated_tokens > token_limit
+
+ def _extract_summary_structured_output(self, response: object, *, context: str) -> LLMSummaryOutput:
+ """Extract structured summary output from AbstractCore responses."""
+ if isinstance(response, LLMSummaryOutput):
+ return response
+ if hasattr(response, "structured_output") and getattr(response, "structured_output"):
+ return response.structured_output
+
+ error_msg = f"Failed to generate structured {context} output. Response type: {type(response)}"
+ if hasattr(response, "content") and getattr(response, "content"):
+ try:
+ error_msg += f", Content: {str(response.content)[:200]}..."
+ except Exception:
+ pass
+ if hasattr(response, "structured_output"):
+ try:
+ error_msg += f", Structured output: {getattr(response, 'structured_output')}"
+ except Exception:
+ pass
+ raise ValueError(error_msg)
+
+ def _summarize_fallback(
+ self,
+ *,
+ text: str,
+ focus: Optional[str],
+ style: SummaryStyle,
+ length: SummaryLength,
+ ) -> LLMSummaryOutput:
+ """Best-effort summary when structured output cannot be produced reliably."""
+ prompt = self._build_fallback_prompt(text=text, focus=focus, style=style, length=length)
+ response = self.llm.generate(prompt)
+ content = getattr(response, "content", None)
+ if content is None:
+ content = str(response)
+ summary, key_points, confidence, focus_alignment = self._parse_fallback_response(str(content))
+ return LLMSummaryOutput(
+ summary=summary,
+ key_points=key_points[:8],
+ confidence=confidence,
+ focus_alignment=focus_alignment,
+ )
+
+ def _build_fallback_prompt(
+ self,
+ *,
+ text: str,
+ focus: Optional[str],
+ style: SummaryStyle,
+ length: SummaryLength,
+ ) -> str:
+ """Build a non-JSON prompt that is easy to parse deterministically."""
+ style_instructions = {
+ SummaryStyle.STRUCTURED: "Present the summary in a clear, organized format with distinct sections or bullet points.",
+ SummaryStyle.NARRATIVE: "Write the summary as a flowing narrative that tells the story of the content.",
+ SummaryStyle.OBJECTIVE: "Maintain a neutral, factual tone without opinions or interpretations.",
+ SummaryStyle.ANALYTICAL: "Provide critical analysis with insights, implications, and deeper understanding.",
+ SummaryStyle.EXECUTIVE: "Focus on actionable insights, business implications, and key decisions.",
+ SummaryStyle.CONVERSATIONAL: "Preserve conversational context, key decisions, ongoing topics, and user intent. Focus on information needed for conversation continuity.",
+ }
+
+ length_instructions = {
+ SummaryLength.BRIEF: "Keep the summary very concise - 2-3 sentences covering only the most essential points.",
+ SummaryLength.STANDARD: "Provide a balanced summary of 1-2 paragraphs covering the main ideas.",
+ SummaryLength.DETAILED: "Create a comprehensive summary with multiple paragraphs covering all important aspects.",
+ SummaryLength.COMPREHENSIVE: "Provide an extensive analysis covering all significant points, context, and implications.",
+ }
+
+ focus_instruction = ""
+ if focus:
+ focus_instruction = f"\nPay special attention to: {focus}\n"
+
+ return f"""Analyze the following text and produce a summary.
+
+ {style_instructions[style]}
+ {length_instructions[length]}{focus_instruction}
+
+ Text to summarize:
+ {text}
+
+ Return your answer in this EXACT plain-text format (no JSON, no code blocks):
+
+ SUMMARY:
+ <the main summary text>
+
+ KEY POINTS:
+ - <point 1>
+ - <point 2>
+ - <point 3>
+
+ CONFIDENCE: <0-1>
+ FOCUS_ALIGNMENT: <0-1>
+ """
+
+ @staticmethod
+ def _parse_fallback_response(content: str) -> Tuple[str, List[str], float, float]:
+ """Parse marker-format fallback summaries into structured fields."""
+ text = (content or "").strip()
+ if not text:
+ return "", [], 0.5, 0.5
+
+ def _parse_score(label_re: str, default: float) -> float:
+ m = re.search(rf"(?im)^{label_re}\s*:\s*(.+?)\s*$", text)
+ if not m:
+ return default
+ raw = m.group(1).strip()
+ try:
+ if raw.endswith("%"):
+ val = float(raw[:-1].strip()) / 100.0
+ else:
+ val = float(raw)
+ except Exception:
+ return default
+ return max(0.0, min(1.0, val))
+
+ summary = ""
+ m_summary = re.search(r"(?is)summary\s*:\s*(.*?)\n\s*key\s*points\s*:", text)
+ if m_summary:
+ summary = m_summary.group(1).strip()
+ else:
+ # Best-effort: take the first paragraph.
+ summary = text.split("\n\n", 1)[0].strip()
+
+ key_points: List[str] = []
+ m_kp = re.search(
+ r"(?is)key\s*points\s*:\s*(.*?)(?:\n\s*confidence\s*:|\n\s*focus[_ ]alignment\s*:|\Z)",
+ text,
+ )
+ if m_kp:
+ block = m_kp.group(1)
+ for line in block.splitlines():
+ line = line.strip()
+ if not line:
+ continue
+ if line.startswith(("-", "•", "*")):
+ line = line.lstrip("-•*").strip()
+ if line:
+ key_points.append(line)
+ if not key_points:
+ # Fallback: try to extract bullet-like lines anywhere.
+ for line in text.splitlines():
+ line = line.strip()
+ if line.startswith(("-", "•", "*")):
+ cleaned = line.lstrip("-•*").strip()
+ if cleaned:
+ key_points.append(cleaned)
+ key_points = key_points[:8]
+
+ confidence = _parse_score("confidence", 0.6)
+ focus_alignment = _parse_score(r"focus[_ ]alignment", 0.6)
+
+ return summary, key_points, confidence, focus_alignment

  def _split_text_into_chunks(self, text: str, overlap: int = 200) -> List[str]:
  """Split text into overlapping chunks"""
@@ -650,4 +867,4 @@ Create a unified summary that represents the entire document effectively."""
  else:
  formatted_lines.append(f"[{role.upper()}]: {content}")

- return "\n\n".join(formatted_lines)
+ return "\n\n".join(formatted_lines)
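The fallback path asks the model for a fixed plain-text marker format and parses it deterministically with _parse_fallback_response(). An illustrative round-trip through the parser added above (the sample text is made up; BasicSummarizer import as in the earlier sketch):

sample = """SUMMARY:
The report describes Q3 revenue growth driven by the new subscription tier.

KEY POINTS:
- Revenue grew 12% quarter over quarter
- Churn fell below 2%

CONFIDENCE: 0.8
FOCUS_ALIGNMENT: 75%
"""

# _parse_fallback_response is a @staticmethod, so it can be exercised directly.
summary, key_points, confidence, focus_alignment = BasicSummarizer._parse_fallback_response(sample)
# summary         -> "The report describes Q3 revenue growth driven by the new subscription tier."
# key_points      -> ["Revenue grew 12% quarter over quarter", "Churn fell below 2%"]
# confidence      -> 0.8
# focus_alignment -> 0.75  (percent values are normalized into the 0-1 range)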
@@ -100,6 +100,30 @@ class AnthropicProvider(BaseProvider):
  "role": "assistant",
  "content": msg["content"]
  })
+ elif role == "tool":
+ # Anthropic Messages API represents tool outputs as `tool_result`
+ # content blocks inside a USER message (there is no `role="tool"`).
+ meta = msg.get("metadata") if isinstance(msg.get("metadata"), dict) else {}
+ tool_use_id = meta.get("call_id") or meta.get("tool_use_id") or meta.get("id")
+ tool_text = msg.get("content", "")
+ tool_text = "" if tool_text is None else str(tool_text)
+
+ if isinstance(tool_use_id, str) and tool_use_id.strip():
+ api_messages.append(
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "tool_result",
+ "tool_use_id": tool_use_id.strip(),
+ "content": tool_text,
+ }
+ ],
+ }
+ )
+ else:
+ # Fallback: preserve as plain user text when no tool_use_id is available.
+ api_messages.append({"role": "user", "content": tool_text})
  else:
  api_messages.append({
  "role": "user",
@@ -194,7 +218,9 @@ class AnthropicProvider(BaseProvider):
  call_params["tool_choice"] = {"type": kwargs.get("tool_choice", "auto")}
  else:
  # Add tools as system prompt for prompted models
- tool_prompt = self.tool_handler.format_tools_prompt(tools)
+ system_text = call_params.get("system") if isinstance(call_params.get("system"), str) else ""
+ include_tool_list = "## Tools (session)" not in system_text
+ tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
  if call_params.get("system"):
  call_params["system"] += f"\n\n{tool_prompt}"
  else:
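The new role == "tool" branch above maps AbstractCore tool-result messages onto Anthropic's content-block format, since the Messages API has no tool role. An illustrative before/after with made-up values:

# Illustrative input/output of the role == "tool" handling added above.
abstractcore_msg = {
    "role": "tool",
    "content": "72°F and sunny",
    "metadata": {"call_id": "toolu_01A"},
}

# ...becomes the following entry in api_messages (Anthropic has no role="tool"):
anthropic_msg = {
    "role": "user",
    "content": [
        {
            "type": "tool_result",
            "tool_use_id": "toolu_01A",
            "content": "72°F and sunny",
        }
    ],
}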
@@ -213,6 +239,8 @@ class AnthropicProvider(BaseProvider):
  formatted = self._format_response(response)
  # Add generation time to response
  formatted.gen_time = gen_time
+ formatted.metadata = dict(formatted.metadata or {})
+ formatted.metadata["_provider_request"] = {"call_params": call_params}

  # Handle tool execution for Anthropic responses
  if tools and (formatted.has_tool_calls() or
@@ -232,7 +260,7 @@ class AnthropicProvider(BaseProvider):
  error_message = format_model_error("Anthropic", self.model, available_models)
  raise ModelNotFoundError(error_message)
  else:
- raise ProviderAPIError(f"Anthropic API error: {str(e)}")
+ raise

  async def _agenerate_internal(self,
  prompt: str,
@@ -260,6 +288,30 @@ class AnthropicProvider(BaseProvider):
  "role": "assistant",
  "content": msg["content"]
  })
+ elif role == "tool":
+ # Anthropic Messages API represents tool outputs as `tool_result`
+ # content blocks inside a USER message (there is no `role="tool"`).
+ meta = msg.get("metadata") if isinstance(msg.get("metadata"), dict) else {}
+ tool_use_id = meta.get("call_id") or meta.get("tool_use_id") or meta.get("id")
+ tool_text = msg.get("content", "")
+ tool_text = "" if tool_text is None else str(tool_text)
+
+ if isinstance(tool_use_id, str) and tool_use_id.strip():
+ api_messages.append(
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "tool_result",
+ "tool_use_id": tool_use_id.strip(),
+ "content": tool_text,
+ }
+ ],
+ }
+ )
+ else:
+ # Fallback: preserve as plain user text when no tool_use_id is available.
+ api_messages.append({"role": "user", "content": tool_text})
  else:
  api_messages.append({
  "role": "user",
@@ -348,7 +400,9 @@ class AnthropicProvider(BaseProvider):
  elif kwargs.get("tool_choice"):
  call_params["tool_choice"] = {"type": kwargs.get("tool_choice", "auto")}
  else:
- tool_prompt = self.tool_handler.format_tools_prompt(tools)
+ system_text = call_params.get("system") if isinstance(call_params.get("system"), str) else ""
+ include_tool_list = "## Tools (session)" not in system_text
+ tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
  if call_params.get("system"):
  call_params["system"] += f"\n\n{tool_prompt}"
  else:
@@ -365,6 +419,8 @@ class AnthropicProvider(BaseProvider):

  formatted = self._format_response(response)
  formatted.gen_time = gen_time
+ formatted.metadata = dict(formatted.metadata or {})
+ formatted.metadata["_provider_request"] = {"call_params": call_params}

  if tools and (formatted.has_tool_calls() or
  (self.tool_handler.supports_prompted and formatted.content)):
@@ -381,7 +437,7 @@ class AnthropicProvider(BaseProvider):
  error_message = format_model_error("Anthropic", self.model, available_models)
  raise ModelNotFoundError(error_message)
  else:
- raise ProviderAPIError(f"Anthropic API error: {str(e)}")
+ raise

  async def _async_stream_response(self, call_params: Dict[str, Any], tools: Optional[List[Dict[str, Any]]] = None) -> AsyncIterator[GenerateResponse]:
  """Native async streaming with Anthropic's context manager pattern."""
@@ -397,7 +453,7 @@ class AnthropicProvider(BaseProvider):
  raw_response=chunk
  )
  except Exception as e:
- raise ProviderAPIError(f"Anthropic streaming error: {str(e)}")
+ raise

  def unload(self) -> None:
  """Close async client if it was created."""
@@ -414,13 +470,38 @@ class AnthropicProvider(BaseProvider):
  """Format tools for Anthropic API format"""
  formatted_tools = []
  for tool in tools:
- # Get parameters and ensure proper JSON schema format
+ # Anthropic expects `input_schema` to be a JSON Schema object:
+ # https://platform.claude.com/docs/en/agents-and-tools/tool-use/implement-tool-use
+ #
+ # Our internal tool representation typically uses:
+ # tool["parameters"] = { "arg": {"type": "...", "default": ...?}, ... }
+ # or, less commonly:
+ # tool["parameters"] = {"type":"object","properties":{...},"required":[...]}
  params = tool.get("parameters", {})
- input_schema = {
+
+ properties: Dict[str, Any] = {}
+ required: List[str] = []
+
+ if isinstance(params, dict) and "properties" in params:
+ # Treat as already-schema-like.
+ raw_props = params.get("properties") if isinstance(params.get("properties"), dict) else {}
+ properties = dict(raw_props)
+ raw_required = params.get("required")
+ if isinstance(raw_required, list):
+ required = [str(x) for x in raw_required if isinstance(x, (str, int))]
+ elif isinstance(params, dict):
+ # Treat as compact parameter dict; infer required args by absence of `default`.
+ properties = dict(params)
+ for k, v in params.items():
+ if isinstance(v, dict) and "default" not in v:
+ required.append(str(k))
+
+ input_schema: Dict[str, Any] = {
  "type": "object",
- "properties": params.get("properties", params), # Handle both formats
- "required": params.get("required", list(params.keys()) if "properties" not in params else [])
+ "properties": properties,
  }
+ if required:
+ input_schema["required"] = required

  formatted_tool = {
  "name": tool.get("name"),
@@ -440,7 +521,7 @@ class AnthropicProvider(BaseProvider):
  # Handle different content types
  for content_block in response.content:
  if content_block.type == "text":
- content = content_block.text
+ content += content_block.text
  elif content_block.type == "tool_use":
  if tool_calls is None:
  tool_calls = []
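The tools-formatting hunk above normalizes both compact parameter dicts and already-schema-like dicts into a JSON Schema input_schema, inferring required arguments from the absence of a default. An illustrative before/after (the tool definition is made up, and only the name and input_schema parts of the formatted tool are shown):

# Illustrative input/output of the schema normalization above.
compact_tool = {
    "name": "get_weather",
    "parameters": {
        "city": {"type": "string"},                   # no default -> inferred as required
        "units": {"type": "string", "default": "c"},  # has default -> optional
    },
}

# The formatter above would emit an Anthropic tool roughly like:
anthropic_tool = {
    "name": "get_weather",
    "input_schema": {
        "type": "object",
        "properties": {
            "city": {"type": "string"},
            "units": {"type": "string", "default": "c"},
        },
        "required": ["city"],
    },
}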