abstractcore-2.6.6.tar.gz → abstractcore-2.6.8.tar.gz

This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (138)
  1. {abstractcore-2.6.6 → abstractcore-2.6.8}/PKG-INFO +46 -1
  2. {abstractcore-2.6.6 → abstractcore-2.6.8}/README.md +44 -0
  3. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/assets/model_capabilities.json +14 -5
  4. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/media/utils/image_scaler.py +1 -8
  5. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/processing/__init__.py +2 -2
  6. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/processing/basic_summarizer.py +79 -10
  7. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/providers/base.py +22 -1
  8. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/server/app.py +51 -2
  9. abstractcore-2.6.8/abstractcore/tools/__init__.py +122 -0
  10. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/tools/common_tools.py +177 -15
  11. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/tools/parser.py +96 -4
  12. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/tools/registry.py +15 -16
  13. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/utils/version.py +1 -1
  14. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/utils/vlm_token_calculator.py +1 -6
  15. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore.egg-info/PKG-INFO +46 -1
  16. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore.egg-info/requires.txt +1 -0
  17. {abstractcore-2.6.6 → abstractcore-2.6.8}/pyproject.toml +1 -0
  18. abstractcore-2.6.6/abstractcore/tools/__init__.py +0 -101
  19. {abstractcore-2.6.6 → abstractcore-2.6.8}/LICENSE +0 -0
  20. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/__init__.py +0 -0
  21. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/apps/__init__.py +0 -0
  22. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/apps/__main__.py +0 -0
  23. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/apps/app_config_utils.py +0 -0
  24. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/apps/deepsearch.py +0 -0
  25. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/apps/extractor.py +0 -0
  26. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/apps/intent.py +0 -0
  27. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/apps/judge.py +0 -0
  28. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/apps/summarizer.py +0 -0
  29. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/architectures/__init__.py +0 -0
  30. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/architectures/detection.py +0 -0
  31. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/architectures/enums.py +0 -0
  32. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/assets/architecture_formats.json +0 -0
  33. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/assets/session_schema.json +0 -0
  34. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/compression/__init__.py +0 -0
  35. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/compression/analytics.py +0 -0
  36. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/compression/cache.py +0 -0
  37. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/compression/config.py +0 -0
  38. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/compression/exceptions.py +0 -0
  39. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/compression/glyph_processor.py +0 -0
  40. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/compression/optimizer.py +0 -0
  41. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/compression/orchestrator.py +0 -0
  42. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/compression/pil_text_renderer.py +0 -0
  43. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/compression/quality.py +0 -0
  44. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/compression/text_formatter.py +0 -0
  45. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/compression/vision_compressor.py +0 -0
  46. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/config/__init__.py +0 -0
  47. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/config/main.py +0 -0
  48. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/config/manager.py +0 -0
  49. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/config/vision_config.py +0 -0
  50. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/core/__init__.py +0 -0
  51. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/core/enums.py +0 -0
  52. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/core/factory.py +0 -0
  53. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/core/interface.py +0 -0
  54. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/core/retry.py +0 -0
  55. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/core/session.py +0 -0
  56. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/core/types.py +0 -0
  57. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/download.py +0 -0
  58. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/embeddings/__init__.py +0 -0
  59. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/embeddings/manager.py +0 -0
  60. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/embeddings/models.py +0 -0
  61. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/events/__init__.py +0 -0
  62. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/exceptions/__init__.py +0 -0
  63. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/media/__init__.py +0 -0
  64. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/media/auto_handler.py +0 -0
  65. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/media/base.py +0 -0
  66. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/media/capabilities.py +0 -0
  67. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/media/handlers/__init__.py +0 -0
  68. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/media/handlers/anthropic_handler.py +0 -0
  69. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/media/handlers/local_handler.py +0 -0
  70. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/media/handlers/openai_handler.py +0 -0
  71. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/media/processors/__init__.py +0 -0
  72. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/media/processors/direct_pdf_processor.py +0 -0
  73. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/media/processors/glyph_pdf_processor.py +0 -0
  74. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/media/processors/image_processor.py +0 -0
  75. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/media/processors/office_processor.py +0 -0
  76. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/media/processors/pdf_processor.py +0 -0
  77. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/media/processors/text_processor.py +0 -0
  78. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/media/types.py +0 -0
  79. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/media/utils/__init__.py +0 -0
  80. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/media/vision_fallback.py +0 -0
  81. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/processing/basic_deepsearch.py +0 -0
  82. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/processing/basic_extractor.py +0 -0
  83. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/processing/basic_intent.py +0 -0
  84. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/processing/basic_judge.py +0 -0
  85. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/providers/__init__.py +0 -0
  86. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/providers/anthropic_provider.py +0 -0
  87. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/providers/huggingface_provider.py +0 -0
  88. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/providers/lmstudio_provider.py +0 -0
  89. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/providers/mlx_provider.py +0 -0
  90. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/providers/model_capabilities.py +0 -0
  91. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/providers/ollama_provider.py +0 -0
  92. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/providers/openai_compatible_provider.py +0 -0
  93. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/providers/openai_provider.py +0 -0
  94. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/providers/registry.py +0 -0
  95. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/providers/streaming.py +0 -0
  96. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/providers/vllm_provider.py +0 -0
  97. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/server/__init__.py +0 -0
  98. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/structured/__init__.py +0 -0
  99. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/structured/handler.py +0 -0
  100. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/structured/retry.py +0 -0
  101. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/tools/core.py +0 -0
  102. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/tools/handler.py +0 -0
  103. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/tools/syntax_rewriter.py +0 -0
  104. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/tools/tag_rewriter.py +0 -0
  105. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/utils/__init__.py +0 -0
  106. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/utils/cli.py +0 -0
  107. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/utils/message_preprocessor.py +0 -0
  108. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/utils/self_fixes.py +0 -0
  109. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/utils/structured_logging.py +0 -0
  110. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/utils/token_utils.py +0 -0
  111. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore/utils/trace_export.py +0 -0
  112. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore.egg-info/SOURCES.txt +0 -0
  113. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore.egg-info/dependency_links.txt +0 -0
  114. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore.egg-info/entry_points.txt +0 -0
  115. {abstractcore-2.6.6 → abstractcore-2.6.8}/abstractcore.egg-info/top_level.txt +0 -0
  116. {abstractcore-2.6.6 → abstractcore-2.6.8}/setup.cfg +0 -0
  117. {abstractcore-2.6.6 → abstractcore-2.6.8}/tests/test_agentic_cli_compatibility.py +0 -0
  118. {abstractcore-2.6.6 → abstractcore-2.6.8}/tests/test_basic_session.py +0 -0
  119. {abstractcore-2.6.6 → abstractcore-2.6.8}/tests/test_complete_integration.py +0 -0
  120. {abstractcore-2.6.6 → abstractcore-2.6.8}/tests/test_comprehensive_events.py +0 -0
  121. {abstractcore-2.6.6 → abstractcore-2.6.8}/tests/test_core_components.py +0 -0
  122. {abstractcore-2.6.6 → abstractcore-2.6.8}/tests/test_enhanced_prompt.py +0 -0
  123. {abstractcore-2.6.6 → abstractcore-2.6.8}/tests/test_environment_variable_tool_call_tags.py +0 -0
  124. {abstractcore-2.6.6 → abstractcore-2.6.8}/tests/test_factory.py +0 -0
  125. {abstractcore-2.6.6 → abstractcore-2.6.8}/tests/test_final_accuracy.py +0 -0
  126. {abstractcore-2.6.6 → abstractcore-2.6.8}/tests/test_final_comprehensive.py +0 -0
  127. {abstractcore-2.6.6 → abstractcore-2.6.8}/tests/test_final_graceful_errors.py +0 -0
  128. {abstractcore-2.6.6 → abstractcore-2.6.8}/tests/test_graceful_fallback.py +0 -0
  129. {abstractcore-2.6.6 → abstractcore-2.6.8}/tests/test_import_debug.py +0 -0
  130. {abstractcore-2.6.6 → abstractcore-2.6.8}/tests/test_integrated_functionality.py +0 -0
  131. {abstractcore-2.6.6 → abstractcore-2.6.8}/tests/test_retry_observability.py +0 -0
  132. {abstractcore-2.6.6 → abstractcore-2.6.8}/tests/test_retry_strategy.py +0 -0
  133. {abstractcore-2.6.6 → abstractcore-2.6.8}/tests/test_seed_determinism.py +0 -0
  134. {abstractcore-2.6.6 → abstractcore-2.6.8}/tests/test_seed_temperature_basic.py +0 -0
  135. {abstractcore-2.6.6 → abstractcore-2.6.8}/tests/test_sensory_prompting.py +0 -0
  136. {abstractcore-2.6.6 → abstractcore-2.6.8}/tests/test_text_only_model_experience.py +0 -0
  137. {abstractcore-2.6.6 → abstractcore-2.6.8}/tests/test_unload_memory.py +0 -0
  138. {abstractcore-2.6.6 → abstractcore-2.6.8}/tests/test_user_scenario_validation.py +0 -0

PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: abstractcore
- Version: 2.6.6
+ Version: 2.6.8
  Summary: Unified interface to all LLM providers with essential infrastructure for tool calling, streaming, and model management
  Author-email: Laurent-Philippe Albou <contact@abstractcore.ai>
  Maintainer-email: Laurent-Philippe Albou <contact@abstractcore.ai>
@@ -30,6 +30,7 @@ Requires-Dist: pydantic<3.0.0,>=2.0.0
  Requires-Dist: httpx<1.0.0,>=0.24.0
  Requires-Dist: tiktoken<1.0.0,>=0.5.0
  Requires-Dist: requests<3.0.0,>=2.25.0
+ Requires-Dist: Pillow<12.0.0,>=10.0.0
  Provides-Extra: openai
  Requires-Dist: openai<2.0.0,>=1.0.0; extra == "openai"
  Provides-Extra: anthropic
@@ -194,6 +195,50 @@ response = llm.generate(
  print(response.content)
  ```

+ ### Tool Execution Modes
+
+ AbstractCore supports two tool execution modes:
+
+ **Mode 1: Passthrough (Default)** - Returns raw tool call tags for downstream processing
+
+ ```python
+ from abstractcore import create_llm
+ from abstractcore.tools import tool
+
+ @tool(name="get_weather", description="Get weather for a city")
+ def get_weather(city: str) -> str:
+     return f"Weather in {city}: Sunny, 22°C"
+
+ llm = create_llm("ollama", model="qwen3:4b") # execute_tools=False by default
+ response = llm.generate("What's the weather in Paris?", tools=[get_weather])
+ # response.content contains raw tool call tags: <|tool_call|>...
+ # Downstream runtime (AbstractRuntime, Codex, Claude Code) parses and executes
+ ```
+
+ **Use case**: Agent loops, AbstractRuntime, Codex, Claude Code, custom orchestration
+
+ **Mode 2: Direct Execution** - AbstractCore executes tools and returns results
+
+ ```python
+ from abstractcore import create_llm
+ from abstractcore.tools import tool
+ from abstractcore.tools.registry import register_tool
+
+ @tool(name="get_weather", description="Get weather for a city")
+ def get_weather(city: str) -> str:
+     return f"Weather in {city}: Sunny, 22°C"
+
+ register_tool(get_weather) # Required for direct execution
+
+ llm = create_llm("ollama", model="qwen3:4b", execute_tools=True)
+ response = llm.generate("What's the weather in Paris?", tools=[get_weather])
+ # response.content contains executed tool results
+ ```
+
+ **Use case**: Simple scripts, single-turn tool use
+
+ > **Note**: The `@tool` decorator creates metadata but does NOT register globally. Tools are passed explicitly to `generate()`. Use `register_tool()` only when using direct execution mode.
+
  ### Response Object (GenerateResponse)

  Every LLM generation returns a **GenerateResponse** object with consistent structure across all providers:

README.md
@@ -65,6 +65,50 @@ response = llm.generate(
  print(response.content)
  ```

+ ### Tool Execution Modes
+
+ AbstractCore supports two tool execution modes:
+
+ **Mode 1: Passthrough (Default)** - Returns raw tool call tags for downstream processing
+
+ ```python
+ from abstractcore import create_llm
+ from abstractcore.tools import tool
+
+ @tool(name="get_weather", description="Get weather for a city")
+ def get_weather(city: str) -> str:
+     return f"Weather in {city}: Sunny, 22°C"
+
+ llm = create_llm("ollama", model="qwen3:4b") # execute_tools=False by default
+ response = llm.generate("What's the weather in Paris?", tools=[get_weather])
+ # response.content contains raw tool call tags: <|tool_call|>...
+ # Downstream runtime (AbstractRuntime, Codex, Claude Code) parses and executes
+ ```
+
+ **Use case**: Agent loops, AbstractRuntime, Codex, Claude Code, custom orchestration
+
+ **Mode 2: Direct Execution** - AbstractCore executes tools and returns results
+
+ ```python
+ from abstractcore import create_llm
+ from abstractcore.tools import tool
+ from abstractcore.tools.registry import register_tool
+
+ @tool(name="get_weather", description="Get weather for a city")
+ def get_weather(city: str) -> str:
+     return f"Weather in {city}: Sunny, 22°C"
+
+ register_tool(get_weather) # Required for direct execution
+
+ llm = create_llm("ollama", model="qwen3:4b", execute_tools=True)
+ response = llm.generate("What's the weather in Paris?", tools=[get_weather])
+ # response.content contains executed tool results
+ ```
+
+ **Use case**: Simple scripts, single-turn tool use
+
+ > **Note**: The `@tool` decorator creates metadata but does NOT register globally. Tools are passed explicitly to `generate()`. Use `register_tool()` only when using direct execution mode.
+
  ### Response Object (GenerateResponse)

  Every LLM generation returns a **GenerateResponse** object with consistent structure across all providers:

abstractcore/assets/model_capabilities.json
@@ -638,17 +638,26 @@
    "max_tokens": 262144
  },
  "qwen3-coder-30b": {
-   "max_output_tokens": 8192,
+   "max_output_tokens": 65536,
    "tool_support": "native",
    "structured_output": "native",
    "parallel_tools": true,
    "vision_support": false,
    "audio_support": false,
-   "notes": "Code-focused model with native tool support via chatml-function-calling format",
-   "source": "Alibaba official docs",
+   "architecture": "mixture_of_experts",
+   "total_parameters": "30.5B",
+   "active_parameters": "3.3B",
+   "experts": 128,
+   "experts_activated": 8,
+   "notes": "Code-focused MoE model (30.5B total/3.3B active, 128 experts/8 activated). Native tool support via chatml-function-calling format. Supports up to 1M tokens with YaRN extension.",
+   "source": "Qwen HuggingFace model card 2025",
    "canonical_name": "qwen3-coder-30b",
-   "aliases": [],
-   "max_tokens": 32768
+   "aliases": [
+     "Qwen/Qwen3-Coder-30B-A3B-Instruct",
+     "qwen3-coder-30b-a3b",
+     "qwen3-coder-30b-a3b-instruct"
+   ],
+   "max_tokens": 262144
  },
  "qwen2-vl": {
    "max_output_tokens": 8192,

abstractcore/media/utils/image_scaler.py
@@ -3,20 +3,13 @@ Image scaling utility for AbstractCore media handling.

  Provides intelligent image scaling based on model-specific requirements
  and capabilities for vision models.
-
- Requires: PIL (Pillow) - install with `pip install Pillow`
  """

  from typing import Tuple, Optional, Union, Dict, Any
  from enum import Enum
  from pathlib import Path

- try:
-     from PIL import Image, ImageOps
- except ImportError as e:
-     raise ImportError(
-         "PIL (Pillow) is required for image scaling. Install with: pip install Pillow"
-     ) from e
+ from PIL import Image, ImageOps

  from ..base import MediaProcessingError
  from ...utils.structured_logging import get_logger

abstractcore/processing/__init__.py
@@ -5,14 +5,14 @@ Basic text processing capabilities built on top of AbstractCore,
  demonstrating how to leverage the core infrastructure for real-world tasks.
  """

- from .basic_summarizer import BasicSummarizer, SummaryStyle, SummaryLength
+ from .basic_summarizer import BasicSummarizer, SummaryStyle, SummaryLength, CompressionMode
  from .basic_extractor import BasicExtractor
  from .basic_judge import BasicJudge, JudgmentCriteria, Assessment, create_judge
  from .basic_deepsearch import BasicDeepSearch, ResearchReport, ResearchFinding, ResearchPlan, ResearchSubTask
  from .basic_intent import BasicIntentAnalyzer, IntentType, IntentDepth, IntentContext, IdentifiedIntent, IntentAnalysisOutput

  __all__ = [
-     'BasicSummarizer', 'SummaryStyle', 'SummaryLength',
+     'BasicSummarizer', 'SummaryStyle', 'SummaryLength', 'CompressionMode',
      'BasicExtractor',
      'BasicJudge', 'JudgmentCriteria', 'Assessment', 'create_judge',
      'BasicDeepSearch', 'ResearchReport', 'ResearchFinding', 'ResearchPlan', 'ResearchSubTask',

abstractcore/processing/basic_summarizer.py
@@ -35,6 +35,42 @@ class SummaryLength(Enum):
      COMPREHENSIVE = "comprehensive" # Full analysis with context


+ class CompressionMode(Enum):
+     """Compression aggressiveness for chat history summarization.
+
+     Controls how aggressively the summarizer compresses conversation history:
+     - LIGHT: Keep most information, only remove redundancy
+     - STANDARD: Balanced compression, main points and context
+     - HEAVY: Aggressive compression, only critical information
+     """
+     LIGHT = "light"
+     STANDARD = "standard"
+     HEAVY = "heavy"
+
+
+ # Compression mode-specific instructions for summarization prompts
+ COMPRESSION_INSTRUCTIONS = {
+     CompressionMode.LIGHT: (
+         "Preserve most details from this conversation while removing only redundancy. "
+         "Keep: all key decisions and outcomes, important context and background, "
+         "specific details/names/numbers/technical terms, all tool calls and results, "
+         "error messages and resolutions. Remove only: repetitive greetings, duplicate information."
+     ),
+     CompressionMode.STANDARD: (
+         "Summarize with balanced compression, keeping main points and essential context. "
+         "Keep: key decisions and rationale, important outcomes, critical context for ongoing work, "
+         "unresolved items and pending tasks. Remove: intermediate reasoning steps, "
+         "exploratory tangents, detailed tool outputs (keep only key findings)."
+     ),
+     CompressionMode.HEAVY: (
+         "Extract only the most critical information. Keep ONLY: final decisions made, "
+         "critical outcomes (success/failure), essential context to continue work, "
+         "blocking issues and hard dependencies. Remove: all exploratory discussion, "
+         "all intermediate steps, all detailed outputs, all background explanations."
+     ),
+ }
+
+
  class LLMSummaryOutput(BaseModel):
      """LLM-generated summary output (without word counts)"""
      summary: str = Field(description="The main summary text")
@@ -493,7 +529,8 @@ Create a unified summary that represents the entire document effectively."""
          self,
          messages: List[dict],
          preserve_recent: int = 6,
-         focus: Optional[str] = None
+         focus: Optional[str] = None,
+         compression_mode: CompressionMode = CompressionMode.STANDARD
      ) -> SummaryOutput:
          """
          Specialized method for chat history summarization following SOTA 2025 practices
@@ -502,6 +539,7 @@ Create a unified summary that represents the entire document effectively."""
              messages: List of message dicts with 'role' and 'content' keys
              preserve_recent: Number of recent messages to keep intact (default 6)
              focus: Optional focus for summarization (e.g., "key decisions", "technical solutions")
+             compression_mode: How aggressively to compress (LIGHT, STANDARD, HEAVY)

          Returns:
              SummaryOutput: Structured summary optimized for chat history context
@@ -511,36 +549,67 @@ Create a unified summary that represents the entire document effectively."""
          - Focuses on decisions, solutions, and ongoing topics
          - Maintains user intent and assistant responses
          - Optimized for chat continuation rather than standalone summary
+
+         Compression Modes:
+         - LIGHT: Keep most information, only remove redundancy
+         - STANDARD: Balanced compression, main points and context
+         - HEAVY: Aggressive compression, only critical information
          """
+         # Build focus with compression instructions
+         compression_instruction = COMPRESSION_INSTRUCTIONS.get(
+             compression_mode,
+             COMPRESSION_INSTRUCTIONS[CompressionMode.STANDARD]
+         )
+
+         # Combine user focus with compression instruction
+         if focus:
+             effective_focus = f"{compression_instruction} Focus especially on: {focus}"
+         else:
+             effective_focus = compression_instruction
+
+         # Map compression mode to summary length for appropriate output size
+         length_map = {
+             CompressionMode.LIGHT: SummaryLength.DETAILED,
+             CompressionMode.STANDARD: SummaryLength.STANDARD,
+             CompressionMode.HEAVY: SummaryLength.BRIEF,
+         }
+         target_length = length_map.get(compression_mode, SummaryLength.STANDARD)
+
+         logger.debug("Chat history summarization with compression mode",
+                      message_count=len(messages),
+                      preserve_recent=preserve_recent,
+                      compression_mode=compression_mode.value,
+                      target_length=target_length.value)
+
          if len(messages) <= preserve_recent:
              # If short enough, just summarize normally
-             logger.debug("Chat history is short, using standard summarization",
-                          message_count=len(messages),
+             logger.debug("Chat history is short, using standard summarization",
+                          message_count=len(messages),
                           preserve_recent=preserve_recent)
              chat_text = self._format_chat_messages_to_text(messages)
              return self.summarize(
                  chat_text,
-                 focus=focus or "conversational context and key information",
+                 focus=effective_focus,
                  style=SummaryStyle.CONVERSATIONAL,
-                 length=SummaryLength.STANDARD
+                 length=target_length
              )

          # Split into older messages (to summarize) and recent messages (to preserve)
          older_messages = messages[:-preserve_recent]
          recent_messages = messages[-preserve_recent:]
-
-         logger.debug("Splitting chat history for summarization",
+
+         logger.debug("Splitting chat history for summarization",
                       total_messages=len(messages),
                       older_messages=len(older_messages),
                       recent_messages=len(recent_messages))

-         # Summarize older messages with conversational focus
+         # Summarize older messages with conversational focus and compression mode
          older_text = self._format_chat_messages_to_text(older_messages)
          older_summary = self.summarize(
              older_text,
-             focus=focus or "key decisions, solutions, and ongoing context",
+             focus=effective_focus,
              style=SummaryStyle.CONVERSATIONAL,
-             length=SummaryLength.DETAILED
+             length=target_length
          )

          # The summary should ONLY contain the older messages summary
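
A hypothetical usage sketch for the new `compression_mode` parameter added above. `BasicSummarizer`'s constructor is not part of this diff, so the bare construction below is an assumption (it may well require an LLM instance or provider settings):

```python
# Hypothetical sketch of the new compression_mode parameter (2.6.8).
# BasicSummarizer() construction is an assumption; its constructor is not shown in this diff.
from abstractcore.processing import BasicSummarizer, CompressionMode

messages = [
    {"role": "user", "content": "Let's plan the 2.6.8 release."},
    {"role": "assistant", "content": "Main items: Pillow as a hard dependency and the tool-mode docs."},
    # ... a longer history would normally follow ...
]

summarizer = BasicSummarizer()
summary = summarizer.summarize_chat_history(
    messages,
    preserve_recent=6,                       # keep the last 6 messages intact
    focus="key decisions",                   # merged with the compression instruction
    compression_mode=CompressionMode.HEAVY,  # LIGHT | STANDARD | HEAVY
)
print(summary)  # SummaryOutput covering the older portion of the conversation
```
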

abstractcore/providers/base.py
@@ -5,6 +5,7 @@ Base provider with integrated telemetry, events, and exception handling.
  import time
  import uuid
  import asyncio
+ import warnings
  from collections import deque
  from typing import List, Dict, Any, Optional, Union, Iterator, AsyncIterator, Type
  from abc import ABC, abstractmethod
@@ -60,6 +61,13 @@ class BaseProvider(AbstractCoreInterface, ABC):
          # execute_tools: True = AbstractCore executes tools (legacy mode)
          # False = Pass-through mode (default - for API server / agentic CLI)
          self.execute_tools = kwargs.get('execute_tools', False)
+         if self.execute_tools:
+             warnings.warn(
+                 "execute_tools=True is deprecated. Prefer passing tools explicitly to generate() "
+                 "and executing tool calls in the host/runtime via a ToolExecutor.",
+                 DeprecationWarning,
+                 stacklevel=2,
+             )

          # Setup retry manager with optional configuration
          retry_config = kwargs.get('retry_config', None)
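
Since the constructor now warns when `execute_tools=True` is requested, a test suite can surface that warning as an error using only the standard `warnings` module. A small sketch; the `create_llm` call mirrors the README examples earlier in this diff, and the provider/model are illustrative:

```python
# Turn the new DeprecationWarning into an error so lingering execute_tools=True
# call sites are caught early (e.g. in CI). Provider and model are illustrative.
import warnings

from abstractcore import create_llm

with warnings.catch_warnings():
    warnings.simplefilter("error", DeprecationWarning)
    # Raises under 2.6.8+: execute_tools=True is deprecated in BaseProvider.__init__.
    create_llm("ollama", model="qwen3:4b", execute_tools=True)
```
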
@@ -202,6 +210,12 @@
          """
          trace_id = str(uuid.uuid4())

+         # If trace retention is disabled, still return a trace_id for correlation
+         # without constructing/storing a full trace payload.
+         maxlen = getattr(getattr(self, "_traces", None), "maxlen", None)
+         if maxlen == 0:
+             return trace_id
+
          # Extract generation parameters
          temperature = kwargs.get('temperature', self.temperature)
          max_tokens = kwargs.get('max_tokens', self.max_tokens)
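
The early return above relies on how a bounded `collections.deque` behaves when `maxlen` is 0: nothing appended to it is ever retained, so assembling the full trace payload would be wasted work. A minimal illustration:

```python
# Why maxlen == 0 means "retain nothing": appends to a zero-length deque are dropped,
# so the provider can hand back a trace_id without building a trace payload at all.
from collections import deque

traces = deque(maxlen=0)
traces.append({"trace_id": "abc123", "temperature": 0.7})
print(len(traces))  # 0 -- nothing was stored
```
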
@@ -408,6 +422,13 @@

          # Handle tool execution control
          should_execute_tools = execute_tools if execute_tools is not None else self.execute_tools
+         if should_execute_tools and converted_tools:
+             warnings.warn(
+                 "execute_tools=True is deprecated. Prefer passing tools explicitly to generate() "
+                 "and executing tool calls in the host/runtime via a ToolExecutor.",
+                 DeprecationWarning,
+                 stacklevel=2,
+             )
          if not should_execute_tools and converted_tools:
              # If tools are provided but execution is disabled,
              # we still pass them to the provider for generation but won't execute them
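
The `execute_tools if execute_tools is not None else self.execute_tools` line above means the mode can also be overridden per call to `generate()` (the server does exactly that with `execute_tools=False` further down in this diff). A sketch with an illustrative provider, model, and tool:

```python
# Per-call override of the tool-execution mode. Provider, model and the tool body
# are illustrative; the call shape mirrors the README examples in this diff.
from abstractcore import create_llm
from abstractcore.tools import tool, register_tool

@tool(name="get_weather", description="Get weather for a city")
def get_weather(city: str) -> str:
    return f"Weather in {city}: Sunny, 22°C"

register_tool(get_weather)  # only needed for the executed variant below

llm = create_llm("ollama", model="qwen3:4b")  # passthrough by default

raw = llm.generate("What's the weather in Paris?", tools=[get_weather])
# raw.content: unexecuted tool call tags, left for the host runtime to handle

ran = llm.generate(
    "What's the weather in Paris?",
    tools=[get_weather],
    execute_tools=True,  # per-call opt-in to the (now deprecated) direct-execution path
)
# ran.content: results produced by AbstractCore executing the tool itself
```
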
@@ -1556,4 +1577,4 @@ Please provide a structured response."""
          # Yield chunks asynchronously
          for chunk in sync_gen:
              yield chunk
-             await asyncio.sleep(0) # Yield control to event loop
+             await asyncio.sleep(0) # Yield control to event loop

abstractcore/server/app.py
@@ -1956,6 +1956,39 @@ async def provider_chat_completions(
      _, model = parse_model_string(request.model)
      return await process_chat_completion(provider, model, request, http_request)

+
+ def _extract_trace_metadata(http_request: Request) -> Dict[str, Any]:
+     """Extract trace metadata from request headers (schema-safe)."""
+     meta: Dict[str, Any] = {}
+
+     raw = (
+         http_request.headers.get("x-abstractcore-trace-metadata")
+         or http_request.headers.get("x-abstract-trace-metadata")
+     )
+     if raw:
+         try:
+             parsed = json.loads(raw)
+             if isinstance(parsed, dict):
+                 meta.update(parsed)
+         except Exception:
+             # Ignore invalid metadata payloads; tracing is best-effort.
+             pass
+
+     header_map = {
+         "actor_id": "x-abstractcore-actor-id",
+         "session_id": "x-abstractcore-session-id",
+         "run_id": "x-abstractcore-run-id",
+         "parent_run_id": "x-abstractcore-parent-run-id",
+     }
+     for key, header in header_map.items():
+         val = http_request.headers.get(header)
+         if val is not None and key not in meta:
+             meta[key] = val
+
+     # Never log or return these directly; they are for internal correlation only.
+     return meta
+
+
  async def process_chat_completion(
      provider: str,
      model: str,
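
The helper above merges two sources with a fixed precedence: the JSON blob in `x-abstractcore-trace-metadata` is applied first, and the per-field headers only fill keys that are still missing. A standalone illustration of that merge, with a plain dict standing in for the FastAPI request headers:

```python
# Mirrors the precedence in _extract_trace_metadata: JSON metadata wins, individual
# headers only fill gaps. A plain dict stands in for http_request.headers here.
import json

headers = {
    "x-abstractcore-trace-metadata": json.dumps({"actor_id": "agent-7", "experiment": "baseline"}),
    "x-abstractcore-actor-id": "ignored-because-the-json-already-set-it",
    "x-abstractcore-session-id": "sess-42",
}

meta = dict(json.loads(headers["x-abstractcore-trace-metadata"]))
for key, header in {
    "actor_id": "x-abstractcore-actor-id",
    "session_id": "x-abstractcore-session-id",
    "run_id": "x-abstractcore-run-id",
    "parent_run_id": "x-abstractcore-parent-run-id",
}.items():
    val = headers.get(header)
    if val is not None and key not in meta:
        meta[key] = val

print(meta)  # {'actor_id': 'agent-7', 'experiment': 'baseline', 'session_id': 'sess-42'}
```
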
@@ -2019,6 +2052,11 @@
          # Create LLM instance
          # Prepare provider-specific kwargs
          provider_kwargs = {}
+         trace_metadata = _extract_trace_metadata(http_request)
+         if trace_metadata:
+             # Enable trace capture (trace_id) without retaining full trace buffers by default.
+             provider_kwargs["enable_tracing"] = True
+             provider_kwargs.setdefault("max_traces", 0)
          if request.base_url:
              provider_kwargs["base_url"] = request.base_url
              logger.info(
@@ -2047,6 +2085,8 @@
              "tool_choice": request.tool_choice if request.tools else None,
              "execute_tools": False, # Server mode - don't execute tools
          }
+         if trace_metadata:
+             gen_kwargs["trace_metadata"] = trace_metadata

          # Add optional parameters
          if request.stop:
@@ -2081,9 +2121,18 @@
              )
          else:
              response = llm.generate(**gen_kwargs)
-             return convert_to_openai_response(
+             openai_response = convert_to_openai_response(
                  response, provider, model, syntax_rewriter, request_id
              )
+             trace_id = None
+             if hasattr(response, "metadata") and isinstance(getattr(response, "metadata"), dict):
+                 trace_id = response.metadata.get("trace_id")
+             if trace_id:
+                 return JSONResponse(
+                     content=openai_response,
+                     headers={"X-AbstractCore-Trace-Id": str(trace_id)},
+                 )
+             return openai_response
      finally:
          # Cleanup temporary files (base64 and downloaded images) with delay to avoid race conditions
          import threading
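
Taken together, the server-side changes let a client attach trace metadata to a request and read the captured trace id back from the response headers. A hedged client sketch: the base URL, the OpenAI-compatible `/v1/chat/completions` route, and the `provider/model` string format accepted by `parse_model_string` are assumptions not confirmed by this diff, while the header names come from the hunks above:

```python
# Client-side sketch. Base URL, route path and "provider/model" format are assumptions;
# the request headers and the X-AbstractCore-Trace-Id response header are from this diff.
import json
import requests

resp = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "model": "ollama/qwen3:4b",
        "messages": [{"role": "user", "content": "Hello"}],
    },
    headers={
        "x-abstractcore-trace-metadata": json.dumps({"run_id": "run-001"}),
        "x-abstractcore-actor-id": "my-agent",
    },
    timeout=60,
)
print(resp.json()["choices"][0]["message"]["content"])
print(resp.headers.get("X-AbstractCore-Trace-Id"))  # present when a trace_id was captured
```
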
@@ -2408,4 +2457,4 @@ Debug Mode:
  # ============================================================================

  if __name__ == "__main__":
-     run_server_with_args()
+     run_server_with_args()

abstractcore/tools/__init__.py (new file in 2.6.8)
@@ -0,0 +1,122 @@
+ """
+ Universal tool support for AbstractCore.
+
+ This package provides a unified tool system that works across all models
+ and providers, whether they have native tool APIs or require prompting.
+
+ Tool Execution Modes
+ --------------------
+
+ AbstractCore supports two tool execution modes:
+
+ **Passthrough Mode (Default)** - execute_tools=False
+     The LLM returns raw tool call tags that downstream runtimes
+     (AbstractRuntime, Codex, Claude Code) parse and execute.
+     Use case: Agent loops, custom orchestration, multi-step workflows.
+
+ **Direct Execution Mode** - execute_tools=True
+     AbstractCore parses and executes tools internally using the
+     global registry. Requires register_tool() for each tool.
+     Use case: Simple scripts, single-turn tool use.
+
+ Key Components
+ --------------
+ - Core types (ToolDefinition, ToolCall, ToolResult)
+ - Universal handler for all models
+ - Architecture-based parsing and formatting
+ - Tool registry for managing available tools
+
+ Example: Passthrough Mode (Default)
+ -----------------------------------
+ ```python
+ from abstractcore import create_llm
+ from abstractcore.tools import tool
+
+ @tool(name="get_weather", description="Get weather for a city")
+ def get_weather(city: str) -> str:
+     return f"Weather in {city}: Sunny"
+
+ llm = create_llm("ollama", model="qwen3:4b")
+ response = llm.generate("Weather in Paris?", tools=[get_weather])
+ # response.content has tool call tags - parse with your runtime
+ ```
+
+ Example: Direct Execution Mode
+ ------------------------------
+ ```python
+ from abstractcore import create_llm
+ from abstractcore.tools import tool, register_tool
+
+ @tool(name="get_weather", description="Get weather for a city")
+ def get_weather(city: str) -> str:
+     return f"Weather in {city}: Sunny"
+
+ register_tool(get_weather) # Required for direct execution
+
+ llm = create_llm("ollama", model="qwen3:4b", execute_tools=True)
+ response = llm.generate("Weather in Paris?", tools=[get_weather])
+ # response.content has executed tool results
+ ```
+
+ Note: The @tool decorator creates metadata but does NOT auto-register.
+ Tools are passed explicitly to generate(). Use register_tool() only
+ when using direct execution mode.
+ """
+
+ # Core types
+ from .core import (
+     ToolDefinition,
+     ToolCall,
+     ToolResult,
+     ToolCallResponse,
+     tool
+ )
+
+ # Handler
+ from .handler import (
+     UniversalToolHandler,
+     create_handler
+ )
+
+ # Parser functions
+ from .parser import (
+     detect_tool_calls,
+     parse_tool_calls,
+     format_tool_prompt
+ )
+
+ # Registry
+ from .registry import (
+     ToolRegistry,
+     register_tool,
+     get_registry,
+     execute_tool,
+     execute_tools,
+     clear_registry
+ )
+
+ __all__ = [
+     # Core types
+     "ToolDefinition",
+     "ToolCall",
+     "ToolResult",
+     "ToolCallResponse",
+     "tool",
+
+     # Handler
+     "UniversalToolHandler",
+     "create_handler",
+
+     # Parser
+     "detect_tool_calls",
+     "parse_tool_calls",
+     "format_tool_prompt",
+
+     # Registry
+     "ToolRegistry",
+     "register_tool",
+     "get_registry",
+     "execute_tool",
+     "execute_tools",
+     "clear_registry",
+ ]