shotgun-sh 0.1.0.dev13__py3-none-any.whl → 0.1.0.dev14__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.

Potentially problematic release.


Files changed (40)
  1. shotgun/agents/agent_manager.py +16 -3
  2. shotgun/agents/artifact_state.py +58 -0
  3. shotgun/agents/common.py +48 -14
  4. shotgun/agents/config/models.py +61 -0
  5. shotgun/agents/history/compaction.py +85 -0
  6. shotgun/agents/history/constants.py +19 -0
  7. shotgun/agents/history/context_extraction.py +108 -0
  8. shotgun/agents/history/history_building.py +104 -0
  9. shotgun/agents/history/history_processors.py +354 -157
  10. shotgun/agents/history/message_utils.py +46 -0
  11. shotgun/agents/history/token_counting.py +429 -0
  12. shotgun/agents/history/token_estimation.py +138 -0
  13. shotgun/agents/models.py +125 -1
  14. shotgun/agents/tools/artifact_management.py +56 -24
  15. shotgun/agents/tools/file_management.py +30 -11
  16. shotgun/agents/tools/web_search/anthropic.py +78 -17
  17. shotgun/agents/tools/web_search/gemini.py +1 -1
  18. shotgun/agents/tools/web_search/openai.py +16 -2
  19. shotgun/artifacts/manager.py +2 -1
  20. shotgun/artifacts/models.py +6 -4
  21. shotgun/codebase/core/nl_query.py +4 -4
  22. shotgun/prompts/agents/partials/artifact_system.j2 +4 -1
  23. shotgun/prompts/agents/partials/codebase_understanding.j2 +1 -2
  24. shotgun/prompts/agents/plan.j2 +9 -7
  25. shotgun/prompts/agents/research.j2 +7 -5
  26. shotgun/prompts/agents/specify.j2 +8 -7
  27. shotgun/prompts/agents/state/artifact_templates_available.j2 +18 -0
  28. shotgun/prompts/agents/state/codebase/codebase_graphs_available.j2 +3 -1
  29. shotgun/prompts/agents/state/existing_artifacts_available.j2 +23 -0
  30. shotgun/prompts/agents/state/system_state.j2 +9 -1
  31. shotgun/prompts/history/incremental_summarization.j2 +53 -0
  32. shotgun/sdk/services.py +14 -0
  33. shotgun/tui/app.py +1 -1
  34. shotgun/tui/screens/chat.py +4 -2
  35. shotgun/utils/file_system_utils.py +6 -1
  36. {shotgun_sh-0.1.0.dev13.dist-info → shotgun_sh-0.1.0.dev14.dist-info}/METADATA +2 -1
  37. {shotgun_sh-0.1.0.dev13.dist-info → shotgun_sh-0.1.0.dev14.dist-info}/RECORD +40 -29
  38. {shotgun_sh-0.1.0.dev13.dist-info → shotgun_sh-0.1.0.dev14.dist-info}/WHEEL +0 -0
  39. {shotgun_sh-0.1.0.dev13.dist-info → shotgun_sh-0.1.0.dev14.dist-info}/entry_points.txt +0 -0
  40. {shotgun_sh-0.1.0.dev13.dist-info → shotgun_sh-0.1.0.dev14.dist-info}/licenses/LICENSE +0 -0
shotgun/agents/history/token_counting.py (new file)
@@ -0,0 +1,429 @@
+"""Real token counting for all supported providers.
+
+This module provides accurate token counting using each provider's official
+APIs and libraries, eliminating the need for rough character-based estimation.
+"""
+
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING
+
+from pydantic_ai.messages import ModelMessage
+
+from shotgun.agents.config.models import ModelConfig, ProviderType
+from shotgun.logging_config import get_logger
+
+if TYPE_CHECKING:
+    pass
+
+logger = get_logger(__name__)
+
+# Global cache for token counter instances (singleton pattern)
+_token_counter_cache: dict[tuple[str, str, str], "TokenCounter"] = {}
+
+
+class TokenCounter(ABC):
+    """Abstract base class for provider-specific token counting."""
+
+    @abstractmethod
+    def count_tokens(self, text: str) -> int:
+        """Count tokens in text using provider-specific method.
+
+        Args:
+            text: Text to count tokens for
+
+        Returns:
+            Exact token count as determined by the provider
+
+        Raises:
+            RuntimeError: If token counting fails
+        """
+
+    @abstractmethod
+    def count_message_tokens(self, messages: list[ModelMessage]) -> int:
+        """Count tokens in PydanticAI message structures.
+
+        Args:
+            messages: List of messages to count tokens for
+
+        Returns:
+            Total token count across all messages
+
+        Raises:
+            RuntimeError: If token counting fails
+        """
+
+
+class OpenAITokenCounter(TokenCounter):
+    """Token counter for OpenAI models using tiktoken."""
+
+    # Official encoding mappings for OpenAI models
+    ENCODING_MAP = {
+        "gpt-5": "o200k_base",
+        "gpt-4o": "o200k_base",
+        "gpt-4": "cl100k_base",
+        "gpt-3.5-turbo": "cl100k_base",
+    }
+
+    def __init__(self, model_name: str):
+        """Initialize OpenAI token counter.
+
+        Args:
+            model_name: OpenAI model name to get correct encoding for
+
+        Raises:
+            RuntimeError: If encoding initialization fails
+        """
+        self.model_name = model_name
+
+        import tiktoken
+
+        try:
+            # Get the appropriate encoding for this model
+            encoding_name = self.ENCODING_MAP.get(model_name, "o200k_base")
+            self.encoding = tiktoken.get_encoding(encoding_name)
+            logger.debug(
+                f"Initialized OpenAI token counter with {encoding_name} encoding"
+            )
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to initialize tiktoken encoding for {model_name}"
+            ) from e
+
+    def count_tokens(self, text: str) -> int:
+        """Count tokens using tiktoken.
+
+        Args:
+            text: Text to count tokens for
+
+        Returns:
+            Exact token count using tiktoken
+
+        Raises:
+            RuntimeError: If token counting fails
+        """
+        try:
+            return len(self.encoding.encode(text))
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to count tokens for OpenAI model {self.model_name}"
+            ) from e
+
+    def count_message_tokens(self, messages: list[ModelMessage]) -> int:
+        """Count tokens across all messages using tiktoken.
+
+        Args:
+            messages: List of PydanticAI messages
+
+        Returns:
+            Total token count for all messages
+
+        Raises:
+            RuntimeError: If token counting fails
+        """
+        total_text = self._extract_text_from_messages(messages)
+        return self.count_tokens(total_text)
+
+    def _extract_text_from_messages(self, messages: list[ModelMessage]) -> str:
+        """Extract all text content from messages for token counting."""
+        text_parts = []
+
+        for message in messages:
+            if hasattr(message, "parts"):
+                for part in message.parts:
+                    if hasattr(part, "content") and isinstance(part.content, str):
+                        text_parts.append(part.content)
+                    else:
+                        # Handle non-text parts (tool calls, etc.)
+                        text_parts.append(str(part))
+            else:
+                # Handle messages without parts
+                text_parts.append(str(message))
+
+        return "\n".join(text_parts)
+
+
+class AnthropicTokenCounter(TokenCounter):
+    """Token counter for Anthropic models using official client."""
+
+    def __init__(self, model_name: str, api_key: str):
+        """Initialize Anthropic token counter.
+
+        Args:
+            model_name: Anthropic model name for token counting
+            api_key: Anthropic API key
+
+        Raises:
+            RuntimeError: If client initialization fails
+        """
+        self.model_name = model_name
+        import anthropic
+
+        try:
+            self.client = anthropic.Anthropic(api_key=api_key)
+            logger.debug(f"Initialized Anthropic token counter for {model_name}")
+        except Exception as e:
+            raise RuntimeError("Failed to initialize Anthropic client") from e
+
+    def count_tokens(self, text: str) -> int:
+        """Count tokens using Anthropic's official API.
+
+        Args:
+            text: Text to count tokens for
+
+        Returns:
+            Exact token count from Anthropic API
+
+        Raises:
+            RuntimeError: If API call fails
+        """
+        try:
+            # Anthropic API expects messages format and model parameter
+            result = self.client.messages.count_tokens(
+                messages=[{"role": "user", "content": text}], model=self.model_name
+            )
+            return result.input_tokens
+        except Exception as e:
+            raise RuntimeError(
+                f"Anthropic token counting API failed for {self.model_name}"
+            ) from e
+
+    def count_message_tokens(self, messages: list[ModelMessage]) -> int:
+        """Count tokens across all messages using Anthropic API.
+
+        Args:
+            messages: List of PydanticAI messages
+
+        Returns:
+            Total token count for all messages
+
+        Raises:
+            RuntimeError: If token counting fails
+        """
+        total_text = self._extract_text_from_messages(messages)
+        return self.count_tokens(total_text)
+
+    def _extract_text_from_messages(self, messages: list[ModelMessage]) -> str:
+        """Extract all text content from messages for token counting."""
+        text_parts = []
+
+        for message in messages:
+            if hasattr(message, "parts"):
+                for part in message.parts:
+                    if hasattr(part, "content") and isinstance(part.content, str):
+                        text_parts.append(part.content)
+                    else:
+                        # Handle non-text parts (tool calls, etc.)
+                        text_parts.append(str(part))
+            else:
+                # Handle messages without parts
+                text_parts.append(str(message))
+
+        return "\n".join(text_parts)
+
+
+class GoogleTokenCounter(TokenCounter):
+    """Token counter for Google models using genai API."""
+
+    def __init__(self, model_name: str, api_key: str):
+        """Initialize Google token counter.
+
+        Args:
+            model_name: Google model name
+            api_key: Google API key
+
+        Raises:
+            RuntimeError: If configuration fails
+        """
+        self.model_name = model_name
+
+        import google.generativeai as genai
+
+        try:
+            genai.configure(api_key=api_key)  # type: ignore[attr-defined]
+            self.model = genai.GenerativeModel(model_name)  # type: ignore[attr-defined]
+            logger.debug(f"Initialized Google token counter for {model_name}")
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to configure Google genai client for {model_name}"
+            ) from e
+
+    def count_tokens(self, text: str) -> int:
+        """Count tokens using Google's genai API.
+
+        Args:
+            text: Text to count tokens for
+
+        Returns:
+            Exact token count from Google API
+
+        Raises:
+            RuntimeError: If API call fails
+        """
+        try:
+            result = self.model.count_tokens(text)
+            return result.total_tokens
+        except Exception as e:
+            raise RuntimeError(
+                f"Google token counting API failed for {self.model_name}"
+            ) from e
+
+    def count_message_tokens(self, messages: list[ModelMessage]) -> int:
+        """Count tokens across all messages using Google API.
+
+        Args:
+            messages: List of PydanticAI messages
+
+        Returns:
+            Total token count for all messages
+
+        Raises:
+            RuntimeError: If token counting fails
+        """
+        total_text = self._extract_text_from_messages(messages)
+        return self.count_tokens(total_text)
+
+    def _extract_text_from_messages(self, messages: list[ModelMessage]) -> str:
+        """Extract all text content from messages for token counting."""
+        text_parts = []
+
+        for message in messages:
+            if hasattr(message, "parts"):
+                for part in message.parts:
+                    if hasattr(part, "content") and isinstance(part.content, str):
+                        text_parts.append(part.content)
+                    else:
+                        # Handle non-text parts (tool calls, etc.)
+                        text_parts.append(str(part))
+            else:
+                # Handle messages without parts
+                text_parts.append(str(message))
+
+        return "\n".join(text_parts)
+
+
+def get_token_counter(model_config: ModelConfig) -> TokenCounter:
+    """Get appropriate token counter for the model provider (cached singleton).
+
+    This function ensures that every provider has a proper token counting
+    implementation without any fallbacks to estimation. Token counters are
+    cached to avoid repeated initialization overhead.
+
+    Args:
+        model_config: Model configuration with provider and credentials
+
+    Returns:
+        Cached provider-specific token counter
+
+    Raises:
+        ValueError: If provider is not supported for token counting
+        RuntimeError: If token counter initialization fails
+    """
+    # Create cache key from provider, model name, and API key
+    cache_key = (
+        model_config.provider.value,
+        model_config.name,
+        model_config.api_key[:10]
+        if model_config.api_key
+        else "no-key",  # Partial key for cache
+    )
+
+    # Return cached instance if available
+    if cache_key in _token_counter_cache:
+        logger.debug(
+            f"Reusing cached token counter for {model_config.provider.value}:{model_config.name}"
+        )
+        return _token_counter_cache[cache_key]
+
+    # Create new instance and cache it
+    logger.debug(
+        f"Creating new token counter for {model_config.provider.value}:{model_config.name}"
+    )
+
+    counter: TokenCounter
+    if model_config.provider == ProviderType.OPENAI:
+        counter = OpenAITokenCounter(model_config.name)
+    elif model_config.provider == ProviderType.ANTHROPIC:
+        counter = AnthropicTokenCounter(model_config.name, model_config.api_key)
+    elif model_config.provider == ProviderType.GOOGLE:
+        counter = GoogleTokenCounter(model_config.name, model_config.api_key)
+    else:
+        raise ValueError(
+            f"Unsupported provider for token counting: {model_config.provider}. "
+            f"Supported providers: {[p.value for p in ProviderType]}"
+        )
+
+    # Cache the instance
+    _token_counter_cache[cache_key] = counter
+    logger.debug(
+        f"Cached token counter for {model_config.provider.value}:{model_config.name}"
+    )
+
+    return counter
+
+
+def count_tokens_from_messages(
+    messages: list[ModelMessage], model_config: ModelConfig
+) -> int:
+    """Count actual tokens from messages using provider-specific methods.
+
+    This replaces the old estimation approach with accurate token counting
+    using each provider's official APIs and libraries.
+
+    Args:
+        messages: List of messages to count tokens for
+        model_config: Model configuration with provider info
+
+    Returns:
+        Exact token count for the messages
+
+    Raises:
+        ValueError: If provider is not supported
+        RuntimeError: If token counting fails
+    """
+    counter = get_token_counter(model_config)
+    return counter.count_message_tokens(messages)
+
+
+def count_post_summary_tokens(
+    messages: list[ModelMessage], summary_index: int, model_config: ModelConfig
+) -> int:
+    """Count actual tokens from summary onwards for incremental compaction decisions.
+
+    Args:
+        messages: Full message history
+        summary_index: Index of the last summary message
+        model_config: Model configuration with provider info
+
+    Returns:
+        Exact token count from summary onwards
+
+    Raises:
+        ValueError: If provider is not supported
+        RuntimeError: If token counting fails
+    """
+    if summary_index >= len(messages):
+        return 0
+
+    post_summary_messages = messages[summary_index:]
+    return count_tokens_from_messages(post_summary_messages, model_config)
+
+
+def count_tokens_from_message_parts(
+    messages: list[ModelMessage], model_config: ModelConfig
+) -> int:
+    """Count actual tokens from message parts for summarization requests.
+
+    Args:
+        messages: List of messages to count tokens for
+        model_config: Model configuration with provider info
+
+    Returns:
+        Exact token count from message parts
+
+    Raises:
+        ValueError: If provider is not supported
+        RuntimeError: If token counting fails
+    """
+    # For now, use the same logic as count_tokens_from_messages
+    # This can be optimized later if needed for different counting strategies
+    return count_tokens_from_messages(messages, model_config)
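
For orientation only (not part of the release), a minimal sketch of how the new module might be called. The ModelConfig constructor arguments and the pydantic-ai message construction are assumptions for illustration; this diff only shows that the counters read provider, name, and api_key from the config.

from pydantic_ai.messages import ModelRequest, UserPromptPart

from shotgun.agents.config.models import ModelConfig, ProviderType
from shotgun.agents.history.token_counting import count_tokens_from_messages

# Hypothetical config; field names beyond provider/name/api_key are not shown in this diff.
config = ModelConfig(provider=ProviderType.OPENAI, name="gpt-4o", api_key="sk-...")
messages = [ModelRequest(parts=[UserPromptPart(content="Summarize the repo layout.")])]

# gpt-4o maps to the o200k_base encoding in ENCODING_MAP; the counter instance is
# cached, so repeated calls with the same provider/model/key reuse it.
print(count_tokens_from_messages(messages, config))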
shotgun/agents/history/token_estimation.py (new file)
@@ -0,0 +1,138 @@
+"""Real token counting utilities for history processing.
+
+This module provides accurate token counting using provider-specific APIs
+and libraries, replacing the old character-based estimation approach.
+"""
+
+from typing import TYPE_CHECKING, Union
+
+from pydantic_ai.messages import ModelMessage
+
+from shotgun.agents.config.models import ModelConfig
+
+if TYPE_CHECKING:
+    from pydantic_ai import RunContext
+
+    from shotgun.agents.models import AgentDeps
+
+from .constants import INPUT_BUFFER_TOKENS, MIN_SUMMARY_TOKENS
+from .token_counting import count_tokens_from_messages as _count_tokens_from_messages
+
+
+def estimate_tokens_from_messages(
+    messages: list[ModelMessage], model_config: ModelConfig
+) -> int:
+    """Count actual tokens from current message list.
+
+    This provides accurate token counting for compaction decisions using
+    provider-specific token counting methods instead of rough estimation.
+
+    Args:
+        messages: List of messages to count tokens for
+        model_config: Model configuration with provider info
+
+    Returns:
+        Exact token count using provider-specific counting
+
+    Raises:
+        ValueError: If provider is not supported
+        RuntimeError: If token counting fails
+    """
+    return _count_tokens_from_messages(messages, model_config)
+
+
+def estimate_post_summary_tokens(
+    messages: list[ModelMessage], summary_index: int, model_config: ModelConfig
+) -> int:
+    """Count actual tokens from summary onwards for incremental compaction decisions.
+
+    This treats the summary as a reset point and only counts tokens from the summary
+    message onwards. Used to determine if incremental compaction is needed.
+
+    Args:
+        messages: Full message history
+        summary_index: Index of the last summary message
+        model_config: Model configuration with provider info
+
+    Returns:
+        Exact token count from summary onwards
+
+    Raises:
+        ValueError: If provider is not supported
+        RuntimeError: If token counting fails
+    """
+    if summary_index >= len(messages):
+        return 0
+
+    post_summary_messages = messages[summary_index:]
+    return estimate_tokens_from_messages(post_summary_messages, model_config)
+
+
+def estimate_tokens_from_message_parts(
+    messages: list[ModelMessage], model_config: ModelConfig
+) -> int:
+    """Count actual tokens from message parts for summarization requests.
+
+    This provides accurate token counting across the codebase using
+    provider-specific methods instead of character estimation.
+
+    Args:
+        messages: List of messages to count tokens for
+        model_config: Model configuration with provider info
+
+    Returns:
+        Exact token count from message parts
+
+    Raises:
+        ValueError: If provider is not supported
+        RuntimeError: If token counting fails
+    """
+    return _count_tokens_from_messages(messages, model_config)
+
+
+def calculate_max_summarization_tokens(
+    ctx_or_model_config: Union["RunContext[AgentDeps]", ModelConfig],
+    request_messages: list[ModelMessage],
+) -> int:
+    """Calculate maximum tokens available for summarization output.
+
+    This ensures we use the model's full capacity while leaving room for input tokens.
+
+    Args:
+        ctx_or_model_config: RunContext or model configuration with token limits
+        request_messages: The messages that will be sent for summarization
+
+    Returns:
+        Maximum tokens available for the summarization response
+    """
+    # Support both RunContext and direct model config
+    if hasattr(ctx_or_model_config, "deps"):
+        model_config = ctx_or_model_config.deps.llm_model
+    else:
+        model_config = ctx_or_model_config
+
+    if not model_config:
+        return MIN_SUMMARY_TOKENS
+
+    # Count actual input tokens using shared utility
+    estimated_input_tokens = estimate_tokens_from_message_parts(
+        request_messages, model_config
+    )
+
+    # Add buffer for prompt overhead, system instructions, etc.
+    total_estimated_input = estimated_input_tokens + INPUT_BUFFER_TOKENS
+
+    # For models with combined token limits (like GPT), use total limit
+    # For models with separate limits (like Claude), use output limit directly
+    if hasattr(model_config, "max_total_tokens"):
+        # Combined limit model
+        available_for_output = (
+            int(model_config.max_total_tokens) - total_estimated_input
+        )
+        max_output = min(available_for_output, int(model_config.max_output_tokens))
+    else:
+        # Separate limits model - just use max_output_tokens
+        max_output = int(model_config.max_output_tokens)
+
+    # Ensure we don't go below a minimum useful amount
+    return max(MIN_SUMMARY_TOKENS, max_output)
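
For orientation only (not part of the release), a self-contained sketch of the output-budget arithmetic that calculate_max_summarization_tokens performs for a combined-limit model. The constants and token figures below are illustrative assumptions; the real INPUT_BUFFER_TOKENS and MIN_SUMMARY_TOKENS live in shotgun/agents/history/constants.py and are not shown in this diff.

# Assumed values for illustration only.
INPUT_BUFFER_TOKENS = 2_000
MIN_SUMMARY_TOKENS = 1_000
max_total_tokens = 128_000      # combined input+output limit (GPT-style config)
max_output_tokens = 16_000
counted_input_tokens = 110_000  # what estimate_tokens_from_message_parts would return

# Mirror of the function's logic: leave room for the input plus a prompt-overhead
# buffer, cap at the model's output limit, and never drop below the minimum.
available_for_output = max_total_tokens - (counted_input_tokens + INPUT_BUFFER_TOKENS)
budget = max(MIN_SUMMARY_TOKENS, min(available_for_output, max_output_tokens))
print(budget)  # 16000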