shotgun-sh 0.1.16.dev2__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of shotgun-sh might be problematic.
- shotgun/agents/common.py +4 -5
- shotgun/agents/config/constants.py +23 -6
- shotgun/agents/config/manager.py +239 -76
- shotgun/agents/config/models.py +74 -84
- shotgun/agents/config/provider.py +174 -85
- shotgun/agents/history/compaction.py +1 -1
- shotgun/agents/history/history_processors.py +18 -9
- shotgun/agents/history/token_counting/__init__.py +31 -0
- shotgun/agents/history/token_counting/anthropic.py +89 -0
- shotgun/agents/history/token_counting/base.py +67 -0
- shotgun/agents/history/token_counting/openai.py +80 -0
- shotgun/agents/history/token_counting/sentencepiece_counter.py +119 -0
- shotgun/agents/history/token_counting/tokenizer_cache.py +90 -0
- shotgun/agents/history/token_counting/utils.py +147 -0
- shotgun/agents/history/token_estimation.py +12 -12
- shotgun/agents/llm.py +62 -0
- shotgun/agents/models.py +2 -2
- shotgun/agents/tools/web_search/__init__.py +42 -15
- shotgun/agents/tools/web_search/anthropic.py +54 -50
- shotgun/agents/tools/web_search/gemini.py +31 -20
- shotgun/agents/tools/web_search/openai.py +4 -4
- shotgun/build_constants.py +2 -2
- shotgun/cli/config.py +34 -63
- shotgun/cli/feedback.py +4 -2
- shotgun/cli/models.py +2 -2
- shotgun/codebase/core/ingestor.py +47 -8
- shotgun/codebase/core/manager.py +7 -3
- shotgun/codebase/models.py +4 -4
- shotgun/llm_proxy/__init__.py +16 -0
- shotgun/llm_proxy/clients.py +39 -0
- shotgun/llm_proxy/constants.py +8 -0
- shotgun/main.py +6 -0
- shotgun/posthog_telemetry.py +15 -11
- shotgun/sentry_telemetry.py +3 -3
- shotgun/shotgun_web/__init__.py +19 -0
- shotgun/shotgun_web/client.py +138 -0
- shotgun/shotgun_web/constants.py +17 -0
- shotgun/shotgun_web/models.py +47 -0
- shotgun/telemetry.py +7 -4
- shotgun/tui/app.py +26 -8
- shotgun/tui/screens/chat.py +2 -8
- shotgun/tui/screens/chat_screen/command_providers.py +118 -11
- shotgun/tui/screens/chat_screen/history.py +3 -1
- shotgun/tui/screens/feedback.py +2 -2
- shotgun/tui/screens/model_picker.py +327 -0
- shotgun/tui/screens/provider_config.py +118 -28
- shotgun/tui/screens/shotgun_auth.py +295 -0
- shotgun/tui/screens/welcome.py +176 -0
- shotgun/utils/env_utils.py +12 -0
- {shotgun_sh-0.1.16.dev2.dist-info → shotgun_sh-0.2.1.dist-info}/METADATA +2 -2
- {shotgun_sh-0.1.16.dev2.dist-info → shotgun_sh-0.2.1.dist-info}/RECORD +54 -37
- shotgun/agents/history/token_counting.py +0 -429
- {shotgun_sh-0.1.16.dev2.dist-info → shotgun_sh-0.2.1.dist-info}/WHEEL +0 -0
- {shotgun_sh-0.1.16.dev2.dist-info → shotgun_sh-0.2.1.dist-info}/entry_points.txt +0 -0
- {shotgun_sh-0.1.16.dev2.dist-info → shotgun_sh-0.2.1.dist-info}/licenses/LICENSE +0 -0
shotgun/agents/history/history_processors.py (+18 -9)

@@ -2,6 +2,7 @@

 from typing import TYPE_CHECKING, Any, Protocol

+from pydantic_ai import ModelSettings
 from pydantic_ai.messages import (
     ModelMessage,
     ModelRequest,
@@ -10,7 +11,7 @@ from pydantic_ai.messages import (
     UserPromptPart,
 )

-from shotgun.agents.
+from shotgun.agents.llm import shotgun_model_request
 from shotgun.agents.messages import AgentSystemPrompt, SystemStatusPrompt
 from shotgun.agents.models import AgentDeps
 from shotgun.logging_config import get_logger
@@ -154,7 +155,7 @@ async def token_limit_compactor(

     if last_summary_index is not None:
         # Check if post-summary conversation exceeds threshold for incremental compaction
-        post_summary_tokens = estimate_post_summary_tokens(
+        post_summary_tokens = await estimate_post_summary_tokens(
             messages, last_summary_index, deps.llm_model
         )
         post_summary_percentage = (
@@ -248,7 +249,7 @@ async def token_limit_compactor(
         ]

         # Calculate optimal max_tokens for summarization
-        max_tokens = calculate_max_summarization_tokens(
+        max_tokens = await calculate_max_summarization_tokens(
             deps.llm_model, request_messages
         )

@@ -261,7 +262,9 @@ async def token_limit_compactor(
         summary_response = await shotgun_model_request(
             model_config=deps.llm_model,
             messages=request_messages,
-
+            model_settings=ModelSettings(
+                max_tokens=max_tokens  # Use calculated optimal tokens for summarization
+            ),
         )

         log_summarization_response(summary_response, "INCREMENTAL")
@@ -328,7 +331,9 @@ async def token_limit_compactor(

        # Track compaction completion
        messages_after = len(compacted_messages)
-       tokens_after = estimate_tokens_from_messages(
+       tokens_after = await estimate_tokens_from_messages(
+           compacted_messages, deps.llm_model
+       )
        reduction_percentage = (
            ((messages_before - messages_after) / messages_before * 100)
            if messages_before > 0
@@ -354,7 +359,7 @@ async def token_limit_compactor(

    else:
        # Check if total conversation exceeds threshold for full compaction
-       total_tokens = estimate_tokens_from_messages(messages, deps.llm_model)
+       total_tokens = await estimate_tokens_from_messages(messages, deps.llm_model)
        total_percentage = (total_tokens / max_tokens) * 100 if max_tokens > 0 else 0

        logger.debug(
@@ -392,7 +397,9 @@ async def _full_compaction(
    ]

    # Calculate optimal max_tokens for summarization
-   max_tokens = calculate_max_summarization_tokens(
+   max_tokens = await calculate_max_summarization_tokens(
+       deps.llm_model, request_messages
+   )

    # Debug logging using shared utilities
    log_summarization_request(
@@ -403,11 +410,13 @@
    summary_response = await shotgun_model_request(
        model_config=deps.llm_model,
        messages=request_messages,
-
+       model_settings=ModelSettings(
+           max_tokens=max_tokens  # Use calculated optimal tokens for summarization
+       ),
    )

    # Calculate token reduction
-   current_tokens = estimate_tokens_from_messages(messages, deps.llm_model)
+   current_tokens = await estimate_tokens_from_messages(messages, deps.llm_model)
    summary_usage = summary_response.usage
    reduction_percentage = (
        ((current_tokens - summary_usage.output_tokens) / current_tokens) * 100
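The recurring change in this file is that the token estimators became coroutines, so every call site gains an await, and the summarization request now passes an explicit ModelSettings output cap. A minimal sketch of that call-site pattern, using a hypothetical estimate_tokens coroutine in place of the package's own helpers (requires pydantic_ai to be installed):

import asyncio

from pydantic_ai import ModelSettings  # top-level re-export, as in the import added above


async def estimate_tokens(text: str) -> int:
    # Hypothetical stand-in for the now-async estimate_tokens_from_messages helper.
    return len(text) // 4


async def main() -> None:
    tokens = await estimate_tokens("some conversation history")  # call sites must now await
    settings = ModelSettings(max_tokens=max(256, tokens))        # explicit output cap, as above
    print(tokens, settings)


asyncio.run(main())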
shotgun/agents/history/token_counting/__init__.py (new file, +31)

@@ -0,0 +1,31 @@
+"""Real token counting for all supported providers.
+
+This module provides accurate token counting using each provider's official
+APIs and libraries, eliminating the need for rough character-based estimation.
+"""
+
+from .anthropic import AnthropicTokenCounter
+from .base import TokenCounter, extract_text_from_messages
+from .openai import OpenAITokenCounter
+from .sentencepiece_counter import SentencePieceTokenCounter
+from .utils import (
+    count_post_summary_tokens,
+    count_tokens_from_message_parts,
+    count_tokens_from_messages,
+    get_token_counter,
+)
+
+__all__ = [
+    # Base classes
+    "TokenCounter",
+    # Counter implementations
+    "OpenAITokenCounter",
+    "AnthropicTokenCounter",
+    "SentencePieceTokenCounter",
+    # Utility functions
+    "get_token_counter",
+    "count_tokens_from_messages",
+    "count_post_summary_tokens",
+    "count_tokens_from_message_parts",
+    "extract_text_from_messages",
+]
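To show how these exports are meant to be consumed (the get_token_counter factory lives in token_counting/utils.py, which is not expanded in this diff), a minimal sketch that instantiates one of the counters directly; it assumes shotgun-sh 0.2.1 is installed:

import asyncio

from shotgun.agents.history.token_counting import OpenAITokenCounter


async def main() -> None:
    counter = OpenAITokenCounter("gpt-4o")  # tiktoken-backed, needs no API key
    print(await counter.count_tokens("How many tokens is this sentence?"))


asyncio.run(main())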
shotgun/agents/history/token_counting/anthropic.py (new file, +89)

@@ -0,0 +1,89 @@
+"""Anthropic token counting using official client."""
+
+from pydantic_ai.messages import ModelMessage
+
+from shotgun.agents.config.models import KeyProvider
+from shotgun.llm_proxy import create_anthropic_proxy_client
+from shotgun.logging_config import get_logger
+
+from .base import TokenCounter, extract_text_from_messages
+
+logger = get_logger(__name__)
+
+
+class AnthropicTokenCounter(TokenCounter):
+    """Token counter for Anthropic models using official client."""
+
+    def __init__(
+        self,
+        model_name: str,
+        api_key: str,
+        key_provider: KeyProvider = KeyProvider.BYOK,
+    ):
+        """Initialize Anthropic token counter.
+
+        Args:
+            model_name: Anthropic model name for token counting
+            api_key: API key (Anthropic for BYOK, Shotgun for proxy)
+            key_provider: Key provider type (BYOK or SHOTGUN)
+
+        Raises:
+            RuntimeError: If client initialization fails
+        """
+        self.model_name = model_name
+        import anthropic
+
+        try:
+            if key_provider == KeyProvider.SHOTGUN:
+                # Use LiteLLM proxy for Shotgun Account
+                # Proxies to Anthropic's token counting API
+                self.client = create_anthropic_proxy_client(api_key)
+                logger.debug(
+                    f"Initialized Anthropic token counter for {model_name} via LiteLLM proxy"
+                )
+            else:
+                # Direct Anthropic API for BYOK
+                self.client = anthropic.Anthropic(api_key=api_key)
+                logger.debug(
+                    f"Initialized Anthropic token counter for {model_name} via direct API"
+                )
+        except Exception as e:
+            raise RuntimeError("Failed to initialize Anthropic client") from e
+
+    async def count_tokens(self, text: str) -> int:
+        """Count tokens using Anthropic's official API (async).
+
+        Args:
+            text: Text to count tokens for
+
+        Returns:
+            Exact token count from Anthropic API
+
+        Raises:
+            RuntimeError: If API call fails
+        """
+        try:
+            # Anthropic API expects messages format and model parameter
+            result = self.client.messages.count_tokens(
+                messages=[{"role": "user", "content": text}], model=self.model_name
+            )
+            return result.input_tokens
+        except Exception as e:
+            raise RuntimeError(
+                f"Anthropic token counting API failed for {self.model_name}"
+            ) from e
+
+    async def count_message_tokens(self, messages: list[ModelMessage]) -> int:
+        """Count tokens across all messages using Anthropic API (async).
+
+        Args:
+            messages: List of PydanticAI messages
+
+        Returns:
+            Total token count for all messages
+
+        Raises:
+            RuntimeError: If token counting fails
+        """
+        total_text = extract_text_from_messages(messages)
+        return await self.count_tokens(total_text)
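The BYOK path above is a thin wrapper over the Anthropic SDK's token-counting endpoint; the proxy path differs only in which client object is constructed, and the wrapper methods are async even though the underlying SDK call is synchronous. A standalone sketch of the same endpoint call, assuming ANTHROPIC_API_KEY is set in the environment and an SDK version that exposes messages.count_tokens; the model id is only an example of what the endpoint accepts:

import anthropic

client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment
result = client.messages.count_tokens(
    messages=[{"role": "user", "content": "How many tokens is this?"}],
    model="claude-3-5-haiku-latest",  # example model id
)
print(result.input_tokens)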
shotgun/agents/history/token_counting/base.py (new file, +67)

@@ -0,0 +1,67 @@
+"""Base classes and shared utilities for token counting."""
+
+from abc import ABC, abstractmethod
+
+from pydantic_ai.messages import ModelMessage
+
+
+class TokenCounter(ABC):
+    """Abstract base class for provider-specific token counting.
+
+    All methods are async to support non-blocking operations like
+    downloading tokenizer models or making API calls.
+    """
+
+    @abstractmethod
+    async def count_tokens(self, text: str) -> int:
+        """Count tokens in text using provider-specific method (async).
+
+        Args:
+            text: Text to count tokens for
+
+        Returns:
+            Exact token count as determined by the provider
+
+        Raises:
+            RuntimeError: If token counting fails
+        """
+
+    @abstractmethod
+    async def count_message_tokens(self, messages: list[ModelMessage]) -> int:
+        """Count tokens in PydanticAI message structures (async).
+
+        Args:
+            messages: List of messages to count tokens for
+
+        Returns:
+            Total token count across all messages
+
+        Raises:
+            RuntimeError: If token counting fails
+        """
+
+
+def extract_text_from_messages(messages: list[ModelMessage]) -> str:
+    """Extract all text content from messages for token counting.
+
+    Args:
+        messages: List of PydanticAI messages
+
+    Returns:
+        Combined text content from all messages
+    """
+    text_parts = []
+
+    for message in messages:
+        if hasattr(message, "parts"):
+            for part in message.parts:
+                if hasattr(part, "content") and isinstance(part.content, str):
+                    text_parts.append(part.content)
+                else:
+                    # Handle non-text parts (tool calls, etc.)
+                    text_parts.append(str(part))
+        else:
+            # Handle messages without parts
+            text_parts.append(str(message))
+
+    return "\n".join(text_parts)
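Any new provider only has to implement the two abstract coroutines above. A minimal sketch of a subclass that satisfies the contract; the whitespace-splitting heuristic is purely illustrative and not something the package ships:

import asyncio

from pydantic_ai.messages import ModelMessage

from shotgun.agents.history.token_counting.base import TokenCounter, extract_text_from_messages


class WhitespaceTokenCounter(TokenCounter):
    """Illustration only: one 'token' per whitespace-separated word."""

    async def count_tokens(self, text: str) -> int:
        return len(text.split())

    async def count_message_tokens(self, messages: list[ModelMessage]) -> int:
        return await self.count_tokens(extract_text_from_messages(messages))


print(asyncio.run(WhitespaceTokenCounter().count_tokens("three word example")))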
shotgun/agents/history/token_counting/openai.py (new file, +80)

@@ -0,0 +1,80 @@
+"""OpenAI token counting using tiktoken."""
+
+from pydantic_ai.messages import ModelMessage
+
+from shotgun.logging_config import get_logger
+
+from .base import TokenCounter, extract_text_from_messages
+
+logger = get_logger(__name__)
+
+
+class OpenAITokenCounter(TokenCounter):
+    """Token counter for OpenAI models using tiktoken."""
+
+    # Official encoding mappings for OpenAI models
+    ENCODING_MAP = {
+        "gpt-5": "o200k_base",
+        "gpt-4o": "o200k_base",
+        "gpt-4": "cl100k_base",
+        "gpt-3.5-turbo": "cl100k_base",
+    }
+
+    def __init__(self, model_name: str):
+        """Initialize OpenAI token counter.
+
+        Args:
+            model_name: OpenAI model name to get correct encoding for
+
+        Raises:
+            RuntimeError: If encoding initialization fails
+        """
+        self.model_name = model_name
+
+        import tiktoken
+
+        try:
+            # Get the appropriate encoding for this model
+            encoding_name = self.ENCODING_MAP.get(model_name, "o200k_base")
+            self.encoding = tiktoken.get_encoding(encoding_name)
+            logger.debug(
+                f"Initialized OpenAI token counter with {encoding_name} encoding"
+            )
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to initialize tiktoken encoding for {model_name}"
+            ) from e
+
+    async def count_tokens(self, text: str) -> int:
+        """Count tokens using tiktoken (async).
+
+        Args:
+            text: Text to count tokens for
+
+        Returns:
+            Exact token count using tiktoken
+
+        Raises:
+            RuntimeError: If token counting fails
+        """
+        try:
+            return len(self.encoding.encode(text))
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to count tokens for OpenAI model {self.model_name}"
+            ) from e
+
+    async def count_message_tokens(self, messages: list[ModelMessage]) -> int:
+        """Count tokens across all messages using tiktoken (async).
+
+        Args:
+            messages: List of PydanticAI messages
+
+        Returns:
+            Total token count for all messages
+
+        Raises:
+            RuntimeError: If token counting fails
+        """
+        total_text = extract_text_from_messages(messages)
+        return await self.count_tokens(total_text)
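The ENCODING_MAP above selects a tiktoken vocabulary per model family, falling back to o200k_base for unknown names. A short standalone check of that idea, using only the tiktoken library:

import tiktoken

# The same text tokenizes differently under the two vocabularies mapped above:
# o200k_base for newer models (gpt-4o, gpt-5), cl100k_base for older ones.
text = "Token counts differ between encodings."
for name in ("o200k_base", "cl100k_base"):
    encoding = tiktoken.get_encoding(name)
    print(name, len(encoding.encode(text)))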
shotgun/agents/history/token_counting/sentencepiece_counter.py (new file, +119)

@@ -0,0 +1,119 @@
+"""Gemini token counting using official SentencePiece tokenizer.
+
+This implementation uses Google's official Gemini/Gemma tokenizer model
+for 100% accurate local token counting without API calls.
+
+Performance: 10-100x faster than API-based counting.
+Accuracy: 100% match with actual Gemini API usage.
+
+The tokenizer is downloaded on first use and cached locally for future use.
+"""
+
+from typing import Any
+
+from pydantic_ai.messages import ModelMessage
+
+from shotgun.logging_config import get_logger
+
+from .base import TokenCounter, extract_text_from_messages
+from .tokenizer_cache import download_gemini_tokenizer, get_gemini_tokenizer_path
+
+logger = get_logger(__name__)
+
+
+class SentencePieceTokenCounter(TokenCounter):
+    """Token counter for Gemini models using official SentencePiece tokenizer.
+
+    This counter provides 100% accurate token counting for Gemini models
+    using the official tokenizer model from Google's gemma_pytorch repository.
+    Token counting is performed locally without any API calls, resulting in
+    10-100x performance improvement over API-based methods.
+
+    The tokenizer is downloaded asynchronously on first use and cached locally.
+    """
+
+    def __init__(self, model_name: str):
+        """Initialize Gemini SentencePiece token counter.
+
+        The tokenizer is not loaded immediately - it will be downloaded and
+        loaded lazily on first use.
+
+        Args:
+            model_name: Gemini model name (used for logging)
+        """
+        self.model_name = model_name
+        self.sp: Any | None = None  # SentencePieceProcessor, loaded lazily
+
+    async def _ensure_tokenizer(self) -> None:
+        """Ensure tokenizer is downloaded and loaded.
+
+        This method downloads the tokenizer on first call (if not cached)
+        and loads it into memory. Subsequent calls reuse the loaded tokenizer.
+
+        Raises:
+            RuntimeError: If tokenizer download or loading fails
+        """
+        if self.sp is not None:
+            # Already loaded
+            return
+
+        import sentencepiece as spm  # type: ignore[import-untyped]
+
+        try:
+            # Check if already cached, otherwise download
+            tokenizer_path = get_gemini_tokenizer_path()
+            if not tokenizer_path.exists():
+                await download_gemini_tokenizer()
+
+            # Load the tokenizer
+            self.sp = spm.SentencePieceProcessor()
+            self.sp.load(str(tokenizer_path))
+            logger.debug(f"Loaded SentencePiece tokenizer for {self.model_name}")
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to load Gemini tokenizer for {self.model_name}"
+            ) from e
+
+    async def count_tokens(self, text: str) -> int:
+        """Count tokens using SentencePiece (async).
+
+        Downloads tokenizer on first call if not cached.
+
+        Args:
+            text: Text to count tokens for
+
+        Returns:
+            Exact token count using Gemini's tokenizer
+
+        Raises:
+            RuntimeError: If token counting fails
+        """
+        await self._ensure_tokenizer()
+
+        if self.sp is None:
+            raise RuntimeError(f"Tokenizer not initialized for {self.model_name}")
+
+        try:
+            tokens = self.sp.encode(text)
+            return len(tokens)
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to count tokens for Gemini model {self.model_name}"
+            ) from e
+
+    async def count_message_tokens(self, messages: list[ModelMessage]) -> int:
+        """Count tokens across all messages using SentencePiece (async).
+
+        Downloads tokenizer on first call if not cached.
+
+        Args:
+            messages: List of PydanticAI messages
+
+        Returns:
+            Total token count for all messages
+
+        Raises:
+            RuntimeError: If token counting fails
+        """
+        total_text = extract_text_from_messages(messages)
+        return await self.count_tokens(total_text)
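Once the tokenizer model file is on disk, the counting step above is plain SentencePiece usage. A standalone sketch of just that step, assuming the Gemma tokenizer.model has already been downloaded; the path below is a placeholder for wherever it was cached:

import sentencepiece as spm

sp = spm.SentencePieceProcessor()
sp.load("gemini_tokenizer.model")  # placeholder path to the cached tokenizer
print(len(sp.encode("How many tokens does Gemini see here?")))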
shotgun/agents/history/token_counting/tokenizer_cache.py (new file, +90)

@@ -0,0 +1,90 @@
+"""Async tokenizer download and caching utilities."""
+
+import hashlib
+from pathlib import Path
+
+import httpx
+
+from shotgun.logging_config import get_logger
+from shotgun.utils.file_system_utils import get_shotgun_home
+
+logger = get_logger(__name__)
+
+# Gemini tokenizer constants
+GEMINI_TOKENIZER_URL = "https://raw.githubusercontent.com/google/gemma_pytorch/main/tokenizer/tokenizer.model"
+GEMINI_TOKENIZER_SHA256 = (
+    "61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2"
+)
+
+
+def get_tokenizer_cache_dir() -> Path:
+    """Get the directory for cached tokenizer models.
+
+    Returns:
+        Path to tokenizers cache directory
+    """
+    cache_dir = get_shotgun_home() / "tokenizers"
+    cache_dir.mkdir(parents=True, exist_ok=True)
+    return cache_dir
+
+
+def get_gemini_tokenizer_path() -> Path:
+    """Get the path where the Gemini tokenizer should be cached.
+
+    Returns:
+        Path to cached Gemini tokenizer
+    """
+    return get_tokenizer_cache_dir() / "gemini_tokenizer.model"
+
+
+async def download_gemini_tokenizer() -> Path:
+    """Download and cache the official Gemini tokenizer model.
+
+    This downloads Google's official Gemini/Gemma tokenizer from the
+    gemma_pytorch repository and caches it locally for future use.
+
+    The download is async and non-blocking, with SHA256 verification
+    for security.
+
+    Returns:
+        Path to the cached tokenizer file
+
+    Raises:
+        RuntimeError: If download fails or checksum verification fails
+    """
+    cache_path = get_gemini_tokenizer_path()
+
+    # Check if already cached
+    if cache_path.exists():
+        logger.debug(f"Gemini tokenizer already cached at {cache_path}")
+        return cache_path
+
+    logger.info("Downloading Gemini tokenizer (4MB, first time only)...")
+
+    try:
+        # Download with async httpx
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            response = await client.get(GEMINI_TOKENIZER_URL, follow_redirects=True)
+            response.raise_for_status()
+            content = response.content
+
+        # Verify SHA256 checksum
+        actual_hash = hashlib.sha256(content).hexdigest()
+        if actual_hash != GEMINI_TOKENIZER_SHA256:
+            raise RuntimeError(
+                f"Gemini tokenizer checksum mismatch. "
+                f"Expected: {GEMINI_TOKENIZER_SHA256}, got: {actual_hash}"
+            )
+
+        # Atomic write: write to temp file first, then rename
+        temp_path = cache_path.with_suffix(".tmp")
+        temp_path.write_bytes(content)
+        temp_path.rename(cache_path)
+
+        logger.info(f"Gemini tokenizer downloaded and cached at {cache_path}")
+        return cache_path
+
+    except httpx.HTTPError as e:
+        raise RuntimeError(f"Failed to download Gemini tokenizer: {e}") from e
+    except OSError as e:
+        raise RuntimeError(f"Failed to save Gemini tokenizer: {e}") from e
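The pinned SHA256 above can be checked independently of the package. A short sketch that downloads the same file (about 4 MB, URL taken from the constants above) and prints its digest, which should match GEMINI_TOKENIZER_SHA256 if the upstream file is unchanged:

import asyncio
import hashlib

import httpx

URL = "https://raw.githubusercontent.com/google/gemma_pytorch/main/tokenizer/tokenizer.model"


async def fetch_and_hash(url: str) -> str:
    async with httpx.AsyncClient(timeout=30.0) as client:
        response = await client.get(url, follow_redirects=True)
        response.raise_for_status()
        return hashlib.sha256(response.content).hexdigest()


print(asyncio.run(fetch_and_hash(URL)))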