shotgun-sh 0.2.8.dev2__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. shotgun/agents/agent_manager.py +354 -46
  2. shotgun/agents/common.py +14 -8
  3. shotgun/agents/config/constants.py +0 -6
  4. shotgun/agents/config/manager.py +66 -35
  5. shotgun/agents/config/models.py +41 -1
  6. shotgun/agents/config/provider.py +33 -5
  7. shotgun/agents/context_analyzer/__init__.py +28 -0
  8. shotgun/agents/context_analyzer/analyzer.py +471 -0
  9. shotgun/agents/context_analyzer/constants.py +9 -0
  10. shotgun/agents/context_analyzer/formatter.py +115 -0
  11. shotgun/agents/context_analyzer/models.py +212 -0
  12. shotgun/agents/conversation_history.py +2 -0
  13. shotgun/agents/conversation_manager.py +35 -19
  14. shotgun/agents/export.py +2 -2
  15. shotgun/agents/history/compaction.py +9 -4
  16. shotgun/agents/history/history_processors.py +113 -5
  17. shotgun/agents/history/token_counting/anthropic.py +17 -1
  18. shotgun/agents/history/token_counting/base.py +14 -3
  19. shotgun/agents/history/token_counting/openai.py +11 -1
  20. shotgun/agents/history/token_counting/sentencepiece_counter.py +8 -0
  21. shotgun/agents/history/token_counting/tokenizer_cache.py +3 -1
  22. shotgun/agents/history/token_counting/utils.py +0 -3
  23. shotgun/agents/plan.py +2 -2
  24. shotgun/agents/research.py +3 -3
  25. shotgun/agents/specify.py +2 -2
  26. shotgun/agents/tasks.py +2 -2
  27. shotgun/agents/tools/codebase/codebase_shell.py +6 -0
  28. shotgun/agents/tools/codebase/directory_lister.py +6 -0
  29. shotgun/agents/tools/codebase/file_read.py +11 -2
  30. shotgun/agents/tools/codebase/query_graph.py +6 -0
  31. shotgun/agents/tools/codebase/retrieve_code.py +6 -0
  32. shotgun/agents/tools/file_management.py +27 -7
  33. shotgun/agents/tools/registry.py +217 -0
  34. shotgun/agents/tools/web_search/__init__.py +8 -8
  35. shotgun/agents/tools/web_search/anthropic.py +8 -2
  36. shotgun/agents/tools/web_search/gemini.py +7 -1
  37. shotgun/agents/tools/web_search/openai.py +7 -1
  38. shotgun/agents/tools/web_search/utils.py +2 -2
  39. shotgun/agents/usage_manager.py +16 -11
  40. shotgun/api_endpoints.py +7 -3
  41. shotgun/build_constants.py +3 -3
  42. shotgun/cli/clear.py +53 -0
  43. shotgun/cli/compact.py +186 -0
  44. shotgun/cli/config.py +8 -5
  45. shotgun/cli/context.py +111 -0
  46. shotgun/cli/export.py +1 -1
  47. shotgun/cli/feedback.py +4 -2
  48. shotgun/cli/models.py +1 -0
  49. shotgun/cli/plan.py +1 -1
  50. shotgun/cli/research.py +1 -1
  51. shotgun/cli/specify.py +1 -1
  52. shotgun/cli/tasks.py +1 -1
  53. shotgun/cli/update.py +16 -2
  54. shotgun/codebase/core/change_detector.py +5 -3
  55. shotgun/codebase/core/code_retrieval.py +4 -2
  56. shotgun/codebase/core/ingestor.py +10 -8
  57. shotgun/codebase/core/manager.py +13 -4
  58. shotgun/codebase/core/nl_query.py +1 -1
  59. shotgun/exceptions.py +32 -0
  60. shotgun/logging_config.py +18 -27
  61. shotgun/main.py +73 -11
  62. shotgun/posthog_telemetry.py +37 -28
  63. shotgun/prompts/agents/partials/common_agent_system_prompt.j2 +3 -2
  64. shotgun/sentry_telemetry.py +163 -16
  65. shotgun/settings.py +238 -0
  66. shotgun/telemetry.py +10 -33
  67. shotgun/tui/app.py +243 -43
  68. shotgun/tui/commands/__init__.py +1 -1
  69. shotgun/tui/components/context_indicator.py +179 -0
  70. shotgun/tui/components/mode_indicator.py +70 -0
  71. shotgun/tui/components/status_bar.py +48 -0
  72. shotgun/tui/containers.py +91 -0
  73. shotgun/tui/dependencies.py +39 -0
  74. shotgun/tui/protocols.py +45 -0
  75. shotgun/tui/screens/chat/__init__.py +5 -0
  76. shotgun/tui/screens/chat/chat.tcss +54 -0
  77. shotgun/tui/screens/chat/chat_screen.py +1254 -0
  78. shotgun/tui/screens/chat/codebase_index_prompt_screen.py +64 -0
  79. shotgun/tui/screens/chat/codebase_index_selection.py +12 -0
  80. shotgun/tui/screens/chat/help_text.py +40 -0
  81. shotgun/tui/screens/chat/prompt_history.py +48 -0
  82. shotgun/tui/screens/chat.tcss +11 -0
  83. shotgun/tui/screens/chat_screen/command_providers.py +78 -2
  84. shotgun/tui/screens/chat_screen/history/__init__.py +22 -0
  85. shotgun/tui/screens/chat_screen/history/agent_response.py +66 -0
  86. shotgun/tui/screens/chat_screen/history/chat_history.py +115 -0
  87. shotgun/tui/screens/chat_screen/history/formatters.py +115 -0
  88. shotgun/tui/screens/chat_screen/history/partial_response.py +43 -0
  89. shotgun/tui/screens/chat_screen/history/user_question.py +42 -0
  90. shotgun/tui/screens/confirmation_dialog.py +151 -0
  91. shotgun/tui/screens/feedback.py +4 -4
  92. shotgun/tui/screens/github_issue.py +102 -0
  93. shotgun/tui/screens/model_picker.py +49 -24
  94. shotgun/tui/screens/onboarding.py +431 -0
  95. shotgun/tui/screens/pipx_migration.py +153 -0
  96. shotgun/tui/screens/provider_config.py +50 -27
  97. shotgun/tui/screens/shotgun_auth.py +2 -2
  98. shotgun/tui/screens/welcome.py +14 -11
  99. shotgun/tui/services/__init__.py +5 -0
  100. shotgun/tui/services/conversation_service.py +184 -0
  101. shotgun/tui/state/__init__.py +7 -0
  102. shotgun/tui/state/processing_state.py +185 -0
  103. shotgun/tui/utils/mode_progress.py +14 -7
  104. shotgun/tui/widgets/__init__.py +5 -0
  105. shotgun/tui/widgets/widget_coordinator.py +263 -0
  106. shotgun/utils/file_system_utils.py +22 -2
  107. shotgun/utils/marketing.py +110 -0
  108. shotgun/utils/update_checker.py +69 -14
  109. shotgun_sh-0.2.17.dist-info/METADATA +465 -0
  110. shotgun_sh-0.2.17.dist-info/RECORD +194 -0
  111. {shotgun_sh-0.2.8.dev2.dist-info → shotgun_sh-0.2.17.dist-info}/entry_points.txt +1 -0
  112. {shotgun_sh-0.2.8.dev2.dist-info → shotgun_sh-0.2.17.dist-info}/licenses/LICENSE +1 -1
  113. shotgun/tui/screens/chat.py +0 -996
  114. shotgun/tui/screens/chat_screen/history.py +0 -335
  115. shotgun_sh-0.2.8.dev2.dist-info/METADATA +0 -126
  116. shotgun_sh-0.2.8.dev2.dist-info/RECORD +0 -155
  117. {shotgun_sh-0.2.8.dev2.dist-info → shotgun_sh-0.2.17.dist-info}/WHEEL +0 -0
shotgun/agents/context_analyzer/models.py ADDED
@@ -0,0 +1,212 @@
+ """Pydantic models for context analysis."""
+
+ from typing import Any
+
+ from pydantic import BaseModel, Field
+
+
+ class TokenAllocation(BaseModel):
+     """Token counts allocated from API usage data by message/tool type.
+
+     Used internally by ContextAnalyzer to track token distribution across
+     different message types and tool categories.
+     """
+
+     user: int = Field(ge=0, default=0, description="Tokens from user prompts")
+     agent_responses: int = Field(
+         ge=0, default=0, description="Tokens from agent text responses"
+     )
+     system_prompts: int = Field(
+         ge=0, default=0, description="Tokens from system prompts"
+     )
+     system_status: int = Field(
+         ge=0, default=0, description="Tokens from system status messages"
+     )
+     codebase_understanding: int = Field(
+         ge=0, default=0, description="Tokens from codebase understanding tools"
+     )
+     artifact_management: int = Field(
+         ge=0, default=0, description="Tokens from artifact management tools"
+     )
+     web_research: int = Field(
+         ge=0, default=0, description="Tokens from web research tools"
+     )
+     unknown: int = Field(ge=0, default=0, description="Tokens from uncategorized tools")
+
+
+ class MessageTypeStats(BaseModel):
+     """Statistics for a specific message type."""
+
+     count: int = Field(ge=0, description="Number of messages of this type")
+     tokens: int = Field(ge=0, description="Total tokens consumed by this type")
+
+     @property
+     def avg_tokens(self) -> float:
+         """Calculate average tokens per message."""
+         return self.tokens / self.count if self.count > 0 else 0.0
+
+
+ class ContextAnalysis(BaseModel):
+     """Complete analysis of conversation context composition."""
+
+     user_messages: MessageTypeStats
+     agent_responses: MessageTypeStats
+     system_prompts: MessageTypeStats
+     system_status: MessageTypeStats
+     codebase_understanding: MessageTypeStats
+     artifact_management: MessageTypeStats
+     web_research: MessageTypeStats
+     unknown: MessageTypeStats
+     hint_messages: MessageTypeStats
+     total_tokens: int = Field(ge=0, description="Total tokens including hints")
+     total_messages: int = Field(ge=0, description="Total message count including hints")
+     context_window: int = Field(ge=0, description="Model's maximum input tokens")
+     agent_context_tokens: int = Field(
+         ge=0,
+         description="Tokens that actually consume agent context (excluding UI-only)",
+     )
+     model_name: str = Field(description="Name of the model being used")
+     max_usable_tokens: int = Field(
+         ge=0, description="80% of max_input_tokens (usable limit)"
+     )
+     free_space_tokens: int = Field(
+         description="Remaining tokens available (negative if over capacity)"
+     )
+
+     def get_percentage(self, stats: MessageTypeStats) -> float:
+         """Calculate percentage of agent context tokens for a message type.
+
+         Args:
+             stats: Message type statistics to calculate percentage for
+
+         Returns:
+             Percentage of total agent context tokens (0-100)
+         """
+         return (
+             (stats.tokens / self.agent_context_tokens * 100)
+             if self.agent_context_tokens > 0
+             else 0.0
+         )
+
+
+ class ContextCompositionTelemetry(BaseModel):
+     """Telemetry data for context composition tracking to PostHog."""
+
+     # Context usage
+     total_messages: int = Field(ge=0)
+     agent_context_tokens: int = Field(ge=0)
+     context_window: int = Field(ge=0)
+     max_usable_tokens: int = Field(ge=0)
+     free_space_tokens: int = Field(ge=0)
+     usage_percentage: float = Field(ge=0, le=100)
+
+     # Message type counts
+     user_messages_count: int = Field(ge=0)
+     agent_responses_count: int = Field(ge=0)
+     system_prompts_count: int = Field(ge=0)
+     system_status_count: int = Field(ge=0)
+     codebase_understanding_count: int = Field(ge=0)
+     artifact_management_count: int = Field(ge=0)
+     web_research_count: int = Field(ge=0)
+     unknown_tools_count: int = Field(ge=0)
+
+     # Token distribution percentages
+     user_messages_pct: float = Field(ge=0, le=100)
+     agent_responses_pct: float = Field(ge=0, le=100)
+     system_prompts_pct: float = Field(ge=0, le=100)
+     system_status_pct: float = Field(ge=0, le=100)
+     codebase_understanding_pct: float = Field(ge=0, le=100)
+     artifact_management_pct: float = Field(ge=0, le=100)
+     web_research_pct: float = Field(ge=0, le=100)
+     unknown_tools_pct: float = Field(ge=0, le=100)
+
+     # Compaction info
+     compaction_occurred: bool
+     messages_before_compaction: int | None = None
+     messages_after_compaction: int | None = None
+     compaction_reduction_pct: float | None = None
+
+     @classmethod
+     def from_analysis(
+         cls,
+         analysis: "ContextAnalysis",
+         compaction_occurred: bool = False,
+         messages_before_compaction: int | None = None,
+     ) -> "ContextCompositionTelemetry":
+         """Create telemetry from context analysis.
+
+         Args:
+             analysis: The context analysis to convert
+             compaction_occurred: Whether message compaction occurred
+             messages_before_compaction: Number of messages before compaction
+
+         Returns:
+             ContextCompositionTelemetry instance
+         """
+         total_messages = analysis.total_messages - analysis.hint_messages.count
+         usage_pct = (
+             round((analysis.agent_context_tokens / analysis.max_usable_tokens * 100), 1)
+             if analysis.max_usable_tokens > 0
+             else 0
+         )
+
+         # Calculate compaction metrics
+         messages_after: int | None = None
+         compaction_reduction_pct: float | None = None
+
+         if compaction_occurred and messages_before_compaction is not None:
+             messages_after = total_messages
+             if messages_before_compaction > 0:
+                 compaction_reduction_pct = round(
+                     (1 - (total_messages / messages_before_compaction)) * 100, 1
+                 )
+
+         return cls(
+             # Context usage
+             total_messages=total_messages,
+             agent_context_tokens=analysis.agent_context_tokens,
+             context_window=analysis.context_window,
+             max_usable_tokens=analysis.max_usable_tokens,
+             free_space_tokens=analysis.free_space_tokens,
+             usage_percentage=usage_pct,
+             # Message type counts
+             user_messages_count=analysis.user_messages.count,
+             agent_responses_count=analysis.agent_responses.count,
+             system_prompts_count=analysis.system_prompts.count,
+             system_status_count=analysis.system_status.count,
+             codebase_understanding_count=analysis.codebase_understanding.count,
+             artifact_management_count=analysis.artifact_management.count,
+             web_research_count=analysis.web_research.count,
+             unknown_tools_count=analysis.unknown.count,
+             # Token distribution percentages
+             user_messages_pct=round(analysis.get_percentage(analysis.user_messages), 1),
+             agent_responses_pct=round(
+                 analysis.get_percentage(analysis.agent_responses), 1
+             ),
+             system_prompts_pct=round(
+                 analysis.get_percentage(analysis.system_prompts), 1
+             ),
+             system_status_pct=round(analysis.get_percentage(analysis.system_status), 1),
+             codebase_understanding_pct=round(
+                 analysis.get_percentage(analysis.codebase_understanding), 1
+             ),
+             artifact_management_pct=round(
+                 analysis.get_percentage(analysis.artifact_management), 1
+             ),
+             web_research_pct=round(analysis.get_percentage(analysis.web_research), 1),
+             unknown_tools_pct=round(analysis.get_percentage(analysis.unknown), 1),
+             # Compaction info
+             compaction_occurred=compaction_occurred,
+             messages_before_compaction=messages_before_compaction,
+             messages_after_compaction=messages_after,
+             compaction_reduction_pct=compaction_reduction_pct,
+         )
+
+
+ class ContextAnalysisOutput(BaseModel):
+     """Output format for context analysis with multiple representations."""
+
+     markdown: str = Field(description="Markdown-formatted analysis for display")
+     json_data: dict[str, Any] = Field(
+         description="JSON representation of analysis data"
+     )
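
Read together, ContextAnalysis carries per-category MessageTypeStats plus the model limits, and ContextCompositionTelemetry.from_analysis flattens that into the PostHog event payload. A rough usage sketch follows; the numeric values are invented purely for illustration, and only the names defined in this file are assumed.

from shotgun.agents.context_analyzer.models import (
    ContextAnalysis,
    ContextCompositionTelemetry,
    MessageTypeStats,
)

# Invented numbers: in the package these come from the context analyzer itself.
analysis = ContextAnalysis(
    user_messages=MessageTypeStats(count=4, tokens=1200),
    agent_responses=MessageTypeStats(count=4, tokens=6800),
    system_prompts=MessageTypeStats(count=1, tokens=2000),
    system_status=MessageTypeStats(count=1, tokens=300),
    codebase_understanding=MessageTypeStats(count=3, tokens=9500),
    artifact_management=MessageTypeStats(count=0, tokens=0),
    web_research=MessageTypeStats(count=0, tokens=0),
    unknown=MessageTypeStats(count=0, tokens=0),
    hint_messages=MessageTypeStats(count=2, tokens=150),
    total_tokens=19950,
    total_messages=15,
    context_window=200000,
    agent_context_tokens=19800,   # total minus UI-only hint tokens
    model_name="example-model",
    max_usable_tokens=160000,     # 80% of the 200000-token window
    free_space_tokens=140200,
)

print(f"codebase tools: {analysis.get_percentage(analysis.codebase_understanding):.1f}%")

telemetry = ContextCompositionTelemetry.from_analysis(analysis)
print(telemetry.usage_percentage)  # 19800 / 160000 -> 12.4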
shotgun/agents/conversation_history.py CHANGED
@@ -16,6 +16,8 @@ from pydantic_core import to_jsonable_python
  
  from shotgun.tui.screens.chat_screen.hint_message import HintMessage
  
+ __all__ = ["HintMessage", "ConversationHistory"]
+
  logger = logging.getLogger(__name__)
  
  SerializedMessage = dict[str, Any]
shotgun/agents/conversation_manager.py CHANGED
@@ -1,11 +1,15 @@
  """Manager for handling conversation persistence operations."""
  
+ import asyncio
  import json
- import shutil
  from pathlib import Path
  
+ import aiofiles
+ import aiofiles.os
+
  from shotgun.logging_config import get_logger
  from shotgun.utils import get_shotgun_home
+ from shotgun.utils.file_system_utils import async_copy_file
  
  from .conversation_history import ConversationHistory
  
@@ -27,14 +31,14 @@ class ConversationManager:
          else:
              self.conversation_path = conversation_path
  
-     def save(self, conversation: ConversationHistory) -> None:
+     async def save(self, conversation: ConversationHistory) -> None:
          """Save conversation history to file.
  
          Args:
              conversation: ConversationHistory to save
          """
          # Ensure directory exists
-         self.conversation_path.parent.mkdir(parents=True, exist_ok=True)
+         await aiofiles.os.makedirs(self.conversation_path.parent, exist_ok=True)
  
          try:
              # Update timestamp
@@ -42,11 +46,17 @@
  
              conversation.updated_at = datetime.now()
  
-             # Serialize to JSON using Pydantic's model_dump
-             data = conversation.model_dump(mode="json")
+             # Serialize to JSON in background thread to avoid blocking event loop
+             # This is crucial for large conversations (5k+ tokens)
+             data = await asyncio.to_thread(conversation.model_dump, mode="json")
+             json_content = await asyncio.to_thread(
+                 json.dumps, data, indent=2, ensure_ascii=False
+             )
  
-             with open(self.conversation_path, "w", encoding="utf-8") as f:
-                 json.dump(data, f, indent=2, ensure_ascii=False)
+             async with aiofiles.open(
+                 self.conversation_path, "w", encoding="utf-8"
+             ) as f:
+                 await f.write(json_content)
  
              logger.debug("Conversation saved to %s", self.conversation_path)
  
@@ -56,21 +66,26 @@
              )
              # Don't raise - we don't want to interrupt the user's session
  
-     def load(self) -> ConversationHistory | None:
+     async def load(self) -> ConversationHistory | None:
          """Load conversation history from file.
  
          Returns:
              ConversationHistory if file exists and is valid, None otherwise
          """
-         if not self.conversation_path.exists():
+         if not await aiofiles.os.path.exists(self.conversation_path):
              logger.debug("No conversation history found at %s", self.conversation_path)
              return None
  
          try:
-             with open(self.conversation_path, encoding="utf-8") as f:
-                 data = json.load(f)
-
-             conversation = ConversationHistory.model_validate(data)
+             async with aiofiles.open(self.conversation_path, encoding="utf-8") as f:
+                 content = await f.read()
+             # Deserialize JSON in background thread to avoid blocking
+             data = await asyncio.to_thread(json.loads, content)
+
+             # Validate model in background thread for large conversations
+             conversation = await asyncio.to_thread(
+                 ConversationHistory.model_validate, data
+             )
              logger.debug(
                  "Conversation loaded from %s with %d agent messages",
                  self.conversation_path,
@@ -89,7 +104,7 @@
              # Create a backup of the corrupted file for debugging
              backup_path = self.conversation_path.with_suffix(".json.backup")
              try:
-                 shutil.copy2(self.conversation_path, backup_path)
+                 await async_copy_file(self.conversation_path, backup_path)
                  logger.info("Backed up corrupted conversation to %s", backup_path)
              except Exception as backup_error:  # pragma: no cover
                  logger.warning("Failed to backup corrupted file: %s", backup_error)
@@ -105,11 +120,12 @@
              )
              return None
  
-     def clear(self) -> None:
+     async def clear(self) -> None:
          """Delete the conversation history file."""
-         if self.conversation_path.exists():
+         if await aiofiles.os.path.exists(self.conversation_path):
              try:
-                 self.conversation_path.unlink()
+                 # Use asyncio.to_thread for unlink operation
+                 await asyncio.to_thread(self.conversation_path.unlink)
                  logger.debug(
                      "Conversation history cleared at %s", self.conversation_path
                  )
@@ -118,10 +134,10 @@
                  "Failed to clear conversation at %s: %s", self.conversation_path, e
              )
  
-     def exists(self) -> bool:
+     async def exists(self) -> bool:
          """Check if a conversation history file exists.
  
          Returns:
              True if conversation file exists, False otherwise
          """
-         return self.conversation_path.exists()
+         return await aiofiles.os.path.exists(str(self.conversation_path))
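
Since save, load, clear, and exists are now coroutines, call sites have to await them. A minimal sketch, under the assumption that ConversationManager() can be constructed with its default conversation path:

import asyncio

from shotgun.agents.conversation_manager import ConversationManager


async def restore_and_persist() -> None:
    manager = ConversationManager()  # assumed: defaults to the standard shotgun home path
    conversation = await manager.load()  # returns None when nothing has been saved yet
    if conversation is None:
        print("no saved conversation")
        return
    # ... run the agent and mutate `conversation` here ...
    await manager.save(conversation)  # model_dump/json.dumps run in a worker thread


asyncio.run(restore_and_persist())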
shotgun/agents/export.py CHANGED
@@ -23,7 +23,7 @@ from .models import AgentDeps, AgentResponse, AgentRuntimeOptions, AgentType
  logger = get_logger(__name__)
  
  
- def create_export_agent(
+ async def create_export_agent(
      agent_runtime_options: AgentRuntimeOptions, provider: ProviderType | None = None
  ) -> tuple[Agent[AgentDeps, AgentResponse], AgentDeps]:
      """Create an export agent with file management capabilities.
@@ -39,7 +39,7 @@ def create_export_agent(
      # Use partial to create system prompt function for export agent
      system_prompt_fn = partial(build_agent_system_prompt, "export")
  
-     agent, deps = create_base_agent(
+     agent, deps = await create_base_agent(
          system_prompt_fn,
          agent_runtime_options,
          provider=provider,
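
Because create_export_agent is now a coroutine, the factory has to be awaited; a hypothetical call site:

from shotgun.agents.export import create_export_agent


async def build_export_agent(agent_runtime_options):
    # provider is omitted, so the configured default provider is used
    agent, deps = await create_export_agent(agent_runtime_options)
    return agent, deps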
shotgun/agents/history/compaction.py CHANGED
@@ -13,7 +13,7 @@ logger = get_logger(__name__)
  
  
  async def apply_persistent_compaction(
-     messages: list[ModelMessage], deps: AgentDeps
+     messages: list[ModelMessage], deps: AgentDeps, force: bool = False
  ) -> list[ModelMessage]:
      """Apply compaction to message history for persistent storage.
  
@@ -23,6 +23,7 @@ async def apply_persistent_compaction(
      Args:
          messages: Full message history from agent run
          deps: Agent dependencies containing model config
+         force: If True, force compaction even if below token threshold
  
      Returns:
          Compacted message history that should be stored as conversation state
@@ -46,7 +47,7 @@
              self.usage = usage
  
      ctx = MockContext(deps, usage)
-     compacted_messages = await token_limit_compactor(ctx, messages)
+     compacted_messages = await token_limit_compactor(ctx, messages, force=force)
  
      # Log the result for monitoring
      original_size = len(messages)
@@ -59,17 +60,21 @@
              f"({reduction_pct:.1f}% reduction)"
          )
  
-         # Track persistent compaction event
+         # Track persistent compaction event with simple metrics (fast, no token counting)
          track_event(
              "persistent_compaction_applied",
              {
+                 # Basic compaction metrics
                  "messages_before": original_size,
                  "messages_after": compacted_size,
-                 "tokens_before": estimated_tokens,
                  "reduction_percentage": round(reduction_pct, 2),
                  "agent_mode": deps.agent_mode.value
                  if hasattr(deps, "agent_mode") and deps.agent_mode
                  else "unknown",
+                 # Model and provider info (no computation needed)
+                 "model_name": deps.llm_model.name.value,
+                 "provider": deps.llm_model.provider.value,
+                 "key_provider": deps.llm_model.key_provider.value,
              },
          )
      else:
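
The new force flag lets callers compact even when the history is still below the token threshold, which is what a manual compaction command would need; a sketch (the wrapper function is hypothetical):

from shotgun.agents.history.compaction import apply_persistent_compaction


async def compact_now(messages, deps):
    # force=True bypasses the threshold check inside token_limit_compactor
    return await apply_persistent_compaction(messages, deps, force=True)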
shotgun/agents/history/history_processors.py CHANGED
@@ -1,7 +1,9 @@
  """History processors for managing conversation history in Shotgun agents."""
  
+ from collections.abc import Awaitable, Callable
  from typing import TYPE_CHECKING, Any, Protocol
  
+ from anthropic import APIStatusError
  from pydantic_ai import ModelSettings
  from pydantic_ai.messages import (
      ModelMessage,
@@ -14,6 +16,7 @@ from pydantic_ai.messages import (
  from shotgun.agents.llm import shotgun_model_request
  from shotgun.agents.messages import AgentSystemPrompt, SystemStatusPrompt
  from shotgun.agents.models import AgentDeps
+ from shotgun.exceptions import ContextSizeLimitExceeded
  from shotgun.logging_config import get_logger
  from shotgun.posthog_telemetry import track_event
  from shotgun.prompts import PromptLoader
@@ -51,6 +54,86 @@ logger = get_logger(__name__)
  prompt_loader = PromptLoader()
  
  
+ async def _safe_token_estimation(
+     estimation_func: Callable[..., Awaitable[int]],
+     model_name: str,
+     max_tokens: int,
+     *args: Any,
+     **kwargs: Any,
+ ) -> int:
+     """Safely estimate tokens with proper error handling.
+
+     Wraps token estimation functions to handle failures gracefully.
+     Only RuntimeError (from token counters) is wrapped in ContextSizeLimitExceeded.
+     Other errors (network, auth) are allowed to bubble up.
+
+     Args:
+         estimation_func: Async function that estimates tokens
+         model_name: Name of the model for error messages
+         max_tokens: Maximum tokens for the model
+         *args: Arguments to pass to estimation_func
+         **kwargs: Keyword arguments to pass to estimation_func
+
+     Returns:
+         Token count from estimation_func
+
+     Raises:
+         ContextSizeLimitExceeded: If token counting fails with RuntimeError
+         Exception: Any other exceptions from estimation_func
+     """
+     try:
+         return await estimation_func(*args, **kwargs)
+     except Exception as e:
+         # Log the error with full context
+         logger.warning(
+             f"Token counting failed for {model_name}",
+             extra={
+                 "error_type": type(e).__name__,
+                 "error_message": str(e),
+                 "model": model_name,
+             },
+         )
+
+         # Token counting behavior with oversized context (verified via testing):
+         #
+         # 1. OpenAI/tiktoken:
+         #    - Successfully counts any size (tested with 752K tokens, no error)
+         #    - Library errors: ValueError, KeyError, AttributeError, SSLError (file/cache issues)
+         #    - Wrapped as: RuntimeError by our counter
+         #
+         # 2. Gemini/SentencePiece:
+         #    - Successfully counts any size (tested with 752K tokens, no error)
+         #    - Library errors: RuntimeError, IOError, TypeError (file/model loading issues)
+         #    - Wrapped as: RuntimeError by our counter
+         #
+         # 3. Anthropic API:
+         #    - Successfully counts large token counts (tested with 752K tokens, no error)
+         #    - Only enforces 32 MB request size limit (not token count)
+         #    - Raises: APIStatusError(413) with error type 'request_too_large' for 32MB+ requests
+         #    - Other API errors: APIConnectionError, RateLimitError, APIStatusError (4xx/5xx)
+         #    - Wrapped as: RuntimeError by our counter
+         #
+         # IMPORTANT: No provider raises errors for "too many tokens" during counting.
+         # Token count validation happens separately by comparing count to max_input_tokens.
+         #
+         # We wrap RuntimeError (library-level failures from tiktoken/sentencepiece).
+         # We also wrap Anthropic's 413 error (request exceeds 32 MB) as it indicates
+         # context is effectively too large and needs user action to reduce it.
+         if isinstance(e, RuntimeError):
+             raise ContextSizeLimitExceeded(
+                 model_name=model_name, max_tokens=max_tokens
+             ) from e
+
+         # Check for Anthropic's 32 MB request size limit (APIStatusError with status 413)
+         if isinstance(e, APIStatusError) and e.status_code == 413:
+             raise ContextSizeLimitExceeded(
+                 model_name=model_name, max_tokens=max_tokens
+             ) from e
+
+         # Re-raise other exceptions (network errors, auth failures, etc.)
+         raise
+
+
  def is_summary_part(part: Any) -> bool:
      """Check if a message part is a compacted summary."""
      return isinstance(part, TextPart) and part.content.startswith(SUMMARY_MARKER)
@@ -127,6 +210,7 @@ calculate_max_summarization_tokens = _calculate_max_summarization_tokens
  async def token_limit_compactor(
      ctx: ContextProtocol,
      messages: list[ModelMessage],
+     force: bool = False,
  ) -> list[ModelMessage]:
      """Compact message history based on token limits with incremental processing.
  
@@ -139,6 +223,7 @@
      Args:
          ctx: Run context with usage information and dependencies
          messages: Current conversation history
+         force: If True, force compaction even if below token threshold
  
      Returns:
          Compacted list of messages within token limits
@@ -155,9 +240,15 @@
  
      if last_summary_index is not None:
          # Check if post-summary conversation exceeds threshold for incremental compaction
-         post_summary_tokens = await estimate_post_summary_tokens(
-             messages, last_summary_index, deps.llm_model
+         post_summary_tokens = await _safe_token_estimation(
+             estimate_post_summary_tokens,
+             deps.llm_model.name,
+             model_max_tokens,
+             messages,
+             last_summary_index,
+             deps.llm_model,
          )
+
          post_summary_percentage = (
              (post_summary_tokens / max_tokens) * 100 if max_tokens > 0 else 0
          )
@@ -169,7 +260,7 @@
          )
  
          # Only do incremental compaction if post-summary conversation exceeds threshold
-         if post_summary_tokens < max_tokens:
+         if post_summary_tokens < max_tokens and not force:
              logger.debug(
                  f"Post-summary conversation under threshold ({post_summary_tokens} < {max_tokens}), "
                  f"keeping all {len(messages)} messages"
@@ -340,6 +431,7 @@
              else 0
          )
  
+         # Track incremental compaction with simple metrics (fast, no token counting)
          track_event(
              "context_compaction_triggered",
              {
@@ -352,6 +444,10 @@
                  "agent_mode": deps.agent_mode.value
                  if hasattr(deps, "agent_mode") and deps.agent_mode
                  else "unknown",
+                 # Model and provider info (no computation needed)
+                 "model_name": deps.llm_model.name.value,
+                 "provider": deps.llm_model.provider.value,
+                 "key_provider": deps.llm_model.key_provider.value,
              },
          )
  
@@ -359,7 +455,14 @@
  
      else:
          # Check if total conversation exceeds threshold for full compaction
-         total_tokens = await estimate_tokens_from_messages(messages, deps.llm_model)
+         total_tokens = await _safe_token_estimation(
+             estimate_tokens_from_messages,
+             deps.llm_model.name,
+             model_max_tokens,
+             messages,
+             deps.llm_model,
+         )
+
          total_percentage = (total_tokens / max_tokens) * 100 if max_tokens > 0 else 0
  
          logger.debug(
@@ -368,7 +471,7 @@
          )
  
          # Only do full compaction if total conversation exceeds threshold
-         if total_tokens < max_tokens:
+         if total_tokens < max_tokens and not force:
              logger.debug(
                  f"Total conversation under threshold ({total_tokens} < {max_tokens}), "
                  f"keeping all {len(messages)} messages"
@@ -468,6 +571,7 @@ async def _full_compaction(
      tokens_before = current_tokens  # Already calculated above
      tokens_after = summary_usage.output_tokens if summary_usage else 0
  
+     # Track full compaction with simple metrics (fast, no token counting)
      track_event(
          "context_compaction_triggered",
          {
@@ -480,6 +584,10 @@
              "agent_mode": deps.agent_mode.value
              if hasattr(deps, "agent_mode") and deps.agent_mode
              else "unknown",
+             # Model and provider info (no computation needed)
+             "model_name": deps.llm_model.name.value,
+             "provider": deps.llm_model.provider.value,
+             "key_provider": deps.llm_model.key_provider.value,
          },
      )
  
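
The practical effect of _safe_token_estimation is that token-counting failures (tiktoken/SentencePiece RuntimeErrors, Anthropic 413s) surface as ContextSizeLimitExceeded instead of crashing the compactor. A sketch of how a caller might handle that, assuming the module path shotgun.agents.history.history_processors for token_limit_compactor:

from shotgun.agents.history.history_processors import token_limit_compactor
from shotgun.exceptions import ContextSizeLimitExceeded


async def compact_with_feedback(ctx, messages):
    try:
        return await token_limit_compactor(ctx, messages)
    except ContextSizeLimitExceeded:
        # Counting itself failed because the context is effectively too large;
        # fall back to the unmodified history and let the UI ask the user to trim it.
        return messages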
shotgun/agents/history/token_counting/anthropic.py CHANGED
@@ -72,11 +72,23 @@ class AnthropicTokenCounter(TokenCounter):
          Raises:
              RuntimeError: If API call fails
          """
+         # Handle empty text to avoid unnecessary API calls
+         # Anthropic API requires non-empty content, so we need a strict check
+         if not text or not text.strip():
+             return 0
+
+         # Additional validation: ensure the text has actual content
+         # Some edge cases might have only whitespace or control characters
+         cleaned_text = text.strip()
+         if not cleaned_text:
+             return 0
+
          try:
              # Anthropic API expects messages format and model parameter
              # Use await with async client
              result = await self.client.messages.count_tokens(
-                 messages=[{"role": "user", "content": text}], model=self.model_name
+                 messages=[{"role": "user", "content": cleaned_text}],
+                 model=self.model_name,
              )
              return result.input_tokens
          except Exception as e:
@@ -107,5 +119,9 @@
          Raises:
              RuntimeError: If token counting fails
          """
+         # Handle empty message list early
+         if not messages:
+             return 0
+
          total_text = extract_text_from_messages(messages)
          return await self.count_tokens(total_text)
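
With the guards above, whitespace-only input short-circuits to 0 before any request is made. A tiny sketch of the expected behavior, with construction of the counter elided because its constructor is not shown in this diff:

async def demo(counter) -> None:  # counter: an AnthropicTokenCounter instance
    assert await counter.count_tokens("") == 0          # no API call made
    assert await counter.count_tokens("   \n\t") == 0   # whitespace-only also skipped
    print(await counter.count_tokens("Hello world"))    # goes through messages.count_tokens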
shotgun/agents/history/token_counting/base.py CHANGED
@@ -56,12 +56,23 @@ def extract_text_from_messages(messages: list[ModelMessage]) -> str:
          if hasattr(message, "parts"):
              for part in message.parts:
                  if hasattr(part, "content") and isinstance(part.content, str):
-                     text_parts.append(part.content)
+                     # Only add non-empty content
+                     if part.content.strip():
+                         text_parts.append(part.content)
                  else:
                      # Handle non-text parts (tool calls, etc.)
-                     text_parts.append(str(part))
+                     part_str = str(part)
+                     if part_str.strip():
+                         text_parts.append(part_str)
          else:
              # Handle messages without parts
-             text_parts.append(str(message))
+             msg_str = str(message)
+             if msg_str.strip():
+                 text_parts.append(msg_str)
+
+     # If no valid text parts found, return a minimal placeholder
+     # This ensures we never send completely empty content to APIs
+     if not text_parts:
+         return "."
  
      return "\n".join(text_parts)
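
The "." fallback guarantees that downstream counters never receive completely empty content, which matters for the Anthropic empty-content check above. A small illustration, assuming the function lives at the module path used in this dist (token_counting/base.py):

from shotgun.agents.history.token_counting.base import extract_text_from_messages

print(repr(extract_text_from_messages([])))  # '.' as a placeholder instead of an empty string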