shotgun-sh 0.1.0.dev13__py3-none-any.whl → 0.1.0.dev15__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their public registries. It is provided for informational purposes only.

Potentially problematic release.


This version of shotgun-sh might be problematic.

Files changed (40)
  1. shotgun/agents/agent_manager.py +30 -6
  2. shotgun/agents/artifact_state.py +58 -0
  3. shotgun/agents/common.py +48 -14
  4. shotgun/agents/config/models.py +61 -0
  5. shotgun/agents/history/compaction.py +85 -0
  6. shotgun/agents/history/constants.py +19 -0
  7. shotgun/agents/history/context_extraction.py +108 -0
  8. shotgun/agents/history/history_building.py +104 -0
  9. shotgun/agents/history/history_processors.py +354 -157
  10. shotgun/agents/history/message_utils.py +46 -0
  11. shotgun/agents/history/token_counting.py +429 -0
  12. shotgun/agents/history/token_estimation.py +138 -0
  13. shotgun/agents/models.py +145 -1
  14. shotgun/agents/tools/artifact_management.py +56 -24
  15. shotgun/agents/tools/file_management.py +30 -11
  16. shotgun/agents/tools/web_search/anthropic.py +78 -17
  17. shotgun/agents/tools/web_search/gemini.py +1 -1
  18. shotgun/agents/tools/web_search/openai.py +16 -2
  19. shotgun/artifacts/manager.py +2 -1
  20. shotgun/artifacts/models.py +6 -4
  21. shotgun/codebase/core/nl_query.py +4 -4
  22. shotgun/prompts/agents/partials/artifact_system.j2 +4 -1
  23. shotgun/prompts/agents/partials/codebase_understanding.j2 +1 -2
  24. shotgun/prompts/agents/plan.j2 +9 -7
  25. shotgun/prompts/agents/research.j2 +7 -5
  26. shotgun/prompts/agents/specify.j2 +8 -7
  27. shotgun/prompts/agents/state/artifact_templates_available.j2 +18 -0
  28. shotgun/prompts/agents/state/codebase/codebase_graphs_available.j2 +3 -1
  29. shotgun/prompts/agents/state/existing_artifacts_available.j2 +23 -0
  30. shotgun/prompts/agents/state/system_state.j2 +9 -1
  31. shotgun/prompts/history/incremental_summarization.j2 +53 -0
  32. shotgun/sdk/services.py +14 -0
  33. shotgun/tui/app.py +1 -1
  34. shotgun/tui/screens/chat.py +42 -3
  35. shotgun/utils/file_system_utils.py +6 -1
  36. {shotgun_sh-0.1.0.dev13.dist-info → shotgun_sh-0.1.0.dev15.dist-info}/METADATA +2 -1
  37. {shotgun_sh-0.1.0.dev13.dist-info → shotgun_sh-0.1.0.dev15.dist-info}/RECORD +40 -29
  38. {shotgun_sh-0.1.0.dev13.dist-info → shotgun_sh-0.1.0.dev15.dist-info}/WHEEL +0 -0
  39. {shotgun_sh-0.1.0.dev13.dist-info → shotgun_sh-0.1.0.dev15.dist-info}/entry_points.txt +0 -0
  40. {shotgun_sh-0.1.0.dev13.dist-info → shotgun_sh-0.1.0.dev15.dist-info}/licenses/LICENSE +0 -0
shotgun/agents/history/history_processors.py
@@ -1,213 +1,410 @@
  """History processors for managing conversation history in Shotgun agents."""

- from pydantic_ai import RunContext
- from pydantic_ai.direct import model_request
+ from typing import TYPE_CHECKING, Any, Protocol
+
  from pydantic_ai.messages import (
-     BuiltinToolCallPart,
-     BuiltinToolReturnPart,
      ModelMessage,
      ModelRequest,
      ModelResponse,
-     ModelResponsePart,
-     RetryPromptPart,
      SystemPromptPart,
      TextPart,
-     ThinkingPart,
-     ToolCallPart,
-     ToolReturnPart,
      UserPromptPart,
  )

+ from shotgun.agents.config.models import shotgun_model_request
  from shotgun.agents.models import AgentDeps
  from shotgun.logging_config import get_logger
  from shotgun.prompts import PromptLoader

+ from .constants import SUMMARY_MARKER, TOKEN_LIMIT_RATIO
+ from .context_extraction import extract_context_from_messages
+ from .history_building import ensure_ends_with_model_request
+ from .message_utils import (
+     get_first_user_request,
+     get_system_prompt,
+ )
+ from .token_estimation import (
+     calculate_max_summarization_tokens as _calculate_max_summarization_tokens,
+ )
+ from .token_estimation import (
+     estimate_post_summary_tokens,
+     estimate_tokens_from_messages,
+ )
+
+ if TYPE_CHECKING:
+     pass
+
+
+ class ContextProtocol(Protocol):
+     """Protocol defining the interface needed by token_limit_compactor."""
+
+     deps: AgentDeps
+     usage: Any  # Optional usage information
+
+
  logger = get_logger(__name__)

  # Global prompt loader instance
  prompt_loader = PromptLoader()


+ def is_summary_part(part: Any) -> bool:
+     """Check if a message part is a compacted summary."""
+     return isinstance(part, TextPart) and part.content.startswith(SUMMARY_MARKER)
+
+
+ def find_last_summary_index(messages: list[ModelMessage]) -> int | None:
+     """Find the index of the last summary in the message history.
+
+     Args:
+         messages: List of messages in the conversation history
+     Returns:
+         Index of the last summary message, or None if no summary exists.
+     """
+     for i in range(len(messages) - 1, -1, -1):
+         if isinstance(messages[i], ModelResponse):
+             for part in messages[i].parts:
+                 if is_summary_part(part):
+                     return i
+     return None
+
+
+ def extract_summary_content(summary_part: Any) -> str:
+     """Extract the summary content without the marker prefix."""
+     if isinstance(summary_part, TextPart):
+         return summary_part.content[len(SUMMARY_MARKER) :].strip()
+     return ""
+
+
+ def create_marked_summary_part(summary_response: Any) -> TextPart:
+     """Create a TextPart with the summary marker prefix.
+
+     This consolidates the duplicate summary creation logic.
+     """
+     first_part = summary_response.parts[0]
+     if isinstance(first_part, TextPart):
+         summary_content = f"{SUMMARY_MARKER} {first_part.content}"
+         return TextPart(content=summary_content)
+     else:
+         # Fallback in case the response part is not TextPart
+         summary_content = f"{SUMMARY_MARKER} Summary content unavailable"
+         return TextPart(content=summary_content)
+
+
+ def log_summarization_request(
+     model: Any, max_tokens: int, prompt: str, context: str, request_type: str
+ ) -> None:
+     """Log detailed summarization request information.
+
+     Consolidates duplicate logging patterns across the codebase.
+     """
+     logger.debug(f"{request_type} SUMMARIZATION REQUEST - Model: {model}")
+     logger.debug(f"{request_type} SUMMARIZATION REQUEST - Max tokens: {max_tokens}")
+     logger.debug(f"{request_type} SUMMARIZATION REQUEST - Instructions: {prompt}")
+     logger.debug(f"{request_type} SUMMARIZATION REQUEST - Context: {context}")
+
+
+ def log_summarization_response(response: Any, request_type: str) -> None:
+     """Log detailed summarization response information.
+
+     Consolidates duplicate logging patterns across the codebase.
+     """
+     logger.debug(f"{request_type} SUMMARIZATION RESPONSE - Full response: {response}")
+     logger.debug(
+         f"{request_type} SUMMARIZATION RESPONSE - Content: "
+         f"{response.parts[0] if response.parts else 'No content'}"
+     )
+     logger.debug(f"{request_type} SUMMARIZATION RESPONSE - Usage: {response.usage}")
+
+
+ # Use centralized calculate_max_summarization_tokens function
+ calculate_max_summarization_tokens = _calculate_max_summarization_tokens
+
+
  async def token_limit_compactor(
-     ctx: RunContext[AgentDeps],
+     ctx: ContextProtocol,
      messages: list[ModelMessage],
  ) -> list[ModelMessage]:
-     """Compact message history based on token limits.
+     """Compact message history based on token limits with incremental processing.

-     This context-aware processor monitors token usage and removes older messages
-     when the conversation history becomes too large. It preserves system messages
-     and recent context while removing older user/assistant exchanges.
+     This incremental compactor prevents cascading summarization by:
+     1. Preserving existing summaries
+     2. Only processing NEW messages since the last summary
+     3. Combining summaries incrementally
+     4. Never re-processing already compacted content

      Args:
          ctx: Run context with usage information and dependencies
-         messages: List of messages in the conversation history
+         messages: Current conversation history

      Returns:
          Compacted list of messages within token limits
      """
-     # Get current token usage from context
-     current_tokens = ctx.usage.total_tokens if ctx.usage else 0
-
-     # Get token limit from model configuration or use fallback
-     model_max_tokens = ctx.deps.llm_model.max_input_tokens
-     max_tokens = int(
-         model_max_tokens * 0.8
-     )  # Use 80% of max to leave room for response
-     percentage_of_limit_used = (
-         (current_tokens / max_tokens) * 100 if max_tokens > 0 else 0
-     )
-     logger.debug(
-         "History compactor: current tokens=%d, limit=%d, percentage used=%.2f%%",
-         current_tokens,
-         max_tokens,
-         percentage_of_limit_used,
-     )
+     # Extract dependencies from context
+     deps = ctx.deps
+
+     # Get token limit from model configuration
+     model_max_tokens = deps.llm_model.max_input_tokens
+     max_tokens = int(model_max_tokens * TOKEN_LIMIT_RATIO)
+
+     # Find existing summaries to determine compaction strategy
+     last_summary_index = find_last_summary_index(messages)
+
+     if last_summary_index is not None:
+         # Check if post-summary conversation exceeds threshold for incremental compaction
+         post_summary_tokens = estimate_post_summary_tokens(
+             messages, last_summary_index, deps.llm_model
+         )
+         post_summary_percentage = (
+             (post_summary_tokens / max_tokens) * 100 if max_tokens > 0 else 0
+         )
+
+         logger.debug(
+             f"Found existing summary at index {last_summary_index}. "
+             f"Post-summary tokens: {post_summary_tokens}, threshold: {max_tokens}, "
+             f"percentage: {post_summary_percentage:.2f}%%"
+         )
+
+         # Only do incremental compaction if post-summary conversation exceeds threshold
+         if post_summary_tokens < max_tokens:
+             logger.debug(
+                 f"Post-summary conversation under threshold ({post_summary_tokens} < {max_tokens}), "
+                 f"keeping all {len(messages)} messages"
+             )
+             return messages
+
+         # INCREMENTAL COMPACTION: Process new messages since last summary
+         logger.debug(
+             "Post-summary conversation exceeds threshold, performing incremental compaction"
+         )
+
+         # Extract existing summary content
+         summary_message = messages[last_summary_index]
+         existing_summary_part = None
+         for part in summary_message.parts:
+             if is_summary_part(part):
+                 existing_summary_part = part
+                 break
+
+         if not existing_summary_part:
+             logger.warning(
+                 "Found summary index but no summary part, falling back to full compaction"
+             )
+             return await _full_compaction(deps, messages)
+
+         existing_summary = extract_summary_content(existing_summary_part)
+
+         # Get messages AFTER the last summary for incremental processing
+         messages_to_process = messages[last_summary_index + 1 :]
+
+         if not messages_to_process:
+             logger.debug(
+                 "No new messages since last summary, returning existing history"
+             )
+             return messages
+
+         # Extract context from new messages only
+         new_context = extract_context_from_messages(messages_to_process)
+
+         # Check if there's meaningful content (responses) to summarize
+         has_meaningful_content = any(
+             isinstance(msg, ModelResponse) for msg in messages_to_process
+         )
+
+         # If there are only user requests and no responses, no need to summarize
+         if not has_meaningful_content or not new_context.strip():
+             logger.debug(
+                 "No meaningful new content to summarize, returning existing history"
+             )
+             return messages
+
+         # Use incremental summarization prompt with proper template variables
+         try:
+             incremental_prompt = prompt_loader.render(
+                 "history/incremental_summarization.j2",
+                 existing_summary=existing_summary,
+                 new_messages=new_context,
+             )
+         except Exception:
+             # Fallback to regular summarization if incremental template doesn't exist yet
+             logger.warning(
+                 "Incremental summarization template not found, using regular template"
+             )
+             incremental_prompt = prompt_loader.render("history/summarization.j2")
+             # Combine existing and new context for fallback
+             new_context = (
+                 f"EXISTING SUMMARY:\n{existing_summary}\n\nNEW MESSAGES:\n{new_context}"
+             )
+
+         # Create incremental summary
+         request_messages: list[ModelMessage] = [
+             ModelRequest.user_text_prompt(new_context, instructions=incremental_prompt)
+         ]
+
+         # Calculate optimal max_tokens for summarization
+         max_tokens = calculate_max_summarization_tokens(
+             deps.llm_model, request_messages
+         )
+
+         # Debug logging using shared utilities
+         log_summarization_request(
+             deps.llm_model, max_tokens, incremental_prompt, new_context, "INCREMENTAL"
+         )
+
+         # Use shotgun wrapper to ensure full token utilization
+         summary_response = await shotgun_model_request(
+             model_config=deps.llm_model,
+             messages=request_messages,
+             max_tokens=max_tokens,  # Use calculated optimal tokens for summarization
+         )
+
+         log_summarization_response(summary_response, "INCREMENTAL")
+
+         # Calculate token reduction (from new messages only)
+         new_tokens = len(new_context.split())  # Rough estimate
+         summary_tokens = (
+             summary_response.usage.output_tokens if summary_response.usage else 0
+         )
+         logger.debug(
+             f"Incremental compaction: processed {len(messages_to_process)} new messages, "
+             f"reduced ~{new_tokens} tokens to {summary_tokens} tokens"
+         )

-     # If we're under the limit, return all messages
-     if current_tokens < max_tokens:
-         logger.debug("Under token limit, keeping all %d messages", len(messages))
-         return messages
+         # Build the new compacted history with the updated summary
+         new_summary_part = create_marked_summary_part(summary_response)

-     # Get current token usage from context
-     current_tokens = ctx.usage.total_tokens if ctx.usage else 0
+         # Extract essential context from messages before the last summary (if any)
+         system_prompt = ""
+         first_user_prompt = ""
+         if last_summary_index > 0:
+             # Get system and first user from original conversation
+             system_prompt = get_system_prompt(messages[:last_summary_index]) or ""
+             first_user_prompt = (
+                 get_first_user_request(messages[:last_summary_index]) or ""
+             )

-     context = ""
+         # Create the updated summary message
+         updated_summary_message = ModelResponse(parts=[new_summary_part])

-     # Separate system messages from conversation messages
-     for msg in messages:
-         if isinstance(msg, ModelResponse) or isinstance(msg, ModelRequest):
-             for part in msg.parts:
-                 message_content = get_context_from_message(part)
-                 if not message_content:
-                     continue
-                 context += get_context_from_message(part) + "\n"
-         else:
-             # Handle whatever this is
-             pass
+         # Build final compacted history with CLEAN structure
+         compacted_messages: list[ModelMessage] = []

+         # Only add system/user context if it exists and is meaningful
+         if system_prompt or first_user_prompt:
+             compacted_messages.append(
+                 ModelRequest(
+                     parts=[
+                         SystemPromptPart(content=system_prompt),
+                         UserPromptPart(content=first_user_prompt),
+                     ]
+                 )
+             )
+
+         # Add the summary
+         compacted_messages.append(updated_summary_message)
+
+         # Ensure history ends with ModelRequest for PydanticAI compatibility
+         compacted_messages = ensure_ends_with_model_request(
+             compacted_messages, messages
+         )
+
+         logger.debug(
+             f"Incremental compaction complete: {len(messages)} -> {len(compacted_messages)} messages"
+         )
+         return compacted_messages
+
+     else:
+         # Check if total conversation exceeds threshold for full compaction
+         total_tokens = estimate_tokens_from_messages(messages, deps.llm_model)
+         total_percentage = (total_tokens / max_tokens) * 100 if max_tokens > 0 else 0
+
+         logger.debug(
+             f"No existing summary found. Total tokens: {total_tokens}, threshold: {max_tokens}, "
+             f"percentage: {total_percentage:.2f}%%"
+         )
+
+         # Only do full compaction if total conversation exceeds threshold
+         if total_tokens < max_tokens:
+             logger.debug(
+                 f"Total conversation under threshold ({total_tokens} < {max_tokens}), "
+                 f"keeping all {len(messages)} messages"
+             )
+             return messages
+
+         # FIRST-TIME COMPACTION: Process all messages
+         logger.debug(
+             "Total conversation exceeds threshold, performing initial full compaction"
+         )
+         return await _full_compaction(deps, messages)
+
+
+ async def _full_compaction(
+     deps: AgentDeps,
+     messages: list[ModelMessage],
+ ) -> list[ModelMessage]:
+     """Perform full compaction for first-time summarization."""
+     # Extract context from all messages
+     context = extract_context_from_messages(messages)
+
+     # Use regular summarization prompt
      summarization_prompt = prompt_loader.render("history/summarization.j2")
-     summary_response = await model_request(
-         model=ctx.model,
-         messages=[
-             ModelRequest.user_text_prompt(context, instructions=summarization_prompt)
-         ],
+     request_messages: list[ModelMessage] = [
+         ModelRequest.user_text_prompt(context, instructions=summarization_prompt)
+     ]
+
+     # Calculate optimal max_tokens for summarization
+     max_tokens = calculate_max_summarization_tokens(deps.llm_model, request_messages)
+
+     # Debug logging using shared utilities
+     log_summarization_request(
+         deps.llm_model, max_tokens, summarization_prompt, context, "FULL"
+     )
+
+     # Use shotgun wrapper to ensure full token utilization
+     summary_response = await shotgun_model_request(
+         model_config=deps.llm_model,
+         messages=request_messages,
+         max_tokens=max_tokens,  # Use calculated optimal tokens for summarization
      )
-     # Usage before and after
+
+     # Calculate token reduction
+     current_tokens = estimate_tokens_from_messages(messages, deps.llm_model)
      summary_usage = summary_response.usage
      reduction_percentage = (
-         (current_tokens - summary_usage.output_tokens) / current_tokens
-     ) * 100
+         ((current_tokens - summary_usage.output_tokens) / current_tokens) * 100
+         if current_tokens > 0 and summary_usage
+         else 0
+     )
+
+     log_summarization_response(summary_response, "FULL")
+
+     # Log token reduction (already calculated above)
      logger.debug(
-         "Compacted %s tokens into %s tokens for a %.2f percent reduction",
+         "Full compaction: %s tokens -> %s tokens (%.2f%% reduction)",
          current_tokens,
-         summary_usage.output_tokens,
+         summary_usage.output_tokens if summary_usage else 0,
          reduction_percentage,
      )

-     system_prompt = get_system_promt(messages) or ""
+     # Mark summary with special prefix
+     marked_summary_part = create_marked_summary_part(summary_response)
+
+     # Build compacted history structure
+     system_prompt = get_system_prompt(messages) or ""
      user_prompt = get_first_user_request(messages) or ""
-     # Extract content from the first response part safely
-     summarization_part = summary_response.parts[0]
-     return [
+
+     # Create base structure
+     compacted_messages: list[ModelMessage] = [
          ModelRequest(
              parts=[
                  SystemPromptPart(content=system_prompt),
                  UserPromptPart(content=user_prompt),
              ]
          ),
-         ModelResponse(
-             parts=[
-                 summarization_part,
-             ]
-         ),
+         ModelResponse(parts=[marked_summary_part]),
      ]

+     # Ensure history ends with ModelRequest for PydanticAI compatibility
+     compacted_messages = ensure_ends_with_model_request(compacted_messages, messages)

- def get_first_user_request(messages: list[ModelMessage]) -> str | None:
-     """Extract first user request from messages.
-
-     Args:
-         messages: List of messages in the conversation history
-     Returns:
-         The first user request as a string.
-     """
-     for msg in messages:
-         if isinstance(msg, ModelRequest):
-             for part in msg.parts:
-                 if isinstance(part, UserPromptPart):
-                     if isinstance(part.content, str):
-                         return part.content
-     return None
-
-
- def get_system_promt(messages: list[ModelMessage]) -> str | None:
-     """Extract system prompt from messages.
-
-     Args:
-         messages: List of messages in the conversation history
-
-     Returns:
-         The system prompt as a string.
-     """
-     for msg in messages:
-         if isinstance(msg, ModelRequest):
-             for part in msg.parts:
-                 if isinstance(part, SystemPromptPart):
-                     return part.content
-     return None
-
-
- def get_context_from_message(
-     message_part: SystemPromptPart
-     | UserPromptPart
-     | ToolReturnPart
-     | RetryPromptPart
-     | ModelResponsePart,
- ) -> str:
-     """Extract context from a message part.
-
-     Args:
-         message: The message part to extract context from.
-
-     Returns:
-         The extracted context as a string.
-     """
-
-     if isinstance(message_part, SystemPromptPart):
-         return ""  # We do not include system prompts in the summary
-     elif isinstance(message_part, UserPromptPart):
-         if isinstance(message_part.content, str):
-             return "<USER_PROMPT>\n" + message_part.content + "\n</USER_PROMPT>"
-         else:
-             return ""
-     elif isinstance(message_part, ToolReturnPart):
-         return "<TOOL_RETURN>\n" + str(message_part.content) + "\n</TOOL_RETURN>"
-     elif isinstance(message_part, RetryPromptPart):
-         if isinstance(message_part.content, str):
-             return "<RETRY_PROMPT>\n" + message_part.content + "\n</RETRY_PROMPT>"
-         return ""
-
-     # TextPart | ToolCallPart | BuiltinToolCallPart | BuiltinToolReturnPart | ThinkingPart
-     if isinstance(message_part, TextPart):
-         return "<ASSISTANT_TEXT>\n" + message_part.content + "\n</ASSISTANT_TEXT>"
-     elif isinstance(message_part, ToolCallPart):
-         if isinstance(message_part.args, dict):
-             args_str = ", ".join(f"{k}={repr(v)}" for k, v in message_part.args.items())
-             tool_call_str = f"{message_part.tool_name}({args_str})"
-         else:
-             tool_call_str = f"{message_part.tool_name}({message_part.args})"
-         return "<TOOL_CALL>\n" + tool_call_str + "\n</TOOL_CALL>"
-     elif isinstance(message_part, BuiltinToolCallPart):
-         return (
-             "<BUILTIN_TOOL_CALL>\n" + message_part.tool_name + "\n</BUILTIN_TOOL_CALL>"
-         )
-     elif isinstance(message_part, BuiltinToolReturnPart):
-         return (
-             "<BUILTIN_TOOL_RETURN>\n"
-             + message_part.tool_name
-             + "\n</BUILTIN_TOOL_RETURN>"
-         )
-     elif isinstance(message_part, ThinkingPart):
-         return "<THINKING>\n" + message_part.content + "\n</THINKING>"
-
-     return ""
+     return compacted_messages
shotgun/agents/history/message_utils.py
@@ -0,0 +1,46 @@
+ """Utility functions for working with PydanticAI messages."""
+
+ from pydantic_ai.messages import (
+     ModelMessage,
+     ModelRequest,
+     SystemPromptPart,
+     UserPromptPart,
+ )
+
+
+ def get_first_user_request(messages: list[ModelMessage]) -> str | None:
+     """Extract first user request content from messages."""
+     for msg in messages:
+         if isinstance(msg, ModelRequest):
+             for part in msg.parts:
+                 if isinstance(part, UserPromptPart) and isinstance(part.content, str):
+                     return part.content
+     return None
+
+
+ def get_last_user_request(messages: list[ModelMessage]) -> ModelRequest | None:
+     """Extract the last user request from messages."""
+     for msg in reversed(messages):
+         if isinstance(msg, ModelRequest):
+             for part in msg.parts:
+                 if isinstance(part, UserPromptPart):
+                     return msg
+     return None
+
+
+ def get_user_content_from_request(request: ModelRequest) -> str | None:
+     """Extract user prompt content from a ModelRequest."""
+     for part in request.parts:
+         if isinstance(part, UserPromptPart) and isinstance(part.content, str):
+             return part.content
+     return None
+
+
+ def get_system_prompt(messages: list[ModelMessage]) -> str | None:
+     """Extract system prompt from messages."""
+     for msg in messages:
+         if isinstance(msg, ModelRequest):
+             for part in msg.parts:
+                 if isinstance(part, SystemPromptPart):
+                     return part.content
+     return None
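
Usage note (not part of the diff): the new token_limit_compactor has the shape pydantic_ai expects of a history processor, a callable that receives the run context and the current message list and returns a possibly compacted list. A minimal sketch of how such a processor is typically registered, assuming pydantic_ai's history_processors argument; the model id and deps wiring below are illustrative placeholders, the package's own agent construction is not shown in this diff:

    from pydantic_ai import Agent

    from shotgun.agents.history.history_processors import token_limit_compactor
    from shotgun.agents.models import AgentDeps

    # Illustrative wiring only: the model identifier and deps_type are placeholders.
    agent = Agent(
        "openai:gpt-4o",  # placeholder model id
        deps_type=AgentDeps,
        history_processors=[token_limit_compactor],  # compact history before each model call
    )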