shotgun-sh 0.2.8.dev2__py3-none-any.whl → 0.3.3.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shotgun/agents/agent_manager.py +382 -60
- shotgun/agents/common.py +15 -9
- shotgun/agents/config/README.md +89 -0
- shotgun/agents/config/__init__.py +10 -1
- shotgun/agents/config/constants.py +0 -6
- shotgun/agents/config/manager.py +383 -82
- shotgun/agents/config/models.py +122 -18
- shotgun/agents/config/provider.py +81 -15
- shotgun/agents/config/streaming_test.py +119 -0
- shotgun/agents/context_analyzer/__init__.py +28 -0
- shotgun/agents/context_analyzer/analyzer.py +475 -0
- shotgun/agents/context_analyzer/constants.py +9 -0
- shotgun/agents/context_analyzer/formatter.py +115 -0
- shotgun/agents/context_analyzer/models.py +212 -0
- shotgun/agents/conversation/__init__.py +18 -0
- shotgun/agents/conversation/filters.py +164 -0
- shotgun/agents/conversation/history/chunking.py +278 -0
- shotgun/agents/{history → conversation/history}/compaction.py +36 -5
- shotgun/agents/{history → conversation/history}/constants.py +5 -0
- shotgun/agents/conversation/history/file_content_deduplication.py +216 -0
- shotgun/agents/{history → conversation/history}/history_processors.py +380 -8
- shotgun/agents/{history → conversation/history}/token_counting/anthropic.py +25 -1
- shotgun/agents/{history → conversation/history}/token_counting/base.py +14 -3
- shotgun/agents/{history → conversation/history}/token_counting/openai.py +11 -1
- shotgun/agents/{history → conversation/history}/token_counting/sentencepiece_counter.py +8 -0
- shotgun/agents/{history → conversation/history}/token_counting/tokenizer_cache.py +3 -1
- shotgun/agents/{history → conversation/history}/token_counting/utils.py +0 -3
- shotgun/agents/{conversation_manager.py → conversation/manager.py} +36 -20
- shotgun/agents/{conversation_history.py → conversation/models.py} +8 -92
- shotgun/agents/error/__init__.py +11 -0
- shotgun/agents/error/models.py +19 -0
- shotgun/agents/export.py +2 -2
- shotgun/agents/plan.py +2 -2
- shotgun/agents/research.py +3 -3
- shotgun/agents/runner.py +230 -0
- shotgun/agents/specify.py +2 -2
- shotgun/agents/tasks.py +2 -2
- shotgun/agents/tools/codebase/codebase_shell.py +6 -0
- shotgun/agents/tools/codebase/directory_lister.py +6 -0
- shotgun/agents/tools/codebase/file_read.py +11 -2
- shotgun/agents/tools/codebase/query_graph.py +6 -0
- shotgun/agents/tools/codebase/retrieve_code.py +6 -0
- shotgun/agents/tools/file_management.py +27 -7
- shotgun/agents/tools/registry.py +217 -0
- shotgun/agents/tools/web_search/__init__.py +8 -8
- shotgun/agents/tools/web_search/anthropic.py +8 -2
- shotgun/agents/tools/web_search/gemini.py +7 -1
- shotgun/agents/tools/web_search/openai.py +8 -2
- shotgun/agents/tools/web_search/utils.py +2 -2
- shotgun/agents/usage_manager.py +16 -11
- shotgun/api_endpoints.py +7 -3
- shotgun/build_constants.py +2 -2
- shotgun/cli/clear.py +53 -0
- shotgun/cli/compact.py +188 -0
- shotgun/cli/config.py +8 -5
- shotgun/cli/context.py +154 -0
- shotgun/cli/error_handler.py +24 -0
- shotgun/cli/export.py +34 -34
- shotgun/cli/feedback.py +4 -2
- shotgun/cli/models.py +1 -0
- shotgun/cli/plan.py +34 -34
- shotgun/cli/research.py +18 -10
- shotgun/cli/spec/__init__.py +5 -0
- shotgun/cli/spec/backup.py +81 -0
- shotgun/cli/spec/commands.py +132 -0
- shotgun/cli/spec/models.py +48 -0
- shotgun/cli/spec/pull_service.py +219 -0
- shotgun/cli/specify.py +20 -19
- shotgun/cli/tasks.py +34 -34
- shotgun/cli/update.py +16 -2
- shotgun/codebase/core/change_detector.py +5 -3
- shotgun/codebase/core/code_retrieval.py +4 -2
- shotgun/codebase/core/ingestor.py +163 -15
- shotgun/codebase/core/manager.py +13 -4
- shotgun/codebase/core/nl_query.py +1 -1
- shotgun/codebase/models.py +2 -0
- shotgun/exceptions.py +357 -0
- shotgun/llm_proxy/__init__.py +17 -0
- shotgun/llm_proxy/client.py +215 -0
- shotgun/llm_proxy/models.py +137 -0
- shotgun/logging_config.py +60 -27
- shotgun/main.py +77 -11
- shotgun/posthog_telemetry.py +38 -29
- shotgun/prompts/agents/partials/common_agent_system_prompt.j2 +28 -2
- shotgun/prompts/agents/partials/interactive_mode.j2 +3 -3
- shotgun/prompts/agents/plan.j2 +16 -0
- shotgun/prompts/agents/research.j2 +16 -3
- shotgun/prompts/agents/specify.j2 +54 -1
- shotgun/prompts/agents/state/system_state.j2 +0 -2
- shotgun/prompts/agents/tasks.j2 +16 -0
- shotgun/prompts/history/chunk_summarization.j2 +34 -0
- shotgun/prompts/history/combine_summaries.j2 +53 -0
- shotgun/sdk/codebase.py +14 -3
- shotgun/sentry_telemetry.py +163 -16
- shotgun/settings.py +243 -0
- shotgun/shotgun_web/__init__.py +67 -1
- shotgun/shotgun_web/client.py +42 -1
- shotgun/shotgun_web/constants.py +46 -0
- shotgun/shotgun_web/exceptions.py +29 -0
- shotgun/shotgun_web/models.py +390 -0
- shotgun/shotgun_web/shared_specs/__init__.py +32 -0
- shotgun/shotgun_web/shared_specs/file_scanner.py +175 -0
- shotgun/shotgun_web/shared_specs/hasher.py +83 -0
- shotgun/shotgun_web/shared_specs/models.py +71 -0
- shotgun/shotgun_web/shared_specs/upload_pipeline.py +329 -0
- shotgun/shotgun_web/shared_specs/utils.py +34 -0
- shotgun/shotgun_web/specs_client.py +703 -0
- shotgun/shotgun_web/supabase_client.py +31 -0
- shotgun/telemetry.py +10 -33
- shotgun/tui/app.py +310 -46
- shotgun/tui/commands/__init__.py +1 -1
- shotgun/tui/components/context_indicator.py +179 -0
- shotgun/tui/components/mode_indicator.py +70 -0
- shotgun/tui/components/status_bar.py +48 -0
- shotgun/tui/containers.py +91 -0
- shotgun/tui/dependencies.py +39 -0
- shotgun/tui/layout.py +5 -0
- shotgun/tui/protocols.py +45 -0
- shotgun/tui/screens/chat/__init__.py +5 -0
- shotgun/tui/screens/chat/chat.tcss +54 -0
- shotgun/tui/screens/chat/chat_screen.py +1531 -0
- shotgun/tui/screens/chat/codebase_index_prompt_screen.py +243 -0
- shotgun/tui/screens/chat/codebase_index_selection.py +12 -0
- shotgun/tui/screens/chat/help_text.py +40 -0
- shotgun/tui/screens/chat/prompt_history.py +48 -0
- shotgun/tui/screens/chat.tcss +11 -0
- shotgun/tui/screens/chat_screen/command_providers.py +91 -4
- shotgun/tui/screens/chat_screen/hint_message.py +76 -1
- shotgun/tui/screens/chat_screen/history/__init__.py +22 -0
- shotgun/tui/screens/chat_screen/history/agent_response.py +66 -0
- shotgun/tui/screens/chat_screen/history/chat_history.py +115 -0
- shotgun/tui/screens/chat_screen/history/formatters.py +115 -0
- shotgun/tui/screens/chat_screen/history/partial_response.py +43 -0
- shotgun/tui/screens/chat_screen/history/user_question.py +42 -0
- shotgun/tui/screens/confirmation_dialog.py +191 -0
- shotgun/tui/screens/directory_setup.py +45 -41
- shotgun/tui/screens/feedback.py +14 -7
- shotgun/tui/screens/github_issue.py +111 -0
- shotgun/tui/screens/model_picker.py +77 -32
- shotgun/tui/screens/onboarding.py +580 -0
- shotgun/tui/screens/pipx_migration.py +205 -0
- shotgun/tui/screens/provider_config.py +116 -35
- shotgun/tui/screens/shared_specs/__init__.py +21 -0
- shotgun/tui/screens/shared_specs/create_spec_dialog.py +273 -0
- shotgun/tui/screens/shared_specs/models.py +56 -0
- shotgun/tui/screens/shared_specs/share_specs_dialog.py +390 -0
- shotgun/tui/screens/shared_specs/upload_progress_screen.py +452 -0
- shotgun/tui/screens/shotgun_auth.py +112 -18
- shotgun/tui/screens/spec_pull.py +288 -0
- shotgun/tui/screens/welcome.py +137 -11
- shotgun/tui/services/__init__.py +5 -0
- shotgun/tui/services/conversation_service.py +187 -0
- shotgun/tui/state/__init__.py +7 -0
- shotgun/tui/state/processing_state.py +185 -0
- shotgun/tui/utils/mode_progress.py +14 -7
- shotgun/tui/widgets/__init__.py +5 -0
- shotgun/tui/widgets/widget_coordinator.py +263 -0
- shotgun/utils/file_system_utils.py +22 -2
- shotgun/utils/marketing.py +110 -0
- shotgun/utils/update_checker.py +69 -14
- shotgun_sh-0.3.3.dev1.dist-info/METADATA +472 -0
- shotgun_sh-0.3.3.dev1.dist-info/RECORD +229 -0
- {shotgun_sh-0.2.8.dev2.dist-info → shotgun_sh-0.3.3.dev1.dist-info}/WHEEL +1 -1
- {shotgun_sh-0.2.8.dev2.dist-info → shotgun_sh-0.3.3.dev1.dist-info}/entry_points.txt +1 -0
- {shotgun_sh-0.2.8.dev2.dist-info → shotgun_sh-0.3.3.dev1.dist-info}/licenses/LICENSE +1 -1
- shotgun/tui/screens/chat.py +0 -996
- shotgun/tui/screens/chat_screen/history.py +0 -335
- shotgun_sh-0.2.8.dev2.dist-info/METADATA +0 -126
- shotgun_sh-0.2.8.dev2.dist-info/RECORD +0 -155
- /shotgun/agents/{history → conversation/history}/__init__.py +0 -0
- /shotgun/agents/{history → conversation/history}/context_extraction.py +0 -0
- /shotgun/agents/{history → conversation/history}/history_building.py +0 -0
- /shotgun/agents/{history → conversation/history}/message_utils.py +0 -0
- /shotgun/agents/{history → conversation/history}/token_counting/__init__.py +0 -0
- /shotgun/agents/{history → conversation/history}/token_estimation.py +0 -0
shotgun/agents/{history → conversation/history}/history_processors.py

```diff
@@ -1,7 +1,9 @@
 """History processors for managing conversation history in Shotgun agents."""
 
+from collections.abc import Awaitable, Callable
 from typing import TYPE_CHECKING, Any, Protocol
 
+from anthropic import APIStatusError
 from pydantic_ai import ModelSettings
 from pydantic_ai.messages import (
     ModelMessage,
@@ -11,14 +13,16 @@ from pydantic_ai.messages import (
     UserPromptPart,
 )
 
+from shotgun.agents.conversation.filters import filter_orphaned_tool_responses
 from shotgun.agents.llm import shotgun_model_request
 from shotgun.agents.messages import AgentSystemPrompt, SystemStatusPrompt
 from shotgun.agents.models import AgentDeps
+from shotgun.exceptions import ContextSizeLimitExceeded
 from shotgun.logging_config import get_logger
 from shotgun.posthog_telemetry import track_event
 from shotgun.prompts import PromptLoader
 
-from .constants import SUMMARY_MARKER, TOKEN_LIMIT_RATIO
+from .constants import CHUNK_SAFE_RATIO, SUMMARY_MARKER, TOKEN_LIMIT_RATIO
 from .context_extraction import extract_context_from_messages
 from .history_building import ensure_ends_with_model_request
 from .message_utils import (
@@ -35,7 +39,7 @@ from .token_estimation import (
 )
 
 if TYPE_CHECKING:
-
+    from . import chunking
 
 
 class ContextProtocol(Protocol):
@@ -51,6 +55,86 @@ logger = get_logger(__name__)
 prompt_loader = PromptLoader()
 
 
+async def _safe_token_estimation(
+    estimation_func: Callable[..., Awaitable[int]],
+    model_name: str,
+    max_tokens: int,
+    *args: Any,
+    **kwargs: Any,
+) -> int:
+    """Safely estimate tokens with proper error handling.
+
+    Wraps token estimation functions to handle failures gracefully.
+    Only RuntimeError (from token counters) is wrapped in ContextSizeLimitExceeded.
+    Other errors (network, auth) are allowed to bubble up.
+
+    Args:
+        estimation_func: Async function that estimates tokens
+        model_name: Name of the model for error messages
+        max_tokens: Maximum tokens for the model
+        *args: Arguments to pass to estimation_func
+        **kwargs: Keyword arguments to pass to estimation_func
+
+    Returns:
+        Token count from estimation_func
+
+    Raises:
+        ContextSizeLimitExceeded: If token counting fails with RuntimeError
+        Exception: Any other exceptions from estimation_func
+    """
+    try:
+        return await estimation_func(*args, **kwargs)
+    except Exception as e:
+        # Log the error with full context
+        logger.warning(
+            f"Token counting failed for {model_name}",
+            extra={
+                "error_type": type(e).__name__,
+                "error_message": str(e),
+                "model": model_name,
+            },
+        )
+
+        # Token counting behavior with oversized context (verified via testing):
+        #
+        # 1. OpenAI/tiktoken:
+        #    - Successfully counts any size (tested with 752K tokens, no error)
+        #    - Library errors: ValueError, KeyError, AttributeError, SSLError (file/cache issues)
+        #    - Wrapped as: RuntimeError by our counter
+        #
+        # 2. Gemini/SentencePiece:
+        #    - Successfully counts any size (tested with 752K tokens, no error)
+        #    - Library errors: RuntimeError, IOError, TypeError (file/model loading issues)
+        #    - Wrapped as: RuntimeError by our counter
+        #
+        # 3. Anthropic API:
+        #    - Successfully counts large token counts (tested with 752K tokens, no error)
+        #    - Only enforces 32 MB request size limit (not token count)
+        #    - Raises: APIStatusError(413) with error type 'request_too_large' for 32MB+ requests
+        #    - Other API errors: APIConnectionError, RateLimitError, APIStatusError (4xx/5xx)
+        #    - Wrapped as: RuntimeError by our counter
+        #
+        # IMPORTANT: No provider raises errors for "too many tokens" during counting.
+        # Token count validation happens separately by comparing count to max_input_tokens.
+        #
+        # We wrap RuntimeError (library-level failures from tiktoken/sentencepiece).
+        # We also wrap Anthropic's 413 error (request exceeds 32 MB) as it indicates
+        # context is effectively too large and needs user action to reduce it.
+        if isinstance(e, RuntimeError):
+            raise ContextSizeLimitExceeded(
+                model_name=model_name, max_tokens=max_tokens
+            ) from e
+
+        # Check for Anthropic's 32 MB request size limit (APIStatusError with status 413)
+        if isinstance(e, APIStatusError) and e.status_code == 413:
+            raise ContextSizeLimitExceeded(
+                model_name=model_name, max_tokens=max_tokens
+            ) from e
+
+        # Re-raise other exceptions (network errors, auth failures, etc.)
+        raise
+
+
 def is_summary_part(part: Any) -> bool:
     """Check if a message part is a compacted summary."""
     return isinstance(part, TextPart) and part.content.startswith(SUMMARY_MARKER)
```
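The wrapper above converts unrecoverable token-counting failures (tiktoken/SentencePiece `RuntimeError`s, Anthropic 413s) into `ContextSizeLimitExceeded`. A minimal caller-side sketch of how that exception might be surfaced to the user; this is not code from this release, and the exception attribute names are assumptions based on the keyword arguments used at the raise sites:

```python
# Hypothetical caller-side sketch (not from this diff): surface the new
# ContextSizeLimitExceeded instead of letting it abort the agent run.
import logging

from shotgun.agents.conversation.history.history_processors import (
    token_limit_compactor,
)
from shotgun.exceptions import ContextSizeLimitExceeded

logger = logging.getLogger(__name__)


async def compact_or_explain(ctx, messages):
    try:
        return await token_limit_compactor(ctx, messages)  # force=False by default
    except ContextSizeLimitExceeded as exc:
        # Attribute names are assumptions mirroring the raise sites above
        # (model_name=..., max_tokens=...).
        logger.error(
            "Token counting failed for %s (limit ~%s tokens); suggest /compact or /clear",
            getattr(exc, "model_name", "unknown"),
            getattr(exc, "max_tokens", "unknown"),
        )
        return messages
```

The remaining hunks of this file continue below.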
shotgun/agents/{history → conversation/history}/history_processors.py (continued)

```diff
@@ -127,6 +211,7 @@ calculate_max_summarization_tokens = _calculate_max_summarization_tokens
 async def token_limit_compactor(
     ctx: ContextProtocol,
     messages: list[ModelMessage],
+    force: bool = False,
 ) -> list[ModelMessage]:
     """Compact message history based on token limits with incremental processing.
 
@@ -139,6 +224,7 @@ async def token_limit_compactor(
     Args:
         ctx: Run context with usage information and dependencies
        messages: Current conversation history
+        force: If True, force compaction even if below token threshold
 
     Returns:
         Compacted list of messages within token limits
@@ -155,9 +241,15 @@ async def token_limit_compactor(
 
     if last_summary_index is not None:
         # Check if post-summary conversation exceeds threshold for incremental compaction
-        post_summary_tokens = await
-
+        post_summary_tokens = await _safe_token_estimation(
+            estimate_post_summary_tokens,
+            deps.llm_model.name,
+            model_max_tokens,
+            messages,
+            last_summary_index,
+            deps.llm_model,
         )
+
         post_summary_percentage = (
             (post_summary_tokens / max_tokens) * 100 if max_tokens > 0 else 0
         )
@@ -169,7 +261,7 @@ async def token_limit_compactor(
         )
 
         # Only do incremental compaction if post-summary conversation exceeds threshold
-        if post_summary_tokens < max_tokens:
+        if post_summary_tokens < max_tokens and not force:
             logger.debug(
                 f"Post-summary conversation under threshold ({post_summary_tokens} < {max_tokens}), "
                 f"keeping all {len(messages)} messages"
@@ -325,6 +417,9 @@ async def token_limit_compactor(
             compacted_messages, messages
         )
 
+        # Filter out orphaned tool responses (tool responses without tool calls)
+        compacted_messages = filter_orphaned_tool_responses(compacted_messages)
+
         logger.debug(
             f"Incremental compaction complete: {len(messages)} -> {len(compacted_messages)} messages"
         )
@@ -340,6 +435,7 @@ async def token_limit_compactor(
             else 0
         )
 
+        # Track incremental compaction with simple metrics (fast, no token counting)
         track_event(
             "context_compaction_triggered",
             {
@@ -352,6 +448,10 @@ async def token_limit_compactor(
                 "agent_mode": deps.agent_mode.value
                 if hasattr(deps, "agent_mode") and deps.agent_mode
                 else "unknown",
+                # Model and provider info (no computation needed)
+                "model_name": deps.llm_model.name.value,
+                "provider": deps.llm_model.provider.value,
+                "key_provider": deps.llm_model.key_provider.value,
             },
         )
 
@@ -359,7 +459,14 @@ async def token_limit_compactor(
 
     else:
         # Check if total conversation exceeds threshold for full compaction
-        total_tokens = await
+        total_tokens = await _safe_token_estimation(
+            estimate_tokens_from_messages,
+            deps.llm_model.name,
+            model_max_tokens,
+            messages,
+            deps.llm_model,
+        )
+
         total_percentage = (total_tokens / max_tokens) * 100 if max_tokens > 0 else 0
 
         logger.debug(
@@ -368,7 +475,7 @@ async def token_limit_compactor(
         )
 
         # Only do full compaction if total conversation exceeds threshold
-        if total_tokens < max_tokens:
+        if total_tokens < max_tokens and not force:
             logger.debug(
                 f"Total conversation under threshold ({total_tokens} < {max_tokens}), "
                 f"keeping all {len(messages)} messages"
@@ -386,10 +493,32 @@ async def _full_compaction(
     deps: AgentDeps,
     messages: list[ModelMessage],
 ) -> list[ModelMessage]:
-    """Perform full compaction for first-time summarization.
+    """Perform full compaction for first-time summarization.
+
+    If the conversation is too large for single-pass compaction, delegates
+    to chunked compaction which breaks the conversation into logical chunks.
+    """
     # Extract context from all messages
     context = extract_context_from_messages(messages)
 
+    # Check if context would exceed model limit for compaction request
+    # We use CHUNK_SAFE_RATIO (70%) to leave room for prompt overhead
+    max_safe_input = int(deps.llm_model.max_input_tokens * CHUNK_SAFE_RATIO)
+
+    # Estimate context tokens
+    context_request: list[ModelMessage] = [ModelRequest.user_text_prompt(context)]
+    context_tokens = await estimate_tokens_from_messages(
+        context_request, deps.llm_model
+    )
+
+    if context_tokens > max_safe_input:
+        # Context too large for single-pass compaction - use chunked approach
+        logger.info(
+            f"Context ({context_tokens:,} tokens) exceeds safe limit "
+            f"({max_safe_input:,} tokens), using chunked compaction"
+        )
+        return await _chunked_compaction(deps, messages)
+
     # Use regular summarization prompt
     summarization_prompt = prompt_loader.render("history/summarization.j2")
     request_messages: list[ModelMessage] = [
@@ -462,12 +591,16 @@ async def _full_compaction(
     # Ensure history ends with ModelRequest for PydanticAI compatibility
     compacted_messages = ensure_ends_with_model_request(compacted_messages, messages)
 
+    # Filter out orphaned tool responses (tool responses without tool calls)
+    compacted_messages = filter_orphaned_tool_responses(compacted_messages)
+
     # Track full compaction event
     messages_before = len(messages)
     messages_after = len(compacted_messages)
     tokens_before = current_tokens  # Already calculated above
     tokens_after = summary_usage.output_tokens if summary_usage else 0
 
+    # Track full compaction with simple metrics (fast, no token counting)
     track_event(
         "context_compaction_triggered",
         {
@@ -480,7 +613,246 @@ async def _full_compaction(
             "agent_mode": deps.agent_mode.value
             if hasattr(deps, "agent_mode") and deps.agent_mode
             else "unknown",
+            # Model and provider info (no computation needed)
+            "model_name": deps.llm_model.name.value,
+            "provider": deps.llm_model.provider.value,
+            "key_provider": deps.llm_model.key_provider.value,
         },
     )
 
     return compacted_messages
+
+
+async def _chunked_compaction(
+    deps: AgentDeps,
+    messages: list[ModelMessage],
+) -> list[ModelMessage]:
+    """Perform chunked compaction for oversized conversations.
+
+    Breaks the conversation into logical chunks, summarizes each sequentially,
+    then combines the summaries into a master summary.
+    """
+    from .chunking import chunk_messages_for_compaction
+
+    # Split into chunks and retention window
+    chunks, retained_messages = await chunk_messages_for_compaction(
+        messages, deps.llm_model
+    )
+
+    if not chunks:
+        # No chunks to summarize (conversation too small), return retained messages
+        logger.debug("No chunks to summarize, returning retained messages")
+        return retained_messages
+
+    # Track chunked compaction
+    total_chunks = len(chunks)
+    logger.info(f"Starting chunked compaction: {total_chunks} chunks to process")
+
+    # Summarize each chunk sequentially
+    chunk_summaries: list[str] = []
+    for chunk in chunks:
+        try:
+            summary = await _summarize_chunk(chunk, total_chunks, deps)
+            chunk_summaries.append(summary)
+            logger.debug(
+                f"Chunk {chunk.chunk_index + 1}/{total_chunks} summarized successfully"
+            )
+        except Exception as e:
+            logger.warning(
+                f"Failed to summarize chunk {chunk.chunk_index + 1}/{total_chunks}: {e}"
+            )
+            # Continue with other chunks - we'll note the gap in fusion
+            chunk_summaries.append(
+                f"[Chunk {chunk.chunk_index + 1} summary unavailable]"
+            )
+
+    # Combine summaries into master summary
+    if len(chunk_summaries) == 1:
+        final_summary = chunk_summaries[0]
+    else:
+        final_summary = await _combine_chunk_summaries(chunk_summaries, deps)
+
+    # Build final compacted history
+    compacted = _build_chunked_compaction_result(
+        final_summary, messages, retained_messages, deps
+    )
+
+    # Track chunked compaction event
+    track_event(
+        "chunked_compaction_triggered",
+        {
+            "num_chunks": total_chunks,
+            "chunks_succeeded": sum(
+                1 for s in chunk_summaries if not s.startswith("[Chunk")
+            ),
+            "retention_window_size": len(retained_messages),
+            "model_name": deps.llm_model.name.value,
+            "provider": deps.llm_model.provider.value,
+        },
+    )
+
+    return compacted
+
+
+async def _summarize_chunk(
+    chunk: "chunking.Chunk",
+    total_chunks: int,
+    deps: AgentDeps,
+) -> str:
+    """Summarize a single chunk of messages."""
+    chunk_messages = chunk.get_all_messages()
+    context = extract_context_from_messages(chunk_messages)
+
+    # Use chunk summarization template
+    chunk_prompt = prompt_loader.render(
+        "history/chunk_summarization.j2",
+        chunk_index=chunk.chunk_index + 1,
+        total_chunks=total_chunks,
+        chunk_content=context,
+    )
+
+    request_messages: list[ModelMessage] = [
+        ModelRequest.user_text_prompt(context, instructions=chunk_prompt)
+    ]
+
+    max_tokens = await calculate_max_summarization_tokens(
+        deps.llm_model, request_messages
+    )
+
+    log_summarization_request(
+        deps.llm_model,
+        max_tokens,
+        chunk_prompt,
+        context[:500] + "..." if len(context) > 500 else context,
+        f"CHUNK_{chunk.chunk_index + 1}",
+    )
+
+    response = await shotgun_model_request(
+        model_config=deps.llm_model,
+        messages=request_messages,
+        model_settings=ModelSettings(max_tokens=max_tokens),
+    )
+
+    log_summarization_response(response, f"CHUNK_{chunk.chunk_index + 1}")
+
+    if response.parts and isinstance(response.parts[0], TextPart):
+        return response.parts[0].content
+    return ""
+
+
+async def _combine_chunk_summaries(
+    summaries: list[str],
+    deps: AgentDeps,
+) -> str:
+    """Combine multiple chunk summaries into a unified summary."""
+    # Check if combined summaries exceed limit (may need recursive combination)
+    combined_text = "\n\n".join(summaries)
+    combined_request: list[ModelMessage] = [
+        ModelRequest.user_text_prompt(combined_text)
+    ]
+    combined_tokens = await estimate_tokens_from_messages(
+        combined_request, deps.llm_model
+    )
+
+    max_safe_input = int(deps.llm_model.max_input_tokens * CHUNK_SAFE_RATIO)
+
+    if combined_tokens > max_safe_input:
+        # Recursive: split summaries in half and combine each half first
+        logger.warning(
+            f"Combined summaries too large ({combined_tokens:,} tokens), "
+            f"applying recursive combination"
+        )
+        mid = len(summaries) // 2
+        first_half = await _combine_chunk_summaries(summaries[:mid], deps)
+        second_half = await _combine_chunk_summaries(summaries[mid:], deps)
+        summaries = [first_half, second_half]
+
+    # Use combination template
+    combine_prompt = prompt_loader.render(
+        "history/combine_summaries.j2",
+        num_summaries=len(summaries),
+        chunk_summaries=summaries,
+    )
+
+    request_messages: list[ModelMessage] = [
+        ModelRequest.user_text_prompt(
+            "\n\n---\n\n".join(summaries), instructions=combine_prompt
+        )
+    ]
+
+    max_tokens = await calculate_max_summarization_tokens(
+        deps.llm_model, request_messages
+    )
+
+    log_summarization_request(
+        deps.llm_model,
+        max_tokens,
+        combine_prompt,
+        f"[{len(summaries)} summaries to combine]",
+        "COMBINE",
+    )
+
+    response = await shotgun_model_request(
+        model_config=deps.llm_model,
+        messages=request_messages,
+        model_settings=ModelSettings(max_tokens=max_tokens),
+    )
+
+    log_summarization_response(response, "COMBINE")
+
+    if response.parts and isinstance(response.parts[0], TextPart):
+        return response.parts[0].content
+    return ""
+
+
+def _build_chunked_compaction_result(
+    final_summary: str,
+    original_messages: list[ModelMessage],
+    retained_messages: list[ModelMessage],
+    deps: AgentDeps,
+) -> list[ModelMessage]:
+    """Build the final compacted history from chunked compaction."""
+    from pydantic_ai.messages import ModelRequestPart
+
+    # Extract system context from original messages
+    agent_prompt = get_agent_system_prompt(original_messages) or ""
+    system_status = get_latest_system_status(original_messages) or ""
+    first_user = get_first_user_request(original_messages) or ""
+
+    # Create marked summary
+    summary_part = TextPart(content=f"{SUMMARY_MARKER} {final_summary}")
+    summary_message = ModelResponse(parts=[summary_part])
+
+    # Build compacted structure
+    compacted: list[ModelMessage] = []
+
+    # Initial request with system context
+    parts: list[ModelRequestPart] = []
+    if agent_prompt:
+        parts.append(AgentSystemPrompt(content=agent_prompt))
+    if system_status:
+        parts.append(SystemStatusPrompt(content=system_status))
+    if first_user:
+        parts.append(UserPromptPart(content=first_user))
+
+    if parts:
+        compacted.append(ModelRequest(parts=parts))
+
+    # Add summary
+    compacted.append(summary_message)
+
+    # Add retained messages (recent context)
+    compacted.extend(retained_messages)
+
+    # Ensure ends with ModelRequest for PydanticAI compatibility
+    compacted = ensure_ends_with_model_request(compacted, original_messages)
+
+    # Filter orphaned tool responses
+    compacted = filter_orphaned_tool_responses(compacted)
+
+    logger.info(
+        f"Chunked compaction complete: {len(original_messages)} messages -> "
+        f"{len(compacted)} messages (retained {len(retained_messages)} recent)"
+    )
+
+    return compacted
```
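In `_full_compaction`, the hand-off to `_chunked_compaction` happens once the extracted context exceeds `max_input_tokens * CHUNK_SAFE_RATIO`. A back-of-the-envelope sketch of that threshold check; the constant's real value lives in `.constants` (this diff only shows the "70%" comment), and the 200,000-token input limit is an illustrative assumption:

```python
# Illustrative numbers only: 0.70 follows the "70%" comment in the diff and
# 200_000 is an assumed max_input_tokens, not a value read from this package.
CHUNK_SAFE_RATIO = 0.70
max_input_tokens = 200_000

max_safe_input = int(max_input_tokens * CHUNK_SAFE_RATIO)  # 140_000
context_tokens = 180_000  # pretend the extracted conversation context is this large

if context_tokens > max_safe_input:
    # this is the branch where _full_compaction would delegate to _chunked_compaction
    print(f"{context_tokens:,} > {max_safe_input:,}: use chunked compaction")
```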
shotgun/agents/{history → conversation/history}/token_counting/anthropic.py

```diff
@@ -1,6 +1,7 @@
 """Anthropic token counting using official client."""
 
 import logfire
+from anthropic import APIStatusError
 from pydantic_ai.messages import ModelMessage
 
 from shotgun.agents.config.models import KeyProvider
@@ -72,11 +73,23 @@ class AnthropicTokenCounter(TokenCounter):
         Raises:
             RuntimeError: If API call fails
         """
+        # Handle empty text to avoid unnecessary API calls
+        # Anthropic API requires non-empty content, so we need a strict check
+        if not text or not text.strip():
+            return 0
+
+        # Additional validation: ensure the text has actual content
+        # Some edge cases might have only whitespace or control characters
+        cleaned_text = text.strip()
+        if not cleaned_text:
+            return 0
+
         try:
             # Anthropic API expects messages format and model parameter
             # Use await with async client
             result = await self.client.messages.count_tokens(
-                messages=[{"role": "user", "content":
+                messages=[{"role": "user", "content": cleaned_text}],
+                model=self.model_name,
             )
             return result.input_tokens
         except Exception as e:
@@ -91,6 +104,13 @@ class AnthropicTokenCounter(TokenCounter):
                 exception_type=type(e).__name__,
                 exception_message=str(e),
             )
+
+            # Re-raise API errors directly so they can be classified by the runner
+            # This allows proper error classification for BYOK users (authentication, rate limits, etc.)
+            if isinstance(e, APIStatusError):
+                raise
+
+            # Only wrap library-level errors in RuntimeError
             raise RuntimeError(
                 f"Anthropic token counting API failed for {self.model_name}: {type(e).__name__}: {str(e)}"
             ) from e
@@ -107,5 +127,9 @@ class AnthropicTokenCounter(TokenCounter):
         Raises:
             RuntimeError: If token counting fails
         """
+        # Handle empty message list early
+        if not messages:
+            return 0
+
         total_text = extract_text_from_messages(messages)
         return await self.count_tokens(total_text)
```
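For reference, the new `model=` argument matches the Anthropic SDK's token-counting endpoint, which takes the same `messages` shape as a normal request and returns an object exposing `input_tokens` (as the `result.input_tokens` read above confirms). A standalone sketch of that call, assuming `ANTHROPIC_API_KEY` is set in the environment and using an illustrative model id:

```python
# Standalone sketch of the count_tokens call shape used above. The client and
# method come from the official `anthropic` package; the model id below is an
# illustrative assumption, not one read from this package's configuration.
import asyncio

from anthropic import AsyncAnthropic


async def main() -> None:
    client = AsyncAnthropic()  # reads ANTHROPIC_API_KEY from the environment
    result = await client.messages.count_tokens(
        model="claude-3-5-sonnet-latest",
        messages=[{"role": "user", "content": "Hello, world"}],
    )
    print(result.input_tokens)


asyncio.run(main())
```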
shotgun/agents/{history → conversation/history}/token_counting/base.py

```diff
@@ -56,12 +56,23 @@ def extract_text_from_messages(messages: list[ModelMessage]) -> str:
         if hasattr(message, "parts"):
             for part in message.parts:
                 if hasattr(part, "content") and isinstance(part.content, str):
-
+                    # Only add non-empty content
+                    if part.content.strip():
+                        text_parts.append(part.content)
                 else:
                     # Handle non-text parts (tool calls, etc.)
-
+                    part_str = str(part)
+                    if part_str.strip():
+                        text_parts.append(part_str)
         else:
             # Handle messages without parts
-
+            msg_str = str(message)
+            if msg_str.strip():
+                text_parts.append(msg_str)
+
+    # If no valid text parts found, return a minimal placeholder
+    # This ensures we never send completely empty content to APIs
+    if not text_parts:
+        return "."
 
     return "\n".join(text_parts)
```
shotgun/agents/{history → conversation/history}/token_counting/openai.py

```diff
@@ -57,9 +57,15 @@ class OpenAITokenCounter(TokenCounter):
         Raises:
             RuntimeError: If token counting fails
         """
+        # Handle empty text to avoid unnecessary encoding
+        if not text or not text.strip():
+            return 0
+
         try:
             return len(self.encoding.encode(text))
-        except
+        except BaseException as e:
+            # Must catch BaseException to handle PanicException from tiktoken's Rust layer
+            # which can occur with extremely long texts. Regular Exception won't catch it.
             raise RuntimeError(
                 f"Failed to count tokens for OpenAI model {self.model_name}"
             ) from e
@@ -76,5 +82,9 @@ class OpenAITokenCounter(TokenCounter):
         Raises:
             RuntimeError: If token counting fails
         """
+        # Handle empty message list early
+        if not messages:
+            return 0
+
         total_text = extract_text_from_messages(messages)
         return await self.count_tokens(total_text)
```
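The switch to `except BaseException` in the tiktoken path is deliberate: a Rust panic inside a pyo3-based extension surfaces as `pyo3_runtime.PanicException`, which derives from `BaseException`, so a plain `except Exception` never sees it. A standalone sketch of the same guard pattern, using a tiktoken encoding name that is known to exist:

```python
# Minimal sketch of the guard used above, outside the TokenCounter class.
# `except BaseException` also catches pyo3 panic exceptions that a plain
# `except Exception` would miss; re-wrapping as RuntimeError mirrors the diff.
import tiktoken


def count_tokens_guarded(text: str) -> int:
    if not text or not text.strip():
        return 0
    encoding = tiktoken.get_encoding("cl100k_base")
    try:
        return len(encoding.encode(text))
    except BaseException as e:  # deliberate: a Rust panic is not an Exception subclass
        raise RuntimeError("Failed to count tokens") from e
```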
shotgun/agents/{history → conversation/history}/token_counting/sentencepiece_counter.py

```diff
@@ -88,6 +88,10 @@ class SentencePieceTokenCounter(TokenCounter):
         Raises:
             RuntimeError: If token counting fails
         """
+        # Handle empty text to avoid unnecessary tokenization
+        if not text or not text.strip():
+            return 0
+
         await self._ensure_tokenizer()
 
         if self.sp is None:
@@ -115,5 +119,9 @@ class SentencePieceTokenCounter(TokenCounter):
         Raises:
             RuntimeError: If token counting fails
         """
+        # Handle empty message list early
+        if not messages:
+            return 0
+
         total_text = extract_text_from_messages(messages)
         return await self.count_tokens(total_text)
```
shotgun/agents/{history → conversation/history}/token_counting/tokenizer_cache.py

```diff
@@ -3,6 +3,7 @@
 import hashlib
 from pathlib import Path
 
+import aiofiles
 import httpx
 
 from shotgun.logging_config import get_logger
@@ -78,7 +79,8 @@ async def download_gemini_tokenizer() -> Path:
 
     # Atomic write: write to temp file first, then rename
     temp_path = cache_path.with_suffix(".tmp")
-
+    async with aiofiles.open(temp_path, "wb") as f:
+        await f.write(content)
     temp_path.rename(cache_path)
 
     logger.info(f"Gemini tokenizer downloaded and cached at {cache_path}")
```
shotgun/agents/{history → conversation/history}/token_counting/utils.py

```diff
@@ -44,9 +44,6 @@ def get_token_counter(model_config: ModelConfig) -> TokenCounter:
 
     # Return cached instance if available
     if cache_key in _token_counter_cache:
-        logger.debug(
-            f"Reusing cached token counter for {model_config.provider.value}:{model_config.name}"
-        )
         return _token_counter_cache[cache_key]
 
     # Create new instance and cache it
```