shotgun-sh 0.2.8.dev2__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. shotgun/agents/agent_manager.py +354 -46
  2. shotgun/agents/common.py +14 -8
  3. shotgun/agents/config/constants.py +0 -6
  4. shotgun/agents/config/manager.py +66 -35
  5. shotgun/agents/config/models.py +41 -1
  6. shotgun/agents/config/provider.py +33 -5
  7. shotgun/agents/context_analyzer/__init__.py +28 -0
  8. shotgun/agents/context_analyzer/analyzer.py +471 -0
  9. shotgun/agents/context_analyzer/constants.py +9 -0
  10. shotgun/agents/context_analyzer/formatter.py +115 -0
  11. shotgun/agents/context_analyzer/models.py +212 -0
  12. shotgun/agents/conversation_history.py +2 -0
  13. shotgun/agents/conversation_manager.py +35 -19
  14. shotgun/agents/export.py +2 -2
  15. shotgun/agents/history/compaction.py +9 -4
  16. shotgun/agents/history/history_processors.py +113 -5
  17. shotgun/agents/history/token_counting/anthropic.py +17 -1
  18. shotgun/agents/history/token_counting/base.py +14 -3
  19. shotgun/agents/history/token_counting/openai.py +11 -1
  20. shotgun/agents/history/token_counting/sentencepiece_counter.py +8 -0
  21. shotgun/agents/history/token_counting/tokenizer_cache.py +3 -1
  22. shotgun/agents/history/token_counting/utils.py +0 -3
  23. shotgun/agents/plan.py +2 -2
  24. shotgun/agents/research.py +3 -3
  25. shotgun/agents/specify.py +2 -2
  26. shotgun/agents/tasks.py +2 -2
  27. shotgun/agents/tools/codebase/codebase_shell.py +6 -0
  28. shotgun/agents/tools/codebase/directory_lister.py +6 -0
  29. shotgun/agents/tools/codebase/file_read.py +11 -2
  30. shotgun/agents/tools/codebase/query_graph.py +6 -0
  31. shotgun/agents/tools/codebase/retrieve_code.py +6 -0
  32. shotgun/agents/tools/file_management.py +27 -7
  33. shotgun/agents/tools/registry.py +217 -0
  34. shotgun/agents/tools/web_search/__init__.py +8 -8
  35. shotgun/agents/tools/web_search/anthropic.py +8 -2
  36. shotgun/agents/tools/web_search/gemini.py +7 -1
  37. shotgun/agents/tools/web_search/openai.py +7 -1
  38. shotgun/agents/tools/web_search/utils.py +2 -2
  39. shotgun/agents/usage_manager.py +16 -11
  40. shotgun/api_endpoints.py +7 -3
  41. shotgun/build_constants.py +3 -3
  42. shotgun/cli/clear.py +53 -0
  43. shotgun/cli/compact.py +186 -0
  44. shotgun/cli/config.py +8 -5
  45. shotgun/cli/context.py +111 -0
  46. shotgun/cli/export.py +1 -1
  47. shotgun/cli/feedback.py +4 -2
  48. shotgun/cli/models.py +1 -0
  49. shotgun/cli/plan.py +1 -1
  50. shotgun/cli/research.py +1 -1
  51. shotgun/cli/specify.py +1 -1
  52. shotgun/cli/tasks.py +1 -1
  53. shotgun/cli/update.py +16 -2
  54. shotgun/codebase/core/change_detector.py +5 -3
  55. shotgun/codebase/core/code_retrieval.py +4 -2
  56. shotgun/codebase/core/ingestor.py +10 -8
  57. shotgun/codebase/core/manager.py +13 -4
  58. shotgun/codebase/core/nl_query.py +1 -1
  59. shotgun/exceptions.py +32 -0
  60. shotgun/logging_config.py +18 -27
  61. shotgun/main.py +73 -11
  62. shotgun/posthog_telemetry.py +37 -28
  63. shotgun/prompts/agents/partials/common_agent_system_prompt.j2 +3 -2
  64. shotgun/sentry_telemetry.py +163 -16
  65. shotgun/settings.py +238 -0
  66. shotgun/telemetry.py +10 -33
  67. shotgun/tui/app.py +243 -43
  68. shotgun/tui/commands/__init__.py +1 -1
  69. shotgun/tui/components/context_indicator.py +179 -0
  70. shotgun/tui/components/mode_indicator.py +70 -0
  71. shotgun/tui/components/status_bar.py +48 -0
  72. shotgun/tui/containers.py +91 -0
  73. shotgun/tui/dependencies.py +39 -0
  74. shotgun/tui/protocols.py +45 -0
  75. shotgun/tui/screens/chat/__init__.py +5 -0
  76. shotgun/tui/screens/chat/chat.tcss +54 -0
  77. shotgun/tui/screens/chat/chat_screen.py +1254 -0
  78. shotgun/tui/screens/chat/codebase_index_prompt_screen.py +64 -0
  79. shotgun/tui/screens/chat/codebase_index_selection.py +12 -0
  80. shotgun/tui/screens/chat/help_text.py +40 -0
  81. shotgun/tui/screens/chat/prompt_history.py +48 -0
  82. shotgun/tui/screens/chat.tcss +11 -0
  83. shotgun/tui/screens/chat_screen/command_providers.py +78 -2
  84. shotgun/tui/screens/chat_screen/history/__init__.py +22 -0
  85. shotgun/tui/screens/chat_screen/history/agent_response.py +66 -0
  86. shotgun/tui/screens/chat_screen/history/chat_history.py +115 -0
  87. shotgun/tui/screens/chat_screen/history/formatters.py +115 -0
  88. shotgun/tui/screens/chat_screen/history/partial_response.py +43 -0
  89. shotgun/tui/screens/chat_screen/history/user_question.py +42 -0
  90. shotgun/tui/screens/confirmation_dialog.py +151 -0
  91. shotgun/tui/screens/feedback.py +4 -4
  92. shotgun/tui/screens/github_issue.py +102 -0
  93. shotgun/tui/screens/model_picker.py +49 -24
  94. shotgun/tui/screens/onboarding.py +431 -0
  95. shotgun/tui/screens/pipx_migration.py +153 -0
  96. shotgun/tui/screens/provider_config.py +50 -27
  97. shotgun/tui/screens/shotgun_auth.py +2 -2
  98. shotgun/tui/screens/welcome.py +14 -11
  99. shotgun/tui/services/__init__.py +5 -0
  100. shotgun/tui/services/conversation_service.py +184 -0
  101. shotgun/tui/state/__init__.py +7 -0
  102. shotgun/tui/state/processing_state.py +185 -0
  103. shotgun/tui/utils/mode_progress.py +14 -7
  104. shotgun/tui/widgets/__init__.py +5 -0
  105. shotgun/tui/widgets/widget_coordinator.py +263 -0
  106. shotgun/utils/file_system_utils.py +22 -2
  107. shotgun/utils/marketing.py +110 -0
  108. shotgun/utils/update_checker.py +69 -14
  109. shotgun_sh-0.2.17.dist-info/METADATA +465 -0
  110. shotgun_sh-0.2.17.dist-info/RECORD +194 -0
  111. {shotgun_sh-0.2.8.dev2.dist-info → shotgun_sh-0.2.17.dist-info}/entry_points.txt +1 -0
  112. {shotgun_sh-0.2.8.dev2.dist-info → shotgun_sh-0.2.17.dist-info}/licenses/LICENSE +1 -1
  113. shotgun/tui/screens/chat.py +0 -996
  114. shotgun/tui/screens/chat_screen/history.py +0 -335
  115. shotgun_sh-0.2.8.dev2.dist-info/METADATA +0 -126
  116. shotgun_sh-0.2.8.dev2.dist-info/RECORD +0 -155
  117. {shotgun_sh-0.2.8.dev2.dist-info → shotgun_sh-0.2.17.dist-info}/WHEEL +0 -0
shotgun/agents/context_analyzer/models.py ADDED
@@ -0,0 +1,212 @@
+ """Pydantic models for context analysis."""
+
+ from typing import Any
+
+ from pydantic import BaseModel, Field
+
+
+ class TokenAllocation(BaseModel):
+     """Token counts allocated from API usage data by message/tool type.
+
+     Used internally by ContextAnalyzer to track token distribution across
+     different message types and tool categories.
+     """
+
+     user: int = Field(ge=0, default=0, description="Tokens from user prompts")
+     agent_responses: int = Field(
+         ge=0, default=0, description="Tokens from agent text responses"
+     )
+     system_prompts: int = Field(
+         ge=0, default=0, description="Tokens from system prompts"
+     )
+     system_status: int = Field(
+         ge=0, default=0, description="Tokens from system status messages"
+     )
+     codebase_understanding: int = Field(
+         ge=0, default=0, description="Tokens from codebase understanding tools"
+     )
+     artifact_management: int = Field(
+         ge=0, default=0, description="Tokens from artifact management tools"
+     )
+     web_research: int = Field(
+         ge=0, default=0, description="Tokens from web research tools"
+     )
+     unknown: int = Field(ge=0, default=0, description="Tokens from uncategorized tools")
+
+
+ class MessageTypeStats(BaseModel):
+     """Statistics for a specific message type."""
+
+     count: int = Field(ge=0, description="Number of messages of this type")
+     tokens: int = Field(ge=0, description="Total tokens consumed by this type")
+
+     @property
+     def avg_tokens(self) -> float:
+         """Calculate average tokens per message."""
+         return self.tokens / self.count if self.count > 0 else 0.0
+
+
+ class ContextAnalysis(BaseModel):
+     """Complete analysis of conversation context composition."""
+
+     user_messages: MessageTypeStats
+     agent_responses: MessageTypeStats
+     system_prompts: MessageTypeStats
+     system_status: MessageTypeStats
+     codebase_understanding: MessageTypeStats
+     artifact_management: MessageTypeStats
+     web_research: MessageTypeStats
+     unknown: MessageTypeStats
+     hint_messages: MessageTypeStats
+     total_tokens: int = Field(ge=0, description="Total tokens including hints")
+     total_messages: int = Field(ge=0, description="Total message count including hints")
+     context_window: int = Field(ge=0, description="Model's maximum input tokens")
+     agent_context_tokens: int = Field(
+         ge=0,
+         description="Tokens that actually consume agent context (excluding UI-only)",
+     )
+     model_name: str = Field(description="Name of the model being used")
+     max_usable_tokens: int = Field(
+         ge=0, description="80% of max_input_tokens (usable limit)"
+     )
+     free_space_tokens: int = Field(
+         description="Remaining tokens available (negative if over capacity)"
+     )
+
+     def get_percentage(self, stats: MessageTypeStats) -> float:
+         """Calculate percentage of agent context tokens for a message type.
+
+         Args:
+             stats: Message type statistics to calculate percentage for
+
+         Returns:
+             Percentage of total agent context tokens (0-100)
+         """
+         return (
+             (stats.tokens / self.agent_context_tokens * 100)
+             if self.agent_context_tokens > 0
+             else 0.0
+         )
+
+
+ class ContextCompositionTelemetry(BaseModel):
+     """Telemetry data for context composition tracking to PostHog."""
+
+     # Context usage
+     total_messages: int = Field(ge=0)
+     agent_context_tokens: int = Field(ge=0)
+     context_window: int = Field(ge=0)
+     max_usable_tokens: int = Field(ge=0)
+     free_space_tokens: int = Field(ge=0)
+     usage_percentage: float = Field(ge=0, le=100)
+
+     # Message type counts
+     user_messages_count: int = Field(ge=0)
+     agent_responses_count: int = Field(ge=0)
+     system_prompts_count: int = Field(ge=0)
+     system_status_count: int = Field(ge=0)
+     codebase_understanding_count: int = Field(ge=0)
+     artifact_management_count: int = Field(ge=0)
+     web_research_count: int = Field(ge=0)
+     unknown_tools_count: int = Field(ge=0)
+
+     # Token distribution percentages
+     user_messages_pct: float = Field(ge=0, le=100)
+     agent_responses_pct: float = Field(ge=0, le=100)
+     system_prompts_pct: float = Field(ge=0, le=100)
+     system_status_pct: float = Field(ge=0, le=100)
+     codebase_understanding_pct: float = Field(ge=0, le=100)
+     artifact_management_pct: float = Field(ge=0, le=100)
+     web_research_pct: float = Field(ge=0, le=100)
+     unknown_tools_pct: float = Field(ge=0, le=100)
+
+     # Compaction info
+     compaction_occurred: bool
+     messages_before_compaction: int | None = None
+     messages_after_compaction: int | None = None
+     compaction_reduction_pct: float | None = None
+
+     @classmethod
+     def from_analysis(
+         cls,
+         analysis: "ContextAnalysis",
+         compaction_occurred: bool = False,
+         messages_before_compaction: int | None = None,
+     ) -> "ContextCompositionTelemetry":
+         """Create telemetry from context analysis.
+
+         Args:
+             analysis: The context analysis to convert
+             compaction_occurred: Whether message compaction occurred
+             messages_before_compaction: Number of messages before compaction
+
+         Returns:
+             ContextCompositionTelemetry instance
+         """
+         total_messages = analysis.total_messages - analysis.hint_messages.count
+         usage_pct = (
+             round((analysis.agent_context_tokens / analysis.max_usable_tokens * 100), 1)
+             if analysis.max_usable_tokens > 0
+             else 0
+         )
+
+         # Calculate compaction metrics
+         messages_after: int | None = None
+         compaction_reduction_pct: float | None = None
+
+         if compaction_occurred and messages_before_compaction is not None:
+             messages_after = total_messages
+             if messages_before_compaction > 0:
+                 compaction_reduction_pct = round(
+                     (1 - (total_messages / messages_before_compaction)) * 100, 1
+                 )
+
+         return cls(
+             # Context usage
+             total_messages=total_messages,
+             agent_context_tokens=analysis.agent_context_tokens,
+             context_window=analysis.context_window,
+             max_usable_tokens=analysis.max_usable_tokens,
+             free_space_tokens=analysis.free_space_tokens,
+             usage_percentage=usage_pct,
+             # Message type counts
+             user_messages_count=analysis.user_messages.count,
+             agent_responses_count=analysis.agent_responses.count,
+             system_prompts_count=analysis.system_prompts.count,
+             system_status_count=analysis.system_status.count,
+             codebase_understanding_count=analysis.codebase_understanding.count,
+             artifact_management_count=analysis.artifact_management.count,
+             web_research_count=analysis.web_research.count,
+             unknown_tools_count=analysis.unknown.count,
+             # Token distribution percentages
+             user_messages_pct=round(analysis.get_percentage(analysis.user_messages), 1),
+             agent_responses_pct=round(
+                 analysis.get_percentage(analysis.agent_responses), 1
+             ),
+             system_prompts_pct=round(
+                 analysis.get_percentage(analysis.system_prompts), 1
+             ),
+             system_status_pct=round(analysis.get_percentage(analysis.system_status), 1),
+             codebase_understanding_pct=round(
+                 analysis.get_percentage(analysis.codebase_understanding), 1
+             ),
+             artifact_management_pct=round(
+                 analysis.get_percentage(analysis.artifact_management), 1
+             ),
+             web_research_pct=round(analysis.get_percentage(analysis.web_research), 1),
+             unknown_tools_pct=round(analysis.get_percentage(analysis.unknown), 1),
+             # Compaction info
+             compaction_occurred=compaction_occurred,
+             messages_before_compaction=messages_before_compaction,
+             messages_after_compaction=messages_after,
+             compaction_reduction_pct=compaction_reduction_pct,
+         )
+
+
+ class ContextAnalysisOutput(BaseModel):
+     """Output format for context analysis with multiple representations."""
+
+     markdown: str = Field(description="Markdown-formatted analysis for display")
+     json_data: dict[str, Any] = Field(
+         description="JSON representation of analysis data"
+     )
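
Read together, ContextAnalysis carries per-category MessageTypeStats plus the model limits, and ContextCompositionTelemetry.from_analysis flattens that into the PostHog event payload. A rough usage sketch follows; the numeric values are invented purely for illustration, and only the names defined in this file are assumed.

from shotgun.agents.context_analyzer.models import (
    ContextAnalysis,
    ContextCompositionTelemetry,
    MessageTypeStats,
)

# Invented numbers: in the package these come from the context analyzer itself.
analysis = ContextAnalysis(
    user_messages=MessageTypeStats(count=4, tokens=1200),
    agent_responses=MessageTypeStats(count=4, tokens=6800),
    system_prompts=MessageTypeStats(count=1, tokens=2000),
    system_status=MessageTypeStats(count=1, tokens=300),
    codebase_understanding=MessageTypeStats(count=3, tokens=9500),
    artifact_management=MessageTypeStats(count=0, tokens=0),
    web_research=MessageTypeStats(count=0, tokens=0),
    unknown=MessageTypeStats(count=0, tokens=0),
    hint_messages=MessageTypeStats(count=2, tokens=150),
    total_tokens=19950,
    total_messages=15,
    context_window=200000,
    agent_context_tokens=19800,   # total minus UI-only hint tokens
    model_name="example-model",
    max_usable_tokens=160000,     # 80% of the 200000-token window
    free_space_tokens=140200,
)

print(f"codebase tools: {analysis.get_percentage(analysis.codebase_understanding):.1f}%")

telemetry = ContextCompositionTelemetry.from_analysis(analysis)
print(telemetry.usage_percentage)  # 19800 / 160000 -> 12.4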
shotgun/agents/conversation_history.py CHANGED
@@ -16,6 +16,8 @@ from pydantic_core import to_jsonable_python
  
  from shotgun.tui.screens.chat_screen.hint_message import HintMessage
  
+ __all__ = ["HintMessage", "ConversationHistory"]
+
  logger = logging.getLogger(__name__)
  
  SerializedMessage = dict[str, Any]
shotgun/agents/conversation_manager.py CHANGED
@@ -1,11 +1,15 @@
  """Manager for handling conversation persistence operations."""
  
+ import asyncio
  import json
- import shutil
  from pathlib import Path
  
+ import aiofiles
+ import aiofiles.os
+
  from shotgun.logging_config import get_logger
  from shotgun.utils import get_shotgun_home
+ from shotgun.utils.file_system_utils import async_copy_file
  
  from .conversation_history import ConversationHistory
  
@@ -27,14 +31,14 @@ class ConversationManager:
          else:
              self.conversation_path = conversation_path
  
-     def save(self, conversation: ConversationHistory) -> None:
+     async def save(self, conversation: ConversationHistory) -> None:
          """Save conversation history to file.
  
          Args:
              conversation: ConversationHistory to save
          """
          # Ensure directory exists
-         self.conversation_path.parent.mkdir(parents=True, exist_ok=True)
+         await aiofiles.os.makedirs(self.conversation_path.parent, exist_ok=True)
  
          try:
              # Update timestamp
@@ -42,11 +46,17 @@
  
              conversation.updated_at = datetime.now()
  
-             # Serialize to JSON using Pydantic's model_dump
-             data = conversation.model_dump(mode="json")
+             # Serialize to JSON in background thread to avoid blocking event loop
+             # This is crucial for large conversations (5k+ tokens)
+             data = await asyncio.to_thread(conversation.model_dump, mode="json")
+             json_content = await asyncio.to_thread(
+                 json.dumps, data, indent=2, ensure_ascii=False
+             )
  
-             with open(self.conversation_path, "w", encoding="utf-8") as f:
-                 json.dump(data, f, indent=2, ensure_ascii=False)
+             async with aiofiles.open(
+                 self.conversation_path, "w", encoding="utf-8"
+             ) as f:
+                 await f.write(json_content)
  
              logger.debug("Conversation saved to %s", self.conversation_path)
  
@@ -56,21 +66,26 @@
              )
              # Don't raise - we don't want to interrupt the user's session
  
-     def load(self) -> ConversationHistory | None:
+     async def load(self) -> ConversationHistory | None:
          """Load conversation history from file.
  
          Returns:
              ConversationHistory if file exists and is valid, None otherwise
          """
-         if not self.conversation_path.exists():
+         if not await aiofiles.os.path.exists(self.conversation_path):
              logger.debug("No conversation history found at %s", self.conversation_path)
              return None
  
          try:
-             with open(self.conversation_path, encoding="utf-8") as f:
-                 data = json.load(f)
-
-             conversation = ConversationHistory.model_validate(data)
+             async with aiofiles.open(self.conversation_path, encoding="utf-8") as f:
+                 content = await f.read()
+             # Deserialize JSON in background thread to avoid blocking
+             data = await asyncio.to_thread(json.loads, content)
+
+             # Validate model in background thread for large conversations
+             conversation = await asyncio.to_thread(
+                 ConversationHistory.model_validate, data
+             )
              logger.debug(
                  "Conversation loaded from %s with %d agent messages",
                  self.conversation_path,
@@ -89,7 +104,7 @@
              # Create a backup of the corrupted file for debugging
              backup_path = self.conversation_path.with_suffix(".json.backup")
              try:
-                 shutil.copy2(self.conversation_path, backup_path)
+                 await async_copy_file(self.conversation_path, backup_path)
                  logger.info("Backed up corrupted conversation to %s", backup_path)
              except Exception as backup_error:  # pragma: no cover
                  logger.warning("Failed to backup corrupted file: %s", backup_error)
@@ -105,11 +120,12 @@
              )
              return None
  
-     def clear(self) -> None:
+     async def clear(self) -> None:
          """Delete the conversation history file."""
-         if self.conversation_path.exists():
+         if await aiofiles.os.path.exists(self.conversation_path):
              try:
-                 self.conversation_path.unlink()
+                 # Use asyncio.to_thread for unlink operation
+                 await asyncio.to_thread(self.conversation_path.unlink)
                  logger.debug(
                      "Conversation history cleared at %s", self.conversation_path
                  )
@@ -118,10 +134,10 @@
                  "Failed to clear conversation at %s: %s", self.conversation_path, e
              )
  
-     def exists(self) -> bool:
+     async def exists(self) -> bool:
          """Check if a conversation history file exists.
  
          Returns:
              True if conversation file exists, False otherwise
          """
-         return self.conversation_path.exists()
+         return await aiofiles.os.path.exists(str(self.conversation_path))
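
Since save, load, clear, and exists are now coroutines, call sites have to await them. A minimal sketch, under the assumption that ConversationManager() can be constructed with its default conversation path:

import asyncio

from shotgun.agents.conversation_manager import ConversationManager


async def restore_and_persist() -> None:
    manager = ConversationManager()  # assumed: defaults to the standard shotgun home path
    conversation = await manager.load()  # returns None when nothing has been saved yet
    if conversation is None:
        print("no saved conversation")
        return
    # ... run the agent and mutate `conversation` here ...
    await manager.save(conversation)  # model_dump/json.dumps run in a worker thread


asyncio.run(restore_and_persist())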
shotgun/agents/export.py CHANGED
@@ -23,7 +23,7 @@ from .models import AgentDeps, AgentResponse, AgentRuntimeOptions, AgentType
  logger = get_logger(__name__)
  
  
- def create_export_agent(
+ async def create_export_agent(
      agent_runtime_options: AgentRuntimeOptions, provider: ProviderType | None = None
  ) -> tuple[Agent[AgentDeps, AgentResponse], AgentDeps]:
      """Create an export agent with file management capabilities.
@@ -39,7 +39,7 @@ def create_export_agent(
      # Use partial to create system prompt function for export agent
      system_prompt_fn = partial(build_agent_system_prompt, "export")
  
-     agent, deps = create_base_agent(
+     agent, deps = await create_base_agent(
          system_prompt_fn,
          agent_runtime_options,
          provider=provider,
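
Because create_export_agent is now a coroutine, the factory has to be awaited; a hypothetical call site:

from shotgun.agents.export import create_export_agent


async def build_export_agent(agent_runtime_options):
    # provider is omitted, so the configured default provider is used
    agent, deps = await create_export_agent(agent_runtime_options)
    return agent, deps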
shotgun/agents/history/compaction.py CHANGED
@@ -13,7 +13,7 @@ logger = get_logger(__name__)
  
  
  async def apply_persistent_compaction(
-     messages: list[ModelMessage], deps: AgentDeps
+     messages: list[ModelMessage], deps: AgentDeps, force: bool = False
  ) -> list[ModelMessage]:
      """Apply compaction to message history for persistent storage.
  
@@ -23,6 +23,7 @@ async def apply_persistent_compaction(
      Args:
          messages: Full message history from agent run
          deps: Agent dependencies containing model config
+         force: If True, force compaction even if below token threshold
  
      Returns:
          Compacted message history that should be stored as conversation state
@@ -46,7 +47,7 @@
              self.usage = usage
  
      ctx = MockContext(deps, usage)
-     compacted_messages = await token_limit_compactor(ctx, messages)
+     compacted_messages = await token_limit_compactor(ctx, messages, force=force)
  
      # Log the result for monitoring
      original_size = len(messages)
@@ -59,17 +60,21 @@
              f"({reduction_pct:.1f}% reduction)"
          )
  
-         # Track persistent compaction event
+         # Track persistent compaction event with simple metrics (fast, no token counting)
          track_event(
              "persistent_compaction_applied",
              {
+                 # Basic compaction metrics
                  "messages_before": original_size,
                  "messages_after": compacted_size,
-                 "tokens_before": estimated_tokens,
                  "reduction_percentage": round(reduction_pct, 2),
                  "agent_mode": deps.agent_mode.value
                  if hasattr(deps, "agent_mode") and deps.agent_mode
                  else "unknown",
+                 # Model and provider info (no computation needed)
+                 "model_name": deps.llm_model.name.value,
+                 "provider": deps.llm_model.provider.value,
+                 "key_provider": deps.llm_model.key_provider.value,
              },
          )
      else:
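
The new force flag lets callers compact even when the history is still below the token threshold, which is what a manual compaction command would need; a sketch (the wrapper function is hypothetical):

from shotgun.agents.history.compaction import apply_persistent_compaction


async def compact_now(messages, deps):
    # force=True bypasses the threshold check inside token_limit_compactor
    return await apply_persistent_compaction(messages, deps, force=True)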
shotgun/agents/history/history_processors.py CHANGED
@@ -1,7 +1,9 @@
  """History processors for managing conversation history in Shotgun agents."""
  
+ from collections.abc import Awaitable, Callable
  from typing import TYPE_CHECKING, Any, Protocol
  
+ from anthropic import APIStatusError
  from pydantic_ai import ModelSettings
  from pydantic_ai.messages import (
      ModelMessage,
@@ -14,6 +16,7 @@ from pydantic_ai.messages import (
  from shotgun.agents.llm import shotgun_model_request
  from shotgun.agents.messages import AgentSystemPrompt, SystemStatusPrompt
  from shotgun.agents.models import AgentDeps
+ from shotgun.exceptions import ContextSizeLimitExceeded
  from shotgun.logging_config import get_logger
  from shotgun.posthog_telemetry import track_event
  from shotgun.prompts import PromptLoader
@@ -51,6 +54,86 @@ logger = get_logger(__name__)
  prompt_loader = PromptLoader()
  
  
+ async def _safe_token_estimation(
+     estimation_func: Callable[..., Awaitable[int]],
+     model_name: str,
+     max_tokens: int,
+     *args: Any,
+     **kwargs: Any,
+ ) -> int:
+     """Safely estimate tokens with proper error handling.
+
+     Wraps token estimation functions to handle failures gracefully.
+     Only RuntimeError (from token counters) is wrapped in ContextSizeLimitExceeded.
+     Other errors (network, auth) are allowed to bubble up.
+
+     Args:
+         estimation_func: Async function that estimates tokens
+         model_name: Name of the model for error messages
+         max_tokens: Maximum tokens for the model
+         *args: Arguments to pass to estimation_func
+         **kwargs: Keyword arguments to pass to estimation_func
+
+     Returns:
+         Token count from estimation_func
+
+     Raises:
+         ContextSizeLimitExceeded: If token counting fails with RuntimeError
+         Exception: Any other exceptions from estimation_func
+     """
+     try:
+         return await estimation_func(*args, **kwargs)
+     except Exception as e:
+         # Log the error with full context
+         logger.warning(
+             f"Token counting failed for {model_name}",
+             extra={
+                 "error_type": type(e).__name__,
+                 "error_message": str(e),
+                 "model": model_name,
+             },
+         )
+
+         # Token counting behavior with oversized context (verified via testing):
+         #
+         # 1. OpenAI/tiktoken:
+         #    - Successfully counts any size (tested with 752K tokens, no error)
+         #    - Library errors: ValueError, KeyError, AttributeError, SSLError (file/cache issues)
+         #    - Wrapped as: RuntimeError by our counter
+         #
+         # 2. Gemini/SentencePiece:
+         #    - Successfully counts any size (tested with 752K tokens, no error)
+         #    - Library errors: RuntimeError, IOError, TypeError (file/model loading issues)
+         #    - Wrapped as: RuntimeError by our counter
+         #
+         # 3. Anthropic API:
+         #    - Successfully counts large token counts (tested with 752K tokens, no error)
+         #    - Only enforces 32 MB request size limit (not token count)
+         #    - Raises: APIStatusError(413) with error type 'request_too_large' for 32MB+ requests
+         #    - Other API errors: APIConnectionError, RateLimitError, APIStatusError (4xx/5xx)
+         #    - Wrapped as: RuntimeError by our counter
+         #
+         # IMPORTANT: No provider raises errors for "too many tokens" during counting.
+         # Token count validation happens separately by comparing count to max_input_tokens.
+         #
+         # We wrap RuntimeError (library-level failures from tiktoken/sentencepiece).
+         # We also wrap Anthropic's 413 error (request exceeds 32 MB) as it indicates
+         # context is effectively too large and needs user action to reduce it.
+         if isinstance(e, RuntimeError):
+             raise ContextSizeLimitExceeded(
+                 model_name=model_name, max_tokens=max_tokens
+             ) from e
+
+         # Check for Anthropic's 32 MB request size limit (APIStatusError with status 413)
+         if isinstance(e, APIStatusError) and e.status_code == 413:
+             raise ContextSizeLimitExceeded(
+                 model_name=model_name, max_tokens=max_tokens
+             ) from e
+
+         # Re-raise other exceptions (network errors, auth failures, etc.)
+         raise
+
+
  def is_summary_part(part: Any) -> bool:
      """Check if a message part is a compacted summary."""
      return isinstance(part, TextPart) and part.content.startswith(SUMMARY_MARKER)
@@ -127,6 +210,7 @@ calculate_max_summarization_tokens = _calculate_max_summarization_tokens
  async def token_limit_compactor(
      ctx: ContextProtocol,
      messages: list[ModelMessage],
+     force: bool = False,
  ) -> list[ModelMessage]:
      """Compact message history based on token limits with incremental processing.
  
@@ -139,6 +223,7 @@
      Args:
          ctx: Run context with usage information and dependencies
          messages: Current conversation history
+         force: If True, force compaction even if below token threshold
  
      Returns:
          Compacted list of messages within token limits
@@ -155,9 +240,15 @@
  
      if last_summary_index is not None:
          # Check if post-summary conversation exceeds threshold for incremental compaction
-         post_summary_tokens = await estimate_post_summary_tokens(
-             messages, last_summary_index, deps.llm_model
+         post_summary_tokens = await _safe_token_estimation(
+             estimate_post_summary_tokens,
+             deps.llm_model.name,
+             model_max_tokens,
+             messages,
+             last_summary_index,
+             deps.llm_model,
          )
+
          post_summary_percentage = (
              (post_summary_tokens / max_tokens) * 100 if max_tokens > 0 else 0
          )
@@ -169,7 +260,7 @@
          )
  
          # Only do incremental compaction if post-summary conversation exceeds threshold
-         if post_summary_tokens < max_tokens:
+         if post_summary_tokens < max_tokens and not force:
              logger.debug(
                  f"Post-summary conversation under threshold ({post_summary_tokens} < {max_tokens}), "
                  f"keeping all {len(messages)} messages"
@@ -340,6 +431,7 @@
              else 0
          )
  
+         # Track incremental compaction with simple metrics (fast, no token counting)
          track_event(
              "context_compaction_triggered",
              {
@@ -352,6 +444,10 @@
                  "agent_mode": deps.agent_mode.value
                  if hasattr(deps, "agent_mode") and deps.agent_mode
                  else "unknown",
+                 # Model and provider info (no computation needed)
+                 "model_name": deps.llm_model.name.value,
+                 "provider": deps.llm_model.provider.value,
+                 "key_provider": deps.llm_model.key_provider.value,
              },
          )
  
@@ -359,7 +455,14 @@
  
      else:
          # Check if total conversation exceeds threshold for full compaction
-         total_tokens = await estimate_tokens_from_messages(messages, deps.llm_model)
+         total_tokens = await _safe_token_estimation(
+             estimate_tokens_from_messages,
+             deps.llm_model.name,
+             model_max_tokens,
+             messages,
+             deps.llm_model,
+         )
+
          total_percentage = (total_tokens / max_tokens) * 100 if max_tokens > 0 else 0
  
          logger.debug(
@@ -368,7 +471,7 @@
          )
  
          # Only do full compaction if total conversation exceeds threshold
-         if total_tokens < max_tokens:
+         if total_tokens < max_tokens and not force:
              logger.debug(
                  f"Total conversation under threshold ({total_tokens} < {max_tokens}), "
                  f"keeping all {len(messages)} messages"
@@ -468,6 +571,7 @@ async def _full_compaction(
      tokens_before = current_tokens  # Already calculated above
      tokens_after = summary_usage.output_tokens if summary_usage else 0
  
+     # Track full compaction with simple metrics (fast, no token counting)
      track_event(
          "context_compaction_triggered",
          {
@@ -480,6 +584,10 @@
              "agent_mode": deps.agent_mode.value
              if hasattr(deps, "agent_mode") and deps.agent_mode
              else "unknown",
+             # Model and provider info (no computation needed)
+             "model_name": deps.llm_model.name.value,
+             "provider": deps.llm_model.provider.value,
+             "key_provider": deps.llm_model.key_provider.value,
          },
      )
  
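
The practical effect of _safe_token_estimation is that token-counting failures (tiktoken/SentencePiece RuntimeErrors, Anthropic 413s) surface as ContextSizeLimitExceeded instead of crashing the compactor. A sketch of how a caller might handle that, assuming the module path shotgun.agents.history.history_processors for token_limit_compactor:

from shotgun.agents.history.history_processors import token_limit_compactor
from shotgun.exceptions import ContextSizeLimitExceeded


async def compact_with_feedback(ctx, messages):
    try:
        return await token_limit_compactor(ctx, messages)
    except ContextSizeLimitExceeded:
        # Counting itself failed because the context is effectively too large;
        # fall back to the unmodified history and let the UI ask the user to trim it.
        return messages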
shotgun/agents/history/token_counting/anthropic.py CHANGED
@@ -72,11 +72,23 @@ class AnthropicTokenCounter(TokenCounter):
          Raises:
              RuntimeError: If API call fails
          """
+         # Handle empty text to avoid unnecessary API calls
+         # Anthropic API requires non-empty content, so we need a strict check
+         if not text or not text.strip():
+             return 0
+
+         # Additional validation: ensure the text has actual content
+         # Some edge cases might have only whitespace or control characters
+         cleaned_text = text.strip()
+         if not cleaned_text:
+             return 0
+
          try:
              # Anthropic API expects messages format and model parameter
              # Use await with async client
              result = await self.client.messages.count_tokens(
-                 messages=[{"role": "user", "content": text}], model=self.model_name
+                 messages=[{"role": "user", "content": cleaned_text}],
+                 model=self.model_name,
              )
              return result.input_tokens
          except Exception as e:
@@ -107,5 +119,9 @@
          Raises:
              RuntimeError: If token counting fails
          """
+         # Handle empty message list early
+         if not messages:
+             return 0
+
          total_text = extract_text_from_messages(messages)
          return await self.count_tokens(total_text)
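
With the guards above, whitespace-only input short-circuits to 0 before any request is made. A tiny sketch of the expected behavior, with construction of the counter elided because its constructor is not shown in this diff:

async def demo(counter) -> None:  # counter: an AnthropicTokenCounter instance
    assert await counter.count_tokens("") == 0          # no API call made
    assert await counter.count_tokens("   \n\t") == 0   # whitespace-only also skipped
    print(await counter.count_tokens("Hello world"))    # goes through messages.count_tokens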
shotgun/agents/history/token_counting/base.py CHANGED
@@ -56,12 +56,23 @@ def extract_text_from_messages(messages: list[ModelMessage]) -> str:
          if hasattr(message, "parts"):
              for part in message.parts:
                  if hasattr(part, "content") and isinstance(part.content, str):
-                     text_parts.append(part.content)
+                     # Only add non-empty content
+                     if part.content.strip():
+                         text_parts.append(part.content)
                  else:
                      # Handle non-text parts (tool calls, etc.)
-                     text_parts.append(str(part))
+                     part_str = str(part)
+                     if part_str.strip():
+                         text_parts.append(part_str)
          else:
              # Handle messages without parts
-             text_parts.append(str(message))
+             msg_str = str(message)
+             if msg_str.strip():
+                 text_parts.append(msg_str)
+
+     # If no valid text parts found, return a minimal placeholder
+     # This ensures we never send completely empty content to APIs
+     if not text_parts:
+         return "."
  
      return "\n".join(text_parts)
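
The "." fallback guarantees that downstream counters never receive completely empty content, which matters for the Anthropic empty-content check above. A small illustration, assuming the function lives at the module path used in this dist (token_counting/base.py):

from shotgun.agents.history.token_counting.base import extract_text_from_messages

print(repr(extract_text_from_messages([])))  # '.' as a placeholder instead of an empty string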