shotgun-sh 0.2.8.dev2__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff compares the contents of two package versions that were publicly released to one of the supported registries. The information is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- shotgun/agents/agent_manager.py +354 -46
- shotgun/agents/common.py +14 -8
- shotgun/agents/config/constants.py +0 -6
- shotgun/agents/config/manager.py +66 -35
- shotgun/agents/config/models.py +41 -1
- shotgun/agents/config/provider.py +33 -5
- shotgun/agents/context_analyzer/__init__.py +28 -0
- shotgun/agents/context_analyzer/analyzer.py +471 -0
- shotgun/agents/context_analyzer/constants.py +9 -0
- shotgun/agents/context_analyzer/formatter.py +115 -0
- shotgun/agents/context_analyzer/models.py +212 -0
- shotgun/agents/conversation_history.py +2 -0
- shotgun/agents/conversation_manager.py +35 -19
- shotgun/agents/export.py +2 -2
- shotgun/agents/history/compaction.py +9 -4
- shotgun/agents/history/history_processors.py +113 -5
- shotgun/agents/history/token_counting/anthropic.py +17 -1
- shotgun/agents/history/token_counting/base.py +14 -3
- shotgun/agents/history/token_counting/openai.py +11 -1
- shotgun/agents/history/token_counting/sentencepiece_counter.py +8 -0
- shotgun/agents/history/token_counting/tokenizer_cache.py +3 -1
- shotgun/agents/history/token_counting/utils.py +0 -3
- shotgun/agents/plan.py +2 -2
- shotgun/agents/research.py +3 -3
- shotgun/agents/specify.py +2 -2
- shotgun/agents/tasks.py +2 -2
- shotgun/agents/tools/codebase/codebase_shell.py +6 -0
- shotgun/agents/tools/codebase/directory_lister.py +6 -0
- shotgun/agents/tools/codebase/file_read.py +11 -2
- shotgun/agents/tools/codebase/query_graph.py +6 -0
- shotgun/agents/tools/codebase/retrieve_code.py +6 -0
- shotgun/agents/tools/file_management.py +27 -7
- shotgun/agents/tools/registry.py +217 -0
- shotgun/agents/tools/web_search/__init__.py +8 -8
- shotgun/agents/tools/web_search/anthropic.py +8 -2
- shotgun/agents/tools/web_search/gemini.py +7 -1
- shotgun/agents/tools/web_search/openai.py +7 -1
- shotgun/agents/tools/web_search/utils.py +2 -2
- shotgun/agents/usage_manager.py +16 -11
- shotgun/api_endpoints.py +7 -3
- shotgun/build_constants.py +3 -3
- shotgun/cli/clear.py +53 -0
- shotgun/cli/compact.py +186 -0
- shotgun/cli/config.py +8 -5
- shotgun/cli/context.py +111 -0
- shotgun/cli/export.py +1 -1
- shotgun/cli/feedback.py +4 -2
- shotgun/cli/models.py +1 -0
- shotgun/cli/plan.py +1 -1
- shotgun/cli/research.py +1 -1
- shotgun/cli/specify.py +1 -1
- shotgun/cli/tasks.py +1 -1
- shotgun/cli/update.py +16 -2
- shotgun/codebase/core/change_detector.py +5 -3
- shotgun/codebase/core/code_retrieval.py +4 -2
- shotgun/codebase/core/ingestor.py +10 -8
- shotgun/codebase/core/manager.py +13 -4
- shotgun/codebase/core/nl_query.py +1 -1
- shotgun/exceptions.py +32 -0
- shotgun/logging_config.py +18 -27
- shotgun/main.py +73 -11
- shotgun/posthog_telemetry.py +37 -28
- shotgun/prompts/agents/partials/common_agent_system_prompt.j2 +3 -2
- shotgun/sentry_telemetry.py +163 -16
- shotgun/settings.py +238 -0
- shotgun/telemetry.py +10 -33
- shotgun/tui/app.py +243 -43
- shotgun/tui/commands/__init__.py +1 -1
- shotgun/tui/components/context_indicator.py +179 -0
- shotgun/tui/components/mode_indicator.py +70 -0
- shotgun/tui/components/status_bar.py +48 -0
- shotgun/tui/containers.py +91 -0
- shotgun/tui/dependencies.py +39 -0
- shotgun/tui/protocols.py +45 -0
- shotgun/tui/screens/chat/__init__.py +5 -0
- shotgun/tui/screens/chat/chat.tcss +54 -0
- shotgun/tui/screens/chat/chat_screen.py +1254 -0
- shotgun/tui/screens/chat/codebase_index_prompt_screen.py +64 -0
- shotgun/tui/screens/chat/codebase_index_selection.py +12 -0
- shotgun/tui/screens/chat/help_text.py +40 -0
- shotgun/tui/screens/chat/prompt_history.py +48 -0
- shotgun/tui/screens/chat.tcss +11 -0
- shotgun/tui/screens/chat_screen/command_providers.py +78 -2
- shotgun/tui/screens/chat_screen/history/__init__.py +22 -0
- shotgun/tui/screens/chat_screen/history/agent_response.py +66 -0
- shotgun/tui/screens/chat_screen/history/chat_history.py +115 -0
- shotgun/tui/screens/chat_screen/history/formatters.py +115 -0
- shotgun/tui/screens/chat_screen/history/partial_response.py +43 -0
- shotgun/tui/screens/chat_screen/history/user_question.py +42 -0
- shotgun/tui/screens/confirmation_dialog.py +151 -0
- shotgun/tui/screens/feedback.py +4 -4
- shotgun/tui/screens/github_issue.py +102 -0
- shotgun/tui/screens/model_picker.py +49 -24
- shotgun/tui/screens/onboarding.py +431 -0
- shotgun/tui/screens/pipx_migration.py +153 -0
- shotgun/tui/screens/provider_config.py +50 -27
- shotgun/tui/screens/shotgun_auth.py +2 -2
- shotgun/tui/screens/welcome.py +14 -11
- shotgun/tui/services/__init__.py +5 -0
- shotgun/tui/services/conversation_service.py +184 -0
- shotgun/tui/state/__init__.py +7 -0
- shotgun/tui/state/processing_state.py +185 -0
- shotgun/tui/utils/mode_progress.py +14 -7
- shotgun/tui/widgets/__init__.py +5 -0
- shotgun/tui/widgets/widget_coordinator.py +263 -0
- shotgun/utils/file_system_utils.py +22 -2
- shotgun/utils/marketing.py +110 -0
- shotgun/utils/update_checker.py +69 -14
- shotgun_sh-0.2.17.dist-info/METADATA +465 -0
- shotgun_sh-0.2.17.dist-info/RECORD +194 -0
- {shotgun_sh-0.2.8.dev2.dist-info → shotgun_sh-0.2.17.dist-info}/entry_points.txt +1 -0
- {shotgun_sh-0.2.8.dev2.dist-info → shotgun_sh-0.2.17.dist-info}/licenses/LICENSE +1 -1
- shotgun/tui/screens/chat.py +0 -996
- shotgun/tui/screens/chat_screen/history.py +0 -335
- shotgun_sh-0.2.8.dev2.dist-info/METADATA +0 -126
- shotgun_sh-0.2.8.dev2.dist-info/RECORD +0 -155
- {shotgun_sh-0.2.8.dev2.dist-info → shotgun_sh-0.2.17.dist-info}/WHEEL +0 -0
shotgun/agents/context_analyzer/models.py
ADDED
@@ -0,0 +1,212 @@
"""Pydantic models for context analysis."""

from typing import Any

from pydantic import BaseModel, Field


class TokenAllocation(BaseModel):
    """Token counts allocated from API usage data by message/tool type.

    Used internally by ContextAnalyzer to track token distribution across
    different message types and tool categories.
    """

    user: int = Field(ge=0, default=0, description="Tokens from user prompts")
    agent_responses: int = Field(
        ge=0, default=0, description="Tokens from agent text responses"
    )
    system_prompts: int = Field(
        ge=0, default=0, description="Tokens from system prompts"
    )
    system_status: int = Field(
        ge=0, default=0, description="Tokens from system status messages"
    )
    codebase_understanding: int = Field(
        ge=0, default=0, description="Tokens from codebase understanding tools"
    )
    artifact_management: int = Field(
        ge=0, default=0, description="Tokens from artifact management tools"
    )
    web_research: int = Field(
        ge=0, default=0, description="Tokens from web research tools"
    )
    unknown: int = Field(ge=0, default=0, description="Tokens from uncategorized tools")


class MessageTypeStats(BaseModel):
    """Statistics for a specific message type."""

    count: int = Field(ge=0, description="Number of messages of this type")
    tokens: int = Field(ge=0, description="Total tokens consumed by this type")

    @property
    def avg_tokens(self) -> float:
        """Calculate average tokens per message."""
        return self.tokens / self.count if self.count > 0 else 0.0


class ContextAnalysis(BaseModel):
    """Complete analysis of conversation context composition."""

    user_messages: MessageTypeStats
    agent_responses: MessageTypeStats
    system_prompts: MessageTypeStats
    system_status: MessageTypeStats
    codebase_understanding: MessageTypeStats
    artifact_management: MessageTypeStats
    web_research: MessageTypeStats
    unknown: MessageTypeStats
    hint_messages: MessageTypeStats
    total_tokens: int = Field(ge=0, description="Total tokens including hints")
    total_messages: int = Field(ge=0, description="Total message count including hints")
    context_window: int = Field(ge=0, description="Model's maximum input tokens")
    agent_context_tokens: int = Field(
        ge=0,
        description="Tokens that actually consume agent context (excluding UI-only)",
    )
    model_name: str = Field(description="Name of the model being used")
    max_usable_tokens: int = Field(
        ge=0, description="80% of max_input_tokens (usable limit)"
    )
    free_space_tokens: int = Field(
        description="Remaining tokens available (negative if over capacity)"
    )

    def get_percentage(self, stats: MessageTypeStats) -> float:
        """Calculate percentage of agent context tokens for a message type.

        Args:
            stats: Message type statistics to calculate percentage for

        Returns:
            Percentage of total agent context tokens (0-100)
        """
        return (
            (stats.tokens / self.agent_context_tokens * 100)
            if self.agent_context_tokens > 0
            else 0.0
        )


class ContextCompositionTelemetry(BaseModel):
    """Telemetry data for context composition tracking to PostHog."""

    # Context usage
    total_messages: int = Field(ge=0)
    agent_context_tokens: int = Field(ge=0)
    context_window: int = Field(ge=0)
    max_usable_tokens: int = Field(ge=0)
    free_space_tokens: int = Field(ge=0)
    usage_percentage: float = Field(ge=0, le=100)

    # Message type counts
    user_messages_count: int = Field(ge=0)
    agent_responses_count: int = Field(ge=0)
    system_prompts_count: int = Field(ge=0)
    system_status_count: int = Field(ge=0)
    codebase_understanding_count: int = Field(ge=0)
    artifact_management_count: int = Field(ge=0)
    web_research_count: int = Field(ge=0)
    unknown_tools_count: int = Field(ge=0)

    # Token distribution percentages
    user_messages_pct: float = Field(ge=0, le=100)
    agent_responses_pct: float = Field(ge=0, le=100)
    system_prompts_pct: float = Field(ge=0, le=100)
    system_status_pct: float = Field(ge=0, le=100)
    codebase_understanding_pct: float = Field(ge=0, le=100)
    artifact_management_pct: float = Field(ge=0, le=100)
    web_research_pct: float = Field(ge=0, le=100)
    unknown_tools_pct: float = Field(ge=0, le=100)

    # Compaction info
    compaction_occurred: bool
    messages_before_compaction: int | None = None
    messages_after_compaction: int | None = None
    compaction_reduction_pct: float | None = None

    @classmethod
    def from_analysis(
        cls,
        analysis: "ContextAnalysis",
        compaction_occurred: bool = False,
        messages_before_compaction: int | None = None,
    ) -> "ContextCompositionTelemetry":
        """Create telemetry from context analysis.

        Args:
            analysis: The context analysis to convert
            compaction_occurred: Whether message compaction occurred
            messages_before_compaction: Number of messages before compaction

        Returns:
            ContextCompositionTelemetry instance
        """
        total_messages = analysis.total_messages - analysis.hint_messages.count
        usage_pct = (
            round((analysis.agent_context_tokens / analysis.max_usable_tokens * 100), 1)
            if analysis.max_usable_tokens > 0
            else 0
        )

        # Calculate compaction metrics
        messages_after: int | None = None
        compaction_reduction_pct: float | None = None

        if compaction_occurred and messages_before_compaction is not None:
            messages_after = total_messages
            if messages_before_compaction > 0:
                compaction_reduction_pct = round(
                    (1 - (total_messages / messages_before_compaction)) * 100, 1
                )

        return cls(
            # Context usage
            total_messages=total_messages,
            agent_context_tokens=analysis.agent_context_tokens,
            context_window=analysis.context_window,
            max_usable_tokens=analysis.max_usable_tokens,
            free_space_tokens=analysis.free_space_tokens,
            usage_percentage=usage_pct,
            # Message type counts
            user_messages_count=analysis.user_messages.count,
            agent_responses_count=analysis.agent_responses.count,
            system_prompts_count=analysis.system_prompts.count,
            system_status_count=analysis.system_status.count,
            codebase_understanding_count=analysis.codebase_understanding.count,
            artifact_management_count=analysis.artifact_management.count,
            web_research_count=analysis.web_research.count,
            unknown_tools_count=analysis.unknown.count,
            # Token distribution percentages
            user_messages_pct=round(analysis.get_percentage(analysis.user_messages), 1),
            agent_responses_pct=round(
                analysis.get_percentage(analysis.agent_responses), 1
            ),
            system_prompts_pct=round(
                analysis.get_percentage(analysis.system_prompts), 1
            ),
            system_status_pct=round(analysis.get_percentage(analysis.system_status), 1),
            codebase_understanding_pct=round(
                analysis.get_percentage(analysis.codebase_understanding), 1
            ),
            artifact_management_pct=round(
                analysis.get_percentage(analysis.artifact_management), 1
            ),
            web_research_pct=round(analysis.get_percentage(analysis.web_research), 1),
            unknown_tools_pct=round(analysis.get_percentage(analysis.unknown), 1),
            # Compaction info
            compaction_occurred=compaction_occurred,
            messages_before_compaction=messages_before_compaction,
            messages_after_compaction=messages_after,
            compaction_reduction_pct=compaction_reduction_pct,
        )


class ContextAnalysisOutput(BaseModel):
    """Output format for context analysis with multiple representations."""

    markdown: str = Field(description="Markdown-formatted analysis for display")
    json_data: dict[str, Any] = Field(
        description="JSON representation of analysis data"
    )
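To make the relationships between these models concrete, here is a small usage sketch (not part of the package) that builds a ContextAnalysis by hand and converts it to telemetry. The import path follows the file listing above, and every number is invented for illustration.

```python
# Illustration only: all values are invented; import path taken from the file listing above.
from shotgun.agents.context_analyzer.models import (
    ContextAnalysis,
    ContextCompositionTelemetry,
    MessageTypeStats,
)

empty = MessageTypeStats(count=0, tokens=0)

analysis = ContextAnalysis(
    user_messages=MessageTypeStats(count=4, tokens=1_200),
    agent_responses=MessageTypeStats(count=4, tokens=3_800),
    system_prompts=MessageTypeStats(count=1, tokens=900),
    system_status=empty,
    codebase_understanding=MessageTypeStats(count=6, tokens=14_000),
    artifact_management=empty,
    web_research=empty,
    unknown=empty,
    hint_messages=MessageTypeStats(count=2, tokens=100),
    total_tokens=20_000,
    total_messages=17,
    context_window=200_000,
    agent_context_tokens=19_900,
    model_name="example-model",
    max_usable_tokens=160_000,  # 80% of the context window
    free_space_tokens=140_100,
)

# Share of agent-context tokens spent on codebase tools (~70.4% with these numbers).
print(round(analysis.get_percentage(analysis.codebase_understanding), 1))

telemetry = ContextCompositionTelemetry.from_analysis(
    analysis, compaction_occurred=True, messages_before_compaction=30
)
print(telemetry.usage_percentage)          # 12.4
print(telemetry.compaction_reduction_pct)  # 50.0 (17 - 2 hints = 15 messages vs 30 before)
```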
shotgun/agents/conversation_manager.py
CHANGED
@@ -1,11 +1,15 @@
 """Manager for handling conversation persistence operations."""
 
+import asyncio
 import json
-import shutil
 from pathlib import Path
 
+import aiofiles
+import aiofiles.os
+
 from shotgun.logging_config import get_logger
 from shotgun.utils import get_shotgun_home
+from shotgun.utils.file_system_utils import async_copy_file
 
 from .conversation_history import ConversationHistory
 
@@ -27,14 +31,14 @@ class ConversationManager:
         else:
             self.conversation_path = conversation_path
 
-    def save(self, conversation: ConversationHistory) -> None:
+    async def save(self, conversation: ConversationHistory) -> None:
         """Save conversation history to file.
 
         Args:
             conversation: ConversationHistory to save
         """
         # Ensure directory exists
-        self.conversation_path.parent
+        await aiofiles.os.makedirs(self.conversation_path.parent, exist_ok=True)
 
         try:
             # Update timestamp
@@ -42,11 +46,17 @@ class ConversationManager:
 
             conversation.updated_at = datetime.now()
 
-            # Serialize to JSON
-
+            # Serialize to JSON in background thread to avoid blocking event loop
+            # This is crucial for large conversations (5k+ tokens)
+            data = await asyncio.to_thread(conversation.model_dump, mode="json")
+            json_content = await asyncio.to_thread(
+                json.dumps, data, indent=2, ensure_ascii=False
+            )
 
-            with open(
-
+            async with aiofiles.open(
+                self.conversation_path, "w", encoding="utf-8"
+            ) as f:
+                await f.write(json_content)
 
             logger.debug("Conversation saved to %s", self.conversation_path)
 
@@ -56,21 +66,26 @@ class ConversationManager:
             )
             # Don't raise - we don't want to interrupt the user's session
 
-    def load(self) -> ConversationHistory | None:
+    async def load(self) -> ConversationHistory | None:
         """Load conversation history from file.
 
         Returns:
             ConversationHistory if file exists and is valid, None otherwise
         """
-        if not
+        if not await aiofiles.os.path.exists(self.conversation_path):
             logger.debug("No conversation history found at %s", self.conversation_path)
             return None
 
         try:
-            with open(self.conversation_path, encoding="utf-8") as f:
-
-
-
+            async with aiofiles.open(self.conversation_path, encoding="utf-8") as f:
+                content = await f.read()
+            # Deserialize JSON in background thread to avoid blocking
+            data = await asyncio.to_thread(json.loads, content)
+
+            # Validate model in background thread for large conversations
+            conversation = await asyncio.to_thread(
+                ConversationHistory.model_validate, data
+            )
             logger.debug(
                 "Conversation loaded from %s with %d agent messages",
                 self.conversation_path,
@@ -89,7 +104,7 @@ class ConversationManager:
             # Create a backup of the corrupted file for debugging
             backup_path = self.conversation_path.with_suffix(".json.backup")
             try:
-
+                await async_copy_file(self.conversation_path, backup_path)
                 logger.info("Backed up corrupted conversation to %s", backup_path)
             except Exception as backup_error:  # pragma: no cover
                 logger.warning("Failed to backup corrupted file: %s", backup_error)
@@ -105,11 +120,12 @@ class ConversationManager:
             )
             return None
 
-    def clear(self) -> None:
+    async def clear(self) -> None:
         """Delete the conversation history file."""
-        if
+        if await aiofiles.os.path.exists(self.conversation_path):
             try:
-
+                # Use asyncio.to_thread for unlink operation
+                await asyncio.to_thread(self.conversation_path.unlink)
                 logger.debug(
                     "Conversation history cleared at %s", self.conversation_path
                 )
@@ -118,10 +134,10 @@ class ConversationManager:
                 "Failed to clear conversation at %s: %s", self.conversation_path, e
             )
 
-    def exists(self) -> bool:
+    async def exists(self) -> bool:
         """Check if a conversation history file exists.
 
         Returns:
             True if conversation file exists, False otherwise
         """
-        return
+        return await aiofiles.os.path.exists(str(self.conversation_path))
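Because save, load, clear, and exists are now coroutines, call sites have to await them from a running event loop. A minimal sketch of the new calling convention follows; the constructor arguments are assumed from the context lines above, so treat the exact signature as illustrative.

```python
# Sketch only: ConversationManager's full constructor is not shown in this diff.
import asyncio
from pathlib import Path

from shotgun.agents.conversation_manager import ConversationManager


async def main() -> None:
    manager = ConversationManager(conversation_path=Path("conversation.json"))
    if await manager.exists():
        conversation = await manager.load()  # JSON parsing/validation runs off the event loop
        print(conversation)
    # await manager.save(conversation)       # serialization is pushed to a worker thread
    # await manager.clear()                  # removes the history file


asyncio.run(main())
```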
shotgun/agents/export.py
CHANGED
@@ -23,7 +23,7 @@ from .models import AgentDeps, AgentResponse, AgentRuntimeOptions, AgentType
 logger = get_logger(__name__)
 
 
-def create_export_agent(
+async def create_export_agent(
     agent_runtime_options: AgentRuntimeOptions, provider: ProviderType | None = None
 ) -> tuple[Agent[AgentDeps, AgentResponse], AgentDeps]:
     """Create an export agent with file management capabilities.
@@ -39,7 +39,7 @@ def create_export_agent(
     # Use partial to create system prompt function for export agent
     system_prompt_fn = partial(build_agent_system_prompt, "export")
 
-    agent, deps = create_base_agent(
+    agent, deps = await create_base_agent(
         system_prompt_fn,
         agent_runtime_options,
         provider=provider,
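Since create_base_agent is now awaited, create_export_agent itself became a coroutine and its callers need an await too. A hedged sketch of the new call shape (how AgentRuntimeOptions is constructed is not shown in this diff):

```python
# Sketch only: agent_runtime_options is assumed to be built elsewhere.
from shotgun.agents.export import create_export_agent


async def run_export(agent_runtime_options) -> None:
    agent, deps = await create_export_agent(agent_runtime_options, provider=None)
    result = await agent.run("Export the current research artifacts", deps=deps)
    print(result)
```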
shotgun/agents/history/compaction.py
CHANGED
@@ -13,7 +13,7 @@ logger = get_logger(__name__)
 
 
 async def apply_persistent_compaction(
-    messages: list[ModelMessage], deps: AgentDeps
+    messages: list[ModelMessage], deps: AgentDeps, force: bool = False
 ) -> list[ModelMessage]:
     """Apply compaction to message history for persistent storage.
 
@@ -23,6 +23,7 @@ async def apply_persistent_compaction(
     Args:
         messages: Full message history from agent run
         deps: Agent dependencies containing model config
+        force: If True, force compaction even if below token threshold
 
     Returns:
         Compacted message history that should be stored as conversation state
@@ -46,7 +47,7 @@
             self.usage = usage
 
     ctx = MockContext(deps, usage)
-    compacted_messages = await token_limit_compactor(ctx, messages)
+    compacted_messages = await token_limit_compactor(ctx, messages, force=force)
 
     # Log the result for monitoring
     original_size = len(messages)
@@ -59,17 +60,21 @@
             f"({reduction_pct:.1f}% reduction)"
         )
 
-        # Track persistent compaction event
+        # Track persistent compaction event with simple metrics (fast, no token counting)
         track_event(
             "persistent_compaction_applied",
             {
+                # Basic compaction metrics
                "messages_before": original_size,
                "messages_after": compacted_size,
-                "tokens_before": estimated_tokens,
                "reduction_percentage": round(reduction_pct, 2),
                "agent_mode": deps.agent_mode.value
                if hasattr(deps, "agent_mode") and deps.agent_mode
                else "unknown",
+                # Model and provider info (no computation needed)
+                "model_name": deps.llm_model.name.value,
+                "provider": deps.llm_model.provider.value,
+                "key_provider": deps.llm_model.key_provider.value,
             },
         )
     else:
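The new force flag is simply threaded through to token_limit_compactor, which lets a caller (for example an explicit compact command) summarize history even while it is still under the token threshold. A hedged usage sketch:

```python
# Sketch only: `messages` and `deps` would come from a prior agent run.
from pydantic_ai.messages import ModelMessage

from shotgun.agents.history.compaction import apply_persistent_compaction
from shotgun.agents.models import AgentDeps


async def force_compact(messages: list[ModelMessage], deps: AgentDeps) -> list[ModelMessage]:
    # force=True bypasses the "under threshold" early return inside token_limit_compactor
    compacted = await apply_persistent_compaction(messages, deps, force=True)
    print(f"compacted {len(messages)} messages down to {len(compacted)}")
    return compacted
```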
shotgun/agents/history/history_processors.py
CHANGED
@@ -1,7 +1,9 @@
 """History processors for managing conversation history in Shotgun agents."""
 
+from collections.abc import Awaitable, Callable
 from typing import TYPE_CHECKING, Any, Protocol
 
+from anthropic import APIStatusError
 from pydantic_ai import ModelSettings
 from pydantic_ai.messages import (
     ModelMessage,
@@ -14,6 +16,7 @@ from pydantic_ai.messages import (
 from shotgun.agents.llm import shotgun_model_request
 from shotgun.agents.messages import AgentSystemPrompt, SystemStatusPrompt
 from shotgun.agents.models import AgentDeps
+from shotgun.exceptions import ContextSizeLimitExceeded
 from shotgun.logging_config import get_logger
 from shotgun.posthog_telemetry import track_event
 from shotgun.prompts import PromptLoader
@@ -51,6 +54,86 @@ logger = get_logger(__name__)
 prompt_loader = PromptLoader()
 
 
+async def _safe_token_estimation(
+    estimation_func: Callable[..., Awaitable[int]],
+    model_name: str,
+    max_tokens: int,
+    *args: Any,
+    **kwargs: Any,
+) -> int:
+    """Safely estimate tokens with proper error handling.
+
+    Wraps token estimation functions to handle failures gracefully.
+    Only RuntimeError (from token counters) is wrapped in ContextSizeLimitExceeded.
+    Other errors (network, auth) are allowed to bubble up.
+
+    Args:
+        estimation_func: Async function that estimates tokens
+        model_name: Name of the model for error messages
+        max_tokens: Maximum tokens for the model
+        *args: Arguments to pass to estimation_func
+        **kwargs: Keyword arguments to pass to estimation_func
+
+    Returns:
+        Token count from estimation_func
+
+    Raises:
+        ContextSizeLimitExceeded: If token counting fails with RuntimeError
+        Exception: Any other exceptions from estimation_func
+    """
+    try:
+        return await estimation_func(*args, **kwargs)
+    except Exception as e:
+        # Log the error with full context
+        logger.warning(
+            f"Token counting failed for {model_name}",
+            extra={
+                "error_type": type(e).__name__,
+                "error_message": str(e),
+                "model": model_name,
+            },
+        )
+
+        # Token counting behavior with oversized context (verified via testing):
+        #
+        # 1. OpenAI/tiktoken:
+        #    - Successfully counts any size (tested with 752K tokens, no error)
+        #    - Library errors: ValueError, KeyError, AttributeError, SSLError (file/cache issues)
+        #    - Wrapped as: RuntimeError by our counter
+        #
+        # 2. Gemini/SentencePiece:
+        #    - Successfully counts any size (tested with 752K tokens, no error)
+        #    - Library errors: RuntimeError, IOError, TypeError (file/model loading issues)
+        #    - Wrapped as: RuntimeError by our counter
+        #
+        # 3. Anthropic API:
+        #    - Successfully counts large token counts (tested with 752K tokens, no error)
+        #    - Only enforces 32 MB request size limit (not token count)
+        #    - Raises: APIStatusError(413) with error type 'request_too_large' for 32MB+ requests
+        #    - Other API errors: APIConnectionError, RateLimitError, APIStatusError (4xx/5xx)
+        #    - Wrapped as: RuntimeError by our counter
+        #
+        # IMPORTANT: No provider raises errors for "too many tokens" during counting.
+        # Token count validation happens separately by comparing count to max_input_tokens.
+        #
+        # We wrap RuntimeError (library-level failures from tiktoken/sentencepiece).
+        # We also wrap Anthropic's 413 error (request exceeds 32 MB) as it indicates
+        # context is effectively too large and needs user action to reduce it.
+        if isinstance(e, RuntimeError):
+            raise ContextSizeLimitExceeded(
+                model_name=model_name, max_tokens=max_tokens
+            ) from e
+
+        # Check for Anthropic's 32 MB request size limit (APIStatusError with status 413)
+        if isinstance(e, APIStatusError) and e.status_code == 413:
+            raise ContextSizeLimitExceeded(
+                model_name=model_name, max_tokens=max_tokens
+            ) from e
+
+        # Re-raise other exceptions (network errors, auth failures, etc.)
+        raise
+
+
 def is_summary_part(part: Any) -> bool:
     """Check if a message part is a compacted summary."""
     return isinstance(part, TextPart) and part.content.startswith(SUMMARY_MARKER)
@@ -127,6 +210,7 @@ calculate_max_summarization_tokens = _calculate_max_summarization_tokens
 async def token_limit_compactor(
     ctx: ContextProtocol,
     messages: list[ModelMessage],
+    force: bool = False,
 ) -> list[ModelMessage]:
     """Compact message history based on token limits with incremental processing.
 
@@ -139,6 +223,7 @@ async def token_limit_compactor(
     Args:
         ctx: Run context with usage information and dependencies
         messages: Current conversation history
+        force: If True, force compaction even if below token threshold
 
     Returns:
         Compacted list of messages within token limits
@@ -155,9 +240,15 @@
 
     if last_summary_index is not None:
         # Check if post-summary conversation exceeds threshold for incremental compaction
-        post_summary_tokens = await
-
+        post_summary_tokens = await _safe_token_estimation(
+            estimate_post_summary_tokens,
+            deps.llm_model.name,
+            model_max_tokens,
+            messages,
+            last_summary_index,
+            deps.llm_model,
         )
+
         post_summary_percentage = (
             (post_summary_tokens / max_tokens) * 100 if max_tokens > 0 else 0
         )
@@ -169,7 +260,7 @@
         )
 
         # Only do incremental compaction if post-summary conversation exceeds threshold
-        if post_summary_tokens < max_tokens:
+        if post_summary_tokens < max_tokens and not force:
             logger.debug(
                 f"Post-summary conversation under threshold ({post_summary_tokens} < {max_tokens}), "
                 f"keeping all {len(messages)} messages"
@@ -340,6 +431,7 @@
             else 0
         )
 
+        # Track incremental compaction with simple metrics (fast, no token counting)
         track_event(
             "context_compaction_triggered",
             {
@@ -352,6 +444,10 @@
                "agent_mode": deps.agent_mode.value
                if hasattr(deps, "agent_mode") and deps.agent_mode
                else "unknown",
+                # Model and provider info (no computation needed)
+                "model_name": deps.llm_model.name.value,
+                "provider": deps.llm_model.provider.value,
+                "key_provider": deps.llm_model.key_provider.value,
             },
         )
 
@@ -359,7 +455,14 @@
 
     else:
         # Check if total conversation exceeds threshold for full compaction
-        total_tokens = await
+        total_tokens = await _safe_token_estimation(
+            estimate_tokens_from_messages,
+            deps.llm_model.name,
+            model_max_tokens,
+            messages,
+            deps.llm_model,
+        )
+
         total_percentage = (total_tokens / max_tokens) * 100 if max_tokens > 0 else 0
 
         logger.debug(
@@ -368,7 +471,7 @@
         )
 
         # Only do full compaction if total conversation exceeds threshold
-        if total_tokens < max_tokens:
+        if total_tokens < max_tokens and not force:
             logger.debug(
                 f"Total conversation under threshold ({total_tokens} < {max_tokens}), "
                 f"keeping all {len(messages)} messages"
@@ -468,6 +571,7 @@ async def _full_compaction(
     tokens_before = current_tokens  # Already calculated above
     tokens_after = summary_usage.output_tokens if summary_usage else 0
 
+    # Track full compaction with simple metrics (fast, no token counting)
     track_event(
         "context_compaction_triggered",
         {
@@ -480,6 +584,10 @@ async def _full_compaction(
            "agent_mode": deps.agent_mode.value
            if hasattr(deps, "agent_mode") and deps.agent_mode
            else "unknown",
+            # Model and provider info (no computation needed)
+            "model_name": deps.llm_model.name.value,
+            "provider": deps.llm_model.provider.value,
+            "key_provider": deps.llm_model.key_provider.value,
         },
     )
 
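The net effect of _safe_token_estimation is that library-level counting failures (RuntimeError) and Anthropic's 413 request-size rejection both surface as ContextSizeLimitExceeded, while everything else propagates unchanged. A small sketch with a stub estimator (illustrative only; the helper is module-private):

```python
# Sketch only: uses a stub estimator to show the error mapping of the private helper.
import asyncio

from shotgun.agents.history.history_processors import _safe_token_estimation
from shotgun.exceptions import ContextSizeLimitExceeded


async def broken_estimator() -> int:
    raise RuntimeError("tokenizer model failed to load")


async def main() -> None:
    try:
        await _safe_token_estimation(broken_estimator, "example-model", 200_000)
    except ContextSizeLimitExceeded:
        print("counting failure reported as a context-size problem")


asyncio.run(main())
```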
shotgun/agents/history/token_counting/anthropic.py
CHANGED
@@ -72,11 +72,23 @@ class AnthropicTokenCounter(TokenCounter):
         Raises:
             RuntimeError: If API call fails
         """
+        # Handle empty text to avoid unnecessary API calls
+        # Anthropic API requires non-empty content, so we need a strict check
+        if not text or not text.strip():
+            return 0
+
+        # Additional validation: ensure the text has actual content
+        # Some edge cases might have only whitespace or control characters
+        cleaned_text = text.strip()
+        if not cleaned_text:
+            return 0
+
         try:
             # Anthropic API expects messages format and model parameter
             # Use await with async client
             result = await self.client.messages.count_tokens(
-                messages=[{"role": "user", "content":
+                messages=[{"role": "user", "content": cleaned_text}],
+                model=self.model_name,
             )
             return result.input_tokens
         except Exception as e:
@@ -107,5 +119,9 @@ class AnthropicTokenCounter(TokenCounter):
         Raises:
             RuntimeError: If token counting fails
         """
+        # Handle empty message list early
+        if not messages:
+            return 0
+
         total_text = extract_text_from_messages(messages)
         return await self.count_tokens(total_text)
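The guard matters because Anthropic's count_tokens endpoint rejects empty message content. A standalone sketch of the same pattern against the public SDK (this is not the package's AnthropicTokenCounter, whose constructor is not shown here):

```python
# Sketch only: same empty-content guard, written directly against the Anthropic SDK.
from anthropic import AsyncAnthropic


async def count_tokens(client: AsyncAnthropic, model: str, text: str) -> int:
    cleaned = text.strip() if text else ""
    if not cleaned:
        return 0  # skip the API call instead of sending content the endpoint rejects
    result = await client.messages.count_tokens(
        model=model,
        messages=[{"role": "user", "content": cleaned}],
    )
    return result.input_tokens
```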
shotgun/agents/history/token_counting/base.py
CHANGED
@@ -56,12 +56,23 @@ def extract_text_from_messages(messages: list[ModelMessage]) -> str:
         if hasattr(message, "parts"):
             for part in message.parts:
                 if hasattr(part, "content") and isinstance(part.content, str):
-
+                    # Only add non-empty content
+                    if part.content.strip():
+                        text_parts.append(part.content)
                 else:
                     # Handle non-text parts (tool calls, etc.)
-
+                    part_str = str(part)
+                    if part_str.strip():
+                        text_parts.append(part_str)
         else:
             # Handle messages without parts
-
+            msg_str = str(message)
+            if msg_str.strip():
+                text_parts.append(msg_str)
+
+    # If no valid text parts found, return a minimal placeholder
+    # This ensures we never send completely empty content to APIs
+    if not text_parts:
+        return "."
 
     return "\n".join(text_parts)
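With these changes, whitespace-only parts are dropped and an entirely empty history still yields a non-empty string, so downstream counters never receive empty content. A hedged sketch of the observable behavior (the module path is assumed from the file listing above):

```python
# Sketch only: module path assumed; demonstrates the whitespace filtering and "." fallback.
from pydantic_ai.messages import ModelRequest, UserPromptPart

from shotgun.agents.history.token_counting.base import extract_text_from_messages

msgs = [
    ModelRequest(parts=[UserPromptPart(content="hello")]),
    ModelRequest(parts=[UserPromptPart(content="   ")]),  # whitespace-only, filtered out
]
print(extract_text_from_messages(msgs))  # -> "hello"
print(extract_text_from_messages([]))    # -> "." (placeholder so counters never see empty text)
```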