shotgun-sh 0.2.6.dev1__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shotgun/agents/agent_manager.py +694 -73
- shotgun/agents/common.py +69 -70
- shotgun/agents/config/constants.py +0 -6
- shotgun/agents/config/manager.py +70 -35
- shotgun/agents/config/models.py +41 -1
- shotgun/agents/config/provider.py +33 -5
- shotgun/agents/context_analyzer/__init__.py +28 -0
- shotgun/agents/context_analyzer/analyzer.py +471 -0
- shotgun/agents/context_analyzer/constants.py +9 -0
- shotgun/agents/context_analyzer/formatter.py +115 -0
- shotgun/agents/context_analyzer/models.py +212 -0
- shotgun/agents/conversation_history.py +125 -2
- shotgun/agents/conversation_manager.py +57 -19
- shotgun/agents/export.py +6 -7
- shotgun/agents/history/compaction.py +9 -4
- shotgun/agents/history/context_extraction.py +93 -6
- shotgun/agents/history/history_processors.py +113 -5
- shotgun/agents/history/token_counting/anthropic.py +39 -3
- shotgun/agents/history/token_counting/base.py +14 -3
- shotgun/agents/history/token_counting/openai.py +11 -1
- shotgun/agents/history/token_counting/sentencepiece_counter.py +8 -0
- shotgun/agents/history/token_counting/tokenizer_cache.py +3 -1
- shotgun/agents/history/token_counting/utils.py +0 -3
- shotgun/agents/models.py +50 -2
- shotgun/agents/plan.py +6 -7
- shotgun/agents/research.py +7 -8
- shotgun/agents/specify.py +6 -7
- shotgun/agents/tasks.py +6 -7
- shotgun/agents/tools/__init__.py +0 -2
- shotgun/agents/tools/codebase/codebase_shell.py +6 -0
- shotgun/agents/tools/codebase/directory_lister.py +6 -0
- shotgun/agents/tools/codebase/file_read.py +11 -2
- shotgun/agents/tools/codebase/query_graph.py +6 -0
- shotgun/agents/tools/codebase/retrieve_code.py +6 -0
- shotgun/agents/tools/file_management.py +82 -16
- shotgun/agents/tools/registry.py +217 -0
- shotgun/agents/tools/web_search/__init__.py +8 -8
- shotgun/agents/tools/web_search/anthropic.py +8 -2
- shotgun/agents/tools/web_search/gemini.py +7 -1
- shotgun/agents/tools/web_search/openai.py +7 -1
- shotgun/agents/tools/web_search/utils.py +2 -2
- shotgun/agents/usage_manager.py +16 -11
- shotgun/api_endpoints.py +7 -3
- shotgun/build_constants.py +3 -3
- shotgun/cli/clear.py +53 -0
- shotgun/cli/compact.py +186 -0
- shotgun/cli/config.py +8 -5
- shotgun/cli/context.py +111 -0
- shotgun/cli/export.py +1 -1
- shotgun/cli/feedback.py +4 -2
- shotgun/cli/models.py +1 -0
- shotgun/cli/plan.py +1 -1
- shotgun/cli/research.py +1 -1
- shotgun/cli/specify.py +1 -1
- shotgun/cli/tasks.py +1 -1
- shotgun/cli/update.py +16 -2
- shotgun/codebase/core/change_detector.py +5 -3
- shotgun/codebase/core/code_retrieval.py +4 -2
- shotgun/codebase/core/ingestor.py +10 -8
- shotgun/codebase/core/manager.py +13 -4
- shotgun/codebase/core/nl_query.py +1 -1
- shotgun/exceptions.py +32 -0
- shotgun/logging_config.py +18 -27
- shotgun/main.py +73 -11
- shotgun/posthog_telemetry.py +37 -28
- shotgun/prompts/agents/export.j2 +18 -1
- shotgun/prompts/agents/partials/common_agent_system_prompt.j2 +5 -1
- shotgun/prompts/agents/partials/interactive_mode.j2 +24 -7
- shotgun/prompts/agents/plan.j2 +1 -1
- shotgun/prompts/agents/research.j2 +1 -1
- shotgun/prompts/agents/specify.j2 +270 -3
- shotgun/prompts/agents/tasks.j2 +1 -1
- shotgun/sentry_telemetry.py +163 -16
- shotgun/settings.py +238 -0
- shotgun/telemetry.py +18 -33
- shotgun/tui/app.py +243 -43
- shotgun/tui/commands/__init__.py +1 -1
- shotgun/tui/components/context_indicator.py +179 -0
- shotgun/tui/components/mode_indicator.py +70 -0
- shotgun/tui/components/status_bar.py +48 -0
- shotgun/tui/containers.py +91 -0
- shotgun/tui/dependencies.py +39 -0
- shotgun/tui/protocols.py +45 -0
- shotgun/tui/screens/chat/__init__.py +5 -0
- shotgun/tui/screens/chat/chat.tcss +54 -0
- shotgun/tui/screens/chat/chat_screen.py +1254 -0
- shotgun/tui/screens/chat/codebase_index_prompt_screen.py +64 -0
- shotgun/tui/screens/chat/codebase_index_selection.py +12 -0
- shotgun/tui/screens/chat/help_text.py +40 -0
- shotgun/tui/screens/chat/prompt_history.py +48 -0
- shotgun/tui/screens/chat.tcss +11 -0
- shotgun/tui/screens/chat_screen/command_providers.py +78 -2
- shotgun/tui/screens/chat_screen/history/__init__.py +22 -0
- shotgun/tui/screens/chat_screen/history/agent_response.py +66 -0
- shotgun/tui/screens/chat_screen/history/chat_history.py +115 -0
- shotgun/tui/screens/chat_screen/history/formatters.py +115 -0
- shotgun/tui/screens/chat_screen/history/partial_response.py +43 -0
- shotgun/tui/screens/chat_screen/history/user_question.py +42 -0
- shotgun/tui/screens/confirmation_dialog.py +151 -0
- shotgun/tui/screens/feedback.py +4 -4
- shotgun/tui/screens/github_issue.py +102 -0
- shotgun/tui/screens/model_picker.py +49 -24
- shotgun/tui/screens/onboarding.py +431 -0
- shotgun/tui/screens/pipx_migration.py +153 -0
- shotgun/tui/screens/provider_config.py +50 -27
- shotgun/tui/screens/shotgun_auth.py +2 -2
- shotgun/tui/screens/welcome.py +23 -12
- shotgun/tui/services/__init__.py +5 -0
- shotgun/tui/services/conversation_service.py +184 -0
- shotgun/tui/state/__init__.py +7 -0
- shotgun/tui/state/processing_state.py +185 -0
- shotgun/tui/utils/mode_progress.py +14 -7
- shotgun/tui/widgets/__init__.py +5 -0
- shotgun/tui/widgets/widget_coordinator.py +263 -0
- shotgun/utils/file_system_utils.py +22 -2
- shotgun/utils/marketing.py +110 -0
- shotgun/utils/update_checker.py +69 -14
- shotgun_sh-0.2.17.dist-info/METADATA +465 -0
- shotgun_sh-0.2.17.dist-info/RECORD +194 -0
- {shotgun_sh-0.2.6.dev1.dist-info → shotgun_sh-0.2.17.dist-info}/entry_points.txt +1 -0
- {shotgun_sh-0.2.6.dev1.dist-info → shotgun_sh-0.2.17.dist-info}/licenses/LICENSE +1 -1
- shotgun/agents/tools/user_interaction.py +0 -37
- shotgun/tui/screens/chat.py +0 -804
- shotgun/tui/screens/chat_screen/history.py +0 -401
- shotgun_sh-0.2.6.dev1.dist-info/METADATA +0 -467
- shotgun_sh-0.2.6.dev1.dist-info/RECORD +0 -156
- {shotgun_sh-0.2.6.dev1.dist-info → shotgun_sh-0.2.17.dist-info}/WHEEL +0 -0
shotgun/agents/config/models.py
CHANGED
@@ -1,5 +1,6 @@
 """Pydantic models for configuration."""
 
+from datetime import datetime
 from enum import StrEnum
 
 from pydantic import BaseModel, Field, PrivateAttr, SecretStr
@@ -28,6 +29,7 @@ class ModelName(StrEnum):
     GPT_5_MINI = "gpt-5-mini"
     CLAUDE_OPUS_4_1 = "claude-opus-4-1"
     CLAUDE_SONNET_4_5 = "claude-sonnet-4-5"
+    CLAUDE_HAIKU_4_5 = "claude-haiku-4-5"
     GEMINI_2_5_PRO = "gemini-2.5-pro"
     GEMINI_2_5_FLASH = "gemini-2.5-flash"
 
@@ -42,6 +44,7 @@ class ModelSpec(BaseModel):
     litellm_proxy_model_name: (
         str  # LiteLLM format (e.g., "openai/gpt-5", "gemini/gemini-2-pro")
     )
+    short_name: str  # Display name for UI (e.g., "Sonnet 4.5", "GPT-5")
 
 
 class ModelConfig(BaseModel):
@@ -88,6 +91,7 @@ MODEL_SPECS: dict[ModelName, ModelSpec] = {
         max_input_tokens=400_000,
         max_output_tokens=128_000,
         litellm_proxy_model_name="openai/gpt-5",
+        short_name="GPT-5",
     ),
     ModelName.GPT_5_MINI: ModelSpec(
         name=ModelName.GPT_5_MINI,
@@ -95,6 +99,7 @@ MODEL_SPECS: dict[ModelName, ModelSpec] = {
         max_input_tokens=400_000,
         max_output_tokens=128_000,
         litellm_proxy_model_name="openai/gpt-5-mini",
+        short_name="GPT-5 Mini",
     ),
     ModelName.CLAUDE_OPUS_4_1: ModelSpec(
         name=ModelName.CLAUDE_OPUS_4_1,
@@ -102,6 +107,7 @@ MODEL_SPECS: dict[ModelName, ModelSpec] = {
         max_input_tokens=200_000,
         max_output_tokens=32_000,
         litellm_proxy_model_name="anthropic/claude-opus-4-1",
+        short_name="Opus 4.1",
     ),
     ModelName.CLAUDE_SONNET_4_5: ModelSpec(
         name=ModelName.CLAUDE_SONNET_4_5,
@@ -109,6 +115,15 @@ MODEL_SPECS: dict[ModelName, ModelSpec] = {
         max_input_tokens=200_000,
         max_output_tokens=16_000,
         litellm_proxy_model_name="anthropic/claude-sonnet-4-5",
+        short_name="Sonnet 4.5",
+    ),
+    ModelName.CLAUDE_HAIKU_4_5: ModelSpec(
+        name=ModelName.CLAUDE_HAIKU_4_5,
+        provider=ProviderType.ANTHROPIC,
+        max_input_tokens=200_000,
+        max_output_tokens=64_000,
+        litellm_proxy_model_name="anthropic/claude-haiku-4-5",
+        short_name="Haiku 4.5",
     ),
     ModelName.GEMINI_2_5_PRO: ModelSpec(
         name=ModelName.GEMINI_2_5_PRO,
@@ -116,6 +131,7 @@ MODEL_SPECS: dict[ModelName, ModelSpec] = {
         max_input_tokens=1_000_000,
         max_output_tokens=64_000,
         litellm_proxy_model_name="gemini/gemini-2.5-pro",
+        short_name="Gemini 2.5 Pro",
     ),
     ModelName.GEMINI_2_5_FLASH: ModelSpec(
         name=ModelName.GEMINI_2_5_FLASH,
@@ -123,6 +139,7 @@ MODEL_SPECS: dict[ModelName, ModelSpec] = {
         max_input_tokens=1_000_000,
         max_output_tokens=64_000,
         litellm_proxy_model_name="gemini/gemini-2.5-flash",
+        short_name="Gemini 2.5 Flash",
     ),
 }
 
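The new short_name field above gives every MODEL_SPECS entry a compact display label. A minimal sketch of how a UI component might consume it; the format_model_label helper is illustrative and not part of this diff:

from shotgun.agents.config.models import MODEL_SPECS, ModelName


def format_model_label(model: ModelName) -> str:
    # Hypothetical helper: combine the new short_name with the context window size.
    spec = MODEL_SPECS[model]
    return f"{spec.short_name} ({spec.max_input_tokens // 1000}k context)"


# e.g. "Haiku 4.5 (200k context)"
label = format_model_label(ModelName.CLAUDE_HAIKU_4_5)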
@@ -154,6 +171,21 @@ class ShotgunAccountConfig(BaseModel):
     )
 
 
+class MarketingMessageRecord(BaseModel):
+    """Record of when a marketing message was shown to the user."""
+
+    shown_at: datetime = Field(description="Timestamp when the message was shown")
+
+
+class MarketingConfig(BaseModel):
+    """Configuration for marketing messages shown to users."""
+
+    messages: dict[str, MarketingMessageRecord] = Field(
+        default_factory=dict,
+        description="Tracking which marketing messages have been shown. Key is message ID (e.g., 'github_star_v1')",
+    )
+
+
 class ShotgunConfig(BaseModel):
     """Main configuration for Shotgun CLI."""
 
@@ -168,8 +200,16 @@ class ShotgunConfig(BaseModel):
     shotgun_instance_id: str = Field(
         description="Unique shotgun instance identifier (also used for anonymous telemetry)",
     )
-    config_version: int = Field(default=
+    config_version: int = Field(default=4, description="Configuration schema version")
     shown_welcome_screen: bool = Field(
         default=False,
         description="Whether the welcome screen has been shown to the user",
     )
+    shown_onboarding_popup: datetime | None = Field(
+        default=None,
+        description="Timestamp when the onboarding popup was shown to the user (ISO8601 format)",
+    )
+    marketing: MarketingConfig = Field(
+        default_factory=MarketingConfig,
+        description="Marketing messages configuration and tracking",
+    )
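The marketing tracking added above is an ID-to-timestamp map. A minimal sketch of recording and checking a shown message; the message ID follows the 'github_star_v1' convention from the field description, and the surrounding code is illustrative:

from datetime import UTC, datetime

from shotgun.agents.config.models import MarketingConfig, MarketingMessageRecord

marketing = MarketingConfig()

# Record that the GitHub-star prompt was shown.
marketing.messages["github_star_v1"] = MarketingMessageRecord(shown_at=datetime.now(UTC))

# Later: only show messages that have not been recorded yet.
already_shown = "github_star_v1" in marketing.messages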
shotgun/agents/config/provider.py
CHANGED
@@ -32,6 +32,34 @@ logger = get_logger(__name__)
 _model_cache: dict[tuple[ProviderType, KeyProvider, ModelName, str], Model] = {}
 
 
+def get_default_model_for_provider(config: ShotgunConfig) -> ModelName:
+    """Get the default model based on which provider/account is configured.
+
+    Checks API keys in priority order and returns appropriate default model.
+    Treats Shotgun Account as a provider context.
+
+    Args:
+        config: Shotgun configuration containing API keys
+
+    Returns:
+        Default ModelName for the configured provider/account
+    """
+    # Priority 1: Shotgun Account
+    if _get_api_key(config.shotgun.api_key):
+        return ModelName.GPT_5
+
+    # Priority 2: Individual provider keys
+    if _get_api_key(config.anthropic.api_key):
+        return ModelName.CLAUDE_HAIKU_4_5
+    if _get_api_key(config.openai.api_key):
+        return ModelName.GPT_5
+    if _get_api_key(config.google.api_key):
+        return ModelName.GEMINI_2_5_PRO
+
+    # Fallback: system-wide default
+    return ModelName.CLAUDE_HAIKU_4_5
+
+
 def get_or_create_model(
     provider: ProviderType,
     key_provider: "KeyProvider",
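A minimal sketch of how this helper might be called; the config manager import path and its async load() are assumptions inferred from other hunks in this diff:

from shotgun.agents.config.manager import get_config_manager  # assumed import path
from shotgun.agents.config.provider import get_default_model_for_provider


async def pick_default_model():
    config = await get_config_manager().load(force_reload=False)
    # Shotgun account -> GPT-5; Anthropic key -> Haiku 4.5; OpenAI key -> GPT-5;
    # Google key -> Gemini 2.5 Pro; otherwise falls back to Haiku 4.5.
    return get_default_model_for_provider(config)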
@@ -142,7 +170,7 @@
     return _model_cache[cache_key]
 
 
-def get_provider_model(
+async def get_provider_model(
     provider_or_model: ProviderType | ModelName | None = None,
 ) -> ModelConfig:
     """Get a fully configured ModelConfig with API key and Model instance.
@@ -161,7 +189,7 @@
     """
     config_manager = get_config_manager()
     # Use cached config for read-only access (performance)
-    config = config_manager.load(force_reload=False)
+    config = await config_manager.load(force_reload=False)
 
     # Priority 1: Check if Shotgun key exists - if so, use it for ANY model
     shotgun_api_key = _get_api_key(config.shotgun.api_key)
@@ -172,7 +200,7 @@
         model_name = provider_or_model
     else:
         # No specific model requested - use selected or default
-        model_name = config.selected_model or ModelName.
+        model_name = config.selected_model or ModelName.GPT_5
 
     if model_name not in MODEL_SPECS:
         raise ValueError(f"Model '{model_name.value}' not found")
@@ -247,8 +275,8 @@
     if not api_key:
         raise ValueError("Anthropic API key not configured. Set via config.")
 
-    # Use requested model or default to claude-
-    model_name = requested_model if requested_model else ModelName.
+    # Use requested model or default to claude-haiku-4-5
+    model_name = requested_model if requested_model else ModelName.CLAUDE_HAIKU_4_5
     if model_name not in MODEL_SPECS:
         raise ValueError(f"Model '{model_name.value}' not found")
     spec = MODEL_SPECS[model_name]
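Since get_provider_model is now a coroutine, existing call sites need an await from async code; a minimal sketch of the updated call shape (the caller is illustrative):

from shotgun.agents.config.models import ModelName
from shotgun.agents.config.provider import get_provider_model


async def load_model_config():
    # Previously a plain call; now awaited because config loading is async.
    return await get_provider_model(ModelName.CLAUDE_HAIKU_4_5)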
shotgun/agents/context_analyzer/__init__.py
ADDED
@@ -0,0 +1,28 @@
+"""Context analysis module for conversation composition statistics.
+
+This module provides tools for analyzing conversation context usage, breaking down
+token consumption by message type and tool category.
+"""
+
+from .analyzer import ContextAnalyzer
+from .constants import ToolCategory, get_tool_category
+from .formatter import ContextFormatter
+from .models import (
+    ContextAnalysis,
+    ContextAnalysisOutput,
+    ContextCompositionTelemetry,
+    MessageTypeStats,
+    TokenAllocation,
+)
+
+__all__ = [
+    "ContextAnalyzer",
+    "ContextAnalysis",
+    "ContextAnalysisOutput",
+    "ContextCompositionTelemetry",
+    "ContextFormatter",
+    "MessageTypeStats",
+    "TokenAllocation",
+    "ToolCategory",
+    "get_tool_category",
+]
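The exports above are meant to be used together; a minimal sketch of driving the analyzer, assuming the model config and the two message histories come from an existing conversation:

from shotgun.agents.context_analyzer import ContextAnalyzer


async def summarize_context(model_config, message_history, ui_message_history):
    analyzer = ContextAnalyzer(model_config)
    analysis = await analyzer.analyze_conversation(message_history, ui_message_history)
    # ContextAnalysis carries per-category counts/tokens plus totals and free space.
    return analysis.total_tokens, analysis.free_space_tokens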
shotgun/agents/context_analyzer/analyzer.py
ADDED
@@ -0,0 +1,471 @@
+"""Core context analysis logic."""
+
+import json
+from collections.abc import Sequence
+
+from pydantic_ai.messages import (
+    ModelMessage,
+    ModelRequest,
+    ModelResponse,
+    SystemPromptPart,
+    TextPart,
+    ToolCallPart,
+    ToolReturnPart,
+    UserPromptPart,
+)
+
+from shotgun.agents.config.models import ModelConfig
+from shotgun.agents.history.token_counting.utils import count_tokens_from_messages
+from shotgun.agents.history.token_estimation import estimate_tokens_from_messages
+from shotgun.agents.messages import AgentSystemPrompt, SystemStatusPrompt
+from shotgun.logging_config import get_logger
+from shotgun.tui.screens.chat_screen.hint_message import HintMessage
+
+from .constants import ToolCategory, get_tool_category
+from .models import ContextAnalysis, MessageTypeStats, TokenAllocation
+
+logger = get_logger(__name__)
+
+
+class ContextAnalyzer:
+    """Analyzes conversation message history for context composition."""
+
+    def __init__(self, model_config: ModelConfig):
+        """Initialize the analyzer with model configuration for token counting.
+
+        Args:
+            model_config: Model configuration for accurate token counting
+        """
+        self.model_config = model_config
+
+    async def _allocate_tokens_from_usage(
+        self,
+        message_history: list[ModelMessage],
+    ) -> TokenAllocation:
+        """Allocate tokens from actual API usage data proportionally to parts.
+
+        This uses the ground truth token counts from ModelResponse.usage instead of
+        creating synthetic messages, which avoids inflating counts with message framing overhead.
+
+        IMPORTANT: usage.input_tokens is cumulative (includes all conversation history), so we:
+        1. Use the LAST response's input_tokens as the ground truth total
+        2. Calculate proportions based on content size across ALL requests
+        3. Allocate the ground truth total proportionally
+
+        If usage data is missing or zero (e.g., after compaction), falls back to token estimation.
+
+        Args:
+            message_history: List of actual messages from conversation
+
+        Returns:
+            TokenAllocation with token counts by message/tool type
+        """
+        # Step 1: Find the last response's usage data (ground truth for input tokens)
+        last_input_tokens = 0
+        total_output_tokens = 0
+
+        for msg in reversed(message_history):
+            if isinstance(msg, ModelResponse) and msg.usage:
+                last_input_tokens = msg.usage.input_tokens + msg.usage.cache_read_tokens
+                break
+
+        if last_input_tokens == 0:
+            # Fallback to token estimation (no logging to reduce verbosity)
+            last_input_tokens = await estimate_tokens_from_messages(
+                message_history, self.model_config
+            )
+
+        # Step 2: Calculate total output tokens (sum across all responses)
+        for msg in message_history:
+            if isinstance(msg, ModelResponse) and msg.usage:
+                total_output_tokens += msg.usage.output_tokens
+
+        # Step 3: Calculate content size proportions for each part type across ALL requests
+        # Initialize size accumulators
+        user_size = 0
+        system_prompts_size = 0
+        system_status_size = 0
+        codebase_understanding_input_size = 0
+        artifact_management_input_size = 0
+        web_research_input_size = 0
+        unknown_input_size = 0
+
+        for msg in message_history:
+            if isinstance(msg, ModelRequest):
+                for part in msg.parts:
+                    if isinstance(part, (SystemPromptPart, UserPromptPart)):
+                        size = len(part.content)
+                    elif isinstance(part, ToolReturnPart):
+                        # ToolReturnPart.content can be Any type
+                        try:
+                            content_str = (
+                                json.dumps(part.content)
+                                if part.content is not None
+                                else ""
+                            )
+                        except (TypeError, ValueError):
+                            content_str = (
+                                str(part.content) if part.content is not None else ""
+                            )
+                        size = len(content_str)
+                    else:
+                        size = 0
+
+                    # Categorize by part type
+                    # Note: Check subclasses first (AgentSystemPrompt, SystemStatusPrompt)
+                    # before checking base class (SystemPromptPart)
+                    if isinstance(part, SystemStatusPrompt):
+                        system_status_size += size
+                    elif isinstance(part, AgentSystemPrompt):
+                        system_prompts_size += size
+                    elif isinstance(part, SystemPromptPart):
+                        # Generic system prompt (not AgentSystemPrompt or SystemStatusPrompt)
+                        system_prompts_size += size
+                    elif isinstance(part, UserPromptPart):
+                        user_size += size
+                    elif isinstance(part, ToolReturnPart):
+                        # Categorize tool results by tool category
+                        category = get_tool_category(part.tool_name)
+                        if category == ToolCategory.CODEBASE_UNDERSTANDING:
+                            codebase_understanding_input_size += size
+                        elif category == ToolCategory.ARTIFACT_MANAGEMENT:
+                            artifact_management_input_size += size
+                        elif category == ToolCategory.WEB_RESEARCH:
+                            web_research_input_size += size
+                        elif category == ToolCategory.UNKNOWN:
+                            unknown_input_size += size
+
+        # Step 4: Calculate output proportions by tool category
+        codebase_understanding_size = 0
+        artifact_management_size = 0
+        web_research_size = 0
+        unknown_size = 0
+        agent_response_size = 0
+
+        for msg in message_history:
+            if isinstance(msg, ModelResponse):
+                for part in msg.parts:  # type: ignore[assignment]
+                    if isinstance(part, ToolCallPart):
+                        category = get_tool_category(part.tool_name)
+                        size = len(str(part.args))
+
+                        if category == ToolCategory.AGENT_RESPONSE:
+                            agent_response_size += size
+                        elif category == ToolCategory.CODEBASE_UNDERSTANDING:
+                            codebase_understanding_size += size
+                        elif category == ToolCategory.ARTIFACT_MANAGEMENT:
+                            artifact_management_size += size
+                        elif category == ToolCategory.WEB_RESEARCH:
+                            web_research_size += size
+                        elif category == ToolCategory.UNKNOWN:
+                            unknown_size += size
+                    elif isinstance(part, TextPart):
+                        agent_response_size += len(part.content)
+
+        # Step 5: Allocate input tokens proportionally
+        # Initialize TokenAllocation fields
+        user_tokens = 0
+        agent_response_tokens = 0
+        system_prompt_tokens = 0
+        system_status_tokens = 0
+        codebase_understanding_tokens = 0
+        artifact_management_tokens = 0
+        web_research_tokens = 0
+        unknown_tokens = 0
+
+        total_input_size = (
+            user_size
+            + system_prompts_size
+            + system_status_size
+            + codebase_understanding_input_size
+            + artifact_management_input_size
+            + web_research_input_size
+            + unknown_input_size
+        )
+
+        if total_input_size > 0 and last_input_tokens > 0:
+            user_tokens = int(last_input_tokens * (user_size / total_input_size))
+            system_prompt_tokens = int(
+                last_input_tokens * (system_prompts_size / total_input_size)
+            )
+            system_status_tokens = int(
+                last_input_tokens * (system_status_size / total_input_size)
+            )
+            codebase_understanding_tokens = int(
+                last_input_tokens
+                * (codebase_understanding_input_size / total_input_size)
+            )
+            artifact_management_tokens = int(
+                last_input_tokens * (artifact_management_input_size / total_input_size)
+            )
+            web_research_tokens = int(
+                last_input_tokens * (web_research_input_size / total_input_size)
+            )
+            unknown_tokens = int(
+                last_input_tokens * (unknown_input_size / total_input_size)
+            )
+
+        # Step 6: Allocate output tokens proportionally
+        total_output_size = (
+            codebase_understanding_size
+            + artifact_management_size
+            + web_research_size
+            + unknown_size
+            + agent_response_size
+        )
+
+        if total_output_size > 0 and total_output_tokens > 0:
+            codebase_understanding_tokens += int(
+                total_output_tokens * (codebase_understanding_size / total_output_size)
+            )
+            artifact_management_tokens += int(
+                total_output_tokens * (artifact_management_size / total_output_size)
+            )
+            web_research_tokens += int(
+                total_output_tokens * (web_research_size / total_output_size)
+            )
+            unknown_tokens += int(
+                total_output_tokens * (unknown_size / total_output_size)
+            )
+            agent_response_tokens += int(
+                total_output_tokens * (agent_response_size / total_output_size)
+            )
+        elif total_output_tokens > 0:
+            # If no content, put all in agent responses
+            agent_response_tokens = total_output_tokens
+
+        # Token allocation complete (no logging to reduce verbosity)
+
+        # Create TokenAllocation model
+        return TokenAllocation(
+            user=user_tokens,
+            agent_responses=agent_response_tokens,
+            system_prompts=system_prompt_tokens,
+            system_status=system_status_tokens,
+            codebase_understanding=codebase_understanding_tokens,
+            artifact_management=artifact_management_tokens,
+            web_research=web_research_tokens,
+            unknown=unknown_tokens,
+        )
+
+    async def analyze_conversation(
+        self,
+        message_history: list[ModelMessage],
+        ui_message_history: list[ModelMessage | HintMessage],
+    ) -> ContextAnalysis:
+        """Analyze the conversation to determine message type composition.
+
+        Args:
+            message_history: The agent message history (for token counting)
+            ui_message_history: The UI message history (includes hints)
+
+        Returns:
+            ContextAnalysis with statistics for each message type
+        """
+        # Track counts for each message type
+        user_count = 0
+        agent_responses_count = 0
+        system_prompts_count = 0
+        system_status_count = 0
+        codebase_understanding_count = 0
+        artifact_management_count = 0
+        web_research_count = 0
+        unknown_count = 0
+
+        # Analyze message_history to count message types
+        for msg in message_history:
+            if isinstance(msg, ModelRequest):
+                # Track what types are in this message for counting
+                has_user_prompt = False
+                has_system_prompt = False
+                has_system_status = False
+
+                # Check what part types this message contains
+                for part in msg.parts:
+                    if isinstance(part, AgentSystemPrompt):
+                        has_system_prompt = True
+                    elif isinstance(part, SystemStatusPrompt):
+                        has_system_status = True
+                    elif isinstance(part, SystemPromptPart):
+                        # Generic system prompt
+                        has_system_prompt = True
+                    elif isinstance(part, UserPromptPart):
+                        has_user_prompt = True
+                    elif isinstance(part, ToolReturnPart):
+                        # Categorize tool results by category
+                        category = get_tool_category(part.tool_name)
+                        if category == ToolCategory.CODEBASE_UNDERSTANDING:
+                            codebase_understanding_count += 1
+                        elif category == ToolCategory.ARTIFACT_MANAGEMENT:
+                            artifact_management_count += 1
+                        elif category == ToolCategory.WEB_RESEARCH:
+                            web_research_count += 1
+                        elif category == ToolCategory.UNKNOWN:
+                            unknown_count += 1
+
+                # Count the message types (only count once per message)
+                if has_system_prompt:
+                    system_prompts_count += 1
+                if has_system_status:
+                    system_status_count += 1
+                if has_user_prompt:
+                    user_count += 1
+
+            elif isinstance(msg, ModelResponse):
+                # Agent responses - count entire response as one
+                agent_responses_count += 1
+
+                # Check for tool calls in the response
+                for part in msg.parts:  # type: ignore[assignment]
+                    if isinstance(part, ToolCallPart):
+                        category = get_tool_category(part.tool_name)
+                        if category == ToolCategory.CODEBASE_UNDERSTANDING:
+                            codebase_understanding_count += 1
+                        elif category == ToolCategory.ARTIFACT_MANAGEMENT:
+                            artifact_management_count += 1
+                        elif category == ToolCategory.WEB_RESEARCH:
+                            web_research_count += 1
+                        elif category == ToolCategory.UNKNOWN:
+                            unknown_count += 1
+
+        # Count hints from ui_message_history
+        hint_count = sum(
+            1 for msg in ui_message_history if isinstance(msg, HintMessage)
+        )
+
+        # Use actual API usage data for accurate token counting (avoids synthetic message overhead)
+        usage_tokens = await self._allocate_tokens_from_usage(message_history)
+
+        user_tokens = usage_tokens.user
+        agent_response_tokens = usage_tokens.agent_responses
+        system_prompt_tokens = usage_tokens.system_prompts
+        system_status_tokens = usage_tokens.system_status
+        codebase_understanding_tokens = usage_tokens.codebase_understanding
+        artifact_management_tokens = usage_tokens.artifact_management
+        web_research_tokens = usage_tokens.web_research
+        unknown_tokens = usage_tokens.unknown
+
+        # Estimate hint tokens (rough estimate based on character count)
+        hint_tokens = 0
+        for msg in ui_message_history:  # type: ignore[assignment]
+            if isinstance(msg, HintMessage):
+                # Rough estimate: ~4 chars per token
+                hint_tokens += len(msg.message) // 4
+
+        # Calculate agent context tokens (excluding UI-only hints)
+        agent_context_tokens = (
+            user_tokens
+            + agent_response_tokens
+            + system_prompt_tokens
+            + system_status_tokens
+            + codebase_understanding_tokens
+            + artifact_management_tokens
+            + web_research_tokens
+            + unknown_tokens
+        )
+
+        # Total tokens includes hints for display purposes, but agent_context_tokens does not
+        total_tokens = agent_context_tokens + hint_tokens
+        total_messages = (
+            user_count
+            + agent_responses_count
+            + system_prompts_count
+            + system_status_count
+            + codebase_understanding_count
+            + artifact_management_count
+            + web_research_count
+            + unknown_count
+            + hint_count
+        )
+
+        # Calculate usable context limit (80% of max_input_tokens) and free space
+        # This matches the TOKEN_LIMIT_RATIO = 0.8 from history/constants.py
+        max_usable_tokens = int(self.model_config.max_input_tokens * 0.8)
+        free_space_tokens = max_usable_tokens - agent_context_tokens
+
+        return ContextAnalysis(
+            user_messages=MessageTypeStats(count=user_count, tokens=user_tokens),
+            agent_responses=MessageTypeStats(
+                count=agent_responses_count, tokens=agent_response_tokens
+            ),
+            system_prompts=MessageTypeStats(
+                count=system_prompts_count, tokens=system_prompt_tokens
+            ),
+            system_status=MessageTypeStats(
+                count=system_status_count, tokens=system_status_tokens
+            ),
+            codebase_understanding=MessageTypeStats(
+                count=codebase_understanding_count,
+                tokens=codebase_understanding_tokens,
+            ),
+            artifact_management=MessageTypeStats(
+                count=artifact_management_count, tokens=artifact_management_tokens
+            ),
+            web_research=MessageTypeStats(
+                count=web_research_count, tokens=web_research_tokens
+            ),
+            unknown=MessageTypeStats(count=unknown_count, tokens=unknown_tokens),
+            hint_messages=MessageTypeStats(count=hint_count, tokens=hint_tokens),
+            total_tokens=total_tokens,
+            total_messages=total_messages,
+            context_window=self.model_config.max_input_tokens,
+            agent_context_tokens=agent_context_tokens,
+            model_name=self.model_config.name.value,
+            max_usable_tokens=max_usable_tokens,
+            free_space_tokens=free_space_tokens,
+        )
+
+    async def _count_tokens_for_parts(
+        self,
+        parts: Sequence[
+            UserPromptPart | SystemPromptPart | ToolReturnPart | ToolCallPart
+        ],
+        part_type: str,
+    ) -> int:
+        """Count tokens for a list of parts by creating synthetic single-part messages.
+
+        This avoids double-counting when a message contains multiple part types.
+
+        Args:
+            parts: List of parts to count tokens for
+            part_type: Type of parts ("user", "system", "tool_return", "tool_call")
+
+        Returns:
+            Total token count for all parts
+        """
+        if not parts:
+            return 0
+
+        # Create synthetic messages with single parts for accurate token counting
+        synthetic_messages: list[ModelMessage] = []
+
+        for part in parts:
+            if part_type in ("user", "system", "tool_return"):
+                # These are request parts - wrap in ModelRequest
+                synthetic_messages.append(ModelRequest(parts=[part]))  # type: ignore[list-item]
+            elif part_type == "tool_call":
+                # Tool calls are in responses - wrap in ModelResponse
+                synthetic_messages.append(ModelResponse(parts=[part]))  # type: ignore[list-item]
+
+        # Count tokens for the synthetic messages
+        return await self._count_tokens_safe(synthetic_messages)
+
+    async def _count_tokens_safe(self, messages: Sequence[ModelMessage]) -> int:
+        """Count tokens for a list of messages, returning 0 on error.
+
+        Args:
+            messages: List of messages to count tokens for
+
+        Returns:
+            Token count or 0 if counting fails
+        """
+        if not messages:
+            return 0
+
+        try:
+            return await count_tokens_from_messages(list(messages), self.model_config)
+        except Exception as e:
+            logger.warning(f"Failed to count tokens: {e}")
+            # Fallback to rough estimate
+            total_chars = sum(len(str(msg)) for msg in messages)
+            return total_chars // 4  # Rough estimate: 4 chars per token