shotgun-sh 0.1.9__py3-none-any.whl → 0.2.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of shotgun-sh might be problematic.
- shotgun/agents/agent_manager.py +761 -52
- shotgun/agents/common.py +80 -75
- shotgun/agents/config/constants.py +21 -10
- shotgun/agents/config/manager.py +322 -97
- shotgun/agents/config/models.py +114 -84
- shotgun/agents/config/provider.py +232 -88
- shotgun/agents/context_analyzer/__init__.py +28 -0
- shotgun/agents/context_analyzer/analyzer.py +471 -0
- shotgun/agents/context_analyzer/constants.py +9 -0
- shotgun/agents/context_analyzer/formatter.py +115 -0
- shotgun/agents/context_analyzer/models.py +212 -0
- shotgun/agents/conversation_history.py +125 -2
- shotgun/agents/conversation_manager.py +57 -19
- shotgun/agents/export.py +6 -7
- shotgun/agents/history/compaction.py +23 -3
- shotgun/agents/history/context_extraction.py +93 -6
- shotgun/agents/history/history_processors.py +179 -11
- shotgun/agents/history/token_counting/__init__.py +31 -0
- shotgun/agents/history/token_counting/anthropic.py +127 -0
- shotgun/agents/history/token_counting/base.py +78 -0
- shotgun/agents/history/token_counting/openai.py +90 -0
- shotgun/agents/history/token_counting/sentencepiece_counter.py +127 -0
- shotgun/agents/history/token_counting/tokenizer_cache.py +92 -0
- shotgun/agents/history/token_counting/utils.py +144 -0
- shotgun/agents/history/token_estimation.py +12 -12
- shotgun/agents/llm.py +62 -0
- shotgun/agents/models.py +59 -4
- shotgun/agents/plan.py +6 -7
- shotgun/agents/research.py +7 -8
- shotgun/agents/specify.py +6 -7
- shotgun/agents/tasks.py +6 -7
- shotgun/agents/tools/__init__.py +0 -2
- shotgun/agents/tools/codebase/codebase_shell.py +6 -0
- shotgun/agents/tools/codebase/directory_lister.py +6 -0
- shotgun/agents/tools/codebase/file_read.py +11 -2
- shotgun/agents/tools/codebase/query_graph.py +6 -0
- shotgun/agents/tools/codebase/retrieve_code.py +6 -0
- shotgun/agents/tools/file_management.py +82 -16
- shotgun/agents/tools/registry.py +217 -0
- shotgun/agents/tools/web_search/__init__.py +55 -16
- shotgun/agents/tools/web_search/anthropic.py +76 -51
- shotgun/agents/tools/web_search/gemini.py +50 -27
- shotgun/agents/tools/web_search/openai.py +26 -17
- shotgun/agents/tools/web_search/utils.py +2 -2
- shotgun/agents/usage_manager.py +164 -0
- shotgun/api_endpoints.py +15 -0
- shotgun/cli/clear.py +53 -0
- shotgun/cli/codebase/commands.py +71 -2
- shotgun/cli/compact.py +186 -0
- shotgun/cli/config.py +41 -67
- shotgun/cli/context.py +111 -0
- shotgun/cli/export.py +1 -1
- shotgun/cli/feedback.py +50 -0
- shotgun/cli/models.py +3 -2
- shotgun/cli/plan.py +1 -1
- shotgun/cli/research.py +1 -1
- shotgun/cli/specify.py +1 -1
- shotgun/cli/tasks.py +1 -1
- shotgun/cli/update.py +18 -5
- shotgun/codebase/core/change_detector.py +5 -3
- shotgun/codebase/core/code_retrieval.py +4 -2
- shotgun/codebase/core/ingestor.py +169 -19
- shotgun/codebase/core/manager.py +177 -13
- shotgun/codebase/core/nl_query.py +1 -1
- shotgun/codebase/models.py +28 -3
- shotgun/codebase/service.py +14 -2
- shotgun/exceptions.py +32 -0
- shotgun/llm_proxy/__init__.py +19 -0
- shotgun/llm_proxy/clients.py +44 -0
- shotgun/llm_proxy/constants.py +15 -0
- shotgun/logging_config.py +18 -27
- shotgun/main.py +91 -4
- shotgun/posthog_telemetry.py +87 -40
- shotgun/prompts/agents/export.j2 +18 -1
- shotgun/prompts/agents/partials/common_agent_system_prompt.j2 +5 -1
- shotgun/prompts/agents/partials/interactive_mode.j2 +24 -7
- shotgun/prompts/agents/plan.j2 +1 -1
- shotgun/prompts/agents/research.j2 +1 -1
- shotgun/prompts/agents/specify.j2 +270 -3
- shotgun/prompts/agents/state/system_state.j2 +4 -0
- shotgun/prompts/agents/tasks.j2 +1 -1
- shotgun/prompts/codebase/partials/cypher_rules.j2 +13 -0
- shotgun/prompts/loader.py +2 -2
- shotgun/prompts/tools/web_search.j2 +14 -0
- shotgun/sdk/codebase.py +60 -2
- shotgun/sentry_telemetry.py +28 -21
- shotgun/settings.py +238 -0
- shotgun/shotgun_web/__init__.py +19 -0
- shotgun/shotgun_web/client.py +138 -0
- shotgun/shotgun_web/constants.py +21 -0
- shotgun/shotgun_web/models.py +47 -0
- shotgun/telemetry.py +24 -36
- shotgun/tui/app.py +275 -23
- shotgun/tui/commands/__init__.py +1 -1
- shotgun/tui/components/context_indicator.py +179 -0
- shotgun/tui/components/mode_indicator.py +70 -0
- shotgun/tui/components/status_bar.py +48 -0
- shotgun/tui/components/vertical_tail.py +6 -0
- shotgun/tui/containers.py +91 -0
- shotgun/tui/dependencies.py +39 -0
- shotgun/tui/filtered_codebase_service.py +46 -0
- shotgun/tui/protocols.py +45 -0
- shotgun/tui/screens/chat/__init__.py +5 -0
- shotgun/tui/screens/chat/chat.tcss +54 -0
- shotgun/tui/screens/chat/chat_screen.py +1234 -0
- shotgun/tui/screens/chat/codebase_index_prompt_screen.py +64 -0
- shotgun/tui/screens/chat/codebase_index_selection.py +12 -0
- shotgun/tui/screens/chat/help_text.py +40 -0
- shotgun/tui/screens/chat/prompt_history.py +48 -0
- shotgun/tui/screens/chat.tcss +11 -0
- shotgun/tui/screens/chat_screen/command_providers.py +226 -11
- shotgun/tui/screens/chat_screen/history/__init__.py +22 -0
- shotgun/tui/screens/chat_screen/history/agent_response.py +66 -0
- shotgun/tui/screens/chat_screen/history/chat_history.py +116 -0
- shotgun/tui/screens/chat_screen/history/formatters.py +115 -0
- shotgun/tui/screens/chat_screen/history/partial_response.py +43 -0
- shotgun/tui/screens/chat_screen/history/user_question.py +42 -0
- shotgun/tui/screens/confirmation_dialog.py +151 -0
- shotgun/tui/screens/feedback.py +193 -0
- shotgun/tui/screens/github_issue.py +102 -0
- shotgun/tui/screens/model_picker.py +352 -0
- shotgun/tui/screens/onboarding.py +431 -0
- shotgun/tui/screens/pipx_migration.py +153 -0
- shotgun/tui/screens/provider_config.py +156 -39
- shotgun/tui/screens/shotgun_auth.py +295 -0
- shotgun/tui/screens/welcome.py +198 -0
- shotgun/tui/services/__init__.py +5 -0
- shotgun/tui/services/conversation_service.py +184 -0
- shotgun/tui/state/__init__.py +7 -0
- shotgun/tui/state/processing_state.py +185 -0
- shotgun/tui/utils/mode_progress.py +14 -7
- shotgun/tui/widgets/__init__.py +5 -0
- shotgun/tui/widgets/widget_coordinator.py +262 -0
- shotgun/utils/datetime_utils.py +77 -0
- shotgun/utils/env_utils.py +13 -0
- shotgun/utils/file_system_utils.py +22 -2
- shotgun/utils/marketing.py +110 -0
- shotgun/utils/source_detection.py +16 -0
- shotgun/utils/update_checker.py +73 -21
- shotgun_sh-0.2.11.dist-info/METADATA +130 -0
- shotgun_sh-0.2.11.dist-info/RECORD +194 -0
- {shotgun_sh-0.1.9.dist-info → shotgun_sh-0.2.11.dist-info}/entry_points.txt +1 -0
- {shotgun_sh-0.1.9.dist-info → shotgun_sh-0.2.11.dist-info}/licenses/LICENSE +1 -1
- shotgun/agents/history/token_counting.py +0 -429
- shotgun/agents/tools/user_interaction.py +0 -37
- shotgun/tui/screens/chat.py +0 -818
- shotgun/tui/screens/chat_screen/history.py +0 -222
- shotgun_sh-0.1.9.dist-info/METADATA +0 -466
- shotgun_sh-0.1.9.dist-info/RECORD +0 -131
- {shotgun_sh-0.1.9.dist-info → shotgun_sh-0.2.11.dist-info}/WHEEL +0 -0
shotgun/agents/history/context_extraction.py

@@ -1,5 +1,9 @@
 """Context extraction utilities for history processing."""
 
+import json
+import logging
+import traceback
+
 from pydantic_ai.messages import (
     BuiltinToolCallPart,
     BuiltinToolReturnPart,
@@ -16,6 +20,46 @@ from pydantic_ai.messages import (
     UserPromptPart,
 )
 
+logger = logging.getLogger(__name__)
+
+
+def _safely_parse_tool_args(args: dict[str, object] | str | None) -> dict[str, object]:
+    """Safely parse tool call arguments, handling incomplete/invalid JSON.
+
+    Args:
+        args: Tool call arguments (dict, JSON string, or None)
+
+    Returns:
+        Parsed args dict, or empty dict if parsing fails
+    """
+    if args is None:
+        return {}
+
+    if isinstance(args, dict):
+        return args
+
+    if not isinstance(args, str):
+        return {}
+
+    try:
+        parsed = json.loads(args)
+        return parsed if isinstance(parsed, dict) else {}
+    except (json.JSONDecodeError, ValueError) as e:
+        # Only log warning if it looks like JSON (starts with { or [) - incomplete JSON
+        # Plain strings are valid args and shouldn't trigger warnings
+        stripped_args = args.strip()
+        if stripped_args.startswith(("{", "[")):
+            args_preview = args[:100] + "..." if len(args) > 100 else args
+            logger.warning(
+                "Detected incomplete/invalid JSON in tool call args during parsing",
+                extra={
+                    "args_preview": args_preview,
+                    "error": str(e),
+                    "args_length": len(args),
+                },
+            )
+        return {}
+
 
 def extract_context_from_messages(messages: list[ModelMessage]) -> str:
     """Extract context from a list of messages for summarization."""
@@ -87,12 +131,55 @@ def extract_context_from_part(
         return f"<ASSISTANT_TEXT>\n{message_part.content}\n</ASSISTANT_TEXT>"
 
     elif isinstance(message_part, ToolCallPart):
-
-
-
-
-
-
+        # Safely parse args to avoid crashes from incomplete JSON during streaming
+        try:
+            parsed_args = _safely_parse_tool_args(message_part.args)
+            if parsed_args:
+                # Successfully parsed as dict - format nicely
+                args_str = ", ".join(f"{k}={repr(v)}" for k, v in parsed_args.items())
+                tool_call_str = f"{message_part.tool_name}({args_str})"
+            elif isinstance(message_part.args, str) and message_part.args:
+                # Non-empty string that didn't parse as JSON
+                # Check if it looks like JSON (starts with { or [) - if so, it's incomplete
+                stripped_args = message_part.args.strip()
+                if stripped_args.startswith(("{", "[")):
+                    # Looks like incomplete JSON - log warning and show empty parens
+                    args_preview = (
+                        stripped_args[:100] + "..."
+                        if len(stripped_args) > 100
+                        else stripped_args
+                    )
+                    stack_trace = "".join(traceback.format_stack())
+                    logger.warning(
+                        "ToolCallPart with unparseable args encountered during context extraction",
+                        extra={
+                            "tool_name": message_part.tool_name,
+                            "tool_call_id": message_part.tool_call_id,
+                            "args_preview": args_preview,
+                            "args_type": type(message_part.args).__name__,
+                            "stack_trace": stack_trace,
+                        },
+                    )
+                    tool_call_str = f"{message_part.tool_name}()"
+                else:
+                    # Plain string arg - display as-is
+                    tool_call_str = f"{message_part.tool_name}({message_part.args})"
+            else:
+                # No args
+                tool_call_str = f"{message_part.tool_name}()"
+            return f"<TOOL_CALL>\n{tool_call_str}\n</TOOL_CALL>"
+        except Exception as e:  # pragma: no cover - defensive catch-all
+            # If anything goes wrong, log full exception with stack trace
+            logger.error(
+                "Unexpected error processing ToolCallPart",
+                exc_info=True,
+                extra={
+                    "tool_name": message_part.tool_name,
+                    "tool_call_id": message_part.tool_call_id,
+                    "error": str(e),
+                },
+            )
+            return f"<TOOL_CALL>\n{message_part.tool_name}()\n</TOOL_CALL>"
 
     elif isinstance(message_part, BuiltinToolCallPart):
         return f"<BUILTIN_TOOL_CALL>\n{message_part.tool_name}\n</BUILTIN_TOOL_CALL>"
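For reference, the new parser degrades gracefully instead of raising on partial JSON produced mid-stream. A minimal sketch of the expected behavior, based only on the code added above (assuming the module path matches the file layout):

```python
# Illustrative only: exercises _safely_parse_tool_args as added in this release.
from shotgun.agents.history.context_extraction import _safely_parse_tool_args

assert _safely_parse_tool_args(None) == {}                               # no args
assert _safely_parse_tool_args({"path": "a.py"}) == {"path": "a.py"}     # already a dict
assert _safely_parse_tool_args('{"path": "a.py"}') == {"path": "a.py"}   # valid JSON string
assert _safely_parse_tool_args('{"path": "a.') == {}                     # truncated JSON -> warning logged, empty dict
assert _safely_parse_tool_args("plain text") == {}                       # non-JSON string -> empty dict, no warning
```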
shotgun/agents/history/history_processors.py

@@ -1,7 +1,10 @@
 """History processors for managing conversation history in Shotgun agents."""
 
+from collections.abc import Awaitable, Callable
 from typing import TYPE_CHECKING, Any, Protocol
 
+from anthropic import APIStatusError
+from pydantic_ai import ModelSettings
 from pydantic_ai.messages import (
     ModelMessage,
     ModelRequest,
@@ -10,10 +13,12 @@ from pydantic_ai.messages import (
     UserPromptPart,
 )
 
-from shotgun.agents.
+from shotgun.agents.llm import shotgun_model_request
 from shotgun.agents.messages import AgentSystemPrompt, SystemStatusPrompt
 from shotgun.agents.models import AgentDeps
+from shotgun.exceptions import ContextSizeLimitExceeded
 from shotgun.logging_config import get_logger
+from shotgun.posthog_telemetry import track_event
 from shotgun.prompts import PromptLoader
 
 from .constants import SUMMARY_MARKER, TOKEN_LIMIT_RATIO
@@ -49,6 +54,86 @@ logger = get_logger(__name__)
 prompt_loader = PromptLoader()
 
 
+async def _safe_token_estimation(
+    estimation_func: Callable[..., Awaitable[int]],
+    model_name: str,
+    max_tokens: int,
+    *args: Any,
+    **kwargs: Any,
+) -> int:
+    """Safely estimate tokens with proper error handling.
+
+    Wraps token estimation functions to handle failures gracefully.
+    Only RuntimeError (from token counters) is wrapped in ContextSizeLimitExceeded.
+    Other errors (network, auth) are allowed to bubble up.
+
+    Args:
+        estimation_func: Async function that estimates tokens
+        model_name: Name of the model for error messages
+        max_tokens: Maximum tokens for the model
+        *args: Arguments to pass to estimation_func
+        **kwargs: Keyword arguments to pass to estimation_func
+
+    Returns:
+        Token count from estimation_func
+
+    Raises:
+        ContextSizeLimitExceeded: If token counting fails with RuntimeError
+        Exception: Any other exceptions from estimation_func
+    """
+    try:
+        return await estimation_func(*args, **kwargs)
+    except Exception as e:
+        # Log the error with full context
+        logger.warning(
+            f"Token counting failed for {model_name}",
+            extra={
+                "error_type": type(e).__name__,
+                "error_message": str(e),
+                "model": model_name,
+            },
+        )
+
+        # Token counting behavior with oversized context (verified via testing):
+        #
+        # 1. OpenAI/tiktoken:
+        #    - Successfully counts any size (tested with 752K tokens, no error)
+        #    - Library errors: ValueError, KeyError, AttributeError, SSLError (file/cache issues)
+        #    - Wrapped as: RuntimeError by our counter
+        #
+        # 2. Gemini/SentencePiece:
+        #    - Successfully counts any size (tested with 752K tokens, no error)
+        #    - Library errors: RuntimeError, IOError, TypeError (file/model loading issues)
+        #    - Wrapped as: RuntimeError by our counter
+        #
+        # 3. Anthropic API:
+        #    - Successfully counts large token counts (tested with 752K tokens, no error)
+        #    - Only enforces 32 MB request size limit (not token count)
+        #    - Raises: APIStatusError(413) with error type 'request_too_large' for 32MB+ requests
+        #    - Other API errors: APIConnectionError, RateLimitError, APIStatusError (4xx/5xx)
+        #    - Wrapped as: RuntimeError by our counter
+        #
+        # IMPORTANT: No provider raises errors for "too many tokens" during counting.
+        # Token count validation happens separately by comparing count to max_input_tokens.
+        #
+        # We wrap RuntimeError (library-level failures from tiktoken/sentencepiece).
+        # We also wrap Anthropic's 413 error (request exceeds 32 MB) as it indicates
+        # context is effectively too large and needs user action to reduce it.
+        if isinstance(e, RuntimeError):
+            raise ContextSizeLimitExceeded(
+                model_name=model_name, max_tokens=max_tokens
+            ) from e
+
+        # Check for Anthropic's 32 MB request size limit (APIStatusError with status 413)
+        if isinstance(e, APIStatusError) and e.status_code == 413:
+            raise ContextSizeLimitExceeded(
+                model_name=model_name, max_tokens=max_tokens
+            ) from e
+
+        # Re-raise other exceptions (network errors, auth failures, etc.)
+        raise
+
+
 def is_summary_part(part: Any) -> bool:
     """Check if a message part is a compacted summary."""
     return isinstance(part, TextPart) and part.content.startswith(SUMMARY_MARKER)
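To make the error-handling contract concrete, here is a hedged sketch of how a counting failure surfaces to callers; the failing counter and the model name below are hypothetical, while `_safe_token_estimation` and `ContextSizeLimitExceeded` are the objects added in this diff:

```python
import asyncio

from shotgun.agents.history.history_processors import _safe_token_estimation
from shotgun.exceptions import ContextSizeLimitExceeded


async def broken_counter() -> int:
    # Stand-in for a counter whose tokenizer failed to load (hypothetical).
    raise RuntimeError("tiktoken encoding could not be loaded")


async def main() -> None:
    try:
        await _safe_token_estimation(broken_counter, "gpt-4o", 128_000)
    except ContextSizeLimitExceeded as exc:
        # RuntimeError from the counter is wrapped; network or auth errors
        # would instead re-raise unchanged.
        print(f"Context size limit hit: {exc}")


asyncio.run(main())
```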
@@ -125,6 +210,7 @@ calculate_max_summarization_tokens = _calculate_max_summarization_tokens
 async def token_limit_compactor(
     ctx: ContextProtocol,
     messages: list[ModelMessage],
+    force: bool = False,
 ) -> list[ModelMessage]:
     """Compact message history based on token limits with incremental processing.
 
@@ -137,6 +223,7 @@ async def token_limit_compactor(
     Args:
         ctx: Run context with usage information and dependencies
         messages: Current conversation history
+        force: If True, force compaction even if below token threshold
 
     Returns:
         Compacted list of messages within token limits
@@ -153,9 +240,15 @@
 
     if last_summary_index is not None:
         # Check if post-summary conversation exceeds threshold for incremental compaction
-        post_summary_tokens =
-
+        post_summary_tokens = await _safe_token_estimation(
+            estimate_post_summary_tokens,
+            deps.llm_model.name,
+            model_max_tokens,
+            messages,
+            last_summary_index,
+            deps.llm_model,
         )
+
         post_summary_percentage = (
             (post_summary_tokens / max_tokens) * 100 if max_tokens > 0 else 0
         )
@@ -167,7 +260,7 @@
         )
 
         # Only do incremental compaction if post-summary conversation exceeds threshold
-        if post_summary_tokens < max_tokens:
+        if post_summary_tokens < max_tokens and not force:
             logger.debug(
                 f"Post-summary conversation under threshold ({post_summary_tokens} < {max_tokens}), "
                 f"keeping all {len(messages)} messages"
@@ -179,6 +272,10 @@
             "Post-summary conversation exceeds threshold, performing incremental compaction"
         )
 
+        # Track compaction event
+        messages_before = len(messages)
+        tokens_before = post_summary_tokens
+
         # Extract existing summary content
         summary_message = messages[last_summary_index]
         existing_summary_part = None
@@ -243,7 +340,7 @@
         ]
 
         # Calculate optimal max_tokens for summarization
-        max_tokens = calculate_max_summarization_tokens(
+        max_tokens = await calculate_max_summarization_tokens(
             deps.llm_model, request_messages
         )
 
@@ -256,7 +353,9 @@
         summary_response = await shotgun_model_request(
             model_config=deps.llm_model,
             messages=request_messages,
-
+            model_settings=ModelSettings(
+                max_tokens=max_tokens  # Use calculated optimal tokens for summarization
+            ),
         )
 
         log_summarization_response(summary_response, "INCREMENTAL")
@@ -320,11 +419,50 @@
         logger.debug(
             f"Incremental compaction complete: {len(messages)} -> {len(compacted_messages)} messages"
         )
+
+        # Track compaction completion
+        messages_after = len(compacted_messages)
+        tokens_after = await estimate_tokens_from_messages(
+            compacted_messages, deps.llm_model
+        )
+        reduction_percentage = (
+            ((messages_before - messages_after) / messages_before * 100)
+            if messages_before > 0
+            else 0
+        )
+
+        # Track incremental compaction with simple metrics (fast, no token counting)
+        track_event(
+            "context_compaction_triggered",
+            {
+                "compaction_type": "incremental",
+                "messages_before": messages_before,
+                "messages_after": messages_after,
+                "tokens_before": tokens_before,
+                "tokens_after": tokens_after,
+                "reduction_percentage": round(reduction_percentage, 2),
+                "agent_mode": deps.agent_mode.value
+                if hasattr(deps, "agent_mode") and deps.agent_mode
+                else "unknown",
+                # Model and provider info (no computation needed)
+                "model_name": deps.llm_model.name.value,
+                "provider": deps.llm_model.provider.value,
+                "key_provider": deps.llm_model.key_provider.value,
+            },
+        )
+
         return compacted_messages
 
     else:
         # Check if total conversation exceeds threshold for full compaction
-        total_tokens =
+        total_tokens = await _safe_token_estimation(
+            estimate_tokens_from_messages,
+            deps.llm_model.name,
+            model_max_tokens,
+            messages,
+            deps.llm_model,
+        )
+
         total_percentage = (total_tokens / max_tokens) * 100 if max_tokens > 0 else 0
 
         logger.debug(
@@ -333,7 +471,7 @@ async def token_limit_compactor(
         )
 
         # Only do full compaction if total conversation exceeds threshold
-        if total_tokens < max_tokens:
+        if total_tokens < max_tokens and not force:
             logger.debug(
                 f"Total conversation under threshold ({total_tokens} < {max_tokens}), "
                 f"keeping all {len(messages)} messages"
@@ -362,7 +500,9 @@ async def _full_compaction(
     ]
 
     # Calculate optimal max_tokens for summarization
-    max_tokens = calculate_max_summarization_tokens(
+    max_tokens = await calculate_max_summarization_tokens(
+        deps.llm_model, request_messages
+    )
 
     # Debug logging using shared utilities
     log_summarization_request(
@@ -373,11 +513,13 @@
     summary_response = await shotgun_model_request(
         model_config=deps.llm_model,
         messages=request_messages,
-
+        model_settings=ModelSettings(
+            max_tokens=max_tokens  # Use calculated optimal tokens for summarization
+        ),
     )
 
     # Calculate token reduction
-    current_tokens = estimate_tokens_from_messages(messages, deps.llm_model)
+    current_tokens = await estimate_tokens_from_messages(messages, deps.llm_model)
     summary_usage = summary_response.usage
     reduction_percentage = (
         ((current_tokens - summary_usage.output_tokens) / current_tokens) * 100
@@ -423,4 +565,30 @@
     # Ensure history ends with ModelRequest for PydanticAI compatibility
     compacted_messages = ensure_ends_with_model_request(compacted_messages, messages)
 
+    # Track full compaction event
+    messages_before = len(messages)
+    messages_after = len(compacted_messages)
+    tokens_before = current_tokens  # Already calculated above
+    tokens_after = summary_usage.output_tokens if summary_usage else 0
+
+    # Track full compaction with simple metrics (fast, no token counting)
+    track_event(
+        "context_compaction_triggered",
+        {
+            "compaction_type": "full",
+            "messages_before": messages_before,
+            "messages_after": messages_after,
+            "tokens_before": tokens_before,
+            "tokens_after": tokens_after,
+            "reduction_percentage": round(reduction_percentage, 2),
+            "agent_mode": deps.agent_mode.value
+            if hasattr(deps, "agent_mode") and deps.agent_mode
+            else "unknown",
+            # Model and provider info (no computation needed)
+            "model_name": deps.llm_model.name.value,
+            "provider": deps.llm_model.provider.value,
+            "key_provider": deps.llm_model.key_provider.value,
+        },
+    )
+
     return compacted_messages
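Callers opt into unconditional summarization through the new `force` flag. A hedged sketch of the call shape follows; the `ctx` run context and message list come from the agent run, and the wrapper name here is hypothetical:

```python
from pydantic_ai.messages import ModelMessage

from shotgun.agents.history.history_processors import token_limit_compactor


async def compact_now(ctx, messages: list[ModelMessage]) -> list[ModelMessage]:
    # force=True skips the "under threshold, keep all messages" early return,
    # so a summary is produced even when the history is still small
    # (presumably what the new compact command in shotgun/cli/compact.py relies on).
    return await token_limit_compactor(ctx, messages, force=True)
```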
shotgun/agents/history/token_counting/__init__.py

@@ -0,0 +1,31 @@
+"""Real token counting for all supported providers.
+
+This module provides accurate token counting using each provider's official
+APIs and libraries, eliminating the need for rough character-based estimation.
+"""
+
+from .anthropic import AnthropicTokenCounter
+from .base import TokenCounter, extract_text_from_messages
+from .openai import OpenAITokenCounter
+from .sentencepiece_counter import SentencePieceTokenCounter
+from .utils import (
+    count_post_summary_tokens,
+    count_tokens_from_message_parts,
+    count_tokens_from_messages,
+    get_token_counter,
+)
+
+__all__ = [
+    # Base classes
+    "TokenCounter",
+    # Counter implementations
+    "OpenAITokenCounter",
+    "AnthropicTokenCounter",
+    "SentencePieceTokenCounter",
+    # Utility functions
+    "get_token_counter",
+    "count_tokens_from_messages",
+    "count_post_summary_tokens",
+    "count_tokens_from_message_parts",
+    "extract_text_from_messages",
+]
shotgun/agents/history/token_counting/anthropic.py

@@ -0,0 +1,127 @@
+"""Anthropic token counting using official client."""
+
+import logfire
+from pydantic_ai.messages import ModelMessage
+
+from shotgun.agents.config.models import KeyProvider
+from shotgun.llm_proxy import create_anthropic_proxy_provider
+from shotgun.logging_config import get_logger
+
+from .base import TokenCounter, extract_text_from_messages
+
+logger = get_logger(__name__)
+
+
+class AnthropicTokenCounter(TokenCounter):
+    """Token counter for Anthropic models using official client."""
+
+    def __init__(
+        self,
+        model_name: str,
+        api_key: str,
+        key_provider: KeyProvider = KeyProvider.BYOK,
+    ):
+        """Initialize Anthropic token counter.
+
+        Args:
+            model_name: Anthropic model name for token counting
+            api_key: API key (Anthropic for BYOK, Shotgun for proxy)
+            key_provider: Key provider type (BYOK or SHOTGUN)
+
+        Raises:
+            RuntimeError: If client initialization fails
+        """
+        self.model_name = model_name
+        import anthropic
+
+        try:
+            if key_provider == KeyProvider.SHOTGUN:
+                # Use LiteLLM proxy for Shotgun Account
+                # Get async client from AnthropicProvider
+                provider = create_anthropic_proxy_provider(api_key)
+                self.client = provider.client
+                logger.debug(
+                    f"Initialized async Anthropic token counter for {model_name} via LiteLLM proxy"
+                )
+            else:
+                # Direct Anthropic API for BYOK - use async client
+                self.client = anthropic.AsyncAnthropic(api_key=api_key)
+                logger.debug(
+                    f"Initialized async Anthropic token counter for {model_name} via direct API"
+                )
+        except Exception as e:
+            logfire.exception(
+                f"Failed to initialize Anthropic token counter for {model_name}",
+                model_name=model_name,
+                key_provider=key_provider.value,
+                exception_type=type(e).__name__,
+            )
+            raise RuntimeError(
+                f"Failed to initialize Anthropic async client for {model_name}: {type(e).__name__}: {str(e)}"
+            ) from e
+
+    async def count_tokens(self, text: str) -> int:
+        """Count tokens using Anthropic's official API (async).
+
+        Args:
+            text: Text to count tokens for
+
+        Returns:
+            Exact token count from Anthropic API
+
+        Raises:
+            RuntimeError: If API call fails
+        """
+        # Handle empty text to avoid unnecessary API calls
+        # Anthropic API requires non-empty content, so we need a strict check
+        if not text or not text.strip():
+            return 0
+
+        # Additional validation: ensure the text has actual content
+        # Some edge cases might have only whitespace or control characters
+        cleaned_text = text.strip()
+        if not cleaned_text:
+            return 0
+
+        try:
+            # Anthropic API expects messages format and model parameter
+            # Use await with async client
+            result = await self.client.messages.count_tokens(
+                messages=[{"role": "user", "content": cleaned_text}],
+                model=self.model_name,
+            )
+            return result.input_tokens
+        except Exception as e:
+            # Create a preview of the text for logging (truncated to avoid huge logs)
+            text_preview = text[:100] + "..." if len(text) > 100 else text
+
+            logfire.exception(
+                f"Anthropic token counting failed for {self.model_name}",
+                model_name=self.model_name,
+                text_length=len(text),
+                text_preview=text_preview,
+                exception_type=type(e).__name__,
+                exception_message=str(e),
+            )
+            raise RuntimeError(
+                f"Anthropic token counting API failed for {self.model_name}: {type(e).__name__}: {str(e)}"
+            ) from e
+
+    async def count_message_tokens(self, messages: list[ModelMessage]) -> int:
+        """Count tokens across all messages using Anthropic API (async).
+
+        Args:
+            messages: List of PydanticAI messages
+
+        Returns:
+            Total token count for all messages
+
+        Raises:
+            RuntimeError: If token counting fails
+        """
+        # Handle empty message list early
+        if not messages:
+            return 0
+
+        total_text = extract_text_from_messages(messages)
+        return await self.count_tokens(total_text)
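As a usage sketch (illustrative values only; the model name and key below are placeholders for a BYOK setup), the counter is constructed per model and awaited per call:

```python
import asyncio

from shotgun.agents.history.token_counting import AnthropicTokenCounter


async def main() -> None:
    # Hypothetical values: any Anthropic model name and a BYOK API key.
    counter = AnthropicTokenCounter(
        model_name="claude-sonnet-4-20250514",
        api_key="sk-ant-...",
    )
    # One network round-trip per call; empty or whitespace-only text short-circuits to 0.
    print(await counter.count_tokens("How many tokens is this sentence?"))


asyncio.run(main())
```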
shotgun/agents/history/token_counting/base.py

@@ -0,0 +1,78 @@
+"""Base classes and shared utilities for token counting."""
+
+from abc import ABC, abstractmethod
+
+from pydantic_ai.messages import ModelMessage
+
+
+class TokenCounter(ABC):
+    """Abstract base class for provider-specific token counting.
+
+    All methods are async to support non-blocking operations like
+    downloading tokenizer models or making API calls.
+    """
+
+    @abstractmethod
+    async def count_tokens(self, text: str) -> int:
+        """Count tokens in text using provider-specific method (async).
+
+        Args:
+            text: Text to count tokens for
+
+        Returns:
+            Exact token count as determined by the provider
+
+        Raises:
+            RuntimeError: If token counting fails
+        """
+
+    @abstractmethod
+    async def count_message_tokens(self, messages: list[ModelMessage]) -> int:
+        """Count tokens in PydanticAI message structures (async).
+
+        Args:
+            messages: List of messages to count tokens for
+
+        Returns:
+            Total token count across all messages
+
+        Raises:
+            RuntimeError: If token counting fails
+        """
+
+
+def extract_text_from_messages(messages: list[ModelMessage]) -> str:
+    """Extract all text content from messages for token counting.
+
+    Args:
+        messages: List of PydanticAI messages
+
+    Returns:
+        Combined text content from all messages
+    """
+    text_parts = []
+
+    for message in messages:
+        if hasattr(message, "parts"):
+            for part in message.parts:
+                if hasattr(part, "content") and isinstance(part.content, str):
+                    # Only add non-empty content
+                    if part.content.strip():
+                        text_parts.append(part.content)
+                else:
+                    # Handle non-text parts (tool calls, etc.)
+                    part_str = str(part)
+                    if part_str.strip():
+                        text_parts.append(part_str)
+        else:
+            # Handle messages without parts
+            msg_str = str(message)
+            if msg_str.strip():
+                text_parts.append(msg_str)
+
+    # If no valid text parts found, return a minimal placeholder
+    # This ensures we never send completely empty content to APIs
+    if not text_parts:
+        return "."
+
+    return "\n".join(text_parts)
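The abstract base only fixes the two async methods; a toy subclass (not part of the package, for illustration only) makes the contract concrete:

```python
from pydantic_ai.messages import ModelMessage

from shotgun.agents.history.token_counting.base import (
    TokenCounter,
    extract_text_from_messages,
)


class WhitespaceTokenCounter(TokenCounter):
    """Toy counter: treats whitespace-separated words as tokens."""

    async def count_tokens(self, text: str) -> int:
        return len(text.split())

    async def count_message_tokens(self, messages: list[ModelMessage]) -> int:
        # Reuse the shared text extraction helper, then count the flattened text.
        return await self.count_tokens(extract_text_from_messages(messages))
```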