shotgun-sh 0.1.9__py3-none-any.whl → 0.2.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of shotgun-sh might be problematic.

Files changed (150)
  1. shotgun/agents/agent_manager.py +761 -52
  2. shotgun/agents/common.py +80 -75
  3. shotgun/agents/config/constants.py +21 -10
  4. shotgun/agents/config/manager.py +322 -97
  5. shotgun/agents/config/models.py +114 -84
  6. shotgun/agents/config/provider.py +232 -88
  7. shotgun/agents/context_analyzer/__init__.py +28 -0
  8. shotgun/agents/context_analyzer/analyzer.py +471 -0
  9. shotgun/agents/context_analyzer/constants.py +9 -0
  10. shotgun/agents/context_analyzer/formatter.py +115 -0
  11. shotgun/agents/context_analyzer/models.py +212 -0
  12. shotgun/agents/conversation_history.py +125 -2
  13. shotgun/agents/conversation_manager.py +57 -19
  14. shotgun/agents/export.py +6 -7
  15. shotgun/agents/history/compaction.py +23 -3
  16. shotgun/agents/history/context_extraction.py +93 -6
  17. shotgun/agents/history/history_processors.py +179 -11
  18. shotgun/agents/history/token_counting/__init__.py +31 -0
  19. shotgun/agents/history/token_counting/anthropic.py +127 -0
  20. shotgun/agents/history/token_counting/base.py +78 -0
  21. shotgun/agents/history/token_counting/openai.py +90 -0
  22. shotgun/agents/history/token_counting/sentencepiece_counter.py +127 -0
  23. shotgun/agents/history/token_counting/tokenizer_cache.py +92 -0
  24. shotgun/agents/history/token_counting/utils.py +144 -0
  25. shotgun/agents/history/token_estimation.py +12 -12
  26. shotgun/agents/llm.py +62 -0
  27. shotgun/agents/models.py +59 -4
  28. shotgun/agents/plan.py +6 -7
  29. shotgun/agents/research.py +7 -8
  30. shotgun/agents/specify.py +6 -7
  31. shotgun/agents/tasks.py +6 -7
  32. shotgun/agents/tools/__init__.py +0 -2
  33. shotgun/agents/tools/codebase/codebase_shell.py +6 -0
  34. shotgun/agents/tools/codebase/directory_lister.py +6 -0
  35. shotgun/agents/tools/codebase/file_read.py +11 -2
  36. shotgun/agents/tools/codebase/query_graph.py +6 -0
  37. shotgun/agents/tools/codebase/retrieve_code.py +6 -0
  38. shotgun/agents/tools/file_management.py +82 -16
  39. shotgun/agents/tools/registry.py +217 -0
  40. shotgun/agents/tools/web_search/__init__.py +55 -16
  41. shotgun/agents/tools/web_search/anthropic.py +76 -51
  42. shotgun/agents/tools/web_search/gemini.py +50 -27
  43. shotgun/agents/tools/web_search/openai.py +26 -17
  44. shotgun/agents/tools/web_search/utils.py +2 -2
  45. shotgun/agents/usage_manager.py +164 -0
  46. shotgun/api_endpoints.py +15 -0
  47. shotgun/cli/clear.py +53 -0
  48. shotgun/cli/codebase/commands.py +71 -2
  49. shotgun/cli/compact.py +186 -0
  50. shotgun/cli/config.py +41 -67
  51. shotgun/cli/context.py +111 -0
  52. shotgun/cli/export.py +1 -1
  53. shotgun/cli/feedback.py +50 -0
  54. shotgun/cli/models.py +3 -2
  55. shotgun/cli/plan.py +1 -1
  56. shotgun/cli/research.py +1 -1
  57. shotgun/cli/specify.py +1 -1
  58. shotgun/cli/tasks.py +1 -1
  59. shotgun/cli/update.py +18 -5
  60. shotgun/codebase/core/change_detector.py +5 -3
  61. shotgun/codebase/core/code_retrieval.py +4 -2
  62. shotgun/codebase/core/ingestor.py +169 -19
  63. shotgun/codebase/core/manager.py +177 -13
  64. shotgun/codebase/core/nl_query.py +1 -1
  65. shotgun/codebase/models.py +28 -3
  66. shotgun/codebase/service.py +14 -2
  67. shotgun/exceptions.py +32 -0
  68. shotgun/llm_proxy/__init__.py +19 -0
  69. shotgun/llm_proxy/clients.py +44 -0
  70. shotgun/llm_proxy/constants.py +15 -0
  71. shotgun/logging_config.py +18 -27
  72. shotgun/main.py +91 -4
  73. shotgun/posthog_telemetry.py +87 -40
  74. shotgun/prompts/agents/export.j2 +18 -1
  75. shotgun/prompts/agents/partials/common_agent_system_prompt.j2 +5 -1
  76. shotgun/prompts/agents/partials/interactive_mode.j2 +24 -7
  77. shotgun/prompts/agents/plan.j2 +1 -1
  78. shotgun/prompts/agents/research.j2 +1 -1
  79. shotgun/prompts/agents/specify.j2 +270 -3
  80. shotgun/prompts/agents/state/system_state.j2 +4 -0
  81. shotgun/prompts/agents/tasks.j2 +1 -1
  82. shotgun/prompts/codebase/partials/cypher_rules.j2 +13 -0
  83. shotgun/prompts/loader.py +2 -2
  84. shotgun/prompts/tools/web_search.j2 +14 -0
  85. shotgun/sdk/codebase.py +60 -2
  86. shotgun/sentry_telemetry.py +28 -21
  87. shotgun/settings.py +238 -0
  88. shotgun/shotgun_web/__init__.py +19 -0
  89. shotgun/shotgun_web/client.py +138 -0
  90. shotgun/shotgun_web/constants.py +21 -0
  91. shotgun/shotgun_web/models.py +47 -0
  92. shotgun/telemetry.py +24 -36
  93. shotgun/tui/app.py +275 -23
  94. shotgun/tui/commands/__init__.py +1 -1
  95. shotgun/tui/components/context_indicator.py +179 -0
  96. shotgun/tui/components/mode_indicator.py +70 -0
  97. shotgun/tui/components/status_bar.py +48 -0
  98. shotgun/tui/components/vertical_tail.py +6 -0
  99. shotgun/tui/containers.py +91 -0
  100. shotgun/tui/dependencies.py +39 -0
  101. shotgun/tui/filtered_codebase_service.py +46 -0
  102. shotgun/tui/protocols.py +45 -0
  103. shotgun/tui/screens/chat/__init__.py +5 -0
  104. shotgun/tui/screens/chat/chat.tcss +54 -0
  105. shotgun/tui/screens/chat/chat_screen.py +1234 -0
  106. shotgun/tui/screens/chat/codebase_index_prompt_screen.py +64 -0
  107. shotgun/tui/screens/chat/codebase_index_selection.py +12 -0
  108. shotgun/tui/screens/chat/help_text.py +40 -0
  109. shotgun/tui/screens/chat/prompt_history.py +48 -0
  110. shotgun/tui/screens/chat.tcss +11 -0
  111. shotgun/tui/screens/chat_screen/command_providers.py +226 -11
  112. shotgun/tui/screens/chat_screen/history/__init__.py +22 -0
  113. shotgun/tui/screens/chat_screen/history/agent_response.py +66 -0
  114. shotgun/tui/screens/chat_screen/history/chat_history.py +116 -0
  115. shotgun/tui/screens/chat_screen/history/formatters.py +115 -0
  116. shotgun/tui/screens/chat_screen/history/partial_response.py +43 -0
  117. shotgun/tui/screens/chat_screen/history/user_question.py +42 -0
  118. shotgun/tui/screens/confirmation_dialog.py +151 -0
  119. shotgun/tui/screens/feedback.py +193 -0
  120. shotgun/tui/screens/github_issue.py +102 -0
  121. shotgun/tui/screens/model_picker.py +352 -0
  122. shotgun/tui/screens/onboarding.py +431 -0
  123. shotgun/tui/screens/pipx_migration.py +153 -0
  124. shotgun/tui/screens/provider_config.py +156 -39
  125. shotgun/tui/screens/shotgun_auth.py +295 -0
  126. shotgun/tui/screens/welcome.py +198 -0
  127. shotgun/tui/services/__init__.py +5 -0
  128. shotgun/tui/services/conversation_service.py +184 -0
  129. shotgun/tui/state/__init__.py +7 -0
  130. shotgun/tui/state/processing_state.py +185 -0
  131. shotgun/tui/utils/mode_progress.py +14 -7
  132. shotgun/tui/widgets/__init__.py +5 -0
  133. shotgun/tui/widgets/widget_coordinator.py +262 -0
  134. shotgun/utils/datetime_utils.py +77 -0
  135. shotgun/utils/env_utils.py +13 -0
  136. shotgun/utils/file_system_utils.py +22 -2
  137. shotgun/utils/marketing.py +110 -0
  138. shotgun/utils/source_detection.py +16 -0
  139. shotgun/utils/update_checker.py +73 -21
  140. shotgun_sh-0.2.11.dist-info/METADATA +130 -0
  141. shotgun_sh-0.2.11.dist-info/RECORD +194 -0
  142. {shotgun_sh-0.1.9.dist-info → shotgun_sh-0.2.11.dist-info}/entry_points.txt +1 -0
  143. {shotgun_sh-0.1.9.dist-info → shotgun_sh-0.2.11.dist-info}/licenses/LICENSE +1 -1
  144. shotgun/agents/history/token_counting.py +0 -429
  145. shotgun/agents/tools/user_interaction.py +0 -37
  146. shotgun/tui/screens/chat.py +0 -818
  147. shotgun/tui/screens/chat_screen/history.py +0 -222
  148. shotgun_sh-0.1.9.dist-info/METADATA +0 -466
  149. shotgun_sh-0.1.9.dist-info/RECORD +0 -131
  150. {shotgun_sh-0.1.9.dist-info → shotgun_sh-0.2.11.dist-info}/WHEEL +0 -0

--- a/shotgun/agents/history/context_extraction.py
+++ b/shotgun/agents/history/context_extraction.py
@@ -1,5 +1,9 @@
 """Context extraction utilities for history processing."""
 
+import json
+import logging
+import traceback
+
 from pydantic_ai.messages import (
     BuiltinToolCallPart,
     BuiltinToolReturnPart,
@@ -16,6 +20,46 @@ from pydantic_ai.messages import (
     UserPromptPart,
 )
 
+logger = logging.getLogger(__name__)
+
+
+def _safely_parse_tool_args(args: dict[str, object] | str | None) -> dict[str, object]:
+    """Safely parse tool call arguments, handling incomplete/invalid JSON.
+
+    Args:
+        args: Tool call arguments (dict, JSON string, or None)
+
+    Returns:
+        Parsed args dict, or empty dict if parsing fails
+    """
+    if args is None:
+        return {}
+
+    if isinstance(args, dict):
+        return args
+
+    if not isinstance(args, str):
+        return {}
+
+    try:
+        parsed = json.loads(args)
+        return parsed if isinstance(parsed, dict) else {}
+    except (json.JSONDecodeError, ValueError) as e:
+        # Only log warning if it looks like JSON (starts with { or [) - incomplete JSON
+        # Plain strings are valid args and shouldn't trigger warnings
+        stripped_args = args.strip()
+        if stripped_args.startswith(("{", "[")):
+            args_preview = args[:100] + "..." if len(args) > 100 else args
+            logger.warning(
+                "Detected incomplete/invalid JSON in tool call args during parsing",
+                extra={
+                    "args_preview": args_preview,
+                    "error": str(e),
+                    "args_length": len(args),
+                },
+            )
+        return {}
+
 
 def extract_context_from_messages(messages: list[ModelMessage]) -> str:
     """Extract context from a list of messages for summarization."""
@@ -87,12 +131,55 @@ def extract_context_from_part(
         return f"<ASSISTANT_TEXT>\n{message_part.content}\n</ASSISTANT_TEXT>"
 
     elif isinstance(message_part, ToolCallPart):
-        if isinstance(message_part.args, dict):
-            args_str = ", ".join(f"{k}={repr(v)}" for k, v in message_part.args.items())
-            tool_call_str = f"{message_part.tool_name}({args_str})"
-        else:
-            tool_call_str = f"{message_part.tool_name}({message_part.args})"
-        return f"<TOOL_CALL>\n{tool_call_str}\n</TOOL_CALL>"
+        # Safely parse args to avoid crashes from incomplete JSON during streaming
+        try:
+            parsed_args = _safely_parse_tool_args(message_part.args)
+            if parsed_args:
+                # Successfully parsed as dict - format nicely
+                args_str = ", ".join(f"{k}={repr(v)}" for k, v in parsed_args.items())
+                tool_call_str = f"{message_part.tool_name}({args_str})"
+            elif isinstance(message_part.args, str) and message_part.args:
+                # Non-empty string that didn't parse as JSON
+                # Check if it looks like JSON (starts with { or [) - if so, it's incomplete
+                stripped_args = message_part.args.strip()
+                if stripped_args.startswith(("{", "[")):
+                    # Looks like incomplete JSON - log warning and show empty parens
+                    args_preview = (
+                        stripped_args[:100] + "..."
+                        if len(stripped_args) > 100
+                        else stripped_args
+                    )
+                    stack_trace = "".join(traceback.format_stack())
+                    logger.warning(
+                        "ToolCallPart with unparseable args encountered during context extraction",
+                        extra={
+                            "tool_name": message_part.tool_name,
+                            "tool_call_id": message_part.tool_call_id,
+                            "args_preview": args_preview,
+                            "args_type": type(message_part.args).__name__,
+                            "stack_trace": stack_trace,
+                        },
+                    )
+                    tool_call_str = f"{message_part.tool_name}()"
+                else:
+                    # Plain string arg - display as-is
+                    tool_call_str = f"{message_part.tool_name}({message_part.args})"
+            else:
+                # No args
+                tool_call_str = f"{message_part.tool_name}()"
+            return f"<TOOL_CALL>\n{tool_call_str}\n</TOOL_CALL>"
+        except Exception as e:  # pragma: no cover - defensive catch-all
+            # If anything goes wrong, log full exception with stack trace
+            logger.error(
+                "Unexpected error processing ToolCallPart",
+                exc_info=True,
+                extra={
+                    "tool_name": message_part.tool_name,
+                    "tool_call_id": message_part.tool_call_id,
+                    "error": str(e),
+                },
+            )
+            return f"<TOOL_CALL>\n{message_part.tool_name}()\n</TOOL_CALL>"
 
     elif isinstance(message_part, BuiltinToolCallPart):
         return f"<BUILTIN_TOOL_CALL>\n{message_part.tool_name}\n</BUILTIN_TOOL_CALL>"
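
The new _safely_parse_tool_args helper is what keeps context extraction from crashing on tool calls whose arguments arrive as partially streamed JSON. A minimal sketch of the behaviour it implements (inputs are illustrative; the import path follows the module location in the files-changed list above):

    from shotgun.agents.history.context_extraction import _safely_parse_tool_args

    _safely_parse_tool_args(None)                   # {} - no args
    _safely_parse_tool_args({"path": "app.py"})     # returned unchanged - already a dict
    _safely_parse_tool_args('{"path": "app.py"}')   # {"path": "app.py"} - valid JSON string
    _safely_parse_tool_args('{"path": "ap')         # {} - truncated streaming JSON, warning logged
    _safely_parse_tool_args("plain text argument")  # {} - plain string, no warning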

--- a/shotgun/agents/history/history_processors.py
+++ b/shotgun/agents/history/history_processors.py
@@ -1,7 +1,10 @@
 """History processors for managing conversation history in Shotgun agents."""
 
+from collections.abc import Awaitable, Callable
 from typing import TYPE_CHECKING, Any, Protocol
 
+from anthropic import APIStatusError
+from pydantic_ai import ModelSettings
 from pydantic_ai.messages import (
     ModelMessage,
     ModelRequest,
@@ -10,10 +13,12 @@ from pydantic_ai.messages import (
     UserPromptPart,
 )
 
-from shotgun.agents.config.models import shotgun_model_request
+from shotgun.agents.llm import shotgun_model_request
 from shotgun.agents.messages import AgentSystemPrompt, SystemStatusPrompt
 from shotgun.agents.models import AgentDeps
+from shotgun.exceptions import ContextSizeLimitExceeded
 from shotgun.logging_config import get_logger
+from shotgun.posthog_telemetry import track_event
 from shotgun.prompts import PromptLoader
 
 from .constants import SUMMARY_MARKER, TOKEN_LIMIT_RATIO
@@ -49,6 +54,86 @@ logger = get_logger(__name__)
 prompt_loader = PromptLoader()
 
 
+async def _safe_token_estimation(
+    estimation_func: Callable[..., Awaitable[int]],
+    model_name: str,
+    max_tokens: int,
+    *args: Any,
+    **kwargs: Any,
+) -> int:
+    """Safely estimate tokens with proper error handling.
+
+    Wraps token estimation functions to handle failures gracefully.
+    Only RuntimeError (from token counters) is wrapped in ContextSizeLimitExceeded.
+    Other errors (network, auth) are allowed to bubble up.
+
+    Args:
+        estimation_func: Async function that estimates tokens
+        model_name: Name of the model for error messages
+        max_tokens: Maximum tokens for the model
+        *args: Arguments to pass to estimation_func
+        **kwargs: Keyword arguments to pass to estimation_func
+
+    Returns:
+        Token count from estimation_func
+
+    Raises:
+        ContextSizeLimitExceeded: If token counting fails with RuntimeError
+        Exception: Any other exceptions from estimation_func
+    """
+    try:
+        return await estimation_func(*args, **kwargs)
+    except Exception as e:
+        # Log the error with full context
+        logger.warning(
+            f"Token counting failed for {model_name}",
+            extra={
+                "error_type": type(e).__name__,
+                "error_message": str(e),
+                "model": model_name,
+            },
+        )
+
+        # Token counting behavior with oversized context (verified via testing):
+        #
+        # 1. OpenAI/tiktoken:
+        #    - Successfully counts any size (tested with 752K tokens, no error)
+        #    - Library errors: ValueError, KeyError, AttributeError, SSLError (file/cache issues)
+        #    - Wrapped as: RuntimeError by our counter
+        #
+        # 2. Gemini/SentencePiece:
+        #    - Successfully counts any size (tested with 752K tokens, no error)
+        #    - Library errors: RuntimeError, IOError, TypeError (file/model loading issues)
+        #    - Wrapped as: RuntimeError by our counter
+        #
+        # 3. Anthropic API:
+        #    - Successfully counts large token counts (tested with 752K tokens, no error)
+        #    - Only enforces 32 MB request size limit (not token count)
+        #    - Raises: APIStatusError(413) with error type 'request_too_large' for 32MB+ requests
+        #    - Other API errors: APIConnectionError, RateLimitError, APIStatusError (4xx/5xx)
+        #    - Wrapped as: RuntimeError by our counter
+        #
+        # IMPORTANT: No provider raises errors for "too many tokens" during counting.
+        # Token count validation happens separately by comparing count to max_input_tokens.
+        #
+        # We wrap RuntimeError (library-level failures from tiktoken/sentencepiece).
+        # We also wrap Anthropic's 413 error (request exceeds 32 MB) as it indicates
+        # context is effectively too large and needs user action to reduce it.
+        if isinstance(e, RuntimeError):
+            raise ContextSizeLimitExceeded(
+                model_name=model_name, max_tokens=max_tokens
+            ) from e
+
+        # Check for Anthropic's 32 MB request size limit (APIStatusError with status 413)
+        if isinstance(e, APIStatusError) and e.status_code == 413:
+            raise ContextSizeLimitExceeded(
+                model_name=model_name, max_tokens=max_tokens
+            ) from e
+
+        # Re-raise other exceptions (network errors, auth failures, etc.)
+        raise
+
+
 def is_summary_part(part: Any) -> bool:
     """Check if a message part is a compacted summary."""
     return isinstance(part, TextPart) and part.content.startswith(SUMMARY_MARKER)
@@ -125,6 +210,7 @@ calculate_max_summarization_tokens = _calculate_max_summarization_tokens
 async def token_limit_compactor(
     ctx: ContextProtocol,
     messages: list[ModelMessage],
+    force: bool = False,
 ) -> list[ModelMessage]:
     """Compact message history based on token limits with incremental processing.
 
@@ -137,6 +223,7 @@ async def token_limit_compactor(
     Args:
         ctx: Run context with usage information and dependencies
         messages: Current conversation history
+        force: If True, force compaction even if below token threshold
 
     Returns:
         Compacted list of messages within token limits
@@ -153,9 +240,15 @@
 
     if last_summary_index is not None:
         # Check if post-summary conversation exceeds threshold for incremental compaction
-        post_summary_tokens = estimate_post_summary_tokens(
-            messages, last_summary_index, deps.llm_model
+        post_summary_tokens = await _safe_token_estimation(
+            estimate_post_summary_tokens,
+            deps.llm_model.name,
+            model_max_tokens,
+            messages,
+            last_summary_index,
+            deps.llm_model,
         )
+
         post_summary_percentage = (
             (post_summary_tokens / max_tokens) * 100 if max_tokens > 0 else 0
         )
@@ -167,7 +260,7 @@
         )
 
         # Only do incremental compaction if post-summary conversation exceeds threshold
-        if post_summary_tokens < max_tokens:
+        if post_summary_tokens < max_tokens and not force:
             logger.debug(
                 f"Post-summary conversation under threshold ({post_summary_tokens} < {max_tokens}), "
                 f"keeping all {len(messages)} messages"
@@ -179,6 +272,10 @@
             "Post-summary conversation exceeds threshold, performing incremental compaction"
         )
 
+        # Track compaction event
+        messages_before = len(messages)
+        tokens_before = post_summary_tokens
+
         # Extract existing summary content
         summary_message = messages[last_summary_index]
         existing_summary_part = None
@@ -243,7 +340,7 @@
         ]
 
         # Calculate optimal max_tokens for summarization
-        max_tokens = calculate_max_summarization_tokens(
+        max_tokens = await calculate_max_summarization_tokens(
             deps.llm_model, request_messages
         )
 
@@ -256,7 +353,9 @@
         summary_response = await shotgun_model_request(
             model_config=deps.llm_model,
             messages=request_messages,
-            max_tokens=max_tokens,  # Use calculated optimal tokens for summarization
+            model_settings=ModelSettings(
+                max_tokens=max_tokens  # Use calculated optimal tokens for summarization
+            ),
         )
 
         log_summarization_response(summary_response, "INCREMENTAL")
@@ -320,11 +419,50 @@
         logger.debug(
             f"Incremental compaction complete: {len(messages)} -> {len(compacted_messages)} messages"
         )
+
+        # Track compaction completion
+        messages_after = len(compacted_messages)
+        tokens_after = await estimate_tokens_from_messages(
+            compacted_messages, deps.llm_model
+        )
+        reduction_percentage = (
+            ((messages_before - messages_after) / messages_before * 100)
+            if messages_before > 0
+            else 0
+        )
+
+        # Track incremental compaction with simple metrics (fast, no token counting)
+        track_event(
+            "context_compaction_triggered",
+            {
+                "compaction_type": "incremental",
+                "messages_before": messages_before,
+                "messages_after": messages_after,
+                "tokens_before": tokens_before,
+                "tokens_after": tokens_after,
+                "reduction_percentage": round(reduction_percentage, 2),
+                "agent_mode": deps.agent_mode.value
+                if hasattr(deps, "agent_mode") and deps.agent_mode
+                else "unknown",
+                # Model and provider info (no computation needed)
+                "model_name": deps.llm_model.name.value,
+                "provider": deps.llm_model.provider.value,
+                "key_provider": deps.llm_model.key_provider.value,
+            },
+        )
+
         return compacted_messages
 
     else:
         # Check if total conversation exceeds threshold for full compaction
-        total_tokens = estimate_tokens_from_messages(messages, deps.llm_model)
+        total_tokens = await _safe_token_estimation(
+            estimate_tokens_from_messages,
+            deps.llm_model.name,
+            model_max_tokens,
+            messages,
+            deps.llm_model,
+        )
+
         total_percentage = (total_tokens / max_tokens) * 100 if max_tokens > 0 else 0
 
         logger.debug(
@@ -333,7 +471,7 @@
         )
 
         # Only do full compaction if total conversation exceeds threshold
-        if total_tokens < max_tokens:
+        if total_tokens < max_tokens and not force:
             logger.debug(
                 f"Total conversation under threshold ({total_tokens} < {max_tokens}), "
                 f"keeping all {len(messages)} messages"
@@ -362,7 +500,9 @@ async def _full_compaction(
     ]
 
     # Calculate optimal max_tokens for summarization
-    max_tokens = calculate_max_summarization_tokens(deps.llm_model, request_messages)
+    max_tokens = await calculate_max_summarization_tokens(
+        deps.llm_model, request_messages
+    )
 
     # Debug logging using shared utilities
     log_summarization_request(
@@ -373,11 +513,13 @@
     summary_response = await shotgun_model_request(
         model_config=deps.llm_model,
         messages=request_messages,
-        max_tokens=max_tokens,  # Use calculated optimal tokens for summarization
+        model_settings=ModelSettings(
+            max_tokens=max_tokens  # Use calculated optimal tokens for summarization
+        ),
     )
 
     # Calculate token reduction
-    current_tokens = estimate_tokens_from_messages(messages, deps.llm_model)
+    current_tokens = await estimate_tokens_from_messages(messages, deps.llm_model)
     summary_usage = summary_response.usage
     reduction_percentage = (
         ((current_tokens - summary_usage.output_tokens) / current_tokens) * 100
@@ -423,4 +565,30 @@
     # Ensure history ends with ModelRequest for PydanticAI compatibility
     compacted_messages = ensure_ends_with_model_request(compacted_messages, messages)
 
+    # Track full compaction event
+    messages_before = len(messages)
+    messages_after = len(compacted_messages)
+    tokens_before = current_tokens  # Already calculated above
+    tokens_after = summary_usage.output_tokens if summary_usage else 0
+
+    # Track full compaction with simple metrics (fast, no token counting)
+    track_event(
+        "context_compaction_triggered",
+        {
+            "compaction_type": "full",
+            "messages_before": messages_before,
+            "messages_after": messages_after,
+            "tokens_before": tokens_before,
+            "tokens_after": tokens_after,
+            "reduction_percentage": round(reduction_percentage, 2),
+            "agent_mode": deps.agent_mode.value
+            if hasattr(deps, "agent_mode") and deps.agent_mode
+            else "unknown",
+            # Model and provider info (no computation needed)
+            "model_name": deps.llm_model.name.value,
+            "provider": deps.llm_model.provider.value,
+            "key_provider": deps.llm_model.key_provider.value,
+        },
+    )
+
     return compacted_messages
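
Both call sites now route token estimation through _safe_token_estimation, so tokenizer failures surface as ContextSizeLimitExceeded instead of crashing compaction. A rough sketch of that wrapping behaviour, using a deliberately failing stand-in counter (the counter and model name below are hypothetical, not part of the package):

    import asyncio

    from shotgun.agents.history.history_processors import _safe_token_estimation
    from shotgun.exceptions import ContextSizeLimitExceeded


    async def broken_counter(*args: object) -> int:
        # Stand-in for a tiktoken/sentencepiece counter that fails to load
        raise RuntimeError("tokenizer model could not be loaded")


    async def demo() -> None:
        try:
            await _safe_token_estimation(broken_counter, "example-model", 128_000)
        except ContextSizeLimitExceeded:
            # RuntimeError from the counter is re-raised as ContextSizeLimitExceeded,
            # which callers can surface to the user as a prompt to reduce context
            print("token counting failed; context treated as too large")


    asyncio.run(demo())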

--- /dev/null
+++ b/shotgun/agents/history/token_counting/__init__.py
@@ -0,0 +1,31 @@
+"""Real token counting for all supported providers.
+
+This module provides accurate token counting using each provider's official
+APIs and libraries, eliminating the need for rough character-based estimation.
+"""
+
+from .anthropic import AnthropicTokenCounter
+from .base import TokenCounter, extract_text_from_messages
+from .openai import OpenAITokenCounter
+from .sentencepiece_counter import SentencePieceTokenCounter
+from .utils import (
+    count_post_summary_tokens,
+    count_tokens_from_message_parts,
+    count_tokens_from_messages,
+    get_token_counter,
+)
+
+__all__ = [
+    # Base classes
+    "TokenCounter",
+    # Counter implementations
+    "OpenAITokenCounter",
+    "AnthropicTokenCounter",
+    "SentencePieceTokenCounter",
+    # Utility functions
+    "get_token_counter",
+    "count_tokens_from_messages",
+    "count_post_summary_tokens",
+    "count_tokens_from_message_parts",
+    "extract_text_from_messages",
+]

--- /dev/null
+++ b/shotgun/agents/history/token_counting/anthropic.py
@@ -0,0 +1,127 @@
+"""Anthropic token counting using official client."""
+
+import logfire
+from pydantic_ai.messages import ModelMessage
+
+from shotgun.agents.config.models import KeyProvider
+from shotgun.llm_proxy import create_anthropic_proxy_provider
+from shotgun.logging_config import get_logger
+
+from .base import TokenCounter, extract_text_from_messages
+
+logger = get_logger(__name__)
+
+
+class AnthropicTokenCounter(TokenCounter):
+    """Token counter for Anthropic models using official client."""
+
+    def __init__(
+        self,
+        model_name: str,
+        api_key: str,
+        key_provider: KeyProvider = KeyProvider.BYOK,
+    ):
+        """Initialize Anthropic token counter.
+
+        Args:
+            model_name: Anthropic model name for token counting
+            api_key: API key (Anthropic for BYOK, Shotgun for proxy)
+            key_provider: Key provider type (BYOK or SHOTGUN)
+
+        Raises:
+            RuntimeError: If client initialization fails
+        """
+        self.model_name = model_name
+        import anthropic
+
+        try:
+            if key_provider == KeyProvider.SHOTGUN:
+                # Use LiteLLM proxy for Shotgun Account
+                # Get async client from AnthropicProvider
+                provider = create_anthropic_proxy_provider(api_key)
+                self.client = provider.client
+                logger.debug(
+                    f"Initialized async Anthropic token counter for {model_name} via LiteLLM proxy"
+                )
+            else:
+                # Direct Anthropic API for BYOK - use async client
+                self.client = anthropic.AsyncAnthropic(api_key=api_key)
+                logger.debug(
+                    f"Initialized async Anthropic token counter for {model_name} via direct API"
+                )
+        except Exception as e:
+            logfire.exception(
+                f"Failed to initialize Anthropic token counter for {model_name}",
+                model_name=model_name,
+                key_provider=key_provider.value,
+                exception_type=type(e).__name__,
+            )
+            raise RuntimeError(
+                f"Failed to initialize Anthropic async client for {model_name}: {type(e).__name__}: {str(e)}"
+            ) from e
+
+    async def count_tokens(self, text: str) -> int:
+        """Count tokens using Anthropic's official API (async).
+
+        Args:
+            text: Text to count tokens for
+
+        Returns:
+            Exact token count from Anthropic API
+
+        Raises:
+            RuntimeError: If API call fails
+        """
+        # Handle empty text to avoid unnecessary API calls
+        # Anthropic API requires non-empty content, so we need a strict check
+        if not text or not text.strip():
+            return 0
+
+        # Additional validation: ensure the text has actual content
+        # Some edge cases might have only whitespace or control characters
+        cleaned_text = text.strip()
+        if not cleaned_text:
+            return 0
+
+        try:
+            # Anthropic API expects messages format and model parameter
+            # Use await with async client
+            result = await self.client.messages.count_tokens(
+                messages=[{"role": "user", "content": cleaned_text}],
+                model=self.model_name,
+            )
+            return result.input_tokens
+        except Exception as e:
+            # Create a preview of the text for logging (truncated to avoid huge logs)
+            text_preview = text[:100] + "..." if len(text) > 100 else text
+
+            logfire.exception(
+                f"Anthropic token counting failed for {self.model_name}",
+                model_name=self.model_name,
+                text_length=len(text),
+                text_preview=text_preview,
+                exception_type=type(e).__name__,
+                exception_message=str(e),
+            )
+            raise RuntimeError(
+                f"Anthropic token counting API failed for {self.model_name}: {type(e).__name__}: {str(e)}"
+            ) from e
+
+    async def count_message_tokens(self, messages: list[ModelMessage]) -> int:
+        """Count tokens across all messages using Anthropic API (async).
+
+        Args:
+            messages: List of PydanticAI messages
+
+        Returns:
+            Total token count for all messages
+
+        Raises:
+            RuntimeError: If token counting fails
+        """
+        # Handle empty message list early
+        if not messages:
+            return 0
+
+        total_text = extract_text_from_messages(messages)
+        return await self.count_tokens(total_text)
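
A usage sketch for the new async counter in BYOK mode; the model id and API key are placeholders, and count_tokens/count_message_tokens are the two methods defined above:

    import asyncio

    from shotgun.agents.history.token_counting import AnthropicTokenCounter


    async def demo() -> None:
        # Placeholder model id and API key; BYOK is the default key_provider
        counter = AnthropicTokenCounter(model_name="claude-3-5-sonnet-latest", api_key="sk-ant-...")
        print(await counter.count_tokens("How many tokens is this prompt?"))


    asyncio.run(demo())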

--- /dev/null
+++ b/shotgun/agents/history/token_counting/base.py
@@ -0,0 +1,78 @@
+"""Base classes and shared utilities for token counting."""
+
+from abc import ABC, abstractmethod
+
+from pydantic_ai.messages import ModelMessage
+
+
+class TokenCounter(ABC):
+    """Abstract base class for provider-specific token counting.
+
+    All methods are async to support non-blocking operations like
+    downloading tokenizer models or making API calls.
+    """
+
+    @abstractmethod
+    async def count_tokens(self, text: str) -> int:
+        """Count tokens in text using provider-specific method (async).
+
+        Args:
+            text: Text to count tokens for
+
+        Returns:
+            Exact token count as determined by the provider
+
+        Raises:
+            RuntimeError: If token counting fails
+        """
+
+    @abstractmethod
+    async def count_message_tokens(self, messages: list[ModelMessage]) -> int:
+        """Count tokens in PydanticAI message structures (async).
+
+        Args:
+            messages: List of messages to count tokens for
+
+        Returns:
+            Total token count across all messages
+
+        Raises:
+            RuntimeError: If token counting fails
+        """
+
+
+def extract_text_from_messages(messages: list[ModelMessage]) -> str:
+    """Extract all text content from messages for token counting.
+
+    Args:
+        messages: List of PydanticAI messages
+
+    Returns:
+        Combined text content from all messages
+    """
+    text_parts = []
+
+    for message in messages:
+        if hasattr(message, "parts"):
+            for part in message.parts:
+                if hasattr(part, "content") and isinstance(part.content, str):
+                    # Only add non-empty content
+                    if part.content.strip():
+                        text_parts.append(part.content)
+                else:
+                    # Handle non-text parts (tool calls, etc.)
+                    part_str = str(part)
+                    if part_str.strip():
+                        text_parts.append(part_str)
+        else:
+            # Handle messages without parts
+            msg_str = str(message)
+            if msg_str.strip():
+                text_parts.append(msg_str)
+
+    # If no valid text parts found, return a minimal placeholder
+    # This ensures we never send completely empty content to APIs
+    if not text_parts:
+        return "."
+
+    return "\n".join(text_parts)
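
The TokenCounter ABC only requires the two async methods, so a provider-agnostic fallback is easy to sketch (a toy whitespace-based counter, not part of the package):

    from pydantic_ai.messages import ModelMessage

    from shotgun.agents.history.token_counting.base import (
        TokenCounter,
        extract_text_from_messages,
    )


    class WhitespaceTokenCounter(TokenCounter):
        """Toy counter: one token per whitespace-separated word."""

        async def count_tokens(self, text: str) -> int:
            return len(text.split())

        async def count_message_tokens(self, messages: list[ModelMessage]) -> int:
            return await self.count_tokens(extract_text_from_messages(messages))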