shotgun-sh 0.2.8.dev2__py3-none-any.whl → 0.3.3.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175)
  1. shotgun/agents/agent_manager.py +382 -60
  2. shotgun/agents/common.py +15 -9
  3. shotgun/agents/config/README.md +89 -0
  4. shotgun/agents/config/__init__.py +10 -1
  5. shotgun/agents/config/constants.py +0 -6
  6. shotgun/agents/config/manager.py +383 -82
  7. shotgun/agents/config/models.py +122 -18
  8. shotgun/agents/config/provider.py +81 -15
  9. shotgun/agents/config/streaming_test.py +119 -0
  10. shotgun/agents/context_analyzer/__init__.py +28 -0
  11. shotgun/agents/context_analyzer/analyzer.py +475 -0
  12. shotgun/agents/context_analyzer/constants.py +9 -0
  13. shotgun/agents/context_analyzer/formatter.py +115 -0
  14. shotgun/agents/context_analyzer/models.py +212 -0
  15. shotgun/agents/conversation/__init__.py +18 -0
  16. shotgun/agents/conversation/filters.py +164 -0
  17. shotgun/agents/conversation/history/chunking.py +278 -0
  18. shotgun/agents/{history → conversation/history}/compaction.py +36 -5
  19. shotgun/agents/{history → conversation/history}/constants.py +5 -0
  20. shotgun/agents/conversation/history/file_content_deduplication.py +216 -0
  21. shotgun/agents/{history → conversation/history}/history_processors.py +380 -8
  22. shotgun/agents/{history → conversation/history}/token_counting/anthropic.py +25 -1
  23. shotgun/agents/{history → conversation/history}/token_counting/base.py +14 -3
  24. shotgun/agents/{history → conversation/history}/token_counting/openai.py +11 -1
  25. shotgun/agents/{history → conversation/history}/token_counting/sentencepiece_counter.py +8 -0
  26. shotgun/agents/{history → conversation/history}/token_counting/tokenizer_cache.py +3 -1
  27. shotgun/agents/{history → conversation/history}/token_counting/utils.py +0 -3
  28. shotgun/agents/{conversation_manager.py → conversation/manager.py} +36 -20
  29. shotgun/agents/{conversation_history.py → conversation/models.py} +8 -92
  30. shotgun/agents/error/__init__.py +11 -0
  31. shotgun/agents/error/models.py +19 -0
  32. shotgun/agents/export.py +2 -2
  33. shotgun/agents/plan.py +2 -2
  34. shotgun/agents/research.py +3 -3
  35. shotgun/agents/runner.py +230 -0
  36. shotgun/agents/specify.py +2 -2
  37. shotgun/agents/tasks.py +2 -2
  38. shotgun/agents/tools/codebase/codebase_shell.py +6 -0
  39. shotgun/agents/tools/codebase/directory_lister.py +6 -0
  40. shotgun/agents/tools/codebase/file_read.py +11 -2
  41. shotgun/agents/tools/codebase/query_graph.py +6 -0
  42. shotgun/agents/tools/codebase/retrieve_code.py +6 -0
  43. shotgun/agents/tools/file_management.py +27 -7
  44. shotgun/agents/tools/registry.py +217 -0
  45. shotgun/agents/tools/web_search/__init__.py +8 -8
  46. shotgun/agents/tools/web_search/anthropic.py +8 -2
  47. shotgun/agents/tools/web_search/gemini.py +7 -1
  48. shotgun/agents/tools/web_search/openai.py +8 -2
  49. shotgun/agents/tools/web_search/utils.py +2 -2
  50. shotgun/agents/usage_manager.py +16 -11
  51. shotgun/api_endpoints.py +7 -3
  52. shotgun/build_constants.py +2 -2
  53. shotgun/cli/clear.py +53 -0
  54. shotgun/cli/compact.py +188 -0
  55. shotgun/cli/config.py +8 -5
  56. shotgun/cli/context.py +154 -0
  57. shotgun/cli/error_handler.py +24 -0
  58. shotgun/cli/export.py +34 -34
  59. shotgun/cli/feedback.py +4 -2
  60. shotgun/cli/models.py +1 -0
  61. shotgun/cli/plan.py +34 -34
  62. shotgun/cli/research.py +18 -10
  63. shotgun/cli/spec/__init__.py +5 -0
  64. shotgun/cli/spec/backup.py +81 -0
  65. shotgun/cli/spec/commands.py +132 -0
  66. shotgun/cli/spec/models.py +48 -0
  67. shotgun/cli/spec/pull_service.py +219 -0
  68. shotgun/cli/specify.py +20 -19
  69. shotgun/cli/tasks.py +34 -34
  70. shotgun/cli/update.py +16 -2
  71. shotgun/codebase/core/change_detector.py +5 -3
  72. shotgun/codebase/core/code_retrieval.py +4 -2
  73. shotgun/codebase/core/ingestor.py +163 -15
  74. shotgun/codebase/core/manager.py +13 -4
  75. shotgun/codebase/core/nl_query.py +1 -1
  76. shotgun/codebase/models.py +2 -0
  77. shotgun/exceptions.py +357 -0
  78. shotgun/llm_proxy/__init__.py +17 -0
  79. shotgun/llm_proxy/client.py +215 -0
  80. shotgun/llm_proxy/models.py +137 -0
  81. shotgun/logging_config.py +60 -27
  82. shotgun/main.py +77 -11
  83. shotgun/posthog_telemetry.py +38 -29
  84. shotgun/prompts/agents/partials/common_agent_system_prompt.j2 +28 -2
  85. shotgun/prompts/agents/partials/interactive_mode.j2 +3 -3
  86. shotgun/prompts/agents/plan.j2 +16 -0
  87. shotgun/prompts/agents/research.j2 +16 -3
  88. shotgun/prompts/agents/specify.j2 +54 -1
  89. shotgun/prompts/agents/state/system_state.j2 +0 -2
  90. shotgun/prompts/agents/tasks.j2 +16 -0
  91. shotgun/prompts/history/chunk_summarization.j2 +34 -0
  92. shotgun/prompts/history/combine_summaries.j2 +53 -0
  93. shotgun/sdk/codebase.py +14 -3
  94. shotgun/sentry_telemetry.py +163 -16
  95. shotgun/settings.py +243 -0
  96. shotgun/shotgun_web/__init__.py +67 -1
  97. shotgun/shotgun_web/client.py +42 -1
  98. shotgun/shotgun_web/constants.py +46 -0
  99. shotgun/shotgun_web/exceptions.py +29 -0
  100. shotgun/shotgun_web/models.py +390 -0
  101. shotgun/shotgun_web/shared_specs/__init__.py +32 -0
  102. shotgun/shotgun_web/shared_specs/file_scanner.py +175 -0
  103. shotgun/shotgun_web/shared_specs/hasher.py +83 -0
  104. shotgun/shotgun_web/shared_specs/models.py +71 -0
  105. shotgun/shotgun_web/shared_specs/upload_pipeline.py +329 -0
  106. shotgun/shotgun_web/shared_specs/utils.py +34 -0
  107. shotgun/shotgun_web/specs_client.py +703 -0
  108. shotgun/shotgun_web/supabase_client.py +31 -0
  109. shotgun/telemetry.py +10 -33
  110. shotgun/tui/app.py +310 -46
  111. shotgun/tui/commands/__init__.py +1 -1
  112. shotgun/tui/components/context_indicator.py +179 -0
  113. shotgun/tui/components/mode_indicator.py +70 -0
  114. shotgun/tui/components/status_bar.py +48 -0
  115. shotgun/tui/containers.py +91 -0
  116. shotgun/tui/dependencies.py +39 -0
  117. shotgun/tui/layout.py +5 -0
  118. shotgun/tui/protocols.py +45 -0
  119. shotgun/tui/screens/chat/__init__.py +5 -0
  120. shotgun/tui/screens/chat/chat.tcss +54 -0
  121. shotgun/tui/screens/chat/chat_screen.py +1531 -0
  122. shotgun/tui/screens/chat/codebase_index_prompt_screen.py +243 -0
  123. shotgun/tui/screens/chat/codebase_index_selection.py +12 -0
  124. shotgun/tui/screens/chat/help_text.py +40 -0
  125. shotgun/tui/screens/chat/prompt_history.py +48 -0
  126. shotgun/tui/screens/chat.tcss +11 -0
  127. shotgun/tui/screens/chat_screen/command_providers.py +91 -4
  128. shotgun/tui/screens/chat_screen/hint_message.py +76 -1
  129. shotgun/tui/screens/chat_screen/history/__init__.py +22 -0
  130. shotgun/tui/screens/chat_screen/history/agent_response.py +66 -0
  131. shotgun/tui/screens/chat_screen/history/chat_history.py +115 -0
  132. shotgun/tui/screens/chat_screen/history/formatters.py +115 -0
  133. shotgun/tui/screens/chat_screen/history/partial_response.py +43 -0
  134. shotgun/tui/screens/chat_screen/history/user_question.py +42 -0
  135. shotgun/tui/screens/confirmation_dialog.py +191 -0
  136. shotgun/tui/screens/directory_setup.py +45 -41
  137. shotgun/tui/screens/feedback.py +14 -7
  138. shotgun/tui/screens/github_issue.py +111 -0
  139. shotgun/tui/screens/model_picker.py +77 -32
  140. shotgun/tui/screens/onboarding.py +580 -0
  141. shotgun/tui/screens/pipx_migration.py +205 -0
  142. shotgun/tui/screens/provider_config.py +116 -35
  143. shotgun/tui/screens/shared_specs/__init__.py +21 -0
  144. shotgun/tui/screens/shared_specs/create_spec_dialog.py +273 -0
  145. shotgun/tui/screens/shared_specs/models.py +56 -0
  146. shotgun/tui/screens/shared_specs/share_specs_dialog.py +390 -0
  147. shotgun/tui/screens/shared_specs/upload_progress_screen.py +452 -0
  148. shotgun/tui/screens/shotgun_auth.py +112 -18
  149. shotgun/tui/screens/spec_pull.py +288 -0
  150. shotgun/tui/screens/welcome.py +137 -11
  151. shotgun/tui/services/__init__.py +5 -0
  152. shotgun/tui/services/conversation_service.py +187 -0
  153. shotgun/tui/state/__init__.py +7 -0
  154. shotgun/tui/state/processing_state.py +185 -0
  155. shotgun/tui/utils/mode_progress.py +14 -7
  156. shotgun/tui/widgets/__init__.py +5 -0
  157. shotgun/tui/widgets/widget_coordinator.py +263 -0
  158. shotgun/utils/file_system_utils.py +22 -2
  159. shotgun/utils/marketing.py +110 -0
  160. shotgun/utils/update_checker.py +69 -14
  161. shotgun_sh-0.3.3.dev1.dist-info/METADATA +472 -0
  162. shotgun_sh-0.3.3.dev1.dist-info/RECORD +229 -0
  163. {shotgun_sh-0.2.8.dev2.dist-info → shotgun_sh-0.3.3.dev1.dist-info}/WHEEL +1 -1
  164. {shotgun_sh-0.2.8.dev2.dist-info → shotgun_sh-0.3.3.dev1.dist-info}/entry_points.txt +1 -0
  165. {shotgun_sh-0.2.8.dev2.dist-info → shotgun_sh-0.3.3.dev1.dist-info}/licenses/LICENSE +1 -1
  166. shotgun/tui/screens/chat.py +0 -996
  167. shotgun/tui/screens/chat_screen/history.py +0 -335
  168. shotgun_sh-0.2.8.dev2.dist-info/METADATA +0 -126
  169. shotgun_sh-0.2.8.dev2.dist-info/RECORD +0 -155
  170. /shotgun/agents/{history → conversation/history}/__init__.py +0 -0
  171. /shotgun/agents/{history → conversation/history}/context_extraction.py +0 -0
  172. /shotgun/agents/{history → conversation/history}/history_building.py +0 -0
  173. /shotgun/agents/{history → conversation/history}/message_utils.py +0 -0
  174. /shotgun/agents/{history → conversation/history}/token_counting/__init__.py +0 -0
  175. /shotgun/agents/{history → conversation/history}/token_estimation.py +0 -0
@@ -1,7 +1,9 @@
 """History processors for managing conversation history in Shotgun agents."""

+from collections.abc import Awaitable, Callable
 from typing import TYPE_CHECKING, Any, Protocol

+from anthropic import APIStatusError
 from pydantic_ai import ModelSettings
 from pydantic_ai.messages import (
     ModelMessage,
@@ -11,14 +13,16 @@ from pydantic_ai.messages import (
     UserPromptPart,
 )

+from shotgun.agents.conversation.filters import filter_orphaned_tool_responses
 from shotgun.agents.llm import shotgun_model_request
 from shotgun.agents.messages import AgentSystemPrompt, SystemStatusPrompt
 from shotgun.agents.models import AgentDeps
+from shotgun.exceptions import ContextSizeLimitExceeded
 from shotgun.logging_config import get_logger
 from shotgun.posthog_telemetry import track_event
 from shotgun.prompts import PromptLoader

-from .constants import SUMMARY_MARKER, TOKEN_LIMIT_RATIO
+from .constants import CHUNK_SAFE_RATIO, SUMMARY_MARKER, TOKEN_LIMIT_RATIO
 from .context_extraction import extract_context_from_messages
 from .history_building import ensure_ends_with_model_request
 from .message_utils import (
@@ -35,7 +39,7 @@ from .token_estimation import (
 )

 if TYPE_CHECKING:
-    pass
+    from . import chunking


 class ContextProtocol(Protocol):
@@ -51,6 +55,86 @@ logger = get_logger(__name__)
 prompt_loader = PromptLoader()


+async def _safe_token_estimation(
+    estimation_func: Callable[..., Awaitable[int]],
+    model_name: str,
+    max_tokens: int,
+    *args: Any,
+    **kwargs: Any,
+) -> int:
+    """Safely estimate tokens with proper error handling.
+
+    Wraps token estimation functions to handle failures gracefully.
+    Only RuntimeError (from token counters) is wrapped in ContextSizeLimitExceeded.
+    Other errors (network, auth) are allowed to bubble up.
+
+    Args:
+        estimation_func: Async function that estimates tokens
+        model_name: Name of the model for error messages
+        max_tokens: Maximum tokens for the model
+        *args: Arguments to pass to estimation_func
+        **kwargs: Keyword arguments to pass to estimation_func
+
+    Returns:
+        Token count from estimation_func
+
+    Raises:
+        ContextSizeLimitExceeded: If token counting fails with RuntimeError
+        Exception: Any other exceptions from estimation_func
+    """
+    try:
+        return await estimation_func(*args, **kwargs)
+    except Exception as e:
+        # Log the error with full context
+        logger.warning(
+            f"Token counting failed for {model_name}",
+            extra={
+                "error_type": type(e).__name__,
+                "error_message": str(e),
+                "model": model_name,
+            },
+        )
+
+        # Token counting behavior with oversized context (verified via testing):
+        #
+        # 1. OpenAI/tiktoken:
+        #    - Successfully counts any size (tested with 752K tokens, no error)
+        #    - Library errors: ValueError, KeyError, AttributeError, SSLError (file/cache issues)
+        #    - Wrapped as: RuntimeError by our counter
+        #
+        # 2. Gemini/SentencePiece:
+        #    - Successfully counts any size (tested with 752K tokens, no error)
+        #    - Library errors: RuntimeError, IOError, TypeError (file/model loading issues)
+        #    - Wrapped as: RuntimeError by our counter
+        #
+        # 3. Anthropic API:
+        #    - Successfully counts large token counts (tested with 752K tokens, no error)
+        #    - Only enforces 32 MB request size limit (not token count)
+        #    - Raises: APIStatusError(413) with error type 'request_too_large' for 32MB+ requests
+        #    - Other API errors: APIConnectionError, RateLimitError, APIStatusError (4xx/5xx)
+        #    - Wrapped as: RuntimeError by our counter
+        #
+        # IMPORTANT: No provider raises errors for "too many tokens" during counting.
+        # Token count validation happens separately by comparing count to max_input_tokens.
+        #
+        # We wrap RuntimeError (library-level failures from tiktoken/sentencepiece).
+        # We also wrap Anthropic's 413 error (request exceeds 32 MB) as it indicates
+        # context is effectively too large and needs user action to reduce it.
+        if isinstance(e, RuntimeError):
+            raise ContextSizeLimitExceeded(
+                model_name=model_name, max_tokens=max_tokens
+            ) from e
+
+        # Check for Anthropic's 32 MB request size limit (APIStatusError with status 413)
+        if isinstance(e, APIStatusError) and e.status_code == 413:
+            raise ContextSizeLimitExceeded(
+                model_name=model_name, max_tokens=max_tokens
+            ) from e
+
+        # Re-raise other exceptions (network errors, auth failures, etc.)
+        raise
+
+
 def is_summary_part(part: Any) -> bool:
     """Check if a message part is a compacted summary."""
     return isinstance(part, TextPart) and part.content.startswith(SUMMARY_MARKER)
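For orientation, the wrapper above is only a translation layer: library-level counter failures (RuntimeError) and Anthropic's 413 request-size error become a single ContextSizeLimitExceeded that the UI can surface, while everything else propagates unchanged. A minimal, self-contained sketch of that pattern follows; the exception class below is a simplified stand-in, not the actual shotgun.exceptions API.

class ContextTooLarge(Exception):
    """Simplified stand-in for shotgun.exceptions.ContextSizeLimitExceeded."""

    def __init__(self, model_name: str, max_tokens: int) -> None:
        super().__init__(f"{model_name}: context exceeds {max_tokens} tokens")


async def safe_count(count_fn, model_name: str, max_tokens: int, *args) -> int:
    try:
        return await count_fn(*args)
    except RuntimeError as e:
        # Counter-level failure -> user-actionable "context too large" error.
        raise ContextTooLarge(model_name, max_tokens) from e
    # Network/auth errors are deliberately left to bubble up unchanged.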
@@ -127,6 +211,7 @@ calculate_max_summarization_tokens = _calculate_max_summarization_tokens
 async def token_limit_compactor(
     ctx: ContextProtocol,
     messages: list[ModelMessage],
+    force: bool = False,
 ) -> list[ModelMessage]:
     """Compact message history based on token limits with incremental processing.

@@ -139,6 +224,7 @@ async def token_limit_compactor(
     Args:
         ctx: Run context with usage information and dependencies
         messages: Current conversation history
+        force: If True, force compaction even if below token threshold

     Returns:
         Compacted list of messages within token limits
@@ -155,9 +241,15 @@

     if last_summary_index is not None:
         # Check if post-summary conversation exceeds threshold for incremental compaction
-        post_summary_tokens = await estimate_post_summary_tokens(
-            messages, last_summary_index, deps.llm_model
+        post_summary_tokens = await _safe_token_estimation(
+            estimate_post_summary_tokens,
+            deps.llm_model.name,
+            model_max_tokens,
+            messages,
+            last_summary_index,
+            deps.llm_model,
         )
+
         post_summary_percentage = (
             (post_summary_tokens / max_tokens) * 100 if max_tokens > 0 else 0
         )
@@ -169,7 +261,7 @@
         )

         # Only do incremental compaction if post-summary conversation exceeds threshold
-        if post_summary_tokens < max_tokens:
+        if post_summary_tokens < max_tokens and not force:
            logger.debug(
                f"Post-summary conversation under threshold ({post_summary_tokens} < {max_tokens}), "
                f"keeping all {len(messages)} messages"
@@ -325,6 +417,9 @@
            compacted_messages, messages
        )

+        # Filter out orphaned tool responses (tool responses without tool calls)
+        compacted_messages = filter_orphaned_tool_responses(compacted_messages)
+
        logger.debug(
            f"Incremental compaction complete: {len(messages)} -> {len(compacted_messages)} messages"
        )
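The filter_orphaned_tool_responses call comes from the new shotgun/agents/conversation/filters.py module, which is not shown in this diff. The idea is to drop tool-return parts whose originating tool call was summarized away, since a tool result with no matching call is typically rejected by provider APIs. A rough sketch of that idea, with messages reduced to plain dicts rather than pydantic-ai parts:

def drop_orphaned_tool_returns(messages: list[dict]) -> list[dict]:
    """Keep a tool return only if its matching tool call is still present.

    Simplified illustration: real messages are pydantic-ai ModelRequest/
    ModelResponse parts, not dicts with 'kind' and 'tool_call_id' keys.
    """
    call_ids = {m["tool_call_id"] for m in messages if m.get("kind") == "tool_call"}
    return [
        m
        for m in messages
        if m.get("kind") != "tool_return" or m.get("tool_call_id") in call_ids
    ]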
@@ -340,6 +435,7 @@
            else 0
        )

+        # Track incremental compaction with simple metrics (fast, no token counting)
        track_event(
            "context_compaction_triggered",
            {
@@ -352,6 +448,10 @@
                "agent_mode": deps.agent_mode.value
                if hasattr(deps, "agent_mode") and deps.agent_mode
                else "unknown",
+                # Model and provider info (no computation needed)
+                "model_name": deps.llm_model.name.value,
+                "provider": deps.llm_model.provider.value,
+                "key_provider": deps.llm_model.key_provider.value,
            },
        )

@@ -359,7 +459,14 @@

    else:
        # Check if total conversation exceeds threshold for full compaction
-        total_tokens = await estimate_tokens_from_messages(messages, deps.llm_model)
+        total_tokens = await _safe_token_estimation(
+            estimate_tokens_from_messages,
+            deps.llm_model.name,
+            model_max_tokens,
+            messages,
+            deps.llm_model,
+        )
+
        total_percentage = (total_tokens / max_tokens) * 100 if max_tokens > 0 else 0

        logger.debug(
@@ -368,7 +475,7 @@
        )

        # Only do full compaction if total conversation exceeds threshold
-        if total_tokens < max_tokens:
+        if total_tokens < max_tokens and not force:
            logger.debug(
                f"Total conversation under threshold ({total_tokens} < {max_tokens}), "
                f"keeping all {len(messages)} messages"
@@ -386,10 +493,32 @@ async def _full_compaction(
     deps: AgentDeps,
     messages: list[ModelMessage],
 ) -> list[ModelMessage]:
-    """Perform full compaction for first-time summarization."""
+    """Perform full compaction for first-time summarization.
+
+    If the conversation is too large for single-pass compaction, delegates
+    to chunked compaction which breaks the conversation into logical chunks.
+    """
     # Extract context from all messages
     context = extract_context_from_messages(messages)

+    # Check if context would exceed model limit for compaction request
+    # We use CHUNK_SAFE_RATIO (70%) to leave room for prompt overhead
+    max_safe_input = int(deps.llm_model.max_input_tokens * CHUNK_SAFE_RATIO)
+
+    # Estimate context tokens
+    context_request: list[ModelMessage] = [ModelRequest.user_text_prompt(context)]
+    context_tokens = await estimate_tokens_from_messages(
+        context_request, deps.llm_model
+    )
+
+    if context_tokens > max_safe_input:
+        # Context too large for single-pass compaction - use chunked approach
+        logger.info(
+            f"Context ({context_tokens:,} tokens) exceeds safe limit "
+            f"({max_safe_input:,} tokens), using chunked compaction"
+        )
+        return await _chunked_compaction(deps, messages)
+
     # Use regular summarization prompt
     summarization_prompt = prompt_loader.render("history/summarization.j2")
     request_messages: list[ModelMessage] = [
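To make the new branch concrete: with the 70% safety ratio the comment describes, a model with a 200,000-token input window (an illustrative figure, not a value taken from the package) would switch to chunked compaction once the extracted context passes 140,000 tokens.

CHUNK_SAFE_RATIO = 0.70                                      # 70%, per the comment above
max_input_tokens = 200_000                                   # illustrative model limit
max_safe_input = int(max_input_tokens * CHUNK_SAFE_RATIO)    # 140_000
needs_chunking = 150_000 > max_safe_input                    # True: take the chunked path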
@@ -462,12 +591,16 @@ async def _full_compaction(
     # Ensure history ends with ModelRequest for PydanticAI compatibility
     compacted_messages = ensure_ends_with_model_request(compacted_messages, messages)

+    # Filter out orphaned tool responses (tool responses without tool calls)
+    compacted_messages = filter_orphaned_tool_responses(compacted_messages)
+
     # Track full compaction event
     messages_before = len(messages)
     messages_after = len(compacted_messages)
     tokens_before = current_tokens  # Already calculated above
     tokens_after = summary_usage.output_tokens if summary_usage else 0

+    # Track full compaction with simple metrics (fast, no token counting)
     track_event(
         "context_compaction_triggered",
         {
@@ -480,7 +613,246 @@ async def _full_compaction(
             "agent_mode": deps.agent_mode.value
             if hasattr(deps, "agent_mode") and deps.agent_mode
             else "unknown",
+            # Model and provider info (no computation needed)
+            "model_name": deps.llm_model.name.value,
+            "provider": deps.llm_model.provider.value,
+            "key_provider": deps.llm_model.key_provider.value,
         },
     )

     return compacted_messages
+
+
+async def _chunked_compaction(
+    deps: AgentDeps,
+    messages: list[ModelMessage],
+) -> list[ModelMessage]:
+    """Perform chunked compaction for oversized conversations.
+
+    Breaks the conversation into logical chunks, summarizes each sequentially,
+    then combines the summaries into a master summary.
+    """
+    from .chunking import chunk_messages_for_compaction
+
+    # Split into chunks and retention window
+    chunks, retained_messages = await chunk_messages_for_compaction(
+        messages, deps.llm_model
+    )
+
+    if not chunks:
+        # No chunks to summarize (conversation too small), return retained messages
+        logger.debug("No chunks to summarize, returning retained messages")
+        return retained_messages
+
+    # Track chunked compaction
+    total_chunks = len(chunks)
+    logger.info(f"Starting chunked compaction: {total_chunks} chunks to process")
+
+    # Summarize each chunk sequentially
+    chunk_summaries: list[str] = []
+    for chunk in chunks:
+        try:
+            summary = await _summarize_chunk(chunk, total_chunks, deps)
+            chunk_summaries.append(summary)
+            logger.debug(
+                f"Chunk {chunk.chunk_index + 1}/{total_chunks} summarized successfully"
+            )
+        except Exception as e:
+            logger.warning(
+                f"Failed to summarize chunk {chunk.chunk_index + 1}/{total_chunks}: {e}"
+            )
+            # Continue with other chunks - we'll note the gap in fusion
+            chunk_summaries.append(
+                f"[Chunk {chunk.chunk_index + 1} summary unavailable]"
+            )
+
+    # Combine summaries into master summary
+    if len(chunk_summaries) == 1:
+        final_summary = chunk_summaries[0]
+    else:
+        final_summary = await _combine_chunk_summaries(chunk_summaries, deps)
+
+    # Build final compacted history
+    compacted = _build_chunked_compaction_result(
+        final_summary, messages, retained_messages, deps
+    )
+
+    # Track chunked compaction event
+    track_event(
+        "chunked_compaction_triggered",
+        {
+            "num_chunks": total_chunks,
+            "chunks_succeeded": sum(
+                1 for s in chunk_summaries if not s.startswith("[Chunk")
+            ),
+            "retention_window_size": len(retained_messages),
+            "model_name": deps.llm_model.name.value,
+            "provider": deps.llm_model.provider.value,
+        },
+    )
+
+    return compacted
+
+
+async def _summarize_chunk(
+    chunk: "chunking.Chunk",
+    total_chunks: int,
+    deps: AgentDeps,
+) -> str:
+    """Summarize a single chunk of messages."""
+    chunk_messages = chunk.get_all_messages()
+    context = extract_context_from_messages(chunk_messages)
+
+    # Use chunk summarization template
+    chunk_prompt = prompt_loader.render(
+        "history/chunk_summarization.j2",
+        chunk_index=chunk.chunk_index + 1,
+        total_chunks=total_chunks,
+        chunk_content=context,
+    )
+
+    request_messages: list[ModelMessage] = [
+        ModelRequest.user_text_prompt(context, instructions=chunk_prompt)
+    ]
+
+    max_tokens = await calculate_max_summarization_tokens(
+        deps.llm_model, request_messages
+    )
+
+    log_summarization_request(
+        deps.llm_model,
+        max_tokens,
+        chunk_prompt,
+        context[:500] + "..." if len(context) > 500 else context,
+        f"CHUNK_{chunk.chunk_index + 1}",
+    )
+
+    response = await shotgun_model_request(
+        model_config=deps.llm_model,
+        messages=request_messages,
+        model_settings=ModelSettings(max_tokens=max_tokens),
+    )
+
+    log_summarization_response(response, f"CHUNK_{chunk.chunk_index + 1}")
+
+    if response.parts and isinstance(response.parts[0], TextPart):
+        return response.parts[0].content
+    return ""
+
+
+async def _combine_chunk_summaries(
+    summaries: list[str],
+    deps: AgentDeps,
+) -> str:
+    """Combine multiple chunk summaries into a unified summary."""
+    # Check if combined summaries exceed limit (may need recursive combination)
+    combined_text = "\n\n".join(summaries)
+    combined_request: list[ModelMessage] = [
+        ModelRequest.user_text_prompt(combined_text)
+    ]
+    combined_tokens = await estimate_tokens_from_messages(
+        combined_request, deps.llm_model
+    )
+
+    max_safe_input = int(deps.llm_model.max_input_tokens * CHUNK_SAFE_RATIO)
+
+    if combined_tokens > max_safe_input:
+        # Recursive: split summaries in half and combine each half first
+        logger.warning(
+            f"Combined summaries too large ({combined_tokens:,} tokens), "
+            f"applying recursive combination"
+        )
+        mid = len(summaries) // 2
+        first_half = await _combine_chunk_summaries(summaries[:mid], deps)
+        second_half = await _combine_chunk_summaries(summaries[mid:], deps)
+        summaries = [first_half, second_half]
+
+    # Use combination template
+    combine_prompt = prompt_loader.render(
+        "history/combine_summaries.j2",
+        num_summaries=len(summaries),
+        chunk_summaries=summaries,
+    )
+
+    request_messages: list[ModelMessage] = [
+        ModelRequest.user_text_prompt(
+            "\n\n---\n\n".join(summaries), instructions=combine_prompt
+        )
+    ]
+
+    max_tokens = await calculate_max_summarization_tokens(
+        deps.llm_model, request_messages
+    )
+
+    log_summarization_request(
+        deps.llm_model,
+        max_tokens,
+        combine_prompt,
+        f"[{len(summaries)} summaries to combine]",
+        "COMBINE",
+    )
+
+    response = await shotgun_model_request(
+        model_config=deps.llm_model,
+        messages=request_messages,
+        model_settings=ModelSettings(max_tokens=max_tokens),
+    )
+
+    log_summarization_response(response, "COMBINE")
+
+    if response.parts and isinstance(response.parts[0], TextPart):
+        return response.parts[0].content
+    return ""
+
+
+def _build_chunked_compaction_result(
+    final_summary: str,
+    original_messages: list[ModelMessage],
+    retained_messages: list[ModelMessage],
+    deps: AgentDeps,
+) -> list[ModelMessage]:
+    """Build the final compacted history from chunked compaction."""
+    from pydantic_ai.messages import ModelRequestPart
+
+    # Extract system context from original messages
+    agent_prompt = get_agent_system_prompt(original_messages) or ""
+    system_status = get_latest_system_status(original_messages) or ""
+    first_user = get_first_user_request(original_messages) or ""
+
+    # Create marked summary
+    summary_part = TextPart(content=f"{SUMMARY_MARKER} {final_summary}")
+    summary_message = ModelResponse(parts=[summary_part])
+
+    # Build compacted structure
+    compacted: list[ModelMessage] = []
+
+    # Initial request with system context
+    parts: list[ModelRequestPart] = []
+    if agent_prompt:
+        parts.append(AgentSystemPrompt(content=agent_prompt))
+    if system_status:
+        parts.append(SystemStatusPrompt(content=system_status))
+    if first_user:
+        parts.append(UserPromptPart(content=first_user))
+
+    if parts:
+        compacted.append(ModelRequest(parts=parts))
+
+    # Add summary
+    compacted.append(summary_message)
+
+    # Add retained messages (recent context)
+    compacted.extend(retained_messages)
+
+    # Ensure ends with ModelRequest for PydanticAI compatibility
+    compacted = ensure_ends_with_model_request(compacted, original_messages)
+
+    # Filter orphaned tool responses
+    compacted = filter_orphaned_tool_responses(compacted)
+
+    logger.info(
+        f"Chunked compaction complete: {len(original_messages)} messages -> "
+        f"{len(compacted)} messages (retained {len(retained_messages)} recent)"
+    )
+
+    return compacted
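The only non-obvious piece of the chunked path is the recursive halving in _combine_chunk_summaries: when the concatenated chunk summaries would themselves blow past the safe input size, each half is combined first and the two intermediate summaries are fused. A self-contained sketch of that shape, with the model call replaced by a stub and token counting approximated by string length purely for illustration:

async def combine(summaries: list[str], max_safe_chars: int) -> str:
    """Illustrative divide-and-combine; not the package's implementation."""

    async def summarize(texts: list[str]) -> str:
        # Stand-in for the LLM request that fuses several summaries into one.
        return " | ".join(t[:60] for t in texts)

    if len(summaries) > 1 and len("\n\n".join(summaries)) > max_safe_chars:
        mid = len(summaries) // 2
        summaries = [
            await combine(summaries[:mid], max_safe_chars),
            await combine(summaries[mid:], max_safe_chars),
        ]
    return await summarize(summaries)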
@@ -1,6 +1,7 @@
 """Anthropic token counting using official client."""

 import logfire
+from anthropic import APIStatusError
 from pydantic_ai.messages import ModelMessage

 from shotgun.agents.config.models import KeyProvider
@@ -72,11 +73,23 @@ class AnthropicTokenCounter(TokenCounter):
        Raises:
            RuntimeError: If API call fails
        """
+        # Handle empty text to avoid unnecessary API calls
+        # Anthropic API requires non-empty content, so we need a strict check
+        if not text or not text.strip():
+            return 0
+
+        # Additional validation: ensure the text has actual content
+        # Some edge cases might have only whitespace or control characters
+        cleaned_text = text.strip()
+        if not cleaned_text:
+            return 0
+
        try:
            # Anthropic API expects messages format and model parameter
            # Use await with async client
            result = await self.client.messages.count_tokens(
-                messages=[{"role": "user", "content": text}], model=self.model_name
+                messages=[{"role": "user", "content": cleaned_text}],
+                model=self.model_name,
            )
            return result.input_tokens
        except Exception as e:
@@ -91,6 +104,13 @@ class AnthropicTokenCounter(TokenCounter):
                exception_type=type(e).__name__,
                exception_message=str(e),
            )
+
+            # Re-raise API errors directly so they can be classified by the runner
+            # This allows proper error classification for BYOK users (authentication, rate limits, etc.)
+            if isinstance(e, APIStatusError):
+                raise
+
+            # Only wrap library-level errors in RuntimeError
            raise RuntimeError(
                f"Anthropic token counting API failed for {self.model_name}: {type(e).__name__}: {str(e)}"
            ) from e
@@ -107,5 +127,9 @@ class AnthropicTokenCounter(TokenCounter):
        Raises:
            RuntimeError: If token counting fails
        """
+        # Handle empty message list early
+        if not messages:
+            return 0
+
        total_text = extract_text_from_messages(messages)
        return await self.count_tokens(total_text)
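With this change the Anthropic counter has two distinct failure shapes: APIStatusError for provider-side problems (authentication, rate limits, the 413 request-size cap) and RuntimeError only for library-level failures it wraps itself. A hedged sketch of how a caller might branch on that split; the runner's actual classification logic is not part of this diff:

from anthropic import APIStatusError


async def count_or_zero(counter, text: str) -> int:
    try:
        return await counter.count_tokens(text)
    except APIStatusError:
        # Provider-side error: re-raise untouched so it can be reported to
        # BYOK users with its real status code.
        raise
    except RuntimeError:
        # Library-level failure already wrapped by the counter.
        return 0  # placeholder handling, for illustration only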
@@ -56,12 +56,23 @@ def extract_text_from_messages(messages: list[ModelMessage]) -> str:
        if hasattr(message, "parts"):
            for part in message.parts:
                if hasattr(part, "content") and isinstance(part.content, str):
-                    text_parts.append(part.content)
+                    # Only add non-empty content
+                    if part.content.strip():
+                        text_parts.append(part.content)
                else:
                    # Handle non-text parts (tool calls, etc.)
-                    text_parts.append(str(part))
+                    part_str = str(part)
+                    if part_str.strip():
+                        text_parts.append(part_str)
        else:
            # Handle messages without parts
-            text_parts.append(str(message))
+            msg_str = str(message)
+            if msg_str.strip():
+                text_parts.append(msg_str)
+
+    # If no valid text parts found, return a minimal placeholder
+    # This ensures we never send completely empty content to APIs
+    if not text_parts:
+        return "."

    return "\n".join(text_parts)
@@ -57,9 +57,15 @@ class OpenAITokenCounter(TokenCounter):
        Raises:
            RuntimeError: If token counting fails
        """
+        # Handle empty text to avoid unnecessary encoding
+        if not text or not text.strip():
+            return 0
+
        try:
            return len(self.encoding.encode(text))
-        except Exception as e:
+        except BaseException as e:
+            # Must catch BaseException to handle PanicException from tiktoken's Rust layer
+            # which can occur with extremely long texts. Regular Exception won't catch it.
            raise RuntimeError(
                f"Failed to count tokens for OpenAI model {self.model_name}"
            ) from e
@@ -76,5 +82,9 @@
        Raises:
            RuntimeError: If token counting fails
        """
+        # Handle empty message list early
+        if not messages:
+            return 0
+
        total_text = extract_text_from_messages(messages)
        return await self.count_tokens(total_text)
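The switch from Exception to BaseException matters because, as the comment notes, tiktoken's Rust layer can surface pyo3's PanicException, which does not derive from Exception. The same defensive pattern in isolation (the encoder is passed in, so nothing here depends on tiktoken being installed):

def count_with_panic_guard(encode, text: str) -> int:
    """Wrap an encoder call so Rust-level panics surface as RuntimeError."""
    try:
        return len(encode(text))
    except BaseException as e:  # deliberately broader than Exception
        # pyo3's PanicException bypasses `except Exception`, so the broad
        # catch is what converts a panic into an error the caller can handle.
        raise RuntimeError("token counting failed") from e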
@@ -88,6 +88,10 @@ class SentencePieceTokenCounter(TokenCounter):
        Raises:
            RuntimeError: If token counting fails
        """
+        # Handle empty text to avoid unnecessary tokenization
+        if not text or not text.strip():
+            return 0
+
        await self._ensure_tokenizer()

        if self.sp is None:
@@ -115,5 +119,9 @@
        Raises:
            RuntimeError: If token counting fails
        """
+        # Handle empty message list early
+        if not messages:
+            return 0
+
        total_text = extract_text_from_messages(messages)
        return await self.count_tokens(total_text)
@@ -3,6 +3,7 @@
 import hashlib
 from pathlib import Path

+import aiofiles
 import httpx

 from shotgun.logging_config import get_logger
@@ -78,7 +79,8 @@ async def download_gemini_tokenizer() -> Path:

    # Atomic write: write to temp file first, then rename
    temp_path = cache_path.with_suffix(".tmp")
-    temp_path.write_bytes(content)
+    async with aiofiles.open(temp_path, "wb") as f:
+        await f.write(content)
    temp_path.rename(cache_path)

    logger.info(f"Gemini tokenizer downloaded and cached at {cache_path}")
@@ -44,9 +44,6 @@ def get_token_counter(model_config: ModelConfig) -> TokenCounter:

    # Return cached instance if available
    if cache_key in _token_counter_cache:
-        logger.debug(
-            f"Reusing cached token counter for {model_config.provider.value}:{model_config.name}"
-        )
        return _token_counter_cache[cache_key]

    # Create new instance and cache it