shotgun-sh 0.2.8.dev2__py3-none-any.whl → 0.3.3.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shotgun/agents/agent_manager.py +382 -60
- shotgun/agents/common.py +15 -9
- shotgun/agents/config/README.md +89 -0
- shotgun/agents/config/__init__.py +10 -1
- shotgun/agents/config/constants.py +0 -6
- shotgun/agents/config/manager.py +383 -82
- shotgun/agents/config/models.py +122 -18
- shotgun/agents/config/provider.py +81 -15
- shotgun/agents/config/streaming_test.py +119 -0
- shotgun/agents/context_analyzer/__init__.py +28 -0
- shotgun/agents/context_analyzer/analyzer.py +475 -0
- shotgun/agents/context_analyzer/constants.py +9 -0
- shotgun/agents/context_analyzer/formatter.py +115 -0
- shotgun/agents/context_analyzer/models.py +212 -0
- shotgun/agents/conversation/__init__.py +18 -0
- shotgun/agents/conversation/filters.py +164 -0
- shotgun/agents/conversation/history/chunking.py +278 -0
- shotgun/agents/{history → conversation/history}/compaction.py +36 -5
- shotgun/agents/{history → conversation/history}/constants.py +5 -0
- shotgun/agents/conversation/history/file_content_deduplication.py +216 -0
- shotgun/agents/{history → conversation/history}/history_processors.py +380 -8
- shotgun/agents/{history → conversation/history}/token_counting/anthropic.py +25 -1
- shotgun/agents/{history → conversation/history}/token_counting/base.py +14 -3
- shotgun/agents/{history → conversation/history}/token_counting/openai.py +11 -1
- shotgun/agents/{history → conversation/history}/token_counting/sentencepiece_counter.py +8 -0
- shotgun/agents/{history → conversation/history}/token_counting/tokenizer_cache.py +3 -1
- shotgun/agents/{history → conversation/history}/token_counting/utils.py +0 -3
- shotgun/agents/{conversation_manager.py → conversation/manager.py} +36 -20
- shotgun/agents/{conversation_history.py → conversation/models.py} +8 -92
- shotgun/agents/error/__init__.py +11 -0
- shotgun/agents/error/models.py +19 -0
- shotgun/agents/export.py +2 -2
- shotgun/agents/plan.py +2 -2
- shotgun/agents/research.py +3 -3
- shotgun/agents/runner.py +230 -0
- shotgun/agents/specify.py +2 -2
- shotgun/agents/tasks.py +2 -2
- shotgun/agents/tools/codebase/codebase_shell.py +6 -0
- shotgun/agents/tools/codebase/directory_lister.py +6 -0
- shotgun/agents/tools/codebase/file_read.py +11 -2
- shotgun/agents/tools/codebase/query_graph.py +6 -0
- shotgun/agents/tools/codebase/retrieve_code.py +6 -0
- shotgun/agents/tools/file_management.py +27 -7
- shotgun/agents/tools/registry.py +217 -0
- shotgun/agents/tools/web_search/__init__.py +8 -8
- shotgun/agents/tools/web_search/anthropic.py +8 -2
- shotgun/agents/tools/web_search/gemini.py +7 -1
- shotgun/agents/tools/web_search/openai.py +8 -2
- shotgun/agents/tools/web_search/utils.py +2 -2
- shotgun/agents/usage_manager.py +16 -11
- shotgun/api_endpoints.py +7 -3
- shotgun/build_constants.py +2 -2
- shotgun/cli/clear.py +53 -0
- shotgun/cli/compact.py +188 -0
- shotgun/cli/config.py +8 -5
- shotgun/cli/context.py +154 -0
- shotgun/cli/error_handler.py +24 -0
- shotgun/cli/export.py +34 -34
- shotgun/cli/feedback.py +4 -2
- shotgun/cli/models.py +1 -0
- shotgun/cli/plan.py +34 -34
- shotgun/cli/research.py +18 -10
- shotgun/cli/spec/__init__.py +5 -0
- shotgun/cli/spec/backup.py +81 -0
- shotgun/cli/spec/commands.py +132 -0
- shotgun/cli/spec/models.py +48 -0
- shotgun/cli/spec/pull_service.py +219 -0
- shotgun/cli/specify.py +20 -19
- shotgun/cli/tasks.py +34 -34
- shotgun/cli/update.py +16 -2
- shotgun/codebase/core/change_detector.py +5 -3
- shotgun/codebase/core/code_retrieval.py +4 -2
- shotgun/codebase/core/ingestor.py +163 -15
- shotgun/codebase/core/manager.py +13 -4
- shotgun/codebase/core/nl_query.py +1 -1
- shotgun/codebase/models.py +2 -0
- shotgun/exceptions.py +357 -0
- shotgun/llm_proxy/__init__.py +17 -0
- shotgun/llm_proxy/client.py +215 -0
- shotgun/llm_proxy/models.py +137 -0
- shotgun/logging_config.py +60 -27
- shotgun/main.py +77 -11
- shotgun/posthog_telemetry.py +38 -29
- shotgun/prompts/agents/partials/common_agent_system_prompt.j2 +28 -2
- shotgun/prompts/agents/partials/interactive_mode.j2 +3 -3
- shotgun/prompts/agents/plan.j2 +16 -0
- shotgun/prompts/agents/research.j2 +16 -3
- shotgun/prompts/agents/specify.j2 +54 -1
- shotgun/prompts/agents/state/system_state.j2 +0 -2
- shotgun/prompts/agents/tasks.j2 +16 -0
- shotgun/prompts/history/chunk_summarization.j2 +34 -0
- shotgun/prompts/history/combine_summaries.j2 +53 -0
- shotgun/sdk/codebase.py +14 -3
- shotgun/sentry_telemetry.py +163 -16
- shotgun/settings.py +243 -0
- shotgun/shotgun_web/__init__.py +67 -1
- shotgun/shotgun_web/client.py +42 -1
- shotgun/shotgun_web/constants.py +46 -0
- shotgun/shotgun_web/exceptions.py +29 -0
- shotgun/shotgun_web/models.py +390 -0
- shotgun/shotgun_web/shared_specs/__init__.py +32 -0
- shotgun/shotgun_web/shared_specs/file_scanner.py +175 -0
- shotgun/shotgun_web/shared_specs/hasher.py +83 -0
- shotgun/shotgun_web/shared_specs/models.py +71 -0
- shotgun/shotgun_web/shared_specs/upload_pipeline.py +329 -0
- shotgun/shotgun_web/shared_specs/utils.py +34 -0
- shotgun/shotgun_web/specs_client.py +703 -0
- shotgun/shotgun_web/supabase_client.py +31 -0
- shotgun/telemetry.py +10 -33
- shotgun/tui/app.py +310 -46
- shotgun/tui/commands/__init__.py +1 -1
- shotgun/tui/components/context_indicator.py +179 -0
- shotgun/tui/components/mode_indicator.py +70 -0
- shotgun/tui/components/status_bar.py +48 -0
- shotgun/tui/containers.py +91 -0
- shotgun/tui/dependencies.py +39 -0
- shotgun/tui/layout.py +5 -0
- shotgun/tui/protocols.py +45 -0
- shotgun/tui/screens/chat/__init__.py +5 -0
- shotgun/tui/screens/chat/chat.tcss +54 -0
- shotgun/tui/screens/chat/chat_screen.py +1531 -0
- shotgun/tui/screens/chat/codebase_index_prompt_screen.py +243 -0
- shotgun/tui/screens/chat/codebase_index_selection.py +12 -0
- shotgun/tui/screens/chat/help_text.py +40 -0
- shotgun/tui/screens/chat/prompt_history.py +48 -0
- shotgun/tui/screens/chat.tcss +11 -0
- shotgun/tui/screens/chat_screen/command_providers.py +91 -4
- shotgun/tui/screens/chat_screen/hint_message.py +76 -1
- shotgun/tui/screens/chat_screen/history/__init__.py +22 -0
- shotgun/tui/screens/chat_screen/history/agent_response.py +66 -0
- shotgun/tui/screens/chat_screen/history/chat_history.py +115 -0
- shotgun/tui/screens/chat_screen/history/formatters.py +115 -0
- shotgun/tui/screens/chat_screen/history/partial_response.py +43 -0
- shotgun/tui/screens/chat_screen/history/user_question.py +42 -0
- shotgun/tui/screens/confirmation_dialog.py +191 -0
- shotgun/tui/screens/directory_setup.py +45 -41
- shotgun/tui/screens/feedback.py +14 -7
- shotgun/tui/screens/github_issue.py +111 -0
- shotgun/tui/screens/model_picker.py +77 -32
- shotgun/tui/screens/onboarding.py +580 -0
- shotgun/tui/screens/pipx_migration.py +205 -0
- shotgun/tui/screens/provider_config.py +116 -35
- shotgun/tui/screens/shared_specs/__init__.py +21 -0
- shotgun/tui/screens/shared_specs/create_spec_dialog.py +273 -0
- shotgun/tui/screens/shared_specs/models.py +56 -0
- shotgun/tui/screens/shared_specs/share_specs_dialog.py +390 -0
- shotgun/tui/screens/shared_specs/upload_progress_screen.py +452 -0
- shotgun/tui/screens/shotgun_auth.py +112 -18
- shotgun/tui/screens/spec_pull.py +288 -0
- shotgun/tui/screens/welcome.py +137 -11
- shotgun/tui/services/__init__.py +5 -0
- shotgun/tui/services/conversation_service.py +187 -0
- shotgun/tui/state/__init__.py +7 -0
- shotgun/tui/state/processing_state.py +185 -0
- shotgun/tui/utils/mode_progress.py +14 -7
- shotgun/tui/widgets/__init__.py +5 -0
- shotgun/tui/widgets/widget_coordinator.py +263 -0
- shotgun/utils/file_system_utils.py +22 -2
- shotgun/utils/marketing.py +110 -0
- shotgun/utils/update_checker.py +69 -14
- shotgun_sh-0.3.3.dev1.dist-info/METADATA +472 -0
- shotgun_sh-0.3.3.dev1.dist-info/RECORD +229 -0
- {shotgun_sh-0.2.8.dev2.dist-info → shotgun_sh-0.3.3.dev1.dist-info}/WHEEL +1 -1
- {shotgun_sh-0.2.8.dev2.dist-info → shotgun_sh-0.3.3.dev1.dist-info}/entry_points.txt +1 -0
- {shotgun_sh-0.2.8.dev2.dist-info → shotgun_sh-0.3.3.dev1.dist-info}/licenses/LICENSE +1 -1
- shotgun/tui/screens/chat.py +0 -996
- shotgun/tui/screens/chat_screen/history.py +0 -335
- shotgun_sh-0.2.8.dev2.dist-info/METADATA +0 -126
- shotgun_sh-0.2.8.dev2.dist-info/RECORD +0 -155
- /shotgun/agents/{history → conversation/history}/__init__.py +0 -0
- /shotgun/agents/{history → conversation/history}/context_extraction.py +0 -0
- /shotgun/agents/{history → conversation/history}/history_building.py +0 -0
- /shotgun/agents/{history → conversation/history}/message_utils.py +0 -0
- /shotgun/agents/{history → conversation/history}/token_counting/__init__.py +0 -0
- /shotgun/agents/{history → conversation/history}/token_estimation.py +0 -0
shotgun/agents/{history → conversation/history}/history_processors.py

```diff
@@ -1,7 +1,9 @@
 """History processors for managing conversation history in Shotgun agents."""
 
+from collections.abc import Awaitable, Callable
 from typing import TYPE_CHECKING, Any, Protocol
 
+from anthropic import APIStatusError
 from pydantic_ai import ModelSettings
 from pydantic_ai.messages import (
     ModelMessage,
@@ -11,14 +13,16 @@ from pydantic_ai.messages import (
     UserPromptPart,
 )
 
+from shotgun.agents.conversation.filters import filter_orphaned_tool_responses
 from shotgun.agents.llm import shotgun_model_request
 from shotgun.agents.messages import AgentSystemPrompt, SystemStatusPrompt
 from shotgun.agents.models import AgentDeps
+from shotgun.exceptions import ContextSizeLimitExceeded
 from shotgun.logging_config import get_logger
 from shotgun.posthog_telemetry import track_event
 from shotgun.prompts import PromptLoader
 
-from .constants import SUMMARY_MARKER, TOKEN_LIMIT_RATIO
+from .constants import CHUNK_SAFE_RATIO, SUMMARY_MARKER, TOKEN_LIMIT_RATIO
 from .context_extraction import extract_context_from_messages
 from .history_building import ensure_ends_with_model_request
 from .message_utils import (
@@ -35,7 +39,7 @@ from .token_estimation import (
 )
 
 if TYPE_CHECKING:
-
+    from . import chunking
 
 
 class ContextProtocol(Protocol):
@@ -51,6 +55,86 @@ logger = get_logger(__name__)
 prompt_loader = PromptLoader()
 
 
+async def _safe_token_estimation(
+    estimation_func: Callable[..., Awaitable[int]],
+    model_name: str,
+    max_tokens: int,
+    *args: Any,
+    **kwargs: Any,
+) -> int:
+    """Safely estimate tokens with proper error handling.
+
+    Wraps token estimation functions to handle failures gracefully.
+    Only RuntimeError (from token counters) is wrapped in ContextSizeLimitExceeded.
+    Other errors (network, auth) are allowed to bubble up.
+
+    Args:
+        estimation_func: Async function that estimates tokens
+        model_name: Name of the model for error messages
+        max_tokens: Maximum tokens for the model
+        *args: Arguments to pass to estimation_func
+        **kwargs: Keyword arguments to pass to estimation_func
+
+    Returns:
+        Token count from estimation_func
+
+    Raises:
+        ContextSizeLimitExceeded: If token counting fails with RuntimeError
+        Exception: Any other exceptions from estimation_func
+    """
+    try:
+        return await estimation_func(*args, **kwargs)
+    except Exception as e:
+        # Log the error with full context
+        logger.warning(
+            f"Token counting failed for {model_name}",
+            extra={
+                "error_type": type(e).__name__,
+                "error_message": str(e),
+                "model": model_name,
+            },
+        )
+
+        # Token counting behavior with oversized context (verified via testing):
+        #
+        # 1. OpenAI/tiktoken:
+        #    - Successfully counts any size (tested with 752K tokens, no error)
+        #    - Library errors: ValueError, KeyError, AttributeError, SSLError (file/cache issues)
+        #    - Wrapped as: RuntimeError by our counter
+        #
+        # 2. Gemini/SentencePiece:
+        #    - Successfully counts any size (tested with 752K tokens, no error)
+        #    - Library errors: RuntimeError, IOError, TypeError (file/model loading issues)
+        #    - Wrapped as: RuntimeError by our counter
+        #
+        # 3. Anthropic API:
+        #    - Successfully counts large token counts (tested with 752K tokens, no error)
+        #    - Only enforces 32 MB request size limit (not token count)
+        #    - Raises: APIStatusError(413) with error type 'request_too_large' for 32MB+ requests
+        #    - Other API errors: APIConnectionError, RateLimitError, APIStatusError (4xx/5xx)
+        #    - Wrapped as: RuntimeError by our counter
+        #
+        # IMPORTANT: No provider raises errors for "too many tokens" during counting.
+        # Token count validation happens separately by comparing count to max_input_tokens.
+        #
+        # We wrap RuntimeError (library-level failures from tiktoken/sentencepiece).
+        # We also wrap Anthropic's 413 error (request exceeds 32 MB) as it indicates
+        # context is effectively too large and needs user action to reduce it.
+        if isinstance(e, RuntimeError):
+            raise ContextSizeLimitExceeded(
+                model_name=model_name, max_tokens=max_tokens
+            ) from e
+
+        # Check for Anthropic's 32 MB request size limit (APIStatusError with status 413)
+        if isinstance(e, APIStatusError) and e.status_code == 413:
+            raise ContextSizeLimitExceeded(
+                model_name=model_name, max_tokens=max_tokens
+            ) from e
+
+        # Re-raise other exceptions (network errors, auth failures, etc.)
+        raise
+
+
 def is_summary_part(part: Any) -> bool:
     """Check if a message part is a compacted summary."""
     return isinstance(part, TextPart) and part.content.startswith(SUMMARY_MARKER)
```
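The wrapper above converts unrecoverable token-counting failures (tiktoken/SentencePiece `RuntimeError`s, Anthropic 413s) into `ContextSizeLimitExceeded`. A minimal caller-side sketch of how that exception might be surfaced to the user; this is not code from this release, and the exception attribute names are assumptions based on the keyword arguments used at the raise sites:

```python
# Hypothetical caller-side sketch (not from this diff): surface the new
# ContextSizeLimitExceeded instead of letting it abort the agent run.
import logging

from shotgun.agents.conversation.history.history_processors import (
    token_limit_compactor,
)
from shotgun.exceptions import ContextSizeLimitExceeded

logger = logging.getLogger(__name__)


async def compact_or_explain(ctx, messages):
    try:
        return await token_limit_compactor(ctx, messages)  # force=False by default
    except ContextSizeLimitExceeded as exc:
        # Attribute names are assumptions mirroring the raise sites above
        # (model_name=..., max_tokens=...).
        logger.error(
            "Token counting failed for %s (limit ~%s tokens); suggest /compact or /clear",
            getattr(exc, "model_name", "unknown"),
            getattr(exc, "max_tokens", "unknown"),
        )
        return messages
```

The remaining hunks of this file continue below.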
shotgun/agents/{history → conversation/history}/history_processors.py (continued)

```diff
@@ -127,6 +211,7 @@ calculate_max_summarization_tokens = _calculate_max_summarization_tokens
 async def token_limit_compactor(
     ctx: ContextProtocol,
     messages: list[ModelMessage],
+    force: bool = False,
 ) -> list[ModelMessage]:
     """Compact message history based on token limits with incremental processing.
 
@@ -139,6 +224,7 @@ async def token_limit_compactor(
     Args:
         ctx: Run context with usage information and dependencies
        messages: Current conversation history
+        force: If True, force compaction even if below token threshold
 
     Returns:
         Compacted list of messages within token limits
@@ -155,9 +241,15 @@ async def token_limit_compactor(
 
     if last_summary_index is not None:
         # Check if post-summary conversation exceeds threshold for incremental compaction
-        post_summary_tokens = await
-
+        post_summary_tokens = await _safe_token_estimation(
+            estimate_post_summary_tokens,
+            deps.llm_model.name,
+            model_max_tokens,
+            messages,
+            last_summary_index,
+            deps.llm_model,
         )
+
         post_summary_percentage = (
             (post_summary_tokens / max_tokens) * 100 if max_tokens > 0 else 0
         )
@@ -169,7 +261,7 @@ async def token_limit_compactor(
         )
 
         # Only do incremental compaction if post-summary conversation exceeds threshold
-        if post_summary_tokens < max_tokens:
+        if post_summary_tokens < max_tokens and not force:
             logger.debug(
                 f"Post-summary conversation under threshold ({post_summary_tokens} < {max_tokens}), "
                 f"keeping all {len(messages)} messages"
@@ -325,6 +417,9 @@ async def token_limit_compactor(
             compacted_messages, messages
         )
 
+        # Filter out orphaned tool responses (tool responses without tool calls)
+        compacted_messages = filter_orphaned_tool_responses(compacted_messages)
+
         logger.debug(
             f"Incremental compaction complete: {len(messages)} -> {len(compacted_messages)} messages"
         )
@@ -340,6 +435,7 @@ async def token_limit_compactor(
             else 0
         )
 
+        # Track incremental compaction with simple metrics (fast, no token counting)
         track_event(
             "context_compaction_triggered",
             {
@@ -352,6 +448,10 @@ async def token_limit_compactor(
                 "agent_mode": deps.agent_mode.value
                 if hasattr(deps, "agent_mode") and deps.agent_mode
                 else "unknown",
+                # Model and provider info (no computation needed)
+                "model_name": deps.llm_model.name.value,
+                "provider": deps.llm_model.provider.value,
+                "key_provider": deps.llm_model.key_provider.value,
             },
         )
 
@@ -359,7 +459,14 @@ async def token_limit_compactor(
 
     else:
         # Check if total conversation exceeds threshold for full compaction
-        total_tokens = await
+        total_tokens = await _safe_token_estimation(
+            estimate_tokens_from_messages,
+            deps.llm_model.name,
+            model_max_tokens,
+            messages,
+            deps.llm_model,
+        )
+
         total_percentage = (total_tokens / max_tokens) * 100 if max_tokens > 0 else 0
 
         logger.debug(
@@ -368,7 +475,7 @@ async def token_limit_compactor(
         )
 
         # Only do full compaction if total conversation exceeds threshold
-        if total_tokens < max_tokens:
+        if total_tokens < max_tokens and not force:
             logger.debug(
                 f"Total conversation under threshold ({total_tokens} < {max_tokens}), "
                 f"keeping all {len(messages)} messages"
@@ -386,10 +493,32 @@ async def _full_compaction(
     deps: AgentDeps,
     messages: list[ModelMessage],
 ) -> list[ModelMessage]:
-    """Perform full compaction for first-time summarization.
+    """Perform full compaction for first-time summarization.
+
+    If the conversation is too large for single-pass compaction, delegates
+    to chunked compaction which breaks the conversation into logical chunks.
+    """
     # Extract context from all messages
     context = extract_context_from_messages(messages)
 
+    # Check if context would exceed model limit for compaction request
+    # We use CHUNK_SAFE_RATIO (70%) to leave room for prompt overhead
+    max_safe_input = int(deps.llm_model.max_input_tokens * CHUNK_SAFE_RATIO)
+
+    # Estimate context tokens
+    context_request: list[ModelMessage] = [ModelRequest.user_text_prompt(context)]
+    context_tokens = await estimate_tokens_from_messages(
+        context_request, deps.llm_model
+    )
+
+    if context_tokens > max_safe_input:
+        # Context too large for single-pass compaction - use chunked approach
+        logger.info(
+            f"Context ({context_tokens:,} tokens) exceeds safe limit "
+            f"({max_safe_input:,} tokens), using chunked compaction"
+        )
+        return await _chunked_compaction(deps, messages)
+
     # Use regular summarization prompt
     summarization_prompt = prompt_loader.render("history/summarization.j2")
     request_messages: list[ModelMessage] = [
@@ -462,12 +591,16 @@ async def _full_compaction(
     # Ensure history ends with ModelRequest for PydanticAI compatibility
     compacted_messages = ensure_ends_with_model_request(compacted_messages, messages)
 
+    # Filter out orphaned tool responses (tool responses without tool calls)
+    compacted_messages = filter_orphaned_tool_responses(compacted_messages)
+
     # Track full compaction event
     messages_before = len(messages)
     messages_after = len(compacted_messages)
     tokens_before = current_tokens  # Already calculated above
     tokens_after = summary_usage.output_tokens if summary_usage else 0
 
+    # Track full compaction with simple metrics (fast, no token counting)
     track_event(
         "context_compaction_triggered",
         {
@@ -480,7 +613,246 @@ async def _full_compaction(
             "agent_mode": deps.agent_mode.value
             if hasattr(deps, "agent_mode") and deps.agent_mode
             else "unknown",
+            # Model and provider info (no computation needed)
+            "model_name": deps.llm_model.name.value,
+            "provider": deps.llm_model.provider.value,
+            "key_provider": deps.llm_model.key_provider.value,
         },
     )
 
     return compacted_messages
+
+
+async def _chunked_compaction(
+    deps: AgentDeps,
+    messages: list[ModelMessage],
+) -> list[ModelMessage]:
+    """Perform chunked compaction for oversized conversations.
+
+    Breaks the conversation into logical chunks, summarizes each sequentially,
+    then combines the summaries into a master summary.
+    """
+    from .chunking import chunk_messages_for_compaction
+
+    # Split into chunks and retention window
+    chunks, retained_messages = await chunk_messages_for_compaction(
+        messages, deps.llm_model
+    )
+
+    if not chunks:
+        # No chunks to summarize (conversation too small), return retained messages
+        logger.debug("No chunks to summarize, returning retained messages")
+        return retained_messages
+
+    # Track chunked compaction
+    total_chunks = len(chunks)
+    logger.info(f"Starting chunked compaction: {total_chunks} chunks to process")
+
+    # Summarize each chunk sequentially
+    chunk_summaries: list[str] = []
+    for chunk in chunks:
+        try:
+            summary = await _summarize_chunk(chunk, total_chunks, deps)
+            chunk_summaries.append(summary)
+            logger.debug(
+                f"Chunk {chunk.chunk_index + 1}/{total_chunks} summarized successfully"
+            )
+        except Exception as e:
+            logger.warning(
+                f"Failed to summarize chunk {chunk.chunk_index + 1}/{total_chunks}: {e}"
+            )
+            # Continue with other chunks - we'll note the gap in fusion
+            chunk_summaries.append(
+                f"[Chunk {chunk.chunk_index + 1} summary unavailable]"
+            )
+
+    # Combine summaries into master summary
+    if len(chunk_summaries) == 1:
+        final_summary = chunk_summaries[0]
+    else:
+        final_summary = await _combine_chunk_summaries(chunk_summaries, deps)
+
+    # Build final compacted history
+    compacted = _build_chunked_compaction_result(
+        final_summary, messages, retained_messages, deps
+    )
+
+    # Track chunked compaction event
+    track_event(
+        "chunked_compaction_triggered",
+        {
+            "num_chunks": total_chunks,
+            "chunks_succeeded": sum(
+                1 for s in chunk_summaries if not s.startswith("[Chunk")
+            ),
+            "retention_window_size": len(retained_messages),
+            "model_name": deps.llm_model.name.value,
+            "provider": deps.llm_model.provider.value,
+        },
+    )
+
+    return compacted
+
+
+async def _summarize_chunk(
+    chunk: "chunking.Chunk",
+    total_chunks: int,
+    deps: AgentDeps,
+) -> str:
+    """Summarize a single chunk of messages."""
+    chunk_messages = chunk.get_all_messages()
+    context = extract_context_from_messages(chunk_messages)
+
+    # Use chunk summarization template
+    chunk_prompt = prompt_loader.render(
+        "history/chunk_summarization.j2",
+        chunk_index=chunk.chunk_index + 1,
+        total_chunks=total_chunks,
+        chunk_content=context,
+    )
+
+    request_messages: list[ModelMessage] = [
+        ModelRequest.user_text_prompt(context, instructions=chunk_prompt)
+    ]
+
+    max_tokens = await calculate_max_summarization_tokens(
+        deps.llm_model, request_messages
+    )
+
+    log_summarization_request(
+        deps.llm_model,
+        max_tokens,
+        chunk_prompt,
+        context[:500] + "..." if len(context) > 500 else context,
+        f"CHUNK_{chunk.chunk_index + 1}",
+    )
+
+    response = await shotgun_model_request(
+        model_config=deps.llm_model,
+        messages=request_messages,
+        model_settings=ModelSettings(max_tokens=max_tokens),
+    )
+
+    log_summarization_response(response, f"CHUNK_{chunk.chunk_index + 1}")
+
+    if response.parts and isinstance(response.parts[0], TextPart):
+        return response.parts[0].content
+    return ""
+
+
+async def _combine_chunk_summaries(
+    summaries: list[str],
+    deps: AgentDeps,
+) -> str:
+    """Combine multiple chunk summaries into a unified summary."""
+    # Check if combined summaries exceed limit (may need recursive combination)
+    combined_text = "\n\n".join(summaries)
+    combined_request: list[ModelMessage] = [
+        ModelRequest.user_text_prompt(combined_text)
+    ]
+    combined_tokens = await estimate_tokens_from_messages(
+        combined_request, deps.llm_model
+    )
+
+    max_safe_input = int(deps.llm_model.max_input_tokens * CHUNK_SAFE_RATIO)
+
+    if combined_tokens > max_safe_input:
+        # Recursive: split summaries in half and combine each half first
+        logger.warning(
+            f"Combined summaries too large ({combined_tokens:,} tokens), "
+            f"applying recursive combination"
+        )
+        mid = len(summaries) // 2
+        first_half = await _combine_chunk_summaries(summaries[:mid], deps)
+        second_half = await _combine_chunk_summaries(summaries[mid:], deps)
+        summaries = [first_half, second_half]
+
+    # Use combination template
+    combine_prompt = prompt_loader.render(
+        "history/combine_summaries.j2",
+        num_summaries=len(summaries),
+        chunk_summaries=summaries,
+    )
+
+    request_messages: list[ModelMessage] = [
+        ModelRequest.user_text_prompt(
+            "\n\n---\n\n".join(summaries), instructions=combine_prompt
+        )
+    ]
+
+    max_tokens = await calculate_max_summarization_tokens(
+        deps.llm_model, request_messages
+    )
+
+    log_summarization_request(
+        deps.llm_model,
+        max_tokens,
+        combine_prompt,
+        f"[{len(summaries)} summaries to combine]",
+        "COMBINE",
+    )
+
+    response = await shotgun_model_request(
+        model_config=deps.llm_model,
+        messages=request_messages,
+        model_settings=ModelSettings(max_tokens=max_tokens),
+    )
+
+    log_summarization_response(response, "COMBINE")
+
+    if response.parts and isinstance(response.parts[0], TextPart):
+        return response.parts[0].content
+    return ""
+
+
+def _build_chunked_compaction_result(
+    final_summary: str,
+    original_messages: list[ModelMessage],
+    retained_messages: list[ModelMessage],
+    deps: AgentDeps,
+) -> list[ModelMessage]:
+    """Build the final compacted history from chunked compaction."""
+    from pydantic_ai.messages import ModelRequestPart
+
+    # Extract system context from original messages
+    agent_prompt = get_agent_system_prompt(original_messages) or ""
+    system_status = get_latest_system_status(original_messages) or ""
+    first_user = get_first_user_request(original_messages) or ""
+
+    # Create marked summary
+    summary_part = TextPart(content=f"{SUMMARY_MARKER} {final_summary}")
+    summary_message = ModelResponse(parts=[summary_part])
+
+    # Build compacted structure
+    compacted: list[ModelMessage] = []
+
+    # Initial request with system context
+    parts: list[ModelRequestPart] = []
+    if agent_prompt:
+        parts.append(AgentSystemPrompt(content=agent_prompt))
+    if system_status:
+        parts.append(SystemStatusPrompt(content=system_status))
+    if first_user:
+        parts.append(UserPromptPart(content=first_user))
+
+    if parts:
+        compacted.append(ModelRequest(parts=parts))
+
+    # Add summary
+    compacted.append(summary_message)
+
+    # Add retained messages (recent context)
+    compacted.extend(retained_messages)
+
+    # Ensure ends with ModelRequest for PydanticAI compatibility
+    compacted = ensure_ends_with_model_request(compacted, original_messages)
+
+    # Filter orphaned tool responses
+    compacted = filter_orphaned_tool_responses(compacted)
+
+    logger.info(
+        f"Chunked compaction complete: {len(original_messages)} messages -> "
+        f"{len(compacted)} messages (retained {len(retained_messages)} recent)"
+    )
+
+    return compacted
```
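In `_full_compaction`, the hand-off to `_chunked_compaction` happens once the extracted context exceeds `max_input_tokens * CHUNK_SAFE_RATIO`. A back-of-the-envelope sketch of that threshold check; the constant's real value lives in `.constants` (this diff only shows the "70%" comment), and the 200,000-token input limit is an illustrative assumption:

```python
# Illustrative numbers only: 0.70 follows the "70%" comment in the diff and
# 200_000 is an assumed max_input_tokens, not a value read from this package.
CHUNK_SAFE_RATIO = 0.70
max_input_tokens = 200_000

max_safe_input = int(max_input_tokens * CHUNK_SAFE_RATIO)  # 140_000
context_tokens = 180_000  # pretend the extracted conversation context is this large

if context_tokens > max_safe_input:
    # this is the branch where _full_compaction would delegate to _chunked_compaction
    print(f"{context_tokens:,} > {max_safe_input:,}: use chunked compaction")
```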
shotgun/agents/{history → conversation/history}/token_counting/anthropic.py

```diff
@@ -1,6 +1,7 @@
 """Anthropic token counting using official client."""
 
 import logfire
+from anthropic import APIStatusError
 from pydantic_ai.messages import ModelMessage
 
 from shotgun.agents.config.models import KeyProvider
@@ -72,11 +73,23 @@ class AnthropicTokenCounter(TokenCounter):
         Raises:
             RuntimeError: If API call fails
         """
+        # Handle empty text to avoid unnecessary API calls
+        # Anthropic API requires non-empty content, so we need a strict check
+        if not text or not text.strip():
+            return 0
+
+        # Additional validation: ensure the text has actual content
+        # Some edge cases might have only whitespace or control characters
+        cleaned_text = text.strip()
+        if not cleaned_text:
+            return 0
+
         try:
             # Anthropic API expects messages format and model parameter
             # Use await with async client
             result = await self.client.messages.count_tokens(
-                messages=[{"role": "user", "content":
+                messages=[{"role": "user", "content": cleaned_text}],
+                model=self.model_name,
             )
             return result.input_tokens
         except Exception as e:
@@ -91,6 +104,13 @@ class AnthropicTokenCounter(TokenCounter):
                 exception_type=type(e).__name__,
                 exception_message=str(e),
             )
+
+            # Re-raise API errors directly so they can be classified by the runner
+            # This allows proper error classification for BYOK users (authentication, rate limits, etc.)
+            if isinstance(e, APIStatusError):
+                raise
+
+            # Only wrap library-level errors in RuntimeError
             raise RuntimeError(
                 f"Anthropic token counting API failed for {self.model_name}: {type(e).__name__}: {str(e)}"
             ) from e
@@ -107,5 +127,9 @@ class AnthropicTokenCounter(TokenCounter):
         Raises:
             RuntimeError: If token counting fails
         """
+        # Handle empty message list early
+        if not messages:
+            return 0
+
         total_text = extract_text_from_messages(messages)
         return await self.count_tokens(total_text)
```
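For reference, the new `model=` argument matches the Anthropic SDK's token-counting endpoint, which takes the same `messages` shape as a normal request and returns an object exposing `input_tokens` (as the `result.input_tokens` read above confirms). A standalone sketch of that call, assuming `ANTHROPIC_API_KEY` is set in the environment and using an illustrative model id:

```python
# Standalone sketch of the count_tokens call shape used above. The client and
# method come from the official `anthropic` package; the model id below is an
# illustrative assumption, not one read from this package's configuration.
import asyncio

from anthropic import AsyncAnthropic


async def main() -> None:
    client = AsyncAnthropic()  # reads ANTHROPIC_API_KEY from the environment
    result = await client.messages.count_tokens(
        model="claude-3-5-sonnet-latest",
        messages=[{"role": "user", "content": "Hello, world"}],
    )
    print(result.input_tokens)


asyncio.run(main())
```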
shotgun/agents/{history → conversation/history}/token_counting/base.py

```diff
@@ -56,12 +56,23 @@ def extract_text_from_messages(messages: list[ModelMessage]) -> str:
         if hasattr(message, "parts"):
             for part in message.parts:
                 if hasattr(part, "content") and isinstance(part.content, str):
-
+                    # Only add non-empty content
+                    if part.content.strip():
+                        text_parts.append(part.content)
                 else:
                     # Handle non-text parts (tool calls, etc.)
-
+                    part_str = str(part)
+                    if part_str.strip():
+                        text_parts.append(part_str)
         else:
             # Handle messages without parts
-
+            msg_str = str(message)
+            if msg_str.strip():
+                text_parts.append(msg_str)
+
+    # If no valid text parts found, return a minimal placeholder
+    # This ensures we never send completely empty content to APIs
+    if not text_parts:
+        return "."
 
     return "\n".join(text_parts)
```
shotgun/agents/{history → conversation/history}/token_counting/openai.py

```diff
@@ -57,9 +57,15 @@ class OpenAITokenCounter(TokenCounter):
         Raises:
             RuntimeError: If token counting fails
         """
+        # Handle empty text to avoid unnecessary encoding
+        if not text or not text.strip():
+            return 0
+
         try:
             return len(self.encoding.encode(text))
-        except
+        except BaseException as e:
+            # Must catch BaseException to handle PanicException from tiktoken's Rust layer
+            # which can occur with extremely long texts. Regular Exception won't catch it.
             raise RuntimeError(
                 f"Failed to count tokens for OpenAI model {self.model_name}"
             ) from e
@@ -76,5 +82,9 @@ class OpenAITokenCounter(TokenCounter):
         Raises:
             RuntimeError: If token counting fails
         """
+        # Handle empty message list early
+        if not messages:
+            return 0
+
         total_text = extract_text_from_messages(messages)
         return await self.count_tokens(total_text)
```
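The switch to `except BaseException` in the tiktoken path is deliberate: a Rust panic inside a pyo3-based extension surfaces as `pyo3_runtime.PanicException`, which derives from `BaseException`, so a plain `except Exception` never sees it. A standalone sketch of the same guard pattern, using a tiktoken encoding name that is known to exist:

```python
# Minimal sketch of the guard used above, outside the TokenCounter class.
# `except BaseException` also catches pyo3 panic exceptions that a plain
# `except Exception` would miss; re-wrapping as RuntimeError mirrors the diff.
import tiktoken


def count_tokens_guarded(text: str) -> int:
    if not text or not text.strip():
        return 0
    encoding = tiktoken.get_encoding("cl100k_base")
    try:
        return len(encoding.encode(text))
    except BaseException as e:  # deliberate: a Rust panic is not an Exception subclass
        raise RuntimeError("Failed to count tokens") from e
```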
shotgun/agents/{history → conversation/history}/token_counting/sentencepiece_counter.py

```diff
@@ -88,6 +88,10 @@ class SentencePieceTokenCounter(TokenCounter):
         Raises:
             RuntimeError: If token counting fails
         """
+        # Handle empty text to avoid unnecessary tokenization
+        if not text or not text.strip():
+            return 0
+
         await self._ensure_tokenizer()
 
         if self.sp is None:
@@ -115,5 +119,9 @@ class SentencePieceTokenCounter(TokenCounter):
         Raises:
             RuntimeError: If token counting fails
         """
+        # Handle empty message list early
+        if not messages:
+            return 0
+
         total_text = extract_text_from_messages(messages)
         return await self.count_tokens(total_text)
```
shotgun/agents/{history → conversation/history}/token_counting/tokenizer_cache.py

```diff
@@ -3,6 +3,7 @@
 import hashlib
 from pathlib import Path
 
+import aiofiles
 import httpx
 
 from shotgun.logging_config import get_logger
@@ -78,7 +79,8 @@ async def download_gemini_tokenizer() -> Path:
 
     # Atomic write: write to temp file first, then rename
     temp_path = cache_path.with_suffix(".tmp")
-
+    async with aiofiles.open(temp_path, "wb") as f:
+        await f.write(content)
     temp_path.rename(cache_path)
 
     logger.info(f"Gemini tokenizer downloaded and cached at {cache_path}")
```
shotgun/agents/{history → conversation/history}/token_counting/utils.py

```diff
@@ -44,9 +44,6 @@ def get_token_counter(model_config: ModelConfig) -> TokenCounter:
 
     # Return cached instance if available
     if cache_key in _token_counter_cache:
-        logger.debug(
-            f"Reusing cached token counter for {model_config.provider.value}:{model_config.name}"
-        )
         return _token_counter_cache[cache_key]
 
     # Create new instance and cache it
```