shotgun-sh 0.1.0.dev12__py3-none-any.whl → 0.1.0.dev14__py3-none-any.whl
- shotgun/agents/agent_manager.py +16 -3
- shotgun/agents/artifact_state.py +58 -0
- shotgun/agents/common.py +137 -88
- shotgun/agents/config/constants.py +18 -0
- shotgun/agents/config/manager.py +68 -16
- shotgun/agents/config/models.py +61 -0
- shotgun/agents/config/provider.py +11 -6
- shotgun/agents/history/compaction.py +85 -0
- shotgun/agents/history/constants.py +19 -0
- shotgun/agents/history/context_extraction.py +108 -0
- shotgun/agents/history/history_building.py +104 -0
- shotgun/agents/history/history_processors.py +354 -157
- shotgun/agents/history/message_utils.py +46 -0
- shotgun/agents/history/token_counting.py +429 -0
- shotgun/agents/history/token_estimation.py +138 -0
- shotgun/agents/models.py +131 -1
- shotgun/agents/plan.py +15 -37
- shotgun/agents/research.py +10 -45
- shotgun/agents/specify.py +97 -0
- shotgun/agents/tasks.py +7 -36
- shotgun/agents/tools/artifact_management.py +482 -0
- shotgun/agents/tools/file_management.py +31 -12
- shotgun/agents/tools/web_search/anthropic.py +78 -17
- shotgun/agents/tools/web_search/gemini.py +1 -1
- shotgun/agents/tools/web_search/openai.py +16 -2
- shotgun/artifacts/__init__.py +17 -0
- shotgun/artifacts/exceptions.py +89 -0
- shotgun/artifacts/manager.py +530 -0
- shotgun/artifacts/models.py +334 -0
- shotgun/artifacts/service.py +463 -0
- shotgun/artifacts/templates/__init__.py +10 -0
- shotgun/artifacts/templates/loader.py +252 -0
- shotgun/artifacts/templates/models.py +136 -0
- shotgun/artifacts/templates/plan/delivery_and_release_plan.yaml +66 -0
- shotgun/artifacts/templates/research/market_research.yaml +585 -0
- shotgun/artifacts/templates/research/sdk_comparison.yaml +257 -0
- shotgun/artifacts/templates/specify/prd.yaml +331 -0
- shotgun/artifacts/templates/specify/product_spec.yaml +301 -0
- shotgun/artifacts/utils.py +76 -0
- shotgun/cli/plan.py +1 -4
- shotgun/cli/specify.py +69 -0
- shotgun/cli/tasks.py +0 -4
- shotgun/codebase/core/nl_query.py +4 -4
- shotgun/logging_config.py +23 -7
- shotgun/main.py +7 -6
- shotgun/prompts/agents/partials/artifact_system.j2 +35 -0
- shotgun/prompts/agents/partials/codebase_understanding.j2 +1 -2
- shotgun/prompts/agents/partials/common_agent_system_prompt.j2 +28 -2
- shotgun/prompts/agents/partials/content_formatting.j2 +65 -0
- shotgun/prompts/agents/partials/interactive_mode.j2 +10 -2
- shotgun/prompts/agents/plan.j2 +33 -32
- shotgun/prompts/agents/research.j2 +39 -29
- shotgun/prompts/agents/specify.j2 +32 -0
- shotgun/prompts/agents/state/artifact_templates_available.j2 +18 -0
- shotgun/prompts/agents/state/codebase/codebase_graphs_available.j2 +3 -1
- shotgun/prompts/agents/state/existing_artifacts_available.j2 +23 -0
- shotgun/prompts/agents/state/system_state.j2 +9 -1
- shotgun/prompts/agents/tasks.j2 +27 -12
- shotgun/prompts/history/incremental_summarization.j2 +53 -0
- shotgun/sdk/artifact_models.py +186 -0
- shotgun/sdk/artifacts.py +448 -0
- shotgun/sdk/services.py +14 -0
- shotgun/tui/app.py +26 -7
- shotgun/tui/screens/chat.py +32 -5
- shotgun/tui/screens/directory_setup.py +113 -0
- shotgun/utils/file_system_utils.py +6 -1
- {shotgun_sh-0.1.0.dev12.dist-info → shotgun_sh-0.1.0.dev14.dist-info}/METADATA +3 -2
- shotgun_sh-0.1.0.dev14.dist-info/RECORD +138 -0
- shotgun/prompts/user/research.j2 +0 -5
- shotgun_sh-0.1.0.dev12.dist-info/RECORD +0 -104
- {shotgun_sh-0.1.0.dev12.dist-info → shotgun_sh-0.1.0.dev14.dist-info}/WHEEL +0 -0
- {shotgun_sh-0.1.0.dev12.dist-info → shotgun_sh-0.1.0.dev14.dist-info}/entry_points.txt +0 -0
- {shotgun_sh-0.1.0.dev12.dist-info → shotgun_sh-0.1.0.dev14.dist-info}/licenses/LICENSE +0 -0
shotgun/agents/history/history_processors.py
@@ -1,213 +1,410 @@
 """History processors for managing conversation history in Shotgun agents."""

-from
-
+from typing import TYPE_CHECKING, Any, Protocol
+
 from pydantic_ai.messages import (
-    BuiltinToolCallPart,
-    BuiltinToolReturnPart,
     ModelMessage,
     ModelRequest,
     ModelResponse,
-    ModelResponsePart,
-    RetryPromptPart,
     SystemPromptPart,
     TextPart,
-    ThinkingPart,
-    ToolCallPart,
-    ToolReturnPart,
     UserPromptPart,
 )

+from shotgun.agents.config.models import shotgun_model_request
 from shotgun.agents.models import AgentDeps
 from shotgun.logging_config import get_logger
 from shotgun.prompts import PromptLoader

+from .constants import SUMMARY_MARKER, TOKEN_LIMIT_RATIO
+from .context_extraction import extract_context_from_messages
+from .history_building import ensure_ends_with_model_request
+from .message_utils import (
+    get_first_user_request,
+    get_system_prompt,
+)
+from .token_estimation import (
+    calculate_max_summarization_tokens as _calculate_max_summarization_tokens,
+)
+from .token_estimation import (
+    estimate_post_summary_tokens,
+    estimate_tokens_from_messages,
+)
+
+if TYPE_CHECKING:
+    pass
+
+
+class ContextProtocol(Protocol):
+    """Protocol defining the interface needed by token_limit_compactor."""
+
+    deps: AgentDeps
+    usage: Any  # Optional usage information
+
+
 logger = get_logger(__name__)

 # Global prompt loader instance
 prompt_loader = PromptLoader()


+def is_summary_part(part: Any) -> bool:
+    """Check if a message part is a compacted summary."""
+    return isinstance(part, TextPart) and part.content.startswith(SUMMARY_MARKER)
+
+
+def find_last_summary_index(messages: list[ModelMessage]) -> int | None:
+    """Find the index of the last summary in the message history.
+
+    Args:
+        messages: List of messages in the conversation history
+    Returns:
+        Index of the last summary message, or None if no summary exists.
+    """
+    for i in range(len(messages) - 1, -1, -1):
+        if isinstance(messages[i], ModelResponse):
+            for part in messages[i].parts:
+                if is_summary_part(part):
+                    return i
+    return None
+
+
+def extract_summary_content(summary_part: Any) -> str:
+    """Extract the summary content without the marker prefix."""
+    if isinstance(summary_part, TextPart):
+        return summary_part.content[len(SUMMARY_MARKER) :].strip()
+    return ""
+
+
+def create_marked_summary_part(summary_response: Any) -> TextPart:
+    """Create a TextPart with the summary marker prefix.
+
+    This consolidates the duplicate summary creation logic.
+    """
+    first_part = summary_response.parts[0]
+    if isinstance(first_part, TextPart):
+        summary_content = f"{SUMMARY_MARKER} {first_part.content}"
+        return TextPart(content=summary_content)
+    else:
+        # Fallback in case the response part is not TextPart
+        summary_content = f"{SUMMARY_MARKER} Summary content unavailable"
+        return TextPart(content=summary_content)
+
+
+def log_summarization_request(
+    model: Any, max_tokens: int, prompt: str, context: str, request_type: str
+) -> None:
+    """Log detailed summarization request information.
+
+    Consolidates duplicate logging patterns across the codebase.
+    """
+    logger.debug(f"{request_type} SUMMARIZATION REQUEST - Model: {model}")
+    logger.debug(f"{request_type} SUMMARIZATION REQUEST - Max tokens: {max_tokens}")
+    logger.debug(f"{request_type} SUMMARIZATION REQUEST - Instructions: {prompt}")
+    logger.debug(f"{request_type} SUMMARIZATION REQUEST - Context: {context}")
+
+
+def log_summarization_response(response: Any, request_type: str) -> None:
+    """Log detailed summarization response information.
+
+    Consolidates duplicate logging patterns across the codebase.
+    """
+    logger.debug(f"{request_type} SUMMARIZATION RESPONSE - Full response: {response}")
+    logger.debug(
+        f"{request_type} SUMMARIZATION RESPONSE - Content: "
+        f"{response.parts[0] if response.parts else 'No content'}"
+    )
+    logger.debug(f"{request_type} SUMMARIZATION RESPONSE - Usage: {response.usage}")
+
+
+# Use centralized calculate_max_summarization_tokens function
+calculate_max_summarization_tokens = _calculate_max_summarization_tokens
+
+
 async def token_limit_compactor(
-    ctx:
+    ctx: ContextProtocol,
     messages: list[ModelMessage],
 ) -> list[ModelMessage]:
-    """Compact message history based on token limits.
+    """Compact message history based on token limits with incremental processing.

-    This
-
-
+    This incremental compactor prevents cascading summarization by:
+    1. Preserving existing summaries
+    2. Only processing NEW messages since the last summary
+    3. Combining summaries incrementally
+    4. Never re-processing already compacted content

     Args:
         ctx: Run context with usage information and dependencies
-        messages:
+        messages: Current conversation history

     Returns:
         Compacted list of messages within token limits
     """
-    #
-
-
-    # Get token limit from model configuration
-    model_max_tokens =
-    max_tokens = int(
-
-
-
-
-
-
-
-
-
-
-
+    # Extract dependencies from context
+    deps = ctx.deps
+
+    # Get token limit from model configuration
+    model_max_tokens = deps.llm_model.max_input_tokens
+    max_tokens = int(model_max_tokens * TOKEN_LIMIT_RATIO)
+
+    # Find existing summaries to determine compaction strategy
+    last_summary_index = find_last_summary_index(messages)
+
+    if last_summary_index is not None:
+        # Check if post-summary conversation exceeds threshold for incremental compaction
+        post_summary_tokens = estimate_post_summary_tokens(
+            messages, last_summary_index, deps.llm_model
+        )
+        post_summary_percentage = (
+            (post_summary_tokens / max_tokens) * 100 if max_tokens > 0 else 0
+        )
+
+        logger.debug(
+            f"Found existing summary at index {last_summary_index}. "
+            f"Post-summary tokens: {post_summary_tokens}, threshold: {max_tokens}, "
+            f"percentage: {post_summary_percentage:.2f}%%"
+        )
+
+        # Only do incremental compaction if post-summary conversation exceeds threshold
+        if post_summary_tokens < max_tokens:
+            logger.debug(
+                f"Post-summary conversation under threshold ({post_summary_tokens} < {max_tokens}), "
+                f"keeping all {len(messages)} messages"
+            )
+            return messages
+
+        # INCREMENTAL COMPACTION: Process new messages since last summary
+        logger.debug(
+            "Post-summary conversation exceeds threshold, performing incremental compaction"
+        )
+
+        # Extract existing summary content
+        summary_message = messages[last_summary_index]
+        existing_summary_part = None
+        for part in summary_message.parts:
+            if is_summary_part(part):
+                existing_summary_part = part
+                break
+
+        if not existing_summary_part:
+            logger.warning(
+                "Found summary index but no summary part, falling back to full compaction"
+            )
+            return await _full_compaction(deps, messages)
+
+        existing_summary = extract_summary_content(existing_summary_part)
+
+        # Get messages AFTER the last summary for incremental processing
+        messages_to_process = messages[last_summary_index + 1 :]
+
+        if not messages_to_process:
+            logger.debug(
+                "No new messages since last summary, returning existing history"
+            )
+            return messages
+
+        # Extract context from new messages only
+        new_context = extract_context_from_messages(messages_to_process)
+
+        # Check if there's meaningful content (responses) to summarize
+        has_meaningful_content = any(
+            isinstance(msg, ModelResponse) for msg in messages_to_process
+        )
+
+        # If there are only user requests and no responses, no need to summarize
+        if not has_meaningful_content or not new_context.strip():
+            logger.debug(
+                "No meaningful new content to summarize, returning existing history"
+            )
+            return messages
+
+        # Use incremental summarization prompt with proper template variables
+        try:
+            incremental_prompt = prompt_loader.render(
+                "history/incremental_summarization.j2",
+                existing_summary=existing_summary,
+                new_messages=new_context,
+            )
+        except Exception:
+            # Fallback to regular summarization if incremental template doesn't exist yet
+            logger.warning(
+                "Incremental summarization template not found, using regular template"
+            )
+            incremental_prompt = prompt_loader.render("history/summarization.j2")
+            # Combine existing and new context for fallback
+            new_context = (
+                f"EXISTING SUMMARY:\n{existing_summary}\n\nNEW MESSAGES:\n{new_context}"
+            )
+
+        # Create incremental summary
+        request_messages: list[ModelMessage] = [
+            ModelRequest.user_text_prompt(new_context, instructions=incremental_prompt)
+        ]
+
+        # Calculate optimal max_tokens for summarization
+        max_tokens = calculate_max_summarization_tokens(
+            deps.llm_model, request_messages
+        )
+
+        # Debug logging using shared utilities
+        log_summarization_request(
+            deps.llm_model, max_tokens, incremental_prompt, new_context, "INCREMENTAL"
+        )
+
+        # Use shotgun wrapper to ensure full token utilization
+        summary_response = await shotgun_model_request(
+            model_config=deps.llm_model,
+            messages=request_messages,
+            max_tokens=max_tokens,  # Use calculated optimal tokens for summarization
+        )
+
+        log_summarization_response(summary_response, "INCREMENTAL")
+
+        # Calculate token reduction (from new messages only)
+        new_tokens = len(new_context.split())  # Rough estimate
+        summary_tokens = (
+            summary_response.usage.output_tokens if summary_response.usage else 0
+        )
+        logger.debug(
+            f"Incremental compaction: processed {len(messages_to_process)} new messages, "
+            f"reduced ~{new_tokens} tokens to {summary_tokens} tokens"
+        )

-
-
-        logger.debug("Under token limit, keeping all %d messages", len(messages))
-        return messages
+        # Build the new compacted history with the updated summary
+        new_summary_part = create_marked_summary_part(summary_response)

-
-
+        # Extract essential context from messages before the last summary (if any)
+        system_prompt = ""
+        first_user_prompt = ""
+        if last_summary_index > 0:
+            # Get system and first user from original conversation
+            system_prompt = get_system_prompt(messages[:last_summary_index]) or ""
+            first_user_prompt = (
+                get_first_user_request(messages[:last_summary_index]) or ""
+            )

-
+        # Create the updated summary message
+        updated_summary_message = ModelResponse(parts=[new_summary_part])

-
-
-        if isinstance(msg, ModelResponse) or isinstance(msg, ModelRequest):
-            for part in msg.parts:
-                message_content = get_context_from_message(part)
-                if not message_content:
-                    continue
-                context += get_context_from_message(part) + "\n"
-        else:
-            # Handle whatever this is
-            pass
+        # Build final compacted history with CLEAN structure
+        compacted_messages: list[ModelMessage] = []

+        # Only add system/user context if it exists and is meaningful
+        if system_prompt or first_user_prompt:
+            compacted_messages.append(
+                ModelRequest(
+                    parts=[
+                        SystemPromptPart(content=system_prompt),
+                        UserPromptPart(content=first_user_prompt),
+                    ]
+                )
+            )
+
+        # Add the summary
+        compacted_messages.append(updated_summary_message)
+
+        # Ensure history ends with ModelRequest for PydanticAI compatibility
+        compacted_messages = ensure_ends_with_model_request(
+            compacted_messages, messages
+        )
+
+        logger.debug(
+            f"Incremental compaction complete: {len(messages)} -> {len(compacted_messages)} messages"
+        )
+        return compacted_messages
+
+    else:
+        # Check if total conversation exceeds threshold for full compaction
+        total_tokens = estimate_tokens_from_messages(messages, deps.llm_model)
+        total_percentage = (total_tokens / max_tokens) * 100 if max_tokens > 0 else 0
+
+        logger.debug(
+            f"No existing summary found. Total tokens: {total_tokens}, threshold: {max_tokens}, "
+            f"percentage: {total_percentage:.2f}%%"
+        )
+
+        # Only do full compaction if total conversation exceeds threshold
+        if total_tokens < max_tokens:
+            logger.debug(
+                f"Total conversation under threshold ({total_tokens} < {max_tokens}), "
+                f"keeping all {len(messages)} messages"
+            )
+            return messages
+
+        # FIRST-TIME COMPACTION: Process all messages
+        logger.debug(
+            "Total conversation exceeds threshold, performing initial full compaction"
+        )
+        return await _full_compaction(deps, messages)
+
+
+async def _full_compaction(
+    deps: AgentDeps,
+    messages: list[ModelMessage],
+) -> list[ModelMessage]:
+    """Perform full compaction for first-time summarization."""
+    # Extract context from all messages
+    context = extract_context_from_messages(messages)
+
+    # Use regular summarization prompt
     summarization_prompt = prompt_loader.render("history/summarization.j2")
-
-
-
-
-
+    request_messages: list[ModelMessage] = [
+        ModelRequest.user_text_prompt(context, instructions=summarization_prompt)
+    ]
+
+    # Calculate optimal max_tokens for summarization
+    max_tokens = calculate_max_summarization_tokens(deps.llm_model, request_messages)
+
+    # Debug logging using shared utilities
+    log_summarization_request(
+        deps.llm_model, max_tokens, summarization_prompt, context, "FULL"
+    )
+
+    # Use shotgun wrapper to ensure full token utilization
+    summary_response = await shotgun_model_request(
+        model_config=deps.llm_model,
+        messages=request_messages,
+        max_tokens=max_tokens,  # Use calculated optimal tokens for summarization
     )
-
+
+    # Calculate token reduction
+    current_tokens = estimate_tokens_from_messages(messages, deps.llm_model)
     summary_usage = summary_response.usage
     reduction_percentage = (
-        (current_tokens - summary_usage.output_tokens) / current_tokens
-
+        ((current_tokens - summary_usage.output_tokens) / current_tokens) * 100
+        if current_tokens > 0 and summary_usage
+        else 0
+    )
+
+    log_summarization_response(summary_response, "FULL")
+
+    # Log token reduction (already calculated above)
     logger.debug(
-        "
+        "Full compaction: %s tokens -> %s tokens (%.2f%% reduction)",
         current_tokens,
-        summary_usage.output_tokens,
+        summary_usage.output_tokens if summary_usage else 0,
         reduction_percentage,
     )

-
+    # Mark summary with special prefix
+    marked_summary_part = create_marked_summary_part(summary_response)
+
+    # Build compacted history structure
+    system_prompt = get_system_prompt(messages) or ""
     user_prompt = get_first_user_request(messages) or ""
-
-
-
+
+    # Create base structure
+    compacted_messages: list[ModelMessage] = [
         ModelRequest(
             parts=[
                 SystemPromptPart(content=system_prompt),
                 UserPromptPart(content=user_prompt),
             ]
         ),
-        ModelResponse(
-            parts=[
-                summarization_part,
-            ]
-        ),
+        ModelResponse(parts=[marked_summary_part]),
     ]

+    # Ensure history ends with ModelRequest for PydanticAI compatibility
+    compacted_messages = ensure_ends_with_model_request(compacted_messages, messages)

-
-    """Extract first user request from messages.
-
-    Args:
-        messages: List of messages in the conversation history
-    Returns:
-        The first user request as a string.
-    """
-    for msg in messages:
-        if isinstance(msg, ModelRequest):
-            for part in msg.parts:
-                if isinstance(part, UserPromptPart):
-                    if isinstance(part.content, str):
-                        return part.content
-    return None
-
-
-def get_system_promt(messages: list[ModelMessage]) -> str | None:
-    """Extract system prompt from messages.
-
-    Args:
-        messages: List of messages in the conversation history
-
-    Returns:
-        The system prompt as a string.
-    """
-    for msg in messages:
-        if isinstance(msg, ModelRequest):
-            for part in msg.parts:
-                if isinstance(part, SystemPromptPart):
-                    return part.content
-    return None
-
-
-def get_context_from_message(
-    message_part: SystemPromptPart
-    | UserPromptPart
-    | ToolReturnPart
-    | RetryPromptPart
-    | ModelResponsePart,
-) -> str:
-    """Extract context from a message part.
-
-    Args:
-        message: The message part to extract context from.
-
-    Returns:
-        The extracted context as a string.
-    """
-
-    if isinstance(message_part, SystemPromptPart):
-        return ""  # We do not include system prompts in the summary
-    elif isinstance(message_part, UserPromptPart):
-        if isinstance(message_part.content, str):
-            return "<USER_PROMPT>\n" + message_part.content + "\n</USER_PROMPT>"
-        else:
-            return ""
-    elif isinstance(message_part, ToolReturnPart):
-        return "<TOOL_RETURN>\n" + str(message_part.content) + "\n</TOOL_RETURN>"
-    elif isinstance(message_part, RetryPromptPart):
-        if isinstance(message_part.content, str):
-            return "<RETRY_PROMPT>\n" + message_part.content + "\n</RETRY_PROMPT>"
-        return ""
-
-    # TextPart | ToolCallPart | BuiltinToolCallPart | BuiltinToolReturnPart | ThinkingPart
-    if isinstance(message_part, TextPart):
-        return "<ASSISTANT_TEXT>\n" + message_part.content + "\n</ASSISTANT_TEXT>"
-    elif isinstance(message_part, ToolCallPart):
-        if isinstance(message_part.args, dict):
-            args_str = ", ".join(f"{k}={repr(v)}" for k, v in message_part.args.items())
-            tool_call_str = f"{message_part.tool_name}({args_str})"
-        else:
-            tool_call_str = f"{message_part.tool_name}({message_part.args})"
-        return "<TOOL_CALL>\n" + tool_call_str + "\n</TOOL_CALL>"
-    elif isinstance(message_part, BuiltinToolCallPart):
-        return (
-            "<BUILTIN_TOOL_CALL>\n" + message_part.tool_name + "\n</BUILTIN_TOOL_CALL>"
-        )
-    elif isinstance(message_part, BuiltinToolReturnPart):
-        return (
-            "<BUILTIN_TOOL_RETURN>\n"
-            + message_part.tool_name
-            + "\n</BUILTIN_TOOL_RETURN>"
-        )
-    elif isinstance(message_part, ThinkingPart):
-        return "<THINKING>\n" + message_part.content + "\n</THINKING>"
-
-    return ""
+    return compacted_messages
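The core of the new history_processors.py is the summary marker: once a compaction pass has run, its output is stored as a TextPart whose content starts with SUMMARY_MARKER, and every later pass first locates that marker so it only summarizes the messages that follow it. Below is a minimal sketch of that mechanism, assuming a placeholder marker value; the real SUMMARY_MARKER constant lives in the new shotgun/agents/history/constants.py, which this hunk does not show.

# Sketch only: mirrors the marker helpers from the diff above, with an assumed
# SUMMARY_MARKER value rather than the package's actual constant.
from pydantic_ai.messages import (
    ModelMessage,
    ModelRequest,
    ModelResponse,
    TextPart,
    UserPromptPart,
)

SUMMARY_MARKER = "[COMPACTED SUMMARY]"  # placeholder, not the real constant


def is_summary_part(part: object) -> bool:
    # A summary is a plain text part whose content begins with the marker prefix.
    return isinstance(part, TextPart) and part.content.startswith(SUMMARY_MARKER)


def find_last_summary_index(messages: list[ModelMessage]) -> int | None:
    # Walk backwards so the most recent summary wins.
    for i in range(len(messages) - 1, -1, -1):
        msg = messages[i]
        if isinstance(msg, ModelResponse) and any(is_summary_part(p) for p in msg.parts):
            return i
    return None


history: list[ModelMessage] = [
    ModelRequest(parts=[UserPromptPart(content="Research SDK options")]),
    ModelResponse(parts=[TextPart(content=f"{SUMMARY_MARKER} earlier turns compacted")]),
    ModelRequest(parts=[UserPromptPart(content="Now compare pricing")]),
    ModelResponse(parts=[TextPart(content="Pricing comparison ...")]),
]

idx = find_last_summary_index(history)
# Only the tail after the marker would be fed to the incremental summarizer.
tail = history[idx + 1 :] if idx is not None else history
print(idx, len(tail))  # -> 1 2

Per the compactor above, if that tail stays under the TOKEN_LIMIT_RATIO threshold the history is returned untouched; only when it grows past the threshold is the existing summary merged with a fresh summary of the new messages, which is what prevents cascading re-summarization.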
shotgun/agents/history/message_utils.py (new file)
@@ -0,0 +1,46 @@
+"""Utility functions for working with PydanticAI messages."""
+
+from pydantic_ai.messages import (
+    ModelMessage,
+    ModelRequest,
+    SystemPromptPart,
+    UserPromptPart,
+)
+
+
+def get_first_user_request(messages: list[ModelMessage]) -> str | None:
+    """Extract first user request content from messages."""
+    for msg in messages:
+        if isinstance(msg, ModelRequest):
+            for part in msg.parts:
+                if isinstance(part, UserPromptPart) and isinstance(part.content, str):
+                    return part.content
+    return None
+
+
+def get_last_user_request(messages: list[ModelMessage]) -> ModelRequest | None:
+    """Extract the last user request from messages."""
+    for msg in reversed(messages):
+        if isinstance(msg, ModelRequest):
+            for part in msg.parts:
+                if isinstance(part, UserPromptPart):
+                    return msg
+    return None
+
+
+def get_user_content_from_request(request: ModelRequest) -> str | None:
+    """Extract user prompt content from a ModelRequest."""
+    for part in request.parts:
+        if isinstance(part, UserPromptPart) and isinstance(part.content, str):
+            return part.content
+    return None
+
+
+def get_system_prompt(messages: list[ModelMessage]) -> str | None:
+    """Extract system prompt from messages."""
+    for msg in messages:
+        if isinstance(msg, ModelRequest):
+            for part in msg.parts:
+                if isinstance(part, SystemPromptPart):
+                    return part.content
+    return None