shotgun-sh 0.2.23.dev1__py3-none-any.whl → 0.2.29.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of shotgun-sh might be problematic.
- shotgun/agents/agent_manager.py +3 -3
- shotgun/agents/common.py +1 -1
- shotgun/agents/config/manager.py +36 -21
- shotgun/agents/config/models.py +30 -0
- shotgun/agents/config/provider.py +27 -14
- shotgun/agents/context_analyzer/analyzer.py +6 -2
- shotgun/agents/conversation/__init__.py +18 -0
- shotgun/agents/conversation/filters.py +164 -0
- shotgun/agents/conversation/history/chunking.py +278 -0
- shotgun/agents/{history → conversation/history}/compaction.py +27 -1
- shotgun/agents/{history → conversation/history}/constants.py +5 -0
- shotgun/agents/conversation/history/file_content_deduplication.py +216 -0
- shotgun/agents/{history → conversation/history}/history_processors.py +267 -3
- shotgun/agents/{conversation_manager.py → conversation/manager.py} +1 -1
- shotgun/agents/{conversation_history.py → conversation/models.py} +8 -94
- shotgun/agents/tools/web_search/openai.py +1 -1
- shotgun/cli/clear.py +1 -1
- shotgun/cli/compact.py +5 -3
- shotgun/cli/context.py +1 -1
- shotgun/cli/spec/__init__.py +5 -0
- shotgun/cli/spec/backup.py +81 -0
- shotgun/cli/spec/commands.py +130 -0
- shotgun/cli/spec/models.py +30 -0
- shotgun/cli/spec/pull_service.py +165 -0
- shotgun/codebase/core/ingestor.py +153 -7
- shotgun/codebase/models.py +2 -0
- shotgun/exceptions.py +5 -3
- shotgun/main.py +2 -0
- shotgun/posthog_telemetry.py +1 -1
- shotgun/prompts/agents/partials/common_agent_system_prompt.j2 +3 -3
- shotgun/prompts/agents/partials/interactive_mode.j2 +3 -3
- shotgun/prompts/agents/research.j2 +0 -3
- shotgun/prompts/history/chunk_summarization.j2 +34 -0
- shotgun/prompts/history/combine_summaries.j2 +53 -0
- shotgun/shotgun_web/__init__.py +67 -1
- shotgun/shotgun_web/client.py +42 -1
- shotgun/shotgun_web/constants.py +46 -0
- shotgun/shotgun_web/exceptions.py +29 -0
- shotgun/shotgun_web/models.py +390 -0
- shotgun/shotgun_web/shared_specs/__init__.py +32 -0
- shotgun/shotgun_web/shared_specs/file_scanner.py +175 -0
- shotgun/shotgun_web/shared_specs/hasher.py +83 -0
- shotgun/shotgun_web/shared_specs/models.py +71 -0
- shotgun/shotgun_web/shared_specs/upload_pipeline.py +291 -0
- shotgun/shotgun_web/shared_specs/utils.py +34 -0
- shotgun/shotgun_web/specs_client.py +703 -0
- shotgun/shotgun_web/supabase_client.py +31 -0
- shotgun/tui/app.py +39 -0
- shotgun/tui/containers.py +1 -1
- shotgun/tui/layout.py +5 -0
- shotgun/tui/screens/chat/chat_screen.py +212 -16
- shotgun/tui/screens/chat/codebase_index_prompt_screen.py +147 -19
- shotgun/tui/screens/chat_screen/command_providers.py +10 -0
- shotgun/tui/screens/chat_screen/history/chat_history.py +0 -36
- shotgun/tui/screens/confirmation_dialog.py +40 -0
- shotgun/tui/screens/model_picker.py +7 -1
- shotgun/tui/screens/onboarding.py +149 -0
- shotgun/tui/screens/pipx_migration.py +46 -0
- shotgun/tui/screens/provider_config.py +41 -0
- shotgun/tui/screens/shared_specs/__init__.py +21 -0
- shotgun/tui/screens/shared_specs/create_spec_dialog.py +273 -0
- shotgun/tui/screens/shared_specs/models.py +56 -0
- shotgun/tui/screens/shared_specs/share_specs_dialog.py +390 -0
- shotgun/tui/screens/shared_specs/upload_progress_screen.py +452 -0
- shotgun/tui/screens/shotgun_auth.py +60 -6
- shotgun/tui/screens/spec_pull.py +286 -0
- shotgun/tui/screens/welcome.py +91 -0
- shotgun/tui/services/conversation_service.py +5 -2
- shotgun/tui/widgets/widget_coordinator.py +1 -1
- {shotgun_sh-0.2.23.dev1.dist-info → shotgun_sh-0.2.29.dev2.dist-info}/METADATA +1 -1
- {shotgun_sh-0.2.23.dev1.dist-info → shotgun_sh-0.2.29.dev2.dist-info}/RECORD +86 -59
- {shotgun_sh-0.2.23.dev1.dist-info → shotgun_sh-0.2.29.dev2.dist-info}/WHEEL +1 -1
- /shotgun/agents/{history → conversation/history}/__init__.py +0 -0
- /shotgun/agents/{history → conversation/history}/context_extraction.py +0 -0
- /shotgun/agents/{history → conversation/history}/history_building.py +0 -0
- /shotgun/agents/{history → conversation/history}/message_utils.py +0 -0
- /shotgun/agents/{history → conversation/history}/token_counting/__init__.py +0 -0
- /shotgun/agents/{history → conversation/history}/token_counting/anthropic.py +0 -0
- /shotgun/agents/{history → conversation/history}/token_counting/base.py +0 -0
- /shotgun/agents/{history → conversation/history}/token_counting/openai.py +0 -0
- /shotgun/agents/{history → conversation/history}/token_counting/sentencepiece_counter.py +0 -0
- /shotgun/agents/{history → conversation/history}/token_counting/tokenizer_cache.py +0 -0
- /shotgun/agents/{history → conversation/history}/token_counting/utils.py +0 -0
- /shotgun/agents/{history → conversation/history}/token_estimation.py +0 -0
- {shotgun_sh-0.2.23.dev1.dist-info → shotgun_sh-0.2.29.dev2.dist-info}/entry_points.txt +0 -0
- {shotgun_sh-0.2.23.dev1.dist-info → shotgun_sh-0.2.29.dev2.dist-info}/licenses/LICENSE +0 -0
shotgun/agents/conversation/history/chunking.py (new file)
@@ -0,0 +1,278 @@
+"""Pattern-based chunking for oversized conversation compaction.
+
+This module provides functions to break oversized conversations into logical
+chunks for summarization, preserving semantic units like tool call sequences.
+"""
+
+import logging
+from dataclasses import dataclass, field
+
+from pydantic_ai.messages import (
+    ModelMessage,
+    ModelRequest,
+    ModelResponse,
+    ToolCallPart,
+    ToolReturnPart,
+    UserPromptPart,
+)
+
+from shotgun.agents.config.models import ModelConfig
+
+from .constants import CHUNK_TARGET_RATIO, RETENTION_WINDOW_MESSAGES
+from .token_estimation import estimate_tokens_from_messages
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class MessageGroup:
+    """A logical group of messages that must stay together.
+
+    Examples:
+    - A single user message
+    - A tool call sequence: ModelResponse(ToolCallPart) -> ModelRequest(ToolReturnPart)
+    - A standalone assistant response
+    """
+
+    messages: list[ModelMessage]
+    is_tool_sequence: bool = False
+    start_index: int = 0
+    end_index: int = 0
+    _token_count: int | None = field(default=None, repr=False)
+
+    async def get_token_count(self, model_config: ModelConfig) -> int:
+        """Lazily compute and cache token count for this group."""
+        if self._token_count is None:
+            self._token_count = await estimate_tokens_from_messages(
+                self.messages, model_config
+            )
+        return self._token_count
+
+
+@dataclass
+class Chunk:
+    """A chunk of message groups ready for summarization."""
+
+    groups: list[MessageGroup]
+    chunk_index: int
+    total_token_estimate: int = 0
+
+    def get_all_messages(self) -> list[ModelMessage]:
+        """Flatten all messages in this chunk."""
+        messages: list[ModelMessage] = []
+        for group in self.groups:
+            messages.extend(group.messages)
+        return messages
+
+
+def identify_message_groups(messages: list[ModelMessage]) -> list[MessageGroup]:
+    """Identify logical message groups that must stay together.
+
+    Rules:
+    1. Tool calls must include their responses (matched by tool_call_id)
+    2. User messages are individual groups
+    3. Standalone assistant responses are individual groups
+
+    Args:
+        messages: The full message history
+
+    Returns:
+        List of MessageGroup objects
+    """
+    groups: list[MessageGroup] = []
+
+    # Track pending tool calls that need their returns
+    # Maps tool_call_id -> group index
+    pending_tool_calls: dict[str, int] = {}
+
+    for i, msg in enumerate(messages):
+        if isinstance(msg, ModelResponse):
+            # Check for tool calls in response
+            tool_calls = [p for p in msg.parts if isinstance(p, ToolCallPart)]
+
+            if tool_calls:
+                # Start a tool sequence group
+                group = MessageGroup(
+                    messages=[msg],
+                    is_tool_sequence=True,
+                    start_index=i,
+                    end_index=i,
+                )
+                group_idx = len(groups)
+                groups.append(group)
+
+                # Track all tool call IDs in this response
+                for tc in tool_calls:
+                    if tc.tool_call_id:
+                        pending_tool_calls[tc.tool_call_id] = group_idx
+            else:
+                # Standalone assistant response (text only)
+                groups.append(
+                    MessageGroup(
+                        messages=[msg],
+                        is_tool_sequence=False,
+                        start_index=i,
+                        end_index=i,
+                    )
+                )
+
+        elif isinstance(msg, ModelRequest):
+            # Check for tool returns in request
+            tool_returns = [p for p in msg.parts if isinstance(p, ToolReturnPart)]
+            user_prompts = [p for p in msg.parts if isinstance(p, UserPromptPart)]
+
+            if tool_returns:
+                # Add to corresponding tool call groups
+                for tr in tool_returns:
+                    if tr.tool_call_id and tr.tool_call_id in pending_tool_calls:
+                        group_idx = pending_tool_calls.pop(tr.tool_call_id)
+                        groups[group_idx].messages.append(msg)
+                        groups[group_idx].end_index = i
+                # Note: orphaned tool returns are handled by filter_orphaned_tool_responses
+
+            elif user_prompts:
+                # User message - standalone group
+                groups.append(
+                    MessageGroup(
+                        messages=[msg],
+                        is_tool_sequence=False,
+                        start_index=i,
+                        end_index=i,
+                    )
+                )
+            # Note: System prompts are handled separately by compaction
+
+    logger.debug(
+        f"Identified {len(groups)} message groups "
+        f"({sum(1 for g in groups if g.is_tool_sequence)} tool sequences)"
+    )
+
+    return groups
+
+
+async def create_chunks(
+    groups: list[MessageGroup],
+    model_config: ModelConfig,
+    retention_window: int = RETENTION_WINDOW_MESSAGES,
+) -> tuple[list[Chunk], list[ModelMessage]]:
+    """Create chunks from message groups, respecting token limits.
+
+    Args:
+        groups: List of message groups from identify_message_groups()
+        model_config: Model configuration for token limits
+        retention_window: Number of recent groups to keep outside compaction
+
+    Returns:
+        Tuple of (chunks_to_summarize, retained_recent_messages)
+    """
+    max_chunk_tokens = int(model_config.max_input_tokens * CHUNK_TARGET_RATIO)
+
+    # Handle edge case: too few groups
+    if len(groups) <= retention_window:
+        all_messages: list[ModelMessage] = []
+        for g in groups:
+            all_messages.extend(g.messages)
+        return [], all_messages
+
+    # Separate retention window from groups to chunk
+    groups_to_chunk = groups[:-retention_window]
+    retained_groups = groups[-retention_window:]
+
+    # Build chunks
+    chunks: list[Chunk] = []
+    current_groups: list[MessageGroup] = []
+    current_tokens = 0
+
+    for group in groups_to_chunk:
+        group_tokens = await group.get_token_count(model_config)
+
+        # Handle oversized single group - becomes its own chunk
+        if group_tokens > max_chunk_tokens:
+            # Finish current chunk if any
+            if current_groups:
+                chunks.append(
+                    Chunk(
+                        groups=current_groups,
+                        chunk_index=len(chunks),
+                        total_token_estimate=current_tokens,
+                    )
+                )
+                current_groups = []
+                current_tokens = 0
+
+            # Add oversized as its own chunk
+            chunks.append(
+                Chunk(
+                    groups=[group],
+                    chunk_index=len(chunks),
+                    total_token_estimate=group_tokens,
+                )
+            )
+            logger.warning(
+                f"Oversized message group ({group_tokens:,} tokens) "
+                f"added as single chunk - may need special handling"
+            )
+            continue
+
+        # Would adding this group exceed limit?
+        if current_tokens + group_tokens > max_chunk_tokens:
+            # Finish current chunk
+            if current_groups:
+                chunks.append(
+                    Chunk(
+                        groups=current_groups,
+                        chunk_index=len(chunks),
+                        total_token_estimate=current_tokens,
+                    )
+                )
+            current_groups = [group]
+            current_tokens = group_tokens
+        else:
+            current_groups.append(group)
+            current_tokens += group_tokens
+
+    # Don't forget last chunk
+    if current_groups:
+        chunks.append(
+            Chunk(
+                groups=current_groups,
+                chunk_index=len(chunks),
+                total_token_estimate=current_tokens,
+            )
+        )
+
+    # Extract retained messages
+    retained_messages: list[ModelMessage] = []
+    for g in retained_groups:
+        retained_messages.extend(g.messages)
+
+    # Update chunk indices (in case any were out of order)
+    for i, chunk in enumerate(chunks):
+        chunk.chunk_index = i
+
+    logger.info(
+        f"Created {len(chunks)} chunks for compaction, "
+        f"retaining {len(retained_messages)} recent messages"
+    )
+
+    return chunks, retained_messages
+
+
+async def chunk_messages_for_compaction(
+    messages: list[ModelMessage],
+    model_config: ModelConfig,
+) -> tuple[list[Chunk], list[ModelMessage]]:
+    """Main entry point: chunk oversized conversation for summarization.
+
+    This function identifies logical message groups (preserving tool call sequences),
+    then packs them into chunks that fit within model token limits.
+
+    Args:
+        messages: Full conversation message history
+        model_config: Model configuration for token limits
+
+    Returns:
+        Tuple of (chunks_to_summarize, retention_window_messages)
+    """
+    groups = identify_message_groups(messages)
+    return await create_chunks(groups, model_config)
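For orientation, here is a minimal sketch of how the new entry point might be driven by a compactor. The `summarize_chunk` callable and the shape of the returned list are hypothetical stand-ins for illustration; the real wiring lives in history_processors.py and is not shown in this hunk.

# Sketch only: hypothetical driver for chunk_messages_for_compaction.
# `summarize_chunk` stands in for whatever LLM summarization call the real
# compactor uses; it is not part of this diff.
from shotgun.agents.conversation.history.chunking import chunk_messages_for_compaction

async def compact_oversized(messages, model_config, summarize_chunk):
    chunks, retained = await chunk_messages_for_compaction(messages, model_config)
    if not chunks:
        # Everything fit inside the retention window - nothing to summarize.
        return messages
    summaries = [await summarize_chunk(chunk.get_all_messages()) for chunk in chunks]
    # The real compactor would wrap the summaries back into ModelMessage objects;
    # shown as a plain list here for brevity.
    return summaries + retained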
shotgun/agents/{history → conversation/history}/compaction.py
@@ -20,6 +20,10 @@ async def apply_persistent_compaction(
     This ensures that compacted history is actually used as the conversation baseline,
     preventing cascading compaction issues across both CLI and TUI usage patterns.
 
+    Compaction happens in two phases:
+    1. Deterministic pre-compaction: Remove file content (no LLM needed)
+    2. LLM-based compaction: Summarize conversation if still over threshold
+
     Args:
         messages: Full message history from agent run
         deps: Agent dependencies containing model config
@@ -28,10 +32,32 @@ async def apply_persistent_compaction(
     Returns:
         Compacted message history that should be stored as conversation state
     """
+    from .file_content_deduplication import deduplicate_file_content
    from .history_processors import token_limit_compactor
 
     try:
-        #
+        # STEP 1: Deterministic pre-compaction (no LLM cost)
+        # Remove file content from tool returns - files are still accessible
+        # via retrieve_code (codebase) or read_file (.shotgun/ folder)
+        messages, tokens_saved = deduplicate_file_content(
+            messages,
+            retention_window=3,  # Keep last 3 messages' file content intact
+        )
+
+        if tokens_saved > 0:
+            logger.info(
+                f"Pre-compaction: removed ~{tokens_saved:,} tokens of file content"
+            )
+            track_event(
+                "file_content_deduplication",
+                {
+                    "tokens_saved_estimate": tokens_saved,
+                    "retention_window": 3,
+                    "model_name": deps.llm_model.name.value,
+                },
+            )
+
+        # STEP 2: Count tokens after pre-compaction
         estimated_tokens = await estimate_tokens_from_messages(messages, deps.llm_model)
 
         # Create minimal usage info for compaction check
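Condensed, the control flow this hunk adds is: deterministic dedup first, then a fresh token estimate, then LLM compaction only if the history is still too large. A rough sketch, with the caveat that the threshold comparison and the token_limit_compactor call signature are assumptions for illustration rather than lines from this diff:

# Rough sketch of the two-phase flow; the threshold check and the
# token_limit_compactor signature are assumed, not taken from the hunk.
async def two_phase_compaction(messages, deps):
    # Phase 1: deterministic - strip file bodies outside the last 3 messages
    messages, tokens_saved = deduplicate_file_content(messages, retention_window=3)

    # Phase 2: only summarize with the LLM if the history is still over budget
    estimated_tokens = await estimate_tokens_from_messages(messages, deps.llm_model)
    if estimated_tokens > deps.llm_model.max_input_tokens * TOKEN_LIMIT_RATIO:
        messages = await token_limit_compactor(messages, deps)  # signature assumed
    return messages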
shotgun/agents/{history → conversation/history}/constants.py
@@ -10,6 +10,11 @@ INPUT_BUFFER_TOKENS = 500
 MIN_SUMMARY_TOKENS = 100
 TOKEN_LIMIT_RATIO = 0.8
 
+# Chunked compaction constants
+CHUNK_TARGET_RATIO = 0.60  # Target chunk size as % of max_input_tokens
+CHUNK_SAFE_RATIO = 0.70  # Max safe ratio before triggering chunked compaction
+RETENTION_WINDOW_MESSAGES = 5  # Keep last N message groups outside compaction
+
 
 class SummaryType(Enum):
     """Types of summarization requests for logging."""
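To make the new ratios concrete: for an illustrative 200,000-token input window (a hypothetical figure, not a value from this diff), the budgets work out as follows.

# Illustrative arithmetic only - 200,000 is a hypothetical max_input_tokens.
MAX_INPUT_TOKENS = 200_000
chunk_target = int(MAX_INPUT_TOKENS * 0.60)     # 120,000 tokens per summarization chunk
chunked_trigger = int(MAX_INPUT_TOKENS * 0.70)  # 140,000 tokens before chunked compaction kicks in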
shotgun/agents/conversation/history/file_content_deduplication.py (new file)
@@ -0,0 +1,216 @@
+"""Pre-compaction file content deduplication for conversation history.
+
+This module provides a deterministic pre-pass that removes file content from
+tool returns before LLM-based compaction. Files are still accessible via
+`retrieve_code` (codebase) or `read_file` (.shotgun/ folder).
+"""
+
+import copy
+import re
+from enum import StrEnum
+from typing import Any
+
+from pydantic_ai.messages import (
+    ModelMessage,
+    ModelRequest,
+    ToolReturnPart,
+)
+
+from shotgun.logging_config import get_logger
+
+logger = get_logger(__name__)
+
+
+class FileReadTool(StrEnum):
+    """Tool names that read file content."""
+
+    CODEBASE = "file_read"  # Reads from indexed codebase (Kuzu graph)
+    SHOTGUN_FOLDER = "read_file"  # Reads from .shotgun/ folder
+
+
+# Minimum content length to bother deduplicating (skip tiny files)
+MIN_CONTENT_LENGTH = 500
+
+# Placeholder templates for each type
+CODEBASE_PLACEHOLDER = (
+    "**File**: `{file_path}`\n"
+    "**Size**: {size_bytes} bytes | **Language**: {language}\n"
+    "**Content**: [Removed for compaction - use `retrieve_code` or `file_read` to access]"
+)
+
+SHOTGUN_PLACEHOLDER = (
+    "**File**: `.shotgun/{filename}`\n"
+    "**Content**: [Removed for compaction - file persisted in .shotgun/ folder]"
+)
+
+# Pattern for parsing file_read output (codebase files)
+# Format: **File**: `path`\n**Size**: N bytes\n[optional encoding]\n\n**Content**:\n```lang\ncontent```
+CODEBASE_FILE_PATTERN = re.compile(
+    r"\*\*File\*\*:\s*`([^`]+)`\s*\n"  # File path
+    r"\*\*Size\*\*:\s*(\d+)\s*bytes\s*\n"  # Size in bytes
+    r"(?:\*\*Encoding\*\*:.*?\n)?"  # Optional encoding line
+    r"\n\*\*Content\*\*:\s*\n"  # Blank line + Content header
+    r"```(\w*)\n"  # Language tag
+    r"(.*?)```",  # Actual content
+    re.DOTALL,
+)
+
+
+def _parse_codebase_file_content(
+    content: str,
+) -> tuple[str, int, str, str] | None:
+    """Parse file_read tool return content.
+
+    Args:
+        content: The tool return content string
+
+    Returns:
+        Tuple of (file_path, size_bytes, language, actual_content) or None if not parseable
+    """
+    match = CODEBASE_FILE_PATTERN.search(content)
+    if not match:
+        return None
+
+    file_path = match.group(1)
+    size_bytes = int(match.group(2))
+    language = match.group(3) or ""
+    actual_content = match.group(4)
+
+    return file_path, size_bytes, language, actual_content
+
+
+def _create_codebase_placeholder(file_path: str, size_bytes: int, language: str) -> str:
+    """Create placeholder for codebase file content."""
+    return CODEBASE_PLACEHOLDER.format(
+        file_path=file_path,
+        size_bytes=size_bytes,
+        language=language or "unknown",
+    )
+
+
+def _create_shotgun_placeholder(filename: str) -> str:
+    """Create placeholder for .shotgun/ file content."""
+    return SHOTGUN_PLACEHOLDER.format(filename=filename)
+
+
+def _estimate_tokens_saved(original: str, replacement: str) -> int:
+    """Rough estimate of tokens saved (~4 chars per token)."""
+    original_chars = len(original)
+    replacement_chars = len(replacement)
+    # Rough token estimate: ~4 characters per token for code
+    return max(0, (original_chars - replacement_chars) // 4)
+
+
+def deduplicate_file_content(
+    messages: list[ModelMessage],
+    retention_window: int = 3,
+) -> tuple[list[ModelMessage], int]:
+    """Replace file read content with placeholders for indexed/persisted files.
+
+    This is a deterministic pre-compaction pass that reduces tokens without
+    requiring an LLM. Files remain accessible via their respective tools.
+
+    Args:
+        messages: Conversation history
+        retention_window: Keep full content in last N messages (for recent context)
+
+    Returns:
+        Tuple of (modified_messages, estimated_tokens_saved)
+    """
+    if not messages:
+        return messages, 0
+
+    # Deep copy to avoid modifying original
+    modified_messages = copy.deepcopy(messages)
+    total_tokens_saved = 0
+    files_deduplicated = 0
+
+    # Calculate retention boundary (keep last N messages intact)
+    retention_start = max(0, len(modified_messages) - retention_window)
+
+    for msg_idx, message in enumerate(modified_messages):
+        # Skip messages in retention window
+        if msg_idx >= retention_start:
+            continue
+
+        # Only process ModelRequest (which contains ToolReturnPart)
+        if not isinstance(message, ModelRequest):
+            continue
+
+        # Build new parts list, replacing file content where appropriate
+        new_parts: list[Any] = []
+        message_modified = False
+
+        for part in message.parts:
+            if not isinstance(part, ToolReturnPart):
+                new_parts.append(part)
+                continue
+
+            tool_name = part.tool_name
+            content = part.content
+
+            # Skip if content is too short to bother
+            if not isinstance(content, str) or len(content) < MIN_CONTENT_LENGTH:
+                new_parts.append(part)
+                continue
+
+            replacement = None
+            original_content = content
+
+            # Handle codebase file reads (file_read)
+            if tool_name == FileReadTool.CODEBASE:
+                parsed = _parse_codebase_file_content(content)
+                if parsed:
+                    file_path, size_bytes, language, actual_content = parsed
+                    # Only replace if actual content is substantial
+                    if len(actual_content) >= MIN_CONTENT_LENGTH:
+                        replacement = _create_codebase_placeholder(
+                            file_path, size_bytes, language
+                        )
+                        logger.debug(
+                            f"Deduplicating codebase file: {file_path} "
+                            f"({size_bytes} bytes)"
+                        )
+
+            # Handle .shotgun/ file reads (read_file)
+            elif tool_name == FileReadTool.SHOTGUN_FOLDER:
+                # For read_file, content is raw - we need to figure out filename
+                # from the tool call args (but we only have the return here)
+                # Use a generic placeholder since we don't have the filename
+                if len(content) >= MIN_CONTENT_LENGTH:
+                    # Try to extract filename from content if it looks like markdown
+                    # Otherwise use generic placeholder
+                    replacement = _create_shotgun_placeholder("artifact")
+                    logger.debug(
+                        f"Deduplicating .shotgun/ file read ({len(content)} chars)"
+                    )
+
+            # Apply replacement if we have one
+            if replacement:
+                # Create new ToolReturnPart with replaced content
+                new_part = ToolReturnPart(
+                    tool_name=part.tool_name,
+                    tool_call_id=part.tool_call_id,
+                    content=replacement,
+                    timestamp=part.timestamp,
+                )
+                new_parts.append(new_part)
+                message_modified = True
+
+                tokens_saved = _estimate_tokens_saved(original_content, replacement)
+                total_tokens_saved += tokens_saved
+                files_deduplicated += 1
+            else:
+                new_parts.append(part)
+
+        # Replace message with new parts if modified
+        if message_modified:
+            modified_messages[msg_idx] = ModelRequest(parts=new_parts)
+
+    if files_deduplicated > 0:
+        logger.info(
+            f"File content deduplication: {files_deduplicated} files, "
+            f"~{total_tokens_saved:,} tokens saved"
+        )
+
+    return modified_messages, total_tokens_saved
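A small self-contained check of the dedup pass. The synthetic tool return below is assumed to match the file_read markdown format that CODEBASE_FILE_PATTERN expects; the trailing empty messages only exist to push the old read outside the default 3-message retention window.

# Sketch: exercise deduplicate_file_content on a synthetic history.
from pydantic_ai.messages import ModelRequest, ToolReturnPart
from shotgun.agents.conversation.history.file_content_deduplication import deduplicate_file_content

payload = (
    "**File**: `src/app.py`\n"
    "**Size**: 2048 bytes\n"
    "\n**Content**:\n```python\n" + "x = 1\n" * 200 + "```"
)
old_read = ModelRequest(
    parts=[ToolReturnPart(tool_name="file_read", content=payload, tool_call_id="call_1")]
)
# Three trailing messages so old_read falls outside the retention window.
history = [old_read] + [ModelRequest(parts=[]) for _ in range(3)]

compacted, saved = deduplicate_file_content(history, retention_window=3)
# compacted[0] now carries the placeholder text instead of the file body;
# `saved` is a rough (chars // 4) token estimate.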