shotgun-sh 0.1.0.dev13__py3-none-any.whl → 0.1.0.dev14__py3-none-any.whl
This diff shows the content of publicly released versions of the package as they appear in the supported public registries. It is provided for informational purposes only and reflects the changes between those published versions.
Potentially problematic release: this version of shotgun-sh might be problematic.
- shotgun/agents/agent_manager.py +16 -3
- shotgun/agents/artifact_state.py +58 -0
- shotgun/agents/common.py +48 -14
- shotgun/agents/config/models.py +61 -0
- shotgun/agents/history/compaction.py +85 -0
- shotgun/agents/history/constants.py +19 -0
- shotgun/agents/history/context_extraction.py +108 -0
- shotgun/agents/history/history_building.py +104 -0
- shotgun/agents/history/history_processors.py +354 -157
- shotgun/agents/history/message_utils.py +46 -0
- shotgun/agents/history/token_counting.py +429 -0
- shotgun/agents/history/token_estimation.py +138 -0
- shotgun/agents/models.py +125 -1
- shotgun/agents/tools/artifact_management.py +56 -24
- shotgun/agents/tools/file_management.py +30 -11
- shotgun/agents/tools/web_search/anthropic.py +78 -17
- shotgun/agents/tools/web_search/gemini.py +1 -1
- shotgun/agents/tools/web_search/openai.py +16 -2
- shotgun/artifacts/manager.py +2 -1
- shotgun/artifacts/models.py +6 -4
- shotgun/codebase/core/nl_query.py +4 -4
- shotgun/prompts/agents/partials/artifact_system.j2 +4 -1
- shotgun/prompts/agents/partials/codebase_understanding.j2 +1 -2
- shotgun/prompts/agents/plan.j2 +9 -7
- shotgun/prompts/agents/research.j2 +7 -5
- shotgun/prompts/agents/specify.j2 +8 -7
- shotgun/prompts/agents/state/artifact_templates_available.j2 +18 -0
- shotgun/prompts/agents/state/codebase/codebase_graphs_available.j2 +3 -1
- shotgun/prompts/agents/state/existing_artifacts_available.j2 +23 -0
- shotgun/prompts/agents/state/system_state.j2 +9 -1
- shotgun/prompts/history/incremental_summarization.j2 +53 -0
- shotgun/sdk/services.py +14 -0
- shotgun/tui/app.py +1 -1
- shotgun/tui/screens/chat.py +4 -2
- shotgun/utils/file_system_utils.py +6 -1
- {shotgun_sh-0.1.0.dev13.dist-info → shotgun_sh-0.1.0.dev14.dist-info}/METADATA +2 -1
- {shotgun_sh-0.1.0.dev13.dist-info → shotgun_sh-0.1.0.dev14.dist-info}/RECORD +40 -29
- {shotgun_sh-0.1.0.dev13.dist-info → shotgun_sh-0.1.0.dev14.dist-info}/WHEEL +0 -0
- {shotgun_sh-0.1.0.dev13.dist-info → shotgun_sh-0.1.0.dev14.dist-info}/entry_points.txt +0 -0
- {shotgun_sh-0.1.0.dev13.dist-info → shotgun_sh-0.1.0.dev14.dist-info}/licenses/LICENSE +0 -0
shotgun/agents/agent_manager.py
CHANGED
@@ -3,13 +3,19 @@
 from enum import Enum
 from typing import Any
 
-from pydantic_ai import
+from pydantic_ai import (
+    Agent,
+    DeferredToolRequests,
+    DeferredToolResults,
+    UsageLimits,
+)
 from pydantic_ai.agent import AgentRunResult
 from pydantic_ai.messages import ModelMessage, ModelRequest
 from textual.message import Message
 from textual.widget import Widget
 
-from .
+from .history.compaction import apply_persistent_compaction
+from .models import AgentDeps, AgentRuntimeOptions, FileOperation
 from .plan import create_plan_agent
 from .research import create_research_agent
 from .tasks import create_tasks_agent
@@ -84,6 +90,7 @@ class AgentManager(Widget):
         # Maintain shared message history
         self.ui_message_history: list[ModelMessage] = []
         self.message_history: list[ModelMessage] = []
+        self.recently_change_files: list[FileOperation] = []
 
     @property
     def current_agent(self) -> Agent[AgentDeps, str | DeferredToolRequests]:
@@ -181,9 +188,15 @@ class AgentManager(Widget):
             mes for mes in result.new_messages() if not isinstance(mes, ModelRequest)
         ]
 
-
+        # Apply compaction to persistent message history to prevent cascading growth
+        self.message_history = await apply_persistent_compaction(
+            result.all_messages(), deps
+        )
         self._post_messages_updated()
 
+        # Log file operations summary if any files were modified
+        self.recently_change_files = deps.file_tracker.operations.copy()
+
         return result
 
     def _post_messages_updated(self) -> None:
shotgun/agents/artifact_state.py
ADDED
@@ -0,0 +1,58 @@
+"""Utilities for collecting and organizing artifact state information."""
+
+from datetime import datetime
+from typing import TypedDict
+
+from shotgun.artifacts.models import ArtifactSummary
+from shotgun.artifacts.templates.models import TemplateSummary
+from shotgun.sdk.services import get_artifact_service
+
+
+class ArtifactState(TypedDict):
+    """Type definition for artifact state information."""
+
+    available_templates: dict[str, list[TemplateSummary]]
+    existing_artifacts: dict[str, list[ArtifactSummary]]
+    current_date: str
+
+
+def collect_artifact_state() -> ArtifactState:
+    """Collect and organize artifact state information for system context.
+
+    Returns:
+        ArtifactState containing organized templates and artifacts by mode, plus current date
+    """
+    artifact_service = get_artifact_service()
+
+    # Get available templates
+    available_templates_list = artifact_service.list_templates()
+
+    # Group templates by mode for better organization
+    templates_by_mode: dict[str, list[TemplateSummary]] = {}
+    for template in available_templates_list:
+        mode_name = template.template_id.split("/")[0]
+        if mode_name not in templates_by_mode:
+            templates_by_mode[mode_name] = []
+        templates_by_mode[mode_name].append(template)
+
+    # Get ALL existing artifacts regardless of current agent mode for complete visibility
+    existing_artifacts_list = (
+        artifact_service.list_artifacts()
+    )  # No mode filter = all modes
+
+    # Group artifacts by mode for organized display
+    artifacts_by_mode: dict[str, list[ArtifactSummary]] = {}
+    for artifact in existing_artifacts_list:
+        mode_name = artifact.agent_mode.value
+        if mode_name not in artifacts_by_mode:
+            artifacts_by_mode[mode_name] = []
+        artifacts_by_mode[mode_name].append(artifact)
+
+    # Get current date for temporal context (month in words for clarity)
+    current_date = datetime.now().strftime("%B %d, %Y")
+
+    return {
+        "available_templates": templates_by_mode,
+        "existing_artifacts": artifacts_by_mode,
+        "current_date": current_date,
+    }
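A minimal usage sketch of the new helper (a standalone snippet written for illustration; the real call site is the common.py change below, which unpacks the returned mapping into the system_state.j2 render):

    from shotgun.agents.artifact_state import collect_artifact_state

    state = collect_artifact_state()
    # Keys follow the ArtifactState TypedDict above.
    print(state["current_date"])  # e.g. "March 01, 2025" (illustrative value)
    for mode, templates in state["available_templates"].items():
        print(mode, [t.template_id for t in templates])
    for mode, artifacts in state["existing_artifacts"].items():
        print(mode, f"{len(artifacts)} artifact(s)")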
shotgun/agents/common.py
CHANGED
@@ -23,10 +23,11 @@ from pydantic_ai.messages import (
 from shotgun.agents.config import ProviderType, get_config_manager, get_provider_model
 from shotgun.logging_config import get_logger
 from shotgun.prompts import PromptLoader
-from shotgun.sdk.services import get_codebase_service
+from shotgun.sdk.services import get_artifact_service, get_codebase_service
 from shotgun.utils import ensure_shotgun_directory_exists
 
 from .history import token_limit_compactor
+from .history.compaction import apply_persistent_compaction
 from .models import AgentDeps, AgentRuntimeOptions
 from .tools import (
     append_file,
@@ -70,10 +71,17 @@ async def add_system_status_message(
     message_history = message_history or []
     codebase_understanding_graphs = await deps.codebase_service.list_graphs()
 
+    # Collect artifact state information
+    from .artifact_state import collect_artifact_state
+
+    artifact_state = collect_artifact_state()
+
     system_state = prompt_loader.render(
         "agents/state/system_state.j2",
         codebase_understanding_graphs=codebase_understanding_graphs,
+        **artifact_state,
     )
+
     message_history.append(
         ModelResponse(
             parts=[
@@ -117,12 +125,14 @@ def create_base_agent(
     # Use the Model instance directly (has API key baked in)
     model = model_config.model_instance
 
-    # Create deps with model config and
+    # Create deps with model config and services
    codebase_service = get_codebase_service()
+    artifact_service = get_artifact_service()
     deps = AgentDeps(
         **agent_runtime_options.model_dump(),
         llm_model=model_config,
         codebase_service=codebase_service,
+        artifact_service=artifact_service,
         system_prompt_fn=system_prompt_fn,
     )
 
@@ -131,12 +141,25 @@ def create_base_agent(
         logger.debug("🤖 Creating agent with fallback OpenAI GPT-4o")
         raise ValueError("Configured model is required") from e
 
+    # Create a history processor that has access to deps via closure
+    async def history_processor(messages: list[ModelMessage]) -> list[ModelMessage]:
+        """History processor with access to deps via closure."""
+
+        # Create a minimal context for compaction
+        class ProcessorContext:
+            def __init__(self, deps: AgentDeps):
+                self.deps = deps
+                self.usage = None  # Will be estimated from messages
+
+        ctx = ProcessorContext(deps)
+        return await token_limit_compactor(ctx, messages)
+
     agent = Agent(
         model,
         output_type=[str, DeferredToolRequests],
         deps_type=AgentDeps,
         instrument=True,
-        history_processors=[
+        history_processors=[history_processor],
     )
 
     # System prompt function is stored in deps and will be called manually in run_agent
@@ -153,17 +176,17 @@ def create_base_agent(
         logger.debug("📞 Interactive mode enabled - ask_user tool registered")
 
     # Register common file management tools (always available)
-    agent.
-    agent.
-    agent.
+    agent.tool(read_file)
+    agent.tool(write_file)
+    agent.tool(append_file)
 
     # Register artifact management tools (always available)
-    agent.
-    agent.
-    agent.
-    agent.
-    agent.
-    agent.
+    agent.tool(create_artifact)
+    agent.tool(list_artifacts)
+    agent.tool(list_artifact_templates)
+    agent.tool(read_artifact)
+    agent.tool(read_artifact_section)
+    agent.tool(write_artifact_section)
 
     # Register codebase understanding tools (conditional)
     if load_codebase_understanding_tools:
@@ -273,6 +296,10 @@ async def run_agent(
     message_history: list[ModelMessage] | None = None,
     usage_limits: UsageLimits | None = None,
 ) -> AgentRunResult[str | DeferredToolRequests]:
+    # Clear file tracker for new run
+    deps.file_tracker.clear()
+    logger.debug("🔧 Cleared file tracker for new agent run")
+
     # Add system prompt as first message
     message_history = await add_system_prompt_message(deps, message_history)
 
@@ -283,7 +310,8 @@ async def run_agent(
         message_history=message_history,
     )
 
-
+    # Apply persistent compaction to prevent cascading token growth across CLI commands
+    messages = await apply_persistent_compaction(result.all_messages(), deps)
     while isinstance(result.output, DeferredToolRequests):
         logger.info("got deferred tool requests")
         await deps.queue.join()
@@ -306,6 +334,12 @@
             message_history=messages,
             deferred_tool_results=results,
         )
-
+        # Apply persistent compaction to prevent cascading token growth in multi-turn loops
+        messages = await apply_persistent_compaction(result.all_messages(), deps)
+
+    # Log file operations summary if any files were modified
+    if deps.file_tracker.operations:
+        summary = deps.file_tracker.format_summary()
+        logger.info("📁 %s", summary)
 
     return result
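Both new call sites wrap token_limit_compactor in a hand-rolled context object rather than a real pydantic_ai RunContext, which implies the compactor only reads ctx.deps and ctx.usage. A sketch of that implied contract (the class name here is hypothetical; ProcessorContext above and MockContext in compaction.py below follow the same shape):

    from typing import Any

    class MinimalCompactionContext:
        """Duck-typed stand-in exposing only what token_limit_compactor reads."""

        def __init__(self, deps: Any, usage: Any = None) -> None:
            self.deps = deps    # AgentDeps carrying llm_model, services, file_tracker
            self.usage = usage  # None when token usage is estimated from messages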
shotgun/agents/config/models.py
CHANGED
@@ -1,9 +1,13 @@
 """Pydantic models for configuration."""
 
 from enum import Enum
+from typing import Any
 
 from pydantic import BaseModel, Field, PrivateAttr, SecretStr
+from pydantic_ai.direct import model_request
+from pydantic_ai.messages import ModelMessage, ModelResponse
 from pydantic_ai.models import Model
+from pydantic_ai.settings import ModelSettings
 
 
 class ProviderType(str, Enum):
@@ -57,6 +61,22 @@ class ModelConfig(BaseModel):
         }
         return f"{provider_prefix[self.provider]}:{self.name}"
 
+    def get_model_settings(self, max_tokens: int | None = None) -> ModelSettings:
+        """Get ModelSettings with optional token override.
+
+        This provides flexibility for specific use cases that need different
+        token limits while defaulting to maximum utilization.
+
+        Args:
+            max_tokens: Optional override for max_tokens. If None, uses max_output_tokens
+
+        Returns:
+            ModelSettings configured with specified or maximum tokens
+        """
+        return ModelSettings(
+            max_tokens=max_tokens if max_tokens is not None else self.max_output_tokens
+        )
+
 
 # Model specifications registry (static metadata)
 MODEL_SPECS: dict[str, ModelSpec] = {
@@ -125,3 +145,44 @@ class ShotgunConfig(BaseModel):
     )
     user_id: str = Field(description="Unique anonymous user identifier")
     config_version: int = Field(default=1, description="Configuration schema version")
+
+
+async def shotgun_model_request(
+    model_config: ModelConfig,
+    messages: list[ModelMessage],
+    max_tokens: int | None = None,
+    **kwargs: Any,
+) -> ModelResponse:
+    """Model request wrapper that uses full token capacity by default.
+
+    This wrapper ensures all LLM calls in Shotgun use the maximum available
+    token capacity of each model, improving response quality and completeness.
+    The most common issue this fixes is truncated summaries that were cut off
+    at default token limits (e.g., 4096 for Claude models).
+
+    Args:
+        model_config: ModelConfig instance with model settings and API key
+        messages: Messages to send to the model
+        max_tokens: Optional override for max_tokens. If None, uses model's max_output_tokens
+        **kwargs: Additional arguments passed to model_request
+
+    Returns:
+        ModelResponse from the model
+
+    Example:
+        # Uses full token capacity (e.g., 4096 for Claude, 128k for GPT-5)
+        response = await shotgun_model_request(model_config, messages)
+
+        # Override for specific use case
+        response = await shotgun_model_request(model_config, messages, max_tokens=1000)
+    """
+    # Get properly configured ModelSettings with maximum or overridden token limit
+    model_settings = model_config.get_model_settings(max_tokens)
+
+    # Make the model request with full token utilization
+    return await model_request(
+        model=model_config.model_instance,
+        messages=messages,
+        model_settings=model_settings,
+        **kwargs,
+    )
shotgun/agents/history/compaction.py
ADDED
@@ -0,0 +1,85 @@
+"""Conversation compaction utilities."""
+
+from pydantic_ai.messages import ModelMessage
+from pydantic_ai.usage import RequestUsage
+
+from shotgun.agents.models import AgentDeps
+from shotgun.logging_config import get_logger
+
+from .token_estimation import estimate_tokens_from_messages
+
+logger = get_logger(__name__)
+
+
+async def apply_persistent_compaction(
+    messages: list[ModelMessage], deps: AgentDeps
+) -> list[ModelMessage]:
+    """Apply compaction to message history for persistent storage.
+
+    This ensures that compacted history is actually used as the conversation baseline,
+    preventing cascading compaction issues across both CLI and TUI usage patterns.
+
+    Args:
+        messages: Full message history from agent run
+        deps: Agent dependencies containing model config
+
+    Returns:
+        Compacted message history that should be stored as conversation state
+    """
+    from .history_processors import token_limit_compactor
+
+    try:
+        # Count actual token usage using shared utility
+        estimated_tokens = estimate_tokens_from_messages(messages, deps.llm_model)
+
+        # Create minimal usage info for compaction check
+        usage = RequestUsage(
+            input_tokens=estimated_tokens,
+            output_tokens=0,
+        )
+
+        # Create a minimal context object for compaction
+        class MockContext:
+            def __init__(self, deps: AgentDeps, usage: RequestUsage | None):
+                self.deps = deps
+                self.usage = usage
+
+        ctx = MockContext(deps, usage)
+        compacted_messages = await token_limit_compactor(ctx, messages)
+
+        # Log the result for monitoring
+        original_size = len(messages)
+        compacted_size = len(compacted_messages)
+
+        if compacted_size < original_size:
+            reduction_pct = ((original_size - compacted_size) / original_size) * 100
+            logger.debug(
+                f"Persistent compaction applied: {original_size} → {compacted_size} messages "
+                f"({reduction_pct:.1f}% reduction)"
+            )
+        else:
+            logger.debug(
+                f"No persistent compaction needed: {original_size} messages unchanged"
+            )
+
+        return compacted_messages
+
+    except Exception as e:
+        # If compaction fails, return original messages
+        # This ensures the system remains functional even if compaction has issues
+        logger.warning(f"Persistent compaction failed, using original history: {e}")
+        return messages
+
+
+def should_apply_persistent_compaction(deps: AgentDeps) -> bool:
+    """Check if persistent compaction should be applied.
+
+    Args:
+        deps: Agent dependencies
+
+    Returns:
+        True if persistent compaction should be applied
+    """
+    # For now, always apply persistent compaction
+    # Future: Add configuration option in deps or environment variable
+    return True
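The intended call pattern mirrors the agent_manager.py and common.py changes above (sketch only; store_compacted is a hypothetical helper name, and result stands for a finished pydantic_ai AgentRunResult):

    from pydantic_ai.messages import ModelMessage

    from shotgun.agents.history.compaction import apply_persistent_compaction
    from shotgun.agents.models import AgentDeps

    async def store_compacted(result, deps: AgentDeps) -> list[ModelMessage]:
        # Persist the compacted list, not result.all_messages(), so the next run
        # starts from the reduced baseline instead of re-growing it.
        return await apply_persistent_compaction(result.all_messages(), deps)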
shotgun/agents/history/constants.py
ADDED
@@ -0,0 +1,19 @@
+"""Constants for history processing and compaction."""
+
+from enum import Enum
+
+# Summary marker for compacted history
+SUMMARY_MARKER = "📌 COMPACTED_HISTORY:"
+
+# Token calculation constants
+INPUT_BUFFER_TOKENS = 500
+MIN_SUMMARY_TOKENS = 100
+TOKEN_LIMIT_RATIO = 0.8
+
+
+class SummaryType(Enum):
+    """Types of summarization requests for logging."""
+
+    INCREMENTAL = "INCREMENTAL"
+    FULL = "FULL"
+    CONTEXT_EXTRACTION = "CONTEXT_EXTRACTION"
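How these constants combine is not visible in this excerpt (the logic lives in history_processors.py); the following is a plausible reading stated as an assumption, not the actual implementation:

    # Assumption: illustrative arithmetic only, not taken from history_processors.py.
    from shotgun.agents.history.constants import (
        INPUT_BUFFER_TOKENS,
        MIN_SUMMARY_TOKENS,
        TOKEN_LIMIT_RATIO,
    )

    context_window = 200_000  # hypothetical model context limit
    compaction_trigger = int(context_window * TOKEN_LIMIT_RATIO)  # compact at 80% usage
    summary_budget = max(MIN_SUMMARY_TOKENS, compaction_trigger - INPUT_BUFFER_TOKENS)
    print(compaction_trigger, summary_budget)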
shotgun/agents/history/context_extraction.py
ADDED
@@ -0,0 +1,108 @@
+"""Context extraction utilities for history processing."""
+
+from pydantic_ai.messages import (
+    BuiltinToolCallPart,
+    BuiltinToolReturnPart,
+    ModelMessage,
+    ModelRequest,
+    ModelResponse,
+    ModelResponsePart,
+    RetryPromptPart,
+    SystemPromptPart,
+    TextPart,
+    ThinkingPart,
+    ToolCallPart,
+    ToolReturnPart,
+    UserPromptPart,
+)
+
+
+def extract_context_from_messages(messages: list[ModelMessage]) -> str:
+    """Extract context from a list of messages for summarization."""
+    context = ""
+    for msg in messages:
+        if isinstance(msg, ModelResponse | ModelRequest):
+            for part in msg.parts:
+                message_content = extract_context_from_part(part)
+                if message_content:
+                    context += message_content + "\n"
+    return context
+
+
+def extract_context_from_message_range(
+    messages: list[ModelMessage],
+    start_index: int,
+    end_index: int | None = None,
+) -> str:
+    """Extract context from a specific range of messages."""
+    if end_index is None:
+        end_index = len(messages)
+
+    message_slice = messages[start_index:end_index]
+    return extract_context_from_messages(message_slice)
+
+
+def has_meaningful_content(messages: list[ModelMessage]) -> bool:
+    """Check if messages contain meaningful content worth summarizing.
+
+    Only ModelResponse messages are considered meaningful for summarization.
+    User requests alone don't need summarization.
+    """
+    for msg in messages:
+        if isinstance(msg, ModelResponse):
+            for part in msg.parts:
+                if extract_context_from_part(part):
+                    return True
+    return False
+
+
+def extract_context_from_part(
+    message_part: (
+        SystemPromptPart
+        | UserPromptPart
+        | ToolReturnPart
+        | RetryPromptPart
+        | ModelResponsePart
+    ),
+) -> str:
+    """Extract context from a single message part."""
+    if isinstance(message_part, SystemPromptPart):
+        return ""  # Exclude system prompts from summary
+
+    elif isinstance(message_part, UserPromptPart):
+        if isinstance(message_part.content, str):
+            return f"<USER_PROMPT>\n{message_part.content}\n</USER_PROMPT>"
+        return ""
+
+    elif isinstance(message_part, ToolReturnPart):
+        return f"<TOOL_RETURN>\n{str(message_part.content)}\n</TOOL_RETURN>"
+
+    elif isinstance(message_part, RetryPromptPart):
+        if isinstance(message_part.content, str):
+            return f"<RETRY_PROMPT>\n{message_part.content}\n</RETRY_PROMPT>"
+        return ""
+
+    # Handle ModelResponsePart types
+    elif isinstance(message_part, TextPart):
+        return f"<ASSISTANT_TEXT>\n{message_part.content}\n</ASSISTANT_TEXT>"
+
+    elif isinstance(message_part, ToolCallPart):
+        if isinstance(message_part.args, dict):
+            args_str = ", ".join(f"{k}={repr(v)}" for k, v in message_part.args.items())
+            tool_call_str = f"{message_part.tool_name}({args_str})"
+        else:
+            tool_call_str = f"{message_part.tool_name}({message_part.args})"
+        return f"<TOOL_CALL>\n{tool_call_str}\n</TOOL_CALL>"
+
+    elif isinstance(message_part, BuiltinToolCallPart):
+        return f"<BUILTIN_TOOL_CALL>\n{message_part.tool_name}\n</BUILTIN_TOOL_CALL>"
+
+    elif isinstance(message_part, BuiltinToolReturnPart):
+        return (
+            f"<BUILTIN_TOOL_RETURN>\n{message_part.tool_name}\n</BUILTIN_TOOL_RETURN>"
+        )
+
+    elif isinstance(message_part, ThinkingPart):
+        return f"<THINKING>\n{message_part.content}\n</THINKING>"
+
+    return ""
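A self-contained sketch of the extractor on a two-message history (the message construction uses the same pydantic_ai part types imported above):

    from pydantic_ai.messages import ModelRequest, ModelResponse, TextPart, UserPromptPart

    from shotgun.agents.history.context_extraction import extract_context_from_messages

    history = [
        ModelRequest(parts=[UserPromptPart(content="Summarize the repository layout")]),
        ModelResponse(parts=[TextPart(content="Three packages: agents, artifacts, tui.")]),
    ]

    # Yields newline-separated tagged blocks such as <USER_PROMPT>...</USER_PROMPT>
    # and <ASSISTANT_TEXT>...</ASSISTANT_TEXT>; system prompts are skipped.
    print(extract_context_from_messages(history))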
shotgun/agents/history/history_building.py
ADDED
@@ -0,0 +1,104 @@
+"""Functions for building compacted message history."""
+
+from pydantic_ai.messages import (
+    ModelMessage,
+    ModelRequest,
+    ModelRequestPart,
+    ModelResponse,
+    SystemPromptPart,
+    TextPart,
+    UserPromptPart,
+)
+
+from .message_utils import (
+    get_first_user_request,
+    get_last_user_request,
+    get_system_prompt,
+    get_user_content_from_request,
+)
+
+
+def build_clean_compacted_history(
+    summary_part: TextPart,
+    all_messages: list[ModelMessage],
+    last_summary_index: int | None = None,
+) -> list[ModelMessage]:
+    """Build a clean compacted history without preserving old verbose content.
+
+    Args:
+        summary_part: The marked summary part to include
+        all_messages: Original message history
+        last_summary_index: Index of the last summary (if any)
+
+    Returns:
+        Clean compacted message history
+    """
+    # Extract essential context from pre-summary messages (if any)
+    system_prompt = ""
+    first_user_prompt = ""
+
+    if last_summary_index is not None and last_summary_index > 0:
+        # Get system and first user from original conversation
+        pre_summary_messages = all_messages[:last_summary_index]
+        system_prompt = get_system_prompt(pre_summary_messages) or ""
+        first_user_prompt = get_first_user_request(pre_summary_messages) or ""
+
+    # Build the base structure
+    compacted_messages: list[ModelMessage] = []
+
+    # Add system/user context if it exists and is meaningful
+    if system_prompt or first_user_prompt:
+        compacted_messages.append(
+            _create_base_request(system_prompt, first_user_prompt)
+        )
+
+    # Add the summary
+    summary_message = ModelResponse(parts=[summary_part])
+    compacted_messages.append(summary_message)
+
+    # Ensure proper ending
+    return ensure_ends_with_model_request(compacted_messages, all_messages)
+
+
+def ensure_ends_with_model_request(
+    compacted_messages: list[ModelMessage],
+    original_messages: list[ModelMessage],
+) -> list[ModelMessage]:
+    """Ensure the message history ends with ModelRequest for PydanticAI compatibility."""
+    last_user_request = get_last_user_request(original_messages)
+
+    if not last_user_request:
+        return compacted_messages
+
+    # Check if we need to add the last request or restructure
+    if compacted_messages and isinstance(compacted_messages[0], ModelRequest):
+        first_request = compacted_messages[0]
+        last_user_content = get_user_content_from_request(last_user_request)
+        first_user_content = get_user_content_from_request(first_request)
+
+        if last_user_content != first_user_content:
+            # Different messages - append the last request
+            compacted_messages.append(last_user_request)
+        else:
+            # Same message - restructure to end with ModelRequest
+            if len(compacted_messages) >= 2:
+                summary_message = compacted_messages[1]  # The summary
+                compacted_messages = [summary_message, first_request]
+            else:
+                # No first request, just add the last one
+                compacted_messages.append(last_user_request)
+
+    return compacted_messages
+
+
+def _create_base_request(system_prompt: str, user_prompt: str) -> ModelRequest:
+    """Create the base ModelRequest with system and user prompts."""
+    parts: list[ModelRequestPart] = []
+
+    if system_prompt:
+        parts.append(SystemPromptPart(content=system_prompt))
+
+    if user_prompt:
+        parts.append(UserPromptPart(content=user_prompt))
+
+    return ModelRequest(parts=parts)
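A sketch of how the builder is presumably driven by the compactor (the SUMMARY_MARKER prefix comes from constants.py above; the actual wiring in history_processors.py is not part of this excerpt):

    from pydantic_ai.messages import ModelRequest, ModelResponse, TextPart, UserPromptPart

    from shotgun.agents.history.constants import SUMMARY_MARKER
    from shotgun.agents.history.history_building import build_clean_compacted_history

    all_messages = [
        ModelRequest(parts=[UserPromptPart(content="Plan the data migration")]),
        ModelResponse(parts=[TextPart(content="Step 1 ... Step 12 ...")]),
        ModelRequest(parts=[UserPromptPart(content="Now draft the tasks file")]),
    ]

    # The summary text carries SUMMARY_MARKER so later passes can locate the
    # previous compaction point.
    summary = TextPart(content=f"{SUMMARY_MARKER} Planned a twelve-step data migration.")

    compacted = build_clean_compacted_history(summary, all_messages)
    # With no last_summary_index, nothing before the summary is preserved; see
    # ensure_ends_with_model_request above for how the trailing request is handled.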