shotgun-sh 0.1.0.dev12__py3-none-any.whl → 0.1.0.dev14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of shotgun-sh might be problematic.
- shotgun/agents/agent_manager.py +16 -3
- shotgun/agents/artifact_state.py +58 -0
- shotgun/agents/common.py +137 -88
- shotgun/agents/config/constants.py +18 -0
- shotgun/agents/config/manager.py +68 -16
- shotgun/agents/config/models.py +61 -0
- shotgun/agents/config/provider.py +11 -6
- shotgun/agents/history/compaction.py +85 -0
- shotgun/agents/history/constants.py +19 -0
- shotgun/agents/history/context_extraction.py +108 -0
- shotgun/agents/history/history_building.py +104 -0
- shotgun/agents/history/history_processors.py +354 -157
- shotgun/agents/history/message_utils.py +46 -0
- shotgun/agents/history/token_counting.py +429 -0
- shotgun/agents/history/token_estimation.py +138 -0
- shotgun/agents/models.py +131 -1
- shotgun/agents/plan.py +15 -37
- shotgun/agents/research.py +10 -45
- shotgun/agents/specify.py +97 -0
- shotgun/agents/tasks.py +7 -36
- shotgun/agents/tools/artifact_management.py +482 -0
- shotgun/agents/tools/file_management.py +31 -12
- shotgun/agents/tools/web_search/anthropic.py +78 -17
- shotgun/agents/tools/web_search/gemini.py +1 -1
- shotgun/agents/tools/web_search/openai.py +16 -2
- shotgun/artifacts/__init__.py +17 -0
- shotgun/artifacts/exceptions.py +89 -0
- shotgun/artifacts/manager.py +530 -0
- shotgun/artifacts/models.py +334 -0
- shotgun/artifacts/service.py +463 -0
- shotgun/artifacts/templates/__init__.py +10 -0
- shotgun/artifacts/templates/loader.py +252 -0
- shotgun/artifacts/templates/models.py +136 -0
- shotgun/artifacts/templates/plan/delivery_and_release_plan.yaml +66 -0
- shotgun/artifacts/templates/research/market_research.yaml +585 -0
- shotgun/artifacts/templates/research/sdk_comparison.yaml +257 -0
- shotgun/artifacts/templates/specify/prd.yaml +331 -0
- shotgun/artifacts/templates/specify/product_spec.yaml +301 -0
- shotgun/artifacts/utils.py +76 -0
- shotgun/cli/plan.py +1 -4
- shotgun/cli/specify.py +69 -0
- shotgun/cli/tasks.py +0 -4
- shotgun/codebase/core/nl_query.py +4 -4
- shotgun/logging_config.py +23 -7
- shotgun/main.py +7 -6
- shotgun/prompts/agents/partials/artifact_system.j2 +35 -0
- shotgun/prompts/agents/partials/codebase_understanding.j2 +1 -2
- shotgun/prompts/agents/partials/common_agent_system_prompt.j2 +28 -2
- shotgun/prompts/agents/partials/content_formatting.j2 +65 -0
- shotgun/prompts/agents/partials/interactive_mode.j2 +10 -2
- shotgun/prompts/agents/plan.j2 +33 -32
- shotgun/prompts/agents/research.j2 +39 -29
- shotgun/prompts/agents/specify.j2 +32 -0
- shotgun/prompts/agents/state/artifact_templates_available.j2 +18 -0
- shotgun/prompts/agents/state/codebase/codebase_graphs_available.j2 +3 -1
- shotgun/prompts/agents/state/existing_artifacts_available.j2 +23 -0
- shotgun/prompts/agents/state/system_state.j2 +9 -1
- shotgun/prompts/agents/tasks.j2 +27 -12
- shotgun/prompts/history/incremental_summarization.j2 +53 -0
- shotgun/sdk/artifact_models.py +186 -0
- shotgun/sdk/artifacts.py +448 -0
- shotgun/sdk/services.py +14 -0
- shotgun/tui/app.py +26 -7
- shotgun/tui/screens/chat.py +32 -5
- shotgun/tui/screens/directory_setup.py +113 -0
- shotgun/utils/file_system_utils.py +6 -1
- {shotgun_sh-0.1.0.dev12.dist-info → shotgun_sh-0.1.0.dev14.dist-info}/METADATA +3 -2
- shotgun_sh-0.1.0.dev14.dist-info/RECORD +138 -0
- shotgun/prompts/user/research.j2 +0 -5
- shotgun_sh-0.1.0.dev12.dist-info/RECORD +0 -104
- {shotgun_sh-0.1.0.dev12.dist-info → shotgun_sh-0.1.0.dev14.dist-info}/WHEEL +0 -0
- {shotgun_sh-0.1.0.dev12.dist-info → shotgun_sh-0.1.0.dev14.dist-info}/entry_points.txt +0 -0
- {shotgun_sh-0.1.0.dev12.dist-info → shotgun_sh-0.1.0.dev14.dist-info}/licenses/LICENSE +0 -0
shotgun/agents/config/models.py
CHANGED
@@ -1,9 +1,13 @@
 """Pydantic models for configuration."""

 from enum import Enum
+from typing import Any

 from pydantic import BaseModel, Field, PrivateAttr, SecretStr
+from pydantic_ai.direct import model_request
+from pydantic_ai.messages import ModelMessage, ModelResponse
 from pydantic_ai.models import Model
+from pydantic_ai.settings import ModelSettings


 class ProviderType(str, Enum):
@@ -57,6 +61,22 @@ class ModelConfig(BaseModel):
         }
         return f"{provider_prefix[self.provider]}:{self.name}"

+    def get_model_settings(self, max_tokens: int | None = None) -> ModelSettings:
+        """Get ModelSettings with optional token override.
+
+        This provides flexibility for specific use cases that need different
+        token limits while defaulting to maximum utilization.
+
+        Args:
+            max_tokens: Optional override for max_tokens. If None, uses max_output_tokens
+
+        Returns:
+            ModelSettings configured with specified or maximum tokens
+        """
+        return ModelSettings(
+            max_tokens=max_tokens if max_tokens is not None else self.max_output_tokens
+        )
+

 # Model specifications registry (static metadata)
 MODEL_SPECS: dict[str, ModelSpec] = {
@@ -125,3 +145,44 @@ class ShotgunConfig(BaseModel):
     )
     user_id: str = Field(description="Unique anonymous user identifier")
     config_version: int = Field(default=1, description="Configuration schema version")
+
+
+async def shotgun_model_request(
+    model_config: ModelConfig,
+    messages: list[ModelMessage],
+    max_tokens: int | None = None,
+    **kwargs: Any,
+) -> ModelResponse:
+    """Model request wrapper that uses full token capacity by default.
+
+    This wrapper ensures all LLM calls in Shotgun use the maximum available
+    token capacity of each model, improving response quality and completeness.
+    The most common issue this fixes is truncated summaries that were cut off
+    at default token limits (e.g., 4096 for Claude models).
+
+    Args:
+        model_config: ModelConfig instance with model settings and API key
+        messages: Messages to send to the model
+        max_tokens: Optional override for max_tokens. If None, uses model's max_output_tokens
+        **kwargs: Additional arguments passed to model_request
+
+    Returns:
+        ModelResponse from the model
+
+    Example:
+        # Uses full token capacity (e.g., 4096 for Claude, 128k for GPT-5)
+        response = await shotgun_model_request(model_config, messages)
+
+        # Override for specific use case
+        response = await shotgun_model_request(model_config, messages, max_tokens=1000)
+    """
+    # Get properly configured ModelSettings with maximum or overridden token limit
+    model_settings = model_config.get_model_settings(max_tokens)

+    # Make the model request with full token utilization
+    return await model_request(
+        model=model_config.model_instance,
+        messages=messages,
+        model_settings=model_settings,
+        **kwargs,
+    )
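For context, a minimal usage sketch of the new request path (not part of the diff): it assumes a ModelConfig resolved by get_provider_model() and uses the pydantic-ai message types seen above; the summarize() helper and its prompt are purely illustrative.

from pydantic_ai.messages import ModelRequest, TextPart, UserPromptPart

from shotgun.agents.config.models import shotgun_model_request
from shotgun.agents.config.provider import get_provider_model


async def summarize(text: str) -> str:
    # Resolve provider, API key and model spec from Shotgun config / env vars.
    model_config = get_provider_model()
    messages = [ModelRequest(parts=[UserPromptPart(content=f"Summarize:\n{text}")])]

    # Defaults to the model's max_output_tokens; pass max_tokens=... to cap output.
    response = await shotgun_model_request(model_config, messages)
    return "".join(p.content for p in response.parts if isinstance(p, TextPart))

Driving this with asyncio.run(summarize("...")) requires a configured API key; the point is only that callers now get the full output budget unless they explicitly cap it.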
shotgun/agents/config/provider.py
CHANGED

@@ -13,6 +13,11 @@ from pydantic_ai.providers.openai import OpenAIProvider

 from shotgun.logging_config import get_logger

+from .constants import (
+    ANTHROPIC_API_KEY_ENV,
+    GEMINI_API_KEY_ENV,
+    OPENAI_API_KEY_ENV,
+)
 from .manager import get_config_manager
 from .models import MODEL_SPECS, ModelConfig, ProviderType

@@ -86,10 +91,10 @@ def get_provider_model(provider: ProviderType | None = None) -> ModelConfig:
    )

    if provider_enum == ProviderType.OPENAI:
-        api_key = _get_api_key(config.openai.api_key,
+        api_key = _get_api_key(config.openai.api_key, OPENAI_API_KEY_ENV)
        if not api_key:
            raise ValueError(
-                "OpenAI API key not configured. Set via environment variable
+                f"OpenAI API key not configured. Set via environment variable {OPENAI_API_KEY_ENV} or config."
            )

        # Get model spec
@@ -108,10 +113,10 @@ def get_provider_model(provider: ProviderType | None = None) -> ModelConfig:
        )

    elif provider_enum == ProviderType.ANTHROPIC:
-        api_key = _get_api_key(config.anthropic.api_key,
+        api_key = _get_api_key(config.anthropic.api_key, ANTHROPIC_API_KEY_ENV)
        if not api_key:
            raise ValueError(
-                "Anthropic API key not configured. Set via environment variable
+                f"Anthropic API key not configured. Set via environment variable {ANTHROPIC_API_KEY_ENV} or config."
            )

        # Get model spec
@@ -130,10 +135,10 @@ def get_provider_model(provider: ProviderType | None = None) -> ModelConfig:
        )

    elif provider_enum == ProviderType.GOOGLE:
-        api_key = _get_api_key(config.google.api_key,
+        api_key = _get_api_key(config.google.api_key, GEMINI_API_KEY_ENV)
        if not api_key:
            raise ValueError(
-                "Gemini API key not configured. Set via environment variable
+                f"Gemini API key not configured. Set via environment variable {GEMINI_API_KEY_ENV} or config."
            )

        # Get model spec
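The constants module that provider.py now imports (shotgun/agents/config/constants.py, listed above with +18 lines) is not shown in this diff. A plausible sketch of the three names used here, assuming the conventional environment-variable names; the real file likely defines more:

# Hypothetical sketch of shotgun/agents/config/constants.py (not included in this diff).
OPENAI_API_KEY_ENV = "OPENAI_API_KEY"
ANTHROPIC_API_KEY_ENV = "ANTHROPIC_API_KEY"
GEMINI_API_KEY_ENV = "GEMINI_API_KEY"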
shotgun/agents/history/compaction.py
ADDED

@@ -0,0 +1,85 @@
+"""Conversation compaction utilities."""
+
+from pydantic_ai.messages import ModelMessage
+from pydantic_ai.usage import RequestUsage
+
+from shotgun.agents.models import AgentDeps
+from shotgun.logging_config import get_logger
+
+from .token_estimation import estimate_tokens_from_messages
+
+logger = get_logger(__name__)
+
+
+async def apply_persistent_compaction(
+    messages: list[ModelMessage], deps: AgentDeps
+) -> list[ModelMessage]:
+    """Apply compaction to message history for persistent storage.
+
+    This ensures that compacted history is actually used as the conversation baseline,
+    preventing cascading compaction issues across both CLI and TUI usage patterns.
+
+    Args:
+        messages: Full message history from agent run
+        deps: Agent dependencies containing model config
+
+    Returns:
+        Compacted message history that should be stored as conversation state
+    """
+    from .history_processors import token_limit_compactor
+
+    try:
+        # Count actual token usage using shared utility
+        estimated_tokens = estimate_tokens_from_messages(messages, deps.llm_model)
+
+        # Create minimal usage info for compaction check
+        usage = RequestUsage(
+            input_tokens=estimated_tokens,
+            output_tokens=0,
+        )
+
+        # Create a minimal context object for compaction
+        class MockContext:
+            def __init__(self, deps: AgentDeps, usage: RequestUsage | None):
+                self.deps = deps
+                self.usage = usage
+
+        ctx = MockContext(deps, usage)
+        compacted_messages = await token_limit_compactor(ctx, messages)
+
+        # Log the result for monitoring
+        original_size = len(messages)
+        compacted_size = len(compacted_messages)
+
+        if compacted_size < original_size:
+            reduction_pct = ((original_size - compacted_size) / original_size) * 100
+            logger.debug(
+                f"Persistent compaction applied: {original_size} → {compacted_size} messages "
+                f"({reduction_pct:.1f}% reduction)"
+            )
+        else:
+            logger.debug(
+                f"No persistent compaction needed: {original_size} messages unchanged"
+            )
+
+        return compacted_messages
+
+    except Exception as e:
+        # If compaction fails, return original messages
+        # This ensures the system remains functional even if compaction has issues
+        logger.warning(f"Persistent compaction failed, using original history: {e}")
+        return messages
+
+
+def should_apply_persistent_compaction(deps: AgentDeps) -> bool:
+    """Check if persistent compaction should be applied.
+
+    Args:
+        deps: Agent dependencies
+
+    Returns:
+        True if persistent compaction should be applied
+    """
+    # For now, always apply persistent compaction
+    # Future: Add configuration option in deps or environment variable
+    return True
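A short usage sketch (not part of the diff) of how a caller might persist the compacted baseline after an agent run; deps is an AgentDeps instance and save_session is a hypothetical persistence hook, not a Shotgun API.

from pydantic_ai.messages import ModelMessage

from shotgun.agents.history.compaction import (
    apply_persistent_compaction,
    should_apply_persistent_compaction,
)


async def persist_history(
    messages: list[ModelMessage], deps, save_session
) -> list[ModelMessage]:
    # Store the compacted history as the new conversation baseline so later
    # runs do not re-compact the same prefix over and over.
    if should_apply_persistent_compaction(deps):
        messages = await apply_persistent_compaction(messages, deps)
    save_session(messages)  # hypothetical persistence hook
    return messages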
shotgun/agents/history/constants.py
ADDED

@@ -0,0 +1,19 @@
+"""Constants for history processing and compaction."""
+
+from enum import Enum
+
+# Summary marker for compacted history
+SUMMARY_MARKER = "📌 COMPACTED_HISTORY:"
+
+# Token calculation constants
+INPUT_BUFFER_TOKENS = 500
+MIN_SUMMARY_TOKENS = 100
+TOKEN_LIMIT_RATIO = 0.8
+
+
+class SummaryType(Enum):
+    """Types of summarization requests for logging."""
+
+    INCREMENTAL = "INCREMENTAL"
+    FULL = "FULL"
+    CONTEXT_EXTRACTION = "CONTEXT_EXTRACTION"
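The compaction decision itself lives in history_processors.py (not reproduced here), but one plausible way these numbers fit together, purely as a sketch of intent, is a threshold check like the following; needs_compaction is a hypothetical helper, not code from the package.

from shotgun.agents.history.constants import (
    INPUT_BUFFER_TOKENS,
    MIN_SUMMARY_TOKENS,
    TOKEN_LIMIT_RATIO,
)


def needs_compaction(estimated_tokens: int, context_window: int) -> bool:
    # Trigger once usage crosses 80% of the window, leaving headroom for the
    # input buffer and at least a minimal summary.
    budget = int(context_window * TOKEN_LIMIT_RATIO)
    return estimated_tokens + INPUT_BUFFER_TOKENS + MIN_SUMMARY_TOKENS > budget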
shotgun/agents/history/context_extraction.py
ADDED

@@ -0,0 +1,108 @@
+"""Context extraction utilities for history processing."""
+
+from pydantic_ai.messages import (
+    BuiltinToolCallPart,
+    BuiltinToolReturnPart,
+    ModelMessage,
+    ModelRequest,
+    ModelResponse,
+    ModelResponsePart,
+    RetryPromptPart,
+    SystemPromptPart,
+    TextPart,
+    ThinkingPart,
+    ToolCallPart,
+    ToolReturnPart,
+    UserPromptPart,
+)
+
+
+def extract_context_from_messages(messages: list[ModelMessage]) -> str:
+    """Extract context from a list of messages for summarization."""
+    context = ""
+    for msg in messages:
+        if isinstance(msg, ModelResponse | ModelRequest):
+            for part in msg.parts:
+                message_content = extract_context_from_part(part)
+                if message_content:
+                    context += message_content + "\n"
+    return context
+
+
+def extract_context_from_message_range(
+    messages: list[ModelMessage],
+    start_index: int,
+    end_index: int | None = None,
+) -> str:
+    """Extract context from a specific range of messages."""
+    if end_index is None:
+        end_index = len(messages)
+
+    message_slice = messages[start_index:end_index]
+    return extract_context_from_messages(message_slice)
+
+
+def has_meaningful_content(messages: list[ModelMessage]) -> bool:
+    """Check if messages contain meaningful content worth summarizing.
+
+    Only ModelResponse messages are considered meaningful for summarization.
+    User requests alone don't need summarization.
+    """
+    for msg in messages:
+        if isinstance(msg, ModelResponse):
+            for part in msg.parts:
+                if extract_context_from_part(part):
+                    return True
+    return False
+
+
+def extract_context_from_part(
+    message_part: (
+        SystemPromptPart
+        | UserPromptPart
+        | ToolReturnPart
+        | RetryPromptPart
+        | ModelResponsePart
+    ),
+) -> str:
+    """Extract context from a single message part."""
+    if isinstance(message_part, SystemPromptPart):
+        return ""  # Exclude system prompts from summary
+
+    elif isinstance(message_part, UserPromptPart):
+        if isinstance(message_part.content, str):
+            return f"<USER_PROMPT>\n{message_part.content}\n</USER_PROMPT>"
+        return ""
+
+    elif isinstance(message_part, ToolReturnPart):
+        return f"<TOOL_RETURN>\n{str(message_part.content)}\n</TOOL_RETURN>"
+
+    elif isinstance(message_part, RetryPromptPart):
+        if isinstance(message_part.content, str):
+            return f"<RETRY_PROMPT>\n{message_part.content}\n</RETRY_PROMPT>"
+        return ""
+
+    # Handle ModelResponsePart types
+    elif isinstance(message_part, TextPart):
+        return f"<ASSISTANT_TEXT>\n{message_part.content}\n</ASSISTANT_TEXT>"
+
+    elif isinstance(message_part, ToolCallPart):
+        if isinstance(message_part.args, dict):
+            args_str = ", ".join(f"{k}={repr(v)}" for k, v in message_part.args.items())
+            tool_call_str = f"{message_part.tool_name}({args_str})"
+        else:
+            tool_call_str = f"{message_part.tool_name}({message_part.args})"
+        return f"<TOOL_CALL>\n{tool_call_str}\n</TOOL_CALL>"
+
+    elif isinstance(message_part, BuiltinToolCallPart):
+        return f"<BUILTIN_TOOL_CALL>\n{message_part.tool_name}\n</BUILTIN_TOOL_CALL>"
+
+    elif isinstance(message_part, BuiltinToolReturnPart):
+        return (
+            f"<BUILTIN_TOOL_RETURN>\n{message_part.tool_name}\n</BUILTIN_TOOL_RETURN>"
+        )
+
+    elif isinstance(message_part, ThinkingPart):
+        return f"<THINKING>\n{message_part.content}\n</THINKING>"
+
+    return ""
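A small, self-contained example (not part of the diff) of running a toy history through the extractor; the prompt and reply text are placeholders.

from pydantic_ai.messages import ModelRequest, ModelResponse, TextPart, UserPromptPart

from shotgun.agents.history.context_extraction import (
    extract_context_from_messages,
    has_meaningful_content,
)

history = [
    ModelRequest(parts=[UserPromptPart(content="Which providers are supported?")]),
    ModelResponse(parts=[TextPart(content="OpenAI, Anthropic and Google.")]),
]

if has_meaningful_content(history):
    # Yields one tagged block per part, e.g. <USER_PROMPT>...</USER_PROMPT>
    # followed by <ASSISTANT_TEXT>...</ASSISTANT_TEXT>, ready for the summarizer.
    context = extract_context_from_messages(history)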
shotgun/agents/history/history_building.py
ADDED

@@ -0,0 +1,104 @@
+"""Functions for building compacted message history."""
+
+from pydantic_ai.messages import (
+    ModelMessage,
+    ModelRequest,
+    ModelRequestPart,
+    ModelResponse,
+    SystemPromptPart,
+    TextPart,
+    UserPromptPart,
+)
+
+from .message_utils import (
+    get_first_user_request,
+    get_last_user_request,
+    get_system_prompt,
+    get_user_content_from_request,
+)
+
+
+def build_clean_compacted_history(
+    summary_part: TextPart,
+    all_messages: list[ModelMessage],
+    last_summary_index: int | None = None,
+) -> list[ModelMessage]:
+    """Build a clean compacted history without preserving old verbose content.
+
+    Args:
+        summary_part: The marked summary part to include
+        all_messages: Original message history
+        last_summary_index: Index of the last summary (if any)
+
+    Returns:
+        Clean compacted message history
+    """
+    # Extract essential context from pre-summary messages (if any)
+    system_prompt = ""
+    first_user_prompt = ""
+
+    if last_summary_index is not None and last_summary_index > 0:
+        # Get system and first user from original conversation
+        pre_summary_messages = all_messages[:last_summary_index]
+        system_prompt = get_system_prompt(pre_summary_messages) or ""
+        first_user_prompt = get_first_user_request(pre_summary_messages) or ""
+
+    # Build the base structure
+    compacted_messages: list[ModelMessage] = []
+
+    # Add system/user context if it exists and is meaningful
+    if system_prompt or first_user_prompt:
+        compacted_messages.append(
+            _create_base_request(system_prompt, first_user_prompt)
+        )
+
+    # Add the summary
+    summary_message = ModelResponse(parts=[summary_part])
+    compacted_messages.append(summary_message)
+
+    # Ensure proper ending
+    return ensure_ends_with_model_request(compacted_messages, all_messages)
+
+
+def ensure_ends_with_model_request(
+    compacted_messages: list[ModelMessage],
+    original_messages: list[ModelMessage],
+) -> list[ModelMessage]:
+    """Ensure the message history ends with ModelRequest for PydanticAI compatibility."""
+    last_user_request = get_last_user_request(original_messages)
+
+    if not last_user_request:
+        return compacted_messages
+
+    # Check if we need to add the last request or restructure
+    if compacted_messages and isinstance(compacted_messages[0], ModelRequest):
+        first_request = compacted_messages[0]
+        last_user_content = get_user_content_from_request(last_user_request)
+        first_user_content = get_user_content_from_request(first_request)
+
+        if last_user_content != first_user_content:
+            # Different messages - append the last request
+            compacted_messages.append(last_user_request)
+        else:
+            # Same message - restructure to end with ModelRequest
+            if len(compacted_messages) >= 2:
+                summary_message = compacted_messages[1]  # The summary
+                compacted_messages = [summary_message, first_request]
+            else:
+                # No first request, just add the last one
+                compacted_messages.append(last_user_request)
+
+    return compacted_messages
+
+
+def _create_base_request(system_prompt: str, user_prompt: str) -> ModelRequest:
+    """Create the base ModelRequest with system and user prompts."""
+    parts: list[ModelRequestPart] = []
+
+    if system_prompt:
+        parts.append(SystemPromptPart(content=system_prompt))
+
+    if user_prompt:
+        parts.append(UserPromptPart(content=user_prompt))
+
+    return ModelRequest(parts=parts)
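Finally, a hedged sketch (not part of the diff) of handing an LLM-produced summary to the builder. SUMMARY_MARKER comes from the constants module above, and the one-message history and summary text are stand-ins for a real conversation.

from pydantic_ai.messages import ModelRequest, TextPart, UserPromptPart

from shotgun.agents.history.constants import SUMMARY_MARKER
from shotgun.agents.history.history_building import build_clean_compacted_history

# A stand-in conversation; in practice this is the full history from an agent run.
history = [ModelRequest(parts=[UserPromptPart(content="Explain the API key setup.")])]

summary_part = TextPart(
    content=f"{SUMMARY_MARKER} User asked about provider configuration; "
    "assistant explained how API keys are resolved from env vars or config."
)

compacted = build_clean_compacted_history(
    summary_part=summary_part,
    all_messages=history,
    last_summary_index=None,  # no earlier summary in this conversation
)
# `compacted` now carries the tagged summary as the conversation baseline in
# place of the verbose history; the marker lets later runs detect prior compaction.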