code-puppy 0.0.171__py3-none-any.whl → 0.0.173__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_puppy/agent.py +8 -8
- code_puppy/agents/agent_creator_agent.py +0 -3
- code_puppy/agents/agent_qa_kitten.py +203 -0
- code_puppy/agents/base_agent.py +398 -2
- code_puppy/command_line/command_handler.py +68 -28
- code_puppy/command_line/mcp/add_command.py +2 -2
- code_puppy/command_line/mcp/base.py +1 -1
- code_puppy/command_line/mcp/install_command.py +2 -2
- code_puppy/command_line/mcp/list_command.py +1 -1
- code_puppy/command_line/mcp/search_command.py +1 -1
- code_puppy/command_line/mcp/start_all_command.py +1 -1
- code_puppy/command_line/mcp/status_command.py +2 -2
- code_puppy/command_line/mcp/stop_all_command.py +1 -1
- code_puppy/command_line/mcp/utils.py +1 -1
- code_puppy/command_line/mcp/wizard_utils.py +2 -2
- code_puppy/config.py +141 -12
- code_puppy/http_utils.py +50 -24
- code_puppy/main.py +2 -1
- code_puppy/{mcp → mcp_}/config_wizard.py +1 -1
- code_puppy/{mcp → mcp_}/examples/retry_example.py +1 -1
- code_puppy/{mcp → mcp_}/managed_server.py +1 -1
- code_puppy/{mcp → mcp_}/server_registry_catalog.py +1 -3
- code_puppy/message_history_processor.py +83 -221
- code_puppy/messaging/message_queue.py +4 -4
- code_puppy/state_management.py +1 -100
- code_puppy/tools/__init__.py +103 -6
- code_puppy/tools/browser/__init__.py +0 -0
- code_puppy/tools/browser/browser_control.py +293 -0
- code_puppy/tools/browser/browser_interactions.py +552 -0
- code_puppy/tools/browser/browser_locators.py +642 -0
- code_puppy/tools/browser/browser_navigation.py +251 -0
- code_puppy/tools/browser/browser_screenshot.py +242 -0
- code_puppy/tools/browser/browser_scripts.py +478 -0
- code_puppy/tools/browser/browser_workflows.py +196 -0
- code_puppy/tools/browser/camoufox_manager.py +194 -0
- code_puppy/tools/browser/vqa_agent.py +66 -0
- code_puppy/tools/browser_control.py +293 -0
- code_puppy/tools/browser_interactions.py +552 -0
- code_puppy/tools/browser_locators.py +642 -0
- code_puppy/tools/browser_navigation.py +251 -0
- code_puppy/tools/browser_screenshot.py +278 -0
- code_puppy/tools/browser_scripts.py +478 -0
- code_puppy/tools/browser_workflows.py +215 -0
- code_puppy/tools/camoufox_manager.py +150 -0
- code_puppy/tools/command_runner.py +13 -8
- code_puppy/tools/file_operations.py +7 -7
- code_puppy/tui/app.py +1 -1
- code_puppy/tui/components/custom_widgets.py +1 -1
- code_puppy/tui/screens/mcp_install_wizard.py +8 -8
- code_puppy/tui_state.py +55 -0
- {code_puppy-0.0.171.dist-info → code_puppy-0.0.173.dist-info}/METADATA +3 -1
- code_puppy-0.0.173.dist-info/RECORD +132 -0
- code_puppy-0.0.171.dist-info/RECORD +0 -112
- /code_puppy/{mcp → mcp_}/__init__.py +0 -0
- /code_puppy/{mcp → mcp_}/async_lifecycle.py +0 -0
- /code_puppy/{mcp → mcp_}/blocking_startup.py +0 -0
- /code_puppy/{mcp → mcp_}/captured_stdio_server.py +0 -0
- /code_puppy/{mcp → mcp_}/circuit_breaker.py +0 -0
- /code_puppy/{mcp → mcp_}/dashboard.py +0 -0
- /code_puppy/{mcp → mcp_}/error_isolation.py +0 -0
- /code_puppy/{mcp → mcp_}/health_monitor.py +0 -0
- /code_puppy/{mcp → mcp_}/manager.py +0 -0
- /code_puppy/{mcp → mcp_}/registry.py +0 -0
- /code_puppy/{mcp → mcp_}/retry_manager.py +0 -0
- /code_puppy/{mcp → mcp_}/status_tracker.py +0 -0
- /code_puppy/{mcp → mcp_}/system_tools.py +0 -0
- {code_puppy-0.0.171.data → code_puppy-0.0.173.data}/data/code_puppy/models.json +0 -0
- {code_puppy-0.0.171.dist-info → code_puppy-0.0.173.dist-info}/WHEEL +0 -0
- {code_puppy-0.0.171.dist-info → code_puppy-0.0.173.dist-info}/entry_points.txt +0 -0
- {code_puppy-0.0.171.dist-info → code_puppy-0.0.173.dist-info}/licenses/LICENSE +0 -0
code_puppy/agent.py
CHANGED
```diff
@@ -7,10 +7,7 @@ from pydantic_ai import Agent
 from pydantic_ai.settings import ModelSettings
 from pydantic_ai.usage import UsageLimits
 
-from code_puppy.message_history_processor import (
-    get_model_context_length,
-    message_history_accumulator,
-)
+from code_puppy.message_history_processor import message_history_accumulator
 from code_puppy.messaging.message_queue import (
     emit_error,
     emit_info,
@@ -48,7 +45,7 @@ _code_generation_agent = None
 def _load_mcp_servers(extra_headers: Optional[Dict[str, str]] = None):
     """Load MCP servers using the new manager while maintaining backward compatibility."""
     from code_puppy.config import get_value, load_mcp_server_configs
-    from code_puppy.
+    from code_puppy.mcp_ import ServerConfig, get_mcp_manager
 
     # Check if MCP servers are disabled
     mcp_disabled = get_value("disable_mcp_servers")
@@ -113,7 +110,7 @@ def _load_mcp_servers(extra_headers: Optional[Dict[str, str]] = None):
 
 def reload_mcp_servers():
     """Reload MCP servers without restarting the agent."""
-    from code_puppy.
+    from code_puppy.mcp_ import get_mcp_manager
 
     manager = get_mcp_manager()
     # Reload configurations
@@ -167,14 +164,17 @@ def reload_code_generation_agent(message_group: str | None):
 
     # Configure model settings with max_tokens if set
     model_settings_dict = {"seed": 42}
-
+    # Get current agent to use its method
+    from code_puppy.agents import get_current_agent_config
+    current_agent = get_current_agent_config()
+    output_tokens = max(2048, min(int(0.05 * current_agent.get_model_context_length()) - 1024, 16384))
     console.print(f"Max output tokens per message: {output_tokens}")
     model_settings_dict["max_tokens"] = output_tokens
 
 
     model_settings = ModelSettings(**model_settings_dict)
     if "gpt-5" in model_name:
-        model_settings_dict["openai_reasoning_effort"] = "
+        model_settings_dict["openai_reasoning_effort"] = "off"
         model_settings_dict["extra_body"] = {
             "verbosity": "low"
         }
```
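For context, the new max_tokens line budgets output at 5% of the active model's context window minus 1024, clamped to the range 2048 to 16384. A quick worked example (assuming the 128,000-token default that base_agent.py falls back to; the variable names here are illustrative, not from the package):

```python
# Illustrative only: mirrors the formula added in reload_code_generation_agent above.
context_length = 128_000  # assumed default context length from models.json
output_tokens = max(2048, min(int(0.05 * context_length) - 1024, 16384))
print(output_tokens)  # 5376; a 16k-context model clamps up to 2048, a 1M-context model down to 16384
```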
code_puppy/agents/agent_creator_agent.py
CHANGED

```diff
@@ -245,9 +245,6 @@ Best-practice guidelines for `invoke_agent`:
 Your solutions should be production-ready, maintainable, and follow best practices for the chosen language.
 
 Return your final response as a string output
-Walmart specific rules:
-- You are operating inside Walmart Global Tech! Yay!
-- Always use uv when working with python, and always use --index-url https://pypi.ci.artifacts.walmart.com/artifactory/api/pypi/external-pypi/simple
 
 ## Tool Templates:
 
```
code_puppy/agents/agent_qa_kitten.py
ADDED

```python
"""Quality Assurance Kitten - Playwright-powered browser automation agent."""

from .base_agent import BaseAgent


class QualityAssuranceKittenAgent(BaseAgent):
    """Quality Assurance Kitten - Advanced browser automation with Playwright."""

    @property
    def name(self) -> str:
        return "qa-kitten"

    @property
    def display_name(self) -> str:
        return "Quality Assurance Kitten 🐱"

    @property
    def description(self) -> str:
        return "Advanced web browser automation and quality assurance testing using Playwright with VQA capabilities"

    def get_available_tools(self) -> list[str]:
        """Get the list of tools available to Web Browser Puppy."""
        return [
            # Core agent tools
            "agent_share_your_reasoning",
            # Browser control and initialization
            "browser_initialize",
            "browser_close",
            "browser_status",
            "browser_new_page",
            "browser_list_pages",
            # Browser navigation
            "browser_navigate",
            "browser_get_page_info",
            "browser_go_back",
            "browser_go_forward",
            "browser_reload",
            "browser_wait_for_load",
            # Element discovery (semantic locators preferred)
            "browser_find_by_role",
            "browser_find_by_text",
            "browser_find_by_label",
            "browser_find_by_placeholder",
            "browser_find_by_test_id",
            "browser_find_buttons",
            "browser_find_links",
            "browser_xpath_query",  # Fallback when semantic locators fail
            # Element interactions
            "browser_click",
            "browser_double_click",
            "browser_hover",
            "browser_set_text",
            "browser_get_text",
            "browser_get_value",
            "browser_select_option",
            "browser_check",
            "browser_uncheck",
            # Advanced features
            "browser_execute_js",
            "browser_scroll",
            "browser_scroll_to_element",
            "browser_set_viewport",
            "browser_wait_for_element",
            "browser_highlight_element",
            "browser_clear_highlights",
            # Screenshots and VQA
            "browser_screenshot_analyze",
            # Workflow management
            "browser_save_workflow",
            "browser_list_workflows",
            "browser_read_workflow",
        ]

    def get_system_prompt(self) -> str:
        """Get Web Browser Puppy's specialized system prompt."""
        return """
You are Quality Assurance Kitten 🐱, an advanced autonomous browser automation and QA testing agent powered by Playwright!

You specialize in:
🎯 **Quality Assurance Testing** - automated testing of web applications and user workflows
👁️ **Visual verification** - taking screenshots and analyzing page content for bugs
🔍 **Element discovery** - finding elements using semantic locators and accessibility best practices
📝 **Data extraction** - scraping content and gathering information from web pages
🧪 **Web automation** - filling forms, clicking buttons, navigating sites with precision
🐛 **Bug detection** - identifying UI issues, broken functionality, and accessibility problems

## Core Workflow Philosophy

For any browser task, follow this approach:
1. **Check Existing Workflows**: Use browser_list_workflows to see if similar tasks have been solved before
2. **Learn from History**: If relevant workflows exist, use browser_read_workflow to review proven strategies
3. **Plan & Reason**: Use share_your_reasoning to break down complex tasks and explain your approach
4. **Initialize**: Always start with browser_initialize if browser isn't running
5. **Navigate**: Use browser_navigate to reach the target page
6. **Discover**: Use semantic locators (PREFERRED) for element discovery
7. **Verify**: Use highlighting and screenshots to confirm elements
8. **Act**: Interact with elements through clicks, typing, etc.
9. **Validate**: Take screenshots or query DOM to verify actions worked
10. **Document Success**: Use browser_save_workflow to save successful patterns for future reuse

## Tool Usage Guidelines

### Browser Initialization
- **ALWAYS call browser_initialize first** before any other browser operations
- Choose appropriate settings: headless=False for debugging, headless=True for production
- Use browser_status to check current state

### Element Discovery Best Practices (ACCESSIBILITY FIRST! 🌟)
- **PREFER semantic locators** - they're more reliable and follow accessibility standards
- Priority order:
  1. browser_find_by_role (button, link, textbox, heading, etc.)
  2. browser_find_by_label (for form inputs)
  3. browser_find_by_text (for visible text)
  4. browser_find_by_placeholder (for input hints)
  5. browser_find_by_test_id (for test-friendly elements)
  6. browser_xpath_query (ONLY as last resort)

### Visual Verification Workflow
- **Before critical actions**: Use browser_highlight_element to visually confirm
- **After interactions**: Use browser_screenshot_analyze to verify results
- **VQA questions**: Ask specific, actionable questions like "Is the login button highlighted?"

### Form Input Best Practices
- **ALWAYS check current values** with browser_get_value before typing
- Use browser_get_value after typing to verify success
- This prevents typing loops and gives clear visibility into form state
- Clear fields when appropriate before entering new text

### Error Handling & Troubleshooting

**When Element Discovery Fails:**
1. Try different semantic locators first
2. Use browser_find_buttons or browser_find_links to see available elements
3. Take a screenshot with browser_screenshot_analyze to understand the page layout
4. Only use XPath as absolute last resort

**When Page Interactions Fail:**
1. Check if element is visible with browser_wait_for_element
2. Scroll element into view with browser_scroll_to_element
3. Use browser_highlight_element to confirm element location
4. Try browser_execute_js for complex interactions

### JavaScript Execution
- Use browser_execute_js for:
  - Complex page state checks
  - Custom scrolling behavior
  - Triggering events that standard tools can't handle
  - Accessing browser APIs

### Workflow Management 📋

**ALWAYS start new tasks by checking for existing workflows!**

**At the beginning of any automation task:**
1. **browser_list_workflows** - Check what workflows are already available
2. **browser_read_workflow** - If you find a relevant workflow, read it to understand the proven approach
3. Adapt and apply the successful patterns from existing workflows

**When to save workflows:**
- After successfully completing a complex multi-step task
- When you discover a reliable pattern for a common website interaction
- After troubleshooting and finding working solutions for tricky elements
- Include both the successful steps AND the challenges/solutions you encountered

**Workflow naming conventions:**
- Use descriptive names like "search_and_atc_walmart", "login_to_github", "fill_contact_form"
- Include the website domain for clarity
- Focus on the main goal/outcome

**What to include in saved workflows:**
- Step-by-step tool usage with specific parameters
- Element discovery strategies that worked
- Common pitfalls and how to avoid them
- Alternative approaches for edge cases
- Tips for handling dynamic content

### Performance & Best Practices
- Use appropriate timeouts for element discovery (default 10s is usually fine)
- Take screenshots strategically - not after every single action
- Use browser_wait_for_load when navigating to ensure pages are ready
- Clear highlights when done for clean visual state

## Specialized Capabilities

🌐 **WCAG 2.2 Level AA Compliance**: Always prioritize accessibility in element discovery
📸 **Visual Question Answering**: Use browser_screenshot_analyze for intelligent page analysis
🚀 **Semantic Web Navigation**: Prefer role-based and label-based element discovery
⚡ **Playwright Power**: Full access to modern browser automation capabilities
📋 **Workflow Management**: Save, load, and reuse automation patterns for consistency

## Important Rules

- **ALWAYS check for existing workflows first** - Use browser_list_workflows at the start of new tasks
- **ALWAYS use browser_initialize before any browser operations**
- **PREFER semantic locators over XPath** - they're more maintainable and accessible
- **Use visual verification for critical actions** - highlight elements and take screenshots
- **Be explicit about your reasoning** - use share_your_reasoning for complex workflows
- **Handle errors gracefully** - provide helpful debugging information
- **Follow accessibility best practices** - your automation should work for everyone
- **Document your successes** - Save working patterns with browser_save_workflow for future reuse

Your browser automation should be reliable, maintainable, and accessible. You are a meticulous QA engineer who catches bugs before users do! 🐱✨
"""
```
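The "semantic locators first, XPath last" priority in the prompt above mirrors Playwright's own locator API. A minimal Playwright sketch, shown only for illustration (it is not part of this package, and the browser_find_by_* tools are assumed to wrap calls along these lines):

```python
# Minimal Playwright sketch (sync API) of semantic-first element discovery.
# Assumption: the browser_find_by_* tools wrap locators roughly like these.
from playwright.sync_api import sync_playwright

with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    page = browser.new_page()
    page.goto("https://example.com")

    # Preferred: role-, label-, placeholder-, and test-id-based locators
    page.get_by_role("link", name="More information").click()
    # page.get_by_label("Email").fill("user@example.com")
    # page.get_by_placeholder("Search").fill("kittens")
    # page.get_by_test_id("submit-button").click()

    # Last resort: raw XPath
    # page.locator("xpath=//a[contains(text(), 'More information')]").click()

    browser.close()
```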
code_puppy/agents/base_agent.py
CHANGED
```diff
@@ -1,13 +1,26 @@
 """Base agent configuration class for defining agent properties."""
 
+import json
+import queue
 import uuid
 from abc import ABC, abstractmethod
-from typing import Any, Dict, List, Optional, Set
+from typing import Any, Dict, List, Optional, Set, Tuple
+
+import pydantic
+from pydantic_ai.messages import (
+    ModelMessage,
+    ModelRequest,
+    TextPart,
+    ToolCallPart,
+    ToolCallPartDelta,
+    ToolReturn,
+    ToolReturnPart,
+)
 
 
 class BaseAgent(ABC):
     """Base class for all agent configurations."""
-
+
     def __init__(self):
         self.id = str(uuid.uuid4())
         self._message_history: List[Any] = []
@@ -114,3 +127,386 @@
             message_hash: Hash of a message that has been compacted/summarized.
         """
         self._compacted_message_hashes.add(message_hash)
+
+    def get_model_name(self) -> Optional[str]:
+        """Get pinned model name for this agent, if specified.
+
+        Returns:
+            Model name to use for this agent, or None to use global default.
+        """
+        from ..config import get_agent_pinned_model
+        return get_agent_pinned_model(self.name)
+
+    # Message history processing methods (moved from state_management.py and message_history_processor.py)
+    def _stringify_part(self, part: Any) -> str:
+        """Create a stable string representation for a message part.
+
+        We deliberately ignore timestamps so identical content hashes the same even when
+        emitted at different times. This prevents status updates from blowing up the
+        history when they are repeated with new timestamps."""
+
+        attributes: List[str] = [part.__class__.__name__]
+
+        # Role/instructions help disambiguate parts that otherwise share content
+        if hasattr(part, "role") and part.role:
+            attributes.append(f"role={part.role}")
+        if hasattr(part, "instructions") and part.instructions:
+            attributes.append(f"instructions={part.instructions}")
+
+        if hasattr(part, "tool_call_id") and part.tool_call_id:
+            attributes.append(f"tool_call_id={part.tool_call_id}")
+
+        if hasattr(part, "tool_name") and part.tool_name:
+            attributes.append(f"tool_name={part.tool_name}")
+
+        content = getattr(part, "content", None)
+        if content is None:
+            attributes.append("content=None")
+        elif isinstance(content, str):
+            attributes.append(f"content={content}")
+        elif isinstance(content, pydantic.BaseModel):
+            attributes.append(f"content={json.dumps(content.model_dump(), sort_keys=True)}")
+        elif isinstance(content, dict):
+            attributes.append(f"content={json.dumps(content, sort_keys=True)}")
+        else:
+            attributes.append(f"content={repr(content)}")
+        result = "|".join(attributes)
+        return result
+
+    def hash_message(self, message: Any) -> int:
+        """Create a stable hash for a model message that ignores timestamps."""
+        role = getattr(message, "role", None)
+        instructions = getattr(message, "instructions", None)
+        header_bits: List[str] = []
+        if role:
+            header_bits.append(f"role={role}")
+        if instructions:
+            header_bits.append(f"instructions={instructions}")
+
+        part_strings = [self._stringify_part(part) for part in getattr(message, "parts", [])]
+        canonical = "||".join(header_bits + part_strings)
+        return hash(canonical)
+
+    def stringify_message_part(self, part) -> str:
+        """
+        Convert a message part to a string representation for token estimation or other uses.
+
+        Args:
+            part: A message part that may contain content or be a tool call
+
+        Returns:
+            String representation of the message part
+        """
+        result = ""
+        if hasattr(part, "part_kind"):
+            result += part.part_kind + ": "
+        else:
+            result += str(type(part)) + ": "
+
+        # Handle content
+        if hasattr(part, "content") and part.content:
+            # Handle different content types
+            if isinstance(part.content, str):
+                result = part.content
+            elif isinstance(part.content, pydantic.BaseModel):
+                result = json.dumps(part.content.model_dump())
+            elif isinstance(part.content, dict):
+                result = json.dumps(part.content)
+            else:
+                result = str(part.content)
+
+        # Handle tool calls which may have additional token costs
+        # If part also has content, we'll process tool calls separately
+        if hasattr(part, "tool_name") and part.tool_name:
+            # Estimate tokens for tool name and parameters
+            tool_text = part.tool_name
+            if hasattr(part, "args"):
+                tool_text += f" {str(part.args)}"
+            result += tool_text
+
+        return result
+
+    def estimate_tokens_for_message(self, message: ModelMessage) -> int:
+        """
+        Estimate the number of tokens in a message using len(message) - 4.
+        Simple and fast replacement for tiktoken.
+        """
+        total_tokens = 0
+
+        for part in message.parts:
+            part_str = self.stringify_message_part(part)
+            if part_str:
+                total_tokens += len(part_str)
+
+        return int(max(1, total_tokens) / 4)
+
+    def _is_tool_call_part(self, part: Any) -> bool:
+        if isinstance(part, (ToolCallPart, ToolCallPartDelta)):
+            return True
+
+        part_kind = (getattr(part, "part_kind", "") or "").replace("_", "-")
+        if part_kind == "tool-call":
+            return True
+
+        has_tool_name = getattr(part, "tool_name", None) is not None
+        has_args = getattr(part, "args", None) is not None
+        has_args_delta = getattr(part, "args_delta", None) is not None
+
+        return bool(has_tool_name and (has_args or has_args_delta))
+
+    def _is_tool_return_part(self, part: Any) -> bool:
+        if isinstance(part, (ToolReturnPart, ToolReturn)):
+            return True
+
+        part_kind = (getattr(part, "part_kind", "") or "").replace("_", "-")
+        if part_kind in {"tool-return", "tool-result"}:
+            return True
+
+        if getattr(part, "tool_call_id", None) is None:
+            return False
+
+        has_content = getattr(part, "content", None) is not None
+        has_content_delta = getattr(part, "content_delta", None) is not None
+        return bool(has_content or has_content_delta)
+
+    def filter_huge_messages(self, messages: List[ModelMessage]) -> List[ModelMessage]:
+        if not messages:
+            return []
+
+        # Never drop the system prompt, even if it is extremely large.
+        system_message, *rest = messages
+        filtered_rest = [
+            m for m in rest if self.estimate_tokens_for_message(m) < 50000
+        ]
+        return [system_message] + filtered_rest
+
+    def split_messages_for_protected_summarization(
+        self,
+        messages: List[ModelMessage],
+    ) -> Tuple[List[ModelMessage], List[ModelMessage]]:
+        """
+        Split messages into two groups: messages to summarize and protected recent messages.
+
+        Returns:
+            Tuple of (messages_to_summarize, protected_messages)
+
+        The protected_messages are the most recent messages that total up to the configured protected token count.
+        The system message (first message) is always protected.
+        All other messages that don't fit in the protected zone will be summarized.
+        """
+        if len(messages) <= 1:  # Just system message or empty
+            return [], messages
+
+        # Always protect the system message (first message)
+        system_message = messages[0]
+        system_tokens = self.estimate_tokens_for_message(system_message)
+
+        if len(messages) == 1:
+            return [], messages
+
+        # Get the configured protected token count
+        from ..config import get_protected_token_count
+        protected_tokens_limit = get_protected_token_count()
+
+        # Calculate tokens for messages from most recent backwards (excluding system message)
+        protected_messages = []
+        protected_token_count = system_tokens  # Start with system message tokens
+
+        # Go backwards through non-system messages to find protected zone
+        for i in range(len(messages) - 1, 0, -1):  # Stop at 1, not 0 (skip system message)
+            message = messages[i]
+            message_tokens = self.estimate_tokens_for_message(message)
+
+            # If adding this message would exceed protected tokens, stop here
+            if protected_token_count + message_tokens > protected_tokens_limit:
+                break
+
+            protected_messages.append(message)
+            protected_token_count += message_tokens
+
+        # Messages that were added while scanning backwards are currently in reverse order.
+        # Reverse them to restore chronological ordering, then prepend the system prompt.
+        protected_messages.reverse()
+        protected_messages.insert(0, system_message)
+
+        # Messages to summarize are everything between the system message and the
+        # protected tail zone we just constructed.
+        protected_start_idx = max(1, len(messages) - (len(protected_messages) - 1))
+        messages_to_summarize = messages[1:protected_start_idx]
+
+        # Emit info messages
+        from ..messaging import emit_info
+        emit_info(
+            f"🔒 Protecting {len(protected_messages)} recent messages ({protected_token_count} tokens, limit: {protected_tokens_limit})"
+        )
+        emit_info(f"📝 Summarizing {len(messages_to_summarize)} older messages")
+
+        return messages_to_summarize, protected_messages
+
+    def summarize_messages(
+        self,
+        messages: List[ModelMessage],
+        with_protection: bool = True
+    ) -> Tuple[List[ModelMessage], List[ModelMessage]]:
+        """
+        Summarize messages while protecting recent messages up to PROTECTED_TOKENS.
+
+        Returns:
+            Tuple of (compacted_messages, summarized_source_messages)
+            where compacted_messages always preserves the original system message
+            as the first entry.
+        """
+        messages_to_summarize: List[ModelMessage]
+        protected_messages: List[ModelMessage]
+
+        if with_protection:
+            messages_to_summarize, protected_messages = (
+                self.split_messages_for_protected_summarization(messages)
+            )
+        else:
+            messages_to_summarize = messages[1:] if messages else []
+            protected_messages = messages[:1]
+
+        if not messages:
+            return [], []
+
+        system_message = messages[0]
+
+        if not messages_to_summarize:
+            # Nothing to summarize, so just return the original sequence
+            return self.prune_interrupted_tool_calls(messages), []
+
+        instructions = (
+            "The input will be a log of Agentic AI steps that have been taken"
+            " as well as user queries, etc. Summarize the contents of these steps."
+            " The high level details should remain but the bulk of the content from tool-call"
+            " responses should be compacted and summarized. For example if you see a tool-call"
+            " reading a file, and the file contents are large, then in your summary you might just"
+            " write: * used read_file on space_invaders.cpp - contents removed."
+            "\n Make sure your result is a bulleted list of all steps and interactions."
+            "\n\nNOTE: This summary represents older conversation history. Recent messages are preserved separately."
+        )
+
+        try:
+            from ..summarization_agent import run_summarization_sync
+            new_messages = run_summarization_sync(
+                instructions, message_history=messages_to_summarize
+            )
+
+            if not isinstance(new_messages, list):
+                from ..messaging import emit_warning
+                emit_warning(
+                    "Summarization agent returned non-list output; wrapping into message request"
+                )
+                new_messages = [ModelRequest([TextPart(str(new_messages))])]
+
+            compacted: List[ModelMessage] = [system_message] + list(new_messages)
+
+            # Drop the system message from protected_messages because we already included it
+            protected_tail = [msg for msg in protected_messages if msg is not system_message]
+
+            compacted.extend(protected_tail)
+
+            return self.prune_interrupted_tool_calls(compacted), messages_to_summarize
+        except Exception as e:
+            from ..messaging import emit_error
+            emit_error(f"Summarization failed during compaction: {e}")
+            return messages, []  # Return original messages on failure
+
+    def summarize_message(self, message: ModelMessage) -> ModelMessage:
+        try:
+            # If the message looks like a system/instructions message, skip summarization
+            instructions = getattr(message, "instructions", None)
+            if instructions:
+                return message
+            # If any part is a tool call, skip summarization
+            for part in message.parts:
+                if isinstance(part, ToolCallPart) or getattr(part, "tool_name", None):
+                    return message
+            # Build prompt from textual content parts
+            content_bits: List[str] = []
+            for part in message.parts:
+                s = self.stringify_message_part(part)
+                if s:
+                    content_bits.append(s)
+            if not content_bits:
+                return message
+            prompt = "Please summarize the following user message:\n" + "\n".join(
+                content_bits
+            )
+
+            from ..summarization_agent import run_summarization_sync
+            output_text = run_summarization_sync(prompt)
+            summarized = ModelRequest([TextPart(output_text)])
+            return summarized
+        except Exception as e:
+            from ..messaging import emit_error
+            emit_error(f"Summarization failed: {e}")
+            return message
+
+    def get_model_context_length(self) -> int:
+        """
+        Get the context length for the currently configured model from models.json
+        """
+        from ..config import get_model_name
+        from ..model_factory import ModelFactory
+
+        model_configs = ModelFactory.load_config()
+        model_name = get_model_name()
+
+        # Get context length from model config
+        model_config = model_configs.get(model_name, {})
+        context_length = model_config.get("context_length", 128000)  # Default value
+
+        return int(context_length)
+
+    def prune_interrupted_tool_calls(self, messages: List[ModelMessage]) -> List[ModelMessage]:
+        """
+        Remove any messages that participate in mismatched tool call sequences.
+
+        A mismatched tool call id is one that appears in a ToolCall (model/tool request)
+        without a corresponding tool return, or vice versa. We preserve original order
+        and only drop messages that contain parts referencing mismatched tool_call_ids.
+        """
+        if not messages:
+            return messages
+
+        tool_call_ids: Set[str] = set()
+        tool_return_ids: Set[str] = set()
+
+        # First pass: collect ids for calls vs returns
+        for msg in messages:
+            for part in getattr(msg, "parts", []) or []:
+                tool_call_id = getattr(part, "tool_call_id", None)
+                if not tool_call_id:
+                    continue
+
+                if self._is_tool_call_part(part) and not self._is_tool_return_part(part):
+                    tool_call_ids.add(tool_call_id)
+                elif self._is_tool_return_part(part):
+                    tool_return_ids.add(tool_call_id)
+
+        mismatched: Set[str] = tool_call_ids.symmetric_difference(tool_return_ids)
+        if not mismatched:
+            return messages
+
+        pruned: List[ModelMessage] = []
+        dropped_count = 0
+        for msg in messages:
+            has_mismatched = False
+            for part in getattr(msg, "parts", []) or []:
+                tcid = getattr(part, "tool_call_id", None)
+                if tcid and tcid in mismatched:
+                    has_mismatched = True
+                    break
+            if has_mismatched:
+                dropped_count += 1
+                continue
+            pruned.append(msg)
+
+        if dropped_count:
+            from ..messaging import emit_warning
+            emit_warning(
+                f"Pruned {dropped_count} message(s) with mismatched tool_call_id pairs"
+            )
+        return pruned
```