code-puppy 0.0.171__py3-none-any.whl → 0.0.173__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. code_puppy/agent.py +8 -8
  2. code_puppy/agents/agent_creator_agent.py +0 -3
  3. code_puppy/agents/agent_qa_kitten.py +203 -0
  4. code_puppy/agents/base_agent.py +398 -2
  5. code_puppy/command_line/command_handler.py +68 -28
  6. code_puppy/command_line/mcp/add_command.py +2 -2
  7. code_puppy/command_line/mcp/base.py +1 -1
  8. code_puppy/command_line/mcp/install_command.py +2 -2
  9. code_puppy/command_line/mcp/list_command.py +1 -1
  10. code_puppy/command_line/mcp/search_command.py +1 -1
  11. code_puppy/command_line/mcp/start_all_command.py +1 -1
  12. code_puppy/command_line/mcp/status_command.py +2 -2
  13. code_puppy/command_line/mcp/stop_all_command.py +1 -1
  14. code_puppy/command_line/mcp/utils.py +1 -1
  15. code_puppy/command_line/mcp/wizard_utils.py +2 -2
  16. code_puppy/config.py +141 -12
  17. code_puppy/http_utils.py +50 -24
  18. code_puppy/main.py +2 -1
  19. code_puppy/{mcp → mcp_}/config_wizard.py +1 -1
  20. code_puppy/{mcp → mcp_}/examples/retry_example.py +1 -1
  21. code_puppy/{mcp → mcp_}/managed_server.py +1 -1
  22. code_puppy/{mcp → mcp_}/server_registry_catalog.py +1 -3
  23. code_puppy/message_history_processor.py +83 -221
  24. code_puppy/messaging/message_queue.py +4 -4
  25. code_puppy/state_management.py +1 -100
  26. code_puppy/tools/__init__.py +103 -6
  27. code_puppy/tools/browser/__init__.py +0 -0
  28. code_puppy/tools/browser/browser_control.py +293 -0
  29. code_puppy/tools/browser/browser_interactions.py +552 -0
  30. code_puppy/tools/browser/browser_locators.py +642 -0
  31. code_puppy/tools/browser/browser_navigation.py +251 -0
  32. code_puppy/tools/browser/browser_screenshot.py +242 -0
  33. code_puppy/tools/browser/browser_scripts.py +478 -0
  34. code_puppy/tools/browser/browser_workflows.py +196 -0
  35. code_puppy/tools/browser/camoufox_manager.py +194 -0
  36. code_puppy/tools/browser/vqa_agent.py +66 -0
  37. code_puppy/tools/browser_control.py +293 -0
  38. code_puppy/tools/browser_interactions.py +552 -0
  39. code_puppy/tools/browser_locators.py +642 -0
  40. code_puppy/tools/browser_navigation.py +251 -0
  41. code_puppy/tools/browser_screenshot.py +278 -0
  42. code_puppy/tools/browser_scripts.py +478 -0
  43. code_puppy/tools/browser_workflows.py +215 -0
  44. code_puppy/tools/camoufox_manager.py +150 -0
  45. code_puppy/tools/command_runner.py +13 -8
  46. code_puppy/tools/file_operations.py +7 -7
  47. code_puppy/tui/app.py +1 -1
  48. code_puppy/tui/components/custom_widgets.py +1 -1
  49. code_puppy/tui/screens/mcp_install_wizard.py +8 -8
  50. code_puppy/tui_state.py +55 -0
  51. {code_puppy-0.0.171.dist-info → code_puppy-0.0.173.dist-info}/METADATA +3 -1
  52. code_puppy-0.0.173.dist-info/RECORD +132 -0
  53. code_puppy-0.0.171.dist-info/RECORD +0 -112
  54. /code_puppy/{mcp → mcp_}/__init__.py +0 -0
  55. /code_puppy/{mcp → mcp_}/async_lifecycle.py +0 -0
  56. /code_puppy/{mcp → mcp_}/blocking_startup.py +0 -0
  57. /code_puppy/{mcp → mcp_}/captured_stdio_server.py +0 -0
  58. /code_puppy/{mcp → mcp_}/circuit_breaker.py +0 -0
  59. /code_puppy/{mcp → mcp_}/dashboard.py +0 -0
  60. /code_puppy/{mcp → mcp_}/error_isolation.py +0 -0
  61. /code_puppy/{mcp → mcp_}/health_monitor.py +0 -0
  62. /code_puppy/{mcp → mcp_}/manager.py +0 -0
  63. /code_puppy/{mcp → mcp_}/registry.py +0 -0
  64. /code_puppy/{mcp → mcp_}/retry_manager.py +0 -0
  65. /code_puppy/{mcp → mcp_}/status_tracker.py +0 -0
  66. /code_puppy/{mcp → mcp_}/system_tools.py +0 -0
  67. {code_puppy-0.0.171.data → code_puppy-0.0.173.data}/data/code_puppy/models.json +0 -0
  68. {code_puppy-0.0.171.dist-info → code_puppy-0.0.173.dist-info}/WHEEL +0 -0
  69. {code_puppy-0.0.171.dist-info → code_puppy-0.0.173.dist-info}/entry_points.txt +0 -0
  70. {code_puppy-0.0.171.dist-info → code_puppy-0.0.173.dist-info}/licenses/LICENSE +0 -0
code_puppy/agent.py CHANGED
@@ -7,10 +7,7 @@ from pydantic_ai import Agent
7
7
  from pydantic_ai.settings import ModelSettings
8
8
  from pydantic_ai.usage import UsageLimits
9
9
 
10
- from code_puppy.message_history_processor import (
11
- get_model_context_length,
12
- message_history_accumulator,
13
- )
10
+ from code_puppy.message_history_processor import message_history_accumulator
14
11
  from code_puppy.messaging.message_queue import (
15
12
  emit_error,
16
13
  emit_info,
@@ -48,7 +45,7 @@ _code_generation_agent = None
48
45
  def _load_mcp_servers(extra_headers: Optional[Dict[str, str]] = None):
49
46
  """Load MCP servers using the new manager while maintaining backward compatibility."""
50
47
  from code_puppy.config import get_value, load_mcp_server_configs
51
- from code_puppy.mcp import ServerConfig, get_mcp_manager
48
+ from code_puppy.mcp_ import ServerConfig, get_mcp_manager
52
49
 
53
50
  # Check if MCP servers are disabled
54
51
  mcp_disabled = get_value("disable_mcp_servers")
@@ -113,7 +110,7 @@ def _load_mcp_servers(extra_headers: Optional[Dict[str, str]] = None):
113
110
 
114
111
  def reload_mcp_servers():
115
112
  """Reload MCP servers without restarting the agent."""
116
- from code_puppy.mcp import get_mcp_manager
113
+ from code_puppy.mcp_ import get_mcp_manager
117
114
 
118
115
  manager = get_mcp_manager()
119
116
  # Reload configurations
@@ -167,14 +164,17 @@ def reload_code_generation_agent(message_group: str | None):
167
164
 
168
165
  # Configure model settings with max_tokens if set
169
166
  model_settings_dict = {"seed": 42}
170
- output_tokens = max(2048, min(int(0.05 * get_model_context_length()) - 1024, 16384))
167
+ # Get current agent to use its method
168
+ from code_puppy.agents import get_current_agent_config
169
+ current_agent = get_current_agent_config()
170
+ output_tokens = max(2048, min(int(0.05 * current_agent.get_model_context_length()) - 1024, 16384))
171
171
  console.print(f"Max output tokens per message: {output_tokens}")
172
172
  model_settings_dict["max_tokens"] = output_tokens
173
173
 
174
174
 
175
175
  model_settings = ModelSettings(**model_settings_dict)
176
176
  if "gpt-5" in model_name:
177
- model_settings_dict["openai_reasoning_effort"] = "high"
177
+ model_settings_dict["openai_reasoning_effort"] = "off"
178
178
  model_settings_dict["extra_body"] = {
179
179
  "verbosity": "low"
180
180
  }
@@ -245,9 +245,6 @@ Best-practice guidelines for `invoke_agent`:
245
245
  Your solutions should be production-ready, maintainable, and follow best practices for the chosen language.
246
246
 
247
247
  Return your final response as a string output
248
- Walmart specific rules:
249
- - You are operating inside Walmart Global Tech! Yay!
250
- - Always use uv when working with python, and always use --index-url https://pypi.ci.artifacts.walmart.com/artifactory/api/pypi/external-pypi/simple
251
248
 
252
249
  ## Tool Templates:
253
250
 
@@ -0,0 +1,203 @@
1
+ """Quality Assurance Kitten - Playwright-powered browser automation agent."""
2
+
3
+ from .base_agent import BaseAgent
4
+
5
+
6
+ class QualityAssuranceKittenAgent(BaseAgent):
7
+ """Quality Assurance Kitten - Advanced browser automation with Playwright."""
8
+
9
@property
def name(self) -> str:
    """Return the identifier string for this agent ("qa-kitten")."""
    return "qa-kitten"
12
+
13
@property
def display_name(self) -> str:
    """Return the human-readable name of this agent, including its emoji."""
    return "Quality Assurance Kitten 🐱"
16
+
17
@property
def description(self) -> str:
    """Return a one-sentence summary of what this agent does."""
    return "Advanced web browser automation and quality assurance testing using Playwright with VQA capabilities"
20
+
21
def get_available_tools(self) -> list[str]:
    """Get the list of tool names available to Quality Assurance Kitten.

    Returns:
        Tool names in a fixed order: the reasoning tool first, followed by
        browser control, navigation, element discovery, element interaction,
        advanced features, screenshot analysis and workflow management.
    """
    return [
        # Core agent tools
        "agent_share_your_reasoning",
        # Browser control and initialization
        "browser_initialize",
        "browser_close",
        "browser_status",
        "browser_new_page",
        "browser_list_pages",
        # Browser navigation
        "browser_navigate",
        "browser_get_page_info",
        "browser_go_back",
        "browser_go_forward",
        "browser_reload",
        "browser_wait_for_load",
        # Element discovery (semantic locators preferred)
        "browser_find_by_role",
        "browser_find_by_text",
        "browser_find_by_label",
        "browser_find_by_placeholder",
        "browser_find_by_test_id",
        "browser_find_buttons",
        "browser_find_links",
        "browser_xpath_query",  # Fallback when semantic locators fail
        # Element interactions
        "browser_click",
        "browser_double_click",
        "browser_hover",
        "browser_set_text",
        "browser_get_text",
        "browser_get_value",
        "browser_select_option",
        "browser_check",
        "browser_uncheck",
        # Advanced features
        "browser_execute_js",
        "browser_scroll",
        "browser_scroll_to_element",
        "browser_set_viewport",
        "browser_wait_for_element",
        "browser_highlight_element",
        "browser_clear_highlights",
        # Screenshots and VQA
        "browser_screenshot_analyze",
        # Workflow management
        "browser_save_workflow",
        "browser_list_workflows",
        "browser_read_workflow",
    ]
73
+
74
+ def get_system_prompt(self) -> str:
75
+ """Get Web Browser Puppy's specialized system prompt."""
76
+ return """
77
+ You are Quality Assurance Kitten 🐱, an advanced autonomous browser automation and QA testing agent powered by Playwright!
78
+
79
+ You specialize in:
80
+ 🎯 **Quality Assurance Testing** - automated testing of web applications and user workflows
81
+ 👁️ **Visual verification** - taking screenshots and analyzing page content for bugs
82
+ 🔍 **Element discovery** - finding elements using semantic locators and accessibility best practices
83
+ 📝 **Data extraction** - scraping content and gathering information from web pages
84
+ 🧪 **Web automation** - filling forms, clicking buttons, navigating sites with precision
85
+ 🐛 **Bug detection** - identifying UI issues, broken functionality, and accessibility problems
86
+
87
+ ## Core Workflow Philosophy
88
+
89
+ For any browser task, follow this approach:
90
+ 1. **Check Existing Workflows**: Use browser_list_workflows to see if similar tasks have been solved before
91
+ 2. **Learn from History**: If relevant workflows exist, use browser_read_workflow to review proven strategies
92
+ 3. **Plan & Reason**: Use share_your_reasoning to break down complex tasks and explain your approach
93
+ 4. **Initialize**: Always start with browser_initialize if browser isn't running
94
+ 5. **Navigate**: Use browser_navigate to reach the target page
95
+ 6. **Discover**: Use semantic locators (PREFERRED) for element discovery
96
+ 7. **Verify**: Use highlighting and screenshots to confirm elements
97
+ 8. **Act**: Interact with elements through clicks, typing, etc.
98
+ 9. **Validate**: Take screenshots or query DOM to verify actions worked
99
+ 10. **Document Success**: Use browser_save_workflow to save successful patterns for future reuse
100
+
101
+ ## Tool Usage Guidelines
102
+
103
+ ### Browser Initialization
104
+ - **ALWAYS call browser_initialize first** before any other browser operations
105
+ - Choose appropriate settings: headless=False for debugging, headless=True for production
106
+ - Use browser_status to check current state
107
+
108
+ ### Element Discovery Best Practices (ACCESSIBILITY FIRST! 🌟)
109
+ - **PREFER semantic locators** - they're more reliable and follow accessibility standards
110
+ - Priority order:
111
+ 1. browser_find_by_role (button, link, textbox, heading, etc.)
112
+ 2. browser_find_by_label (for form inputs)
113
+ 3. browser_find_by_text (for visible text)
114
+ 4. browser_find_by_placeholder (for input hints)
115
+ 5. browser_find_by_test_id (for test-friendly elements)
116
+ 6. browser_xpath_query (ONLY as last resort)
117
+
118
+ ### Visual Verification Workflow
119
+ - **Before critical actions**: Use browser_highlight_element to visually confirm
120
+ - **After interactions**: Use browser_screenshot_analyze to verify results
121
+ - **VQA questions**: Ask specific, actionable questions like "Is the login button highlighted?"
122
+
123
+ ### Form Input Best Practices
124
+ - **ALWAYS check current values** with browser_get_value before typing
125
+ - Use browser_get_value after typing to verify success
126
+ - This prevents typing loops and gives clear visibility into form state
127
+ - Clear fields when appropriate before entering new text
128
+
129
+ ### Error Handling & Troubleshooting
130
+
131
+ **When Element Discovery Fails:**
132
+ 1. Try different semantic locators first
133
+ 2. Use browser_find_buttons or browser_find_links to see available elements
134
+ 3. Take a screenshot with browser_screenshot_analyze to understand the page layout
135
+ 4. Only use XPath as absolute last resort
136
+
137
+ **When Page Interactions Fail:**
138
+ 1. Check if element is visible with browser_wait_for_element
139
+ 2. Scroll element into view with browser_scroll_to_element
140
+ 3. Use browser_highlight_element to confirm element location
141
+ 4. Try browser_execute_js for complex interactions
142
+
143
+ ### JavaScript Execution
144
+ - Use browser_execute_js for:
145
+ - Complex page state checks
146
+ - Custom scrolling behavior
147
+ - Triggering events that standard tools can't handle
148
+ - Accessing browser APIs
149
+
150
+ ### Workflow Management 📋
151
+
152
+ **ALWAYS start new tasks by checking for existing workflows!**
153
+
154
+ **At the beginning of any automation task:**
155
+ 1. **browser_list_workflows** - Check what workflows are already available
156
+ 2. **browser_read_workflow** - If you find a relevant workflow, read it to understand the proven approach
157
+ 3. Adapt and apply the successful patterns from existing workflows
158
+
159
+ **When to save workflows:**
160
+ - After successfully completing a complex multi-step task
161
+ - When you discover a reliable pattern for a common website interaction
162
+ - After troubleshooting and finding working solutions for tricky elements
163
+ - Include both the successful steps AND the challenges/solutions you encountered
164
+
165
+ **Workflow naming conventions:**
166
+ - Use descriptive names like "search_and_atc_walmart", "login_to_github", "fill_contact_form"
167
+ - Include the website domain for clarity
168
+ - Focus on the main goal/outcome
169
+
170
+ **What to include in saved workflows:**
171
+ - Step-by-step tool usage with specific parameters
172
+ - Element discovery strategies that worked
173
+ - Common pitfalls and how to avoid them
174
+ - Alternative approaches for edge cases
175
+ - Tips for handling dynamic content
176
+
177
+ ### Performance & Best Practices
178
+ - Use appropriate timeouts for element discovery (default 10s is usually fine)
179
+ - Take screenshots strategically - not after every single action
180
+ - Use browser_wait_for_load when navigating to ensure pages are ready
181
+ - Clear highlights when done for clean visual state
182
+
183
+ ## Specialized Capabilities
184
+
185
+ 🌐 **WCAG 2.2 Level AA Compliance**: Always prioritize accessibility in element discovery
186
+ 📸 **Visual Question Answering**: Use browser_screenshot_analyze for intelligent page analysis
187
+ 🚀 **Semantic Web Navigation**: Prefer role-based and label-based element discovery
188
+ ⚡ **Playwright Power**: Full access to modern browser automation capabilities
189
+ 📋 **Workflow Management**: Save, load, and reuse automation patterns for consistency
190
+
191
+ ## Important Rules
192
+
193
+ - **ALWAYS check for existing workflows first** - Use browser_list_workflows at the start of new tasks
194
+ - **ALWAYS use browser_initialize before any browser operations**
195
+ - **PREFER semantic locators over XPath** - they're more maintainable and accessible
196
+ - **Use visual verification for critical actions** - highlight elements and take screenshots
197
+ - **Be explicit about your reasoning** - use share_your_reasoning for complex workflows
198
+ - **Handle errors gracefully** - provide helpful debugging information
199
+ - **Follow accessibility best practices** - your automation should work for everyone
200
+ - **Document your successes** - Save working patterns with browser_save_workflow for future reuse
201
+
202
+ Your browser automation should be reliable, maintainable, and accessible. You are a meticulous QA engineer who catches bugs before users do! 🐱✨
203
+ """
@@ -1,13 +1,26 @@
1
1
  """Base agent configuration class for defining agent properties."""
2
2
 
3
+ import json
4
+ import queue
3
5
  import uuid
4
6
  from abc import ABC, abstractmethod
5
- from typing import Any, Dict, List, Optional, Set
7
+ from typing import Any, Dict, List, Optional, Set, Tuple
8
+
9
+ import pydantic
10
+ from pydantic_ai.messages import (
11
+ ModelMessage,
12
+ ModelRequest,
13
+ TextPart,
14
+ ToolCallPart,
15
+ ToolCallPartDelta,
16
+ ToolReturn,
17
+ ToolReturnPart,
18
+ )
6
19
 
7
20
 
8
21
  class BaseAgent(ABC):
9
22
  """Base class for all agent configurations."""
10
-
23
+
11
24
  def __init__(self):
12
25
  self.id = str(uuid.uuid4())
13
26
  self._message_history: List[Any] = []
@@ -114,3 +127,386 @@ class BaseAgent(ABC):
114
127
  message_hash: Hash of a message that has been compacted/summarized.
115
128
  """
116
129
  self._compacted_message_hashes.add(message_hash)
130
+
131
def get_model_name(self) -> Optional[str]:
    """Look up the model pinned to this agent.

    Returns:
        Whatever ``config.get_agent_pinned_model`` returns for ``self.name``;
        per the original contract this is the pinned model name, or None
        when the global default should be used.
    """
    # Imported lazily inside the method, as in the original code.
    from ..config import get_agent_pinned_model

    pinned_model = get_agent_pinned_model(self.name)
    return pinned_model
139
+
140
+ # Message history processing methods (moved from state_management.py and message_history_processor.py)
141
+ def _stringify_part(self, part: Any) -> str:
142
+ """Create a stable string representation for a message part.
143
+
144
+ We deliberately ignore timestamps so identical content hashes the same even when
145
+ emitted at different times. This prevents status updates from blowing up the
146
+ history when they are repeated with new timestamps."""
147
+
148
+ attributes: List[str] = [part.__class__.__name__]
149
+
150
+ # Role/instructions help disambiguate parts that otherwise share content
151
+ if hasattr(part, "role") and part.role:
152
+ attributes.append(f"role={part.role}")
153
+ if hasattr(part, "instructions") and part.instructions:
154
+ attributes.append(f"instructions={part.instructions}")
155
+
156
+ if hasattr(part, "tool_call_id") and part.tool_call_id:
157
+ attributes.append(f"tool_call_id={part.tool_call_id}")
158
+
159
+ if hasattr(part, "tool_name") and part.tool_name:
160
+ attributes.append(f"tool_name={part.tool_name}")
161
+
162
+ content = getattr(part, "content", None)
163
+ if content is None:
164
+ attributes.append("content=None")
165
+ elif isinstance(content, str):
166
+ attributes.append(f"content={content}")
167
+ elif isinstance(content, pydantic.BaseModel):
168
+ attributes.append(f"content={json.dumps(content.model_dump(), sort_keys=True)}")
169
+ elif isinstance(content, dict):
170
+ attributes.append(f"content={json.dumps(content, sort_keys=True)}")
171
+ else:
172
+ attributes.append(f"content={repr(content)}")
173
+ result = "|".join(attributes)
174
+ return result
175
+
176
def hash_message(self, message: Any) -> int:
    """Hash a model message by content, ignoring timestamps.

    The canonical form is the message's truthy ``role`` and ``instructions``
    followed by the ``_stringify_part`` rendering of every part, joined with
    ``||``. The result is the builtin ``hash()`` of that string.

    Note: Python randomizes string hashes per process unless PYTHONHASHSEED
    is fixed, so the value is only comparable within one interpreter run.
    """
    fragments: List[str] = []
    for field in ("role", "instructions"):
        value = getattr(message, field, None)
        if value:
            fragments.append(f"{field}={value}")

    fragments.extend(
        self._stringify_part(part) for part in getattr(message, "parts", [])
    )
    return hash("||".join(fragments))
189
+
190
def stringify_message_part(self, part) -> str:
    """Render a message part as text, e.g. for token estimation.

    Args:
        part: A message part that may carry ``content`` and/or describe a
            tool call (``tool_name`` and optionally ``args``).

    Returns:
        The part's content rendered as a string (JSON for pydantic models
        and dicts), or ``"<kind>: "`` when there is no truthy content, with
        the tool name and arguments appended when ``tool_name`` is set.
    """
    kind = part.part_kind if hasattr(part, "part_kind") else str(type(part))
    text = kind + ": "

    body = getattr(part, "content", None)
    if body:
        # NOTE(review): the content rendering replaces the "<kind>: " prefix
        # rather than extending it; kept as-is to preserve existing output.
        if isinstance(body, str):
            text = body
        elif isinstance(body, pydantic.BaseModel):
            text = json.dumps(body.model_dump())
        elif isinstance(body, dict):
            text = json.dumps(body)
        else:
            text = str(body)

    # Tool calls add their name and arguments to whatever text exists so far.
    tool_name = getattr(part, "tool_name", None)
    if tool_name:
        text += tool_name
        if hasattr(part, "args"):
            text += f" {str(part.args)}"

    return text
228
+
229
+ def estimate_tokens_for_message(self, message: ModelMessage) -> int:
230
+ """
231
+ Estimate the number of tokens in a message using len(message) - 4.
232
+ Simple and fast replacement for tiktoken.
233
+ """
234
+ total_tokens = 0
235
+
236
+ for part in message.parts:
237
+ part_str = self.stringify_message_part(part)
238
+ if part_str:
239
+ total_tokens += len(part_str)
240
+
241
+ return int(max(1, total_tokens) / 4)
242
+
243
def _is_tool_call_part(self, part: Any) -> bool:
    """Return True when ``part`` looks like a tool call.

    A part qualifies if it is a ``ToolCallPart``/``ToolCallPartDelta``, if
    its ``part_kind`` is "tool-call" (underscores treated as hyphens), or if
    it has a non-None ``tool_name`` together with non-None ``args`` or
    ``args_delta``.
    """
    if isinstance(part, (ToolCallPart, ToolCallPartDelta)):
        return True

    kind = getattr(part, "part_kind", "") or ""
    if kind.replace("_", "-") == "tool-call":
        return True

    # Duck-typed fallback: a tool name plus some form of arguments.
    if getattr(part, "tool_name", None) is None:
        return False
    return any(
        getattr(part, attr, None) is not None for attr in ("args", "args_delta")
    )
256
+
257
def _is_tool_return_part(self, part: Any) -> bool:
    """Return True when ``part`` looks like the result of a tool call.

    A part qualifies if it is a ``ToolReturnPart``/``ToolReturn``, if its
    ``part_kind`` is "tool-return" or "tool-result" (underscores treated as
    hyphens), or if it has a non-None ``tool_call_id`` together with
    non-None ``content`` or ``content_delta``.
    """
    if isinstance(part, (ToolReturnPart, ToolReturn)):
        return True

    kind = getattr(part, "part_kind", "") or ""
    if kind.replace("_", "-") in {"tool-return", "tool-result"}:
        return True

    # Duck-typed fallback: needs a call id plus some form of content.
    if getattr(part, "tool_call_id", None) is None:
        return False
    return any(
        getattr(part, attr, None) is not None
        for attr in ("content", "content_delta")
    )
271
+
272
+ def filter_huge_messages(self, messages: List[ModelMessage]) -> List[ModelMessage]:
273
+ if not messages:
274
+ return []
275
+
276
+ # Never drop the system prompt, even if it is extremely large.
277
+ system_message, *rest = messages
278
+ filtered_rest = [
279
+ m for m in rest if self.estimate_tokens_for_message(m) < 50000
280
+ ]
281
+ return [system_message] + filtered_rest
282
+
283
+ def split_messages_for_protected_summarization(
284
+ self,
285
+ messages: List[ModelMessage],
286
+ ) -> Tuple[List[ModelMessage], List[ModelMessage]]:
287
+ """
288
+ Split messages into two groups: messages to summarize and protected recent messages.
289
+
290
+ Returns:
291
+ Tuple of (messages_to_summarize, protected_messages)
292
+
293
+ The protected_messages are the most recent messages that total up to the configured protected token count.
294
+ The system message (first message) is always protected.
295
+ All other messages that don't fit in the protected zone will be summarized.
296
+ """
297
+ if len(messages) <= 1: # Just system message or empty
298
+ return [], messages
299
+
300
+ # Always protect the system message (first message)
301
+ system_message = messages[0]
302
+ system_tokens = self.estimate_tokens_for_message(system_message)
303
+
304
+ if len(messages) == 1:
305
+ return [], messages
306
+
307
+ # Get the configured protected token count
308
+ from ..config import get_protected_token_count
309
+ protected_tokens_limit = get_protected_token_count()
310
+
311
+ # Calculate tokens for messages from most recent backwards (excluding system message)
312
+ protected_messages = []
313
+ protected_token_count = system_tokens # Start with system message tokens
314
+
315
+ # Go backwards through non-system messages to find protected zone
316
+ for i in range(len(messages) - 1, 0, -1): # Stop at 1, not 0 (skip system message)
317
+ message = messages[i]
318
+ message_tokens = self.estimate_tokens_for_message(message)
319
+
320
+ # If adding this message would exceed protected tokens, stop here
321
+ if protected_token_count + message_tokens > protected_tokens_limit:
322
+ break
323
+
324
+ protected_messages.append(message)
325
+ protected_token_count += message_tokens
326
+
327
+ # Messages that were added while scanning backwards are currently in reverse order.
328
+ # Reverse them to restore chronological ordering, then prepend the system prompt.
329
+ protected_messages.reverse()
330
+ protected_messages.insert(0, system_message)
331
+
332
+ # Messages to summarize are everything between the system message and the
333
+ # protected tail zone we just constructed.
334
+ protected_start_idx = max(1, len(messages) - (len(protected_messages) - 1))
335
+ messages_to_summarize = messages[1:protected_start_idx]
336
+
337
+ # Emit info messages
338
+ from ..messaging import emit_info
339
+ emit_info(
340
+ f"🔒 Protecting {len(protected_messages)} recent messages ({protected_token_count} tokens, limit: {protected_tokens_limit})"
341
+ )
342
+ emit_info(f"📝 Summarizing {len(messages_to_summarize)} older messages")
343
+
344
+ return messages_to_summarize, protected_messages
345
+
346
+ def summarize_messages(
347
+ self,
348
+ messages: List[ModelMessage],
349
+ with_protection: bool = True
350
+ ) -> Tuple[List[ModelMessage], List[ModelMessage]]:
351
+ """
352
+ Summarize messages while protecting recent messages up to PROTECTED_TOKENS.
353
+
354
+ Returns:
355
+ Tuple of (compacted_messages, summarized_source_messages)
356
+ where compacted_messages always preserves the original system message
357
+ as the first entry.
358
+ """
359
+ messages_to_summarize: List[ModelMessage]
360
+ protected_messages: List[ModelMessage]
361
+
362
+ if with_protection:
363
+ messages_to_summarize, protected_messages = (
364
+ self.split_messages_for_protected_summarization(messages)
365
+ )
366
+ else:
367
+ messages_to_summarize = messages[1:] if messages else []
368
+ protected_messages = messages[:1]
369
+
370
+ if not messages:
371
+ return [], []
372
+
373
+ system_message = messages[0]
374
+
375
+ if not messages_to_summarize:
376
+ # Nothing to summarize, so just return the original sequence
377
+ return self.prune_interrupted_tool_calls(messages), []
378
+
379
+ instructions = (
380
+ "The input will be a log of Agentic AI steps that have been taken"
381
+ " as well as user queries, etc. Summarize the contents of these steps."
382
+ " The high level details should remain but the bulk of the content from tool-call"
383
+ " responses should be compacted and summarized. For example if you see a tool-call"
384
+ " reading a file, and the file contents are large, then in your summary you might just"
385
+ " write: * used read_file on space_invaders.cpp - contents removed."
386
+ "\n Make sure your result is a bulleted list of all steps and interactions."
387
+ "\n\nNOTE: This summary represents older conversation history. Recent messages are preserved separately."
388
+ )
389
+
390
+ try:
391
+ from ..summarization_agent import run_summarization_sync
392
+ new_messages = run_summarization_sync(
393
+ instructions, message_history=messages_to_summarize
394
+ )
395
+
396
+ if not isinstance(new_messages, list):
397
+ from ..messaging import emit_warning
398
+ emit_warning(
399
+ "Summarization agent returned non-list output; wrapping into message request"
400
+ )
401
+ new_messages = [ModelRequest([TextPart(str(new_messages))])]
402
+
403
+ compacted: List[ModelMessage] = [system_message] + list(new_messages)
404
+
405
+ # Drop the system message from protected_messages because we already included it
406
+ protected_tail = [msg for msg in protected_messages if msg is not system_message]
407
+
408
+ compacted.extend(protected_tail)
409
+
410
+ return self.prune_interrupted_tool_calls(compacted), messages_to_summarize
411
+ except Exception as e:
412
+ from ..messaging import emit_error
413
+ emit_error(f"Summarization failed during compaction: {e}")
414
+ return messages, [] # Return original messages on failure
415
+
416
+ def summarize_message(self, message: ModelMessage) -> ModelMessage:
417
+ try:
418
+ # If the message looks like a system/instructions message, skip summarization
419
+ instructions = getattr(message, "instructions", None)
420
+ if instructions:
421
+ return message
422
+ # If any part is a tool call, skip summarization
423
+ for part in message.parts:
424
+ if isinstance(part, ToolCallPart) or getattr(part, "tool_name", None):
425
+ return message
426
+ # Build prompt from textual content parts
427
+ content_bits: List[str] = []
428
+ for part in message.parts:
429
+ s = self.stringify_message_part(part)
430
+ if s:
431
+ content_bits.append(s)
432
+ if not content_bits:
433
+ return message
434
+ prompt = "Please summarize the following user message:\n" + "\n".join(
435
+ content_bits
436
+ )
437
+
438
+ from ..summarization_agent import run_summarization_sync
439
+ output_text = run_summarization_sync(prompt)
440
+ summarized = ModelRequest([TextPart(output_text)])
441
+ return summarized
442
+ except Exception as e:
443
+ from ..messaging import emit_error
444
+ emit_error(f"Summarization failed: {e}")
445
+ return message
446
+
447
def get_model_context_length(self) -> int:
    """Return the context length of the currently configured model.

    The value is the ``context_length`` entry for the model in the
    configuration loaded by ``ModelFactory.load_config()``; 128000 is used
    when the model or the entry is missing.

    NOTE(review): the model is resolved with ``config.get_model_name()``,
    not ``self.get_model_name()`` — confirm this is intended for agents
    that have a pinned model.
    """
    from ..config import get_model_name
    from ..model_factory import ModelFactory

    configs = ModelFactory.load_config()
    active_model = get_model_name()

    entry = configs.get(active_model, {})
    return int(entry.get("context_length", 128000))
462
+
463
+ def prune_interrupted_tool_calls(self, messages: List[ModelMessage]) -> List[ModelMessage]:
464
+ """
465
+ Remove any messages that participate in mismatched tool call sequences.
466
+
467
+ A mismatched tool call id is one that appears in a ToolCall (model/tool request)
468
+ without a corresponding tool return, or vice versa. We preserve original order
469
+ and only drop messages that contain parts referencing mismatched tool_call_ids.
470
+ """
471
+ if not messages:
472
+ return messages
473
+
474
+ tool_call_ids: Set[str] = set()
475
+ tool_return_ids: Set[str] = set()
476
+
477
+ # First pass: collect ids for calls vs returns
478
+ for msg in messages:
479
+ for part in getattr(msg, "parts", []) or []:
480
+ tool_call_id = getattr(part, "tool_call_id", None)
481
+ if not tool_call_id:
482
+ continue
483
+
484
+ if self._is_tool_call_part(part) and not self._is_tool_return_part(part):
485
+ tool_call_ids.add(tool_call_id)
486
+ elif self._is_tool_return_part(part):
487
+ tool_return_ids.add(tool_call_id)
488
+
489
+ mismatched: Set[str] = tool_call_ids.symmetric_difference(tool_return_ids)
490
+ if not mismatched:
491
+ return messages
492
+
493
+ pruned: List[ModelMessage] = []
494
+ dropped_count = 0
495
+ for msg in messages:
496
+ has_mismatched = False
497
+ for part in getattr(msg, "parts", []) or []:
498
+ tcid = getattr(part, "tool_call_id", None)
499
+ if tcid and tcid in mismatched:
500
+ has_mismatched = True
501
+ break
502
+ if has_mismatched:
503
+ dropped_count += 1
504
+ continue
505
+ pruned.append(msg)
506
+
507
+ if dropped_count:
508
+ from ..messaging import emit_warning
509
+ emit_warning(
510
+ f"Pruned {dropped_count} message(s) with mismatched tool_call_id pairs"
511
+ )
512
+ return pruned