code-puppy 0.0.214__py3-none-any.whl → 0.0.366__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. code_puppy/__init__.py +7 -1
  2. code_puppy/agents/__init__.py +2 -0
  3. code_puppy/agents/agent_c_reviewer.py +59 -6
  4. code_puppy/agents/agent_code_puppy.py +7 -1
  5. code_puppy/agents/agent_code_reviewer.py +12 -2
  6. code_puppy/agents/agent_cpp_reviewer.py +73 -6
  7. code_puppy/agents/agent_creator_agent.py +45 -4
  8. code_puppy/agents/agent_golang_reviewer.py +92 -3
  9. code_puppy/agents/agent_javascript_reviewer.py +101 -8
  10. code_puppy/agents/agent_manager.py +81 -4
  11. code_puppy/agents/agent_pack_leader.py +383 -0
  12. code_puppy/agents/agent_planning.py +163 -0
  13. code_puppy/agents/agent_python_programmer.py +165 -0
  14. code_puppy/agents/agent_python_reviewer.py +28 -6
  15. code_puppy/agents/agent_qa_expert.py +98 -6
  16. code_puppy/agents/agent_qa_kitten.py +12 -7
  17. code_puppy/agents/agent_security_auditor.py +113 -3
  18. code_puppy/agents/agent_terminal_qa.py +323 -0
  19. code_puppy/agents/agent_typescript_reviewer.py +106 -7
  20. code_puppy/agents/base_agent.py +802 -176
  21. code_puppy/agents/event_stream_handler.py +350 -0
  22. code_puppy/agents/pack/__init__.py +34 -0
  23. code_puppy/agents/pack/bloodhound.py +304 -0
  24. code_puppy/agents/pack/husky.py +321 -0
  25. code_puppy/agents/pack/retriever.py +393 -0
  26. code_puppy/agents/pack/shepherd.py +348 -0
  27. code_puppy/agents/pack/terrier.py +287 -0
  28. code_puppy/agents/pack/watchdog.py +367 -0
  29. code_puppy/agents/prompt_reviewer.py +145 -0
  30. code_puppy/agents/subagent_stream_handler.py +276 -0
  31. code_puppy/api/__init__.py +13 -0
  32. code_puppy/api/app.py +169 -0
  33. code_puppy/api/main.py +21 -0
  34. code_puppy/api/pty_manager.py +446 -0
  35. code_puppy/api/routers/__init__.py +12 -0
  36. code_puppy/api/routers/agents.py +36 -0
  37. code_puppy/api/routers/commands.py +217 -0
  38. code_puppy/api/routers/config.py +74 -0
  39. code_puppy/api/routers/sessions.py +232 -0
  40. code_puppy/api/templates/terminal.html +361 -0
  41. code_puppy/api/websocket.py +154 -0
  42. code_puppy/callbacks.py +142 -4
  43. code_puppy/chatgpt_codex_client.py +283 -0
  44. code_puppy/claude_cache_client.py +586 -0
  45. code_puppy/cli_runner.py +916 -0
  46. code_puppy/command_line/add_model_menu.py +1079 -0
  47. code_puppy/command_line/agent_menu.py +395 -0
  48. code_puppy/command_line/attachments.py +10 -5
  49. code_puppy/command_line/autosave_menu.py +605 -0
  50. code_puppy/command_line/clipboard.py +527 -0
  51. code_puppy/command_line/colors_menu.py +520 -0
  52. code_puppy/command_line/command_handler.py +176 -738
  53. code_puppy/command_line/command_registry.py +150 -0
  54. code_puppy/command_line/config_commands.py +715 -0
  55. code_puppy/command_line/core_commands.py +792 -0
  56. code_puppy/command_line/diff_menu.py +863 -0
  57. code_puppy/command_line/load_context_completion.py +15 -22
  58. code_puppy/command_line/mcp/base.py +0 -3
  59. code_puppy/command_line/mcp/catalog_server_installer.py +175 -0
  60. code_puppy/command_line/mcp/custom_server_form.py +688 -0
  61. code_puppy/command_line/mcp/custom_server_installer.py +195 -0
  62. code_puppy/command_line/mcp/edit_command.py +148 -0
  63. code_puppy/command_line/mcp/handler.py +9 -4
  64. code_puppy/command_line/mcp/help_command.py +6 -5
  65. code_puppy/command_line/mcp/install_command.py +15 -26
  66. code_puppy/command_line/mcp/install_menu.py +685 -0
  67. code_puppy/command_line/mcp/list_command.py +2 -2
  68. code_puppy/command_line/mcp/logs_command.py +174 -65
  69. code_puppy/command_line/mcp/remove_command.py +2 -2
  70. code_puppy/command_line/mcp/restart_command.py +12 -4
  71. code_puppy/command_line/mcp/search_command.py +16 -10
  72. code_puppy/command_line/mcp/start_all_command.py +18 -6
  73. code_puppy/command_line/mcp/start_command.py +47 -25
  74. code_puppy/command_line/mcp/status_command.py +4 -5
  75. code_puppy/command_line/mcp/stop_all_command.py +7 -1
  76. code_puppy/command_line/mcp/stop_command.py +8 -4
  77. code_puppy/command_line/mcp/test_command.py +2 -2
  78. code_puppy/command_line/mcp/wizard_utils.py +20 -16
  79. code_puppy/command_line/mcp_completion.py +174 -0
  80. code_puppy/command_line/model_picker_completion.py +75 -25
  81. code_puppy/command_line/model_settings_menu.py +884 -0
  82. code_puppy/command_line/motd.py +14 -8
  83. code_puppy/command_line/onboarding_slides.py +179 -0
  84. code_puppy/command_line/onboarding_wizard.py +340 -0
  85. code_puppy/command_line/pin_command_completion.py +329 -0
  86. code_puppy/command_line/prompt_toolkit_completion.py +463 -63
  87. code_puppy/command_line/session_commands.py +296 -0
  88. code_puppy/command_line/utils.py +54 -0
  89. code_puppy/config.py +898 -112
  90. code_puppy/error_logging.py +118 -0
  91. code_puppy/gemini_code_assist.py +385 -0
  92. code_puppy/gemini_model.py +602 -0
  93. code_puppy/http_utils.py +210 -148
  94. code_puppy/keymap.py +128 -0
  95. code_puppy/main.py +5 -698
  96. code_puppy/mcp_/__init__.py +17 -0
  97. code_puppy/mcp_/async_lifecycle.py +35 -4
  98. code_puppy/mcp_/blocking_startup.py +70 -43
  99. code_puppy/mcp_/captured_stdio_server.py +2 -2
  100. code_puppy/mcp_/config_wizard.py +4 -4
  101. code_puppy/mcp_/dashboard.py +15 -6
  102. code_puppy/mcp_/managed_server.py +65 -38
  103. code_puppy/mcp_/manager.py +146 -52
  104. code_puppy/mcp_/mcp_logs.py +224 -0
  105. code_puppy/mcp_/registry.py +6 -6
  106. code_puppy/mcp_/server_registry_catalog.py +24 -5
  107. code_puppy/messaging/__init__.py +199 -2
  108. code_puppy/messaging/bus.py +610 -0
  109. code_puppy/messaging/commands.py +167 -0
  110. code_puppy/messaging/markdown_patches.py +57 -0
  111. code_puppy/messaging/message_queue.py +17 -48
  112. code_puppy/messaging/messages.py +500 -0
  113. code_puppy/messaging/queue_console.py +1 -24
  114. code_puppy/messaging/renderers.py +43 -146
  115. code_puppy/messaging/rich_renderer.py +1027 -0
  116. code_puppy/messaging/spinner/__init__.py +21 -5
  117. code_puppy/messaging/spinner/console_spinner.py +86 -51
  118. code_puppy/messaging/subagent_console.py +461 -0
  119. code_puppy/model_factory.py +634 -83
  120. code_puppy/model_utils.py +167 -0
  121. code_puppy/models.json +66 -68
  122. code_puppy/models_dev_api.json +1 -0
  123. code_puppy/models_dev_parser.py +592 -0
  124. code_puppy/plugins/__init__.py +164 -10
  125. code_puppy/plugins/antigravity_oauth/__init__.py +10 -0
  126. code_puppy/plugins/antigravity_oauth/accounts.py +406 -0
  127. code_puppy/plugins/antigravity_oauth/antigravity_model.py +704 -0
  128. code_puppy/plugins/antigravity_oauth/config.py +42 -0
  129. code_puppy/plugins/antigravity_oauth/constants.py +136 -0
  130. code_puppy/plugins/antigravity_oauth/oauth.py +478 -0
  131. code_puppy/plugins/antigravity_oauth/register_callbacks.py +406 -0
  132. code_puppy/plugins/antigravity_oauth/storage.py +271 -0
  133. code_puppy/plugins/antigravity_oauth/test_plugin.py +319 -0
  134. code_puppy/plugins/antigravity_oauth/token.py +167 -0
  135. code_puppy/plugins/antigravity_oauth/transport.py +767 -0
  136. code_puppy/plugins/antigravity_oauth/utils.py +169 -0
  137. code_puppy/plugins/chatgpt_oauth/__init__.py +8 -0
  138. code_puppy/plugins/chatgpt_oauth/config.py +52 -0
  139. code_puppy/plugins/chatgpt_oauth/oauth_flow.py +328 -0
  140. code_puppy/plugins/chatgpt_oauth/register_callbacks.py +94 -0
  141. code_puppy/plugins/chatgpt_oauth/test_plugin.py +293 -0
  142. code_puppy/plugins/chatgpt_oauth/utils.py +489 -0
  143. code_puppy/plugins/claude_code_oauth/README.md +167 -0
  144. code_puppy/plugins/claude_code_oauth/SETUP.md +93 -0
  145. code_puppy/plugins/claude_code_oauth/__init__.py +6 -0
  146. code_puppy/plugins/claude_code_oauth/config.py +50 -0
  147. code_puppy/plugins/claude_code_oauth/register_callbacks.py +308 -0
  148. code_puppy/plugins/claude_code_oauth/test_plugin.py +283 -0
  149. code_puppy/plugins/claude_code_oauth/utils.py +518 -0
  150. code_puppy/plugins/customizable_commands/__init__.py +0 -0
  151. code_puppy/plugins/customizable_commands/register_callbacks.py +169 -0
  152. code_puppy/plugins/example_custom_command/README.md +280 -0
  153. code_puppy/plugins/example_custom_command/register_callbacks.py +2 -2
  154. code_puppy/plugins/file_permission_handler/__init__.py +4 -0
  155. code_puppy/plugins/file_permission_handler/register_callbacks.py +523 -0
  156. code_puppy/plugins/frontend_emitter/__init__.py +25 -0
  157. code_puppy/plugins/frontend_emitter/emitter.py +121 -0
  158. code_puppy/plugins/frontend_emitter/register_callbacks.py +261 -0
  159. code_puppy/plugins/oauth_puppy_html.py +228 -0
  160. code_puppy/plugins/shell_safety/__init__.py +6 -0
  161. code_puppy/plugins/shell_safety/agent_shell_safety.py +69 -0
  162. code_puppy/plugins/shell_safety/command_cache.py +156 -0
  163. code_puppy/plugins/shell_safety/register_callbacks.py +202 -0
  164. code_puppy/prompts/antigravity_system_prompt.md +1 -0
  165. code_puppy/prompts/codex_system_prompt.md +310 -0
  166. code_puppy/pydantic_patches.py +131 -0
  167. code_puppy/reopenable_async_client.py +8 -8
  168. code_puppy/round_robin_model.py +9 -12
  169. code_puppy/session_storage.py +2 -1
  170. code_puppy/status_display.py +21 -4
  171. code_puppy/summarization_agent.py +41 -13
  172. code_puppy/terminal_utils.py +418 -0
  173. code_puppy/tools/__init__.py +37 -1
  174. code_puppy/tools/agent_tools.py +536 -52
  175. code_puppy/tools/browser/__init__.py +37 -0
  176. code_puppy/tools/browser/browser_control.py +19 -23
  177. code_puppy/tools/browser/browser_interactions.py +41 -48
  178. code_puppy/tools/browser/browser_locators.py +36 -38
  179. code_puppy/tools/browser/browser_manager.py +316 -0
  180. code_puppy/tools/browser/browser_navigation.py +16 -16
  181. code_puppy/tools/browser/browser_screenshot.py +79 -143
  182. code_puppy/tools/browser/browser_scripts.py +32 -42
  183. code_puppy/tools/browser/browser_workflows.py +44 -27
  184. code_puppy/tools/browser/chromium_terminal_manager.py +259 -0
  185. code_puppy/tools/browser/terminal_command_tools.py +521 -0
  186. code_puppy/tools/browser/terminal_screenshot_tools.py +556 -0
  187. code_puppy/tools/browser/terminal_tools.py +525 -0
  188. code_puppy/tools/command_runner.py +930 -147
  189. code_puppy/tools/common.py +1113 -5
  190. code_puppy/tools/display.py +84 -0
  191. code_puppy/tools/file_modifications.py +288 -89
  192. code_puppy/tools/file_operations.py +226 -154
  193. code_puppy/tools/subagent_context.py +158 -0
  194. code_puppy/uvx_detection.py +242 -0
  195. code_puppy/version_checker.py +30 -11
  196. code_puppy-0.0.366.data/data/code_puppy/models.json +110 -0
  197. code_puppy-0.0.366.data/data/code_puppy/models_dev_api.json +1 -0
  198. {code_puppy-0.0.214.dist-info → code_puppy-0.0.366.dist-info}/METADATA +149 -75
  199. code_puppy-0.0.366.dist-info/RECORD +217 -0
  200. {code_puppy-0.0.214.dist-info → code_puppy-0.0.366.dist-info}/WHEEL +1 -1
  201. code_puppy/command_line/mcp/add_command.py +0 -183
  202. code_puppy/messaging/spinner/textual_spinner.py +0 -106
  203. code_puppy/tools/browser/camoufox_manager.py +0 -216
  204. code_puppy/tools/browser/vqa_agent.py +0 -70
  205. code_puppy/tui/__init__.py +0 -10
  206. code_puppy/tui/app.py +0 -1105
  207. code_puppy/tui/components/__init__.py +0 -21
  208. code_puppy/tui/components/chat_view.py +0 -551
  209. code_puppy/tui/components/command_history_modal.py +0 -218
  210. code_puppy/tui/components/copy_button.py +0 -139
  211. code_puppy/tui/components/custom_widgets.py +0 -63
  212. code_puppy/tui/components/human_input_modal.py +0 -175
  213. code_puppy/tui/components/input_area.py +0 -167
  214. code_puppy/tui/components/sidebar.py +0 -309
  215. code_puppy/tui/components/status_bar.py +0 -185
  216. code_puppy/tui/messages.py +0 -27
  217. code_puppy/tui/models/__init__.py +0 -8
  218. code_puppy/tui/models/chat_message.py +0 -25
  219. code_puppy/tui/models/command_history.py +0 -89
  220. code_puppy/tui/models/enums.py +0 -24
  221. code_puppy/tui/screens/__init__.py +0 -17
  222. code_puppy/tui/screens/autosave_picker.py +0 -175
  223. code_puppy/tui/screens/help.py +0 -130
  224. code_puppy/tui/screens/mcp_install_wizard.py +0 -803
  225. code_puppy/tui/screens/settings.py +0 -306
  226. code_puppy/tui/screens/tools.py +0 -74
  227. code_puppy/tui_state.py +0 -55
  228. code_puppy-0.0.214.data/data/code_puppy/models.json +0 -112
  229. code_puppy-0.0.214.dist-info/RECORD +0 -131
  230. {code_puppy-0.0.214.dist-info → code_puppy-0.0.366.dist-info}/entry_points.txt +0 -0
  231. {code_puppy-0.0.214.dist-info → code_puppy-0.0.366.dist-info}/licenses/LICENSE +0 -0
@@ -4,9 +4,21 @@ import asyncio
4
4
  import json
5
5
  import math
6
6
  import signal
7
+ import threading
7
8
  import uuid
8
9
  from abc import ABC, abstractmethod
9
- from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union
10
+ from typing import (
11
+ Any,
12
+ Callable,
13
+ Dict,
14
+ List,
15
+ Optional,
16
+ Sequence,
17
+ Set,
18
+ Tuple,
19
+ Type,
20
+ Union,
21
+ )
10
22
 
11
23
  import mcp
12
24
  import pydantic
@@ -26,13 +38,15 @@ from pydantic_ai.messages import (
26
38
  ModelMessage,
27
39
  ModelRequest,
28
40
  TextPart,
41
+ ThinkingPart,
29
42
  ToolCallPart,
30
43
  ToolCallPartDelta,
31
44
  ToolReturn,
32
45
  ToolReturnPart,
33
46
  )
34
- from pydantic_ai.models.openai import OpenAIChatModelSettings
35
- from pydantic_ai.settings import ModelSettings
47
+ from rich.text import Text
48
+
49
+ from code_puppy.agents.event_stream_handler import event_stream_handler
36
50
 
37
51
  # Consolidated relative imports
38
52
  from code_puppy.config import (
@@ -41,26 +55,31 @@ from code_puppy.config import (
41
55
  get_compaction_threshold,
42
56
  get_global_model_name,
43
57
  get_message_limit,
44
- get_openai_reasoning_effort,
45
58
  get_protected_token_count,
46
59
  get_use_dbos,
47
60
  get_value,
48
- load_mcp_server_configs,
49
61
  )
50
- from code_puppy.mcp_ import ServerConfig, get_mcp_manager
62
+ from code_puppy.error_logging import log_error
63
+ from code_puppy.keymap import cancel_agent_uses_signal, get_cancel_agent_char_code
64
+ from code_puppy.mcp_ import get_mcp_manager
51
65
  from code_puppy.messaging import (
52
66
  emit_error,
53
67
  emit_info,
54
- emit_system_message,
55
68
  emit_warning,
56
69
  )
57
70
  from code_puppy.messaging.spinner import (
58
71
  SpinnerBase,
59
72
  update_spinner_context,
60
73
  )
61
- from code_puppy.model_factory import ModelFactory
74
+ from code_puppy.model_factory import ModelFactory, make_model_settings
62
75
  from code_puppy.summarization_agent import run_summarization_sync
63
- from code_puppy.tools.common import console
76
+ from code_puppy.tools.agent_tools import _active_subagent_tasks
77
+ from code_puppy.tools.command_runner import (
78
+ is_awaiting_user_input,
79
+ )
80
+
81
+ # Global flag to track delayed compaction requests
82
+ _delayed_compaction_requested = False
64
83
 
65
84
  _reload_count = 0
66
85
 
@@ -78,6 +97,9 @@ class BaseAgent(ABC):
78
97
  # Puppy rules loaded lazily
79
98
  self._puppy_rules: Optional[str] = None
80
99
  self.cur_model: pydantic_ai.models.Model
100
+ # Cache for MCP tool definitions (for token estimation)
101
+ # This is populated after the first successful run when MCP tools are retrieved
102
+ self._mcp_tool_definitions_cache: List[Dict[str, Any]] = []
81
103
 
82
104
  @property
83
105
  @abstractmethod
@@ -334,6 +356,173 @@ class BaseAgent(ABC):
334
356
 
335
357
  return max(1, total_tokens)
336
358
 
359
def estimate_context_overhead_tokens(self) -> int:
    """
    Estimate the token overhead from instructions and tool definitions.

    Three sources are summed, each on a best-effort basis:

    1. Instructions. For Claude Code, ChatGPT Codex and Antigravity models
       only the fixed instructions returned by the matching
       ``get_*_instructions()`` helper are counted; for every other model
       the full ``get_system_prompt()`` text is counted.
    2. Tools registered on ``self.pydantic_agent._tools`` (name, docstring,
       and either a ``schema`` attribute or the function annotations).
    3. MCP tool definitions held in ``self._mcp_tool_definitions_cache``
       (name, description, ``inputSchema``).

    Returns:
        The estimated number of overhead tokens (0 when nothing could be
        measured). Failures in any single source or tool are skipped rather
        than raised, so this never aborts the caller.
    """

    def schema_tokens(schema):
        # default=str lets schemas holding non-JSON values (sets, classes,
        # enums, ...) still be measured instead of raising TypeError and
        # silently dropping the whole schema from the estimate.
        if isinstance(schema, dict):
            text = json.dumps(schema, default=str)
        else:
            text = str(schema)
        return self.estimate_token_count(text)

    total_tokens = 0

    # 1. Instructions / system prompt.
    try:
        from code_puppy.model_utils import (
            get_antigravity_instructions,
            get_chatgpt_codex_instructions,
            get_claude_code_instructions,
            is_antigravity_model,
            is_chatgpt_codex_model,
            is_claude_code_model,
        )

        model_name = self.get_model_name() if hasattr(self, "get_model_name") else ""
        if is_claude_code_model(model_name):
            # Only the short fixed instructions count for these models.
            total_tokens += self.estimate_token_count(get_claude_code_instructions())
        elif is_chatgpt_codex_model(model_name):
            total_tokens += self.estimate_token_count(
                get_chatgpt_codex_instructions()
            )
        elif is_antigravity_model(model_name):
            total_tokens += self.estimate_token_count(get_antigravity_instructions())
        else:
            system_prompt = self.get_system_prompt()
            if system_prompt:
                total_tokens += self.estimate_token_count(system_prompt)
    except Exception:
        # Deliberate best effort: an estimate without the prompt is still
        # more useful than failing the caller.
        pass

    # 2. Tools registered directly on the pydantic agent.
    pydantic_agent = getattr(self, "pydantic_agent", None)
    if pydantic_agent:
        tools = getattr(pydantic_agent, "_tools", None)
        if tools and isinstance(tools, dict):
            for tool_name, tool_func in tools.items():
                try:
                    total_tokens += self.estimate_token_count(tool_name)

                    description = getattr(tool_func, "__doc__", None) or ""
                    if description:
                        total_tokens += self.estimate_token_count(description)

                    schema = getattr(tool_func, "schema", None)
                    if schema:
                        total_tokens += schema_tokens(schema)
                    else:
                        # No explicit schema: approximate it from annotations.
                        annotations = getattr(tool_func, "__annotations__", None)
                        if annotations:
                            total_tokens += self.estimate_token_count(
                                str(annotations)
                            )
                except Exception:
                    continue  # Skip tools we can't process

    # 3. MCP tools, read from the cache because fetching them is async.
    mcp_tool_cache = getattr(self, "_mcp_tool_definitions_cache", [])
    if mcp_tool_cache:
        for tool_def in mcp_tool_cache:
            try:
                tool_name = tool_def.get("name", "")
                if tool_name:
                    total_tokens += self.estimate_token_count(tool_name)

                description = tool_def.get("description", "")
                if description:
                    total_tokens += self.estimate_token_count(description)

                input_schema = tool_def.get("inputSchema")
                if input_schema:
                    total_tokens += schema_tokens(input_schema)
            except Exception:
                continue  # Skip tools we can't process

    return total_tokens
479
+
480
+ async def _update_mcp_tool_cache(self) -> None:
481
+ """
482
+ Update the MCP tool definitions cache by fetching tools from running MCP servers.
483
+
484
+ This should be called after a successful run to populate the cache for
485
+ accurate token estimation in subsequent runs.
486
+ """
487
+ mcp_servers = getattr(self, "_mcp_servers", None)
488
+ if not mcp_servers:
489
+ return
490
+
491
+ tool_definitions = []
492
+ for mcp_server in mcp_servers:
493
+ try:
494
+ # Check if the server has list_tools method (pydantic-ai MCP servers)
495
+ if hasattr(mcp_server, "list_tools"):
496
+ # list_tools() returns list[mcp_types.Tool]
497
+ tools = await mcp_server.list_tools()
498
+ for tool in tools:
499
+ tool_def = {
500
+ "name": getattr(tool, "name", ""),
501
+ "description": getattr(tool, "description", ""),
502
+ "inputSchema": getattr(tool, "inputSchema", {}),
503
+ }
504
+ tool_definitions.append(tool_def)
505
+ except Exception:
506
+ # Server might not be running or accessible, skip it
507
+ continue
508
+
509
+ self._mcp_tool_definitions_cache = tool_definitions
510
+
511
def update_mcp_tool_cache_sync(self) -> None:
    """
    Reset the MCP tool definitions cache from synchronous code.

    Despite the name, nothing is fetched here: the cache attribute is rebound
    to a fresh empty list. Intended to be called after MCP servers are
    started or stopped, so that stale definitions stop contributing to token
    estimates.
    """
    # Rebind rather than fetch: listing MCP tools requires an async context.
    self._mcp_tool_definitions_cache = list()
525
+
337
526
  def _is_tool_call_part(self, part: Any) -> bool:
338
527
  if isinstance(part, (ToolCallPart, ToolCallPartDelta)):
339
528
  return True
@@ -516,6 +705,98 @@ class BaseAgent(ABC):
516
705
  # Be safe; don't blow up status/compaction if model lookup fails
517
706
  return 128000
518
707
 
708
def has_pending_tool_calls(self, messages: List[ModelMessage]) -> bool:
    """
    Report whether any tool call in ``messages`` has no matching return.

    A call is a part whose ``part_kind`` is ``"tool-call"``; it is answered
    by a part with the same ``tool_call_id`` whose ``part_kind`` is
    ``"tool-return"``. Parts of any other kind are ignored.

    Args:
        messages: Message history to inspect; messages without ``parts`` and
            parts without a ``tool_call_id`` or ``part_kind`` are skipped.

    Returns:
        True if at least one tool call is unanswered, False otherwise
        (including for an empty history).
    """
    if not messages:
        return False

    called: Set[str] = set()
    answered: Set[str] = set()

    for message in messages:
        for part in getattr(message, "parts", []) or []:
            call_id = getattr(part, "tool_call_id", None)
            if not call_id:
                continue

            # Read defensively, like ``parts`` and ``tool_call_id`` above: a
            # part carrying an id but no ``part_kind`` must not raise here.
            kind = getattr(part, "part_kind", None)
            if kind == "tool-call":
                called.add(call_id)
            elif kind == "tool-return":
                answered.add(call_id)

    return bool(called - answered)
739
+
740
def request_delayed_compaction(self) -> None:
    """
    Flag that compaction should be retried later.

    Sets the module-level ``_delayed_compaction_requested`` flag to True and
    emits an informational message in the ``token_context_status`` group.
    The flag is shared by the whole module rather than stored per agent.
    """
    global _delayed_compaction_requested
    _delayed_compaction_requested = True

    notice = "🔄 Delayed compaction requested - will attempt after tool calls complete"
    emit_info(notice, message_group="token_context_status")
753
+
754
def should_attempt_delayed_compaction(self) -> bool:
    """
    Decide whether a previously requested compaction can run now.

    Returns:
        True when the module-level request flag is set and the current
        message history has no pending tool calls; the flag is cleared in
        that case. False when nothing was requested, or when tool calls are
        still pending (the flag then stays set).
    """
    global _delayed_compaction_requested

    if _delayed_compaction_requested:
        history = self.get_message_history()
        if self.has_pending_tool_calls(history):
            # Still unsafe to compact; keep the request for a later check.
            return False
        _delayed_compaction_requested = False  # consume the request
        return True

    return False
772
+
773
def get_pending_tool_call_count(self, messages: List[ModelMessage]) -> int:
    """
    Count tool calls in ``messages`` that have no matching return.

    Uses the same matching rule as ``has_pending_tool_calls``: a part with
    ``part_kind == "tool-call"`` is pending until a part with the same
    ``tool_call_id`` and ``part_kind == "tool-return"`` appears. Ids are
    de-duplicated, so a repeated call id counts once.

    Args:
        messages: Message history to inspect; messages without ``parts`` and
            parts without a ``tool_call_id`` or ``part_kind`` are skipped.

    Returns:
        Number of distinct unanswered tool call ids (0 for an empty history).
    """
    if not messages:
        return 0

    ids_by_kind = {"tool-call": set(), "tool-return": set()}

    for message in messages:
        for part in getattr(message, "parts", []) or []:
            call_id = getattr(part, "tool_call_id", None)
            if not call_id:
                continue

            # getattr keeps a part that has an id but no ``part_kind`` from
            # raising AttributeError; such parts are simply not counted.
            bucket = ids_by_kind.get(getattr(part, "part_kind", None))
            if bucket is not None:
                bucket.add(call_id)

    return len(ids_by_kind["tool-call"] - ids_by_kind["tool-return"])
799
+
519
800
  def prune_interrupted_tool_calls(
520
801
  self, messages: List[ModelMessage]
521
802
  ) -> List[ModelMessage]:
@@ -570,35 +851,16 @@ class BaseAgent(ABC):
570
851
  # First, prune any interrupted/mismatched tool-call conversations
571
852
  model_max = self.get_model_context_length()
572
853
 
573
- total_current_tokens = sum(
574
- self.estimate_tokens_for_message(msg) for msg in messages
575
- )
854
+ message_tokens = sum(self.estimate_tokens_for_message(msg) for msg in messages)
855
+ context_overhead = self.estimate_context_overhead_tokens()
856
+ total_current_tokens = message_tokens + context_overhead
576
857
  proportion_used = total_current_tokens / model_max
577
858
 
578
- # Check if we're in TUI mode and can update the status bar
579
- from code_puppy.tui_state import get_tui_app_instance, is_tui_mode
580
-
581
859
  context_summary = SpinnerBase.format_context_info(
582
860
  total_current_tokens, model_max, proportion_used
583
861
  )
584
862
  update_spinner_context(context_summary)
585
863
 
586
- if is_tui_mode():
587
- tui_app = get_tui_app_instance()
588
- if tui_app:
589
- try:
590
- # Update the status bar instead of emitting a chat message
591
- status_bar = tui_app.query_one("StatusBar")
592
- status_bar.update_token_info(
593
- total_current_tokens, model_max, proportion_used
594
- )
595
- except Exception as e:
596
- emit_error(e)
597
- else:
598
- emit_info(
599
- f"Final token count after processing: {total_current_tokens}",
600
- message_group="token_context_status",
601
- )
602
864
  # Get the configured compaction threshold
603
865
  compaction_threshold = get_compaction_threshold()
604
866
 
@@ -606,6 +868,21 @@ class BaseAgent(ABC):
606
868
  compaction_strategy = get_compaction_strategy()
607
869
 
608
870
  if proportion_used > compaction_threshold:
871
+ # RACE CONDITION PROTECTION: Check for pending tool calls before summarization
872
+ if compaction_strategy == "summarization" and self.has_pending_tool_calls(
873
+ messages
874
+ ):
875
+ pending_count = self.get_pending_tool_call_count(messages)
876
+ emit_warning(
877
+ f"⚠️ Summarization deferred: {pending_count} pending tool call(s) detected. "
878
+ "Waiting for tool execution to complete before compaction.",
879
+ message_group="token_context_status",
880
+ )
881
+ # Request delayed compaction for when tool calls complete
882
+ self.request_delayed_compaction()
883
+ # Return original messages without compaction
884
+ return messages, []
885
+
609
886
  if compaction_strategy == "truncation":
610
887
  # Use truncation instead of summarization
611
888
  protected_tokens = get_protected_token_count()
@@ -614,7 +891,7 @@ class BaseAgent(ABC):
614
891
  )
615
892
  summarized_messages = [] # No summarization in truncation mode
616
893
  else:
617
- # Default to summarization
894
+ # Default to summarization (safe to proceed - no pending tool calls)
618
895
  result_messages, summarized_messages = self.summarize_messages(
619
896
  self.filter_huge_messages(messages)
620
897
  )
@@ -622,31 +899,12 @@ class BaseAgent(ABC):
622
899
  final_token_count = sum(
623
900
  self.estimate_tokens_for_message(msg) for msg in result_messages
624
901
  )
625
- # Update status bar with final token count if in TUI mode
902
+ # Update spinner with final token count
626
903
  final_summary = SpinnerBase.format_context_info(
627
904
  final_token_count, model_max, final_token_count / model_max
628
905
  )
629
906
  update_spinner_context(final_summary)
630
907
 
631
- if is_tui_mode():
632
- tui_app = get_tui_app_instance()
633
- if tui_app:
634
- try:
635
- status_bar = tui_app.query_one("StatusBar")
636
- status_bar.update_token_info(
637
- final_token_count, model_max, final_token_count / model_max
638
- )
639
- except Exception:
640
- emit_info(
641
- f"Final token count after processing: {final_token_count}",
642
- message_group="token_context_status",
643
- )
644
- else:
645
- emit_info(
646
- f"Final token count after processing: {final_token_count}",
647
- message_group="token_context_status",
648
- )
649
-
650
908
  self.set_message_history(result_messages)
651
909
  for m in summarized_messages:
652
910
  self.add_compacted_message_hash(self.hash_message(m))
@@ -659,6 +917,11 @@ class BaseAgent(ABC):
659
917
  """
660
918
  Truncate message history to manage token usage.
661
919
 
920
+ Protects:
921
+ - The first message (system prompt) - always kept
922
+ - The second message if it contains a ThinkingPart (extended thinking context)
923
+ - The most recent messages up to protected_tokens
924
+
662
925
  Args:
663
926
  messages: List of messages to truncate
664
927
  protected_tokens: Number of tokens to protect
@@ -670,12 +933,30 @@ class BaseAgent(ABC):
670
933
 
671
934
  emit_info("Truncating message history to manage token usage")
672
935
  result = [messages[0]] # Always keep the first message (system prompt)
936
+
937
+ # Check if second message exists and contains a ThinkingPart
938
+ # If so, protect it (extended thinking context shouldn't be lost)
939
+ skip_second = False
940
+ if len(messages) > 1:
941
+ second_msg = messages[1]
942
+ has_thinking = any(
943
+ isinstance(part, ThinkingPart) for part in second_msg.parts
944
+ )
945
+ if has_thinking:
946
+ result.append(second_msg)
947
+ skip_second = True
948
+
673
949
  num_tokens = 0
674
950
  stack = queue.LifoQueue()
675
951
 
952
+ # Determine which messages to consider for the recent-tokens window
953
+ # Skip first message (already added), and skip second if it has thinking
954
+ start_idx = 2 if skip_second else 1
955
+ messages_to_scan = messages[start_idx:]
956
+
676
957
  # Put messages in reverse order (most recent first) into the stack
677
958
  # but break when we exceed protected_tokens
678
- for idx, msg in enumerate(reversed(messages[1:])): # Skip the first message
959
+ for msg in reversed(messages_to_scan):
679
960
  num_tokens += self.estimate_tokens_for_message(msg)
680
961
  if num_tokens > protected_tokens:
681
962
  break
@@ -708,71 +989,71 @@ class BaseAgent(ABC):
708
989
 
709
990
  # ===== Agent wiring formerly in code_puppy/agent.py =====
710
991
  def load_puppy_rules(self) -> Optional[str]:
711
- """Load AGENT(S).md if present and cache the contents."""
992
+ """Load AGENT(S).md from both global config and project directory.
993
+
994
+ Checks for AGENTS.md/AGENT.md/agents.md/agent.md in this order:
995
+ 1. Global config directory (~/.code_puppy/ or XDG config)
996
+ 2. Current working directory (project-specific)
997
+
998
+ If both exist, they are combined with global rules first, then project rules.
999
+ This allows project-specific rules to override or extend global rules.
1000
+ """
712
1001
  if self._puppy_rules is not None:
713
1002
  return self._puppy_rules
714
1003
  from pathlib import Path
715
1004
 
716
1005
  possible_paths = ["AGENTS.md", "AGENT.md", "agents.md", "agent.md"]
1006
+
1007
+ # Load global rules from CONFIG_DIR
1008
+ global_rules = None
1009
+ from code_puppy.config import CONFIG_DIR
1010
+
717
1011
  for path_str in possible_paths:
718
- puppy_rules_path = Path(path_str)
719
- if puppy_rules_path.exists():
720
- with open(puppy_rules_path, "r") as f:
721
- self._puppy_rules = f.read()
722
- break
1012
+ global_path = Path(CONFIG_DIR) / path_str
1013
+ if global_path.exists():
1014
+ global_rules = global_path.read_text(encoding="utf-8-sig")
1015
+ break
1016
+
1017
+ # Load project-local rules from current working directory
1018
+ project_rules = None
1019
+ for path_str in possible_paths:
1020
+ project_path = Path(path_str)
1021
+ if project_path.exists():
1022
+ project_rules = project_path.read_text(encoding="utf-8-sig")
1023
+ break
1024
+
1025
+ # Combine global and project rules
1026
+ # Global rules come first, project rules second (allowing project to override)
1027
+ rules = [r for r in [global_rules, project_rules] if r]
1028
+ self._puppy_rules = "\n\n".join(rules) if rules else None
723
1029
  return self._puppy_rules
724
1030
 
725
1031
  def load_mcp_servers(self, extra_headers: Optional[Dict[str, str]] = None):
726
- """Load MCP servers through the manager and return pydantic-ai compatible servers."""
1032
+ """Load MCP servers through the manager and return pydantic-ai compatible servers.
1033
+
1034
+ Note: The manager automatically syncs from mcp_servers.json during initialization,
1035
+ so we don't need to sync here. Use reload_mcp_servers() to force a re-sync.
1036
+ """
727
1037
 
728
1038
  mcp_disabled = get_value("disable_mcp_servers")
729
1039
  if mcp_disabled and str(mcp_disabled).lower() in ("1", "true", "yes", "on"):
730
- emit_system_message("[dim]MCP servers disabled via config[/dim]")
731
1040
  return []
732
1041
 
733
1042
  manager = get_mcp_manager()
734
- configs = load_mcp_server_configs()
735
- if not configs:
736
- existing_servers = manager.list_servers()
737
- if not existing_servers:
738
- emit_system_message("[dim]No MCP servers configured[/dim]")
739
- return []
740
- else:
741
- for name, conf in configs.items():
742
- try:
743
- server_config = ServerConfig(
744
- id=conf.get("id", f"{name}_{hash(name)}"),
745
- name=name,
746
- type=conf.get("type", "sse"),
747
- enabled=conf.get("enabled", True),
748
- config=conf,
749
- )
750
- existing = manager.get_server_by_name(name)
751
- if not existing:
752
- manager.register_server(server_config)
753
- emit_system_message(f"[dim]Registered MCP server: {name}[/dim]")
754
- else:
755
- if existing.config != server_config.config:
756
- manager.update_server(existing.id, server_config)
757
- emit_system_message(
758
- f"[dim]Updated MCP server: {name}[/dim]"
759
- )
760
- except Exception as e:
761
- emit_error(f"Failed to register MCP server '{name}': {str(e)}")
762
- continue
763
-
764
- servers = manager.get_servers_for_agent()
765
- if servers:
766
- emit_system_message(
767
- f"[green]Successfully loaded {len(servers)} MCP server(s)[/green]"
768
- )
769
- # Stay silent when there are no servers configured/available
770
- return servers
1043
+ return manager.get_servers_for_agent()
771
1044
 
772
1045
  def reload_mcp_servers(self):
773
- """Reload MCP servers and return updated servers."""
774
- self.load_mcp_servers()
1046
+ """Reload MCP servers and return updated servers.
1047
+
1048
+ Forces a re-sync from mcp_servers.json to pick up any configuration changes.
1049
+ """
1050
+ # Clear the MCP tool cache when servers are reloaded
1051
+ self._mcp_tool_definitions_cache = []
1052
+
1053
+ # Force re-sync from mcp_servers.json
775
1054
  manager = get_mcp_manager()
1055
+ manager.sync_from_config()
1056
+
776
1057
  return manager.get_servers_for_agent()
777
1058
 
778
1059
  def _load_model_with_fallback(
@@ -794,8 +1075,8 @@ class BaseAgent(ABC):
794
1075
  )
795
1076
  emit_warning(
796
1077
  (
797
- f"[yellow]Model '{requested_model_name}' not found. "
798
- f"Available models: {available_str}[/yellow]"
1078
+ f"Model '{requested_model_name}' not found. "
1079
+ f"Available models: {available_str}"
799
1080
  ),
800
1081
  message_group=message_group,
801
1082
  )
@@ -815,7 +1096,7 @@ class BaseAgent(ABC):
815
1096
  try:
816
1097
  model = ModelFactory.get_model(candidate, models_config)
817
1098
  emit_info(
818
- f"[bold cyan]Using fallback model: {candidate}[/bold cyan]",
1099
+ f"Using fallback model: {candidate}",
819
1100
  message_group=message_group,
820
1101
  )
821
1102
  return model, candidate
@@ -827,7 +1108,7 @@ class BaseAgent(ABC):
827
1108
  "a valid model with `config set`."
828
1109
  )
829
1110
  emit_error(
830
- f"[bold red]{friendly_message}[/bold red]",
1111
+ friendly_message,
831
1112
  message_group=message_group,
832
1113
  )
833
1114
  raise ValueError(friendly_message) from exc
@@ -841,10 +1122,6 @@ class BaseAgent(ABC):
841
1122
 
842
1123
  model_name = self.get_model_name()
843
1124
 
844
- emit_info(
845
- f"[bold cyan]Loading Model: {model_name}[/bold cyan]",
846
- message_group=message_group,
847
- )
848
1125
  models_config = ModelFactory.load_config()
849
1126
  model, resolved_model_name = self._load_model_with_fallback(
850
1127
  model_name,
@@ -852,34 +1129,22 @@ class BaseAgent(ABC):
852
1129
  message_group,
853
1130
  )
854
1131
 
855
- emit_info(
856
- f"[bold magenta]Loading Agent: {self.name}[/bold magenta]",
857
- message_group=message_group,
858
- )
859
-
860
1132
  instructions = self.get_system_prompt()
861
1133
  puppy_rules = self.load_puppy_rules()
862
1134
  if puppy_rules:
863
1135
  instructions += f"\n{puppy_rules}"
864
1136
 
865
1137
  mcp_servers = self.load_mcp_servers()
866
- emit_info(f"[dim]DEBUG: Loaded {len(mcp_servers)} MCP servers during reload[/dim]")
867
1138
 
868
- model_settings_dict: Dict[str, Any] = {"seed": 42}
869
- output_tokens = max(
870
- 2048,
871
- min(int(0.05 * self.get_model_context_length()) - 1024, 16384),
872
- )
873
- console.print(f"Max output tokens per message: {output_tokens}")
874
- model_settings_dict["max_tokens"] = output_tokens
1139
+ model_settings = make_model_settings(resolved_model_name)
875
1140
 
876
- model_settings: ModelSettings = ModelSettings(**model_settings_dict)
877
- if "gpt-5" in model_name:
878
- model_settings_dict["openai_reasoning_effort"] = (
879
- get_openai_reasoning_effort()
880
- )
881
- model_settings_dict["extra_body"] = {"verbosity": "low"}
882
- model_settings = OpenAIChatModelSettings(**model_settings_dict)
1141
+ # Handle claude-code models: swap instructions (prompt prepending happens in run_with_mcp)
1142
+ from code_puppy.model_utils import prepare_prompt_for_model
1143
+
1144
+ prepared = prepare_prompt_for_model(
1145
+ model_name, instructions, "", prepend_system_to_user=False
1146
+ )
1147
+ instructions = prepared.instructions
883
1148
 
884
1149
  self.cur_model = model
885
1150
  p_agent = PydanticAgent(
@@ -894,36 +1159,37 @@ class BaseAgent(ABC):
894
1159
 
895
1160
  agent_tools = self.get_available_tools()
896
1161
  register_tools_for_agent(p_agent, agent_tools)
897
-
1162
+
898
1163
  # Get existing tool names to filter out conflicts with MCP tools
899
1164
  existing_tool_names = set()
900
1165
  try:
901
1166
  # Get tools from the agent to find existing tool names
902
- tools = getattr(p_agent, '_tools', None)
1167
+ tools = getattr(p_agent, "_tools", None)
903
1168
  if tools:
904
1169
  existing_tool_names = set(tools.keys())
905
1170
  except Exception:
906
1171
  # If we can't get tool names, proceed without filtering
907
1172
  pass
908
-
1173
+
909
1174
  # Filter MCP server toolsets to remove conflicting tools
910
1175
  filtered_mcp_servers = []
911
1176
  if mcp_servers and existing_tool_names:
912
1177
  for mcp_server in mcp_servers:
913
1178
  try:
914
1179
  # Get tools from this MCP server
915
- server_tools = getattr(mcp_server, 'tools', None)
1180
+ server_tools = getattr(mcp_server, "tools", None)
916
1181
  if server_tools:
917
1182
  # Filter out conflicting tools
918
1183
  filtered_tools = {}
919
1184
  for tool_name, tool_func in server_tools.items():
920
1185
  if tool_name not in existing_tool_names:
921
1186
  filtered_tools[tool_name] = tool_func
922
-
1187
+
923
1188
  # Create a filtered version of the MCP server if we have tools
924
1189
  if filtered_tools:
925
1190
  # Create a new toolset with filtered tools
926
1191
  from pydantic_ai.tools import ToolSet
1192
+
927
1193
  filtered_toolset = ToolSet()
928
1194
  for tool_name, tool_func in filtered_tools.items():
929
1195
  filtered_toolset._tools[tool_name] = tool_func
@@ -934,15 +1200,19 @@ class BaseAgent(ABC):
934
1200
  else:
935
1201
  # Can't get tools from this server, include as-is
936
1202
  filtered_mcp_servers.append(mcp_server)
937
- except Exception as e:
1203
+ except Exception:
938
1204
  # Error processing this server, include as-is to be safe
939
1205
  filtered_mcp_servers.append(mcp_server)
940
1206
  else:
941
1207
  # No filtering needed or possible
942
1208
  filtered_mcp_servers = mcp_servers if mcp_servers else []
943
-
1209
+
944
1210
  if len(filtered_mcp_servers) != len(mcp_servers):
945
- emit_info(f"[dim]Filtered {len(mcp_servers) - len(filtered_mcp_servers)} conflicting MCP tools[/dim]")
1211
+ emit_info(
1212
+ Text.from_markup(
1213
+ f"[dim]Filtered {len(mcp_servers) - len(filtered_mcp_servers)} conflicting MCP tools[/dim]"
1214
+ )
1215
+ )
946
1216
 
947
1217
  self._last_model_name = resolved_model_name
948
1218
  # expose for run_with_mcp
@@ -962,16 +1232,21 @@ class BaseAgent(ABC):
962
1232
  history_processors=[self.message_history_accumulator],
963
1233
  model_settings=model_settings,
964
1234
  )
965
-
1235
+
966
1236
  # Register regular tools (non-MCP) on the new agent
967
1237
  agent_tools = self.get_available_tools()
968
1238
  register_tools_for_agent(agent_without_mcp, agent_tools)
969
-
970
- # Wrap with DBOS
971
- dbos_agent = DBOSAgent(agent_without_mcp, name=f"{self.name}-{_reload_count}")
1239
+
1240
+ # Wrap with DBOS - pass event_stream_handler at construction time
1241
+ # so DBOSModel gets the handler for streaming output
1242
+ dbos_agent = DBOSAgent(
1243
+ agent_without_mcp,
1244
+ name=f"{self.name}-{_reload_count}",
1245
+ event_stream_handler=event_stream_handler,
1246
+ )
972
1247
  self.pydantic_agent = dbos_agent
973
1248
  self._code_generation_agent = dbos_agent
974
-
1249
+
975
1250
  # Store filtered MCP servers separately for runtime use
976
1251
  self._mcp_servers = filtered_mcp_servers
977
1252
  else:
@@ -989,13 +1264,84 @@ class BaseAgent(ABC):
989
1264
  # Register regular tools on the agent
990
1265
  agent_tools = self.get_available_tools()
991
1266
  register_tools_for_agent(p_agent, agent_tools)
992
-
1267
+
993
1268
  self.pydantic_agent = p_agent
994
1269
  self._code_generation_agent = p_agent
995
1270
  self._mcp_servers = filtered_mcp_servers
996
1271
  self._mcp_servers = mcp_servers
997
1272
  return self._code_generation_agent
998
1273
 
1274
+ def _create_agent_with_output_type(self, output_type: Type[Any]) -> PydanticAgent:
1275
+ """Create a temporary agent configured with a custom output_type.
1276
+
1277
+ This is used when structured output is requested via run_with_mcp.
1278
+ The agent is created fresh with the same configuration as the main agent
1279
+ but with the specified output_type instead of str.
1280
+
1281
+ Args:
1282
+ output_type: The Pydantic model or type for structured output.
1283
+
1284
+ Returns:
1285
+ A configured PydanticAgent (or DBOSAgent wrapper) with the custom output_type.
1286
+ """
1287
+ from code_puppy.model_utils import prepare_prompt_for_model
1288
+ from code_puppy.tools import register_tools_for_agent
1289
+
1290
+ model_name = self.get_model_name()
1291
+ models_config = ModelFactory.load_config()
1292
+ model, resolved_model_name = self._load_model_with_fallback(
1293
+ model_name, models_config, str(uuid.uuid4())
1294
+ )
1295
+
1296
+ instructions = self.get_system_prompt()
1297
+ puppy_rules = self.load_puppy_rules()
1298
+ if puppy_rules:
1299
+ instructions += f"\n{puppy_rules}"
1300
+
1301
+ mcp_servers = getattr(self, "_mcp_servers", []) or []
1302
+ model_settings = make_model_settings(resolved_model_name)
1303
+
1304
+ prepared = prepare_prompt_for_model(
1305
+ model_name, instructions, "", prepend_system_to_user=False
1306
+ )
1307
+ instructions = prepared.instructions
1308
+
1309
+ global _reload_count
1310
+ _reload_count += 1
1311
+
1312
+ if get_use_dbos():
1313
+ temp_agent = PydanticAgent(
1314
+ model=model,
1315
+ instructions=instructions,
1316
+ output_type=output_type,
1317
+ retries=3,
1318
+ toolsets=[],
1319
+ history_processors=[self.message_history_accumulator],
1320
+ model_settings=model_settings,
1321
+ )
1322
+ agent_tools = self.get_available_tools()
1323
+ register_tools_for_agent(temp_agent, agent_tools)
1324
+ # Pass event_stream_handler at construction time for streaming output
1325
+ dbos_agent = DBOSAgent(
1326
+ temp_agent,
1327
+ name=f"{self.name}-structured-{_reload_count}",
1328
+ event_stream_handler=event_stream_handler,
1329
+ )
1330
+ return dbos_agent
1331
+ else:
1332
+ temp_agent = PydanticAgent(
1333
+ model=model,
1334
+ instructions=instructions,
1335
+ output_type=output_type,
1336
+ retries=3,
1337
+ toolsets=mcp_servers,
1338
+ history_processors=[self.message_history_accumulator],
1339
+ model_settings=model_settings,
1340
+ )
1341
+ agent_tools = self.get_available_tools()
1342
+ register_tools_for_agent(temp_agent, agent_tools)
1343
+ return temp_agent
1344
+
999
1345
  # It's okay to decorate it with DBOS.step even if not using DBOS; the decorator is a no-op in that case.
1000
1346
  @DBOS.step()
1001
1347
  def message_history_accumulator(self, ctx: RunContext, messages: List[Any]):
@@ -1011,14 +1357,171 @@ class BaseAgent(ABC):
1011
1357
  # Apply message history trimming using the main processor
1012
1358
  # This ensures we maintain global state while still managing context limits
1013
1359
  self.message_history_processor(ctx, _message_history)
1360
+ result_messages_filtered_empty_thinking = []
1361
+ for msg in self.get_message_history():
1362
+ if len(msg.parts) == 1:
1363
+ if isinstance(msg.parts[0], ThinkingPart):
1364
+ if msg.parts[0].content == "":
1365
+ continue
1366
+ result_messages_filtered_empty_thinking.append(msg)
1367
+ self.set_message_history(result_messages_filtered_empty_thinking)
1014
1368
  return self.get_message_history()
1015
1369
 
1370
+ def _spawn_ctrl_x_key_listener(
1371
+ self,
1372
+ stop_event: threading.Event,
1373
+ on_escape: Callable[[], None],
1374
+ on_cancel_agent: Optional[Callable[[], None]] = None,
1375
+ ) -> Optional[threading.Thread]:
1376
+ """Start a keyboard listener thread for CLI sessions.
1377
+
1378
+ Listens for Ctrl+X (shell command cancel) and optionally the configured
1379
+ cancel_agent_key (when not using SIGINT/Ctrl+C).
1380
+
1381
+ Args:
1382
+ stop_event: Event to signal the listener to stop.
1383
+ on_escape: Callback for Ctrl+X (shell command cancel).
1384
+ on_cancel_agent: Optional callback for cancel_agent_key (only used
1385
+ when cancel_agent_uses_signal() returns False).
1386
+ """
1387
+ try:
1388
+ import sys
1389
+ except ImportError:
1390
+ return None
1391
+
1392
+ stdin = getattr(sys, "stdin", None)
1393
+ if stdin is None or not hasattr(stdin, "isatty"):
1394
+ return None
1395
+ try:
1396
+ if not stdin.isatty():
1397
+ return None
1398
+ except Exception:
1399
+ return None
1400
+
1401
+ def listener() -> None:
1402
+ try:
1403
+ if sys.platform.startswith("win"):
1404
+ self._listen_for_ctrl_x_windows(
1405
+ stop_event, on_escape, on_cancel_agent
1406
+ )
1407
+ else:
1408
+ self._listen_for_ctrl_x_posix(
1409
+ stop_event, on_escape, on_cancel_agent
1410
+ )
1411
+ except Exception:
1412
+ emit_warning(
1413
+ "Key listener stopped unexpectedly; press Ctrl+C to cancel."
1414
+ )
1415
+
1416
+ thread = threading.Thread(
1417
+ target=listener, name="code-puppy-key-listener", daemon=True
1418
+ )
1419
+ thread.start()
1420
+ return thread
1421
+
1422
+ def _listen_for_ctrl_x_windows(
1423
+ self,
1424
+ stop_event: threading.Event,
1425
+ on_escape: Callable[[], None],
1426
+ on_cancel_agent: Optional[Callable[[], None]] = None,
1427
+ ) -> None:
1428
+ import msvcrt
1429
+ import time
1430
+
1431
+ # Get the cancel agent char code if we're using keyboard-based cancel
1432
+ cancel_agent_char: Optional[str] = None
1433
+ if on_cancel_agent is not None and not cancel_agent_uses_signal():
1434
+ cancel_agent_char = get_cancel_agent_char_code()
1435
+
1436
+ while not stop_event.is_set():
1437
+ try:
1438
+ if msvcrt.kbhit():
1439
+ key = msvcrt.getwch()
1440
+ if key == "\x18": # Ctrl+X
1441
+ try:
1442
+ on_escape()
1443
+ except Exception:
1444
+ emit_warning(
1445
+ "Ctrl+X handler raised unexpectedly; Ctrl+C still works."
1446
+ )
1447
+ elif (
1448
+ cancel_agent_char
1449
+ and on_cancel_agent
1450
+ and key == cancel_agent_char
1451
+ ):
1452
+ try:
1453
+ on_cancel_agent()
1454
+ except Exception:
1455
+ emit_warning("Cancel agent handler raised unexpectedly.")
1456
+ except Exception:
1457
+ emit_warning(
1458
+ "Windows key listener error; Ctrl+C is still available for cancel."
1459
+ )
1460
+ return
1461
+ time.sleep(0.05)
1462
+
1463
+ def _listen_for_ctrl_x_posix(
1464
+ self,
1465
+ stop_event: threading.Event,
1466
+ on_escape: Callable[[], None],
1467
+ on_cancel_agent: Optional[Callable[[], None]] = None,
1468
+ ) -> None:
1469
+ import select
1470
+ import sys
1471
+ import termios
1472
+ import tty
1473
+
1474
+ # Get the cancel agent char code if we're using keyboard-based cancel
1475
+ cancel_agent_char: Optional[str] = None
1476
+ if on_cancel_agent is not None and not cancel_agent_uses_signal():
1477
+ cancel_agent_char = get_cancel_agent_char_code()
1478
+
1479
+ stdin = sys.stdin
1480
+ try:
1481
+ fd = stdin.fileno()
1482
+ except (AttributeError, ValueError, OSError):
1483
+ return
1484
+ try:
1485
+ original_attrs = termios.tcgetattr(fd)
1486
+ except Exception:
1487
+ return
1488
+
1489
+ try:
1490
+ tty.setcbreak(fd)
1491
+ while not stop_event.is_set():
1492
+ try:
1493
+ read_ready, _, _ = select.select([stdin], [], [], 0.05)
1494
+ except Exception:
1495
+ break
1496
+ if not read_ready:
1497
+ continue
1498
+ data = stdin.read(1)
1499
+ if not data:
1500
+ break
1501
+ if data == "\x18": # Ctrl+X
1502
+ try:
1503
+ on_escape()
1504
+ except Exception:
1505
+ emit_warning(
1506
+ "Ctrl+X handler raised unexpectedly; Ctrl+C still works."
1507
+ )
1508
+ elif (
1509
+ cancel_agent_char and on_cancel_agent and data == cancel_agent_char
1510
+ ):
1511
+ try:
1512
+ on_cancel_agent()
1513
+ except Exception:
1514
+ emit_warning("Cancel agent handler raised unexpectedly.")
1515
+ finally:
1516
+ termios.tcsetattr(fd, termios.TCSADRAIN, original_attrs)
1517
+
1016
1518
  async def run_with_mcp(
1017
1519
  self,
1018
1520
  prompt: str,
1019
1521
  *,
1020
1522
  attachments: Optional[Sequence[BinaryContent]] = None,
1021
1523
  link_attachments: Optional[Sequence[Union[ImageUrl, DocumentUrl]]] = None,
1524
+ output_type: Optional[Type[Any]] = None,
1022
1525
  **kwargs,
1023
1526
  ) -> Any:
1024
1527
  """Run the agent with MCP servers, attachments, and full cancellation support.
@@ -1027,20 +1530,60 @@ class BaseAgent(ABC):
1027
1530
  prompt: Primary user prompt text (may be empty when attachments present).
1028
1531
  attachments: Local binary payloads (e.g., dragged images) to include.
1029
1532
  link_attachments: Remote assets (image/document URLs) to include.
1533
+ output_type: Optional Pydantic model or type for structured output.
1534
+ When provided, creates a temporary agent configured to return
1535
+ this type instead of the default string output.
1030
1536
  **kwargs: Additional arguments forwarded to `pydantic_ai.Agent.run`.
1031
1537
 
1032
1538
  Returns:
1033
- The agent's response.
1539
+ The agent's response (typed according to output_type if specified).
1034
1540
 
1035
1541
  Raises:
1036
1542
  asyncio.CancelledError: When execution is cancelled by user.
1037
1543
  """
1544
+ # Sanitize prompt to remove invalid Unicode surrogates that can cause
1545
+ # encoding errors (especially common on Windows with copy-paste)
1546
+ if prompt:
1547
+ try:
1548
+ prompt = prompt.encode("utf-8", errors="surrogatepass").decode(
1549
+ "utf-8", errors="replace"
1550
+ )
1551
+ except (UnicodeEncodeError, UnicodeDecodeError):
1552
+ # Fallback: filter out surrogate characters directly
1553
+ prompt = "".join(
1554
+ char if ord(char) < 0xD800 or ord(char) > 0xDFFF else "\ufffd"
1555
+ for char in prompt
1556
+ )
1557
+
1038
1558
  group_id = str(uuid.uuid4())
1039
1559
  # Avoid double-loading: reuse existing agent if already built
1040
1560
  pydantic_agent = (
1041
1561
  self._code_generation_agent or self.reload_code_generation_agent()
1042
1562
  )
1043
1563
 
1564
+ # If a custom output_type is specified, create a temporary agent with that type
1565
+ if output_type is not None:
1566
+ pydantic_agent = self._create_agent_with_output_type(output_type)
1567
+
1568
+ # Handle claude-code, chatgpt-codex, and antigravity models: prepend system prompt to first user message
1569
+ from code_puppy.model_utils import (
1570
+ is_antigravity_model,
1571
+ is_chatgpt_codex_model,
1572
+ is_claude_code_model,
1573
+ )
1574
+
1575
+ if (
1576
+ is_claude_code_model(self.get_model_name())
1577
+ or is_chatgpt_codex_model(self.get_model_name())
1578
+ or is_antigravity_model(self.get_model_name())
1579
+ ):
1580
+ if len(self.get_message_history()) == 0:
1581
+ system_prompt = self.get_system_prompt()
1582
+ puppy_rules = self.load_puppy_rules()
1583
+ if puppy_rules:
1584
+ system_prompt += f"\n{puppy_rules}"
1585
+ prompt = system_prompt + "\n\n" + prompt
1586
+
1044
1587
  # Build combined prompt payload when attachments are provided.
1045
1588
  attachment_parts: List[Any] = []
1046
1589
  if attachments:
@@ -1061,15 +1604,35 @@ class BaseAgent(ABC):
1061
1604
  self.set_message_history(
1062
1605
  self.prune_interrupted_tool_calls(self.get_message_history())
1063
1606
  )
1607
+
1608
+ # DELAYED COMPACTION: Check if we should attempt delayed compaction
1609
+ if self.should_attempt_delayed_compaction():
1610
+ emit_info(
1611
+ "🔄 Attempting delayed compaction (tool calls completed)",
1612
+ message_group="token_context_status",
1613
+ )
1614
+ current_messages = self.get_message_history()
1615
+ compacted_messages, _ = self.compact_messages(current_messages)
1616
+ if compacted_messages != current_messages:
1617
+ self.set_message_history(compacted_messages)
1618
+ emit_info(
1619
+ "✅ Delayed compaction completed successfully",
1620
+ message_group="token_context_status",
1621
+ )
1622
+
1064
1623
  usage_limits = UsageLimits(request_limit=get_message_limit())
1065
-
1624
+
1066
1625
  # Handle MCP servers - add them temporarily when using DBOS
1067
- if get_use_dbos() and hasattr(self, '_mcp_servers') and self._mcp_servers:
1626
+ if (
1627
+ get_use_dbos()
1628
+ and hasattr(self, "_mcp_servers")
1629
+ and self._mcp_servers
1630
+ ):
1068
1631
  # Temporarily add MCP servers to the DBOS agent using internal _toolsets
1069
1632
  original_toolsets = pydantic_agent._toolsets
1070
1633
  pydantic_agent._toolsets = original_toolsets + self._mcp_servers
1071
1634
  pydantic_agent._toolsets = original_toolsets + self._mcp_servers
1072
-
1635
+
1073
1636
  try:
1074
1637
  # Set the workflow ID for DBOS context so DBOS and Code Puppy ID match
1075
1638
  with SetWorkflowID(group_id):
@@ -1077,29 +1640,33 @@ class BaseAgent(ABC):
1077
1640
  prompt_payload,
1078
1641
  message_history=self.get_message_history(),
1079
1642
  usage_limits=usage_limits,
1643
+ event_stream_handler=event_stream_handler,
1080
1644
  **kwargs,
1081
1645
  )
1646
+ return result_
1082
1647
  finally:
1083
1648
  # Always restore original toolsets
1084
1649
  pydantic_agent._toolsets = original_toolsets
1085
1650
  elif get_use_dbos():
1086
- # DBOS without MCP servers
1087
1651
  with SetWorkflowID(group_id):
1088
1652
  result_ = await pydantic_agent.run(
1089
1653
  prompt_payload,
1090
1654
  message_history=self.get_message_history(),
1091
1655
  usage_limits=usage_limits,
1656
+ event_stream_handler=event_stream_handler,
1092
1657
  **kwargs,
1093
1658
  )
1659
+ return result_
1094
1660
  else:
1095
1661
  # Non-DBOS path (MCP servers are already included)
1096
1662
  result_ = await pydantic_agent.run(
1097
1663
  prompt_payload,
1098
1664
  message_history=self.get_message_history(),
1099
1665
  usage_limits=usage_limits,
1666
+ event_stream_handler=event_stream_handler,
1100
1667
  **kwargs,
1101
1668
  )
1102
- return result_
1669
+ return result_
1103
1670
  except* UsageLimitExceeded as ule:
1104
1671
  emit_info(f"Usage limit exceeded: {str(ule)}", group_id=group_id)
1105
1672
  emit_info(
@@ -1134,6 +1701,12 @@ class BaseAgent(ABC):
1134
1701
  remaining_exceptions.append(exc)
1135
1702
  emit_info(f"Unexpected error: {str(exc)}", group_id=group_id)
1136
1703
  emit_info(f"{str(exc.args)}", group_id=group_id)
1704
+ # Log to file for debugging
1705
+ log_error(
1706
+ exc,
1707
+ context=f"Agent run (group_id={group_id})",
1708
+ include_traceback=True,
1709
+ )
1137
1710
 
1138
1711
  collect_non_cancelled_exceptions(other_error)
1139
1712
 
@@ -1156,35 +1729,87 @@ class BaseAgent(ABC):
1156
1729
  # Create the task FIRST
1157
1730
  agent_task = asyncio.create_task(run_agent_task())
1158
1731
 
1159
- # Import shell process killer
1160
- from code_puppy.tools.command_runner import kill_all_running_shell_processes
1732
+ # Import shell process status helper
1161
1733
 
1162
- # Ensure the interrupt handler only acts once per task
1163
- def keyboard_interrupt_handler(sig, frame):
1164
- """Signal handler for Ctrl+C - replicating exact original logic"""
1734
+ loop = asyncio.get_running_loop()
1165
1735
 
1166
- # First, nuke any running shell processes triggered by tools
1167
- try:
1168
- killed = kill_all_running_shell_processes()
1169
- if killed:
1170
- emit_info(f"Cancelled {killed} running shell process(es).")
1171
- else:
1172
- # Only cancel the agent task if no shell processes were killed
1173
- if not agent_task.done():
1174
- agent_task.cancel()
1175
- except Exception as e:
1176
- emit_info(f"Shell kill error: {e}")
1177
- if not agent_task.done():
1178
- agent_task.cancel()
1179
- # Don't call the original handler
1180
- # This prevents the application from exiting
1736
+ def schedule_agent_cancel() -> None:
1737
+ from code_puppy.tools.command_runner import _RUNNING_PROCESSES
1738
+
1739
+ if len(_RUNNING_PROCESSES):
1740
+ emit_warning(
1741
+ "Refusing to cancel Agent while a shell command is currently running - press Ctrl+X to cancel the shell command."
1742
+ )
1743
+ return
1744
+ if agent_task.done():
1745
+ return
1746
+
1747
+ # Cancel all active subagent tasks
1748
+ if _active_subagent_tasks:
1749
+ emit_warning(
1750
+ f"Cancelling {len(_active_subagent_tasks)} active subagent task(s)..."
1751
+ )
1752
+ for task in list(
1753
+ _active_subagent_tasks
1754
+ ): # Create a copy since we'll be modifying the set
1755
+ if not task.done():
1756
+ loop.call_soon_threadsafe(task.cancel)
1757
+ loop.call_soon_threadsafe(agent_task.cancel)
1758
+
1759
+ def keyboard_interrupt_handler(_sig, _frame):
1760
+ # If we're awaiting user input (e.g., file permission prompt),
1761
+ # don't cancel the agent - let the input() call handle the interrupt naturally
1762
+ if is_awaiting_user_input():
1763
+ # Don't do anything here - let the input() call raise KeyboardInterrupt naturally
1764
+ return
1765
+
1766
+ schedule_agent_cancel()
1767
+
1768
+ def graceful_sigint_handler(_sig, _frame):
1769
+ # When using keyboard-based cancel, SIGINT should be a no-op
1770
+ # (just show a hint to user about the configured cancel key)
1771
+ # Also reset terminal to prevent bricking on Windows+uvx
1772
+ from code_puppy.keymap import get_cancel_agent_display_name
1773
+ from code_puppy.terminal_utils import reset_windows_terminal_full
1774
+
1775
+ # Reset terminal state first to prevent bricking
1776
+ reset_windows_terminal_full()
1777
+
1778
+ cancel_key = get_cancel_agent_display_name()
1779
+ emit_info(f"Use {cancel_key} to cancel the agent task.")
1780
+
1781
+ original_handler = None
1782
+ key_listener_stop_event = None
1783
+ _key_listener_thread = None
1181
1784
 
1182
1785
  try:
1183
- # Save original handler and set our custom one AFTER task is created
1184
- original_handler = signal.signal(signal.SIGINT, keyboard_interrupt_handler)
1786
+ if cancel_agent_uses_signal():
1787
+ # Use SIGINT-based cancellation (default Ctrl+C behavior)
1788
+ original_handler = signal.signal(
1789
+ signal.SIGINT, keyboard_interrupt_handler
1790
+ )
1791
+ else:
1792
+ # Use keyboard listener for agent cancellation
1793
+ # Set a graceful SIGINT handler that shows a hint
1794
+ original_handler = signal.signal(signal.SIGINT, graceful_sigint_handler)
1795
+ # Spawn keyboard listener with the cancel agent callback
1796
+ key_listener_stop_event = threading.Event()
1797
+ _key_listener_thread = self._spawn_ctrl_x_key_listener(
1798
+ key_listener_stop_event,
1799
+ on_escape=lambda: None, # Ctrl+X handled by command_runner
1800
+ on_cancel_agent=schedule_agent_cancel,
1801
+ )
1185
1802
 
1186
1803
  # Wait for the task to complete or be cancelled
1187
1804
  result = await agent_task
1805
+
1806
+ # Update MCP tool cache after successful run for accurate token estimation
1807
+ if hasattr(self, "_mcp_servers") and self._mcp_servers:
1808
+ try:
1809
+ await self._update_mcp_tool_cache()
1810
+ except Exception:
1811
+ pass # Don't fail the run if cache update fails
1812
+
1188
1813
  return result
1189
1814
  except asyncio.CancelledError:
1190
1815
  agent_task.cancel()
@@ -1192,11 +1817,12 @@ class BaseAgent(ABC):
1192
1817
  # Handle direct keyboard interrupt during await
1193
1818
  if not agent_task.done():
1194
1819
  agent_task.cancel()
1195
- try:
1196
- await agent_task
1197
- except asyncio.CancelledError:
1198
- pass
1199
1820
  finally:
1821
+ # Stop keyboard listener if it was started
1822
+ if key_listener_stop_event is not None:
1823
+ key_listener_stop_event.set()
1200
1824
  # Restore original signal handler
1201
- if original_handler:
1202
- signal.signal(signal.SIGINT, original_handler)
1825
+ if (
1826
+ original_handler is not None
1827
+ ): # Explicit None check - SIG_DFL can be 0/falsy!
1828
+ signal.signal(signal.SIGINT, original_handler)