code-puppy 0.0.214__py3-none-any.whl → 0.0.366__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231)
  1. code_puppy/__init__.py +7 -1
  2. code_puppy/agents/__init__.py +2 -0
  3. code_puppy/agents/agent_c_reviewer.py +59 -6
  4. code_puppy/agents/agent_code_puppy.py +7 -1
  5. code_puppy/agents/agent_code_reviewer.py +12 -2
  6. code_puppy/agents/agent_cpp_reviewer.py +73 -6
  7. code_puppy/agents/agent_creator_agent.py +45 -4
  8. code_puppy/agents/agent_golang_reviewer.py +92 -3
  9. code_puppy/agents/agent_javascript_reviewer.py +101 -8
  10. code_puppy/agents/agent_manager.py +81 -4
  11. code_puppy/agents/agent_pack_leader.py +383 -0
  12. code_puppy/agents/agent_planning.py +163 -0
  13. code_puppy/agents/agent_python_programmer.py +165 -0
  14. code_puppy/agents/agent_python_reviewer.py +28 -6
  15. code_puppy/agents/agent_qa_expert.py +98 -6
  16. code_puppy/agents/agent_qa_kitten.py +12 -7
  17. code_puppy/agents/agent_security_auditor.py +113 -3
  18. code_puppy/agents/agent_terminal_qa.py +323 -0
  19. code_puppy/agents/agent_typescript_reviewer.py +106 -7
  20. code_puppy/agents/base_agent.py +802 -176
  21. code_puppy/agents/event_stream_handler.py +350 -0
  22. code_puppy/agents/pack/__init__.py +34 -0
  23. code_puppy/agents/pack/bloodhound.py +304 -0
  24. code_puppy/agents/pack/husky.py +321 -0
  25. code_puppy/agents/pack/retriever.py +393 -0
  26. code_puppy/agents/pack/shepherd.py +348 -0
  27. code_puppy/agents/pack/terrier.py +287 -0
  28. code_puppy/agents/pack/watchdog.py +367 -0
  29. code_puppy/agents/prompt_reviewer.py +145 -0
  30. code_puppy/agents/subagent_stream_handler.py +276 -0
  31. code_puppy/api/__init__.py +13 -0
  32. code_puppy/api/app.py +169 -0
  33. code_puppy/api/main.py +21 -0
  34. code_puppy/api/pty_manager.py +446 -0
  35. code_puppy/api/routers/__init__.py +12 -0
  36. code_puppy/api/routers/agents.py +36 -0
  37. code_puppy/api/routers/commands.py +217 -0
  38. code_puppy/api/routers/config.py +74 -0
  39. code_puppy/api/routers/sessions.py +232 -0
  40. code_puppy/api/templates/terminal.html +361 -0
  41. code_puppy/api/websocket.py +154 -0
  42. code_puppy/callbacks.py +142 -4
  43. code_puppy/chatgpt_codex_client.py +283 -0
  44. code_puppy/claude_cache_client.py +586 -0
  45. code_puppy/cli_runner.py +916 -0
  46. code_puppy/command_line/add_model_menu.py +1079 -0
  47. code_puppy/command_line/agent_menu.py +395 -0
  48. code_puppy/command_line/attachments.py +10 -5
  49. code_puppy/command_line/autosave_menu.py +605 -0
  50. code_puppy/command_line/clipboard.py +527 -0
  51. code_puppy/command_line/colors_menu.py +520 -0
  52. code_puppy/command_line/command_handler.py +176 -738
  53. code_puppy/command_line/command_registry.py +150 -0
  54. code_puppy/command_line/config_commands.py +715 -0
  55. code_puppy/command_line/core_commands.py +792 -0
  56. code_puppy/command_line/diff_menu.py +863 -0
  57. code_puppy/command_line/load_context_completion.py +15 -22
  58. code_puppy/command_line/mcp/base.py +0 -3
  59. code_puppy/command_line/mcp/catalog_server_installer.py +175 -0
  60. code_puppy/command_line/mcp/custom_server_form.py +688 -0
  61. code_puppy/command_line/mcp/custom_server_installer.py +195 -0
  62. code_puppy/command_line/mcp/edit_command.py +148 -0
  63. code_puppy/command_line/mcp/handler.py +9 -4
  64. code_puppy/command_line/mcp/help_command.py +6 -5
  65. code_puppy/command_line/mcp/install_command.py +15 -26
  66. code_puppy/command_line/mcp/install_menu.py +685 -0
  67. code_puppy/command_line/mcp/list_command.py +2 -2
  68. code_puppy/command_line/mcp/logs_command.py +174 -65
  69. code_puppy/command_line/mcp/remove_command.py +2 -2
  70. code_puppy/command_line/mcp/restart_command.py +12 -4
  71. code_puppy/command_line/mcp/search_command.py +16 -10
  72. code_puppy/command_line/mcp/start_all_command.py +18 -6
  73. code_puppy/command_line/mcp/start_command.py +47 -25
  74. code_puppy/command_line/mcp/status_command.py +4 -5
  75. code_puppy/command_line/mcp/stop_all_command.py +7 -1
  76. code_puppy/command_line/mcp/stop_command.py +8 -4
  77. code_puppy/command_line/mcp/test_command.py +2 -2
  78. code_puppy/command_line/mcp/wizard_utils.py +20 -16
  79. code_puppy/command_line/mcp_completion.py +174 -0
  80. code_puppy/command_line/model_picker_completion.py +75 -25
  81. code_puppy/command_line/model_settings_menu.py +884 -0
  82. code_puppy/command_line/motd.py +14 -8
  83. code_puppy/command_line/onboarding_slides.py +179 -0
  84. code_puppy/command_line/onboarding_wizard.py +340 -0
  85. code_puppy/command_line/pin_command_completion.py +329 -0
  86. code_puppy/command_line/prompt_toolkit_completion.py +463 -63
  87. code_puppy/command_line/session_commands.py +296 -0
  88. code_puppy/command_line/utils.py +54 -0
  89. code_puppy/config.py +898 -112
  90. code_puppy/error_logging.py +118 -0
  91. code_puppy/gemini_code_assist.py +385 -0
  92. code_puppy/gemini_model.py +602 -0
  93. code_puppy/http_utils.py +210 -148
  94. code_puppy/keymap.py +128 -0
  95. code_puppy/main.py +5 -698
  96. code_puppy/mcp_/__init__.py +17 -0
  97. code_puppy/mcp_/async_lifecycle.py +35 -4
  98. code_puppy/mcp_/blocking_startup.py +70 -43
  99. code_puppy/mcp_/captured_stdio_server.py +2 -2
  100. code_puppy/mcp_/config_wizard.py +4 -4
  101. code_puppy/mcp_/dashboard.py +15 -6
  102. code_puppy/mcp_/managed_server.py +65 -38
  103. code_puppy/mcp_/manager.py +146 -52
  104. code_puppy/mcp_/mcp_logs.py +224 -0
  105. code_puppy/mcp_/registry.py +6 -6
  106. code_puppy/mcp_/server_registry_catalog.py +24 -5
  107. code_puppy/messaging/__init__.py +199 -2
  108. code_puppy/messaging/bus.py +610 -0
  109. code_puppy/messaging/commands.py +167 -0
  110. code_puppy/messaging/markdown_patches.py +57 -0
  111. code_puppy/messaging/message_queue.py +17 -48
  112. code_puppy/messaging/messages.py +500 -0
  113. code_puppy/messaging/queue_console.py +1 -24
  114. code_puppy/messaging/renderers.py +43 -146
  115. code_puppy/messaging/rich_renderer.py +1027 -0
  116. code_puppy/messaging/spinner/__init__.py +21 -5
  117. code_puppy/messaging/spinner/console_spinner.py +86 -51
  118. code_puppy/messaging/subagent_console.py +461 -0
  119. code_puppy/model_factory.py +634 -83
  120. code_puppy/model_utils.py +167 -0
  121. code_puppy/models.json +66 -68
  122. code_puppy/models_dev_api.json +1 -0
  123. code_puppy/models_dev_parser.py +592 -0
  124. code_puppy/plugins/__init__.py +164 -10
  125. code_puppy/plugins/antigravity_oauth/__init__.py +10 -0
  126. code_puppy/plugins/antigravity_oauth/accounts.py +406 -0
  127. code_puppy/plugins/antigravity_oauth/antigravity_model.py +704 -0
  128. code_puppy/plugins/antigravity_oauth/config.py +42 -0
  129. code_puppy/plugins/antigravity_oauth/constants.py +136 -0
  130. code_puppy/plugins/antigravity_oauth/oauth.py +478 -0
  131. code_puppy/plugins/antigravity_oauth/register_callbacks.py +406 -0
  132. code_puppy/plugins/antigravity_oauth/storage.py +271 -0
  133. code_puppy/plugins/antigravity_oauth/test_plugin.py +319 -0
  134. code_puppy/plugins/antigravity_oauth/token.py +167 -0
  135. code_puppy/plugins/antigravity_oauth/transport.py +767 -0
  136. code_puppy/plugins/antigravity_oauth/utils.py +169 -0
  137. code_puppy/plugins/chatgpt_oauth/__init__.py +8 -0
  138. code_puppy/plugins/chatgpt_oauth/config.py +52 -0
  139. code_puppy/plugins/chatgpt_oauth/oauth_flow.py +328 -0
  140. code_puppy/plugins/chatgpt_oauth/register_callbacks.py +94 -0
  141. code_puppy/plugins/chatgpt_oauth/test_plugin.py +293 -0
  142. code_puppy/plugins/chatgpt_oauth/utils.py +489 -0
  143. code_puppy/plugins/claude_code_oauth/README.md +167 -0
  144. code_puppy/plugins/claude_code_oauth/SETUP.md +93 -0
  145. code_puppy/plugins/claude_code_oauth/__init__.py +6 -0
  146. code_puppy/plugins/claude_code_oauth/config.py +50 -0
  147. code_puppy/plugins/claude_code_oauth/register_callbacks.py +308 -0
  148. code_puppy/plugins/claude_code_oauth/test_plugin.py +283 -0
  149. code_puppy/plugins/claude_code_oauth/utils.py +518 -0
  150. code_puppy/plugins/customizable_commands/__init__.py +0 -0
  151. code_puppy/plugins/customizable_commands/register_callbacks.py +169 -0
  152. code_puppy/plugins/example_custom_command/README.md +280 -0
  153. code_puppy/plugins/example_custom_command/register_callbacks.py +2 -2
  154. code_puppy/plugins/file_permission_handler/__init__.py +4 -0
  155. code_puppy/plugins/file_permission_handler/register_callbacks.py +523 -0
  156. code_puppy/plugins/frontend_emitter/__init__.py +25 -0
  157. code_puppy/plugins/frontend_emitter/emitter.py +121 -0
  158. code_puppy/plugins/frontend_emitter/register_callbacks.py +261 -0
  159. code_puppy/plugins/oauth_puppy_html.py +228 -0
  160. code_puppy/plugins/shell_safety/__init__.py +6 -0
  161. code_puppy/plugins/shell_safety/agent_shell_safety.py +69 -0
  162. code_puppy/plugins/shell_safety/command_cache.py +156 -0
  163. code_puppy/plugins/shell_safety/register_callbacks.py +202 -0
  164. code_puppy/prompts/antigravity_system_prompt.md +1 -0
  165. code_puppy/prompts/codex_system_prompt.md +310 -0
  166. code_puppy/pydantic_patches.py +131 -0
  167. code_puppy/reopenable_async_client.py +8 -8
  168. code_puppy/round_robin_model.py +9 -12
  169. code_puppy/session_storage.py +2 -1
  170. code_puppy/status_display.py +21 -4
  171. code_puppy/summarization_agent.py +41 -13
  172. code_puppy/terminal_utils.py +418 -0
  173. code_puppy/tools/__init__.py +37 -1
  174. code_puppy/tools/agent_tools.py +536 -52
  175. code_puppy/tools/browser/__init__.py +37 -0
  176. code_puppy/tools/browser/browser_control.py +19 -23
  177. code_puppy/tools/browser/browser_interactions.py +41 -48
  178. code_puppy/tools/browser/browser_locators.py +36 -38
  179. code_puppy/tools/browser/browser_manager.py +316 -0
  180. code_puppy/tools/browser/browser_navigation.py +16 -16
  181. code_puppy/tools/browser/browser_screenshot.py +79 -143
  182. code_puppy/tools/browser/browser_scripts.py +32 -42
  183. code_puppy/tools/browser/browser_workflows.py +44 -27
  184. code_puppy/tools/browser/chromium_terminal_manager.py +259 -0
  185. code_puppy/tools/browser/terminal_command_tools.py +521 -0
  186. code_puppy/tools/browser/terminal_screenshot_tools.py +556 -0
  187. code_puppy/tools/browser/terminal_tools.py +525 -0
  188. code_puppy/tools/command_runner.py +930 -147
  189. code_puppy/tools/common.py +1113 -5
  190. code_puppy/tools/display.py +84 -0
  191. code_puppy/tools/file_modifications.py +288 -89
  192. code_puppy/tools/file_operations.py +226 -154
  193. code_puppy/tools/subagent_context.py +158 -0
  194. code_puppy/uvx_detection.py +242 -0
  195. code_puppy/version_checker.py +30 -11
  196. code_puppy-0.0.366.data/data/code_puppy/models.json +110 -0
  197. code_puppy-0.0.366.data/data/code_puppy/models_dev_api.json +1 -0
  198. {code_puppy-0.0.214.dist-info → code_puppy-0.0.366.dist-info}/METADATA +149 -75
  199. code_puppy-0.0.366.dist-info/RECORD +217 -0
  200. {code_puppy-0.0.214.dist-info → code_puppy-0.0.366.dist-info}/WHEEL +1 -1
  201. code_puppy/command_line/mcp/add_command.py +0 -183
  202. code_puppy/messaging/spinner/textual_spinner.py +0 -106
  203. code_puppy/tools/browser/camoufox_manager.py +0 -216
  204. code_puppy/tools/browser/vqa_agent.py +0 -70
  205. code_puppy/tui/__init__.py +0 -10
  206. code_puppy/tui/app.py +0 -1105
  207. code_puppy/tui/components/__init__.py +0 -21
  208. code_puppy/tui/components/chat_view.py +0 -551
  209. code_puppy/tui/components/command_history_modal.py +0 -218
  210. code_puppy/tui/components/copy_button.py +0 -139
  211. code_puppy/tui/components/custom_widgets.py +0 -63
  212. code_puppy/tui/components/human_input_modal.py +0 -175
  213. code_puppy/tui/components/input_area.py +0 -167
  214. code_puppy/tui/components/sidebar.py +0 -309
  215. code_puppy/tui/components/status_bar.py +0 -185
  216. code_puppy/tui/messages.py +0 -27
  217. code_puppy/tui/models/__init__.py +0 -8
  218. code_puppy/tui/models/chat_message.py +0 -25
  219. code_puppy/tui/models/command_history.py +0 -89
  220. code_puppy/tui/models/enums.py +0 -24
  221. code_puppy/tui/screens/__init__.py +0 -17
  222. code_puppy/tui/screens/autosave_picker.py +0 -175
  223. code_puppy/tui/screens/help.py +0 -130
  224. code_puppy/tui/screens/mcp_install_wizard.py +0 -803
  225. code_puppy/tui/screens/settings.py +0 -306
  226. code_puppy/tui/screens/tools.py +0 -74
  227. code_puppy/tui_state.py +0 -55
  228. code_puppy-0.0.214.data/data/code_puppy/models.json +0 -112
  229. code_puppy-0.0.214.dist-info/RECORD +0 -131
  230. {code_puppy-0.0.214.dist-info → code_puppy-0.0.366.dist-info}/entry_points.txt +0 -0
  231. {code_puppy-0.0.214.dist-info → code_puppy-0.0.366.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,521 @@
1
+ """Terminal command execution tools for browser-based terminal automation.
2
+
3
+ This module provides tools for:
4
+ - Running commands in the terminal browser
5
+ - Sending special keys (Ctrl+C, Tab, arrows, etc.)
6
+ - Waiting for terminal output patterns
7
+
8
+ These tools use the ChromiumTerminalManager to manage the browser instance
9
+ and interact with the xterm.js terminal in the Code Puppy API.
10
+ """
11
+
12
+ import asyncio
13
+ import logging
14
+ import re
15
+ from typing import Any, Dict, List, Optional
16
+
17
+ from pydantic_ai import RunContext
18
+
19
+ from rich.text import Text
20
+
21
+ from code_puppy.messaging import emit_error, emit_info, emit_success
22
+ from code_puppy.tools.browser import format_terminal_banner
23
+ from code_puppy.tools.common import generate_group_id
24
+
25
+ from .terminal_screenshot_tools import terminal_read_output, terminal_screenshot
26
+ from .terminal_tools import get_session_manager
27
+
28
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Default timeouts, in seconds, for the command and output helpers below.
DEFAULT_COMMAND_TIMEOUT = 30.0
DEFAULT_OUTPUT_TIMEOUT = 30.0

# Pause after pressing Enter so the command can start producing output.
# Expressed in milliseconds; callers divide by 1000 before sleeping.
PROMPT_WAIT_MS = 500

# Lower-case modifier aliases -> the key name passed to the page keyboard API.
# Lookups are done on the lower-cased user input (see _normalize_modifier).
MODIFIER_MAP = {
    # Control
    "control": "Control",
    "ctrl": "Control",
    # Shift / Alt
    "shift": "Shift",
    "alt": "Alt",
    # Meta (a.k.a. Command)
    "meta": "Meta",
    "command": "Meta",
    "cmd": "Meta",
}
47
+
48
# Script evaluated in the page to give the xterm.js terminal keyboard focus.
# xterm.js captures keystrokes through a hidden helper <textarea>, so the
# script tries that element first and then progressively broader containers.
# It returns an object with `success` plus either `method` or `error`.
FOCUS_TERMINAL_JS = """
() => {
    // Method 1: Find and focus the xterm helper textarea directly
    // This is the element that actually receives keyboard input in xterm.js
    const textareas = document.querySelectorAll('textarea.xterm-helper-textarea');
    for (const textarea of textareas) {
        textarea.focus();
        // Also click on the parent to ensure xterm knows it's active
        const xterm = textarea.closest('.xterm');
        if (xterm) {
            xterm.click();
        }
        return { success: true, method: 'textarea_focus', found: textareas.length };
    }

    // Method 2: Click on the xterm viewport/screen to trigger focus
    const viewport = document.querySelector('.xterm-viewport') ||
                     document.querySelector('.xterm-screen');
    if (viewport) {
        viewport.click();
        // Try textarea again after click
        const ta = document.querySelector('textarea.xterm-helper-textarea');
        if (ta) ta.focus();
        return { success: true, method: 'viewport_click' };
    }

    // Method 3: Find any xterm element and click it
    const xterm = document.querySelector('.xterm');
    if (xterm) {
        xterm.click();
        const ta = xterm.querySelector('textarea');
        if (ta) ta.focus();
        return { success: true, method: 'xterm_click' };
    }

    // Method 4: Try the terminal container
    const container = document.getElementById('terminal');
    if (container) {
        container.click();
        return { success: true, method: 'container_click' };
    }

    return { success: false, error: 'Could not find terminal element' };
}
"""
95
+
96
+
97
async def _focus_terminal(page) -> Dict[str, Any]:
    """Give keyboard focus to the xterm.js terminal rendered in ``page``.

    The in-page script ``FOCUS_TERMINAL_JS`` is tried first. When it does not
    report success, each selector in a fixed fallback list is queried in
    order; the first element found is clicked and, unless it is the helper
    textarea itself, the helper textarea is focused afterwards.

    Args:
        page: Browser page object exposing ``evaluate`` and ``query_selector``.

    Returns:
        A dict with ``success`` and either ``method`` (how focus was obtained)
        or ``error``. On script success the script's own result is returned
        unchanged. Exceptions are logged as warnings and reported through the
        ``error`` key rather than raised.
    """
    helper_textarea = "textarea.xterm-helper-textarea"
    fallback_selectors = (
        helper_textarea,
        ".xterm-viewport",
        ".xterm-screen",
        ".xterm",
        "#terminal",
    )

    try:
        # Preferred path: let the page-side script locate and focus the terminal.
        js_outcome = await page.evaluate(FOCUS_TERMINAL_JS)
        if js_outcome.get("success"):
            # Short pause so the browser can process the focus change.
            await asyncio.sleep(0.15)
            return js_outcome

        # Fallback path: click the first known element that exists.
        for css in fallback_selectors:
            target = await page.query_selector(css)
            if not target:
                continue

            await target.click()
            await asyncio.sleep(0.1)

            # Clicking a container is not enough on its own; also focus the
            # hidden textarea that actually receives key events.
            if "textarea" not in css:
                hidden_input = await page.query_selector(helper_textarea)
                if hidden_input:
                    await hidden_input.focus()

            return {"success": True, "method": f"fallback_{css}"}

        return {"success": False, "error": "Could not find terminal element to focus"}

    except Exception as e:
        logger.warning(f"Error focusing terminal: {e}")
        return {"success": False, "error": str(e)}
140
+
141
+
142
def _normalize_modifier(modifier: str) -> str:
    """Translate a modifier alias into the name stored in ``MODIFIER_MAP``.

    The lookup is case-insensitive. A name with no entry in the map is
    returned exactly as it was given.
    """
    try:
        return MODIFIER_MAP[modifier.lower()]
    except KeyError:
        return modifier
145
+
146
+
147
async def run_terminal_command(
    command: str,
    wait_for_prompt: bool = True,
    timeout: float = DEFAULT_COMMAND_TIMEOUT,
    capture_screenshot: bool = False,
) -> Dict[str, Any]:
    """Execute a command in the terminal browser.

    Focuses the xterm.js terminal, types ``command`` and presses Enter.
    Optionally captures a screenshot that multimodal models can see directly.

    Args:
        command: The command string to execute.
        wait_for_prompt: If True, sleep for ``PROMPT_WAIT_MS`` (capped at
            ``timeout``) after pressing Enter so the command can start
            producing output. Defaults to True.
        timeout: Upper bound, in seconds, on the post-command pause.
            Defaults to 30.0.
        capture_screenshot: If True, take a screenshot after execution.
            The screenshot is returned as base64 data. Defaults to False.

    Returns:
        A dictionary containing:
            - success (bool): True if command was sent.
            - command (str): The command that was executed.
            - base64_image (str, optional): Screenshot as base64 PNG (if captured).
            - screenshot_path (str, optional): Path to saved screenshot.
            - media_type (str, optional): "image/png" when a screenshot is attached.
            - error (str, optional): Error message if unsuccessful.
    """
    # Local import keeps this fix self-contained; ``rich`` is already a
    # dependency of this module (see ``rich.text.Text``).
    from rich.markup import escape

    group_id = generate_group_id("terminal_run_command", command[:50])
    banner = format_terminal_banner("TERMINAL RUN COMMAND 💻")
    # The command is arbitrary text and may contain "[...]" sequences; escape
    # it so Rich does not interpret it as markup (which can raise or mis-render).
    emit_info(
        Text.from_markup(f"{banner} [dim]{escape(command)}[/dim]"),
        message_group=group_id,
    )

    try:
        manager = get_session_manager()
        page = await manager.get_current_page()

        if not page:
            error_msg = "No active terminal page. Open terminal first."
            emit_error(error_msg, message_group=group_id)
            return {"success": False, "error": error_msg, "command": command}

        # Focus the terminal before typing. A focus failure is reported but is
        # not fatal: typing is still attempted.
        focus_result = await _focus_terminal(page)
        if not focus_result.get("success"):
            emit_info(
                f"Warning: Could not focus terminal: {focus_result.get('error')}",
                message_group=group_id,
            )

        # Type and execute command
        await page.keyboard.type(command)
        await page.keyboard.press("Enter")
        emit_info(f"Command sent: {command}", message_group=group_id)

        # Fixed pause (not prompt detection), never longer than ``timeout``.
        if wait_for_prompt:
            await asyncio.sleep(min(PROMPT_WAIT_MS / 1000, timeout))

        result: Dict[str, Any] = {
            "success": True,
            "command": command,
        }

        # Capture screenshot if requested; a failed screenshot leaves the
        # result without image fields but does not fail the command.
        if capture_screenshot:
            screenshot_result = await terminal_screenshot()
            if screenshot_result["success"]:
                result["base64_image"] = screenshot_result["base64_image"]
                result["screenshot_path"] = screenshot_result.get("screenshot_path")
                result["media_type"] = "image/png"

        emit_success(f"Command executed: {command}", message_group=group_id)
        return result

    except Exception as e:
        error_msg = f"Failed to run terminal command: {str(e)}"
        emit_error(error_msg, message_group=group_id)
        logger.exception("Error running terminal command")
        return {"success": False, "error": error_msg, "command": command}
227
+
228
+
229
async def send_terminal_keys(
    keys: str,
    modifiers: Optional[List[str]] = None,
    repeat: int = 1,
    delay_ms: int = 50,
) -> Dict[str, Any]:
    """Send special keys or key combinations to the terminal.

    Sends keyboard input to the xterm.js terminal, supporting special keys
    and modifier combinations like Ctrl+C, Ctrl+D, Tab, Arrow keys, etc.

    Args:
        keys: The key(s) to send. Can be a single character or special key
            like "Enter", "Tab", "ArrowUp", "ArrowDown", "ArrowLeft",
            "ArrowRight", "Escape", "Backspace", "Delete", etc.
            Must not be empty.
        modifiers: Optional modifier keys to hold. Supported:
            "Control"/"Ctrl", "Shift", "Alt", "Meta"/"Command"/"Cmd".
        repeat: Number of times to press the key. Defaults to 1; values
            below 1 are treated as 1.
            Use this to navigate multiple items, e.g., repeat=5 for ArrowDown.
        delay_ms: Delay in milliseconds between repeated keypresses.
            Defaults to 50ms. Increase if the TUI needs time to update.

    Returns:
        Dict with success, keys_sent, modifiers, repeat_count, and optional error.

    Examples:
        >>> await send_terminal_keys("c", modifiers=["Control"])  # Ctrl+C
        >>> await send_terminal_keys("Tab")  # Tab completion
        >>> await send_terminal_keys("ArrowUp")  # Previous command
        >>> await send_terminal_keys("ArrowDown", repeat=5)  # Navigate down 5 items
        >>> await send_terminal_keys("ArrowRight", repeat=3, delay_ms=100)  # Move right 3 times
    """
    # Local import keeps this fix self-contained; ``rich`` is already a
    # dependency of this module (see ``rich.text.Text``).
    from rich.markup import escape

    modifiers = modifiers or []
    repeat = max(1, repeat)  # Ensure at least 1
    normalized_modifiers = [_normalize_modifier(m) for m in modifiers]
    modifier_str = "+".join(normalized_modifiers)
    key_combo = f"{modifier_str}+{keys}" if modifier_str else keys

    repeat_str = f" x{repeat}" if repeat > 1 else ""
    group_id = generate_group_id("terminal_send_keys", f"{key_combo}{repeat_str}")
    banner = format_terminal_banner("TERMINAL SEND KEYS ⌨️")
    # Escape the caller-supplied combo so it is shown literally, not as markup.
    emit_info(
        Text.from_markup(
            f"{banner} [bold cyan]{escape(key_combo)}{repeat_str}[/bold cyan]"
        ),
        message_group=group_id,
    )

    # Reject empty input up front; otherwise ``keys[0]`` below would fail with
    # an unhelpful "string index out of range" after modifiers were pressed.
    if not keys:
        error_msg = "No keys provided to send."
        emit_error(error_msg, message_group=group_id)
        return {
            "success": False,
            "error": error_msg,
            "keys_sent": keys,
            "modifiers": modifiers,
            "repeat_count": repeat,
        }

    try:
        manager = get_session_manager()
        page = await manager.get_current_page()

        if not page:
            error_msg = "No active terminal page. Open terminal first."
            emit_error(error_msg, message_group=group_id)
            return {
                "success": False,
                "error": error_msg,
                "keys_sent": keys,
                "modifiers": modifiers,
                "repeat_count": repeat,
            }

        # Focus terminal before sending keys
        await _focus_terminal(page)

        # Multi-character names ("Enter", "ArrowUp") and upper-case single
        # characters go through press(); other single characters are typed.
        use_press = len(keys) > 1 or keys[0].isupper()

        # Send key(s) the specified number of times
        for i in range(repeat):
            # Track which modifiers were actually pressed so that a failure
            # part-way through never leaves a modifier held down.
            held: List[str] = []
            try:
                for modifier in normalized_modifiers:
                    await page.keyboard.down(modifier)
                    held.append(modifier)

                if use_press:
                    await page.keyboard.press(keys)
                else:
                    await page.keyboard.type(keys)
            finally:
                for modifier in reversed(held):
                    await page.keyboard.up(modifier)

            # Delay between repeated keypresses (but not after the last one)
            if i < repeat - 1:
                await asyncio.sleep(delay_ms / 1000)

        emit_success(f"Keys sent: {key_combo}{repeat_str}", message_group=group_id)
        return {
            "success": True,
            "keys_sent": keys,
            "modifiers": modifiers,
            "repeat_count": repeat,
        }

    except Exception as e:
        error_msg = f"Failed to send terminal keys: {str(e)}"
        emit_error(error_msg, message_group=group_id)
        logger.exception("Error sending terminal keys")
        return {
            "success": False,
            "error": error_msg,
            "keys_sent": keys,
            "modifiers": modifiers,
            "repeat_count": repeat,
        }
330
+
331
+
332
async def wait_for_terminal_output(
    pattern: Optional[str] = None,
    timeout: float = DEFAULT_OUTPUT_TIMEOUT,
    capture_screenshot: bool = False,
) -> Dict[str, Any]:
    """Read terminal output, optionally checking it against a pattern.

    Reads up to 100 lines of terminal text via ``terminal_read_output`` and
    checks for a pattern match.

    NOTE: despite the name, this performs a single read; it does not poll.
    ``timeout`` is accepted for interface compatibility but is not used.

    Args:
        pattern: Optional regex or text pattern to match (case-insensitive).
            If the pattern is not a valid regex, a case-insensitive substring
            search is used instead. If None, just reads current output.
        timeout: Currently unused (see note above). Defaults to 30.0.
        capture_screenshot: If True, include a screenshot. Defaults to False.

    Returns:
        Dict with:
            - success (bool): True if output was read.
            - matched (bool): True if pattern was found; when no pattern is
              given, True if the output contains non-whitespace text.
            - output (str): The terminal text content.
            - line_count (int): Line count reported by the reader (0 if absent).
            - base64_image (str, optional): Screenshot if captured.
            - screenshot_path (str, optional): Path to saved screenshot.
            - media_type (str, optional): "image/png" when a screenshot is attached.
            - error (str, optional): Error message if unsuccessful.
    """
    # Local import keeps this fix self-contained; ``rich`` is already a
    # dependency of this module (see ``rich.text.Text``).
    from rich.markup import escape

    pattern_display = pattern[:50] if pattern else "any"
    group_id = generate_group_id("terminal_wait_output", pattern_display)
    banner = format_terminal_banner("TERMINAL WAIT OUTPUT 👁️")
    # Regex patterns routinely contain "[...]" character classes; escape them
    # so Rich renders the pattern literally instead of parsing it as markup.
    emit_info(
        Text.from_markup(f"{banner} [dim]pattern={escape(pattern_display)}[/dim]"),
        message_group=group_id,
    )

    try:
        # Read terminal text output
        read_result = await terminal_read_output(lines=100)

        if not read_result["success"]:
            # Report the same message to the UI and to the caller.
            error_msg = read_result.get("error") or "Failed to read output"
            emit_error(error_msg, message_group=group_id)
            return {
                "success": False,
                "error": error_msg,
                "matched": False,
            }

        output_text = read_result["output"]

        result: Dict[str, Any] = {
            "success": True,
            "output": output_text,
            "line_count": read_result.get("line_count", 0),
        }

        # Check pattern match
        if pattern:
            try:
                # Try regex match first
                matched = bool(re.search(pattern, output_text, re.IGNORECASE))
            except re.error:
                # Fall back to simple substring match
                matched = pattern.lower() in output_text.lower()

            result["matched"] = matched
            if matched:
                emit_success(f"Pattern matched: {pattern}", message_group=group_id)
            else:
                emit_info(f"Pattern not found: {pattern}", message_group=group_id)
        else:
            # No pattern: "matched" means there is any visible output at all.
            result["matched"] = bool(output_text.strip())

        # Capture screenshot if requested
        if capture_screenshot:
            screenshot_result = await terminal_screenshot()
            if screenshot_result["success"]:
                result["base64_image"] = screenshot_result["base64_image"]
                result["screenshot_path"] = screenshot_result.get("screenshot_path")
                result["media_type"] = "image/png"

        return result

    except Exception as e:
        error_msg = f"Failed to wait for terminal output: {str(e)}"
        emit_error(error_msg, message_group=group_id)
        logger.exception("Error waiting for terminal output")
        return {"success": False, "error": error_msg, "matched": False}
419
+
420
+
421
+ # =============================================================================
422
+ # Tool Registration Functions
423
+ # =============================================================================
424
+
425
+
426
def register_run_terminal_command(agent):
    """Attach the ``terminal_run_command`` tool to ``agent``.

    The tool is a thin wrapper that forwards its arguments to
    ``run_terminal_command``; the ``timeout`` of that function is left at its
    default because the tool does not expose it.
    """

    @agent.tool
    async def terminal_run_command(
        context: RunContext,
        command: str,
        wait_for_prompt: bool = True,
        capture_screenshot: bool = False,
    ) -> Dict[str, Any]:
        """
        Execute a command in the terminal browser.

        Types the command and presses Enter. Optionally captures a screenshot
        that you can see directly as base64 image data.

        Args:
            command: The command to execute.
            wait_for_prompt: Wait briefly for command to process (default: True).
            capture_screenshot: Capture screenshot after execution (default: False).
                Set True if you need to see the terminal output visually.

        Returns:
            Dict with success, command, and optionally base64_image you can see.
        """
        # ``context`` is not read here; per the original note, the session is
        # set by invoke_agent via a contextvar.
        outcome = await run_terminal_command(
            command=command,
            wait_for_prompt=wait_for_prompt,
            capture_screenshot=capture_screenshot,
        )
        return outcome
457
+
458
+
459
def register_send_terminal_keys(agent):
    """Attach the ``terminal_send_keys`` tool to ``agent``.

    The tool is a thin wrapper that forwards every argument unchanged to
    ``send_terminal_keys``.
    """

    @agent.tool
    async def terminal_send_keys(
        context: RunContext,
        keys: str,
        modifiers: Optional[List[str]] = None,
        repeat: int = 1,
        delay_ms: int = 50,
    ) -> Dict[str, Any]:
        """
        Send special keys or key combinations to the terminal.

        Args:
            keys: Key to send (e.g., "Enter", "Tab", "ArrowUp", "ArrowDown", "c").
            modifiers: Modifier keys like ["Control"] for Ctrl+C.
            repeat: Number of times to press the key. Use this to navigate
                multiple items instead of calling this function multiple times!
                Example: repeat=5 to press ArrowDown 5 times.
            delay_ms: Milliseconds to wait between repeated keypresses (default 50).

        Returns:
            Dict with success, keys_sent, modifiers, repeat_count.

        Examples:
            - Navigate down 5 items: keys="ArrowDown", repeat=5
            - Navigate right 3 times: keys="ArrowRight", repeat=3
            - Ctrl+C: keys="c", modifiers=["Control"]
            - Tab: keys="Tab"
        """
        # ``context`` is not read here; per the original note, the session is
        # set by invoke_agent via a contextvar.
        outcome = await send_terminal_keys(
            keys=keys,
            modifiers=modifiers,
            repeat=repeat,
            delay_ms=delay_ms,
        )
        return outcome
494
+
495
+
496
def register_wait_terminal_output(agent):
    """Attach the ``terminal_wait_output`` tool to ``agent``.

    The tool is a thin wrapper that forwards its arguments to
    ``wait_for_terminal_output``; the ``timeout`` of that function is left at
    its default because the tool does not expose it.
    """

    @agent.tool
    async def terminal_wait_output(
        context: RunContext,
        pattern: Optional[str] = None,
        capture_screenshot: bool = False,
    ) -> Dict[str, Any]:
        """
        Read terminal output and optionally match a pattern.

        Extracts text from the terminal. Can check for pattern matches.

        Args:
            pattern: Optional regex or text to search for.
            capture_screenshot: Include a screenshot you can see (default: False).

        Returns:
            Dict with output (text), matched (if pattern given), optionally base64_image.
        """
        # ``context`` is not read here; per the original note, the session is
        # set by invoke_agent via a contextvar.
        outcome = await wait_for_terminal_output(
            pattern=pattern,
            capture_screenshot=capture_screenshot,
        )
        return outcome