code-puppy 0.0.348__py3-none-any.whl → 0.0.372__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_puppy/agents/__init__.py +8 -0
- code_puppy/agents/agent_manager.py +272 -1
- code_puppy/agents/agent_pack_leader.py +383 -0
- code_puppy/agents/agent_qa_kitten.py +12 -7
- code_puppy/agents/agent_terminal_qa.py +323 -0
- code_puppy/agents/base_agent.py +11 -8
- code_puppy/agents/event_stream_handler.py +101 -8
- code_puppy/agents/pack/__init__.py +34 -0
- code_puppy/agents/pack/bloodhound.py +304 -0
- code_puppy/agents/pack/husky.py +321 -0
- code_puppy/agents/pack/retriever.py +393 -0
- code_puppy/agents/pack/shepherd.py +348 -0
- code_puppy/agents/pack/terrier.py +287 -0
- code_puppy/agents/pack/watchdog.py +367 -0
- code_puppy/agents/subagent_stream_handler.py +276 -0
- code_puppy/api/__init__.py +13 -0
- code_puppy/api/app.py +169 -0
- code_puppy/api/main.py +21 -0
- code_puppy/api/pty_manager.py +446 -0
- code_puppy/api/routers/__init__.py +12 -0
- code_puppy/api/routers/agents.py +36 -0
- code_puppy/api/routers/commands.py +217 -0
- code_puppy/api/routers/config.py +74 -0
- code_puppy/api/routers/sessions.py +232 -0
- code_puppy/api/templates/terminal.html +361 -0
- code_puppy/api/websocket.py +154 -0
- code_puppy/callbacks.py +73 -0
- code_puppy/chatgpt_codex_client.py +53 -0
- code_puppy/claude_cache_client.py +294 -41
- code_puppy/command_line/add_model_menu.py +13 -4
- code_puppy/command_line/agent_menu.py +662 -0
- code_puppy/command_line/core_commands.py +89 -112
- code_puppy/command_line/model_picker_completion.py +3 -20
- code_puppy/command_line/model_settings_menu.py +21 -3
- code_puppy/config.py +145 -70
- code_puppy/gemini_model.py +706 -0
- code_puppy/http_utils.py +6 -3
- code_puppy/messaging/__init__.py +15 -0
- code_puppy/messaging/messages.py +27 -0
- code_puppy/messaging/queue_console.py +1 -1
- code_puppy/messaging/rich_renderer.py +36 -1
- code_puppy/messaging/spinner/__init__.py +20 -2
- code_puppy/messaging/subagent_console.py +461 -0
- code_puppy/model_factory.py +50 -16
- code_puppy/model_switching.py +63 -0
- code_puppy/model_utils.py +27 -24
- code_puppy/models.json +12 -12
- code_puppy/plugins/antigravity_oauth/antigravity_model.py +206 -172
- code_puppy/plugins/antigravity_oauth/register_callbacks.py +15 -8
- code_puppy/plugins/antigravity_oauth/transport.py +236 -45
- code_puppy/plugins/chatgpt_oauth/register_callbacks.py +2 -2
- code_puppy/plugins/claude_code_oauth/register_callbacks.py +2 -30
- code_puppy/plugins/claude_code_oauth/utils.py +4 -1
- code_puppy/plugins/frontend_emitter/__init__.py +25 -0
- code_puppy/plugins/frontend_emitter/emitter.py +121 -0
- code_puppy/plugins/frontend_emitter/register_callbacks.py +261 -0
- code_puppy/prompts/antigravity_system_prompt.md +1 -0
- code_puppy/pydantic_patches.py +52 -0
- code_puppy/status_display.py +6 -2
- code_puppy/tools/__init__.py +37 -1
- code_puppy/tools/agent_tools.py +83 -33
- code_puppy/tools/browser/__init__.py +37 -0
- code_puppy/tools/browser/browser_control.py +6 -6
- code_puppy/tools/browser/browser_interactions.py +21 -20
- code_puppy/tools/browser/browser_locators.py +9 -9
- code_puppy/tools/browser/browser_manager.py +316 -0
- code_puppy/tools/browser/browser_navigation.py +7 -7
- code_puppy/tools/browser/browser_screenshot.py +78 -140
- code_puppy/tools/browser/browser_scripts.py +15 -13
- code_puppy/tools/browser/chromium_terminal_manager.py +259 -0
- code_puppy/tools/browser/terminal_command_tools.py +521 -0
- code_puppy/tools/browser/terminal_screenshot_tools.py +556 -0
- code_puppy/tools/browser/terminal_tools.py +525 -0
- code_puppy/tools/command_runner.py +292 -101
- code_puppy/tools/common.py +176 -1
- code_puppy/tools/display.py +84 -0
- code_puppy/tools/subagent_context.py +158 -0
- {code_puppy-0.0.348.data → code_puppy-0.0.372.data}/data/code_puppy/models.json +12 -12
- {code_puppy-0.0.348.dist-info → code_puppy-0.0.372.dist-info}/METADATA +17 -16
- {code_puppy-0.0.348.dist-info → code_puppy-0.0.372.dist-info}/RECORD +84 -51
- code_puppy/prompts/codex_system_prompt.md +0 -310
- code_puppy/tools/browser/camoufox_manager.py +0 -235
- code_puppy/tools/browser/vqa_agent.py +0 -90
- {code_puppy-0.0.348.data → code_puppy-0.0.372.data}/data/code_puppy/models_dev_api.json +0 -0
- {code_puppy-0.0.348.dist-info → code_puppy-0.0.372.dist-info}/WHEEL +0 -0
- {code_puppy-0.0.348.dist-info → code_puppy-0.0.372.dist-info}/entry_points.txt +0 -0
- {code_puppy-0.0.348.dist-info → code_puppy-0.0.372.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,521 @@
|
|
|
1
|
+
"""Terminal command execution tools for browser-based terminal automation.
|
|
2
|
+
|
|
3
|
+
This module provides tools for:
|
|
4
|
+
- Running commands in the terminal browser
|
|
5
|
+
- Sending special keys (Ctrl+C, Tab, arrows, etc.)
|
|
6
|
+
- Waiting for terminal output patterns
|
|
7
|
+
|
|
8
|
+
These tools use the ChromiumTerminalManager to manage the browser instance
|
|
9
|
+
and interact with the xterm.js terminal in the Code Puppy API.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import asyncio
|
|
13
|
+
import logging
|
|
14
|
+
import re
|
|
15
|
+
from typing import Any, Dict, List, Optional
|
|
16
|
+
|
|
17
|
+
from pydantic_ai import RunContext
|
|
18
|
+
|
|
19
|
+
from rich.text import Text
|
|
20
|
+
|
|
21
|
+
from code_puppy.messaging import emit_error, emit_info, emit_success
|
|
22
|
+
from code_puppy.tools.browser import format_terminal_banner
|
|
23
|
+
from code_puppy.tools.common import generate_group_id
|
|
24
|
+
|
|
25
|
+
from .terminal_screenshot_tools import terminal_read_output, terminal_screenshot
|
|
26
|
+
from .terminal_tools import get_session_manager
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)

# Timeout defaults (seconds)
DEFAULT_COMMAND_TIMEOUT = 30.0
DEFAULT_OUTPUT_TIMEOUT = 30.0

# Time to wait for prompt to reappear after command (ms)
PROMPT_WAIT_MS = 500

# Modifier key mapping for Playwright.
# Keys are lower-case aliases (callers lower-case the user-supplied name
# before the lookup); values are the modifier names passed to Playwright's
# keyboard API. Besides the canonical names this accepts the common platform
# spellings: "option" for Alt on macOS and "win"/"super" for the Meta key.
MODIFIER_MAP = {
    "control": "Control",
    "ctrl": "Control",
    "shift": "Shift",
    "alt": "Alt",
    "option": "Alt",
    "meta": "Meta",
    "command": "Meta",
    "cmd": "Meta",
    "win": "Meta",
    "super": "Meta",
}
|
|
47
|
+
|
|
48
|
+
# JavaScript to robustly focus the xterm.js terminal.
# xterm.js uses a hidden textarea to capture keyboard input, so the script
# tries progressively broader targets and stops at the first one it finds:
#   1. the `textarea.xterm-helper-textarea` element (focus + click its .xterm)
#   2. the `.xterm-viewport` / `.xterm-screen` element (click, then focus)
#   3. any `.xterm` element (click, then focus its textarea)
#   4. the `#terminal` container (click only)
# The function evaluates to an object: `{success: true, method: ...}` naming
# the strategy that matched, or `{success: false, error: ...}` if none did.
FOCUS_TERMINAL_JS = """
() => {
    // Method 1: Find and focus the xterm helper textarea directly
    // This is the element that actually receives keyboard input in xterm.js
    const textareas = document.querySelectorAll('textarea.xterm-helper-textarea');
    for (const textarea of textareas) {
        textarea.focus();
        // Also click on the parent to ensure xterm knows it's active
        const xterm = textarea.closest('.xterm');
        if (xterm) {
            xterm.click();
        }
        return { success: true, method: 'textarea_focus', found: textareas.length };
    }

    // Method 2: Click on the xterm viewport/screen to trigger focus
    const viewport = document.querySelector('.xterm-viewport') ||
                     document.querySelector('.xterm-screen');
    if (viewport) {
        viewport.click();
        // Try textarea again after click
        const ta = document.querySelector('textarea.xterm-helper-textarea');
        if (ta) ta.focus();
        return { success: true, method: 'viewport_click' };
    }

    // Method 3: Find any xterm element and click it
    const xterm = document.querySelector('.xterm');
    if (xterm) {
        xterm.click();
        const ta = xterm.querySelector('textarea');
        if (ta) ta.focus();
        return { success: true, method: 'xterm_click' };
    }

    // Method 4: Try the terminal container
    const container = document.getElementById('terminal');
    if (container) {
        container.click();
        return { success: true, method: 'container_click' };
    }

    return { success: false, error: 'Could not find terminal element' };
}
"""
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
async def _focus_terminal(page) -> Dict[str, Any]:
    """Focus the xterm.js terminal so it receives keyboard input.

    xterm.js uses a hidden textarea element to capture keyboard events, so
    that textarea has to be focused for typed keys to reach the terminal.
    The in-page script ``FOCUS_TERMINAL_JS`` is tried first; if it fails,
    raises, or returns something unexpected, a list of known selectors is
    clicked one by one as a fallback.

    Args:
        page: The browser page object hosting the terminal. It must provide
            async ``evaluate`` and ``query_selector`` methods.

    Returns:
        ``{"success": True, "method": ...}`` naming the strategy that worked,
        or ``{"success": False, "error": ...}`` when nothing could be focused.
        This function never raises; failures are logged and reported in the
        returned dict.
    """
    last_error: Optional[str] = None

    # First, try the JavaScript approach which is most reliable. It gets its
    # own try block so that a script failure still lets the selector fallback
    # below run.
    try:
        result = await page.evaluate(FOCUS_TERMINAL_JS)
    except Exception as e:
        logger.warning(f"Error focusing terminal: {e}")
        last_error = str(e)
        result = None

    if isinstance(result, dict) and result.get("success"):
        # Give the browser a moment to process the focus
        await asyncio.sleep(0.15)
        return result

    # Fallback: Try clicking on known selectors, most specific first
    selectors_to_try = [
        "textarea.xterm-helper-textarea",
        ".xterm-viewport",
        ".xterm-screen",
        ".xterm",
        "#terminal",
    ]

    for selector in selectors_to_try:
        # Each selector is attempted independently: one element that cannot
        # be clicked must not stop the remaining candidates from being tried.
        try:
            element = await page.query_selector(selector)
            if not element:
                continue

            await element.click()
            await asyncio.sleep(0.1)

            # If we clicked something other than textarea, try to focus textarea
            if "textarea" not in selector:
                textarea = await page.query_selector(
                    "textarea.xterm-helper-textarea"
                )
                if textarea:
                    await textarea.focus()

            return {"success": True, "method": f"fallback_{selector}"}
        except Exception as e:
            logger.warning(f"Error focusing terminal: {e}")
            last_error = str(e)

    return {
        "success": False,
        "error": last_error or "Could not find terminal element to focus",
    }
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _normalize_modifier(modifier: str) -> str:
    """Translate a modifier alias into the name Playwright expects.

    The lookup in ``MODIFIER_MAP`` is case-insensitive. A name with no entry
    in the map is handed back exactly as it was given.
    """
    alias = modifier.lower()
    if alias in MODIFIER_MAP:
        return MODIFIER_MAP[alias]
    return modifier
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
async def run_terminal_command(
    command: str,
    wait_for_prompt: bool = True,
    timeout: float = DEFAULT_COMMAND_TIMEOUT,
    capture_screenshot: bool = False,
) -> Dict[str, Any]:
    """Execute a command in the terminal browser.

    Types the command into the xterm.js terminal and presses Enter to execute.
    Optionally captures a screenshot that multimodal models can see directly.

    Args:
        command: The command string to execute.
        wait_for_prompt: If True, pause briefly after pressing Enter so the
            command can start producing output. This is a fixed delay of
            ``PROMPT_WAIT_MS`` (capped by ``timeout``), not prompt detection.
            Defaults to True.
        timeout: Upper bound, in seconds, on that pause. Defaults to 30.0.
        capture_screenshot: If True, take a screenshot after execution.
            The screenshot is returned as base64 data. Defaults to False.

    Returns:
        A dictionary containing:
            - success (bool): True if command was sent.
            - command (str): The command that was executed.
            - base64_image (str, optional): Screenshot as base64 PNG (if captured).
            - screenshot_path (str, optional): Path to saved screenshot.
            - media_type (str, optional): "image/png" when a screenshot is included.
            - screenshot_error (str, optional): Why a requested screenshot
              could not be captured. The command itself was still sent.
            - error (str, optional): Error message if unsuccessful.
    """
    # Imported locally so this block does not depend on a file-level import.
    from rich.markup import escape

    group_id = generate_group_id("terminal_run_command", command[:50])
    banner = format_terminal_banner("TERMINAL RUN COMMAND 💻")
    # The command is arbitrary text; escape it so square brackets in it are
    # shown literally instead of being parsed (or rejected) as Rich markup.
    emit_info(
        Text.from_markup(f"{banner} [dim]{escape(command)}[/dim]"),
        message_group=group_id,
    )

    try:
        manager = get_session_manager()
        page = await manager.get_current_page()

        if not page:
            error_msg = "No active terminal page. Open terminal first."
            emit_error(error_msg, message_group=group_id)
            return {"success": False, "error": error_msg, "command": command}

        # Focus the terminal before typing. A focus failure is only a warning:
        # the terminal may already have focus, so typing is still attempted.
        focus_result = await _focus_terminal(page)
        if not focus_result.get("success"):
            emit_info(
                f"Warning: Could not focus terminal: {focus_result.get('error')}",
                message_group=group_id,
            )

        # Type and execute command
        await page.keyboard.type(command)
        await page.keyboard.press("Enter")
        emit_info(f"Command sent: {command}", message_group=group_id)

        # Wait for command to process
        if wait_for_prompt:
            await asyncio.sleep(max(0.0, min(PROMPT_WAIT_MS / 1000, timeout)))

        result: Dict[str, Any] = {
            "success": True,
            "command": command,
        }

        # Capture screenshot if requested
        if capture_screenshot:
            # The command has already been sent, so a screenshot problem must
            # not turn the whole call into a failure: a caller seeing
            # success=False could run the command a second time.
            try:
                screenshot_result = await terminal_screenshot()
                if screenshot_result.get("success"):
                    result["base64_image"] = screenshot_result["base64_image"]
                    result["screenshot_path"] = screenshot_result.get(
                        "screenshot_path"
                    )
                    result["media_type"] = "image/png"
                else:
                    result["screenshot_error"] = screenshot_result.get(
                        "error", "Screenshot capture failed"
                    )
            except Exception as e:
                logger.exception("Error capturing terminal screenshot")
                result["screenshot_error"] = str(e)

        emit_success(f"Command executed: {command}", message_group=group_id)
        return result

    except Exception as e:
        error_msg = f"Failed to run terminal command: {str(e)}"
        emit_error(error_msg, message_group=group_id)
        logger.exception("Error running terminal command")
        return {"success": False, "error": error_msg, "command": command}
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
async def send_terminal_keys(
    keys: str,
    modifiers: Optional[List[str]] = None,
    repeat: int = 1,
    delay_ms: int = 50,
) -> Dict[str, Any]:
    """Send special keys or key combinations to the terminal.

    Sends keyboard input to the xterm.js terminal, supporting special keys
    and modifier combinations like Ctrl+C, Ctrl+D, Tab, Arrow keys, etc.

    Args:
        keys: The key(s) to send. Can be a single character or special key
            like "Enter", "Tab", "ArrowUp", "ArrowDown", "ArrowLeft",
            "ArrowRight", "Escape", "Backspace", "Delete", etc.
            Must not be empty.
        modifiers: Optional modifier keys to hold. Supported:
            "Control"/"Ctrl", "Shift", "Alt", "Meta"/"Command"/"Cmd".
        repeat: Number of times to press the key. Defaults to 1.
            Values below 1 are treated as 1.
            Use this to navigate multiple items, e.g., repeat=5 for ArrowDown.
        delay_ms: Delay in milliseconds between repeated keypresses.
            Defaults to 50ms. Increase if the TUI needs time to update.

    Returns:
        Dict with success, keys_sent, modifiers, repeat_count, and optional error.

    Examples:
        >>> await send_terminal_keys("c", modifiers=["Control"])  # Ctrl+C
        >>> await send_terminal_keys("Tab")  # Tab completion
        >>> await send_terminal_keys("ArrowUp")  # Previous command
        >>> await send_terminal_keys("ArrowDown", repeat=5)  # Navigate down 5 items
        >>> await send_terminal_keys("ArrowRight", repeat=3, delay_ms=100)  # Move right 3 times
    """
    # Imported locally so this block does not depend on a file-level import.
    from rich.markup import escape

    modifiers = modifiers or []
    repeat = max(1, repeat)  # Ensure at least 1
    normalized_modifiers = [_normalize_modifier(m) for m in modifiers]
    modifier_str = "+".join(normalized_modifiers) if normalized_modifiers else ""
    key_combo = f"{modifier_str}+{keys}" if modifier_str else keys

    repeat_str = f" x{repeat}" if repeat > 1 else ""
    group_id = generate_group_id("terminal_send_keys", f"{key_combo}{repeat_str}")
    banner = format_terminal_banner("TERMINAL SEND KEYS ⌨️")
    # Key and modifier names are caller-supplied text; escape them so that
    # square brackets are displayed literally rather than parsed as markup.
    emit_info(
        Text.from_markup(
            f"{banner} [bold cyan]{escape(key_combo)}{repeat_str}[/bold cyan]"
        ),
        message_group=group_id,
    )

    # Reject empty input up front with a clear message instead of failing
    # later on keys[0] after modifiers have already been pressed.
    if not keys:
        error_msg = "No keys provided to send."
        emit_error(error_msg, message_group=group_id)
        return {
            "success": False,
            "error": error_msg,
            "keys_sent": keys,
            "modifiers": modifiers,
            "repeat_count": repeat,
        }

    try:
        manager = get_session_manager()
        page = await manager.get_current_page()

        if not page:
            error_msg = "No active terminal page. Open terminal first."
            emit_error(error_msg, message_group=group_id)
            return {
                "success": False,
                "error": error_msg,
                "keys_sent": keys,
                "modifiers": modifiers,
                "repeat_count": repeat,
            }

        # Focus terminal before sending keys. As in run_terminal_command, a
        # focus failure is surfaced as a warning and sending still proceeds.
        focus_result = await _focus_terminal(page)
        if not focus_result.get("success"):
            emit_info(
                f"Warning: Could not focus terminal: {focus_result.get('error')}",
                message_group=group_id,
            )

        # Multi-character names and upper-case letters go through press();
        # any other single character is typed.
        use_press = len(keys) > 1 or keys[0].isupper()

        # Send key(s) the specified number of times
        for i in range(repeat):
            # Track which modifiers were actually pressed so that exactly
            # those are released, even if pressing a later modifier fails.
            held: List[str] = []
            try:
                # Hold modifiers and press key
                for modifier in normalized_modifiers:
                    await page.keyboard.down(modifier)
                    held.append(modifier)

                if use_press:
                    await page.keyboard.press(keys)
                else:
                    await page.keyboard.type(keys)
            finally:
                for modifier in reversed(held):
                    await page.keyboard.up(modifier)

            # Delay between repeated keypresses (but not after the last one)
            if i < repeat - 1:
                await asyncio.sleep(delay_ms / 1000)

        emit_success(f"Keys sent: {key_combo}{repeat_str}", message_group=group_id)
        return {
            "success": True,
            "keys_sent": keys,
            "modifiers": modifiers,
            "repeat_count": repeat,
        }

    except Exception as e:
        error_msg = f"Failed to send terminal keys: {str(e)}"
        emit_error(error_msg, message_group=group_id)
        logger.exception("Error sending terminal keys")
        return {
            "success": False,
            "error": error_msg,
            "keys_sent": keys,
            "modifiers": modifiers,
            "repeat_count": repeat,
        }
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
async def wait_for_terminal_output(
    pattern: Optional[str] = None,
    timeout: float = DEFAULT_OUTPUT_TIMEOUT,
    capture_screenshot: bool = False,
) -> Dict[str, Any]:
    """Read terminal output once, optionally checking it against a pattern.

    Reads the terminal text output (the last 100 lines are requested) and
    checks for a pattern match. Despite the name, the output is read a single
    time; this function does not poll.

    Args:
        pattern: Optional regex or text pattern to match, case-insensitively.
            If it is not a valid regex it is matched as a plain substring.
            If None, just reads current output.
        timeout: Maximum wait time in seconds. Defaults to 30.0.
            NOTE(review): accepted for interface compatibility but not used
            by the current implementation, which never waits.
        capture_screenshot: If True, include a screenshot. Defaults to False.

    Returns:
        Dict with:
            - success (bool): True if output was read.
            - matched (bool): True if pattern was found (when pattern given);
              without a pattern, True if the output is not blank.
            - output (str): The terminal text content.
            - line_count (int): Line count reported by the reader (0 if absent).
            - base64_image (str, optional): Screenshot if captured.
            - screenshot_path (str, optional): Path to saved screenshot.
            - media_type (str, optional): "image/png" when a screenshot is included.
            - screenshot_error (str, optional): Why a requested screenshot
              could not be captured. The text output is still returned.
            - error (str, optional): Error message if unsuccessful.
    """
    # Imported locally so this block does not depend on a file-level import.
    from rich.markup import escape

    pattern_display = pattern[:50] if pattern else "any"
    group_id = generate_group_id("terminal_wait_output", pattern_display)
    banner = format_terminal_banner("TERMINAL WAIT OUTPUT 👁️")
    # Regex patterns routinely contain square brackets; escape them so they
    # are displayed literally instead of being parsed as Rich markup tags.
    emit_info(
        Text.from_markup(
            f"{banner} [dim]pattern={escape(pattern_display)}[/dim]"
        ),
        message_group=group_id,
    )

    try:
        # Read terminal text output
        read_result = await terminal_read_output(lines=100)

        if not read_result.get("success"):
            read_error = read_result.get("error", "Failed to read output")
            emit_error(read_error, message_group=group_id)
            return {
                "success": False,
                "error": read_error,
                "matched": False,
            }

        output_text = read_result.get("output") or ""

        result: Dict[str, Any] = {
            "success": True,
            "output": output_text,
            "line_count": read_result.get("line_count", 0),
        }

        # Check pattern match
        if pattern:
            try:
                # Try regex match first
                matched = bool(re.search(pattern, output_text, re.IGNORECASE))
            except re.error:
                # Fall back to simple substring match
                matched = pattern.lower() in output_text.lower()

            result["matched"] = matched
            if matched:
                emit_success(f"Pattern matched: {pattern}", message_group=group_id)
            else:
                emit_info(f"Pattern not found: {pattern}", message_group=group_id)
        else:
            result["matched"] = bool(output_text.strip())

        # Capture screenshot if requested
        if capture_screenshot:
            # The text has already been read successfully; a screenshot
            # problem should not discard it, so failures are recorded in the
            # result instead of propagating to the outer handler.
            try:
                screenshot_result = await terminal_screenshot()
                if screenshot_result.get("success"):
                    result["base64_image"] = screenshot_result["base64_image"]
                    result["screenshot_path"] = screenshot_result.get(
                        "screenshot_path"
                    )
                    result["media_type"] = "image/png"
                else:
                    result["screenshot_error"] = screenshot_result.get(
                        "error", "Screenshot capture failed"
                    )
            except Exception as e:
                logger.exception("Error capturing terminal screenshot")
                result["screenshot_error"] = str(e)

        return result

    except Exception as e:
        error_msg = f"Failed to wait for terminal output: {str(e)}"
        emit_error(error_msg, message_group=group_id)
        logger.exception("Error waiting for terminal output")
        return {"success": False, "error": error_msg, "matched": False}
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
# =============================================================================
|
|
422
|
+
# Tool Registration Functions
|
|
423
|
+
# =============================================================================
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
def register_run_terminal_command(agent) -> None:
    """Register the terminal command execution tool.

    Defines ``terminal_run_command`` and attaches it to ``agent`` through the
    ``agent.tool`` decorator. The tool forwards its arguments to
    ``run_terminal_command``; ``timeout`` is not exposed, so that function's
    default applies.

    Args:
        agent: The agent object whose ``tool`` decorator registers the tool.
    """

    @agent.tool
    async def terminal_run_command(
        context: RunContext,
        command: str,
        wait_for_prompt: bool = True,
        capture_screenshot: bool = False,
    ) -> Dict[str, Any]:
        """
        Execute a command in the terminal browser.

        Types the command and presses Enter. Optionally captures a screenshot
        that you can see directly as base64 image data.

        Args:
            command: The command to execute.
            wait_for_prompt: Wait briefly for command to process (default: True).
            capture_screenshot: Capture screenshot after execution (default: False).
                Set True if you need to see the terminal output visually.

        Returns:
            Dict with success, command, and optionally base64_image you can see.
        """
        # `context` is part of the tool signature but is not read here.
        # Session is set by invoke_agent via contextvar
        return await run_terminal_command(
            command=command,
            wait_for_prompt=wait_for_prompt,
            capture_screenshot=capture_screenshot,
        )
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
def register_send_terminal_keys(agent) -> None:
    """Register the terminal key sending tool.

    Defines ``terminal_send_keys`` and attaches it to ``agent`` through the
    ``agent.tool`` decorator. The tool forwards all of its arguments
    unchanged to ``send_terminal_keys``.

    Args:
        agent: The agent object whose ``tool`` decorator registers the tool.
    """

    @agent.tool
    async def terminal_send_keys(
        context: RunContext,
        keys: str,
        modifiers: Optional[List[str]] = None,
        repeat: int = 1,
        delay_ms: int = 50,
    ) -> Dict[str, Any]:
        """
        Send special keys or key combinations to the terminal.

        Args:
            keys: Key to send (e.g., "Enter", "Tab", "ArrowUp", "ArrowDown", "c").
            modifiers: Modifier keys like ["Control"] for Ctrl+C.
            repeat: Number of times to press the key. Use this to navigate
                multiple items instead of calling this function multiple times!
                Example: repeat=5 to press ArrowDown 5 times.
            delay_ms: Milliseconds to wait between repeated keypresses (default 50).

        Returns:
            Dict with success, keys_sent, modifiers, repeat_count.

        Examples:
            - Navigate down 5 items: keys="ArrowDown", repeat=5
            - Navigate right 3 times: keys="ArrowRight", repeat=3
            - Ctrl+C: keys="c", modifiers=["Control"]
            - Tab: keys="Tab"
        """
        # `context` is part of the tool signature but is not read here.
        # Session is set by invoke_agent via contextvar
        return await send_terminal_keys(
            keys=keys, modifiers=modifiers, repeat=repeat, delay_ms=delay_ms
        )
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
def register_wait_terminal_output(agent) -> None:
    """Register the terminal output waiting tool.

    Defines ``terminal_wait_output`` and attaches it to ``agent`` through the
    ``agent.tool`` decorator. The tool forwards ``pattern`` and
    ``capture_screenshot`` to ``wait_for_terminal_output``; ``timeout`` is
    not exposed, so that function's default applies.

    Args:
        agent: The agent object whose ``tool`` decorator registers the tool.
    """

    @agent.tool
    async def terminal_wait_output(
        context: RunContext,
        pattern: Optional[str] = None,
        capture_screenshot: bool = False,
    ) -> Dict[str, Any]:
        """
        Read terminal output and optionally match a pattern.

        Extracts text from the terminal. Can check for pattern matches.

        Args:
            pattern: Optional regex or text to search for.
            capture_screenshot: Include a screenshot you can see (default: False).

        Returns:
            Dict with output (text), matched (if pattern given), optionally base64_image.
        """
        # `context` is part of the tool signature but is not read here.
        # Session is set by invoke_agent via contextvar
        return await wait_for_terminal_output(
            pattern=pattern,
            capture_screenshot=capture_screenshot,
        )
|