code-puppy 0.0.348__py3-none-any.whl → 0.0.372__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_puppy/agents/__init__.py +8 -0
- code_puppy/agents/agent_manager.py +272 -1
- code_puppy/agents/agent_pack_leader.py +383 -0
- code_puppy/agents/agent_qa_kitten.py +12 -7
- code_puppy/agents/agent_terminal_qa.py +323 -0
- code_puppy/agents/base_agent.py +11 -8
- code_puppy/agents/event_stream_handler.py +101 -8
- code_puppy/agents/pack/__init__.py +34 -0
- code_puppy/agents/pack/bloodhound.py +304 -0
- code_puppy/agents/pack/husky.py +321 -0
- code_puppy/agents/pack/retriever.py +393 -0
- code_puppy/agents/pack/shepherd.py +348 -0
- code_puppy/agents/pack/terrier.py +287 -0
- code_puppy/agents/pack/watchdog.py +367 -0
- code_puppy/agents/subagent_stream_handler.py +276 -0
- code_puppy/api/__init__.py +13 -0
- code_puppy/api/app.py +169 -0
- code_puppy/api/main.py +21 -0
- code_puppy/api/pty_manager.py +446 -0
- code_puppy/api/routers/__init__.py +12 -0
- code_puppy/api/routers/agents.py +36 -0
- code_puppy/api/routers/commands.py +217 -0
- code_puppy/api/routers/config.py +74 -0
- code_puppy/api/routers/sessions.py +232 -0
- code_puppy/api/templates/terminal.html +361 -0
- code_puppy/api/websocket.py +154 -0
- code_puppy/callbacks.py +73 -0
- code_puppy/chatgpt_codex_client.py +53 -0
- code_puppy/claude_cache_client.py +294 -41
- code_puppy/command_line/add_model_menu.py +13 -4
- code_puppy/command_line/agent_menu.py +662 -0
- code_puppy/command_line/core_commands.py +89 -112
- code_puppy/command_line/model_picker_completion.py +3 -20
- code_puppy/command_line/model_settings_menu.py +21 -3
- code_puppy/config.py +145 -70
- code_puppy/gemini_model.py +706 -0
- code_puppy/http_utils.py +6 -3
- code_puppy/messaging/__init__.py +15 -0
- code_puppy/messaging/messages.py +27 -0
- code_puppy/messaging/queue_console.py +1 -1
- code_puppy/messaging/rich_renderer.py +36 -1
- code_puppy/messaging/spinner/__init__.py +20 -2
- code_puppy/messaging/subagent_console.py +461 -0
- code_puppy/model_factory.py +50 -16
- code_puppy/model_switching.py +63 -0
- code_puppy/model_utils.py +27 -24
- code_puppy/models.json +12 -12
- code_puppy/plugins/antigravity_oauth/antigravity_model.py +206 -172
- code_puppy/plugins/antigravity_oauth/register_callbacks.py +15 -8
- code_puppy/plugins/antigravity_oauth/transport.py +236 -45
- code_puppy/plugins/chatgpt_oauth/register_callbacks.py +2 -2
- code_puppy/plugins/claude_code_oauth/register_callbacks.py +2 -30
- code_puppy/plugins/claude_code_oauth/utils.py +4 -1
- code_puppy/plugins/frontend_emitter/__init__.py +25 -0
- code_puppy/plugins/frontend_emitter/emitter.py +121 -0
- code_puppy/plugins/frontend_emitter/register_callbacks.py +261 -0
- code_puppy/prompts/antigravity_system_prompt.md +1 -0
- code_puppy/pydantic_patches.py +52 -0
- code_puppy/status_display.py +6 -2
- code_puppy/tools/__init__.py +37 -1
- code_puppy/tools/agent_tools.py +83 -33
- code_puppy/tools/browser/__init__.py +37 -0
- code_puppy/tools/browser/browser_control.py +6 -6
- code_puppy/tools/browser/browser_interactions.py +21 -20
- code_puppy/tools/browser/browser_locators.py +9 -9
- code_puppy/tools/browser/browser_manager.py +316 -0
- code_puppy/tools/browser/browser_navigation.py +7 -7
- code_puppy/tools/browser/browser_screenshot.py +78 -140
- code_puppy/tools/browser/browser_scripts.py +15 -13
- code_puppy/tools/browser/chromium_terminal_manager.py +259 -0
- code_puppy/tools/browser/terminal_command_tools.py +521 -0
- code_puppy/tools/browser/terminal_screenshot_tools.py +556 -0
- code_puppy/tools/browser/terminal_tools.py +525 -0
- code_puppy/tools/command_runner.py +292 -101
- code_puppy/tools/common.py +176 -1
- code_puppy/tools/display.py +84 -0
- code_puppy/tools/subagent_context.py +158 -0
- {code_puppy-0.0.348.data → code_puppy-0.0.372.data}/data/code_puppy/models.json +12 -12
- {code_puppy-0.0.348.dist-info → code_puppy-0.0.372.dist-info}/METADATA +17 -16
- {code_puppy-0.0.348.dist-info → code_puppy-0.0.372.dist-info}/RECORD +84 -51
- code_puppy/prompts/codex_system_prompt.md +0 -310
- code_puppy/tools/browser/camoufox_manager.py +0 -235
- code_puppy/tools/browser/vqa_agent.py +0 -90
- {code_puppy-0.0.348.data → code_puppy-0.0.372.data}/data/code_puppy/models_dev_api.json +0 -0
- {code_puppy-0.0.348.dist-info → code_puppy-0.0.372.dist-info}/WHEEL +0 -0
- {code_puppy-0.0.348.dist-info → code_puppy-0.0.372.dist-info}/entry_points.txt +0 -0
- {code_puppy-0.0.348.dist-info → code_puppy-0.0.372.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,19 +1,21 @@
|
|
|
1
|
-
"""Screenshot
|
|
1
|
+
"""Screenshot tool for browser automation.
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Captures screenshots and returns them via ToolReturn with BinaryContent
|
|
4
|
+
so multimodal models can directly see and analyze - no separate VQA agent needed.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import time
|
|
4
8
|
from datetime import datetime
|
|
5
9
|
from pathlib import Path
|
|
6
10
|
from tempfile import gettempdir, mkdtemp
|
|
7
|
-
from typing import Any, Dict, Optional
|
|
11
|
+
from typing import Any, Dict, Optional, Union
|
|
8
12
|
|
|
9
|
-
from
|
|
10
|
-
from pydantic_ai import RunContext
|
|
13
|
+
from pydantic_ai import BinaryContent, RunContext, ToolReturn
|
|
11
14
|
|
|
12
15
|
from code_puppy.messaging import emit_error, emit_info, emit_success
|
|
13
16
|
from code_puppy.tools.common import generate_group_id
|
|
14
17
|
|
|
15
|
-
from .
|
|
16
|
-
from .vqa_agent import run_vqa_analysis
|
|
18
|
+
from .browser_manager import get_session_browser_manager
|
|
17
19
|
|
|
18
20
|
_TEMP_SCREENSHOT_ROOT = Path(
|
|
19
21
|
mkdtemp(prefix="code_puppy_screenshots_", dir=gettempdir())
|
|
@@ -21,21 +23,11 @@ _TEMP_SCREENSHOT_ROOT = Path(
|
|
|
21
23
|
|
|
22
24
|
|
|
23
25
|
def _build_screenshot_path(timestamp: str) -> Path:
|
|
24
|
-
"""Return the target path for a screenshot
|
|
26
|
+
"""Return the target path for a screenshot."""
|
|
25
27
|
filename = f"screenshot_{timestamp}.png"
|
|
26
28
|
return _TEMP_SCREENSHOT_ROOT / filename
|
|
27
29
|
|
|
28
30
|
|
|
29
|
-
class ScreenshotResult(BaseModel):
|
|
30
|
-
"""Result from screenshot operation."""
|
|
31
|
-
|
|
32
|
-
success: bool
|
|
33
|
-
screenshot_path: Optional[str] = None
|
|
34
|
-
screenshot_data: Optional[bytes] = None
|
|
35
|
-
timestamp: Optional[str] = None
|
|
36
|
-
error: Optional[str] = None
|
|
37
|
-
|
|
38
|
-
|
|
39
31
|
async def _capture_screenshot(
|
|
40
32
|
page,
|
|
41
33
|
full_page: bool = False,
|
|
@@ -45,41 +37,37 @@ async def _capture_screenshot(
|
|
|
45
37
|
) -> Dict[str, Any]:
|
|
46
38
|
"""Internal screenshot capture function."""
|
|
47
39
|
try:
|
|
48
|
-
timestamp = datetime.now().strftime("%Y%m%d_%H%M%
|
|
40
|
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
|
|
49
41
|
|
|
50
42
|
# Take screenshot
|
|
51
43
|
if element_selector:
|
|
52
|
-
# Screenshot specific element
|
|
53
44
|
element = await page.locator(element_selector).first
|
|
54
45
|
if not await element.is_visible():
|
|
55
46
|
return {
|
|
56
47
|
"success": False,
|
|
57
48
|
"error": f"Element '{element_selector}' is not visible",
|
|
58
49
|
}
|
|
59
|
-
|
|
50
|
+
screenshot_bytes = await element.screenshot()
|
|
60
51
|
else:
|
|
61
|
-
|
|
62
|
-
screenshot_data = await page.screenshot(full_page=full_page)
|
|
52
|
+
screenshot_bytes = await page.screenshot(full_page=full_page)
|
|
63
53
|
|
|
64
|
-
result = {
|
|
54
|
+
result: Dict[str, Any] = {
|
|
65
55
|
"success": True,
|
|
66
|
-
"
|
|
56
|
+
"screenshot_bytes": screenshot_bytes,
|
|
67
57
|
"timestamp": timestamp,
|
|
68
58
|
}
|
|
69
59
|
|
|
70
60
|
if save_screenshot:
|
|
71
61
|
screenshot_path = _build_screenshot_path(timestamp)
|
|
72
62
|
screenshot_path.parent.mkdir(parents=True, exist_ok=True)
|
|
73
|
-
|
|
74
63
|
with open(screenshot_path, "wb") as f:
|
|
75
|
-
f.write(
|
|
76
|
-
|
|
64
|
+
f.write(screenshot_bytes)
|
|
77
65
|
result["screenshot_path"] = str(screenshot_path)
|
|
78
|
-
|
|
66
|
+
|
|
79
67
|
if group_id:
|
|
80
|
-
emit_success(
|
|
81
|
-
|
|
82
|
-
|
|
68
|
+
emit_success(
|
|
69
|
+
f"Screenshot saved: {screenshot_path}", message_group=group_id
|
|
70
|
+
)
|
|
83
71
|
|
|
84
72
|
return result
|
|
85
73
|
|
|
@@ -87,46 +75,42 @@ async def _capture_screenshot(
|
|
|
87
75
|
return {"success": False, "error": str(e)}
|
|
88
76
|
|
|
89
77
|
|
|
90
|
-
async def
|
|
91
|
-
question: str,
|
|
78
|
+
async def take_screenshot(
|
|
92
79
|
full_page: bool = False,
|
|
93
80
|
element_selector: Optional[str] = None,
|
|
94
81
|
save_screenshot: bool = True,
|
|
95
|
-
) -> Dict[str, Any]:
|
|
96
|
-
"""
|
|
97
|
-
|
|
82
|
+
) -> Union[ToolReturn, Dict[str, Any]]:
|
|
83
|
+
"""Take a screenshot of the browser page.
|
|
84
|
+
|
|
85
|
+
Returns a ToolReturn with BinaryContent so multimodal models can
|
|
86
|
+
directly see and analyze the screenshot.
|
|
98
87
|
|
|
99
88
|
Args:
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
save_screenshot: Whether to save the screenshot to disk
|
|
89
|
+
full_page: Whether to capture full page or just viewport.
|
|
90
|
+
element_selector: Optional selector to screenshot specific element.
|
|
91
|
+
save_screenshot: Whether to save the screenshot to disk.
|
|
104
92
|
|
|
105
93
|
Returns:
|
|
106
|
-
|
|
94
|
+
ToolReturn containing:
|
|
95
|
+
- return_value: Success message with screenshot path
|
|
96
|
+
- content: List with description and BinaryContent image
|
|
97
|
+
- metadata: Screenshot details (path, target, timestamp)
|
|
98
|
+
Or Dict with error info if failed.
|
|
107
99
|
"""
|
|
108
100
|
target = element_selector or ("full_page" if full_page else "viewport")
|
|
109
|
-
group_id = generate_group_id(
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
emit_info(
|
|
113
|
-
f"BROWSER SCREENSHOT ANALYZE 📷 question='{question[:100]}{'...' if len(question) > 100 else ''}' target={target}",
|
|
114
|
-
message_group=group_id,
|
|
115
|
-
)
|
|
101
|
+
group_id = generate_group_id("browser_screenshot", target)
|
|
102
|
+
emit_info(f"BROWSER SCREENSHOT 📷 target={target}", message_group=group_id)
|
|
103
|
+
|
|
116
104
|
try:
|
|
117
|
-
|
|
118
|
-
browser_manager = get_camoufox_manager()
|
|
105
|
+
browser_manager = get_session_browser_manager()
|
|
119
106
|
page = await browser_manager.get_current_page()
|
|
120
107
|
|
|
121
108
|
if not page:
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
"question": question,
|
|
126
|
-
}
|
|
109
|
+
error_msg = "No active browser page. Navigate to a webpage first."
|
|
110
|
+
emit_error(error_msg, message_group=group_id)
|
|
111
|
+
return {"success": False, "error": error_msg}
|
|
127
112
|
|
|
128
|
-
|
|
129
|
-
screenshot_result = await _capture_screenshot(
|
|
113
|
+
result = await _capture_screenshot(
|
|
130
114
|
page,
|
|
131
115
|
full_page=full_page,
|
|
132
116
|
element_selector=element_selector,
|
|
@@ -134,108 +118,62 @@ async def take_screenshot_and_analyze(
|
|
|
134
118
|
group_id=group_id,
|
|
135
119
|
)
|
|
136
120
|
|
|
137
|
-
if not
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
"
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
"
|
|
157
|
-
"
|
|
158
|
-
"question": question,
|
|
159
|
-
}
|
|
160
|
-
|
|
161
|
-
try:
|
|
162
|
-
vqa_result = await asyncio.to_thread(
|
|
163
|
-
run_vqa_analysis,
|
|
164
|
-
question,
|
|
165
|
-
screenshot_bytes,
|
|
166
|
-
)
|
|
167
|
-
except Exception as exc:
|
|
168
|
-
emit_error(
|
|
169
|
-
f"Visual question answering failed: {exc}",
|
|
170
|
-
message_group=group_id,
|
|
171
|
-
)
|
|
172
|
-
return {
|
|
173
|
-
"success": False,
|
|
174
|
-
"error": f"Visual analysis failed: {exc}",
|
|
175
|
-
"question": question,
|
|
176
|
-
"screenshot_info": {
|
|
177
|
-
"path": screenshot_result.get("screenshot_path"),
|
|
178
|
-
"timestamp": screenshot_result.get("timestamp"),
|
|
179
|
-
"full_page": full_page,
|
|
180
|
-
"element_selector": element_selector,
|
|
181
|
-
},
|
|
182
|
-
}
|
|
183
|
-
|
|
184
|
-
emit_success(
|
|
185
|
-
f"Visual analysis answer: {vqa_result.answer}",
|
|
186
|
-
message_group=group_id,
|
|
187
|
-
)
|
|
188
|
-
emit_info(
|
|
189
|
-
f"Observations: {vqa_result.observations}",
|
|
190
|
-
message_group=group_id,
|
|
191
|
-
)
|
|
192
|
-
|
|
193
|
-
return {
|
|
194
|
-
"success": True,
|
|
195
|
-
"question": question,
|
|
196
|
-
"answer": vqa_result.answer,
|
|
197
|
-
"confidence": vqa_result.confidence,
|
|
198
|
-
"observations": vqa_result.observations,
|
|
199
|
-
"screenshot_info": {
|
|
200
|
-
"path": screenshot_result.get("screenshot_path"),
|
|
201
|
-
"size": len(screenshot_bytes),
|
|
202
|
-
"timestamp": screenshot_result.get("timestamp"),
|
|
121
|
+
if not result["success"]:
|
|
122
|
+
emit_error(result.get("error", "Screenshot failed"), message_group=group_id)
|
|
123
|
+
return {"success": False, "error": result.get("error")}
|
|
124
|
+
|
|
125
|
+
screenshot_path = result.get("screenshot_path", "(not saved)")
|
|
126
|
+
|
|
127
|
+
# Return as ToolReturn with BinaryContent so the model can SEE the image!
|
|
128
|
+
return ToolReturn(
|
|
129
|
+
return_value=f"Screenshot captured successfully. Saved to: {screenshot_path}",
|
|
130
|
+
content=[
|
|
131
|
+
f"Here's the browser screenshot ({target}):",
|
|
132
|
+
BinaryContent(
|
|
133
|
+
data=result["screenshot_bytes"],
|
|
134
|
+
media_type="image/png",
|
|
135
|
+
),
|
|
136
|
+
"Please analyze what you see and describe any relevant details.",
|
|
137
|
+
],
|
|
138
|
+
metadata={
|
|
139
|
+
"success": True,
|
|
140
|
+
"screenshot_path": screenshot_path,
|
|
141
|
+
"target": target,
|
|
203
142
|
"full_page": full_page,
|
|
204
143
|
"element_selector": element_selector,
|
|
144
|
+
"timestamp": time.time(),
|
|
205
145
|
},
|
|
206
|
-
|
|
146
|
+
)
|
|
207
147
|
|
|
208
148
|
except Exception as e:
|
|
209
|
-
|
|
210
|
-
|
|
149
|
+
error_msg = f"Screenshot failed: {str(e)}"
|
|
150
|
+
emit_error(error_msg, message_group=group_id)
|
|
151
|
+
return {"success": False, "error": error_msg}
|
|
211
152
|
|
|
212
153
|
|
|
213
154
|
def register_take_screenshot_and_analyze(agent):
|
|
214
|
-
"""Register the screenshot
|
|
155
|
+
"""Register the screenshot tool."""
|
|
215
156
|
|
|
216
157
|
@agent.tool
|
|
217
158
|
async def browser_screenshot_analyze(
|
|
218
159
|
context: RunContext,
|
|
219
|
-
question: str,
|
|
220
160
|
full_page: bool = False,
|
|
221
161
|
element_selector: Optional[str] = None,
|
|
222
|
-
|
|
223
|
-
) -> Dict[str, Any]:
|
|
162
|
+
) -> Union[ToolReturn, Dict[str, Any]]:
|
|
224
163
|
"""
|
|
225
|
-
Take a screenshot
|
|
164
|
+
Take a screenshot of the browser page.
|
|
165
|
+
|
|
166
|
+
Returns the screenshot via ToolReturn with BinaryContent that you can
|
|
167
|
+
see directly. Use this to see what's displayed in the browser.
|
|
226
168
|
|
|
227
169
|
Args:
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
element_selector: Optional CSS/XPath selector to screenshot specific element
|
|
231
|
-
save_screenshot: Whether to save the screenshot to disk
|
|
170
|
+
full_page: Capture full page (True) or just viewport (False).
|
|
171
|
+
element_selector: Optional CSS selector to screenshot specific element.
|
|
232
172
|
|
|
233
173
|
Returns:
|
|
234
|
-
|
|
174
|
+
ToolReturn with the screenshot image you can analyze, or error dict.
|
|
235
175
|
"""
|
|
236
|
-
return await
|
|
237
|
-
question=question,
|
|
176
|
+
return await take_screenshot(
|
|
238
177
|
full_page=full_page,
|
|
239
178
|
element_selector=element_selector,
|
|
240
|
-
save_screenshot=save_screenshot,
|
|
241
179
|
)
|
|
@@ -7,7 +7,7 @@ from pydantic_ai import RunContext
|
|
|
7
7
|
from code_puppy.messaging import emit_error, emit_info, emit_success
|
|
8
8
|
from code_puppy.tools.common import generate_group_id
|
|
9
9
|
|
|
10
|
-
from .
|
|
10
|
+
from .browser_manager import get_session_browser_manager
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
async def execute_javascript(
|
|
@@ -21,14 +21,16 @@ async def execute_javascript(
|
|
|
21
21
|
message_group=group_id,
|
|
22
22
|
)
|
|
23
23
|
try:
|
|
24
|
-
browser_manager =
|
|
24
|
+
browser_manager = get_session_browser_manager()
|
|
25
25
|
page = await browser_manager.get_current_page()
|
|
26
26
|
|
|
27
27
|
if not page:
|
|
28
28
|
return {"success": False, "error": "No active browser page available"}
|
|
29
29
|
|
|
30
30
|
# Execute JavaScript
|
|
31
|
-
|
|
31
|
+
# Note: page.evaluate() does NOT accept a timeout parameter
|
|
32
|
+
# The timeout arg to this function is kept for API compatibility but unused
|
|
33
|
+
result = await page.evaluate(script)
|
|
32
34
|
|
|
33
35
|
emit_success("JavaScript executed successfully", message_group=group_id)
|
|
34
36
|
|
|
@@ -52,7 +54,7 @@ async def scroll_page(
|
|
|
52
54
|
message_group=group_id,
|
|
53
55
|
)
|
|
54
56
|
try:
|
|
55
|
-
browser_manager =
|
|
57
|
+
browser_manager = get_session_browser_manager()
|
|
56
58
|
page = await browser_manager.get_current_page()
|
|
57
59
|
|
|
58
60
|
if not page:
|
|
@@ -60,7 +62,7 @@ async def scroll_page(
|
|
|
60
62
|
|
|
61
63
|
if element_selector:
|
|
62
64
|
# Scroll specific element
|
|
63
|
-
element = page.locator(element_selector)
|
|
65
|
+
element = page.locator(element_selector).first
|
|
64
66
|
await element.scroll_into_view_if_needed()
|
|
65
67
|
|
|
66
68
|
# Get element's current scroll position and dimensions
|
|
@@ -146,13 +148,13 @@ async def scroll_to_element(
|
|
|
146
148
|
message_group=group_id,
|
|
147
149
|
)
|
|
148
150
|
try:
|
|
149
|
-
browser_manager =
|
|
151
|
+
browser_manager = get_session_browser_manager()
|
|
150
152
|
page = await browser_manager.get_current_page()
|
|
151
153
|
|
|
152
154
|
if not page:
|
|
153
155
|
return {"success": False, "error": "No active browser page available"}
|
|
154
156
|
|
|
155
|
-
element = page.locator(selector)
|
|
157
|
+
element = page.locator(selector).first
|
|
156
158
|
await element.wait_for(state="attached", timeout=timeout)
|
|
157
159
|
await element.scroll_into_view_if_needed()
|
|
158
160
|
|
|
@@ -178,7 +180,7 @@ async def set_viewport_size(
|
|
|
178
180
|
message_group=group_id,
|
|
179
181
|
)
|
|
180
182
|
try:
|
|
181
|
-
browser_manager =
|
|
183
|
+
browser_manager = get_session_browser_manager()
|
|
182
184
|
page = await browser_manager.get_current_page()
|
|
183
185
|
|
|
184
186
|
if not page:
|
|
@@ -209,13 +211,13 @@ async def wait_for_element(
|
|
|
209
211
|
message_group=group_id,
|
|
210
212
|
)
|
|
211
213
|
try:
|
|
212
|
-
browser_manager =
|
|
214
|
+
browser_manager = get_session_browser_manager()
|
|
213
215
|
page = await browser_manager.get_current_page()
|
|
214
216
|
|
|
215
217
|
if not page:
|
|
216
218
|
return {"success": False, "error": "No active browser page available"}
|
|
217
219
|
|
|
218
|
-
element = page.locator(selector)
|
|
220
|
+
element = page.locator(selector).first
|
|
219
221
|
await element.wait_for(state=state, timeout=timeout)
|
|
220
222
|
|
|
221
223
|
emit_success(f"Element {selector} is now {state}", message_group=group_id)
|
|
@@ -240,13 +242,13 @@ async def highlight_element(
|
|
|
240
242
|
message_group=group_id,
|
|
241
243
|
)
|
|
242
244
|
try:
|
|
243
|
-
browser_manager =
|
|
245
|
+
browser_manager = get_session_browser_manager()
|
|
244
246
|
page = await browser_manager.get_current_page()
|
|
245
247
|
|
|
246
248
|
if not page:
|
|
247
249
|
return {"success": False, "error": "No active browser page available"}
|
|
248
250
|
|
|
249
|
-
element = page.locator(selector)
|
|
251
|
+
element = page.locator(selector).first
|
|
250
252
|
await element.wait_for(state="visible", timeout=timeout)
|
|
251
253
|
|
|
252
254
|
# Add highlight style
|
|
@@ -277,7 +279,7 @@ async def clear_highlights() -> Dict[str, Any]:
|
|
|
277
279
|
message_group=group_id,
|
|
278
280
|
)
|
|
279
281
|
try:
|
|
280
|
-
browser_manager =
|
|
282
|
+
browser_manager = get_session_browser_manager()
|
|
281
283
|
page = await browser_manager.get_current_page()
|
|
282
284
|
|
|
283
285
|
if not page:
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
"""Chromium Terminal Manager - Simple Chromium browser for terminal use.
|
|
2
|
+
|
|
3
|
+
This module provides a browser manager for Chromium terminal automation.
|
|
4
|
+
Each instance gets its own ephemeral browser context, allowing multiple
|
|
5
|
+
terminal QA agents to run simultaneously without profile conflicts.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
import uuid
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
12
|
+
from playwright.async_api import Browser, BrowserContext, Page, async_playwright
|
|
13
|
+
|
|
14
|
+
from code_puppy.messaging import emit_info, emit_success
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
# Store active manager instances by session ID
|
|
19
|
+
_active_managers: dict[str, "ChromiumTerminalManager"] = {}
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ChromiumTerminalManager:
    """Browser manager for Chromium terminal automation.

    Each instance launches its own (non-persistent) Chromium browser and
    creates a fresh browser context inside it, so several terminal QA agents
    can run at the same time without sharing or locking a profile.

    Key features:
    - Ephemeral contexts (no profile locking issues)
    - Multiple instances can run simultaneously
    - Visible (headless=False) by default for terminal use
    - Simple API: initialize, get_current_page, new_page, close

    Usage:
        manager = get_chromium_terminal_manager()  # or with session_id
        await manager.async_initialize()
        page = await manager.get_current_page()
        await page.goto("https://example.com")
        await manager.close()
    """

    # Class-level defaults; each instance overwrites these on itself once
    # async_initialize() runs and resets them in _cleanup().
    _browser: Optional[Browser] = None
    _context: Optional[BrowserContext] = None
    _playwright: Optional[object] = None
    _initialized: bool = False

    def __init__(self, session_id: Optional[str] = None) -> None:
        """Initialize manager settings (no browser is launched here).

        Args:
            session_id: Optional session ID for tracking this instance.
                If None (or empty), the first 8 characters of a random
                UUID are used.
        """
        import os

        self.session_id = session_id or str(uuid.uuid4())[:8]

        # Default to headless=False - we want to see the terminal browser!
        # Only the literal value "true" (case-insensitive) in
        # CHROMIUM_HEADLESS switches to headless mode.
        self.headless = os.getenv("CHROMIUM_HEADLESS", "false").lower() == "true"

        logger.debug(
            f"ChromiumTerminalManager created: session={self.session_id}, "
            f"headless={self.headless}"
        )

    async def async_initialize(self) -> None:
        """Start Playwright, launch Chromium and create a browser context.

        Does nothing if this manager is already initialized.

        Raises:
            Exception: Whatever Playwright raised while starting, launching
                or creating the context. Partially created resources are
                cleaned up before the exception is re-raised.
        """
        if self._initialized:
            logger.debug(
                f"ChromiumTerminalManager {self.session_id} already initialized"
            )
            return

        try:
            emit_info(
                f"Initializing Chromium terminal browser (session: {self.session_id})..."
            )

            # Start Playwright
            self._playwright = await async_playwright().start()

            # Launch browser (not persistent - allows multiple instances)
            self._browser = await self._playwright.chromium.launch(
                headless=self.headless,
            )

            # Create ephemeral context
            self._context = await self._browser.new_context()
            self._initialized = True

            emit_success(
                f"Chromium terminal browser initialized (session: {self.session_id})"
            )
            logger.info(
                f"Chromium initialized: session={self.session_id}, headless={self.headless}"
            )

        except Exception as e:
            logger.error(f"Failed to initialize Chromium: {e}")
            # Release whatever was created before the failure.
            await self._cleanup()
            raise

    async def get_current_page(self) -> Optional[Page]:
        """Return the first open page of the context, creating one if needed.

        Lazily initializes the browser if not already initialized. Note that
        "current" here means the first entry of the context's page list, not
        necessarily the most recently opened page.

        Returns:
            The first open page (a new blank page if none existed), or None
            if no browser context is available.
        """
        if not self._initialized or not self._context:
            await self.async_initialize()

        if not self._context:
            logger.warning("No browser context available")
            return None

        pages = self._context.pages
        if pages:
            return pages[0]

        # Create a new blank page if none exist
        logger.debug("No existing pages, creating new blank page")
        return await self._context.new_page()

    async def new_page(self, url: Optional[str] = None) -> Page:
        """Create a new page, optionally navigating to a URL.

        Lazily initializes the browser if not already initialized.

        Args:
            url: Optional URL to navigate to after creating the page.

        Returns:
            The newly created page.

        Raises:
            RuntimeError: If browser context is not available.
        """
        if not self._initialized:
            await self.async_initialize()

        if not self._context:
            raise RuntimeError("Browser context not available")

        page = await self._context.new_page()
        logger.debug(f"Created new page{f' navigating to {url}' if url else ''}")

        if url:
            await page.goto(url)

        return page

    async def close_page(self, page: Page) -> None:
        """Close a specific page.

        Args:
            page: The page to close.
        """
        await page.close()
        logger.debug("Page closed")

    async def get_all_pages(self) -> list[Page]:
        """Get all open pages.

        Returns:
            List of all open pages, or empty list if no context.
        """
        if not self._context:
            return []
        return self._context.pages

    async def _cleanup(self, silent: bool = False) -> None:
        """Clean up browser resources.

        Closes the context, the browser and the Playwright driver (in that
        order), resets the instance state and unregisters this manager from
        the module-level registry. Cleanup is best-effort: a failure to close
        one resource never prevents the others from being released.

        Args:
            silent: If True, suppress all log output (used during shutdown).
        """
        try:
            if self._context:
                try:
                    await self._context.close()
                except Exception as e:
                    # Best-effort: keep going, but leave a trace for debugging.
                    if not silent:
                        logger.debug(f"Error closing browser context: {e}")
                self._context = None

            if self._browser:
                try:
                    await self._browser.close()
                except Exception as e:
                    if not silent:
                        logger.debug(f"Error closing browser: {e}")
                self._browser = None

            if self._playwright:
                try:
                    await self._playwright.stop()
                except Exception as e:
                    if not silent:
                        logger.debug(f"Error stopping Playwright: {e}")
                self._playwright = None

            self._initialized = False

            # Remove from active managers - but only our own entry. Another
            # manager may be registered under the same session ID (e.g. when
            # this instance was constructed directly); evicting it would
            # orphan its still-running browser.
            if _active_managers.get(self.session_id) is self:
                del _active_managers[self.session_id]

            if not silent:
                logger.debug(
                    f"Browser resources cleaned up (session: {self.session_id})"
                )

        except Exception as e:
            if not silent:
                logger.warning(f"Warning during cleanup: {e}")

    async def close(self) -> None:
        """Close the browser and clean up all resources.

        This properly shuts down the browser and releases all resources.
        Should be called when done with the browser.
        """
        await self._cleanup()
        emit_info(f"Chromium terminal browser closed (session: {self.session_id})")
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def get_chromium_terminal_manager(
    session_id: Optional[str] = None,
) -> ChromiumTerminalManager:
    """Look up the manager for a session, creating and registering it on demand.

    Args:
        session_id: Session to fetch the manager for. A falsy value (None or
            an empty string) selects the shared 'default' session.

    Returns:
        The ChromiumTerminalManager registered under the resolved session ID;
        repeated calls with the same ID return the same instance for as long
        as it stays registered.

    Example:
        # Default session (for single-agent use)
        manager = get_chromium_terminal_manager()

        # Named session (for multi-agent use)
        manager = get_chromium_terminal_manager("agent-1")
    """
    key = session_id if session_id else "default"

    try:
        return _active_managers[key]
    except KeyError:
        # First request for this session: build the manager and remember it.
        created = ChromiumTerminalManager(key)
        _active_managers[key] = created
        return created
|