code-puppy 0.0.348__py3-none-any.whl → 0.0.372__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. code_puppy/agents/__init__.py +8 -0
  2. code_puppy/agents/agent_manager.py +272 -1
  3. code_puppy/agents/agent_pack_leader.py +383 -0
  4. code_puppy/agents/agent_qa_kitten.py +12 -7
  5. code_puppy/agents/agent_terminal_qa.py +323 -0
  6. code_puppy/agents/base_agent.py +11 -8
  7. code_puppy/agents/event_stream_handler.py +101 -8
  8. code_puppy/agents/pack/__init__.py +34 -0
  9. code_puppy/agents/pack/bloodhound.py +304 -0
  10. code_puppy/agents/pack/husky.py +321 -0
  11. code_puppy/agents/pack/retriever.py +393 -0
  12. code_puppy/agents/pack/shepherd.py +348 -0
  13. code_puppy/agents/pack/terrier.py +287 -0
  14. code_puppy/agents/pack/watchdog.py +367 -0
  15. code_puppy/agents/subagent_stream_handler.py +276 -0
  16. code_puppy/api/__init__.py +13 -0
  17. code_puppy/api/app.py +169 -0
  18. code_puppy/api/main.py +21 -0
  19. code_puppy/api/pty_manager.py +446 -0
  20. code_puppy/api/routers/__init__.py +12 -0
  21. code_puppy/api/routers/agents.py +36 -0
  22. code_puppy/api/routers/commands.py +217 -0
  23. code_puppy/api/routers/config.py +74 -0
  24. code_puppy/api/routers/sessions.py +232 -0
  25. code_puppy/api/templates/terminal.html +361 -0
  26. code_puppy/api/websocket.py +154 -0
  27. code_puppy/callbacks.py +73 -0
  28. code_puppy/chatgpt_codex_client.py +53 -0
  29. code_puppy/claude_cache_client.py +294 -41
  30. code_puppy/command_line/add_model_menu.py +13 -4
  31. code_puppy/command_line/agent_menu.py +662 -0
  32. code_puppy/command_line/core_commands.py +89 -112
  33. code_puppy/command_line/model_picker_completion.py +3 -20
  34. code_puppy/command_line/model_settings_menu.py +21 -3
  35. code_puppy/config.py +145 -70
  36. code_puppy/gemini_model.py +706 -0
  37. code_puppy/http_utils.py +6 -3
  38. code_puppy/messaging/__init__.py +15 -0
  39. code_puppy/messaging/messages.py +27 -0
  40. code_puppy/messaging/queue_console.py +1 -1
  41. code_puppy/messaging/rich_renderer.py +36 -1
  42. code_puppy/messaging/spinner/__init__.py +20 -2
  43. code_puppy/messaging/subagent_console.py +461 -0
  44. code_puppy/model_factory.py +50 -16
  45. code_puppy/model_switching.py +63 -0
  46. code_puppy/model_utils.py +27 -24
  47. code_puppy/models.json +12 -12
  48. code_puppy/plugins/antigravity_oauth/antigravity_model.py +206 -172
  49. code_puppy/plugins/antigravity_oauth/register_callbacks.py +15 -8
  50. code_puppy/plugins/antigravity_oauth/transport.py +236 -45
  51. code_puppy/plugins/chatgpt_oauth/register_callbacks.py +2 -2
  52. code_puppy/plugins/claude_code_oauth/register_callbacks.py +2 -30
  53. code_puppy/plugins/claude_code_oauth/utils.py +4 -1
  54. code_puppy/plugins/frontend_emitter/__init__.py +25 -0
  55. code_puppy/plugins/frontend_emitter/emitter.py +121 -0
  56. code_puppy/plugins/frontend_emitter/register_callbacks.py +261 -0
  57. code_puppy/prompts/antigravity_system_prompt.md +1 -0
  58. code_puppy/pydantic_patches.py +52 -0
  59. code_puppy/status_display.py +6 -2
  60. code_puppy/tools/__init__.py +37 -1
  61. code_puppy/tools/agent_tools.py +83 -33
  62. code_puppy/tools/browser/__init__.py +37 -0
  63. code_puppy/tools/browser/browser_control.py +6 -6
  64. code_puppy/tools/browser/browser_interactions.py +21 -20
  65. code_puppy/tools/browser/browser_locators.py +9 -9
  66. code_puppy/tools/browser/browser_manager.py +316 -0
  67. code_puppy/tools/browser/browser_navigation.py +7 -7
  68. code_puppy/tools/browser/browser_screenshot.py +78 -140
  69. code_puppy/tools/browser/browser_scripts.py +15 -13
  70. code_puppy/tools/browser/chromium_terminal_manager.py +259 -0
  71. code_puppy/tools/browser/terminal_command_tools.py +521 -0
  72. code_puppy/tools/browser/terminal_screenshot_tools.py +556 -0
  73. code_puppy/tools/browser/terminal_tools.py +525 -0
  74. code_puppy/tools/command_runner.py +292 -101
  75. code_puppy/tools/common.py +176 -1
  76. code_puppy/tools/display.py +84 -0
  77. code_puppy/tools/subagent_context.py +158 -0
  78. {code_puppy-0.0.348.data → code_puppy-0.0.372.data}/data/code_puppy/models.json +12 -12
  79. {code_puppy-0.0.348.dist-info → code_puppy-0.0.372.dist-info}/METADATA +17 -16
  80. {code_puppy-0.0.348.dist-info → code_puppy-0.0.372.dist-info}/RECORD +84 -51
  81. code_puppy/prompts/codex_system_prompt.md +0 -310
  82. code_puppy/tools/browser/camoufox_manager.py +0 -235
  83. code_puppy/tools/browser/vqa_agent.py +0 -90
  84. {code_puppy-0.0.348.data → code_puppy-0.0.372.data}/data/code_puppy/models_dev_api.json +0 -0
  85. {code_puppy-0.0.348.dist-info → code_puppy-0.0.372.dist-info}/WHEEL +0 -0
  86. {code_puppy-0.0.348.dist-info → code_puppy-0.0.372.dist-info}/entry_points.txt +0 -0
  87. {code_puppy-0.0.348.dist-info → code_puppy-0.0.372.dist-info}/licenses/LICENSE +0 -0
@@ -1,19 +1,21 @@
1
- """Screenshot and visual analysis tool with VQA capabilities."""
1
+ """Screenshot tool for browser automation.
2
2
 
3
- import asyncio
3
+ Captures screenshots and returns them via ToolReturn with BinaryContent
4
+ so multimodal models can directly see and analyze - no separate VQA agent needed.
5
+ """
6
+
7
+ import time
4
8
  from datetime import datetime
5
9
  from pathlib import Path
6
10
  from tempfile import gettempdir, mkdtemp
7
- from typing import Any, Dict, Optional
11
+ from typing import Any, Dict, Optional, Union
8
12
 
9
- from pydantic import BaseModel
10
- from pydantic_ai import RunContext
13
+ from pydantic_ai import BinaryContent, RunContext, ToolReturn
11
14
 
12
15
  from code_puppy.messaging import emit_error, emit_info, emit_success
13
16
  from code_puppy.tools.common import generate_group_id
14
17
 
15
- from .camoufox_manager import get_camoufox_manager
16
- from .vqa_agent import run_vqa_analysis
18
+ from .browser_manager import get_session_browser_manager
17
19
 
18
20
  _TEMP_SCREENSHOT_ROOT = Path(
19
21
  mkdtemp(prefix="code_puppy_screenshots_", dir=gettempdir())
@@ -21,21 +23,11 @@ _TEMP_SCREENSHOT_ROOT = Path(
21
23
 
22
24
 
23
25
  def _build_screenshot_path(timestamp: str) -> Path:
24
- """Return the target path for a screenshot using a shared temp directory."""
26
+ """Return the target path for a screenshot."""
25
27
  filename = f"screenshot_{timestamp}.png"
26
28
  return _TEMP_SCREENSHOT_ROOT / filename
27
29
 
28
30
 
29
- class ScreenshotResult(BaseModel):
30
- """Result from screenshot operation."""
31
-
32
- success: bool
33
- screenshot_path: Optional[str] = None
34
- screenshot_data: Optional[bytes] = None
35
- timestamp: Optional[str] = None
36
- error: Optional[str] = None
37
-
38
-
39
31
  async def _capture_screenshot(
40
32
  page,
41
33
  full_page: bool = False,
@@ -45,41 +37,37 @@ async def _capture_screenshot(
45
37
  ) -> Dict[str, Any]:
46
38
  """Internal screenshot capture function."""
47
39
  try:
48
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
40
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
49
41
 
50
42
  # Take screenshot
51
43
  if element_selector:
52
- # Screenshot specific element
53
44
  element = await page.locator(element_selector).first
54
45
  if not await element.is_visible():
55
46
  return {
56
47
  "success": False,
57
48
  "error": f"Element '{element_selector}' is not visible",
58
49
  }
59
- screenshot_data = await element.screenshot()
50
+ screenshot_bytes = await element.screenshot()
60
51
  else:
61
- # Screenshot page or full page
62
- screenshot_data = await page.screenshot(full_page=full_page)
52
+ screenshot_bytes = await page.screenshot(full_page=full_page)
63
53
 
64
- result = {
54
+ result: Dict[str, Any] = {
65
55
  "success": True,
66
- "screenshot_data": screenshot_data,
56
+ "screenshot_bytes": screenshot_bytes,
67
57
  "timestamp": timestamp,
68
58
  }
69
59
 
70
60
  if save_screenshot:
71
61
  screenshot_path = _build_screenshot_path(timestamp)
72
62
  screenshot_path.parent.mkdir(parents=True, exist_ok=True)
73
-
74
63
  with open(screenshot_path, "wb") as f:
75
- f.write(screenshot_data)
76
-
64
+ f.write(screenshot_bytes)
77
65
  result["screenshot_path"] = str(screenshot_path)
78
- message = f"Screenshot saved: {screenshot_path}"
66
+
79
67
  if group_id:
80
- emit_success(message, message_group=group_id)
81
- else:
82
- emit_success(message)
68
+ emit_success(
69
+ f"Screenshot saved: {screenshot_path}", message_group=group_id
70
+ )
83
71
 
84
72
  return result
85
73
 
@@ -87,46 +75,42 @@ async def _capture_screenshot(
87
75
  return {"success": False, "error": str(e)}
88
76
 
89
77
 
90
- async def take_screenshot_and_analyze(
91
- question: str,
78
+ async def take_screenshot(
92
79
  full_page: bool = False,
93
80
  element_selector: Optional[str] = None,
94
81
  save_screenshot: bool = True,
95
- ) -> Dict[str, Any]:
96
- """
97
- Take a screenshot and analyze it using visual understanding.
82
+ ) -> Union[ToolReturn, Dict[str, Any]]:
83
+ """Take a screenshot of the browser page.
84
+
85
+ Returns a ToolReturn with BinaryContent so multimodal models can
86
+ directly see and analyze the screenshot.
98
87
 
99
88
  Args:
100
- question: The specific question to ask about the screenshot
101
- full_page: Whether to capture the full page or just viewport
102
- element_selector: Optional selector to screenshot just a specific element
103
- save_screenshot: Whether to save the screenshot to disk
89
+ full_page: Whether to capture full page or just viewport.
90
+ element_selector: Optional selector to screenshot specific element.
91
+ save_screenshot: Whether to save the screenshot to disk.
104
92
 
105
93
  Returns:
106
- Dict containing analysis results and screenshot info
94
+ ToolReturn containing:
95
+ - return_value: Success message with screenshot path
96
+ - content: List with description and BinaryContent image
97
+ - metadata: Screenshot details (path, target, timestamp)
98
+ Or Dict with error info if failed.
107
99
  """
108
100
  target = element_selector or ("full_page" if full_page else "viewport")
109
- group_id = generate_group_id(
110
- "browser_screenshot_analyze", f"{question[:50]}_{target}"
111
- )
112
- emit_info(
113
- f"BROWSER SCREENSHOT ANALYZE 📷 question='{question[:100]}{'...' if len(question) > 100 else ''}' target={target}",
114
- message_group=group_id,
115
- )
101
+ group_id = generate_group_id("browser_screenshot", target)
102
+ emit_info(f"BROWSER SCREENSHOT 📷 target={target}", message_group=group_id)
103
+
116
104
  try:
117
- # Get the current browser page
118
- browser_manager = get_camoufox_manager()
105
+ browser_manager = get_session_browser_manager()
119
106
  page = await browser_manager.get_current_page()
120
107
 
121
108
  if not page:
122
- return {
123
- "success": False,
124
- "error": "No active browser page available. Please navigate to a webpage first.",
125
- "question": question,
126
- }
109
+ error_msg = "No active browser page. Navigate to a webpage first."
110
+ emit_error(error_msg, message_group=group_id)
111
+ return {"success": False, "error": error_msg}
127
112
 
128
- # Take screenshot
129
- screenshot_result = await _capture_screenshot(
113
+ result = await _capture_screenshot(
130
114
  page,
131
115
  full_page=full_page,
132
116
  element_selector=element_selector,
@@ -134,108 +118,62 @@ async def take_screenshot_and_analyze(
134
118
  group_id=group_id,
135
119
  )
136
120
 
137
- if not screenshot_result["success"]:
138
- error_message = screenshot_result.get("error", "Screenshot failed")
139
- emit_error(
140
- f"Screenshot capture failed: {error_message}",
141
- message_group=group_id,
142
- )
143
- return {
144
- "success": False,
145
- "error": error_message,
146
- "question": question,
147
- }
148
-
149
- screenshot_bytes = screenshot_result.get("screenshot_data")
150
- if not screenshot_bytes:
151
- emit_error(
152
- "Screenshot captured but pixel data missing; cannot run visual analysis.",
153
- message_group=group_id,
154
- )
155
- return {
156
- "success": False,
157
- "error": "Screenshot captured but no image bytes available for analysis.",
158
- "question": question,
159
- }
160
-
161
- try:
162
- vqa_result = await asyncio.to_thread(
163
- run_vqa_analysis,
164
- question,
165
- screenshot_bytes,
166
- )
167
- except Exception as exc:
168
- emit_error(
169
- f"Visual question answering failed: {exc}",
170
- message_group=group_id,
171
- )
172
- return {
173
- "success": False,
174
- "error": f"Visual analysis failed: {exc}",
175
- "question": question,
176
- "screenshot_info": {
177
- "path": screenshot_result.get("screenshot_path"),
178
- "timestamp": screenshot_result.get("timestamp"),
179
- "full_page": full_page,
180
- "element_selector": element_selector,
181
- },
182
- }
183
-
184
- emit_success(
185
- f"Visual analysis answer: {vqa_result.answer}",
186
- message_group=group_id,
187
- )
188
- emit_info(
189
- f"Observations: {vqa_result.observations}",
190
- message_group=group_id,
191
- )
192
-
193
- return {
194
- "success": True,
195
- "question": question,
196
- "answer": vqa_result.answer,
197
- "confidence": vqa_result.confidence,
198
- "observations": vqa_result.observations,
199
- "screenshot_info": {
200
- "path": screenshot_result.get("screenshot_path"),
201
- "size": len(screenshot_bytes),
202
- "timestamp": screenshot_result.get("timestamp"),
121
+ if not result["success"]:
122
+ emit_error(result.get("error", "Screenshot failed"), message_group=group_id)
123
+ return {"success": False, "error": result.get("error")}
124
+
125
+ screenshot_path = result.get("screenshot_path", "(not saved)")
126
+
127
+ # Return as ToolReturn with BinaryContent so the model can SEE the image!
128
+ return ToolReturn(
129
+ return_value=f"Screenshot captured successfully. Saved to: {screenshot_path}",
130
+ content=[
131
+ f"Here's the browser screenshot ({target}):",
132
+ BinaryContent(
133
+ data=result["screenshot_bytes"],
134
+ media_type="image/png",
135
+ ),
136
+ "Please analyze what you see and describe any relevant details.",
137
+ ],
138
+ metadata={
139
+ "success": True,
140
+ "screenshot_path": screenshot_path,
141
+ "target": target,
203
142
  "full_page": full_page,
204
143
  "element_selector": element_selector,
144
+ "timestamp": time.time(),
205
145
  },
206
- }
146
+ )
207
147
 
208
148
  except Exception as e:
209
- emit_error(f"Screenshot analysis failed: {str(e)}", message_group=group_id)
210
- return {"success": False, "error": str(e), "question": question}
149
+ error_msg = f"Screenshot failed: {str(e)}"
150
+ emit_error(error_msg, message_group=group_id)
151
+ return {"success": False, "error": error_msg}
211
152
 
212
153
 
213
154
  def register_take_screenshot_and_analyze(agent):
214
- """Register the screenshot analysis tool."""
155
+ """Register the screenshot tool."""
215
156
 
216
157
  @agent.tool
217
158
  async def browser_screenshot_analyze(
218
159
  context: RunContext,
219
- question: str,
220
160
  full_page: bool = False,
221
161
  element_selector: Optional[str] = None,
222
- save_screenshot: bool = True,
223
- ) -> Dict[str, Any]:
162
+ ) -> Union[ToolReturn, Dict[str, Any]]:
224
163
  """
225
- Take a screenshot and analyze it to answer a specific question.
164
+ Take a screenshot of the browser page.
165
+
166
+ Returns the screenshot via ToolReturn with BinaryContent that you can
167
+ see directly. Use this to see what's displayed in the browser.
226
168
 
227
169
  Args:
228
- question: The specific question to ask about the screenshot
229
- full_page: Whether to capture the full page or just viewport
230
- element_selector: Optional CSS/XPath selector to screenshot specific element
231
- save_screenshot: Whether to save the screenshot to disk
170
+ full_page: Capture full page (True) or just viewport (False).
171
+ element_selector: Optional CSS selector to screenshot specific element.
232
172
 
233
173
  Returns:
234
- Dict with analysis results including answer, confidence, and observations
174
+ ToolReturn with the screenshot image you can analyze, or error dict.
235
175
  """
236
- return await take_screenshot_and_analyze(
237
- question=question,
176
+ return await take_screenshot(
238
177
  full_page=full_page,
239
178
  element_selector=element_selector,
240
- save_screenshot=save_screenshot,
241
179
  )
@@ -7,7 +7,7 @@ from pydantic_ai import RunContext
7
7
  from code_puppy.messaging import emit_error, emit_info, emit_success
8
8
  from code_puppy.tools.common import generate_group_id
9
9
 
10
- from .camoufox_manager import get_camoufox_manager
10
+ from .browser_manager import get_session_browser_manager
11
11
 
12
12
 
13
13
  async def execute_javascript(
@@ -21,14 +21,16 @@ async def execute_javascript(
21
21
  message_group=group_id,
22
22
  )
23
23
  try:
24
- browser_manager = get_camoufox_manager()
24
+ browser_manager = get_session_browser_manager()
25
25
  page = await browser_manager.get_current_page()
26
26
 
27
27
  if not page:
28
28
  return {"success": False, "error": "No active browser page available"}
29
29
 
30
30
  # Execute JavaScript
31
- result = await page.evaluate(script, timeout=timeout)
31
+ # Note: page.evaluate() does NOT accept a timeout parameter
32
+ # The timeout arg to this function is kept for API compatibility but unused
33
+ result = await page.evaluate(script)
32
34
 
33
35
  emit_success("JavaScript executed successfully", message_group=group_id)
34
36
 
@@ -52,7 +54,7 @@ async def scroll_page(
52
54
  message_group=group_id,
53
55
  )
54
56
  try:
55
- browser_manager = get_camoufox_manager()
57
+ browser_manager = get_session_browser_manager()
56
58
  page = await browser_manager.get_current_page()
57
59
 
58
60
  if not page:
@@ -60,7 +62,7 @@ async def scroll_page(
60
62
 
61
63
  if element_selector:
62
64
  # Scroll specific element
63
- element = page.locator(element_selector)
65
+ element = page.locator(element_selector).first
64
66
  await element.scroll_into_view_if_needed()
65
67
 
66
68
  # Get element's current scroll position and dimensions
@@ -146,13 +148,13 @@ async def scroll_to_element(
146
148
  message_group=group_id,
147
149
  )
148
150
  try:
149
- browser_manager = get_camoufox_manager()
151
+ browser_manager = get_session_browser_manager()
150
152
  page = await browser_manager.get_current_page()
151
153
 
152
154
  if not page:
153
155
  return {"success": False, "error": "No active browser page available"}
154
156
 
155
- element = page.locator(selector)
157
+ element = page.locator(selector).first
156
158
  await element.wait_for(state="attached", timeout=timeout)
157
159
  await element.scroll_into_view_if_needed()
158
160
 
@@ -178,7 +180,7 @@ async def set_viewport_size(
178
180
  message_group=group_id,
179
181
  )
180
182
  try:
181
- browser_manager = get_camoufox_manager()
183
+ browser_manager = get_session_browser_manager()
182
184
  page = await browser_manager.get_current_page()
183
185
 
184
186
  if not page:
@@ -209,13 +211,13 @@ async def wait_for_element(
209
211
  message_group=group_id,
210
212
  )
211
213
  try:
212
- browser_manager = get_camoufox_manager()
214
+ browser_manager = get_session_browser_manager()
213
215
  page = await browser_manager.get_current_page()
214
216
 
215
217
  if not page:
216
218
  return {"success": False, "error": "No active browser page available"}
217
219
 
218
- element = page.locator(selector)
220
+ element = page.locator(selector).first
219
221
  await element.wait_for(state=state, timeout=timeout)
220
222
 
221
223
  emit_success(f"Element {selector} is now {state}", message_group=group_id)
@@ -240,13 +242,13 @@ async def highlight_element(
240
242
  message_group=group_id,
241
243
  )
242
244
  try:
243
- browser_manager = get_camoufox_manager()
245
+ browser_manager = get_session_browser_manager()
244
246
  page = await browser_manager.get_current_page()
245
247
 
246
248
  if not page:
247
249
  return {"success": False, "error": "No active browser page available"}
248
250
 
249
- element = page.locator(selector)
251
+ element = page.locator(selector).first
250
252
  await element.wait_for(state="visible", timeout=timeout)
251
253
 
252
254
  # Add highlight style
@@ -277,7 +279,7 @@ async def clear_highlights() -> Dict[str, Any]:
277
279
  message_group=group_id,
278
280
  )
279
281
  try:
280
- browser_manager = get_camoufox_manager()
282
+ browser_manager = get_session_browser_manager()
281
283
  page = await browser_manager.get_current_page()
282
284
 
283
285
  if not page:
@@ -0,0 +1,259 @@
1
+ """Chromium Terminal Manager - Simple Chromium browser for terminal use.
2
+
3
+ This module provides a browser manager for Chromium terminal automation.
4
+ Each instance gets its own ephemeral browser context, allowing multiple
5
+ terminal QA agents to run simultaneously without profile conflicts.
6
+ """
7
+
8
+ import logging
9
+ import uuid
10
+ from typing import Optional
11
+
12
+ from playwright.async_api import Browser, BrowserContext, Page, async_playwright
13
+
14
+ from code_puppy.messaging import emit_info, emit_success
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ # Store active manager instances by session ID
19
+ _active_managers: dict[str, "ChromiumTerminalManager"] = {}
20
+
21
+
22
class ChromiumTerminalManager:
    """Browser manager for Chromium terminal automation.

    Each instance launches its own (non-persistent) Chromium browser and a
    fresh browser context, so several managers can run side by side without
    fighting over a shared profile directory.

    Key features:
    - Ephemeral contexts (no profile locking issues)
    - Multiple instances can run simultaneously
    - Visible (headless=False) by default for terminal use
    - Simple API: initialize, get_current_page, new_page, close

    Usage:
        manager = get_chromium_terminal_manager()  # or with session_id
        await manager.async_initialize()
        page = await manager.get_current_page()
        await page.goto("https://example.com")
        await manager.close()
    """

    # Class-level defaults; each instance shadows them on first assignment.
    _browser: Optional[Browser] = None
    _context: Optional[BrowserContext] = None
    _playwright: Optional[object] = None
    _initialized: bool = False

    def __init__(self, session_id: Optional[str] = None) -> None:
        """Store the session settings; no browser is launched here.

        Args:
            session_id: Identifier used in log/status messages and as the
                registry key. When falsy (None or empty), the first eight
                characters of a random UUID4 are used instead.
        """
        import os

        self.session_id = session_id or str(uuid.uuid4())[:8]

        # Visible by default - we want to see the terminal browser.
        # Only the exact (case-insensitive) value "true" in CHROMIUM_HEADLESS
        # switches to headless mode.
        self.headless = os.getenv("CHROMIUM_HEADLESS", "false").lower() == "true"

        logger.debug(
            "ChromiumTerminalManager created: session=%s, headless=%s",
            self.session_id,
            self.headless,
        )

    async def async_initialize(self) -> None:
        """Start Playwright, launch Chromium and create a browser context.

        Calling this on an already-initialized manager is a no-op.

        Raises:
            Exception: Whatever Playwright raised during startup. Partially
                created resources are cleaned up before re-raising.
        """
        if self._initialized:
            logger.debug(
                "ChromiumTerminalManager %s already initialized", self.session_id
            )
            return

        try:
            emit_info(
                f"Initializing Chromium terminal browser (session: {self.session_id})..."
            )

            # Start Playwright
            self._playwright = await async_playwright().start()

            # Launch browser (not persistent - allows multiple instances)
            self._browser = await self._playwright.chromium.launch(
                headless=self.headless,
            )

            # Create ephemeral context
            self._context = await self._browser.new_context()
            self._initialized = True

            emit_success(
                f"Chromium terminal browser initialized (session: {self.session_id})"
            )
            logger.info(
                "Chromium initialized: session=%s, headless=%s",
                self.session_id,
                self.headless,
            )

        except Exception as e:
            logger.error("Failed to initialize Chromium: %s", e)
            # Tear down whatever was started so a retry begins from scratch.
            await self._cleanup()
            raise

    async def get_current_page(self) -> Optional[Page]:
        """Return the first open page of the context, creating one if needed.

        Lazily initializes the browser if it is not initialized yet.

        Returns:
            The first page in the context (a new blank page when the context
            has none), or None if no context is available.
        """
        if not self._initialized or not self._context:
            await self.async_initialize()

        if not self._context:
            logger.warning("No browser context available")
            return None

        pages = self._context.pages
        if pages:
            return pages[0]

        # Create a new blank page if none exist
        logger.debug("No existing pages, creating new blank page")
        return await self._context.new_page()

    async def new_page(self, url: Optional[str] = None) -> Page:
        """Create a new page, optionally navigating to a URL.

        Lazily initializes the browser if it is not initialized yet.

        Args:
            url: Optional URL to navigate to after creating the page.

        Returns:
            The newly created page.

        Raises:
            RuntimeError: If the browser context is not available.
        """
        if not self._initialized:
            await self.async_initialize()

        if not self._context:
            raise RuntimeError("Browser context not available")

        page = await self._context.new_page()
        logger.debug("Created new page%s", f" navigating to {url}" if url else "")

        if url:
            await page.goto(url)

        return page

    async def close_page(self, page: Page) -> None:
        """Close a specific page.

        Args:
            page: The page to close.
        """
        await page.close()
        logger.debug("Page closed")

    async def get_all_pages(self) -> list[Page]:
        """Return all open pages.

        Returns:
            The pages of the current context, or an empty list when there is
            no context.
        """
        if not self._context:
            return []
        return self._context.pages

    @staticmethod
    async def _release(label: str, closer, silent: bool) -> None:
        """Await ``closer()`` best-effort, logging (not raising) any failure."""
        try:
            await closer()
        except Exception as exc:
            # Cleanup must never raise, but the failure should stay visible
            # unless the caller explicitly asked for silence.
            if not silent:
                logger.debug("Ignoring error while closing %s: %s", label, exc)

    async def _cleanup(self, silent: bool = False) -> None:
        """Release context, browser and Playwright, then unregister.

        Resources are closed in reverse order of creation. Errors from any
        individual close are swallowed so the remaining resources still get
        released.

        Args:
            silent: If True, emit no log output (used during shutdown).
        """
        try:
            if self._context is not None:
                await self._release("browser context", self._context.close, silent)
                self._context = None

            if self._browser is not None:
                await self._release("browser", self._browser.close, silent)
                self._browser = None

            if self._playwright is not None:
                await self._release("playwright", self._playwright.stop, silent)
                self._playwright = None

            self._initialized = False

            # Unregister only if the registry entry is this very instance;
            # a directly constructed manager that happens to share a session
            # id must not evict the one that is actually registered.
            if _active_managers.get(self.session_id) is self:
                del _active_managers[self.session_id]

            if not silent:
                logger.debug(
                    "Browser resources cleaned up (session: %s)", self.session_id
                )

        except Exception as e:
            if not silent:
                logger.warning("Warning during cleanup: %s", e)

    async def close(self) -> None:
        """Close the browser and clean up all resources.

        Should be called when done with the browser. Emits a status message
        after cleanup.
        """
        await self._cleanup()
        emit_info(f"Chromium terminal browser closed (session: {self.session_id})")
232
+
233
+
234
def get_chromium_terminal_manager(
    session_id: Optional[str] = None,
) -> ChromiumTerminalManager:
    """Return the manager registered for a session, creating it on first use.

    Managers are cached in the module-level ``_active_managers`` registry,
    keyed by session ID, so repeated calls with the same ID hand back the
    same instance.

    Args:
        session_id: Session to look up. A falsy value (None or an empty
            string) selects the shared ``"default"`` session.

    Returns:
        The ChromiumTerminalManager registered under the resolved session ID.

    Example:
        # Default session (for single-agent use)
        manager = get_chromium_terminal_manager()

        # Named session (for multi-agent use)
        manager = get_chromium_terminal_manager("agent-1")
    """
    key = session_id if session_id else "default"

    manager = _active_managers.get(key)
    if manager is None:
        # First request for this session: build and register a new manager.
        manager = ChromiumTerminalManager(key)
        _active_managers[key] = manager

    return manager