code-puppy 0.0.356__py3-none-any.whl → 0.0.357__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. code_puppy/agents/agent_qa_kitten.py +10 -5
  2. code_puppy/agents/agent_terminal_qa.py +323 -0
  3. code_puppy/api/app.py +79 -2
  4. code_puppy/api/routers/commands.py +21 -2
  5. code_puppy/api/routers/sessions.py +49 -8
  6. code_puppy/config.py +5 -2
  7. code_puppy/tools/__init__.py +37 -0
  8. code_puppy/tools/agent_tools.py +26 -1
  9. code_puppy/tools/browser/__init__.py +41 -0
  10. code_puppy/tools/browser/browser_control.py +6 -6
  11. code_puppy/tools/browser/browser_interactions.py +21 -20
  12. code_puppy/tools/browser/browser_locators.py +9 -9
  13. code_puppy/tools/browser/browser_navigation.py +7 -7
  14. code_puppy/tools/browser/browser_screenshot.py +60 -135
  15. code_puppy/tools/browser/browser_screenshot_vqa.py +195 -0
  16. code_puppy/tools/browser/browser_scripts.py +15 -13
  17. code_puppy/tools/browser/camoufox_manager.py +226 -64
  18. code_puppy/tools/browser/chromium_terminal_manager.py +259 -0
  19. code_puppy/tools/browser/terminal_command_tools.py +521 -0
  20. code_puppy/tools/browser/terminal_screenshot_tools.py +520 -0
  21. code_puppy/tools/browser/terminal_tools.py +525 -0
  22. code_puppy/tools/browser/vqa_agent.py +138 -34
  23. code_puppy/tools/command_runner.py +0 -1
  24. {code_puppy-0.0.356.dist-info → code_puppy-0.0.357.dist-info}/METADATA +1 -1
  25. {code_puppy-0.0.356.dist-info → code_puppy-0.0.357.dist-info}/RECORD +30 -24
  26. {code_puppy-0.0.356.data → code_puppy-0.0.357.data}/data/code_puppy/models.json +0 -0
  27. {code_puppy-0.0.356.data → code_puppy-0.0.357.data}/data/code_puppy/models_dev_api.json +0 -0
  28. {code_puppy-0.0.356.dist-info → code_puppy-0.0.357.dist-info}/WHEEL +0 -0
  29. {code_puppy-0.0.356.dist-info → code_puppy-0.0.357.dist-info}/entry_points.txt +0 -0
  30. {code_puppy-0.0.356.dist-info → code_puppy-0.0.357.dist-info}/licenses/LICENSE +0 -0
@@ -41,6 +41,9 @@ from code_puppy.tools.browser.browser_navigation import (
41
41
  from code_puppy.tools.browser.browser_screenshot import (
42
42
  register_take_screenshot_and_analyze,
43
43
  )
44
+ from code_puppy.tools.browser.browser_screenshot_vqa import (
45
+ register_take_screenshot_and_analyze_vqa,
46
+ )
44
47
  from code_puppy.tools.browser.browser_scripts import (
45
48
  register_browser_clear_highlights,
46
49
  register_browser_highlight_element,
@@ -55,6 +58,25 @@ from code_puppy.tools.browser.browser_workflows import (
55
58
  register_read_workflow,
56
59
  register_save_workflow,
57
60
  )
61
+ from code_puppy.tools.browser.terminal_command_tools import (
62
+ register_run_terminal_command,
63
+ register_send_terminal_keys,
64
+ register_wait_terminal_output,
65
+ )
66
+ from code_puppy.tools.browser.terminal_screenshot_tools import (
67
+ register_load_image,
68
+ register_terminal_compare_mockup,
69
+ register_terminal_read_output,
70
+ register_terminal_screenshot,
71
+ )
72
+
73
+ # Terminal automation tools
74
+ from code_puppy.tools.browser.terminal_tools import (
75
+ register_check_terminal_server,
76
+ register_close_terminal,
77
+ register_open_terminal,
78
+ register_start_api_server,
79
+ )
58
80
  from code_puppy.tools.command_runner import (
59
81
  register_agent_run_shell_command,
60
82
  register_agent_share_your_reasoning,
@@ -126,10 +148,25 @@ TOOL_REGISTRY = {
126
148
  "browser_clear_highlights": register_browser_clear_highlights,
127
149
  # Browser Screenshots and VQA
128
150
  "browser_screenshot_analyze": register_take_screenshot_and_analyze,
151
+ "browser_screenshot_vqa": register_take_screenshot_and_analyze_vqa,
129
152
  # Browser Workflows
130
153
  "browser_save_workflow": register_save_workflow,
131
154
  "browser_list_workflows": register_list_workflows,
132
155
  "browser_read_workflow": register_read_workflow,
156
+ # Terminal Connection Tools
157
+ "terminal_check_server": register_check_terminal_server,
158
+ "terminal_open": register_open_terminal,
159
+ "terminal_close": register_close_terminal,
160
+ "start_api_server": register_start_api_server,
161
+ # Terminal Command Execution Tools
162
+ "terminal_run_command": register_run_terminal_command,
163
+ "terminal_send_keys": register_send_terminal_keys,
164
+ "terminal_wait_output": register_wait_terminal_output,
165
+ # Terminal Screenshot Tools
166
+ "terminal_screenshot_analyze": register_terminal_screenshot,
167
+ "terminal_read_output": register_terminal_read_output,
168
+ "terminal_compare_mockup": register_terminal_compare_mockup,
169
+ "load_image_for_analysis": register_load_image,
133
170
  }
134
171
 
135
172
 
@@ -1,6 +1,5 @@
1
1
  # agent_tools.py
2
2
  import asyncio
3
- from functools import partial
4
3
  import hashlib
5
4
  import itertools
6
5
  import json
@@ -8,6 +7,7 @@ import pickle
8
7
  import re
9
8
  import traceback
10
9
  from datetime import datetime
10
+ from functools import partial
11
11
  from pathlib import Path
12
12
  from typing import List, Set
13
13
 
@@ -434,6 +434,23 @@ def register_invoke_agent(agent):
434
434
  previous_session_id = get_session_context()
435
435
  set_session_context(session_id)
436
436
 
437
+ # Set terminal session for browser-based terminal tools
438
+ # This uses contextvars which properly propagate through async tasks
439
+ from code_puppy.tools.browser.terminal_tools import (
440
+ _terminal_session_var,
441
+ set_terminal_session,
442
+ )
443
+
444
+ terminal_session_token = set_terminal_session(f"terminal-{session_id}")
445
+
446
+ # Set browser session for Camoufox browser tools (qa-kitten, etc.)
447
+ # This allows parallel agent invocations to each have their own browser
448
+ from code_puppy.tools.browser.camoufox_manager import (
449
+ set_browser_session,
450
+ )
451
+
452
+ browser_session_token = set_browser_session(f"browser-{session_id}")
453
+
437
454
  try:
438
455
  # Lazy import to break circular dependency with messaging module
439
456
  from code_puppy.model_factory import ModelFactory, make_model_settings
@@ -645,5 +662,13 @@ def register_invoke_agent(agent):
645
662
  finally:
646
663
  # Restore the previous session context
647
664
  set_session_context(previous_session_id)
665
+ # Reset terminal session context
666
+ _terminal_session_var.reset(terminal_session_token)
667
+ # Reset browser session context
668
+ from code_puppy.tools.browser.camoufox_manager import (
669
+ _browser_session_var,
670
+ )
671
+
672
+ _browser_session_var.reset(browser_session_token)
648
673
 
649
674
  return invoke_agent
@@ -0,0 +1,41 @@
1
+ """Browser tools for terminal automation.
2
+
3
+ This module provides browser-based terminal automation tools.
4
+ """
5
+
6
+ from code_puppy.config import get_banner_color
7
+
8
+ from .camoufox_manager import (
9
+ cleanup_all_browsers,
10
+ get_browser_session,
11
+ get_session_browser_manager,
12
+ set_browser_session,
13
+ )
14
+ from .vqa_agent import VisualAnalysisResult, run_vqa_analysis, run_vqa_analysis_stream
15
+
16
+
17
+ def format_terminal_banner(text: str) -> str:
18
+ """Format a terminal tool banner with the configured terminal_tool color.
19
+
20
+ Returns Rich markup string that can be used with Text.from_markup().
21
+
22
+ Args:
23
+ text: The banner text (e.g., "TERMINAL OPEN 🖥️ localhost:8765")
24
+
25
+ Returns:
26
+ Rich markup formatted string
27
+ """
28
+ color = get_banner_color("terminal_tool")
29
+ return f"[bold white on {color}] {text} [/bold white on {color}]"
30
+
31
+
32
+ __all__ = [
33
+ "format_terminal_banner",
34
+ "cleanup_all_browsers",
35
+ "get_browser_session",
36
+ "get_session_browser_manager",
37
+ "set_browser_session",
38
+ "VisualAnalysisResult",
39
+ "run_vqa_analysis",
40
+ "run_vqa_analysis_stream",
41
+ ]
@@ -7,7 +7,7 @@ from pydantic_ai import RunContext
7
7
  from code_puppy.messaging import emit_error, emit_info, emit_success, emit_warning
8
8
  from code_puppy.tools.common import generate_group_id
9
9
 
10
- from .camoufox_manager import get_camoufox_manager
10
+ from .camoufox_manager import get_session_browser_manager
11
11
 
12
12
 
13
13
  async def initialize_browser(
@@ -22,7 +22,7 @@ async def initialize_browser(
22
22
  message_group=group_id,
23
23
  )
24
24
  try:
25
- browser_manager = get_camoufox_manager()
25
+ browser_manager = get_session_browser_manager()
26
26
 
27
27
  # Configure browser settings
28
28
  browser_manager.headless = headless
@@ -75,7 +75,7 @@ async def close_browser() -> Dict[str, Any]:
75
75
  message_group=group_id,
76
76
  )
77
77
  try:
78
- browser_manager = get_camoufox_manager()
78
+ browser_manager = get_session_browser_manager()
79
79
  await browser_manager.close()
80
80
 
81
81
  emit_warning("Browser closed successfully", message_group=group_id)
@@ -94,7 +94,7 @@ async def get_browser_status() -> Dict[str, Any]:
94
94
  message_group=group_id,
95
95
  )
96
96
  try:
97
- browser_manager = get_camoufox_manager()
97
+ browser_manager = get_session_browser_manager()
98
98
 
99
99
  if not browser_manager._initialized:
100
100
  return {
@@ -139,7 +139,7 @@ async def create_new_page(url: Optional[str] = None) -> Dict[str, Any]:
139
139
  message_group=group_id,
140
140
  )
141
141
  try:
142
- browser_manager = get_camoufox_manager()
142
+ browser_manager = get_session_browser_manager()
143
143
 
144
144
  if not browser_manager._initialized:
145
145
  return {
@@ -168,7 +168,7 @@ async def list_pages() -> Dict[str, Any]:
168
168
  message_group=group_id,
169
169
  )
170
170
  try:
171
- browser_manager = get_camoufox_manager()
171
+ browser_manager = get_session_browser_manager()
172
172
 
173
173
  if not browser_manager._initialized:
174
174
  return {"success": False, "error": "Browser not initialized"}
@@ -7,7 +7,7 @@ from pydantic_ai import RunContext
7
7
  from code_puppy.messaging import emit_error, emit_info, emit_success
8
8
  from code_puppy.tools.common import generate_group_id
9
9
 
10
- from .camoufox_manager import get_camoufox_manager
10
+ from .camoufox_manager import get_session_browser_manager
11
11
 
12
12
 
13
13
  async def click_element(
@@ -24,14 +24,15 @@ async def click_element(
24
24
  message_group=group_id,
25
25
  )
26
26
  try:
27
- browser_manager = get_camoufox_manager()
27
+ browser_manager = get_session_browser_manager()
28
28
  page = await browser_manager.get_current_page()
29
29
 
30
30
  if not page:
31
31
  return {"success": False, "error": "No active browser page available"}
32
32
 
33
- # Find element
34
- element = page.locator(selector)
33
+ # Find element - use .first to handle cases where selector matches multiple elements
34
+ # This avoids Playwright's strict mode violation errors
35
+ element = page.locator(selector).first
35
36
 
36
37
  # Wait for element to be visible and enabled
37
38
  await element.wait_for(state="visible", timeout=timeout)
@@ -69,13 +70,13 @@ async def double_click_element(
69
70
  message_group=group_id,
70
71
  )
71
72
  try:
72
- browser_manager = get_camoufox_manager()
73
+ browser_manager = get_session_browser_manager()
73
74
  page = await browser_manager.get_current_page()
74
75
 
75
76
  if not page:
76
77
  return {"success": False, "error": "No active browser page available"}
77
78
 
78
- element = page.locator(selector)
79
+ element = page.locator(selector).first
79
80
  await element.wait_for(state="visible", timeout=timeout)
80
81
  await element.dblclick(force=force, timeout=timeout)
81
82
 
@@ -99,13 +100,13 @@ async def hover_element(
99
100
  message_group=group_id,
100
101
  )
101
102
  try:
102
- browser_manager = get_camoufox_manager()
103
+ browser_manager = get_session_browser_manager()
103
104
  page = await browser_manager.get_current_page()
104
105
 
105
106
  if not page:
106
107
  return {"success": False, "error": "No active browser page available"}
107
108
 
108
- element = page.locator(selector)
109
+ element = page.locator(selector).first
109
110
  await element.wait_for(state="visible", timeout=timeout)
110
111
  await element.hover(force=force, timeout=timeout)
111
112
 
@@ -130,13 +131,13 @@ async def set_element_text(
130
131
  message_group=group_id,
131
132
  )
132
133
  try:
133
- browser_manager = get_camoufox_manager()
134
+ browser_manager = get_session_browser_manager()
134
135
  page = await browser_manager.get_current_page()
135
136
 
136
137
  if not page:
137
138
  return {"success": False, "error": "No active browser page available"}
138
139
 
139
- element = page.locator(selector)
140
+ element = page.locator(selector).first
140
141
  await element.wait_for(state="visible", timeout=timeout)
141
142
 
142
143
  if clear_first:
@@ -169,13 +170,13 @@ async def get_element_text(
169
170
  message_group=group_id,
170
171
  )
171
172
  try:
172
- browser_manager = get_camoufox_manager()
173
+ browser_manager = get_session_browser_manager()
173
174
  page = await browser_manager.get_current_page()
174
175
 
175
176
  if not page:
176
177
  return {"success": False, "error": "No active browser page available"}
177
178
 
178
- element = page.locator(selector)
179
+ element = page.locator(selector).first
179
180
  await element.wait_for(state="visible", timeout=timeout)
180
181
 
181
182
  text = await element.text_content()
@@ -197,13 +198,13 @@ async def get_element_value(
197
198
  message_group=group_id,
198
199
  )
199
200
  try:
200
- browser_manager = get_camoufox_manager()
201
+ browser_manager = get_session_browser_manager()
201
202
  page = await browser_manager.get_current_page()
202
203
 
203
204
  if not page:
204
205
  return {"success": False, "error": "No active browser page available"}
205
206
 
206
- element = page.locator(selector)
207
+ element = page.locator(selector).first
207
208
  await element.wait_for(state="visible", timeout=timeout)
208
209
 
209
210
  value = await element.input_value()
@@ -231,13 +232,13 @@ async def select_option(
231
232
  message_group=group_id,
232
233
  )
233
234
  try:
234
- browser_manager = get_camoufox_manager()
235
+ browser_manager = get_session_browser_manager()
235
236
  page = await browser_manager.get_current_page()
236
237
 
237
238
  if not page:
238
239
  return {"success": False, "error": "No active browser page available"}
239
240
 
240
- element = page.locator(selector)
241
+ element = page.locator(selector).first
241
242
  await element.wait_for(state="visible", timeout=timeout)
242
243
 
243
244
  if value is not None:
@@ -278,13 +279,13 @@ async def check_element(
278
279
  message_group=group_id,
279
280
  )
280
281
  try:
281
- browser_manager = get_camoufox_manager()
282
+ browser_manager = get_session_browser_manager()
282
283
  page = await browser_manager.get_current_page()
283
284
 
284
285
  if not page:
285
286
  return {"success": False, "error": "No active browser page available"}
286
287
 
287
- element = page.locator(selector)
288
+ element = page.locator(selector).first
288
289
  await element.wait_for(state="visible", timeout=timeout)
289
290
  await element.check(timeout=timeout)
290
291
 
@@ -307,13 +308,13 @@ async def uncheck_element(
307
308
  message_group=group_id,
308
309
  )
309
310
  try:
310
- browser_manager = get_camoufox_manager()
311
+ browser_manager = get_session_browser_manager()
311
312
  page = await browser_manager.get_current_page()
312
313
 
313
314
  if not page:
314
315
  return {"success": False, "error": "No active browser page available"}
315
316
 
316
- element = page.locator(selector)
317
+ element = page.locator(selector).first
317
318
  await element.wait_for(state="visible", timeout=timeout)
318
319
  await element.uncheck(timeout=timeout)
319
320
 
@@ -7,7 +7,7 @@ from pydantic_ai import RunContext
7
7
  from code_puppy.messaging import emit_info, emit_success
8
8
  from code_puppy.tools.common import generate_group_id
9
9
 
10
- from .camoufox_manager import get_camoufox_manager
10
+ from .camoufox_manager import get_session_browser_manager
11
11
 
12
12
 
13
13
  async def find_by_role(
@@ -23,7 +23,7 @@ async def find_by_role(
23
23
  message_group=group_id,
24
24
  )
25
25
  try:
26
- browser_manager = get_camoufox_manager()
26
+ browser_manager = get_session_browser_manager()
27
27
  page = await browser_manager.get_current_page()
28
28
 
29
29
  if not page:
@@ -75,7 +75,7 @@ async def find_by_text(
75
75
  message_group=group_id,
76
76
  )
77
77
  try:
78
- browser_manager = get_camoufox_manager()
78
+ browser_manager = get_session_browser_manager()
79
79
  page = await browser_manager.get_current_page()
80
80
 
81
81
  if not page:
@@ -127,7 +127,7 @@ async def find_by_label(
127
127
  message_group=group_id,
128
128
  )
129
129
  try:
130
- browser_manager = get_camoufox_manager()
130
+ browser_manager = get_session_browser_manager()
131
131
  page = await browser_manager.get_current_page()
132
132
 
133
133
  if not page:
@@ -190,7 +190,7 @@ async def find_by_placeholder(
190
190
  message_group=group_id,
191
191
  )
192
192
  try:
193
- browser_manager = get_camoufox_manager()
193
+ browser_manager = get_session_browser_manager()
194
194
  page = await browser_manager.get_current_page()
195
195
 
196
196
  if not page:
@@ -248,7 +248,7 @@ async def find_by_test_id(
248
248
  message_group=group_id,
249
249
  )
250
250
  try:
251
- browser_manager = get_camoufox_manager()
251
+ browser_manager = get_session_browser_manager()
252
252
  page = await browser_manager.get_current_page()
253
253
 
254
254
  if not page:
@@ -304,7 +304,7 @@ async def run_xpath_query(
304
304
  message_group=group_id,
305
305
  )
306
306
  try:
307
- browser_manager = get_camoufox_manager()
307
+ browser_manager = get_session_browser_manager()
308
308
  page = await browser_manager.get_current_page()
309
309
 
310
310
  if not page:
@@ -359,7 +359,7 @@ async def find_buttons(
359
359
  message_group=group_id,
360
360
  )
361
361
  try:
362
- browser_manager = get_camoufox_manager()
362
+ browser_manager = get_session_browser_manager()
363
363
  page = await browser_manager.get_current_page()
364
364
 
365
365
  if not page:
@@ -410,7 +410,7 @@ async def find_links(
410
410
  message_group=group_id,
411
411
  )
412
412
  try:
413
- browser_manager = get_camoufox_manager()
413
+ browser_manager = get_session_browser_manager()
414
414
  page = await browser_manager.get_current_page()
415
415
 
416
416
  if not page:
@@ -7,7 +7,7 @@ from pydantic_ai import RunContext
7
7
  from code_puppy.messaging import emit_error, emit_info, emit_success
8
8
  from code_puppy.tools.common import generate_group_id
9
9
 
10
- from .camoufox_manager import get_camoufox_manager
10
+ from .camoufox_manager import get_session_browser_manager
11
11
 
12
12
 
13
13
  async def navigate_to_url(url: str) -> Dict[str, Any]:
@@ -18,7 +18,7 @@ async def navigate_to_url(url: str) -> Dict[str, Any]:
18
18
  message_group=group_id,
19
19
  )
20
20
  try:
21
- browser_manager = get_camoufox_manager()
21
+ browser_manager = get_session_browser_manager()
22
22
  page = await browser_manager.get_current_page()
23
23
 
24
24
  if not page:
@@ -48,7 +48,7 @@ async def get_page_info() -> Dict[str, Any]:
48
48
  message_group=group_id,
49
49
  )
50
50
  try:
51
- browser_manager = get_camoufox_manager()
51
+ browser_manager = get_session_browser_manager()
52
52
  page = await browser_manager.get_current_page()
53
53
 
54
54
  if not page:
@@ -71,7 +71,7 @@ async def go_back() -> Dict[str, Any]:
71
71
  message_group=group_id,
72
72
  )
73
73
  try:
74
- browser_manager = get_camoufox_manager()
74
+ browser_manager = get_session_browser_manager()
75
75
  page = await browser_manager.get_current_page()
76
76
 
77
77
  if not page:
@@ -93,7 +93,7 @@ async def go_forward() -> Dict[str, Any]:
93
93
  message_group=group_id,
94
94
  )
95
95
  try:
96
- browser_manager = get_camoufox_manager()
96
+ browser_manager = get_session_browser_manager()
97
97
  page = await browser_manager.get_current_page()
98
98
 
99
99
  if not page:
@@ -115,7 +115,7 @@ async def reload_page(wait_until: str = "domcontentloaded") -> Dict[str, Any]:
115
115
  message_group=group_id,
116
116
  )
117
117
  try:
118
- browser_manager = get_camoufox_manager()
118
+ browser_manager = get_session_browser_manager()
119
119
  page = await browser_manager.get_current_page()
120
120
 
121
121
  if not page:
@@ -139,7 +139,7 @@ async def wait_for_load_state(
139
139
  message_group=group_id,
140
140
  )
141
141
  try:
142
- browser_manager = get_camoufox_manager()
142
+ browser_manager = get_session_browser_manager()
143
143
  page = await browser_manager.get_current_page()
144
144
 
145
145
  if not page: