code-puppy 0.0.348__py3-none-any.whl → 0.0.361__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. code_puppy/agents/__init__.py +2 -0
  2. code_puppy/agents/agent_manager.py +49 -0
  3. code_puppy/agents/agent_pack_leader.py +383 -0
  4. code_puppy/agents/agent_qa_kitten.py +12 -7
  5. code_puppy/agents/agent_terminal_qa.py +323 -0
  6. code_puppy/agents/base_agent.py +17 -4
  7. code_puppy/agents/event_stream_handler.py +101 -8
  8. code_puppy/agents/pack/__init__.py +34 -0
  9. code_puppy/agents/pack/bloodhound.py +304 -0
  10. code_puppy/agents/pack/husky.py +321 -0
  11. code_puppy/agents/pack/retriever.py +393 -0
  12. code_puppy/agents/pack/shepherd.py +348 -0
  13. code_puppy/agents/pack/terrier.py +287 -0
  14. code_puppy/agents/pack/watchdog.py +367 -0
  15. code_puppy/agents/subagent_stream_handler.py +276 -0
  16. code_puppy/api/__init__.py +13 -0
  17. code_puppy/api/app.py +169 -0
  18. code_puppy/api/main.py +21 -0
  19. code_puppy/api/pty_manager.py +446 -0
  20. code_puppy/api/routers/__init__.py +12 -0
  21. code_puppy/api/routers/agents.py +36 -0
  22. code_puppy/api/routers/commands.py +217 -0
  23. code_puppy/api/routers/config.py +74 -0
  24. code_puppy/api/routers/sessions.py +232 -0
  25. code_puppy/api/templates/terminal.html +361 -0
  26. code_puppy/api/websocket.py +154 -0
  27. code_puppy/callbacks.py +73 -0
  28. code_puppy/claude_cache_client.py +249 -34
  29. code_puppy/command_line/core_commands.py +85 -0
  30. code_puppy/config.py +66 -62
  31. code_puppy/messaging/__init__.py +15 -0
  32. code_puppy/messaging/messages.py +27 -0
  33. code_puppy/messaging/queue_console.py +1 -1
  34. code_puppy/messaging/rich_renderer.py +36 -1
  35. code_puppy/messaging/spinner/__init__.py +20 -2
  36. code_puppy/messaging/subagent_console.py +461 -0
  37. code_puppy/model_utils.py +54 -0
  38. code_puppy/plugins/antigravity_oauth/antigravity_model.py +90 -19
  39. code_puppy/plugins/antigravity_oauth/transport.py +1 -0
  40. code_puppy/plugins/frontend_emitter/__init__.py +25 -0
  41. code_puppy/plugins/frontend_emitter/emitter.py +121 -0
  42. code_puppy/plugins/frontend_emitter/register_callbacks.py +261 -0
  43. code_puppy/prompts/antigravity_system_prompt.md +1 -0
  44. code_puppy/status_display.py +6 -2
  45. code_puppy/tools/__init__.py +37 -1
  46. code_puppy/tools/agent_tools.py +83 -33
  47. code_puppy/tools/browser/__init__.py +37 -0
  48. code_puppy/tools/browser/browser_control.py +6 -6
  49. code_puppy/tools/browser/browser_interactions.py +21 -20
  50. code_puppy/tools/browser/browser_locators.py +9 -9
  51. code_puppy/tools/browser/browser_navigation.py +7 -7
  52. code_puppy/tools/browser/browser_screenshot.py +78 -140
  53. code_puppy/tools/browser/browser_scripts.py +15 -13
  54. code_puppy/tools/browser/camoufox_manager.py +226 -64
  55. code_puppy/tools/browser/chromium_terminal_manager.py +259 -0
  56. code_puppy/tools/browser/terminal_command_tools.py +521 -0
  57. code_puppy/tools/browser/terminal_screenshot_tools.py +556 -0
  58. code_puppy/tools/browser/terminal_tools.py +525 -0
  59. code_puppy/tools/command_runner.py +292 -101
  60. code_puppy/tools/common.py +176 -1
  61. code_puppy/tools/display.py +84 -0
  62. code_puppy/tools/subagent_context.py +158 -0
  63. {code_puppy-0.0.348.dist-info → code_puppy-0.0.361.dist-info}/METADATA +13 -11
  64. {code_puppy-0.0.348.dist-info → code_puppy-0.0.361.dist-info}/RECORD +69 -38
  65. code_puppy/tools/browser/vqa_agent.py +0 -90
  66. {code_puppy-0.0.348.data → code_puppy-0.0.361.data}/data/code_puppy/models.json +0 -0
  67. {code_puppy-0.0.348.data → code_puppy-0.0.361.data}/data/code_puppy/models_dev_api.json +0 -0
  68. {code_puppy-0.0.348.dist-info → code_puppy-0.0.361.dist-info}/WHEEL +0 -0
  69. {code_puppy-0.0.348.dist-info → code_puppy-0.0.361.dist-info}/entry_points.txt +0 -0
  70. {code_puppy-0.0.348.dist-info → code_puppy-0.0.361.dist-info}/licenses/LICENSE +0 -0
@@ -1,19 +1,21 @@
1
- """Screenshot and visual analysis tool with VQA capabilities."""
1
+ """Screenshot tool for browser automation.
2
2
 
3
- import asyncio
3
+ Captures screenshots and returns them via ToolReturn with BinaryContent
4
+ so multimodal models can directly see and analyze - no separate VQA agent needed.
5
+ """
6
+
7
+ import time
4
8
  from datetime import datetime
5
9
  from pathlib import Path
6
10
  from tempfile import gettempdir, mkdtemp
7
- from typing import Any, Dict, Optional
11
+ from typing import Any, Dict, Optional, Union
8
12
 
9
- from pydantic import BaseModel
10
- from pydantic_ai import RunContext
13
+ from pydantic_ai import BinaryContent, RunContext, ToolReturn
11
14
 
12
15
  from code_puppy.messaging import emit_error, emit_info, emit_success
13
16
  from code_puppy.tools.common import generate_group_id
14
17
 
15
- from .camoufox_manager import get_camoufox_manager
16
- from .vqa_agent import run_vqa_analysis
18
+ from .camoufox_manager import get_session_browser_manager
17
19
 
18
20
  _TEMP_SCREENSHOT_ROOT = Path(
19
21
  mkdtemp(prefix="code_puppy_screenshots_", dir=gettempdir())
@@ -21,21 +23,11 @@ _TEMP_SCREENSHOT_ROOT = Path(
21
23
 
22
24
 
23
25
  def _build_screenshot_path(timestamp: str) -> Path:
24
- """Return the target path for a screenshot using a shared temp directory."""
26
+ """Return the target path for a screenshot."""
25
27
  filename = f"screenshot_{timestamp}.png"
26
28
  return _TEMP_SCREENSHOT_ROOT / filename
27
29
 
28
30
 
29
- class ScreenshotResult(BaseModel):
30
- """Result from screenshot operation."""
31
-
32
- success: bool
33
- screenshot_path: Optional[str] = None
34
- screenshot_data: Optional[bytes] = None
35
- timestamp: Optional[str] = None
36
- error: Optional[str] = None
37
-
38
-
39
31
  async def _capture_screenshot(
40
32
  page,
41
33
  full_page: bool = False,
@@ -45,41 +37,37 @@ async def _capture_screenshot(
45
37
  ) -> Dict[str, Any]:
46
38
  """Internal screenshot capture function."""
47
39
  try:
48
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
40
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
49
41
 
50
42
  # Take screenshot
51
43
  if element_selector:
52
- # Screenshot specific element
53
44
  element = await page.locator(element_selector).first
54
45
  if not await element.is_visible():
55
46
  return {
56
47
  "success": False,
57
48
  "error": f"Element '{element_selector}' is not visible",
58
49
  }
59
- screenshot_data = await element.screenshot()
50
+ screenshot_bytes = await element.screenshot()
60
51
  else:
61
- # Screenshot page or full page
62
- screenshot_data = await page.screenshot(full_page=full_page)
52
+ screenshot_bytes = await page.screenshot(full_page=full_page)
63
53
 
64
- result = {
54
+ result: Dict[str, Any] = {
65
55
  "success": True,
66
- "screenshot_data": screenshot_data,
56
+ "screenshot_bytes": screenshot_bytes,
67
57
  "timestamp": timestamp,
68
58
  }
69
59
 
70
60
  if save_screenshot:
71
61
  screenshot_path = _build_screenshot_path(timestamp)
72
62
  screenshot_path.parent.mkdir(parents=True, exist_ok=True)
73
-
74
63
  with open(screenshot_path, "wb") as f:
75
- f.write(screenshot_data)
76
-
64
+ f.write(screenshot_bytes)
77
65
  result["screenshot_path"] = str(screenshot_path)
78
- message = f"Screenshot saved: {screenshot_path}"
66
+
79
67
  if group_id:
80
- emit_success(message, message_group=group_id)
81
- else:
82
- emit_success(message)
68
+ emit_success(
69
+ f"Screenshot saved: {screenshot_path}", message_group=group_id
70
+ )
83
71
 
84
72
  return result
85
73
 
@@ -87,46 +75,42 @@ async def _capture_screenshot(
87
75
  return {"success": False, "error": str(e)}
88
76
 
89
77
 
90
- async def take_screenshot_and_analyze(
91
- question: str,
78
+ async def take_screenshot(
92
79
  full_page: bool = False,
93
80
  element_selector: Optional[str] = None,
94
81
  save_screenshot: bool = True,
95
- ) -> Dict[str, Any]:
96
- """
97
- Take a screenshot and analyze it using visual understanding.
82
+ ) -> Union[ToolReturn, Dict[str, Any]]:
83
+ """Take a screenshot of the browser page.
84
+
85
+ Returns a ToolReturn with BinaryContent so multimodal models can
86
+ directly see and analyze the screenshot.
98
87
 
99
88
  Args:
100
- question: The specific question to ask about the screenshot
101
- full_page: Whether to capture the full page or just viewport
102
- element_selector: Optional selector to screenshot just a specific element
103
- save_screenshot: Whether to save the screenshot to disk
89
+ full_page: Whether to capture full page or just viewport.
90
+ element_selector: Optional selector to screenshot specific element.
91
+ save_screenshot: Whether to save the screenshot to disk.
104
92
 
105
93
  Returns:
106
- Dict containing analysis results and screenshot info
94
+ ToolReturn containing:
95
+ - return_value: Success message with screenshot path
96
+ - content: List with description and BinaryContent image
97
+ - metadata: Screenshot details (path, target, timestamp)
98
+ Or Dict with error info if failed.
107
99
  """
108
100
  target = element_selector or ("full_page" if full_page else "viewport")
109
- group_id = generate_group_id(
110
- "browser_screenshot_analyze", f"{question[:50]}_{target}"
111
- )
112
- emit_info(
113
- f"BROWSER SCREENSHOT ANALYZE 📷 question='{question[:100]}{'...' if len(question) > 100 else ''}' target={target}",
114
- message_group=group_id,
115
- )
101
+ group_id = generate_group_id("browser_screenshot", target)
102
+ emit_info(f"BROWSER SCREENSHOT 📷 target={target}", message_group=group_id)
103
+
116
104
  try:
117
- # Get the current browser page
118
- browser_manager = get_camoufox_manager()
105
+ browser_manager = get_session_browser_manager()
119
106
  page = await browser_manager.get_current_page()
120
107
 
121
108
  if not page:
122
- return {
123
- "success": False,
124
- "error": "No active browser page available. Please navigate to a webpage first.",
125
- "question": question,
126
- }
109
+ error_msg = "No active browser page. Navigate to a webpage first."
110
+ emit_error(error_msg, message_group=group_id)
111
+ return {"success": False, "error": error_msg}
127
112
 
128
- # Take screenshot
129
- screenshot_result = await _capture_screenshot(
113
+ result = await _capture_screenshot(
130
114
  page,
131
115
  full_page=full_page,
132
116
  element_selector=element_selector,
@@ -134,108 +118,62 @@ async def take_screenshot_and_analyze(
134
118
  group_id=group_id,
135
119
  )
136
120
 
137
- if not screenshot_result["success"]:
138
- error_message = screenshot_result.get("error", "Screenshot failed")
139
- emit_error(
140
- f"Screenshot capture failed: {error_message}",
141
- message_group=group_id,
142
- )
143
- return {
144
- "success": False,
145
- "error": error_message,
146
- "question": question,
147
- }
148
-
149
- screenshot_bytes = screenshot_result.get("screenshot_data")
150
- if not screenshot_bytes:
151
- emit_error(
152
- "Screenshot captured but pixel data missing; cannot run visual analysis.",
153
- message_group=group_id,
154
- )
155
- return {
156
- "success": False,
157
- "error": "Screenshot captured but no image bytes available for analysis.",
158
- "question": question,
159
- }
160
-
161
- try:
162
- vqa_result = await asyncio.to_thread(
163
- run_vqa_analysis,
164
- question,
165
- screenshot_bytes,
166
- )
167
- except Exception as exc:
168
- emit_error(
169
- f"Visual question answering failed: {exc}",
170
- message_group=group_id,
171
- )
172
- return {
173
- "success": False,
174
- "error": f"Visual analysis failed: {exc}",
175
- "question": question,
176
- "screenshot_info": {
177
- "path": screenshot_result.get("screenshot_path"),
178
- "timestamp": screenshot_result.get("timestamp"),
179
- "full_page": full_page,
180
- "element_selector": element_selector,
181
- },
182
- }
183
-
184
- emit_success(
185
- f"Visual analysis answer: {vqa_result.answer}",
186
- message_group=group_id,
187
- )
188
- emit_info(
189
- f"Observations: {vqa_result.observations}",
190
- message_group=group_id,
191
- )
192
-
193
- return {
194
- "success": True,
195
- "question": question,
196
- "answer": vqa_result.answer,
197
- "confidence": vqa_result.confidence,
198
- "observations": vqa_result.observations,
199
- "screenshot_info": {
200
- "path": screenshot_result.get("screenshot_path"),
201
- "size": len(screenshot_bytes),
202
- "timestamp": screenshot_result.get("timestamp"),
121
+ if not result["success"]:
122
+ emit_error(result.get("error", "Screenshot failed"), message_group=group_id)
123
+ return {"success": False, "error": result.get("error")}
124
+
125
+ screenshot_path = result.get("screenshot_path", "(not saved)")
126
+
127
+ # Return as ToolReturn with BinaryContent so the model can SEE the image!
128
+ return ToolReturn(
129
+ return_value=f"Screenshot captured successfully. Saved to: {screenshot_path}",
130
+ content=[
131
+ f"Here's the browser screenshot ({target}):",
132
+ BinaryContent(
133
+ data=result["screenshot_bytes"],
134
+ media_type="image/png",
135
+ ),
136
+ "Please analyze what you see and describe any relevant details.",
137
+ ],
138
+ metadata={
139
+ "success": True,
140
+ "screenshot_path": screenshot_path,
141
+ "target": target,
203
142
  "full_page": full_page,
204
143
  "element_selector": element_selector,
144
+ "timestamp": time.time(),
205
145
  },
206
- }
146
+ )
207
147
 
208
148
  except Exception as e:
209
- emit_error(f"Screenshot analysis failed: {str(e)}", message_group=group_id)
210
- return {"success": False, "error": str(e), "question": question}
149
+ error_msg = f"Screenshot failed: {str(e)}"
150
+ emit_error(error_msg, message_group=group_id)
151
+ return {"success": False, "error": error_msg}
211
152
 
212
153
 
213
154
  def register_take_screenshot_and_analyze(agent):
214
- """Register the screenshot analysis tool."""
155
+ """Register the screenshot tool."""
215
156
 
216
157
  @agent.tool
217
158
  async def browser_screenshot_analyze(
218
159
  context: RunContext,
219
- question: str,
220
160
  full_page: bool = False,
221
161
  element_selector: Optional[str] = None,
222
- save_screenshot: bool = True,
223
- ) -> Dict[str, Any]:
162
+ ) -> Union[ToolReturn, Dict[str, Any]]:
224
163
  """
225
- Take a screenshot and analyze it to answer a specific question.
164
+ Take a screenshot of the browser page.
165
+
166
+ Returns the screenshot via ToolReturn with BinaryContent that you can
167
+ see directly. Use this to see what's displayed in the browser.
226
168
 
227
169
  Args:
228
- question: The specific question to ask about the screenshot
229
- full_page: Whether to capture the full page or just viewport
230
- element_selector: Optional CSS/XPath selector to screenshot specific element
231
- save_screenshot: Whether to save the screenshot to disk
170
+ full_page: Capture full page (True) or just viewport (False).
171
+ element_selector: Optional CSS selector to screenshot specific element.
232
172
 
233
173
  Returns:
234
- Dict with analysis results including answer, confidence, and observations
174
+ ToolReturn with the screenshot image you can analyze, or error dict.
235
175
  """
236
- return await take_screenshot_and_analyze(
237
- question=question,
176
+ return await take_screenshot(
238
177
  full_page=full_page,
239
178
  element_selector=element_selector,
240
- save_screenshot=save_screenshot,
241
179
  )
@@ -7,7 +7,7 @@ from pydantic_ai import RunContext
7
7
  from code_puppy.messaging import emit_error, emit_info, emit_success
8
8
  from code_puppy.tools.common import generate_group_id
9
9
 
10
- from .camoufox_manager import get_camoufox_manager
10
+ from .camoufox_manager import get_session_browser_manager
11
11
 
12
12
 
13
13
  async def execute_javascript(
@@ -21,14 +21,16 @@ async def execute_javascript(
21
21
  message_group=group_id,
22
22
  )
23
23
  try:
24
- browser_manager = get_camoufox_manager()
24
+ browser_manager = get_session_browser_manager()
25
25
  page = await browser_manager.get_current_page()
26
26
 
27
27
  if not page:
28
28
  return {"success": False, "error": "No active browser page available"}
29
29
 
30
30
  # Execute JavaScript
31
- result = await page.evaluate(script, timeout=timeout)
31
+ # Note: page.evaluate() does NOT accept a timeout parameter
32
+ # The timeout arg to this function is kept for API compatibility but unused
33
+ result = await page.evaluate(script)
32
34
 
33
35
  emit_success("JavaScript executed successfully", message_group=group_id)
34
36
 
@@ -52,7 +54,7 @@ async def scroll_page(
52
54
  message_group=group_id,
53
55
  )
54
56
  try:
55
- browser_manager = get_camoufox_manager()
57
+ browser_manager = get_session_browser_manager()
56
58
  page = await browser_manager.get_current_page()
57
59
 
58
60
  if not page:
@@ -60,7 +62,7 @@ async def scroll_page(
60
62
 
61
63
  if element_selector:
62
64
  # Scroll specific element
63
- element = page.locator(element_selector)
65
+ element = page.locator(element_selector).first
64
66
  await element.scroll_into_view_if_needed()
65
67
 
66
68
  # Get element's current scroll position and dimensions
@@ -146,13 +148,13 @@ async def scroll_to_element(
146
148
  message_group=group_id,
147
149
  )
148
150
  try:
149
- browser_manager = get_camoufox_manager()
151
+ browser_manager = get_session_browser_manager()
150
152
  page = await browser_manager.get_current_page()
151
153
 
152
154
  if not page:
153
155
  return {"success": False, "error": "No active browser page available"}
154
156
 
155
- element = page.locator(selector)
157
+ element = page.locator(selector).first
156
158
  await element.wait_for(state="attached", timeout=timeout)
157
159
  await element.scroll_into_view_if_needed()
158
160
 
@@ -178,7 +180,7 @@ async def set_viewport_size(
178
180
  message_group=group_id,
179
181
  )
180
182
  try:
181
- browser_manager = get_camoufox_manager()
183
+ browser_manager = get_session_browser_manager()
182
184
  page = await browser_manager.get_current_page()
183
185
 
184
186
  if not page:
@@ -209,13 +211,13 @@ async def wait_for_element(
209
211
  message_group=group_id,
210
212
  )
211
213
  try:
212
- browser_manager = get_camoufox_manager()
214
+ browser_manager = get_session_browser_manager()
213
215
  page = await browser_manager.get_current_page()
214
216
 
215
217
  if not page:
216
218
  return {"success": False, "error": "No active browser page available"}
217
219
 
218
- element = page.locator(selector)
220
+ element = page.locator(selector).first
219
221
  await element.wait_for(state=state, timeout=timeout)
220
222
 
221
223
  emit_success(f"Element {selector} is now {state}", message_group=group_id)
@@ -240,13 +242,13 @@ async def highlight_element(
240
242
  message_group=group_id,
241
243
  )
242
244
  try:
243
- browser_manager = get_camoufox_manager()
245
+ browser_manager = get_session_browser_manager()
244
246
  page = await browser_manager.get_current_page()
245
247
 
246
248
  if not page:
247
249
  return {"success": False, "error": "No active browser page available"}
248
250
 
249
- element = page.locator(selector)
251
+ element = page.locator(selector).first
250
252
  await element.wait_for(state="visible", timeout=timeout)
251
253
 
252
254
  # Add highlight style
@@ -277,7 +279,7 @@ async def clear_highlights() -> Dict[str, Any]:
277
279
  message_group=group_id,
278
280
  )
279
281
  try:
280
- browser_manager = get_camoufox_manager()
282
+ browser_manager = get_session_browser_manager()
281
283
  page = await browser_manager.get_current_page()
282
284
 
283
285
  if not page: