code-puppy 0.0.355__py3-none-any.whl → 0.0.357__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. code_puppy/agents/agent_qa_kitten.py +10 -5
  2. code_puppy/agents/agent_terminal_qa.py +323 -0
  3. code_puppy/api/app.py +79 -2
  4. code_puppy/api/routers/commands.py +21 -2
  5. code_puppy/api/routers/sessions.py +49 -8
  6. code_puppy/config.py +5 -2
  7. code_puppy/tools/__init__.py +37 -0
  8. code_puppy/tools/agent_tools.py +26 -1
  9. code_puppy/tools/browser/__init__.py +41 -0
  10. code_puppy/tools/browser/browser_control.py +6 -6
  11. code_puppy/tools/browser/browser_interactions.py +21 -20
  12. code_puppy/tools/browser/browser_locators.py +9 -9
  13. code_puppy/tools/browser/browser_navigation.py +7 -7
  14. code_puppy/tools/browser/browser_screenshot.py +60 -135
  15. code_puppy/tools/browser/browser_screenshot_vqa.py +195 -0
  16. code_puppy/tools/browser/browser_scripts.py +15 -13
  17. code_puppy/tools/browser/camoufox_manager.py +226 -64
  18. code_puppy/tools/browser/chromium_terminal_manager.py +259 -0
  19. code_puppy/tools/browser/terminal_command_tools.py +521 -0
  20. code_puppy/tools/browser/terminal_screenshot_tools.py +520 -0
  21. code_puppy/tools/browser/terminal_tools.py +525 -0
  22. code_puppy/tools/browser/vqa_agent.py +138 -34
  23. code_puppy/tools/command_runner.py +292 -101
  24. {code_puppy-0.0.355.dist-info → code_puppy-0.0.357.dist-info}/METADATA +1 -1
  25. {code_puppy-0.0.355.dist-info → code_puppy-0.0.357.dist-info}/RECORD +30 -24
  26. {code_puppy-0.0.355.data → code_puppy-0.0.357.data}/data/code_puppy/models.json +0 -0
  27. {code_puppy-0.0.355.data → code_puppy-0.0.357.data}/data/code_puppy/models_dev_api.json +0 -0
  28. {code_puppy-0.0.355.dist-info → code_puppy-0.0.357.dist-info}/WHEEL +0 -0
  29. {code_puppy-0.0.355.dist-info → code_puppy-0.0.357.dist-info}/entry_points.txt +0 -0
  30. {code_puppy-0.0.355.dist-info → code_puppy-0.0.357.dist-info}/licenses/LICENSE +0 -0
@@ -2,10 +2,12 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from functools import lru_cache
5
+ from collections.abc import AsyncIterable
6
+ from typing import Any
6
7
 
7
8
  from pydantic import BaseModel, Field
8
- from pydantic_ai import Agent, BinaryContent
9
+ from pydantic_ai import Agent, BinaryContent, PartDeltaEvent, PartStartEvent, RunContext
10
+ from pydantic_ai.messages import TextPart, TextPartDelta
9
11
 
10
12
  from code_puppy.config import get_use_dbos, get_vqa_model_name
11
13
 
@@ -18,73 +20,175 @@ class VisualAnalysisResult(BaseModel):
18
20
  observations: str
19
21
 
20
22
 
21
- def _get_vqa_instructions() -> str:
22
- """Get the system instructions for the VQA agent."""
23
- return (
24
- "You are a visual analysis specialist. Answer the user's question about the provided image. "
25
- "Always respond using the structured schema: answer, confidence (0-1 float), observations. "
26
- "Confidence reflects how certain you are about the answer. Observations should include useful, concise context."
27
- )
23
+ DEFAULT_VQA_INSTRUCTIONS = (
24
+ "You are a visual analysis specialist. Answer the user's question about the provided image. "
25
+ "Always respond using the structured schema: answer, confidence (0-1 float), observations. "
26
+ "Confidence reflects how certain you are about the answer. Observations should include useful, concise context."
27
+ )
28
28
 
29
29
 
30
- @lru_cache(maxsize=1)
31
- def _load_vqa_agent(model_name: str) -> Agent[None, VisualAnalysisResult]:
32
- """Create a cached agent instance for visual analysis."""
30
+ async def run_vqa_analysis(
31
+ question: str,
32
+ image_bytes: bytes,
33
+ media_type: str = "image/png",
34
+ ) -> str:
35
+ """Execute the VQA agent asynchronously against screenshot bytes.
36
+
37
+ Follows the same pattern as agent_tools.py for prompt preparation
38
+ and model configuration.
39
+
40
+ Args:
41
+ question: The question to ask about the image.
42
+ image_bytes: The raw image bytes.
43
+ media_type: The MIME type of the image (default: "image/png").
44
+ system_prompt: Optional custom system prompt. If None, uses default VQA instructions.
45
+
46
+ Returns:
47
+ str: The answer from the VQA analysis.
48
+ """
49
+ from code_puppy import callbacks
33
50
  from code_puppy.model_factory import ModelFactory
34
51
  from code_puppy.model_utils import prepare_prompt_for_model
35
52
 
53
+ # Get model configuration
54
+ model_name = get_vqa_model_name()
36
55
  models_config = ModelFactory.load_config()
37
56
  model = ModelFactory.get_model(model_name, models_config)
38
57
 
39
- # Handle claude-code models: swap instructions (prompt prepending happens in run_vqa_analysis)
40
- instructions = _get_vqa_instructions()
58
+ # Build instructions: custom system_prompt or default VQA instructions
59
+ instructions = DEFAULT_VQA_INSTRUCTIONS
60
+
61
+ # Apply prompt additions (like file permission handling) - same as agent_tools.py
62
+ prompt_additions = callbacks.on_load_prompt()
63
+ if prompt_additions:
64
+ instructions += "\n" + "\n".join(prompt_additions)
65
+
66
+ # Handle claude-code models: swap instructions, prepend system prompt to user question
67
+ # Following the exact pattern from agent_tools.py
41
68
  prepared = prepare_prompt_for_model(
42
- model_name, instructions, "", prepend_system_to_user=False
69
+ model_name, instructions, question, prepend_system_to_user=True
43
70
  )
44
71
  instructions = prepared.instructions
72
+ question = prepared.user_prompt
45
73
 
74
+ # Create the VQA agent with string output
46
75
  vqa_agent = Agent(
47
76
  model=model,
48
77
  instructions=instructions,
49
- output_type=VisualAnalysisResult,
50
- retries=2,
51
78
  )
52
79
 
80
+ # Wrap with DBOS if enabled
53
81
  if get_use_dbos():
54
82
  from pydantic_ai.durable_exec.dbos import DBOSAgent
55
83
 
56
- dbos_agent = DBOSAgent(vqa_agent, name="vqa-agent")
57
- return dbos_agent
84
+ vqa_agent = DBOSAgent(vqa_agent, name="vqa-agent")
58
85
 
59
- return vqa_agent
86
+ # Run the agent with the image
87
+ result = await vqa_agent.run(
88
+ [
89
+ question,
90
+ BinaryContent(data=image_bytes, media_type=media_type),
91
+ ]
92
+ )
93
+ return result.output
60
94
 
61
95
 
62
- def _get_vqa_agent() -> Agent[None, VisualAnalysisResult]:
63
- """Return a cached VQA agent configured with the current model."""
64
- model_name = get_vqa_model_name()
65
- # lru_cache keyed by model_name ensures refresh when configuration changes
66
- return _load_vqa_agent(model_name)
96
+ def _create_vqa_stream_handler(
97
+ accumulator: list[str],
98
+ ):
99
+ """Create an event stream handler that accumulates text.
100
+
101
+ Args:
102
+ accumulator: List to accumulate text chunks into (pass empty list).
103
+
104
+ Returns:
105
+ Async event stream handler function.
106
+ """
107
+
108
+ async def vqa_event_stream_handler(
109
+ ctx: RunContext,
110
+ events: AsyncIterable[Any],
111
+ ) -> None:
112
+ """Handle streaming events - print text as it arrives."""
113
+ async for event in events:
114
+ # Handle text part start - might have initial content
115
+ if isinstance(event, PartStartEvent):
116
+ if isinstance(event.part, TextPart) and event.part.content:
117
+ accumulator.append(event.part.content)
67
118
 
119
+ # Handle text deltas - the streaming bits
120
+ elif isinstance(event, PartDeltaEvent):
121
+ if isinstance(event.delta, TextPartDelta) and event.delta.content_delta:
122
+ accumulator.append(event.delta.content_delta)
68
123
 
69
- def run_vqa_analysis(
124
+ return vqa_event_stream_handler
125
+
126
+
127
+ async def run_vqa_analysis_stream(
70
128
  question: str,
71
129
  image_bytes: bytes,
72
130
  media_type: str = "image/png",
73
- ) -> VisualAnalysisResult:
74
- """Execute the VQA agent synchronously against screenshot bytes."""
75
- from code_puppy.model_utils import prepare_prompt_for_model
131
+ ) -> str:
132
+ """Execute the VQA agent with streaming output.
76
133
 
77
- agent = _get_vqa_agent()
134
+ Streams text to console as it arrives and accumulates the full response.
78
135
 
79
- # Handle claude-code models: prepend system prompt to user question
136
+ Args:
137
+ question: The question to ask about the image.
138
+ image_bytes: The raw image bytes.
139
+ media_type: The MIME type of the image (default: "image/png").
140
+
141
+ Returns:
142
+ str: The accumulated answer from the VQA analysis.
143
+ """
144
+ from code_puppy import callbacks
145
+ from code_puppy.model_factory import ModelFactory
146
+ from code_puppy.model_utils import prepare_prompt_for_model
147
+
148
+ # Get model configuration
80
149
  model_name = get_vqa_model_name()
81
- prepared = prepare_prompt_for_model(model_name, _get_vqa_instructions(), question)
150
+ models_config = ModelFactory.load_config()
151
+ model = ModelFactory.get_model(model_name, models_config)
152
+
153
+ # Build instructions
154
+ instructions = DEFAULT_VQA_INSTRUCTIONS
155
+
156
+ # Apply prompt additions (like file permission handling)
157
+ prompt_additions = callbacks.on_load_prompt()
158
+ if prompt_additions:
159
+ instructions += "\n" + "\n".join(prompt_additions)
160
+
161
+ # Handle claude-code models: swap instructions, prepend system prompt to user question
162
+ prepared = prepare_prompt_for_model(
163
+ model_name, instructions, question, prepend_system_to_user=True
164
+ )
165
+ instructions = prepared.instructions
82
166
  question = prepared.user_prompt
83
167
 
84
- result = agent.run_sync(
168
+ # Create the VQA agent
169
+ vqa_agent = Agent(
170
+ model=model,
171
+ instructions=instructions,
172
+ )
173
+
174
+ # Wrap with DBOS if enabled
175
+ if get_use_dbos():
176
+ from pydantic_ai.durable_exec.dbos import DBOSAgent
177
+
178
+ vqa_agent = DBOSAgent(vqa_agent, name="vqa-agent-stream")
179
+
180
+ # Accumulator for streamed text (use list to allow mutation in handler)
181
+ accumulated_chunks: list[str] = []
182
+
183
+ # Create the stream handler
184
+ stream_handler = _create_vqa_stream_handler(accumulated_chunks)
185
+
186
+ # Run the agent with event_stream_handler
187
+ result = await vqa_agent.run(
85
188
  [
86
189
  question,
87
190
  BinaryContent(data=image_bytes, media_type=media_type),
88
- ]
191
+ ],
192
+ event_stream_handler=stream_handler,
89
193
  )
90
194
  return result.output