code-puppy 0.0.348__py3-none-any.whl → 0.0.361__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. code_puppy/agents/__init__.py +2 -0
  2. code_puppy/agents/agent_manager.py +49 -0
  3. code_puppy/agents/agent_pack_leader.py +383 -0
  4. code_puppy/agents/agent_qa_kitten.py +12 -7
  5. code_puppy/agents/agent_terminal_qa.py +323 -0
  6. code_puppy/agents/base_agent.py +17 -4
  7. code_puppy/agents/event_stream_handler.py +101 -8
  8. code_puppy/agents/pack/__init__.py +34 -0
  9. code_puppy/agents/pack/bloodhound.py +304 -0
  10. code_puppy/agents/pack/husky.py +321 -0
  11. code_puppy/agents/pack/retriever.py +393 -0
  12. code_puppy/agents/pack/shepherd.py +348 -0
  13. code_puppy/agents/pack/terrier.py +287 -0
  14. code_puppy/agents/pack/watchdog.py +367 -0
  15. code_puppy/agents/subagent_stream_handler.py +276 -0
  16. code_puppy/api/__init__.py +13 -0
  17. code_puppy/api/app.py +169 -0
  18. code_puppy/api/main.py +21 -0
  19. code_puppy/api/pty_manager.py +446 -0
  20. code_puppy/api/routers/__init__.py +12 -0
  21. code_puppy/api/routers/agents.py +36 -0
  22. code_puppy/api/routers/commands.py +217 -0
  23. code_puppy/api/routers/config.py +74 -0
  24. code_puppy/api/routers/sessions.py +232 -0
  25. code_puppy/api/templates/terminal.html +361 -0
  26. code_puppy/api/websocket.py +154 -0
  27. code_puppy/callbacks.py +73 -0
  28. code_puppy/claude_cache_client.py +249 -34
  29. code_puppy/command_line/core_commands.py +85 -0
  30. code_puppy/config.py +66 -62
  31. code_puppy/messaging/__init__.py +15 -0
  32. code_puppy/messaging/messages.py +27 -0
  33. code_puppy/messaging/queue_console.py +1 -1
  34. code_puppy/messaging/rich_renderer.py +36 -1
  35. code_puppy/messaging/spinner/__init__.py +20 -2
  36. code_puppy/messaging/subagent_console.py +461 -0
  37. code_puppy/model_utils.py +54 -0
  38. code_puppy/plugins/antigravity_oauth/antigravity_model.py +90 -19
  39. code_puppy/plugins/antigravity_oauth/transport.py +1 -0
  40. code_puppy/plugins/frontend_emitter/__init__.py +25 -0
  41. code_puppy/plugins/frontend_emitter/emitter.py +121 -0
  42. code_puppy/plugins/frontend_emitter/register_callbacks.py +261 -0
  43. code_puppy/prompts/antigravity_system_prompt.md +1 -0
  44. code_puppy/status_display.py +6 -2
  45. code_puppy/tools/__init__.py +37 -1
  46. code_puppy/tools/agent_tools.py +83 -33
  47. code_puppy/tools/browser/__init__.py +37 -0
  48. code_puppy/tools/browser/browser_control.py +6 -6
  49. code_puppy/tools/browser/browser_interactions.py +21 -20
  50. code_puppy/tools/browser/browser_locators.py +9 -9
  51. code_puppy/tools/browser/browser_navigation.py +7 -7
  52. code_puppy/tools/browser/browser_screenshot.py +78 -140
  53. code_puppy/tools/browser/browser_scripts.py +15 -13
  54. code_puppy/tools/browser/camoufox_manager.py +226 -64
  55. code_puppy/tools/browser/chromium_terminal_manager.py +259 -0
  56. code_puppy/tools/browser/terminal_command_tools.py +521 -0
  57. code_puppy/tools/browser/terminal_screenshot_tools.py +556 -0
  58. code_puppy/tools/browser/terminal_tools.py +525 -0
  59. code_puppy/tools/command_runner.py +292 -101
  60. code_puppy/tools/common.py +176 -1
  61. code_puppy/tools/display.py +84 -0
  62. code_puppy/tools/subagent_context.py +158 -0
  63. {code_puppy-0.0.348.dist-info → code_puppy-0.0.361.dist-info}/METADATA +13 -11
  64. {code_puppy-0.0.348.dist-info → code_puppy-0.0.361.dist-info}/RECORD +69 -38
  65. code_puppy/tools/browser/vqa_agent.py +0 -90
  66. {code_puppy-0.0.348.data → code_puppy-0.0.361.data}/data/code_puppy/models.json +0 -0
  67. {code_puppy-0.0.348.data → code_puppy-0.0.361.data}/data/code_puppy/models_dev_api.json +0 -0
  68. {code_puppy-0.0.348.dist-info → code_puppy-0.0.361.dist-info}/WHEEL +0 -0
  69. {code_puppy-0.0.348.dist-info → code_puppy-0.0.361.dist-info}/entry_points.txt +0 -0
  70. {code_puppy-0.0.348.dist-info → code_puppy-0.0.361.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,556 @@
1
+ """Terminal Screenshot Tools.
2
+
3
+ This module provides tools for:
4
+ - Taking screenshots of the terminal browser
5
+ - Reading terminal output by scraping xterm.js DOM
6
+ - Loading images from the filesystem
7
+
8
+ Screenshots and images are returned via ToolReturn with BinaryContent
9
+ so multimodal models can directly see and analyze them.
10
+
11
+ Images are automatically resized to reduce token usage.
12
+ """
13
+
14
+ import io
15
+ import logging
16
+ import time
17
+ from datetime import datetime
18
+ from pathlib import Path
19
+ from tempfile import gettempdir, mkdtemp
20
+ from typing import Any, Dict, Union
21
+
22
+ from PIL import Image
23
+ from pydantic_ai import BinaryContent, RunContext, ToolReturn
24
+ from rich.text import Text
25
+
26
+ from code_puppy.messaging import emit_error, emit_info, emit_success
27
+ from code_puppy.tools.browser import format_terminal_banner
28
+ from code_puppy.tools.common import generate_group_id
29
+
30
+ from .terminal_tools import get_session_manager
31
+
32
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Default max height in pixels for screenshots and loaded images. Taller
# images are scaled down to this height (reduces token usage significantly).
DEFAULT_MAX_HEIGHT = 768

# Temporary directory for screenshots.
# NOTE: mkdtemp() runs when this module is imported, so a new uniquely named
# directory is created under the system temp dir at import time.
_TEMP_SCREENSHOT_ROOT = Path(
    mkdtemp(prefix="code_puppy_terminal_screenshots_", dir=gettempdir())
)
41
+
42
# JavaScript to extract text content from xterm.js terminal.
#
# The arrow function is passed to page.evaluate() and works as follows:
#   1. Tries each selector in order and keeps the first element that matches.
#   2. If none match, falls back to the `.xterm` element's innerText, split on
#      newlines (method 'innerText'); if that is missing too it reports
#      {success: false, error: ...}.
#   3. Otherwise walks every <div> under the container. A row's text is the
#      concatenation of its <span> textContent, or the row's own textContent
#      when it has no spans (method 'row_extraction').
# In both paths rows that are empty or whitespace-only are dropped.
# The '\\n' below is a Python escape: the JavaScript receives '\n'.
XTERM_TEXT_EXTRACTION_JS = """
() => {
const selectors = [
'.xterm-rows',
'.xterm .xterm-rows',
'[class*="xterm-rows"]',
'.xterm-screen',
];

let container = null;
for (const selector of selectors) {
container = document.querySelector(selector);
if (container) break;
}

if (!container) {
const xtermElement = document.querySelector('.xterm');
if (xtermElement) {
return {
success: true,
lines: xtermElement.innerText.split('\\n').filter(line => line.trim()),
method: 'innerText'
};
}
return { success: false, error: 'Could not find xterm.js terminal container' };
}

const rows = container.querySelectorAll('div');
const lines = [];

rows.forEach(row => {
let text = '';
const spans = row.querySelectorAll('span');
if (spans.length > 0) {
spans.forEach(span => {
text += span.textContent || '';
});
} else {
text = row.textContent || '';
}
if (text.trim()) {
lines.push(text);
}
});

return {
success: true,
lines: lines,
method: 'row_extraction'
};
}
"""
95
+
96
+
97
def _build_screenshot_path(prefix: str = "terminal_screenshot") -> Path:
    """Return a timestamped PNG path inside the screenshot temp directory.

    The file name has the form ``<prefix>_<YYYYmmdd_HHMMSS_microseconds>.png``.
    Nothing is created on disk by this function.

    Args:
        prefix: Leading part of the file name.

    Returns:
        Path located under ``_TEMP_SCREENSHOT_ROOT``.
    """
    stamp = f"{datetime.now():%Y%m%d_%H%M%S_%f}"
    filename = f"{prefix}_{stamp}.png"
    return _TEMP_SCREENSHOT_ROOT.joinpath(filename)
101
+
102
+
103
def _resize_image(image_bytes: bytes, max_height: int = DEFAULT_MAX_HEIGHT) -> bytes:
    """Shrink an image to at most ``max_height`` pixels tall and return PNG bytes.

    This dramatically reduces token usage for multimodal models. The aspect
    ratio is preserved. Images that are already short enough are not resized;
    if such an image is not a PNG it is re-encoded as PNG so that callers can
    label the result ``image/png``. PNG input that needs no resize is returned
    unchanged.

    Args:
        image_bytes: Encoded image bytes in any format Pillow can open.
        max_height: Maximum height in pixels (default ``DEFAULT_MAX_HEIGHT``,
            i.e. 768).

    Returns:
        PNG-encoded bytes. If the image cannot be decoded, resized or encoded,
        the original ``image_bytes`` are returned unchanged (best effort).
    """
    try:
        img = Image.open(io.BytesIO(image_bytes))

        needs_resize = img.height > max_height

        # Fast path: already a small-enough PNG, nothing to do.
        if not needs_resize and img.format == "PNG":
            return image_bytes

        if needs_resize:
            # Calculate new dimensions maintaining aspect ratio. Clamp the
            # width to 1px so extremely tall, narrow images do not truncate
            # to a zero width (which Pillow rejects).
            ratio = max_height / img.height
            new_width = max(1, int(img.width * ratio))
            new_height = max_height

            logger.debug(
                "Resized image from %sx%s to %sx%s",
                img.width,
                img.height,
                new_width,
                new_height,
            )
            # High quality resampling keeps terminal text legible.
            img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)

        output = io.BytesIO()
        img.save(output, format="PNG", optimize=True)
        return output.getvalue()

    except Exception as e:
        # Deliberate best effort: a bad image must not break the tool call.
        logger.warning("Failed to resize image: %s, using original", e)
        return image_bytes
143
+
144
+
145
async def _capture_terminal_screenshot(
    full_page: bool = False,
    save_to_disk: bool = True,
    group_id: str | None = None,
    max_height: int = DEFAULT_MAX_HEIGHT,
) -> Dict[str, Any]:
    """Grab a PNG screenshot of the current terminal page.

    Args:
        full_page: Capture the full page instead of only the viewport.
        save_to_disk: Also write the (resized) screenshot to a temp file.
        group_id: Message group used for the "saved" notification; when it is
            falsy no notification is emitted.
        max_height: Height limit passed to ``_resize_image``.

    Returns:
        On success ``{"success": True, "screenshot_bytes": ...}`` plus
        ``"screenshot_path"`` when the file was saved. On failure
        ``{"success": False, "error": ...}``. Exceptions are logged and
        converted into the failure form rather than raised.
    """
    try:
        page = await get_session_manager().get_current_page()
        if not page:
            return {
                "success": False,
                "error": "No active terminal page. Open terminal first.",
            }

        raw_png = await page.screenshot(full_page=full_page, type="png")

        # Shrink before doing anything else so both the returned bytes and the
        # file on disk are the reduced version.
        shrunk_png = _resize_image(raw_png, max_height=max_height)

        outcome: Dict[str, Any] = {
            "success": True,
            "screenshot_bytes": shrunk_png,
        }

        if not save_to_disk:
            return outcome

        target_path = _build_screenshot_path()
        target_path.parent.mkdir(parents=True, exist_ok=True)
        target_path.write_bytes(shrunk_png)
        outcome["screenshot_path"] = str(target_path)

        if group_id:
            emit_success(
                f"Terminal screenshot saved: {target_path}",
                message_group=group_id,
            )

        return outcome

    except Exception as exc:
        logger.exception("Error capturing terminal screenshot")
        return {"success": False, "error": str(exc)}
202
+
203
+
204
async def terminal_screenshot(
    full_page: bool = False,
    save_to_disk: bool = True,
) -> Union[ToolReturn, Dict[str, Any]]:
    """Capture the terminal browser and hand the image back to the model.

    The screenshot is wrapped in a ToolReturn whose content carries the PNG as
    BinaryContent, so a multimodal model can look at it directly.

    Args:
        full_page: Capture the full page (True) or only the viewport (False,
            the default - what is visible on screen).
        save_to_disk: Whether the screenshot is also written to disk.
            Defaults to True.

    Returns:
        ToolReturn with:
            - return_value: Success message including the screenshot path
            - content: Description text, the BinaryContent image and a prompt
            - metadata: success, screenshot_path, target, full_page, timestamp
        or the error dict from the capture step when it failed.
    """
    scope = "full_page" if full_page else "viewport"
    group_id = generate_group_id("terminal_screenshot", scope)
    header = format_terminal_banner("TERMINAL SCREENSHOT 📷")
    emit_info(
        Text.from_markup(f"{header} [bold cyan]{scope}[/bold cyan]"),
        message_group=group_id,
    )

    capture = await _capture_terminal_screenshot(
        full_page=full_page,
        save_to_disk=save_to_disk,
        group_id=group_id,
    )

    # Failure: surface the error and return the capture dict untouched.
    if not capture["success"]:
        emit_error(capture.get("error", "Screenshot failed"), message_group=group_id)
        return capture

    saved_to = capture.get("screenshot_path", "(not saved)")

    # BinaryContent is what lets the model actually SEE the image.
    image_part = BinaryContent(
        data=capture["screenshot_bytes"],
        media_type="image/png",
    )
    details = {
        "success": True,
        "screenshot_path": saved_to,
        "target": scope,
        "full_page": full_page,
        "timestamp": time.time(),
    }
    return ToolReturn(
        return_value=f"Terminal screenshot captured. Saved to: {saved_to}",
        content=[
            f"Here's the terminal screenshot ({scope}):",
            image_part,
            "Please analyze what you see in the terminal.",
        ],
        metadata=details,
    )
265
+
266
+
267
async def terminal_read_output(lines: int = 50) -> Dict[str, Any]:
    """Read text output from the terminal by scraping the xterm.js DOM.

    Runs ``XTERM_TEXT_EXTRACTION_JS`` in the current terminal page and returns
    the last ``lines`` extracted rows joined with newlines. The script drops
    empty and whitespace-only rows, so ``lines`` counts non-blank rows. This
    is useful when you need the actual text rather than an image.

    Args:
        lines: Number of lines to return from the end. Defaults to 50.
            A value of zero or less yields an empty result.

    Returns:
        A dictionary containing:
            - success (bool): True if text was extracted.
            - output (str): The terminal text content.
            - line_count (int): Number of lines extracted.
            - error (str): Error message if unsuccessful.
    """
    group_id = generate_group_id("terminal_read_output", f"lines_{lines}")
    banner = format_terminal_banner("TERMINAL READ OUTPUT 📖")
    emit_info(
        Text.from_markup(f"{banner} [dim]last {lines} lines[/dim]"),
        message_group=group_id,
    )

    try:
        manager = get_session_manager()
        page = await manager.get_current_page()

        if not page:
            error_msg = "No active terminal page. Open terminal first."
            emit_error(error_msg, message_group=group_id)
            return {"success": False, "error": error_msg}

        # Execute JavaScript to extract text
        result = await page.evaluate(XTERM_TEXT_EXTRACTION_JS)

        if not result.get("success"):
            error_msg = result.get("error", "Failed to extract terminal text")
            emit_error(error_msg, message_group=group_id)
            return {"success": False, "error": error_msg}

        extracted_lines = result.get("lines", [])

        # Keep only the last N lines. A plain [-lines:] slice is wrong for
        # non-positive counts: [-0:] is the whole list and a negative count
        # would drop leading lines, so those requests yield no lines at all.
        extracted_lines = extracted_lines[-lines:] if lines > 0 else []

        output_text = "\n".join(extracted_lines)

        emit_success(
            f"Extracted {len(extracted_lines)} lines from terminal",
            message_group=group_id,
        )

        return {
            "success": True,
            "output": output_text,
            "line_count": len(extracted_lines),
        }

    except Exception as e:
        error_msg = f"Failed to read terminal output: {str(e)}"
        emit_error(error_msg, message_group=group_id)
        logger.exception("Error reading terminal output")
        return {"success": False, "error": error_msg}
331
+
332
+
333
async def load_image(
    image_path: str,
    max_height: int = DEFAULT_MAX_HEIGHT,
) -> Union[ToolReturn, Dict[str, Any]]:
    """Load an image from the filesystem for visual analysis.

    Reads the file, passes it through ``_resize_image`` to reduce token usage,
    and returns it via ToolReturn with BinaryContent so multimodal models can
    see it. The media type is detected from the bytes actually returned
    instead of being assumed to be PNG.

    Args:
        image_path: Path to the image file.
        max_height: Maximum height for resizing (default 768px).

    Returns:
        ToolReturn containing:
            - return_value: Success message with path info
            - content: List with description and BinaryContent image
            - metadata: success, image_path, max_height, media_type, timestamp
        Or Dict with ``success``/``error``/``image_path`` if loading failed.
    """
    group_id = generate_group_id("load_image", image_path)
    emit_info(f"LOAD IMAGE 🖼️ {image_path}", message_group=group_id)

    try:
        image_file = Path(image_path)

        if not image_file.exists():
            error_msg = f"Image file not found: {image_path}"
            emit_error(error_msg, message_group=group_id)
            return {"success": False, "error": error_msg, "image_path": image_path}

        if not image_file.is_file():
            error_msg = f"Path is not a file: {image_path}"
            emit_error(error_msg, message_group=group_id)
            return {"success": False, "error": error_msg, "image_path": image_path}

        # Read image bytes
        original_bytes = image_file.read_bytes()

        # Resize to reduce token usage
        image_bytes = _resize_image(original_bytes, max_height=max_height)

        # _resize_image can hand back the original bytes (no resize needed, or
        # resizing failed), which are not necessarily PNG. Detect the real
        # format so the model is not given a mislabeled image; fall back to
        # the previous PNG assumption when detection is impossible.
        media_type = "image/png"
        try:
            with Image.open(io.BytesIO(image_bytes)) as probe:
                media_type = Image.MIME.get(probe.format or "", media_type)
        except Exception:
            logger.debug(
                "Could not detect image format for %s; assuming PNG", image_path
            )

        emit_success(f"Loaded image: {image_path}", message_group=group_id)

        # Return as ToolReturn with BinaryContent so the model can SEE the image!
        return ToolReturn(
            return_value=f"Image loaded from: {image_path}",
            content=[
                f"Here's the image from {image_file.name}:",
                BinaryContent(
                    data=image_bytes,
                    media_type=media_type,
                ),
                "Please analyze what you see in this image.",
            ],
            metadata={
                "success": True,
                "image_path": image_path,
                "max_height": max_height,
                "media_type": media_type,
                "timestamp": time.time(),
            },
        )

    except Exception as e:
        error_msg = f"Failed to load image: {str(e)}"
        emit_error(error_msg, message_group=group_id)
        logger.exception("Error loading image")
        return {"success": False, "error": error_msg, "image_path": image_path}
401
+
402
+
403
+ # =============================================================================
404
+ # Tool Registration Functions
405
+ # =============================================================================
406
+
407
+
408
def register_terminal_screenshot(agent):
    """Register the terminal screenshot tool.

    Attaches ``terminal_screenshot_analyze`` to ``agent`` through its ``tool``
    decorator. The tool forwards ``full_page`` to ``terminal_screenshot`` and
    leaves ``save_to_disk`` at its default.

    Args:
        agent: Object exposing a ``tool`` decorator (presumably a pydantic_ai
            agent - confirm against callers).
    """

    # NOTE(review): the tool docstring below is presumably surfaced to the
    # model as the tool description - keep its wording stable.
    @agent.tool
    async def terminal_screenshot_analyze(
        context: RunContext,
        full_page: bool = False,
    ) -> Union[ToolReturn, Dict[str, Any]]:
        """
        Take a screenshot of the terminal browser.

        Returns the screenshot via ToolReturn with BinaryContent that you can
        see directly. Use this to see what's displayed in the terminal.

        Args:
            full_page: Capture full page (True) or just viewport (False).

        Returns:
            ToolReturn with the terminal screenshot you can analyze, or error dict.
        """
        # Session is set by invoke_agent via contextvar
        return await terminal_screenshot(full_page=full_page)
430
+
431
+
432
def register_terminal_read_output(agent):
    """Register the terminal text reading tool.

    Attaches a tool named ``terminal_read_output`` to ``agent`` through its
    ``tool`` decorator. The tool delegates to the module-level function of the
    same name.

    Args:
        agent: Object exposing a ``tool`` decorator (presumably a pydantic_ai
            agent - confirm against callers).
    """

    # NOTE(review): the tool docstring below is presumably surfaced to the
    # model as the tool description - keep its wording stable.
    @agent.tool
    async def terminal_read_output(
        context: RunContext,
        lines: int = 50,
    ) -> Dict[str, Any]:
        """
        Read text from the terminal (scrapes xterm.js DOM).

        Use this when you need the actual text content, not just an image.

        Args:
            lines: Number of lines to read from end (default: 50).

        Returns:
            Dict with output (text content), line_count, success.
        """
        # Session is set by invoke_agent via contextvar
        # This nested function shadows the module-level terminal_read_output,
        # so the real implementation is reached through the module object.
        from . import terminal_screenshot_tools

        return await terminal_screenshot_tools.terminal_read_output(lines=lines)
455
+
456
+
457
def register_load_image(agent):
    """Register the image loading tool.

    Attaches ``load_image_for_analysis`` to ``agent`` through its ``tool``
    decorator. The tool forwards ``image_path`` to ``load_image`` and leaves
    ``max_height`` at its default.

    Args:
        agent: Object exposing a ``tool`` decorator (presumably a pydantic_ai
            agent - confirm against callers).
    """

    # NOTE(review): the tool docstring below is presumably surfaced to the
    # model as the tool description - keep its wording stable.
    @agent.tool
    async def load_image_for_analysis(
        context: RunContext,
        image_path: str,
    ) -> Union[ToolReturn, Dict[str, Any]]:
        """
        Load an image file so you can see and analyze it.

        Returns the image via ToolReturn with BinaryContent that you can
        see directly.

        Args:
            image_path: Path to the image file.

        Returns:
            ToolReturn with the image you can analyze, or error dict.
        """
        # Session is set by invoke_agent via contextvar
        return await load_image(image_path=image_path)
479
+
480
+
481
def register_terminal_compare_mockup(agent):
    """Register the mockup comparison tool.

    Attaches ``terminal_compare_mockup`` to ``agent`` through its ``tool``
    decorator. The tool validates and loads the mockup image first, then
    captures the terminal viewport, and returns both images in one ToolReturn.
    Every failure path emits an error message and returns a
    ``{"success": False, "error": ...}`` dict instead of raising.

    Args:
        agent: Object exposing a ``tool`` decorator (presumably a pydantic_ai
            agent - confirm against callers).
    """

    @agent.tool
    async def terminal_compare_mockup(
        context: RunContext,
        mockup_path: str,
    ) -> Union[ToolReturn, Dict[str, Any]]:
        """
        Compare the terminal to a mockup image.

        Takes a screenshot of the terminal and loads the mockup image.
        Returns both via ToolReturn with BinaryContent so you can compare them.

        Args:
            mockup_path: Path to the mockup/expected image.

        Returns:
            ToolReturn with both images (terminal and mockup) you can compare.
        """
        # Session is set by invoke_agent via contextvar
        group_id = generate_group_id("terminal_compare_mockup", mockup_path)
        banner = format_terminal_banner("TERMINAL COMPARE MOCKUP 🖼️")
        emit_info(
            Text.from_markup(f"{banner} [bold cyan]{mockup_path}[/bold cyan]"),
            message_group=group_id,
        )

        # Validate and load the mockup before touching the terminal, so a bad
        # path does not leave a pointless screenshot file behind.
        mockup_file = Path(mockup_path)
        if not mockup_file.exists():
            error_msg = f"Mockup file not found: {mockup_path}"
            emit_error(error_msg, message_group=group_id)
            return {"success": False, "error": error_msg}

        if not mockup_file.is_file():
            error_msg = f"Path is not a file: {mockup_path}"
            emit_error(error_msg, message_group=group_id)
            return {"success": False, "error": error_msg}

        try:
            mockup_bytes = _resize_image(mockup_file.read_bytes())
        except OSError as e:
            error_msg = f"Failed to read mockup file: {e}"
            emit_error(error_msg, message_group=group_id)
            logger.exception("Error reading mockup file")
            return {"success": False, "error": error_msg}

        # _resize_image may return the original bytes unchanged, which are not
        # necessarily PNG; detect the real format and fall back to PNG.
        mockup_media_type = "image/png"
        try:
            with Image.open(io.BytesIO(mockup_bytes)) as probe:
                mockup_media_type = Image.MIME.get(
                    probe.format or "", mockup_media_type
                )
        except Exception:
            logger.debug(
                "Could not detect mockup image format for %s; assuming PNG",
                mockup_path,
            )

        # Capture terminal screenshot (get raw result for bytes)
        terminal_capture = await _capture_terminal_screenshot(
            full_page=False,
            save_to_disk=True,
            group_id=group_id,
        )
        if not terminal_capture["success"]:
            # Report the failure the same way terminal_screenshot does.
            emit_error(
                terminal_capture.get("error", "Screenshot failed"),
                message_group=group_id,
            )
            return terminal_capture

        emit_success(
            "Both images loaded. Compare them visually.",
            message_group=group_id,
        )

        terminal_path = terminal_capture.get("screenshot_path", "(not saved)")

        # Return as ToolReturn with BOTH images as BinaryContent!
        return ToolReturn(
            return_value=f"Comparison ready: terminal vs mockup ({mockup_path})",
            content=[
                "Here's the CURRENT terminal screenshot:",
                BinaryContent(
                    data=terminal_capture["screenshot_bytes"],
                    media_type="image/png",
                ),
                f"And here's the EXPECTED mockup ({mockup_file.name}):",
                BinaryContent(
                    data=mockup_bytes,
                    media_type=mockup_media_type,
                ),
                "Please compare these images and describe any differences.",
            ],
            metadata={
                "success": True,
                "terminal_path": terminal_path,
                "mockup_path": mockup_path,
                "timestamp": time.time(),
            },
        )