connectonion 0.5.10__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff compares the content of publicly released package versions as published to their public registry. It is provided for informational purposes only.
Files changed (65)
  1. connectonion/__init__.py +17 -16
  2. connectonion/cli/browser_agent/browser.py +488 -145
  3. connectonion/cli/browser_agent/scroll_strategies.py +276 -0
  4. connectonion/cli/commands/copy_commands.py +24 -1
  5. connectonion/cli/commands/deploy_commands.py +15 -0
  6. connectonion/cli/commands/eval_commands.py +286 -0
  7. connectonion/cli/commands/project_cmd_lib.py +1 -1
  8. connectonion/cli/main.py +11 -0
  9. connectonion/console.py +5 -5
  10. connectonion/core/__init__.py +53 -0
  11. connectonion/{agent.py → core/agent.py} +18 -15
  12. connectonion/{llm.py → core/llm.py} +9 -19
  13. connectonion/{tool_executor.py → core/tool_executor.py} +3 -2
  14. connectonion/{tool_factory.py → core/tool_factory.py} +3 -1
  15. connectonion/debug/__init__.py +51 -0
  16. connectonion/{interactive_debugger.py → debug/auto_debug.py} +7 -7
  17. connectonion/{auto_debug_exception.py → debug/auto_debug_exception.py} +3 -3
  18. connectonion/{debugger_ui.py → debug/auto_debug_ui.py} +1 -1
  19. connectonion/{debug_explainer → debug/debug_explainer}/explain_agent.py +1 -1
  20. connectonion/{debug_explainer → debug/debug_explainer}/explain_context.py +1 -1
  21. connectonion/{execution_analyzer → debug/execution_analyzer}/execution_analysis.py +1 -1
  22. connectonion/debug/runtime_inspector/__init__.py +13 -0
  23. connectonion/{debug_agent → debug/runtime_inspector}/agent.py +1 -1
  24. connectonion/{xray.py → debug/xray.py} +1 -1
  25. connectonion/llm_do.py +1 -1
  26. connectonion/logger.py +305 -135
  27. connectonion/network/__init__.py +37 -0
  28. connectonion/{announce.py → network/announce.py} +1 -1
  29. connectonion/{asgi.py → network/asgi.py} +122 -2
  30. connectonion/{connect.py → network/connect.py} +1 -1
  31. connectonion/network/connection.py +123 -0
  32. connectonion/{host.py → network/host.py} +31 -11
  33. connectonion/{trust.py → network/trust.py} +1 -1
  34. connectonion/tui/__init__.py +22 -0
  35. connectonion/tui/chat.py +647 -0
  36. connectonion/useful_events_handlers/reflect.py +2 -2
  37. connectonion/useful_plugins/__init__.py +4 -3
  38. connectonion/useful_plugins/calendar_plugin.py +2 -2
  39. connectonion/useful_plugins/eval.py +2 -2
  40. connectonion/useful_plugins/gmail_plugin.py +2 -2
  41. connectonion/useful_plugins/image_result_formatter.py +2 -2
  42. connectonion/useful_plugins/re_act.py +2 -2
  43. connectonion/useful_plugins/shell_approval.py +2 -2
  44. connectonion/useful_plugins/ui_stream.py +164 -0
  45. {connectonion-0.5.10.dist-info → connectonion-0.6.1.dist-info}/METADATA +4 -3
  46. connectonion-0.6.1.dist-info/RECORD +123 -0
  47. connectonion/debug_agent/__init__.py +0 -13
  48. connectonion-0.5.10.dist-info/RECORD +0 -115
  49. /connectonion/{events.py → core/events.py} +0 -0
  50. /connectonion/{tool_registry.py → core/tool_registry.py} +0 -0
  51. /connectonion/{usage.py → core/usage.py} +0 -0
  52. /connectonion/{debug_explainer → debug/debug_explainer}/__init__.py +0 -0
  53. /connectonion/{debug_explainer → debug/debug_explainer}/explainer_prompt.md +0 -0
  54. /connectonion/{debug_explainer → debug/debug_explainer}/root_cause_analysis_prompt.md +0 -0
  55. /connectonion/{decorators.py → debug/decorators.py} +0 -0
  56. /connectonion/{execution_analyzer → debug/execution_analyzer}/__init__.py +0 -0
  57. /connectonion/{execution_analyzer → debug/execution_analyzer}/execution_analysis_prompt.md +0 -0
  58. /connectonion/{debug_agent → debug/runtime_inspector}/prompts/debug_assistant.md +0 -0
  59. /connectonion/{debug_agent → debug/runtime_inspector}/runtime_inspector.py +0 -0
  60. /connectonion/{relay.py → network/relay.py} +0 -0
  61. /connectonion/{static → network/static}/docs.html +0 -0
  62. /connectonion/{trust_agents.py → network/trust_agents.py} +0 -0
  63. /connectonion/{trust_functions.py → network/trust_functions.py} +0 -0
  64. {connectonion-0.5.10.dist-info → connectonion-0.6.1.dist-info}/WHEEL +0 -0
  65. {connectonion-0.5.10.dist-info → connectonion-0.6.1.dist-info}/entry_points.txt +0 -0
connectonion/cli/browser_agent/scroll_strategies.py ADDED
@@ -0,0 +1,276 @@
+"""
+Purpose: Universal scrolling strategies with AI-powered selection and screenshot-based verification
+LLM-Note:
+  Dependencies: imports from [typing, pydantic, connectonion.llm_do, PIL.Image, os, time] | imported by [web_automation.py] | tested by [tests/test_final_scroll.py]
+  Data flow: receives page: Page, take_screenshot: Callable, times: int, description: str from web_automation.scroll() → scroll_with_verification() orchestrates 3 strategies → ai_scroll_strategy() calls llm_do(HTML+scrollable_elements→ScrollStrategy, gpt-4o) → element_scroll_strategy()/page_scroll_strategy() fallbacks → page.evaluate(javascript) executes scroll → screenshots_are_different() compares PIL Images with 1% pixel threshold → returns success/failure string
+  State/Effects: calls page.evaluate() multiple times (mutates DOM scroll positions) | take_screenshot() writes PNG files to screenshots/*.png | time.sleep(1-1.2) between scroll iterations | AI calls to gpt-4o with temperature=0.1 for strategy generation
+  Integration: exposes scroll_with_verification() as main entry point from WebAutomation.scroll() | exposes scroll_page(), scroll_element() as standalone utilities | ScrollStrategy Pydantic model defines AI output schema (javascript: str, explanation: str) | screenshots_are_different() uses PIL for pixel-level comparison
+  Performance: ai_scroll_strategy() calls llm_do() once per scroll session (100-500ms) | analyzes first 5000 chars of HTML | finds up to 3 scrollable elements | executes JS times iterations with 1.2s delays | element/page strategies are synchronous JS execution (fast) | PIL screenshot comparison ~50-100ms
+  Errors: returns descriptive strings (not exceptions) - "All scroll strategies failed", "Browser not open" | screenshot comparison failure returns True (assumes different) to continue | page.evaluate() exceptions caught and next strategy tried | prints debug output to stdout
+  ⚠️ Strategy order: AI-first may be slower but more accurate for complex sites (Gmail) - reorder if speed critical
+  ⚠️ Screenshot verification: 1% threshold may need tuning for high-resolution displays or subtle animations
+"""
+
+from typing import Callable, List, Tuple
+from pydantic import BaseModel
+from connectonion import llm_do
+
+
+class ScrollStrategy(BaseModel):
+    """AI-generated scroll strategy."""
+    javascript: str
+    explanation: str
+
+
+def scroll_with_verification(
+    page,
+    take_screenshot: Callable,
+    times: int = 5,
+    description: str = "the main content area"
+) -> str:
+    """Universal scroll with automatic strategy selection and fallback.
+
+    Tries multiple strategies in order until one works:
+    1. AI-generated strategy (default)
+    2. Element scrolling
+    3. Page scrolling
+
+    Args:
+        page: Playwright page object
+        take_screenshot: Function to take screenshots
+        times: Number of scroll iterations
+        description: What to scroll (natural language)
+
+    Returns:
+        Status message with successful strategy
+    """
+    if not page:
+        return "Browser not open"
+
+    print(f"\n📜 Starting universal scroll for: '{description}'")
+
+    import time
+    timestamp = int(time.time())
+    before_file = f"scroll_before_{timestamp}.png"
+    after_file = f"scroll_after_{timestamp}.png"
+
+    # Take before screenshot
+    take_screenshot(before_file)
+
+    strategies = [
+        ("AI-generated strategy", lambda: ai_scroll_strategy(page, times, description)),
+        ("Element scrolling", lambda: element_scroll_strategy(page, times)),
+        ("Page scrolling", lambda: page_scroll_strategy(page, times))
+    ]
+
+    for strategy_name, strategy_func in strategies:
+        print(f"\n Trying: {strategy_name}...")
+
+        try:
+            strategy_func()
+            time.sleep(1)
+
+            # Take after screenshot
+            take_screenshot(after_file)
+
+            # Verify scroll worked
+            if screenshots_are_different(before_file, after_file):
+                print(f" ✅ {strategy_name} WORKED! Content changed.")
+                return f"Scroll successful using {strategy_name}. Check {before_file} vs {after_file}"
+            else:
+                print(f" ⚠️ {strategy_name} didn't change content. Trying next...")
+                before_file = after_file
+                after_file = f"scroll_after_{timestamp}_next.png"
+
+        except Exception as e:
+            print(f" ❌ {strategy_name} failed: {e}")
+            continue
+
+    return "All scroll strategies failed. No visible content change."
+
+
+def screenshots_are_different(file1: str, file2: str) -> bool:
+    """Compare screenshots to verify content changed.
+
+    Args:
+        file1: First screenshot filename
+        file2: Second screenshot filename
+
+    Returns:
+        True if screenshots are different
+    """
+    try:
+        from PIL import Image
+        import os
+
+        path1 = os.path.join("screenshots", file1)
+        path2 = os.path.join("screenshots", file2)
+
+        img1 = Image.open(path1).convert('RGB')
+        img2 = Image.open(path2).convert('RGB')
+
+        # Calculate pixel difference
+        diff = sum(
+            abs(a - b)
+            for pixel1, pixel2 in zip(img1.getdata(), img2.getdata())
+            for a, b in zip(pixel1, pixel2)
+        )
+
+        # 1% threshold
+        threshold = img1.size[0] * img1.size[1] * 3 * 0.01
+
+        is_different = diff > threshold
+        print(f" Screenshot diff: {diff:.0f} (threshold: {threshold:.0f}) - {'DIFFERENT' if is_different else 'SAME'}")
+
+        return is_different
+
+    except Exception as e:
+        print(f" Warning: Screenshot comparison failed: {e}")
+        return True  # Assume different if comparison fails
+
+
+def ai_scroll_strategy(page, times: int, description: str):
+    """AI-generated scroll strategy.
+
+    Analyzes page structure and generates custom JavaScript.
+    """
+    # Find scrollable elements
+    scrollable_elements = page.evaluate("""
+        (() => {
+            const scrollable = [];
+            document.querySelectorAll('*').forEach(el => {
+                const style = window.getComputedStyle(el);
+                if ((style.overflow === 'auto' || style.overflowY === 'scroll') &&
+                    el.scrollHeight > el.clientHeight) {
+                    scrollable.push({
+                        tag: el.tagName,
+                        classes: el.className,
+                        id: el.id
+                    });
+                }
+            });
+            return scrollable;
+        })()
+    """)
+
+    # Get simplified HTML
+    simplified_html = page.evaluate("""
+        (() => {
+            const clone = document.body.cloneNode(true);
+            clone.querySelectorAll('script, style, img, svg').forEach(el => el.remove());
+            return clone.innerHTML.substring(0, 5000);
+        })()
+    """)
+
+    # Generate scroll strategy using AI
+    strategy = llm_do(
+        f"""Generate JavaScript to scroll "{description}".
+
+        Scrollable elements: {scrollable_elements[:3]}
+        HTML structure: {simplified_html}
+
+        Return IIFE that scrolls the correct element:
+        (() => {{
+            const el = document.querySelector('.selector');
+            if (el) el.scrollTop += 1000;
+            return {{success: true}};
+        }})()
+        """,
+        output=ScrollStrategy,
+        model="gpt-4o",
+        temperature=0.1
+    )
+
+    print(f" AI generated: {strategy.explanation}")
+
+    # Execute scroll
+    import time
+    for i in range(times):
+        page.evaluate(strategy.javascript)
+        time.sleep(1.2)
+
+
+def element_scroll_strategy(page, times: int):
+    """Scroll first scrollable element found."""
+    import time
+    for i in range(times):
+        page.evaluate("""
+            (() => {
+                const el = Array.from(document.querySelectorAll('*')).find(e => {
+                    const s = window.getComputedStyle(e);
+                    return (s.overflow === 'auto' || s.overflowY === 'scroll') &&
+                           e.scrollHeight > e.clientHeight;
+                });
+                if (el) el.scrollTop += 1000;
+            })()
+        """)
+        time.sleep(1)
+
+
+def page_scroll_strategy(page, times: int):
+    """Scroll the page window."""
+    import time
+    for i in range(times):
+        page.evaluate("window.scrollBy(0, 1000)")
+        time.sleep(1)
+
+
+# Additional scroll helpers that can be called directly
+def scroll_page(page, direction: str = "down", amount: int = 1000) -> str:
+    """Scroll the page in a specific direction.
+
+    Args:
+        page: Playwright page object
+        direction: "down", "up", "top", or "bottom"
+        amount: Pixels to scroll
+
+    Returns:
+        Status message
+    """
+    if not page:
+        return "Browser not open"
+
+    if direction == "bottom":
+        page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
+        return "Scrolled to bottom of page"
+    elif direction == "top":
+        page.evaluate("window.scrollTo(0, 0)")
+        return "Scrolled to top of page"
+    elif direction == "down":
+        page.evaluate(f"window.scrollBy(0, {amount})")
+        return f"Scrolled down {amount} pixels"
+    elif direction == "up":
+        page.evaluate(f"window.scrollBy(0, -{amount})")
+        return f"Scrolled up {amount} pixels"
+    else:
+        return f"Unknown direction: {direction}"
+
+
+def scroll_element(page, selector: str, amount: int = 1000) -> str:
+    """Scroll a specific element by CSS selector.
+
+    Args:
+        page: Playwright page object
+        selector: CSS selector for the element
+        amount: Pixels to scroll
+
+    Returns:
+        Status message
+    """
+    if not page:
+        return "Browser not open"
+
+    result = page.evaluate(f"""
+        (() => {{
+            const element = document.querySelector('{selector}');
+            if (!element) return 'Element not found: {selector}';
+
+            const beforeScroll = element.scrollTop;
+            element.scrollTop += {amount};
+            const afterScroll = element.scrollTop;
+
+            return `Scrolled from ${{beforeScroll}}px to ${{afterScroll}}px (delta: ${{afterScroll - beforeScroll}}px)`;
+        }})()
+    """)
+
+    return result
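
For orientation (not part of the diff): a minimal sketch of driving scroll_with_verification() from a Playwright sync script. The screenshots/ directory and the take_screenshot wrapper follow the module's docstring contract, and the import path simply mirrors the new file location; treat both as assumptions. Note that screenshots_are_different() sums absolute RGB channel differences, so at 1280×720 the 1% threshold works out to 1280 × 720 × 3 × 0.01 ≈ 27,648.

    import os
    from playwright.sync_api import sync_playwright
    from connectonion.cli.browser_agent.scroll_strategies import scroll_with_verification

    os.makedirs("screenshots", exist_ok=True)  # screenshots_are_different() reads from here

    with sync_playwright() as p:
        browser = p.chromium.launch()
        page = browser.new_page()
        page.goto("https://news.ycombinator.com")

        def take_screenshot(filename: str):
            # The module passes bare filenames; comparison joins them under screenshots/
            page.screenshot(path=os.path.join("screenshots", filename))

        print(scroll_with_verification(page, take_screenshot, times=3,
                                       description="the main story list"))
        browser.close()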
connectonion/cli/commands/copy_commands.py CHANGED
@@ -39,6 +39,18 @@ PLUGINS = {
     "calendar_plugin": "calendar_plugin.py",
 }
 
+# Registry of copyable TUI components
+TUI = {
+    "chat": "chat.py",
+    "fuzzy": "fuzzy.py",
+    "divider": "divider.py",
+    "footer": "footer.py",
+    "status_bar": "status_bar.py",
+    "dropdown": "dropdown.py",
+    "pick": "pick.py",
+    "keys": "keys.py",
+}
+
 
 def handle_copy(
     names: List[str],
@@ -56,9 +68,11 @@ def handle_copy(
     # Get source directories using import system (works for installed packages)
     import connectonion.useful_tools as tools_module
     import connectonion.useful_plugins as plugins_module
+    import connectonion.tui as tui_module
 
     useful_tools_dir = Path(tools_module.__file__).parent
     useful_plugins_dir = Path(plugins_module.__file__).parent
+    tui_dir = Path(tui_module.__file__).parent
 
     current_dir = Path.cwd()
 
@@ -77,6 +91,12 @@ def handle_copy(
         dest_dir = Path(path) if path else current_dir / "plugins"
         copy_file(source, dest_dir, force)
 
+    # Check if it's a TUI component
+    elif name_lower in TUI:
+        source = tui_dir / TUI[name_lower]
+        dest_dir = Path(path) if path else current_dir / "tui"
+        copy_file(source, dest_dir, force)
+
     else:
         console.print(f"[red]Unknown: {name}[/red]")
         console.print("Use [cyan]co copy --list[/cyan] to see available items")
@@ -100,7 +120,7 @@ def copy_file(source: Path, dest_dir: Path, force: bool):
 
 
 def show_available_items():
-    """Display available tools and plugins."""
+    """Display available tools, plugins, and TUI components."""
     table = Table(title="Available Items to Copy")
     table.add_column("Name", style="cyan")
     table.add_column("Type", style="green")
@@ -112,5 +132,8 @@ def show_available_items():
     for name, file in sorted(PLUGINS.items()):
         table.add_row(name, "plugin", file)
 
+    for name, file in sorted(TUI.items()):
+        table.add_row(name, "tui", file)
+
     console.print(table)
     console.print("\n[dim]Usage: co copy <name> [--path ./custom/][/dim]")
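
With the TUI registry in place, the existing copy flow extends unchanged; per the code above, TUI components land in ./tui/ unless --path overrides it. For example:

    co copy chat
    co copy status_bar --path ./src/tui/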
connectonion/cli/commands/deploy_commands.py CHANGED
@@ -217,4 +217,19 @@ def handle_deploy():
     # Always show URL if we have one
     if url:
         console.print(f"Agent URL: {url}")
+
+    # Always fetch and display container logs
+    if deployment_id:
+        logs_resp = requests.get(
+            f"{API_BASE}/api/v1/deploy/{deployment_id}/logs?tail=20",
+            headers={"Authorization": f"Bearer {api_key}"},
+            timeout=10,
+        )
+        if logs_resp.status_code == 200:
+            logs = logs_resp.json().get("logs", "")
+            if logs:
+                console.print()
+                console.print("[dim]Container logs:[/dim]")
+                console.print(f"[dim]{logs}[/dim]")
+
     console.print()
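
The log fetch is a plain authenticated GET, so the same endpoint can be hit by hand for debugging; a sketch, where the base URL and key variable names are placeholders, not documented interfaces:

    curl -H "Authorization: Bearer $CONNECTONION_API_KEY" \
         "$API_BASE/api/v1/deploy/<deployment_id>/logs?tail=20"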
connectonion/cli/commands/eval_commands.py ADDED
@@ -0,0 +1,286 @@
+"""
+Purpose: CLI command for running and managing evals
+LLM-Note:
+  Dependencies: imports from [pathlib, yaml, json, rich, importlib] | imported by [cli/main.py]
+  Data flow: handle_eval() → reads .co/evals/*.yaml → imports agent → runs with stored input → compares expected vs output
+  Integration: exposes handle_eval(name, run) for CLI
+
+Eval YAML format:
+  - `turns`: List of inputs to send to agent sequentially (like a conversation).
+    Each turn can have one input. Turns run in order within same agent session,
+    simulating multi-round conversations. Use single turn for simple evals,
+    or multiple turns to test conversation flow.
+"""
+
+import importlib.util
+import json
+import os
+import sys
+from datetime import datetime
+from pathlib import Path
+from typing import Optional
+
+import yaml
+from pydantic import BaseModel
+from rich.console import Console
+from rich.table import Table
+
+console = Console()
+
+
+class JudgeResult(BaseModel):
+    """Result from LLM judge evaluation."""
+    passed: bool
+    analysis: str
+
+
+def get_agent_from_file(file_path: str, cwd: str):
+    """Import agent instance from file."""
+    from connectonion import Agent
+
+    if not os.path.isabs(file_path):
+        file_path = os.path.join(cwd, file_path)
+
+    if cwd not in sys.path:
+        sys.path.insert(0, cwd)
+
+    spec = importlib.util.spec_from_file_location("agent_module", file_path)
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+
+    if hasattr(module, 'agent') and isinstance(module.agent, Agent):
+        agent = module.agent
+        agent.logger.enable_sessions = False  # Prevent duplicate eval files
+        return agent
+
+    raise ValueError(
+        f"No 'agent' instance found in {file_path}.\n\n"
+        f"Structure your file like this:\n\n"
+        f"    agent = Agent(...)\n\n"
+        f"    if __name__ == '__main__':\n"
+        f"        agent.input('...')\n"
+    )
+
+
+def handle_eval(name: Optional[str] = None, agent_file: Optional[str] = None):
+    """Run evals and show results.
+
+    Args:
+        name: Optional specific eval name to run
+        agent_file: Optional agent file path (overrides YAML setting)
+    """
+    evals_dir = Path(".co/evals")
+
+    if not evals_dir.exists():
+        console.print("[yellow]No evals found.[/yellow]")
+        console.print("[dim]Create eval files in .co/evals/*.yaml[/dim]")
+        return
+
+    if name:
+        eval_files = list(evals_dir.glob(f"{name}.yaml"))
+        if not eval_files:
+            console.print(f"[red]Eval not found: {name}[/red]")
+            return
+    else:
+        eval_files = list(evals_dir.glob("*.yaml"))
+
+    if not eval_files:
+        console.print("[yellow]No eval files found in .co/evals/[/yellow]")
+        return
+
+    _run_evals(eval_files, agent_file)
+
+    # Reload and show status
+    if name:
+        eval_files = list(evals_dir.glob(f"{name}.yaml"))
+    else:
+        eval_files = list(evals_dir.glob("*.yaml"))
+
+    _show_eval_status(eval_files)
+
+
+def _run_evals(eval_files: list, agent_override: Optional[str] = None):
+    """Run agents for each eval and capture output."""
+    cwd = os.getcwd()
+    agents_cache = {}  # Cache agents by file path
+
+    for eval_file in eval_files:
+        with open(eval_file) as f:
+            data = yaml.safe_load(f)
+
+        # Get agent file: CLI override > YAML > error
+        agent_file = agent_override or data.get('agent')
+        if not agent_file:
+            console.print(f"[red]No agent specified for {eval_file.stem}[/red]")
+            console.print(f"[dim]Add 'agent: agent.py' to the YAML or use --agent flag[/dim]")
+            continue
+
+        # Load agent (cached)
+        if agent_file not in agents_cache:
+            console.print(f"[cyan]Loading:[/cyan] {agent_file}")
+            agents_cache[agent_file] = get_agent_from_file(agent_file, cwd)
+        agent = agents_cache[agent_file]
+
+        turns = data.get('turns', [])
+        if not turns:
+            console.print(f"[yellow]No turns found in {eval_file.stem}[/yellow]")
+            continue
+
+        console.print(f"[cyan]Running:[/cyan] {eval_file.stem}")
+
+        # Reset agent session for fresh state each eval
+        agent.reset_conversation()
+
+        file_modified = False
+        for turn in turns:
+            input_text = turn.get('input', '')
+            if not input_text:
+                continue
+
+            # Show input (truncated)
+            display_input = input_text[:60] + "..." if len(input_text) > 60 else input_text
+            console.print(f" [dim]input:[/dim] {display_input}")
+
+            # Run agent and capture result
+            result = agent.input(input_text)
+
+            # Extract tools_called and metrics from agent session
+            trace = agent.current_session.get('trace', [])
+            tool_calls = [t for t in trace if t.get('type') == 'tool_execution']
+            llm_calls = [t for t in trace if t.get('type') == 'llm_call']
+            tools_called = [agent.logger._format_tool_call(t) for t in tool_calls]
+
+            total_tokens = sum(
+                (t.get('usage').input_tokens + t.get('usage').output_tokens)
+                for t in llm_calls if t.get('usage')
+            )
+            total_cost = sum(
+                t.get('usage').cost for t in llm_calls if t.get('usage')
+            )
+
+            # Build history as JSON array string (compact, easy to scan)
+            history_str = turn.get('history', '[]')
+            history = json.loads(history_str) if isinstance(history_str, str) else []
+            if turn.get('output'):
+                history.insert(0, {
+                    "ts": turn.get('ts', ''),
+                    "pass": turn.get('pass'),
+                    "tokens": turn.get('tokens', 0),
+                    "cost": turn.get('cost', 0)
+                })
+
+            # Store result in turn
+            turn['output'] = result
+            turn['tools_called'] = tools_called
+            turn['tokens'] = total_tokens
+            turn['cost'] = round(total_cost, 4)
+            turn['ts'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+            turn['run'] = data.get('runs', 0) + 1
+            # Format history as multi-line JSON for readability
+            if history:
+                lines = [json.dumps(h) for h in history]
+                turn['history'] = "[\n" + ",\n".join(lines) + "]"
+            else:
+                turn['history'] = "[]"
+            file_modified = True
+
+            # Judge immediately if expected exists
+            expected = turn.get('expected', '')
+            if expected:
+                judge = _judge_with_llm(expected, result, input_text)
+                turn['pass'] = judge.passed
+                turn['analysis'] = judge.analysis
+                status = "[green]✓[/green]" if judge.passed else "[red]✗[/red]"
+                console.print(f" {status} {judge.analysis[:60]}...")
+            else:
+                # Show output (truncated)
+                display_output = result[:60] + "..." if len(result) > 60 else result
+                console.print(f" [green]output:[/green] {display_output}")
+
+        if file_modified:
+            # Update runs count and save
+            data['runs'] = data.get('runs', 0) + 1
+            data['updated'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+            with open(eval_file, 'w') as f:
+                yaml.dump(data, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
+
+        console.print(f"[green]✓[/green] {eval_file.stem} completed")
+        console.print()
+
+    console.print()
+
+
+def _judge_with_llm(expected: str, output: str, input_text: str) -> JudgeResult:
+    """Use LLM to judge if output matches expected."""
+    from connectonion import llm_do
+
+    prompt = f"""You are an eval judge. Determine if the agent's output satisfies the expected criteria.
+
+Input: {input_text}
+Expected: {expected}
+Output: {output}
+
+Does the output satisfy the expected criteria? Consider:
+- Semantic similarity (not exact match)
+- Key information presence
+- Intent fulfillment
+"""
+    return llm_do(prompt, output=JudgeResult)
+
+
+def _show_eval_status(eval_files: list):
+    """Show pass/fail status for all evals (uses stored results, no re-judging)."""
+    table = Table(title="Eval Results", show_header=True)
+    table.add_column("Eval", style="cyan")
+    table.add_column("Status", justify="center")
+    table.add_column("Expected", max_width=30)
+    table.add_column("Output", max_width=30)
+
+    passed = 0
+    failed = 0
+    no_expected = 0
+
+    for eval_file in sorted(eval_files):
+        with open(eval_file) as f:
+            data = yaml.safe_load(f)
+
+        for turn in data.get('turns', []):
+            expected = turn.get('expected', '')
+            output = turn.get('output', '')
+            pass_result = turn.get('pass')
+
+            if not expected:
+                status = "[dim]—[/dim]"
+                no_expected += 1
+            elif pass_result is True:
+                status = "[green]✓ pass[/green]"
+                passed += 1
+            elif pass_result is False:
+                status = "[red]✗ fail[/red]"
+                failed += 1
+            else:
+                status = "[dim]pending[/dim]"
+                no_expected += 1
+
+            # Truncate for display
+            expected_display = (expected[:27] + "...") if len(expected) > 30 else expected
+            output_display = (output[:27] + "...") if len(output) > 30 else output
+
+            table.add_row(
+                eval_file.stem,
+                status,
+                expected_display or "[dim]not set[/dim]",
+                output_display
+            )
+
+    console.print(table)
+    console.print()
+
+    # Summary
+    if failed > 0:
+        console.print(f"[bold red]✗ {failed} failed[/bold red], ", end="")
+    if passed > 0:
+        console.print(f"[bold green]✓ {passed} passed[/bold green], ", end="")
+    if no_expected > 0:
+        console.print(f"[dim]{no_expected} no expected[/dim]", end="")
+    console.print()
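
For reference, a minimal eval file consistent with the keys this module reads might look like the following before its first run (filename and values are illustrative; the field names come from the code above). After a run, _run_evals() writes output, tools_called, tokens, cost, ts, run, and history back into each turn, plus pass and analysis when expected is set, and runs/updated at the top level.

    # .co/evals/greeting.yaml
    agent: agent.py            # optional here; --agent on the CLI overrides it
    turns:
      - input: "Say hello to Alice"
        expected: "A greeting that mentions Alice by name"
      - input: "Now say goodbye"
        expected: "A farewell that continues the same conversation"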
connectonion/cli/commands/project_cmd_lib.py CHANGED
@@ -622,7 +622,7 @@ def generate_custom_template_with_name(description: str, api_key: str, model: st
     # Try to use AI to generate name and code
     if model or api_key:
         try:
-            from ...llm import create_llm
+            from ...core.llm import create_llm
 
             # Use the model specified or default to co/gemini-2.5-pro
             llm_model = model if model else "co/gemini-2.5-pro"
connectonion/cli/main.py CHANGED
@@ -55,6 +55,7 @@ def _show_help():
     console.print(" [green]create[/green] <name> Create new project")
     console.print(" [green]init[/green] Initialize in current directory")
     console.print(" [green]copy[/green] <name> Copy tool/plugin source to project")
+    console.print(" [green]eval[/green] Run evals and show status")
     console.print(" [green]deploy[/green] Deploy to ConnectOnion Cloud")
     console.print(" [green]auth[/green] Authenticate for managed keys")
     console.print(" [green]status[/green] Check account balance")
@@ -152,6 +153,16 @@ def copy(
     handle_copy(names=names or [], list_all=list_all, path=path, force=force)
 
 
+@app.command()
+def eval(
+    name: Optional[str] = typer.Argument(None, help="Specific eval name"),
+    agent: Optional[str] = typer.Option(None, "--agent", "-a", help="Agent file (overrides YAML)"),
+):
+    """Run evals and show results."""
+    from .commands.eval_commands import handle_eval
+    handle_eval(name=name, agent_file=agent)
+
+
 def cli():
     """Entry point."""
     app()
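
Typical invocations of the new command, matching the typer signature above and the glob behavior in handle_eval():

    co eval                               # run every .co/evals/*.yaml
    co eval greeting                      # run only .co/evals/greeting.yaml
    co eval greeting --agent my_agent.py  # override the agent file set in the YAML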