connectonion 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,276 @@
1
+ """
2
+ Purpose: Universal scrolling strategies with AI-powered selection and screenshot-based verification
3
+ LLM-Note:
4
+ Dependencies: imports from [typing, pydantic, connectonion.llm_do, PIL.Image, os, time] | imported by [web_automation.py] | tested by [tests/test_final_scroll.py]
5
+ Data flow: receives page: Page, take_screenshot: Callable, times: int, description: str from web_automation.scroll() → scroll_with_verification() orchestrates 3 strategies → ai_scroll_strategy() calls llm_do(HTML+scrollable_elements→ScrollStrategy, gpt-4o) → element_scroll_strategy()/page_scroll_strategy() fallbacks → page.evaluate(javascript) executes scroll → screenshots_are_different() compares PIL Images with 1% pixel threshold → returns success/failure string
6
+ State/Effects: calls page.evaluate() multiple times (mutates DOM scroll positions) | take_screenshot() writes PNG files to screenshots/*.png | time.sleep() of 1 to 1.2 seconds between scroll iterations | AI calls to gpt-4o with temperature=0.1 for strategy generation
7
+ Integration: exposes scroll_with_verification() as main entry point from WebAutomation.scroll() | exposes scroll_page(), scroll_element() as standalone utilities | ScrollStrategy Pydantic model defines AI output schema (javascript: str, explanation: str) | screenshots_are_different() uses PIL for pixel-level comparison
8
+ Performance: ai_scroll_strategy() calls llm_do() once per scroll session (100-500ms) | analyzes first 5000 chars of HTML | finds up to 3 scrollable elements | executes JS times iterations with 1.2s delays | element/page strategies are synchronous JS execution (fast) | PIL screenshot comparison ~50-100ms
9
+ Errors: returns descriptive strings (not exceptions) - "All scroll strategies failed", "Browser not open" | screenshot comparison failure returns True (assumes different) to continue | page.evaluate() exceptions caught and next strategy tried | prints debug output to stdout
10
+ ⚠️ Strategy order: AI-first is slower but more accurate on complex sites (e.g. Gmail); reorder the strategies list if speed is critical
11
+ ⚠️ Screenshot verification: 1% threshold may need tuning for high-resolution displays or subtle animations
12
+ """
13
+
14
+ from typing import Callable, List, Tuple
15
+ from pydantic import BaseModel
16
+ from connectonion import llm_do
17
+
18
+
19
+ class ScrollStrategy(BaseModel):
20
+ """AI-generated scroll strategy."""
21
+ javascript: str
22
+ explanation: str
23
+
24
+
25
+ def scroll_with_verification(
26
+ page,
27
+ take_screenshot: Callable,
28
+ times: int = 5,
29
+ description: str = "the main content area"
30
+ ) -> str:
31
+ """Universal scroll with automatic strategy selection and fallback.
32
+
33
+ Tries multiple strategies in order until one works:
34
+ 1. AI-generated strategy (default)
35
+ 2. Element scrolling
36
+ 3. Page scrolling
37
+
38
+ Args:
39
+ page: Playwright page object
40
+ take_screenshot: Function to take screenshots
41
+ times: Number of scroll iterations
42
+ description: What to scroll (natural language)
43
+
44
+ Returns:
45
+ Status message with successful strategy
46
+ """
47
+ if not page:
48
+ return "Browser not open"
49
+
50
+ print(f"\n📜 Starting universal scroll for: '{description}'")
51
+
52
+ import time
53
+ timestamp = int(time.time())
54
+ before_file = f"scroll_before_{timestamp}.png"
55
+ after_file = f"scroll_after_{timestamp}.png"
56
+
57
+ # Take before screenshot
58
+ take_screenshot(before_file)
59
+
60
+ strategies = [
61
+ ("AI-generated strategy", lambda: ai_scroll_strategy(page, times, description)),
62
+ ("Element scrolling", lambda: element_scroll_strategy(page, times)),
63
+ ("Page scrolling", lambda: page_scroll_strategy(page, times))
64
+ ]
65
+
66
+ for strategy_name, strategy_func in strategies:
67
+ print(f"\n Trying: {strategy_name}...")
68
+
69
+ try:
70
+ strategy_func()
71
+ time.sleep(1)
72
+
73
+ # Take after screenshot
74
+ take_screenshot(after_file)
75
+
76
+ # Verify scroll worked
77
+ if screenshots_are_different(before_file, after_file):
78
+ print(f" ✅ {strategy_name} WORKED! Content changed.")
79
+ return f"Scroll successful using {strategy_name}. Check {before_file} vs {after_file}"
80
+ else:
81
+ print(f" ⚠️ {strategy_name} didn't change content. Trying next...")
82
+ before_file = after_file
83
+ after_file = f"scroll_after_{timestamp}_next.png"
84
+
85
+ except Exception as e:
86
+ print(f" ❌ {strategy_name} failed: {e}")
87
+ continue
88
+
89
+ return "All scroll strategies failed. No visible content change."
90
+
91
+
92
+ def screenshots_are_different(file1: str, file2: str) -> bool:
93
+ """Compare screenshots to verify content changed.
94
+
95
+ Args:
96
+ file1: First screenshot filename
97
+ file2: Second screenshot filename
98
+
99
+ Returns:
100
+ True if screenshots are different
101
+ """
102
+ try:
103
+ from PIL import Image
104
+ import os
105
+
106
+ path1 = os.path.join("screenshots", file1)
107
+ path2 = os.path.join("screenshots", file2)
108
+
109
+ img1 = Image.open(path1).convert('RGB')
110
+ img2 = Image.open(path2).convert('RGB')
111
+
112
+ # Calculate pixel difference
113
+ diff = sum(
114
+ abs(a - b)
115
+ for pixel1, pixel2 in zip(img1.getdata(), img2.getdata())
116
+ for a, b in zip(pixel1, pixel2)
117
+ )
118
+
119
+ # 1% threshold: count as different when the average per-channel difference exceeds 0.01 (on a 0-255 scale)
120
+ threshold = img1.size[0] * img1.size[1] * 3 * 0.01
121
+
122
+ is_different = diff > threshold
123
+ print(f" Screenshot diff: {diff:.0f} (threshold: {threshold:.0f}) - {'DIFFERENT' if is_different else 'SAME'}")
124
+
125
+ return is_different
126
+
127
+ except Exception as e:
128
+ print(f" Warning: Screenshot comparison failed: {e}")
129
+ return True # Assume different if comparison fails
130
+
131
+
132
+ def ai_scroll_strategy(page, times: int, description: str):
133
+ """AI-generated scroll strategy.
134
+
135
+ Analyzes page structure and generates custom JavaScript.
136
+ """
137
+ # Find scrollable elements
138
+ scrollable_elements = page.evaluate("""
139
+ (() => {
140
+ const scrollable = [];
141
+ document.querySelectorAll('*').forEach(el => {
142
+ const style = window.getComputedStyle(el);
143
+ if ((style.overflow === 'auto' || style.overflowY === 'scroll') &&
144
+ el.scrollHeight > el.clientHeight) {
145
+ scrollable.push({
146
+ tag: el.tagName,
147
+ classes: el.className,
148
+ id: el.id
149
+ });
150
+ }
151
+ });
152
+ return scrollable;
153
+ })()
154
+ """)
155
+
156
+ # Get simplified HTML
157
+ simplified_html = page.evaluate("""
158
+ (() => {
159
+ const clone = document.body.cloneNode(true);
160
+ clone.querySelectorAll('script, style, img, svg').forEach(el => el.remove());
161
+ return clone.innerHTML.substring(0, 5000);
162
+ })()
163
+ """)
164
+
165
+ # Generate scroll strategy using AI
166
+ strategy = llm_do(
167
+ f"""Generate JavaScript to scroll "{description}".
168
+
169
+ Scrollable elements: {scrollable_elements[:3]}
170
+ HTML structure: {simplified_html}
171
+
172
+ Return IIFE that scrolls the correct element:
173
+ (() => {{
174
+ const el = document.querySelector('.selector');
175
+ if (el) el.scrollTop += 1000;
176
+ return {{success: true}};
177
+ }})()
178
+ """,
179
+ output=ScrollStrategy,
180
+ model="gpt-4o",
181
+ temperature=0.1
182
+ )
183
+
184
+ print(f" AI generated: {strategy.explanation}")
185
+
186
+ # Execute scroll
187
+ import time
188
+ for i in range(times):
189
+ page.evaluate(strategy.javascript)
190
+ time.sleep(1.2)
191
+
192
+
193
+ def element_scroll_strategy(page, times: int):
194
+ """Scroll first scrollable element found."""
195
+ import time
196
+ for i in range(times):
197
+ page.evaluate("""
198
+ (() => {
199
+ const el = Array.from(document.querySelectorAll('*')).find(e => {
200
+ const s = window.getComputedStyle(e);
201
+ return (s.overflow === 'auto' || s.overflowY === 'scroll') &&
202
+ e.scrollHeight > e.clientHeight;
203
+ });
204
+ if (el) el.scrollTop += 1000;
205
+ })()
206
+ """)
207
+ time.sleep(1)
208
+
209
+
210
+ def page_scroll_strategy(page, times: int):
211
+ """Scroll the page window."""
212
+ import time
213
+ for i in range(times):
214
+ page.evaluate("window.scrollBy(0, 1000)")
215
+ time.sleep(1)
216
+
217
+
218
+ # Additional scroll helpers that can be called directly
219
+ def scroll_page(page, direction: str = "down", amount: int = 1000) -> str:
220
+ """Scroll the page in a specific direction.
221
+
222
+ Args:
223
+ page: Playwright page object
224
+ direction: "down", "up", "top", or "bottom"
225
+ amount: Pixels to scroll
226
+
227
+ Returns:
228
+ Status message
229
+ """
230
+ if not page:
231
+ return "Browser not open"
232
+
233
+ if direction == "bottom":
234
+ page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
235
+ return "Scrolled to bottom of page"
236
+ elif direction == "top":
237
+ page.evaluate("window.scrollTo(0, 0)")
238
+ return "Scrolled to top of page"
239
+ elif direction == "down":
240
+ page.evaluate(f"window.scrollBy(0, {amount})")
241
+ return f"Scrolled down {amount} pixels"
242
+ elif direction == "up":
243
+ page.evaluate(f"window.scrollBy(0, -{amount})")
244
+ return f"Scrolled up {amount} pixels"
245
+ else:
246
+ return f"Unknown direction: {direction}"
247
+
248
+
249
+ def scroll_element(page, selector: str, amount: int = 1000) -> str:
250
+ """Scroll a specific element by CSS selector.
251
+
252
+ Args:
253
+ page: Playwright page object
254
+ selector: CSS selector for the element
255
+ amount: Pixels to scroll
256
+
257
+ Returns:
258
+ Status message
259
+ """
260
+ if not page:
261
+ return "Browser not open"
262
+
263
+ result = page.evaluate(f"""
264
+ (() => {{
265
+ const element = document.querySelector('{selector}');
266
+ if (!element) return 'Element not found: {selector}';
267
+
268
+ const beforeScroll = element.scrollTop;
269
+ element.scrollTop += {amount};
270
+ const afterScroll = element.scrollTop;
271
+
272
+ return `Scrolled from ${{beforeScroll}}px to ${{afterScroll}}px (delta: ${{afterScroll - beforeScroll}}px)`;
273
+ }})()
274
+ """)
275
+
276
+ return result
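
A minimal driver sketch for the module above, assuming Playwright's sync API and the screenshots/ directory convention that screenshots_are_different() expects; the target URL, the description string, and the import path for scroll_with_verification are illustrative, not part of the package:

    import os
    from playwright.sync_api import sync_playwright
    # from <this module> import scroll_with_verification   # module path depends on the package layout

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        page = browser.new_page()
        page.goto("https://news.ycombinator.com")

        def take_screenshot(filename: str) -> None:
            # scroll_with_verification passes bare filenames; the comparison
            # helper looks for them under screenshots/, so save them there.
            os.makedirs("screenshots", exist_ok=True)
            page.screenshot(path=os.path.join("screenshots", filename))

        result = scroll_with_verification(page, take_screenshot, times=3,
                                           description="the main story list")
        print(result)  # e.g. "Scroll successful using Page scrolling. Check ... vs ..."
        browser.close()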
@@ -0,0 +1,286 @@
1
+ """
2
+ Purpose: CLI command for running and managing evals
3
+ LLM-Note:
4
+ Dependencies: imports from [pathlib, yaml, json, rich, importlib, pydantic, connectonion.llm_do] | imported by [cli/main.py]
5
+ Data flow: handle_eval() → reads .co/evals/*.yaml → imports agent file → runs each turn's input through the agent → judges output against expected via llm_do → writes results back into the YAML
6
+ Integration: exposes handle_eval(name, agent_file) for CLI
7
+
8
+ Eval YAML format:
9
+ - `turns`: List of inputs to send to the agent sequentially (like a conversation).
10
+ Each turn has one input. Turns run in order within the same agent session,
11
+ simulating a multi-round conversation. Use a single turn for simple evals,
12
+ or multiple turns to test conversation flow.
13
+ """
14
+
15
+ import importlib.util
16
+ import json
17
+ import os
18
+ import sys
19
+ from datetime import datetime
20
+ from pathlib import Path
21
+ from typing import Optional
22
+
23
+ import yaml
24
+ from pydantic import BaseModel
25
+ from rich.console import Console
26
+ from rich.table import Table
27
+
28
+ console = Console()
29
+
30
+
31
+ class JudgeResult(BaseModel):
32
+ """Result from LLM judge evaluation."""
33
+ passed: bool
34
+ analysis: str
35
+
36
+
37
+ def get_agent_from_file(file_path: str, cwd: str):
38
+ """Import agent instance from file."""
39
+ from connectonion import Agent
40
+
41
+ if not os.path.isabs(file_path):
42
+ file_path = os.path.join(cwd, file_path)
43
+
44
+ if cwd not in sys.path:
45
+ sys.path.insert(0, cwd)
46
+
47
+ spec = importlib.util.spec_from_file_location("agent_module", file_path)
48
+ module = importlib.util.module_from_spec(spec)
49
+ spec.loader.exec_module(module)
50
+
51
+ if hasattr(module, 'agent') and isinstance(module.agent, Agent):
52
+ agent = module.agent
53
+ agent.logger.enable_sessions = False # Prevent duplicate eval files
54
+ return agent
55
+
56
+ raise ValueError(
57
+ f"No 'agent' instance found in {file_path}.\n\n"
58
+ f"Structure your file like this:\n\n"
59
+ f" agent = Agent(...)\n\n"
60
+ f" if __name__ == '__main__':\n"
61
+ f" agent.input('...')\n"
62
+ )
63
+
64
+
65
+ def handle_eval(name: Optional[str] = None, agent_file: Optional[str] = None):
66
+ """Run evals and show results.
67
+
68
+ Args:
69
+ name: Optional specific eval name to run
70
+ agent_file: Optional agent file path (overrides YAML setting)
71
+ """
72
+ evals_dir = Path(".co/evals")
73
+
74
+ if not evals_dir.exists():
75
+ console.print("[yellow]No evals found.[/yellow]")
76
+ console.print("[dim]Create eval files in .co/evals/*.yaml[/dim]")
77
+ return
78
+
79
+ if name:
80
+ eval_files = list(evals_dir.glob(f"{name}.yaml"))
81
+ if not eval_files:
82
+ console.print(f"[red]Eval not found: {name}[/red]")
83
+ return
84
+ else:
85
+ eval_files = list(evals_dir.glob("*.yaml"))
86
+
87
+ if not eval_files:
88
+ console.print("[yellow]No eval files found in .co/evals/[/yellow]")
89
+ return
90
+
91
+ _run_evals(eval_files, agent_file)
92
+
93
+ # Reload and show status
94
+ if name:
95
+ eval_files = list(evals_dir.glob(f"{name}.yaml"))
96
+ else:
97
+ eval_files = list(evals_dir.glob("*.yaml"))
98
+
99
+ _show_eval_status(eval_files)
100
+
101
+
102
+ def _run_evals(eval_files: list, agent_override: Optional[str] = None):
103
+ """Run agents for each eval and capture output."""
104
+ cwd = os.getcwd()
105
+ agents_cache = {} # Cache agents by file path
106
+
107
+ for eval_file in eval_files:
108
+ with open(eval_file) as f:
109
+ data = yaml.safe_load(f)
110
+
111
+ # Get agent file: CLI override > YAML > error
112
+ agent_file = agent_override or data.get('agent')
113
+ if not agent_file:
114
+ console.print(f"[red]No agent specified for {eval_file.stem}[/red]")
115
+ console.print(f"[dim]Add 'agent: agent.py' to the YAML or use --agent flag[/dim]")
116
+ continue
117
+
118
+ # Load agent (cached)
119
+ if agent_file not in agents_cache:
120
+ console.print(f"[cyan]Loading:[/cyan] {agent_file}")
121
+ agents_cache[agent_file] = get_agent_from_file(agent_file, cwd)
122
+ agent = agents_cache[agent_file]
123
+
124
+ turns = data.get('turns', [])
125
+ if not turns:
126
+ console.print(f"[yellow]No turns found in {eval_file.stem}[/yellow]")
127
+ continue
128
+
129
+ console.print(f"[cyan]Running:[/cyan] {eval_file.stem}")
130
+
131
+ # Reset agent session for fresh state each eval
132
+ agent.reset_conversation()
133
+
134
+ file_modified = False
135
+ for turn in turns:
136
+ input_text = turn.get('input', '')
137
+ if not input_text:
138
+ continue
139
+
140
+ # Show input (truncated)
141
+ display_input = input_text[:60] + "..." if len(input_text) > 60 else input_text
142
+ console.print(f" [dim]input:[/dim] {display_input}")
143
+
144
+ # Run agent and capture result
145
+ result = agent.input(input_text)
146
+
147
+ # Extract tools_called and metrics from agent session
148
+ trace = agent.current_session.get('trace', [])
149
+ tool_calls = [t for t in trace if t.get('type') == 'tool_execution']
150
+ llm_calls = [t for t in trace if t.get('type') == 'llm_call']
151
+ tools_called = [agent.logger._format_tool_call(t) for t in tool_calls]
152
+
153
+ total_tokens = sum(
154
+ (t.get('usage').input_tokens + t.get('usage').output_tokens)
155
+ for t in llm_calls if t.get('usage')
156
+ )
157
+ total_cost = sum(
158
+ t.get('usage').cost for t in llm_calls if t.get('usage')
159
+ )
160
+
161
+ # Build history as JSON array string (compact, easy to scan)
162
+ history_str = turn.get('history', '[]')
163
+ history = json.loads(history_str) if isinstance(history_str, str) else []
164
+ if turn.get('output'):
165
+ history.insert(0, {
166
+ "ts": turn.get('ts', ''),
167
+ "pass": turn.get('pass'),
168
+ "tokens": turn.get('tokens', 0),
169
+ "cost": turn.get('cost', 0)
170
+ })
171
+
172
+ # Store result in turn
173
+ turn['output'] = result
174
+ turn['tools_called'] = tools_called
175
+ turn['tokens'] = total_tokens
176
+ turn['cost'] = round(total_cost, 4)
177
+ turn['ts'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
178
+ turn['run'] = data.get('runs', 0) + 1
179
+ # Format history as multi-line JSON for readability
180
+ if history:
181
+ lines = [json.dumps(h) for h in history]
182
+ turn['history'] = "[\n" + ",\n".join(lines) + "]"
183
+ else:
184
+ turn['history'] = "[]"
185
+ file_modified = True
186
+
187
+ # Judge immediately if expected exists
188
+ expected = turn.get('expected', '')
189
+ if expected:
190
+ judge = _judge_with_llm(expected, result, input_text)
191
+ turn['pass'] = judge.passed
192
+ turn['analysis'] = judge.analysis
193
+ status = "[green]✓[/green]" if judge.passed else "[red]✗[/red]"
194
+ console.print(f" {status} {judge.analysis[:60]}...")
195
+ else:
196
+ # Show output (truncated)
197
+ display_output = result[:60] + "..." if len(result) > 60 else result
198
+ console.print(f" [green]output:[/green] {display_output}")
199
+
200
+ if file_modified:
201
+ # Update runs count and save
202
+ data['runs'] = data.get('runs', 0) + 1
203
+ data['updated'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
204
+ with open(eval_file, 'w') as f:
205
+ yaml.dump(data, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
206
+
207
+ console.print(f"[green]✓[/green] {eval_file.stem} completed")
208
+ console.print()
209
+
210
+ console.print()
211
+
212
+
213
+ def _judge_with_llm(expected: str, output: str, input_text: str) -> JudgeResult:
214
+ """Use LLM to judge if output matches expected."""
215
+ from connectonion import llm_do
216
+
217
+ prompt = f"""You are an eval judge. Determine if the agent's output satisfies the expected criteria.
218
+
219
+ Input: {input_text}
220
+ Expected: {expected}
221
+ Output: {output}
222
+
223
+ Does the output satisfy the expected criteria? Consider:
224
+ - Semantic similarity (not exact match)
225
+ - Key information presence
226
+ - Intent fulfillment
227
+ """
228
+ return llm_do(prompt, output=JudgeResult)
229
+
230
+
231
+ def _show_eval_status(eval_files: list):
232
+ """Show pass/fail status for all evals (uses stored results, no re-judging)."""
233
+ table = Table(title="Eval Results", show_header=True)
234
+ table.add_column("Eval", style="cyan")
235
+ table.add_column("Status", justify="center")
236
+ table.add_column("Expected", max_width=30)
237
+ table.add_column("Output", max_width=30)
238
+
239
+ passed = 0
240
+ failed = 0
241
+ no_expected = 0
242
+
243
+ for eval_file in sorted(eval_files):
244
+ with open(eval_file) as f:
245
+ data = yaml.safe_load(f)
246
+
247
+ for turn in data.get('turns', []):
248
+ expected = turn.get('expected', '')
249
+ output = turn.get('output', '')
250
+ pass_result = turn.get('pass')
251
+
252
+ if not expected:
253
+ status = "[dim]—[/dim]"
254
+ no_expected += 1
255
+ elif pass_result is True:
256
+ status = "[green]✓ pass[/green]"
257
+ passed += 1
258
+ elif pass_result is False:
259
+ status = "[red]✗ fail[/red]"
260
+ failed += 1
261
+ else:
262
+ status = "[dim]pending[/dim]"
263
+ no_expected += 1
264
+
265
+ # Truncate for display
266
+ expected_display = (expected[:27] + "...") if len(expected) > 30 else expected
267
+ output_display = (output[:27] + "...") if len(output) > 30 else output
268
+
269
+ table.add_row(
270
+ eval_file.stem,
271
+ status,
272
+ expected_display or "[dim]not set[/dim]",
273
+ output_display
274
+ )
275
+
276
+ console.print(table)
277
+ console.print()
278
+
279
+ # Summary
280
+ if failed > 0:
281
+ console.print(f"[bold red]✗ {failed} failed[/bold red], ", end="")
282
+ if passed > 0:
283
+ console.print(f"[bold green]✓ {passed} passed[/bold green], ", end="")
284
+ if no_expected > 0:
285
+ console.print(f"[dim]{no_expected} no expected[/dim]", end="")
286
+ console.print()
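
For reference, an eval file of the shape that handle_eval()/_run_evals() consume might look like this before its first run; the file name, agent path, and turn contents are illustrative. Only agent and the per-turn input/expected need to be authored by hand; the remaining fields are maintained by the command:

    # .co/evals/weather_lookup.yaml
    agent: agent.py              # optional if --agent is passed on the command line
    turns:
      - input: "What's the weather in Tokyo right now?"
        expected: "Mentions current conditions and a temperature for Tokyo"
      - input: "And how about tomorrow?"
        expected: "Gives a forecast for the following day"

After a run, each turn gains output, tools_called, tokens, cost, ts, run, and history (plus pass/analysis when expected is set), and the file gains top-level runs and updated fields.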
connectonion/cli/main.py CHANGED
@@ -55,6 +55,7 @@ def _show_help():
55
55
  console.print(" [green]create[/green] <name> Create new project")
56
56
  console.print(" [green]init[/green] Initialize in current directory")
57
57
  console.print(" [green]copy[/green] <name> Copy tool/plugin source to project")
58
+ console.print(" [green]eval[/green] Run evals and show status")
58
59
  console.print(" [green]deploy[/green] Deploy to ConnectOnion Cloud")
59
60
  console.print(" [green]auth[/green] Authenticate for managed keys")
60
61
  console.print(" [green]status[/green] Check account balance")
@@ -152,6 +153,16 @@ def copy(
152
153
  handle_copy(names=names or [], list_all=list_all, path=path, force=force)
153
154
 
154
155
 
156
+ @app.command()
157
+ def eval(
158
+ name: Optional[str] = typer.Argument(None, help="Specific eval name"),
159
+ agent: Optional[str] = typer.Option(None, "--agent", "-a", help="Agent file (overrides YAML)"),
160
+ ):
161
+ """Run evals and show results."""
162
+ from .commands.eval_commands import handle_eval
163
+ handle_eval(name=name, agent_file=agent)
164
+
165
+
155
166
  def cli():
156
167
  """Entry point."""
157
168
  app()
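
With the subcommand registered, invocation follows the existing Typer pattern; `co` is assumed here as the entry-point name, and the eval name is illustrative:

    co eval                                   # run every .co/evals/*.yaml, then print the results table
    co eval weather_lookup                    # run only .co/evals/weather_lookup.yaml
    co eval weather_lookup --agent agent.py   # override the agent file declared in the YAML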
connectonion/console.py CHANGED
@@ -110,7 +110,7 @@ class Console:
110
110
  ● ─────────────────────
111
111
  connectonion v0.5.1
112
112
  o4-mini · 3 tools
113
- .co/logs/ · .co/sessions/
113
+ .co/logs/ · .co/evals/
114
114
 
115
115
  Args:
116
116
  agent_name: Name of the agent
@@ -156,7 +156,7 @@ class Console:
156
156
 
157
157
  # Add log paths if logging is enabled
158
158
  if log_dir:
159
- lines.append(f" [{DIM_COLOR}]{log_dir}logs/ · {log_dir}sessions/[/{DIM_COLOR}]")
159
+ lines.append(f" [{DIM_COLOR}]{log_dir}logs/ · {log_dir}evals/[/{DIM_COLOR}]")
160
160
 
161
161
  # Add Aaron's message for free tier users
162
162
  if aaron_message:
@@ -182,7 +182,7 @@ class Console:
182
182
  if meta_line:
183
183
  plain_lines.append(f" {meta_line}")
184
184
  if log_dir:
185
- plain_lines.append(f" {log_dir}logs/ · {log_dir}sessions/")
185
+ plain_lines.append(f" {log_dir}logs/ · {log_dir}evals/")
186
186
  if aaron_message:
187
187
  plain_lines.append(f" {aaron_message}")
188
188
  plain_lines.append(f" {separator}")
@@ -484,12 +484,12 @@ class Console:
484
484
 
485
485
  [co] ═══════════════════════════════════════
486
486
  [co] ✓ done · 2.3k tokens · $0.005 · 3.4s
487
- [co] saved → .co/sessions/research-assistant.yaml
487
+ [co] saved → .co/evals/research-assistant.yaml
488
488
 
489
489
  Args:
490
490
  duration_s: Total duration in seconds
491
491
  session: Agent's current_session dict (contains trace with usage)
492
- session_path: Optional path to session file
492
+ session_path: Optional path to eval file
493
493
  """
494
494
  # Calculate totals from trace
495
495
  trace = session.get('trace', [])
@@ -2,9 +2,9 @@
2
2
  Purpose: Orchestrate AI agent execution with LLM calls, tool execution, and automatic logging
3
3
  LLM-Note:
4
4
  Dependencies: imports from [llm.py, tool_factory.py, prompts.py, decorators.py, logger.py, tool_executor.py, tool_registry.py] | imported by [__init__.py, debug_agent/__init__.py] | tested by [tests/test_agent.py, tests/test_agent_prompts.py, tests/test_agent_workflows.py]
5
- Data flow: receives user prompt: str from Agent.input() → creates/extends current_session with messages → calls llm.complete() with tool schemas → receives LLMResponse with tool_calls → executes tools via tool_executor.execute_and_record_tools() → appends tool results to messages → repeats loop until no tool_calls or max_iterations → logger logs to .co/logs/{name}.log and .co/sessions/{name}_{timestamp}.yaml → returns final response: str
6
- State/Effects: modifies self.current_session['messages', 'trace', 'turn', 'iteration'] | writes to .co/logs/{name}.log and .co/sessions/ via logger.py
7
- Integration: exposes Agent(name, tools, system_prompt, model, log, quiet), .input(prompt), .execute_tool(name, args), .add_tool(func), .remove_tool(name), .list_tools(), .reset_conversation() | tools stored in ToolRegistry with attribute access (agent.tools.tool_name) and instance storage (agent.tools.gmail) | tool execution delegates to tool_executor module | log defaults to .co/logs/ (None), can be True (current dir), False (disabled), or custom path | quiet=True suppresses console but keeps session logging | trust enforcement moved to host() for network access control
5
+ Data flow: receives user prompt: str from Agent.input() → creates/extends current_session with messages → calls llm.complete() with tool schemas → receives LLMResponse with tool_calls → executes tools via tool_executor.execute_and_record_tools() → appends tool results to messages → repeats loop until no tool_calls or max_iterations → logger logs to .co/logs/{name}.log and .co/evals/{name}.yaml → returns final response: str
6
+ State/Effects: modifies self.current_session['messages', 'trace', 'turn', 'iteration'] | writes to .co/logs/{name}.log and .co/evals/ via logger.py
7
+ Integration: exposes Agent(name, tools, system_prompt, model, log, quiet), .input(prompt), .execute_tool(name, args), .add_tool(func), .remove_tool(name), .list_tools(), .reset_conversation() | tools stored in ToolRegistry with attribute access (agent.tools.tool_name) and instance storage (agent.tools.gmail) | tool execution delegates to tool_executor module | log defaults to .co/logs/ (None), can be True (current dir), False (disabled), or custom path | quiet=True suppresses console but keeps eval logging | trust enforcement moved to host() for network access control
8
8
  Performance: max_iterations=10 default (configurable per-input) | session state persists across turns for multi-turn conversations | ToolRegistry provides O(1) tool lookup via .get() or attribute access
9
9
  Errors: LLM errors bubble up | tool execution errors captured in trace and returned to LLM for retry
10
10
  """
@@ -51,11 +51,14 @@ class Agent:
51
51
  # Current session context (runtime only)
52
52
  self.current_session = None
53
53
 
54
+ # Connection to client (None locally, injected by host() for WebSocket)
55
+ self.connection = None
56
+
54
57
  # Token usage tracking
55
58
  self.total_cost: float = 0.0 # Cumulative cost in USD
56
59
  self.last_usage: Optional[TokenUsage] = None # From most recent LLM call
57
60
 
58
- # Initialize logger (unified: terminal + file + YAML sessions)
61
+ # Initialize logger (unified: terminal + file + YAML evals)
59
62
  # Environment variable override (highest priority)
60
63
  effective_log = log
61
64
  if os.getenv('CONNECTONION_LOG'):
@@ -250,16 +253,16 @@ class Agent:
250
253
 
251
254
  self.current_session['result'] = result
252
255
 
253
- # Print completion summary
254
- if self.logger.console:
255
- session_path = f".co/sessions/{self.name}.yaml" if self.logger.enable_sessions else None
256
- self.logger.console.print_completion(duration, self.current_session, session_path)
257
-
258
256
  self._invoke_events('on_complete')
259
257
 
260
- # Log turn to YAML session (after on_complete so handlers can modify state)
258
+ # Log turn to YAML eval (after on_complete so handlers can modify state)
261
259
  self.logger.log_turn(prompt, result, duration * 1000, self.current_session, self.llm.model)
262
260
 
261
+ # Print completion summary (after log_turn so we have the eval path)
262
+ if self.logger.console:
263
+ eval_path = self.logger.get_eval_path()
264
+ self.logger.console.print_completion(duration, self.current_session, eval_path)
265
+
263
266
  return result
264
267
 
265
268
  def reset_conversation(self):