PyPI - connectonion - Versions diffs - 0.6.0__py3-none-any.whl → 0.6.2__py3-none-any.whl - Mend

connectonion 0.6.0__py3-none-any.whl → 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

connectonion/__init__.py +3 -2
connectonion/cli/browser_agent/browser.py +433 -147
connectonion/cli/browser_agent/element_finder.py +139 -0
connectonion/cli/browser_agent/highlight_screenshot.py +174 -0
connectonion/cli/browser_agent/prompt.md +188 -105
connectonion/cli/browser_agent/prompts/element_matcher.md +59 -0
connectonion/cli/browser_agent/prompts/form_filler.md +19 -0
connectonion/cli/browser_agent/prompts/scroll_strategy.md +36 -0
connectonion/cli/browser_agent/scripts/extract_elements.js +126 -0
connectonion/cli/browser_agent/scroll.py +137 -0
connectonion/cli/commands/eval_commands.py +286 -0
connectonion/cli/main.py +11 -0
connectonion/console.py +5 -5
connectonion/core/agent.py +13 -10
connectonion/core/llm.py +9 -19
connectonion/logger.py +305 -135
connectonion/network/__init__.py +3 -0
connectonion/network/asgi.py +122 -2
connectonion/network/connection.py +123 -0
connectonion/network/host.py +7 -5
connectonion/useful_plugins/__init__.py +4 -3
connectonion/useful_plugins/ui_stream.py +164 -0
{connectonion-0.6.0.dist-info → connectonion-0.6.2.dist-info}/METADATA +1 -1
{connectonion-0.6.0.dist-info → connectonion-0.6.2.dist-info}/RECORD +27 -17
/connectonion/{static → network/static}/docs.html +0 -0
{connectonion-0.6.0.dist-info → connectonion-0.6.2.dist-info}/WHEEL +0 -0
{connectonion-0.6.0.dist-info → connectonion-0.6.2.dist-info}/entry_points.txt +0 -0

connectonion/core/agent.py CHANGED Viewed

@@ -2,9 +2,9 @@
 Purpose: Orchestrate AI agent execution with LLM calls, tool execution, and automatic logging
 LLM-Note:
   Dependencies: imports from [llm.py, tool_factory.py, prompts.py, decorators.py, logger.py, tool_executor.py, tool_registry.py] | imported by [__init__.py, debug_agent/__init__.py] | tested by [tests/test_agent.py, tests/test_agent_prompts.py, tests/test_agent_workflows.py]
-  Data flow: receives user prompt: str from Agent.input() → creates/extends current_session with messages → calls llm.complete() with tool schemas → receives LLMResponse with tool_calls → executes tools via tool_executor.execute_and_record_tools() → appends tool results to messages → repeats loop until no tool_calls or max_iterations → logger logs to .co/logs/{name}.log and .co/sessions/{name}_{timestamp}.yaml → returns final response: str
-  State/Effects: modifies self.current_session['messages', 'trace', 'turn', 'iteration'] | writes to .co/logs/{name}.log and .co/sessions/ via logger.py
-  Integration: exposes Agent(name, tools, system_prompt, model, log, quiet), .input(prompt), .execute_tool(name, args), .add_tool(func), .remove_tool(name), .list_tools(), .reset_conversation() | tools stored in ToolRegistry with attribute access (agent.tools.tool_name) and instance storage (agent.tools.gmail) | tool execution delegates to tool_executor module | log defaults to .co/logs/ (None), can be True (current dir), False (disabled), or custom path | quiet=True suppresses console but keeps session logging | trust enforcement moved to host() for network access control
+  Data flow: receives user prompt: str from Agent.input() → creates/extends current_session with messages → calls llm.complete() with tool schemas → receives LLMResponse with tool_calls → executes tools via tool_executor.execute_and_record_tools() → appends tool results to messages → repeats loop until no tool_calls or max_iterations → logger logs to .co/logs/{name}.log and .co/evals/{name}.yaml → returns final response: str
+  State/Effects: modifies self.current_session['messages', 'trace', 'turn', 'iteration'] | writes to .co/logs/{name}.log and .co/evals/ via logger.py
+  Integration: exposes Agent(name, tools, system_prompt, model, log, quiet), .input(prompt), .execute_tool(name, args), .add_tool(func), .remove_tool(name), .list_tools(), .reset_conversation() | tools stored in ToolRegistry with attribute access (agent.tools.tool_name) and instance storage (agent.tools.gmail) | tool execution delegates to tool_executor module | log defaults to .co/logs/ (None), can be True (current dir), False (disabled), or custom path | quiet=True suppresses console but keeps eval logging | trust enforcement moved to host() for network access control
   Performance: max_iterations=10 default (configurable per-input) | session state persists across turns for multi-turn conversations | ToolRegistry provides O(1) tool lookup via .get() or attribute access
   Errors: LLM errors bubble up | tool execution errors captured in trace and returned to LLM for retry
 """
@@ -51,11 +51,14 @@ class Agent:
         # Current session context (runtime only)
         self.current_session = None
+        # Connection to client (None locally, injected by host() for WebSocket)
+        self.connection = None
         # Token usage tracking
         self.total_cost: float = 0.0  # Cumulative cost in USD
         self.last_usage: Optional[TokenUsage] = None  # From most recent LLM call
-        # Initialize logger (unified: terminal + file + YAML sessions)
+        # Initialize logger (unified: terminal + file + YAML evals)
         # Environment variable override (highest priority)
         effective_log = log
         if os.getenv('CONNECTONION_LOG'):
@@ -250,16 +253,16 @@ class Agent:
         self.current_session['result'] = result
-        # Print completion summary
-        if self.logger.console:
-            session_path = f".co/sessions/{self.name}.yaml" if self.logger.enable_sessions else None
-            self.logger.console.print_completion(duration, self.current_session, session_path)
         self._invoke_events('on_complete')
-        # Log turn to YAML session (after on_complete so handlers can modify state)
+        # Log turn to YAML eval (after on_complete so handlers can modify state)
         self.logger.log_turn(prompt, result, duration * 1000, self.current_session, self.llm.model)
+        # Print completion summary (after log_turn so we have the eval path)
+        if self.logger.console:
+            eval_path = self.logger.get_eval_path()
+            self.logger.console.print_completion(duration, self.current_session, eval_path)
         return result
     def reset_conversation(self):

connectonion/core/llm.py CHANGED Viewed

@@ -734,28 +734,18 @@ class OpenOnionLLM(LLM):
         )
     def structured_complete(self, messages: List[Dict], output_schema: Type[BaseModel], **kwargs) -> BaseModel:
-        """Get structured Pydantic output using OpenAI-compatible API."""
-        response = self.client.responses.parse(
+        """Get structured Pydantic output using OpenAI-compatible chat completions API.
+        Uses beta.chat.completions.parse() which routes through /v1/chat/completions,
+        allowing proper provider routing for Gemini, OpenAI, and other models.
+        """
+        completion = self.client.beta.chat.completions.parse(
             model=self.model,
-            input=messages,
-            text_format=output_schema,
+            messages=messages,
+            response_format=output_schema,
             **kwargs
         )
-        # Handle edge cases
-        if response.status == "incomplete":
-            if response.incomplete_details.reason == "max_output_tokens":
-                raise ValueError("Response incomplete: maximum output tokens reached")
-            elif response.incomplete_details.reason == "content_filter":
-                raise ValueError("Response incomplete: content filtered")
-        # Check for refusal
-        if response.output and len(response.output) > 0:
-            first_content = response.output[0].content[0] if response.output[0].content else None
-            if first_content and hasattr(first_content, 'type') and first_content.type == "refusal":
-                raise ValueError(f"Model refused to respond: {first_content.refusal}")
-        return response.output_parsed
+        return completion.choices[0].message.parsed
 def create_llm(model: str, api_key: Optional[str] = None, **kwargs) -> LLM:

connectonion/logger.py CHANGED Viewed

@@ -1,52 +1,74 @@
 """
-Purpose: Unified logging interface for agents - terminal output + plain text + YAML sessions
+Purpose: Unified logging interface for agents - terminal output + plain text + YAML evals
 LLM-Note:
-  Dependencies: imports from [datetime, pathlib, typing, yaml, console.py] | imported by [agent.py, tool_executor.py] | tested by [tests/unit/test_logger.py]
-  Data flow: receives from Agent/tool_executor → delegates to Console for terminal/file → writes YAML sessions to .co/sessions/
-  State/Effects: writes to .co/sessions/{agent_name}.yaml (one file per agent, appends turns) | delegates file logging to Console | session data persisted after each turn
+  Dependencies: imports from [datetime, pathlib, typing, json, re, yaml, os, console.py] | imported by [agent.py, tool_executor.py] | tested by [tests/unit/test_logger.py]
+  Data flow: receives from Agent/tool_executor → delegates to Console for terminal/file → writes YAML evals to .co/evals/
+  State/Effects: writes to .co/evals/{input_slug}.yaml (one file per unique first input) | run data stored in .co/evals/{input_slug}/run_{n}.yaml | eval data persisted after each turn
   Integration: exposes Logger(agent_name, quiet, log), .print(), .log_tool_call(name, args), .log_tool_result(result, timing), .log_llm_response(), .start_session(), .log_turn()
-  Session format: metadata at top → turns summary (with tools_called as function-call style) → system_prompt + messages at end (see docs/session-yaml-format.md)
-  Performance: YAML written after each turn (incremental) | loads existing session file on start | Console delegation is direct passthrough
+  Eval format: eval.yaml (metadata + turns) | run_N.yaml (system_prompt, model, cwd, tokens, cost, duration_ms, timestamp, messages as multi-line JSON)
+  Performance: YAML written after each turn (incremental) | Console delegation is direct passthrough
   Errors: let I/O errors bubble up (no try-except)
 """
+import json
+import re
 from datetime import datetime
 from pathlib import Path
-from typing import Optional, Union, Dict, Any
+from typing import Optional, Union, Dict, Any, List
 import yaml
 from .console import Console
+def _slugify(text: str, max_length: int = 50) -> str:
+    """Convert text to URL-friendly slug for filenames.
+    Args:
+        text: Input text to slugify
+        max_length: Maximum length of slug
+    Returns:
+        Lowercase slug with words separated by underscores
+    """
+    # Lowercase and replace spaces/special chars with underscores
+    slug = re.sub(r'[^a-zA-Z0-9]+', '_', text.lower())
+    # Remove leading/trailing underscores
+    slug = slug.strip('_')
+    # Truncate to max length at word boundary
+    if len(slug) > max_length:
+        slug = slug[:max_length].rsplit('_', 1)[0]
+    return slug or 'default'
 class Logger:
-    """Unified logging: terminal output + plain text + YAML sessions.
+    """Unified logging: terminal output + plain text + YAML evals.
-    Facade pattern: wraps Console for terminal/file logging, adds YAML sessions.
+    Facade pattern: wraps Console for terminal/file logging, adds YAML evals.
-    Session files use one file per agent (.co/sessions/{agent_name}.yaml) to
-    reduce file clutter. New turns are appended to the same file.
+    Eval files are named from the first input (slugified). Same input sequence
+    = same file with multiple runs. Each run stored as YAML with messages as JSON.
+    Log = Eval (same format, add expect field for tests).
     Args:
-        agent_name: Name of the agent (used in filenames)
+        agent_name: Name of the agent (used in log filenames)
         quiet: Suppress console output (default False)
         log: Enable file logging (default True, or path string for custom location)
     Files created:
         - .co/logs/{agent_name}.log: Plain text log with session markers
-        - .co/sessions/{agent_name}.yaml: Structured YAML with all turns
+        - .co/evals/{input_slug}.yaml: Structured YAML with turns and history
+        - .co/evals/{input_slug}/run_{n}.yaml: Run metadata + messages as multi-line JSON
     Examples:
         # Development (default) - see output + save everything
         logger = Logger("my-agent")
-        # Eval mode - quiet but record sessions
+        # Eval mode - quiet but record evals
         logger = Logger("my-agent", quiet=True)
         # Benchmark - completely off
         logger = Logger("my-agent", log=False)
-        # Custom log path
-        logger = Logger("my-agent", log="custom/path.log")
     """
     def __init__(
@@ -59,7 +81,7 @@ class Logger:
         # Determine what to enable
         self.enable_console = not quiet
-        self.enable_sessions = True  # Sessions on unless log=False
+        self.enable_sessions = True  # Evals on unless log=False
         self.enable_file = True
         self.log_file_path = Path(f".co/logs/{agent_name}.log")
@@ -73,7 +95,7 @@ class Logger:
             self.log_file_path = Path(log)
         # else: log=True or log=None → defaults
-        # If quiet=True, also disable file (only keep sessions)
+        # If quiet=True, also disable file (only keep evals)
         if quiet:
             self.enable_file = False
@@ -83,9 +105,12 @@ class Logger:
             file_path = self.log_file_path if self.enable_file else None
             self.console = Console(log_file=file_path)
-        # Session state (YAML)
-        self.session_file: Optional[Path] = None
-        self.session_data: Optional[Dict[str, Any]] = None
+        # Eval state
+        self.eval_file: Optional[Path] = None
+        self.eval_dir: Optional[Path] = None
+        self.eval_data: Optional[Dict[str, Any]] = None
+        self.current_run: int = 0
+        self._first_input: Optional[str] = None  # Track first input for file naming
     # Delegate to Console
     def print(self, message: str, style: str = None):
@@ -129,68 +154,76 @@ class Logger:
                 parts.append(f"{k}={v_str}")
         return f"{tool_name}({', '.join(parts)})"
-    # Session logging (YAML)
+    # Eval logging (YAML + JSONL) - Log = Eval, same format
     def start_session(self, system_prompt: str = "", session_id: Optional[str] = None):
-        """Initialize session YAML file.
+        """Initialize eval session state.
-        Uses one file per session_id (for HTTP API) or per agent (for interactive).
-        Loads existing session data if file exists, appends new turns.
+        Note: The actual file is created lazily in log_turn() when we have
+        the first input to generate the filename from.
+        System prompt is stored in messages JSONL, not in eval YAML.
         Args:
-            system_prompt: The system prompt for this session
-            session_id: Optional session identifier. If provided, logs to
-                       .co/sessions/{session_id}.yaml for thread-safe HTTP API.
-                       If None, uses agent name for interactive mode.
+            system_prompt: Unused (kept for backward compatibility)
+            session_id: Optional session identifier (used for HTTP API thread safety)
         """
         if not self.enable_sessions:
             return
-        sessions_dir = Path(".co/sessions")
-        sessions_dir.mkdir(parents=True, exist_ok=True)
-        # Use session_id if provided (HTTP API), otherwise use agent_name (interactive)
-        filename = session_id if session_id else self.agent_name
-        # Sanitize: keep only safe characters (alphanumeric, dash, underscore)
-        import re
-        filename = re.sub(r'[^a-zA-Z0-9_-]', '_', filename)[:255] or 'default'
-        self.session_file = sessions_dir / f"{filename}.yaml"
-        # Load existing session or create new
-        if self.session_file.exists():
-            with open(self.session_file, 'r') as f:
-                self.session_data = yaml.safe_load(f) or {}
-            # Ensure ALL required fields exist (handles empty/corrupted files)
-            if 'name' not in self.session_data:
-                self.session_data['name'] = self.agent_name
-            if 'session_id' not in self.session_data and session_id:
-                self.session_data['session_id'] = session_id
-            if 'created' not in self.session_data:
-                self.session_data['created'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-            if 'total_cost' not in self.session_data:
-                self.session_data['total_cost'] = 0.0
-            if 'total_tokens' not in self.session_data:
-                self.session_data['total_tokens'] = 0
-            if 'turns' not in self.session_data:
-                self.session_data['turns'] = []
-            if 'messages' not in self.session_data:
-                self.session_data['messages'] = {}
-            # Update system_prompt if provided
-            if system_prompt:
-                self.session_data['system_prompt'] = system_prompt
+        self._first_input = None
+        self.eval_file = None
+        self.eval_dir = None
+        self.eval_data = None
+        self.current_run = 0
+    def _init_eval_file(self, first_input: str):
+        """Initialize or load eval file based on first input.
+        Args:
+            first_input: The first user input (used to name the file)
+        """
+        evals_dir = Path(".co/evals")
+        evals_dir.mkdir(parents=True, exist_ok=True)
+        # Generate filename from first input
+        slug = _slugify(first_input)
+        self.eval_file = evals_dir / f"{slug}.yaml"
+        self.eval_dir = evals_dir / slug
+        self._first_input = first_input
+        # Load existing or create new
+        if self.eval_file.exists():
+            with open(self.eval_file, 'r') as f:
+                self.eval_data = yaml.safe_load(f) or {}
+            # Check if this is the same conversation (same first input)
+            existing_turns = self.eval_data.get('turns', [])
+            if existing_turns and existing_turns[0].get('input') == first_input:
+                # Same conversation - new run
+                self.current_run = self.eval_data.get('runs', 0) + 1
+                self.eval_data['runs'] = self.current_run
+            else:
+                # Different first input but same slug (collision) - treat as new
+                self.current_run = 1
+                self.eval_data = self._create_new_eval_data(first_input)
         else:
-            self.session_data = {
-                "name": self.agent_name,
-                "session_id": session_id,
-                "created": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                "total_cost": 0.0,
-                "total_tokens": 0,
-                "system_prompt": system_prompt,
-                "turns": [],
-                "messages": {}  # Dict keyed by turn number
-            }
+            self.current_run = 1
+            self.eval_data = self._create_new_eval_data(first_input)
+        # Create messages directory
+        self.eval_dir.mkdir(parents=True, exist_ok=True)
+    def _create_new_eval_data(self, first_input: str) -> Dict[str, Any]:
+        """Create new eval data structure."""
+        return {
+            "name": _slugify(first_input),
+            "created": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+            "runs": 1,
+            "model": "",
+            "turns": []
+        }
     def log_turn(self, user_input: str, result: str, duration_ms: float, session: dict, model: str):
-        """Log turn summary + messages to YAML file.
+        """Log turn to YAML file and messages to JSONL.
         Args:
             user_input: The user's input prompt
@@ -199,9 +232,13 @@ class Logger:
             session: Agent's current_session dict (contains messages, trace)
             model: Model name string
         """
-        if not self.enable_sessions or not self.session_data:
+        if not self.enable_sessions:
             return
+        # Initialize file on first turn (lazy initialization)
+        if self.eval_data is None:
+            self._init_eval_file(user_input)
         # Aggregate from trace
         trace = session.get('trace', [])
         llm_calls = [t for t in trace if t.get('type') == 'llm_call']
@@ -216,85 +253,218 @@ class Logger:
             for t in llm_calls if t.get('usage')
         )
-        turn_data = {
-            'input': user_input,
-            'expected': session.get('expected', ''),
-            'model': model,
-            'duration_ms': int(duration_ms),
-            'tokens': total_tokens,
-            'cost': round(total_cost, 4),
+        # Build metadata as compact JSON string
+        meta = json.dumps({
+            "tokens": total_tokens,
+            "cost": round(total_cost, 4),
+            "duration_ms": int(duration_ms),
+            "ts": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        })
+        # Build turn data for this run
+        run_data = {
+            'run': self.current_run,
+            'output': result,
             'tools_called': [self._format_tool_call(t) for t in tool_calls],
-            'result': result,
-            'evaluation': session.get('evaluation', '')
+            'expected': session.get('expected', ''),
+            'evaluation': session.get('evaluation', ''),
+            'meta': meta
         }
-        # Update session aggregates
-        self.session_data['updated'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-        self.session_data['total_cost'] = round(
-            self.session_data.get('total_cost', 0) + turn_data['cost'], 4
-        )
-        self.session_data['total_tokens'] = (
-            self.session_data.get('total_tokens', 0) + turn_data['tokens']
+        # Find or create turn entry
+        turn_index = session.get('turn', 1) - 1  # 0-indexed
+        turns = self.eval_data['turns']
+        if turn_index < len(turns):
+            # Existing turn - add to history
+            existing_turn = turns[turn_index]
+            if existing_turn.get('input') == user_input:
+                # Same input - this is a new run
+                history = existing_turn.get('history', [])
+                # Move current run to history (metadata only)
+                if existing_turn.get('run'):
+                    history.insert(0, {
+                        'run': existing_turn.get('run', self.current_run - 1),
+                        'status': existing_turn.get('evaluation', ''),
+                        'meta': existing_turn.get('meta', '')
+                    })
+                # Update with new run data
+                existing_turn.update({
+                    'run': run_data['run'],
+                    'output': run_data['output'],
+                    'tools_called': run_data['tools_called'],
+                    'expected': run_data['expected'],
+                    'evaluation': run_data['evaluation'],
+                    'meta': run_data['meta'],
+                    'history': history
+                })
+            else:
+                # Different input at same position - shouldn't happen normally
+                turns.append({
+                    'input': user_input,
+                    **run_data,
+                    'history': []
+                })
+        else:
+            # New turn
+            turns.append({
+                'input': user_input,
+                **run_data,
+                'history': []
+            })
+        # Update metadata
+        self.eval_data['updated'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        self.eval_data['model'] = model
+        # Write run YAML with messages
+        self._write_run_yaml(
+            messages=session.get('messages', []),
+            model=model,
+            tokens=total_tokens,
+            cost=total_cost,
+            duration_ms=duration_ms
         )
-        # Add turn number and timestamp
-        turn_num = len(self.session_data['turns']) + 1
-        turn_data['turn'] = turn_num
-        turn_data['timestamp'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-        self.session_data['turns'].append(turn_data)
+        # Write YAML
+        self._write_eval()
-        # Extract this turn's messages (everything after what we've already saved)
-        all_messages = session.get('messages', [])
-        saved_count = sum(len(msgs) for msgs in self.session_data['messages'].values())
-        turn_messages = all_messages[saved_count + 1:]  # +1 to skip system message
-        self.session_data['messages'][turn_num] = turn_messages
+    def _write_run_yaml(self, messages: List[Dict], model: str, tokens: int, cost: float, duration_ms: float):
+        """Write run metadata and messages to YAML file.
-        # Write YAML
-        self._write_session()
+        Args:
+            messages: List of message dicts
+            model: Model name
+            tokens: Total tokens used
+            cost: Total cost
+            duration_ms: Duration in milliseconds
+        """
+        if not self.eval_dir:
+            return
+        import os
+        import sys
+        # Extract system prompt from messages
+        system_prompt = ""
+        for msg in messages:
+            if msg.get('role') == 'system':
+                system_prompt = msg.get('content', '')
+                break
+        # Get agent file path (the script being run)
+        agent_file = sys.argv[0] if sys.argv else ""
+        # Make it relative to cwd if possible
+        cwd = os.getcwd()
+        if agent_file and os.path.isabs(agent_file):
+            try:
+                agent_file = os.path.relpath(agent_file, cwd)
+            except ValueError:
+                pass  # Keep absolute if on different drive (Windows)
+        # Format messages as pretty JSON (one message per line)
+        messages_json_lines = []
+        for msg in messages:
+            messages_json_lines.append("  " + json.dumps(msg, ensure_ascii=False))
+        messages_formatted = "[\n" + ",\n".join(messages_json_lines) + "\n]"
+        # Build run data
+        run_data = {
+            'agent': agent_file,
+            'system_prompt': system_prompt,
+            'model': model,
+            'cwd': cwd,
+            'tokens': tokens,
+            'cost': round(cost, 4),
+            'duration_ms': int(duration_ms),
+            'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+            'messages': messages_formatted
+        }
-    def _write_session(self):
-        """Write session data with turns summary first, detail at end."""
-        # Build ordered dict: compact metadata → turns → detail (system_prompt + messages)
+        # Write YAML with messages as literal block
+        run_file = self.eval_dir / f"run_{self.current_run}.yaml"
+        with open(run_file, 'w', encoding='utf-8') as f:
+            # Write metadata fields normally
+            for key in ['agent', 'system_prompt', 'model', 'cwd', 'tokens', 'cost', 'duration_ms', 'timestamp']:
+                value = run_data[key]
+                if isinstance(value, str) and '\n' in value:
+                    f.write(f"{key}: |\n")
+                    for line in value.split('\n'):
+                        f.write(f"  {line}\n")
+                elif isinstance(value, str):
+                    # Quote strings that might have special chars
+                    f.write(f"{key}: {json.dumps(value)}\n")
+                else:
+                    f.write(f"{key}: {value}\n")
+            # Write messages as literal block
+            f.write("messages: |\n")
+            for line in messages_formatted.split('\n'):
+                f.write(f"  {line}\n")
+    def _write_eval(self):
+        """Write eval data to YAML file."""
+        if not self.eval_file or not self.eval_data:
+            return
+        # Build ordered output
         ordered = {
-            'name': self.session_data['name'],
-            'session_id': self.session_data.get('session_id'),
-            'created': self.session_data['created'],
-            'updated': self.session_data.get('updated', ''),
-            'total_cost': self.session_data.get('total_cost', 0),
-            'total_tokens': self.session_data.get('total_tokens', 0),
-            'turns': self.session_data['turns'],
-            # Detail section (scroll down)
-            'system_prompt': self.session_data.get('system_prompt', ''),
-            'messages': self.session_data['messages']
+            'name': self.eval_data['name'],
+            'created': self.eval_data['created'],
+            'updated': self.eval_data.get('updated', ''),
+            'runs': self.eval_data['runs'],
+            'model': self.eval_data['model'],
+            'turns': self.eval_data['turns']
         }
-        with open(self.session_file, 'w') as f:
+        with open(self.eval_file, 'w', encoding='utf-8') as f:
             yaml.dump(ordered, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
-    def load_messages(self) -> list:
-        """Load and reconstruct full message list from session file.
+    def get_eval_path(self) -> Optional[str]:
+        """Get the path to the current eval file.
+        Returns:
+            Path string like '.co/evals/what_is_25_x_4.yaml' or None
+        """
+        if self.eval_file:
+            return str(self.eval_file)
+        return None
+    def load_messages(self, run: Optional[int] = None) -> list:
+        """Load messages from run YAML file.
+        Args:
+            run: Run number to load (default: current run)
         Returns:
-            Full message list: [system_message] + all turn messages in order
+            List of message dicts
         """
-        if not self.session_file or not self.session_file.exists():
+        if not self.eval_dir:
             return []
-        with open(self.session_file, 'r') as f:
-            data = yaml.safe_load(f) or {}
-        # Reconstruct: system prompt + all turn messages in order
-        messages = []
-        if data.get('system_prompt'):
-            messages.append({"role": "system", "content": data['system_prompt']})
+        run_num = run or self.current_run
+        run_file = self.eval_dir / f"run_{run_num}.yaml"
+        if not run_file.exists():
+            # Try legacy JSONL format
+            jsonl_file = self.eval_dir / f"run_{run_num}.jsonl"
+            if jsonl_file.exists():
+                messages = []
+                with open(jsonl_file, 'r', encoding='utf-8') as f:
+                    for line in f:
+                        if line.strip():
+                            messages.append(json.loads(line))
+                return messages
+            return []
-        turn_messages = data.get('messages', {})
-        for turn_num in sorted(turn_messages.keys()):
-            messages.extend(turn_messages[turn_num])
+        with open(run_file, 'r', encoding='utf-8') as f:
+            data = yaml.safe_load(f)
-        return messages
+        messages_str = data.get('messages', '[]')
+        return json.loads(messages_str)
     def load_session(self) -> dict:
-        """Load session data from file."""
-        if not self.session_file or not self.session_file.exists():
-            return {'system_prompt': '', 'turns': [], 'messages': {}}
-        with open(self.session_file, 'r') as f:
-            return yaml.safe_load(f) or {'system_prompt': '', 'turns': [], 'messages': {}}
+        """Load eval data from file."""
+        if not self.eval_file or not self.eval_file.exists():
+            return {'turns': [], 'runs': 0}
+        with open(self.eval_file, 'r') as f:
+            return yaml.safe_load(f) or {'turns': [], 'runs': 0}

connectonion 0.6.0__py3-none-any.whl → 0.6.2__py3-none-any.whl

connectonion 0.6.0__py3-none-any.whl → 0.6.2__py3-none-any.whl