lybic_guiagents-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lybic-guiagents might be problematic.
- desktop_env/__init__.py +1 -0
- desktop_env/actions.py +203 -0
- desktop_env/controllers/__init__.py +0 -0
- desktop_env/controllers/python.py +471 -0
- desktop_env/controllers/setup.py +882 -0
- desktop_env/desktop_env.py +509 -0
- desktop_env/evaluators/__init__.py +5 -0
- desktop_env/evaluators/getters/__init__.py +41 -0
- desktop_env/evaluators/getters/calc.py +15 -0
- desktop_env/evaluators/getters/chrome.py +1774 -0
- desktop_env/evaluators/getters/file.py +154 -0
- desktop_env/evaluators/getters/general.py +42 -0
- desktop_env/evaluators/getters/gimp.py +38 -0
- desktop_env/evaluators/getters/impress.py +126 -0
- desktop_env/evaluators/getters/info.py +24 -0
- desktop_env/evaluators/getters/misc.py +406 -0
- desktop_env/evaluators/getters/replay.py +20 -0
- desktop_env/evaluators/getters/vlc.py +86 -0
- desktop_env/evaluators/getters/vscode.py +35 -0
- desktop_env/evaluators/metrics/__init__.py +160 -0
- desktop_env/evaluators/metrics/basic_os.py +68 -0
- desktop_env/evaluators/metrics/chrome.py +493 -0
- desktop_env/evaluators/metrics/docs.py +1011 -0
- desktop_env/evaluators/metrics/general.py +665 -0
- desktop_env/evaluators/metrics/gimp.py +637 -0
- desktop_env/evaluators/metrics/libreoffice.py +28 -0
- desktop_env/evaluators/metrics/others.py +92 -0
- desktop_env/evaluators/metrics/pdf.py +31 -0
- desktop_env/evaluators/metrics/slides.py +957 -0
- desktop_env/evaluators/metrics/table.py +585 -0
- desktop_env/evaluators/metrics/thunderbird.py +176 -0
- desktop_env/evaluators/metrics/utils.py +719 -0
- desktop_env/evaluators/metrics/vlc.py +524 -0
- desktop_env/evaluators/metrics/vscode.py +283 -0
- desktop_env/providers/__init__.py +35 -0
- desktop_env/providers/aws/__init__.py +0 -0
- desktop_env/providers/aws/manager.py +278 -0
- desktop_env/providers/aws/provider.py +186 -0
- desktop_env/providers/aws/provider_with_proxy.py +315 -0
- desktop_env/providers/aws/proxy_pool.py +193 -0
- desktop_env/providers/azure/__init__.py +0 -0
- desktop_env/providers/azure/manager.py +87 -0
- desktop_env/providers/azure/provider.py +207 -0
- desktop_env/providers/base.py +97 -0
- desktop_env/providers/gcp/__init__.py +0 -0
- desktop_env/providers/gcp/manager.py +0 -0
- desktop_env/providers/gcp/provider.py +0 -0
- desktop_env/providers/virtualbox/__init__.py +0 -0
- desktop_env/providers/virtualbox/manager.py +463 -0
- desktop_env/providers/virtualbox/provider.py +124 -0
- desktop_env/providers/vmware/__init__.py +0 -0
- desktop_env/providers/vmware/manager.py +455 -0
- desktop_env/providers/vmware/provider.py +105 -0
- gui_agents/__init__.py +0 -0
- gui_agents/agents/Action.py +209 -0
- gui_agents/agents/__init__.py +0 -0
- gui_agents/agents/agent_s.py +832 -0
- gui_agents/agents/global_state.py +610 -0
- gui_agents/agents/grounding.py +651 -0
- gui_agents/agents/hardware_interface.py +129 -0
- gui_agents/agents/manager.py +568 -0
- gui_agents/agents/translator.py +132 -0
- gui_agents/agents/worker.py +355 -0
- gui_agents/cli_app.py +560 -0
- gui_agents/core/__init__.py +0 -0
- gui_agents/core/engine.py +1496 -0
- gui_agents/core/knowledge.py +449 -0
- gui_agents/core/mllm.py +555 -0
- gui_agents/tools/__init__.py +0 -0
- gui_agents/tools/tools.py +727 -0
- gui_agents/unit_test/__init__.py +0 -0
- gui_agents/unit_test/run_tests.py +65 -0
- gui_agents/unit_test/test_manager.py +330 -0
- gui_agents/unit_test/test_worker.py +269 -0
- gui_agents/utils/__init__.py +0 -0
- gui_agents/utils/analyze_display.py +301 -0
- gui_agents/utils/common_utils.py +263 -0
- gui_agents/utils/display_viewer.py +281 -0
- gui_agents/utils/embedding_manager.py +53 -0
- gui_agents/utils/image_axis_utils.py +27 -0
- lybic_guiagents-0.1.0.dist-info/METADATA +416 -0
- lybic_guiagents-0.1.0.dist-info/RECORD +85 -0
- lybic_guiagents-0.1.0.dist-info/WHEEL +5 -0
- lybic_guiagents-0.1.0.dist-info/licenses/LICENSE +201 -0
- lybic_guiagents-0.1.0.dist-info/top_level.txt +2 -0
gui_agents/utils/analyze_display.py
@@ -0,0 +1,301 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Display.json analyzer - Extract and analyze execution statistics from display.json files
"""

import json
import os
import glob
import re
from typing import Dict, List, Tuple


def extract_cost_value(cost_str: str) -> Tuple[float, str]:
    """
    Extract numeric value and currency symbol from cost string (e.g., "0.000343¥" -> (0.000343, "¥"))

    Args:
        cost_str: Cost string with currency symbol

    Returns:
        Tuple of (float value, currency symbol)
    """
    # Extract numeric value and currency symbol (matches both fullwidth ￥ and halfwidth ¥)
    match = re.search(r'([\d.]+)([￥$€£¥]*)', cost_str)
    if match:
        value = float(match.group(1))
        currency = match.group(2) if match.group(2) else "¥"  # Default to ¥ if no symbol found
        return value, currency
    return 0.0, "¥"


def convert_currency_to_yuan(value: float, currency: str) -> float:
    """
    Convert different currencies to yuan (¥) for consistent cost calculation

    Args:
        value: Cost value
        currency: Currency symbol

    Returns:
        Value converted to yuan
    """
    # Simple conversion rates (you might want to use real-time rates in production)
    conversion_rates = {
        "￥": 1.0,  # fullwidth yen
        "¥": 1.0,   # halfwidth yen
        "$": 7.2,   # USD to CNY (approximate)
        "€": 7.8,   # EUR to CNY (approximate)
        "£": 9.1,   # GBP to CNY (approximate)
    }

    rate = conversion_rates.get(currency, 1.0)
    return value * rate


def analyze_display_json(file_path: str) -> Dict:
    """
    Analyze a single display.json file and extract statistics

    Args:
        file_path: Path to the display.json file

    Returns:
        Dictionary containing analysis results
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return {}

    # Initialize counters
    fast_action_count = 0
    total_duration = 0
    total_input_tokens = 0
    total_output_tokens = 0
    total_tokens = 0
    total_cost = 0.0
    currency_symbol = "¥"  # Default currency symbol

    # Check if this is a fast mode or normal mode display.json
    is_fast_mode = False
    if 'operations' in data and 'agent' in data['operations']:
        for operation in data['operations']['agent']:
            if operation.get('operation') == 'fast_action_execution':
                is_fast_mode = True
                break

    if is_fast_mode:
        # Fast mode analysis - similar to original logic
        if 'operations' in data and 'agent' in data['operations']:
            for operation in data['operations']['agent']:
                if operation.get('operation') == 'fast_action_execution':
                    fast_action_count += 1

                    # Extract tokens
                    tokens = operation.get('tokens', [0, 0, 0])
                    if len(tokens) >= 3:
                        total_input_tokens += tokens[0]
                        total_output_tokens += tokens[1]
                        total_tokens += tokens[2]

                    # Extract cost
                    cost_str = operation.get('cost', '0¥')
                    cost_value, currency = extract_cost_value(cost_str)
                    # Convert to yuan for consistent calculation
                    cost_in_yuan = convert_currency_to_yuan(cost_value, currency)
                    total_cost += cost_in_yuan
                    currency_symbol = "¥"  # Always use ¥ for consistency

        # Extract total execution time for fast mode
        if 'operations' in data and 'other' in data['operations']:
            for operation in data['operations']['other']:
                if operation.get('operation') == 'total_execution_time_fast':
                    total_duration = int(operation.get('duration', 0))
                    break
    else:
        # Normal mode analysis - analyze specific operations
        if 'operations' in data:
            # Define the operations to count for tokens and cost
            token_cost_operations = {
                'formulate_query', 'retrieve_narrative_experience', 'retrieve_knowledge',
                'knowledge_fusion', 'subtask_planner', 'generated_dag', 'reflection',
                'episode_summarization', 'action_plan', 'grounding_model_response'
            }

            # Count hardware operations as steps
            if 'hardware' in data['operations']:
                fast_action_count = len(data['operations']['hardware'])

            # Extract tokens and cost from specific operations across all modules
            for module_name, module_operations in data['operations'].items():
                if isinstance(module_operations, list):
                    for operation in module_operations:
                        operation_type = operation.get('operation', '')

                        # Only count tokens and cost for specified operations
                        if operation_type in token_cost_operations:
                            # Extract tokens if available
                            tokens = operation.get('tokens', [0, 0, 0])
                            if isinstance(tokens, list) and len(tokens) >= 3:
                                total_input_tokens += tokens[0]
                                total_output_tokens += tokens[1]
                                total_tokens += tokens[2]

                            # Extract cost if available
                            cost_str = operation.get('cost', '0¥')
                            cost_value, currency = extract_cost_value(cost_str)
                            # Convert to yuan for consistent calculation
                            cost_in_yuan = convert_currency_to_yuan(cost_value, currency)
                            total_cost += cost_in_yuan
                            # Always use ¥ for consistency
                            currency_symbol = "¥"

            # Extract total execution time for normal mode
            if 'other' in data['operations']:
                for operation in data['operations']['other']:
                    if operation.get('operation') == 'total_execution_time':
                        total_duration = int(operation.get('duration', 0))
                        break

    return {
        'fast_action_count': fast_action_count,
        'total_duration': total_duration,
        'total_input_tokens': total_input_tokens,
        'total_output_tokens': total_output_tokens,
        'total_tokens': total_tokens,
        'total_cost': total_cost,
        'currency_symbol': currency_symbol
    }


def analyze_folder(folder_path: str) -> List[Dict]:
    """
    Analyze all display.json files in a folder

    Args:
        folder_path: Path to the folder containing display.json files

    Returns:
        List of analysis results for each file
    """
    results = []

    # Find all display.json files recursively
    pattern = os.path.join(folder_path, "**", "display.json")
    display_files = glob.glob(pattern, recursive=True)

    if not display_files:
        print(f"No display.json files found in {folder_path}")
        return results

    print(f"Found {len(display_files)} display.json files")

    for file_path in display_files:
        print(f"Analyzing: {file_path}")
        result = analyze_display_json(file_path)
        if result:
            result['file_path'] = file_path
            results.append(result)

    return results


def aggregate_results(results: List[Dict]) -> Dict:
    """
    Aggregate results from multiple files

    Args:
        results: List of analysis results

    Returns:
        Aggregated statistics
    """
    if not results:
        return {}

    total_fast_actions = sum(r['fast_action_count'] for r in results)
    total_duration = max(r['total_duration'] for r in results) if results else 0
    total_input_tokens = sum(r['total_input_tokens'] for r in results)
    total_output_tokens = sum(r['total_output_tokens'] for r in results)
    total_tokens = sum(r['total_tokens'] for r in results)
    total_cost = sum(r['total_cost'] for r in results)

    # Use the currency symbol from the first result, or default to ¥
    currency_symbol = results[0].get('currency_symbol', '¥') if results else '¥'

    return {
        'total_fast_actions': total_fast_actions,
        'total_duration': total_duration,
        'total_input_tokens': total_input_tokens,
        'total_output_tokens': total_output_tokens,
        'total_tokens': total_tokens,
        'total_cost': total_cost,
        'currency_symbol': currency_symbol
    }


def format_output_line(stats: Dict) -> str:
    """
    Format statistics into a single output line

    Args:
        stats: Statistics dictionary

    Returns:
        Formatted output line
    """
    if not stats:
        return "No data available"

    # Format: steps, duration (seconds), tokens, cost
    # Aggregated stats store the step count under 'total_fast_actions';
    # per-file stats use 'fast_action_count', so check both.
    steps = stats.get('total_fast_actions', stats.get('fast_action_count', 0))
    duration = stats.get('total_duration', 0)
    tokens = (stats.get('total_input_tokens', 0), stats.get('total_output_tokens', 0), stats.get('total_tokens', 0))
    cost = stats.get('total_cost', 0.0)

    return f"{steps}, {duration}, {tokens}, {cost:.4f}{stats.get('currency_symbol', '¥')}"


def main():
    """
    Main function to analyze display.json files
    """
    import sys

    if len(sys.argv) < 2:
        print("Usage: python analyze_display.py <folder_path>")
        print("Example: python analyze_display.py lybicguiagents/runtime")
        return

    folder_path = sys.argv[1]

    if not os.path.exists(folder_path):
        print(f"Folder not found: {folder_path}")
        return

    # Analyze all display.json files in the folder
    results = analyze_folder(folder_path)

    if not results:
        print("No valid display.json files found")
        return

    # Aggregate results
    aggregated_stats = aggregate_results(results)

    # Print the required single line output
    print("\nStatistics:")
    print("-" * 80)
    print("Steps, Duration (seconds), (Input Tokens, Output Tokens, Total Tokens), Cost")
    print("-" * 80)
    output_line = format_output_line(aggregated_stats)
    print(output_line)
    print("-" * 80)


if __name__ == "__main__":
    main()
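For orientation, the sketch below drives the analyzer end to end on a hypothetical fast-mode display.json. It assumes the wheel is installed; the directory layout and all field values are invented for illustration, but the keys ('operations', 'agent', 'tokens', 'cost', 'duration') are exactly the ones the code above reads.

# Hypothetical example: build a fast-mode display.json and run the analyzer on it.
import json, os, tempfile

sample = {
    "operations": {
        "agent": [
            {"operation": "fast_action_execution", "tokens": [900, 120, 1020], "cost": "0.0021¥"}
        ],
        "other": [
            {"operation": "total_execution_time_fast", "duration": 42}
        ]
    }
}

root = tempfile.mkdtemp()
os.makedirs(os.path.join(root, "task_1"), exist_ok=True)
with open(os.path.join(root, "task_1", "display.json"), "w", encoding="utf-8") as f:
    json.dump(sample, f)

from gui_agents.utils.analyze_display import analyze_folder, aggregate_results, format_output_line

stats = aggregate_results(analyze_folder(root))
print(format_output_line(stats))  # -> "1, 42, (900, 120, 1020), 0.0021¥"

The CLI entry point behaves the same way: python analyze_display.py <folder_path> prints one aggregated line in the Steps, Duration, Tokens, Cost format.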
gui_agents/utils/common_utils.py
@@ -0,0 +1,263 @@
import json
import re
import time
import tiktoken
import numpy as np

from typing import Tuple, List, Union, Dict

from pydantic import BaseModel, ValidationError

import pickle


class Node(BaseModel):
    name: str
    info: str


class Dag(BaseModel):
    nodes: List[Node]
    edges: List[List[Node]]


NUM_IMAGE_TOKEN = 1105  # Token count for a 1920x1080 screenshot with OpenAI vision

def calculate_tokens(messages, num_image_token=NUM_IMAGE_TOKEN) -> Tuple[int, int]:
    """Estimate (input, output) token counts for a chat message list; each attached image is billed a flat num_image_token."""
    num_input_images = 0
    output_message = messages[-1]

    input_message = messages[:-1]

    input_string = ""
    for message in input_message:
        input_string += message["content"][0]["text"] + "\n"
        if len(message["content"]) > 1:
            num_input_images += 1

    input_text_tokens = get_input_token_length(input_string)

    input_image_tokens = num_image_token * num_input_images

    output_tokens = get_input_token_length(output_message["content"][0]["text"])

    return (input_text_tokens + input_image_tokens), output_tokens

def parse_dag(text):
    """
    Try extracting JSON from <json>…</json> tags first;
    if not found, try ```json … ``` Markdown fences.
    If both fail, try to parse the entire text as JSON.
    """
    import logging
    logger = logging.getLogger("desktopenv.agent")

    def _extract(pattern):
        m = re.search(pattern, text, re.DOTALL)
        return m.group(1).strip() if m else None

    # 1) look for <json>…</json>
    json_str = _extract(r"<json>(.*?)</json>")
    # 2) fallback to ```json … ```
    if json_str is None:
        json_str = _extract(r"```json\s*(.*?)\s*```")
    if json_str is None:
        # 3) try other possible code block formats
        json_str = _extract(r"```\s*(.*?)\s*```")

    # 4) if still not found, try to parse the entire text
    if json_str is None:
        logger.warning("JSON markers not found, attempting to parse entire text")
        json_str = text.strip()

    # Log the extracted JSON string
    logger.debug(f"Extracted JSON string: {json_str[:100]}...")

    try:
        # Try to parse as JSON directly
        payload = json.loads(json_str)
    except json.JSONDecodeError as e:
        logger.error(f"JSON parsing error: {e}")

        # Try to fix common JSON format issues
        try:
            # Replace single quotes with double quotes
            fixed_json = json_str.replace("'", "\"")
            payload = json.loads(fixed_json)
            logger.info("Successfully fixed JSON by replacing single quotes with double quotes")
        except json.JSONDecodeError:
            # Try to find and extract possible JSON objects
            try:
                # Look for content between { and }
                match = re.search(r"\{(.*)\}", json_str, re.DOTALL)
                if match:
                    fixed_json = "{" + match.group(1) + "}"
                    payload = json.loads(fixed_json)
                    logger.info("Successfully fixed JSON by extracting JSON object")
                else:
                    logger.error("Unable to fix JSON format")
                    return None
            except Exception:
                logger.error("All JSON fixing attempts failed")
                return None

    # Check if payload contains dag key
    if "dag" not in payload:
        logger.warning("'dag' key not found in JSON, attempting to use entire JSON object")
        # If no dag key, try to use the entire payload
        try:
            # Check if payload directly conforms to Dag structure
            if "nodes" in payload and "edges" in payload:
                return Dag(**payload)
            else:
                # Iterate through top-level keys to find possible dag structure
                for key, value in payload.items():
                    if isinstance(value, dict) and "nodes" in value and "edges" in value:
                        logger.info(f"Found DAG structure in key '{key}'")
                        return Dag(**value)

                logger.error("Could not find valid DAG structure in JSON")
                return None
        except ValidationError as e:
            logger.error(f"Data structure validation error: {e}")
            return None

    # Normal case, use value of dag key
    try:
        return Dag(**payload["dag"])
    except ValidationError as e:
        logger.error(f"DAG data structure validation error: {e}")
        return None
    except Exception as e:
        logger.error(f"Unknown error parsing DAG: {e}")
        return None


def parse_single_code_from_string(input_string):
    input_string = input_string.strip()
    if input_string.strip() in ["WAIT", "DONE", "FAIL"]:
        return input_string.strip()

    pattern = r"```(?:\w+\s+)?(.*?)```"
    matches = re.findall(pattern, input_string, re.DOTALL)
    codes = []
    for match in matches:
        match = match.strip()
        commands = ["WAIT", "DONE", "FAIL"]
        if match in commands:
            codes.append(match.strip())
        elif match.split("\n")[-1] in commands:
            if len(match.split("\n")) > 1:
                codes.append("\n".join(match.split("\n")[:-1]))
            codes.append(match.split("\n")[-1])
        else:
            codes.append(match)
    if len(codes) > 0:
        return codes[0]
    # The pattern matches function calls with balanced parentheses and quotes
    code_match = re.search(r"(\w+\.\w+\((?:[^()]*|\([^()]*\))*\))", input_string)
    if code_match:
        return code_match.group(1)
    lines = [line.strip() for line in input_string.splitlines() if line.strip()]
    if lines:
        return lines[0]
    return "fail"


def get_input_token_length(input_string):
    enc = tiktoken.encoding_for_model("gpt-4")
    tokens = enc.encode(input_string)
    return len(tokens)


def sanitize_code(code):
    # This pattern captures the outermost double-quoted text
    if "\n" in code:
        pattern = r'(".*?")'
        # Find all matches in the text
        matches = re.findall(pattern, code, flags=re.DOTALL)
        if matches:
            # Replace the first occurrence only
            first_match = matches[0]
            code = code.replace(first_match, f'"""{first_match[1:-1]}"""', 1)
    return code


def extract_first_agent_function(code_string):
    # Regular expression pattern to match 'agent' functions with any arguments, including nested parentheses
    pattern = r'agent\.[a-zA-Z_]+\((?:[^()\'"]|\'[^\']*\'|"[^"]*")*\)'

    # Find all matches in the string
    matches = re.findall(pattern, code_string)

    # Return the first match if found, otherwise return None
    return matches[0] if matches else None


def load_knowledge_base(kb_path: str) -> Dict:
    try:
        with open(kb_path, "r") as f:
            return json.load(f)
    except Exception as e:
        print(f"Error loading knowledge base: {e}")
        return {}


def clean_empty_embeddings(embeddings: Dict) -> Dict:
    to_delete = []
    for k, v in embeddings.items():
        arr = np.array(v)
        if arr.size == 0 or arr.shape == () or (
            isinstance(v, list) and v and isinstance(v[0], str) and v[0].startswith('Error:')
        ) or (isinstance(v, str) and v.startswith('Error:')):
            to_delete.append(k)
    for k in to_delete:
        del embeddings[k]
    return embeddings


def load_embeddings(embeddings_path: str) -> Dict:
    try:
        with open(embeddings_path, "rb") as f:
            embeddings = pickle.load(f)
        embeddings = clean_empty_embeddings(embeddings)
        return embeddings
    except Exception as e:
        # print(f"Error loading embeddings: {e}")
        print(f"Empty embeddings file: {embeddings_path}")
        return {}


def save_embeddings(embeddings_path: str, embeddings: Dict):
    try:
        import os
        os.makedirs(os.path.dirname(embeddings_path), exist_ok=True)
        with open(embeddings_path, "wb") as f:
            pickle.dump(embeddings, f)
    except Exception as e:
        print(f"Error saving embeddings: {e}")


def agent_log_to_string(agent_log: List[Dict]) -> str:
    """
    Converts a list of agent log entries into a single string for LLM consumption.

    Args:
        agent_log: A list of dictionaries, where each dictionary is an agent log entry.

    Returns:
        A formatted string representing the agent log.
    """
    if not agent_log:
        return "No agent log entries yet."

    log_strings = ["[AGENT LOG]"]
    for entry in agent_log:
        entry_id = entry.get("id", "N/A")
        entry_type = entry.get("type", "N/A").capitalize()
        content = entry.get("content", "")
        log_strings.append(f"[Entry {entry_id} - {entry_type}] {content}")

    return "\n".join(log_strings)
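As a quick illustration of the planner-output parsing path, a minimal sketch follows; the model output string is invented, but the accepted wrappers (<json>…</json>, ```json fences, or bare JSON) and the Dag/Node schema come from the code above.

# Hypothetical model output wrapped in <json> tags; values are illustrative only.
from gui_agents.utils.common_utils import parse_dag

text = '<json>{"dag": {"nodes": [{"name": "open_app", "info": "Open the browser"}], "edges": []}}</json>'
dag = parse_dag(text)
if dag is not None:
    print([node.name for node in dag.nodes])  # -> ['open_app']

Note that parse_dag degrades gracefully: on malformed input it attempts several repairs (quote substitution, brace extraction, searching top-level keys for a nodes/edges structure) and returns None rather than raising, so callers can retry or fall back.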