lybic-guiagents 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lybic-guiagents might be problematic.

@@ -32,6 +32,20 @@ class Grounding(ACI):
          width: int = 1920,
          height: int = 1080,
      ):
+         """
+         Initialize a Grounding instance: configure screen dimensions, prepare tool instances, and load global state.
+
+         Parameters:
+             Tools_dict (Dict): Mapping of tool names to their configuration dictionaries used to register tools.
+             platform (str): Target platform identifier (e.g., "windows", "macos") used by the grounding agents.
+             width (int): Current screen width in pixels.
+             height (int): Current screen height in pixels.
+
+         Detailed behavior:
+             - Creates and registers two Tools instances ("grounding" and "text_span") using entries from Tools_dict; registration will include any authentication-related parameters present in the tool configuration.
+             - Obtains grounding tool dimensions (grounding_width, grounding_height) and falls back to the provided width and height when the grounding tool does not supply them.
+             - Initializes coordinate placeholders (coords1, coords2) and stores a reference to the global state store.
+         """
          self.platform = platform
          self.Tools_dict = Tools_dict
          self.width = width
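
The docstring above says registration pulls any authentication-related parameters out of the Tools_dict entries; the `_register` helper added in the next hunk does this by popping `provider` and `model` and forwarding whatever remains to `register_tool`. A minimal sketch of such an entry and of that split; the key names mirror what the helper inspects, but all values here are placeholders, not taken from the package:

# Hypothetical Tools_dict entry, for illustration only.
tools_dict = {
    "grounding": {
        "provider": "openai",                      # popped, passed positionally to register_tool
        "model": "example-grounding-model",        # popped, passed positionally to register_tool
        "api_key": "sk-placeholder",               # recognized auth key, forwarded as a kwarg
        "base_url": "https://example.invalid/v1",  # recognized auth key, forwarded as a kwarg
        "temperature": 0.0,                        # any other key also rides along in **all_params
    }
}

config = tools_dict.get("grounding", {}).copy()
provider = config.pop("provider", None)   # "openai"
model = config.pop("model", None)         # "example-grounding-model"
# Everything still in config, auth keys included, is what register_tool receives as keyword arguments.
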
@@ -39,10 +53,35 @@ class Grounding(ACI):
          self.coords1 = None
          self.coords2 = None

+         def _register(tools_instance, tool_name):
+             """
+             Register a tool into the provided tools instance using configuration from Tools_dict.
+
+             Reads the tool configuration for `tool_name` from the surrounding `Tools_dict`, extracts optional `provider` and `model`, collects common authentication parameters (api_key, base_url, endpoint_url, azure_endpoint, api_version), merges them with any remaining configuration, logs the registration, and calls tools_instance.register_tool with the assembled parameters.
+
+             Parameters:
+                 tools_instance: The tools manager/registry instance that exposes register_tool(tool_name, provider, model, **params).
+                 tool_name (str): Key name of the tool in Tools_dict whose configuration will be used to register the tool.
+             """
+             config = Tools_dict.get(tool_name, {}).copy()
+             provider = config.pop("provider", None)
+             model = config.pop("model", None)
+
+             auth_keys = ['api_key', 'base_url', 'endpoint_url', 'azure_endpoint', 'api_version']
+             auth_params = {}
+             for key in auth_keys:
+                 if key in config:
+                     auth_params[key] = config[key]
+                     logger.info(f"Grounding._register: Setting {key} for tool '{tool_name}'")
+
+             # Merge all parameters
+             all_params = {**config, **auth_params}
+
+             logger.info(f"Grounding._register: Registering tool '{tool_name}' with provider '{provider}', model '{model}'")
+             tools_instance.register_tool(tool_name, provider, model, **all_params)
+
          self.grounding_model = Tools()
-         self.grounding_model.register_tool(
-             "grounding", self.Tools_dict["grounding"]["provider"],
-             self.Tools_dict["grounding"]["model"])
+         _register(self.grounding_model, "grounding")

          self.grounding_width, self.grounding_height = self.grounding_model.tools[
              "grounding"].get_grounding_wh()
@@ -51,9 +90,7 @@ class Grounding(ACI):
              self.grounding_height = self.height

          self.text_span_agent = Tools()
-         self.text_span_agent.register_tool(
-             "text_span", self.Tools_dict["text_span"]["provider"],
-             self.Tools_dict["text_span"]["model"])
+         _register(self.text_span_agent, "text_span")

          self.global_state: GlobalState = Registry.get(
              "GlobalStateStore") # type: ignore
@@ -14,6 +14,7 @@ from gui_agents.utils.common_utils import (
      agent_log_to_string,
  )
  from gui_agents.tools.tools import Tools
+ from gui_agents.agents.stream_manager import stream_manager

  logger = logging.getLogger("desktopenv.agent")

@@ -29,31 +30,68 @@ class Manager:
          platform: str = platform.system().lower(),
          enable_search: bool = True,
      ):
+         """
+         Initialize the Manager which orchestrates planning, knowledge retrieval/fusion, DAG generation, topological sorting, and action queue creation for task-driven agents.
+
+         Parameters:
+             Tools_dict (Dict): Mapping of tool names to their configuration dictionaries; used to register and configure internal Tools instances (e.g., 'subtask_planner', 'dag_translator', 'embedding', 'websearch').
+             local_kb_path (str): Filesystem path to the local knowledge base storage used by the KnowledgeBase.
+             multi_round (bool): When True, enable multi-round interaction/stateful planning behavior across turns.
+             platform (str): Target platform identifier (defaults to current system name); forwarded to KnowledgeBase and tools where applicable.
+             enable_search (bool): When True, register and enable a web search tool ('websearch'); otherwise search functionality is disabled.
+         """
          self.platform = platform
          self.Tools_dict = Tools_dict

+         def _register(tools_instance, tool_name):
+             """
+             Register a tool with the provided tools manager using settings from Tools_dict.
+
+             Parameters:
+                 tools_instance: An object exposing register_tool(name, provider, model, **kwargs) used to register the tool.
+                 tool_name (str): Key to look up the tool's configuration in Tools_dict; provider, model, and supported authentication keys
+                     (e.g., 'api_key', 'base_url', 'endpoint_url', 'azure_endpoint', 'api_version') will be extracted and passed to register_tool.
+
+             """
+             config = Tools_dict.get(tool_name, {}).copy()
+             provider = config.pop("provider", None)
+             model = config.pop("model", None)
+
+             auth_keys = ['api_key', 'base_url', 'endpoint_url', 'azure_endpoint', 'api_version']
+             auth_params = {}
+             for key in auth_keys:
+                 if key in config:
+                     auth_params[key] = config[key]
+                     logger.info(f"Manager._register: Setting {key} for tool '{tool_name}'")
+
+             all_params = {**config, **auth_params}
+
+             logger.info(f"Manager._register: Registering tool '{tool_name}' with provider '{provider}', model '{model}'")
+             tools_instance.register_tool(tool_name, provider, model, **all_params)
+
          self.generator_agent = Tools()
-         self.generator_agent.register_tool("subtask_planner", Tools_dict["subtask_planner"]["provider"], Tools_dict["subtask_planner"]["model"])
+         _register(self.generator_agent, "subtask_planner")

          self.dag_translator_agent = Tools()
-         self.dag_translator_agent.register_tool("dag_translator", self.Tools_dict["dag_translator"]["provider"], self.Tools_dict["dag_translator"]["model"])
+         _register(self.dag_translator_agent, "dag_translator")

          self.narrative_summarization_agent = Tools()
-         self.narrative_summarization_agent.register_tool("narrative_summarization", self.Tools_dict["narrative_summarization"]["provider"], self.Tools_dict["narrative_summarization"]["model"])
+         _register(self.narrative_summarization_agent, "narrative_summarization")

          self.episode_summarization_agent = Tools()
-         self.episode_summarization_agent.register_tool("episode_summarization", self.Tools_dict["episode_summarization"]["provider"], self.Tools_dict["episode_summarization"]["model"])
+         _register(self.episode_summarization_agent, "episode_summarization")

          self.local_kb_path = local_kb_path

          self.embedding_engine = Tools()
-         self.embedding_engine.register_tool("embedding", self.Tools_dict["embedding"]["provider"], self.Tools_dict["embedding"]["model"])
+         _register(self.embedding_engine, "embedding")
+
          KB_Tools_dict = {
-             "embedding": self.Tools_dict["embedding"],
-             "query_formulator": self.Tools_dict["query_formulator"],
-             "context_fusion": self.Tools_dict["context_fusion"],
-             "narrative_summarization": self.Tools_dict["narrative_summarization"],
-             "episode_summarization": self.Tools_dict["episode_summarization"],
+             "embedding": self.Tools_dict.get("embedding"),
+             "query_formulator": self.Tools_dict.get("query_formulator"),
+             "context_fusion": self.Tools_dict.get("context_fusion"),
+             "narrative_summarization": self.Tools_dict.get("narrative_summarization"),
+             "episode_summarization": self.Tools_dict.get("episode_summarization"),
          }


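One behavioral consequence of the KB_Tools_dict change above: `self.Tools_dict[...]` raises `KeyError` at Manager construction when a tool entry is missing, whereas `.get(...)` silently yields `None` for that slot. A generic illustration, not package code:

tools = {"embedding": {"provider": "openai", "model": "m"}}

try:
    tools["query_formulator"]              # old behavior: KeyError raised immediately
except KeyError:
    pass

assert tools.get("query_formulator") is None   # new behavior: missing entry becomes None
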
@@ -69,20 +107,40 @@ class Manager:
          self.planner_history = []

          self.turn_count = 0
-
+         self.task_id = None # Will be set by agent
+
          # Initialize search engine based on enable_search parameter
          if enable_search:
              self.search_engine = Tools()
-             self.search_engine.register_tool("websearch", self.Tools_dict["websearch"]["provider"], self.Tools_dict["websearch"]["model"])
+             _register(self.search_engine, "websearch")
          else:
              self.search_engine = None

          self.multi_round = multi_round

+     def _send_stream_message(self, task_id: str, stage: str, message: str) -> None:
+         """
+         Enqueue a stream message for the given task if a task ID is provided.
+
+         Parameters:
+             task_id (str): Identifier of the task stream; no message is sent if empty.
+             stage (str): Stage label for the message.
+             message (str): Message content to enqueue.
+         """
+         if not task_id:
+             return
+
+         stream_manager.add_message_threadsafe(task_id, stage, message)
+
      def summarize_episode(self, trajectory):
-         """Summarize the episode experience for lifelong learning reflection
-         Args:
-             trajectory: str: The episode experience to be summarized
+         """
+         Create a concise summary of the provided episode trajectory for lifelong learning and reflection.
+
+         Parameters:
+             trajectory (str): Serialized episode experience or trajectory to summarize.
+
+         Returns:
+             subtask_summarization (str): A short summary highlighting key subtasks, lessons, or reflections from the episode.
          """

          # Create Reflection on whole trajectories for next round trial, keep earlier messages as exemplars
@@ -99,12 +157,19 @@ class Manager:
              }
          )

+         self._send_stream_message(self.task_id, "summarization", f"Episode summarization: {subtask_summarization}")
+
          return subtask_summarization

      def summarize_narrative(self, trajectory):
-         """Summarize the narrative experience for lifelong learning reflection
-         Args:
-             trajectory: str: The narrative experience to be summarized
+         """
+         Produce a concise reflective summary of a narrative trajectory to inform lifelong learning.
+
+         Parameters:
+             trajectory: Narrative content (e.g., episode transcript or sequence of subtasks) to be summarized.
+
+         Returns:
+             A string containing a reflective summary that captures key insights, lessons learned, and recommendations for future rounds.
          """
          # Create Reflection on whole trajectories for next round trial
          lifelong_learning_reflection, total_tokens, cost_string = self.narrative_summarization_agent.execute_tool("narrative_summarization", {"str_input": trajectory})
@@ -131,6 +196,27 @@ class Manager:
          remaining_subtasks_list: List[Node] = [],
      ) -> Tuple[Dict, str]:

+         """
+         Generate a high-level, step-by-step plan for the given task, optionally incorporating retrieved knowledge and the current subtask state.
+
+         Parameters:
+             observation (Dict): Current environment/desktop state; may include a 'screenshot' key with image data used for planning.
+             instruction (str): Natural-language task description to plan for.
+             failed_subtask (Optional[Node]): If provided, indicates a subtask that failed and triggers replanning for the remainder.
+             completed_subtasks_list (List[Node]): Ordered list of subtasks already completed; used to inform replanning.
+             remaining_subtasks_list (List[Node]): Ordered list of subtasks still expected; used to inform replanning.
+
+         Returns:
+             planner_info (Dict): Metadata about the planning step (includes at least 'search_query' and 'goal_plan').
+             plan (str): The generated high-level plan as a human-readable string.
+
+         Side effects:
+             - May perform retrieval and knowledge fusion on the first planning turn.
+             - Records operations to global_state, appends the plan to self.planner_history, increments self.turn_count, and sends stream messages when self.task_id is set.
+
+         Raises:
+             Exception: If plan generation produces an empty plan.
+         """
          import time
          step_start = time.time()
          # Converts a list of DAG Nodes into a natural langauge list
@@ -275,6 +361,10 @@ class Manager:
          logger.info("GENERATING HIGH LEVEL PLAN")

          subtask_planner_start = time.time()
+
+         # Stream subtask planning message
+         self._send_stream_message(self.task_id, "planning", "Analyzing tasks and generating subtask plans...")
+
          plan, total_tokens, cost_string = self.generator_agent.execute_tool("subtask_planner", {"str_input": generator_message, "img_input": observation.get("screenshot", None)})
          logger.info(f"Subtask planner tokens: {total_tokens}, cost: {cost_string}")
          subtask_planner_time = time.time() - subtask_planner_start
@@ -289,6 +379,11 @@ class Manager:
                  "duration": subtask_planner_time
              }
          )
+
+         # Stream planning completion message
+         if self.task_id:
+             plan_preview = plan[:150] + "..." if len(plan) > 150 else plan
+             self._send_stream_message(self.task_id, "planning", f"Subtask planning completed: {plan_preview}")

          step_time = time.time() - step_start
          logger.info(f"[Timing] Manager._generate_step_by_step_plan execution time: {step_time:.2f} seconds")
@@ -0,0 +1,163 @@
+ """
+ Stream manager for per-task progress messaging.
+
+ This module provides a global `stream_manager` singleton that manages
+ async message queues for task-based streaming. The singleton is async-safe
+ and should have its event loop configured via `set_loop()` during application
+ startup.
+ """
+ import asyncio
+ from google.protobuf.timestamp_pb2 import Timestamp
+ from typing import Dict, Optional, AsyncGenerator
+ from dataclasses import dataclass
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class StreamMessage:
+     stage: str
+     message: str
+     timestamp: Timestamp
+
+
+ class StreamManager:
+     """
+     Manages in-memory async message queues for each task to stream progress.
+     This class is async-safe.
+     """
+
+     def __init__(self, max_queue_size: int = 100):
+         """
+         Initialize a StreamManager that manages per-task in-memory async message queues.
+
+         Parameters:
+             max_queue_size (int): Maximum number of messages to keep per task queue; when a queue is full the oldest message will be dropped to make room for new messages.
+         """
+         self.task_queues: Dict[str, asyncio.Queue[Optional[StreamMessage]]] = {}
+         self.max_queue_size = max_queue_size
+         self._lock = asyncio.Lock()
+         self.loop: Optional[asyncio.AbstractEventLoop] = None
+
+     def set_loop(self, loop: asyncio.AbstractEventLoop):
+         """
+         Store the event loop used to schedule coroutines from non-async threads.
+
+         Parameters:
+             loop (asyncio.AbstractEventLoop): Event loop passed to asyncio.run_coroutine_threadsafe for thread-safe coroutine execution.
+         """
+         self.loop = loop
+
+     def add_message_threadsafe(self, task_id: str, stage: str, message: str):
+         """
+         Enqueue a progress message for a task from a non-async thread in a thread-safe manner.
+
+         If the manager's event loop has not been set, an error is logged and the message is not scheduled.
+
+         Parameters:
+             task_id (str): Identifier of the task to receive the message.
+             stage (str): Stage label for the progress update.
+             message (str): Text of the progress message.
+         """
+         if not self.loop:
+             logger.error("StreamManager event loop not set. Cannot send message from thread.")
+             return
+
+         asyncio.run_coroutine_threadsafe(
+             self.add_message(task_id, stage, message),
+             self.loop
+         )
+
+     async def add_message(self, task_id: str, stage: str, message: str):
+         """
+         Enqueues a progress message for the given task; if the task's queue is full, drops the oldest message to make room.
+
+         Parameters:
+             task_id (str): Identifier of the task whose queue will receive the message.
+             stage (str): Short stage name or label for the message.
+             message (str): Human-readable progress message.
+         """
+         async with self._lock:
+             q = self.task_queues.get(task_id)
+
+         if q:
+             timestamp = Timestamp()
+             timestamp.GetCurrentTime()
+             msg = StreamMessage(stage=stage, message=message, timestamp=timestamp)
+             try:
+                 q.put_nowait(msg)
+             except asyncio.QueueFull:
+                 logger.warning(f"Message queue for task {task_id} is full. Dropping oldest message.")
+                 # Drop the oldest message to make space for the new one
+                 q.get_nowait()
+                 q.put_nowait(msg)
+
+         else:
+             logger.warning(f"No message queue found for task {task_id}. Message not added.")
+
+     async def get_message_stream(self, task_id: str) -> AsyncGenerator[StreamMessage, None]:
+         """
+         Provide an async generator that yields progress messages for the given task.
+
+         If the task has no existing queue, one is created and registered. The generator yields StreamMessage objects produced for the task and terminates when a sentinel `None` is received, signaling end of stream.
+
+         Parameters:
+             task_id (str): Identifier of the task whose message stream to consume.
+
+         Returns:
+             AsyncGenerator[StreamMessage, None]: An async generator yielding `StreamMessage` instances for the task; iteration ends when a sentinel `None` is encountered.
+         """
+         async with self._lock:
+             if task_id not in self.task_queues:
+                 self.task_queues[task_id] = asyncio.Queue(maxsize=self.max_queue_size)
+                 logger.info(f"Registered message queue for task {task_id} in get_message_stream.")
+             q = self.task_queues[task_id]
+
+         while True:
+             message = await q.get()
+             if message is None: # Sentinel value indicates end of stream
+                 logger.info(f"End of stream for task {task_id}")
+                 break
+             yield message
+
+     async def register_task(self, task_id: str):
+         """
+         Create a per-task message queue if one does not already exist.
+
+         This is idempotent: if a queue for the given task_id already exists, the call has no effect. The created queue uses the manager's configured max_queue_size and the operation is safe to call concurrently.
+
+         Parameters:
+             task_id (str): Unique identifier of the task to register a message queue for.
+         """
+         async with self._lock:
+             if task_id not in self.task_queues:
+                 self.task_queues[task_id] = asyncio.Queue(maxsize=self.max_queue_size)
+                 logger.info(f"Registered message queue for task {task_id}")
+
+     async def unregister_task(self, task_id: str):
+         """Removes a task's message queue and signals end of stream."""
+         q = None
+         async with self._lock:
+             if task_id in self.task_queues:
+                 q = self.task_queues.pop(task_id)
+                 logger.info(f"Unregistered message queue for task {task_id}")
+         if q:
+             try:
+                 # Put a sentinel value to unblock any consumers
+                 q.put_nowait(None)
+             except asyncio.QueueFull:
+                 # If full, make space for sentinel
+                 try:
+                     q.get_nowait()
+                 except asyncio.QueueEmpty:
+                     pass
+                 # Retry put after making space or if queue became empty
+                 try:
+                     q.put_nowait(None)
+                 except asyncio.QueueFull:
+                     logger.error(f"Could not send sentinel for task {task_id}: queue still full after retry")
+
+
+ # Global instance to be used across the application
+ stream_manager = StreamManager()
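
Read together, the new module implies this lifecycle: `set_loop()` once at application startup, `register_task()` per task, producers in worker threads calling `add_message_threadsafe()`, an async consumer iterating `get_message_stream()`, and `unregister_task()` pushing the `None` sentinel that ends the stream. A minimal, self-contained sketch of that flow; everything outside `stream_manager` itself is illustrative:

import asyncio

from gui_agents.agents.stream_manager import stream_manager

async def consume(task_id: str):
    # Ends when unregister_task() enqueues the None sentinel.
    async for msg in stream_manager.get_message_stream(task_id):
        print(f"[{msg.stage}] {msg.message}")

def produce(task_id: str):
    # Runs in a plain thread, the way Manager._send_stream_message reports progress.
    stream_manager.add_message_threadsafe(task_id, "planning", "Analyzing tasks and generating subtask plans...")
    stream_manager.add_message_threadsafe(task_id, "planning", "Subtask planning completed")

async def main():
    task_id = "demo-task"
    stream_manager.set_loop(asyncio.get_running_loop())   # required before any threadsafe call
    await stream_manager.register_task(task_id)

    consumer = asyncio.create_task(consume(task_id))
    await asyncio.to_thread(produce, task_id)              # simulate a worker thread producing messages
    await asyncio.sleep(0.1)                               # let the scheduled add_message() coroutines run
    await stream_manager.unregister_task(task_id)          # sends the sentinel; the consumer exits
    await consumer

asyncio.run(main())
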
@@ -35,24 +35,17 @@ class Worker:
          tools_config: Dict = {},
      ):
          """
-         Worker receives a subtask list and active subtask and generates the next action for the to execute.
-         Args:
-             engine_params: Dict
-                 Parameters for the multimodal engine
-             local_kb_path: str
-                 Path to knowledge base
-             platform: str
-                 OS platform the agent runs on (darwin, linux, windows)
-             enable_reflection: bool
-                 Whether to enable reflection
-             use_subtask_experience: bool
-                 Whether to use subtask experience
-             enable_takeover: bool
-                 Whether to enable user takeover functionality
-             enable_search: bool
-                 Global switch for search functionality (overrides config)
-             tools_config: Dict
-                 Complete tools configuration from tools_config.json
+         Initialize a Worker that generates executor actions using the provided tools, local knowledge base, and optional reflection, episodic experience, takeover, and search features.
+
+         Parameters:
+             Tools_dict (Dict): Mapping of tool names to tool instances/configurations used by the Worker.
+             local_kb_path (str): Filesystem path to the local knowledge base to use for retrieval.
+             platform (str): Operating system identifier the agent runs on (e.g., 'darwin', 'linux', 'windows').
+             enable_reflection (bool): If True, enable trajectory reflection generation and use its output when producing actions.
+             use_subtask_experience (bool): If True, attempt to retrieve and incorporate episodic/subtask experience on the first turn.
+             enable_takeover (bool): If True, use the takeover-capable action generator tool when producing actions.
+             enable_search (bool): Global switch that forces search-enabled tools to run with search disabled when False.
+             tools_config (Dict): Tools configuration mapping; if None, the Worker loads tools_config.json from the package tools directory.
          """
          # super().__init__(engine_params, platform)
          self.platform = platform
@@ -72,11 +65,6 @@ class Worker:
          else:
              self.tools_config = tools_config

-         self.embedding_engine = Tools()
-         self.embedding_engine.register_tool(
-             "embedding", self.Tools_dict["embedding"]["provider"],
-             self.Tools_dict["embedding"]["model"])
-
          self.enable_reflection = enable_reflection
          self.use_subtask_experience = use_subtask_experience
          self.global_state: GlobalState = Registry.get(
@@ -85,6 +73,31 @@ class Worker:

      def reset(self):

+         """
+         Initialize the worker's tool agents, knowledge base, and internal state for a new task session.
+
+         This method registers the action generator (with optional takeover variant), trajectory reflector, and embedding engine using a local helper that merges tool configuration with any overrides and propagates authentication parameters; it initializes the KnowledgeBase with the embedding engine and toolkit, configures search-related parameters for the action generator according to global and per-tool settings, and resets runtime state fields (turn count, histories, reflections, cost tracking, screenshot inputs, planner history, latest action, trajectory length limit, and task_id).
+         """
+         def _register(tools_instance, tool_name, **override_kwargs):
+             config = self.Tools_dict.get(tool_name, {}).copy()
+             provider = config.pop("provider", None)
+             model = config.pop("model", None)
+
+             # Merge with any explicit overrides
+             config.update(override_kwargs)
+
+             auth_params = {}
+             auth_keys = ['api_key', 'base_url', 'endpoint_url', 'azure_endpoint', 'api_version']
+             for key in auth_keys:
+                 if key in config:
+                     auth_params[key] = config[key]
+                     logger.info(f"Worker._register: Setting {key} for tool '{tool_name}'")
+
+             all_params = {**config, **auth_params}
+
+             logger.info(f"Worker._register: Registering tool '{tool_name}' with provider '{provider}', model '{model}'")
+             tools_instance.register_tool(tool_name, provider, model, **all_params)
+
          self.generator_agent = Tools()
          self.action_generator_tool = "action_generator_with_takeover" if self.enable_takeover else "action_generator"

@@ -121,20 +134,14 @@ class Worker:
          )

          # Register the tool with parameters
-         self.generator_agent.register_tool(
-             self.action_generator_tool,
-             self.Tools_dict[self.action_generator_tool]["provider"],
-             self.Tools_dict[self.action_generator_tool]["model"], **tool_params)
+         _register(self.generator_agent, self.action_generator_tool, **tool_params)

          self.reflection_agent = Tools()
-         self.reflection_agent.register_tool(
-             "traj_reflector", self.Tools_dict["traj_reflector"]["provider"],
-             self.Tools_dict["traj_reflector"]["model"])
+         _register(self.reflection_agent, "traj_reflector")

          self.embedding_engine = Tools()
-         self.embedding_engine.register_tool(
-             "embedding", self.Tools_dict["embedding"]["provider"],
-             self.Tools_dict["embedding"]["model"])
+         _register(self.embedding_engine, "embedding")
+
          self.knowledge_base = KnowledgeBase(
              embedding_engine=self.embedding_engine,
              Tools_dict=self.Tools_dict,
@@ -150,6 +157,7 @@ class Worker:
          self.planner_history = []
          self.latest_action = None
          self.max_trajector_length = 8
+         self.task_id = None # Will be set by agent

      def generate_next_action(
          self,
@@ -163,7 +171,24 @@ class Worker:
          running_state: str = "running",
      ) -> Dict:
          """
-         Predict the next action(s) based on the current observation.
+         Generate the next executor action plan and related metadata for the current subtask given the observation and context.
+
+         Parameters:
+             Tu (str): Full task description or task context.
+             search_query (str): Search string used for retrieving episodic/subtask experience.
+             subtask (str): Current subtask instruction/description to complete.
+             subtask_info (str): Additional information or constraints for the current subtask.
+             future_tasks (List[Node]): List of upcoming task nodes (used for context in planning).
+             done_task (List[Node]): List of completed task nodes.
+             obs (Dict): Current observation dictionary; must include a "screenshot" key with the current screen image.
+             running_state (str): Current executor running state (default "running").
+
+         Returns:
+             Dict: Executor information containing:
+                 - "current_subtask" (str): The provided subtask.
+                 - "current_subtask_info" (str): The provided subtask_info.
+                 - "executor_plan" (str): The raw plan produced by the action generator.
+                 - "reflection" (str|None): Reflection text produced by the trajectory reflector, or None if reflection is disabled.
          """
          import time
          action_start = time.time()
@@ -351,4 +376,4 @@ class Worker:
          # Cut off extra grounded actions
          res = res[:res.find("(Grounded Action)")]
          res += f"(Grounded Action)\n```python\n{action}\n```\n"
-         return res
+         return res
gui_agents/cli_app.py CHANGED
@@ -1,15 +1,14 @@
  import argparse
- import datetime
- import io
  import logging
  import os
  import platform
  import sys
- import time
  import datetime
  from pathlib import Path
  from dotenv import load_dotenv

+ from gui_agents.agents.Backend.LybicBackend import LybicBackend
+
  env_path = Path(os.path.dirname(os.path.abspath(__file__))) / '.env'
  if env_path.exists():
      load_dotenv(dotenv_path=env_path)
@@ -260,6 +259,18 @@ def scale_screenshot_dimensions(screenshot: Image.Image, hwi_para: HardwareInter
      return screenshot

  def run_agent_normal(agent, instruction: str, hwi_para: HardwareInterface, max_steps: int = 50, enable_takeover: bool = False):
+     """
+     Run an agent in normal mode to iteratively observe, plan, and execute actions for a given instruction.
+
+     Runs up to `max_steps` iterations: captures screenshots, obtains observations, asks the agent for a plan, executes hardware actions, and updates trajectory and memories until the agent signals completion or failure. The function also supports pausing for user takeover and performs post-run timing logging and automatic analysis.
+
+     Parameters:
+         agent: The agent instance used to generate plans and reflections (expects an object exposing `predict`, `update_episodic_memory`, and `update_narrative_memory`).
+         instruction (str): The high-level task description provided to the agent.
+         hwi_para (HardwareInterface): Hardware interface used to capture screenshots and dispatch actions.
+         max_steps (int): Maximum number of agent prediction/execute cycles to run.
+         enable_takeover (bool): If True, the agent may request a user takeover that pauses execution until the user resumes.
+     """
      import time
      obs = {}
      traj = "Task:\n" + instruction
@@ -302,7 +313,7 @@ def run_agent_normal(agent, instruction: str, hwi_para: HardwareInterface, max_s
              os.system(
                  f'osascript -e \'display dialog "Task Completed" with title "OpenACI Agent" buttons "OK" default button "OK"\''
              )
-         elif platform.system() == "Linux":
+         elif platform.system() == "Linux" and not (hwi_para.backend== "lybic" or isinstance(hwi_para.backend, LybicBackend)):
              os.system(
                  f'zenity --info --title="OpenACI Agent" --text="Task Completed" --width=200 --height=100'
              )
@@ -434,7 +445,7 @@ def run_agent_fast(agent,
              os.system(
                  f'osascript -e \'display dialog "Task Completed" with title "OpenACI Agent (Fast)" buttons "OK" default button "OK"\''
              )
-         elif platform.system() == "Linux":
+         elif platform.system() == "Linux" and not (hwi_para.backend== "lybic" or isinstance(hwi_para.backend, LybicBackend)):
              os.system(
                  f'zenity --info --title="OpenACI Agent (Fast)" --text="Task Completed" --width=200 --height=100'
              )
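
Both cli_app.py hunks repeat the same backend test, accepting either the string form "lybic" or a LybicBackend instance. If this guard spreads further, a small predicate could centralize it; a hypothetical sketch, not code from the package:

from gui_agents.agents.Backend.LybicBackend import LybicBackend

def _is_lybic_backend(backend) -> bool:
    # True for both the string form and an instantiated LybicBackend.
    return backend == "lybic" or isinstance(backend, LybicBackend)

# e.g.: if platform.system() == "Linux" and not _is_lybic_backend(hwi_para.backend): show the zenity dialog
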