PyPI - lybic-guiagents - Versions diffs - 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

lybic-guiagents 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lybic-guiagents might be problematic. Click here for more details.

Files changed (25)

gui_agents/__init__.py +1 -1
gui_agents/agents/Backend/LybicBackend.py +25 -19
gui_agents/agents/agent_s.py +292 -97
gui_agents/agents/grounding.py +43 -6
gui_agents/agents/manager.py +113 -18
gui_agents/agents/stream_manager.py +163 -0
gui_agents/agents/worker.py +60 -35
gui_agents/cli_app.py +16 -5
gui_agents/core/knowledge.py +36 -5
gui_agents/grpc_app.py +784 -0
gui_agents/proto/__init__.py +3 -0
gui_agents/proto/pb/__init__.py +4 -0
gui_agents/tools/model.md +351 -0
gui_agents/tools/tools.py +80 -39
gui_agents/tools/tools_config.json +101 -0
gui_agents/tools/tools_config_cn.json +101 -0
gui_agents/tools/tools_config_en.json +101 -0
{lybic_guiagents-0.2.2.dist-info → lybic_guiagents-0.3.0.dist-info}/METADATA +86 -8
{lybic_guiagents-0.2.2.dist-info → lybic_guiagents-0.3.0.dist-info}/RECORD +23 -16
lybic_guiagents-0.3.0.dist-info/entry_points.txt +3 -0
gui_agents/lybic_client/__init__.py +0 -0
gui_agents/lybic_client/lybic_client.py +0 -88
{lybic_guiagents-0.2.2.dist-info → lybic_guiagents-0.3.0.dist-info}/WHEEL +0 -0
{lybic_guiagents-0.2.2.dist-info → lybic_guiagents-0.3.0.dist-info}/licenses/LICENSE +0 -0
{lybic_guiagents-0.2.2.dist-info → lybic_guiagents-0.3.0.dist-info}/top_level.txt +0 -0

gui_agents/agents/agent_s.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import asyncio
 import json
 import logging
 import os
@@ -19,9 +20,32 @@ from gui_agents.utils.common_utils import (
     agent_log_to_string,
 )
 from gui_agents.tools.tools import Tools
+from gui_agents.agents.stream_manager import stream_manager
 logger = logging.getLogger("desktopenv.agent")
+def load_config():
+    """
+    Load tool configurations from the repository's tools/tools_config.json and produce a mapping keyed by tool name.
+    Returns:
+        tuple: (tools_config, tools_dict) where `tools_config` is the parsed JSON object from tools_config.json, and `tools_dict` is a dict mapping each tool's `tool_name` to a dict with `provider` and `model`.
+    """
+    # Load tools configuration from tools_config.json
+    tools_config_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "tools", "tools_config.json")
+    with open(tools_config_path, "r") as f:
+        tools_config = json.load(f)
+        print(f"Loaded tools configuration from: {tools_config_path}")
+        tools_dict = {}
+        for tool in tools_config["tools"]:
+            tool_name = tool["tool_name"]
+            tools_dict[tool_name] = {
+                "provider": tool["provider"],
+                "model": tool["model_name"]
+            }
+        print(f"Tools configuration: {tools_dict}")
+        return tools_config,tools_dict
 class UIAgent:
     """Base class for UI automation agents"""
@@ -37,18 +61,28 @@ class UIAgent:
         self.platform = platform
     def reset(self) -> None:
-        """Reset agent state"""
+        """
+        Reset the agent to its initial internal state.
+        Performs any subclass-specific reinitialization needed so the agent is ready to start a new task or episode.
+        """
         pass
-    def predict(self, instruction: str, observation: Dict) -> Tuple[Dict, List[str]]|None:
-        """Generate next action prediction
+    def _send_stream_message(self, task_id: str, stage: str, message: str) -> None:
+        """
+        Safely send stream message to task stream.
+        """
+        if not task_id:
+            return
-        Args:
-            instruction: Natural language instruction
-            observation: Current UI state observation
+        stream_manager.add_message_threadsafe(task_id, stage, message)
+    def predict(self, instruction: str, observation: Dict) -> Tuple[Dict, List[str]]|None:
+        """
+        Produce the next agent information and action sequence for the given instruction and current observation.
         Returns:
-            Tuple containing agent info dictionary and list of actions
+            (info, actions) where `info` is a dictionary containing planner, executor and evaluator metadata (including subtask metadata and statuses) and `actions` is a list of action strings to execute; returns `None` if no prediction is available.
         """
         pass
@@ -84,16 +118,15 @@ class AgentS2(UIAgent):
         kb_release_tag: str = "v0.2.2",
         enable_takeover: bool = False,
         enable_search: bool = True,
+        tools_config: dict | None = None,
     ):
-        """Initialize AgentS2
-        Args:
-            platform: Operating system platform (darwin, linux, windows)
-            memory_root_path: Path to memory directory. Defaults to current working directory.
-            memory_folder_name: Name of memory folder. Defaults to "kb_s2".
-            kb_release_tag: Release tag for knowledge base. Defaults to "v0.2.2".
-            enable_takeover: Whether to enable user takeover functionality. Defaults to False.
-            enable_search: Whether to enable web search functionality. Defaults to True.
+        """
+        Initialize an AgentS2 instance and prepare its tools and local knowledge base.
+        If `tools_config` is provided, build `Tools_dict` mapping each `tool_name` to its config (renaming `model_name` to `model` and removing `tool_name`). If `tools_config` is not provided, load configuration via `load_config()`. Ensure a platform-specific knowledge base directory exists under `memory_root_path/memory_folder_name` (creating it if missing). Sets initial attributes (platform, screen_size, memory paths, flags) and initializes internal state via `reset()`.
+        Parameters:
+            tools_config (dict | None): Optional pre-loaded tools configuration; when present it is transformed into `Tools_dict`. Omit to load configuration from disk.
         """
         super().__init__(
             platform,
@@ -105,20 +138,24 @@ class AgentS2(UIAgent):
         self.screen_size = screen_size
         self.enable_takeover = enable_takeover
         self.enable_search = enable_search
+        self.task_id = None  # Will be set when task starts
-        # Load tools configuration from tools_config.json
-        tools_config_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "tools", "tools_config.json")
-        with open(tools_config_path, "r") as f:
-            self.tools_config = json.load(f)
-            print(f"Loaded tools configuration from: {tools_config_path}")
+        if tools_config is not None:
+            self.tools_config = tools_config
+            # Create the dictionary mapping from the list-based config
             self.Tools_dict = {}
             for tool in self.tools_config["tools"]:
                 tool_name = tool["tool_name"]
-                self.Tools_dict[tool_name] = {
-                    "provider": tool["provider"],
-                    "model": tool["model_name"]
-                }
-            print(f"Tools configuration: {self.Tools_dict}")
+                # Create a copy of the tool's config to avoid modifying the original
+                config_copy = tool.copy()
+                # Rename 'model_name' to 'model' for consistency in downstream use
+                if 'model_name' in config_copy:
+                    config_copy['model'] = config_copy.pop('model_name')
+                # Remove tool_name as it's now the key
+                config_copy.pop('tool_name', None)
+                self.Tools_dict[tool_name] = config_copy
+        else:
+            self.tools_config, self.Tools_dict = load_config()
         # Initialize agent's knowledge base path
         self.local_kb_path = os.path.join(
@@ -138,16 +175,22 @@ class AgentS2(UIAgent):
         self.reset()
     def reset(self) -> None:
-        """Reset agent state and initialize components"""
-        # Initialize core components
+        """
+        Reinitialize core components and reset the agent's runtime state.
+        Recreates the Manager, Worker, and Grounding components using the agent's current configuration,
+        resets planning/execution flags and counters, clears subtask-related state, reloads the shared
+        global state from the registry, and propagates the agent's task_id to the components when present.
+        """
+        # Initialize core components
         self.manager = Manager(
             Tools_dict=self.Tools_dict,
             local_kb_path=self.local_kb_path,
             platform=self.platform,
             enable_search=self.enable_search,  # Pass global switch to Manager
         )
         self.worker = Worker(
             Tools_dict=self.Tools_dict,
             local_kb_path=self.local_kb_path,
@@ -178,6 +221,25 @@ class AgentS2(UIAgent):
         self.subtask_status: str = "Start"
         self.global_state: GlobalState = Registry.get("GlobalStateStore") # type: ignore
+        # Pass task_id to components
+        if self.task_id:
+            self.manager.task_id = self.task_id
+            self.worker.task_id = self.task_id
+    def set_task_id(self, task_id: str) -> None:
+        """
+        Set the task identifier and propagate it to internal components used for streaming.
+        Parameters:
+            task_id (str): Identifier for the current task; assigned to this agent and, if present, to its manager and worker so stream messages are tagged consistently.
+        """
+        self.task_id = task_id
+        # Also set task_id for components if they exist
+        if hasattr(self, 'manager') and self.manager:
+            self.manager.task_id = task_id
+        if hasattr(self, 'worker') and self.worker:
+            self.worker.task_id = task_id
     def reset_executor_state(self) -> None:
         """Reset executor and step counter"""
         self.worker.reset()
@@ -185,6 +247,19 @@ class AgentS2(UIAgent):
     def predict(self, instruction: str, observation: Dict) -> Tuple[Dict, List[str]]:
         # Initialize the three info dictionaries
+        """
+        Produce the next executor actions and diagnostic information for the current task step.
+        This method coordinates planning, subtask selection, action generation, grounding (code extraction and execution), and status updates. It may trigger replanning, advance to the next subtask, mark subtasks as completed or failed, and emit stream messages and logs. The returned info merges planner, executor, and evaluator metadata and includes current subtask details.
+        Parameters:
+            instruction (str): The user or system instruction describing the task to accomplish; forwarded to the manager/worker as the task utterance.
+            observation (Dict): Current environment observation/state used for grounding and coordinate assignment.
+        Returns:
+            info (Dict): A merged dictionary containing planner_info, executor_info, evaluator_info and the keys `subtask`, `subtask_info`, and `subtask_status`.
+            actions (List[Dict]): List of action dictionaries produced for execution (may include actions with type "DONE", failure indicators, or other executor-generated actions).
+        """
         planner_info = {}
         executor_info = {}
         evaluator_info = {
@@ -209,6 +284,10 @@ class AgentS2(UIAgent):
             # If replan is true, generate a new plan. True at start, after a failed plan, or after subtask completion
             if self.requires_replan:
                 logger.info("(RE)PLANNING...")
+                # Stream planning start message
+                self._send_stream_message(self.task_id, "planning", f"Start planning task steps (Step {self.step_count + 1})...")
                 Manager_info, self.subtasks = self.manager.get_action_queue(
                     Tu=self.global_state.get_Tu(),
                     observation=self.global_state.get_obs_for_manager(),
@@ -224,6 +303,9 @@ class AgentS2(UIAgent):
                     self.search_query = Manager_info["search_query"]
                 else:
                     self.search_query = ""
+                # Stream planning completion message
+                self._send_stream_message(self.task_id, "planning", f"Planning completed, {len(self.subtasks)} subtasks generated")
             get_action_queue_time = time.time() - manager_start
             logger.info(f"[Timing] manager.get_action_queue execution time: {get_action_queue_time:.2f} seconds")
             self.global_state.log_operation(
@@ -253,8 +335,10 @@ class AgentS2(UIAgent):
                         "reflection": "agent.done()",
                     }
                     actions = [{"type": "DONE"}]
-                    # 记录任务完成
+                    # Stream task completion message
+                    self._send_stream_message(self.task_id, "completion", "🎉 Mission Completed! All subtasks have been successfully executed")
                     self.global_state.log_operation(
                         module="agent",
                         operation="task_complete",
@@ -272,40 +356,58 @@ class AgentS2(UIAgent):
                 logger.info(f"REMAINING SUBTASKS FROM GLOBAL STATE: {self.global_state.get_remaining_subtasks()}")
                 self.needs_next_subtask = False
                 self.subtask_status = "Start"
+                # Stream current subtask message
+                if self.current_subtask is not None:
+                    self._send_stream_message(self.task_id, "subtask", f"Start executing subtasks: {self.current_subtask.name}")
+                else:
+                    self._send_stream_message(self.task_id, "subtask", "Start executing a new subtask")
                 self.global_state.log_operation(
                     module="agent",
                     operation="current_subtask",
                     data={
-                        "content": str(self.current_subtask),
+                        "content": str(self.current_subtask) if self.current_subtask is not None else "No active subtask",
                         "status": "start"
                     }
                 )
             worker_start_time = time.time()
+            # Stream action generation start message
+            self._send_stream_message(self.task_id, "thinking", "Generating execution actions...")
             # get the next action from the worker
+            # Handle case where current_subtask might be None
+            subtask_name = self.current_subtask.name if self.current_subtask is not None else "No active subtask"
+            subtask_info = self.current_subtask.info if self.current_subtask is not None else ""
             executor_info = self.worker.generate_next_action(
                 Tu=instruction,
                 search_query=self.search_query,
-                subtask=self.current_subtask.name, # type: ignore
-                subtask_info=self.current_subtask.info, # type: ignore
+                subtask=subtask_name,
+                subtask_info=subtask_info,
                 future_tasks=self.global_state.get_remaining_subtasks(),
                 done_task=self.global_state.get_completed_subtasks(),
                 obs=self.global_state.get_obs_for_manager(),
             )
             worker_execution_time = time.time() - worker_start_time
             self.global_state.log_operation(
                 module="agent",
                 operation="worker_execution",
                 data={
                     "duration": worker_execution_time,
-                    "subtask": self.current_subtask.name # type: ignore
+                    "subtask": self.current_subtask.name if self.current_subtask is not None else "No active subtask" # type: ignore
                 }
             )
+            # Stream action plan message
+            if self.task_id and "executor_plan" in executor_info:
+                plan_preview = executor_info["executor_plan"][:100] + "..." if len(executor_info["executor_plan"]) > 100 else executor_info["executor_plan"]
+                self._send_stream_message(self.task_id, "action_plan", f"Generate an execution plan: {plan_preview}")
             try:
                 grounding_start_time = time.time()
                 current_width, current_height = self.global_state.get_screen_size()
@@ -345,6 +447,11 @@ class AgentS2(UIAgent):
             actions = [exec_code]
+            # Stream action execution message
+            if actions:
+                action_type = actions[0].get("type", "unknown")
+                self._send_stream_message(self.task_id, "action", f"Execute an action: {action_type}")
             self.step_count += 1
             # set the should_send_action flag to True if the executor returns an action
@@ -356,15 +463,22 @@ class AgentS2(UIAgent):
                 self.needs_next_subtask = True
                 # assign the failed subtask
-                self.global_state.add_failed_subtask(self.current_subtask) # type: ignore
+                if self.current_subtask is not None:
+                    self.global_state.add_failed_subtask(self.current_subtask) # type: ignore
                 self.failure_subtask = self.global_state.get_latest_failed_subtask()
+                # Stream failure message
+                if self.current_subtask is not None:
+                    self._send_stream_message(self.task_id, "error", f"Subtask execution failed: {self.current_subtask.name}, will re-plan")
+                else:
+                    self._send_stream_message(self.task_id, "error", "Subtask execution failed and will be re-planned")
                 # 记录失败的子任务
                 self.global_state.log_operation(
                     module="agent",
                     operation="subtask_failed",
                     data={
-                        "content": str(self.current_subtask),
+                        "content": str(self.current_subtask) if self.current_subtask is not None else "Unknown subtask",
                         "status": "failed"
                     }
                 )
@@ -381,14 +495,22 @@ class AgentS2(UIAgent):
                 self.requires_replan = True
                 self.needs_next_subtask = True
                 self.failure_subtask = None
-                self.global_state.add_completed_subtask(self.current_subtask) # type: ignore
+                # add completed subtask only if it exists
+                if self.current_subtask is not None:
+                    self.global_state.add_completed_subtask(self.current_subtask) # type: ignore
+                # Stream subtask completion message
+                if self.current_subtask is not None:
+                    self._send_stream_message(self.task_id, "subtask_complete", f"✅ Subtask completed: {self.current_subtask.name}")
+                else:
+                    self._send_stream_message(self.task_id, "subtask_complete", "✅ Subtask completed")
                 # 记录完成的子任务
                 self.global_state.log_operation(
                     module="agent",
                     operation="subtask_completed",
                     data={
-                        "content": str(self.current_subtask),
+                        "content": str(self.current_subtask) if self.current_subtask is not None else "Unknown subtask",
                         "status": "completed"
                     }
                 )
@@ -414,13 +536,24 @@ class AgentS2(UIAgent):
                 for k, v in d.items()
             }
         }
-        info.update(
-            {
-                "subtask": self.current_subtask.name, # type: ignore
-                "subtask_info": self.current_subtask.info, # type: ignore
-                "subtask_status": self.subtask_status,
-            }
-        )
+        # Handle case where current_subtask might be None
+        if self.current_subtask is not None:
+            info.update(
+                {
+                    "subtask": self.current_subtask.name, # type: ignore
+                    "subtask_info": self.current_subtask.info, # type: ignore
+                    "subtask_status": self.subtask_status,
+                }
+            )
+        else:
+            # Handle None case - provide default values
+            info.update(
+                {
+                    "subtask": "No active subtask",
+                    "subtask_info": "",
+                    "subtask_status": "no_subtask",
+                }
+            )
         # 记录predict函数总执行时间
         predict_total_time = time.time() - predict_start_time
@@ -538,18 +671,23 @@ class AgentSFast(UIAgent):
         enable_takeover: bool = False,
         enable_search: bool = True,
         enable_reflection: bool = True,
+        tools_config: dict | None = None,
         # enable_reflection: bool = False,
     ):
-        """Initialize AgentSFast
-        Args:
-            platform: Operating system platform (darwin, linux, windows)
-            memory_root_path: Path to memory directory. Defaults to current working directory.
-            memory_folder_name: Name of memory folder. Defaults to "kb_s2".
-            kb_release_tag: Release tag for knowledge base. Defaults to "v0.2.2".
-            enable_takeover: Whether to enable user takeover functionality. Defaults to False.
-            enable_search: Whether to enable web search functionality. Defaults to True.
-            enable_reflection: Whether to enable reflection functionality. Defaults to True.
+        """
+        Create and initialize an AgentSFast instance, configuring tools, memory paths, and optional features.
+        Parameters:
+            platform (str): Operating system platform identifier (e.g., "darwin", "linux", "windows"); used to scope platform-specific knowledge base.
+            screen_size (List[int]): Screen width and height used for grounding calculations.
+            memory_root_path (str): Root directory for agent memory storage.
+            memory_folder_name (str): Subfolder name under memory_root_path for this agent's knowledge base.
+            kb_release_tag (str): Knowledge base release tag used for bookkeeping or compatibility.
+            enable_takeover (bool): If True, enable user takeover capabilities in the fast action generator.
+            enable_search (bool): If True, enable web/search-related features when registering tools.
+            enable_reflection (bool): If True, enable trajectory reflection and a reflection agent to summarize agent behavior.
+            tools_config (dict | None): Optional pre-loaded tools configuration; if omitted, configuration is loaded from disk.
         """
         super().__init__(
             platform,
@@ -562,20 +700,24 @@ class AgentSFast(UIAgent):
         self.enable_takeover = enable_takeover
         self.enable_search = enable_search
         self.enable_reflection = enable_reflection
+        self.task_id = None  # Will be set when task starts
-        # Load tools configuration from tools_config.json
-        tools_config_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "tools", "tools_config.json")
-        with open(tools_config_path, "r") as f:
-            self.tools_config = json.load(f)
-            print(f"Loaded tools configuration from: {tools_config_path}")
+        if tools_config is not None:
+            self.tools_config = tools_config
+            # Create the dictionary mapping from the list-based config
             self.Tools_dict = {}
             for tool in self.tools_config["tools"]:
                 tool_name = tool["tool_name"]
-                self.Tools_dict[tool_name] = {
-                    "provider": tool["provider"],
-                    "model": tool["model_name"]
-                }
-            print(f"Tools configuration: {self.Tools_dict}")
+                # Create a copy of the tool's config to avoid modifying the original
+                config_copy = tool.copy()
+                # Rename 'model_name' to 'model' for consistency in downstream use
+                if 'model_name' in config_copy:
+                    config_copy['model'] = config_copy.pop('model_name')
+                # Remove tool_name as it's now the key
+                config_copy.pop('tool_name', None)
+                self.Tools_dict[tool_name] = config_copy
+        else:
+            self.tools_config, self.Tools_dict = load_config()
         # Initialize agent's knowledge base path
         self.local_kb_path = os.path.join(
@@ -594,21 +736,25 @@ class AgentSFast(UIAgent):
         self.reset()
     def reset(self) -> None:
-        """Reset agent state and initialize components"""
+        """
+        Reinitialize the fast-agent components and reset internal runtime state.
+        Initializes and registers the fast action generator tool (and traj_reflector if reflection is enabled), configures search/auth parameters from tool configuration, creates or updates the grounding subsystem with resolved grounding dimensions, resets counters and runtime references (step_count, turn_count, latest_action, global_state), and propagates the current task_id to any registered tools.
+        """
         # Initialize the fast action generator tool
         self.fast_action_generator = Tools()
         self.fast_action_generator_tool = "fast_action_generator_with_takeover" if self.enable_takeover else "fast_action_generator"
         # Get tool configuration from tools_config
         tool_config = None
         for tool in self.tools_config["tools"]:
             if tool["tool_name"] == self.fast_action_generator_tool:
                 tool_config = tool
                 break
         # Prepare tool parameters
         tool_params = {}
         # First check global search switch
         if not self.enable_search:
             # If global search is disabled, force disable search for this tool
@@ -622,15 +768,28 @@ class AgentSFast(UIAgent):
                 tool_params["enable_search"] = enable_search
                 tool_params["search_provider"] = tool_config.get("search_provider", "bocha")
                 tool_params["search_model"] = tool_config.get("search_model", "")
                 logger.info(f"Configuring {self.fast_action_generator_tool} with search enabled: {enable_search} (from config)")
-        # Register the tool with parameters
+        # Get base config from Tools_dict
+        tool_config = self.Tools_dict[self.fast_action_generator_tool].copy()
+        provider = tool_config.get("provider")
+        model = tool_config.get("model")
+        # Merge with search-related parameters
+        all_params = {**tool_config, **tool_params}
+        auth_keys = ['api_key', 'base_url', 'endpoint_url', 'azure_endpoint', 'api_version']
+        for key in auth_keys:
+            if key in all_params:
+                logger.info(f"AgentSFast.reset: Setting {key} for fast_action_generator_tool")
+        # Register the tool with all parameters
         self.fast_action_generator.register_tool(
-            self.fast_action_generator_tool,
-            self.Tools_dict[self.fast_action_generator_tool]["provider"],
-            self.Tools_dict[self.fast_action_generator_tool]["model"],
-            **tool_params
+            self.fast_action_generator_tool,
+            provider,
+            model,
+            **all_params
         )
         if self.enable_reflection:
@@ -660,15 +819,38 @@ class AgentSFast(UIAgent):
         self.global_state: GlobalState = Registry.get("GlobalStateStore") # type: ignore
         self.latest_action = None
-    def predict(self, instruction: str, observation: Dict) -> Tuple[Dict, List[str]]:
-        """Generate next action prediction using only the fast_action_generator tool
+        # Pass task_id to tools if available
+        if self.task_id:
+            self.fast_action_generator.task_id = self.task_id
+            if self.enable_reflection and hasattr(self, 'reflection_agent'):
+                self.reflection_agent.task_id = self.task_id
-        Args:
-            instruction: Natural language instruction
-            observation: Current UI state observation
+    def set_task_id(self, task_id: str) -> None:
+        """
+        Store the task identifier on the agent and propagate it to subcomponents that use it.
+        Parameters:
+            task_id (str): Identifier for the active task; assigned to this agent and, if present, to
+                `fast_action_generator` and `reflection_agent`.
+        """
+        self.task_id = task_id
+        # Also set task_id for components if they exist
+        if hasattr(self, 'fast_action_generator') and self.fast_action_generator:
+            self.fast_action_generator.task_id = task_id
+        if hasattr(self, 'reflection_agent') and self.reflection_agent:
+            self.reflection_agent.task_id = task_id
+    def predict(self, instruction: str, observation: Dict) -> Tuple[Dict, List[str]]:
+        """
+        Generate the next executor plan and corresponding actions using the configured fast action generator.
+        Parameters:
+        	instruction (str): Natural language task description.
+        	observation (Dict): Current UI state; must include a "screenshot" entry with the screen image.
         Returns:
-            Tuple containing agent info dictionary and list of actions
+        	executor_info (dict): Contains at least the keys `executor_plan` (raw plan text), `reflection` (reflection text or empty string), and `plan_code` (the latest extracted/used action code).
+        	actions (List[dict]): List of action dictionaries produced by grounding execution; typically a single action dict describing the operation to perform.
         """
         import time
         predict_start_time = time.time()
@@ -725,11 +907,14 @@ class AgentSFast(UIAgent):
         generator_message = textwrap.dedent(f"""
             Task Description: {instruction}
         """)
         generator_message += f"\n\nPlease refer to the agent log to understand the progress and context of the task so far.\n{agent_log}"
         fast_action_start_time = time.time()
+        # Stream action generation start message
+        self._send_stream_message(self.task_id, "thinking", "Generating execution actions quickly...")
         plan, total_tokens, cost_string = self.fast_action_generator.execute_tool(
             self.fast_action_generator_tool,
             {
@@ -738,9 +923,9 @@ class AgentSFast(UIAgent):
             }
         )
         self.fast_action_generator.reset(self.fast_action_generator_tool)
         fast_action_execution_time = time.time() - fast_action_start_time
         self.global_state.log_operation(
             module="agent",
             operation="fast_action_execution",
@@ -750,7 +935,12 @@ class AgentSFast(UIAgent):
                 "cost": cost_string
             }
         )
+        # Stream action plan message
+        if self.task_id:
+            plan_preview = plan[:100] + "..." if len(plan) > 100 else plan
+            self._send_stream_message(self.task_id, "action_plan", f"Quickly generate execution plans: {plan_preview}")
         logger.info("Fast Action Plan: %s", plan)
         current_width, current_height = self.global_state.get_screen_size()
@@ -809,13 +999,18 @@ class AgentSFast(UIAgent):
         self.step_count += 1
         self.turn_count += 1
+        # Stream action execution message
+        if actions:
+            action_type = actions[0].get("type", "unknown")
+            self._send_stream_message(self.task_id, "action", f"Execute an action: {action_type}")
         executor_info = {
             "executor_plan": plan,
             "reflection": reflection or "",
             "plan_code": self.latest_action
         }
         predict_total_time = time.time() - predict_start_time
         self.global_state.log_operation(
             module="agent",
@@ -827,4 +1022,4 @@ class AgentSFast(UIAgent):
             }
         )
-        return executor_info, actions
+        return executor_info, actions

lybic-guiagents 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

Potentially problematic release.

lybic-guiagents 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl