lybic-guiagents 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lybic-guiagents might be problematic.

@@ -32,6 +32,20 @@ class Grounding(ACI):
          width: int = 1920,
          height: int = 1080,
      ):
+         """
+         Initialize a Grounding instance: configure screen dimensions, prepare tool instances, and load global state.
+
+         Parameters:
+             Tools_dict (Dict): Mapping of tool names to their configuration dictionaries used to register tools.
+             platform (str): Target platform identifier (e.g., "windows", "macos") used by the grounding agents.
+             width (int): Current screen width in pixels.
+             height (int): Current screen height in pixels.
+
+         Detailed behavior:
+             - Creates and registers two Tools instances ("grounding" and "text_span") using entries from Tools_dict; registration will include any authentication-related parameters present in the tool configuration.
+             - Obtains grounding tool dimensions (grounding_width, grounding_height) and falls back to the provided width and height when the grounding tool does not supply them.
+             - Initializes coordinate placeholders (coords1, coords2) and stores a reference to the global state store.
+         """
          self.platform = platform
          self.Tools_dict = Tools_dict
          self.width = width
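
The docstring above says registration pulls any authentication-related parameters out of the Tools_dict entries; the `_register` helper added in the next hunk does this by popping `provider` and `model` and forwarding whatever remains to `register_tool`. A minimal sketch of such an entry and of that split; the key names mirror what the helper inspects, but all values here are placeholders, not taken from the package:

# Hypothetical Tools_dict entry, for illustration only.
tools_dict = {
    "grounding": {
        "provider": "openai",                      # popped, passed positionally to register_tool
        "model": "example-grounding-model",        # popped, passed positionally to register_tool
        "api_key": "sk-placeholder",               # recognized auth key, forwarded as a kwarg
        "base_url": "https://example.invalid/v1",  # recognized auth key, forwarded as a kwarg
        "temperature": 0.0,                        # any other key also rides along in **all_params
    }
}

config = tools_dict.get("grounding", {}).copy()
provider = config.pop("provider", None)   # "openai"
model = config.pop("model", None)         # "example-grounding-model"
# Everything still in config, auth keys included, is what register_tool receives as keyword arguments.
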
@@ -39,10 +53,35 @@ class Grounding(ACI):
          self.coords1 = None
          self.coords2 = None

+         def _register(tools_instance, tool_name):
+             """
+             Register a tool into the provided tools instance using configuration from Tools_dict.
+
+             Reads the tool configuration for `tool_name` from the surrounding `Tools_dict`, extracts optional `provider` and `model`, collects common authentication parameters (api_key, base_url, endpoint_url, azure_endpoint, api_version), merges them with any remaining configuration, logs the registration, and calls tools_instance.register_tool with the assembled parameters.
+
+             Parameters:
+                 tools_instance: The tools manager/registry instance that exposes register_tool(tool_name, provider, model, **params).
+                 tool_name (str): Key name of the tool in Tools_dict whose configuration will be used to register the tool.
+             """
+             config = Tools_dict.get(tool_name, {}).copy()
+             provider = config.pop("provider", None)
+             model = config.pop("model", None)
+
+             auth_keys = ['api_key', 'base_url', 'endpoint_url', 'azure_endpoint', 'api_version']
+             auth_params = {}
+             for key in auth_keys:
+                 if key in config:
+                     auth_params[key] = config[key]
+                     logger.info(f"Grounding._register: Setting {key} for tool '{tool_name}'")
+
+             # Merge all parameters
+             all_params = {**config, **auth_params}
+
+             logger.info(f"Grounding._register: Registering tool '{tool_name}' with provider '{provider}', model '{model}'")
+             tools_instance.register_tool(tool_name, provider, model, **all_params)
+
          self.grounding_model = Tools()
-         self.grounding_model.register_tool(
-             "grounding", self.Tools_dict["grounding"]["provider"],
-             self.Tools_dict["grounding"]["model"])
+         _register(self.grounding_model, "grounding")

          self.grounding_width, self.grounding_height = self.grounding_model.tools[
              "grounding"].get_grounding_wh()
@@ -51,9 +90,7 @@ class Grounding(ACI):
              self.grounding_height = self.height

          self.text_span_agent = Tools()
-         self.text_span_agent.register_tool(
-             "text_span", self.Tools_dict["text_span"]["provider"],
-             self.Tools_dict["text_span"]["model"])
+         _register(self.text_span_agent, "text_span")

          self.global_state: GlobalState = Registry.get(
              "GlobalStateStore") # type: ignore
@@ -14,6 +14,7 @@ from gui_agents.utils.common_utils import (
      agent_log_to_string,
  )
  from gui_agents.tools.tools import Tools
+ from gui_agents.agents.stream_manager import stream_manager

  logger = logging.getLogger("desktopenv.agent")

@@ -29,31 +30,68 @@ class Manager:
          platform: str = platform.system().lower(),
          enable_search: bool = True,
      ):
+         """
+         Initialize the Manager which orchestrates planning, knowledge retrieval/fusion, DAG generation, topological sorting, and action queue creation for task-driven agents.
+
+         Parameters:
+             Tools_dict (Dict): Mapping of tool names to their configuration dictionaries; used to register and configure internal Tools instances (e.g., 'subtask_planner', 'dag_translator', 'embedding', 'websearch').
+             local_kb_path (str): Filesystem path to the local knowledge base storage used by the KnowledgeBase.
+             multi_round (bool): When True, enable multi-round interaction/stateful planning behavior across turns.
+             platform (str): Target platform identifier (defaults to current system name); forwarded to KnowledgeBase and tools where applicable.
+             enable_search (bool): When True, register and enable a web search tool ('websearch'); otherwise search functionality is disabled.
+         """
          self.platform = platform
          self.Tools_dict = Tools_dict

+         def _register(tools_instance, tool_name):
+             """
+             Register a tool with the provided tools manager using settings from Tools_dict.
+
+             Parameters:
+                 tools_instance: An object exposing register_tool(name, provider, model, **kwargs) used to register the tool.
+                 tool_name (str): Key to look up the tool's configuration in Tools_dict; provider, model, and supported authentication keys
+                     (e.g., 'api_key', 'base_url', 'endpoint_url', 'azure_endpoint', 'api_version') will be extracted and passed to register_tool.
+
+             """
+             config = Tools_dict.get(tool_name, {}).copy()
+             provider = config.pop("provider", None)
+             model = config.pop("model", None)
+
+             auth_keys = ['api_key', 'base_url', 'endpoint_url', 'azure_endpoint', 'api_version']
+             auth_params = {}
+             for key in auth_keys:
+                 if key in config:
+                     auth_params[key] = config[key]
+                     logger.info(f"Manager._register: Setting {key} for tool '{tool_name}'")
+
+             all_params = {**config, **auth_params}
+
+             logger.info(f"Manager._register: Registering tool '{tool_name}' with provider '{provider}', model '{model}'")
+             tools_instance.register_tool(tool_name, provider, model, **all_params)
+
          self.generator_agent = Tools()
-         self.generator_agent.register_tool("subtask_planner", Tools_dict["subtask_planner"]["provider"], Tools_dict["subtask_planner"]["model"])
+         _register(self.generator_agent, "subtask_planner")

          self.dag_translator_agent = Tools()
-         self.dag_translator_agent.register_tool("dag_translator", self.Tools_dict["dag_translator"]["provider"], self.Tools_dict["dag_translator"]["model"])
+         _register(self.dag_translator_agent, "dag_translator")

          self.narrative_summarization_agent = Tools()
-         self.narrative_summarization_agent.register_tool("narrative_summarization", self.Tools_dict["narrative_summarization"]["provider"], self.Tools_dict["narrative_summarization"]["model"])
+         _register(self.narrative_summarization_agent, "narrative_summarization")

          self.episode_summarization_agent = Tools()
-         self.episode_summarization_agent.register_tool("episode_summarization", self.Tools_dict["episode_summarization"]["provider"], self.Tools_dict["episode_summarization"]["model"])
+         _register(self.episode_summarization_agent, "episode_summarization")

          self.local_kb_path = local_kb_path

          self.embedding_engine = Tools()
-         self.embedding_engine.register_tool("embedding", self.Tools_dict["embedding"]["provider"], self.Tools_dict["embedding"]["model"])
+         _register(self.embedding_engine, "embedding")
+
          KB_Tools_dict = {
-             "embedding": self.Tools_dict["embedding"],
-             "query_formulator": self.Tools_dict["query_formulator"],
-             "context_fusion": self.Tools_dict["context_fusion"],
-             "narrative_summarization": self.Tools_dict["narrative_summarization"],
-             "episode_summarization": self.Tools_dict["episode_summarization"],
+             "embedding": self.Tools_dict.get("embedding"),
+             "query_formulator": self.Tools_dict.get("query_formulator"),
+             "context_fusion": self.Tools_dict.get("context_fusion"),
+             "narrative_summarization": self.Tools_dict.get("narrative_summarization"),
+             "episode_summarization": self.Tools_dict.get("episode_summarization"),
          }


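One behavioral consequence of the KB_Tools_dict change above: `self.Tools_dict[...]` raises `KeyError` at Manager construction when a tool entry is missing, whereas `.get(...)` silently yields `None` for that slot. A generic illustration, not package code:

tools = {"embedding": {"provider": "openai", "model": "m"}}

try:
    tools["query_formulator"]              # old behavior: KeyError raised immediately
except KeyError:
    pass

assert tools.get("query_formulator") is None   # new behavior: missing entry becomes None
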
@@ -69,20 +107,40 @@ class Manager:
          self.planner_history = []

          self.turn_count = 0
-
+         self.task_id = None # Will be set by agent
+
          # Initialize search engine based on enable_search parameter
          if enable_search:
              self.search_engine = Tools()
-             self.search_engine.register_tool("websearch", self.Tools_dict["websearch"]["provider"], self.Tools_dict["websearch"]["model"])
+             _register(self.search_engine, "websearch")
          else:
              self.search_engine = None

          self.multi_round = multi_round

+     def _send_stream_message(self, task_id: str, stage: str, message: str) -> None:
+         """
+         Enqueue a stream message for the given task if a task ID is provided.
+
+         Parameters:
+             task_id (str): Identifier of the task stream; no message is sent if empty.
+             stage (str): Stage label for the message.
+             message (str): Message content to enqueue.
+         """
+         if not task_id:
+             return
+
+         stream_manager.add_message_threadsafe(task_id, stage, message)
+
      def summarize_episode(self, trajectory):
-         """Summarize the episode experience for lifelong learning reflection
-         Args:
-             trajectory: str: The episode experience to be summarized
+         """
+         Create a concise summary of the provided episode trajectory for lifelong learning and reflection.
+
+         Parameters:
+             trajectory (str): Serialized episode experience or trajectory to summarize.
+
+         Returns:
+             subtask_summarization (str): A short summary highlighting key subtasks, lessons, or reflections from the episode.
          """

          # Create Reflection on whole trajectories for next round trial, keep earlier messages as exemplars
@@ -99,12 +157,19 @@ class Manager:
              }
          )

+         self._send_stream_message(self.task_id, "summarization", f"Episode summarization: {subtask_summarization}")
+
          return subtask_summarization

      def summarize_narrative(self, trajectory):
-         """Summarize the narrative experience for lifelong learning reflection
-         Args:
-             trajectory: str: The narrative experience to be summarized
+         """
+         Produce a concise reflective summary of a narrative trajectory to inform lifelong learning.
+
+         Parameters:
+             trajectory: Narrative content (e.g., episode transcript or sequence of subtasks) to be summarized.
+
+         Returns:
+             A string containing a reflective summary that captures key insights, lessons learned, and recommendations for future rounds.
          """
          # Create Reflection on whole trajectories for next round trial
          lifelong_learning_reflection, total_tokens, cost_string = self.narrative_summarization_agent.execute_tool("narrative_summarization", {"str_input": trajectory})
@@ -131,6 +196,27 @@ class Manager:
          remaining_subtasks_list: List[Node] = [],
      ) -> Tuple[Dict, str]:

+         """
+         Generate a high-level, step-by-step plan for the given task, optionally incorporating retrieved knowledge and the current subtask state.
+
+         Parameters:
+             observation (Dict): Current environment/desktop state; may include a 'screenshot' key with image data used for planning.
+             instruction (str): Natural-language task description to plan for.
+             failed_subtask (Optional[Node]): If provided, indicates a subtask that failed and triggers replanning for the remainder.
+             completed_subtasks_list (List[Node]): Ordered list of subtasks already completed; used to inform replanning.
+             remaining_subtasks_list (List[Node]): Ordered list of subtasks still expected; used to inform replanning.
+
+         Returns:
+             planner_info (Dict): Metadata about the planning step (includes at least 'search_query' and 'goal_plan').
+             plan (str): The generated high-level plan as a human-readable string.
+
+         Side effects:
+             - May perform retrieval and knowledge fusion on the first planning turn.
+             - Records operations to global_state, appends the plan to self.planner_history, increments self.turn_count, and sends stream messages when self.task_id is set.
+
+         Raises:
+             Exception: If plan generation produces an empty plan.
+         """
          import time
          step_start = time.time()
          # Converts a list of DAG Nodes into a natural langauge list
@@ -275,6 +361,10 @@ class Manager:
          logger.info("GENERATING HIGH LEVEL PLAN")

          subtask_planner_start = time.time()
+
+         # Stream subtask planning message
+         self._send_stream_message(self.task_id, "planning", "Analyzing tasks and generating subtask plans...")
+
          plan, total_tokens, cost_string = self.generator_agent.execute_tool("subtask_planner", {"str_input": generator_message, "img_input": observation.get("screenshot", None)})
          logger.info(f"Subtask planner tokens: {total_tokens}, cost: {cost_string}")
          subtask_planner_time = time.time() - subtask_planner_start
@@ -289,6 +379,11 @@ class Manager:
                  "duration": subtask_planner_time
              }
          )
+
+         # Stream planning completion message
+         if self.task_id:
+             plan_preview = plan[:150] + "..." if len(plan) > 150 else plan
+             self._send_stream_message(self.task_id, "planning", f"Subtask planning completed: {plan_preview}")

          step_time = time.time() - step_start
          logger.info(f"[Timing] Manager._generate_step_by_step_plan execution time: {step_time:.2f} seconds")
@@ -0,0 +1,163 @@
+ """
+ Stream manager for per-task progress messaging.
+
+ This module provides a global `stream_manager` singleton that manages
+ async message queues for task-based streaming. The singleton is async-safe
+ and should have its event loop configured via `set_loop()` during application
+ startup.
+ """
+ import asyncio
+ from google.protobuf.timestamp_pb2 import Timestamp
+ from typing import Dict, Optional, AsyncGenerator
+ from dataclasses import dataclass
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class StreamMessage:
+     stage: str
+     message: str
+     timestamp: Timestamp
+
+
+ class StreamManager:
+     """
+     Manages in-memory async message queues for each task to stream progress.
+     This class is async-safe.
+     """
+
+     def __init__(self, max_queue_size: int = 100):
+         """
+         Initialize a StreamManager that manages per-task in-memory async message queues.
+
+         Parameters:
+             max_queue_size (int): Maximum number of messages to keep per task queue; when a queue is full the oldest message will be dropped to make room for new messages.
+         """
+         self.task_queues: Dict[str, asyncio.Queue[Optional[StreamMessage]]] = {}
+         self.max_queue_size = max_queue_size
+         self._lock = asyncio.Lock()
+         self.loop: Optional[asyncio.AbstractEventLoop] = None
+
+     def set_loop(self, loop: asyncio.AbstractEventLoop):
+         """
+         Store the event loop used to schedule coroutines from non-async threads.
+
+         Parameters:
+             loop (asyncio.AbstractEventLoop): Event loop passed to asyncio.run_coroutine_threadsafe for thread-safe coroutine execution.
+         """
+         self.loop = loop
+
+     def add_message_threadsafe(self, task_id: str, stage: str, message: str):
+         """
+         Enqueue a progress message for a task from a non-async thread in a thread-safe manner.
+
+         If the manager's event loop has not been set, an error is logged and the message is not scheduled.
+
+         Parameters:
+             task_id (str): Identifier of the task to receive the message.
+             stage (str): Stage label for the progress update.
+             message (str): Text of the progress message.
+         """
+         if not self.loop:
+             logger.error("StreamManager event loop not set. Cannot send message from thread.")
+             return
+
+         asyncio.run_coroutine_threadsafe(
+             self.add_message(task_id, stage, message),
+             self.loop
+         )
+
+     async def add_message(self, task_id: str, stage: str, message: str):
+         """
+         Enqueues a progress message for the given task; if the task's queue is full, drops the oldest message to make room.
+
+         Parameters:
+             task_id (str): Identifier of the task whose queue will receive the message.
+             stage (str): Short stage name or label for the message.
+             message (str): Human-readable progress message.
+         """
+         async with self._lock:
+             q = self.task_queues.get(task_id)
+
+         if q:
+             timestamp = Timestamp()
+             timestamp.GetCurrentTime()
+             msg = StreamMessage(stage=stage, message=message, timestamp=timestamp)
+             try:
+                 q.put_nowait(msg)
+             except asyncio.QueueFull:
+                 logger.warning(f"Message queue for task {task_id} is full. Dropping oldest message.")
+                 # Drop the oldest message to make space for the new one
+                 q.get_nowait()
+                 q.put_nowait(msg)
+
+         else:
+             logger.warning(f"No message queue found for task {task_id}. Message not added.")
+
+     async def get_message_stream(self, task_id: str) -> AsyncGenerator[StreamMessage, None]:
+         """
+         Provide an async generator that yields progress messages for the given task.
+
+         If the task has no existing queue, one is created and registered. The generator yields StreamMessage objects produced for the task and terminates when a sentinel `None` is received, signaling end of stream.
+
+         Parameters:
+             task_id (str): Identifier of the task whose message stream to consume.
+
+         Returns:
+             AsyncGenerator[StreamMessage, None]: An async generator yielding `StreamMessage` instances for the task; iteration ends when a sentinel `None` is encountered.
+         """
+         async with self._lock:
+             if task_id not in self.task_queues:
+                 self.task_queues[task_id] = asyncio.Queue(maxsize=self.max_queue_size)
+                 logger.info(f"Registered message queue for task {task_id} in get_message_stream.")
+             q = self.task_queues[task_id]
+
+         while True:
+             message = await q.get()
+             if message is None: # Sentinel value indicates end of stream
+                 logger.info(f"End of stream for task {task_id}")
+                 break
+             yield message
+
+     async def register_task(self, task_id: str):
+         """
+         Create a per-task message queue if one does not already exist.
+
+         This is idempotent: if a queue for the given task_id already exists, the call has no effect. The created queue uses the manager's configured max_queue_size and the operation is safe to call concurrently.
+
+         Parameters:
+             task_id (str): Unique identifier of the task to register a message queue for.
+         """
+         async with self._lock:
+             if task_id not in self.task_queues:
+                 self.task_queues[task_id] = asyncio.Queue(maxsize=self.max_queue_size)
+                 logger.info(f"Registered message queue for task {task_id}")
+
+     async def unregister_task(self, task_id: str):
+         """Removes a task's message queue and signals end of stream."""
+         q = None
+         async with self._lock:
+             if task_id in self.task_queues:
+                 q = self.task_queues.pop(task_id)
+                 logger.info(f"Unregistered message queue for task {task_id}")
+         if q:
+             try:
+                 # Put a sentinel value to unblock any consumers
+                 q.put_nowait(None)
+             except asyncio.QueueFull:
+                 # If full, make space for sentinel
+                 try:
+                     q.get_nowait()
+                 except asyncio.QueueEmpty:
+                     pass
+                 # Retry put after making space or if queue became empty
+                 try:
+                     q.put_nowait(None)
+                 except asyncio.QueueFull:
+                     logger.error(f"Could not send sentinel for task {task_id}: queue still full after retry")
+
+
+ # Global instance to be used across the application
+ stream_manager = StreamManager()
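
Read together, the new module implies this lifecycle: `set_loop()` once at application startup, `register_task()` per task, producers in worker threads calling `add_message_threadsafe()`, an async consumer iterating `get_message_stream()`, and `unregister_task()` pushing the `None` sentinel that ends the stream. A minimal, self-contained sketch of that flow; everything outside `stream_manager` itself is illustrative:

import asyncio

from gui_agents.agents.stream_manager import stream_manager

async def consume(task_id: str):
    # Ends when unregister_task() enqueues the None sentinel.
    async for msg in stream_manager.get_message_stream(task_id):
        print(f"[{msg.stage}] {msg.message}")

def produce(task_id: str):
    # Runs in a plain thread, the way Manager._send_stream_message reports progress.
    stream_manager.add_message_threadsafe(task_id, "planning", "Analyzing tasks and generating subtask plans...")
    stream_manager.add_message_threadsafe(task_id, "planning", "Subtask planning completed")

async def main():
    task_id = "demo-task"
    stream_manager.set_loop(asyncio.get_running_loop())   # required before any threadsafe call
    await stream_manager.register_task(task_id)

    consumer = asyncio.create_task(consume(task_id))
    await asyncio.to_thread(produce, task_id)              # simulate a worker thread producing messages
    await asyncio.sleep(0.1)                               # let the scheduled add_message() coroutines run
    await stream_manager.unregister_task(task_id)          # sends the sentinel; the consumer exits
    await consumer

asyncio.run(main())
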
@@ -35,24 +35,17 @@ class Worker:
          tools_config: Dict = {},
      ):
          """
-         Worker receives a subtask list and active subtask and generates the next action for the to execute.
-         Args:
-             engine_params: Dict
-                 Parameters for the multimodal engine
-             local_kb_path: str
-                 Path to knowledge base
-             platform: str
-                 OS platform the agent runs on (darwin, linux, windows)
-             enable_reflection: bool
-                 Whether to enable reflection
-             use_subtask_experience: bool
-                 Whether to use subtask experience
-             enable_takeover: bool
-                 Whether to enable user takeover functionality
-             enable_search: bool
-                 Global switch for search functionality (overrides config)
-             tools_config: Dict
-                 Complete tools configuration from tools_config.json
+         Initialize a Worker that generates executor actions using the provided tools, local knowledge base, and optional reflection, episodic experience, takeover, and search features.
+
+         Parameters:
+             Tools_dict (Dict): Mapping of tool names to tool instances/configurations used by the Worker.
+             local_kb_path (str): Filesystem path to the local knowledge base to use for retrieval.
+             platform (str): Operating system identifier the agent runs on (e.g., 'darwin', 'linux', 'windows').
+             enable_reflection (bool): If True, enable trajectory reflection generation and use its output when producing actions.
+             use_subtask_experience (bool): If True, attempt to retrieve and incorporate episodic/subtask experience on the first turn.
+             enable_takeover (bool): If True, use the takeover-capable action generator tool when producing actions.
+             enable_search (bool): Global switch that forces search-enabled tools to run with search disabled when False.
+             tools_config (Dict): Tools configuration mapping; if None, the Worker loads tools_config.json from the package tools directory.
          """
          # super().__init__(engine_params, platform)
          self.platform = platform
@@ -72,11 +65,6 @@ class Worker:
          else:
              self.tools_config = tools_config

-         self.embedding_engine = Tools()
-         self.embedding_engine.register_tool(
-             "embedding", self.Tools_dict["embedding"]["provider"],
-             self.Tools_dict["embedding"]["model"])
-
          self.enable_reflection = enable_reflection
          self.use_subtask_experience = use_subtask_experience
          self.global_state: GlobalState = Registry.get(
@@ -85,6 +73,31 @@ class Worker:

      def reset(self):

+         """
+         Initialize the worker's tool agents, knowledge base, and internal state for a new task session.
+
+         This method registers the action generator (with optional takeover variant), trajectory reflector, and embedding engine using a local helper that merges tool configuration with any overrides and propagates authentication parameters; it initializes the KnowledgeBase with the embedding engine and toolkit, configures search-related parameters for the action generator according to global and per-tool settings, and resets runtime state fields (turn count, histories, reflections, cost tracking, screenshot inputs, planner history, latest action, trajectory length limit, and task_id).
+         """
+         def _register(tools_instance, tool_name, **override_kwargs):
+             config = self.Tools_dict.get(tool_name, {}).copy()
+             provider = config.pop("provider", None)
+             model = config.pop("model", None)
+
+             # Merge with any explicit overrides
+             config.update(override_kwargs)
+
+             auth_params = {}
+             auth_keys = ['api_key', 'base_url', 'endpoint_url', 'azure_endpoint', 'api_version']
+             for key in auth_keys:
+                 if key in config:
+                     auth_params[key] = config[key]
+                     logger.info(f"Worker._register: Setting {key} for tool '{tool_name}'")
+
+             all_params = {**config, **auth_params}
+
+             logger.info(f"Worker._register: Registering tool '{tool_name}' with provider '{provider}', model '{model}'")
+             tools_instance.register_tool(tool_name, provider, model, **all_params)
+
          self.generator_agent = Tools()
          self.action_generator_tool = "action_generator_with_takeover" if self.enable_takeover else "action_generator"

@@ -121,20 +134,14 @@ class Worker:
          )

          # Register the tool with parameters
-         self.generator_agent.register_tool(
-             self.action_generator_tool,
-             self.Tools_dict[self.action_generator_tool]["provider"],
-             self.Tools_dict[self.action_generator_tool]["model"], **tool_params)
+         _register(self.generator_agent, self.action_generator_tool, **tool_params)

          self.reflection_agent = Tools()
-         self.reflection_agent.register_tool(
-             "traj_reflector", self.Tools_dict["traj_reflector"]["provider"],
-             self.Tools_dict["traj_reflector"]["model"])
+         _register(self.reflection_agent, "traj_reflector")

          self.embedding_engine = Tools()
-         self.embedding_engine.register_tool(
-             "embedding", self.Tools_dict["embedding"]["provider"],
-             self.Tools_dict["embedding"]["model"])
+         _register(self.embedding_engine, "embedding")
+
          self.knowledge_base = KnowledgeBase(
              embedding_engine=self.embedding_engine,
              Tools_dict=self.Tools_dict,
@@ -150,6 +157,7 @@ class Worker:
          self.planner_history = []
          self.latest_action = None
          self.max_trajector_length = 8
+         self.task_id = None # Will be set by agent

      def generate_next_action(
          self,
@@ -163,7 +171,24 @@ class Worker:
          running_state: str = "running",
      ) -> Dict:
          """
-         Predict the next action(s) based on the current observation.
+         Generate the next executor action plan and related metadata for the current subtask given the observation and context.
+
+         Parameters:
+             Tu (str): Full task description or task context.
+             search_query (str): Search string used for retrieving episodic/subtask experience.
+             subtask (str): Current subtask instruction/description to complete.
+             subtask_info (str): Additional information or constraints for the current subtask.
+             future_tasks (List[Node]): List of upcoming task nodes (used for context in planning).
+             done_task (List[Node]): List of completed task nodes.
+             obs (Dict): Current observation dictionary; must include a "screenshot" key with the current screen image.
+             running_state (str): Current executor running state (default "running").
+
+         Returns:
+             Dict: Executor information containing:
+                 - "current_subtask" (str): The provided subtask.
+                 - "current_subtask_info" (str): The provided subtask_info.
+                 - "executor_plan" (str): The raw plan produced by the action generator.
+                 - "reflection" (str|None): Reflection text produced by the trajectory reflector, or None if reflection is disabled.
          """
          import time
          action_start = time.time()
@@ -351,4 +376,4 @@ class Worker:
          # Cut off extra grounded actions
          res = res[:res.find("(Grounded Action)")]
          res += f"(Grounded Action)\n```python\n{action}\n```\n"
-         return res
+         return res
gui_agents/cli_app.py CHANGED
@@ -1,15 +1,14 @@
  import argparse
- import datetime
- import io
  import logging
  import os
  import platform
  import sys
- import time
  import datetime
  from pathlib import Path
  from dotenv import load_dotenv

+ from gui_agents.agents.Backend.LybicBackend import LybicBackend
+
  env_path = Path(os.path.dirname(os.path.abspath(__file__))) / '.env'
  if env_path.exists():
      load_dotenv(dotenv_path=env_path)
@@ -260,6 +259,18 @@ def scale_screenshot_dimensions(screenshot: Image.Image, hwi_para: HardwareInter
      return screenshot

  def run_agent_normal(agent, instruction: str, hwi_para: HardwareInterface, max_steps: int = 50, enable_takeover: bool = False):
+     """
+     Run an agent in normal mode to iteratively observe, plan, and execute actions for a given instruction.
+
+     Runs up to `max_steps` iterations: captures screenshots, obtains observations, asks the agent for a plan, executes hardware actions, and updates trajectory and memories until the agent signals completion or failure. The function also supports pausing for user takeover and performs post-run timing logging and automatic analysis.
+
+     Parameters:
+         agent: The agent instance used to generate plans and reflections (expects an object exposing `predict`, `update_episodic_memory`, and `update_narrative_memory`).
+         instruction (str): The high-level task description provided to the agent.
+         hwi_para (HardwareInterface): Hardware interface used to capture screenshots and dispatch actions.
+         max_steps (int): Maximum number of agent prediction/execute cycles to run.
+         enable_takeover (bool): If True, the agent may request a user takeover that pauses execution until the user resumes.
+     """
      import time
      obs = {}
      traj = "Task:\n" + instruction
@@ -302,7 +313,7 @@ def run_agent_normal(agent, instruction: str, hwi_para: HardwareInterface, max_s
              os.system(
                  f'osascript -e \'display dialog "Task Completed" with title "OpenACI Agent" buttons "OK" default button "OK"\''
              )
-         elif platform.system() == "Linux":
+         elif platform.system() == "Linux" and not (hwi_para.backend== "lybic" or isinstance(hwi_para.backend, LybicBackend)):
              os.system(
                  f'zenity --info --title="OpenACI Agent" --text="Task Completed" --width=200 --height=100'
              )
@@ -434,7 +445,7 @@ def run_agent_fast(agent,
              os.system(
                  f'osascript -e \'display dialog "Task Completed" with title "OpenACI Agent (Fast)" buttons "OK" default button "OK"\''
              )
-         elif platform.system() == "Linux":
+         elif platform.system() == "Linux" and not (hwi_para.backend== "lybic" or isinstance(hwi_para.backend, LybicBackend)):
              os.system(
                  f'zenity --info --title="OpenACI Agent (Fast)" --text="Task Completed" --width=200 --height=100'
              )
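
Both cli_app.py hunks repeat the same backend test, accepting either the string form "lybic" or a LybicBackend instance. If this guard spreads further, a small predicate could centralize it; a hypothetical sketch, not code from the package:

from gui_agents.agents.Backend.LybicBackend import LybicBackend

def _is_lybic_backend(backend) -> bool:
    # True for both the string form and an instantiated LybicBackend.
    return backend == "lybic" or isinstance(backend, LybicBackend)

# e.g.: if platform.system() == "Linux" and not _is_lybic_backend(hwi_para.backend): show the zenity dialog
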