lybic-guiagents 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lybic-guiagents might be problematic. Click here for more details.
- gui_agents/__init__.py +1 -1
- gui_agents/agents/Backend/LybicBackend.py +25 -19
- gui_agents/agents/agent_s.py +292 -97
- gui_agents/agents/grounding.py +43 -6
- gui_agents/agents/manager.py +113 -18
- gui_agents/agents/stream_manager.py +163 -0
- gui_agents/agents/worker.py +60 -35
- gui_agents/cli_app.py +16 -5
- gui_agents/core/knowledge.py +36 -5
- gui_agents/grpc_app.py +784 -0
- gui_agents/proto/__init__.py +3 -0
- gui_agents/proto/pb/__init__.py +4 -0
- gui_agents/tools/model.md +351 -0
- gui_agents/tools/tools.py +80 -39
- gui_agents/tools/tools_config.json +101 -0
- gui_agents/tools/tools_config_cn.json +101 -0
- gui_agents/tools/tools_config_en.json +101 -0
- {lybic_guiagents-0.2.2.dist-info → lybic_guiagents-0.3.0.dist-info}/METADATA +86 -8
- {lybic_guiagents-0.2.2.dist-info → lybic_guiagents-0.3.0.dist-info}/RECORD +23 -16
- lybic_guiagents-0.3.0.dist-info/entry_points.txt +3 -0
- gui_agents/lybic_client/__init__.py +0 -0
- gui_agents/lybic_client/lybic_client.py +0 -88
- {lybic_guiagents-0.2.2.dist-info → lybic_guiagents-0.3.0.dist-info}/WHEEL +0 -0
- {lybic_guiagents-0.2.2.dist-info → lybic_guiagents-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {lybic_guiagents-0.2.2.dist-info → lybic_guiagents-0.3.0.dist-info}/top_level.txt +0 -0
gui_agents/agents/grounding.py
CHANGED
|
@@ -32,6 +32,20 @@ class Grounding(ACI):
|
|
|
32
32
|
width: int = 1920,
|
|
33
33
|
height: int = 1080,
|
|
34
34
|
):
|
|
35
|
+
"""
|
|
36
|
+
Initialize a Grounding instance: configure screen dimensions, prepare tool instances, and load global state.
|
|
37
|
+
|
|
38
|
+
Parameters:
|
|
39
|
+
Tools_dict (Dict): Mapping of tool names to their configuration dictionaries used to register tools.
|
|
40
|
+
platform (str): Target platform identifier (e.g., "windows", "macos") used by the grounding agents.
|
|
41
|
+
width (int): Current screen width in pixels.
|
|
42
|
+
height (int): Current screen height in pixels.
|
|
43
|
+
|
|
44
|
+
Detailed behavior:
|
|
45
|
+
- Creates and registers two Tools instances ("grounding" and "text_span") using entries from Tools_dict; registration will include any authentication-related parameters present in the tool configuration.
|
|
46
|
+
- Obtains grounding tool dimensions (grounding_width, grounding_height) and falls back to the provided width and height when the grounding tool does not supply them.
|
|
47
|
+
- Initializes coordinate placeholders (coords1, coords2) and stores a reference to the global state store.
|
|
48
|
+
"""
|
|
35
49
|
self.platform = platform
|
|
36
50
|
self.Tools_dict = Tools_dict
|
|
37
51
|
self.width = width
|
|
@@ -39,10 +53,35 @@ class Grounding(ACI):
|
|
|
39
53
|
self.coords1 = None
|
|
40
54
|
self.coords2 = None
|
|
41
55
|
|
|
56
|
+
def _register(tools_instance, tool_name):
    """
    Register `tool_name` on `tools_instance` using its entry in the enclosing `Tools_dict`.

    A shallow copy of the tool's configuration is taken (an empty dict when the
    tool has no entry), `provider` and `model` are removed from the copy and
    passed positionally, and every remaining key is forwarded to
    `register_tool` as a keyword argument.

    Parameters:
        tools_instance: Object exposing register_tool(tool_name, provider, model, **params).
        tool_name (str): Key of the tool in Tools_dict whose configuration is used.
    """
    settings = Tools_dict.get(tool_name, {}).copy()
    provider = settings.pop("provider", None)
    model = settings.pop("model", None)

    # The authentication/endpoint keys already live in `settings`; they are
    # singled out only so that their presence (never their value) is logged.
    for key in ('api_key', 'base_url', 'endpoint_url', 'azure_endpoint', 'api_version'):
        if key in settings:
            logger.info(f"Grounding._register: Setting {key} for tool '{tool_name}'")

    logger.info(f"Grounding._register: Registering tool '{tool_name}' with provider '{provider}', model '{model}'")
    tools_instance.register_tool(tool_name, provider, model, **settings)
|
|
82
|
+
|
|
42
83
|
self.grounding_model = Tools()
|
|
43
|
-
self.grounding_model
|
|
44
|
-
"grounding", self.Tools_dict["grounding"]["provider"],
|
|
45
|
-
self.Tools_dict["grounding"]["model"])
|
|
84
|
+
_register(self.grounding_model, "grounding")
|
|
46
85
|
|
|
47
86
|
self.grounding_width, self.grounding_height = self.grounding_model.tools[
|
|
48
87
|
"grounding"].get_grounding_wh()
|
|
@@ -51,9 +90,7 @@ class Grounding(ACI):
|
|
|
51
90
|
self.grounding_height = self.height
|
|
52
91
|
|
|
53
92
|
self.text_span_agent = Tools()
|
|
54
|
-
self.text_span_agent
|
|
55
|
-
"text_span", self.Tools_dict["text_span"]["provider"],
|
|
56
|
-
self.Tools_dict["text_span"]["model"])
|
|
93
|
+
_register(self.text_span_agent, "text_span")
|
|
57
94
|
|
|
58
95
|
self.global_state: GlobalState = Registry.get(
|
|
59
96
|
"GlobalStateStore") # type: ignore
|
gui_agents/agents/manager.py
CHANGED
|
@@ -14,6 +14,7 @@ from gui_agents.utils.common_utils import (
|
|
|
14
14
|
agent_log_to_string,
|
|
15
15
|
)
|
|
16
16
|
from gui_agents.tools.tools import Tools
|
|
17
|
+
from gui_agents.agents.stream_manager import stream_manager
|
|
17
18
|
|
|
18
19
|
logger = logging.getLogger("desktopenv.agent")
|
|
19
20
|
|
|
@@ -29,31 +30,68 @@ class Manager:
|
|
|
29
30
|
platform: str = platform.system().lower(),
|
|
30
31
|
enable_search: bool = True,
|
|
31
32
|
):
|
|
33
|
+
"""
|
|
34
|
+
Initialize the Manager which orchestrates planning, knowledge retrieval/fusion, DAG generation, topological sorting, and action queue creation for task-driven agents.
|
|
35
|
+
|
|
36
|
+
Parameters:
|
|
37
|
+
Tools_dict (Dict): Mapping of tool names to their configuration dictionaries; used to register and configure internal Tools instances (e.g., 'subtask_planner', 'dag_translator', 'embedding', 'websearch').
|
|
38
|
+
local_kb_path (str): Filesystem path to the local knowledge base storage used by the KnowledgeBase.
|
|
39
|
+
multi_round (bool): When True, enable multi-round interaction/stateful planning behavior across turns.
|
|
40
|
+
platform (str): Target platform identifier (defaults to current system name); forwarded to KnowledgeBase and tools where applicable.
|
|
41
|
+
enable_search (bool): When True, register and enable a web search tool ('websearch'); otherwise search functionality is disabled.
|
|
42
|
+
"""
|
|
32
43
|
self.platform = platform
|
|
33
44
|
self.Tools_dict = Tools_dict
|
|
34
45
|
|
|
46
|
+
def _register(tools_instance, tool_name):
    """
    Register a tool with the provided tools manager using settings from Tools_dict.

    The tool's configuration is copied from the enclosing `Tools_dict` (an empty
    dict is used when the tool has no entry). `provider` and `model` are taken
    out of the copy and passed positionally; all remaining keys are passed to
    `register_tool` as keyword arguments.

    Parameters:
        tools_instance: An object exposing register_tool(name, provider, model, **kwargs).
        tool_name (str): Key to look up the tool's configuration in Tools_dict.
    """
    settings = Tools_dict.get(tool_name, {}).copy()
    provider = settings.pop("provider", None)
    model = settings.pop("model", None)

    # Log which authentication/endpoint keys are configured (names only, not
    # values); the keys themselves are forwarded as part of `settings`.
    for key in ('api_key', 'base_url', 'endpoint_url', 'azure_endpoint', 'api_version'):
        if key in settings:
            logger.info(f"Manager._register: Setting {key} for tool '{tool_name}'")

    logger.info(f"Manager._register: Registering tool '{tool_name}' with provider '{provider}', model '{model}'")
    tools_instance.register_tool(tool_name, provider, model, **settings)
|
|
71
|
+
|
|
35
72
|
self.generator_agent = Tools()
|
|
36
|
-
self.generator_agent
|
|
73
|
+
_register(self.generator_agent, "subtask_planner")
|
|
37
74
|
|
|
38
75
|
self.dag_translator_agent = Tools()
|
|
39
|
-
self.dag_translator_agent
|
|
76
|
+
_register(self.dag_translator_agent, "dag_translator")
|
|
40
77
|
|
|
41
78
|
self.narrative_summarization_agent = Tools()
|
|
42
|
-
self.narrative_summarization_agent
|
|
79
|
+
_register(self.narrative_summarization_agent, "narrative_summarization")
|
|
43
80
|
|
|
44
81
|
self.episode_summarization_agent = Tools()
|
|
45
|
-
self.episode_summarization_agent
|
|
82
|
+
_register(self.episode_summarization_agent, "episode_summarization")
|
|
46
83
|
|
|
47
84
|
self.local_kb_path = local_kb_path
|
|
48
85
|
|
|
49
86
|
self.embedding_engine = Tools()
|
|
50
|
-
self.embedding_engine
|
|
87
|
+
_register(self.embedding_engine, "embedding")
|
|
88
|
+
|
|
51
89
|
KB_Tools_dict = {
|
|
52
|
-
"embedding": self.Tools_dict
|
|
53
|
-
"query_formulator": self.Tools_dict
|
|
54
|
-
"context_fusion": self.Tools_dict
|
|
55
|
-
"narrative_summarization": self.Tools_dict
|
|
56
|
-
"episode_summarization": self.Tools_dict
|
|
90
|
+
"embedding": self.Tools_dict.get("embedding"),
|
|
91
|
+
"query_formulator": self.Tools_dict.get("query_formulator"),
|
|
92
|
+
"context_fusion": self.Tools_dict.get("context_fusion"),
|
|
93
|
+
"narrative_summarization": self.Tools_dict.get("narrative_summarization"),
|
|
94
|
+
"episode_summarization": self.Tools_dict.get("episode_summarization"),
|
|
57
95
|
}
|
|
58
96
|
|
|
59
97
|
|
|
@@ -69,20 +107,40 @@ class Manager:
|
|
|
69
107
|
self.planner_history = []
|
|
70
108
|
|
|
71
109
|
self.turn_count = 0
|
|
72
|
-
|
|
110
|
+
self.task_id = None # Will be set by agent
|
|
111
|
+
|
|
73
112
|
# Initialize search engine based on enable_search parameter
|
|
74
113
|
if enable_search:
|
|
75
114
|
self.search_engine = Tools()
|
|
76
|
-
self.search_engine
|
|
115
|
+
_register(self.search_engine, "websearch")
|
|
77
116
|
else:
|
|
78
117
|
self.search_engine = None
|
|
79
118
|
|
|
80
119
|
self.multi_round = multi_round
|
|
81
120
|
|
|
121
|
+
def _send_stream_message(self, task_id: str, stage: str, message: str) -> None:
|
|
122
|
+
"""
|
|
123
|
+
Enqueue a stream message for the given task if a task ID is provided.
|
|
124
|
+
|
|
125
|
+
Parameters:
|
|
126
|
+
task_id (str): Identifier of the task stream; no message is sent if empty.
|
|
127
|
+
stage (str): Stage label for the message.
|
|
128
|
+
message (str): Message content to enqueue.
|
|
129
|
+
"""
|
|
130
|
+
if not task_id:
|
|
131
|
+
return
|
|
132
|
+
|
|
133
|
+
stream_manager.add_message_threadsafe(task_id, stage, message)
|
|
134
|
+
|
|
82
135
|
def summarize_episode(self, trajectory):
|
|
83
|
-
"""
|
|
84
|
-
|
|
85
|
-
|
|
136
|
+
"""
|
|
137
|
+
Create a concise summary of the provided episode trajectory for lifelong learning and reflection.
|
|
138
|
+
|
|
139
|
+
Parameters:
|
|
140
|
+
trajectory (str): Serialized episode experience or trajectory to summarize.
|
|
141
|
+
|
|
142
|
+
Returns:
|
|
143
|
+
subtask_summarization (str): A short summary highlighting key subtasks, lessons, or reflections from the episode.
|
|
86
144
|
"""
|
|
87
145
|
|
|
88
146
|
# Create Reflection on whole trajectories for next round trial, keep earlier messages as exemplars
|
|
@@ -99,12 +157,19 @@ class Manager:
|
|
|
99
157
|
}
|
|
100
158
|
)
|
|
101
159
|
|
|
160
|
+
self._send_stream_message(self.task_id, "summarization", f"Episode summarization: {subtask_summarization}")
|
|
161
|
+
|
|
102
162
|
return subtask_summarization
|
|
103
163
|
|
|
104
164
|
def summarize_narrative(self, trajectory):
|
|
105
|
-
"""
|
|
106
|
-
|
|
107
|
-
|
|
165
|
+
"""
|
|
166
|
+
Produce a concise reflective summary of a narrative trajectory to inform lifelong learning.
|
|
167
|
+
|
|
168
|
+
Parameters:
|
|
169
|
+
trajectory: Narrative content (e.g., episode transcript or sequence of subtasks) to be summarized.
|
|
170
|
+
|
|
171
|
+
Returns:
|
|
172
|
+
A string containing a reflective summary that captures key insights, lessons learned, and recommendations for future rounds.
|
|
108
173
|
"""
|
|
109
174
|
# Create Reflection on whole trajectories for next round trial
|
|
110
175
|
lifelong_learning_reflection, total_tokens, cost_string = self.narrative_summarization_agent.execute_tool("narrative_summarization", {"str_input": trajectory})
|
|
@@ -131,6 +196,27 @@ class Manager:
|
|
|
131
196
|
remaining_subtasks_list: List[Node] = [],
|
|
132
197
|
) -> Tuple[Dict, str]:
|
|
133
198
|
|
|
199
|
+
"""
|
|
200
|
+
Generate a high-level, step-by-step plan for the given task, optionally incorporating retrieved knowledge and the current subtask state.
|
|
201
|
+
|
|
202
|
+
Parameters:
|
|
203
|
+
observation (Dict): Current environment/desktop state; may include a 'screenshot' key with image data used for planning.
|
|
204
|
+
instruction (str): Natural-language task description to plan for.
|
|
205
|
+
failed_subtask (Optional[Node]): If provided, indicates a subtask that failed and triggers replanning for the remainder.
|
|
206
|
+
completed_subtasks_list (List[Node]): Ordered list of subtasks already completed; used to inform replanning.
|
|
207
|
+
remaining_subtasks_list (List[Node]): Ordered list of subtasks still expected; used to inform replanning.
|
|
208
|
+
|
|
209
|
+
Returns:
|
|
210
|
+
planner_info (Dict): Metadata about the planning step (includes at least 'search_query' and 'goal_plan').
|
|
211
|
+
plan (str): The generated high-level plan as a human-readable string.
|
|
212
|
+
|
|
213
|
+
Side effects:
|
|
214
|
+
- May perform retrieval and knowledge fusion on the first planning turn.
|
|
215
|
+
- Records operations to global_state, appends the plan to self.planner_history, increments self.turn_count, and sends stream messages when self.task_id is set.
|
|
216
|
+
|
|
217
|
+
Raises:
|
|
218
|
+
Exception: If plan generation produces an empty plan.
|
|
219
|
+
"""
|
|
134
220
|
import time
|
|
135
221
|
step_start = time.time()
|
|
136
222
|
# Converts a list of DAG Nodes into a natural language list
|
|
@@ -275,6 +361,10 @@ class Manager:
|
|
|
275
361
|
logger.info("GENERATING HIGH LEVEL PLAN")
|
|
276
362
|
|
|
277
363
|
subtask_planner_start = time.time()
|
|
364
|
+
|
|
365
|
+
# Stream subtask planning message
|
|
366
|
+
self._send_stream_message(self.task_id, "planning", "Analyzing tasks and generating subtask plans...")
|
|
367
|
+
|
|
278
368
|
plan, total_tokens, cost_string = self.generator_agent.execute_tool("subtask_planner", {"str_input": generator_message, "img_input": observation.get("screenshot", None)})
|
|
279
369
|
logger.info(f"Subtask planner tokens: {total_tokens}, cost: {cost_string}")
|
|
280
370
|
subtask_planner_time = time.time() - subtask_planner_start
|
|
@@ -289,6 +379,11 @@ class Manager:
|
|
|
289
379
|
"duration": subtask_planner_time
|
|
290
380
|
}
|
|
291
381
|
)
|
|
382
|
+
|
|
383
|
+
# Stream planning completion message
|
|
384
|
+
if self.task_id:
|
|
385
|
+
plan_preview = plan[:150] + "..." if len(plan) > 150 else plan
|
|
386
|
+
self._send_stream_message(self.task_id, "planning", f"Subtask planning completed: {plan_preview}")
|
|
292
387
|
|
|
293
388
|
step_time = time.time() - step_start
|
|
294
389
|
logger.info(f"[Timing] Manager._generate_step_by_step_plan execution time: {step_time:.2f} seconds")
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Stream manager for per-task progress messaging.
|
|
3
|
+
|
|
4
|
+
This module provides a global `stream_manager` singleton that manages
|
|
5
|
+
async message queues for task-based streaming. The singleton is async-safe
|
|
6
|
+
and should have its event loop configured via `set_loop()` during application
|
|
7
|
+
startup.
|
|
8
|
+
"""
|
|
9
|
+
import asyncio
|
|
10
|
+
from google.protobuf.timestamp_pb2 import Timestamp
|
|
11
|
+
from typing import Dict, Optional, AsyncGenerator
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
import logging
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class StreamMessage:
    """A single progress update emitted for a task."""

    # Stage label supplied by the producer (e.g. "planning", "summarization").
    stage: str
    # Human-readable progress text.
    message: str
    # Protobuf timestamp captured when the message was enqueued.
    timestamp: Timestamp


class StreamManager:
    """
    Manages in-memory async message queues for each task to stream progress.

    Queue access is serialized with an asyncio lock, so the coroutine methods
    may be called concurrently from tasks on the same event loop. Code running
    in other threads must use `add_message_threadsafe`, which hands the work to
    the loop configured via `set_loop()`.
    """

    def __init__(self, max_queue_size: int = 100):
        """
        Initialize a StreamManager that manages per-task in-memory async message queues.

        Parameters:
            max_queue_size (int): Maximum number of messages to keep per task queue;
                when a queue is full the oldest message is dropped to make room for
                the new one. A value <= 0 makes asyncio.Queue unbounded.
        """
        self.task_queues: Dict[str, asyncio.Queue[Optional[StreamMessage]]] = {}
        self.max_queue_size = max_queue_size
        # NOTE(review): on Python < 3.10 asyncio primitives bind to the current
        # event loop when constructed. The module-level singleton is created at
        # import time, so confirm the supported Python versions or that the
        # serving loop is the one current at import.
        self._lock = asyncio.Lock()
        self.loop: Optional[asyncio.AbstractEventLoop] = None

    def set_loop(self, loop: asyncio.AbstractEventLoop):
        """
        Store the event loop used to schedule coroutines from non-async threads.

        Parameters:
            loop (asyncio.AbstractEventLoop): Event loop passed to
                asyncio.run_coroutine_threadsafe by `add_message_threadsafe`.
        """
        self.loop = loop

    @staticmethod
    def _log_delivery_failure(future) -> None:
        """Log an exception raised by a coroutine scheduled from another thread."""
        # Future.exception() raises on a cancelled future, so check that first.
        if future.cancelled():
            return
        exc = future.exception()
        if exc is not None:
            logger.error("Failed to enqueue stream message: %r", exc)

    def add_message_threadsafe(self, task_id: str, stage: str, message: str):
        """
        Enqueue a progress message for a task from a non-async thread.

        Delivery is best-effort: if the event loop has not been set, or it can no
        longer accept work (for example because it was closed), the problem is
        logged and the message is dropped instead of raising into the caller.
        Failures inside the scheduled coroutine are logged as well.

        Parameters:
            task_id (str): Identifier of the task to receive the message.
            stage (str): Stage label for the progress update.
            message (str): Text of the progress message.
        """
        loop = self.loop
        if loop is None:
            logger.error("StreamManager event loop not set. Cannot send message from thread.")
            return

        coro = self.add_message(task_id, stage, message)
        try:
            future = asyncio.run_coroutine_threadsafe(coro, loop)
        except RuntimeError:
            # Scheduling on a closed loop raises RuntimeError. Close the
            # coroutine so it does not trigger a "never awaited" warning.
            coro.close()
            logger.exception("Could not schedule stream message for task %s", task_id)
            return

        # Without this callback any exception from add_message would be stored
        # on the discarded future and never reported.
        future.add_done_callback(self._log_delivery_failure)

    async def add_message(self, task_id: str, stage: str, message: str):
        """
        Enqueue a progress message for the given task.

        If the task's queue is full, the oldest message is dropped to make room.
        If no queue is registered for the task, a warning is logged and the
        message is discarded.

        Parameters:
            task_id (str): Identifier of the task whose queue will receive the message.
            stage (str): Short stage name or label for the message.
            message (str): Human-readable progress message.
        """
        async with self._lock:
            q = self.task_queues.get(task_id)
            if q is None:
                logger.warning(f"No message queue found for task {task_id}. Message not added.")
                return

            timestamp = Timestamp()
            timestamp.GetCurrentTime()
            msg = StreamMessage(stage=stage, message=message, timestamp=timestamp)
            try:
                q.put_nowait(msg)
            except asyncio.QueueFull:
                logger.warning(f"Message queue for task {task_id} is full. Dropping oldest message.")
                # Drop the oldest message to make space for the new one. There is
                # no await between these calls, so nothing can interleave.
                q.get_nowait()
                q.put_nowait(msg)

    async def get_message_stream(self, task_id: str) -> AsyncGenerator[StreamMessage, None]:
        """
        Provide an async generator that yields progress messages for the given task.

        If the task has no existing queue, one is created and registered. The
        generator ends when the sentinel `None` is received, which
        `unregister_task` enqueues.

        Parameters:
            task_id (str): Identifier of the task whose message stream to consume.

        Returns:
            AsyncGenerator[StreamMessage, None]: Yields `StreamMessage` instances
            until the end-of-stream sentinel is encountered.
        """
        async with self._lock:
            if task_id not in self.task_queues:
                self.task_queues[task_id] = asyncio.Queue(maxsize=self.max_queue_size)
                logger.info(f"Registered message queue for task {task_id} in get_message_stream.")
            q = self.task_queues[task_id]

        # Wait outside the lock so producers can enqueue while we are blocked.
        while True:
            message = await q.get()
            if message is None:  # Sentinel value indicates end of stream
                logger.info(f"End of stream for task {task_id}")
                break
            yield message

    async def register_task(self, task_id: str):
        """
        Create a per-task message queue if one does not already exist.

        Calling this again for an already registered task_id has no effect. The
        new queue uses the manager's configured max_queue_size.

        Parameters:
            task_id (str): Unique identifier of the task to register a message queue for.
        """
        async with self._lock:
            if task_id not in self.task_queues:
                self.task_queues[task_id] = asyncio.Queue(maxsize=self.max_queue_size)
                logger.info(f"Registered message queue for task {task_id}")

    async def unregister_task(self, task_id: str):
        """
        Remove a task's message queue and signal end of stream to its consumer.

        Parameters:
            task_id (str): Identifier of the task to unregister; unknown IDs are ignored.
        """
        q = None
        async with self._lock:
            if task_id in self.task_queues:
                q = self.task_queues.pop(task_id)
                logger.info(f"Unregistered message queue for task {task_id}")

        if q is None:
            return

        try:
            # Put a sentinel value to unblock any consumers
            q.put_nowait(None)
        except asyncio.QueueFull:
            # If full, sacrifice the oldest message to make space for the sentinel
            try:
                q.get_nowait()
            except asyncio.QueueEmpty:
                pass
            # Retry put after making space or if queue became empty
            try:
                q.put_nowait(None)
            except asyncio.QueueFull:
                logger.error(f"Could not send sentinel for task {task_id}: queue still full after retry")


# Global instance to be used across the application
stream_manager = StreamManager()
|
gui_agents/agents/worker.py
CHANGED
|
@@ -35,24 +35,17 @@ class Worker:
|
|
|
35
35
|
tools_config: Dict = {},
|
|
36
36
|
):
|
|
37
37
|
"""
|
|
38
|
-
Worker
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
local_kb_path:
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
Whether to use subtask experience
|
|
50
|
-
enable_takeover: bool
|
|
51
|
-
Whether to enable user takeover functionality
|
|
52
|
-
enable_search: bool
|
|
53
|
-
Global switch for search functionality (overrides config)
|
|
54
|
-
tools_config: Dict
|
|
55
|
-
Complete tools configuration from tools_config.json
|
|
38
|
+
Initialize a Worker that generates executor actions using the provided tools, local knowledge base, and optional reflection, episodic experience, takeover, and search features.
|
|
39
|
+
|
|
40
|
+
Parameters:
|
|
41
|
+
Tools_dict (Dict): Mapping of tool names to tool instances/configurations used by the Worker.
|
|
42
|
+
local_kb_path (str): Filesystem path to the local knowledge base to use for retrieval.
|
|
43
|
+
platform (str): Operating system identifier the agent runs on (e.g., 'darwin', 'linux', 'windows').
|
|
44
|
+
enable_reflection (bool): If True, enable trajectory reflection generation and use its output when producing actions.
|
|
45
|
+
use_subtask_experience (bool): If True, attempt to retrieve and incorporate episodic/subtask experience on the first turn.
|
|
46
|
+
enable_takeover (bool): If True, use the takeover-capable action generator tool when producing actions.
|
|
47
|
+
enable_search (bool): Global switch that forces search-enabled tools to run with search disabled when False.
|
|
48
|
+
tools_config (Dict): Tools configuration mapping; if None, the Worker loads tools_config.json from the package tools directory.
|
|
56
49
|
"""
|
|
57
50
|
# super().__init__(engine_params, platform)
|
|
58
51
|
self.platform = platform
|
|
@@ -72,11 +65,6 @@ class Worker:
|
|
|
72
65
|
else:
|
|
73
66
|
self.tools_config = tools_config
|
|
74
67
|
|
|
75
|
-
self.embedding_engine = Tools()
|
|
76
|
-
self.embedding_engine.register_tool(
|
|
77
|
-
"embedding", self.Tools_dict["embedding"]["provider"],
|
|
78
|
-
self.Tools_dict["embedding"]["model"])
|
|
79
|
-
|
|
80
68
|
self.enable_reflection = enable_reflection
|
|
81
69
|
self.use_subtask_experience = use_subtask_experience
|
|
82
70
|
self.global_state: GlobalState = Registry.get(
|
|
@@ -85,6 +73,31 @@ class Worker:
|
|
|
85
73
|
|
|
86
74
|
def reset(self):
|
|
87
75
|
|
|
76
|
+
"""
|
|
77
|
+
Initialize the worker's tool agents, knowledge base, and internal state for a new task session.
|
|
78
|
+
|
|
79
|
+
This method registers the action generator (with optional takeover variant), trajectory reflector, and embedding engine using a local helper that merges tool configuration with any overrides and propagates authentication parameters; it initializes the KnowledgeBase with the embedding engine and toolkit, configures search-related parameters for the action generator according to global and per-tool settings, and resets runtime state fields (turn count, histories, reflections, cost tracking, screenshot inputs, planner history, latest action, trajectory length limit, and task_id).
|
|
80
|
+
"""
|
|
81
|
+
def _register(tools_instance, tool_name, **override_kwargs):
    """
    Register `tool_name` on `tools_instance` from `self.Tools_dict`, applying overrides.

    The tool's configuration is copied (an empty dict when the tool has no
    entry), `provider` and `model` are removed and passed positionally, and the
    remaining keys, updated with `override_kwargs`, are forwarded to
    `register_tool` as keyword arguments.

    Parameters:
        tools_instance: Object exposing register_tool(tool_name, provider, model, **params).
        tool_name (str): Key of the tool in self.Tools_dict.
        **override_kwargs: Extra parameters that take precedence over the configured ones.
    """
    base = self.Tools_dict.get(tool_name, {}).copy()
    provider = base.pop("provider", None)
    model = base.pop("model", None)

    # Explicit overrides win over values coming from the configuration.
    # NOTE(review): an override named "provider" or "model" would be forwarded
    # as a keyword rather than replacing the positional value - confirm callers
    # never pass those.
    settings = {**base, **override_kwargs}

    # Only the names of the authentication/endpoint keys are logged, not their values.
    for key in ('api_key', 'base_url', 'endpoint_url', 'azure_endpoint', 'api_version'):
        if key in settings:
            logger.info(f"Worker._register: Setting {key} for tool '{tool_name}'")

    logger.info(f"Worker._register: Registering tool '{tool_name}' with provider '{provider}', model '{model}'")
    tools_instance.register_tool(tool_name, provider, model, **settings)
|
|
100
|
+
|
|
88
101
|
self.generator_agent = Tools()
|
|
89
102
|
self.action_generator_tool = "action_generator_with_takeover" if self.enable_takeover else "action_generator"
|
|
90
103
|
|
|
@@ -121,20 +134,14 @@ class Worker:
|
|
|
121
134
|
)
|
|
122
135
|
|
|
123
136
|
# Register the tool with parameters
|
|
124
|
-
self.generator_agent.
|
|
125
|
-
self.action_generator_tool,
|
|
126
|
-
self.Tools_dict[self.action_generator_tool]["provider"],
|
|
127
|
-
self.Tools_dict[self.action_generator_tool]["model"], **tool_params)
|
|
137
|
+
_register(self.generator_agent, self.action_generator_tool, **tool_params)
|
|
128
138
|
|
|
129
139
|
self.reflection_agent = Tools()
|
|
130
|
-
self.reflection_agent
|
|
131
|
-
"traj_reflector", self.Tools_dict["traj_reflector"]["provider"],
|
|
132
|
-
self.Tools_dict["traj_reflector"]["model"])
|
|
140
|
+
_register(self.reflection_agent, "traj_reflector")
|
|
133
141
|
|
|
134
142
|
self.embedding_engine = Tools()
|
|
135
|
-
self.embedding_engine
|
|
136
|
-
|
|
137
|
-
self.Tools_dict["embedding"]["model"])
|
|
143
|
+
_register(self.embedding_engine, "embedding")
|
|
144
|
+
|
|
138
145
|
self.knowledge_base = KnowledgeBase(
|
|
139
146
|
embedding_engine=self.embedding_engine,
|
|
140
147
|
Tools_dict=self.Tools_dict,
|
|
@@ -150,6 +157,7 @@ class Worker:
|
|
|
150
157
|
self.planner_history = []
|
|
151
158
|
self.latest_action = None
|
|
152
159
|
self.max_trajector_length = 8
|
|
160
|
+
self.task_id = None # Will be set by agent
|
|
153
161
|
|
|
154
162
|
def generate_next_action(
|
|
155
163
|
self,
|
|
@@ -163,7 +171,24 @@ class Worker:
|
|
|
163
171
|
running_state: str = "running",
|
|
164
172
|
) -> Dict:
|
|
165
173
|
"""
|
|
166
|
-
|
|
174
|
+
Generate the next executor action plan and related metadata for the current subtask given the observation and context.
|
|
175
|
+
|
|
176
|
+
Parameters:
|
|
177
|
+
Tu (str): Full task description or task context.
|
|
178
|
+
search_query (str): Search string used for retrieving episodic/subtask experience.
|
|
179
|
+
subtask (str): Current subtask instruction/description to complete.
|
|
180
|
+
subtask_info (str): Additional information or constraints for the current subtask.
|
|
181
|
+
future_tasks (List[Node]): List of upcoming task nodes (used for context in planning).
|
|
182
|
+
done_task (List[Node]): List of completed task nodes.
|
|
183
|
+
obs (Dict): Current observation dictionary; must include a "screenshot" key with the current screen image.
|
|
184
|
+
running_state (str): Current executor running state (default "running").
|
|
185
|
+
|
|
186
|
+
Returns:
|
|
187
|
+
Dict: Executor information containing:
|
|
188
|
+
- "current_subtask" (str): The provided subtask.
|
|
189
|
+
- "current_subtask_info" (str): The provided subtask_info.
|
|
190
|
+
- "executor_plan" (str): The raw plan produced by the action generator.
|
|
191
|
+
- "reflection" (str|None): Reflection text produced by the trajectory reflector, or None if reflection is disabled.
|
|
167
192
|
"""
|
|
168
193
|
import time
|
|
169
194
|
action_start = time.time()
|
|
@@ -351,4 +376,4 @@ class Worker:
|
|
|
351
376
|
# Cut off extra grounded actions
|
|
352
377
|
res = res[:res.find("(Grounded Action)")]
|
|
353
378
|
res += f"(Grounded Action)\n```python\n{action}\n```\n"
|
|
354
|
-
return res
|
|
379
|
+
return res
|
gui_agents/cli_app.py
CHANGED
|
@@ -1,15 +1,14 @@
|
|
|
1
1
|
import argparse
|
|
2
|
-
import datetime
|
|
3
|
-
import io
|
|
4
2
|
import logging
|
|
5
3
|
import os
|
|
6
4
|
import platform
|
|
7
5
|
import sys
|
|
8
|
-
import time
|
|
9
6
|
import datetime
|
|
10
7
|
from pathlib import Path
|
|
11
8
|
from dotenv import load_dotenv
|
|
12
9
|
|
|
10
|
+
from gui_agents.agents.Backend.LybicBackend import LybicBackend
|
|
11
|
+
|
|
13
12
|
env_path = Path(os.path.dirname(os.path.abspath(__file__))) / '.env'
|
|
14
13
|
if env_path.exists():
|
|
15
14
|
load_dotenv(dotenv_path=env_path)
|
|
@@ -260,6 +259,18 @@ def scale_screenshot_dimensions(screenshot: Image.Image, hwi_para: HardwareInter
|
|
|
260
259
|
return screenshot
|
|
261
260
|
|
|
262
261
|
def run_agent_normal(agent, instruction: str, hwi_para: HardwareInterface, max_steps: int = 50, enable_takeover: bool = False):
|
|
262
|
+
"""
|
|
263
|
+
Run an agent in normal mode to iteratively observe, plan, and execute actions for a given instruction.
|
|
264
|
+
|
|
265
|
+
Runs up to `max_steps` iterations: captures screenshots, obtains observations, asks the agent for a plan, executes hardware actions, and updates trajectory and memories until the agent signals completion or failure. The function also supports pausing for user takeover and performs post-run timing logging and automatic analysis.
|
|
266
|
+
|
|
267
|
+
Parameters:
|
|
268
|
+
agent: The agent instance used to generate plans and reflections (expects an object exposing `predict`, `update_episodic_memory`, and `update_narrative_memory`).
|
|
269
|
+
instruction (str): The high-level task description provided to the agent.
|
|
270
|
+
hwi_para (HardwareInterface): Hardware interface used to capture screenshots and dispatch actions.
|
|
271
|
+
max_steps (int): Maximum number of agent prediction/execute cycles to run.
|
|
272
|
+
enable_takeover (bool): If True, the agent may request a user takeover that pauses execution until the user resumes.
|
|
273
|
+
"""
|
|
263
274
|
import time
|
|
264
275
|
obs = {}
|
|
265
276
|
traj = "Task:\n" + instruction
|
|
@@ -302,7 +313,7 @@ def run_agent_normal(agent, instruction: str, hwi_para: HardwareInterface, max_s
|
|
|
302
313
|
os.system(
|
|
303
314
|
f'osascript -e \'display dialog "Task Completed" with title "OpenACI Agent" buttons "OK" default button "OK"\''
|
|
304
315
|
)
|
|
305
|
-
elif platform.system() == "Linux":
|
|
316
|
+
elif platform.system() == "Linux" and not (hwi_para.backend== "lybic" or isinstance(hwi_para.backend, LybicBackend)):
|
|
306
317
|
os.system(
|
|
307
318
|
f'zenity --info --title="OpenACI Agent" --text="Task Completed" --width=200 --height=100'
|
|
308
319
|
)
|
|
@@ -434,7 +445,7 @@ def run_agent_fast(agent,
|
|
|
434
445
|
os.system(
|
|
435
446
|
f'osascript -e \'display dialog "Task Completed" with title "OpenACI Agent (Fast)" buttons "OK" default button "OK"\''
|
|
436
447
|
)
|
|
437
|
-
elif platform.system() == "Linux":
|
|
448
|
+
elif platform.system() == "Linux" and not (hwi_para.backend== "lybic" or isinstance(hwi_para.backend, LybicBackend)):
|
|
438
449
|
os.system(
|
|
439
450
|
f'zenity --info --title="OpenACI Agent (Fast)" --text="Task Completed" --width=200 --height=100'
|
|
440
451
|
)
|