lybic-guiagents 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lybic-guiagents might be problematic. Click here for more details.
- gui_agents/__init__.py +1 -1
- gui_agents/agents/Backend/LybicBackend.py +25 -19
- gui_agents/agents/agent_s.py +292 -97
- gui_agents/agents/grounding.py +43 -6
- gui_agents/agents/manager.py +113 -18
- gui_agents/agents/stream_manager.py +163 -0
- gui_agents/agents/worker.py +60 -35
- gui_agents/cli_app.py +16 -5
- gui_agents/core/knowledge.py +36 -5
- gui_agents/grpc_app.py +784 -0
- gui_agents/proto/__init__.py +3 -0
- gui_agents/proto/pb/__init__.py +4 -0
- gui_agents/tools/model.md +351 -0
- gui_agents/tools/tools.py +80 -39
- gui_agents/tools/tools_config.json +101 -0
- gui_agents/tools/tools_config_cn.json +101 -0
- gui_agents/tools/tools_config_en.json +101 -0
- {lybic_guiagents-0.2.2.dist-info → lybic_guiagents-0.3.0.dist-info}/METADATA +86 -8
- {lybic_guiagents-0.2.2.dist-info → lybic_guiagents-0.3.0.dist-info}/RECORD +23 -16
- lybic_guiagents-0.3.0.dist-info/entry_points.txt +3 -0
- gui_agents/lybic_client/__init__.py +0 -0
- gui_agents/lybic_client/lybic_client.py +0 -88
- {lybic_guiagents-0.2.2.dist-info → lybic_guiagents-0.3.0.dist-info}/WHEEL +0 -0
- {lybic_guiagents-0.2.2.dist-info → lybic_guiagents-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {lybic_guiagents-0.2.2.dist-info → lybic_guiagents-0.3.0.dist-info}/top_level.txt +0 -0
gui_agents/agents/agent_s.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import asyncio
|
|
1
2
|
import json
|
|
2
3
|
import logging
|
|
3
4
|
import os
|
|
@@ -19,9 +20,32 @@ from gui_agents.utils.common_utils import (
|
|
|
19
20
|
agent_log_to_string,
|
|
20
21
|
)
|
|
21
22
|
from gui_agents.tools.tools import Tools
|
|
23
|
+
from gui_agents.agents.stream_manager import stream_manager
|
|
22
24
|
|
|
23
25
|
logger = logging.getLogger("desktopenv.agent")
|
|
24
26
|
|
|
27
|
+
def load_config():
|
|
28
|
+
"""
|
|
29
|
+
Load tool configurations from the repository's tools/tools_config.json and produce a mapping keyed by tool name.
|
|
30
|
+
|
|
31
|
+
Returns:
|
|
32
|
+
tuple: (tools_config, tools_dict) where `tools_config` is the parsed JSON object from tools_config.json, and `tools_dict` is a dict mapping each tool's `tool_name` to a dict with `provider` and `model`.
|
|
33
|
+
"""
|
|
34
|
+
# Load tools configuration from tools_config.json
|
|
35
|
+
tools_config_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "tools", "tools_config.json")
|
|
36
|
+
with open(tools_config_path, "r") as f:
|
|
37
|
+
tools_config = json.load(f)
|
|
38
|
+
print(f"Loaded tools configuration from: {tools_config_path}")
|
|
39
|
+
tools_dict = {}
|
|
40
|
+
for tool in tools_config["tools"]:
|
|
41
|
+
tool_name = tool["tool_name"]
|
|
42
|
+
tools_dict[tool_name] = {
|
|
43
|
+
"provider": tool["provider"],
|
|
44
|
+
"model": tool["model_name"]
|
|
45
|
+
}
|
|
46
|
+
print(f"Tools configuration: {tools_dict}")
|
|
47
|
+
return tools_config,tools_dict
|
|
48
|
+
|
|
25
49
|
class UIAgent:
|
|
26
50
|
"""Base class for UI automation agents"""
|
|
27
51
|
|
|
@@ -37,18 +61,28 @@ class UIAgent:
|
|
|
37
61
|
self.platform = platform
|
|
38
62
|
|
|
39
63
|
def reset(self) -> None:
|
|
40
|
-
"""
|
|
64
|
+
"""
|
|
65
|
+
Reset the agent to its initial internal state.
|
|
66
|
+
|
|
67
|
+
Performs any subclass-specific reinitialization needed so the agent is ready to start a new task or episode.
|
|
68
|
+
"""
|
|
41
69
|
pass
|
|
42
70
|
|
|
43
|
-
def
|
|
44
|
-
"""
|
|
71
|
+
def _send_stream_message(self, task_id: str, stage: str, message: str) -> None:
|
|
72
|
+
"""
|
|
73
|
+
Safely send stream message to task stream.
|
|
74
|
+
"""
|
|
75
|
+
if not task_id:
|
|
76
|
+
return
|
|
45
77
|
|
|
46
|
-
|
|
47
|
-
instruction: Natural language instruction
|
|
48
|
-
observation: Current UI state observation
|
|
78
|
+
stream_manager.add_message_threadsafe(task_id, stage, message)
|
|
49
79
|
|
|
80
|
+
def predict(self, instruction: str, observation: Dict) -> Tuple[Dict, List[str]]|None:
|
|
81
|
+
"""
|
|
82
|
+
Produce the next agent information and action sequence for the given instruction and current observation.
|
|
83
|
+
|
|
50
84
|
Returns:
|
|
51
|
-
|
|
85
|
+
(info, actions) where `info` is a dictionary containing planner, executor and evaluator metadata (including subtask metadata and statuses) and `actions` is a list of action strings to execute; returns `None` if no prediction is available.
|
|
52
86
|
"""
|
|
53
87
|
pass
|
|
54
88
|
|
|
@@ -84,16 +118,15 @@ class AgentS2(UIAgent):
|
|
|
84
118
|
kb_release_tag: str = "v0.2.2",
|
|
85
119
|
enable_takeover: bool = False,
|
|
86
120
|
enable_search: bool = True,
|
|
121
|
+
tools_config: dict | None = None,
|
|
87
122
|
):
|
|
88
|
-
"""
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
enable_takeover: Whether to enable user takeover functionality. Defaults to False.
|
|
96
|
-
enable_search: Whether to enable web search functionality. Defaults to True.
|
|
123
|
+
"""
|
|
124
|
+
Initialize an AgentS2 instance and prepare its tools and local knowledge base.
|
|
125
|
+
|
|
126
|
+
If `tools_config` is provided, build `Tools_dict` mapping each `tool_name` to its config (renaming `model_name` to `model` and removing `tool_name`). If `tools_config` is not provided, load configuration via `load_config()`. Ensure a platform-specific knowledge base directory exists under `memory_root_path/memory_folder_name` (creating it if missing). Sets initial attributes (platform, screen_size, memory paths, flags) and initializes internal state via `reset()`.
|
|
127
|
+
|
|
128
|
+
Parameters:
|
|
129
|
+
tools_config (dict | None): Optional pre-loaded tools configuration; when present it is transformed into `Tools_dict`. Omit to load configuration from disk.
|
|
97
130
|
"""
|
|
98
131
|
super().__init__(
|
|
99
132
|
platform,
|
|
@@ -105,20 +138,24 @@ class AgentS2(UIAgent):
|
|
|
105
138
|
self.screen_size = screen_size
|
|
106
139
|
self.enable_takeover = enable_takeover
|
|
107
140
|
self.enable_search = enable_search
|
|
141
|
+
self.task_id = None # Will be set when task starts
|
|
108
142
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
self.tools_config = json.load(f)
|
|
113
|
-
print(f"Loaded tools configuration from: {tools_config_path}")
|
|
143
|
+
if tools_config is not None:
|
|
144
|
+
self.tools_config = tools_config
|
|
145
|
+
# Create the dictionary mapping from the list-based config
|
|
114
146
|
self.Tools_dict = {}
|
|
115
147
|
for tool in self.tools_config["tools"]:
|
|
116
148
|
tool_name = tool["tool_name"]
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
149
|
+
# Create a copy of the tool's config to avoid modifying the original
|
|
150
|
+
config_copy = tool.copy()
|
|
151
|
+
# Rename 'model_name' to 'model' for consistency in downstream use
|
|
152
|
+
if 'model_name' in config_copy:
|
|
153
|
+
config_copy['model'] = config_copy.pop('model_name')
|
|
154
|
+
# Remove tool_name as it's now the key
|
|
155
|
+
config_copy.pop('tool_name', None)
|
|
156
|
+
self.Tools_dict[tool_name] = config_copy
|
|
157
|
+
else:
|
|
158
|
+
self.tools_config, self.Tools_dict = load_config()
|
|
122
159
|
|
|
123
160
|
# Initialize agent's knowledge base path
|
|
124
161
|
self.local_kb_path = os.path.join(
|
|
@@ -138,16 +175,22 @@ class AgentS2(UIAgent):
|
|
|
138
175
|
self.reset()
|
|
139
176
|
|
|
140
177
|
def reset(self) -> None:
|
|
141
|
-
"""
|
|
142
|
-
|
|
178
|
+
"""
|
|
179
|
+
Reinitialize core components and reset the agent's runtime state.
|
|
143
180
|
|
|
181
|
+
Recreates the Manager, Worker, and Grounding components using the agent's current configuration,
|
|
182
|
+
resets planning/execution flags and counters, clears subtask-related state, reloads the shared
|
|
183
|
+
global state from the registry, and propagates the agent's task_id to the components when present.
|
|
184
|
+
"""
|
|
185
|
+
# Initialize core components
|
|
186
|
+
|
|
144
187
|
self.manager = Manager(
|
|
145
188
|
Tools_dict=self.Tools_dict,
|
|
146
189
|
local_kb_path=self.local_kb_path,
|
|
147
190
|
platform=self.platform,
|
|
148
191
|
enable_search=self.enable_search, # Pass global switch to Manager
|
|
149
192
|
)
|
|
150
|
-
|
|
193
|
+
|
|
151
194
|
self.worker = Worker(
|
|
152
195
|
Tools_dict=self.Tools_dict,
|
|
153
196
|
local_kb_path=self.local_kb_path,
|
|
@@ -178,6 +221,25 @@ class AgentS2(UIAgent):
|
|
|
178
221
|
self.subtask_status: str = "Start"
|
|
179
222
|
self.global_state: GlobalState = Registry.get("GlobalStateStore") # type: ignore
|
|
180
223
|
|
|
224
|
+
# Pass task_id to components
|
|
225
|
+
if self.task_id:
|
|
226
|
+
self.manager.task_id = self.task_id
|
|
227
|
+
self.worker.task_id = self.task_id
|
|
228
|
+
|
|
229
|
+
def set_task_id(self, task_id: str) -> None:
|
|
230
|
+
"""
|
|
231
|
+
Set the task identifier and propagate it to internal components used for streaming.
|
|
232
|
+
|
|
233
|
+
Parameters:
|
|
234
|
+
task_id (str): Identifier for the current task; assigned to this agent and, if present, to its manager and worker so stream messages are tagged consistently.
|
|
235
|
+
"""
|
|
236
|
+
self.task_id = task_id
|
|
237
|
+
# Also set task_id for components if they exist
|
|
238
|
+
if hasattr(self, 'manager') and self.manager:
|
|
239
|
+
self.manager.task_id = task_id
|
|
240
|
+
if hasattr(self, 'worker') and self.worker:
|
|
241
|
+
self.worker.task_id = task_id
|
|
242
|
+
|
|
181
243
|
def reset_executor_state(self) -> None:
|
|
182
244
|
"""Reset executor and step counter"""
|
|
183
245
|
self.worker.reset()
|
|
@@ -185,6 +247,19 @@ class AgentS2(UIAgent):
|
|
|
185
247
|
|
|
186
248
|
def predict(self, instruction: str, observation: Dict) -> Tuple[Dict, List[str]]:
|
|
187
249
|
# Initialize the three info dictionaries
|
|
250
|
+
"""
|
|
251
|
+
Produce the next executor actions and diagnostic information for the current task step.
|
|
252
|
+
|
|
253
|
+
This method coordinates planning, subtask selection, action generation, grounding (code extraction and execution), and status updates. It may trigger replanning, advance to the next subtask, mark subtasks as completed or failed, and emit stream messages and logs. The returned info merges planner, executor, and evaluator metadata and includes current subtask details.
|
|
254
|
+
|
|
255
|
+
Parameters:
|
|
256
|
+
instruction (str): The user or system instruction describing the task to accomplish; forwarded to the manager/worker as the task utterance.
|
|
257
|
+
observation (Dict): Current environment observation/state used for grounding and coordinate assignment.
|
|
258
|
+
|
|
259
|
+
Returns:
|
|
260
|
+
info (Dict): A merged dictionary containing planner_info, executor_info, evaluator_info and the keys `subtask`, `subtask_info`, and `subtask_status`.
|
|
261
|
+
actions (List[Dict]): List of action dictionaries produced for execution (may include actions with type "DONE", failure indicators, or other executor-generated actions).
|
|
262
|
+
"""
|
|
188
263
|
planner_info = {}
|
|
189
264
|
executor_info = {}
|
|
190
265
|
evaluator_info = {
|
|
@@ -209,6 +284,10 @@ class AgentS2(UIAgent):
|
|
|
209
284
|
# If replan is true, generate a new plan. True at start, after a failed plan, or after subtask completion
|
|
210
285
|
if self.requires_replan:
|
|
211
286
|
logger.info("(RE)PLANNING...")
|
|
287
|
+
|
|
288
|
+
# Stream planning start message
|
|
289
|
+
self._send_stream_message(self.task_id, "planning", f"Start planning task steps (Step {self.step_count + 1})...")
|
|
290
|
+
|
|
212
291
|
Manager_info, self.subtasks = self.manager.get_action_queue(
|
|
213
292
|
Tu=self.global_state.get_Tu(),
|
|
214
293
|
observation=self.global_state.get_obs_for_manager(),
|
|
@@ -224,6 +303,9 @@ class AgentS2(UIAgent):
|
|
|
224
303
|
self.search_query = Manager_info["search_query"]
|
|
225
304
|
else:
|
|
226
305
|
self.search_query = ""
|
|
306
|
+
|
|
307
|
+
# Stream planning completion message
|
|
308
|
+
self._send_stream_message(self.task_id, "planning", f"Planning completed, {len(self.subtasks)} subtasks generated")
|
|
227
309
|
get_action_queue_time = time.time() - manager_start
|
|
228
310
|
logger.info(f"[Timing] manager.get_action_queue execution time: {get_action_queue_time:.2f} seconds")
|
|
229
311
|
self.global_state.log_operation(
|
|
@@ -253,8 +335,10 @@ class AgentS2(UIAgent):
|
|
|
253
335
|
"reflection": "agent.done()",
|
|
254
336
|
}
|
|
255
337
|
actions = [{"type": "DONE"}]
|
|
256
|
-
|
|
257
|
-
#
|
|
338
|
+
|
|
339
|
+
# Stream task completion message
|
|
340
|
+
self._send_stream_message(self.task_id, "completion", "🎉 Mission Completed! All subtasks have been successfully executed")
|
|
341
|
+
|
|
258
342
|
self.global_state.log_operation(
|
|
259
343
|
module="agent",
|
|
260
344
|
operation="task_complete",
|
|
@@ -272,40 +356,58 @@ class AgentS2(UIAgent):
|
|
|
272
356
|
logger.info(f"REMAINING SUBTASKS FROM GLOBAL STATE: {self.global_state.get_remaining_subtasks()}")
|
|
273
357
|
self.needs_next_subtask = False
|
|
274
358
|
self.subtask_status = "Start"
|
|
275
|
-
|
|
359
|
+
|
|
360
|
+
# Stream current subtask message
|
|
361
|
+
if self.current_subtask is not None:
|
|
362
|
+
self._send_stream_message(self.task_id, "subtask", f"Start executing subtasks: {self.current_subtask.name}")
|
|
363
|
+
else:
|
|
364
|
+
self._send_stream_message(self.task_id, "subtask", "Start executing a new subtask")
|
|
365
|
+
|
|
276
366
|
self.global_state.log_operation(
|
|
277
367
|
module="agent",
|
|
278
368
|
operation="current_subtask",
|
|
279
369
|
data={
|
|
280
|
-
"content": str(self.current_subtask),
|
|
370
|
+
"content": str(self.current_subtask) if self.current_subtask is not None else "No active subtask",
|
|
281
371
|
"status": "start"
|
|
282
372
|
}
|
|
283
373
|
)
|
|
284
374
|
|
|
285
375
|
worker_start_time = time.time()
|
|
286
|
-
|
|
376
|
+
|
|
377
|
+
# Stream action generation start message
|
|
378
|
+
self._send_stream_message(self.task_id, "thinking", "Generating execution actions...")
|
|
379
|
+
|
|
287
380
|
# get the next action from the worker
|
|
381
|
+
# Handle case where current_subtask might be None
|
|
382
|
+
subtask_name = self.current_subtask.name if self.current_subtask is not None else "No active subtask"
|
|
383
|
+
subtask_info = self.current_subtask.info if self.current_subtask is not None else ""
|
|
384
|
+
|
|
288
385
|
executor_info = self.worker.generate_next_action(
|
|
289
386
|
Tu=instruction,
|
|
290
387
|
search_query=self.search_query,
|
|
291
|
-
subtask=
|
|
292
|
-
subtask_info=
|
|
388
|
+
subtask=subtask_name,
|
|
389
|
+
subtask_info=subtask_info,
|
|
293
390
|
future_tasks=self.global_state.get_remaining_subtasks(),
|
|
294
391
|
done_task=self.global_state.get_completed_subtasks(),
|
|
295
392
|
obs=self.global_state.get_obs_for_manager(),
|
|
296
393
|
)
|
|
297
|
-
|
|
394
|
+
|
|
298
395
|
worker_execution_time = time.time() - worker_start_time
|
|
299
|
-
|
|
396
|
+
|
|
300
397
|
self.global_state.log_operation(
|
|
301
398
|
module="agent",
|
|
302
399
|
operation="worker_execution",
|
|
303
400
|
data={
|
|
304
401
|
"duration": worker_execution_time,
|
|
305
|
-
"subtask": self.current_subtask.name # type: ignore
|
|
402
|
+
"subtask": self.current_subtask.name if self.current_subtask is not None else "No active subtask" # type: ignore
|
|
306
403
|
}
|
|
307
404
|
)
|
|
308
405
|
|
|
406
|
+
# Stream action plan message
|
|
407
|
+
if self.task_id and "executor_plan" in executor_info:
|
|
408
|
+
plan_preview = executor_info["executor_plan"][:100] + "..." if len(executor_info["executor_plan"]) > 100 else executor_info["executor_plan"]
|
|
409
|
+
self._send_stream_message(self.task_id, "action_plan", f"Generate an execution plan: {plan_preview}")
|
|
410
|
+
|
|
309
411
|
try:
|
|
310
412
|
grounding_start_time = time.time()
|
|
311
413
|
current_width, current_height = self.global_state.get_screen_size()
|
|
@@ -345,6 +447,11 @@ class AgentS2(UIAgent):
|
|
|
345
447
|
|
|
346
448
|
actions = [exec_code]
|
|
347
449
|
|
|
450
|
+
# Stream action execution message
|
|
451
|
+
if actions:
|
|
452
|
+
action_type = actions[0].get("type", "unknown")
|
|
453
|
+
self._send_stream_message(self.task_id, "action", f"Execute an action: {action_type}")
|
|
454
|
+
|
|
348
455
|
self.step_count += 1
|
|
349
456
|
|
|
350
457
|
# set the should_send_action flag to True if the executor returns an action
|
|
@@ -356,15 +463,22 @@ class AgentS2(UIAgent):
|
|
|
356
463
|
self.needs_next_subtask = True
|
|
357
464
|
|
|
358
465
|
# assign the failed subtask
|
|
359
|
-
self.
|
|
466
|
+
if self.current_subtask is not None:
|
|
467
|
+
self.global_state.add_failed_subtask(self.current_subtask) # type: ignore
|
|
360
468
|
self.failure_subtask = self.global_state.get_latest_failed_subtask()
|
|
361
|
-
|
|
469
|
+
|
|
470
|
+
# Stream failure message
|
|
471
|
+
if self.current_subtask is not None:
|
|
472
|
+
self._send_stream_message(self.task_id, "error", f"Subtask execution failed: {self.current_subtask.name}, will re-plan")
|
|
473
|
+
else:
|
|
474
|
+
self._send_stream_message(self.task_id, "error", "Subtask execution failed and will be re-planned")
|
|
475
|
+
|
|
362
476
|
# 记录失败的子任务
|
|
363
477
|
self.global_state.log_operation(
|
|
364
478
|
module="agent",
|
|
365
479
|
operation="subtask_failed",
|
|
366
480
|
data={
|
|
367
|
-
"content": str(self.current_subtask),
|
|
481
|
+
"content": str(self.current_subtask) if self.current_subtask is not None else "Unknown subtask",
|
|
368
482
|
"status": "failed"
|
|
369
483
|
}
|
|
370
484
|
)
|
|
@@ -381,14 +495,22 @@ class AgentS2(UIAgent):
|
|
|
381
495
|
self.requires_replan = True
|
|
382
496
|
self.needs_next_subtask = True
|
|
383
497
|
self.failure_subtask = None
|
|
384
|
-
|
|
385
|
-
|
|
498
|
+
# add completed subtask only if it exists
|
|
499
|
+
if self.current_subtask is not None:
|
|
500
|
+
self.global_state.add_completed_subtask(self.current_subtask) # type: ignore
|
|
501
|
+
|
|
502
|
+
# Stream subtask completion message
|
|
503
|
+
if self.current_subtask is not None:
|
|
504
|
+
self._send_stream_message(self.task_id, "subtask_complete", f"✅ Subtask completed: {self.current_subtask.name}")
|
|
505
|
+
else:
|
|
506
|
+
self._send_stream_message(self.task_id, "subtask_complete", "✅ Subtask completed")
|
|
507
|
+
|
|
386
508
|
# 记录完成的子任务
|
|
387
509
|
self.global_state.log_operation(
|
|
388
510
|
module="agent",
|
|
389
511
|
operation="subtask_completed",
|
|
390
512
|
data={
|
|
391
|
-
"content": str(self.current_subtask),
|
|
513
|
+
"content": str(self.current_subtask) if self.current_subtask is not None else "Unknown subtask",
|
|
392
514
|
"status": "completed"
|
|
393
515
|
}
|
|
394
516
|
)
|
|
@@ -414,13 +536,24 @@ class AgentS2(UIAgent):
|
|
|
414
536
|
for k, v in d.items()
|
|
415
537
|
}
|
|
416
538
|
}
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
539
|
+
# Handle case where current_subtask might be None
|
|
540
|
+
if self.current_subtask is not None:
|
|
541
|
+
info.update(
|
|
542
|
+
{
|
|
543
|
+
"subtask": self.current_subtask.name, # type: ignore
|
|
544
|
+
"subtask_info": self.current_subtask.info, # type: ignore
|
|
545
|
+
"subtask_status": self.subtask_status,
|
|
546
|
+
}
|
|
547
|
+
)
|
|
548
|
+
else:
|
|
549
|
+
# Handle None case - provide default values
|
|
550
|
+
info.update(
|
|
551
|
+
{
|
|
552
|
+
"subtask": "No active subtask",
|
|
553
|
+
"subtask_info": "",
|
|
554
|
+
"subtask_status": "no_subtask",
|
|
555
|
+
}
|
|
556
|
+
)
|
|
424
557
|
|
|
425
558
|
# 记录predict函数总执行时间
|
|
426
559
|
predict_total_time = time.time() - predict_start_time
|
|
@@ -538,18 +671,23 @@ class AgentSFast(UIAgent):
|
|
|
538
671
|
enable_takeover: bool = False,
|
|
539
672
|
enable_search: bool = True,
|
|
540
673
|
enable_reflection: bool = True,
|
|
674
|
+
tools_config: dict | None = None,
|
|
541
675
|
# enable_reflection: bool = False,
|
|
542
676
|
):
|
|
543
|
-
"""
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
677
|
+
"""
|
|
678
|
+
Create and initialize an AgentSFast instance, configuring tools, memory paths, and optional features.
|
|
679
|
+
|
|
680
|
+
Parameters:
|
|
681
|
+
platform (str): Operating system platform identifier (e.g., "darwin", "linux", "windows"); used to scope platform-specific knowledge base.
|
|
682
|
+
screen_size (List[int]): Screen width and height used for grounding calculations.
|
|
683
|
+
memory_root_path (str): Root directory for agent memory storage.
|
|
684
|
+
memory_folder_name (str): Subfolder name under memory_root_path for this agent's knowledge base.
|
|
685
|
+
kb_release_tag (str): Knowledge base release tag used for bookkeeping or compatibility.
|
|
686
|
+
enable_takeover (bool): If True, enable user takeover capabilities in the fast action generator.
|
|
687
|
+
enable_search (bool): If True, enable web/search-related features when registering tools.
|
|
688
|
+
enable_reflection (bool): If True, enable trajectory reflection and a reflection agent to summarize agent behavior.
|
|
689
|
+
tools_config (dict | None): Optional pre-loaded tools configuration; if omitted, configuration is loaded from disk.
|
|
690
|
+
|
|
553
691
|
"""
|
|
554
692
|
super().__init__(
|
|
555
693
|
platform,
|
|
@@ -562,20 +700,24 @@ class AgentSFast(UIAgent):
|
|
|
562
700
|
self.enable_takeover = enable_takeover
|
|
563
701
|
self.enable_search = enable_search
|
|
564
702
|
self.enable_reflection = enable_reflection
|
|
703
|
+
self.task_id = None # Will be set when task starts
|
|
565
704
|
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
self.tools_config = json.load(f)
|
|
570
|
-
print(f"Loaded tools configuration from: {tools_config_path}")
|
|
705
|
+
if tools_config is not None:
|
|
706
|
+
self.tools_config = tools_config
|
|
707
|
+
# Create the dictionary mapping from the list-based config
|
|
571
708
|
self.Tools_dict = {}
|
|
572
709
|
for tool in self.tools_config["tools"]:
|
|
573
710
|
tool_name = tool["tool_name"]
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
711
|
+
# Create a copy of the tool's config to avoid modifying the original
|
|
712
|
+
config_copy = tool.copy()
|
|
713
|
+
# Rename 'model_name' to 'model' for consistency in downstream use
|
|
714
|
+
if 'model_name' in config_copy:
|
|
715
|
+
config_copy['model'] = config_copy.pop('model_name')
|
|
716
|
+
# Remove tool_name as it's now the key
|
|
717
|
+
config_copy.pop('tool_name', None)
|
|
718
|
+
self.Tools_dict[tool_name] = config_copy
|
|
719
|
+
else:
|
|
720
|
+
self.tools_config, self.Tools_dict = load_config()
|
|
579
721
|
|
|
580
722
|
# Initialize agent's knowledge base path
|
|
581
723
|
self.local_kb_path = os.path.join(
|
|
@@ -594,21 +736,25 @@ class AgentSFast(UIAgent):
|
|
|
594
736
|
self.reset()
|
|
595
737
|
|
|
596
738
|
def reset(self) -> None:
|
|
597
|
-
"""
|
|
739
|
+
"""
|
|
740
|
+
Reinitialize the fast-agent components and reset internal runtime state.
|
|
741
|
+
|
|
742
|
+
Initializes and registers the fast action generator tool (and traj_reflector if reflection is enabled), configures search/auth parameters from tool configuration, creates or updates the grounding subsystem with resolved grounding dimensions, resets counters and runtime references (step_count, turn_count, latest_action, global_state), and propagates the current task_id to any registered tools.
|
|
743
|
+
"""
|
|
598
744
|
# Initialize the fast action generator tool
|
|
599
745
|
self.fast_action_generator = Tools()
|
|
600
746
|
self.fast_action_generator_tool = "fast_action_generator_with_takeover" if self.enable_takeover else "fast_action_generator"
|
|
601
|
-
|
|
747
|
+
|
|
602
748
|
# Get tool configuration from tools_config
|
|
603
749
|
tool_config = None
|
|
604
750
|
for tool in self.tools_config["tools"]:
|
|
605
751
|
if tool["tool_name"] == self.fast_action_generator_tool:
|
|
606
752
|
tool_config = tool
|
|
607
753
|
break
|
|
608
|
-
|
|
754
|
+
|
|
609
755
|
# Prepare tool parameters
|
|
610
756
|
tool_params = {}
|
|
611
|
-
|
|
757
|
+
|
|
612
758
|
# First check global search switch
|
|
613
759
|
if not self.enable_search:
|
|
614
760
|
# If global search is disabled, force disable search for this tool
|
|
@@ -622,15 +768,28 @@ class AgentSFast(UIAgent):
|
|
|
622
768
|
tool_params["enable_search"] = enable_search
|
|
623
769
|
tool_params["search_provider"] = tool_config.get("search_provider", "bocha")
|
|
624
770
|
tool_params["search_model"] = tool_config.get("search_model", "")
|
|
625
|
-
|
|
771
|
+
|
|
626
772
|
logger.info(f"Configuring {self.fast_action_generator_tool} with search enabled: {enable_search} (from config)")
|
|
627
|
-
|
|
628
|
-
#
|
|
773
|
+
|
|
774
|
+
# Get base config from Tools_dict
|
|
775
|
+
tool_config = self.Tools_dict[self.fast_action_generator_tool].copy()
|
|
776
|
+
provider = tool_config.get("provider")
|
|
777
|
+
model = tool_config.get("model")
|
|
778
|
+
|
|
779
|
+
# Merge with search-related parameters
|
|
780
|
+
all_params = {**tool_config, **tool_params}
|
|
781
|
+
|
|
782
|
+
auth_keys = ['api_key', 'base_url', 'endpoint_url', 'azure_endpoint', 'api_version']
|
|
783
|
+
for key in auth_keys:
|
|
784
|
+
if key in all_params:
|
|
785
|
+
logger.info(f"AgentSFast.reset: Setting {key} for fast_action_generator_tool")
|
|
786
|
+
|
|
787
|
+
# Register the tool with all parameters
|
|
629
788
|
self.fast_action_generator.register_tool(
|
|
630
|
-
self.fast_action_generator_tool,
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
**
|
|
789
|
+
self.fast_action_generator_tool,
|
|
790
|
+
provider,
|
|
791
|
+
model,
|
|
792
|
+
**all_params
|
|
634
793
|
)
|
|
635
794
|
|
|
636
795
|
if self.enable_reflection:
|
|
@@ -660,15 +819,38 @@ class AgentSFast(UIAgent):
|
|
|
660
819
|
self.global_state: GlobalState = Registry.get("GlobalStateStore") # type: ignore
|
|
661
820
|
self.latest_action = None
|
|
662
821
|
|
|
663
|
-
|
|
664
|
-
|
|
822
|
+
# Pass task_id to tools if available
|
|
823
|
+
if self.task_id:
|
|
824
|
+
self.fast_action_generator.task_id = self.task_id
|
|
825
|
+
if self.enable_reflection and hasattr(self, 'reflection_agent'):
|
|
826
|
+
self.reflection_agent.task_id = self.task_id
|
|
665
827
|
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
828
|
+
def set_task_id(self, task_id: str) -> None:
|
|
829
|
+
"""
|
|
830
|
+
Store the task identifier on the agent and propagate it to subcomponents that use it.
|
|
831
|
+
|
|
832
|
+
Parameters:
|
|
833
|
+
task_id (str): Identifier for the active task; assigned to this agent and, if present, to
|
|
834
|
+
`fast_action_generator` and `reflection_agent`.
|
|
835
|
+
"""
|
|
836
|
+
self.task_id = task_id
|
|
837
|
+
# Also set task_id for components if they exist
|
|
838
|
+
if hasattr(self, 'fast_action_generator') and self.fast_action_generator:
|
|
839
|
+
self.fast_action_generator.task_id = task_id
|
|
840
|
+
if hasattr(self, 'reflection_agent') and self.reflection_agent:
|
|
841
|
+
self.reflection_agent.task_id = task_id
|
|
669
842
|
|
|
843
|
+
def predict(self, instruction: str, observation: Dict) -> Tuple[Dict, List[str]]:
|
|
844
|
+
"""
|
|
845
|
+
Generate the next executor plan and corresponding actions using the configured fast action generator.
|
|
846
|
+
|
|
847
|
+
Parameters:
|
|
848
|
+
instruction (str): Natural language task description.
|
|
849
|
+
observation (Dict): Current UI state; must include a "screenshot" entry with the screen image.
|
|
850
|
+
|
|
670
851
|
Returns:
|
|
671
|
-
|
|
852
|
+
executor_info (dict): Contains at least the keys `executor_plan` (raw plan text), `reflection` (reflection text or empty string), and `plan_code` (the latest extracted/used action code).
|
|
853
|
+
actions (List[dict]): List of action dictionaries produced by grounding execution; typically a single action dict describing the operation to perform.
|
|
672
854
|
"""
|
|
673
855
|
import time
|
|
674
856
|
predict_start_time = time.time()
|
|
@@ -725,11 +907,14 @@ class AgentSFast(UIAgent):
|
|
|
725
907
|
generator_message = textwrap.dedent(f"""
|
|
726
908
|
Task Description: {instruction}
|
|
727
909
|
""")
|
|
728
|
-
|
|
910
|
+
|
|
729
911
|
generator_message += f"\n\nPlease refer to the agent log to understand the progress and context of the task so far.\n{agent_log}"
|
|
730
912
|
|
|
731
913
|
fast_action_start_time = time.time()
|
|
732
|
-
|
|
914
|
+
|
|
915
|
+
# Stream action generation start message
|
|
916
|
+
self._send_stream_message(self.task_id, "thinking", "Generating execution actions quickly...")
|
|
917
|
+
|
|
733
918
|
plan, total_tokens, cost_string = self.fast_action_generator.execute_tool(
|
|
734
919
|
self.fast_action_generator_tool,
|
|
735
920
|
{
|
|
@@ -738,9 +923,9 @@ class AgentSFast(UIAgent):
|
|
|
738
923
|
}
|
|
739
924
|
)
|
|
740
925
|
self.fast_action_generator.reset(self.fast_action_generator_tool)
|
|
741
|
-
|
|
926
|
+
|
|
742
927
|
fast_action_execution_time = time.time() - fast_action_start_time
|
|
743
|
-
|
|
928
|
+
|
|
744
929
|
self.global_state.log_operation(
|
|
745
930
|
module="agent",
|
|
746
931
|
operation="fast_action_execution",
|
|
@@ -750,7 +935,12 @@ class AgentSFast(UIAgent):
|
|
|
750
935
|
"cost": cost_string
|
|
751
936
|
}
|
|
752
937
|
)
|
|
753
|
-
|
|
938
|
+
|
|
939
|
+
# Stream action plan message
|
|
940
|
+
if self.task_id:
|
|
941
|
+
plan_preview = plan[:100] + "..." if len(plan) > 100 else plan
|
|
942
|
+
self._send_stream_message(self.task_id, "action_plan", f"Quickly generate execution plans: {plan_preview}")
|
|
943
|
+
|
|
754
944
|
logger.info("Fast Action Plan: %s", plan)
|
|
755
945
|
|
|
756
946
|
current_width, current_height = self.global_state.get_screen_size()
|
|
@@ -809,13 +999,18 @@ class AgentSFast(UIAgent):
|
|
|
809
999
|
|
|
810
1000
|
self.step_count += 1
|
|
811
1001
|
self.turn_count += 1
|
|
812
|
-
|
|
1002
|
+
|
|
1003
|
+
# Stream action execution message
|
|
1004
|
+
if actions:
|
|
1005
|
+
action_type = actions[0].get("type", "unknown")
|
|
1006
|
+
self._send_stream_message(self.task_id, "action", f"Execute an action: {action_type}")
|
|
1007
|
+
|
|
813
1008
|
executor_info = {
|
|
814
1009
|
"executor_plan": plan,
|
|
815
1010
|
"reflection": reflection or "",
|
|
816
1011
|
"plan_code": self.latest_action
|
|
817
1012
|
}
|
|
818
|
-
|
|
1013
|
+
|
|
819
1014
|
predict_total_time = time.time() - predict_start_time
|
|
820
1015
|
self.global_state.log_operation(
|
|
821
1016
|
module="agent",
|
|
@@ -827,4 +1022,4 @@ class AgentSFast(UIAgent):
|
|
|
827
1022
|
}
|
|
828
1023
|
)
|
|
829
1024
|
|
|
830
|
-
return executor_info, actions
|
|
1025
|
+
return executor_info, actions
|