lybic-guiagents 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of lybic-guiagents may be problematic — see the package registry's advisory page for details.
- desktop_env/__init__.py +1 -0
- desktop_env/actions.py +203 -0
- desktop_env/controllers/__init__.py +0 -0
- desktop_env/controllers/python.py +471 -0
- desktop_env/controllers/setup.py +882 -0
- desktop_env/desktop_env.py +509 -0
- desktop_env/evaluators/__init__.py +5 -0
- desktop_env/evaluators/getters/__init__.py +41 -0
- desktop_env/evaluators/getters/calc.py +15 -0
- desktop_env/evaluators/getters/chrome.py +1774 -0
- desktop_env/evaluators/getters/file.py +154 -0
- desktop_env/evaluators/getters/general.py +42 -0
- desktop_env/evaluators/getters/gimp.py +38 -0
- desktop_env/evaluators/getters/impress.py +126 -0
- desktop_env/evaluators/getters/info.py +24 -0
- desktop_env/evaluators/getters/misc.py +406 -0
- desktop_env/evaluators/getters/replay.py +20 -0
- desktop_env/evaluators/getters/vlc.py +86 -0
- desktop_env/evaluators/getters/vscode.py +35 -0
- desktop_env/evaluators/metrics/__init__.py +160 -0
- desktop_env/evaluators/metrics/basic_os.py +68 -0
- desktop_env/evaluators/metrics/chrome.py +493 -0
- desktop_env/evaluators/metrics/docs.py +1011 -0
- desktop_env/evaluators/metrics/general.py +665 -0
- desktop_env/evaluators/metrics/gimp.py +637 -0
- desktop_env/evaluators/metrics/libreoffice.py +28 -0
- desktop_env/evaluators/metrics/others.py +92 -0
- desktop_env/evaluators/metrics/pdf.py +31 -0
- desktop_env/evaluators/metrics/slides.py +957 -0
- desktop_env/evaluators/metrics/table.py +585 -0
- desktop_env/evaluators/metrics/thunderbird.py +176 -0
- desktop_env/evaluators/metrics/utils.py +719 -0
- desktop_env/evaluators/metrics/vlc.py +524 -0
- desktop_env/evaluators/metrics/vscode.py +283 -0
- desktop_env/providers/__init__.py +35 -0
- desktop_env/providers/aws/__init__.py +0 -0
- desktop_env/providers/aws/manager.py +278 -0
- desktop_env/providers/aws/provider.py +186 -0
- desktop_env/providers/aws/provider_with_proxy.py +315 -0
- desktop_env/providers/aws/proxy_pool.py +193 -0
- desktop_env/providers/azure/__init__.py +0 -0
- desktop_env/providers/azure/manager.py +87 -0
- desktop_env/providers/azure/provider.py +207 -0
- desktop_env/providers/base.py +97 -0
- desktop_env/providers/gcp/__init__.py +0 -0
- desktop_env/providers/gcp/manager.py +0 -0
- desktop_env/providers/gcp/provider.py +0 -0
- desktop_env/providers/virtualbox/__init__.py +0 -0
- desktop_env/providers/virtualbox/manager.py +463 -0
- desktop_env/providers/virtualbox/provider.py +124 -0
- desktop_env/providers/vmware/__init__.py +0 -0
- desktop_env/providers/vmware/manager.py +455 -0
- desktop_env/providers/vmware/provider.py +105 -0
- gui_agents/__init__.py +0 -0
- gui_agents/agents/Action.py +209 -0
- gui_agents/agents/__init__.py +0 -0
- gui_agents/agents/agent_s.py +832 -0
- gui_agents/agents/global_state.py +610 -0
- gui_agents/agents/grounding.py +651 -0
- gui_agents/agents/hardware_interface.py +129 -0
- gui_agents/agents/manager.py +568 -0
- gui_agents/agents/translator.py +132 -0
- gui_agents/agents/worker.py +355 -0
- gui_agents/cli_app.py +560 -0
- gui_agents/core/__init__.py +0 -0
- gui_agents/core/engine.py +1496 -0
- gui_agents/core/knowledge.py +449 -0
- gui_agents/core/mllm.py +555 -0
- gui_agents/tools/__init__.py +0 -0
- gui_agents/tools/tools.py +727 -0
- gui_agents/unit_test/__init__.py +0 -0
- gui_agents/unit_test/run_tests.py +65 -0
- gui_agents/unit_test/test_manager.py +330 -0
- gui_agents/unit_test/test_worker.py +269 -0
- gui_agents/utils/__init__.py +0 -0
- gui_agents/utils/analyze_display.py +301 -0
- gui_agents/utils/common_utils.py +263 -0
- gui_agents/utils/display_viewer.py +281 -0
- gui_agents/utils/embedding_manager.py +53 -0
- gui_agents/utils/image_axis_utils.py +27 -0
- lybic_guiagents-0.1.0.dist-info/METADATA +416 -0
- lybic_guiagents-0.1.0.dist-info/RECORD +85 -0
- lybic_guiagents-0.1.0.dist-info/WHEEL +5 -0
- lybic_guiagents-0.1.0.dist-info/licenses/LICENSE +201 -0
- lybic_guiagents-0.1.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import pyautogui
|
|
4
|
+
from gui_agents.agents.Backend.Backend import Backend
|
|
5
|
+
from gui_agents.agents.Backend.ADBBackend import ADBBackend
|
|
6
|
+
from gui_agents.agents.Backend.LybicBackend import LybicBackend
|
|
7
|
+
from gui_agents.agents.Backend.PyAutoGUIBackend import PyAutoGUIBackend
|
|
8
|
+
from gui_agents.agents.Backend.PyAutoGUIVMwareBackend import PyAutoGUIVMwareBackend
|
|
9
|
+
"""hardware_interface.py ▸ Execute Action objects on real devices / emulators
|
|
10
|
+
===============================================================================
|
|
11
|
+
This module is the *single entry point* that upper‑layer planners / executors
|
|
12
|
+
use to perform UI operations. It is deliberately thin:
|
|
13
|
+
|
|
14
|
+
* Accepts one `Action` **or** a `List[Action]` (defined in *actions.py*).
|
|
15
|
+
* Delegates to a concrete *Backend* which knows how to translate the `Action`
|
|
16
|
+
into platform‑specific calls (PyAutoGUI, ADB, Lybic cloud device, …).
|
|
17
|
+
* Performs minimal capability checks + error propagation.
|
|
18
|
+
|
|
19
|
+
The default backend implemented here is **PyAutoGUIBackend**. Stubs for
|
|
20
|
+
**ADBBackend** and **LybicBackend** show how to extend the system.
|
|
21
|
+
|
|
22
|
+
--------------------------------------------------------------------------
|
|
23
|
+
Quick usage
|
|
24
|
+
--------------------------------------------------------------------------
|
|
25
|
+
```python
|
|
26
|
+
from actions import Click
|
|
27
|
+
from hardware_interface import HardwareInterface
|
|
28
|
+
|
|
29
|
+
hwi = HardwareInterface(backend="pyautogui")
|
|
30
|
+
|
|
31
|
+
# Single action
|
|
32
|
+
hwi.dispatch(Click(xy=(960, 540)))
|
|
33
|
+
|
|
34
|
+
# Batch
|
|
35
|
+
plan = [Click(xy=(100,200)), Click(xy=(300,400))]
|
|
36
|
+
hwi.dispatch(plan)
|
|
37
|
+
|
|
38
|
+
# actionDict
|
|
39
|
+
hwi.dispatchDict({"type": "Click", "xy": [200, 300]})
|
|
40
|
+
|
|
41
|
+
```
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
from typing import List, Type, Dict, Set, Union
|
|
45
|
+
|
|
46
|
+
# Import your Action primitives
|
|
47
|
+
from gui_agents.agents.Action import (
|
|
48
|
+
Action,
|
|
49
|
+
Screenshot,
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
# Public API of this module: the facade class plus every backend it can route to.
__all__ = [
    "HardwareInterface",
    "Backend",
    "PyAutoGUIBackend",
    "ADBBackend",
    "LybicBackend",
    "PyAutoGUIVMwareBackend",
]
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# ---------------------------------------------------------------------------
|
|
64
|
+
# Facade – single entry point
|
|
65
|
+
# ---------------------------------------------------------------------------
|
|
66
|
+
class HardwareInterface:
    """High-level facade that routes Action objects to a chosen backend.

    Accepts either a ready-made ``Backend`` instance or the (case-insensitive)
    name of a backend registered in ``BACKEND_MAP``; keyword arguments are
    forwarded to the backend constructor when a name is given.
    """

    # Registry of backend name -> backend class; keys are matched after
    # lower-casing the caller-supplied name.
    BACKEND_MAP: Dict[str, Type[Backend]] = {
        "pyautogui": PyAutoGUIBackend,
        "adb": ADBBackend,
        "lybic": LybicBackend,
        "pyautogui_vmware": PyAutoGUIVMwareBackend,
    }

    # ------------------------------------------------------------------
    def __init__(self, backend: str | Backend = "pyautogui", **backend_kwargs):
        """Create the facade around a concrete backend.

        Args:
            backend: A ``Backend`` instance, or the name of a backend
                registered in ``BACKEND_MAP``.
            **backend_kwargs: Forwarded to the backend constructor when a
                name is given.

        Raises:
            ValueError: If ``backend`` names an unknown backend.
        """
        if isinstance(backend, Backend):
            self.backend: Backend = backend
        else:
            key = backend.lower()
            if key not in self.BACKEND_MAP:
                raise ValueError(f"Unsupported backend '{backend}'. Available: {list(self.BACKEND_MAP)}")
            self.backend = self.BACKEND_MAP[key](**backend_kwargs)

    # ------------------------------------------------------------------
    def dispatch(self, actions: Action | List[Action]):
        """Execute one or multiple actions *in order*.

        Args:
            actions: `Action` instance or list thereof.

        Returns:
            The backend's result when exactly one action was dispatched,
            otherwise ``None`` (batch dispatch has no return value).

        Raises:
            NotImplementedError: If the backend does not support an action's
                type (raised before any further action is executed).
        """
        if isinstance(actions, Action):
            actions = [actions]

        last_result = None
        for act in actions:
            # Special-case Memorize actions: they are bookkeeping-only and
            # must not be forwarded to the backend.  BUGFIX: previously a
            # Memorize inside a batch returned immediately and silently
            # dropped all remaining actions; it is now simply skipped.
            if type(act).__name__ == "Memorize":
                last_result = None
                continue
            if not self.backend.supports(type(act)):
                raise NotImplementedError(
                    f"{type(act).__name__} is not supported by backend {self.backend.__class__.__name__}"
                )
            last_result = self.backend.execute(act)

        # Preserve the historical contract: only a single-action dispatch
        # surfaces the backend's return value.
        if len(actions) == 1:
            return last_result
        return None

    def dispatchDict(self, actionDict: Dict):
        """Convenience helper – accept JSON-style dict(s) instead of Action objects.

        Args:
            actionDict: A single action dict, e.g.
                ``{"type": "Click", "xy": [100, 200]}``, or a list of such
                dicts to execute in order.

        Returns:
            Same as :meth:`dispatch`.
        """
        if isinstance(actionDict, list):
            actions = [Action.from_dict(item) for item in actionDict]
        else:
            actions = Action.from_dict(actionDict)

        return self.dispatch(actions)
|
|
@@ -0,0 +1,568 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import re
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
from typing import Dict, List, Optional, Tuple
|
|
5
|
+
import platform
|
|
6
|
+
|
|
7
|
+
from gui_agents.agents.grounding import ACI
|
|
8
|
+
from gui_agents.core.knowledge import KnowledgeBase
|
|
9
|
+
from gui_agents.agents.global_state import GlobalState
|
|
10
|
+
from gui_agents.store.registry import Registry
|
|
11
|
+
from gui_agents.utils.common_utils import (
|
|
12
|
+
Dag,
|
|
13
|
+
Node,
|
|
14
|
+
parse_dag,
|
|
15
|
+
agent_log_to_string,
|
|
16
|
+
)
|
|
17
|
+
from gui_agents.tools.tools import Tools
|
|
18
|
+
from PIL import Image
|
|
19
|
+
import io
|
|
20
|
+
|
|
21
|
+
# Module-wide logger; the "desktopenv.agent" name groups all agent-side logs.
logger = logging.getLogger("desktopenv.agent")

# Approximate token cost of one screenshot image in the prompt.
NUM_IMAGE_TOKEN = 1105  # Value set of screen of size 1920x1080 for openai vision
|
|
24
|
+
|
|
25
|
+
class Manager:
    """High-level task planner.

    Given a user instruction and the current observation, the Manager
    (optionally) enriches the instruction with retrieved narrative and web
    knowledge, generates a natural-language step-by-step plan, translates it
    into a DAG of subtasks, and returns the subtasks in topological order.
    """

    def __init__(
        self,
        Tools_dict: Dict,
        # engine_params: Dict,
        local_kb_path: str,
        multi_round: bool = False,
        platform: str = platform.system().lower(),
        enable_search: bool = True,
    ):
        """Build all LLM tool agents and the knowledge base.

        Args:
            Tools_dict: Maps tool name -> {"provider": ..., "model": ...}.
                Must contain "subtask_planner", "dag_translator",
                "narrative_summarization", "episode_summarization",
                "embedding", "query_formulator", "context_fusion", and —
                when ``enable_search`` is True — "websearch".
            local_kb_path: Root directory of the local knowledge base.
            multi_round: Whether the agent runs multi-round dialogues.
            platform: OS identifier; the default is evaluated once at import
                time from the host system.
            enable_search: Register a web-search tool when True.
        """
        self.platform = platform
        self.Tools_dict = Tools_dict

        # One Tools agent per LLM-backed capability (see _make_tool_agent).
        self.generator_agent = self._make_tool_agent("subtask_planner")
        self.dag_translator_agent = self._make_tool_agent("dag_translator")
        self.narrative_summarization_agent = self._make_tool_agent("narrative_summarization")
        self.episode_summarization_agent = self._make_tool_agent("episode_summarization")

        self.local_kb_path = local_kb_path

        self.embedding_engine = self._make_tool_agent("embedding")
        KB_Tools_dict = {
            "embedding": self.Tools_dict["embedding"],
            "query_formulator": self.Tools_dict["query_formulator"],
            "context_fusion": self.Tools_dict["context_fusion"],
            "narrative_summarization": self.Tools_dict["narrative_summarization"],
            "episode_summarization": self.Tools_dict["episode_summarization"],
        }

        self.knowledge_base = KnowledgeBase(
            embedding_engine=self.embedding_engine,
            local_kb_path=self.local_kb_path,
            platform=platform,
            Tools_dict=KB_Tools_dict,
        )

        self.global_state: GlobalState = Registry.get("GlobalStateStore")  # type: ignore

        self.planner_history = []
        self.turn_count = 0
        # Populated on the first planning turn; initialised here so that
        # attribute access never raises before that turn has run.
        self.search_query = ""

        # Initialize search engine based on enable_search parameter
        if enable_search:
            self.search_engine = self._make_tool_agent("websearch")
        else:
            self.search_engine = None

        self.multi_round = multi_round

    def _make_tool_agent(self, tool_name: str) -> Tools:
        """Create a ``Tools`` agent with a single registered tool.

        The provider/model pair is looked up under ``tool_name`` in
        ``self.Tools_dict``.
        """
        agent = Tools()
        cfg = self.Tools_dict[tool_name]
        agent.register_tool(tool_name, cfg["provider"], cfg["model"])
        return agent

    def summarize_episode(self, trajectory):
        """Summarize the episode experience for lifelong learning reflection.

        Args:
            trajectory: str: The episode experience to be summarized.

        Returns:
            The model-produced episode summary string.
        """
        # Create Reflection on whole trajectories for next round trial,
        # keep earlier messages as exemplars.
        subtask_summarization, total_tokens, cost_string = self.episode_summarization_agent.execute_tool(
            "episode_summarization", {"str_input": trajectory}
        )
        logger.info(f"Episode summarization tokens: {total_tokens}, cost: {cost_string}")

        self.global_state.log_operation(
            module="manager",
            operation="episode_summarization",
            data={
                "tokens": total_tokens,
                "cost": cost_string,
                "content": subtask_summarization
            }
        )

        return subtask_summarization

    def summarize_narrative(self, trajectory):
        """Summarize the narrative experience for lifelong learning reflection.

        Args:
            trajectory: str: The narrative experience to be summarized.

        Returns:
            The model-produced narrative reflection string.
        """
        # Create Reflection on whole trajectories for next round trial
        lifelong_learning_reflection, total_tokens, cost_string = self.narrative_summarization_agent.execute_tool(
            "narrative_summarization", {"str_input": trajectory}
        )
        logger.info(f"Narrative summarization tokens: {total_tokens}, cost: {cost_string}")

        self.global_state.log_operation(
            module="manager",
            operation="narrative_summarization",
            data={
                "tokens": total_tokens,
                "cost": cost_string,
                "content": lifelong_learning_reflection
            }
        )

        return lifelong_learning_reflection

    def _generate_step_by_step_plan(
        self,
        observation: Dict,
        instruction: str,
        failed_subtask: Optional[Node] = None,
        completed_subtasks_list: Optional[List[Node]] = None,
        remaining_subtasks_list: Optional[List[Node]] = None,
    ) -> Tuple[Dict, str]:
        """Produce a natural-language step-by-step plan for the task.

        On the first turn the instruction is enriched with retrieved
        narrative experience and (optionally) web knowledge.  On later turns
        a re-plan prompt is built from the failed / completed / remaining
        subtasks instead.

        Args:
            observation: Current observation; may contain a "screenshot".
            instruction: The user task instruction.
            failed_subtask: Subtask that failed, when re-planning after failure.
            completed_subtasks_list: Subtasks finished so far (default: none).
            remaining_subtasks_list: Subtasks still pending (default: none).

        Returns:
            Tuple of (planner info dict, plan string).

        Raises:
            Exception: If the planner returns an empty plan.
        """
        import time
        # BUGFIX: the list parameters previously used mutable defaults
        # (``= []``); normalise the None sentinels here instead.
        completed_subtasks_list = completed_subtasks_list or []
        remaining_subtasks_list = remaining_subtasks_list or []
        step_start = time.time()

        # Converts a list of DAG Nodes into a natural language list
        def format_subtask_list(subtasks: List[Node]) -> str:
            res = ""
            for idx, node in enumerate(subtasks):
                res += f"{idx+1}. **{node.name}**:\n"
                bullets = re.split(r"(?<=[.!?;]) +", node.info)
                for bullet in bullets:
                    res += f" - {bullet}\n"
                res += "\n"
            return res

        prefix_message = ""
        # Perform Retrieval only at the first planning step
        if self.turn_count == 0:
            formulate_query_start = time.time()
            self.search_query, total_tokens, cost_string = self.knowledge_base.formulate_query(
                instruction, observation
            )
            formulate_query_time = time.time() - formulate_query_start
            logger.info(f"Formulate query tokens: {total_tokens}, cost: {cost_string}")
            self.global_state.log_operation(
                module="manager",
                operation="formulate_query",
                data={
                    "tokens": total_tokens,
                    "cost": cost_string,
                    "content": self.search_query,
                    "duration": formulate_query_time
                }
            )
            self.global_state.set_search_query(self.search_query)

            most_similar_task = ""
            retrieved_experience = ""
            integrated_knowledge = ""
            # Retrieve most similar narrative (task) experience
            narrative_start = time.time()
            most_similar_task, retrieved_experience, total_tokens, cost_string = (
                self.knowledge_base.retrieve_narrative_experience(instruction)
            )
            logger.info(f"Retrieve narrative experience tokens: {total_tokens}, cost: {cost_string}")
            narrative_time = time.time() - narrative_start
            logger.info(f"[Timing] Manager.retrieve_narrative_experience execution time: {narrative_time:.2f} seconds")
            self.global_state.log_operation(
                module="manager",
                operation="retrieve_narrative_experience",
                data={
                    "tokens": total_tokens,
                    "cost": cost_string,
                    "content": "Most similar task: " + most_similar_task + "\n" + retrieved_experience.strip(),
                    "duration": narrative_time
                }
            )

            logger.info(
                "SIMILAR TASK EXPERIENCE: %s",
                most_similar_task + "\n" + retrieved_experience.strip(),
            )

            # Retrieve knowledge from the web if search_engine is provided
            if self.search_engine is not None:
                knowledge_start = time.time()
                retrieved_knowledge, total_tokens, cost_string = self.knowledge_base.retrieve_knowledge(
                    instruction=instruction,
                    search_query=self.search_query,
                    search_engine=self.search_engine,
                )
                logger.info(f"Retrieve knowledge tokens: {total_tokens}, cost: {cost_string}")

                knowledge_time = time.time() - knowledge_start
                logger.info(f"[Timing] Manager.retrieve_knowledge execution time: {knowledge_time:.2f} seconds")
                self.global_state.log_operation(
                    module="manager",
                    operation="retrieve_knowledge",
                    data={
                        "tokens": total_tokens,
                        "cost": cost_string,
                        "content": retrieved_knowledge,
                        "duration": knowledge_time
                    }
                )

                logger.info("RETRIEVED KNOWLEDGE: %s", retrieved_knowledge)

                if retrieved_knowledge is not None:
                    # Fuse the retrieved knowledge and experience
                    fusion_start = time.time()
                    integrated_knowledge, total_tokens, cost_string = self.knowledge_base.knowledge_fusion(
                        observation=observation,
                        instruction=instruction,
                        web_knowledge=retrieved_knowledge,
                        similar_task=most_similar_task,
                        experience=retrieved_experience,
                    )
                    logger.info(f"Knowledge fusion tokens: {total_tokens}, cost: {cost_string}")
                    fusion_time = time.time() - fusion_start
                    logger.info(f"[Timing] Manager.knowledge_fusion execution time: {fusion_time:.2f} seconds")
                    self.global_state.log_operation(
                        module="manager",
                        operation="knowledge_fusion",
                        data={
                            "tokens": total_tokens,
                            "cost": cost_string,
                            "content": integrated_knowledge,
                            "duration": fusion_time
                        }
                    )

                    logger.info("INTEGRATED KNOWLEDGE: %s", integrated_knowledge)

            # Fall back to the raw narrative experience when fusion
            # produced nothing.
            integrated_knowledge = integrated_knowledge or retrieved_experience

            # Add the integrated knowledge to the task instruction in the system prompt
            if integrated_knowledge:
                instruction += f"\nYou may refer to some retrieved knowledge if you think they are useful.{integrated_knowledge}"
            # NOTE(review): placed at first-turn scope so the task description
            # prefix is emitted on turn 0 regardless of retrieval outcome —
            # confirm against the original source's indentation.
            prefix_message = f"TASK_DESCRIPTION is {instruction}"

        # Re-plan on failure case
        if failed_subtask:
            agent_log = agent_log_to_string(self.global_state.get_agent_log())
            generator_message = (
                f"The subtask {failed_subtask} cannot be completed. Please generate a new plan for the remainder of the trajectory.\n\n"
                f"Successfully Completed Subtasks:\n{format_subtask_list(completed_subtasks_list)}\n"
                f"Please refer to the agent log to understand the progress and context of the task so far.\n{agent_log}"
            )
        # Re-plan on subtask completion case
        elif len(completed_subtasks_list) + len(remaining_subtasks_list) > 0:
            agent_log = agent_log_to_string(self.global_state.get_agent_log())
            generator_message = (
                "The current trajectory and desktop state is provided. Please revise the plan for the following trajectory.\n\n"
                f"Successfully Completed Subtasks:\n{format_subtask_list(completed_subtasks_list)}\n"
                f"Future Remaining Subtasks:\n{format_subtask_list(remaining_subtasks_list)}\n"
                f"Please refer to the agent log to understand the progress and context of the task so far.\n{agent_log}"
            )
        # Initial plan case
        else:
            generator_message = "Please generate the initial plan for the task.\n"

        generator_message = prefix_message + "\n" + generator_message
        logger.info("GENERATOR MESSAGE: %s", generator_message)
        logger.info("GENERATING HIGH LEVEL PLAN")

        subtask_planner_start = time.time()
        plan, total_tokens, cost_string = self.generator_agent.execute_tool(
            "subtask_planner",
            {"str_input": generator_message, "img_input": observation.get("screenshot", None)},
        )
        logger.info(f"Subtask planner tokens: {total_tokens}, cost: {cost_string}")
        subtask_planner_time = time.time() - subtask_planner_start
        logger.info(f"[Timing] Manager.subtask_planner execution time: {subtask_planner_time:.2f} seconds")
        self.global_state.log_operation(
            module="manager",
            operation="subtask_planner",
            data={
                "tokens": total_tokens,
                "cost": cost_string,
                "content": plan,
                "duration": subtask_planner_time
            }
        )

        step_time = time.time() - step_start
        logger.info(f"[Timing] Manager._generate_step_by_step_plan execution time: {step_time:.2f} seconds")
        self.global_state.log_operation(
            module="manager",
            operation="Manager._generate_step_by_step_plan",
            data={"duration": step_time}
        )

        if plan == "":
            raise Exception("Plan Generation Failed - Fix the Prompt")

        logger.info("HIGH LEVEL STEP BY STEP PLAN: %s", plan)

        self.planner_history.append(plan)
        self.turn_count += 1

        planner_info = {
            "search_query": self.search_query,
            "goal_plan": plan,
        }

        # Sanity check on the tool's return type (was ``type(plan) == str``).
        assert isinstance(plan, str)

        return planner_info, plan

    def _generate_dag(self, instruction: str, plan: str) -> Tuple[Dict, Dag]:
        """Translate a textual plan into a ``Dag``, retrying on parse failure.

        Falls back to a single-node default DAG when no valid DAG can be
        parsed after ``max_retries`` attempts.

        Returns:
            Tuple of (dag info dict, Dag).
        """
        import time
        dag_start = time.time()

        logger.info("GENERATING DAG")

        # Add maximum retry count
        max_retries = 2
        retry_count = 0
        dag = None

        while retry_count < max_retries and dag is None:
            if retry_count > 0:
                logger.warning(f"Retrying DAG generation, attempt {retry_count}")
                self.global_state.log_operation(
                    module="manager",
                    operation="dag_retry",
                    data={"retry_count": retry_count}
                )

            # Generate DAG
            dag_raw, total_tokens, cost_string = self.dag_translator_agent.execute_tool(
                "dag_translator", {"str_input": f"Instruction: {instruction}\nPlan: {plan}"}
            )
            logger.info(f"DAG translator tokens: {total_tokens}, cost: {cost_string}")

            # Try to parse DAG
            dag = parse_dag(dag_raw)

            # If parsing fails, increment retry count
            if dag is None:
                retry_count += 1
                # If not the last attempt, wait a short time before retrying
                if retry_count < max_retries:
                    time.sleep(1)

        dag_time = time.time() - dag_start
        logger.info(f"[Timing] Manager._generate_dag execution time: {dag_time:.2f} seconds")

        logger.info("Generated DAG: %s", dag_raw)
        self.global_state.log_operation(
            module="manager",
            operation="generated_dag",
            data={
                "tokens": total_tokens,
                "cost": cost_string,
                "content": dag_raw,
                "duration": dag_time,
                "retry_count": retry_count
            }
        )

        dag_info = {
            "dag": dag_raw,
        }

        # If all attempts fail, create a simple default DAG
        if dag is None:
            logger.error("Unable to generate valid DAG, using default DAG")
            # Create a simple default DAG with just one "Execute Task" node
            default_node = Node(name="Execute Task", info=f"Execute instruction: {instruction}")
            dag = Dag(nodes=[default_node], edges=[])

            self.global_state.log_operation(
                module="manager",
                operation="default_dag_created",
                data={"content": "Using default DAG because valid DAG could not be parsed from model output"}
            )

        return dag_info, dag

    def _topological_sort(self, dag: Dag) -> List[Node]:
        """Topological sort of the DAG using DFS.

        Args:
            dag: Dag: Object representation of the DAG with nodes and edges.

        Returns:
            Nodes in dependency order; on any error, the original node list.
        """
        # (Removed a redundant local ``import logging`` that shadowed the
        # module-level logger with an identically-named one.)

        # Check if DAG is empty
        if not dag.nodes:
            logger.warning("DAG has no nodes, returning empty list")
            return []

        # If there's only one node, return it directly
        if len(dag.nodes) == 1:
            logger.info("DAG has only one node, returning directly")
            return dag.nodes

        def dfs(node_name, visited, temp_visited, stack):
            # If node is already in current path, we have a cycle
            if node_name in temp_visited:
                raise ValueError(f"Cycle detected in DAG involving node: {node_name}")

            # If node has been visited, skip
            if visited.get(node_name, False):
                return

            # Mark node as part of current path
            temp_visited.add(node_name)
            visited[node_name] = True

            # Visit all neighbors
            for neighbor in adj_list.get(node_name, []):
                if not visited.get(neighbor, False):
                    dfs(neighbor, visited, temp_visited, stack)

            # Remove node from current path
            temp_visited.remove(node_name)
            stack.append(node_name)

        try:
            # Build adjacency list
            adj_list = defaultdict(list)
            for u, v in dag.edges:
                if not u or not v:
                    logger.warning(f"Skipping invalid edge: {u} -> {v}")
                    continue
                adj_list[u.name].append(v.name)

            visited = {node.name: False for node in dag.nodes}
            temp_visited = set()  # For cycle detection
            stack = []

            # Perform DFS for each unvisited node
            for node in dag.nodes:
                if not visited.get(node.name, False):
                    dfs(node.name, visited, temp_visited, stack)

            # Return topologically sorted nodes
            sorted_nodes = []
            for name in stack[::-1]:
                matching_nodes = [n for n in dag.nodes if n.name == name]
                if matching_nodes:
                    sorted_nodes.append(matching_nodes[0])
                else:
                    logger.warning(f"Could not find node named {name} in DAG node list")

            # Check if all nodes are included in result
            if len(sorted_nodes) != len(dag.nodes):
                logger.warning(f"Number of nodes in topological sort result ({len(sorted_nodes)}) does not match number in DAG ({len(dag.nodes)})")

            return sorted_nodes

        except Exception as e:
            logger.error(f"Error during topological sort: {e}")
            # On error, return original node list
            logger.info("Returning unsorted original node list")
            return dag.nodes

    def get_action_queue(
        self,
        Tu: str,
        observation: Dict,
        running_state: str,
        failed_subtask: Optional[Node] = None,
        completed_subtasks_list: Optional[List[Node]] = None,
        remaining_subtasks_list: Optional[List[Node]] = None,
    ):
        """Generate the ordered subtask queue for an instruction.

        Args:
            Tu: Instruction for the task.
            observation: Current observation (may include a screenshot).
            running_state: Caller-supplied running state (not read here;
                kept for interface compatibility).
            failed_subtask: Subtask that failed, when re-planning.
            completed_subtasks_list: Already completed subtasks.
            remaining_subtasks_list: Subtasks still pending.

        Returns:
            Tuple of (planner info dict, list of Node subtasks).  On any
            unhandled error, a single default "Execute Task" node is
            returned instead of raising.
        """
        import time
        action_queue_start = time.time()
        # BUGFIX: normalise the previously-mutable list defaults.
        completed_subtasks_list = completed_subtasks_list or []
        remaining_subtasks_list = remaining_subtasks_list or []

        try:
            planner_info, plan = self._generate_step_by_step_plan(
                observation,
                Tu,
                failed_subtask,
                completed_subtasks_list,
                remaining_subtasks_list,
            )

            # Generate the DAG
            try:
                dag_info, dag = self._generate_dag(Tu, plan)
            except Exception as e:
                logger.error(f"Error generating DAG: {e}")
                # Create a simple default DAG with just one "Execute Task" node
                default_node = Node(name="Execute Task", info=f"Execute instruction: {Tu}")
                dag = Dag(nodes=[default_node], edges=[])
                dag_info = {"dag": "Failed to generate DAG, using default DAG"}

                self.global_state.log_operation(
                    module="manager",
                    operation="dag_generation_error",
                    data={"error": str(e), "content": "Using default DAG due to error in DAG generation"}
                )

            # Topological sort of the DAG
            try:
                action_queue = self._topological_sort(dag)
            except Exception as e:
                logger.error(f"Error during topological sort of DAG: {e}")
                # If topological sort fails, use node list directly
                action_queue = dag.nodes

                self.global_state.log_operation(
                    module="manager",
                    operation="topological_sort_error",
                    data={"error": str(e), "content": "Topological sort failed, using node list directly"}
                )

            planner_info.update(dag_info)

            if action_queue:
                logger.info(f"NEXT SUBTASK: {action_queue[0]}")
                self.global_state.log_operation(
                    module="manager",
                    operation="next_subtask",
                    data={"content": str(action_queue[0])}
                )

                if len(action_queue) > 1:
                    logger.info(f"REMAINING SUBTASKS: {action_queue[1:]}")
                    self.global_state.log_operation(
                        module="manager",
                        operation="remaining_subtasks",
                        data={"content": str(action_queue[1:])}
                    )

            action_queue_time = time.time() - action_queue_start
            logger.info(f"[Timing] manager.get_action_queue execution time: {action_queue_time:.2f} seconds")
            self.global_state.log_operation(
                module="manager",
                operation="manager.get_action_queue",
                data={"duration": action_queue_time}
            )

            return planner_info, action_queue

        except Exception as e:
            # Handle any unhandled exceptions in the entire process
            logger.error(f"Unhandled exception in get_action_queue function: {e}")

            # Create a simple default task node
            default_node = Node(name="Execute Task", info=f"Execute instruction: {Tu}")
            action_queue = [default_node]
            planner_info = {"error": str(e), "fallback": "Using default task node"}

            self.global_state.log_operation(
                module="manager",
                operation="get_action_queue_error",
                data={"error": str(e), "content": "Unhandled exception occurred, using default task node"}
            )

            action_queue_time = time.time() - action_queue_start
            logger.info(f"[Timing] manager.get_action_queue (error path) execution time: {action_queue_time:.2f} seconds")

            return planner_info, action_queue
|