computer-use-ootb-internal 0.0.146__py3-none-any.whl → 0.0.148__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- computer_use_ootb_internal/app_teachmode.py +558 -551
- computer_use_ootb_internal/computer_use_demo/animation/test_animation.py +39 -39
- computer_use_ootb_internal/guard_service.py +985 -950
- computer_use_ootb_internal/preparation/star_rail_prepare.py +99 -99
- computer_use_ootb_internal/run_teachmode_ootb_args.py +223 -223
- computer_use_ootb_internal/service_manager.py +194 -194
- computer_use_ootb_internal/signal_connection.py +47 -47
- {computer_use_ootb_internal-0.0.146.dist-info → computer_use_ootb_internal-0.0.148.dist-info}/METADATA +9 -8
- {computer_use_ootb_internal-0.0.146.dist-info → computer_use_ootb_internal-0.0.148.dist-info}/RECORD +11 -11
- computer_use_ootb_internal-0.0.148.dist-info/entry_points.txt +4 -0
- computer_use_ootb_internal-0.0.146.dist-info/entry_points.txt +0 -2
- {computer_use_ootb_internal-0.0.146.dist-info → computer_use_ootb_internal-0.0.148.dist-info}/WHEEL +0 -0
@@ -1,100 +1,100 @@
|
|
1
|
-
# src/computer_use_ootb_internal/preparation/star_rail_prepare.py
|
2
|
-
import time
|
3
|
-
import platform
|
4
|
-
import subprocess # Added for taskkill
|
5
|
-
import pyautogui
|
6
|
-
import webbrowser
|
7
|
-
import logging # Use logging instead of print for better practice
|
8
|
-
|
9
|
-
# Set up logging for this module if needed, or rely on root logger
|
10
|
-
log = logging.getLogger(__name__)
|
11
|
-
|
12
|
-
def run_preparation(state):
|
13
|
-
"""
|
14
|
-
Performs environment preparation specific to Star Rail on Windows.
|
15
|
-
Closes existing Edge browsers, opens the specified URL in a new Edge instance,
|
16
|
-
and performs initial clicks.
|
17
|
-
"""
|
18
|
-
if platform.system() != "Windows":
|
19
|
-
log.info("Star Rail preparation skipped: Not running on Windows.")
|
20
|
-
return
|
21
|
-
|
22
|
-
log.info("Star Rail preparation: Starting environment setup on Windows...")
|
23
|
-
url = "https://sr.mihoyo.com/cloud/#/" # Consider making this configurable later
|
24
|
-
browser_opened = False
|
25
|
-
try:
|
26
|
-
# Attempt to close existing Microsoft Edge processes
|
27
|
-
log.info("Attempting to close existing Microsoft Edge processes...")
|
28
|
-
try:
|
29
|
-
# /F forces termination, /IM specifies image name
|
30
|
-
result = subprocess.run(['taskkill', '/F', '/IM', 'msedge.exe'],
|
31
|
-
capture_output=True, text=True, check=False)
|
32
|
-
if result.returncode == 0:
|
33
|
-
log.info("Successfully sent termination signal to msedge.exe processes.")
|
34
|
-
elif "not found" in result.stderr.lower() or "not found" in result.stdout.lower():
|
35
|
-
log.info("No running msedge.exe processes found to close.")
|
36
|
-
else:
|
37
|
-
log.warning(f"taskkill command finished with return code {result.returncode}. Output: {result.stdout} Stderr: {result.stderr}")
|
38
|
-
time.sleep(2) # Give processes time to close
|
39
|
-
except FileNotFoundError:
|
40
|
-
log.error("Error: 'taskkill' command not found. Make sure it's in the system PATH.")
|
41
|
-
except Exception as e:
|
42
|
-
log.error(f"Error occurred while trying to close Edge: {e}", exc_info=True)
|
43
|
-
|
44
|
-
# Use only webbrowser.open
|
45
|
-
log.info(f"Attempting to open {url} using webbrowser.open()...")
|
46
|
-
if webbrowser.open(url):
|
47
|
-
log.info(f"Successfully requested browser to open {url} via webbrowser.open().")
|
48
|
-
browser_opened = True
|
49
|
-
# Ensure sleep time for browser load before clicks is present
|
50
|
-
time.sleep(5)
|
51
|
-
else:
|
52
|
-
log.warning("webbrowser.open() returned False, indicating potential failure.")
|
53
|
-
|
54
|
-
if not browser_opened:
|
55
|
-
log.error("Failed to confirm browser opening via webbrowser.open(). Will still attempt clicks.")
|
56
|
-
|
57
|
-
# Add pyautogui click after attempting to open the browser
|
58
|
-
log.info("Proceeding with pyautogui actions...")
|
59
|
-
time.sleep(5) # Wait time for the browser to load
|
60
|
-
|
61
|
-
# Get screen size
|
62
|
-
screen_width, screen_height = pyautogui.size()
|
63
|
-
log.info(f"Detected screen size: {screen_width}x{screen_height}")
|
64
|
-
|
65
|
-
# Calculate click coordinates based on a reference resolution (e.g., 1280x720)
|
66
|
-
# TODO: Make these coordinates more robust or configurable
|
67
|
-
click_x_1 = int(screen_width * (1036 / 1280))
|
68
|
-
click_y_1 = int(screen_height * (500 / 720))
|
69
|
-
log.info(f"Calculated click coordinates for starting the game: ({click_x_1}, {click_y_1})")
|
70
|
-
click_x_2 = int(screen_width * (1233 / 1280))
|
71
|
-
click_y_2 = int(screen_height * (30 / 720))
|
72
|
-
log.info(f"Calculated click coordinates for closing the browser warning: ({click_x_2}, {click_y_2})")
|
73
|
-
|
74
|
-
# Disable failsafe before clicking
|
75
|
-
pyautogui.FAILSAFE = False
|
76
|
-
log.info("PyAutoGUI failsafe temporarily disabled.")
|
77
|
-
|
78
|
-
log.info(f"Clicking at coordinates: ({click_x_1}, {click_y_1})")
|
79
|
-
pyautogui.click(click_x_1, click_y_1)
|
80
|
-
time.sleep(2)
|
81
|
-
pyautogui.click(click_x_1, click_y_1) # Double click?
|
82
|
-
|
83
|
-
# Press F11 to attempt fullscreen
|
84
|
-
log.info("Pressing F11 to enter fullscreen...")
|
85
|
-
time.sleep(1) # Short delay before pressing F11
|
86
|
-
pyautogui.press('f11')
|
87
|
-
time.sleep(1)
|
88
|
-
log.info(f"Clicking at coordinates: ({click_x_2}, {click_y_2})")
|
89
|
-
pyautogui.click(click_x_2, click_y_2)
|
90
|
-
time.sleep(1)
|
91
|
-
pyautogui.click(click_x_2, click_y_2)
|
92
|
-
|
93
|
-
log.info("Star Rail preparation clicks completed.")
|
94
|
-
|
95
|
-
except Exception as e:
|
96
|
-
log.error(f"Error during Star Rail preparation (browser/click): {e}", exc_info=True)
|
97
|
-
finally:
|
98
|
-
# Ensure failsafe is re-enabled
|
99
|
-
pyautogui.FAILSAFE = True
|
1
|
+
# src/computer_use_ootb_internal/preparation/star_rail_prepare.py
|
2
|
+
import time
|
3
|
+
import platform
|
4
|
+
import subprocess # Added for taskkill
|
5
|
+
import pyautogui
|
6
|
+
import webbrowser
|
7
|
+
import logging # Use logging instead of print for better practice
|
8
|
+
|
9
|
+
# Set up logging for this module if needed, or rely on root logger
|
10
|
+
log = logging.getLogger(__name__)
|
11
|
+
|
12
|
+
def run_preparation(state):
    """Prepare the Windows desktop for a Star Rail cloud-gaming session.

    Force-terminates running Microsoft Edge processes, opens the Star Rail
    cloud URL through ``webbrowser.open`` (which uses whatever browser the
    system treats as default, not necessarily Edge), then clicks twice on the
    game start position, presses F11 and clicks twice on the position used to
    dismiss the browser warning. Click positions are scaled from a 1280x720
    reference layout to the detected screen size.

    Browser and GUI-automation failures are logged and swallowed, so this
    function does not raise for them. On non-Windows platforms it logs a
    message and returns immediately.

    Args:
        state: Accepted for interface compatibility; not read by this
            function.
    """
    if platform.system() != "Windows":
        log.info("Star Rail preparation skipped: Not running on Windows.")
        return

    log.info("Star Rail preparation: Starting environment setup on Windows...")
    url = "https://sr.mihoyo.com/cloud/#/"  # Consider making this configurable later

    # Remember the caller's failsafe setting so it can be restored afterwards
    # instead of being unconditionally forced to True.
    previous_failsafe = pyautogui.FAILSAFE
    try:
        # Attempt to close existing Microsoft Edge processes.
        log.info("Attempting to close existing Microsoft Edge processes...")
        try:
            # /F forces termination, /IM selects processes by image name.
            result = subprocess.run(
                ["taskkill", "/F", "/IM", "msedge.exe"],
                capture_output=True,
                text=True,
                check=False,
            )
            if result.returncode == 0:
                log.info("Successfully sent termination signal to msedge.exe processes.")
            elif "not found" in result.stderr.lower() or "not found" in result.stdout.lower():
                # NOTE(review): this text match presumably only works with
                # English taskkill output - confirm on localized systems.
                log.info("No running msedge.exe processes found to close.")
            else:
                log.warning(
                    "taskkill command finished with return code %s. Output: %s Stderr: %s",
                    result.returncode,
                    result.stdout,
                    result.stderr,
                )
            time.sleep(2)  # Give processes time to close
        except FileNotFoundError:
            log.error("Error: 'taskkill' command not found. Make sure it's in the system PATH.")
        except Exception as e:
            # Best effort: a failed cleanup must not prevent opening the page.
            log.exception("Error occurred while trying to close Edge: %s", e)

        log.info("Attempting to open %s using webbrowser.open()...", url)
        if webbrowser.open(url):
            log.info("Successfully requested browser to open %s via webbrowser.open().", url)
            # Extra wait for the browser to start loading before any clicks.
            time.sleep(5)
        else:
            log.warning("webbrowser.open() returned False, indicating potential failure.")
            log.error("Failed to confirm browser opening via webbrowser.open(). Will still attempt clicks.")

        log.info("Proceeding with pyautogui actions...")
        time.sleep(5)  # Wait time for the browser to load

        screen_width, screen_height = pyautogui.size()
        log.info("Detected screen size: %sx%s", screen_width, screen_height)

        # Scale click targets from the 1280x720 reference layout.
        # TODO: Make these coordinates more robust or configurable
        click_x_1 = int(screen_width * (1036 / 1280))
        click_y_1 = int(screen_height * (500 / 720))
        log.info("Calculated click coordinates for starting the game: (%s, %s)", click_x_1, click_y_1)
        click_x_2 = int(screen_width * (1233 / 1280))
        click_y_2 = int(screen_height * (30 / 720))
        log.info(
            "Calculated click coordinates for closing the browser warning: (%s, %s)",
            click_x_2,
            click_y_2,
        )

        # Disable the failsafe while the scripted clicks run.
        pyautogui.FAILSAFE = False
        log.info("PyAutoGUI failsafe temporarily disabled.")

        # Two separate clicks, two seconds apart, on the start position.
        log.info("Clicking at coordinates: (%s, %s)", click_x_1, click_y_1)
        pyautogui.click(click_x_1, click_y_1)
        time.sleep(2)
        pyautogui.click(click_x_1, click_y_1)

        log.info("Pressing F11 to enter fullscreen...")
        time.sleep(1)  # Short delay before pressing F11
        pyautogui.press("f11")
        time.sleep(1)

        log.info("Clicking at coordinates: (%s, %s)", click_x_2, click_y_2)
        pyautogui.click(click_x_2, click_y_2)
        time.sleep(1)
        pyautogui.click(click_x_2, click_y_2)

        log.info("Star Rail preparation clicks completed.")

    except Exception as e:
        log.exception("Error during Star Rail preparation (browser/click): %s", e)
    finally:
        # Put the failsafe back exactly as the caller had it.
        pyautogui.FAILSAFE = previous_failsafe
        if previous_failsafe:
            log.info("PyAutoGUI failsafe re-enabled.")
        else:
            log.info("PyAutoGUI failsafe left disabled (it was disabled before preparation).")
|
@@ -1,223 +1,223 @@
|
|
1
|
-
import argparse
|
2
|
-
import time
|
3
|
-
import json
|
4
|
-
import platform
|
5
|
-
import uuid
|
6
|
-
import base64
|
7
|
-
import datetime
|
8
|
-
from datetime import datetime, timedelta, timezone
|
9
|
-
|
10
|
-
from computer_use_ootb_internal.computer_use_demo.executor.teachmode_executor import TeachmodeExecutor
|
11
|
-
from computer_use_ootb_internal.computer_use_demo.gui_agent.llm_utils.llm_utils import is_image_path
|
12
|
-
from computer_use_ootb_internal.computer_use_demo.gui_agent.gui_parser.simple_parser.utils import get_screen_resize_factor
|
13
|
-
from computer_use_ootb_internal.computer_use_demo.tools.aws_request import send_request_to_server
|
14
|
-
from computer_use_ootb_internal.computer_use_demo.gui_agent.gui_parser.uia_tools.screenshot_service import get_screenshot_external_cmd
|
15
|
-
|
16
|
-
|
17
|
-
utc_plus_8 = timezone(timedelta(hours=8))
|
18
|
-
|
19
|
-
|
20
|
-
def simple_teachmode_sampling_loop(
|
21
|
-
model: str,
|
22
|
-
task: str,
|
23
|
-
api_keys: dict = None,
|
24
|
-
action_history: list[dict] = None,
|
25
|
-
selected_screen: int = 0,
|
26
|
-
user_id: str = None,
|
27
|
-
trace_id: str = None,
|
28
|
-
server_url: str = "http://localhost:5000/generate_action",
|
29
|
-
max_steps: int = 20,
|
30
|
-
):
|
31
|
-
"""
|
32
|
-
Synchronous sampling loop for assistant/tool interactions in 'teach mode'.
|
33
|
-
"""
|
34
|
-
# Initialize action_history if it's None
|
35
|
-
if action_history is None:
|
36
|
-
action_history = []
|
37
|
-
|
38
|
-
# if platform.system() != "Windows":
|
39
|
-
# raise ValueError("Teach mode is only supported on Windows.")
|
40
|
-
|
41
|
-
# Set StarRail mode based on input parameter
|
42
|
-
# 0: disabled, 1: starrail, 2: starrail browser
|
43
|
-
full_screen_game_mode = 0
|
44
|
-
|
45
|
-
# TODO: set full_screen_game_mode adaptively
|
46
|
-
if "star_rail" in user_id or "star_rail" in user_id:
|
47
|
-
full_screen_game_mode = 1
|
48
|
-
|
49
|
-
if "star_rail_dev" in trace_id or "star_rail_dev" in user_id or "hero_case" in user_id or "official" in user_id:
|
50
|
-
full_screen_game_mode = 2
|
51
|
-
|
52
|
-
print(f"Full Screen Game Mode: {full_screen_game_mode}")
|
53
|
-
executor = TeachmodeExecutor(
|
54
|
-
selected_screen=selected_screen,
|
55
|
-
full_screen_game_mode=full_screen_game_mode,
|
56
|
-
)
|
57
|
-
|
58
|
-
timestamp = datetime.now(utc_plus_8).strftime("%m%d-%H%M%S")
|
59
|
-
|
60
|
-
step_count = 1
|
61
|
-
unique_task_id = f"{timestamp}_uid_{user_id}_tid_{trace_id}_{str(uuid.uuid4())[:6]}"
|
62
|
-
|
63
|
-
print("[simple_teachmode_sampling_loop] starting task: ", task)
|
64
|
-
print(f"[simple_teachmode_sampling_loop] unique_task_id: {unique_task_id}")
|
65
|
-
|
66
|
-
|
67
|
-
while step_count < max_steps:
|
68
|
-
|
69
|
-
print(f"step_count: {step_count}")
|
70
|
-
|
71
|
-
# Pause briefly so we don't spam screenshots
|
72
|
-
time.sleep(1)
|
73
|
-
|
74
|
-
uia_meta, sc_path = get_screenshot_external_cmd(
|
75
|
-
selected_screen=selected_screen,
|
76
|
-
capture_uia_data=full_screen_game_mode==0
|
77
|
-
)
|
78
|
-
|
79
|
-
# yield {"role": "assistant", "content": "screenshot", "type": "action", "action_type": "screenshot"}
|
80
|
-
|
81
|
-
if is_image_path(sc_path):
|
82
|
-
# yield {"role": "assistant", "content": sc_path, "type": "image", "action_type": "screenshot"}
|
83
|
-
with open(sc_path, "rb") as image_file:
|
84
|
-
sc_base64 = base64.b64encode(image_file.read()).decode('utf-8')
|
85
|
-
yield {"role": "assistant", "content": sc_base64, "type": "image_base64", "action_type": "screenshot"}
|
86
|
-
|
87
|
-
payload = {
|
88
|
-
"task_id": unique_task_id,
|
89
|
-
"uia_data": uia_meta,
|
90
|
-
"screenshot_path": sc_path,
|
91
|
-
"query": task,
|
92
|
-
"action_history": action_history,
|
93
|
-
"mode": "teach",
|
94
|
-
"user_id": user_id,
|
95
|
-
"trace_id": trace_id,
|
96
|
-
"scale_factor": get_screen_resize_factor(),
|
97
|
-
"os_name": platform.system(),
|
98
|
-
"api_keys": api_keys,
|
99
|
-
}
|
100
|
-
|
101
|
-
# Send request to Marbot Run server
|
102
|
-
infer_server_response = send_request_to_server(payload, server_url)
|
103
|
-
|
104
|
-
# infer_server_response = {
|
105
|
-
# 'status': 'success',
|
106
|
-
# 'generated_plan': plan_details,
|
107
|
-
# 'generated_action': action,
|
108
|
-
# 'todo_md': todo_md_content,
|
109
|
-
# 'milestones': milestones,
|
110
|
-
# 'current_step': current_step,
|
111
|
-
# }
|
112
|
-
|
113
|
-
|
114
|
-
if infer_server_response is None:
|
115
|
-
print("No response from Marbot Run server. Exiting.")
|
116
|
-
yield {"role": "assistant", "content": "No response from Marbot Run server. Exiting.", "type": "error"}
|
117
|
-
action_history = []
|
118
|
-
break
|
119
|
-
|
120
|
-
try:
|
121
|
-
step_plan = infer_server_response["generated_plan"]
|
122
|
-
step_reasoning = step_plan["reasoning"]
|
123
|
-
step_info = step_plan["step_info"]
|
124
|
-
step_action = infer_server_response["generated_action"]["content"]
|
125
|
-
step_traj_idx = infer_server_response["current_traj_step"]
|
126
|
-
|
127
|
-
except Exception as e:
|
128
|
-
print("Error parsing generated_action content:", e)
|
129
|
-
yield {"role": "assistant", "content": "Error parsing response from Marbot Run server. Exiting.", "type": "error"}
|
130
|
-
break
|
131
|
-
|
132
|
-
yield {"role": "assistant", "content": step_reasoning, "type": "text"}
|
133
|
-
|
134
|
-
if step_action.get("action") == "STOP":
|
135
|
-
final_sc, final_sc_path = get_screenshot_external_cmd(selected_screen=selected_screen)
|
136
|
-
|
137
|
-
yield {"role": "assistant", "content": "Task completed. Final screenshot:", "type": "text"}
|
138
|
-
yield {"role": "assistant", "content": final_sc_path, "type": "image"}
|
139
|
-
|
140
|
-
# reset action history
|
141
|
-
action_history = []
|
142
|
-
break
|
143
|
-
|
144
|
-
action_history.append(f"Executing guidance trajectory step [{step_traj_idx}]: {{Plan: {step_info}, Action: {step_action}}}\n")
|
145
|
-
|
146
|
-
for exec_message in executor({"role": "assistant", "content": step_action}):
|
147
|
-
yield exec_message
|
148
|
-
|
149
|
-
step_count += 1
|
150
|
-
|
151
|
-
# reset action history
|
152
|
-
action_history = []
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
if __name__ == "__main__":
|
157
|
-
parser = argparse.ArgumentParser(
|
158
|
-
description="Run a synchronous sampling loop for assistant/tool interactions in teach-mode."
|
159
|
-
)
|
160
|
-
parser.add_argument(
|
161
|
-
"--model",
|
162
|
-
default="teach-mode",
|
163
|
-
help="The model to use",
|
164
|
-
)
|
165
|
-
parser.add_argument(
|
166
|
-
"--task",
|
167
|
-
default="Click on the Google Chorme icon",
|
168
|
-
help="The task to be completed by the assistant (e.g., 'Complete some data extraction.').",
|
169
|
-
)
|
170
|
-
parser.add_argument(
|
171
|
-
"--selected_screen",
|
172
|
-
type=int,
|
173
|
-
default=0,
|
174
|
-
help="Index of the screen to capture (default=0).",
|
175
|
-
)
|
176
|
-
parser.add_argument(
|
177
|
-
"--user_id",
|
178
|
-
default="star_rail",
|
179
|
-
help="User ID for the session (default='liziqi').",
|
180
|
-
)
|
181
|
-
parser.add_argument(
|
182
|
-
"--trace_id",
|
183
|
-
default="ONG_JING_JIE_007-0213_0",
|
184
|
-
help="Trace ID for the session (default='default_trace').",
|
185
|
-
)
|
186
|
-
parser.add_argument(
|
187
|
-
"--api_key_file",
|
188
|
-
default="api_key.json",
|
189
|
-
help="Path to the JSON file containing API keys (default='api_key.json').",
|
190
|
-
)
|
191
|
-
parser.add_argument(
|
192
|
-
"--max_steps",
|
193
|
-
type=int,
|
194
|
-
default=20,
|
195
|
-
help="The maximum number of steps to take.",
|
196
|
-
)
|
197
|
-
|
198
|
-
args = parser.parse_args()
|
199
|
-
|
200
|
-
# # Load API keys
|
201
|
-
# with open(args.api_key_file, "r") as file:
|
202
|
-
# api_keys = json.load(file)
|
203
|
-
api_keys = None
|
204
|
-
|
205
|
-
print(f"Starting task: {args.task}")
|
206
|
-
|
207
|
-
# Execute the sampling loop
|
208
|
-
sampling_loop = simple_teachmode_sampling_loop(
|
209
|
-
model=args.model,
|
210
|
-
task=args.task,
|
211
|
-
selected_screen=args.selected_screen,
|
212
|
-
user_id=args.user_id,
|
213
|
-
trace_id=args.trace_id,
|
214
|
-
api_keys=api_keys,
|
215
|
-
max_steps=args.max_steps,
|
216
|
-
)
|
217
|
-
|
218
|
-
# # Print each step result
|
219
|
-
for step in sampling_loop:
|
220
|
-
print(step)
|
221
|
-
time.sleep(1)
|
222
|
-
|
223
|
-
print(f"Task '{args.task}' completed. Thanks for using Teachmode-OOTB.")
|
1
|
+
import argparse
|
2
|
+
import time
|
3
|
+
import json
|
4
|
+
import platform
|
5
|
+
import uuid
|
6
|
+
import base64
|
7
|
+
import datetime
|
8
|
+
from datetime import datetime, timedelta, timezone
|
9
|
+
|
10
|
+
from computer_use_ootb_internal.computer_use_demo.executor.teachmode_executor import TeachmodeExecutor
|
11
|
+
from computer_use_ootb_internal.computer_use_demo.gui_agent.llm_utils.llm_utils import is_image_path
|
12
|
+
from computer_use_ootb_internal.computer_use_demo.gui_agent.gui_parser.simple_parser.utils import get_screen_resize_factor
|
13
|
+
from computer_use_ootb_internal.computer_use_demo.tools.aws_request import send_request_to_server
|
14
|
+
from computer_use_ootb_internal.computer_use_demo.gui_agent.gui_parser.uia_tools.screenshot_service import get_screenshot_external_cmd
|
15
|
+
|
16
|
+
|
17
|
+
utc_plus_8 = timezone(timedelta(hours=8))
|
18
|
+
|
19
|
+
|
20
|
+
def simple_teachmode_sampling_loop(
    model: str,
    task: str,
    api_keys: dict = None,
    action_history: list[dict] = None,
    selected_screen: int = 0,
    user_id: str = None,
    trace_id: str = None,
    server_url: str = "http://localhost:5000/generate_action",
    max_steps: int = 20,
):
    """Run a teach-mode task step by step, yielding progress messages.

    Each step captures a screenshot, posts it together with the task and the
    action history to ``server_url``, yields the returned reasoning text and
    then yields every message produced by the ``TeachmodeExecutor`` while it
    performs the returned action. The loop ends when the server returns a
    ``STOP`` action, when the server gives no or an unparsable response, or
    after ``max_steps`` steps.

    Args:
        model: Not read by this function; kept for interface compatibility.
        task: Task description, sent to the server as ``query``.
        api_keys: Forwarded to the server unchanged.
        action_history: Log of already executed steps. A description of every
            executed step is appended to it. A new list is used when ``None``.
        selected_screen: Index of the screen to capture and act on.
        user_id: Session user id; also selects the full-screen game mode.
        trace_id: Session trace id; also selects the full-screen game mode.
        server_url: Endpoint that generates the next action.
        max_steps: Maximum number of steps to execute.

    Yields:
        dict: Messages with ``role``, ``content`` and ``type`` keys
        (screenshots as ``image_base64``, reasoning as ``text``, failures as
        ``error``, the final screenshot path as ``image``), plus whatever the
        executor yields.
    """
    if action_history is None:
        action_history = []

    # if platform.system() != "Windows":
    #     raise ValueError("Teach mode is only supported on Windows.")

    # user_id / trace_id default to None, and a substring test against None
    # raises TypeError, so normalise them for the mode detection only.
    uid = user_id or ""
    tid = trace_id or ""

    # Full-screen game mode - 0: disabled, 1: starrail, 2: starrail browser
    # TODO: set full_screen_game_mode adaptively
    full_screen_game_mode = 0
    if "star_rail" in uid or "star_rail" in tid:
        full_screen_game_mode = 1
    if "star_rail_dev" in tid or "star_rail_dev" in uid or "hero_case" in uid or "official" in uid:
        full_screen_game_mode = 2

    print(f"Full Screen Game Mode: {full_screen_game_mode}")
    executor = TeachmodeExecutor(
        selected_screen=selected_screen,
        full_screen_game_mode=full_screen_game_mode,
    )

    timestamp = datetime.now(utc_plus_8).strftime("%m%d-%H%M%S")
    unique_task_id = f"{timestamp}_uid_{user_id}_tid_{trace_id}_{str(uuid.uuid4())[:6]}"

    print("[simple_teachmode_sampling_loop] starting task: ", task)
    print(f"[simple_teachmode_sampling_loop] unique_task_id: {unique_task_id}")

    # Steps are numbered from 1; the range is inclusive so that exactly
    # max_steps steps can run.
    for step_count in range(1, max_steps + 1):
        print(f"step_count: {step_count}")

        # Pause briefly so we don't spam screenshots
        time.sleep(1)

        # UIA data is only captured when no full-screen game mode is active.
        uia_meta, sc_path = get_screenshot_external_cmd(
            selected_screen=selected_screen,
            capture_uia_data=full_screen_game_mode == 0,
        )

        if is_image_path(sc_path):
            with open(sc_path, "rb") as image_file:
                sc_base64 = base64.b64encode(image_file.read()).decode("utf-8")
            yield {"role": "assistant", "content": sc_base64, "type": "image_base64", "action_type": "screenshot"}

        payload = {
            "task_id": unique_task_id,
            "uia_data": uia_meta,
            "screenshot_path": sc_path,
            "query": task,
            "action_history": action_history,
            "mode": "teach",
            "user_id": user_id,
            "trace_id": trace_id,
            "scale_factor": get_screen_resize_factor(),
            "os_name": platform.system(),
            "api_keys": api_keys,
        }

        # Send request to Marbot Run server
        infer_server_response = send_request_to_server(payload, server_url)

        if infer_server_response is None:
            print("No response from Marbot Run server. Exiting.")
            yield {"role": "assistant", "content": "No response from Marbot Run server. Exiting.", "type": "error"}
            break

        try:
            step_plan = infer_server_response["generated_plan"]
            step_reasoning = step_plan["reasoning"]
            step_info = step_plan["step_info"]
            step_action = infer_server_response["generated_action"]["content"]
            step_traj_idx = infer_server_response["current_traj_step"]
        except Exception as e:
            print("Error parsing generated_action content:", e)
            yield {"role": "assistant", "content": "Error parsing response from Marbot Run server. Exiting.", "type": "error"}
            break

        yield {"role": "assistant", "content": step_reasoning, "type": "text"}

        if step_action.get("action") == "STOP":
            # Only the screenshot path is needed for the final report.
            _, final_sc_path = get_screenshot_external_cmd(selected_screen=selected_screen)

            yield {"role": "assistant", "content": "Task completed. Final screenshot:", "type": "text"}
            yield {"role": "assistant", "content": final_sc_path, "type": "image"}
            break

        action_history.append(f"Executing guidance trajectory step [{step_traj_idx}]: {{Plan: {step_info}, Action: {step_action}}}\n")

        yield from executor({"role": "assistant", "content": step_action})

    # NOTE(review): earlier revisions rebound the local name
    # ``action_history = []`` just before returning, which never affected the
    # caller's list. If the caller's history is meant to be emptied here, that
    # needs ``action_history.clear()`` - confirm the intent before adding it.
|
153
|
+
|
154
|
+
|
155
|
+
|
156
|
+
if __name__ == "__main__":
    # Command-line entry point: parse the options, run the teach-mode
    # sampling loop and print every message it yields.
    parser = argparse.ArgumentParser(
        description="Run a synchronous sampling loop for assistant/tool interactions in teach-mode."
    )
    parser.add_argument(
        "--model",
        default="teach-mode",
        help="The model to use",
    )
    parser.add_argument(
        "--task",
        default="Click on the Google Chrome icon",
        help="The task to be completed by the assistant (e.g., 'Complete some data extraction.').",
    )
    parser.add_argument(
        "--selected_screen",
        type=int,
        default=0,
        help="Index of the screen to capture (default=0).",
    )
    # The help texts below interpolate the real default through argparse's
    # %(default)s specifier so they cannot drift from the actual value.
    parser.add_argument(
        "--user_id",
        default="star_rail",
        help="User ID for the session (default='%(default)s').",
    )
    parser.add_argument(
        "--trace_id",
        default="ONG_JING_JIE_007-0213_0",
        help="Trace ID for the session (default='%(default)s').",
    )
    parser.add_argument(
        "--api_key_file",
        default="api_key.json",
        help="Path to the JSON file containing API keys (default='api_key.json').",
    )
    parser.add_argument(
        "--max_steps",
        type=int,
        default=20,
        help="The maximum number of steps to take.",
    )

    args = parser.parse_args()

    # NOTE: --api_key_file is accepted but currently not read; no API keys
    # are passed to the sampling loop.
    api_keys = None

    print(f"Starting task: {args.task}")

    # Execute the sampling loop
    sampling_loop = simple_teachmode_sampling_loop(
        model=args.model,
        task=args.task,
        selected_screen=args.selected_screen,
        user_id=args.user_id,
        trace_id=args.trace_id,
        api_keys=api_keys,
        max_steps=args.max_steps,
    )

    # Print each step result, pausing one second between messages.
    for step in sampling_loop:
        print(step)
        time.sleep(1)

    print(f"Task '{args.task}' completed. Thanks for using Teachmode-OOTB.")
|