computer-use-ootb-internal 0.0.165__py3-none-any.whl → 0.0.167__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,100 +1,100 @@
1
- # src/computer_use_ootb_internal/preparation/star_rail_prepare.py
2
- import time
3
- import platform
4
- import subprocess # Added for taskkill
5
- import pyautogui
6
- import webbrowser
7
- import logging # Use logging instead of print for better practice
8
-
9
- # Set up logging for this module if needed, or rely on root logger
10
- log = logging.getLogger(__name__)
11
-
12
- def run_preparation(state):
13
- """
14
- Performs environment preparation specific to Star Rail on Windows.
15
- Closes existing Edge browsers, opens the specified URL in a new Edge instance,
16
- and performs initial clicks.
17
- """
18
- if platform.system() != "Windows":
19
- log.info("Star Rail preparation skipped: Not running on Windows.")
20
- return
21
-
22
- log.info("Star Rail preparation: Starting environment setup on Windows...")
23
- url = "https://sr.mihoyo.com/cloud/#/" # Consider making this configurable later
24
- browser_opened = False
25
- try:
26
- # Attempt to close existing Microsoft Edge processes
27
- log.info("Attempting to close existing Microsoft Edge processes...")
28
- try:
29
- # /F forces termination, /IM specifies image name
30
- result = subprocess.run(['taskkill', '/F', '/IM', 'msedge.exe'],
31
- capture_output=True, text=True, check=False)
32
- if result.returncode == 0:
33
- log.info("Successfully sent termination signal to msedge.exe processes.")
34
- elif "not found" in result.stderr.lower() or "not found" in result.stdout.lower():
35
- log.info("No running msedge.exe processes found to close.")
36
- else:
37
- log.warning(f"taskkill command finished with return code {result.returncode}. Output: {result.stdout} Stderr: {result.stderr}")
38
- time.sleep(2) # Give processes time to close
39
- except FileNotFoundError:
40
- log.error("Error: 'taskkill' command not found. Make sure it's in the system PATH.")
41
- except Exception as e:
42
- log.error(f"Error occurred while trying to close Edge: {e}", exc_info=True)
43
-
44
- # Use only webbrowser.open
45
- log.info(f"Attempting to open {url} using webbrowser.open()...")
46
- if webbrowser.open(url):
47
- log.info(f"Successfully requested browser to open {url} via webbrowser.open().")
48
- browser_opened = True
49
- # Ensure sleep time for browser load before clicks is present
50
- time.sleep(5)
51
- else:
52
- log.warning("webbrowser.open() returned False, indicating potential failure.")
53
-
54
- if not browser_opened:
55
- log.error("Failed to confirm browser opening via webbrowser.open(). Will still attempt clicks.")
56
-
57
- # Add pyautogui click after attempting to open the browser
58
- log.info("Proceeding with pyautogui actions...")
59
- time.sleep(5) # Wait time for the browser to load
60
-
61
- # Get screen size
62
- screen_width, screen_height = pyautogui.size()
63
- log.info(f"Detected screen size: {screen_width}x{screen_height}")
64
-
65
- # Calculate click coordinates based on a reference resolution (e.g., 1280x720)
66
- # TODO: Make these coordinates more robust or configurable
67
- click_x_1 = int(screen_width * (1036 / 1280))
68
- click_y_1 = int(screen_height * (500 / 720))
69
- log.info(f"Calculated click coordinates for starting the game: ({click_x_1}, {click_y_1})")
70
- click_x_2 = int(screen_width * (1233 / 1280))
71
- click_y_2 = int(screen_height * (30 / 720))
72
- log.info(f"Calculated click coordinates for closing the browser warning: ({click_x_2}, {click_y_2})")
73
-
74
- # Disable failsafe before clicking
75
- pyautogui.FAILSAFE = False
76
- log.info("PyAutoGUI failsafe temporarily disabled.")
77
-
78
- log.info(f"Clicking at coordinates: ({click_x_1}, {click_y_1})")
79
- pyautogui.click(click_x_1, click_y_1)
80
- time.sleep(2)
81
- pyautogui.click(click_x_1, click_y_1) # Double click?
82
-
83
- # Press F11 to attempt fullscreen
84
- log.info("Pressing F11 to enter fullscreen...")
85
- time.sleep(1) # Short delay before pressing F11
86
- pyautogui.press('f11')
87
- time.sleep(1)
88
- log.info(f"Clicking at coordinates: ({click_x_2}, {click_y_2})")
89
- pyautogui.click(click_x_2, click_y_2)
90
- time.sleep(1)
91
- pyautogui.click(click_x_2, click_y_2)
92
-
93
- log.info("Star Rail preparation clicks completed.")
94
-
95
- except Exception as e:
96
- log.error(f"Error during Star Rail preparation (browser/click): {e}", exc_info=True)
97
- finally:
98
- # Ensure failsafe is re-enabled
99
- pyautogui.FAILSAFE = True
1
+ # src/computer_use_ootb_internal/preparation/star_rail_prepare.py
2
+ import time
3
+ import platform
4
+ import subprocess # Added for taskkill
5
+ import pyautogui
6
+ import webbrowser
7
+ import logging # Use logging instead of print for better practice
8
+
9
+ # Set up logging for this module if needed, or rely on root logger
10
+ log = logging.getLogger(__name__)
11
+
12
def _close_edge_processes():
    """Force-close running Microsoft Edge processes.

    Best-effort: every failure is logged and swallowed so that the rest of
    the preparation can still run.
    """
    log.info("Attempting to close existing Microsoft Edge processes...")
    try:
        # /F forces termination, /IM selects processes by image name.
        result = subprocess.run(
            ['taskkill', '/F', '/IM', 'msedge.exe'],
            capture_output=True, text=True, check=False,
        )
        if result.returncode == 0:
            log.info("Successfully sent termination signal to msedge.exe processes.")
        elif "not found" in result.stderr.lower() or "not found" in result.stdout.lower():
            # NOTE(review): this text match only works when taskkill prints
            # English messages; on other locales the warning branch is taken.
            log.info("No running msedge.exe processes found to close.")
        else:
            log.warning(f"taskkill command finished with return code {result.returncode}. Output: {result.stdout} Stderr: {result.stderr}")
        time.sleep(2)  # Give processes time to close
    except FileNotFoundError:
        log.error("Error: 'taskkill' command not found. Make sure it's in the system PATH.")
    except Exception as e:
        log.error(f"Error occurred while trying to close Edge: {e}", exc_info=True)


def run_preparation(state):
    """
    Performs environment preparation specific to Star Rail on Windows.

    Force-closes running Edge processes, asks the system default browser to
    open the cloud-game URL via ``webbrowser.open`` and then performs the
    initial clicks (start the game, toggle fullscreen with F11, dismiss the
    browser warning). Click positions are scaled from a 1280x720 reference
    layout to the detected screen size.

    Args:
        state: Not used by this function; kept for the caller's interface.

    Returns:
        None. Errors are logged instead of raised.
    """
    if platform.system() != "Windows":
        log.info("Star Rail preparation skipped: Not running on Windows.")
        return

    log.info("Star Rail preparation: Starting environment setup on Windows...")
    url = "https://sr.mihoyo.com/cloud/#/"  # Consider making this configurable later

    # Remember the caller's fail-safe setting so the finally-clause restores
    # it instead of unconditionally switching it back on.
    previous_failsafe = pyautogui.FAILSAFE
    try:
        _close_edge_processes()

        log.info(f"Attempting to open {url} using webbrowser.open()...")
        if webbrowser.open(url):
            log.info(f"Successfully requested browser to open {url} via webbrowser.open().")
            time.sleep(5)  # Let the browser start loading before the clicks
        else:
            log.warning("webbrowser.open() returned False, indicating potential failure.")
            log.error("Failed to confirm browser opening via webbrowser.open(). Will still attempt clicks.")

        log.info("Proceeding with pyautogui actions...")
        time.sleep(5)  # Wait time for the browser to load

        screen_width, screen_height = pyautogui.size()
        log.info(f"Detected screen size: {screen_width}x{screen_height}")

        # Coordinates are expressed relative to a 1280x720 reference layout.
        # TODO: Make these coordinates more robust or configurable
        click_x_1 = int(screen_width * (1036 / 1280))
        click_y_1 = int(screen_height * (500 / 720))
        log.info(f"Calculated click coordinates for starting the game: ({click_x_1}, {click_y_1})")
        click_x_2 = int(screen_width * (1233 / 1280))
        click_y_2 = int(screen_height * (30 / 720))
        log.info(f"Calculated click coordinates for closing the browser warning: ({click_x_2}, {click_y_2})")

        # The second target sits close to the screen corner, so the fail-safe
        # is switched off for the duration of the clicks.
        pyautogui.FAILSAFE = False
        log.info("PyAutoGUI failsafe temporarily disabled.")

        log.info(f"Clicking at coordinates: ({click_x_1}, {click_y_1})")
        pyautogui.click(click_x_1, click_y_1)
        time.sleep(2)
        pyautogui.click(click_x_1, click_y_1)  # Second click, 2 s after the first

        log.info("Pressing F11 to enter fullscreen...")
        time.sleep(1)  # Short delay before pressing F11
        pyautogui.press('f11')
        time.sleep(1)
        log.info(f"Clicking at coordinates: ({click_x_2}, {click_y_2})")
        pyautogui.click(click_x_2, click_y_2)
        time.sleep(1)
        pyautogui.click(click_x_2, click_y_2)

        log.info("Star Rail preparation clicks completed.")

    except Exception as e:
        log.error(f"Error during Star Rail preparation (browser/click): {e}", exc_info=True)
    finally:
        # Put the fail-safe back to whatever it was before this function ran.
        pyautogui.FAILSAFE = previous_failsafe
        if previous_failsafe:
            log.info("PyAutoGUI failsafe re-enabled.")
        else:
            log.info("PyAutoGUI failsafe left disabled (it was disabled before preparation).")
@@ -1,223 +1,223 @@
1
- import argparse
2
- import time
3
- import json
4
- import platform
5
- import uuid
6
- import base64
7
- import datetime
8
- from datetime import datetime, timedelta, timezone
9
-
10
- from computer_use_ootb_internal.computer_use_demo.executor.teachmode_executor import TeachmodeExecutor
11
- from computer_use_ootb_internal.computer_use_demo.gui_agent.llm_utils.llm_utils import is_image_path
12
- from computer_use_ootb_internal.computer_use_demo.gui_agent.gui_parser.simple_parser.utils import get_screen_resize_factor
13
- from computer_use_ootb_internal.computer_use_demo.tools.aws_request import send_request_to_server
14
- from computer_use_ootb_internal.computer_use_demo.gui_agent.gui_parser.uia_tools.screenshot_service import get_screenshot_external_cmd
15
-
16
-
17
- utc_plus_8 = timezone(timedelta(hours=8))
18
-
19
-
20
- def simple_teachmode_sampling_loop(
21
- model: str,
22
- task: str,
23
- api_keys: dict = None,
24
- action_history: list[dict] = None,
25
- selected_screen: int = 0,
26
- user_id: str = None,
27
- trace_id: str = None,
28
- server_url: str = "http://localhost:5000/generate_action",
29
- max_steps: int = 20,
30
- ):
31
- """
32
- Synchronous sampling loop for assistant/tool interactions in 'teach mode'.
33
- """
34
- # Initialize action_history if it's None
35
- if action_history is None:
36
- action_history = []
37
-
38
- # if platform.system() != "Windows":
39
- # raise ValueError("Teach mode is only supported on Windows.")
40
-
41
- # Set StarRail mode based on input parameter
42
- # 0: disabled, 1: starrail, 2: starrail browser
43
- full_screen_game_mode = 0
44
-
45
- # TODO: set full_screen_game_mode adaptively
46
- if "star_rail" in user_id or "star_rail" in user_id:
47
- full_screen_game_mode = 1
48
-
49
- if "star_rail_dev" in trace_id or "star_rail_dev" in user_id or "hero_case" in user_id or "official" in user_id:
50
- full_screen_game_mode = 2
51
-
52
- print(f"Full Screen Game Mode: {full_screen_game_mode}")
53
- executor = TeachmodeExecutor(
54
- selected_screen=selected_screen,
55
- full_screen_game_mode=full_screen_game_mode,
56
- )
57
-
58
- timestamp = datetime.now(utc_plus_8).strftime("%m%d-%H%M%S")
59
-
60
- step_count = 1
61
- unique_task_id = f"{timestamp}_uid_{user_id}_tid_{trace_id}_{str(uuid.uuid4())[:6]}"
62
-
63
- print("[simple_teachmode_sampling_loop] starting task: ", task)
64
- print(f"[simple_teachmode_sampling_loop] unique_task_id: {unique_task_id}")
65
-
66
-
67
- while step_count < max_steps:
68
-
69
- print(f"step_count: {step_count}")
70
-
71
- # Pause briefly so we don't spam screenshots
72
- time.sleep(1)
73
-
74
- uia_meta, sc_path = get_screenshot_external_cmd(
75
- selected_screen=selected_screen,
76
- capture_uia_data=full_screen_game_mode==0
77
- )
78
-
79
- # yield {"role": "assistant", "content": "screenshot", "type": "action", "action_type": "screenshot"}
80
-
81
- if is_image_path(sc_path):
82
- # yield {"role": "assistant", "content": sc_path, "type": "image", "action_type": "screenshot"}
83
- with open(sc_path, "rb") as image_file:
84
- sc_base64 = base64.b64encode(image_file.read()).decode('utf-8')
85
- yield {"role": "assistant", "content": sc_base64, "type": "image_base64", "action_type": "screenshot"}
86
-
87
- payload = {
88
- "task_id": unique_task_id,
89
- "uia_data": uia_meta,
90
- "screenshot_path": sc_path,
91
- "query": task,
92
- "action_history": action_history,
93
- "mode": "teach",
94
- "user_id": user_id,
95
- "trace_id": trace_id,
96
- "scale_factor": get_screen_resize_factor(),
97
- "os_name": platform.system(),
98
- "api_keys": api_keys,
99
- }
100
-
101
- # Send request to Marbot Run server
102
- infer_server_response = send_request_to_server(payload, server_url)
103
-
104
- # infer_server_response = {
105
- # 'status': 'success',
106
- # 'generated_plan': plan_details,
107
- # 'generated_action': action,
108
- # 'todo_md': todo_md_content,
109
- # 'milestones': milestones,
110
- # 'current_step': current_step,
111
- # }
112
-
113
-
114
- if infer_server_response is None:
115
- print("No response from Marbot Run server. Exiting.")
116
- yield {"role": "assistant", "content": "No response from Marbot Run server. Exiting.", "type": "error"}
117
- action_history = []
118
- break
119
-
120
- try:
121
- step_plan = infer_server_response["generated_plan"]
122
- step_reasoning = step_plan["reasoning"]
123
- step_info = step_plan["step_info"]
124
- step_action = infer_server_response["generated_action"]["content"]
125
- step_traj_idx = infer_server_response["current_traj_step"]
126
-
127
- except Exception as e:
128
- print("Error parsing generated_action content:", e)
129
- yield {"role": "assistant", "content": "Error parsing response from Marbot Run server. Exiting.", "type": "error"}
130
- break
131
-
132
- yield {"role": "assistant", "content": step_reasoning, "type": "text"}
133
-
134
- if step_action.get("action") == "STOP":
135
- final_sc, final_sc_path = get_screenshot_external_cmd(selected_screen=selected_screen)
136
-
137
- yield {"role": "assistant", "content": "Task completed. Final screenshot:", "type": "text"}
138
- yield {"role": "assistant", "content": final_sc_path, "type": "image"}
139
-
140
- # reset action history
141
- action_history = []
142
- break
143
-
144
- action_history.append(f"Executing guidance trajectory step [{step_traj_idx}]: {{Plan: {step_info}, Action: {step_action}}}\n")
145
-
146
- for exec_message in executor({"role": "assistant", "content": step_action}):
147
- yield exec_message
148
-
149
- step_count += 1
150
-
151
- # reset action history
152
- action_history = []
153
-
154
-
155
-
156
- if __name__ == "__main__":
157
- parser = argparse.ArgumentParser(
158
- description="Run a synchronous sampling loop for assistant/tool interactions in teach-mode."
159
- )
160
- parser.add_argument(
161
- "--model",
162
- default="teach-mode",
163
- help="The model to use",
164
- )
165
- parser.add_argument(
166
- "--task",
167
- default="Click on the Google Chorme icon",
168
- help="The task to be completed by the assistant (e.g., 'Complete some data extraction.').",
169
- )
170
- parser.add_argument(
171
- "--selected_screen",
172
- type=int,
173
- default=0,
174
- help="Index of the screen to capture (default=0).",
175
- )
176
- parser.add_argument(
177
- "--user_id",
178
- default="star_rail",
179
- help="User ID for the session (default='liziqi').",
180
- )
181
- parser.add_argument(
182
- "--trace_id",
183
- default="ONG_JING_JIE_007-0213_0",
184
- help="Trace ID for the session (default='default_trace').",
185
- )
186
- parser.add_argument(
187
- "--api_key_file",
188
- default="api_key.json",
189
- help="Path to the JSON file containing API keys (default='api_key.json').",
190
- )
191
- parser.add_argument(
192
- "--max_steps",
193
- type=int,
194
- default=20,
195
- help="The maximum number of steps to take.",
196
- )
197
-
198
- args = parser.parse_args()
199
-
200
- # # Load API keys
201
- # with open(args.api_key_file, "r") as file:
202
- # api_keys = json.load(file)
203
- api_keys = None
204
-
205
- print(f"Starting task: {args.task}")
206
-
207
- # Execute the sampling loop
208
- sampling_loop = simple_teachmode_sampling_loop(
209
- model=args.model,
210
- task=args.task,
211
- selected_screen=args.selected_screen,
212
- user_id=args.user_id,
213
- trace_id=args.trace_id,
214
- api_keys=api_keys,
215
- max_steps=args.max_steps,
216
- )
217
-
218
- # # Print each step result
219
- for step in sampling_loop:
220
- print(step)
221
- time.sleep(1)
222
-
223
- print(f"Task '{args.task}' completed. Thanks for using Teachmode-OOTB.")
1
+ import argparse
2
+ import time
3
+ import json
4
+ import platform
5
+ import uuid
6
+ import base64
7
+ import datetime
8
+ from datetime import datetime, timedelta, timezone
9
+
10
+ from computer_use_ootb_internal.computer_use_demo.executor.teachmode_executor import TeachmodeExecutor
11
+ from computer_use_ootb_internal.computer_use_demo.gui_agent.llm_utils.llm_utils import is_image_path
12
+ from computer_use_ootb_internal.computer_use_demo.gui_agent.gui_parser.simple_parser.utils import get_screen_resize_factor
13
+ from computer_use_ootb_internal.computer_use_demo.tools.aws_request import send_request_to_server
14
+ from computer_use_ootb_internal.computer_use_demo.gui_agent.gui_parser.uia_tools.screenshot_service import get_screenshot_external_cmd
15
+
16
+
17
+ utc_plus_8 = timezone(timedelta(hours=8))
18
+
19
+
20
def _detect_full_screen_game_mode(user_id, trace_id):
    """Derive the full-screen game mode from the session identifiers.

    Returns 0 (disabled), 1 (starrail) or 2 (starrail browser). ``None``
    identifiers are treated as empty strings.
    """
    uid = user_id or ""
    tid = trace_id or ""

    mode = 0
    # TODO: set full_screen_game_mode adaptively
    if "star_rail" in uid or "star_rail" in tid:
        mode = 1
    if "star_rail_dev" in tid or "star_rail_dev" in uid or "hero_case" in uid or "official" in uid:
        mode = 2
    return mode


def simple_teachmode_sampling_loop(
    model: str,
    task: str,
    api_keys: dict = None,
    action_history: list[dict] = None,
    selected_screen: int = 0,
    user_id: str = None,
    trace_id: str = None,
    server_url: str = "http://localhost:5000/generate_action",
    max_steps: int = 20,
):
    """
    Synchronous sampling loop for assistant/tool interactions in 'teach mode'.

    Each iteration takes a screenshot, sends it together with the task and
    the action history to the inference server at ``server_url``, yields the
    returned reasoning and then runs the returned action through a
    ``TeachmodeExecutor``, yielding every message the executor produces.
    The loop ends when the server returns a ``STOP`` action, when the server
    response is missing or malformed, or when the step budget is used up.

    Args:
        model: Not used inside this function.
        task: Task description, sent to the server as ``query``.
        api_keys: Forwarded to the server unchanged.
        action_history: Entries describing earlier steps. A list passed in by
            the caller is appended to in place (one string per executed step).
        selected_screen: Index of the screen to capture and act on.
        user_id: Session user id; also used to pick the full-screen game mode.
        trace_id: Session trace id; also used to pick the full-screen game mode.
        server_url: Endpoint that generates the next action.
        max_steps: Step budget; see the review note on the loop condition.

    Yields:
        dict: Messages with ``role``/``content``/``type`` keys (screenshots,
        reasoning text, executor output, errors).
    """
    if action_history is None:
        action_history = []

    # if platform.system() != "Windows":
    #     raise ValueError("Teach mode is only supported on Windows.")

    # 0: disabled, 1: starrail, 2: starrail browser
    full_screen_game_mode = _detect_full_screen_game_mode(user_id, trace_id)

    print(f"Full Screen Game Mode: {full_screen_game_mode}")
    executor = TeachmodeExecutor(
        selected_screen=selected_screen,
        full_screen_game_mode=full_screen_game_mode,
    )

    timestamp = datetime.now(utc_plus_8).strftime("%m%d-%H%M%S")

    step_count = 1
    unique_task_id = f"{timestamp}_uid_{user_id}_tid_{trace_id}_{str(uuid.uuid4())[:6]}"

    print("[simple_teachmode_sampling_loop] starting task: ", task)
    print(f"[simple_teachmode_sampling_loop] unique_task_id: {unique_task_id}")

    # NOTE(review): step_count starts at 1 and the comparison is strict, so at
    # most max_steps - 1 steps are executed. Kept as-is; confirm the intent.
    while step_count < max_steps:

        print(f"step_count: {step_count}")

        # Pause briefly so we don't spam screenshots
        time.sleep(1)

        # UIA metadata is only requested when no full-screen game mode is active.
        uia_meta, sc_path = get_screenshot_external_cmd(
            selected_screen=selected_screen,
            capture_uia_data=full_screen_game_mode == 0,
        )

        if is_image_path(sc_path):
            with open(sc_path, "rb") as image_file:
                sc_base64 = base64.b64encode(image_file.read()).decode('utf-8')
            yield {"role": "assistant", "content": sc_base64, "type": "image_base64", "action_type": "screenshot"}

        payload = {
            "task_id": unique_task_id,
            "uia_data": uia_meta,
            "screenshot_path": sc_path,
            "query": task,
            "action_history": action_history,
            "mode": "teach",
            "user_id": user_id,
            "trace_id": trace_id,
            "scale_factor": get_screen_resize_factor(),
            "os_name": platform.system(),
            "api_keys": api_keys,
        }

        # Send request to Marbot Run server
        infer_server_response = send_request_to_server(payload, server_url)

        if infer_server_response is None:
            print("No response from Marbot Run server. Exiting.")
            yield {"role": "assistant", "content": "No response from Marbot Run server. Exiting.", "type": "error"}
            break

        try:
            step_plan = infer_server_response["generated_plan"]
            step_reasoning = step_plan["reasoning"]
            step_info = step_plan["step_info"]
            step_action = infer_server_response["generated_action"]["content"]
            step_traj_idx = infer_server_response["current_traj_step"]
        except (KeyError, TypeError) as e:
            # A missing key or a non-subscriptable value means the response
            # does not have the expected layout.
            print("Error parsing generated_action content:", e)
            yield {"role": "assistant", "content": "Error parsing response from Marbot Run server. Exiting.", "type": "error"}
            break

        yield {"role": "assistant", "content": step_reasoning, "type": "text"}

        if step_action.get("action") == "STOP":
            _, final_sc_path = get_screenshot_external_cmd(selected_screen=selected_screen)

            yield {"role": "assistant", "content": "Task completed. Final screenshot:", "type": "text"}
            yield {"role": "assistant", "content": final_sc_path, "type": "image"}
            break

        action_history.append(f"Executing guidance trajectory step [{step_traj_idx}]: {{Plan: {step_info}, Action: {step_action}}}\n")

        for exec_message in executor({"role": "assistant", "content": step_action}):
            yield exec_message

        step_count += 1

    # NOTE(review): the previous "reset action history" statements only rebound
    # the local name and had no effect; a list supplied by the caller keeps the
    # entries appended above.
153
+
154
+
155
+
156
if __name__ == "__main__":
    # Command-line entry point: parse the options, run the teach-mode
    # sampling loop and print every message it yields.
    parser = argparse.ArgumentParser(
        description="Run a synchronous sampling loop for assistant/tool interactions in teach-mode."
    )
    parser.add_argument(
        "--model",
        default="teach-mode",
        help="The model to use",
    )
    parser.add_argument(
        "--task",
        default="Click on the Google Chrome icon",
        help="The task to be completed by the assistant (e.g., 'Complete some data extraction.').",
    )
    parser.add_argument(
        "--selected_screen",
        type=int,
        default=0,
        help="Index of the screen to capture (default=%(default)s).",
    )
    # The help texts below use argparse's %(default)s so they cannot drift
    # away from the actual default values.
    parser.add_argument(
        "--user_id",
        default="star_rail",
        help="User ID for the session (default='%(default)s').",
    )
    parser.add_argument(
        "--trace_id",
        default="ONG_JING_JIE_007-0213_0",
        help="Trace ID for the session (default='%(default)s').",
    )
    parser.add_argument(
        "--api_key_file",
        default="api_key.json",
        help="Path to the JSON file containing API keys (default='%(default)s').",
    )
    parser.add_argument(
        "--max_steps",
        type=int,
        default=20,
        help="The maximum number of steps to take.",
    )

    args = parser.parse_args()

    # Loading keys from --api_key_file is currently disabled, so the option
    # is parsed but not read and no keys are sent.
    # with open(args.api_key_file, "r") as file:
    #     api_keys = json.load(file)
    api_keys = None

    print(f"Starting task: {args.task}")

    # Execute the sampling loop
    sampling_loop = simple_teachmode_sampling_loop(
        model=args.model,
        task=args.task,
        selected_screen=args.selected_screen,
        user_id=args.user_id,
        trace_id=args.trace_id,
        api_keys=api_keys,
        max_steps=args.max_steps,
    )

    # Print each step result
    for step in sampling_loop:
        print(step)
        time.sleep(1)

    print(f"Task '{args.task}' completed. Thanks for using Teachmode-OOTB.")