computer-use-ootb-internal 0.0.103__py3-none-any.whl → 0.0.105__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,11 @@ import json
4
4
  from datetime import datetime
5
5
  import threading
6
6
  import requests
7
+ import platform # Add platform import
8
+ import subprocess # Add subprocess import
9
+ import pyautogui # Add pyautogui import
10
+ import webbrowser # Add webbrowser import
11
+ import os # Import os for path joining
7
12
  from fastapi import FastAPI, Request
8
13
  from fastapi.responses import JSONResponse
9
14
  from fastapi.middleware.cors import CORSMiddleware
@@ -87,6 +92,69 @@ class SharedState:
87
92
  shared_state = None
88
93
  rate_limiter = RateLimiter(interval_seconds=2)
89
94
 
95
+ # Add the new prepare_environment function here
96
+ def prepare_environment(state):
97
+ """Prepares the environment before starting the main processing loop, e.g., opening specific apps."""
98
+ if platform.system() == "Windows":
99
+ # Assuming Star Rail mode is indicated by user_id containing "star_rail"
100
+ # You might need to adjust this condition based on the actual logic in run_teachmode_args
101
+ is_star_rail = "star_rail" in state.user_id.lower() or \
102
+ "star_rail" in state.trace_id.lower() or \
103
+ "hero_case" in state.trace_id.lower()
104
+
105
+ if is_star_rail:
106
+ print("Star Rail mode detected on Windows. Opening Edge browser...")
107
+ url = "https://sr.mihoyo.com/cloud/#/"
108
+ browser_opened = False
109
+ try:
110
+ # Use only webbrowser.open
111
+ print(f"Attempting to open {url} using webbrowser.open()...")
112
+ if webbrowser.open(url):
113
+ print(f"Successfully requested browser to open {url} via webbrowser.open().")
114
+ browser_opened = True
115
+ else:
116
+ print("webbrowser.open() returned False, indicating potential failure.")
117
+
118
+ if not browser_opened:
119
+ print("ERROR: Failed to confirm browser opening via webbrowser.open().")
120
+ # Still proceed to click attempt
121
+
122
+ # Add pyautogui click after attempting to open the browser
123
+ print("Proceeding with pyautogui actions...")
124
+ time.sleep(5) # Wait time for the browser to load
125
+
126
+ # Print detected screen size
127
+ screen_width, screen_height = pyautogui.size()
128
+ print(f"Detected screen size: {screen_width}x{screen_height}")
129
+
130
+ click_x = int(screen_width * (1036 / 1280))
131
+ click_y = int(screen_height * (500 / 720))
132
+ print(f"Calculated click coordinates: ({click_x}, {click_y})")
133
+
134
+ # Disable failsafe before clicking
135
+ pyautogui.FAILSAFE = False
136
+ print("PyAutoGUI failsafe temporarily disabled.")
137
+
138
+ print(f"Clicking at coordinates: ({click_x}, {click_y})")
139
+ pyautogui.click(click_x, click_y)
140
+ time.sleep(2)
141
+ pyautogui.click(click_x, click_y)
142
+
143
+ # Re-enable failsafe (optional, as script might end anyway)
144
+ # pyautogui.FAILSAFE = True
145
+ # print("PyAutoGUI failsafe re-enabled.")
146
+
147
+ except Exception as e:
148
+ print(f"Error during environment preparation (browser/click): {e}")
149
+ finally:
150
+ # Ensure failsafe is re-enabled if an error occurs after disabling it
151
+ pyautogui.FAILSAFE = True
152
+ print("PyAutoGUI failsafe re-enabled.")
153
+ else:
154
+ # Placeholder for potential preparations on other OS or non-Star Rail modes
155
+ print("Environment preparation: No specific actions required for this OS/mode.")
156
+
157
+
90
158
  @app.post("/update_params")
91
159
  async def update_parameters(request: Request):
92
160
  data = await request.json()
@@ -111,6 +179,9 @@ async def update_parameters(request: Request):
111
179
 
112
180
  log_ootb_request(shared_state.server_url, "update_params", data)
113
181
 
182
+ # Call the preparation function here, after parameters are updated
183
+ prepare_environment(shared_state)
184
+
114
185
  return JSONResponse(
115
186
  content={"status": "success", "message": "Parameters updated", "new_args": vars(shared_state.args)},
116
187
  status_code=200
@@ -1,81 +1,214 @@
1
- # click_anim_async.py ← put this in its own file (important for Windows "spawn")
2
- import sys, multiprocessing as mp
1
+ """
2
+ show_click(x, y, duration_ms=800)
3
+ → 在屏幕 (x,y) 显示点击动画,停留 duration_ms 毫秒
4
+ 依赖: pyside6
5
+ 确保同目录有 click.gif
6
+ """
7
+ import sys, time
3
8
  from pathlib import Path
4
- from PySide6.QtCore import Qt, QPoint, QTimer, QEasingCurve, QPropertyAnimation, QSize
5
- from PySide6.QtGui import QMovie
9
+ from PySide6.QtCore import Qt, QPoint, QTimer, QEventLoop, QSize, QEasingCurve, QPropertyAnimation
10
+ from PySide6.QtGui import QPainter, QPixmap, QMovie
6
11
  from PySide6.QtWidgets import QApplication, QWidget, QLabel
7
12
 
8
13
  CLICK_GIF = Path(__file__).with_name("icons8-select-cursor-transparent-96.gif")
9
14
 
10
- # ---------------------------- tiny in‑process GUI helpers ----------------------------
11
15
  class ClickAnimation(QWidget):
12
- def __init__(self, pos: QPoint, life_ms: int, size_px: int = 50):
16
+ def __init__(self, pos: QPoint, life_ms: int):
13
17
  super().__init__(None,
14
18
  Qt.FramelessWindowHint | Qt.Tool | Qt.WindowStaysOnTopHint
15
19
  | Qt.WindowTransparentForInput)
16
20
  self.setAttribute(Qt.WA_TranslucentBackground)
21
+
22
+ if not CLICK_GIF.exists():
23
+ print(f"Error: click.gif not found at {CLICK_GIF}")
24
+ return
25
+
26
+ try:
27
+ # 创建标签显示GIF
28
+ self.label = QLabel(self)
29
+ self.movie = QMovie(str(CLICK_GIF))
30
+
31
+ # 获取原始尺寸并打印(仅供参考)
32
+ self.movie.jumpToFrame(0)
33
+ original_size = self.movie.currentPixmap().size()
34
+ print(f"GIF original size: {original_size.width()}x{original_size.height()}")
35
+
36
+ # 将GIF缩放到30x30像素
37
+ target_size = QSize(50, 50)
38
+ self.movie.setScaledSize(target_size)
39
+
40
+ # 设置标签尺寸和GIF
41
+ self.label.setMovie(self.movie)
42
+ self.label.setFixedSize(target_size)
43
+
44
+ # 设置窗口大小和位置
45
+ self.resize(target_size)
46
+ self.move(pos.x() - 15, pos.y() - 15) # 居中显示
47
+
48
+ # 提高播放性能
49
+ self.movie.setCacheMode(QMovie.CacheAll)
50
+
51
+ # 开始播放动画
52
+ self.movie.start()
53
+
54
+ # 设置定时器关闭窗口
55
+ QTimer.singleShot(life_ms, self.close)
56
+
57
+ self.show()
58
+ self.raise_()
59
+ print(f"Click animation created at ({pos.x()}, {pos.y()}), size: 30x30, duration: {life_ms}ms")
60
+ except Exception as e:
61
+ print(f"Error creating click animation: {str(e)}")
17
62
 
18
- self.label = QLabel(self)
19
- movie = QMovie(str(CLICK_GIF))
20
- movie.setScaledSize(QSize(size_px, size_px))
21
- self.label.setMovie(movie)
22
- self.label.setFixedSize(size_px, size_px)
63
+ # ---------- 外部接口 ----------
64
+ _app = None
65
+ def _ensure_app():
66
+ global _app
67
+ if _app is None:
68
+ if QApplication.instance() is None:
69
+ print("Creating new QApplication instance")
70
+ _app = QApplication(sys.argv)
71
+ else:
72
+ print("Using existing QApplication instance")
73
+ _app = QApplication.instance()
23
74
 
24
- self.resize(size_px, size_px)
25
- self.move(pos.x() - size_px//2, pos.y() - size_px//2)
75
+ # Keep references to animations to prevent garbage collection
76
+ _active_animations = []
26
77
 
27
- movie.setCacheMode(QMovie.CacheAll)
28
- movie.start()
29
- QTimer.singleShot(life_ms, self.close)
30
- self.show()
31
- self.raise_()
78
+ def show_click(x: int, y: int, duration_ms: int = 2000, existing_ms: int = 2000): # 增加默认播放时间和静止时间
79
+ """非阻塞式点击动画:立即返回,动画在后台运行
80
+
81
+ Args:
82
+ x, y : 屏幕坐标
83
+ duration_ms : 动画播放时长
84
+ existing_ms : 动画结束后静止显示的时间
85
+ """
86
+ print(f"Attempting to show click at ({x}, {y})")
87
+
88
+ if not CLICK_GIF.exists():
89
+ raise FileNotFoundError(f"click.gif not found at {CLICK_GIF}")
90
+
91
+ _ensure_app()
92
+
93
+ try:
94
+ # 总生存时间 = 动画时间 + 静止显示时间
95
+ total_life_ms = duration_ms + existing_ms
96
+ animation = ClickAnimation(QPoint(x, y), total_life_ms)
97
+
98
+ # Store reference to prevent garbage collection
99
+ global _active_animations
100
+ _active_animations.append(animation)
101
+
102
+ # Set up cleanup after animation completes + existing time
103
+ QTimer.singleShot(total_life_ms + 150, lambda: _clean_animation(animation))
104
+
105
+ print(f"Click animation started (non-blocking, will exist for {total_life_ms}ms)")
106
+ except Exception as e:
107
+ print(f"Error during show_click: {str(e)}")
32
108
 
33
- # ------------------------- worker functions that live in a **child** -----------------
34
- def _worker_click(x, y, duration_ms, existing_ms):
35
- app = QApplication(sys.argv)
36
- total = duration_ms + existing_ms
37
- widget = ClickAnimation(QPoint(x, y), total) # Store in variable to prevent garbage collection
38
- QTimer.singleShot(total + 200, app.quit) # close event‑loop afterwards
39
- app.exec()
40
109
 
41
- def _worker_move(x1, y1, x2, y2, duration_ms, existing_ms):
42
- app = QApplication(sys.argv)
43
- total = duration_ms + existing_ms
44
- widget = ClickAnimation(QPoint(x1, y1), total)
110
+ def _clean_animation(animation):
111
+ """Remove animation from reference list after it completes"""
112
+ global _active_animations
113
+ if animation in _active_animations:
114
+ _active_animations.remove(animation)
115
+ print("Animation cleaned up")
45
116
 
46
- anim = QPropertyAnimation(widget, b"pos")
47
- anim.setDuration(duration_ms)
48
- anim.setStartValue(widget.pos())
49
- anim.setEndValue(QPoint(x2 - widget.width()//2, y2 - widget.height()//2))
50
- anim.setEasingCurve(QEasingCurve.OutQuad)
51
- anim.start()
52
117
 
53
- QTimer.singleShot(total + 200, app.quit)
54
- app.exec()
118
+ # ---------- 新增函数 ----------
119
+ def show_move_to(x1: int, y1: int, x2: int, y2: int, duration_ms: int = 1000, existing_ms: int = 3000):
120
+ """
121
+ 非阻塞式移动动画:在 (x1, y1) 处出现光标 GIF,
122
+ 并在 duration_ms 毫秒内平滑移动到 (x2, y2),
123
+ 然后在终点静止显示 existing_ms 毫秒。
124
+ 立即返回,动画在后台运行。
55
125
 
56
- # ------------------------------- public API (non‑blocking) ---------------------------
57
- def show_click(x: int, y: int, duration_ms: int = 800, existing_ms: int = 800):
58
- if not CLICK_GIF.exists():
59
- raise FileNotFoundError(f"GIF not found at {CLICK_GIF}")
60
- mp.get_context("spawn").Process(
61
- target=_worker_click,
62
- args=(x, y, duration_ms, existing_ms),
63
- daemon=False # keep running even if parent exits
64
- ).start()
126
+ Args:
127
+ x1, y1 : 起点屏幕坐标
128
+ x2, y2 : 终点屏幕坐标
129
+ duration_ms : 移动总时长
130
+ existing_ms : 移动结束后在终点静止显示的时间
131
+ """
132
+ print(f"Attempting to move click from ({x1}, {y1}) → ({x2}, {y2}) "
133
+ f"in {duration_ms} ms, then stay for {existing_ms} ms")
65
134
 
66
- def show_move_to(x1: int, y1: int, x2: int, y2: int,
67
- duration_ms: int = 1000, existing_ms: int = 800):
68
135
  if not CLICK_GIF.exists():
69
- raise FileNotFoundError(f"GIF not found at {CLICK_GIF}")
70
- mp.get_context("spawn").Process(
71
- target=_worker_move,
72
- args=(x1, y1, x2, y2, duration_ms, existing_ms),
73
- daemon=False
74
- ).start()
136
+ raise FileNotFoundError(f"click.gif not found at {CLICK_GIF}")
75
137
 
138
+ _ensure_app()
139
+
140
+ # 总生存时间 = 动画时间 + 静止显示时间
141
+ total_life_ms = duration_ms + existing_ms
142
+ widget = ClickAnimation(QPoint(x1, y1), total_life_ms)
143
+
144
+ # 用 QPropertyAnimation 平滑移动窗口
145
+ anim = QPropertyAnimation(widget, b"pos")
146
+ anim.setDuration(duration_ms)
147
+ # ClickAnimation 内部已经向左上偏移了 15px,这里沿用同样的偏移
148
+ anim.setStartValue(QPoint(x1 - 15, y1 - 15))
149
+ anim.setEndValue(QPoint(x2 - 15, y2 - 15))
150
+ anim.setEasingCurve(QEasingCurve.OutQuad) # 可自行更换缓动曲线
151
+
152
+ # Store references to both widget and animation to prevent garbage collection
153
+ global _active_animations
154
+ # Store them as a tuple to keep both references
155
+ animation_pair = (widget, anim)
156
+ _active_animations.append(animation_pair)
157
+
158
+ # Clean up both widget and animation after completion of total life time
159
+ def cleanup():
160
+ if animation_pair in _active_animations:
161
+ _active_animations.remove(animation_pair)
162
+ print("Move animation cleaned up")
163
+
164
+ # Connect finished signal only to print a message
165
+ anim.finished.connect(lambda: print("Movement finished, now staying still"))
166
+
167
+ # Start the animation
168
+ anim.start()
169
+
170
+ # Process events immediately to kickstart the animation
171
+ QApplication.processEvents()
172
+
173
+ # Set up final cleanup after animation + existing time
174
+ QTimer.singleShot(total_life_ms, cleanup)
175
+
176
+ print("Move-to animation started (non-blocking)")
76
177
 
77
178
 
179
+ # ---------- 命令行测试 ----------
78
180
  if __name__ == "__main__":
79
- # from click_anim_async import show_click
80
- show_click(500, 500)
81
- show_move_to(300, 300, 600, 600)
181
+ # 确保应用程序实例存在
182
+ _ensure_app()
183
+
184
+ # 测试点击
185
+ print("Testing non-blocking click animation...")
186
+ x, y = 500, 500
187
+ show_click(x, y)
188
+
189
+ # 测试同时运行两个动画
190
+ print("\nTesting simultaneous animations...")
191
+ x1, y1 = 200, 200
192
+ x2, y2 = 600, 600
193
+ # show_click(x1, y1)
194
+ show_move_to(x1, y1, x2, y2, duration_ms=2000)
195
+
196
+ # # 测试先移动,然后点击
197
+ print("\nTesting sequence with pyautogui simulation...")
198
+ x3, y3 = 800, 300
199
+ x4, y4 = 400, 500
200
+
201
+ # 启动移动动画
202
+ show_move_to(x3, y3, x4, y4, duration_ms=1500)
203
+
204
+ # 模拟移动完成后的点击动画(延迟1.5秒)
205
+ QTimer.singleShot(1500, lambda: show_click(x4, y4))
206
+
207
+ # 保持主程序运行,等待所有动画完成
208
+ print("\nWaiting for all animations to complete...")
209
+ loop = QEventLoop()
210
+ # 等待足够长的时间,确保所有动画都完成(最长的动画是2000ms + 清理时间)
211
+ QTimer.singleShot(4000, loop.quit)
212
+ loop.exec()
213
+
214
+ print("All animations completed, exiting test.")
@@ -249,6 +249,9 @@ class TeachmodeExecutor:
249
249
  else:
250
250
  parsed_action_list.append(action)
251
251
 
252
+ # parsed_action_list.extend([{"action": "key_down_windll", "text": "alt", "coordinate": None}])
253
+ # parsed_action_list.extend(action_list)
254
+ # parsed_action_list.extend([{"action": "key_up_windll", "text": "alt", "coordinate": None}])
252
255
  return parsed_action_list
253
256
 
254
257
 
@@ -1,8 +1,8 @@
1
- from PIL import ImageGrab
2
-
3
- bbox=(2560, 366, 2560+1920, 366+1080)
4
-
5
- screenshot = ImageGrab.grab(bbox=bbox, all_screens=True)
6
-
7
- screenshot = screenshot.convert('RGB')
1
+ from PIL import ImageGrab
2
+
3
+ bbox=(2560, 366, 2560+1920, 366+1080)
4
+
5
+ screenshot = ImageGrab.grab(bbox=bbox, all_screens=True)
6
+
7
+ screenshot = screenshot.convert('RGB')
8
8
  screenshot.save("screenshot.png")
@@ -317,6 +317,7 @@ class ComputerTool(BaseAnthropicTool):
317
317
  pyautogui.mouseDown()
318
318
  time.sleep(1)
319
319
  pyautogui.mouseUp()
320
+ show_click(x, y)
320
321
  elif action == "scroll_down":
321
322
  pyautogui.scroll(-200) # Adjust scroll amount as needed
322
323
  return ToolResult(output="Scrolled down")
@@ -0,0 +1,41 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Teachmode Client</title>
5
+ <!-- include socket.io client -->
6
+ <script src="https://cdn.socket.io/4.5.4/socket.io.min.js"></script>
7
+ </head>
8
+ <body>
9
+ <h1>Teachmode WebSocket Test</h1>
10
+ <div id="messages"></div>
11
+
12
+ <script>
13
+ const socket = io("http://localhost:5000"); // Your server’s URL/port
14
+
15
+ // Listen for partial responses
16
+ socket.on("partial_response", (data) => {
17
+ console.log("Got partial:", data);
18
+ const div = document.createElement("div");
19
+ div.innerText = "Assistant: " + data.content;
20
+ document.getElementById("messages").appendChild(div);
21
+ });
22
+
23
+ // Listen for done event
24
+ socket.on("done", (data) => {
25
+ console.log("Task completed:", data);
26
+ const div = document.createElement("div");
27
+ div.innerText = "TASK DONE!";
28
+ document.getElementById("messages").appendChild(div);
29
+ });
30
+
31
+ // When connected, emit run_teachmode
32
+ socket.on("connect", () => {
33
+ console.log("Connected to server. Emitting run_teachmode event...");
34
+ socket.emit("run_teachmode", {
35
+ user_input: "Hello, I'd like to do X, Y, Z."
36
+ // you can include model, task, user_id, trace_id, etc. if desired
37
+ });
38
+ });
39
+ </script>
40
+ </body>
41
+ </html>
@@ -15,5 +15,4 @@ uiautomation
15
15
  pywinauto
16
16
  textdistance
17
17
  matplotlib
18
- litellm
19
- PySide6
18
+ litellm
@@ -41,7 +41,7 @@ def simple_teachmode_sampling_loop(
41
41
  if "star_rail" in user_id or "star_rail" in user_id:
42
42
  full_screen_game_mode = 1
43
43
 
44
- if "star_rail_dev" in trace_id or "star_rail_dev" in user_id or "hero_case" in user_id:
44
+ if "star_rail_dev" in trace_id or "star_rail_dev" in user_id or "hero_case" in trace_id:
45
45
  full_screen_game_mode = 2
46
46
 
47
47
  print(f"Full Screen Game Mode: {full_screen_game_mode}")
@@ -102,7 +102,7 @@ def simple_teachmode_sampling_loop(
102
102
 
103
103
  try:
104
104
  step_plan = infer_server_response["generated_plan"]
105
- step_info = infer_server_response["generated_plan"]["step_info"]
105
+ step_info = infer_server_response["generated_action"]["step_info"]
106
106
  step_action = infer_server_response["generated_action"]["content"]
107
107
  step_traj_idx = infer_server_response["current_traj_step"]
108
108
 
@@ -125,6 +125,8 @@ def simple_teachmode_sampling_loop(
125
125
 
126
126
 
127
127
 
128
+
129
+
128
130
  if __name__ == "__main__":
129
131
  parser = argparse.ArgumentParser(
130
132
  description="Run a synchronous sampling loop for assistant/tool interactions in teach-mode."
@@ -136,6 +138,8 @@ if __name__ == "__main__":
136
138
  )
137
139
  parser.add_argument(
138
140
  "--task",
141
+ # default="Help me to complete the extraction of the viewer data of Downald Trump's first video on youtube,\
142
+ # fill in the excel sheet.",
139
143
  default="Click on the Google Chorme icon",
140
144
  help="The task to be completed by the assistant (e.g., 'Complete some data extraction.').",
141
145
  )
@@ -0,0 +1,194 @@
1
+ import argparse
2
+ import time
3
+ import json
4
+ import threading
5
+
6
+ from flask import Flask, request, jsonify
7
+ from flask_socketio import SocketIO, emit
8
+
9
+ from screeninfo import get_monitors
10
+ from computer_use_ootb_internal.computer_use_demo.tools.computer import get_screen_details
11
+ from computer_use_ootb_internal.run_teachmode_ootb_args import simple_teachmode_sampling_loop
12
+
13
+ ###############################################################################
14
+ # Shared State
15
+ ###############################################################################
16
+ class SharedState:
17
+ def __init__(self):
18
+ self.args = None # Will hold argparse-like namespace
19
+ self.messages = [] # If you want to store a global chat or last session
20
+
21
+ shared_state = SharedState()
22
+
23
+ ###############################################################################
24
+ # Flask + SocketIO Application Setup
25
+ ###############################################################################
26
+ app = Flask(__name__)
27
+ app.config["SECRET_KEY"] = "some-secret-key" # In production, change this
28
+ socketio = SocketIO(app, cors_allowed_origins="*")
29
+
30
+ ###############################################################################
31
+ # Utility Functions
32
+ ###############################################################################
33
+ def setup_default_args():
34
+ """
35
+ Creates argparse-like defaults.
36
+ You can also parse real CLI args if you wish.
37
+ """
38
+ parser = argparse.ArgumentParser(description="Teachmode SocketIO Server.")
39
+ parser.add_argument("--model", default="teach-mode-gpt-4o")
40
+ parser.add_argument("--task", default="Help me complete data extraction on YouTube video.")
41
+ parser.add_argument("--selected_screen", type=int, default=0)
42
+ parser.add_argument("--user_id", default="liziqi")
43
+ parser.add_argument("--trace_id", default="default_trace")
44
+ parser.add_argument("--api_key_file", default="api_key.json")
45
+ parser.add_argument("--api_keys", default="")
46
+ parser.add_argument(
47
+ "--server_url",
48
+ default="http://ec2-44-234-43-86.us-west-2.compute.amazonaws.com/generate_action",
49
+ help="Server URL for the session (local='http://localhost:5000/generate_action', \
50
+ aws='http://ec2-44-234-43-86.us-west-2.compute.amazonaws.com/generate_action').",
51
+ )
52
+
53
+ # If you really want to parse sys.argv, do parser.parse_args().
54
+ # But you can also return the defaults for this example:
55
+ return parser.parse_args([])
56
+
57
+ def apply_args_to_state(args):
58
+ """
59
+ Helper that prints or logs relevant arguments and stores them in shared_state.
60
+ """
61
+ print("[apply_args_to_state] Applying arguments:", args)
62
+ shared_state.args = args
63
+
64
+ def run_teachmode_task(user_input):
65
+ """
66
+ Calls simple_teachmode_sampling_loop and emits partial responses over SocketIO.
67
+ """
68
+ # 1) Log or store user input
69
+ print(f"[run_teachmode_task] Received user_input: {user_input}")
70
+ # Optionally store or reset message history for this session
71
+ shared_state.messages = [{"role": "user", "content": user_input}]
72
+
73
+ # 2) Grab arguments from shared_state
74
+ args = shared_state.args
75
+ if not args:
76
+ print("[run_teachmode_task] No arguments in shared_state, applying defaults.")
77
+ args = setup_default_args()
78
+ apply_args_to_state(args)
79
+
80
+ # 3) Run the sampling loop
81
+ print(f"[run_teachmode_task] Starting the sampling loop with task: {args.task}")
82
+ sampling_loop = simple_teachmode_sampling_loop(
83
+ model=args.model,
84
+ task=args.task,
85
+ selected_screen=args.selected_screen,
86
+ user_id=args.user_id,
87
+ trace_id=args.trace_id,
88
+ api_keys=args.api_keys,
89
+ server_url=args.server_url
90
+ )
91
+
92
+ # 4) Send partial responses
93
+ for loop_msg in sampling_loop:
94
+ print(f"[run_teachmode_task] Emitting partial response: {loop_msg}")
95
+ # You can store it in shared_state messages
96
+ shared_state.messages.append({"role": "assistant", "content": loop_msg})
97
+ # Emit immediately so the client sees partial responses
98
+ emit("partial_response", {"role": "assistant", "content": loop_msg})
99
+ time.sleep(1) # Optional delay to simulate real-time streaming
100
+
101
+ # 5) Done event
102
+ print("[run_teachmode_task] Completed all messages.")
103
+ emit("done", {"messages": shared_state.messages, "status": "completed"})
104
+
105
+ ###############################################################################
106
+ # HTTP Endpoint: update_params
107
+ ###############################################################################
108
+ @app.route("/update_params", methods=["POST"])
109
+ def update_parameters():
110
+ """
111
+ HTTP endpoint that allows updating the parameters (like Gradio's /update_params).
112
+ Expects JSON body with fields matching the argparse Namespace (model, task, etc.)
113
+ """
114
+ data = request.json
115
+ if not data:
116
+ return jsonify({"status": "error", "message": "No JSON provided."}), 400
117
+
118
+ # Build an argparse.Namespace from the JSON keys
119
+ # Fallback to the existing arguments if some keys are missing
120
+ old_args = shared_state.args or setup_default_args()
121
+ new_args_dict = {**vars(old_args), **data} # Merge old with new
122
+ new_args = argparse.Namespace(**new_args_dict)
123
+ apply_args_to_state(new_args)
124
+
125
+ return jsonify({
126
+ "status": "success",
127
+ "message": "Parameters updated",
128
+ "new_args": vars(new_args)
129
+ })
130
+
131
+ ###############################################################################
132
+ # HTTP Endpoint: get_messages
133
+ ###############################################################################
134
+ @app.route("/get_messages", methods=["GET"])
135
+ def get_messages():
136
+ """
137
+ Example new function: returns the current chat messages in shared_state.
138
+ """
139
+ return jsonify(shared_state.messages)
140
+
141
+ ###############################################################################
142
+ # HTTP Endpoint: clear_messages
143
+ ###############################################################################
144
+ @app.route("/clear_messages", methods=["POST"])
145
+ def clear_messages():
146
+ """
147
+ Example new function: clears the stored chat messages in shared_state.
148
+ """
149
+ shared_state.messages = []
150
+ return jsonify({"status": "success", "message": "Chat history cleared."})
151
+
152
+ ###############################################################################
153
+ # SocketIO Event: run_teachmode
154
+ ###############################################################################
155
+ @socketio.on("run_teachmode")
156
+ def handle_run_teachmode(data):
157
+ """
158
+ Websocket event that starts the teachmode sampling loop.
159
+ `data` can include e.g. {"user_input": "..."}.
160
+ """
161
+ user_input = data.get("user_input", "Hello, let's start!")
162
+ run_teachmode_task(user_input)
163
+
164
+ ###############################################################################
165
+ # SocketIO Event: connect
166
+ ###############################################################################
167
+ @socketio.on("connect")
168
+ def on_connect():
169
+ print("[SocketIO] Client connected.")
170
+
171
+ @socketio.on("disconnect")
172
+ def on_disconnect():
173
+ print("[SocketIO] Client disconnected.")
174
+
175
+ ###############################################################################
176
+ # Main
177
+ ###############################################################################
178
+ def main():
179
+ # Pre-populate shared_state with default arguments
180
+ args = setup_default_args()
181
+ apply_args_to_state(args)
182
+
183
+ # Optional: Preload screen info if needed
184
+ screens = get_monitors()
185
+ print("Detected screens:", screens)
186
+ screen_names, primary_index = get_screen_details()
187
+ print("Screen names:", screen_names, "Default selected index:", primary_index)
188
+
189
+ # Run the Flask-SocketIO app
190
+ # eventlet is the default async_mode if installed, but we specify it explicitly.
191
+ socketio.run(app, host="0.0.0.0", port=5001, debug=True)
192
+
193
+ if __name__ == "__main__":
194
+ main()
@@ -0,0 +1,41 @@
1
+ import socketio
2
+
3
+ # Create a Socket.IO client instance
4
+ sio = socketio.Client()
5
+
6
+ @sio.on('connect')
7
+ def on_connect():
8
+ print("Connected to the server.")
9
+ # Once connected, send the event to start the teachmode process:
10
+ data = {
11
+ "user_input": "Hello, I'd like to open the Chrome browser."
12
+ # You can add more parameters here if needed, e.g.:
13
+ # "model": "teach-mode-gpt-4o",
14
+ # "task": "Some task",
15
+ # "user_id": "my_user",
16
+ # etc.
17
+ }
18
+ print("Emitting 'run_teachmode' event with data:", data)
19
+ sio.emit("run_teachmode", data)
20
+
21
+ @sio.on('partial_response')
22
+ def on_partial_response(data):
23
+ print("[partial_response] =>", data)
24
+
25
+ @sio.on('done')
26
+ def on_done(data):
27
+ print("[done] =>", data)
28
+ # Since the process is completed, you can disconnect:
29
+ sio.disconnect()
30
+
31
+ @sio.on('disconnect')
32
+ def on_disconnect():
33
+ print("Disconnected from server.")
34
+
35
+
36
+ if __name__ == "__main__":
37
+ # Connect to the Socket.IO server (adapt host/port as needed):
38
+ sio.connect("http://localhost:5001")
39
+
40
+ # Keep the client alive to receive events
41
+ sio.wait()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: computer-use-ootb-internal
3
- Version: 0.0.103
3
+ Version: 0.0.105
4
4
  Summary: Computer Use OOTB
5
5
  Author-email: Siyuan Hu <siyuan.hu.sg@gmail.com>
6
6
  Requires-Python: >=3.11
@@ -14,15 +14,14 @@ Requires-Dist: matplotlib
14
14
  Requires-Dist: opencv-python
15
15
  Requires-Dist: pre-commit==3.8.0
16
16
  Requires-Dist: pyautogui==0.9.54
17
- Requires-Dist: pyside6
18
17
  Requires-Dist: pytest-asyncio==0.23.6
19
18
  Requires-Dist: pytest==8.3.3
20
- Requires-Dist: pywinauto
19
+ Requires-Dist: pywinauto; sys_platform == 'win32'
21
20
  Requires-Dist: ruff==0.6.7
22
21
  Requires-Dist: screeninfo
23
22
  Requires-Dist: streamlit>=1.38.0
24
23
  Requires-Dist: textdistance
25
- Requires-Dist: uiautomation
24
+ Requires-Dist: uiautomation; sys_platform == 'win32'
26
25
  Provides-Extra: dev
27
26
  Requires-Dist: pytest-asyncio>=0.23.6; extra == 'dev'
28
27
  Requires-Dist: pytest>=8.3.3; extra == 'dev'
@@ -1,17 +1,19 @@
1
1
  computer_use_ootb_internal/README.md,sha256=FxpW95lyub2iX73ZDfK6ML7SdEKg060H5I6Grub7li4,31
2
- computer_use_ootb_internal/app_teachmode.py,sha256=zmUPvFjqdhysnN1bD2QQhaAKONnAtMFd03Rb9gF1l6c,15646
2
+ computer_use_ootb_internal/app_teachmode.py,sha256=oW9s33AdSe6FxSBZxS7z4S1wLdCYGdPNw5qqpK70GKg,19059
3
3
  computer_use_ootb_internal/app_teachmode_gradio.py,sha256=zAw-n3s20j1Jr0S4TzXHwllKV6APJ8HEHB1KqBuzriY,7907
4
4
  computer_use_ootb_internal/dependency_check.py,sha256=y8RMEP6RXQzTgU1MS_1piBLtz4J-Hfn9RjUZg59dyvo,1333
5
- computer_use_ootb_internal/requirements-lite.txt,sha256=5DAHomz4A_P2BmTIXNkNqkHbnIF0AyZ4_1XAlb1LaYs,290
6
- computer_use_ootb_internal/run_teachmode_ootb_args.py,sha256=UEiwLSdERzBqbCH2Em20b4UNzAb__S8cJSnkceVcBsY,6686
7
- computer_use_ootb_internal/computer_use_demo/animation/click_animation.py,sha256=QR_DEDk7bVON5EQ_xsJGrxNa3NoxqubYyXPFRB12pmQ,3183
5
+ computer_use_ootb_internal/example_websocket_js.html,sha256=BLYwDExVlgiAX4vXVXW3RuP5KD8FXE4EFXIl54bwF7w,1322
6
+ computer_use_ootb_internal/requirements-lite.txt,sha256=2C4OH_GRzuDsR-c9VpJee_te-VqLOf-KDk3LXKi3qdk,282
7
+ computer_use_ootb_internal/run_teachmode_ootb_args.py,sha256=djr4E7_G_qG9H6qmRz1mrM9Yrcxf1tnlSPH5ZqykF5Y,6845
8
+ computer_use_ootb_internal/service_teachmode.py,sha256=e81zp3B7CA2nMjA-3qoKF4P1hlNGRI7P1tBo1j21FBk,8082
9
+ computer_use_ootb_internal/service_teachmode_test.py,sha256=zpfBFFKD9WGLX4m77ajOBfmczpYsCa3_qTBweeSNRV8,1112
10
+ computer_use_ootb_internal/computer_use_demo/animation/click_animation.py,sha256=j3v-CrFp61mZiJ4-eT06txkOoguZipbakrQFj2d_mbk,7759
8
11
  computer_use_ootb_internal/computer_use_demo/animation/icons8-select-cursor-transparent-96.gif,sha256=4LfwsfFQnREXrNRs32aJU2jO65JXianJoL_8q7-8elg,30966
9
- computer_use_ootb_internal/computer_use_demo/animation/test_animation.py,sha256=SOJz2yffXTkjuAHqk0IZLcMriR0KQYTo7W1b8wGyRGY,1222
10
- computer_use_ootb_internal/computer_use_demo/executor/teachmode_executor.py,sha256=Rr_I0Uk8hsqTmq3Fr3KOtjkXTqSb4pkNxmutENbv8dc,16368
12
+ computer_use_ootb_internal/computer_use_demo/executor/teachmode_executor.py,sha256=y7lg_PjMif2WwCKWWC7g8Ys2zPRMh08Vtt42fStujY4,16623
11
13
  computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/simple_parser/__init__.py,sha256=h2CNeuACklxVpJC65QR8_6AvSybEZLmeO45hY_-lLBs,61
12
14
  computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/simple_parser/gui_capture.py,sha256=CxFJbsSb68ERKH7-C4RaaZy7FIhhzrzGx5qQJ4C37cA,13907
13
15
  computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/simple_parser/gui_parser.py,sha256=KSTJ0cMwh3ahUMzHRaDgA2sVNUL4MNlF7qEBGN3G0SI,28993
14
- computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/simple_parser/test_capture.py,sha256=26uxu70di9Me-jseym5ejF7RNIbP35uPEiipN5qamIc,211
16
+ computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/simple_parser/test_capture.py,sha256=YbLpuD-mSFiOU7j6HKrtX05oRP9ciEBxxGIsyse-nPI,204
15
17
  computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/simple_parser/uia_parser.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
18
  computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/simple_parser/utils.py,sha256=GEA1ES7vOpHBg_Suxpl99reh34kRG4RQpp072JQBK5c,9787
17
19
  computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/simple_parser/icon_detection/icon_detection.py,sha256=ysTgvtjE1XM7QSrLLy1HD0i6_7iOb9GME5FWJljrJg0,9752
@@ -31,12 +33,12 @@ computer_use_ootb_internal/computer_use_demo/tools/base.py,sha256=QDqpuuKlhUKJT2
31
33
  computer_use_ootb_internal/computer_use_demo/tools/bash.py,sha256=rHetQ80_v-TTi-1oxIA7ncFEwJxFTh8FJCErIoZbGeY,4236
32
34
  computer_use_ootb_internal/computer_use_demo/tools/collection.py,sha256=8RzHLobL44_Jjt8ltXS6I8XJlEAQOfc75dmnDUaHE-8,922
33
35
  computer_use_ootb_internal/computer_use_demo/tools/colorful_text.py,sha256=cvlmnhAImDTwoRRwhT5au7mNFhfAD7ZfeoDEVdVzDKw,892
34
- computer_use_ootb_internal/computer_use_demo/tools/computer.py,sha256=kpcp3orAdSwzBJMvL8zt_OgF6kT9UdZaJuqkjja7bic,25493
36
+ computer_use_ootb_internal/computer_use_demo/tools/computer.py,sha256=bKOkCtE4iYmPpRxldslAAa5yQOs6NkhITJYCfZllWXI,25526
35
37
  computer_use_ootb_internal/computer_use_demo/tools/computer_marbot.py,sha256=zZuWz9ArfP3Zss-afnscrPkgCtB5UWbCy7HwAOvO2bo,5970
36
38
  computer_use_ootb_internal/computer_use_demo/tools/edit.py,sha256=b0PwUitxckHCQqFP3ZwlthWdqNkn7WETeTHeB6-o98c,11486
37
39
  computer_use_ootb_internal/computer_use_demo/tools/run.py,sha256=xhXdnBK1di9muaO44CEirL9hpGy3NmKbjfMpyeVmn8Y,1595
38
40
  computer_use_ootb_internal/computer_use_demo/tools/screen_capture.py,sha256=L8qfvtUkPPQGt92N-2Zfw5ZTDBzLsDps39uMnX3_uSA,6857
39
- computer_use_ootb_internal-0.0.103.dist-info/METADATA,sha256=un-dDRkh_CxGGAaFc-BSNKzMVpRvl0YabB3qZEJRbq4,910
40
- computer_use_ootb_internal-0.0.103.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
41
- computer_use_ootb_internal-0.0.103.dist-info/entry_points.txt,sha256=-AbmawU7IRQuDZihgVMVDrFoY4E6rnXYOUB-5vSeBKs,93
42
- computer_use_ootb_internal-0.0.103.dist-info/RECORD,,
41
+ computer_use_ootb_internal-0.0.105.dist-info/METADATA,sha256=s1CzMYcZD_3G85jKy09Y9jeG7lharUA0EO1tKTEWANY,937
42
+ computer_use_ootb_internal-0.0.105.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
43
+ computer_use_ootb_internal-0.0.105.dist-info/entry_points.txt,sha256=-AbmawU7IRQuDZihgVMVDrFoY4E6rnXYOUB-5vSeBKs,93
44
+ computer_use_ootb_internal-0.0.105.dist-info/RECORD,,
@@ -1,40 +0,0 @@
1
- """
2
- Test script to verify cursor animation is working
3
- """
4
- import asyncio
5
- import sys
6
- import time
7
- from pathlib import Path
8
- from computer_use_ootb_internal.computer_use_demo.tools.computer import ComputerTool
9
-
10
- async def test_animations():
11
-
12
- # Initialize the computer tool
13
- computer = ComputerTool()
14
-
15
- # Test mouse move animation
16
- print("Testing mouse move animation...")
17
- await computer(action="mouse_move_windll", coordinate=(500, 500))
18
- print("Waiting 2 seconds...")
19
- await asyncio.sleep(2)
20
-
21
- # Test click animation
22
- print("Testing click animation...")
23
- await computer(action="left_click_windll", coordinate=(700, 300))
24
- print("Waiting 2 seconds...")
25
- await asyncio.sleep(2)
26
-
27
- # Test another move
28
- print("Testing move and click sequence...")
29
- await computer(action="mouse_move_windll", coordinate=(300, 300))
30
- await asyncio.sleep(1)
31
- await computer(action="left_click_windll", coordinate=(300, 300))
32
-
33
- # Wait for animations to complete
34
- print("Waiting for animations to complete...")
35
- await asyncio.sleep(3)
36
-
37
- print("Test completed")
38
-
39
- if __name__ == "__main__":
40
- asyncio.run(test_animations())