computer-use-ootb-internal 0.0.94.post4__tar.gz → 0.0.95__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/.gitignore +1 -0
  2. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/PKG-INFO +1 -1
  3. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/pyproject.toml +1 -1
  4. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/app_teachmode.py +386 -323
  5. computer_use_ootb_internal-0.0.95/src/computer_use_ootb_internal/computer_use_demo/animation/click_animation.py +154 -0
  6. computer_use_ootb_internal-0.0.95/src/computer_use_ootb_internal/computer_use_demo/animation/icons8-select-cursor-transparent-96.gif +0 -0
  7. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/simple_parser/icon_detection/icon_detection.py +0 -13
  8. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/tools/computer.py +26 -8
  9. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/tools/computer_marbot.py +2 -2
  10. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/README.md +0 -0
  11. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/app_teachmode_gradio.py +0 -0
  12. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/executor/teachmode_executor.py +0 -0
  13. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/simple_parser/__init__.py +0 -0
  14. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/simple_parser/gui_capture.py +0 -0
  15. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/simple_parser/gui_parser.py +0 -0
  16. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/simple_parser/panel_recognition/llm_panel_recognize.py +0 -0
  17. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/simple_parser/test_capture.py +0 -0
  18. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/simple_parser/uia_parser.py +0 -0
  19. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/simple_parser/utils.py +0 -0
  20. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/uia_tools/__init__.py +0 -0
  21. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/uia_tools/screenshot_cli.py +0 -0
  22. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/uia_tools/screenshot_service.py +0 -0
  23. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/gui_agent/llm_utils/llm_utils.py +0 -0
  24. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/gui_agent/llm_utils/oai.py +0 -0
  25. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/gui_agent/llm_utils/run_litellm.py +0 -0
  26. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/gui_agent/llm_utils/run_llm.py +0 -0
  27. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/gui_agent/vlm_utils/__init__.py +0 -0
  28. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/gui_agent/vlm_utils/run_vlm.py +0 -0
  29. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/tools/__init__.py +0 -0
  30. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/tools/aws_request.py +0 -0
  31. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/tools/base.py +0 -0
  32. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/tools/bash.py +0 -0
  33. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/tools/collection.py +0 -0
  34. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/tools/colorful_text.py +0 -0
  35. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/tools/edit.py +0 -0
  36. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/tools/run.py +0 -0
  37. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/tools/screen_capture.py +0 -0
  38. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/dependency_check.py +0 -0
  39. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/example_websocket_js.html +0 -0
  40. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/requirements-lite.txt +0 -0
  41. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/run_teachmode_ootb_args.py +0 -0
  42. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/service_teachmode.py +0 -0
  43. {computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/service_teachmode_test.py +0 -0
@@ -4,3 +4,4 @@
4
4
  *.log
5
5
  screenshot*.png
6
6
  *dist*
7
+ *.toml
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: computer-use-ootb-internal
3
- Version: 0.0.94.post4
3
+ Version: 0.0.95
4
4
  Summary: Computer Use OOTB
5
5
  Author-email: Siyuan Hu <siyuan.hu.sg@gmail.com>
6
6
  Requires-Python: >=3.11
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "computer-use-ootb-internal"
7
- version = "0.0.94.post4"
7
+ version = "0.0.95"
8
8
  description = "Computer Use OOTB"
9
9
  authors = [{ name = "Siyuan Hu", email = "siyuan.hu.sg@gmail.com" }]
10
10
  requires-python = ">=3.11"
@@ -1,324 +1,387 @@
1
- import argparse
2
- import time
3
- import json
4
- import threading
5
- from fastapi import FastAPI, Request
6
- from fastapi.responses import JSONResponse
7
- from fastapi.middleware.cors import CORSMiddleware
8
- from screeninfo import get_monitors
9
- from computer_use_ootb_internal.computer_use_demo.tools.computer import get_screen_details
10
- from computer_use_ootb_internal.run_teachmode_ootb_args import simple_teachmode_sampling_loop
11
-
12
- app = FastAPI()
13
-
14
- # Add CORS middleware to allow requests from the frontend
15
- app.add_middleware(
16
- CORSMiddleware,
17
- allow_origins=["*"],
18
- allow_credentials=True,
19
- allow_methods=["*"],
20
- allow_headers=["*"],
21
- )
22
-
23
- class SharedState:
24
- def __init__(self, args):
25
- self.args = args
26
- self.task_updated = False
27
- self.chatbot_messages = []
28
- # Store all state-related data here
29
- self.model = args.model
30
- self.task = getattr(args, 'task', "")
31
- self.selected_screen = args.selected_screen
32
- self.user_id = args.user_id
33
- self.trace_id = args.trace_id
34
- self.api_keys = args.api_keys
35
- self.server_url = args.server_url
36
- self.message_queue = []
37
- self.is_processing = False
38
- self.should_stop = False
39
- self.is_paused = False
40
- # Add a new event to better control stopping
41
- self.stop_event = threading.Event()
42
- # Add a reference to the processing thread
43
- self.processing_thread = None
44
-
45
- shared_state = None
46
-
47
- @app.post("/update_params")
48
- async def update_parameters(request: Request):
49
- data = await request.json()
50
-
51
- if 'task' not in data:
52
- return JSONResponse(
53
- content={"status": "error", "message": "Missing required field: task"},
54
- status_code=400
55
- )
56
-
57
- shared_state.args = argparse.Namespace(**data)
58
- shared_state.task_updated = True
59
-
60
- # Update shared state when parameters change
61
- shared_state.model = getattr(shared_state.args, 'model', "teach-mode-gpt-4o")
62
- shared_state.task = getattr(shared_state.args, 'task', "Create a claim on the SAP system, using Receipt.pdf as attachment.")
63
- shared_state.selected_screen = getattr(shared_state.args, 'selected_screen', 0)
64
- shared_state.user_id = getattr(shared_state.args, 'user_id', "a_test")
65
- shared_state.trace_id = getattr(shared_state.args, 'trace_id', "jess_4")
66
- shared_state.api_keys = getattr(shared_state.args, 'api_keys', "sk-proj-1234567890")
67
- shared_state.server_url = getattr(shared_state.args, 'server_url', "http://ec2-44-234-43-86.us-west-2.compute.amazonaws.com/generate_action")
68
-
69
- return JSONResponse(
70
- content={"status": "success", "message": "Parameters updated", "new_args": vars(shared_state.args)},
71
- status_code=200
72
- )
73
-
74
- @app.post("/update_message")
75
- async def update_message(request: Request):
76
- data = await request.json()
77
-
78
- if 'message' not in data:
79
- return JSONResponse(
80
- content={"status": "error", "message": "Missing required field: message"},
81
- status_code=400
82
- )
83
-
84
- message = data['message']
85
- shared_state.chatbot_messages.append({"role": "user", "content": message})
86
- shared_state.task = message
87
- shared_state.args.task = message
88
-
89
- # Reset stop event before starting
90
- shared_state.stop_event.clear()
91
-
92
- # Start processing if not already running
93
- if not shared_state.is_processing:
94
- # Create and store the thread
95
- shared_state.processing_thread = threading.Thread(target=process_input, daemon=True)
96
- shared_state.processing_thread.start()
97
-
98
- return JSONResponse(
99
- content={"status": "success", "message": "Message received", "task": shared_state.task},
100
- status_code=200
101
- )
102
-
103
- @app.get("/get_messages")
104
- async def get_messages():
105
- # Return all messages in the queue and clear it
106
- messages = shared_state.message_queue.copy()
107
- shared_state.message_queue = []
108
-
109
- return JSONResponse(
110
- content={"status": "success", "messages": messages},
111
- status_code=200
112
- )
113
-
114
- @app.get("/get_screens")
115
- async def get_screens():
116
- screen_options, primary_index = get_screen_details()
117
-
118
- return JSONResponse(
119
- content={"status": "success", "screens": screen_options, "primary_index": primary_index},
120
- status_code=200
121
- )
122
-
123
- @app.post("/stop_processing")
124
- async def stop_processing():
125
- if shared_state.is_processing:
126
- # Set both flags to ensure stopping the current task
127
- shared_state.should_stop = True
128
- shared_state.stop_event.set()
129
-
130
- # Send an immediate message to the queue to inform the user
131
- stop_initiated_msg = {"role": "assistant", "content": f"Stopping task '{shared_state.task}'..."}
132
- shared_state.message_queue.append(stop_initiated_msg)
133
-
134
- return JSONResponse(
135
- content={"status": "success", "message": "Task is being stopped, server will remain available for new tasks"},
136
- status_code=200
137
- )
138
- else:
139
- return JSONResponse(
140
- content={"status": "error", "message": "No active processing to stop"},
141
- status_code=400
142
- )
143
-
144
- @app.post("/toggle_pause")
145
- async def toggle_pause():
146
- if not shared_state.is_processing:
147
- return JSONResponse(
148
- content={"status": "error", "message": "No active processing to pause/resume"},
149
- status_code=400
150
- )
151
-
152
- # Toggle the pause state
153
- shared_state.is_paused = not shared_state.is_paused
154
- current_state = shared_state.is_paused
155
-
156
- print(f"Toggled pause state to: {current_state}")
157
-
158
- status_message = "paused" if current_state else "resumed"
159
-
160
- # Add a message to the queue to inform the user
161
- if current_state:
162
- message = {"role": "assistant", "content": f"Task '{shared_state.task}' has been paused. Click Continue to resume."}
163
- else:
164
- message = {"role": "assistant", "content": f"Task '{shared_state.task}' has been resumed."}
165
-
166
- shared_state.chatbot_messages.append(message)
167
- shared_state.message_queue.append(message)
168
-
169
- return JSONResponse(
170
- content={
171
- "status": "success",
172
- "message": f"Processing {status_message}",
173
- "is_paused": current_state
174
- },
175
- status_code=200
176
- )
177
-
178
- @app.get("/status")
179
- async def get_status():
180
- print(f"Status check - Processing: {shared_state.is_processing}, Paused: {shared_state.is_paused}")
181
- return JSONResponse(
182
- content={
183
- "status": "success",
184
- "is_processing": shared_state.is_processing,
185
- "is_paused": shared_state.is_paused
186
- },
187
- status_code=200
188
- )
189
-
190
- def process_input():
191
- shared_state.is_processing = True
192
- shared_state.should_stop = False
193
- shared_state.is_paused = False
194
- shared_state.stop_event.clear() # Ensure stop event is cleared at the start
195
-
196
- print(f"start sampling loop: {shared_state.chatbot_messages}")
197
- print(f"shared_state.args before sampling loop: {shared_state.args}")
198
-
199
-
200
- try:
201
- # Get the generator for the sampling loop
202
- sampling_loop = simple_teachmode_sampling_loop(
203
- model=shared_state.model,
204
- task=shared_state.task,
205
- selected_screen=shared_state.selected_screen,
206
- user_id=shared_state.user_id,
207
- trace_id=shared_state.trace_id,
208
- api_keys=shared_state.api_keys,
209
- server_url=shared_state.server_url,
210
- )
211
-
212
- # Process messages from the sampling loop
213
- for loop_msg in sampling_loop:
214
- # Check stop condition more frequently
215
- if shared_state.should_stop or shared_state.stop_event.is_set():
216
- print("Processing stopped by user")
217
- break
218
-
219
- # Check if paused and wait while paused
220
- while shared_state.is_paused and not shared_state.should_stop and not shared_state.stop_event.is_set():
221
- print(f"Processing paused at: {time.strftime('%H:%M:%S')}")
222
- # Wait a short time and check stop condition regularly
223
- for _ in range(5): # Check 5 times per second
224
- if shared_state.should_stop or shared_state.stop_event.is_set():
225
- break
226
- time.sleep(0.2)
227
-
228
- # Check again after pause loop
229
- if shared_state.should_stop or shared_state.stop_event.is_set():
230
- print("Processing stopped while paused or resuming")
231
- break
232
-
233
- # Process the message
234
- if loop_msg.startswith('<img'):
235
- message = {"role": "user", "content": loop_msg}
236
- else:
237
- message = {"role": "assistant", "content": loop_msg}
238
-
239
- shared_state.chatbot_messages.append(message)
240
- shared_state.message_queue.append(message)
241
-
242
- # Short sleep to allow stop signals to be processed
243
- for _ in range(5): # Check 5 times per second
244
- if shared_state.should_stop or shared_state.stop_event.is_set():
245
- print("Processing stopped during sleep")
246
- break
247
- time.sleep(0.1)
248
-
249
- if shared_state.should_stop or shared_state.stop_event.is_set():
250
- break
251
-
252
- except Exception as e:
253
- # Handle any exceptions in the processing loop
254
- error_msg = f"Error during task processing: {str(e)}"
255
- print(error_msg)
256
- error_message = {"role": "assistant", "content": error_msg}
257
- shared_state.message_queue.append(error_message)
258
-
259
- finally:
260
- # Handle completion or interruption
261
- if shared_state.should_stop or shared_state.stop_event.is_set():
262
- stop_msg = f"Task '{shared_state.task}' was stopped. Ready for new tasks."
263
- final_message = {"role": "assistant", "content": stop_msg}
264
- else:
265
- complete_msg = f"Task '{shared_state.task}' completed. Thanks for using Teachmode-OOTB."
266
- final_message = {"role": "assistant", "content": complete_msg}
267
-
268
- shared_state.chatbot_messages.append(final_message)
269
- shared_state.message_queue.append(final_message)
270
-
271
- # Reset all state flags to allow for new tasks
272
- shared_state.is_processing = False
273
- shared_state.should_stop = False
274
- shared_state.is_paused = False
275
- shared_state.stop_event.clear()
276
- print("Processing completed, ready for new tasks")
277
-
278
- def main():
279
- global app, shared_state
280
-
281
- parser = argparse.ArgumentParser(
282
- description="Run a synchronous sampling loop for assistant/tool interactions in teach-mode."
283
- )
284
- parser.add_argument("--model", default="teach-mode-gpt-4o")
285
- parser.add_argument("--task", default="Create a claim on the SAP system, using Receipt.pdf as attachment.")
286
- parser.add_argument("--selected_screen", type=int, default=0)
287
- parser.add_argument("--user_id", default="star_rail_dev")
288
- parser.add_argument("--trace_id", default="scroll")
289
- parser.add_argument("--api_key_file", default="api_key.json")
290
- parser.add_argument("--api_keys", default="")
291
- parser.add_argument(
292
- "--server_url",
293
- default="http://ec2-44-234-43-86.us-west-2.compute.amazonaws.com/generate_action",
294
- help="Server URL for the session"
295
- )
296
-
297
- args = parser.parse_args()
298
- shared_state = SharedState(args)
299
-
300
- import uvicorn
301
- import platform
302
- import os
303
-
304
- # Default port
305
- port = 7888
306
-
307
- # Determine port based on Windows username
308
- if platform.system() == "Windows":
309
- username = os.environ["USERNAME"].lower()
310
- if username == "altair":
311
- port = 14000
312
- elif username.startswith("guest") and username[5:].isdigit():
313
- num = int(username[5:])
314
- if 1 <= num <= 10:
315
- port = 14000 + num
316
- else:
317
- port = 7888
318
- else:
319
- port = 7888
320
-
321
- uvicorn.run(app, host="0.0.0.0", port=port)
322
-
323
- if __name__ == "__main__":
1
+ import argparse
2
+ import time
3
+ import json
4
+ import threading
5
+ from fastapi import FastAPI, Request
6
+ from fastapi.responses import JSONResponse
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+ from screeninfo import get_monitors
9
+ from computer_use_ootb_internal.computer_use_demo.tools.computer import get_screen_details
10
+ from computer_use_ootb_internal.run_teachmode_ootb_args import simple_teachmode_sampling_loop
11
+
12
+ app = FastAPI()
13
+
14
+ # Add CORS middleware to allow requests from the frontend
15
+ app.add_middleware(
16
+ CORSMiddleware,
17
+ allow_origins=["*"],
18
+ allow_credentials=True,
19
+ allow_methods=["*"],
20
+ allow_headers=["*"],
21
+ )
22
+
23
+ # Rate limiter for API endpoints
24
class RateLimiter:
    """Enforce a minimum interval between accepted requests, per endpoint.

    The limiter keeps one timestamp per endpoint path (not per client): a
    request is accepted when at least ``interval_seconds`` have elapsed since
    the last *accepted* request for the same path. Rejected requests do not
    reset the timer.

    Timestamps are taken from ``time.monotonic()`` so that adjustments to the
    system clock cannot spuriously block or admit requests; the values stored
    in ``last_request_time`` are therefore only meaningful relative to each
    other, not as wall-clock times.
    """

    # Endpoints that are never throttled.
    PRIORITY_ENDPOINTS = frozenset({"/update_params", "/update_message"})

    def __init__(self, interval_seconds=2):
        """Create a limiter.

        Args:
            interval_seconds: Minimum number of seconds between two accepted
                requests to the same endpoint.
        """
        self.interval = interval_seconds
        self.last_request_time = {}
        # Guards last_request_time against concurrent check-and-update.
        self.lock = threading.Lock()

    def allow_request(self, endpoint):
        """Return True if a request to ``endpoint`` may proceed now.

        Args:
            endpoint: Path of the endpoint being called, e.g. ``"/status"``.

        Returns:
            True when the request is accepted (and the endpoint's timestamp
            is refreshed), False when it arrives too soon after the previous
            accepted request.
        """
        # Priority endpoints touch no shared state, so skip the lock entirely.
        if endpoint in self.PRIORITY_ENDPOINTS:
            return True

        with self.lock:
            now = time.monotonic()
            previous = self.last_request_time.get(endpoint)
            if previous is not None and now - previous < self.interval:
                return False

            self.last_request_time[endpoint] = now
            return True
48
+
49
class SharedState:
    """Mutable state shared by the HTTP handlers and the processing thread."""

    def __init__(self, args):
        """Initialise the state from a parsed argument namespace.

        Args:
            args: Namespace providing ``model``, ``selected_screen``,
                ``user_id``, ``trace_id``, ``api_keys`` and ``server_url``;
                ``task`` is optional and defaults to an empty string.
        """
        # Keep the raw namespace around; handlers may replace it wholesale.
        self.args = args

        # Session parameters, copied out of the namespace.
        self.model = args.model
        self.task = getattr(args, 'task', "")
        self.selected_screen = args.selected_screen
        self.user_id = args.user_id
        self.trace_id = args.trace_id
        self.api_keys = args.api_keys
        self.server_url = args.server_url

        # Conversation history and the queue of messages not yet delivered.
        self.chatbot_messages = []
        self.message_queue = []

        # Lifecycle flags for the current task.
        self.task_updated = False
        self.is_processing = False
        self.should_stop = False
        self.is_paused = False

        # Event used alongside should_stop to signal a stop request.
        self.stop_event = threading.Event()
        # Handle of the thread running the current task, if any.
        self.processing_thread = None
70
+
71
+ shared_state = None
72
+ rate_limiter = RateLimiter(interval_seconds=2)
73
+
74
@app.post("/update_params")
async def update_parameters(request: Request):
    """Replace the session parameters with the JSON object in the request body.

    The body must be a JSON object containing at least ``task``. The whole
    object becomes the new ``shared_state.args`` namespace, and the individual
    session fields on ``shared_state`` are refreshed from it, falling back to
    built-in defaults for any field the caller omitted.

    Returns:
        200 with the new arguments on success; 400 when the body is not valid
        JSON, is not a JSON object, or lacks ``task``.
    """
    try:
        data = await request.json()
    except ValueError:
        # json.JSONDecodeError (malformed body) is a ValueError subclass.
        return JSONResponse(
            content={"status": "error", "message": "Request body must be valid JSON"},
            status_code=400
        )

    # Namespace(**data) below needs a mapping; reject arrays, numbers, etc.
    if not isinstance(data, dict):
        return JSONResponse(
            content={"status": "error", "message": "Request body must be a JSON object"},
            status_code=400
        )

    if 'task' not in data:
        return JSONResponse(
            content={"status": "error", "message": "Missing required field: task"},
            status_code=400
        )

    shared_state.args = argparse.Namespace(**data)
    shared_state.task_updated = True

    # Value used for each session field when the request does not supply it.
    fallbacks = {
        'model': "teach-mode-gpt-4o",
        'task': "Create a claim on the SAP system, using Receipt.pdf as attachment.",
        'selected_screen': 0,
        'user_id': "a_test",
        'trace_id': "jess_4",
        'api_keys': "sk-proj-1234567890",
        'server_url': "http://ec2-44-234-43-86.us-west-2.compute.amazonaws.com/generate_action",
    }
    for field, fallback in fallbacks.items():
        setattr(shared_state, field, getattr(shared_state.args, field, fallback))

    return JSONResponse(
        content={"status": "success", "message": "Parameters updated", "new_args": vars(shared_state.args)},
        status_code=200
    )
100
+
101
@app.post("/update_message")
async def update_message(request: Request):
    """Record a user message as the current task and start processing it.

    The message is appended to the chat history and becomes the task on both
    ``shared_state`` and ``shared_state.args``. If no task is currently being
    processed, a daemon thread running ``process_input`` is started.

    Returns:
        200 echoing the task on success; 400 when the body is not a JSON
        object containing ``message``.
    """
    try:
        data = await request.json()
    except ValueError:
        # json.JSONDecodeError (malformed body) is a ValueError subclass.
        return JSONResponse(
            content={"status": "error", "message": "Request body must be valid JSON"},
            status_code=400
        )

    if not isinstance(data, dict) or 'message' not in data:
        return JSONResponse(
            content={"status": "error", "message": "Missing required field: message"},
            status_code=400
        )

    message = data['message']
    shared_state.chatbot_messages.append({"role": "user", "content": message})
    shared_state.task = message
    shared_state.args.task = message

    # Reset stop event before starting
    shared_state.stop_event.clear()

    # Start processing if not already running
    if not shared_state.is_processing:
        # Claim the processing slot here rather than waiting for the worker to
        # set the flag itself: otherwise a second request arriving before the
        # new thread has run its first statement would start a second worker.
        shared_state.is_processing = True
        worker = threading.Thread(target=process_input, daemon=True)
        shared_state.processing_thread = worker
        try:
            worker.start()
        except RuntimeError:
            # The thread could not be started; release the slot again.
            shared_state.is_processing = False
            raise

    return JSONResponse(
        content={"status": "success", "message": "Message received", "task": shared_state.task},
        status_code=200
    )
129
+
130
@app.get("/get_messages")
async def get_messages(request: Request):
    """Return every queued message and remove the returned ones from the queue.

    Returns:
        200 with the drained messages, or 429 when called again before the
        rate limiter's interval has elapsed.
    """
    # Apply rate limiting
    if not rate_limiter.allow_request(request.url.path):
        return JSONResponse(
            content={"status": "error", "message": "Rate limit exceeded. Try again after 2 seconds."},
            status_code=429
        )

    # Drain the queue in place instead of copying it and rebinding the
    # attribute to a new list: the processing thread appends to this same list
    # concurrently, and a message appended between the copy and the rebind
    # would be silently lost. Only the first `pending` items are removed, so
    # anything appended after the length was read stays queued for the next
    # call. (Relies on individual list operations being atomic.)
    queue = shared_state.message_queue
    pending = len(queue)
    messages = queue[:pending]
    del queue[:pending]

    return JSONResponse(
        content={"status": "success", "messages": messages},
        status_code=200
    )
147
+
148
@app.get("/get_screens")
async def get_screens(request: Request):
    """Report the available screens and the index of the primary one.

    Returns:
        200 with the values from ``get_screen_details()``, or 429 when the
        rate limiter rejects the call.
    """
    if rate_limiter.allow_request(request.url.path):
        screen_options, primary_index = get_screen_details()
        payload = {
            "status": "success",
            "screens": screen_options,
            "primary_index": primary_index,
        }
        return JSONResponse(content=payload, status_code=200)

    # Throttled: called again too soon after the previous accepted request.
    throttled = {"status": "error", "message": "Rate limit exceeded. Try again after 2 seconds."}
    return JSONResponse(content=throttled, status_code=429)
163
+
164
@app.post("/stop_processing")
async def stop_processing(request: Request):
    """Ask the running task to stop.

    Returns:
        200 once the stop has been signalled, 400 when nothing is being
        processed, or 429 when the rate limiter rejects the call.
    """
    if not rate_limiter.allow_request(request.url.path):
        throttled = {"status": "error", "message": "Rate limit exceeded. Try again after 2 seconds."}
        return JSONResponse(content=throttled, status_code=429)

    # Guard clause: there is nothing to stop.
    if not shared_state.is_processing:
        idle = {"status": "error", "message": "No active processing to stop"}
        return JSONResponse(content=idle, status_code=400)

    # Raise both stop signals; the worker checks either one.
    shared_state.should_stop = True
    shared_state.stop_event.set()

    # Queue an immediate notice so the user sees that the stop was received.
    shared_state.message_queue.append(
        {"role": "assistant", "content": f"Stopping task '{shared_state.task}'..."}
    )

    accepted = {
        "status": "success",
        "message": "Task is being stopped, server will remain available for new tasks",
    }
    return JSONResponse(content=accepted, status_code=200)
191
+
192
@app.post("/toggle_pause")
async def toggle_pause(request: Request):
    """Flip the paused flag of the running task and announce the change.

    Returns:
        200 with the new ``is_paused`` value, 400 when nothing is being
        processed, or 429 when the rate limiter rejects the call.
    """
    if not rate_limiter.allow_request(request.url.path):
        throttled = {"status": "error", "message": "Rate limit exceeded. Try again after 2 seconds."}
        return JSONResponse(content=throttled, status_code=429)

    if not shared_state.is_processing:
        idle = {"status": "error", "message": "No active processing to pause/resume"}
        return JSONResponse(content=idle, status_code=400)

    # Flip the flag and remember the value it now holds.
    shared_state.is_paused = not shared_state.is_paused
    now_paused = shared_state.is_paused

    print(f"Toggled pause state to: {now_paused}")

    if now_paused:
        status_message = "paused"
        notice = f"Task '{shared_state.task}' has been paused. Click Continue to resume."
    else:
        status_message = "resumed"
        notice = f"Task '{shared_state.task}' has been resumed."

    # Record the notice in the history and queue it for delivery.
    message = {"role": "assistant", "content": notice}
    shared_state.chatbot_messages.append(message)
    shared_state.message_queue.append(message)

    body = {
        "status": "success",
        "message": f"Processing {status_message}",
        "is_paused": now_paused,
    }
    return JSONResponse(content=body, status_code=200)
232
+
233
@app.get("/status")
async def get_status(request: Request):
    """Report whether a task is currently processing and whether it is paused.

    Returns:
        200 with ``is_processing`` and ``is_paused``, or 429 when the rate
        limiter rejects the call.
    """
    if not rate_limiter.allow_request(request.url.path):
        throttled = {"status": "error", "message": "Rate limit exceeded. Try again after 2 seconds."}
        return JSONResponse(content=throttled, status_code=429)

    print(f"Status check - Processing: {shared_state.is_processing}, Paused: {shared_state.is_paused}")

    snapshot = {
        "status": "success",
        "is_processing": shared_state.is_processing,
        "is_paused": shared_state.is_paused,
    }
    return JSONResponse(content=snapshot, status_code=200)
251
+
252
def _stop_requested():
    """Return True once either stop signal has been raised for the current task."""
    return shared_state.should_stop or shared_state.stop_event.is_set()


def process_input():
    """Run the teach-mode sampling loop for the current task.

    Intended to run in a background thread. Each message yielded by
    ``simple_teachmode_sampling_loop`` is appended to the chat history and to
    the outgoing message queue; between messages the function honours pause
    and stop requests. When the loop ends, a final message reports whether the
    task was stopped, failed with an exception, or completed, and all
    lifecycle flags are reset so a new task can be started.
    """
    shared_state.is_processing = True
    shared_state.should_stop = False
    shared_state.is_paused = False
    shared_state.stop_event.clear()  # Ensure stop event is cleared at the start

    print(f"start sampling loop: {shared_state.chatbot_messages}")
    print(f"shared_state.args before sampling loop: {shared_state.args}")

    # Set when the loop raises, so the final message does not claim success.
    failed = False

    try:
        # Get the generator for the sampling loop
        sampling_loop = simple_teachmode_sampling_loop(
            model=shared_state.model,
            task=shared_state.task,
            selected_screen=shared_state.selected_screen,
            user_id=shared_state.user_id,
            trace_id=shared_state.trace_id,
            api_keys=shared_state.api_keys,
            server_url=shared_state.server_url,
        )

        for loop_msg in sampling_loop:
            if _stop_requested():
                print("Processing stopped by user")
                break

            # While paused, idle in 0.2 s slices (about one second per outer
            # pass) so a stop request is noticed promptly.
            while shared_state.is_paused and not _stop_requested():
                print(f"Processing paused at: {time.strftime('%H:%M:%S')}")
                for _ in range(5):
                    if _stop_requested():
                        break
                    time.sleep(0.2)

            # A stop may have arrived while paused or right after resuming.
            if _stop_requested():
                print("Processing stopped while paused or resuming")
                break

            # Messages starting with '<img' are stored under the "user" role;
            # everything else is an assistant message.
            role = "user" if loop_msg.startswith('<img') else "assistant"
            message = {"role": role, "content": loop_msg}

            shared_state.chatbot_messages.append(message)
            shared_state.message_queue.append(message)

            # Wait up to 0.5 s (5 x 0.1 s) before the next step, checking for
            # a stop request between slices.
            for _ in range(5):
                if _stop_requested():
                    print("Processing stopped during sleep")
                    break
                time.sleep(0.1)

            if _stop_requested():
                break

    except Exception as e:
        # Report the failure to the client instead of letting the thread die silently.
        failed = True
        error_msg = f"Error during task processing: {str(e)}"
        print(error_msg)
        error_message = {"role": "assistant", "content": error_msg}
        shared_state.message_queue.append(error_message)

    finally:
        # Pick the final message: stopped, failed, or completed.
        if _stop_requested():
            final_text = f"Task '{shared_state.task}' was stopped. Ready for new tasks."
        elif failed:
            final_text = f"Task '{shared_state.task}' failed. Ready for new tasks."
        else:
            final_text = f"Task '{shared_state.task}' completed. Thanks for using Teachmode-OOTB."
        final_message = {"role": "assistant", "content": final_text}

        shared_state.chatbot_messages.append(final_message)
        shared_state.message_queue.append(final_message)

        # Reset all state flags to allow for new tasks
        shared_state.is_processing = False
        shared_state.should_stop = False
        shared_state.is_paused = False
        shared_state.stop_event.clear()
        print("Processing completed, ready for new tasks")
339
+
340
def _select_port(system, username):
    """Return the port to listen on for the given platform and user name.

    On Windows, user ``altair`` gets 14000 and ``guest1`` .. ``guest10`` get
    14001 .. 14010 (case-insensitive); every other case uses 7888.
    """
    default_port = 7888
    if system != "Windows":
        return default_port

    name = username.lower()
    if name == "altair":
        return 14000
    if name.startswith("guest") and name[5:].isdigit():
        num = int(name[5:])
        if 1 <= num <= 10:
            return 14000 + num
    return default_port


def main():
    """Parse command-line arguments, initialise shared state and run the server.

    Rebinds the module-level ``shared_state`` and ``rate_limiter`` and then
    blocks in ``uvicorn.run`` serving ``app`` on all interfaces.
    """
    global shared_state, rate_limiter

    parser = argparse.ArgumentParser(
        description="Run a synchronous sampling loop for assistant/tool interactions in teach-mode."
    )
    parser.add_argument("--model", default="teach-mode-gpt-4o")
    parser.add_argument("--task", default="Create a claim on the SAP system, using Receipt.pdf as attachment.")
    parser.add_argument("--selected_screen", type=int, default=0)
    parser.add_argument("--user_id", default="star_rail_dev")
    parser.add_argument("--trace_id", default="scroll")
    parser.add_argument("--api_key_file", default="api_key.json")
    parser.add_argument("--api_keys", default="")
    parser.add_argument(
        "--server_url",
        default="http://ec2-44-234-43-86.us-west-2.compute.amazonaws.com/generate_action",
        help="Server URL for the session"
    )

    args = parser.parse_args()
    shared_state = SharedState(args)
    rate_limiter = RateLimiter(interval_seconds=2)

    import os
    import platform

    import uvicorn

    # USERNAME may be unset (e.g. when launched from a service); fall back to
    # the default port in that case rather than raising KeyError.
    port = _select_port(platform.system(), os.environ.get("USERNAME", ""))

    uvicorn.run(app, host="0.0.0.0", port=port)
385
+
386
+ if __name__ == "__main__":
324
387
  main()
@@ -0,0 +1,154 @@
1
+ """
2
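show_click(x, y, duration_ms=2000)
    → Show the click animation at screen position (x, y) for duration_ms milliseconds.
Depends on: pyside6
Requires icons8-select-cursor-transparent-96.gif in the same directory as this module.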
6
+ """
7
+ import sys, time
8
+ from pathlib import Path
9
+ from PySide6.QtCore import Qt, QPoint, QTimer, QEventLoop, QSize
10
+ from PySide6.QtGui import QPainter, QPixmap, QMovie
11
+ from PySide6.QtWidgets import QApplication, QWidget, QLabel
12
+
13
+ CLICK_GIF = Path(__file__).with_name("icons8-select-cursor-transparent-96.gif")
14
+
15
class ClickAnimation(QWidget):
    """Frameless, always-on-top, input-transparent overlay that plays CLICK_GIF.

    The widget shows itself on construction and schedules its own ``close``
    after ``life_ms`` milliseconds. If the GIF file is missing, or anything
    fails while setting up the movie, an error is printed and the widget is
    left un-shown; no exception propagates to the caller.

    Args:
        pos: Screen position the animation refers to.
        life_ms: Time in milliseconds before the overlay closes itself.
    """

    def __init__(self, pos: QPoint, life_ms: int):
        super().__init__(None,
                         Qt.FramelessWindowHint | Qt.Tool | Qt.WindowStaysOnTopHint
                         | Qt.WindowTransparentForInput)
        self.setAttribute(Qt.WA_TranslucentBackground)

        if not CLICK_GIF.exists():
            print(f"Error: click.gif not found at {CLICK_GIF}")
            return

        try:
            # Label that hosts the animated GIF
            self.label = QLabel(self)
            self.movie = QMovie(str(CLICK_GIF))

            # Read and print the native frame size (informational only)
            self.movie.jumpToFrame(0)
            original_size = self.movie.currentPixmap().size()
            print(f"GIF original size: {original_size.width()}x{original_size.height()}")

            # Scale the GIF to the overlay size
            target_size = QSize(50, 50)
            self.movie.setScaledSize(target_size)

            # Attach the movie and fix the label to the same size
            self.label.setMovie(self.movie)
            self.label.setFixedSize(target_size)

            # Size and place the window: top-left corner sits 15 px up/left of pos.
            # NOTE(review): 15 px is not the geometric centre of the 50x50 overlay;
            # presumably tuned to the cursor image's hotspot - confirm.
            self.resize(target_size)
            self.move(pos.x() - 15, pos.y() - 15)

            # Cache all frames for smoother playback
            self.movie.setCacheMode(QMovie.CacheAll)

            # Start playing
            self.movie.start()

            # Close the window automatically once its lifetime elapses
            QTimer.singleShot(life_ms, self.close)

            self.show()
            self.raise_()
            # Report the size actually applied instead of a stale hard-coded "30x30"
            print(f"Click animation created at ({pos.x()}, {pos.y()}), "
                  f"size: {target_size.width()}x{target_size.height()}, duration: {life_ms}ms")
        except Exception as e:
            print(f"Error creating click animation: {str(e)}")
62
+
63
+ # ---------- 外部接口 ----------
64
+ _app = None
65
def _ensure_app():
    """Make sure the module-level ``_app`` refers to a QApplication.

    Does nothing when ``_app`` is already set. Otherwise reuses the existing
    QApplication instance if there is one, or creates a new one from
    ``sys.argv``.
    """
    global _app
    if _app is not None:
        return

    existing = QApplication.instance()
    if existing is not None:
        print("Using existing QApplication instance")
        _app = existing
        return

    print("Creating new QApplication instance")
    _app = QApplication(sys.argv)
74
+
75
def show_click(x: int, y: int, duration_ms: int = 2000):
    """Show the click animation at (x, y) and block until it has finished.

    Args:
        x, y: Screen coordinates passed to the overlay.
        duration_ms: Lifetime of the overlay in milliseconds; the call returns
            roughly 150 ms after that.

    Raises:
        FileNotFoundError: If CLICK_GIF does not exist. Errors raised while the
            animation is created or running are printed, not propagated.
    """
    print(f"Attempting to show click at ({x}, {y})")

    gif_available = CLICK_GIF.exists()
    if not gif_available:
        raise FileNotFoundError(f"click.gif not found at {CLICK_GIF}")

    _ensure_app()

    try:
        # Hold a reference so the overlay object lives for the whole wait
        overlay = ClickAnimation(QPoint(x, y), duration_ms)

        # Local event loop: returns shortly after the overlay's lifetime ends
        wait_loop = QEventLoop()
        grace_ms = 150
        QTimer.singleShot(duration_ms + grace_ms, wait_loop.quit)
        wait_loop.exec()
        print("Click animation completed")
    except Exception as exc:
        print(f"Error during show_click: {exc}")
94
+
95
+
96
+ # --- 在原 import 区域追加 ---
97
+ from PySide6.QtCore import QEasingCurve, QPropertyAnimation
98
+ # --------------------------------------------------------
99
+
100
+
101
+ # ---------- 新增函数 ----------
102
def show_move_to(x1: int, y1: int, x2: int, y2: int, duration_ms: int = 1200):
    """Show the cursor GIF at (x1, y1), glide it to (x2, y2), and block meanwhile.

    Args:
        x1, y1: Starting screen coordinates.
        x2, y2: Destination screen coordinates.
        duration_ms: Duration of the movement in milliseconds.

    Raises:
        FileNotFoundError: If CLICK_GIF does not exist.
    """
    print(f"Attempting to move click from ({x1}, {y1}) → ({x2}, {y2}) "
          f"in {duration_ms} ms")

    if not CLICK_GIF.exists():
        raise FileNotFoundError(f"click.gif not found at {CLICK_GIF}")

    _ensure_app()

    # The overlay outlives the movement by 200 ms so it is not closed mid-animation
    overlay_life_ms = duration_ms + 200
    overlay = ClickAnimation(QPoint(x1, y1), overlay_life_ms)

    # Same 15 px up/left shift that ClickAnimation applies when placing itself
    shift = 15
    start_point = QPoint(x1 - shift, y1 - shift)
    end_point = QPoint(x2 - shift, y2 - shift)

    # Animate the window's "pos" property between the two points
    mover = QPropertyAnimation(overlay, b"pos")
    mover.setDuration(duration_ms)
    mover.setStartValue(start_point)
    mover.setEndValue(end_point)
    mover.setEasingCurve(QEasingCurve.OutQuad)  # easing curve can be swapped freely
    mover.start()

    # Local event loop: leaves when the animation finishes, with a timer as fallback
    wait_loop = QEventLoop()
    mover.finished.connect(wait_loop.quit)
    QTimer.singleShot(overlay_life_ms, wait_loop.quit)
    wait_loop.exec()

    print("Move‑to animation completed")
140
+ # ---------------------------------
141
+
142
+
143
+ # ---------- 命令行测试 ----------
144
if __name__ == "__main__":
    # Manual check 1: click animation at a fixed point
    x = y = 500
    print(f"Testing click at ({x}, {y})")
    show_click(x, y)

    # Manual check 2: move animation between two fixed points
    (x1, y1), (x2, y2) = (400, 400), (800, 600)
    print(f"Testing move from ({x1}, {y1}) → ({x2}, {y2})")
    show_move_to(x1, y1, x2, y2, duration_ms=2000)
@@ -251,16 +251,3 @@ def get_screen_resize_factor():
251
251
  # return scaleFactor
252
252
  return "1.0x"
253
253
 
254
- # 示例调用
255
- if __name__ == "__main__":
256
- buttons = detect_icons(
257
- icon_folder=r"",
258
- image_path=r"",
259
- threshold=0.75,
260
- scale_factor="1.5x",
261
- specific_icon_names=[r"test\1.5x\macOS.png"]
262
- )
263
- draw_detected_icons(
264
- r"D:\develop\computer_use_ootb_internal-main\.cache\20241214_023408\screenshot-0.png", buttons
265
- )
266
-
@@ -18,8 +18,12 @@ from functools import partial
18
18
 
19
19
  from anthropic.types.beta import BetaToolComputerUse20241022Param
20
20
 
21
- from .base import BaseAnthropicTool, ToolError, ToolResult
22
- from .run import run
21
+ from computer_use_ootb_internal.computer_use_demo.tools.base import BaseAnthropicTool, ToolError, ToolResult
22
+ from computer_use_ootb_internal.computer_use_demo.tools.run import run
23
+
24
+ from computer_use_ootb_internal.computer_use_demo.tools.computer_marbot import MarbotAutoGUI
25
+ from computer_use_ootb_internal.computer_use_demo.animation.click_animation import show_click, show_move_to
26
+
23
27
 
24
28
  OUTPUT_DIR = "./tmp/outputs"
25
29
 
@@ -195,7 +199,6 @@ class ComputerTool(BaseAnthropicTool):
195
199
  self.offset_y = screen['y'] if system == "Darwin" else screen.y
196
200
  self.bbox = bbox
197
201
 
198
- from .computer_marbot import MarbotAutoGUI
199
202
  self.marbot_auto_gui = MarbotAutoGUI()
200
203
 
201
204
 
@@ -219,7 +222,6 @@ class ComputerTool(BaseAnthropicTool):
219
222
  raise ToolError(f"text is not accepted for {action}")
220
223
  if not isinstance(coordinate, (list, tuple)) or len(coordinate) != 2:
221
224
  raise ToolError(f"{coordinate} must be a tuple of length 2")
222
- # if not all(isinstance(i, int) and i >= 0 for i in coordinate):
223
225
  if not all(isinstance(i, int) for i in coordinate):
224
226
  raise ToolError(f"{coordinate} must be a tuple of non-negative ints")
225
227
 
@@ -233,8 +235,6 @@ class ComputerTool(BaseAnthropicTool):
233
235
  x += self.offset_x
234
236
  y += self.offset_y
235
237
 
236
- print(f"mouse move to {x}, {y}")
237
-
238
238
  if action == "mouse_move":
239
239
  pyautogui.moveTo(x, y)
240
240
  return ToolResult(output=f"Moved mouse to ({x}, {y})")
@@ -354,13 +354,25 @@ class ComputerTool(BaseAnthropicTool):
354
354
  if action in ("left_click_windll", "mouse_move_windll", "right_click_windll", "key_down_windll", "key_up_windll"):
355
355
  if action == "left_click_windll":
356
356
  if coordinate is None:
357
+ x, y = pyautogui.position()
358
+ x, y = self.scale_coordinates(ScalingSource.COMPUTER, x, y)
359
+ show_click(x, y)
357
360
  self.marbot_auto_gui.click()
358
361
  else:
359
- self.marbot_auto_gui.click(x=coordinate[0], y=coordinate[1])
362
+ x = coordinate[0]+self.offset_x
363
+ y = coordinate[1]+self.offset_y
364
+ self.marbot_auto_gui.click(x=x, y=y)
365
+ show_click(x, y)
360
366
  elif action == "mouse_move_windll":
361
367
  if coordinate is None:
362
368
  raise ToolError(f"coordinate is required for {action}")
363
- self.marbot_auto_gui.moveTo(x=coordinate[0], y=coordinate[1])
369
+ x1 = coordinate[0]+self.offset_x
370
+ y1 = coordinate[1]+self.offset_y
371
+ self.marbot_auto_gui.moveTo(x=x1, y=y1)
372
+ x0, y0 = pyautogui.position()
373
+ x0, y0 = self.scale_coordinates(ScalingSource.COMPUTER, x0, y0)
374
+ show_move_to(x0, y0, x1, y1, duration_ms=2000)
375
+
364
376
  # elif action == "right_click_windll":
365
377
  # self.marbot_auto_gui.rightClick(x=coordinate[0], y=coordinate[1])
366
378
  elif action == "key_down_windll":
@@ -594,3 +606,9 @@ class ComputerTool(BaseAnthropicTool):
594
606
  # For simplicity, return text as is
595
607
  # Implement mapping if special keys are needed
596
608
  return text
609
+
610
+
611
+ if __name__ == "__main__":
612
+ computer = ComputerTool()
613
+ # test left_click_windll
614
+ asyncio.run(computer(action="left_click_windll", coordinate=(500, 500)))
@@ -167,13 +167,13 @@ if __name__ == "__main__":
167
167
 
168
168
  # 等待你切到目标窗口
169
169
  print("⌛ Waiting 10 seconds...")
170
- sleep(10)
170
+ sleep(5)
171
171
 
172
172
  print("🚀 Start action sequence")
173
173
 
174
174
  # 设置目标位置
175
175
  target_x = 3061
176
- target_y = 268
176
+ target_y = 666
177
177
 
178
178
  # 按住 Alt 键
179
179
  bot.keyDown('alt')