PyPI - computer-use-ootb-internal - Versions diffs - 0.0.94.post4__tar.gz → 0.0.95__tar.gz - Mend

computer-use-ootb-internal 0.0.94.post4 (tar.gz) → 0.0.95 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43)

{computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/.gitignore RENAMED Viewed

@@ -4,3 +4,4 @@
 *.log
 screenshot*.png
 *dist*
+*.toml

{computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: computer-use-ootb-internal
-Version: 0.0.94.post4
+Version: 0.0.95
 Summary: Computer Use OOTB
 Author-email: Siyuan Hu <siyuan.hu.sg@gmail.com>
 Requires-Python: >=3.11

{computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "computer-use-ootb-internal"
-version = "0.0.94.post4"
+version = "0.0.95"
 description = "Computer Use OOTB"
 authors = [{ name = "Siyuan Hu", email = "siyuan.hu.sg@gmail.com" }]
 requires-python = ">=3.11"

{computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/app_teachmode.py RENAMED Viewed

@@ -1,324 +1,387 @@
-import argparse
-import time
-import json
-import threading
-from fastapi import FastAPI, Request
-from fastapi.responses import JSONResponse
-from fastapi.middleware.cors import CORSMiddleware
-from screeninfo import get_monitors
-from computer_use_ootb_internal.computer_use_demo.tools.computer import get_screen_details
-from computer_use_ootb_internal.run_teachmode_ootb_args import simple_teachmode_sampling_loop
-app = FastAPI()
-# Add CORS middleware to allow requests from the frontend
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-class SharedState:
-    def __init__(self, args):
-        self.args = args
-        self.task_updated = False
-        self.chatbot_messages = []
-        # Store all state-related data here
-        self.model = args.model
-        self.task = getattr(args, 'task', "")
-        self.selected_screen = args.selected_screen
-        self.user_id = args.user_id
-        self.trace_id = args.trace_id
-        self.api_keys = args.api_keys
-        self.server_url = args.server_url
-        self.message_queue = []
-        self.is_processing = False
-        self.should_stop = False
-        self.is_paused = False
-        # Add a new event to better control stopping
-        self.stop_event = threading.Event()
-        # Add a reference to the processing thread
-        self.processing_thread = None
-shared_state = None
-@app.post("/update_params")
-async def update_parameters(request: Request):
-    data = await request.json()
-    if 'task' not in data:
-        return JSONResponse(
-            content={"status": "error", "message": "Missing required field: task"},
-            status_code=400
-        )
-    shared_state.args = argparse.Namespace(**data)
-    shared_state.task_updated = True
-    # Update shared state when parameters change
-    shared_state.model = getattr(shared_state.args, 'model', "teach-mode-gpt-4o")
-    shared_state.task = getattr(shared_state.args, 'task', "Create a claim on the SAP system, using Receipt.pdf as attachment.")
-    shared_state.selected_screen = getattr(shared_state.args, 'selected_screen', 0)
-    shared_state.user_id = getattr(shared_state.args, 'user_id', "a_test")
-    shared_state.trace_id = getattr(shared_state.args, 'trace_id', "jess_4")
-    shared_state.api_keys = getattr(shared_state.args, 'api_keys', "sk-proj-1234567890")
-    shared_state.server_url = getattr(shared_state.args, 'server_url', "http://ec2-44-234-43-86.us-west-2.compute.amazonaws.com/generate_action")
-    return JSONResponse(
-        content={"status": "success", "message": "Parameters updated", "new_args": vars(shared_state.args)},
-        status_code=200
-    )
-@app.post("/update_message")
-async def update_message(request: Request):
-    data = await request.json()
-    if 'message' not in data:
-        return JSONResponse(
-            content={"status": "error", "message": "Missing required field: message"},
-            status_code=400
-        )
-    message = data['message']
-    shared_state.chatbot_messages.append({"role": "user", "content": message})
-    shared_state.task = message
-    shared_state.args.task = message
-    # Reset stop event before starting
-    shared_state.stop_event.clear()
-    # Start processing if not already running
-    if not shared_state.is_processing:
-        # Create and store the thread
-        shared_state.processing_thread = threading.Thread(target=process_input, daemon=True)
-        shared_state.processing_thread.start()
-    return JSONResponse(
-        content={"status": "success", "message": "Message received", "task": shared_state.task},
-        status_code=200
-    )
-@app.get("/get_messages")
-async def get_messages():
-    # Return all messages in the queue and clear it
-    messages = shared_state.message_queue.copy()
-    shared_state.message_queue = []
-    return JSONResponse(
-        content={"status": "success", "messages": messages},
-        status_code=200
-    )
-@app.get("/get_screens")
-async def get_screens():
-    screen_options, primary_index = get_screen_details()
-    return JSONResponse(
-        content={"status": "success", "screens": screen_options, "primary_index": primary_index},
-        status_code=200
-    )
-@app.post("/stop_processing")
-async def stop_processing():
-    if shared_state.is_processing:
-        # Set both flags to ensure stopping the current task
-        shared_state.should_stop = True
-        shared_state.stop_event.set()
-        # Send an immediate message to the queue to inform the user
-        stop_initiated_msg = {"role": "assistant", "content": f"Stopping task '{shared_state.task}'..."}
-        shared_state.message_queue.append(stop_initiated_msg)
-        return JSONResponse(
-            content={"status": "success", "message": "Task is being stopped, server will remain available for new tasks"},
-            status_code=200
-        )
-    else:
-        return JSONResponse(
-            content={"status": "error", "message": "No active processing to stop"},
-            status_code=400
-        )
-@app.post("/toggle_pause")
-async def toggle_pause():
-    if not shared_state.is_processing:
-        return JSONResponse(
-            content={"status": "error", "message": "No active processing to pause/resume"},
-            status_code=400
-        )
-    # Toggle the pause state
-    shared_state.is_paused = not shared_state.is_paused
-    current_state = shared_state.is_paused
-    print(f"Toggled pause state to: {current_state}")
-    status_message = "paused" if current_state else "resumed"
-    # Add a message to the queue to inform the user
-    if current_state:
-        message = {"role": "assistant", "content": f"Task '{shared_state.task}' has been paused. Click Continue to resume."}
-    else:
-        message = {"role": "assistant", "content": f"Task '{shared_state.task}' has been resumed."}
-    shared_state.chatbot_messages.append(message)
-    shared_state.message_queue.append(message)
-    return JSONResponse(
-        content={
-            "status": "success",
-            "message": f"Processing {status_message}",
-            "is_paused": current_state
-        },
-        status_code=200
-    )
-@app.get("/status")
-async def get_status():
-    print(f"Status check - Processing: {shared_state.is_processing}, Paused: {shared_state.is_paused}")
-    return JSONResponse(
-        content={
-            "status": "success",
-            "is_processing": shared_state.is_processing,
-            "is_paused": shared_state.is_paused
-        },
-        status_code=200
-    )
-def process_input():
-    shared_state.is_processing = True
-    shared_state.should_stop = False
-    shared_state.is_paused = False
-    shared_state.stop_event.clear()  # Ensure stop event is cleared at the start
-    print(f"start sampling loop: {shared_state.chatbot_messages}")
-    print(f"shared_state.args before sampling loop: {shared_state.args}")
-    try:
-        # Get the generator for the sampling loop
-        sampling_loop = simple_teachmode_sampling_loop(
-            model=shared_state.model,
-            task=shared_state.task,
-            selected_screen=shared_state.selected_screen,
-            user_id=shared_state.user_id,
-            trace_id=shared_state.trace_id,
-            api_keys=shared_state.api_keys,
-            server_url=shared_state.server_url,
-        )
-        # Process messages from the sampling loop
-        for loop_msg in sampling_loop:
-            # Check stop condition more frequently
-            if shared_state.should_stop or shared_state.stop_event.is_set():
-                print("Processing stopped by user")
-                break
-            # Check if paused and wait while paused
-            while shared_state.is_paused and not shared_state.should_stop and not shared_state.stop_event.is_set():
-                print(f"Processing paused at: {time.strftime('%H:%M:%S')}")
-                # Wait a short time and check stop condition regularly
-                for _ in range(5):  # Check 5 times per second
-                    if shared_state.should_stop or shared_state.stop_event.is_set():
-                        break
-                    time.sleep(0.2)
-            # Check again after pause loop
-            if shared_state.should_stop or shared_state.stop_event.is_set():
-                print("Processing stopped while paused or resuming")
-                break
-            # Process the message
-            if loop_msg.startswith('<img'):
-                message = {"role": "user", "content": loop_msg}
-            else:
-                message = {"role": "assistant", "content": loop_msg}
-            shared_state.chatbot_messages.append(message)
-            shared_state.message_queue.append(message)
-            # Short sleep to allow stop signals to be processed
-            for _ in range(5):  # Check 5 times per second
-                if shared_state.should_stop or shared_state.stop_event.is_set():
-                    print("Processing stopped during sleep")
-                    break
-                time.sleep(0.1)
-            if shared_state.should_stop or shared_state.stop_event.is_set():
-                break
-    except Exception as e:
-        # Handle any exceptions in the processing loop
-        error_msg = f"Error during task processing: {str(e)}"
-        print(error_msg)
-        error_message = {"role": "assistant", "content": error_msg}
-        shared_state.message_queue.append(error_message)
-    finally:
-        # Handle completion or interruption
-        if shared_state.should_stop or shared_state.stop_event.is_set():
-            stop_msg = f"Task '{shared_state.task}' was stopped. Ready for new tasks."
-            final_message = {"role": "assistant", "content": stop_msg}
-        else:
-            complete_msg = f"Task '{shared_state.task}' completed. Thanks for using Teachmode-OOTB."
-            final_message = {"role": "assistant", "content": complete_msg}
-        shared_state.chatbot_messages.append(final_message)
-        shared_state.message_queue.append(final_message)
-        # Reset all state flags to allow for new tasks
-        shared_state.is_processing = False
-        shared_state.should_stop = False
-        shared_state.is_paused = False
-        shared_state.stop_event.clear()
-        print("Processing completed, ready for new tasks")
-def main():
-    global app, shared_state
-    parser = argparse.ArgumentParser(
-        description="Run a synchronous sampling loop for assistant/tool interactions in teach-mode."
-    )
-    parser.add_argument("--model", default="teach-mode-gpt-4o")
-    parser.add_argument("--task", default="Create a claim on the SAP system, using Receipt.pdf as attachment.")
-    parser.add_argument("--selected_screen", type=int, default=0)
-    parser.add_argument("--user_id", default="star_rail_dev")
-    parser.add_argument("--trace_id", default="scroll")
-    parser.add_argument("--api_key_file", default="api_key.json")
-    parser.add_argument("--api_keys", default="")
-    parser.add_argument(
-        "--server_url",
-        default="http://ec2-44-234-43-86.us-west-2.compute.amazonaws.com/generate_action",
-        help="Server URL for the session"
-    )
-    args = parser.parse_args()
-    shared_state = SharedState(args)
-    import uvicorn
-    import platform
-    import os
-    # Default port
-    port = 7888
-    # Determine port based on Windows username
-    if platform.system() == "Windows":
-        username = os.environ["USERNAME"].lower()
-        if username == "altair":
-            port = 14000
-        elif username.startswith("guest") and username[5:].isdigit():
-            num = int(username[5:])
-            if 1 <= num <= 10:
-                port = 14000 + num
-            else:
-                port = 7888
-        else:
-            port = 7888
-    uvicorn.run(app, host="0.0.0.0", port=port)
-if __name__ == "__main__":
+import argparse
+import time
+import json
+import threading
+from fastapi import FastAPI, Request
+from fastapi.responses import JSONResponse
+from fastapi.middleware.cors import CORSMiddleware
+from screeninfo import get_monitors
+from computer_use_ootb_internal.computer_use_demo.tools.computer import get_screen_details
+from computer_use_ootb_internal.run_teachmode_ootb_args import simple_teachmode_sampling_loop
+app = FastAPI()
+# Add CORS middleware to allow requests from the frontend
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Rate limiter for API endpoints
+class RateLimiter:
+    def __init__(self, interval_seconds=2):
+        self.interval = interval_seconds
+        self.last_request_time = {}
+        self.lock = threading.Lock()
+    def allow_request(self, endpoint):
+        with self.lock:
+            current_time = time.time()
+            # Priority endpoints always allowed
+            if endpoint in ["/update_params", "/update_message"]:
+                return True
+            # For other endpoints, apply rate limiting
+            if endpoint not in self.last_request_time:
+                self.last_request_time[endpoint] = current_time
+                return True
+            elapsed = current_time - self.last_request_time[endpoint]
+            if elapsed < self.interval:
+                return False
+            self.last_request_time[endpoint] = current_time
+            return True
+class SharedState:
+    def __init__(self, args):
+        self.args = args
+        self.task_updated = False
+        self.chatbot_messages = []
+        # Store all state-related data here
+        self.model = args.model
+        self.task = getattr(args, 'task', "")
+        self.selected_screen = args.selected_screen
+        self.user_id = args.user_id
+        self.trace_id = args.trace_id
+        self.api_keys = args.api_keys
+        self.server_url = args.server_url
+        self.message_queue = []
+        self.is_processing = False
+        self.should_stop = False
+        self.is_paused = False
+        # Add a new event to better control stopping
+        self.stop_event = threading.Event()
+        # Add a reference to the processing thread
+        self.processing_thread = None
+shared_state = None
+rate_limiter = RateLimiter(interval_seconds=2)
+@app.post("/update_params")
+async def update_parameters(request: Request):
+    data = await request.json()
+    if 'task' not in data:
+        return JSONResponse(
+            content={"status": "error", "message": "Missing required field: task"},
+            status_code=400
+        )
+    shared_state.args = argparse.Namespace(**data)
+    shared_state.task_updated = True
+    # Update shared state when parameters change
+    shared_state.model = getattr(shared_state.args, 'model', "teach-mode-gpt-4o")
+    shared_state.task = getattr(shared_state.args, 'task', "Create a claim on the SAP system, using Receipt.pdf as attachment.")
+    shared_state.selected_screen = getattr(shared_state.args, 'selected_screen', 0)
+    shared_state.user_id = getattr(shared_state.args, 'user_id', "a_test")
+    shared_state.trace_id = getattr(shared_state.args, 'trace_id', "jess_4")
+    shared_state.api_keys = getattr(shared_state.args, 'api_keys', "sk-proj-1234567890")
+    shared_state.server_url = getattr(shared_state.args, 'server_url', "http://ec2-44-234-43-86.us-west-2.compute.amazonaws.com/generate_action")
+    return JSONResponse(
+        content={"status": "success", "message": "Parameters updated", "new_args": vars(shared_state.args)},
+        status_code=200
+    )
+@app.post("/update_message")
+async def update_message(request: Request):
+    data = await request.json()
+    if 'message' not in data:
+        return JSONResponse(
+            content={"status": "error", "message": "Missing required field: message"},
+            status_code=400
+        )
+    message = data['message']
+    shared_state.chatbot_messages.append({"role": "user", "content": message})
+    shared_state.task = message
+    shared_state.args.task = message
+    # Reset stop event before starting
+    shared_state.stop_event.clear()
+    # Start processing if not already running
+    if not shared_state.is_processing:
+        # Create and store the thread
+        shared_state.processing_thread = threading.Thread(target=process_input, daemon=True)
+        shared_state.processing_thread.start()
+    return JSONResponse(
+        content={"status": "success", "message": "Message received", "task": shared_state.task},
+        status_code=200
+    )
+@app.get("/get_messages")
+async def get_messages(request: Request):
+    # Apply rate limiting
+    if not rate_limiter.allow_request(request.url.path):
+        return JSONResponse(
+            content={"status": "error", "message": "Rate limit exceeded. Try again after 2 seconds."},
+            status_code=429
+        )
+    # Return all messages in the queue and clear it
+    messages = shared_state.message_queue.copy()
+    shared_state.message_queue = []
+    return JSONResponse(
+        content={"status": "success", "messages": messages},
+        status_code=200
+    )
+@app.get("/get_screens")
+async def get_screens(request: Request):
+    # Apply rate limiting
+    if not rate_limiter.allow_request(request.url.path):
+        return JSONResponse(
+            content={"status": "error", "message": "Rate limit exceeded. Try again after 2 seconds."},
+            status_code=429
+        )
+    screen_options, primary_index = get_screen_details()
+    return JSONResponse(
+        content={"status": "success", "screens": screen_options, "primary_index": primary_index},
+        status_code=200
+    )
+@app.post("/stop_processing")
+async def stop_processing(request: Request):
+    # Apply rate limiting
+    if not rate_limiter.allow_request(request.url.path):
+        return JSONResponse(
+            content={"status": "error", "message": "Rate limit exceeded. Try again after 2 seconds."},
+            status_code=429
+        )
+    if shared_state.is_processing:
+        # Set both flags to ensure stopping the current task
+        shared_state.should_stop = True
+        shared_state.stop_event.set()
+        # Send an immediate message to the queue to inform the user
+        stop_initiated_msg = {"role": "assistant", "content": f"Stopping task '{shared_state.task}'..."}
+        shared_state.message_queue.append(stop_initiated_msg)
+        return JSONResponse(
+            content={"status": "success", "message": "Task is being stopped, server will remain available for new tasks"},
+            status_code=200
+        )
+    else:
+        return JSONResponse(
+            content={"status": "error", "message": "No active processing to stop"},
+            status_code=400
+        )
+@app.post("/toggle_pause")
+async def toggle_pause(request: Request):
+    # Apply rate limiting
+    if not rate_limiter.allow_request(request.url.path):
+        return JSONResponse(
+            content={"status": "error", "message": "Rate limit exceeded. Try again after 2 seconds."},
+            status_code=429
+        )
+    if not shared_state.is_processing:
+        return JSONResponse(
+            content={"status": "error", "message": "No active processing to pause/resume"},
+            status_code=400
+        )
+    # Toggle the pause state
+    shared_state.is_paused = not shared_state.is_paused
+    current_state = shared_state.is_paused
+    print(f"Toggled pause state to: {current_state}")
+    status_message = "paused" if current_state else "resumed"
+    # Add a message to the queue to inform the user
+    if current_state:
+        message = {"role": "assistant", "content": f"Task '{shared_state.task}' has been paused. Click Continue to resume."}
+    else:
+        message = {"role": "assistant", "content": f"Task '{shared_state.task}' has been resumed."}
+    shared_state.chatbot_messages.append(message)
+    shared_state.message_queue.append(message)
+    return JSONResponse(
+        content={
+            "status": "success",
+            "message": f"Processing {status_message}",
+            "is_paused": current_state
+        },
+        status_code=200
+    )
+@app.get("/status")
+async def get_status(request: Request):
+    # Apply rate limiting
+    if not rate_limiter.allow_request(request.url.path):
+        return JSONResponse(
+            content={"status": "error", "message": "Rate limit exceeded. Try again after 2 seconds."},
+            status_code=429
+        )
+    print(f"Status check - Processing: {shared_state.is_processing}, Paused: {shared_state.is_paused}")
+    return JSONResponse(
+        content={
+            "status": "success",
+            "is_processing": shared_state.is_processing,
+            "is_paused": shared_state.is_paused
+        },
+        status_code=200
+    )
+def process_input():
+    shared_state.is_processing = True
+    shared_state.should_stop = False
+    shared_state.is_paused = False
+    shared_state.stop_event.clear()  # Ensure stop event is cleared at the start
+    print(f"start sampling loop: {shared_state.chatbot_messages}")
+    print(f"shared_state.args before sampling loop: {shared_state.args}")
+    try:
+        # Get the generator for the sampling loop
+        sampling_loop = simple_teachmode_sampling_loop(
+            model=shared_state.model,
+            task=shared_state.task,
+            selected_screen=shared_state.selected_screen,
+            user_id=shared_state.user_id,
+            trace_id=shared_state.trace_id,
+            api_keys=shared_state.api_keys,
+            server_url=shared_state.server_url,
+        )
+        # Process messages from the sampling loop
+        for loop_msg in sampling_loop:
+            # Check stop condition more frequently
+            if shared_state.should_stop or shared_state.stop_event.is_set():
+                print("Processing stopped by user")
+                break
+            # Check if paused and wait while paused
+            while shared_state.is_paused and not shared_state.should_stop and not shared_state.stop_event.is_set():
+                print(f"Processing paused at: {time.strftime('%H:%M:%S')}")
+                # Wait a short time and check stop condition regularly
+                for _ in range(5):  # Check 5 times per second
+                    if shared_state.should_stop or shared_state.stop_event.is_set():
+                        break
+                    time.sleep(0.2)
+            # Check again after pause loop
+            if shared_state.should_stop or shared_state.stop_event.is_set():
+                print("Processing stopped while paused or resuming")
+                break
+            # Process the message
+            if loop_msg.startswith('<img'):
+                message = {"role": "user", "content": loop_msg}
+            else:
+                message = {"role": "assistant", "content": loop_msg}
+            shared_state.chatbot_messages.append(message)
+            shared_state.message_queue.append(message)
+            # Short sleep to allow stop signals to be processed
+            for _ in range(5):  # Check 5 times per second
+                if shared_state.should_stop or shared_state.stop_event.is_set():
+                    print("Processing stopped during sleep")
+                    break
+                time.sleep(0.1)
+            if shared_state.should_stop or shared_state.stop_event.is_set():
+                break
+    except Exception as e:
+        # Handle any exceptions in the processing loop
+        error_msg = f"Error during task processing: {str(e)}"
+        print(error_msg)
+        error_message = {"role": "assistant", "content": error_msg}
+        shared_state.message_queue.append(error_message)
+    finally:
+        # Handle completion or interruption
+        if shared_state.should_stop or shared_state.stop_event.is_set():
+            stop_msg = f"Task '{shared_state.task}' was stopped. Ready for new tasks."
+            final_message = {"role": "assistant", "content": stop_msg}
+        else:
+            complete_msg = f"Task '{shared_state.task}' completed. Thanks for using Teachmode-OOTB."
+            final_message = {"role": "assistant", "content": complete_msg}
+        shared_state.chatbot_messages.append(final_message)
+        shared_state.message_queue.append(final_message)
+        # Reset all state flags to allow for new tasks
+        shared_state.is_processing = False
+        shared_state.should_stop = False
+        shared_state.is_paused = False
+        shared_state.stop_event.clear()
+        print("Processing completed, ready for new tasks")
+def main():
+    global app, shared_state, rate_limiter
+    parser = argparse.ArgumentParser(
+        description="Run a synchronous sampling loop for assistant/tool interactions in teach-mode."
+    )
+    parser.add_argument("--model", default="teach-mode-gpt-4o")
+    parser.add_argument("--task", default="Create a claim on the SAP system, using Receipt.pdf as attachment.")
+    parser.add_argument("--selected_screen", type=int, default=0)
+    parser.add_argument("--user_id", default="star_rail_dev")
+    parser.add_argument("--trace_id", default="scroll")
+    parser.add_argument("--api_key_file", default="api_key.json")
+    parser.add_argument("--api_keys", default="")
+    parser.add_argument(
+        "--server_url",
+        default="http://ec2-44-234-43-86.us-west-2.compute.amazonaws.com/generate_action",
+        help="Server URL for the session"
+    )
+    args = parser.parse_args()
+    shared_state = SharedState(args)
+    rate_limiter = RateLimiter(interval_seconds=2)
+    import uvicorn
+    import platform
+    import os
+    # Default port
+    port = 7888
+    # Determine port based on Windows username
+    if platform.system() == "Windows":
+        username = os.environ["USERNAME"].lower()
+        if username == "altair":
+            port = 14000
+        elif username.startswith("guest") and username[5:].isdigit():
+            num = int(username[5:])
+            if 1 <= num <= 10:
+                port = 14000 + num
+            else:
+                port = 7888
+        else:
+            port = 7888
+    uvicorn.run(app, host="0.0.0.0", port=port)
+if __name__ == "__main__":
     main()

computer_use_ootb_internal-0.0.95/src/computer_use_ootb_internal/computer_use_demo/animation/click_animation.py ADDED Viewed

@@ -0,0 +1,154 @@
+"""
+show_click(x, y, duration_ms=800)
+ → 在屏幕 (x,y) 显示点击动画，停留 duration_ms 毫秒
+依赖: pyside6
+确保同目录有 click.gif
+"""
+import sys, time
+from pathlib import Path
+from PySide6.QtCore import Qt, QPoint, QTimer, QEventLoop, QSize
+from PySide6.QtGui  import QPainter, QPixmap, QMovie
+from PySide6.QtWidgets import QApplication, QWidget, QLabel
+CLICK_GIF = Path(__file__).with_name("icons8-select-cursor-transparent-96.gif")
+class ClickAnimation(QWidget):
+    def __init__(self, pos: QPoint, life_ms: int):
+        super().__init__(None,
+            Qt.FramelessWindowHint | Qt.Tool | Qt.WindowStaysOnTopHint
+            | Qt.WindowTransparentForInput)
+        self.setAttribute(Qt.WA_TranslucentBackground)
+        if not CLICK_GIF.exists():
+            print(f"Error: click.gif not found at {CLICK_GIF}")
+            return
+        try:
+            # 创建标签显示GIF
+            self.label = QLabel(self)
+            self.movie = QMovie(str(CLICK_GIF))
+            # 获取原始尺寸并打印（仅供参考）
+            self.movie.jumpToFrame(0)
+            original_size = self.movie.currentPixmap().size()
+            print(f"GIF original size: {original_size.width()}x{original_size.height()}")
+            # 将GIF缩放到30x30像素
+            target_size = QSize(50, 50)
+            self.movie.setScaledSize(target_size)
+            # 设置标签尺寸和GIF
+            self.label.setMovie(self.movie)
+            self.label.setFixedSize(target_size)
+            # 设置窗口大小和位置
+            self.resize(target_size)
+            self.move(pos.x() - 15, pos.y() - 15)  # 居中显示
+            # 提高播放性能
+            self.movie.setCacheMode(QMovie.CacheAll)
+            # 开始播放动画
+            self.movie.start()
+            # 设置定时器关闭窗口
+            QTimer.singleShot(life_ms, self.close)
+            self.show()
+            self.raise_()
+            print(f"Click animation created at ({pos.x()}, {pos.y()}), size: 30x30, duration: {life_ms}ms")
+        except Exception as e:
+            print(f"Error creating click animation: {str(e)}")
+# ---------- 外部接口 ----------
+_app = None
+def _ensure_app():
+    global _app
+    if _app is None:
+        if QApplication.instance() is None:
+            print("Creating new QApplication instance")
+            _app = QApplication(sys.argv)
+        else:
+            print("Using existing QApplication instance")
+            _app = QApplication.instance()
+def show_click(x: int, y: int, duration_ms: int = 2000):  # 增加默认播放时间
+    """阻塞式点击动画：调用后必定肉眼可见"""
+    print(f"Attempting to show click at ({x}, {y})")
+    if not CLICK_GIF.exists():
+        raise FileNotFoundError(f"click.gif not found at {CLICK_GIF}")
+    _ensure_app()
+    try:
+        animation = ClickAnimation(QPoint(x, y), duration_ms)
+        # 局部事件循环，动画结束后返回
+        loop = QEventLoop()
+        QTimer.singleShot(duration_ms + 150, loop.quit)  # 增加等待时间
+        loop.exec()
+        print("Click animation completed")
+    except Exception as e:
+        print(f"Error during show_click: {str(e)}")
+# --- 在原 import 区域追加 ---
+from PySide6.QtCore import QEasingCurve, QPropertyAnimation
+# --------------------------------------------------------
+# ---------- 新增函数 ----------
+def show_move_to(x1: int, y1: int, x2: int, y2: int, duration_ms: int = 1200):
+    """
+    阻塞式移动动画：在 (x1, y1) 处出现光标 GIF，
+    并在 duration_ms 毫秒内平滑移动到 (x2, y2)。
+    Args:
+        x1, y1        : 起点屏幕坐标
+        x2, y2        : 终点屏幕坐标
+        duration_ms   : 移动总时长
+    """
+    print(f"Attempting to move click from ({x1}, {y1}) → ({x2}, {y2}) "
+          f"in {duration_ms} ms")
+    if not CLICK_GIF.exists():
+        raise FileNotFoundError(f"click.gif not found at {CLICK_GIF}")
+    _ensure_app()
+    # 让 widget 的生命周期略长于动画，避免提前销毁
+    life_ms = duration_ms + 200
+    widget  = ClickAnimation(QPoint(x1, y1), life_ms)
+    # 用 QPropertyAnimation 平滑移动窗口
+    anim = QPropertyAnimation(widget, b"pos")
+    anim.setDuration(duration_ms)
+    # ClickAnimation 内部已经向左上偏移了 15px，这里沿用同样的偏移
+    anim.setStartValue(QPoint(x1 - 15, y1 - 15))
+    anim.setEndValue(QPoint(x2 - 15, y2 - 15))
+    anim.setEasingCurve(QEasingCurve.OutQuad)     # 可自行更换缓动曲线
+    anim.start()
+    # 局部事件循环，直到动画结束
+    loop = QEventLoop()
+    anim.finished.connect(loop.quit)
+    QTimer.singleShot(life_ms, loop.quit)          # 双保险
+    loop.exec()
+    print("Move‑to animation completed")
+# ---------------------------------
+# ---------- 命令行测试 ----------
+if __name__ == "__main__":
+    # 测试点击
+    x, y = 500, 500
+    print(f"Testing click at ({x}, {y})")
+    show_click(x, y)
+    # 测试移动
+    x1, y1 = 400, 400
+    x2, y2 = 800, 600
+    print(f"Testing move from ({x1}, {y1}) → ({x2}, {y2})")
+    show_move_to(x1, y1, x2, y2, duration_ms=2000)

computer_use_ootb_internal-0.0.95/src/computer_use_ootb_internal/computer_use_demo/animation/icons8-select-cursor-transparent-96.gif ADDED Viewed

Binary file

{computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/gui_agent/gui_parser/simple_parser/icon_detection/icon_detection.py RENAMED Viewed

@@ -251,16 +251,3 @@ def get_screen_resize_factor():
     # return scaleFactor
     return "1.0x"
-# 示例调用
-if __name__ == "__main__":
-    buttons = detect_icons(
-        icon_folder=r"",
-        image_path=r"",
-        threshold=0.75,
-        scale_factor="1.5x",
-        specific_icon_names=[r"test\1.5x\macOS.png"]
-    )
-    draw_detected_icons(
-        r"D:\develop\computer_use_ootb_internal-main\.cache\20241214_023408\screenshot-0.png", buttons
-    )

{computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/tools/computer.py RENAMED Viewed

@@ -18,8 +18,12 @@ from functools import partial
 from anthropic.types.beta import BetaToolComputerUse20241022Param
-from .base import BaseAnthropicTool, ToolError, ToolResult
-from .run import run
+from computer_use_ootb_internal.computer_use_demo.tools.base import BaseAnthropicTool, ToolError, ToolResult
+from computer_use_ootb_internal.computer_use_demo.tools.run import run
+from computer_use_ootb_internal.computer_use_demo.tools.computer_marbot import MarbotAutoGUI
+from computer_use_ootb_internal.computer_use_demo.animation.click_animation import show_click, show_move_to
 OUTPUT_DIR = "./tmp/outputs"
@@ -195,7 +199,6 @@ class ComputerTool(BaseAnthropicTool):
         self.offset_y = screen['y'] if system == "Darwin" else screen.y
         self.bbox = bbox
-        from .computer_marbot import MarbotAutoGUI
         self.marbot_auto_gui = MarbotAutoGUI()
@@ -219,7 +222,6 @@ class ComputerTool(BaseAnthropicTool):
                 raise ToolError(f"text is not accepted for {action}")
             if not isinstance(coordinate, (list, tuple)) or len(coordinate) != 2:
                 raise ToolError(f"{coordinate} must be a tuple of length 2")
-            # if not all(isinstance(i, int) and i >= 0 for i in coordinate):
             if not all(isinstance(i, int) for i in coordinate):
                 raise ToolError(f"{coordinate} must be a tuple of non-negative ints")
@@ -233,8 +235,6 @@ class ComputerTool(BaseAnthropicTool):
             x += self.offset_x
             y += self.offset_y
-            print(f"mouse move to {x}, {y}")
             if action == "mouse_move":
                 pyautogui.moveTo(x, y)
                 return ToolResult(output=f"Moved mouse to ({x}, {y})")
@@ -354,13 +354,25 @@ class ComputerTool(BaseAnthropicTool):
         if action in ("left_click_windll", "mouse_move_windll", "right_click_windll", "key_down_windll", "key_up_windll"):
             if action == "left_click_windll":
                 if coordinate is None:
+                    x, y = pyautogui.position()
+                    x, y = self.scale_coordinates(ScalingSource.COMPUTER, x, y)
+                    show_click(x, y)
                     self.marbot_auto_gui.click()
                 else:
-                    self.marbot_auto_gui.click(x=coordinate[0], y=coordinate[1])
+                    x = coordinate[0]+self.offset_x
+                    y = coordinate[1]+self.offset_y
+                    self.marbot_auto_gui.click(x=x, y=y)
+                    show_click(x, y)
             elif action == "mouse_move_windll":
                 if coordinate is None:
                     raise ToolError(f"coordinate is required for {action}")
-                self.marbot_auto_gui.moveTo(x=coordinate[0], y=coordinate[1])
+                # Target position: the requested coordinate shifted by self.offset_x / self.offset_y.
+                x1 = coordinate[0]+self.offset_x
+                y1 = coordinate[1]+self.offset_y
+                # Read the cursor position BEFORE moving it. Sampled after moveTo() it would
+                # already be the target, leaving the animation no distance to travel.
+                x0, y0 = pyautogui.position()
+                x0, y0 = self.scale_coordinates(ScalingSource.COMPUTER, x0, y0)
+                self.marbot_auto_gui.moveTo(x=x1, y=y1)
+                # Blocking overlay animation from the previous position to the target.
+                show_move_to(x0, y0, x1, y1, duration_ms=2000)
             # elif action == "right_click_windll":
             #     self.marbot_auto_gui.rightClick(x=coordinate[0], y=coordinate[1])
             elif action == "key_down_windll":
@@ -594,3 +606,9 @@ class ComputerTool(BaseAnthropicTool):
         # For simplicity, return text as is
         # Implement mapping if special keys are needed
         return text
+if __name__ == "__main__":
+    # Manual smoke test: build a ComputerTool and run a single action through its async call interface
+    computer = ComputerTool()
+    # test left_click_windll
+    asyncio.run(computer(action="left_click_windll", coordinate=(500, 500)))

{computer_use_ootb_internal-0.0.94.post4 → computer_use_ootb_internal-0.0.95}/src/computer_use_ootb_internal/computer_use_demo/tools/computer_marbot.py RENAMED Viewed

@@ -167,13 +167,13 @@ if __name__ == "__main__":
     # 等待你切到目标窗口
     print("⌛ Waiting 10 seconds...")
-    sleep(10)
+    sleep(5)
     print("🚀 Start action sequence")
     # 设置目标位置
     target_x = 3061
-    target_y = 268
+    target_y = 666
     # 按住 Alt 键
     bot.keyDown('alt')