PyPI - computer-use-ootb-internal - Versions diffs - 0.0.179__py3-none-any.whl → 0.0.180__py3-none-any.whl - Mend

computer-use-ootb-internal 0.0.179__py3-none-any.whl → 0.0.180__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

computer_use_ootb_internal/preparation/word_prepare.py ADDED Viewed

@@ -0,0 +1,99 @@
+import os
+import platform
+import subprocess
+import logging
+from pathlib import Path
+import time
+log = logging.getLogger(__name__)
+def run_preparation(state):
+    """
+    Performs environment preparation specific to Word on Windows.
+    Opens a specific template file located on the user's desktop and maximizes the window.
+    Kills existing Word processes first.
+    """
+    if platform.system() != "Windows":
+        log.warning("Word preparation skipped: Not running on Windows.")
+        return
+    log.info(f"Word preparation: Starting on Windows platform...")
+    try:
+        # Determine the desktop path for Windows
+        try:
+            username = os.environ.get("USERNAME", "")
+            if not username:
+                log.error("Could not determine Windows username from environment")
+                return
+            log.info(f"Using Windows username: {username}")
+            desktop_path = Path(f"C:/Users/{username}/Desktop")
+            if not desktop_path.exists():
+                log.error(f"Desktop path not found at: {desktop_path}")
+                alt_path = Path(f"C:/Documents and Settings/{username}/Desktop")
+                if alt_path.exists():
+                    desktop_path = alt_path
+                    log.info(f"Using alternative desktop path: {desktop_path}")
+                else:
+                    log.error("Failed to find user's desktop directory")
+                    return
+        except Exception as e:
+            log.error(f"Error determining Windows user desktop: {e}", exc_info=True)
+            return
+        # Construct path to template file
+        template_file = desktop_path / "template.docx" # Changed extension
+        log.info(f"Looking for template file at: {template_file}")
+        if not template_file.exists():
+            log.error(f"Template file not found at: {template_file}")
+            return
+        # --- Kill existing Word processes ---
+        log.info("Attempting to close existing Microsoft Word processes...")
+        try:
+            # Command to forcefully terminate Word processes by image name
+            kill_cmd = ['taskkill', '/F', '/IM', 'WINWORD.EXE'] # Changed process name
+            kill_result = subprocess.run(kill_cmd,
+                                       capture_output=True, text=True, check=False)
+            # Check taskkill result
+            if kill_result.returncode == 0:
+                log.info("Successfully sent termination signal to WINWORD.EXE processes.")
+            elif "not found" in kill_result.stderr.lower() or "not found" in kill_result.stdout.lower():
+                 log.info("No running WINWORD.EXE processes found to close.")
+            else:
+                 log.warning(f"taskkill command finished with return code {kill_result.returncode}. Output: {kill_result.stdout} Stderr: {kill_result.stderr}")
+            time.sleep(2)
+        except FileNotFoundError:
+            log.error("Error: 'taskkill' command not found. Make sure it's in the system PATH.")
+        except Exception as e:
+            log.error(f"Error occurred while trying to close Word: {e}", exc_info=True)
+        # --- End of kill process ---
+        # Open the file with Word maximized on Windows
+        log.info(f"Attempting to open {template_file} with Word maximized on Windows...")
+        try:
+            # Use start command with /max flag on Windows
+            cmd = ['cmd', '/c', 'start', '/max', 'winword', str(template_file)] # Changed app name
+            # Temporarily commented out file opening
+            # result = subprocess.run(cmd, check=False, capture_output=True, text=True)
+            log.info(f"(Skipped) Would open file with command: {' '.join(cmd)}")
+            result = None # Set result to None or mock success if needed elsewhere
+            # if result and result.returncode == 0:
+            #     log.info(f"Successfully launched Word maximized with {template_file}")
+            # else:
+            #     log.error(f"Error opening Word: {result.stderr.strip() if result else 'Command not run'}")
+            #     if result and result.stdout:
+            #         log.error(f"Stdout from start command: {result.stdout.strip()}")
+        except FileNotFoundError:
+             log.error("Error: 'cmd' or 'start' command not found. Ensure system PATH is configured correctly.")
+        except Exception as e:
+            log.error(f"Exception opening Word on Windows: {e}", exc_info=True)
+    except Exception as e:
+        log.error(f"An unexpected error occurred during Word preparation: {e}", exc_info=True)

computer_use_ootb_internal/run_teachmode_ootb_args.py CHANGED Viewed

@@ -1,237 +1,237 @@
-import argparse
-import time
-import json
-import platform
-import uuid
-import base64
-import datetime
-from datetime import datetime, timedelta, timezone
-from computer_use_ootb_internal.computer_use_demo.executor.teachmode_executor import TeachmodeExecutor
-from computer_use_ootb_internal.computer_use_demo.gui_agent.llm_utils.llm_utils import is_image_path
-from computer_use_ootb_internal.computer_use_demo.gui_agent.gui_parser.simple_parser.utils import get_screen_resize_factor
-from computer_use_ootb_internal.computer_use_demo.tools.aws_request import send_request_to_server
-from computer_use_ootb_internal.computer_use_demo.gui_agent.gui_parser.uia_tools.screenshot_service import get_screenshot_external_cmd
-utc_plus_8 = timezone(timedelta(hours=8))
-def simple_teachmode_sampling_loop(
-    model: str,
-    task: str,
-    api_keys: dict = None,
-    action_history: list[dict] = None,
-    selected_screen: int = 0,
-    user_id: str = None,
-    trace_id: str = None,
-    server_url: str = "http://localhost:5000/generate_action",
-    max_steps: int = 20,
-    full_screen_game_mode: int = 0,  # 0: disabled, 1: starrail, 2: starrail browser
-):
-    """
-    Synchronous sampling loop for assistant/tool interactions in 'teach mode'.
-
-    This is a generator. Each iteration captures a screenshot, posts it with the
-    task and the action history to ``server_url``, yields the returned
-    observation and reasoning as text messages, then runs the returned action
-    through ``TeachmodeExecutor`` and yields every message it produces.
-
-    The loop stops when the server returns a ``STOP`` action, when the server
-    gives no response, when the response lacks an expected key, or when
-    ``step_count`` (which starts at 1) reaches ``max_steps``.
-
-    ``model`` is not referenced in the body. A list passed as
-    ``action_history`` is appended to in place.
-    """
-    # Initialize action_history if it's None
-    if action_history is None:
-        action_history = []
-    # if platform.system() != "Windows":
-    #     raise ValueError("Teach mode is only supported on Windows.")
-    # # Set StarRail mode based on input parameter
-    # # 0: disabled, 1: starrail, 2: starrail browser
-    # full_screen_game_mode = 0
-    # # TODO: set full_screen_game_mode adaptively
-    # if "star_rail" in user_id or "star_rail" in user_id:
-    #     full_screen_game_mode = 1
-    # if "star_rail_dev" in trace_id or "star_rail_dev" in user_id or "hero_case" in user_id or "official" in user_id:
-    #     full_screen_game_mode = 2
-    print(f"Full Screen Game Mode: {full_screen_game_mode}")
-    executor = TeachmodeExecutor(
-        selected_screen=selected_screen,
-        full_screen_game_mode=full_screen_game_mode,
-    )
-    # Timestamp is taken in UTC+8 and becomes part of the task id.
-    timestamp = datetime.now(utc_plus_8).strftime("%m%d-%H%M%S")
-    step_count = 1
-    unique_task_id = f"{timestamp}_uid_{user_id}_tid_{trace_id}_{str(uuid.uuid4())[:6]}"
-    print("[simple_teachmode_sampling_loop] starting task: ", task)
-    print(f"[simple_teachmode_sampling_loop] unique_task_id: {unique_task_id}")
-    # NOTE(review): starting at 1 with `<` means at most max_steps - 1 iterations
-    # run; confirm whether that is intended.
-    while step_count < max_steps:
-        print(f"step_count: {step_count}")
-        # Pause briefly so we don't spam screenshots
-        time.sleep(1)
-        # UIA data is only requested when full-screen game mode is disabled (0).
-        uia_meta, sc_path = get_screenshot_external_cmd(
-            selected_screen=selected_screen,
-            capture_uia_data=full_screen_game_mode==0
-        )
-        # yield {"role": "assistant", "content": "screenshot", "type": "action", "action_type": "screenshot"}
-        if is_image_path(sc_path):
-            # yield {"role": "assistant", "content": sc_path, "type": "image", "action_type": "screenshot"}
-            with open(sc_path, "rb") as image_file:
-                sc_base64 = base64.b64encode(image_file.read()).decode('utf-8')
-            yield {"role": "assistant", "content": sc_base64, "type": "image_base64", "action_type": "screenshot"}
-        payload = {
-            "task_id": unique_task_id,
-            "uia_data": uia_meta,
-            "screenshot_path": sc_path,
-            "query": task,
-            "action_history": action_history,
-            "mode": "teach",
-            "user_id": user_id,
-            "trace_id": trace_id,
-            "scale_factor": get_screen_resize_factor(),
-            "os_name": platform.system(),
-            "api_keys": api_keys,
-        }
-        # Send request to Marbot Run server
-        infer_server_response = send_request_to_server(payload, server_url)
-        # infer_server_response = {
-        #     'status': 'success',
-        #     'generated_plan': plan_details,
-        #     'generated_action': action,
-        #     'todo_md': todo_md_content,
-        #     'milestones': milestones,
-        #     'current_step': current_step,
-        # }
-        if infer_server_response is None:
-            print("No response from Marbot Run server. Exiting.")
-            yield {"role": "assistant", "content": "No response from Marbot Run server. Exiting.", "type": "error"}
-            # This rebinds the local name only; a list passed in by the caller is not emptied.
-            action_history = []
-            break
-        try:
-            step_plan = infer_server_response["generated_plan"]
-            step_plan_observation = step_plan["observation"]
-            step_plan_reasoning = step_plan["reasoning"]
-            step_plan_info = step_plan["step_info"]
-            step_action = infer_server_response["generated_action"]["content"]
-            step_traj_idx = infer_server_response["current_traj_step"]
-            # chat_visable_content = f"{step_plan_observation}{step_plan_reasoning}"
-        except Exception as e:
-            print("Error parsing generated_action content:", e)
-            yield {"role": "assistant", "content": "Error parsing response from Marbot Run server. Exiting.", "type": "error"}
-            break
-        yield {"role": "assistant", "content": step_plan_observation, "type": "text"}
-        yield {"role": "assistant", "content": step_plan_reasoning, "type": "text"}
-        if step_action.get("action") == "STOP":
-            # Unlike the per-step screenshot above, this path is opened without an is_image_path check.
-            final_sc, final_sc_path = get_screenshot_external_cmd(selected_screen=selected_screen)
-            with open(final_sc_path, "rb") as image_file:
-                final_sc_base64 = base64.b64encode(image_file.read()).decode('utf-8')
-            yield {"role": "assistant", "content": "Task completed. Final screenshot:", "type": "text"}
-            yield {"role": "assistant", "content": final_sc_base64, "type": "image_base64", "action_type": "screenshot"}
-            # reset action history (local rebinding only)
-            action_history = []
-            break
-        # The history entry is a formatted string, appended to the list in place.
-        action_history.append(f"Executing guidance trajectory step [{step_traj_idx}]: {{Plan: {step_plan_info}, Action: {step_action}}}\n")
-        for exec_message in executor({"role": "assistant", "content": step_action}):
-            yield exec_message
-        step_count += 1
-    # reset action history (local rebinding only; nothing reads it afterwards)
-    action_history = []
-# Command-line entry point: parse the arguments, run the sampling loop and
-# print every message it yields.
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Run a synchronous sampling loop for assistant/tool interactions in teach-mode."
-    )
-    parser.add_argument(
-        "--model",
-        default="teach-mode",
-        help="The model to use",
-    )
-    parser.add_argument(
-        "--task",
-        default="Click on the Google Chorme icon",
-        help="The task to be completed by the assistant (e.g., 'Complete some data extraction.').",
-    )
-    parser.add_argument(
-        "--selected_screen",
-        type=int,
-        default=0,
-        help="Index of the screen to capture (default=0).",
-    )
-    # NOTE(review): the help text names 'liziqi' but the actual default is "star_rail".
-    parser.add_argument(
-        "--user_id",
-        default="star_rail",
-        help="User ID for the session (default='liziqi').",
-    )
-    # NOTE(review): the help text names 'default_trace' but the actual default differs.
-    parser.add_argument(
-        "--trace_id",
-        default="ONG_JING_JIE_007-0213_0",
-        help="Trace ID for the session (default='default_trace').",
-    )
-    # Parsed but not read below: API key loading is commented out.
-    parser.add_argument(
-        "--api_key_file",
-        default="api_key.json",
-        help="Path to the JSON file containing API keys (default='api_key.json').",
-    )
-    parser.add_argument(
-        "--max_steps",
-        type=int,
-        default=20,
-        help="The maximum number of steps to take.",
-    )
-    parser.add_argument(
-        "--full_screen_game_mode",
-        type=int,
-        default=0,
-        help="Full screen game mode (0: disabled, 1: starrail, 2: starrail browser)",
-    )
-    args = parser.parse_args()
-    # # Load API keys
-    # with open(args.api_key_file, "r") as file:
-    #     api_keys = json.load(file)
-    api_keys = None
-    print(f"Starting task: {args.task}")
-    # Execute the sampling loop
-    sampling_loop = simple_teachmode_sampling_loop(
-        model=args.model,
-        task=args.task,
-        selected_screen=args.selected_screen,
-        user_id=args.user_id,
-        trace_id=args.trace_id,
-        api_keys=api_keys,
-        max_steps=args.max_steps,
-        full_screen_game_mode=args.full_screen_game_mode,
-    )
-    # Print each step result, pausing one second between messages
-    for step in sampling_loop:
-        print(step)
-        time.sleep(1)
-    print(f"Task '{args.task}' completed. Thanks for using Teachmode-OOTB.")
+import argparse
+import time
+import json
+import platform
+import uuid
+import base64
+import datetime
+from datetime import datetime, timedelta, timezone
+from computer_use_ootb_internal.computer_use_demo.executor.teachmode_executor import TeachmodeExecutor
+from computer_use_ootb_internal.computer_use_demo.gui_agent.llm_utils.llm_utils import is_image_path
+from computer_use_ootb_internal.computer_use_demo.gui_agent.gui_parser.simple_parser.utils import get_screen_resize_factor
+from computer_use_ootb_internal.computer_use_demo.tools.aws_request import send_request_to_server
+from computer_use_ootb_internal.computer_use_demo.gui_agent.gui_parser.uia_tools.screenshot_service import get_screenshot_external_cmd
+utc_plus_8 = timezone(timedelta(hours=8))
+def simple_teachmode_sampling_loop(
+    model: str,
+    task: str,
+    api_keys: dict = None,
+    action_history: list[dict] = None,
+    selected_screen: int = 0,
+    user_id: str = None,
+    trace_id: str = None,
+    server_url: str = "http://localhost:5000/generate_action",
+    max_steps: int = 20,
+    full_screen_game_mode: int = 0,  # 0: disabled, 1: starrail, 2: starrail browser
+):
+    """
+    Synchronous sampling loop for assistant/tool interactions in 'teach mode'.
+    """
+    # Initialize action_history if it's None
+    if action_history is None:
+        action_history = []
+    # if platform.system() != "Windows":
+    #     raise ValueError("Teach mode is only supported on Windows.")
+    # # Set StarRail mode based on input parameter
+    # # 0: disabled, 1: starrail, 2: starrail browser
+    # full_screen_game_mode = 0
+    # # TODO: set full_screen_game_mode adaptively
+    # if "star_rail" in user_id or "star_rail" in user_id:
+    #     full_screen_game_mode = 1
+    # if "star_rail_dev" in trace_id or "star_rail_dev" in user_id or "hero_case" in user_id or "official" in user_id:
+    #     full_screen_game_mode = 2
+    print(f"Full Screen Game Mode: {full_screen_game_mode}")
+    executor = TeachmodeExecutor(
+        selected_screen=selected_screen,
+        full_screen_game_mode=full_screen_game_mode,
+    )
+    timestamp = datetime.now(utc_plus_8).strftime("%m%d-%H%M%S")
+    step_count = 1
+    unique_task_id = f"{timestamp}_uid_{user_id}_tid_{trace_id}_{str(uuid.uuid4())[:6]}"
+    print("[simple_teachmode_sampling_loop] starting task: ", task)
+    print(f"[simple_teachmode_sampling_loop] unique_task_id: {unique_task_id}")
+    while step_count < max_steps:
+        print(f"step_count: {step_count}")
+        # Pause briefly so we don't spam screenshots
+        time.sleep(1)
+        uia_meta, sc_path = get_screenshot_external_cmd(
+            selected_screen=selected_screen,
+            capture_uia_data=full_screen_game_mode==0
+        )
+        # yield {"role": "assistant", "content": "screenshot", "type": "action", "action_type": "screenshot"}
+        if is_image_path(sc_path):
+            # yield {"role": "assistant", "content": sc_path, "type": "image", "action_type": "screenshot"}
+            with open(sc_path, "rb") as image_file:
+                sc_base64 = base64.b64encode(image_file.read()).decode('utf-8')
+            yield {"role": "assistant", "content": sc_base64, "type": "image_base64", "action_type": "screenshot"}
+        payload = {
+            "task_id": unique_task_id,
+            "uia_data": uia_meta,
+            "screenshot_path": sc_path,
+            "query": task,
+            "action_history": action_history,
+            "mode": "teach",
+            "user_id": user_id,
+            "trace_id": trace_id,
+            "scale_factor": get_screen_resize_factor(),
+            "os_name": platform.system(),
+            "api_keys": api_keys,
+        }
+        # Send request to Marbot Run server
+        infer_server_response = send_request_to_server(payload, server_url)
+        # infer_server_response = {
+        #     'status': 'success',
+        #     'generated_plan': plan_details,
+        #     'generated_action': action,
+        #     'todo_md': todo_md_content,
+        #     'milestones': milestones,
+        #     'current_step': current_step,
+        # }
+        if infer_server_response is None:
+            print("No response from Marbot Run server. Exiting.")
+            yield {"role": "assistant", "content": "No response from Marbot Run server. Exiting.", "type": "error"}
+            action_history = []
+            break
+        try:
+            step_plan = infer_server_response["generated_plan"]
+            step_plan_observation = step_plan["observation"]
+            step_plan_reasoning = step_plan["reasoning"]
+            step_plan_info = step_plan["step_info"]
+            step_action = infer_server_response["generated_action"]["content"]
+            step_traj_idx = infer_server_response["current_traj_step"]
+            # chat_visable_content = f"{step_plan_observation}{step_plan_reasoning}"
+        except Exception as e:
+            print("Error parsing generated_action content:", e)
+            yield {"role": "assistant", "content": "Error parsing response from Marbot Run server. Exiting.", "type": "error"}
+            break
+        yield {"role": "assistant", "content": step_plan_observation, "type": "text"}
+        yield {"role": "assistant", "content": step_plan_reasoning, "type": "text"}
+        if step_action.get("action") == "STOP":
+            final_sc, final_sc_path = get_screenshot_external_cmd(selected_screen=selected_screen)
+            with open(final_sc_path, "rb") as image_file:
+                final_sc_base64 = base64.b64encode(image_file.read()).decode('utf-8')
+            yield {"role": "assistant", "content": "Task completed. Final screenshot:", "type": "text"}
+            yield {"role": "assistant", "content": final_sc_base64, "type": "image_base64", "action_type": "screenshot"}
+            # reset action history
+            action_history = []
+            break
+        action_history.append(f"Executing guidance trajectory step [{step_traj_idx}]: {{Plan: {step_plan_info}, Action: {step_action}}}\n")
+        for exec_message in executor({"role": "assistant", "content": step_action}):
+            yield exec_message
+        step_count += 1
+    # reset action history
+    action_history = []
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Run a synchronous sampling loop for assistant/tool interactions in teach-mode."
+    )
+    parser.add_argument(
+        "--model",
+        default="teach-mode",
+        help="The model to use",
+    )
+    parser.add_argument(
+        "--task",
+        default="Click on the Google Chorme icon",
+        help="The task to be completed by the assistant (e.g., 'Complete some data extraction.').",
+    )
+    parser.add_argument(
+        "--selected_screen",
+        type=int,
+        default=0,
+        help="Index of the screen to capture (default=0).",
+    )
+    parser.add_argument(
+        "--user_id",
+        default="star_rail",
+        help="User ID for the session (default='liziqi').",
+    )
+    parser.add_argument(
+        "--trace_id",
+        default="ONG_JING_JIE_007-0213_0",
+        help="Trace ID for the session (default='default_trace').",
+    )
+    parser.add_argument(
+        "--api_key_file",
+        default="api_key.json",
+        help="Path to the JSON file containing API keys (default='api_key.json').",
+    )
+    parser.add_argument(
+        "--max_steps",
+        type=int,
+        default=20,
+        help="The maximum number of steps to take.",
+    )
+    parser.add_argument(
+        "--full_screen_game_mode",
+        type=int,
+        default=0,
+        help="Full screen game mode (0: disabled, 1: starrail, 2: starrail browser)",
+    )
+    args = parser.parse_args()
+    # # Load API keys
+    # with open(args.api_key_file, "r") as file:
+    #     api_keys = json.load(file)
+    api_keys = None
+    print(f"Starting task: {args.task}")
+    # Execute the sampling loop
+    sampling_loop = simple_teachmode_sampling_loop(
+        model=args.model,
+        task=args.task,
+        selected_screen=args.selected_screen,
+        user_id=args.user_id,
+        trace_id=args.trace_id,
+        api_keys=api_keys,
+        max_steps=args.max_steps,
+        full_screen_game_mode=args.full_screen_game_mode,
+    )
+    # # Print each step result
+    for step in sampling_loop:
+        print(step)
+        time.sleep(1)
+    print(f"Task '{args.task}' completed. Thanks for using Teachmode-OOTB.")

computer-use-ootb-internal 0.0.179__py3-none-any.whl → 0.0.180__py3-none-any.whl

computer-use-ootb-internal 0.0.179__py3-none-any.whl → 0.0.180__py3-none-any.whl