PyPI - clerk-sdk - Versions diffs - 0.1.8__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

clerk-sdk 0.1.8__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

clerk/base.py +94 -0
clerk/client.py +3 -104
clerk/decorator/models.py +1 -0
clerk/decorator/task_decorator.py +4 -1
clerk/gui_automation/__init__.py +0 -0
clerk/gui_automation/action_model/__init__.py +0 -0
clerk/gui_automation/action_model/model.py +126 -0
clerk/gui_automation/action_model/utils.py +26 -0
clerk/gui_automation/client.py +144 -0
clerk/gui_automation/client_actor/__init__.py +4 -0
clerk/gui_automation/client_actor/client_actor.py +178 -0
clerk/gui_automation/client_actor/exception.py +22 -0
clerk/gui_automation/client_actor/model.py +192 -0
clerk/gui_automation/decorators/__init__.py +1 -0
clerk/gui_automation/decorators/gui_automation.py +109 -0
clerk/gui_automation/exceptions/__init__.py +0 -0
clerk/gui_automation/exceptions/modality/__init__.py +0 -0
clerk/gui_automation/exceptions/modality/exc.py +46 -0
clerk/gui_automation/exceptions/websocket.py +6 -0
clerk/gui_automation/ui_actions/__init__.py +1 -0
clerk/gui_automation/ui_actions/actions.py +781 -0
clerk/gui_automation/ui_actions/base.py +200 -0
clerk/gui_automation/ui_actions/support.py +68 -0
clerk/gui_automation/ui_state_inspector/__init__.py +0 -0
clerk/gui_automation/ui_state_inspector/gui_vision.py +184 -0
clerk/gui_automation/ui_state_inspector/models.py +184 -0
clerk/gui_automation/ui_state_machine/__init__.py +11 -0
clerk/gui_automation/ui_state_machine/ai_recovery.py +110 -0
clerk/gui_automation/ui_state_machine/decorators.py +71 -0
clerk/gui_automation/ui_state_machine/exceptions.py +42 -0
clerk/gui_automation/ui_state_machine/models.py +40 -0
clerk/gui_automation/ui_state_machine/state_machine.py +838 -0
clerk/models/remote_device.py +7 -0
clerk/utils/__init__.py +0 -0
clerk/utils/logger.py +118 -0
clerk/utils/save_artifact.py +35 -0
{clerk_sdk-0.1.8.dist-info → clerk_sdk-0.2.0.dist-info}/METADATA +11 -1
clerk_sdk-0.2.0.dist-info/RECORD +48 -0
clerk_sdk-0.1.8.dist-info/RECORD +0 -15
{clerk_sdk-0.1.8.dist-info → clerk_sdk-0.2.0.dist-info}/WHEEL +0 -0
{clerk_sdk-0.1.8.dist-info → clerk_sdk-0.2.0.dist-info}/licenses/LICENSE +0 -0
{clerk_sdk-0.1.8.dist-info → clerk_sdk-0.2.0.dist-info}/top_level.txt +0 -0

clerk/gui_automation/client_actor/client_actor.py ADDED Viewed

@@ -0,0 +1,178 @@
+import asyncio
+import json
+import os
+from typing import Any, Dict, Union
+import pydantic
+import requests
+from .model import (
+    ExecutePayload,
+    DeleteFilesExecutePayload,
+    ApplicationExecutePayload,
+    SaveFilesExecutePayload,
+    WindowExecutePayload,
+    GetFileExecutePayload,
+)
+import backoff
+from .model import PerformActionResponse, ActionStates
+from .exception import PerformActionException, GetScreenError
+async def _perform_action_ws(payload: Dict) -> PerformActionResponse:
+    """Send one action request over the shared WebSocket and return its result.
+    The exchange has two steps: the peer first answers with the literal string
+    "OK" (the ACK), then with a JSON document that is parsed into a
+    PerformActionResponse. Each receive waits at most 90 seconds.
+    Args:
+        payload (Dict): The JSON-serialisable request to be sent.
+    Returns:
+        PerformActionResponse: The parsed result message.
+    Raises:
+        RuntimeError: If no WebSocket is open, if the ACK is not "OK", or if
+            either receive times out.
+    """
+    # Imported at call time so the current binding of the module global is
+    # read; the gui_automation wrapper rebinds it when it connects.
+    from ..decorators.gui_automation import global_ws
+    if not global_ws:
+        raise RuntimeError("The Websocket has not been initiated.")
+    # 1. Send the payload request
+    await global_ws.send(json.dumps(payload))
+    try:
+        # 2. Wait for the ACK, then for the actual result message
+        ack = await asyncio.wait_for(global_ws.recv(), 90)
+        if ack != "OK":
+            raise RuntimeError("Received ACK != OK")
+        raw_result = await asyncio.wait_for(global_ws.recv(), 90)
+        return PerformActionResponse(**json.loads(raw_result))
+    except asyncio.TimeoutError:
+        # The same message is used for a timeout on the ACK and on the result.
+        raise RuntimeError("The ack message did not arrive.")
+async def _get_screen_async() -> str:
+    """
+    Asynchronously request a screenshot over the WebSocket connection.
+    The request is built from the PROC_ID, REMOTE_DEVICE_NAME and HEADLESS
+    environment variables (HEADLESS defaults to "True").
+    Returns:
+        str: The base64 encoded screen image.
+    Raises:
+        GetScreenError: If the ACK handshake fails or times out, or if the
+            response carries no screenshot. get_screen() retries on this type.
+        RuntimeError: If the WebSocket has not been initiated.
+    """
+    payload = {
+        "proc_inst_id": os.getenv("PROC_ID"),
+        "client_name": os.getenv("REMOTE_DEVICE_NAME"),
+        "headless": os.getenv("HEADLESS", "True").lower() == "true",
+        "action": {"action_type": "screenshot"},
+    }
+    try:
+        action_info = await _perform_action_ws(payload)
+    except RuntimeError as e:
+        # Only handshake failures are converted into the retryable error type.
+        # Keep the real reason in the message and chain the original error
+        # instead of always reporting a missing ACK.
+        if str(e) in (
+            "The ack message did not arrive.",
+            "Received ACK != OK",
+        ):
+            raise GetScreenError(str(e)) from e
+        raise  # anything else propagates unchanged
+    if action_info.screen_b64 is not None:
+        return action_info.screen_b64
+    raise GetScreenError("The response did not contain a screenshot.")
+@backoff.on_exception(
+    backoff.expo,
+    (requests.RequestException, pydantic.ValidationError, GetScreenError),
+    max_time=120,
+)
+def get_screen() -> str:
+    """
+    Synchronously fetch the VDI screen as a base64 screenshot.
+    Runs _get_screen_async() to completion on the current event loop. The
+    backoff decorator retries the call with exponential delays, bounded by
+    max_time=120, whenever one of the listed exception types is raised.
+    Returns:
+        str: The base64 representation of the screenshot.
+    Raises:
+        GetScreenError: If no screenshot was obtained once retrying gives up.
+        RuntimeError: If the request to the VDI screen fails.
+    """
+    current_loop = asyncio.get_event_loop()
+    screenshot_task = current_loop.create_task(_get_screen_async())
+    return current_loop.run_until_complete(screenshot_task)
+async def _perform_action_async(
+    payload: Union[
+        ExecutePayload,
+        ApplicationExecutePayload,
+        WindowExecutePayload,
+        SaveFilesExecutePayload,
+        DeleteFilesExecutePayload,
+        GetFileExecutePayload,
+    ],
+) -> Any:
+    """
+    Wrap an action payload in a request envelope, send it and unwrap the result.
+    The envelope carries the PROC_ID and REMOTE_DEVICE_NAME environment
+    variables, the HEADLESS flag (defaults to "True") and the dumped payload.
+    Args:
+        payload: One of the supported payload models describing the action.
+    Returns:
+        Any: The return value reported for the action.
+    Raises:
+        PerformActionException: If the action is reported as failed; carries
+            the message from the response.
+    """
+    headless = os.getenv("HEADLESS", "True").lower() == "true"
+    req_payload: Dict = {
+        "proc_inst_id": os.getenv("PROC_ID"),
+        "client_name": os.getenv("REMOTE_DEVICE_NAME"),
+        "headless": headless,
+        "action": payload.model_dump(),
+    }
+    result = await _perform_action_ws(req_payload)
+    if result.state != ActionStates.failed:
+        return result.return_value
+    raise PerformActionException(result.message)
+def perform_action(
+    payload: Union[
+        ExecutePayload,
+        ApplicationExecutePayload,
+        WindowExecutePayload,
+        SaveFilesExecutePayload,
+        DeleteFilesExecutePayload,
+        GetFileExecutePayload,
+    ],
+) -> Any:
+    """
+    Synchronously perform an action on the VDI client.
+    Runs _perform_action_async() to completion on the current event loop.
+    Args:
+        payload (Union[ExecutePayload, ApplicationExecutePayload, WindowExecutePayload, SaveFilesExecutePayload, DeleteFilesExecutePayload, GetFileExecutePayload]): The payload containing the details of the action to be performed.
+    Raises:
+        PerformActionException: If the action fails.
+        RuntimeError: If the request to perform the action fails.
+    Returns:
+        Any: The return value reported for the action.
+    """
+    current_loop = asyncio.get_event_loop()
+    action_task = current_loop.create_task(_perform_action_async(payload))
+    return current_loop.run_until_complete(action_task)

clerk/gui_automation/client_actor/exception.py ADDED Viewed

@@ -0,0 +1,22 @@
+class PerformActionException(Exception):
+    """Raised when a remote action is reported as failed."""
+class GetScreenError(Exception):
+    """Raised when a screenshot of the remote screen could not be obtained."""
+# Re-exported rather than redefined: the gui_automation decorator raises the
+# class from exceptions.websocket, and a second class with the same name here
+# would be a different type that `except` clauses on either side would miss.
+from ..exceptions.websocket import WebSocketConnectionFailed  # noqa: E402,F401

clerk/gui_automation/client_actor/model.py ADDED Viewed

@@ -0,0 +1,192 @@
+from typing import Any, List, Literal, Optional, Union
+from pydantic import BaseModel, Field
+from enum import Enum
+# Closed set of action names accepted by ExecutePayload.action_type.
+ActionTypes = Literal[
+    "left_click",
+    "right_click",
+    "middle_click",
+    "double_click",
+    "send_keys",
+    "press_keys",
+    "hot_keys",
+    "paste_text",
+    "get_text",
+    "scroll",
+]
+class ActionStates(Enum):
+    """
+    Enumeration class representing the possible states of an action.
+    Attributes:
+        completed: Completed action state; its value is the string "COMPLETED".
+        failed: Failed action state; its value is the string "FAILED".
+    """
+    completed = "COMPLETED"
+    failed = "FAILED"
+class ExecutePayload(BaseModel):
+    """
+    A class representing the payload for executing various actions.
+    Attributes:
+        action_type (ActionTypes): The type of action to be performed. It can be one of the following:
+            - "left_click": Perform a left click action.
+            - "right_click": Perform a right click action.
+            - "middle_click": Perform a middle click action.
+            - "double_click": Perform a double click action.
+            - "send_keys": Send a sequence of keys.
+            - "press_keys": Press and hold a sequence of keys.
+            - "hot_keys": Perform a combination of hot keys.
+            - "paste_text", "get_text", "scroll": also accepted by ActionTypes.
+        coordinates (Union[List[int], List[float]]): The coordinates of the action. Default is an empty list.
+        keys (Optional[Union[str, List[str]]]): The keys to be sent or pressed. Default is None.
+        key_separator (Optional[str]): Default is None. NOTE(review): presumably the
+            delimiter used to split `keys` - confirm against the agent.
+        followed_by (Optional[str]): Default is None. NOTE(review): presumably a key
+            sent after the action - confirm against the agent.
+        interval (float): The interval between each action. Default is 0.05 seconds.
+        clicks (Optional[int]): Default is None. NOTE(review): presumably a click or
+            scroll count - confirm against the agent.
+    """
+    action_type: ActionTypes
+    coordinates: Union[List[int], List[float]] = Field(default=[])
+    keys: Optional[Union[str, List[str]]] = Field(default=None)
+    key_separator: Optional[str] = Field(default=None)
+    followed_by: Optional[str] = Field(default=None)
+    interval: float = Field(default=0.05)
+    clicks: Optional[int] = None
+class WindowExecutePayload(BaseModel):
+    """
+    A class representing the payload for executing window-related actions.
+    Attributes:
+        action_type (Literal[str]): The type of window action to be performed. It can be one of the following:
+            - "maximize_window": Maximize the window.
+            - "minimize_window": Minimize the window.
+            - "close_window": Close the window.
+            - "activate_window": Activate the window.
+        window_name (str): The name of the window on which the action should be performed. Required.
+        timeout (int): The timeout value in seconds for the action to complete. Default is 10 seconds.
+    """
+    action_type: Literal[
+        "maximize_window",
+        "minimize_window",
+        "close_window",
+        "activate_window",
+    ]
+    window_name: str
+    timeout: int = Field(default=10)
+class ApplicationExecutePayload(BaseModel):
+    """
+    A class representing the payload for executing an application-related action.
+    Attributes:
+        action_type (Literal[str]): The type of application action to be performed. Either "open_app" or "force_close_app".
+        app_path (str): The absolute path of the application. Default is an empty string.
+        app_window_name (str): The name of the application window once it is open. Wildcard logic is enabled. Default is an empty string.
+        timeout (int): The timeout value in seconds for the action to complete. Default is 60 seconds.
+        process_name (str): Process name from task manager. Example: process.exe. Default is an empty string.
+    """
+    action_type: Literal["open_app", "force_close_app"]
+    app_path: str = Field(description="Absolute path of the application", default="")
+    app_window_name: str = Field(
+        description="Name of the application window once open. Wildcard logic enabled.",
+        default="",
+    )
+    timeout: int = Field(default=60)
+    process_name: str = Field(
+        description="Process name from task manager. Example: process.exe", default=""
+    )
+class FileDetails(BaseModel):
+    """
+    A class representing the details of a file.
+    Both fields are required.
+    Attributes:
+        filename (str): The filename of the file.
+        value (str): The base64 string representation of the binary file.
+    """
+    filename: str = Field(description="Filename of the file")
+    value: str = Field(description="Base64 string representation of the binary file")
+class SaveFilesExecutePayload(BaseModel):
+    """
+    A class representing the payload for saving files.
+    All fields are required.
+    Attributes:
+        action_type (Literal["save_files"]): The action type indicating the payload is for saving files.
+        save_location (str): The location where the files will be saved.
+        files (List[FileDetails]): A list of FileDetails objects representing the files to be saved.
+    """
+    action_type: Literal["save_files"]
+    save_location: str
+    files: List[FileDetails]
+class DeleteFilesExecutePayload(BaseModel):
+    """
+    A class representing the payload for deleting files.
+    All fields are required.
+    Attributes:
+        action_type (Literal["delete_files"]): The action type indicating the payload is for deleting files.
+        files_location (List[str]): A list of file locations representing the files to be deleted.
+    """
+    action_type: Literal["delete_files"]
+    files_location: List[str]
+class GetFileExecutePayload(BaseModel):
+    """
+    A class representing the payload for executing a 'get_file' action.
+    All fields are required.
+    Attributes:
+        action_type (Literal["get_file"]): Specifies the action type as 'get_file'.
+        file_location (str): The location of the file to retrieve.
+    """
+    action_type: Literal["get_file"]
+    file_location: str
+class GetScreenResponse(BaseModel):
+    """
+    A class representing the response for getting a screen.
+    Attributes:
+        screen_b64 (str): The base64 encoded string representing the screen image. Required.
+    """
+    screen_b64: str
+class PerformActionResponse(BaseModel):
+    """
+    A class representing the response of performing an action.
+    Only `state` is required; every other field defaults to None.
+    Attributes:
+        id (Optional[str]): The ID of the action.
+        state (ActionStates): The state of the action.
+        message (Optional[str]): An optional message associated with the action.
+        return_value (Optional[Any]): A value that the action could return.
+        screen_b64 (Optional[str]): The base64 encoded screen image, when the response carries one.
+    """
+    id: Optional[str] = None
+    state: ActionStates
+    message: Optional[str] = None
+    return_value: Optional[Any] = None
+    screen_b64: Optional[str] = None
+class AllocateTargetResponse(BaseModel):
+    """
+    A class representing the response of allocating a target.
+    Attributes:
+        client (str): Required. NOTE(review): presumably the name of the allocated
+            client - confirm against the API.
+    """
+    client: str

clerk/gui_automation/decorators/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ from .gui_automation import gui_automation

clerk/gui_automation/decorators/gui_automation.py ADDED Viewed

@@ -0,0 +1,109 @@
+import asyncio
+import functools
+import logging
+import os
+from typing import Callable, Union
+from websockets.asyncio.client import connect, ClientConnection
+from websockets.protocol import State
+from clerk.gui_automation.client import RPAClerk
+from clerk.models.remote_device import RemoteDevice
+from clerk.decorator.models import ClerkCodePayload
+from ..exceptions.websocket import WebSocketConnectionFailed
+# Global handle to the live connection (if any). Rebound by the gui_automation
+# wrapper once it has connected.
+global_ws: Union[ClientConnection, None] = None
+# Shared API client, instantiated once when this module is imported.
+clerk_client = RPAClerk()
+# Base endpoint; the wrapper appends "/<device name>/publisher?token=<wss_token>".
+wss_uri = "wss://agent-manager.f-one.group/action"
+def _allocate_remote_device(
+    clerk_client: RPAClerk, group_name: str, run_id: str
+) -> RemoteDevice:
+    """Allocate a remote device for the run and export its id and name.
+    Args:
+        clerk_client (RPAClerk): Client used to request the allocation.
+        group_name (str): Device group to allocate from.
+        run_id (str): Identifier of the current run.
+    Returns:
+        RemoteDevice: The allocated device. Its id and name are also written to
+        the REMOTE_DEVICE_ID and REMOTE_DEVICE_NAME environment variables.
+    """
+    device = clerk_client.allocate_remote_device(group_name=group_name, run_id=run_id)
+    os.environ.update(
+        {"REMOTE_DEVICE_ID": device.id, "REMOTE_DEVICE_NAME": device.name}
+    )
+    return device
+def _deallocate_target(
+    clerk_client: RPAClerk, remote_device: RemoteDevice, run_id: str
+):
+    """Release the remote device and drop its environment variables.
+    Args:
+        clerk_client (RPAClerk): Client used to request the deallocation.
+        remote_device (RemoteDevice): The device to release.
+        run_id (str): Identifier of the current run.
+    """
+    clerk_client.deallocate_remote_device(remote_device=remote_device, run_id=run_id)
+    # Only reached when the deallocation call did not raise.
+    for env_key in ("REMOTE_DEVICE_ID", "REMOTE_DEVICE_NAME"):
+        os.environ.pop(env_key, None)
+def gui_automation():
+    """
+    Decorator factory for functions that take a ClerkCodePayload. The wrapper:
+      • Exports the run id as the PROC_ID environment variable,
+      • Allocates a remote device,
+      • Opens a WebSocket to the agent manager on a dedicated event loop,
+      • Passes control to the wrapped function,
+      • Cleans everything up afterwards, whether the function returned or raised.
+    Returns:
+        Callable: The decorator to apply to the function.
+    Raises:
+        ValueError: If REMOTE_DEVICE_GROUP is unset or empty when the factory is called.
+        WebSocketConnectionFailed: From the wrapper, if the socket is not OPEN after connecting.
+    """
+    group_name: Union[str, None] = os.getenv("REMOTE_DEVICE_GROUP")
+    if not group_name:
+        raise ValueError("REMOTE_DEVICE_GROUP environmental variable is required.")
+    async def connect_to_ws(uri: str) -> ClientConnection:
+        # Same knobs as before, just via the new connect()
+        return await connect(uri, max_size=2**23, ping_timeout=3600)
+    async def close_ws_connection(ws_conn: ClientConnection):
+        await ws_conn.close()
+    def decorator(func: Callable):
+        @functools.wraps(func)
+        def wrapper(payload: ClerkCodePayload, *args, **kwargs):
+            global global_ws
+            os.environ["PROC_ID"] = payload.run_id
+            try:
+                remote_device = _allocate_remote_device(
+                    clerk_client, group_name, payload.run_id
+                )
+                # Create a dedicated loop for the WebSocket work
+                event_loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(event_loop)
+                try:
+                    task = event_loop.create_task(
+                        connect_to_ws(
+                            f"{wss_uri}/{remote_device.name}/publisher"
+                            f"?token={remote_device.wss_token}"
+                        )
+                    )
+                    global_ws = event_loop.run_until_complete(task)
+                    if not (global_ws and global_ws.state is State.OPEN):
+                        raise WebSocketConnectionFailed()
+                    logging.debug("WebSocket connection established.")
+                    return func(payload, *args, **kwargs)
+                finally:
+                    # Each teardown step sits in its own finally so that a failure
+                    # in one of them (e.g. the deallocation call) cannot leak the
+                    # socket or the event loop. The original order is kept.
+                    try:
+                        _deallocate_target(clerk_client, remote_device, payload.run_id)
+                    finally:
+                        try:
+                            if global_ws and global_ws.state is State.OPEN:
+                                close_task = event_loop.create_task(
+                                    close_ws_connection(global_ws)
+                                )
+                                event_loop.run_until_complete(close_task)
+                                logging.debug("WebSocket connection closed.")
+                        finally:
+                            # Never leave a closed or stale handle behind.
+                            global_ws = None
+                            try:
+                                event_loop.run_until_complete(
+                                    event_loop.shutdown_asyncgens()
+                                )
+                            finally:
+                                event_loop.close()
+            finally:
+                # Removed on success as well as on failure, like the device variables.
+                os.environ.pop("PROC_ID", None)
+        return wrapper
+    return decorator

clerk/gui_automation/exceptions/__init__.py ADDED Viewed

File without changes

clerk/gui_automation/exceptions/modality/__init__.py ADDED Viewed

File without changes

clerk/gui_automation/exceptions/modality/exc.py ADDED Viewed

@@ -0,0 +1,46 @@
+class ModalityNotKnownError(Exception):
+    """
+    Raised when the modality of a target is not known or not supported.
+    Args:
+        message (str): The error text; defaults to a reminder of the allowed
+            modalities.
+    Example:
+        raise ModalityNotKnownError("The modality must be either 'text' or 'icon'")
+    """
+    def __init__(
+        self, message: str = "allowed modalities are: `text` | `icon`"
+    ) -> None:
+        super().__init__(message)
+class AnchorTypeError(Exception):
+    """
+    Raised when the anchor type is not valid or not supported.
+    Args:
+        message (str): The error text. Required; there is no default.
+    Example:
+        raise AnchorTypeError("The anchor type must be either 'text' or 'image'")
+    """
+    def __init__(self, message: str) -> None:
+        super().__init__(message)
+class TargetModalityError(Exception):
+    """
+    Raised when the modality of a target is not valid or not supported.
+    Args:
+        message (str): The error text; defaults to a reminder of the accepted
+            target forms.
+    Example:
+        raise TargetModalityError("target must be provided as either text (str) | image (ImageB64) | image path (str) or skipped")
+    """
+    def __init__(
+        self,
+        message: str = "target must be provided as either text (str) | image (ImageB64) | image path (str) or skipped",
+    ) -> None:
+        super().__init__(message)

clerk/gui_automation/exceptions/websocket.py ADDED Viewed

@@ -0,0 +1,6 @@
+class WebSocketConnectionFailed(Exception):
+    """Raised when the connection to the WebSocket was not successful."""

clerk/gui_automation/ui_actions/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ from .actions import *

clerk-sdk 0.1.8__py3-none-any.whl → 0.2.0__py3-none-any.whl

clerk-sdk 0.1.8__py3-none-any.whl → 0.2.0__py3-none-any.whl