PyPI - clerk-sdk - Versions diffs - 0.1.9__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

clerk-sdk 0.1.9__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

clerk/base.py +94 -0
clerk/client.py +3 -104
clerk/decorator/models.py +1 -0
clerk/decorator/task_decorator.py +1 -0
clerk/gui_automation/__init__.py +0 -0
clerk/gui_automation/action_model/__init__.py +0 -0
clerk/gui_automation/action_model/model.py +126 -0
clerk/gui_automation/action_model/utils.py +26 -0
clerk/gui_automation/client.py +144 -0
clerk/gui_automation/client_actor/__init__.py +4 -0
clerk/gui_automation/client_actor/client_actor.py +178 -0
clerk/gui_automation/client_actor/exception.py +22 -0
clerk/gui_automation/client_actor/model.py +192 -0
clerk/gui_automation/decorators/__init__.py +1 -0
clerk/gui_automation/decorators/gui_automation.py +109 -0
clerk/gui_automation/exceptions/__init__.py +0 -0
clerk/gui_automation/exceptions/modality/__init__.py +0 -0
clerk/gui_automation/exceptions/modality/exc.py +46 -0
clerk/gui_automation/exceptions/websocket.py +6 -0
clerk/gui_automation/ui_actions/__init__.py +1 -0
clerk/gui_automation/ui_actions/actions.py +781 -0
clerk/gui_automation/ui_actions/base.py +200 -0
clerk/gui_automation/ui_actions/support.py +68 -0
clerk/gui_automation/ui_state_inspector/__init__.py +0 -0
clerk/gui_automation/ui_state_inspector/gui_vision.py +184 -0
clerk/gui_automation/ui_state_inspector/models.py +184 -0
clerk/gui_automation/ui_state_machine/__init__.py +11 -0
clerk/gui_automation/ui_state_machine/ai_recovery.py +110 -0
clerk/gui_automation/ui_state_machine/decorators.py +71 -0
clerk/gui_automation/ui_state_machine/exceptions.py +42 -0
clerk/gui_automation/ui_state_machine/models.py +40 -0
clerk/gui_automation/ui_state_machine/state_machine.py +838 -0
clerk/models/remote_device.py +7 -0
clerk/utils/__init__.py +0 -0
clerk/utils/logger.py +118 -0
clerk/utils/save_artifact.py +35 -0
{clerk_sdk-0.1.9.dist-info → clerk_sdk-0.2.0.dist-info}/METADATA +11 -1
clerk_sdk-0.2.0.dist-info/RECORD +48 -0
clerk_sdk-0.1.9.dist-info/RECORD +0 -15
{clerk_sdk-0.1.9.dist-info → clerk_sdk-0.2.0.dist-info}/WHEEL +0 -0
{clerk_sdk-0.1.9.dist-info → clerk_sdk-0.2.0.dist-info}/licenses/LICENSE +0 -0
{clerk_sdk-0.1.9.dist-info → clerk_sdk-0.2.0.dist-info}/top_level.txt +0 -0

clerk/base.py ADDED Viewed

@@ -0,0 +1,94 @@
+import os
+import requests
+import backoff
+from typing import Dict, Optional, Self
+from pydantic import BaseModel, model_validator, Field
+from .models.response_model import StandardResponse
+def giveup_handler(e):
+    """Tell ``backoff`` whether to stop retrying after exception ``e``.
+    Returns True only for an HTTPError that carries a response whose status
+    code is below 500. Any other exception gives False, so retrying continues.
+    """
+    if not isinstance(e, requests.exceptions.HTTPError):
+        return False
+    if e.response is None:
+        return False
+    return e.response.status_code < 500
+class BaseClerk(BaseModel):
+    """
+    Base HTTP client shared by the Clerk API clients.
+    Attributes:
+        api_key (Optional[str]): API key. When not given it is read from the CLERK_API_KEY environment variable.
+        headers (Dict[str, str]): Default request headers. Replaced during validation by a single Authorization bearer header.
+        base_url (str): API base URL. Defaults to the CLERK_BASE_URL environment variable, or "https://api.clerk-app.com".
+        root_endpoint (Optional[str]): Optional path segment inserted between base_url and every endpoint.
+    """
+    api_key: Optional[str] = Field(default=None, min_length=1)
+    headers: Dict[str, str] = Field(default_factory=dict)
+    base_url: str = Field(
+        default_factory=lambda: os.getenv("CLERK_BASE_URL", "https://api.clerk-app.com")
+    )
+    root_endpoint: Optional[str] = None
+    @model_validator(mode="after")
+    def validate_api_key(self) -> Self:
+        """Resolve the API key and build the default Authorization header.
+        Raises:
+            ValueError: If no key was passed and CLERK_API_KEY is unset or empty.
+        """
+        if not self.api_key:
+            self.api_key = os.getenv("CLERK_API_KEY")
+        if not self.api_key:
+            raise ValueError("API key has not been provided.")
+        # NOTE(review): this overwrites any headers supplied at construction time - confirm that is intended.
+        self.headers = {"Authorization": f"Bearer {self.api_key}"}
+        return self
+    def _build_url(self, endpoint: str) -> str:
+        """Concatenate base_url, the optional root_endpoint and endpoint."""
+        return f"{self.base_url}{self.root_endpoint or ''}{endpoint}"
+    @backoff.on_exception(
+        backoff.expo,
+        (requests.exceptions.RequestException,),
+        max_tries=3,
+        jitter=None,
+        giveup=giveup_handler,
+    )
+    def get_request(
+        self,
+        endpoint: str,
+        headers: Optional[Dict[str, str]] = None,
+        json: Optional[Dict] = None,
+        params: Optional[Dict] = None,
+    ) -> StandardResponse:
+        """Send a GET request and parse the JSON reply into a StandardResponse.
+        Retried with exponential backoff (up to 3 tries) on requests
+        RequestException, unless giveup_handler says to stop.
+        Args:
+            endpoint: Path appended to base_url (and root_endpoint when set).
+            headers: Extra headers; they override the instance headers on key clashes.
+            json: JSON body. None is sent as an empty object, as before.
+            params: Query-string parameters.
+        Raises:
+            requests.exceptions.HTTPError: From raise_for_status() on an error status.
+        """
+        # None sentinels replace the former shared mutable {} defaults.
+        merged_headers = {**self.headers, **(headers or {})}
+        response = requests.get(
+            self._build_url(endpoint),
+            headers=merged_headers,
+            json=json or {},
+            params=params or {},
+        )
+        response.raise_for_status()
+        return StandardResponse(**response.json())
+    @backoff.on_exception(
+        backoff.expo,
+        (requests.exceptions.RequestException,),
+        max_tries=3,
+        jitter=None,
+        giveup=giveup_handler,
+    )
+    def post_request(
+        self,
+        endpoint: str,
+        headers: Optional[Dict[str, str]] = None,
+        json: Optional[Dict] = None,
+        params: Optional[Dict] = None,
+    ) -> StandardResponse:
+        """Send a POST request and parse the JSON reply into a StandardResponse.
+        Retried with exponential backoff (up to 3 tries) on requests
+        RequestException, unless giveup_handler says to stop.
+        Args:
+            endpoint: Path appended to base_url (and root_endpoint when set).
+            headers: Extra headers; they override the instance headers on key clashes.
+            json: JSON body. None is sent as an empty object, as before.
+            params: Query-string parameters.
+        Raises:
+            requests.exceptions.HTTPError: From raise_for_status() on an error status.
+        """
+        # None sentinels replace the former shared mutable {} defaults.
+        merged_headers = {**self.headers, **(headers or {})}
+        response = requests.post(
+            self._build_url(endpoint),
+            headers=merged_headers,
+            json=json or {},
+            params=params or {},
+        )
+        response.raise_for_status()
+        return StandardResponse(**response.json())

clerk/client.py CHANGED Viewed

@@ -1,112 +1,11 @@
-import os
-# import logging
-import requests
-import backoff
-from typing import Dict, List, Optional, Self
+from typing import List
 from xml.dom.minidom import Document
-from pydantic import BaseModel, model_validator, Field
+from clerk.base import BaseClerk
 from .models.file import ParsedFile
-from .models.response_model import StandardResponse
-# logger = logging.getLogger(__name__)
-# logger.setLevel(logging.INFO)
-# if not logger.handlers:
-#     handler = logging.StreamHandler()
-#     formatter = logging.Formatter("[%(levelname)s] %(asctime)s - %(message)s")
-#     handler.setFormatter(formatter)
-#     logger.addHandler(handler)
-# def backoff_handler(details):
-#     logger.warning(
-#         f"Retrying {details['target'].__name__} after {details['tries']} tries..."
-#     )
-def giveup_handler(e):
-    return (
-        isinstance(e, requests.exceptions.HTTPError)
-        and e.response is not None
-        and e.response.status_code < 500
-    )
-class Clerk(BaseModel):
-    api_key: Optional[str] = Field(default=None, min_length=1)
-    headers: Dict[str, str] = Field(default_factory=dict)
-    base_url: str = Field(
-        default_factory=lambda: os.getenv("CLERK_BASE_URL", "https://api.clerk-app.com")
-    )
-    @model_validator(mode="after")
-    def validate_api_key(self) -> Self:
-        if not self.api_key:
-            self.api_key = os.getenv("CLERK_API_KEY")
-        if not self.api_key:
-            raise ValueError("API key has not been provided.")
-        self.headers = {"Authorization": f"Bearer {self.api_key}"}
-        return self
-    @backoff.on_exception(
-        backoff.expo,
-        (requests.exceptions.RequestException,),
-        max_tries=3,
-        jitter=None,
-        # on_backoff=backoff_handler,
-        giveup=giveup_handler,
-    )
-    def get_request(
-        self,
-        endpoint: str,
-        headers: Dict[str, str] = {},
-        json: Dict = {},
-        params: Dict = {},
-    ) -> StandardResponse:
-        merged_headers = {**self.headers, **headers}
-        url = f"{self.base_url}{endpoint}"
-        # logger.info(f"GET {url} | params={params}")
-        response = requests.get(url, headers=merged_headers, json=json, params=params)
-        response.raise_for_status()
-        return StandardResponse(**response.json())
-    @backoff.on_exception(
-        backoff.expo,
-        (requests.exceptions.RequestException,),
-        max_tries=3,
-        jitter=None,
-        # on_backoff=backoff_handler,
-        giveup=giveup_handler,
-    )
-    def post_request(
-        self,
-        endpoint: str,
-        headers: Dict[str, str] = {},
-        json: Dict = {},
-        params: Dict = {},
-    ) -> StandardResponse:
-        merged_headers = {**self.headers, **headers}
-        url = f"{self.base_url}{endpoint}"
-        # logger.info(f"POST {url} | body={json} | params={params}")
-        response = requests.post(url, headers=merged_headers, json=json, params=params)
-        response.raise_for_status()
-        return StandardResponse(**response.json())
+class Clerk(BaseClerk):
     def get_document(self, document_id: str) -> Document:
         endpoint = f"/document/{document_id}"
         res = self.get_request(endpoint=endpoint)

clerk/decorator/models.py CHANGED Viewed

@@ -17,3 +17,4 @@ class Document(BaseModel):
 class ClerkCodePayload(BaseModel):
+    # Model holding a document, its structured data and an optional run id.
     document: Document
     structured_data: Dict
+    # Optional; stays None when no run id is supplied.
+    run_id: Optional[str] = None

clerk/decorator/task_decorator.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import os
 import pickle
 from typing import Callable, Optional
 from functools import wraps

clerk/gui_automation/__init__.py ADDED Viewed

File without changes

clerk/gui_automation/action_model/__init__.py ADDED Viewed

File without changes

clerk/gui_automation/action_model/model.py ADDED Viewed

@@ -0,0 +1,126 @@
+import base64
+import os
+from typing import List, Literal, Optional, Union
+from pydantic import BaseModel, Field, validator
+# A coordinate list: either a list of floats or a list of ints.
+CoordsType = Union[List[float], List[int]]
+# Allowed string tags for a prediction format.
+# NOTE(review): presumably bounding-box layouts (corners vs. size, "n" = normalized) - confirm.
+PredictionsFormat = Union[
+    Literal["xyxy"], Literal["xyxyn"], Literal["xywh"], Literal["xywhn"]
+]
+# Allowed values for an anchor's relation to its target; "" is also accepted.
+RelationsType = Union[
+    Literal["above"], Literal["below"], Literal["left"], Literal["right"], Literal[""]
+]
+class ImageB64(BaseModel):
+    """
+    A class representing an image encoded in base64 format.
+    Attributes:
+        id (Optional[str]): The ID of the image. Defaults to None; from_path sets it to the file's base name.
+        value (str): The base64 encoded value of the image. Defaults to "".
+    Methods:
+        from_path(value: Union[str, "ImageB64"]) -> "ImageB64":
+            Creates an instance from a file path, or returns an existing ImageB64 instance unchanged.
+    """
+    id: Optional[str] = None
+    value: str = ""
+    @classmethod
+    def from_path(cls, value: Union[str, "ImageB64"]) -> "ImageB64":
+        """Build an image from a file path, or pass an existing instance through.
+        Args:
+            value: Path to an image file, or an already built ImageB64.
+        Returns:
+            The given instance when value is an ImageB64. Otherwise a new
+            instance whose id is the file's base name and whose value is the
+            base64 encoding of the file content (via the module-level to_b64).
+        """
+        if isinstance(value, ImageB64):
+            return value
+        # Construct through cls so that calling from_path on a subclass yields that subclass.
+        return cls(
+            id=os.path.basename(value),
+            value=to_b64(value),
+        )
+def to_b64(path: str) -> str:
+    """Read the file at ``path`` in binary mode and return its content base64 encoded as a UTF-8 string."""
+    with open(path, "rb") as image_file:
+        raw_bytes = image_file.read()
+    encoded = base64.b64encode(raw_bytes)
+    return encoded.decode("utf-8")
+class Anchor(BaseModel):
+    """
+    A class representing an anchor for a screenshot.
+    Attributes:
+        value (Union[str, ImageB64]): The value of the anchor, which can be a string or an ImageB64 instance. Defaults to "".
+        relation (RelationsType): The relation of the anchor to the target, which can be one of the following: "above", "below", "left", "right", or an empty string. Defaults to "".
+    """
+    value: Union[str, ImageB64] = ""
+    relation: RelationsType = ""
+class Screenshot(BaseModel):
+    """
+    A class representing a screenshot.
+    Attributes:
+        screen_b64 (ImageB64): The base64 encoded value of the screenshot. Required.
+        target (Union[str, ImageB64]): The target of the screenshot, which can be a string or an ImageB64 instance. Required.
+        anchors (List[Anchor]): The list of anchors for the screenshot. Defaults to an empty list.
+        is_awaited (bool): A flag to signal whether the target should appear immediately or is awaited. Defaults to False.
+        target_name (Optional[str]): A readable representation of a target which is set automatically when validating the target and is used in the AM for logging. Defaults to None.
+    """
+    screen_b64: ImageB64
+    target: Union[str, ImageB64]
+    # NOTE(review): mutable list default; pydantic is expected to copy field defaults per instance - confirm.
+    anchors: List[Anchor] = []
+    is_awaited: bool = False
+    target_name: Optional[str] = None
+class Coords(BaseModel):
+    """
+    A class representing coordinates.
+    Attributes:
+        value (CoordsType): The value of the coordinates, which can be a list of floats or a list of integers. Required.
+        score (int): The score associated with the coordinates, defaults to 0.
+    """
+    # NOTE(review): the meaning and ordering of the numbers in value is not defined here - confirm with the service.
+    value: CoordsType
+    score: int = 0
+class RouterOutput(BaseModel):
+    """
+    A class representing the output of a router.
+    Attributes:
+        Resources (List[Coords]): A list of coordinates representing the resources. Defaults to an empty list.
+        StatusMessage (Union[Literal["Success"], Literal["Failure"], None]): The status message of the router output. Defaults to None.
+        ErrorMessage (str): The error message associated with the router output. Defaults to "".
+    """
+    # NOTE(review): PascalCase field names presumably mirror the keys of an external payload - confirm before renaming.
+    Resources: List[Coords] = []
+    StatusMessage: Union[Literal["Success"], Literal["Failure"], None] = None
+    ErrorMessage: str = ""

clerk/gui_automation/action_model/utils.py ADDED Viewed

@@ -0,0 +1,26 @@
+from .model import Coords, Screenshot
+from ..decorators.gui_automation import clerk_client
+def get_coordinates(payload: Screenshot) -> Coords:
+    """
+    Get coordinates from the action model API endpoint.
+    The payload is serialized with model_dump() and handed to clerk_client.get_coordinates.
+    NOTE(review): an AM_URL environment variable was documented as required here;
+    nothing in this function reads it - confirm whether it is still needed.
+    Parameters:
+        payload (Screenshot): The payload containing the necessary data for the request.
+    Returns:
+        Coords: The coordinates obtained from the API response.
+    Raises:
+        Whatever clerk_client.get_coordinates raises. Presumably a RuntimeError
+        when the response holds no coordinates - verify against the client.
+    Example:
+        payload = Screenshot(screen_b64=ImageB64(value="base64_encoded_image"), target="target_image")
+        coordinates = get_coordinates(payload)
+    """
+    return clerk_client.get_coordinates(payload.model_dump())

clerk/gui_automation/client.py ADDED Viewed

@@ -0,0 +1,144 @@
+from typing import Dict, List, Optional
+from pydantic import BaseModel
+from clerk.base import BaseClerk
+from clerk.gui_automation.action_model.model import Coords
+from clerk.gui_automation.ui_state_inspector.models import (
+    ActionString,
+    BaseState,
+    States,
+    TargetWithAnchor,
+)
+from clerk.models.remote_device import RemoteDevice
+class RPAClerk(BaseClerk):
+    """Client for the "/gui_automation" endpoints: remote-device allocation and coordinate lookup."""
+    root_endpoint: str = "/gui_automation"
+    def allocate_remote_device(self, group_name: str, run_id: str) -> RemoteDevice:
+        """Request a remote device from ``group_name`` for ``run_id``.
+        Returns:
+            RemoteDevice: Built from the first item of the response data.
+        """
+        endpoint = "/remote_device/allocate"
+        res = self.post_request(
+            endpoint=endpoint, json={"group_name": group_name, "run_id": run_id}
+        )
+        return RemoteDevice(**res.data[0])
+    def deallocate_remote_device(
+        self,
+        remote_device: RemoteDevice,
+        run_id: str,
+    ) -> None:
+        """Release ``remote_device`` for ``run_id``; the response body is ignored."""
+        endpoint = "/remote_device/deallocate"
+        self.post_request(
+            endpoint=endpoint,
+            json={"id": remote_device.id, "name": remote_device.name, "run_id": run_id},
+        )
+    def get_coordinates(self, payload: Dict) -> Coords:
+        """Post ``payload`` to the action model and return the first coordinates.
+        Raises:
+            RuntimeError: If the response data is empty, missing, or its first item is None.
+        """
+        endpoint = "/action_model/get_coordinates"
+        res = self.post_request(endpoint=endpoint, json=payload)
+        # Check emptiness first: indexing empty or missing data would raise
+        # IndexError/TypeError instead of the intended RuntimeError.
+        if not res.data or res.data[0] is None:
+            raise RuntimeError("No coordinates found in the response.")
+        return Coords(**res.data[0])
+class GUIVisionClerk(BaseClerk):
+    """Client for the "/gui_automation/vision" endpoints.
+    Every method posts a base64 screenshot plus a ``use_ocr`` flag and builds
+    its result from the first item of the response data.
+    """
+    root_endpoint: str = "/gui_automation/vision"
+    def find_target(
+        self, screen_b64: str, use_ocr: bool, target_prompt: str
+    ) -> TargetWithAnchor:
+        """Post ``target_prompt`` to "/find_target" and return a TargetWithAnchor."""
+        endpoint = "/find_target"
+        res = self.post_request(
+            endpoint=endpoint,
+            json={
+                "screen_b64": screen_b64,
+                "use_ocr": use_ocr,
+                "target_prompt": target_prompt,
+            },
+        )
+        return TargetWithAnchor(**res.data[0])
+    def verify_state(
+        self, screen_b64: str, use_ocr: bool, possible_states: States
+    ) -> BaseState:
+        """Post ``possible_states`` to "/verify_state" and return a BaseState."""
+        endpoint = "/verify_state"
+        # NOTE(review): possible_states is sent as-is; confirm that States is JSON-serializable by requests.
+        res = self.post_request(
+            endpoint=endpoint,
+            json={
+                "screen_b64": screen_b64,
+                "use_ocr": use_ocr,
+                "possible_states": possible_states,
+            },
+        )
+        return BaseState(**res.data[0])
+    def answer(
+        self,
+        screen_b64: str,
+        use_ocr: bool,
+        question: str,
+        output_model: type[BaseModel],
+    ) -> BaseModel:
+        """Post ``question`` to "/answer" and parse the reply with ``output_model``.
+        Args:
+            output_model: A pydantic model class (not an instance). Its JSON
+                schema is sent with the request and it is instantiated from
+                the first item of the response data.
+        Returns:
+            An instance of ``output_model``.
+        """
+        endpoint = "/answer"
+        res = self.post_request(
+            endpoint=endpoint,
+            json={
+                "screen_b64": screen_b64,
+                "use_ocr": use_ocr,
+                "question": question,
+                "output_model": output_model.model_json_schema(),
+            },
+        )
+        return output_model(**res.data[0])
+    def classify_state(
+        self, screen_b64: str, use_ocr: bool, possible_states: List[Dict[str, str]]
+    ) -> BaseState:
+        """Post ``possible_states`` to "/classify_state" and return a BaseState."""
+        endpoint = "/classify_state"
+        res = self.post_request(
+            endpoint=endpoint,
+            json={
+                "screen_b64": screen_b64,
+                "use_ocr": use_ocr,
+                "possible_states": possible_states,
+            },
+        )
+        return BaseState(**res.data[0])
+    def write_action_string(
+        self, screen_b64: str, use_ocr: bool, action_prompt: str
+    ) -> ActionString:
+        """Post ``action_prompt`` to the action-string endpoint and return an ActionString."""
+        # NOTE(review): the path mixes "_" and "-" unlike the sibling endpoints - confirm against the API.
+        endpoint = "/write_action-string"
+        res = self.post_request(
+            endpoint=endpoint,
+            json={
+                "screen_b64": screen_b64,
+                "use_ocr": use_ocr,
+                "action_prompt": action_prompt,
+            },
+        )
+        return ActionString(**res.data[0])
+class CourseCorrectorClerk(BaseClerk):
+    """Client for the "/gui_automation/course_corrector" endpoints."""
+    root_endpoint: str = "/gui_automation/course_corrector"
+    def get_corrective_actions(
+        self,
+        screen_b64: str,
+        use_ocr: bool,
+        goal: str,
+        custom_instructions: Optional[str] = None,
+    ) -> ActionString:
+        """Post the screenshot and ``goal`` and return the proposed ActionString.
+        Args:
+            screen_b64: Base64 encoded screenshot.
+            use_ocr: OCR flag forwarded to the service; typed bool like the other GUI clients.
+            goal: Goal forwarded to the service.
+            custom_instructions: Optional extra instructions; sent as null when omitted.
+        Returns:
+            ActionString: Built from the first item of the response data.
+        """
+        endpoint = "/get_corrective_actions"
+        res = self.post_request(
+            endpoint=endpoint,
+            json={
+                "screen_b64": screen_b64,
+                "use_ocr": use_ocr,
+                "goal": goal,
+                "custom_instructions": custom_instructions,
+            },
+        )
+        return ActionString(**res.data[0])

clerk/gui_automation/client_actor/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from .client_actor import (
+    get_screen,
+    perform_action,
+)

clerk/gui_automation/client_actor/client_actor.py ADDED Viewed

@@ -0,0 +1,178 @@
+import asyncio
+import json
+import os
+from typing import Any, Dict, Union
+import pydantic
+import requests
+from .model import (
+    ExecutePayload,
+    DeleteFilesExecutePayload,
+    ApplicationExecutePayload,
+    SaveFilesExecutePayload,
+    WindowExecutePayload,
+    GetFileExecutePayload,
+)
+import backoff
+from .model import PerformActionResponse, ActionStates
+from .exception import PerformActionException, GetScreenError
+async def _perform_action_ws(payload: Dict) -> PerformActionResponse:
+    """Send ``payload`` over the shared WebSocket and return the parsed reply.
+    The payload is sent as JSON. An "OK" acknowledgement is awaited first,
+    then the action result; each wait is limited to 90 seconds.
+    Args:
+        payload (Dict): The payload request to be sent.
+    Returns:
+        PerformActionResponse: The response of performing the action.
+    Raises:
+        RuntimeError: If the WebSocket is not set, the acknowledgement is not
+            "OK", or either wait times out.
+    """
+    from ..decorators.gui_automation import global_ws
+    if not global_ws:
+        raise RuntimeError("The Websocket has not been initiated.")
+    # 1. Send the request.
+    await global_ws.send(json.dumps(payload))
+    # 2. Wait for the acknowledgement, then for the action result.
+    try:
+        ack = await asyncio.wait_for(global_ws.recv(), 90)
+        if ack != "OK":
+            raise RuntimeError("Received ACK != OK")
+        raw_reply = await asyncio.wait_for(global_ws.recv(), 90)
+        return PerformActionResponse(**json.loads(raw_reply))
+    except asyncio.TimeoutError:
+        raise RuntimeError("The ack message did not arrive.")
+async def _get_screen_async() -> str:
+    """
+    Request a screenshot over the WebSocket connection and return it base64 encoded.
+    Returns:
+        str: The base64 encoded screen image.
+    Raises:
+        GetScreenError: If the acknowledgement failed or timed out, or the
+            reply carries no screenshot. Any other error is re-raised as is.
+    """
+    ack_failures = (
+        "The ack message did not arrive.",
+        "Received ACK != OK",
+    )
+    request = {
+        "proc_inst_id": os.getenv("PROC_ID"),
+        "client_name": os.getenv("REMOTE_DEVICE_NAME"),
+        "headless": os.getenv("HEADLESS", "True").lower() == "true",
+        "action": {"action_type": "screenshot"},
+    }
+    try:
+        action_info = await _perform_action_ws(request)
+    except Exception as e:
+        if str(e) not in ack_failures:
+            raise  # unrelated failure: propagate unchanged
+        raise GetScreenError("The ack message did not arrive.")
+    screen = action_info.screen_b64
+    if screen is None:
+        raise GetScreenError()
+    return screen
+@backoff.on_exception(
+    backoff.expo,
+    (requests.RequestException, pydantic.ValidationError, GetScreenError),
+    max_time=120,
+)
+def get_screen() -> str:
+    """
+    Request the VDI screen and return the base64 representation of the screenshot.
+    Runs _get_screen_async to completion on the event loop returned by
+    asyncio.get_event_loop(). Retried with exponential backoff for up to 120
+    seconds on RequestException, ValidationError and GetScreenError.
+    Returns:
+        str: The base64 representation of the screenshot.
+    """
+    event_loop = asyncio.get_event_loop()
+    pending = event_loop.create_task(_get_screen_async())
+    return event_loop.run_until_complete(pending)
+async def _perform_action_async(
+    payload: Union[
+        ExecutePayload,
+        ApplicationExecutePayload,
+        WindowExecutePayload,
+        SaveFilesExecutePayload,
+        DeleteFilesExecutePayload,
+        GetFileExecutePayload,
+    ],
+) -> Any:
+    """
+    Wrap ``payload`` in a request envelope, send it over the WebSocket and return the action's result.
+    Args:
+        payload: One of the execute payload models; its model_dump() becomes the "action" entry of the request.
+    Returns:
+        Any: The return value reported for the action.
+    Raises:
+        PerformActionException: If the reported state is failed; carries the reported message.
+    """
+    headless = os.getenv("HEADLESS", "True").lower() == "true"
+    request: Dict = {
+        "proc_inst_id": os.getenv("PROC_ID"),
+        "client_name": os.getenv("REMOTE_DEVICE_NAME"),
+        "headless": headless,
+        "action": payload.model_dump(),
+    }
+    outcome = await _perform_action_ws(request)
+    if outcome.state == ActionStates.failed:
+        raise PerformActionException(outcome.message)
+    return outcome.return_value
+def perform_action(
+    payload: Union[
+        ExecutePayload,
+        ApplicationExecutePayload,
+        WindowExecutePayload,
+        SaveFilesExecutePayload,
+        DeleteFilesExecutePayload,
+        GetFileExecutePayload,
+    ],
+) -> Any:
+    """
+    Perform an action on the VDI client.
+    Synchronous wrapper: runs _perform_action_async to completion on the
+    event loop returned by asyncio.get_event_loop().
+    Args:
+        payload: One of the execute payload models describing the action to be performed.
+    Raises:
+        PerformActionException: If the action fails.
+        RuntimeError: If the request to perform the action fails.
+    Returns:
+        Any: The return value reported for the action.
+    """
+    event_loop = asyncio.get_event_loop()
+    pending = event_loop.create_task(_perform_action_async(payload))
+    return event_loop.run_until_complete(pending)

clerk-sdk 0.1.9__py3-none-any.whl → 0.2.0__py3-none-any.whl

clerk-sdk 0.1.9__py3-none-any.whl → 0.2.0__py3-none-any.whl