clerk-sdk 0.1.8__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. clerk/base.py +94 -0
  2. clerk/client.py +3 -104
  3. clerk/decorator/models.py +1 -0
  4. clerk/decorator/task_decorator.py +4 -1
  5. clerk/gui_automation/__init__.py +0 -0
  6. clerk/gui_automation/action_model/__init__.py +0 -0
  7. clerk/gui_automation/action_model/model.py +126 -0
  8. clerk/gui_automation/action_model/utils.py +26 -0
  9. clerk/gui_automation/client.py +144 -0
  10. clerk/gui_automation/client_actor/__init__.py +4 -0
  11. clerk/gui_automation/client_actor/client_actor.py +178 -0
  12. clerk/gui_automation/client_actor/exception.py +22 -0
  13. clerk/gui_automation/client_actor/model.py +192 -0
  14. clerk/gui_automation/decorators/__init__.py +1 -0
  15. clerk/gui_automation/decorators/gui_automation.py +109 -0
  16. clerk/gui_automation/exceptions/__init__.py +0 -0
  17. clerk/gui_automation/exceptions/modality/__init__.py +0 -0
  18. clerk/gui_automation/exceptions/modality/exc.py +46 -0
  19. clerk/gui_automation/exceptions/websocket.py +6 -0
  20. clerk/gui_automation/ui_actions/__init__.py +1 -0
  21. clerk/gui_automation/ui_actions/actions.py +781 -0
  22. clerk/gui_automation/ui_actions/base.py +200 -0
  23. clerk/gui_automation/ui_actions/support.py +68 -0
  24. clerk/gui_automation/ui_state_inspector/__init__.py +0 -0
  25. clerk/gui_automation/ui_state_inspector/gui_vision.py +184 -0
  26. clerk/gui_automation/ui_state_inspector/models.py +184 -0
  27. clerk/gui_automation/ui_state_machine/__init__.py +11 -0
  28. clerk/gui_automation/ui_state_machine/ai_recovery.py +110 -0
  29. clerk/gui_automation/ui_state_machine/decorators.py +71 -0
  30. clerk/gui_automation/ui_state_machine/exceptions.py +42 -0
  31. clerk/gui_automation/ui_state_machine/models.py +40 -0
  32. clerk/gui_automation/ui_state_machine/state_machine.py +838 -0
  33. clerk/models/remote_device.py +7 -0
  34. clerk/utils/__init__.py +0 -0
  35. clerk/utils/logger.py +118 -0
  36. clerk/utils/save_artifact.py +35 -0
  37. {clerk_sdk-0.1.8.dist-info → clerk_sdk-0.2.0.dist-info}/METADATA +11 -1
  38. clerk_sdk-0.2.0.dist-info/RECORD +48 -0
  39. clerk_sdk-0.1.8.dist-info/RECORD +0 -15
  40. {clerk_sdk-0.1.8.dist-info → clerk_sdk-0.2.0.dist-info}/WHEEL +0 -0
  41. {clerk_sdk-0.1.8.dist-info → clerk_sdk-0.2.0.dist-info}/licenses/LICENSE +0 -0
  42. {clerk_sdk-0.1.8.dist-info → clerk_sdk-0.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,178 @@
1
+ import asyncio
2
+ import json
3
+ import os
4
+ from typing import Any, Dict, Union
5
+
6
+ import pydantic
7
+ import requests
8
+
9
+
10
+ from .model import (
11
+ ExecutePayload,
12
+ DeleteFilesExecutePayload,
13
+ ApplicationExecutePayload,
14
+ SaveFilesExecutePayload,
15
+ WindowExecutePayload,
16
+ GetFileExecutePayload,
17
+ )
18
+ import backoff
19
+
20
+ from .model import PerformActionResponse, ActionStates
21
+ from .exception import PerformActionException, GetScreenError
22
+
23
+
24
async def _perform_action_ws(payload: Dict) -> PerformActionResponse:
    """Send an action request over the shared WebSocket and await its result.

    The exchange has two steps: the payload is sent as JSON, the peer is
    expected to reply with the literal string ``"OK"``, and a second message
    carries the JSON-encoded result of the action.

    Args:
        payload (Dict): The request body; it is serialised with ``json.dumps``.

    Returns:
        PerformActionResponse: Built from the decoded second message.

    Raises:
        RuntimeError: If no WebSocket is available, if the first reply is not
            ``"OK"``, or if either reply does not arrive within 90 seconds.
    """
    # Imported at call time, so the name is bound to whatever connection the
    # decorator module currently holds.
    from ..decorators.gui_automation import global_ws

    if not global_ws:
        raise RuntimeError("The Websocket has not been initiated.")

    # 1. Send the request.
    await global_ws.send(json.dumps(payload))

    # 2. Wait for the acknowledgement, then for the actual result.
    try:
        acknowledgement = await asyncio.wait_for(global_ws.recv(), 90)
        if acknowledgement != "OK":
            raise RuntimeError("Received ACK != OK")
        raw_result = await asyncio.wait_for(global_ws.recv(), 90)
    except asyncio.TimeoutError:
        # The same message is used whichever of the two waits timed out.
        raise RuntimeError("The ack message did not arrive.")

    return PerformActionResponse(**json.loads(raw_result))
55
+
56
+
57
async def _get_screen_async() -> str:
    """Request a screenshot over the WebSocket and return it base64-encoded.

    The request is assembled from the ``PROC_ID``, ``REMOTE_DEVICE_NAME`` and
    ``HEADLESS`` environment variables; ``HEADLESS`` counts as true when it is
    unset or equals ``"true"`` in any letter case.

    Returns:
        str: The ``screen_b64`` field of the action response.

    Raises:
        GetScreenError: If the acknowledgement was missing or was not ``"OK"``,
            or if the response carries no ``screen_b64``.
        Exception: Any other error raised while performing the action is
            re-raised unchanged.
    """
    headless = os.getenv("HEADLESS", "True").lower() == "true"
    request = {
        "proc_inst_id": os.getenv("PROC_ID"),
        "client_name": os.getenv("REMOTE_DEVICE_NAME"),
        "headless": headless,
        "action": {"action_type": "screenshot"},
    }

    # Acknowledgement failures are recognised by their message text and
    # converted into GetScreenError; everything else passes through.
    ack_failures = (
        "The ack message did not arrive.",
        "Received ACK != OK",
    )
    try:
        response = await _perform_action_ws(request)
    except Exception as exc:
        if str(exc) not in ack_failures:
            raise
        raise GetScreenError("The ack message did not arrive.")

    screenshot = response.screen_b64
    if screenshot is None:
        raise GetScreenError()
    return screenshot
89
+
90
+
91
@backoff.on_exception(
    backoff.expo,
    (requests.RequestException, pydantic.ValidationError, GetScreenError),
    max_time=120,
)
def get_screen() -> str:
    """Synchronously fetch a screenshot of the remote screen as base64 text.

    The coroutine ``_get_screen_async`` is scheduled on the loop returned by
    ``asyncio.get_event_loop()`` and run to completion. The ``backoff``
    decorator retries the call with exponential waits (``max_time=120``) when
    a ``requests.RequestException``, ``pydantic.ValidationError`` or
    ``GetScreenError`` is raised.

    Returns:
        str: The base64 representation of the screenshot.

    Raises:
        GetScreenError: If the screenshot still cannot be obtained once the
            retry budget is spent.
        RuntimeError: If the WebSocket has not been initiated.
    """
    event_loop = asyncio.get_event_loop()
    pending = event_loop.create_task(_get_screen_async())
    return event_loop.run_until_complete(pending)
112
+
113
+
114
async def _perform_action_async(
    payload: Union[
        ExecutePayload,
        ApplicationExecutePayload,
        WindowExecutePayload,
        SaveFilesExecutePayload,
        DeleteFilesExecutePayload,
        GetFileExecutePayload,
    ],
) -> Any:
    """Send one action payload over the WebSocket and return its result.

    The payload is dumped with ``model_dump()`` and wrapped together with the
    ``PROC_ID``, ``REMOTE_DEVICE_NAME`` and ``HEADLESS`` environment values;
    ``HEADLESS`` counts as true when it is unset or equals ``"true"`` in any
    letter case.

    Args:
        payload: Any of the supported ``*ExecutePayload`` models.

    Returns:
        Any: The ``return_value`` field of the action response.

    Raises:
        PerformActionException: If the response state is
            ``ActionStates.failed``; the response message is used as the
            exception text.
    """
    headless = os.getenv("HEADLESS", "True").lower() == "true"
    request: Dict = {
        "proc_inst_id": os.getenv("PROC_ID"),
        "client_name": os.getenv("REMOTE_DEVICE_NAME"),
        "headless": headless,
        "action": payload.model_dump(),
    }
    response = await _perform_action_ws(request)

    if response.state == ActionStates.failed:
        raise PerformActionException(response.message)
    return response.return_value
149
+
150
+
151
def perform_action(
    payload: Union[
        ExecutePayload,
        ApplicationExecutePayload,
        WindowExecutePayload,
        SaveFilesExecutePayload,
        DeleteFilesExecutePayload,
        GetFileExecutePayload,
    ],
) -> Any:
    """Synchronously perform an action on the remote client.

    The coroutine ``_perform_action_async`` is scheduled on the loop returned
    by ``asyncio.get_event_loop()`` and run to completion.

    Args:
        payload: Any of the supported ``*ExecutePayload`` models describing
            the action to perform.

    Returns:
        Any: Whatever value the action reported back.

    Raises:
        PerformActionException: If the action is reported as failed.
        RuntimeError: If the request could not be exchanged over the WebSocket.
    """
    event_loop = asyncio.get_event_loop()
    pending = event_loop.create_task(_perform_action_async(payload))
    return event_loop.run_until_complete(pending)
@@ -0,0 +1,22 @@
1
class PerformActionException(Exception):
    """Raised when an action sent to the remote client is reported as failed."""
7
+
8
+
9
class GetScreenError(Exception):
    """Raised when a screenshot of the remote screen could not be obtained."""
15
+
16
+
17
# NOTE(review): the package also defines a class with this exact name in
# exceptions/websocket.py. The two are unrelated types, so an ``except``
# clause written against one will not catch the other -- confirm which one
# is meant to be canonical.
class WebSocketConnectionFailed(Exception):
    """Raised when the connection to the WebSocket was not successful."""
@@ -0,0 +1,192 @@
1
+ from typing import Any, List, Literal, Optional, Union
2
+ from pydantic import BaseModel, Field
3
+ from enum import Enum
4
+
5
+
6
# Closed set of action names accepted by ``ExecutePayload.action_type``.
ActionTypes = Literal[
    "left_click",
    "right_click",
    "middle_click",
    "double_click",
    "send_keys",
    "press_keys",
    "hot_keys",
    "paste_text",
    "get_text",
    "scroll",
]
18
+
19
+
20
class ActionStates(Enum):
    """Outcome reported for an action.

    Members:
        completed: Wire value ``"COMPLETED"``.
        failed: Wire value ``"FAILED"``.
    """

    completed = "COMPLETED"
    failed = "FAILED"
31
+
32
+
33
class ExecutePayload(BaseModel):
    """Payload describing a pointer or keyboard action.

    Attributes:
        action_type (ActionTypes): One of the names listed in ``ActionTypes``.
        coordinates (Union[List[int], List[float]]): Defaults to an empty
            list. Presumably the screen position the action targets --
            confirm against the remote agent.
        keys (Optional[Union[str, List[str]]]): A single string or a list of
            strings. Defaults to None.
        key_separator (Optional[str]): Defaults to None.
        followed_by (Optional[str]): Defaults to None.
        interval (float): Defaults to 0.05 (unit presumably seconds --
            confirm).
        clicks (Optional[int]): Defaults to None.
    """

    action_type: ActionTypes
    coordinates: Union[List[int], List[float]] = Field(default=[])
    keys: Union[str, List[str], None] = None
    key_separator: Optional[str] = None
    followed_by: Optional[str] = None
    interval: float = 0.05
    clicks: Optional[int] = Field(default=None)
58
+
59
+
60
class WindowExecutePayload(BaseModel):
    """Payload describing an action on a named window.

    Attributes:
        action_type: One of ``"maximize_window"``, ``"minimize_window"``,
            ``"close_window"`` or ``"activate_window"``.
        window_name (str): The name of the window the action applies to.
        timeout (int): Defaults to 10 (unit presumably seconds -- confirm).
    """

    action_type: Literal[
        "maximize_window",
        "minimize_window",
        "close_window",
        "activate_window",
    ]
    window_name: str
    timeout: int = 10
82
+
83
+
84
class ApplicationExecutePayload(BaseModel):
    """Payload describing an application-level action.

    Attributes:
        action_type: Either ``"open_app"`` or ``"force_close_app"``.
        app_path (str): Absolute path of the application. Defaults to ``""``.
        app_window_name (str): Name of the application window once open;
            wildcard logic is enabled. Defaults to ``""``.
        timeout (int): Defaults to 60 (unit presumably seconds -- confirm).
        process_name (str): Process name from task manager, for example
            ``process.exe``. Defaults to ``""``.
    """

    action_type: Literal["open_app", "force_close_app"]
    app_path: str = Field(default="", description="Absolute path of the application")
    app_window_name: str = Field(
        default="",
        description="Name of the application window once open. Wildcard logic enabled.",
    )
    timeout: int = 60
    process_name: str = Field(
        default="", description="Process name from task manager. Example: process.exe"
    )
106
+
107
+
108
class FileDetails(BaseModel):
    """Name and content of a single file.

    Attributes:
        filename (str): Filename of the file. Required.
        value (str): Base64 string representation of the binary file. Required.
    """

    filename: str = Field(..., description="Filename of the file")
    value: str = Field(
        ..., description="Base64 string representation of the binary file"
    )
119
+
120
+
121
class SaveFilesExecutePayload(BaseModel):
    """Payload for the ``"save_files"`` action.

    Attributes:
        action_type: Always the literal ``"save_files"``.
        save_location (str): Where the files are to be saved.
        files (List[FileDetails]): The files to save, each with a filename
            and base64 content.
    """

    action_type: Literal["save_files"]
    save_location: str
    files: List[FileDetails]
134
+
135
+
136
class DeleteFilesExecutePayload(BaseModel):
    """Payload for the ``"delete_files"`` action.

    Attributes:
        action_type: Always the literal ``"delete_files"``.
        files_location (List[str]): Locations of the files to delete.
    """

    action_type: Literal["delete_files"]
    files_location: List[str]
147
+
148
+
149
class GetFileExecutePayload(BaseModel):
    """Payload for the ``"get_file"`` action.

    Attributes:
        action_type: Always the literal ``"get_file"``.
        file_location (str): Location of the file to retrieve.
    """

    action_type: Literal["get_file"]
    file_location: str
160
+
161
+
162
class GetScreenResponse(BaseModel):
    """Response carrying a captured screen.

    Attributes:
        screen_b64 (str): The screen image as a base64 encoded string. Required.
    """

    screen_b64: str
171
+
172
+
173
class PerformActionResponse(BaseModel):
    """Result reported after an action has been performed.

    Attributes:
        id (Optional[str]): The ID of the action. Defaults to None.
        state (ActionStates): The state of the action. Required.
        message (Optional[str]): An optional message associated with the
            action. Defaults to None.
        return_value (Optional[Any]): A value the action may return.
            Defaults to None.
        screen_b64 (Optional[str]): Base64 screen image, when the response
            carries one. Defaults to None.
    """

    id: Union[str, None] = None
    state: ActionStates
    message: Union[str, None] = None
    return_value: Optional[Any] = None
    screen_b64: Union[str, None] = None
189
+
190
+
191
class AllocateTargetResponse(BaseModel):
    """Response to a target allocation request.

    Attributes:
        client (str): Required. Presumably the name of the allocated
            client -- confirm against the service.
    """

    client: str
@@ -0,0 +1 @@
1
+ from .gui_automation import gui_automation
@@ -0,0 +1,109 @@
1
+ import asyncio
2
+ import functools
3
+ import logging
4
+ import os
5
+ from typing import Callable, Union
6
+
7
+ from websockets.asyncio.client import connect, ClientConnection
8
+ from websockets.protocol import State
9
+
10
+ from clerk.gui_automation.client import RPAClerk
11
+ from clerk.models.remote_device import RemoteDevice
12
+ from clerk.decorator.models import ClerkCodePayload
13
+ from ..exceptions.websocket import WebSocketConnectionFailed
14
+
15
+
16
# Global handle to the live connection (if any). It starts as None and is
# rebound by the ``gui_automation`` wrapper through ``global global_ws``.
global_ws: Union[ClientConnection, None] = None

# Shared client, created once at import time and handed to the device
# allocation and deallocation helpers.
clerk_client = RPAClerk()
# Base endpoint; the wrapper appends "/<device name>/publisher?token=<token>".
wss_uri = "wss://agent-manager.f-one.group/action"
21
+
22
+
23
def _allocate_remote_device(
    clerk_client: RPAClerk, group_name: str, run_id: str
) -> RemoteDevice:
    """Allocate a remote device and publish its identity via the environment.

    Args:
        clerk_client (RPAClerk): Client used to request the allocation.
        group_name (str): Device group to allocate from.
        run_id (str): Identifier of the run the device is allocated for.

    Returns:
        RemoteDevice: The allocated device. Its ``id`` and ``name`` are also
        written to ``REMOTE_DEVICE_ID`` and ``REMOTE_DEVICE_NAME``.
    """
    device = clerk_client.allocate_remote_device(group_name=group_name, run_id=run_id)
    os.environ["REMOTE_DEVICE_ID"] = device.id
    os.environ["REMOTE_DEVICE_NAME"] = device.name
    return device
32
+
33
+
34
def _deallocate_target(
    clerk_client: RPAClerk, remote_device: RemoteDevice, run_id: str
):
    """Release a remote device and remove its identity from the environment.

    Args:
        clerk_client (RPAClerk): Client used to request the deallocation.
        remote_device (RemoteDevice): The device to release.
        run_id (str): Identifier of the run the device was allocated for.
    """
    clerk_client.deallocate_remote_device(remote_device=remote_device, run_id=run_id)
    # A default is supplied so a variable that is already absent is not an error.
    for variable in ("REMOTE_DEVICE_ID", "REMOTE_DEVICE_NAME"):
        os.environ.pop(variable, None)
40
+
41
+
42
def gui_automation():
    """Build a decorator that runs the wrapped function against a remote device.

    For every call the returned wrapper:
        1. stores ``payload.run_id`` in the ``PROC_ID`` environment variable,
        2. allocates a remote device from the ``REMOTE_DEVICE_GROUP`` group,
        3. opens a WebSocket to the agent manager on a dedicated event loop
           and publishes it through the module-level ``global_ws``,
        4. calls the wrapped function,
        5. releases the device, closes the socket, clears ``global_ws`` and
           closes the loop -- each step runs even if an earlier one fails.

    Returns:
        Callable: A decorator for functions whose first positional argument
        is a ``ClerkCodePayload``.

    Raises:
        ValueError: When ``gui_automation()`` itself is called and
            ``REMOTE_DEVICE_GROUP`` is unset or empty.
        WebSocketConnectionFailed: From the wrapper, if the connection is not
            open after connecting.
    """
    group_name: str = os.getenv("REMOTE_DEVICE_GROUP")
    if not group_name:
        raise ValueError("REMOTE_DEVICE_GROUP environmental variable is required.")

    async def connect_to_ws(uri: str) -> ClientConnection:
        # max_size is 2**23 bytes (8 MiB).
        return await connect(uri, max_size=2**23, ping_timeout=3600)

    async def close_ws_connection(ws_conn: ClientConnection):
        await ws_conn.close()

    def decorator(func: Callable):
        @functools.wraps(func)
        def wrapper(payload: ClerkCodePayload, *args, **kwargs):
            global global_ws
            os.environ["PROC_ID"] = payload.run_id

            remote_device = _allocate_remote_device(
                clerk_client, group_name, payload.run_id
            )

            # Create a dedicated loop for the WebSocket work.
            event_loop = asyncio.new_event_loop()
            asyncio.set_event_loop(event_loop)

            try:
                task = event_loop.create_task(
                    connect_to_ws(
                        f"{wss_uri}/{remote_device.name}/publisher"
                        f"?token={remote_device.wss_token}"
                    )
                )
                global_ws = event_loop.run_until_complete(task)

                if global_ws is None or global_ws.state is not State.OPEN:
                    global_ws = None
                    raise WebSocketConnectionFailed()

                logging.debug("WebSocket connection established.")
                func_ret = func(payload, *args, **kwargs)

            except Exception:
                os.environ.pop("PROC_ID", None)
                raise
            finally:
                # Nested try/finally blocks keep the cleanup steps independent:
                # a failing deallocation must not leak the socket or the loop.
                try:
                    _deallocate_target(clerk_client, remote_device, payload.run_id)
                finally:
                    try:
                        if global_ws is not None and global_ws.state is State.OPEN:
                            close_task = event_loop.create_task(
                                close_ws_connection(global_ws)
                            )
                            event_loop.run_until_complete(close_task)
                            logging.debug("WebSocket connection closed.")
                    finally:
                        # Never leave a closed connection published: later
                        # callers must see "no connection", not a dead socket.
                        global_ws = None
                        try:
                            event_loop.run_until_complete(
                                event_loop.shutdown_asyncgens()
                            )
                        finally:
                            event_loop.close()

            return func_ret

        return wrapper

    return decorator
File without changes
File without changes
@@ -0,0 +1,46 @@
1
class ModalityNotKnownError(Exception):
    """Raised when the modality of a target is not known or not supported.

    Args:
        message (str): Error text. When omitted, a default message naming the
            allowed modalities (``text`` and ``icon``) is used.

    Example:
        raise ModalityNotKnownError("The modality must be either 'text' or 'icon'")
    """

    def __init__(self, message: str = "allowed modalities are: `text` | `icon`"):
        super().__init__(message)
14
+
15
+
16
class AnchorTypeError(Exception):
    """Raised when an anchor type is not valid or not supported.

    Args:
        message (str): Error text describing the problem. Required; this
            exception has no default message.

    Example:
        raise AnchorTypeError("The anchor type must be either 'text' or 'image'")
    """

    def __init__(self, message: str):
        super().__init__(message)
29
+
30
+
31
class TargetModalityError(Exception):
    """Raised when a target is supplied in a form that is not supported.

    Args:
        message (str): Error text. When omitted, a default message listing
            the accepted target forms is used.

    Example:
        raise TargetModalityError("target must be provided as either text (str) | image (ImageB64) | image path (str) or skipped")
    """

    def __init__(
        self,
        message: str = "target must be provided as either text (str) | image (ImageB64) | image path (str) or skipped",
    ):
        super().__init__(message)
@@ -0,0 +1,6 @@
1
class WebSocketConnectionFailed(Exception):
    """Raised when the connection to the WebSocket was not successful."""
@@ -0,0 +1 @@
1
+ from .actions import *