clerk-sdk 0.1.9__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clerk/base.py +94 -0
- clerk/client.py +3 -104
- clerk/decorator/models.py +1 -0
- clerk/decorator/task_decorator.py +1 -0
- clerk/gui_automation/__init__.py +0 -0
- clerk/gui_automation/action_model/__init__.py +0 -0
- clerk/gui_automation/action_model/model.py +126 -0
- clerk/gui_automation/action_model/utils.py +26 -0
- clerk/gui_automation/client.py +144 -0
- clerk/gui_automation/client_actor/__init__.py +4 -0
- clerk/gui_automation/client_actor/client_actor.py +178 -0
- clerk/gui_automation/client_actor/exception.py +22 -0
- clerk/gui_automation/client_actor/model.py +192 -0
- clerk/gui_automation/decorators/__init__.py +1 -0
- clerk/gui_automation/decorators/gui_automation.py +109 -0
- clerk/gui_automation/exceptions/__init__.py +0 -0
- clerk/gui_automation/exceptions/modality/__init__.py +0 -0
- clerk/gui_automation/exceptions/modality/exc.py +46 -0
- clerk/gui_automation/exceptions/websocket.py +6 -0
- clerk/gui_automation/ui_actions/__init__.py +1 -0
- clerk/gui_automation/ui_actions/actions.py +781 -0
- clerk/gui_automation/ui_actions/base.py +200 -0
- clerk/gui_automation/ui_actions/support.py +68 -0
- clerk/gui_automation/ui_state_inspector/__init__.py +0 -0
- clerk/gui_automation/ui_state_inspector/gui_vision.py +184 -0
- clerk/gui_automation/ui_state_inspector/models.py +184 -0
- clerk/gui_automation/ui_state_machine/__init__.py +11 -0
- clerk/gui_automation/ui_state_machine/ai_recovery.py +110 -0
- clerk/gui_automation/ui_state_machine/decorators.py +71 -0
- clerk/gui_automation/ui_state_machine/exceptions.py +42 -0
- clerk/gui_automation/ui_state_machine/models.py +40 -0
- clerk/gui_automation/ui_state_machine/state_machine.py +838 -0
- clerk/models/remote_device.py +7 -0
- clerk/utils/__init__.py +0 -0
- clerk/utils/logger.py +118 -0
- clerk/utils/save_artifact.py +35 -0
- {clerk_sdk-0.1.9.dist-info → clerk_sdk-0.2.0.dist-info}/METADATA +11 -1
- clerk_sdk-0.2.0.dist-info/RECORD +48 -0
- clerk_sdk-0.1.9.dist-info/RECORD +0 -15
- {clerk_sdk-0.1.9.dist-info → clerk_sdk-0.2.0.dist-info}/WHEEL +0 -0
- {clerk_sdk-0.1.9.dist-info → clerk_sdk-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {clerk_sdk-0.1.9.dist-info → clerk_sdk-0.2.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
from typing import Literal, Self, Union, List, Optional
|
|
2
|
+
from pydantic import BaseModel, Field, model_validator
|
|
3
|
+
from ..client_actor import get_screen
|
|
4
|
+
from ..exceptions.modality.exc import TargetModalityError
|
|
5
|
+
from ..action_model.model import (
|
|
6
|
+
ImageB64,
|
|
7
|
+
Coords,
|
|
8
|
+
Screenshot,
|
|
9
|
+
Anchor,
|
|
10
|
+
)
|
|
11
|
+
import os
|
|
12
|
+
|
|
13
|
+
# Type alias restricting a modality tag to the literal strings "icon" or "text".
ModalityType = Union[Literal["icon"], Literal["text"]]
# Directory that target image names are joined onto (see `to_full_img_path`).
# Evaluated once at import time from the current working directory.
TARGET_IMAGES_PATH = os.path.join(os.getcwd(), "targets")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def to_full_img_path(img: Union[str, ImageB64]) -> str:
    """
    Resolve a target image name against `TARGET_IMAGES_PATH`.

    Args:
        img: Either a string, which is joined onto `TARGET_IMAGES_PATH`, or an
            `ImageB64` object, for which no path exists.

    Returns:
        The joined path for string input. For `ImageB64` input an empty string is
        returned, which is later evaluated as `False` in `_is_path`.
    """
    if not isinstance(img, ImageB64):
        return os.path.join(TARGET_IMAGES_PATH, img)
    # Already an in-memory image: hand back a value that never names a file.
    return ""
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Closed set of identifiers accepted by the `action_type` field of `BaseAction`.
ActionTypes = Literal[
    "left_click",
    "right_click",
    "middle_click",
    "double_click",
    "send_keys",
    "press_keys",
    "wait_for",
    "open_app",
    "force_close_app",
    "maximize_window",
    "minimize_window",
    "close_window",
    "activate_window",
    "save_files",
    "delete_files",
    "get_file",
    "get_text",
    "paste_text",
    "scroll",
]
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class BaseAction(BaseModel):
    """
    BaseAction class represents a base model for UI actions.

    Attributes:
        action_type (ActionTypes): Type of UI action to execute.
        target_name (Optional[str]): A readable representation of a target which is set automatically when validating the target and is used in the AM for logging.
        target (Optional[Union[str, ImageB64]]): Target of the UI action. It can be provided as a string, an instance of the ImageB64 class, or a path to an image.
        anchors (List[Anchor]): List of anchor points for the UI action.
        click_offset (List[int]): `[x, y]` pixel offset added to the computed center coordinates. Set through `offset()`.
        is_awaited (bool): A flag to signal whether the target should appear immediately or is awaited. Should be set to `True` in WaitFor
        widget_bbox: (Optional[Coords]): The bounding box coordinates of the widget. If set, the call to the action module will be bypassed.

    Methods:
        validate_target_and_set_name() -> Self:
            Model validator which resolves the target and fills in `target_name`.

        _get_center_coords(bbox: Coords) -> Union[List[int], List[float]]:
            Returns the center coordinates of a bounding box, shifted by `click_offset`.

        _prepare_payload():
            Prepares the payload for the UI action.

        left(anchor: Union[str, ImageB64]):
            Adds a left anchor point to the list of anchors.

        right(anchor: Union[str, ImageB64]):
            Adds a right anchor point to the list of anchors.

        above(anchor: Union[str, ImageB64]):
            Adds an above anchor point to the list of anchors.

        below(anchor: Union[str, ImageB64]):
            Adds a below anchor point to the list of anchors.

        offset(x: int = 0, y: int = 0):
            Sets the pixel offset applied to the click position.

        _is_path(value: str) -> bool:
            Checks if a given value is an existing file path.

        do():
            Placeholder method for executing the UI action.
    """

    action_type: ActionTypes = Field(..., description="Type of ui action to execute")
    target_name: Optional[str] = Field(default=None)
    target: Optional[Union[str, ImageB64]] = Field(default=None)
    anchors: List[Anchor] = []
    click_offset: List[int] = [0, 0]
    is_awaited: bool = False
    widget_bbox: Optional[Coords] = None

    @model_validator(mode="after")
    def validate_target_and_set_name(self) -> Self:
        """
        Resolve the target and derive `target_name` from it.

        A string target whose path under `TARGET_IMAGES_PATH` is an existing file is
        replaced by the `ImageB64` loaded from that file; any other string is kept
        as-is. In both cases the provided string becomes `target_name`.

        Returns:
            Self: The validated instance.

        Raises:
            TargetModalityError: If the target is neither a string, an `ImageB64`, nor `None`.
        """
        target = self.target
        if isinstance(target, str):  # either text target or img path
            full_image_path = to_full_img_path(target)
            if self._is_path(full_image_path):
                self.target = ImageB64.from_path(full_image_path)
            # The provided string (text or image name) doubles as the name for logging.
            self.target_name = target
            return self
        if isinstance(target, ImageB64):
            self.target_name = "provided as obj with value in b64"
            return self
        if target is None:
            self.target_name = "not_provided"
            return self
        raise TargetModalityError()

    def _get_center_coords(self, bbox: Coords) -> Union[List[int], List[float]]:
        """
        Compute the center of `bbox`, shifted by `click_offset`.

        Args:
            bbox (Coords): Bounding box; `bbox.value` is indexed as `[x0, y0, x1, y1]`.

        Returns:
            Union[List[int], List[float]]: `[xcenter, ycenter]`.
        """
        w: Union[int, float] = bbox.value[2] - bbox.value[0]
        h: Union[int, float] = bbox.value[3] - bbox.value[1]
        # Floor division keeps integer inputs integral.
        xcenter: Union[int, float] = bbox.value[0] + w // 2 + self.click_offset[0]
        ycenter: Union[int, float] = bbox.value[1] + h // 2 + self.click_offset[1]
        return [xcenter, ycenter]

    def _prepare_payload(self):
        """
        Build the `Screenshot` payload for this action.

        The current screen is fetched with `get_screen()` and bundled with the
        target, anchors, awaited flag and target name of this action.

        Returns:
            Screenshot: The assembled payload.
        """
        payload: Screenshot = Screenshot(
            screen_b64=ImageB64(value=get_screen()),
            target=self.target,
            anchors=self.anchors,
            is_awaited=self.is_awaited,
            target_name=self.target_name,
        )
        return payload

    def _add_anchor(
        self,
        anchor: Union[str, ImageB64],
        relation: Literal["left", "right", "above", "below"],
    ):
        """
        Shared implementation of `left`, `right`, `above` and `below`.

        A string anchor whose path under `TARGET_IMAGES_PATH` is an existing file is
        loaded as an `ImageB64`; anything else is stored unchanged.

        Args:
            anchor (Union[str, ImageB64]): Anchor text, image name or image object.
            relation: Relation recorded on the created `Anchor`.

        Returns:
            BaseAction: This instance, so calls can be chained.
        """
        full_image_path = to_full_img_path(anchor)
        value: Union[str, ImageB64] = (
            ImageB64.from_path(full_image_path)
            if self._is_path(full_image_path)
            else anchor
        )
        self.anchors.append(Anchor(value=value, relation=relation))
        return self

    def left(self, anchor: Union[str, ImageB64]):
        """Add an anchor with relation "left" and return this instance."""
        return self._add_anchor(anchor, "left")

    def right(self, anchor: Union[str, ImageB64]):
        """Add an anchor with relation "right" and return this instance."""
        return self._add_anchor(anchor, "right")

    def above(self, anchor: Union[str, ImageB64]):
        """Add an anchor with relation "above" and return this instance."""
        return self._add_anchor(anchor, "above")

    def below(self, anchor: Union[str, ImageB64]):
        """Add an anchor with relation "below" and return this instance."""
        return self._add_anchor(anchor, "below")

    def offset(self, x: int = 0, y: int = 0):
        """
        Add a pixel offset to the click action (coordinates start at the top-left corner of the screen).
        Args:
            x (int): Horizontal offset (left to right).
            y (int): Vertical offset (top to bottom).
        Returns:
            BaseAction: BaseAction instance with the updated click offset.
        Usage:
            # click 10 pixels to the right and 20 pixels above the center of the target.
            LeftClick(target="target").offset(x=10, y=-20).do()
        """
        self.click_offset = [x, y]
        return self

    @staticmethod
    def _is_path(value: str) -> bool:
        """
        Tell whether `value` names an existing regular file.

        Args:
            value (str): Candidate file path.

        Returns:
            bool: Result of `os.path.isfile(value)`.
        """
        return os.path.isfile(value)

    def do(self):
        """Placeholder method for executing the UI action."""
        pass
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import Optional
|
|
3
|
+
from backoff._typing import Details
|
|
4
|
+
|
|
5
|
+
from clerk.utils.save_artifact import save_artifact
|
|
6
|
+
from ..client_actor import get_screen
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Lower-cased spellings accepted by `strtobool`, mapped to their boolean value.
_MAP = {
    "y": True,
    "yes": True,
    "t": True,
    "true": True,
    "on": True,
    "1": True,
    "n": False,
    "no": False,
    "f": False,
    "false": False,
    "off": False,
    "0": False,
}


def strtobool(value):
    """
    Convert a textual truth value into a `bool`.

    The value is passed through `str()` and lower-cased before the lookup, so
    non-string inputs such as `1` or `True` are accepted as well.

    Args:
        value: The value to interpret.

    Returns:
        bool: `True` for y/yes/t/true/on/1, `False` for n/no/f/false/off/0.

    Raises:
        ValueError: If the value is not one of the recognised spellings.
    """
    try:
        return _MAP[str(value).lower()]
    except KeyError:
        # `from None` keeps the internal dictionary miss out of the traceback;
        # callers only need to see the ValueError.
        raise ValueError('"{}" is not a valid bool value'.format(value)) from None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def save_screenshot(filename: str, sub_folder: Optional[str] = None) -> str:
    """
    Capture the current screen and store it as an artifact.

    The base64 representation of the screen is retrieved from the target environment
    with `get_screen`, UTF-8 encoded, and handed to `save_artifact` together with the
    file name and the optional subfolder.

    Args:
        filename (str): The name of the file to save the screenshot as.
        sub_folder (str, optional): The name of the subfolder the screenshot will be saved in. Defaults to None.

    Returns:
        str: The value returned by `save_artifact` (the file path of the saved screenshot).

    """
    # NOTE(review): the base64 text itself is written, not the decoded image
    # bytes — confirm this is what `save_artifact` expects.
    encoded_screen: bytes = get_screen().encode("utf-8")
    return save_artifact(
        filename=filename,
        file_bytes=encoded_screen,
        subfolder=sub_folder,
    )
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def maybe_engage_operator_ui_action(details: Details) -> None:
    """
    Decide whether a failed UI action should be handed over to an operator.

    The `USE_OPERATOR` environment variable (default "False") is parsed with
    `strtobool`. Operator engagement itself is not implemented yet, so this
    function always raises.

    :param details: A dictionary containing the details of the exception raised (https://pypi.org/project/backoff/)
    :returns: None
    :raises: The exception stored under `details["exception"]` when the operator is
        disabled; `NotImplementedError` when it is enabled; `ValueError` (from
        `strtobool`) when `USE_OPERATOR` holds an unrecognised value.
    """
    operator_enabled = strtobool(os.getenv("USE_OPERATOR", default="False"))
    if operator_enabled:
        raise NotImplementedError("Feature not yet implemented")

    # Operator disabled: surface the original failure unchanged.
    raise details["exception"]  # type: ignore
|
|
File without changes
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
from typing import Dict, Union, List, Tuple, Type, Literal
|
|
2
|
+
from pydantic import BaseModel, Field
|
|
3
|
+
|
|
4
|
+
from clerk.gui_automation.client import GUIVisionClerk
|
|
5
|
+
|
|
6
|
+
from ..client_actor.client_actor import get_screen
|
|
7
|
+
from .models import (
|
|
8
|
+
States,
|
|
9
|
+
BaseState,
|
|
10
|
+
ExpectedState,
|
|
11
|
+
TargetWithAnchor,
|
|
12
|
+
Answer,
|
|
13
|
+
ActionString,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Vision(BaseModel):
    """
    Provides methods for interacting with a GUI for UI automation purposes. This class includes methods for finding
    targets on the screen, verifying the GUI's state, answering questions about the screen, classifying the state of
    the GUI, and generating action strings based on prompts.

    Attributes:
        response_models: A dictionary mapping task names to their corresponding response model classes.
        use_ocr: A boolean indicating whether OCR should be included in the model call to increase precision with small details.
        image_resolution: A parameter defining the resolution of the image used in the vision model.
        clerk_client: The `GUIVisionClerk` client every request is delegated to.

    Methods:
        find_target(target_prompt: str, output_model: Type[TargetWithAnchor] = TargetWithAnchor) -> TargetWithAnchor:
            Finds a target in the current screen based on the provided prompt. Limited to one-word targets.

        verify_state(possible_states: States, output_model: Type[BaseState] = BaseState) -> BaseState:
            Verifies the current state of the GUI against a set of possible states.

        answer(question: str, output_model: Type[BaseModel] = Answer) -> BaseModel:
            Answers a question about the current screen using the specified model for the response.

        classify_state(possible_states: List[Dict[str, str]], output_model: Type[BaseState] = BaseState) -> Union[BaseModel, Tuple[str, str]]:
            Classifies the current state of the GUI into one of the provided possible states. Returns either a model instance or a tuple of the ID and description.

        write_action_string(action_prompt: str, output_model: Type[ActionString] = ActionString) -> ActionString:
            Generates an action string based on the provided prompt.

    Note: Each method that interacts with the screen can optionally include OCR data to improve accuracy, controlled by the `use_ocr` attribute.
    """

    response_models: Dict[str, Type[BaseModel]] = {
        "find_target": TargetWithAnchor,
        "answer": Answer,
        "verify_state": BaseState,
        "classify_state": BaseState,
        "write_action_string": ActionString,
    }
    use_ocr: bool = Field(
        default=False,
        description="Whether OCR of the screen should be included with in the model call (increases precision with "
        "small details).",
    )
    image_resolution: Literal["high", "low"] = "high"
    # NOTE(review): the default client is constructed when this class body is executed.
    clerk_client: GUIVisionClerk = GUIVisionClerk()

    class Config:
        arbitrary_types_allowed = True

    @staticmethod
    def _sort_into_state_class(
        model_response: BaseState, possible_states: States
    ) -> BaseState:
        """
        Sorts a model response into a corresponding state class.
        Args:
            model_response: The response from the model.
            possible_states: A collection of possible states.
        Returns:
            An instance of the state class whose registered `id` equals the response's
            `id`, carrying the response's description; `ExpectedState` if none matches.
        """
        for state_class, state_object in possible_states.possible_states.items():
            if model_response.id == state_object.id:
                return state_class(description=model_response.description)
        # return expected by default
        return ExpectedState(description=model_response.description)

    def find_target(
        self,
        target_prompt: str,
        output_model: Type[TargetWithAnchor] = TargetWithAnchor,
    ) -> TargetWithAnchor:
        """
        Finds a target in the current screen. Currently limited to one word targets.
        Args:
            target_prompt: The prompt for the target to find.
            output_model: The model class the response is checked against.
        Returns:
            TargetWithAnchor object with the response from the model. Access the target with the "target" attribute.
        """
        screen_b64 = get_screen()

        target = self.clerk_client.find_target(
            screen_b64,
            self.use_ocr,
            target_prompt,
        )
        assert isinstance(target, output_model)
        return target

    def verify_state(
        self, possible_states: States, output_model: Type[BaseState] = BaseState
    ) -> BaseState:
        """
        Verifies the current state of the GUI.
        Args:
            possible_states: The possible states of the GUI (State class incl. screen examples).
            output_model: Accepted for signature parity with the other methods; not used by this method.
        Returns:
            The current state of the GUI (BaseState or a subclass of BaseState)
        """
        screen_b64 = get_screen()
        # Delegate to the client, like every sibling method does. Calling
        # `self.verify_state` here would re-enter this method with the wrong
        # arguments and fail with a TypeError.
        # NOTE(review): assumes `GUIVisionClerk` exposes `verify_state` — confirm.
        state = self.clerk_client.verify_state(
            screen_b64,
            self.use_ocr,
            possible_states,
        )
        assert isinstance(state, BaseState)
        sorted_state = self._sort_into_state_class(state, possible_states)
        return sorted_state

    def answer(
        self, question: str, output_model: Type[BaseModel] = Answer
    ) -> BaseModel:
        """
        Answers a question about the current screen.
        Args:
            question: The question to ask about the current screen.
            output_model: The model to use for the response; forwarded to the client.
        Returns:
            An instance of `output_model` with the response from the model. With the default `Answer`, access the text with the "answer" attribute.
        """
        screen_b64 = get_screen()
        answer = self.clerk_client.answer(
            screen_b64, self.use_ocr, question, output_model
        )
        assert isinstance(answer, output_model)
        return answer

    def classify_state(
        self,
        possible_states: List[Dict[str, str]],
        output_model: Type[BaseState] = BaseState,
    ) -> Union[BaseModel, Tuple[str, str]]:
        """
        Classify the current state of the GUI into one of the provided classes.
        Args:
            possible_states: The possible states of the GUI.
            output_model: The model to use for the response. Pass `None` explicitly to receive a tuple instead.
        Returns:
            The current state of the GUI (BaseState class if an output model was provided; access class key with the "id" attribute), otherwise Tuple of the id and description of the default model.
        """
        screen_b64 = get_screen()
        state = self.clerk_client.classify_state(
            screen_b64, self.use_ocr, possible_states
        )
        # if output_model is provided, return the model, otherwise return the id and description of the default model
        if output_model is not None:
            return state
        assert isinstance(state, BaseState)
        return state.id, state.description

    def write_action_string(
        self, action_prompt: str, output_model: Type[ActionString] = ActionString
    ) -> ActionString:
        """
        Writes an action string based on the provided prompt.
        Args:
            action_prompt: The prompt for the action to write.
            output_model: Accepted for signature parity with the other methods; the result is checked against `ActionString`.
        Returns:
            The action string.
        """
        screen_b64 = get_screen()
        action_string = self.clerk_client.write_action_string(
            screen_b64, self.use_ocr, action_prompt
        )
        assert isinstance(action_string, ActionString)
        return action_string
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
from pydantic import BaseModel, field_validator, model_validator
|
|
2
|
+
from typing import List, Dict, Optional
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class BaseState(BaseModel):
    """
    Base model describing one state an application can be in.

    Attributes:
        id (str): The ID of the state.
        description (str): The description of the state.
        screenshots (List): A list of dictionaries, each holding the "bucket_name" and "file_name" of a screenshot associated with the state.

    Methods:
        add_screenshot(bucket_name: str, file_name: str) -> None:
            Adds a screenshot to the state.
    """

    id: str
    description: str
    screenshots: List = []

    def add_screenshot(self, bucket_name: str, file_name: str):
        """Append a screenshot reference (bucket and file name) to `screenshots`."""
        screenshot_ref = {"bucket_name": bucket_name, "file_name": file_name}
        self.screenshots.append(screenshot_ref)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class LoadingState(BaseState):
    """
    State used while the application is loading.

    Attributes:
        id (str): The ID of the loading state; defaults to "loading".
        description (str): The description of the loading state.

    """

    id: str = "loading"
    description: str = (
        "the application is loading. Typically indicated by a spinner or "
        "progress bar, greyed out UI"
    )
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class ErrorState(BaseState):
    """
    State used when the application shows an error.

    Attributes:
        id (str): The ID of the error state; defaults to "error".
        description (str): The description of the error state.

    """

    id: str = "error"
    description: str = (
        "the application is in an error state. Typically indicated by "
        "an error message, or a red banner"
    )
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class ExpectedState(BaseState):
    """
    State used when the application looks the way it is expected to.

    Attributes:
        id (str): The ID of the state; defaults to "expected".
        description (str): The description of the state.

    """

    id: str = "expected"
    description: str = "the application is in an expected state, as in the provided screenshot"
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class States(BaseModel):
    """
    Collection of the states an application can be in.

    Attributes:
        possible_states (Dict[type, BaseState]): A dictionary mapping state types to their corresponding instances.
        bucket_name (str): The name of the bucket where screenshots are stored.
        process_name (str): The name of the process associated with the states.

    Methods:
        add_screenshot(state_type: type[BaseState], file_name: str) -> None:
            Adds a screenshot to the specified state.

        add_description(state_type: type[BaseState], description: str) -> None:
            Updates the description of the specified state.
    """

    possible_states: Dict[type, BaseState] = {
        LoadingState: LoadingState(),
        ErrorState: ErrorState(),
        ExpectedState: ExpectedState(),
    }
    bucket_name: str
    process_name: str

    def add_screenshot(self, state_type: type[BaseState], file_name: str):
        """
        Register a screenshot on the state stored under `state_type`.

        The file name is prefixed with `process_name` and a slash.

        Raises:
            ValueError: If `state_type` is not a key of `possible_states`.
        """
        matched_state = self.possible_states.get(state_type)
        if matched_state is not None:
            matched_state.add_screenshot(
                bucket_name=f"{self.bucket_name}",
                file_name=f"{self.process_name}/{file_name}",
            )
            return
        raise ValueError("state is not found in possible states")

    def add_description(self, state_type: type[BaseState], description: str):
        """
        Replace the description of the state stored under `state_type`.

        Raises:
            ValueError: If `state_type` is not a key of `possible_states`.
        """
        matched_state = self.possible_states.get(state_type)
        if matched_state is not None:
            matched_state.description = description
            return
        raise ValueError("state is not found in possible states")
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class TargetWithAnchor(BaseModel):
    """
    TargetWithAnchor class represents a target with an anchor for an application.

    Attributes:
        target (str): The target element or object.
        anchor (str): The anchor element or object that the target is related to. Default is an empty string.
        relation (str): The relation between the target and the anchor. Default is an empty string.

    Methods:
        retain_one_word(v) -> str:
            Validator function that retains only the last word of the target string.

    """

    target: str
    anchor: str = ""
    relation: str = ""

    @field_validator("target", mode="before")
    @classmethod
    def retain_one_word(cls, v):
        """
        Reduce the incoming target to its last whitespace-separated word.

        Args:
            v: Raw value supplied for `target`, before type validation.

        Returns:
            The last word of `v`; an empty string if `v` contains no word. Non-string
            input is returned unchanged so that the regular `str` validation of the
            field reports it.
        """
        if not isinstance(v, str):
            # Leave the type error to the field's own validation instead of
            # failing here with an AttributeError on `.split`.
            return v
        # Splitting on any whitespace ignores leading/trailing blanks, so a value
        # like "Submit " yields "Submit" rather than an empty string.
        words = v.split()
        return words[-1] if words else ""
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
class Answer(BaseModel):
    """
    Response to a question, or the result of an operation.

    Attributes:
        answer (str): The answer or response.
        success (bool): Indicates whether the operation was successful or not.

    """

    answer: str
    success: bool
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class ActionString(BaseModel):
    """
    String form of an action in an application.

    Attributes:
        action_string (str): The string representation of the action.
        comment (str, optional): An optional comment or description for the action.

    Methods:
        ensure_format(v) -> str:
            Validator function that ensures the action string has the correct format.

    """

    action_string: str
    comment: Optional[str] = None

    @field_validator("action_string", mode="before")
    @classmethod
    def ensure_format(cls, v):
        """
        Check the shape of an action string before type validation.

        A valid value is a string that starts with "NoAction", or one that starts
        with "LeftClick" and ends with ".do()".

        Raises:
            ValueError: If `v` is not a string or violates the format above.
        """
        if not isinstance(v, str):
            raise ValueError("Action string must be a string")
        is_no_action = v.startswith("NoAction")
        if not (is_no_action or v.startswith("LeftClick")):
            raise ValueError("Action string must start with 'LeftClick' or 'NoAction'")
        # "NoAction" strings are exempt from the trailing ".do()" requirement.
        if not (is_no_action or v.endswith(".do()")):
            raise ValueError("Action string must end with '.do()'")
        return v
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from .state_machine import ScreenPilot
|
|
2
|
+
from .decorators import state, transition, rollback
|
|
3
|
+
from .exceptions import (
|
|
4
|
+
BusinessException,
|
|
5
|
+
ScreenPilotException,
|
|
6
|
+
SuccessfulCompletion,
|
|
7
|
+
RollbackCompleted,
|
|
8
|
+
ScreenPilotOutcome,
|
|
9
|
+
CourseCorrectionImpossible,
|
|
10
|
+
complete_ui_automation,
|
|
11
|
+
)
|