optexity 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optexity/cli.py +1 -1
- optexity/examples/__init__.py +0 -0
- optexity/examples/add_example.py +88 -0
- optexity/examples/download_pdf_url.py +29 -0
- optexity/examples/extract_price_stockanalysis.py +44 -0
- optexity/examples/file_upload.py +59 -0
- optexity/examples/i94.py +126 -0
- optexity/examples/i94_travel_history.py +126 -0
- optexity/examples/peachstate_medicaid.py +201 -0
- optexity/examples/supabase_login.py +75 -0
- optexity/inference/__init__.py +0 -0
- optexity/inference/agents/__init__.py +0 -0
- optexity/inference/agents/error_handler/__init__.py +0 -0
- optexity/inference/agents/error_handler/error_handler.py +39 -0
- optexity/inference/agents/error_handler/prompt.py +60 -0
- optexity/inference/agents/index_prediction/__init__.py +0 -0
- optexity/inference/agents/index_prediction/action_prediction_locator_axtree.py +45 -0
- optexity/inference/agents/index_prediction/prompt.py +14 -0
- optexity/inference/agents/select_value_prediction/__init__.py +0 -0
- optexity/inference/agents/select_value_prediction/prompt.py +20 -0
- optexity/inference/agents/select_value_prediction/select_value_prediction.py +39 -0
- optexity/inference/agents/two_fa_extraction/__init__.py +0 -0
- optexity/inference/agents/two_fa_extraction/prompt.py +23 -0
- optexity/inference/agents/two_fa_extraction/two_fa_extraction.py +47 -0
- optexity/inference/child_process.py +251 -0
- optexity/inference/core/__init__.py +0 -0
- optexity/inference/core/interaction/__init__.py +0 -0
- optexity/inference/core/interaction/handle_agentic_task.py +79 -0
- optexity/inference/core/interaction/handle_check.py +57 -0
- optexity/inference/core/interaction/handle_click.py +79 -0
- optexity/inference/core/interaction/handle_command.py +261 -0
- optexity/inference/core/interaction/handle_input.py +76 -0
- optexity/inference/core/interaction/handle_keypress.py +16 -0
- optexity/inference/core/interaction/handle_select.py +109 -0
- optexity/inference/core/interaction/handle_select_utils.py +132 -0
- optexity/inference/core/interaction/handle_upload.py +59 -0
- optexity/inference/core/interaction/utils.py +81 -0
- optexity/inference/core/logging.py +406 -0
- optexity/inference/core/run_assertion.py +55 -0
- optexity/inference/core/run_automation.py +463 -0
- optexity/inference/core/run_extraction.py +240 -0
- optexity/inference/core/run_interaction.py +254 -0
- optexity/inference/core/run_python_script.py +20 -0
- optexity/inference/core/run_two_fa.py +120 -0
- optexity/inference/core/two_factor_auth/__init__.py +0 -0
- optexity/inference/infra/__init__.py +0 -0
- optexity/inference/infra/browser.py +455 -0
- optexity/inference/infra/browser_extension.py +20 -0
- optexity/inference/models/__init__.py +22 -0
- optexity/inference/models/gemini.py +113 -0
- optexity/inference/models/human.py +20 -0
- optexity/inference/models/llm_model.py +210 -0
- optexity/inference/run_local.py +200 -0
- optexity/schema/__init__.py +0 -0
- optexity/schema/actions/__init__.py +0 -0
- optexity/schema/actions/assertion_action.py +66 -0
- optexity/schema/actions/extraction_action.py +143 -0
- optexity/schema/actions/interaction_action.py +330 -0
- optexity/schema/actions/misc_action.py +18 -0
- optexity/schema/actions/prompts.py +27 -0
- optexity/schema/actions/two_fa_action.py +24 -0
- optexity/schema/automation.py +432 -0
- optexity/schema/callback.py +16 -0
- optexity/schema/inference.py +87 -0
- optexity/schema/memory.py +100 -0
- optexity/schema/task.py +212 -0
- optexity/schema/token_usage.py +48 -0
- optexity/utils/__init__.py +0 -0
- optexity/utils/settings.py +54 -0
- optexity/utils/utils.py +76 -0
- {optexity-0.1.2.dist-info → optexity-0.1.4.dist-info}/METADATA +20 -36
- optexity-0.1.4.dist-info/RECORD +80 -0
- optexity-0.1.2.dist-info/RECORD +0 -11
- {optexity-0.1.2.dist-info → optexity-0.1.4.dist-info}/WHEEL +0 -0
- {optexity-0.1.2.dist-info → optexity-0.1.4.dist-info}/entry_points.txt +0 -0
- {optexity-0.1.2.dist-info → optexity-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {optexity-0.1.2.dist-info → optexity-0.1.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
from playwright.async_api import Locator
|
|
5
|
+
|
|
6
|
+
from optexity.exceptions import AssertLocatorPresenceException
|
|
7
|
+
from optexity.inference.core.interaction.handle_select_utils import (
|
|
8
|
+
SelectOptionValue,
|
|
9
|
+
smart_select,
|
|
10
|
+
)
|
|
11
|
+
from optexity.inference.core.interaction.utils import handle_download
|
|
12
|
+
from optexity.inference.infra.browser import Browser
|
|
13
|
+
from optexity.schema.actions.interaction_action import (
|
|
14
|
+
CheckAction,
|
|
15
|
+
ClickElementAction,
|
|
16
|
+
InputTextAction,
|
|
17
|
+
SelectOptionAction,
|
|
18
|
+
UncheckAction,
|
|
19
|
+
UploadFileAction,
|
|
20
|
+
)
|
|
21
|
+
from optexity.schema.memory import BrowserState, Memory
|
|
22
|
+
from optexity.schema.task import Task
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
async def command_based_action_with_retry(
    action: (
        ClickElementAction
        | InputTextAction
        | SelectOptionAction
        | CheckAction
        | UploadFileAction
        | UncheckAction
    ),
    browser: Browser,
    memory: Memory,
    task: Task,
    max_tries: int,
    max_timeout_seconds_per_try: float,
):
    """Execute ``action`` through its locator command, retrying on failure.

    Each attempt resolves the locator again from ``action.command``. Only the
    first attempt waits for the locator to become visible (failures of that
    wait are ignored); every attempt then checks visibility directly. When
    the locator is visible, the latest entry of ``memory.browser_states`` is
    replaced with a fresh snapshot and the action is dispatched to the
    matching ``*_locator`` helper.

    Args:
        action: The interaction to perform; its concrete type picks the helper.
        browser: Used to resolve the locator and to snapshot the page state.
        memory: Its last browser state is overwritten before the action runs.
        task: Forwarded to the click and select helpers.
        max_tries: Number of attempts.
        max_timeout_seconds_per_try: Per-attempt timeout, also used as the
            pause after a failed attempt.

    Returns:
        ``None`` when the action is skipped (no command or ``skip_command``)
        or succeeds; otherwise a string describing the last error.

    Raises:
        AssertLocatorPresenceException: If all attempts failed and
            ``action.assert_locator_presence`` is set.
    """
    if action.command is None or action.skip_command:
        return

    action_name = action.__class__.__name__
    last_error = None

    logger.debug(f"Executing command-based action: {action_name}")

    for attempt in range(max_tries):
        last_error = None
        try:
            # https://playwright.dev/docs/actionability
            locator = await browser.get_locator_from_command(action.command)
            if attempt == 0:
                try:
                    await locator.wait_for(
                        state="visible", timeout=max_timeout_seconds_per_try * 1000
                    )
                except Exception:
                    # Best effort only: the explicit visibility check below decides.
                    pass

            if not await locator.is_visible():
                await asyncio.sleep(max_timeout_seconds_per_try)
                last_error = "error: locator not visible"
                continue

            # Refresh the most recent browser state right before acting.
            summary = await browser.get_browser_state_summary()
            memory.browser_states[-1] = BrowserState(
                url=summary.url,
                screenshot=summary.screenshot,
                title=summary.title,
                axtree=summary.dom_state.llm_representation(),
            )

            if isinstance(action, ClickElementAction):
                await click_locator(
                    action, locator, browser, memory, task, max_timeout_seconds_per_try
                )
            elif isinstance(action, InputTextAction):
                await input_text_locator(action, locator, max_timeout_seconds_per_try)
            elif isinstance(action, SelectOptionAction):
                await select_option_locator(
                    action, locator, browser, memory, task, max_timeout_seconds_per_try
                )
            elif isinstance(action, CheckAction):
                await check_locator(
                    action, locator, max_timeout_seconds_per_try, browser
                )
            elif isinstance(action, UncheckAction):
                await uncheck_locator(
                    action, locator, max_timeout_seconds_per_try, browser
                )
            elif isinstance(action, UploadFileAction):
                await upload_file_locator(action, locator)

            logger.debug(f"{action_name} successful on try {attempt + 1}")
            return
        except Exception as e:
            last_error = f"error: {e}"
            await asyncio.sleep(max_timeout_seconds_per_try)

    # Reached only when no attempt succeeded (including max_tries == 0).
    if last_error is None:
        last_error = "error in executing command"
    logger.debug(f"{action_name} failed after {max_tries} tries: {last_error}")

    if last_error and action.assert_locator_presence:
        logger.debug(
            f"Error in {action_name} with assert_locator_presence: {action_name}: {last_error}"
        )
        raise AssertLocatorPresenceException(
            message=f"Error in {action_name} with assert_locator_presence: {action_name}",
            original_error=last_error,
            command=action.command,
        )
    return last_error
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
async def click_locator(
    click_element_action: ClickElementAction,
    locator: Locator,
    browser: Browser,
    memory: Memory,
    task: Task,
    max_timeout_seconds_per_try: float,
):
    """Click (or double-click) ``locator``, optionally capturing a download.

    When ``click_element_action.expect_download`` is set, the click is handed
    to ``handle_download`` together with the configured download filename;
    otherwise it is performed directly.
    """

    async def _perform_click():
        # Pick the Playwright call matching the requested click type.
        if click_element_action.double_click:
            click = locator.dblclick
        else:
            click = locator.click
        await click(no_wait_after=True, timeout=max_timeout_seconds_per_try * 1000)

    if not click_element_action.expect_download:
        await _perform_click()
        return

    await handle_download(
        _perform_click, memory, browser, task, click_element_action.download_filename
    )
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
async def input_text_locator(
    input_text_action: InputTextAction,
    locator: Locator,
    max_timeout_seconds_per_try: float,
):
    """Enter the action's text into ``locator`` and optionally press Enter.

    ``fill_or_type == "fill"`` uses ``locator.fill``; any other value uses
    ``locator.type``.
    """
    use_fill = input_text_action.fill_or_type == "fill"
    enter_text = locator.fill if use_fill else locator.type

    await enter_text(
        input_text_action.input_text,
        no_wait_after=True,
        timeout=max_timeout_seconds_per_try * 1000,
    )

    if input_text_action.press_enter:
        await locator.press("Enter")
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
async def check_locator(
    action: CheckAction,
    locator: Locator,
    max_timeout_seconds_per_try: float,
    browser: Browser,
):
    """Leave the element checked by unchecking it first and then checking it.

    The locator is resolved again from ``action.command`` after a one-second
    pause before the final ``check`` call.
    NOTE(review): the uncheck-then-check sequence presumably forces a state
    change on already-checked boxes — confirm the intent with the author.
    """
    call_options = {
        "no_wait_after": True,
        "timeout": max_timeout_seconds_per_try * 1000,
    }

    await locator.uncheck(**call_options)
    await asyncio.sleep(1)

    refreshed_locator = await browser.get_locator_from_command(action.command)
    await refreshed_locator.check(**call_options)
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
async def uncheck_locator(
    action: UncheckAction,
    locator: Locator,
    max_timeout_seconds_per_try: float,
    browser: Browser,
):
    """Leave the element unchecked by checking it first and then unchecking it.

    The locator is resolved again from ``action.command`` after a one-second
    pause before the final ``uncheck`` call.
    NOTE(review): the check-then-uncheck sequence presumably forces a state
    change on already-unchecked boxes — confirm the intent with the author.
    """
    call_options = {
        "no_wait_after": True,
        "timeout": max_timeout_seconds_per_try * 1000,
    }

    await locator.check(**call_options)
    await asyncio.sleep(1)

    refreshed_locator = await browser.get_locator_from_command(action.command)
    await refreshed_locator.uncheck(**call_options)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
async def upload_file_locator(upload_file_action: UploadFileAction, locator: Locator):
    """Attach the action's ``file_path`` to the file input behind ``locator``."""
    file_path = upload_file_action.file_path
    await locator.set_input_files(file_path)
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
async def select_option_locator(
    select_option_action: SelectOptionAction,
    locator: Locator,
    browser: Browser,
    memory: Memory,
    task: Task,
    max_timeout_seconds_per_try: float,
):
    """Select option(s) on the ``<select>`` behind ``locator``.

    The element's options are read from the page, matched against
    ``select_option_action.select_values`` with ``smart_select``, and the
    resulting values are passed to ``locator.select_option``. When
    ``select_option_action.expect_download`` is set, the selection is run
    through ``handle_download`` with the configured download filename.
    """

    async def _actual_select_option():
        # Read every option's value and visible label straight from the DOM.
        options: list[dict[str, str]] = await locator.evaluate(
            """
            sel => Array.from(sel.options).map(o => ({
                value: o.value,
                label: o.label || o.textContent
            }))
            """
        )

        select_option_values = [
            SelectOptionValue(value=o["value"], label=o["label"]) for o in options
        ]

        # smart_select takes (options, patterns, memory). Passing the raw
        # option dicts as an extra positional argument raises TypeError.
        matched_values = await smart_select(
            select_option_values, select_option_action.select_values, memory
        )

        logger.debug(
            f"Matched values for {select_option_action.command}: {matched_values}"
        )

        await locator.select_option(
            matched_values,
            no_wait_after=True,
            timeout=max_timeout_seconds_per_try * 1000,
        )

    if select_option_action.expect_download:
        await handle_download(
            _actual_select_option,
            memory,
            browser,
            task,
            select_option_action.download_filename,
        )
    else:
        await _actual_select_option()
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import re
|
|
3
|
+
|
|
4
|
+
from optexity.inference.core.interaction.handle_command import (
|
|
5
|
+
command_based_action_with_retry,
|
|
6
|
+
)
|
|
7
|
+
from optexity.inference.core.interaction.utils import get_index_from_prompt
|
|
8
|
+
from optexity.inference.infra.browser import Browser
|
|
9
|
+
from optexity.schema.actions.interaction_action import InputTextAction
|
|
10
|
+
from optexity.schema.memory import Memory
|
|
11
|
+
from optexity.schema.task import Task
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
async def handle_input_text(
    input_text_action: InputTextAction,
    task: Task,
    memory: Memory,
    browser: Browser,
    max_timeout_seconds_per_try: float,
    max_tries: int,
):
    """Type text into an element, command first and prompt as fallback.

    The step is skipped when ``input_text`` is still a placeholder of the form
    ``{name[0]}``. Otherwise the command-based path runs (when a command is
    set and not skipped); if it reports an error, or was not attempted, the
    prompt-based path runs unless ``skip_prompt`` is set.
    """
    # {some english chars [0]}
    placeholder_pattern = re.compile(r"^\{([A-Za-z_][A-Za-z0-9_]*)\[(\d+)\]\}$")

    if placeholder_pattern.match(input_text_action.input_text) is not None:
        logger.debug(
            "Skipping input text because input variable was not present for this step"
        )
        return

    if input_text_action.command and not input_text_action.skip_command:
        command_error = await command_based_action_with_retry(
            input_text_action,
            browser,
            memory,
            task,
            max_tries,
            max_timeout_seconds_per_try,
        )
        if command_error is None:
            # The command-based path succeeded; nothing left to do.
            return

    if input_text_action.skip_prompt:
        return

    logger.debug(
        f"Executing prompt-based action: {input_text_action.__class__.__name__}"
    )
    await input_text_index(input_text_action, browser, memory)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
async def input_text_index(
    input_text_action: InputTextAction, browser: Browser, memory: Memory
):
    """Enter text into the element index predicted from the action's prompt.

    Does nothing when no index is predicted. Any exception is logged and
    swallowed.
    """
    try:
        index = await get_index_from_prompt(
            memory, input_text_action.prompt_instructions, browser
        )
        if index is None:
            return

        model_cls = browser.backend_agent.ActionModel
        action_model = model_cls(
            input={
                "index": int(index),
                "text": input_text_action.input_text,
                "clear": True,
            }
        )
        await browser.backend_agent.multi_act([action_model])
    except Exception as e:
        logger.error(f"Error in input_text_index: {e}")
        return
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from optexity.inference.infra.browser import Browser
|
|
2
|
+
from optexity.schema.actions.interaction_action import KeyPressAction, KeyPressType
|
|
3
|
+
from optexity.schema.memory import Memory
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
async def handle_key_press(
    keypress_action: KeyPressAction,
    memory: Memory,
    browser: Browser,
):
    """Press Enter on the current page when the action's type is ENTER.

    Nothing happens when there is no current page or the key type is anything
    else. ``memory`` is accepted but not used here.
    """
    page = await browser.get_current_page()
    if page is not None and keypress_action.type == KeyPressType.ENTER:
        await page.keyboard.press("Enter")
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from browser_use.dom.serializer.serializer import DOMTreeSerializer
|
|
4
|
+
|
|
5
|
+
from optexity.inference.core.interaction.handle_command import (
|
|
6
|
+
command_based_action_with_retry,
|
|
7
|
+
)
|
|
8
|
+
from optexity.inference.core.interaction.handle_select_utils import (
|
|
9
|
+
SelectOptionValue,
|
|
10
|
+
smart_select,
|
|
11
|
+
)
|
|
12
|
+
from optexity.inference.core.interaction.utils import (
|
|
13
|
+
get_index_from_prompt,
|
|
14
|
+
handle_download,
|
|
15
|
+
)
|
|
16
|
+
from optexity.inference.infra.browser import Browser
|
|
17
|
+
from optexity.schema.actions.interaction_action import SelectOptionAction
|
|
18
|
+
from optexity.schema.memory import Memory
|
|
19
|
+
from optexity.schema.task import Task
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
async def handle_select_option(
    select_option_action: SelectOptionAction,
    task: Task,
    memory: Memory,
    browser: Browser,
    max_timeout_seconds_per_try: float,
    max_tries: int,
):
    """Select dropdown option(s), command first and prompt as fallback.

    The command-based path runs when a command is set and not skipped. If it
    reports an error, or was not attempted, the prompt-based path runs unless
    ``skip_prompt`` is set.
    """
    if select_option_action.command and not select_option_action.skip_command:
        command_error = await command_based_action_with_retry(
            select_option_action,
            browser,
            memory,
            task,
            max_tries,
            max_timeout_seconds_per_try,
        )
        if command_error is None:
            # The command-based path succeeded; nothing left to do.
            return

    if select_option_action.skip_prompt:
        return

    logger.debug(
        f"Executing prompt-based action: {select_option_action.__class__.__name__}"
    )
    await select_option_index(select_option_action, browser, memory, task)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
async def select_option_index(
    select_option_action: SelectOptionAction,
    browser: Browser,
    memory: Memory,
    task: Task,
):
    """Select an option on the element index predicted from the action's prompt.

    The element's options are extracted from the DOM node, matched against
    ``select_option_action.select_values`` with ``smart_select``, and the first
    matched value is selected through the backend agent. Returns early when no
    index, node or option list is available. Any exception is logged and
    swallowed.
    """
    # TODO: either perfect text match or agentic select value prediction
    try:
        index = await get_index_from_prompt(
            memory, select_option_action.prompt_instructions, browser
        )
        if index is None:
            return

        node = await browser.backend_agent.browser_session.get_element_by_index(index)
        if node is None:
            return

        extracted = DOMTreeSerializer(node)._extract_select_options(node)
        if extracted is None:
            return

        candidates = [
            SelectOptionValue(value=o["value"], label=o["text"])
            for o in extracted["all_options"]
        ]

        matched_values = await smart_select(
            candidates, select_option_action.select_values, memory
        )

        async def _actual_select_option():
            # Only the first matched value is passed to the dropdown action.
            dropdown_args = {"index": int(index), "text": matched_values[0]}
            action_model = browser.backend_agent.ActionModel(
                select_dropdown=dropdown_args
            )
            await browser.backend_agent.multi_act([action_model])

        if not select_option_action.expect_download:
            await _actual_select_option()
        else:
            await handle_download(
                _actual_select_option,
                memory,
                browser,
                task,
                select_option_action.download_filename,
            )
    except Exception as e:
        logger.error(f"Error in select_option_index: {e}")
        return
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import re
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
|
|
6
|
+
from optexity.inference.agents.select_value_prediction.select_value_prediction import (
|
|
7
|
+
SelectValuePredictionAgent,
|
|
8
|
+
)
|
|
9
|
+
from optexity.schema.actions.interaction_action import Locator
|
|
10
|
+
from optexity.schema.memory import Memory
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
select_value_prediction_agent = SelectValuePredictionAgent()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class SelectOptionValue(BaseModel):
    """One option of a select element, described by its value and its label."""

    # The option's value; smart_select returns this for a matching option.
    value: str
    # The option's label text; smart_select also matches patterns against it.
    label: str
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def llm_select_match(
    options: list[SelectOptionValue], patterns: list[str], memory: Memory
) -> list[str]:
    """Ask the select-value prediction agent which options match ``patterns``.

    Adds the call's token usage to ``memory.token_usage`` and stores the
    prompt and response on the latest browser state. Only predicted values
    that are actual option values are returned, in the order predicted.
    """
    option_payload = [o.model_dump() for o in options]
    final_prompt, response, token_usage = (
        select_value_prediction_agent.predict_select_value(option_payload, patterns)
    )

    memory.token_usage += token_usage
    latest_state = memory.browser_states[-1]
    latest_state.final_prompt = final_prompt
    latest_state.llm_response = response.model_dump()

    # Drop any predicted value that is not one of the real option values.
    known_values = [o.value for o in options]
    return [value for value in response.matched_values if value in known_values]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def score_match(pat: str, val: str) -> int:
    """Score how well ``pat`` matches ``val``; higher is better.

    Returns 100 for equality, 80 when ``val`` starts with ``pat``, 60 when
    ``pat`` occurs anywhere in ``val``, and 0 otherwise.
    """
    if pat == val:
        return 100
    if val.startswith(pat):
        return 80
    return 60 if pat in val else 0
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _best_scoring_value(processed_pattern: str, candidates: list) -> str | None:
    """Return the value of the first best-scoring candidate, or None if all score 0.

    ``candidates`` is a list of ``(normalized_text, option_value)`` pairs.
    """
    top_score = 0
    top_value = None
    for normalized_text, option_value in candidates:
        current = score_match(processed_pattern, normalized_text)
        if current > top_score:
            top_score, top_value = current, option_value
    return top_value if top_score > 0 else None


async def smart_select(
    options: list[SelectOptionValue], patterns: list[str], memory: Memory
):
    """Resolve ``patterns`` to option values using progressively looser matching.

    Stages, each tried only while nothing has matched yet:
      1. Regex search (for patterns starting with ``^``, ending with ``$`` or
         containing ``.*``) or exact equality, against value and label.
      2. Scored match against normalized option values.
      3. Scored match against normalized option labels.
      4. LLM-based matching via ``llm_select_match``.
      5. Fall back to returning ``patterns`` unchanged.

    Returns:
        A list of matched option values, or ``patterns`` itself as a last resort.
    """
    matched_values = []

    for p in patterns:
        # If pattern contains regex characters, treat as regex
        looks_like_regex = p.startswith("^") or p.endswith("$") or ".*" in p

        if looks_like_regex:
            # Regex pattern: search both the option value and its label.
            regex = re.compile(p)
            matched_values.extend(
                opt.value
                for opt in options
                if regex.search(opt.value) or regex.search(opt.label)
            )
        else:
            # try exact match
            matched_values.extend(
                opt.value for opt in options if opt.value == p or opt.label == p
            )

    if not matched_values:
        # No matches yet: if the (normalized, raw) value pairs are all
        # distinct, try scored matching on the normalized values.
        value_pairs = [(v.value.lower().replace(" ", ""), v.value) for v in options]

        if len(value_pairs) == len(set(value_pairs)):
            for p in patterns:
                winner = _best_scoring_value(p.lower().replace(" ", ""), value_pairs)
                if winner is not None:
                    matched_values.append(winner)

    if not matched_values:
        # Same idea on labels; the match still yields the option's value.
        label_pairs = [(v.label.lower().replace(" ", ""), v.label) for v in options]

        if len(label_pairs) == len(set(label_pairs)):
            label_candidates = [
                (opt.label.lower().replace(" ", ""), opt.value) for opt in options
            ]
            for p in patterns:
                winner = _best_scoring_value(
                    p.lower().replace(" ", ""), label_candidates
                )
                if winner is not None:
                    matched_values.append(winner)

    if not matched_values:
        matched_values = llm_select_match(options, patterns, memory)

    if not matched_values:
        # Last resort: hand the caller its own patterns back.
        matched_values = patterns

    return matched_values
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from optexity.inference.core.interaction.handle_command import (
|
|
4
|
+
command_based_action_with_retry,
|
|
5
|
+
)
|
|
6
|
+
from optexity.inference.core.interaction.utils import get_index_from_prompt
|
|
7
|
+
from optexity.inference.infra.browser import Browser
|
|
8
|
+
from optexity.schema.actions.interaction_action import UploadFileAction
|
|
9
|
+
from optexity.schema.memory import Memory
|
|
10
|
+
from optexity.schema.task import Task
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
async def handle_upload_file(
    upload_file_action: UploadFileAction,
    task: Task,
    memory: Memory,
    browser: Browser,
    max_timeout_seconds_per_try: float,
    max_tries: int,
):
    """Upload a file, command first and prompt as fallback.

    The command-based path runs when a command is set and not skipped. If it
    reports an error, or was not attempted, the prompt-based path runs unless
    ``skip_prompt`` is set.
    """
    if upload_file_action.command and not upload_file_action.skip_command:
        command_error = await command_based_action_with_retry(
            upload_file_action,
            browser,
            memory,
            task,
            max_tries,
            max_timeout_seconds_per_try,
        )
        if command_error is None:
            # The command-based path succeeded; nothing left to do.
            return

    if upload_file_action.skip_prompt:
        return

    logger.debug(
        f"Executing prompt-based action: {upload_file_action.__class__.__name__}"
    )
    await upload_file_index(upload_file_action, browser, memory)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
async def upload_file_index(
    upload_file_action: UploadFileAction, browser: Browser, memory: Memory
):
    """Upload the action's file to the element index predicted from its prompt.

    Does nothing when no index is predicted. Any exception is logged and
    swallowed.
    """
    try:
        index = await get_index_from_prompt(
            memory, upload_file_action.prompt_instructions, browser
        )
        if index is None:
            return

        model_cls = browser.backend_agent.ActionModel
        action_model = model_cls(
            upload_file={"index": index, "path": upload_file_action.file_path}
        )
        await browser.backend_agent.multi_act([action_model])
    except Exception as e:
        logger.error(f"Error in upload_file_index: {e}")
        return
|