optexity 0.1.5__tar.gz → 0.1.5.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {optexity-0.1.5 → optexity-0.1.5.2}/PKG-INFO +3 -9
- {optexity-0.1.5 → optexity-0.1.5.2}/README.md +2 -8
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/__init__.py +6 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/examples/extract_price_stockanalysis.py +3 -2
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/child_process.py +5 -2
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/handle_click.py +5 -2
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/handle_command.py +27 -4
- optexity-0.1.5.2/optexity/inference/core/interaction/handle_hover.py +83 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/handle_input.py +6 -3
- optexity-0.1.5.2/optexity/inference/core/interaction/handle_keypress.py +42 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/handle_select.py +1 -1
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/handle_select_utils.py +11 -1
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/handle_upload.py +3 -3
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/utils.py +4 -2
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/run_assertion.py +8 -4
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/run_automation.py +6 -5
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/run_extraction.py +50 -4
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/run_interaction.py +13 -1
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/run_two_fa.py +12 -7
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/actions/extraction_action.py +9 -3
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/actions/interaction_action.py +38 -4
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/actions/two_fa_action.py +1 -1
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/automation.py +3 -7
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/task.py +6 -2
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity.egg-info/PKG-INFO +3 -9
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity.egg-info/SOURCES.txt +1 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/pyproject.toml +1 -1
- optexity-0.1.5/optexity/inference/core/interaction/handle_keypress.py +0 -16
- {optexity-0.1.5 → optexity-0.1.5.2}/LICENSE +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/cli.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/examples/__init__.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/examples/add_example.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/examples/download_pdf_url.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/examples/file_upload.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/examples/i94.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/examples/i94_travel_history.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/examples/peachstate_medicaid.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/examples/supabase_login.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/exceptions.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/__init__.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/__init__.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/error_handler/__init__.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/error_handler/error_handler.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/error_handler/prompt.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/index_prediction/__init__.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/index_prediction/action_prediction_locator_axtree.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/index_prediction/prompt.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/select_value_prediction/__init__.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/select_value_prediction/prompt.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/select_value_prediction/select_value_prediction.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/two_fa_extraction/__init__.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/two_fa_extraction/prompt.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/two_fa_extraction/two_fa_extraction.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/__init__.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/__init__.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/handle_agentic_task.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/handle_check.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/logging.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/run_python_script.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/two_factor_auth/__init__.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/infra/__init__.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/infra/browser.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/infra/browser_extension.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/models/__init__.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/models/gemini.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/models/human.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/models/llm_model.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/run_local.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/onepassword_integration.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/__init__.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/actions/__init__.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/actions/assertion_action.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/actions/misc_action.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/actions/prompts.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/callback.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/inference.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/memory.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/token_usage.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/test.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/utils/__init__.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/utils/settings.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity/utils/utils.py +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity.egg-info/dependency_links.txt +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity.egg-info/entry_points.txt +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity.egg-info/requires.txt +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/optexity.egg-info/top_level.txt +0 -0
- {optexity-0.1.5 → optexity-0.1.5.2}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: optexity
|
|
3
|
-
Version: 0.1.5
|
|
3
|
+
Version: 0.1.5.2
|
|
4
4
|
Summary: Optexity is a platform for building and running browser and computer agents.
|
|
5
5
|
Author-email: Optexity <founders@optexity.com>
|
|
6
6
|
Requires-Python: >=3.11
|
|
@@ -83,6 +83,7 @@ Install Optexity directly from PyPI:
|
|
|
83
83
|
|
|
84
84
|
```bash
|
|
85
85
|
pip install optexity
|
|
86
|
+
optexity install-browsers
|
|
86
87
|
```
|
|
87
88
|
|
|
88
89
|
**OR**
|
|
@@ -95,6 +96,7 @@ If you want to clone and edit from source:
|
|
|
95
96
|
git clone git@github.com:Optexity/optexity.git
|
|
96
97
|
cd optexity
|
|
97
98
|
pip install -e .
|
|
99
|
+
optexity install-browsers
|
|
98
100
|
```
|
|
99
101
|
|
|
100
102
|
## Set required environment variables:
|
|
@@ -107,14 +109,6 @@ DEPLOYMENT=dev # or "prod" in production
|
|
|
107
109
|
|
|
108
110
|
You can get your free Google Gemini API key from the [Google AI Studio Console](https://aistudio.google.com).
|
|
109
111
|
|
|
110
|
-
## Install required browsers:
|
|
111
|
-
|
|
112
|
-
Install playwright and patchright browsers:
|
|
113
|
-
|
|
114
|
-
```bash
|
|
115
|
-
optexity install-browsers
|
|
116
|
-
```
|
|
117
|
-
|
|
118
112
|
## Recording Your First Automation
|
|
119
113
|
|
|
120
114
|
The fastest way to create an automation is by recording your actions directly in the browser.
|
|
@@ -58,6 +58,7 @@ Install Optexity directly from PyPI:
|
|
|
58
58
|
|
|
59
59
|
```bash
|
|
60
60
|
pip install optexity
|
|
61
|
+
optexity install-browsers
|
|
61
62
|
```
|
|
62
63
|
|
|
63
64
|
**OR**
|
|
@@ -70,6 +71,7 @@ If you want to clone and edit from source:
|
|
|
70
71
|
git clone git@github.com:Optexity/optexity.git
|
|
71
72
|
cd optexity
|
|
72
73
|
pip install -e .
|
|
74
|
+
optexity install-browsers
|
|
73
75
|
```
|
|
74
76
|
|
|
75
77
|
## Set required environment variables:
|
|
@@ -82,14 +84,6 @@ DEPLOYMENT=dev # or "prod" in production
|
|
|
82
84
|
|
|
83
85
|
You can get your free Google Gemini API key from the [Google AI Studio Console](https://aistudio.google.com).
|
|
84
86
|
|
|
85
|
-
## Install required browsers:
|
|
86
|
-
|
|
87
|
-
Install playwright and patchright browsers:
|
|
88
|
-
|
|
89
|
-
```bash
|
|
90
|
-
optexity install-browsers
|
|
91
|
-
```
|
|
92
|
-
|
|
93
87
|
## Recording Your First Automation
|
|
94
88
|
|
|
95
89
|
The fastest way to create an automation is by recording your actions directly in the browser.
|
|
@@ -1,7 +1,13 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import sys
|
|
3
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
|
|
6
|
+
try:
|
|
7
|
+
__version__ = version("optexity")
|
|
8
|
+
except PackageNotFoundError:
|
|
9
|
+
__version__ = "0.0.0"
|
|
10
|
+
|
|
5
11
|
logging.basicConfig(
|
|
6
12
|
level=logging.WARNING, # Default level for root logger
|
|
7
13
|
format="%(asctime)s [%(levelname)s] %(name)s.%(funcName)s: %(message)s",
|
|
@@ -23,12 +23,13 @@ automation_json = {
|
|
|
23
23
|
"click_element": {
|
|
24
24
|
"prompt_instructions": "Click on the link with the name of the stock equivalent for {stock_ticker[0]}."
|
|
25
25
|
}
|
|
26
|
-
}
|
|
26
|
+
},
|
|
27
|
+
"before_sleep_time": 1,
|
|
27
28
|
},
|
|
28
29
|
{
|
|
29
30
|
"extraction_action": {
|
|
30
31
|
"llm": {
|
|
31
|
-
"source": ["screenshot"],
|
|
32
|
+
"source": ["screenshot", "axtree"],
|
|
32
33
|
"extraction_format": {
|
|
33
34
|
"stock_name": "str",
|
|
34
35
|
"stock_price": "str",
|
|
@@ -154,7 +154,10 @@ def get_app_with_endpoints(is_aws: bool, child_id: int):
|
|
|
154
154
|
|
|
155
155
|
await task_queue.put(task)
|
|
156
156
|
return JSONResponse(
|
|
157
|
-
content={
|
|
157
|
+
content={
|
|
158
|
+
"success": True,
|
|
159
|
+
"message": "Task has been allocated. Check its status and output at https://dashboard.optexity.com/tasks",
|
|
160
|
+
},
|
|
158
161
|
status_code=202,
|
|
159
162
|
)
|
|
160
163
|
except Exception as e:
|
|
@@ -192,7 +195,7 @@ def get_app_with_endpoints(is_aws: bool, child_id: int):
|
|
|
192
195
|
return JSONResponse(
|
|
193
196
|
content={
|
|
194
197
|
"success": True,
|
|
195
|
-
"message": "Task has been allocated",
|
|
198
|
+
"message": "Task has been allocated. Check its status and output at https://dashboard.optexity.com/tasks",
|
|
196
199
|
"task_id": task.task_id,
|
|
197
200
|
},
|
|
198
201
|
status_code=202,
|
|
@@ -53,14 +53,17 @@ async def click_element_index(
|
|
|
53
53
|
|
|
54
54
|
try:
|
|
55
55
|
index = await get_index_from_prompt(
|
|
56
|
-
memory, click_element_action.prompt_instructions, browser
|
|
56
|
+
memory, click_element_action.prompt_instructions, browser, task
|
|
57
57
|
)
|
|
58
58
|
if index is None:
|
|
59
59
|
return
|
|
60
60
|
|
|
61
61
|
async def _actual_click_element():
|
|
62
|
+
print(
|
|
63
|
+
f"Clicking element with index: {index} and button: {click_element_action.button}"
|
|
64
|
+
)
|
|
62
65
|
action_model = browser.backend_agent.ActionModel(
|
|
63
|
-
**{"click": {"index": index}}
|
|
66
|
+
**{"click": {"index": index, "button": click_element_action.button}}
|
|
64
67
|
)
|
|
65
68
|
await browser.backend_agent.multi_act([action_model])
|
|
66
69
|
|
|
@@ -13,6 +13,7 @@ from optexity.inference.infra.browser import Browser
|
|
|
13
13
|
from optexity.schema.actions.interaction_action import (
|
|
14
14
|
CheckAction,
|
|
15
15
|
ClickElementAction,
|
|
16
|
+
HoverAction,
|
|
16
17
|
InputTextAction,
|
|
17
18
|
SelectOptionAction,
|
|
18
19
|
UncheckAction,
|
|
@@ -32,6 +33,7 @@ async def command_based_action_with_retry(
|
|
|
32
33
|
| CheckAction
|
|
33
34
|
| UploadFileAction
|
|
34
35
|
| UncheckAction
|
|
36
|
+
| HoverAction
|
|
35
37
|
),
|
|
36
38
|
browser: Browser,
|
|
37
39
|
memory: Memory,
|
|
@@ -67,7 +69,9 @@ async def command_based_action_with_retry(
|
|
|
67
69
|
url=browser_state_summary.url,
|
|
68
70
|
screenshot=browser_state_summary.screenshot,
|
|
69
71
|
title=browser_state_summary.title,
|
|
70
|
-
axtree=browser_state_summary.dom_state.llm_representation(
|
|
72
|
+
axtree=browser_state_summary.dom_state.llm_representation(
|
|
73
|
+
remove_empty_nodes=task.automation.remove_empty_nodes_in_axtree
|
|
74
|
+
),
|
|
71
75
|
)
|
|
72
76
|
|
|
73
77
|
if isinstance(action, ClickElementAction):
|
|
@@ -81,7 +85,7 @@ async def command_based_action_with_retry(
|
|
|
81
85
|
)
|
|
82
86
|
elif isinstance(action, InputTextAction):
|
|
83
87
|
await input_text_locator(
|
|
84
|
-
action, locator, max_timeout_seconds_per_try
|
|
88
|
+
action, locator, browser, max_timeout_seconds_per_try
|
|
85
89
|
)
|
|
86
90
|
elif isinstance(action, SelectOptionAction):
|
|
87
91
|
await select_option_locator(
|
|
@@ -100,6 +104,8 @@ async def command_based_action_with_retry(
|
|
|
100
104
|
await uncheck_locator(
|
|
101
105
|
action, locator, max_timeout_seconds_per_try, browser
|
|
102
106
|
)
|
|
107
|
+
elif isinstance(action, HoverAction):
|
|
108
|
+
await hover_locator(locator, max_timeout_seconds_per_try)
|
|
103
109
|
elif isinstance(action, UploadFileAction):
|
|
104
110
|
await upload_file_locator(action, locator)
|
|
105
111
|
logger.debug(
|
|
@@ -147,7 +153,9 @@ async def click_locator(
|
|
|
147
153
|
)
|
|
148
154
|
else:
|
|
149
155
|
await locator.click(
|
|
150
|
-
|
|
156
|
+
button=click_element_action.button,
|
|
157
|
+
no_wait_after=True,
|
|
158
|
+
timeout=max_timeout_seconds_per_try * 1000,
|
|
151
159
|
)
|
|
152
160
|
|
|
153
161
|
if click_element_action.expect_download:
|
|
@@ -161,6 +169,7 @@ async def click_locator(
|
|
|
161
169
|
async def input_text_locator(
|
|
162
170
|
input_text_action: InputTextAction,
|
|
163
171
|
locator: Locator,
|
|
172
|
+
browser: Browser,
|
|
164
173
|
max_timeout_seconds_per_try: float,
|
|
165
174
|
):
|
|
166
175
|
|
|
@@ -170,12 +179,19 @@ async def input_text_locator(
|
|
|
170
179
|
no_wait_after=True,
|
|
171
180
|
timeout=max_timeout_seconds_per_try * 1000,
|
|
172
181
|
)
|
|
173
|
-
|
|
182
|
+
elif input_text_action.fill_or_type == "type":
|
|
174
183
|
await locator.type(
|
|
175
184
|
input_text_action.input_text,
|
|
176
185
|
no_wait_after=True,
|
|
177
186
|
timeout=max_timeout_seconds_per_try * 1000,
|
|
178
187
|
)
|
|
188
|
+
else:
|
|
189
|
+
page = await browser.get_current_page()
|
|
190
|
+
if page is None:
|
|
191
|
+
return
|
|
192
|
+
for char in input_text_action.input_text:
|
|
193
|
+
await page.keyboard.press(char)
|
|
194
|
+
await asyncio.sleep(0.1)
|
|
179
195
|
|
|
180
196
|
if input_text_action.press_enter:
|
|
181
197
|
await locator.press("Enter")
|
|
@@ -209,6 +225,13 @@ async def uncheck_locator(
|
|
|
209
225
|
)
|
|
210
226
|
|
|
211
227
|
|
|
228
|
+
async def hover_locator(
|
|
229
|
+
locator: Locator,
|
|
230
|
+
max_timeout_seconds_per_try: float,
|
|
231
|
+
):
|
|
232
|
+
await locator.hover(no_wait_after=True, timeout=max_timeout_seconds_per_try * 1000)
|
|
233
|
+
|
|
234
|
+
|
|
212
235
|
async def upload_file_locator(upload_file_action: UploadFileAction, locator: Locator):
|
|
213
236
|
await locator.set_input_files(upload_file_action.file_path)
|
|
214
237
|
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from optexity.inference.core.interaction.handle_command import (
|
|
4
|
+
command_based_action_with_retry,
|
|
5
|
+
)
|
|
6
|
+
from optexity.inference.core.interaction.utils import get_index_from_prompt
|
|
7
|
+
from optexity.inference.infra.browser import Browser
|
|
8
|
+
from optexity.schema.actions.interaction_action import HoverAction
|
|
9
|
+
from optexity.schema.memory import Memory
|
|
10
|
+
from optexity.schema.task import Task
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
async def handle_hover_element(
|
|
16
|
+
hover_element_action: HoverAction,
|
|
17
|
+
task: Task,
|
|
18
|
+
memory: Memory,
|
|
19
|
+
browser: Browser,
|
|
20
|
+
max_timeout_seconds_per_try: float,
|
|
21
|
+
max_tries: int,
|
|
22
|
+
):
|
|
23
|
+
|
|
24
|
+
if hover_element_action.command and not hover_element_action.skip_command:
|
|
25
|
+
last_error = await command_based_action_with_retry(
|
|
26
|
+
hover_element_action,
|
|
27
|
+
browser,
|
|
28
|
+
memory,
|
|
29
|
+
task,
|
|
30
|
+
max_tries,
|
|
31
|
+
max_timeout_seconds_per_try,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
if last_error is None:
|
|
35
|
+
return
|
|
36
|
+
|
|
37
|
+
if not hover_element_action.skip_prompt:
|
|
38
|
+
logger.debug(
|
|
39
|
+
f"Executing prompt-based action: {hover_element_action.__class__.__name__}"
|
|
40
|
+
)
|
|
41
|
+
await hover_element_index(hover_element_action, browser, memory, task)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
async def hover_element_index(
|
|
45
|
+
hover_element_action: HoverAction,
|
|
46
|
+
browser: Browser,
|
|
47
|
+
memory: Memory,
|
|
48
|
+
task: Task,
|
|
49
|
+
):
|
|
50
|
+
|
|
51
|
+
try:
|
|
52
|
+
index = await get_index_from_prompt(
|
|
53
|
+
memory, hover_element_action.prompt_instructions, browser, task
|
|
54
|
+
)
|
|
55
|
+
if index is None:
|
|
56
|
+
return
|
|
57
|
+
|
|
58
|
+
print(f"Hovering element with index: {index}")
|
|
59
|
+
|
|
60
|
+
async def _actual_hover_element():
|
|
61
|
+
try:
|
|
62
|
+
action_model = browser.backend_agent.ActionModel(
|
|
63
|
+
**{"hover": {"index": index}}
|
|
64
|
+
)
|
|
65
|
+
await browser.backend_agent.multi_act([action_model])
|
|
66
|
+
except Exception as e:
|
|
67
|
+
logger.error(f"Error in hover_element_index: {e} trying right click")
|
|
68
|
+
node = await browser.backend_agent.browser_session.get_element_by_index(
|
|
69
|
+
index
|
|
70
|
+
)
|
|
71
|
+
if node is None:
|
|
72
|
+
return
|
|
73
|
+
|
|
74
|
+
backend_page = (
|
|
75
|
+
await browser.backend_agent.browser_session.get_current_page()
|
|
76
|
+
)
|
|
77
|
+
element = await backend_page.get_element(node.backend_node_id)
|
|
78
|
+
await element.click(button="right")
|
|
79
|
+
|
|
80
|
+
await _actual_hover_element()
|
|
81
|
+
except Exception as e:
|
|
82
|
+
logger.error(f"Error in hover_element_index: {e}")
|
|
83
|
+
return
|
|
@@ -48,15 +48,18 @@ async def handle_input_text(
|
|
|
48
48
|
logger.debug(
|
|
49
49
|
f"Executing prompt-based action: {input_text_action.__class__.__name__}"
|
|
50
50
|
)
|
|
51
|
-
await input_text_index(input_text_action, browser, memory)
|
|
51
|
+
await input_text_index(input_text_action, browser, memory, task)
|
|
52
52
|
|
|
53
53
|
|
|
54
54
|
async def input_text_index(
|
|
55
|
-
input_text_action: InputTextAction, browser: Browser, memory: Memory
|
|
55
|
+
input_text_action: InputTextAction, browser: Browser, memory: Memory, task: Task
|
|
56
56
|
):
|
|
57
57
|
try:
|
|
58
58
|
index = await get_index_from_prompt(
|
|
59
|
-
memory,
|
|
59
|
+
memory,
|
|
60
|
+
input_text_action.prompt_instructions,
|
|
61
|
+
browser,
|
|
62
|
+
task,
|
|
60
63
|
)
|
|
61
64
|
if index is None:
|
|
62
65
|
return
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from optexity.inference.infra.browser import Browser
|
|
2
|
+
from optexity.schema.actions.interaction_action import KeyPressAction, KeyPressType
|
|
3
|
+
from optexity.schema.memory import Memory
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
async def handle_key_press(
|
|
7
|
+
keypress_action: KeyPressAction,
|
|
8
|
+
memory: Memory,
|
|
9
|
+
browser: Browser,
|
|
10
|
+
):
|
|
11
|
+
page = await browser.get_current_page()
|
|
12
|
+
if page is None:
|
|
13
|
+
return
|
|
14
|
+
|
|
15
|
+
if keypress_action.type == KeyPressType.ENTER:
|
|
16
|
+
await page.keyboard.press("Enter")
|
|
17
|
+
if keypress_action.type == KeyPressType.TAB:
|
|
18
|
+
await page.keyboard.press("Tab")
|
|
19
|
+
if keypress_action.type == KeyPressType.ZERO:
|
|
20
|
+
await page.keyboard.press("0")
|
|
21
|
+
if keypress_action.type == KeyPressType.ONE:
|
|
22
|
+
await page.keyboard.press("1")
|
|
23
|
+
if keypress_action.type == KeyPressType.TWO:
|
|
24
|
+
await page.keyboard.press("2")
|
|
25
|
+
if keypress_action.type == KeyPressType.THREE:
|
|
26
|
+
await page.keyboard.press("3")
|
|
27
|
+
if keypress_action.type == KeyPressType.FOUR:
|
|
28
|
+
await page.keyboard.press("4")
|
|
29
|
+
if keypress_action.type == KeyPressType.FIVE:
|
|
30
|
+
await page.keyboard.press("5")
|
|
31
|
+
if keypress_action.type == KeyPressType.SIX:
|
|
32
|
+
await page.keyboard.press("6")
|
|
33
|
+
if keypress_action.type == KeyPressType.SEVEN:
|
|
34
|
+
await page.keyboard.press("7")
|
|
35
|
+
if keypress_action.type == KeyPressType.EIGHT:
|
|
36
|
+
await page.keyboard.press("8")
|
|
37
|
+
if keypress_action.type == KeyPressType.NINE:
|
|
38
|
+
await page.keyboard.press("9")
|
|
39
|
+
if keypress_action.type == KeyPressType.SLASH:
|
|
40
|
+
await page.keyboard.press("/")
|
|
41
|
+
if keypress_action.type == KeyPressType.SPACE:
|
|
42
|
+
await page.keyboard.press("Space")
|
{optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/handle_select_utils.py
RENAMED
|
@@ -57,9 +57,19 @@ async def smart_select(
|
|
|
57
57
|
options: list[SelectOptionValue], patterns: list[str], memory: Memory
|
|
58
58
|
):
|
|
59
59
|
# Get all options from the <select>
|
|
60
|
-
|
|
60
|
+
## TODO: remove this once we have a better way to handle select one
|
|
61
61
|
matched_values = []
|
|
62
62
|
|
|
63
|
+
if len(options) == 0:
|
|
64
|
+
return []
|
|
65
|
+
if len(options) == 1:
|
|
66
|
+
return [options[0].value]
|
|
67
|
+
if len(options) == 2 and "Select One" in [o.value for o in options]:
|
|
68
|
+
if options[0].value == "Select One":
|
|
69
|
+
return [options[1].value]
|
|
70
|
+
else:
|
|
71
|
+
return [options[0].value]
|
|
72
|
+
|
|
63
73
|
for p in patterns:
|
|
64
74
|
# If pattern contains regex characters, treat as regex
|
|
65
75
|
is_regex = p.startswith("^") or p.endswith("$") or ".*" in p
|
|
@@ -36,16 +36,16 @@ async def handle_upload_file(
|
|
|
36
36
|
logger.debug(
|
|
37
37
|
f"Executing prompt-based action: {upload_file_action.__class__.__name__}"
|
|
38
38
|
)
|
|
39
|
-
await upload_file_index(upload_file_action, browser, memory)
|
|
39
|
+
await upload_file_index(upload_file_action, browser, memory, task)
|
|
40
40
|
|
|
41
41
|
|
|
42
42
|
async def upload_file_index(
|
|
43
|
-
upload_file_action: UploadFileAction, browser: Browser, memory: Memory
|
|
43
|
+
upload_file_action: UploadFileAction, browser: Browser, memory: Memory, task: Task
|
|
44
44
|
):
|
|
45
45
|
|
|
46
46
|
try:
|
|
47
47
|
index = await get_index_from_prompt(
|
|
48
|
-
memory, upload_file_action.prompt_instructions, browser
|
|
48
|
+
memory, upload_file_action.prompt_instructions, browser, task
|
|
49
49
|
)
|
|
50
50
|
if index is None:
|
|
51
51
|
return
|
|
@@ -18,14 +18,16 @@ index_prediction_agent = ActionPredictionLocatorAxtree()
|
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
async def get_index_from_prompt(
|
|
21
|
-
memory: Memory, prompt_instructions: str, browser: Browser
|
|
21
|
+
memory: Memory, prompt_instructions: str, browser: Browser, task: Task
|
|
22
22
|
):
|
|
23
23
|
browser_state_summary = await browser.get_browser_state_summary()
|
|
24
24
|
memory.browser_states[-1] = BrowserState(
|
|
25
25
|
url=browser_state_summary.url,
|
|
26
26
|
screenshot=browser_state_summary.screenshot,
|
|
27
27
|
title=browser_state_summary.title,
|
|
28
|
-
axtree=browser_state_summary.dom_state.llm_representation(
|
|
28
|
+
axtree=browser_state_summary.dom_state.llm_representation(
|
|
29
|
+
remove_empty_nodes=task.automation.remove_empty_nodes_in_axtree
|
|
30
|
+
),
|
|
29
31
|
)
|
|
30
32
|
|
|
31
33
|
try:
|
|
@@ -6,6 +6,7 @@ from optexity.inference.infra.browser import Browser
|
|
|
6
6
|
from optexity.inference.models import GeminiModels, get_llm_model
|
|
7
7
|
from optexity.schema.actions.assertion_action import AssertionAction, LLMAssertion
|
|
8
8
|
from optexity.schema.memory import Memory
|
|
9
|
+
from optexity.schema.task import Task
|
|
9
10
|
|
|
10
11
|
logger = logging.getLogger(__name__)
|
|
11
12
|
|
|
@@ -13,14 +14,17 @@ llm_model = get_llm_model(GeminiModels.GEMINI_2_5_FLASH, True)
|
|
|
13
14
|
|
|
14
15
|
|
|
15
16
|
async def run_assertion_action(
|
|
16
|
-
assertion_action: AssertionAction,
|
|
17
|
+
assertion_action: AssertionAction,
|
|
18
|
+
memory: Memory,
|
|
19
|
+
browser: Browser,
|
|
20
|
+
task: Task,
|
|
17
21
|
):
|
|
18
22
|
logger.debug(
|
|
19
23
|
f"---------Running assertion action {assertion_action.model_dump_json()}---------"
|
|
20
24
|
)
|
|
21
25
|
|
|
22
26
|
if assertion_action.llm:
|
|
23
|
-
await handle_llm_assertion(assertion_action.llm, memory, browser)
|
|
27
|
+
await handle_llm_assertion(assertion_action.llm, memory, browser, task)
|
|
24
28
|
elif assertion_action.network_call:
|
|
25
29
|
raise ValueError("Network call assertions are not supported yet")
|
|
26
30
|
# await handle_network_call_assertion(
|
|
@@ -34,7 +38,7 @@ async def run_assertion_action(
|
|
|
34
38
|
|
|
35
39
|
|
|
36
40
|
async def handle_llm_assertion(
|
|
37
|
-
llm_assertion: LLMAssertion, memory: Memory, browser: Browser
|
|
41
|
+
llm_assertion: LLMAssertion, memory: Memory, browser: Browser, task: Task
|
|
38
42
|
):
|
|
39
43
|
extra_instruction = """You are a helpful assistant that verifies if the condition is met.
|
|
40
44
|
Use the info supplied below to verify the condition.
|
|
@@ -45,7 +49,7 @@ async def handle_llm_assertion(
|
|
|
45
49
|
llm_assertion_new.extraction_instructions = (
|
|
46
50
|
extra_instruction + "\n" + llm_assertion_new.extraction_instructions
|
|
47
51
|
)
|
|
48
|
-
output_data = await handle_llm_extraction(llm_assertion_new, memory, browser)
|
|
52
|
+
output_data = await handle_llm_extraction(llm_assertion_new, memory, browser, task)
|
|
49
53
|
|
|
50
54
|
if output_data.json_data["assertion_result"]:
|
|
51
55
|
return True
|
|
@@ -25,7 +25,6 @@ from optexity.inference.core.run_interaction import (
|
|
|
25
25
|
run_interaction_action,
|
|
26
26
|
)
|
|
27
27
|
from optexity.inference.core.run_python_script import run_python_script_action
|
|
28
|
-
from optexity.inference.core.run_two_fa import run_two_fa_action
|
|
29
28
|
from optexity.inference.infra.browser import Browser
|
|
30
29
|
from optexity.schema.actions.interaction_action import DownloadUrlAsPdfAction
|
|
31
30
|
from optexity.schema.automation import ActionNode, ForLoopNode, IfElseNode
|
|
@@ -209,7 +208,9 @@ async def run_final_logging(
|
|
|
209
208
|
url=browser_state_summary.url,
|
|
210
209
|
screenshot=browser_state_summary.screenshot,
|
|
211
210
|
title=browser_state_summary.title,
|
|
212
|
-
axtree=browser_state_summary.dom_state.llm_representation(
|
|
211
|
+
axtree=browser_state_summary.dom_state.llm_representation(
|
|
212
|
+
remove_empty_nodes=task.automation.remove_empty_nodes_in_axtree
|
|
213
|
+
),
|
|
213
214
|
)
|
|
214
215
|
)
|
|
215
216
|
|
|
@@ -272,14 +273,14 @@ async def run_action_node(
|
|
|
272
273
|
await run_extraction_action(
|
|
273
274
|
action_node.extraction_action, memory, browser, task
|
|
274
275
|
)
|
|
275
|
-
elif action_node.two_fa_action:
|
|
276
|
-
await run_two_fa_action(action_node.two_fa_action, memory)
|
|
277
276
|
elif action_node.python_script_action:
|
|
278
277
|
await run_python_script_action(
|
|
279
278
|
action_node.python_script_action, memory, browser
|
|
280
279
|
)
|
|
281
280
|
elif action_node.assertion_action:
|
|
282
|
-
await run_assertion_action(
|
|
281
|
+
await run_assertion_action(
|
|
282
|
+
action_node.assertion_action, memory, browser, task
|
|
283
|
+
)
|
|
283
284
|
|
|
284
285
|
except Exception as e:
|
|
285
286
|
logger.error(f"Error running node {memory.automation_state.step_index}: {e}")
|
|
@@ -4,12 +4,14 @@ import traceback
|
|
|
4
4
|
import aiofiles
|
|
5
5
|
import httpx
|
|
6
6
|
|
|
7
|
+
from optexity.inference.core.run_two_fa import run_two_fa_action
|
|
7
8
|
from optexity.inference.infra.browser import Browser
|
|
8
9
|
from optexity.inference.models import GeminiModels, get_llm_model
|
|
9
10
|
from optexity.schema.actions.extraction_action import (
|
|
10
11
|
ExtractionAction,
|
|
11
12
|
LLMExtraction,
|
|
12
13
|
NetworkCallExtraction,
|
|
14
|
+
PythonScriptExtraction,
|
|
13
15
|
ScreenshotExtraction,
|
|
14
16
|
StateExtraction,
|
|
15
17
|
)
|
|
@@ -37,7 +39,11 @@ async def run_extraction_action(
|
|
|
37
39
|
|
|
38
40
|
if extraction_action.llm:
|
|
39
41
|
await handle_llm_extraction(
|
|
40
|
-
extraction_action.llm,
|
|
42
|
+
extraction_action.llm,
|
|
43
|
+
memory,
|
|
44
|
+
browser,
|
|
45
|
+
task,
|
|
46
|
+
extraction_action.unique_identifier,
|
|
41
47
|
)
|
|
42
48
|
elif extraction_action.network_call:
|
|
43
49
|
await handle_network_call_extraction(
|
|
@@ -47,6 +53,14 @@ async def run_extraction_action(
|
|
|
47
53
|
task,
|
|
48
54
|
extraction_action.unique_identifier,
|
|
49
55
|
)
|
|
56
|
+
elif extraction_action.python_script:
|
|
57
|
+
await handle_python_script_extraction(
|
|
58
|
+
extraction_action.python_script,
|
|
59
|
+
memory,
|
|
60
|
+
browser,
|
|
61
|
+
task,
|
|
62
|
+
extraction_action.unique_identifier,
|
|
63
|
+
)
|
|
50
64
|
elif extraction_action.screenshot:
|
|
51
65
|
await handle_screenshot_extraction(
|
|
52
66
|
extraction_action.screenshot,
|
|
@@ -61,6 +75,8 @@ async def run_extraction_action(
|
|
|
61
75
|
browser,
|
|
62
76
|
extraction_action.unique_identifier,
|
|
63
77
|
)
|
|
78
|
+
elif extraction_action.two_fa_action:
|
|
79
|
+
await run_two_fa_action(extraction_action.two_fa_action, memory)
|
|
64
80
|
|
|
65
81
|
|
|
66
82
|
async def handle_state_extraction(
|
|
@@ -108,6 +124,7 @@ async def handle_llm_extraction(
|
|
|
108
124
|
llm_extraction: LLMExtraction,
|
|
109
125
|
memory: Memory,
|
|
110
126
|
browser: Browser,
|
|
127
|
+
task: Task,
|
|
111
128
|
unique_identifier: str | None = None,
|
|
112
129
|
):
|
|
113
130
|
browser_state_summary = await browser.get_browser_state_summary()
|
|
@@ -115,7 +132,9 @@ async def handle_llm_extraction(
|
|
|
115
132
|
url=browser_state_summary.url,
|
|
116
133
|
screenshot=browser_state_summary.screenshot,
|
|
117
134
|
title=browser_state_summary.title,
|
|
118
|
-
axtree=browser_state_summary.dom_state.llm_representation(
|
|
135
|
+
axtree=browser_state_summary.dom_state.llm_representation(
|
|
136
|
+
remove_empty_nodes=task.automation.remove_empty_nodes_in_axtree
|
|
137
|
+
),
|
|
119
138
|
)
|
|
120
139
|
|
|
121
140
|
# TODO: fix this double calling of screenshot and axtree
|
|
@@ -131,8 +150,8 @@ async def handle_llm_extraction(
|
|
|
131
150
|
|
|
132
151
|
system_instruction = f"""
|
|
133
152
|
You are an expert in extracting information from a website. You will be given an axtree of a webpage.
|
|
134
|
-
Your task is to extract the information from the webpage and return it in the format specified by the instructions.
|
|
135
|
-
{llm_extraction.extraction_instructions}
|
|
153
|
+
Your task is to extract the information from the webpage and return it in the format specified by the instructions. You will be first provided the instructions and then the axtree.
|
|
154
|
+
Instructions: {llm_extraction.extraction_instructions}
|
|
136
155
|
"""
|
|
137
156
|
|
|
138
157
|
prompt = f"""
|
|
@@ -163,6 +182,8 @@ async def handle_llm_extraction(
|
|
|
163
182
|
memory.token_usage += token_usage
|
|
164
183
|
memory.variables.output_data.append(output_data)
|
|
165
184
|
|
|
185
|
+
memory.browser_states[-1].final_prompt = f"{system_instruction}\n{prompt}"
|
|
186
|
+
|
|
166
187
|
if llm_extraction.output_variable_names is not None:
|
|
167
188
|
for output_variable_name in llm_extraction.output_variable_names:
|
|
168
189
|
v = response_dict[output_variable_name]
|
|
@@ -216,6 +237,31 @@ async def handle_network_call_extraction(
|
|
|
216
237
|
)
|
|
217
238
|
|
|
218
239
|
|
|
240
|
+
async def handle_python_script_extraction(
|
|
241
|
+
python_script_extraction: PythonScriptExtraction,
|
|
242
|
+
memory: Memory,
|
|
243
|
+
browser: Browser,
|
|
244
|
+
task: Task,
|
|
245
|
+
unique_identifier: str | None = None,
|
|
246
|
+
):
|
|
247
|
+
local_vars = {}
|
|
248
|
+
exec(python_script_extraction.script, {}, local_vars)
|
|
249
|
+
code_fn = local_vars["code_fn"]
|
|
250
|
+
axtree = memory.browser_states[-1].axtree
|
|
251
|
+
result = await code_fn(axtree)
|
|
252
|
+
if result is not None:
|
|
253
|
+
memory.variables.output_data.append(
|
|
254
|
+
OutputData(
|
|
255
|
+
unique_identifier=unique_identifier,
|
|
256
|
+
json_data=result,
|
|
257
|
+
)
|
|
258
|
+
)
|
|
259
|
+
else:
|
|
260
|
+
logger.warning(
|
|
261
|
+
f"No result from Python script extraction: {python_script_extraction.script}"
|
|
262
|
+
)
|
|
263
|
+
|
|
264
|
+
|
|
219
265
|
async def download_request(
|
|
220
266
|
network_call: NetworkRequest, download_filename: str, task: Task, memory: Memory
|
|
221
267
|
):
|