optexity 0.1.5__tar.gz → 0.1.5.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. {optexity-0.1.5 → optexity-0.1.5.2}/PKG-INFO +3 -9
  2. {optexity-0.1.5 → optexity-0.1.5.2}/README.md +2 -8
  3. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/__init__.py +6 -0
  4. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/examples/extract_price_stockanalysis.py +3 -2
  5. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/child_process.py +5 -2
  6. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/handle_click.py +5 -2
  7. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/handle_command.py +27 -4
  8. optexity-0.1.5.2/optexity/inference/core/interaction/handle_hover.py +83 -0
  9. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/handle_input.py +6 -3
  10. optexity-0.1.5.2/optexity/inference/core/interaction/handle_keypress.py +42 -0
  11. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/handle_select.py +1 -1
  12. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/handle_select_utils.py +11 -1
  13. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/handle_upload.py +3 -3
  14. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/utils.py +4 -2
  15. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/run_assertion.py +8 -4
  16. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/run_automation.py +6 -5
  17. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/run_extraction.py +50 -4
  18. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/run_interaction.py +13 -1
  19. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/run_two_fa.py +12 -7
  20. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/actions/extraction_action.py +9 -3
  21. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/actions/interaction_action.py +38 -4
  22. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/actions/two_fa_action.py +1 -1
  23. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/automation.py +3 -7
  24. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/task.py +6 -2
  25. {optexity-0.1.5 → optexity-0.1.5.2}/optexity.egg-info/PKG-INFO +3 -9
  26. {optexity-0.1.5 → optexity-0.1.5.2}/optexity.egg-info/SOURCES.txt +1 -0
  27. {optexity-0.1.5 → optexity-0.1.5.2}/pyproject.toml +1 -1
  28. optexity-0.1.5/optexity/inference/core/interaction/handle_keypress.py +0 -16
  29. {optexity-0.1.5 → optexity-0.1.5.2}/LICENSE +0 -0
  30. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/cli.py +0 -0
  31. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/examples/__init__.py +0 -0
  32. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/examples/add_example.py +0 -0
  33. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/examples/download_pdf_url.py +0 -0
  34. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/examples/file_upload.py +0 -0
  35. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/examples/i94.py +0 -0
  36. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/examples/i94_travel_history.py +0 -0
  37. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/examples/peachstate_medicaid.py +0 -0
  38. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/examples/supabase_login.py +0 -0
  39. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/exceptions.py +0 -0
  40. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/__init__.py +0 -0
  41. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/__init__.py +0 -0
  42. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/error_handler/__init__.py +0 -0
  43. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/error_handler/error_handler.py +0 -0
  44. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/error_handler/prompt.py +0 -0
  45. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/index_prediction/__init__.py +0 -0
  46. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/index_prediction/action_prediction_locator_axtree.py +0 -0
  47. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/index_prediction/prompt.py +0 -0
  48. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/select_value_prediction/__init__.py +0 -0
  49. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/select_value_prediction/prompt.py +0 -0
  50. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/select_value_prediction/select_value_prediction.py +0 -0
  51. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/two_fa_extraction/__init__.py +0 -0
  52. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/two_fa_extraction/prompt.py +0 -0
  53. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/agents/two_fa_extraction/two_fa_extraction.py +0 -0
  54. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/__init__.py +0 -0
  55. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/__init__.py +0 -0
  56. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/handle_agentic_task.py +0 -0
  57. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/handle_check.py +0 -0
  58. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/logging.py +0 -0
  59. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/run_python_script.py +0 -0
  60. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/two_factor_auth/__init__.py +0 -0
  61. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/infra/__init__.py +0 -0
  62. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/infra/browser.py +0 -0
  63. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/infra/browser_extension.py +0 -0
  64. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/models/__init__.py +0 -0
  65. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/models/gemini.py +0 -0
  66. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/models/human.py +0 -0
  67. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/models/llm_model.py +0 -0
  68. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/run_local.py +0 -0
  69. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/onepassword_integration.py +0 -0
  70. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/__init__.py +0 -0
  71. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/actions/__init__.py +0 -0
  72. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/actions/assertion_action.py +0 -0
  73. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/actions/misc_action.py +0 -0
  74. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/actions/prompts.py +0 -0
  75. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/callback.py +0 -0
  76. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/inference.py +0 -0
  77. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/memory.py +0 -0
  78. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/schema/token_usage.py +0 -0
  79. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/test.py +0 -0
  80. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/utils/__init__.py +0 -0
  81. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/utils/settings.py +0 -0
  82. {optexity-0.1.5 → optexity-0.1.5.2}/optexity/utils/utils.py +0 -0
  83. {optexity-0.1.5 → optexity-0.1.5.2}/optexity.egg-info/dependency_links.txt +0 -0
  84. {optexity-0.1.5 → optexity-0.1.5.2}/optexity.egg-info/entry_points.txt +0 -0
  85. {optexity-0.1.5 → optexity-0.1.5.2}/optexity.egg-info/requires.txt +0 -0
  86. {optexity-0.1.5 → optexity-0.1.5.2}/optexity.egg-info/top_level.txt +0 -0
  87. {optexity-0.1.5 → optexity-0.1.5.2}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: optexity
3
- Version: 0.1.5
3
+ Version: 0.1.5.2
4
4
  Summary: Optexity is a platform for building and running browser and computer agents.
5
5
  Author-email: Optexity <founders@optexity.com>
6
6
  Requires-Python: >=3.11
@@ -83,6 +83,7 @@ Install Optexity directly from PyPI:
83
83
 
84
84
  ```bash
85
85
  pip install optexity
86
+ optexity install-browsers
86
87
  ```
87
88
 
88
89
  **OR**
@@ -95,6 +96,7 @@ If you want to clone and edit from source:
95
96
  git clone git@github.com:Optexity/optexity.git
96
97
  cd optexity
97
98
  pip install -e .
99
+ optexity install-browsers
98
100
  ```
99
101
 
100
102
  ## Set required environment variables:
@@ -107,14 +109,6 @@ DEPLOYMENT=dev # or "prod" in production
107
109
 
108
110
  You can get your free Google Gemini API key from the [Google AI Studio Console](https://aistudio.google.com).
109
111
 
110
- ## Install required browsers:
111
-
112
- Install playwright and patchright browsers:
113
-
114
- ```bash
115
- optexity install-browsers
116
- ```
117
-
118
112
  ## Recording Your First Automation
119
113
 
120
114
  The fastest way to create an automation is by recording your actions directly in the browser.
@@ -58,6 +58,7 @@ Install Optexity directly from PyPI:
58
58
 
59
59
  ```bash
60
60
  pip install optexity
61
+ optexity install-browsers
61
62
  ```
62
63
 
63
64
  **OR**
@@ -70,6 +71,7 @@ If you want to clone and edit from source:
70
71
  git clone git@github.com:Optexity/optexity.git
71
72
  cd optexity
72
73
  pip install -e .
74
+ optexity install-browsers
73
75
  ```
74
76
 
75
77
  ## Set required environment variables:
@@ -82,14 +84,6 @@ DEPLOYMENT=dev # or "prod" in production
82
84
 
83
85
  You can get your free Google Gemini API key from the [Google AI Studio Console](https://aistudio.google.com).
84
86
 
85
- ## Install required browsers:
86
-
87
- Install playwright and patchright browsers:
88
-
89
- ```bash
90
- optexity install-browsers
91
- ```
92
-
93
87
  ## Recording Your First Automation
94
88
 
95
89
  The fastest way to create an automation is by recording your actions directly in the browser.
@@ -1,7 +1,13 @@
1
1
  import logging
2
2
  import sys
3
+ from importlib.metadata import PackageNotFoundError, version
3
4
  from pathlib import Path
4
5
 
6
+ try:
7
+ __version__ = version("optexity")
8
+ except PackageNotFoundError:
9
+ __version__ = "0.0.0"
10
+
5
11
  logging.basicConfig(
6
12
  level=logging.WARNING, # Default level for root logger
7
13
  format="%(asctime)s [%(levelname)s] %(name)s.%(funcName)s: %(message)s",
@@ -23,12 +23,13 @@ automation_json = {
23
23
  "click_element": {
24
24
  "prompt_instructions": "Click on the link with the name of the stock equivalent for {stock_ticker[0]}."
25
25
  }
26
- }
26
+ },
27
+ "before_sleep_time": 1,
27
28
  },
28
29
  {
29
30
  "extraction_action": {
30
31
  "llm": {
31
- "source": ["screenshot"],
32
+ "source": ["screenshot", "axtree"],
32
33
  "extraction_format": {
33
34
  "stock_name": "str",
34
35
  "stock_price": "str",
@@ -154,7 +154,10 @@ def get_app_with_endpoints(is_aws: bool, child_id: int):
154
154
 
155
155
  await task_queue.put(task)
156
156
  return JSONResponse(
157
- content={"success": True, "message": "Task has been allocated"},
157
+ content={
158
+ "success": True,
159
+ "message": "Task has been allocated. Check its status and output at https://dashboard.optexity.com/tasks",
160
+ },
158
161
  status_code=202,
159
162
  )
160
163
  except Exception as e:
@@ -192,7 +195,7 @@ def get_app_with_endpoints(is_aws: bool, child_id: int):
192
195
  return JSONResponse(
193
196
  content={
194
197
  "success": True,
195
- "message": "Task has been allocated",
198
+ "message": "Task has been allocated. Check its status and output at https://dashboard.optexity.com/tasks",
196
199
  "task_id": task.task_id,
197
200
  },
198
201
  status_code=202,
@@ -53,14 +53,17 @@ async def click_element_index(
53
53
 
54
54
  try:
55
55
  index = await get_index_from_prompt(
56
- memory, click_element_action.prompt_instructions, browser
56
+ memory, click_element_action.prompt_instructions, browser, task
57
57
  )
58
58
  if index is None:
59
59
  return
60
60
 
61
61
  async def _actual_click_element():
62
+ print(
63
+ f"Clicking element with index: {index} and button: {click_element_action.button}"
64
+ )
62
65
  action_model = browser.backend_agent.ActionModel(
63
- **{"click": {"index": index}}
66
+ **{"click": {"index": index, "button": click_element_action.button}}
64
67
  )
65
68
  await browser.backend_agent.multi_act([action_model])
66
69
 
@@ -13,6 +13,7 @@ from optexity.inference.infra.browser import Browser
13
13
  from optexity.schema.actions.interaction_action import (
14
14
  CheckAction,
15
15
  ClickElementAction,
16
+ HoverAction,
16
17
  InputTextAction,
17
18
  SelectOptionAction,
18
19
  UncheckAction,
@@ -32,6 +33,7 @@ async def command_based_action_with_retry(
32
33
  | CheckAction
33
34
  | UploadFileAction
34
35
  | UncheckAction
36
+ | HoverAction
35
37
  ),
36
38
  browser: Browser,
37
39
  memory: Memory,
@@ -67,7 +69,9 @@ async def command_based_action_with_retry(
67
69
  url=browser_state_summary.url,
68
70
  screenshot=browser_state_summary.screenshot,
69
71
  title=browser_state_summary.title,
70
- axtree=browser_state_summary.dom_state.llm_representation(),
72
+ axtree=browser_state_summary.dom_state.llm_representation(
73
+ remove_empty_nodes=task.automation.remove_empty_nodes_in_axtree
74
+ ),
71
75
  )
72
76
 
73
77
  if isinstance(action, ClickElementAction):
@@ -81,7 +85,7 @@ async def command_based_action_with_retry(
81
85
  )
82
86
  elif isinstance(action, InputTextAction):
83
87
  await input_text_locator(
84
- action, locator, max_timeout_seconds_per_try
88
+ action, locator, browser, max_timeout_seconds_per_try
85
89
  )
86
90
  elif isinstance(action, SelectOptionAction):
87
91
  await select_option_locator(
@@ -100,6 +104,8 @@ async def command_based_action_with_retry(
100
104
  await uncheck_locator(
101
105
  action, locator, max_timeout_seconds_per_try, browser
102
106
  )
107
+ elif isinstance(action, HoverAction):
108
+ await hover_locator(locator, max_timeout_seconds_per_try)
103
109
  elif isinstance(action, UploadFileAction):
104
110
  await upload_file_locator(action, locator)
105
111
  logger.debug(
@@ -147,7 +153,9 @@ async def click_locator(
147
153
  )
148
154
  else:
149
155
  await locator.click(
150
- no_wait_after=True, timeout=max_timeout_seconds_per_try * 1000
156
+ button=click_element_action.button,
157
+ no_wait_after=True,
158
+ timeout=max_timeout_seconds_per_try * 1000,
151
159
  )
152
160
 
153
161
  if click_element_action.expect_download:
@@ -161,6 +169,7 @@ async def click_locator(
161
169
  async def input_text_locator(
162
170
  input_text_action: InputTextAction,
163
171
  locator: Locator,
172
+ browser: Browser,
164
173
  max_timeout_seconds_per_try: float,
165
174
  ):
166
175
 
@@ -170,12 +179,19 @@ async def input_text_locator(
170
179
  no_wait_after=True,
171
180
  timeout=max_timeout_seconds_per_try * 1000,
172
181
  )
173
- else:
182
+ elif input_text_action.fill_or_type == "type":
174
183
  await locator.type(
175
184
  input_text_action.input_text,
176
185
  no_wait_after=True,
177
186
  timeout=max_timeout_seconds_per_try * 1000,
178
187
  )
188
+ else:
189
+ page = await browser.get_current_page()
190
+ if page is None:
191
+ return
192
+ for char in input_text_action.input_text:
193
+ await page.keyboard.press(char)
194
+ await asyncio.sleep(0.1)
179
195
 
180
196
  if input_text_action.press_enter:
181
197
  await locator.press("Enter")
@@ -209,6 +225,13 @@ async def uncheck_locator(
209
225
  )
210
226
 
211
227
 
228
+ async def hover_locator(
229
+ locator: Locator,
230
+ max_timeout_seconds_per_try: float,
231
+ ):
232
+ await locator.hover(no_wait_after=True, timeout=max_timeout_seconds_per_try * 1000)
233
+
234
+
212
235
  async def upload_file_locator(upload_file_action: UploadFileAction, locator: Locator):
213
236
  await locator.set_input_files(upload_file_action.file_path)
214
237
 
@@ -0,0 +1,83 @@
1
+ import logging
2
+
3
+ from optexity.inference.core.interaction.handle_command import (
4
+ command_based_action_with_retry,
5
+ )
6
+ from optexity.inference.core.interaction.utils import get_index_from_prompt
7
+ from optexity.inference.infra.browser import Browser
8
+ from optexity.schema.actions.interaction_action import HoverAction
9
+ from optexity.schema.memory import Memory
10
+ from optexity.schema.task import Task
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
+ async def handle_hover_element(
16
+ hover_element_action: HoverAction,
17
+ task: Task,
18
+ memory: Memory,
19
+ browser: Browser,
20
+ max_timeout_seconds_per_try: float,
21
+ max_tries: int,
22
+ ):
23
+
24
+ if hover_element_action.command and not hover_element_action.skip_command:
25
+ last_error = await command_based_action_with_retry(
26
+ hover_element_action,
27
+ browser,
28
+ memory,
29
+ task,
30
+ max_tries,
31
+ max_timeout_seconds_per_try,
32
+ )
33
+
34
+ if last_error is None:
35
+ return
36
+
37
+ if not hover_element_action.skip_prompt:
38
+ logger.debug(
39
+ f"Executing prompt-based action: {hover_element_action.__class__.__name__}"
40
+ )
41
+ await hover_element_index(hover_element_action, browser, memory, task)
42
+
43
+
44
+ async def hover_element_index(
45
+ hover_element_action: HoverAction,
46
+ browser: Browser,
47
+ memory: Memory,
48
+ task: Task,
49
+ ):
50
+
51
+ try:
52
+ index = await get_index_from_prompt(
53
+ memory, hover_element_action.prompt_instructions, browser, task
54
+ )
55
+ if index is None:
56
+ return
57
+
58
+ print(f"Hovering element with index: {index}")
59
+
60
+ async def _actual_hover_element():
61
+ try:
62
+ action_model = browser.backend_agent.ActionModel(
63
+ **{"hover": {"index": index}}
64
+ )
65
+ await browser.backend_agent.multi_act([action_model])
66
+ except Exception as e:
67
+ logger.error(f"Error in hover_element_index: {e} trying right click")
68
+ node = await browser.backend_agent.browser_session.get_element_by_index(
69
+ index
70
+ )
71
+ if node is None:
72
+ return
73
+
74
+ backend_page = (
75
+ await browser.backend_agent.browser_session.get_current_page()
76
+ )
77
+ element = await backend_page.get_element(node.backend_node_id)
78
+ await element.click(button="right")
79
+
80
+ await _actual_hover_element()
81
+ except Exception as e:
82
+ logger.error(f"Error in hover_element_index: {e}")
83
+ return
@@ -48,15 +48,18 @@ async def handle_input_text(
48
48
  logger.debug(
49
49
  f"Executing prompt-based action: {input_text_action.__class__.__name__}"
50
50
  )
51
- await input_text_index(input_text_action, browser, memory)
51
+ await input_text_index(input_text_action, browser, memory, task)
52
52
 
53
53
 
54
54
  async def input_text_index(
55
- input_text_action: InputTextAction, browser: Browser, memory: Memory
55
+ input_text_action: InputTextAction, browser: Browser, memory: Memory, task: Task
56
56
  ):
57
57
  try:
58
58
  index = await get_index_from_prompt(
59
- memory, input_text_action.prompt_instructions, browser
59
+ memory,
60
+ input_text_action.prompt_instructions,
61
+ browser,
62
+ task,
60
63
  )
61
64
  if index is None:
62
65
  return
@@ -0,0 +1,42 @@
1
+ from optexity.inference.infra.browser import Browser
2
+ from optexity.schema.actions.interaction_action import KeyPressAction, KeyPressType
3
+ from optexity.schema.memory import Memory
4
+
5
+
6
+ async def handle_key_press(
7
+ keypress_action: KeyPressAction,
8
+ memory: Memory,
9
+ browser: Browser,
10
+ ):
11
+ page = await browser.get_current_page()
12
+ if page is None:
13
+ return
14
+
15
+ if keypress_action.type == KeyPressType.ENTER:
16
+ await page.keyboard.press("Enter")
17
+ if keypress_action.type == KeyPressType.TAB:
18
+ await page.keyboard.press("Tab")
19
+ if keypress_action.type == KeyPressType.ZERO:
20
+ await page.keyboard.press("0")
21
+ if keypress_action.type == KeyPressType.ONE:
22
+ await page.keyboard.press("1")
23
+ if keypress_action.type == KeyPressType.TWO:
24
+ await page.keyboard.press("2")
25
+ if keypress_action.type == KeyPressType.THREE:
26
+ await page.keyboard.press("3")
27
+ if keypress_action.type == KeyPressType.FOUR:
28
+ await page.keyboard.press("4")
29
+ if keypress_action.type == KeyPressType.FIVE:
30
+ await page.keyboard.press("5")
31
+ if keypress_action.type == KeyPressType.SIX:
32
+ await page.keyboard.press("6")
33
+ if keypress_action.type == KeyPressType.SEVEN:
34
+ await page.keyboard.press("7")
35
+ if keypress_action.type == KeyPressType.EIGHT:
36
+ await page.keyboard.press("8")
37
+ if keypress_action.type == KeyPressType.NINE:
38
+ await page.keyboard.press("9")
39
+ if keypress_action.type == KeyPressType.SLASH:
40
+ await page.keyboard.press("/")
41
+ if keypress_action.type == KeyPressType.SPACE:
42
+ await page.keyboard.press("Space")
@@ -60,7 +60,7 @@ async def select_option_index(
60
60
  try:
61
61
 
62
62
  index = await get_index_from_prompt(
63
- memory, select_option_action.prompt_instructions, browser
63
+ memory, select_option_action.prompt_instructions, browser, task
64
64
  )
65
65
  if index is None:
66
66
  return
@@ -57,9 +57,19 @@ async def smart_select(
57
57
  options: list[SelectOptionValue], patterns: list[str], memory: Memory
58
58
  ):
59
59
  # Get all options from the <select>
60
-
60
+ ## TODO: remove this once we have a better way to handle select one
61
61
  matched_values = []
62
62
 
63
+ if len(options) == 0:
64
+ return []
65
+ if len(options) == 1:
66
+ return [options[0].value]
67
+ if len(options) == 2 and "Select One" in [o.value for o in options]:
68
+ if options[0].value == "Select One":
69
+ return [options[1].value]
70
+ else:
71
+ return [options[0].value]
72
+
63
73
  for p in patterns:
64
74
  # If pattern contains regex characters, treat as regex
65
75
  is_regex = p.startswith("^") or p.endswith("$") or ".*" in p
@@ -36,16 +36,16 @@ async def handle_upload_file(
36
36
  logger.debug(
37
37
  f"Executing prompt-based action: {upload_file_action.__class__.__name__}"
38
38
  )
39
- await upload_file_index(upload_file_action, browser, memory)
39
+ await upload_file_index(upload_file_action, browser, memory, task)
40
40
 
41
41
 
42
42
  async def upload_file_index(
43
- upload_file_action: UploadFileAction, browser: Browser, memory: Memory
43
+ upload_file_action: UploadFileAction, browser: Browser, memory: Memory, task: Task
44
44
  ):
45
45
 
46
46
  try:
47
47
  index = await get_index_from_prompt(
48
- memory, upload_file_action.prompt_instructions, browser
48
+ memory, upload_file_action.prompt_instructions, browser, task
49
49
  )
50
50
  if index is None:
51
51
  return
@@ -18,14 +18,16 @@ index_prediction_agent = ActionPredictionLocatorAxtree()
18
18
 
19
19
 
20
20
  async def get_index_from_prompt(
21
- memory: Memory, prompt_instructions: str, browser: Browser
21
+ memory: Memory, prompt_instructions: str, browser: Browser, task: Task
22
22
  ):
23
23
  browser_state_summary = await browser.get_browser_state_summary()
24
24
  memory.browser_states[-1] = BrowserState(
25
25
  url=browser_state_summary.url,
26
26
  screenshot=browser_state_summary.screenshot,
27
27
  title=browser_state_summary.title,
28
- axtree=browser_state_summary.dom_state.llm_representation(),
28
+ axtree=browser_state_summary.dom_state.llm_representation(
29
+ remove_empty_nodes=task.automation.remove_empty_nodes_in_axtree
30
+ ),
29
31
  )
30
32
 
31
33
  try:
@@ -6,6 +6,7 @@ from optexity.inference.infra.browser import Browser
6
6
  from optexity.inference.models import GeminiModels, get_llm_model
7
7
  from optexity.schema.actions.assertion_action import AssertionAction, LLMAssertion
8
8
  from optexity.schema.memory import Memory
9
+ from optexity.schema.task import Task
9
10
 
10
11
  logger = logging.getLogger(__name__)
11
12
 
@@ -13,14 +14,17 @@ llm_model = get_llm_model(GeminiModels.GEMINI_2_5_FLASH, True)
13
14
 
14
15
 
15
16
  async def run_assertion_action(
16
- assertion_action: AssertionAction, memory: Memory, browser: Browser
17
+ assertion_action: AssertionAction,
18
+ memory: Memory,
19
+ browser: Browser,
20
+ task: Task,
17
21
  ):
18
22
  logger.debug(
19
23
  f"---------Running assertion action {assertion_action.model_dump_json()}---------"
20
24
  )
21
25
 
22
26
  if assertion_action.llm:
23
- await handle_llm_assertion(assertion_action.llm, memory, browser)
27
+ await handle_llm_assertion(assertion_action.llm, memory, browser, task)
24
28
  elif assertion_action.network_call:
25
29
  raise ValueError("Network call assertions are not supported yet")
26
30
  # await handle_network_call_assertion(
@@ -34,7 +38,7 @@ async def run_assertion_action(
34
38
 
35
39
 
36
40
  async def handle_llm_assertion(
37
- llm_assertion: LLMAssertion, memory: Memory, browser: Browser
41
+ llm_assertion: LLMAssertion, memory: Memory, browser: Browser, task: Task
38
42
  ):
39
43
  extra_instruction = """You are a helpful assistant that verifies if the condition is met.
40
44
  Use the info supplied below to verify the condition.
@@ -45,7 +49,7 @@ async def handle_llm_assertion(
45
49
  llm_assertion_new.extraction_instructions = (
46
50
  extra_instruction + "\n" + llm_assertion_new.extraction_instructions
47
51
  )
48
- output_data = await handle_llm_extraction(llm_assertion_new, memory, browser)
52
+ output_data = await handle_llm_extraction(llm_assertion_new, memory, browser, task)
49
53
 
50
54
  if output_data.json_data["assertion_result"]:
51
55
  return True
@@ -25,7 +25,6 @@ from optexity.inference.core.run_interaction import (
25
25
  run_interaction_action,
26
26
  )
27
27
  from optexity.inference.core.run_python_script import run_python_script_action
28
- from optexity.inference.core.run_two_fa import run_two_fa_action
29
28
  from optexity.inference.infra.browser import Browser
30
29
  from optexity.schema.actions.interaction_action import DownloadUrlAsPdfAction
31
30
  from optexity.schema.automation import ActionNode, ForLoopNode, IfElseNode
@@ -209,7 +208,9 @@ async def run_final_logging(
209
208
  url=browser_state_summary.url,
210
209
  screenshot=browser_state_summary.screenshot,
211
210
  title=browser_state_summary.title,
212
- axtree=browser_state_summary.dom_state.llm_representation(),
211
+ axtree=browser_state_summary.dom_state.llm_representation(
212
+ remove_empty_nodes=task.automation.remove_empty_nodes_in_axtree
213
+ ),
213
214
  )
214
215
  )
215
216
 
@@ -272,14 +273,14 @@ async def run_action_node(
272
273
  await run_extraction_action(
273
274
  action_node.extraction_action, memory, browser, task
274
275
  )
275
- elif action_node.two_fa_action:
276
- await run_two_fa_action(action_node.two_fa_action, memory)
277
276
  elif action_node.python_script_action:
278
277
  await run_python_script_action(
279
278
  action_node.python_script_action, memory, browser
280
279
  )
281
280
  elif action_node.assertion_action:
282
- await run_assertion_action(action_node.assertion_action, memory, browser)
281
+ await run_assertion_action(
282
+ action_node.assertion_action, memory, browser, task
283
+ )
283
284
 
284
285
  except Exception as e:
285
286
  logger.error(f"Error running node {memory.automation_state.step_index}: {e}")
@@ -4,12 +4,14 @@ import traceback
4
4
  import aiofiles
5
5
  import httpx
6
6
 
7
+ from optexity.inference.core.run_two_fa import run_two_fa_action
7
8
  from optexity.inference.infra.browser import Browser
8
9
  from optexity.inference.models import GeminiModels, get_llm_model
9
10
  from optexity.schema.actions.extraction_action import (
10
11
  ExtractionAction,
11
12
  LLMExtraction,
12
13
  NetworkCallExtraction,
14
+ PythonScriptExtraction,
13
15
  ScreenshotExtraction,
14
16
  StateExtraction,
15
17
  )
@@ -37,7 +39,11 @@ async def run_extraction_action(
37
39
 
38
40
  if extraction_action.llm:
39
41
  await handle_llm_extraction(
40
- extraction_action.llm, memory, browser, extraction_action.unique_identifier
42
+ extraction_action.llm,
43
+ memory,
44
+ browser,
45
+ task,
46
+ extraction_action.unique_identifier,
41
47
  )
42
48
  elif extraction_action.network_call:
43
49
  await handle_network_call_extraction(
@@ -47,6 +53,14 @@ async def run_extraction_action(
47
53
  task,
48
54
  extraction_action.unique_identifier,
49
55
  )
56
+ elif extraction_action.python_script:
57
+ await handle_python_script_extraction(
58
+ extraction_action.python_script,
59
+ memory,
60
+ browser,
61
+ task,
62
+ extraction_action.unique_identifier,
63
+ )
50
64
  elif extraction_action.screenshot:
51
65
  await handle_screenshot_extraction(
52
66
  extraction_action.screenshot,
@@ -61,6 +75,8 @@ async def run_extraction_action(
61
75
  browser,
62
76
  extraction_action.unique_identifier,
63
77
  )
78
+ elif extraction_action.two_fa_action:
79
+ await run_two_fa_action(extraction_action.two_fa_action, memory)
64
80
 
65
81
 
66
82
  async def handle_state_extraction(
@@ -108,6 +124,7 @@ async def handle_llm_extraction(
108
124
  llm_extraction: LLMExtraction,
109
125
  memory: Memory,
110
126
  browser: Browser,
127
+ task: Task,
111
128
  unique_identifier: str | None = None,
112
129
  ):
113
130
  browser_state_summary = await browser.get_browser_state_summary()
@@ -115,7 +132,9 @@ async def handle_llm_extraction(
115
132
  url=browser_state_summary.url,
116
133
  screenshot=browser_state_summary.screenshot,
117
134
  title=browser_state_summary.title,
118
- axtree=browser_state_summary.dom_state.llm_representation(),
135
+ axtree=browser_state_summary.dom_state.llm_representation(
136
+ remove_empty_nodes=task.automation.remove_empty_nodes_in_axtree
137
+ ),
119
138
  )
120
139
 
121
140
  # TODO: fix this double calling of screenshot and axtree
@@ -131,8 +150,8 @@ async def handle_llm_extraction(
131
150
 
132
151
  system_instruction = f"""
133
152
  You are an expert in extracting information from a website. You will be given an axtree of a webpage.
134
- Your task is to extract the information from the webpage and return it in the format specified by the instructions.
135
- {llm_extraction.extraction_instructions}
153
+ Your task is to extract the information from the webpage and return it in the format specified by the instructions. You will be first provided the instructions and then the axtree.
154
+ Instructions: {llm_extraction.extraction_instructions}
136
155
  """
137
156
 
138
157
  prompt = f"""
@@ -163,6 +182,8 @@ async def handle_llm_extraction(
163
182
  memory.token_usage += token_usage
164
183
  memory.variables.output_data.append(output_data)
165
184
 
185
+ memory.browser_states[-1].final_prompt = f"{system_instruction}\n{prompt}"
186
+
166
187
  if llm_extraction.output_variable_names is not None:
167
188
  for output_variable_name in llm_extraction.output_variable_names:
168
189
  v = response_dict[output_variable_name]
@@ -216,6 +237,31 @@ async def handle_network_call_extraction(
216
237
  )
217
238
 
218
239
 
240
+ async def handle_python_script_extraction(
241
+ python_script_extraction: PythonScriptExtraction,
242
+ memory: Memory,
243
+ browser: Browser,
244
+ task: Task,
245
+ unique_identifier: str | None = None,
246
+ ):
247
+ local_vars = {}
248
+ exec(python_script_extraction.script, {}, local_vars)
249
+ code_fn = local_vars["code_fn"]
250
+ axtree = memory.browser_states[-1].axtree
251
+ result = await code_fn(axtree)
252
+ if result is not None:
253
+ memory.variables.output_data.append(
254
+ OutputData(
255
+ unique_identifier=unique_identifier,
256
+ json_data=result,
257
+ )
258
+ )
259
+ else:
260
+ logger.warning(
261
+ f"No result from Python script extraction: {python_script_extraction.script}"
262
+ )
263
+
264
+
219
265
  async def download_request(
220
266
  network_call: NetworkRequest, download_filename: str, task: Task, memory: Memory
221
267
  ):