optexity 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. optexity/cli.py +1 -1
  2. optexity/examples/__init__.py +0 -0
  3. optexity/examples/add_example.py +88 -0
  4. optexity/examples/download_pdf_url.py +29 -0
  5. optexity/examples/extract_price_stockanalysis.py +44 -0
  6. optexity/examples/file_upload.py +59 -0
  7. optexity/examples/i94.py +126 -0
  8. optexity/examples/i94_travel_history.py +126 -0
  9. optexity/examples/peachstate_medicaid.py +201 -0
  10. optexity/examples/supabase_login.py +75 -0
  11. optexity/inference/__init__.py +0 -0
  12. optexity/inference/agents/__init__.py +0 -0
  13. optexity/inference/agents/error_handler/__init__.py +0 -0
  14. optexity/inference/agents/error_handler/error_handler.py +39 -0
  15. optexity/inference/agents/error_handler/prompt.py +60 -0
  16. optexity/inference/agents/index_prediction/__init__.py +0 -0
  17. optexity/inference/agents/index_prediction/action_prediction_locator_axtree.py +45 -0
  18. optexity/inference/agents/index_prediction/prompt.py +14 -0
  19. optexity/inference/agents/select_value_prediction/__init__.py +0 -0
  20. optexity/inference/agents/select_value_prediction/prompt.py +20 -0
  21. optexity/inference/agents/select_value_prediction/select_value_prediction.py +39 -0
  22. optexity/inference/agents/two_fa_extraction/__init__.py +0 -0
  23. optexity/inference/agents/two_fa_extraction/prompt.py +23 -0
  24. optexity/inference/agents/two_fa_extraction/two_fa_extraction.py +47 -0
  25. optexity/inference/child_process.py +251 -0
  26. optexity/inference/core/__init__.py +0 -0
  27. optexity/inference/core/interaction/__init__.py +0 -0
  28. optexity/inference/core/interaction/handle_agentic_task.py +79 -0
  29. optexity/inference/core/interaction/handle_check.py +57 -0
  30. optexity/inference/core/interaction/handle_click.py +79 -0
  31. optexity/inference/core/interaction/handle_command.py +261 -0
  32. optexity/inference/core/interaction/handle_input.py +76 -0
  33. optexity/inference/core/interaction/handle_keypress.py +16 -0
  34. optexity/inference/core/interaction/handle_select.py +109 -0
  35. optexity/inference/core/interaction/handle_select_utils.py +132 -0
  36. optexity/inference/core/interaction/handle_upload.py +59 -0
  37. optexity/inference/core/interaction/utils.py +81 -0
  38. optexity/inference/core/logging.py +406 -0
  39. optexity/inference/core/run_assertion.py +55 -0
  40. optexity/inference/core/run_automation.py +463 -0
  41. optexity/inference/core/run_extraction.py +240 -0
  42. optexity/inference/core/run_interaction.py +254 -0
  43. optexity/inference/core/run_python_script.py +20 -0
  44. optexity/inference/core/run_two_fa.py +120 -0
  45. optexity/inference/core/two_factor_auth/__init__.py +0 -0
  46. optexity/inference/infra/__init__.py +0 -0
  47. optexity/inference/infra/browser.py +455 -0
  48. optexity/inference/infra/browser_extension.py +20 -0
  49. optexity/inference/models/__init__.py +22 -0
  50. optexity/inference/models/gemini.py +113 -0
  51. optexity/inference/models/human.py +20 -0
  52. optexity/inference/models/llm_model.py +210 -0
  53. optexity/inference/run_local.py +200 -0
  54. optexity/schema/__init__.py +0 -0
  55. optexity/schema/actions/__init__.py +0 -0
  56. optexity/schema/actions/assertion_action.py +66 -0
  57. optexity/schema/actions/extraction_action.py +143 -0
  58. optexity/schema/actions/interaction_action.py +330 -0
  59. optexity/schema/actions/misc_action.py +18 -0
  60. optexity/schema/actions/prompts.py +27 -0
  61. optexity/schema/actions/two_fa_action.py +24 -0
  62. optexity/schema/automation.py +432 -0
  63. optexity/schema/callback.py +16 -0
  64. optexity/schema/inference.py +87 -0
  65. optexity/schema/memory.py +100 -0
  66. optexity/schema/task.py +212 -0
  67. optexity/schema/token_usage.py +48 -0
  68. optexity/utils/__init__.py +0 -0
  69. optexity/utils/settings.py +54 -0
  70. optexity/utils/utils.py +76 -0
  71. {optexity-0.1.2.dist-info → optexity-0.1.4.dist-info}/METADATA +20 -36
  72. optexity-0.1.4.dist-info/RECORD +80 -0
  73. optexity-0.1.2.dist-info/RECORD +0 -11
  74. {optexity-0.1.2.dist-info → optexity-0.1.4.dist-info}/WHEEL +0 -0
  75. {optexity-0.1.2.dist-info → optexity-0.1.4.dist-info}/entry_points.txt +0 -0
  76. {optexity-0.1.2.dist-info → optexity-0.1.4.dist-info}/licenses/LICENSE +0 -0
  77. {optexity-0.1.2.dist-info → optexity-0.1.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,261 @@
1
+ import asyncio
2
+ import logging
3
+
4
+ from playwright.async_api import Locator
5
+
6
+ from optexity.exceptions import AssertLocatorPresenceException
7
+ from optexity.inference.core.interaction.handle_select_utils import (
8
+ SelectOptionValue,
9
+ smart_select,
10
+ )
11
+ from optexity.inference.core.interaction.utils import handle_download
12
+ from optexity.inference.infra.browser import Browser
13
+ from optexity.schema.actions.interaction_action import (
14
+ CheckAction,
15
+ ClickElementAction,
16
+ InputTextAction,
17
+ SelectOptionAction,
18
+ UncheckAction,
19
+ UploadFileAction,
20
+ )
21
+ from optexity.schema.memory import BrowserState, Memory
22
+ from optexity.schema.task import Task
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
async def _perform_locator_action(
    action,
    locator: Locator,
    browser: Browser,
    memory: Memory,
    task: Task,
    max_timeout_seconds_per_try: float,
):
    """Dispatch ``action`` to the locator helper matching its concrete type."""
    if isinstance(action, ClickElementAction):
        await click_locator(
            action,
            locator,
            browser,
            memory,
            task,
            max_timeout_seconds_per_try,
        )
    elif isinstance(action, InputTextAction):
        await input_text_locator(action, locator, max_timeout_seconds_per_try)
    elif isinstance(action, SelectOptionAction):
        await select_option_locator(
            action,
            locator,
            browser,
            memory,
            task,
            max_timeout_seconds_per_try,
        )
    elif isinstance(action, CheckAction):
        await check_locator(action, locator, max_timeout_seconds_per_try, browser)
    elif isinstance(action, UncheckAction):
        await uncheck_locator(action, locator, max_timeout_seconds_per_try, browser)
    elif isinstance(action, UploadFileAction):
        await upload_file_locator(action, locator)


async def command_based_action_with_retry(
    action: (
        ClickElementAction
        | InputTextAction
        | SelectOptionAction
        | CheckAction
        | UploadFileAction
        | UncheckAction
    ),
    browser: Browser,
    memory: Memory,
    task: Task,
    max_tries: int,
    max_timeout_seconds_per_try: float,
):
    """Run a command-based (locator) action, retrying up to ``max_tries`` times.

    Each try resolves the locator from ``action.command``, checks that it is
    visible, refreshes the last entry of ``memory.browser_states`` and then
    performs the action. Any exception raised during a try is recorded and
    the loop sleeps ``max_timeout_seconds_per_try`` before the next try.

    Args:
        action: The interaction to perform.
        browser: Browser wrapper used to resolve locators and read page state.
        memory: Run memory; its last browser state is overwritten on success.
        task: Task forwarded to the download-aware helpers.
        max_tries: Maximum number of attempts.
        max_timeout_seconds_per_try: Per-attempt timeout, in seconds.

    Returns:
        ``None`` if the action has no command, has ``skip_command`` set, or
        succeeded; otherwise a string describing the last failure.

    Raises:
        AssertLocatorPresenceException: If every try failed and
            ``action.assert_locator_presence`` is truthy.
    """
    if action.command is None or action.skip_command:
        return

    action_name = action.__class__.__name__
    last_error = None

    logger.debug(f"Executing command-based action: {action_name}")

    for try_index in range(max_tries):
        last_error = None
        try:
            # https://playwright.dev/docs/actionability
            locator = await browser.get_locator_from_command(action.command)
            if try_index == 0:
                # Best-effort wait on the first try only: a failure here is
                # not fatal because is_visible() below makes the decision.
                try:
                    await locator.wait_for(
                        state="visible", timeout=max_timeout_seconds_per_try * 1000
                    )
                except Exception as wait_error:
                    logger.debug(
                        f"{action_name}: waiting for visible locator failed: {wait_error}"
                    )

            if not await locator.is_visible():
                await asyncio.sleep(max_timeout_seconds_per_try)
                last_error = "error: locator not visible"
                continue

            # Record the page state the action is about to run against.
            browser_state_summary = await browser.get_browser_state_summary()
            memory.browser_states[-1] = BrowserState(
                url=browser_state_summary.url,
                screenshot=browser_state_summary.screenshot,
                title=browser_state_summary.title,
                axtree=browser_state_summary.dom_state.llm_representation(),
            )

            await _perform_locator_action(
                action,
                locator,
                browser,
                memory,
                task,
                max_timeout_seconds_per_try,
            )
            logger.debug(f"{action_name} successful on try {try_index + 1}")
            return
        except Exception as e:
            last_error = f"error: {e}"
            await asyncio.sleep(max_timeout_seconds_per_try)

    # Reached only when no try succeeded (including max_tries == 0).
    if last_error is None:
        last_error = "error in executing command"
    logger.debug(f"{action_name} failed after {max_tries} tries: {last_error}")

    if last_error and action.assert_locator_presence:
        logger.debug(
            f"Error in {action_name} with assert_locator_presence: {action_name}: {last_error}"
        )
        raise AssertLocatorPresenceException(
            message=f"Error in {action_name} with assert_locator_presence: {action_name}",
            original_error=last_error,
            command=action.command,
        )
    return last_error
132
+
133
+
134
async def click_locator(
    click_element_action: ClickElementAction,
    locator: Locator,
    browser: Browser,
    memory: Memory,
    task: Task,
    max_timeout_seconds_per_try: float,
):
    """Click (or double-click) ``locator``, optionally capturing a download.

    When ``click_element_action.expect_download`` is truthy the click is
    handed to ``handle_download`` together with the download filename;
    otherwise it is awaited directly.
    """

    async def _actual_click():
        # Pick the Playwright method according to the double_click flag.
        if click_element_action.double_click:
            perform_click = locator.dblclick
        else:
            perform_click = locator.click
        await perform_click(
            no_wait_after=True, timeout=max_timeout_seconds_per_try * 1000
        )

    if not click_element_action.expect_download:
        await _actual_click()
        return

    await handle_download(
        _actual_click, memory, browser, task, click_element_action.download_filename
    )
159
+
160
+
161
async def input_text_locator(
    input_text_action: InputTextAction,
    locator: Locator,
    max_timeout_seconds_per_try: float,
):
    """Enter ``input_text_action.input_text`` into ``locator``.

    Uses ``locator.fill`` when ``fill_or_type`` is ``"fill"`` and
    ``locator.type`` otherwise, then presses Enter if ``press_enter`` is
    truthy.

    Args:
        input_text_action: Action carrying the text and input options.
        locator: Playwright locator of the target element.
        max_timeout_seconds_per_try: Timeout in seconds applied to every
            Playwright call made here.
    """
    timeout_ms = max_timeout_seconds_per_try * 1000

    if input_text_action.fill_or_type == "fill":
        await locator.fill(
            input_text_action.input_text,
            no_wait_after=True,
            timeout=timeout_ms,
        )
    else:
        await locator.type(
            input_text_action.input_text,
            no_wait_after=True,
            timeout=timeout_ms,
        )

    if input_text_action.press_enter:
        # Bound the key press by the same per-try timeout as fill/type so a
        # stuck element cannot fall back to Playwright's default timeout.
        await locator.press("Enter", timeout=timeout_ms)
182
+
183
+
184
async def check_locator(
    action: CheckAction,
    locator: Locator,
    max_timeout_seconds_per_try: float,
    browser: Browser,
):
    """Uncheck ``locator``, pause one second, re-resolve it, then check it.

    NOTE(review): the uncheck-before-check sequence presumably forces a state
    change even when the box is already checked -- confirm with the author.
    """
    timeout_ms = max_timeout_seconds_per_try * 1000

    await locator.uncheck(no_wait_after=True, timeout=timeout_ms)
    await asyncio.sleep(1)

    # Resolve the locator again from the command before the final check.
    refreshed_locator = await browser.get_locator_from_command(action.command)
    await refreshed_locator.check(no_wait_after=True, timeout=timeout_ms)
196
+
197
+
198
async def uncheck_locator(
    action: UncheckAction,
    locator: Locator,
    max_timeout_seconds_per_try: float,
    browser: Browser,
):
    """Check ``locator``, pause one second, re-resolve it, then uncheck it.

    NOTE(review): the check-before-uncheck sequence presumably forces a state
    change even when the box is already unchecked -- confirm with the author.
    """
    timeout_ms = max_timeout_seconds_per_try * 1000

    await locator.check(no_wait_after=True, timeout=timeout_ms)
    await asyncio.sleep(1)

    # Resolve the locator again from the command before the final uncheck.
    refreshed_locator = await browser.get_locator_from_command(action.command)
    await refreshed_locator.uncheck(no_wait_after=True, timeout=timeout_ms)
210
+
211
+
212
async def upload_file_locator(upload_file_action: UploadFileAction, locator: Locator):
    """Pass the action's ``file_path`` to ``locator.set_input_files``."""
    path_to_upload = upload_file_action.file_path
    await locator.set_input_files(path_to_upload)
214
+
215
+
216
async def select_option_locator(
    select_option_action: SelectOptionAction,
    locator: Locator,
    browser: Browser,
    memory: Memory,
    task: Task,
    max_timeout_seconds_per_try: float,
):
    """Select option(s) on the ``<select>`` element behind ``locator``.

    Reads the element's options from the page, resolves
    ``select_option_action.select_values`` against them with ``smart_select``
    and passes the resulting values to ``locator.select_option``. When
    ``expect_download`` is truthy the selection is run through
    ``handle_download``.

    Args:
        select_option_action: Action carrying the requested values and the
            download options.
        locator: Playwright locator of the select element.
        browser: Browser wrapper forwarded to ``handle_download``.
        memory: Run memory forwarded to ``smart_select`` / ``handle_download``.
        task: Task forwarded to ``handle_download``.
        max_timeout_seconds_per_try: Timeout in seconds for the selection.
    """

    async def _actual_select_option():
        options: list[dict[str, str]] = await locator.evaluate(
            """
            sel => Array.from(sel.options).map(o => ({
                value: o.value,
                label: o.label || o.textContent
            }))
            """
        )

        select_option_values = [
            SelectOptionValue(value=o["value"], label=o["label"]) for o in options
        ]

        # smart_select takes (options, patterns, memory); passing the raw
        # option dicts as an extra positional argument raises TypeError.
        matched_values = await smart_select(
            select_option_values, select_option_action.select_values, memory
        )

        logger.debug(
            f"Matched values for {select_option_action.command}: {matched_values}"
        )

        await locator.select_option(
            matched_values,
            no_wait_after=True,
            timeout=max_timeout_seconds_per_try * 1000,
        )

    if select_option_action.expect_download:
        await handle_download(
            _actual_select_option,
            memory,
            browser,
            task,
            select_option_action.download_filename,
        )
    else:
        await _actual_select_option()
@@ -0,0 +1,76 @@
1
+ import logging
2
+ import re
3
+
4
+ from optexity.inference.core.interaction.handle_command import (
5
+ command_based_action_with_retry,
6
+ )
7
+ from optexity.inference.core.interaction.utils import get_index_from_prompt
8
+ from optexity.inference.infra.browser import Browser
9
+ from optexity.schema.actions.interaction_action import InputTextAction
10
+ from optexity.schema.memory import Memory
11
+ from optexity.schema.task import Task
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
async def handle_input_text(
    input_text_action: InputTextAction,
    task: Task,
    memory: Memory,
    browser: Browser,
    max_timeout_seconds_per_try: float,
    max_tries: int,
):
    """Type text into an element, command-based first, prompt-based as fallback.

    The action is skipped entirely when ``input_text`` still looks like an
    unresolved indexed placeholder such as ``{name[0]}``. Otherwise the
    command path is tried (if a command is set and not skipped); if it
    reports an error, or was not attempted, the prompt path runs unless
    ``skip_prompt`` is truthy.
    """
    # Matches a whole string of the form {identifier[digits]}.
    unresolved_placeholder = re.compile(r"^\{([A-Za-z_][A-Za-z0-9_]*)\[(\d+)\]\}$")

    if unresolved_placeholder.match(input_text_action.input_text) is not None:
        logger.debug(
            "Skipping input text because input variable was not present for this step"
        )
        return

    if input_text_action.command and not input_text_action.skip_command:
        command_error = await command_based_action_with_retry(
            input_text_action,
            browser,
            memory,
            task,
            max_tries,
            max_timeout_seconds_per_try,
        )
        if command_error is None:
            # Command path succeeded; no fallback needed.
            return

    if input_text_action.skip_prompt:
        return

    logger.debug(
        f"Executing prompt-based action: {input_text_action.__class__.__name__}"
    )
    await input_text_index(input_text_action, browser, memory)
52
+
53
+
54
async def input_text_index(
    input_text_action: InputTextAction, browser: Browser, memory: Memory
):
    """Type text into the element whose index is predicted from the prompt.

    Does nothing when no index is returned. Any exception is logged and
    swallowed.
    """
    try:
        element_index = await get_index_from_prompt(
            memory, input_text_action.prompt_instructions, browser
        )
        if element_index is None:
            return

        agent = browser.backend_agent
        input_model = agent.ActionModel(
            **{
                "input": {
                    "index": int(element_index),
                    "text": input_text_action.input_text,
                    "clear": True,
                }
            }
        )
        await agent.multi_act([input_model])
    except Exception as e:
        logger.error(f"Error in input_text_index: {e}")
        return
@@ -0,0 +1,16 @@
1
+ from optexity.inference.infra.browser import Browser
2
+ from optexity.schema.actions.interaction_action import KeyPressAction, KeyPressType
3
+ from optexity.schema.memory import Memory
4
+
5
+
6
async def handle_key_press(
    keypress_action: KeyPressAction,
    memory: Memory,
    browser: Browser,
):
    """Press Enter on the current page for an ``ENTER`` key-press action.

    Nothing happens when there is no current page or the key type is not
    ``KeyPressType.ENTER``. ``memory`` is accepted but not used here.
    """
    current_page = await browser.get_current_page()

    # Short-circuit keeps the key type unread when no page is available.
    if current_page is not None and keypress_action.type == KeyPressType.ENTER:
        await current_page.keyboard.press("Enter")
@@ -0,0 +1,109 @@
1
+ import logging
2
+
3
+ from browser_use.dom.serializer.serializer import DOMTreeSerializer
4
+
5
+ from optexity.inference.core.interaction.handle_command import (
6
+ command_based_action_with_retry,
7
+ )
8
+ from optexity.inference.core.interaction.handle_select_utils import (
9
+ SelectOptionValue,
10
+ smart_select,
11
+ )
12
+ from optexity.inference.core.interaction.utils import (
13
+ get_index_from_prompt,
14
+ handle_download,
15
+ )
16
+ from optexity.inference.infra.browser import Browser
17
+ from optexity.schema.actions.interaction_action import SelectOptionAction
18
+ from optexity.schema.memory import Memory
19
+ from optexity.schema.task import Task
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
async def handle_select_option(
    select_option_action: SelectOptionAction,
    task: Task,
    memory: Memory,
    browser: Browser,
    max_timeout_seconds_per_try: float,
    max_tries: int,
):
    """Select an option, command-based first, prompt-based as fallback.

    The command path runs when a command is set and not skipped. If it
    reports an error, or was not attempted, the prompt path runs unless
    ``skip_prompt`` is truthy.
    """
    if select_option_action.command and not select_option_action.skip_command:
        command_error = await command_based_action_with_retry(
            select_option_action,
            browser,
            memory,
            task,
            max_tries,
            max_timeout_seconds_per_try,
        )
        if command_error is None:
            # Command path succeeded; no fallback needed.
            return

    if select_option_action.skip_prompt:
        return

    logger.debug(
        f"Executing prompt-based action: {select_option_action.__class__.__name__}"
    )
    await select_option_index(select_option_action, browser, memory, task)
51
+
52
+
53
async def select_option_index(
    select_option_action: SelectOptionAction,
    browser: Browser,
    memory: Memory,
    task: Task,
):
    """Select a dropdown option on the element predicted from the prompt.

    Returns early when no index, no DOM node or no select options are found.
    The requested values are resolved with ``smart_select`` and the first
    match is sent through a ``select_dropdown`` action, wrapped in
    ``handle_download`` when a download is expected. Any exception is logged
    and swallowed.
    """
    # TODO: either perfect text match or agentic select value prediction
    try:
        element_index = await get_index_from_prompt(
            memory, select_option_action.prompt_instructions, browser
        )
        if element_index is None:
            return

        agent = browser.backend_agent
        node = await agent.browser_session.get_element_by_index(element_index)
        if node is None:
            return

        extracted = DOMTreeSerializer(node)._extract_select_options(node)
        if extracted is None:
            return

        candidates = [
            SelectOptionValue(value=raw["value"], label=raw["text"])
            for raw in extracted["all_options"]
        ]

        matched_values = await smart_select(
            candidates, select_option_action.select_values, memory
        )

        async def _actual_select_option():
            dropdown_model = browser.backend_agent.ActionModel(
                **{
                    "select_dropdown": {
                        "index": int(element_index),
                        "text": matched_values[0],
                    }
                }
            )
            await browser.backend_agent.multi_act([dropdown_model])

        if not select_option_action.expect_download:
            await _actual_select_option()
        else:
            await handle_download(
                _actual_select_option,
                memory,
                browser,
                task,
                select_option_action.download_filename,
            )
    except Exception as e:
        logger.error(f"Error in select_option_index: {e}")
        return
@@ -0,0 +1,132 @@
1
+ import logging
2
+ import re
3
+
4
+ from pydantic import BaseModel
5
+
6
+ from optexity.inference.agents.select_value_prediction.select_value_prediction import (
7
+ SelectValuePredictionAgent,
8
+ )
9
+ from optexity.schema.actions.interaction_action import Locator
10
+ from optexity.schema.memory import Memory
11
+
12
+ logger = logging.getLogger(__name__)
13
+ select_value_prediction_agent = SelectValuePredictionAgent()
14
+
15
+
16
class SelectOptionValue(BaseModel):
    """One option of a select element, as a value/label pair."""

    # The option's value; this is what the matching helpers return.
    value: str
    # The option's label text, matched alongside the value.
    label: str
19
+
20
+
21
def llm_select_match(
    options: list[SelectOptionValue], patterns: list[str], memory: Memory
) -> list[str]:
    """Ask the select-value prediction agent which option values match.

    Adds the call's token usage to ``memory.token_usage`` and stores the
    prompt and response on the last browser state. Only predicted values
    that are actual option values are returned, in the order predicted.
    """
    option_dicts = [opt.model_dump() for opt in options]
    final_prompt, response, token_usage = (
        select_value_prediction_agent.predict_select_value(option_dicts, patterns)
    )

    memory.token_usage += token_usage
    latest_state = memory.browser_states[-1]
    latest_state.final_prompt = final_prompt
    latest_state.llm_response = response.model_dump()

    # Drop anything the model returned that is not a real option value.
    known_values = [opt.value for opt in options]
    return [
        candidate for candidate in response.matched_values if candidate in known_values
    ]
43
+
44
+
45
def score_match(pat: str, val: str) -> int:
    """Score how well ``pat`` matches ``val``; higher is better.

    100 for equality, 80 when ``val`` starts with ``pat``, 60 when ``pat``
    occurs anywhere in ``val``, and 0 otherwise.
    """
    score = 0
    if pat == val:
        score = 100
    elif val.startswith(pat):
        score = 80
    elif pat in val:
        score = 60
    return score
54
+
55
+
56
def _best_scoring_value(normalized_pattern: str, candidates: list[tuple[str, str]]):
    """Return the value of the first top-scoring candidate, or None if all score 0.

    ``candidates`` holds ``(normalized_text, value_to_return)`` pairs.
    """
    top_score = 0
    top_value = None
    for normalized_text, value in candidates:
        current = score_match(normalized_pattern, normalized_text)
        if current > top_score:
            top_score = current
            top_value = value
    return top_value


async def smart_select(
    options: list[SelectOptionValue], patterns: list[str], memory: Memory
):
    """Resolve ``patterns`` to option values using progressively looser matching.

    Stages, each tried only if the previous one matched nothing:
      1. regex search (patterns starting with ``^``, ending with ``$`` or
         containing ``.*``) or exact equality, against value and label;
      2. scored matching on lower-cased, space-stripped values;
      3. scored matching on lower-cased, space-stripped labels;
      4. ``llm_select_match``;
      5. the ``patterns`` list itself.
    """
    matched_values = []

    # Stage 1: regex or exact match on value/label.
    for pattern in patterns:
        looks_like_regex = (
            pattern.startswith("^") or pattern.endswith("$") or ".*" in pattern
        )
        if looks_like_regex:
            compiled = re.compile(pattern)
            matched_values.extend(
                opt.value
                for opt in options
                if compiled.search(opt.value) or compiled.search(opt.label)
            )
        else:
            matched_values.extend(
                opt.value
                for opt in options
                if opt.value == pattern or opt.label == pattern
            )
    if matched_values:
        return matched_values

    normalized_patterns = [pattern.lower().replace(" ", "") for pattern in patterns]

    # Stage 2: scored match on values, only when the (normalized, raw) pairs
    # are all distinct.
    value_pairs = [(opt.value.lower().replace(" ", ""), opt.value) for opt in options]
    if len(set(value_pairs)) == len(value_pairs):
        for normalized in normalized_patterns:
            winner = _best_scoring_value(normalized, value_pairs)
            if winner is not None:
                matched_values.append(winner)
    if matched_values:
        return matched_values

    # Stage 3: same scoring on labels, returning the option's value.
    label_pairs = [(opt.label.lower().replace(" ", ""), opt.label) for opt in options]
    if len(set(label_pairs)) == len(label_pairs):
        label_candidates = [
            (normalized_label, opt.value)
            for (normalized_label, _), opt in zip(label_pairs, options)
        ]
        for normalized in normalized_patterns:
            winner = _best_scoring_value(normalized, label_candidates)
            if winner is not None:
                matched_values.append(winner)
    if matched_values:
        return matched_values

    # Stage 4: ask the LLM; Stage 5: fall back to the raw patterns.
    matched_values = llm_select_match(options, patterns, memory)
    if matched_values:
        return matched_values
    return patterns
@@ -0,0 +1,59 @@
1
+ import logging
2
+
3
+ from optexity.inference.core.interaction.handle_command import (
4
+ command_based_action_with_retry,
5
+ )
6
+ from optexity.inference.core.interaction.utils import get_index_from_prompt
7
+ from optexity.inference.infra.browser import Browser
8
+ from optexity.schema.actions.interaction_action import UploadFileAction
9
+ from optexity.schema.memory import Memory
10
+ from optexity.schema.task import Task
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
async def handle_upload_file(
    upload_file_action: UploadFileAction,
    task: Task,
    memory: Memory,
    browser: Browser,
    max_timeout_seconds_per_try: float,
    max_tries: int,
):
    """Upload a file, command-based first, prompt-based as fallback.

    The command path runs when a command is set and not skipped. If it
    reports an error, or was not attempted, the prompt path runs unless
    ``skip_prompt`` is truthy.
    """
    if upload_file_action.command and not upload_file_action.skip_command:
        command_error = await command_based_action_with_retry(
            upload_file_action,
            browser,
            memory,
            task,
            max_tries,
            max_timeout_seconds_per_try,
        )
        if command_error is None:
            # Command path succeeded; no fallback needed.
            return

    if upload_file_action.skip_prompt:
        return

    logger.debug(
        f"Executing prompt-based action: {upload_file_action.__class__.__name__}"
    )
    await upload_file_index(upload_file_action, browser, memory)
40
+
41
+
42
async def upload_file_index(
    upload_file_action: UploadFileAction, browser: Browser, memory: Memory
):
    """Upload a file to the element whose index is predicted from the prompt.

    Does nothing when no index is returned. Any exception is logged and
    swallowed.

    Args:
        upload_file_action: Action carrying the prompt instructions and the
            path of the file to upload.
        browser: Browser wrapper whose backend agent performs the upload.
        memory: Run memory forwarded to the index prediction.
    """
    try:
        index = await get_index_from_prompt(
            memory, upload_file_action.prompt_instructions, browser
        )
        if index is None:
            return

        # Coerce to int like the other index-based handlers in this package.
        action_model = browser.backend_agent.ActionModel(
            **{
                "upload_file": {
                    "index": int(index),
                    "path": upload_file_action.file_path,
                }
            }
        )
        await browser.backend_agent.multi_act([action_model])
    except Exception as e:
        logger.error(f"Error in upload_file_index: {e}")
        return