optexity 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. optexity/cli.py +1 -1
  2. optexity/examples/__init__.py +0 -0
  3. optexity/examples/add_example.py +88 -0
  4. optexity/examples/download_pdf_url.py +29 -0
  5. optexity/examples/extract_price_stockanalysis.py +44 -0
  6. optexity/examples/file_upload.py +59 -0
  7. optexity/examples/i94.py +126 -0
  8. optexity/examples/i94_travel_history.py +126 -0
  9. optexity/examples/peachstate_medicaid.py +201 -0
  10. optexity/examples/supabase_login.py +75 -0
  11. optexity/inference/__init__.py +0 -0
  12. optexity/inference/agents/__init__.py +0 -0
  13. optexity/inference/agents/error_handler/__init__.py +0 -0
  14. optexity/inference/agents/error_handler/error_handler.py +39 -0
  15. optexity/inference/agents/error_handler/prompt.py +60 -0
  16. optexity/inference/agents/index_prediction/__init__.py +0 -0
  17. optexity/inference/agents/index_prediction/action_prediction_locator_axtree.py +45 -0
  18. optexity/inference/agents/index_prediction/prompt.py +14 -0
  19. optexity/inference/agents/select_value_prediction/__init__.py +0 -0
  20. optexity/inference/agents/select_value_prediction/prompt.py +20 -0
  21. optexity/inference/agents/select_value_prediction/select_value_prediction.py +39 -0
  22. optexity/inference/agents/two_fa_extraction/__init__.py +0 -0
  23. optexity/inference/agents/two_fa_extraction/prompt.py +23 -0
  24. optexity/inference/agents/two_fa_extraction/two_fa_extraction.py +47 -0
  25. optexity/inference/child_process.py +251 -0
  26. optexity/inference/core/__init__.py +0 -0
  27. optexity/inference/core/interaction/__init__.py +0 -0
  28. optexity/inference/core/interaction/handle_agentic_task.py +79 -0
  29. optexity/inference/core/interaction/handle_check.py +57 -0
  30. optexity/inference/core/interaction/handle_click.py +79 -0
  31. optexity/inference/core/interaction/handle_command.py +261 -0
  32. optexity/inference/core/interaction/handle_input.py +76 -0
  33. optexity/inference/core/interaction/handle_keypress.py +16 -0
  34. optexity/inference/core/interaction/handle_select.py +109 -0
  35. optexity/inference/core/interaction/handle_select_utils.py +132 -0
  36. optexity/inference/core/interaction/handle_upload.py +59 -0
  37. optexity/inference/core/interaction/utils.py +81 -0
  38. optexity/inference/core/logging.py +406 -0
  39. optexity/inference/core/run_assertion.py +55 -0
  40. optexity/inference/core/run_automation.py +463 -0
  41. optexity/inference/core/run_extraction.py +240 -0
  42. optexity/inference/core/run_interaction.py +254 -0
  43. optexity/inference/core/run_python_script.py +20 -0
  44. optexity/inference/core/run_two_fa.py +120 -0
  45. optexity/inference/core/two_factor_auth/__init__.py +0 -0
  46. optexity/inference/infra/__init__.py +0 -0
  47. optexity/inference/infra/browser.py +455 -0
  48. optexity/inference/infra/browser_extension.py +20 -0
  49. optexity/inference/models/__init__.py +22 -0
  50. optexity/inference/models/gemini.py +113 -0
  51. optexity/inference/models/human.py +20 -0
  52. optexity/inference/models/llm_model.py +210 -0
  53. optexity/inference/run_local.py +200 -0
  54. optexity/schema/__init__.py +0 -0
  55. optexity/schema/actions/__init__.py +0 -0
  56. optexity/schema/actions/assertion_action.py +66 -0
  57. optexity/schema/actions/extraction_action.py +143 -0
  58. optexity/schema/actions/interaction_action.py +330 -0
  59. optexity/schema/actions/misc_action.py +18 -0
  60. optexity/schema/actions/prompts.py +27 -0
  61. optexity/schema/actions/two_fa_action.py +24 -0
  62. optexity/schema/automation.py +432 -0
  63. optexity/schema/callback.py +16 -0
  64. optexity/schema/inference.py +87 -0
  65. optexity/schema/memory.py +100 -0
  66. optexity/schema/task.py +212 -0
  67. optexity/schema/token_usage.py +48 -0
  68. optexity/utils/__init__.py +0 -0
  69. optexity/utils/settings.py +54 -0
  70. optexity/utils/utils.py +76 -0
  71. {optexity-0.1.2.dist-info → optexity-0.1.4.dist-info}/METADATA +20 -36
  72. optexity-0.1.4.dist-info/RECORD +80 -0
  73. optexity-0.1.2.dist-info/RECORD +0 -11
  74. {optexity-0.1.2.dist-info → optexity-0.1.4.dist-info}/WHEEL +0 -0
  75. {optexity-0.1.2.dist-info → optexity-0.1.4.dist-info}/entry_points.txt +0 -0
  76. {optexity-0.1.2.dist-info → optexity-0.1.4.dist-info}/licenses/LICENSE +0 -0
  77. {optexity-0.1.2.dist-info → optexity-0.1.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,463 @@
1
+ import asyncio
2
+ import logging
3
+ import time
4
+ import traceback
5
+ from copy import deepcopy
6
+
7
+ from patchright._impl._errors import TimeoutError as PatchrightTimeoutError
8
+ from playwright._impl._errors import TimeoutError as PlaywrightTimeoutError
9
+
10
+ from optexity.inference.core.interaction.utils import clean_download
11
+ from optexity.inference.core.logging import (
12
+ complete_task_in_server,
13
+ delete_local_data,
14
+ initiate_callback,
15
+ save_downloads_in_server,
16
+ save_latest_memory_state_locally,
17
+ save_output_data_in_server,
18
+ save_trajectory_in_server,
19
+ start_task_in_server,
20
+ )
21
+ from optexity.inference.core.run_assertion import run_assertion_action
22
+ from optexity.inference.core.run_extraction import run_extraction_action
23
+ from optexity.inference.core.run_interaction import (
24
+ handle_download_url_as_pdf,
25
+ run_interaction_action,
26
+ )
27
+ from optexity.inference.core.run_python_script import run_python_script_action
28
+ from optexity.inference.core.run_two_fa import run_two_fa_action
29
+ from optexity.inference.infra.browser import Browser
30
+ from optexity.schema.actions.interaction_action import DownloadUrlAsPdfAction
31
+ from optexity.schema.automation import ActionNode, ForLoopNode, IfElseNode
32
+ from optexity.schema.memory import BrowserState, ForLoopStatus, Memory, OutputData
33
+ from optexity.schema.task import Task
34
+ from optexity.utils.settings import settings
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+ # TODO: static check that index for all replacement of input variables are within the bounds of the input variables
39
+
40
+ # TODO: static check that all for loop expansion for generated variables have some place where generated variables are added to the memory
41
+
42
+ # TODO: Check that all for loop expansion for generated variables have some place where generated variables are added to the memory
43
+
44
+ # TODO: give a warning where any variable of type {variable_name[index]} is used but variable_name is not in the memory in generated variables or in input variables
45
+
46
+
47
async def _record_proxy_ip_info(browser: Browser, memory: Memory) -> None:
    """Store the egress IP details reported by ipinfo.io in the output data.

    The result of the in-page ``fetch`` is stored as ``json_data`` when it is
    a dict and as ``text`` otherwise, always under the ``"ip_info"``
    identifier.
    """
    await browser.go_to_url("https://ipinfo.io/json")
    page = await browser.get_current_page()

    ip_info = await page.evaluate(
        """
    async () => {
    const res = await fetch("https://ipinfo.io/json");
    return await res.json();
    }
    """
    )
    if isinstance(ip_info, dict):
        memory.variables.output_data.append(
            OutputData(unique_identifier="ip_info", json_data=ip_info)
        )
    elif isinstance(ip_info, str):
        memory.variables.output_data.append(
            OutputData(unique_identifier="ip_info", text=ip_info)
        )
    else:
        try:
            memory.variables.output_data.append(
                OutputData(unique_identifier="ip_info", text=str(ip_info))
            )
        except Exception as e:
            logger.error(f"Error getting IP info: {e}")


async def run_automation(task: Task, child_process_id: int):
    """Execute every node of ``task.automation`` in a new browser session.

    A file handler writing to ``task.log_file_path`` is attached to the
    top-level package logger for the duration of the run. The outcome is
    recorded on ``task.status`` (``"success"`` or ``"failed"``) and, on
    failure, ``task.error``; exceptions derived from ``Exception`` are not
    propagated to the caller.

    Args:
        task: The task to run; its ``automation`` supplies the start URL,
            browser channel and node list.
        child_process_id: Offset added to 9222 to pick the browser debug
            port; also forwarded to the final logging step.
    """
    file_handler = logging.FileHandler(str(task.log_file_path))
    file_handler.setLevel(logging.DEBUG)

    current_module = __name__.split(".")[0]  # top-level module/package
    package_logger = logging.getLogger(current_module)
    package_logger.addHandler(file_handler)
    logging.getLogger("browser_use").setLevel(logging.INFO)

    logger.info(f"Task {task.task_id} started running")
    memory = None
    browser = None
    try:
        try:
            await start_task_in_server(task)
            memory = Memory()
            browser = Browser(
                memory=memory,
                headless=False,
                channel=task.automation.browser_channel,
                debug_port=9222 + child_process_id,
                use_proxy=task.use_proxy,
                proxy_session_id=task.proxy_session_id(
                    settings.PROXY_PROVIDER if task.use_proxy else None
                ),
            )
            await browser.start()

            automation = task.automation

            # run_action_node increments step_index before each node, so the
            # first executed node gets index 0.
            memory.automation_state.step_index = -1
            memory.automation_state.try_index = 0

            if task.use_proxy:
                await _record_proxy_ip_info(browser, memory)

            await browser.go_to_url(task.automation.url)

            # Flat record of the nodes in the order they were executed.
            full_automation = []

            for node in automation.nodes:
                if isinstance(node, ForLoopNode):
                    await handle_for_loop_node(
                        node, memory, task, browser, full_automation
                    )
                elif isinstance(node, IfElseNode):
                    await handle_if_else_node(
                        node, memory, task, browser, full_automation
                    )
                else:
                    full_automation.append(node.model_dump())
                    await run_action_node(
                        node,
                        task,
                        memory,
                        browser,
                    )

            task.status = "success"
        except AssertionError as e:
            logger.error(f"Assertion error: {e}")
            task.error = str(e)
            task.status = "failed"
        except Exception as e:
            logger.error(f"Error running automation: {traceback.format_exc()}")
            task.error = str(e)
            task.status = "failed"
        finally:
            if task and memory:
                await run_final_downloads_check(task, memory, browser)
            if memory and browser:
                await run_final_logging(task, memory, browser, child_process_id)
            if browser:
                await browser.stop()
    finally:
        # Always detach AND close the per-task handler: removing it alone
        # leaves the log file open, leaking one descriptor per task.
        logger.info(f"Task {task.task_id} completed with status {task.status}")
        package_logger.removeHandler(file_handler)
        file_handler.close()
144
+
145
+
146
async def run_final_downloads_check(task: Task, memory: Memory, browser: Browser):
    """Collect outstanding downloads before the task is finalised.

    Within a shared 10 second budget this waits for the browser's active
    downloads, saves every raw download not yet marked as saved into
    ``task.downloads_directory``, polls until the expected number of
    downloads is reached, and finally downloads each queued URL as a PDF.
    Any exception is logged and swallowed; the download count is always
    logged at the end.
    """
    try:
        logger.debug("Running final downloads check")
        budget = 10.0
        began = time.monotonic()
        await asyncio.wait_for(
            browser.all_active_downloads_done.wait(), timeout=budget
        )
        # Whatever the wait consumed is no longer available for polling.
        budget = max(0.0, budget - (time.monotonic() - began))

        for temp_path, entry in memory.raw_downloads.items():
            already_saved, download = entry
            if already_saved:
                continue

            target = task.downloads_directory / download.suggested_filename
            await download.save_as(target)
            memory.downloads.append(target)
            await clean_download(target)
            # Same key, new value: the dict size is unchanged while iterating.
            memory.raw_downloads[temp_path] = (True, download)

        while budget > 0:
            collected = len(memory.urls_to_downloads) + len(memory.downloads)
            if collected >= task.automation.expected_downloads:
                break
            step = min(1, budget)
            await asyncio.sleep(step)
            budget = max(0.0, budget - step)

        for url, filename in memory.urls_to_downloads:
            # Not used below; kept so the path is still constructed here.
            download_path = task.downloads_directory / filename
            pdf_action = DownloadUrlAsPdfAction(url=url, download_filename=filename)
            await handle_download_url_as_pdf(pdf_action, task, memory, browser)

    except Exception as e:
        logger.error(f"Error running final downloads check: {e}")

    logger.warning(
        f"Found {len(memory.downloads)} downloads, expected {task.automation.expected_downloads}"
    )
195
+
196
+
197
async def run_final_logging(
    task: Task, memory: Memory, browser: Browser, child_process_id: int
):
    """Report task completion and persist everything the run produced.

    The task is marked complete on the server first. A final browser state
    and full-page screenshot are then captured on a best-effort basis, after
    which output data, downloads, the local memory snapshot and the
    trajectory are saved, the callback is initiated and local data is
    deleted. Any exception is logged and swallowed, so a failing step skips
    the steps after it.
    """
    try:
        await complete_task_in_server(task, memory.token_usage, child_process_id)

        try:
            memory.automation_state.step_index += 1
            summary = await browser.get_browser_state_summary()
            final_state = BrowserState(
                url=summary.url,
                screenshot=summary.screenshot,
                title=summary.title,
                axtree=summary.dom_state.llm_representation(),
            )
            memory.browser_states.append(final_state)

            memory.final_screenshot = await browser.get_screenshot(full_page=True)
        except Exception as e:
            # A missing final screenshot must not prevent the uploads below.
            logger.error(f"Error getting final screenshot: {e}")

        for upload in (save_output_data_in_server, save_downloads_in_server):
            await upload(task, memory)
        await save_latest_memory_state_locally(task, memory, None)
        await save_trajectory_in_server(task, memory)
        await initiate_callback(task)
        await delete_local_data(task)

    except Exception as e:
        logger.error(f"Error running final logging: {e}")
229
+
230
+
231
async def run_action_node(
    action_node: ActionNode,
    task: Task,
    memory: Memory,
    browser: Browser,
):
    """Execute one action node and record its browser state.

    The node's variables are substituted from the task's input parameters,
    its secure parameters and the generated variables (in that order), a
    lightweight browser state (URL and title only) is appended to memory,
    and the first action set on the node is run. The memory snapshot is
    saved locally whether or not the action succeeds; errors are logged and
    re-raised. Afterwards the function either waits for an expected new tab
    or waits for the page to load.
    """
    await asyncio.sleep(action_node.before_sleep_time)
    await browser.handle_new_tabs(0)

    memory.automation_state.step_index += 1
    memory.automation_state.try_index = 0

    await action_node.replace_variables(task.input_parameters)
    await action_node.replace_variables(task.secure_parameters)
    await action_node.replace_variables(memory.variables.generated_variables)

    # ## TODO: optimize this by taking screenshot and axtree only if needed
    # browser_state_summary = await browser.get_browser_state_summary()

    current_url = await browser.get_current_page_url()
    current_title = await browser.get_current_page_title()
    memory.browser_states.append(
        BrowserState(
            url=current_url, screenshot=None, title=current_title, axtree=None
        )
    )

    logger.debug(f"-----Running node {memory.automation_state.step_index}-----")

    try:
        if action_node.interaction_action:
            ## Assuming network calls are only made during interaction actions and not during extraction actions
            await browser.clear_network_calls()
            await browser.attach_network_listeners()

            await run_interaction_action(
                action_node.interaction_action, task, memory, browser, 2
            )
        elif action_node.extraction_action:
            await run_extraction_action(
                action_node.extraction_action, memory, browser, task
            )
        elif action_node.two_fa_action:
            await run_two_fa_action(action_node.two_fa_action, memory)
        elif action_node.python_script_action:
            await run_python_script_action(
                action_node.python_script_action, memory, browser
            )
        elif action_node.assertion_action:
            await run_assertion_action(action_node.assertion_action, memory, browser)

    except Exception as e:
        logger.error(f"Error running node {memory.automation_state.step_index}: {e}")
        raise e
    finally:
        await save_latest_memory_state_locally(task, memory, action_node)

    if not action_node.expect_new_tab:
        await sleep_for_page_to_load(browser, action_node.end_sleep_time)
    else:
        tab_wait = action_node.max_new_tab_wait_time
        found_new_tab, total_time = await browser.handle_new_tabs(tab_wait)
        if found_new_tab:
            logger.debug(f"Switched to new tab after {total_time} seconds, as expected")
        else:
            logger.warning(
                f"No new tab found after {action_node.max_new_tab_wait_time} seconds, even though expect_new_tab is True"
            )

    logger.debug(f"-----Finished node {memory.automation_state.step_index}-----")
305
+
306
+
307
async def sleep_for_page_to_load(browser: Browser, sleep_time: float):
    """Wait up to ``sleep_time`` seconds for the current page's load event.

    A fixed 0.1 second pause always happens first and is deducted from
    ``sleep_time``. If nothing remains, or the browser has no current page,
    the function returns without waiting further. A timeout while waiting
    for the ``"load"`` state is expected and ignored.

    Args:
        browser: Provides ``get_current_page()``.
        sleep_time: Total time budget in seconds, including the initial
            0.1 second pause.
    """
    await asyncio.sleep(0.1)

    remaining = max(0.0, sleep_time - 0.1)
    if float(remaining) == 0.0:
        return

    page = await browser.get_current_page()
    if page is None:
        return

    try:
        # wait_for_load_state takes its timeout in milliseconds.
        await page.wait_for_load_state("load", timeout=remaining * 1000)
    except (TimeoutError, PatchrightTimeoutError, PlaywrightTimeoutError):
        # Running out of time is the normal way for this wait to end.
        pass
326
+
327
+
328
def evaluate_condition(condition: str, memory: Memory, task: Task) -> bool:
    """Evaluate an if/else condition against the task's variables.

    The expression sees ``task.input_parameters`` and
    ``memory.variables.generated_variables`` as local names; a generated
    variable shadows an input parameter of the same name.

    Args:
        condition: A Python expression.
        memory: Supplies the generated variables.
        task: Supplies the input parameters.

    Returns:
        The truthiness of the evaluated expression.
    """
    # SECURITY: eval() runs arbitrary Python. Passing empty globals does not
    # sandbox it, because Python injects the real builtins. This is only safe
    # while automation definitions come from a trusted source.
    # NOTE(review): confirm conditions can never carry untrusted text.
    namespace = {**task.input_parameters, **memory.variables.generated_variables}
    # Coerce so the result matches the declared ``bool`` return type even
    # when the expression yields a list, string or number.
    return bool(eval(condition, {}, namespace))
334
+
335
+
336
async def handle_if_else_node(
    if_else_node: IfElseNode,
    memory: Memory,
    task: Task,
    browser: Browser,
    full_automation: list[ActionNode],
):
    """Evaluate the node's condition and run the nodes of the chosen branch.

    Action nodes are dumped into ``full_automation`` and executed; nested
    if/else and for-loop nodes are delegated to their handlers. Nodes of any
    other type are ignored.
    """
    logger.debug(
        f"Handling if else node {if_else_node.condition} with if nodes {if_else_node.if_nodes} and else nodes {if_else_node.else_nodes}"
    )
    took_if_branch = evaluate_condition(if_else_node.condition, memory, task)
    branch = if_else_node.if_nodes if took_if_branch else if_else_node.else_nodes

    for child in branch:
        if isinstance(child, ActionNode):
            full_automation.append(child.model_dump())
            await run_action_node(child, task, memory, browser)
            continue
        if isinstance(child, IfElseNode):
            await handle_if_else_node(child, memory, task, browser, full_automation)
            continue
        if isinstance(child, ForLoopNode):
            await handle_for_loop_node(child, memory, task, browser, full_automation)

    logger.debug(f"Finished handling if else node {if_else_node.condition}")
367
+
368
+
369
async def handle_for_loop_node(
    for_loop_node: ForLoopNode,
    memory: Memory,
    task: Task,
    browser: Browser,
    full_automation: list[ActionNode],
):
    """Run the loop body once per value of the loop variable.

    The values are looked up by ``for_loop_node.variable_name``, first in
    the task's input parameters and then in the generated variables. For
    each iteration every body node is deep-copied, its ``{name[index]}`` and
    ``{index_of(name)}`` placeholders are replaced with the concrete index,
    and the copy is executed. One ``ForLoopStatus`` is recorded per value.
    Between iterations (not after the last one) the reset nodes are run.

    On an error inside the body, ``on_error_in_loop`` decides what happens:
    ``"continue"`` moves to the next value (skipping the reset nodes),
    ``"break"`` marks the remaining values as skipped and stops, and any
    other setting re-raises.

    Raises:
        ValueError: If the loop variable is in neither variable source.
    """
    variable_name = for_loop_node.variable_name
    if variable_name in task.input_parameters:
        values = task.input_parameters[variable_name]
    elif variable_name in memory.variables.generated_variables:
        values = memory.variables.generated_variables[variable_name]
    else:
        raise ValueError(
            f"Variable name {variable_name} not found in input variables or generated variables"
        )

    # Hold a direct reference to this loop's status list. A loop nested
    # inside the body (reached through an if/else node) appends its own list
    # to for_loop_status, after which ``for_loop_status[-1]`` would point at
    # the nested loop's list instead of this one.
    loop_statuses: list = []
    memory.variables.for_loop_status.append(loop_statuses)

    for index, value in enumerate(values):

        try:
            for node in for_loop_node.nodes:
                # Copy so the placeholder substitution does not alter the
                # template node reused by later iterations.
                new_node = deepcopy(node)
                new_node.replace(
                    f"{{{variable_name}[index]}}",
                    f"{{{variable_name}[{index}]}}",
                )
                new_node.replace(f"{{index_of({variable_name})}}", f"{index}")

                if isinstance(new_node, IfElseNode):
                    await handle_if_else_node(
                        new_node, memory, task, browser, full_automation
                    )
                else:
                    full_automation.append(new_node.model_dump())
                    await run_action_node(
                        new_node,
                        task,
                        memory,
                        browser,
                    )
            loop_statuses.append(
                ForLoopStatus(
                    variable_name=variable_name,
                    index=index,
                    value=value,
                    status="success",
                )
            )
        except Exception as e:
            logger.error(f"Error running for loop node {variable_name}: {e}")
            loop_statuses.append(
                ForLoopStatus(
                    variable_name=variable_name,
                    index=index,
                    value=value,
                    status="error",
                    error=str(e),
                )
            )
            if for_loop_node.on_error_in_loop == "continue":
                continue
            elif for_loop_node.on_error_in_loop == "break":
                # Account for every value that will never be attempted.
                for skipped_index in range(index + 1, len(values)):
                    loop_statuses.append(
                        ForLoopStatus(
                            variable_name=variable_name,
                            index=skipped_index,
                            value=values[skipped_index],
                            status="skipped",
                        )
                    )

                break
            else:
                # Bare raise keeps the original traceback intact.
                raise

        if index < len(values) - 1:
            for node in for_loop_node.reset_nodes:
                if isinstance(node, IfElseNode):
                    await handle_if_else_node(
                        node, memory, task, browser, full_automation
                    )
                else:
                    full_automation.append(node.model_dump())
                    await run_action_node(
                        node,
                        task,
                        memory,
                        browser,
                    )
@@ -0,0 +1,240 @@
1
+ import logging
2
+ import traceback
3
+
4
+ import aiofiles
5
+ import httpx
6
+
7
+ from optexity.inference.infra.browser import Browser
8
+ from optexity.inference.models import GeminiModels, get_llm_model
9
+ from optexity.schema.actions.extraction_action import (
10
+ ExtractionAction,
11
+ LLMExtraction,
12
+ NetworkCallExtraction,
13
+ ScreenshotExtraction,
14
+ StateExtraction,
15
+ )
16
+ from optexity.schema.memory import (
17
+ BrowserState,
18
+ Memory,
19
+ NetworkRequest,
20
+ NetworkResponse,
21
+ OutputData,
22
+ ScreenshotData,
23
+ )
24
+ from optexity.schema.task import Task
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+ llm_model = get_llm_model(GeminiModels.GEMINI_2_5_FLASH, True)
29
+
30
+
31
async def run_extraction_action(
    extraction_action: ExtractionAction, memory: Memory, browser: Browser, task: Task
):
    """Dispatch an extraction action to the handler for its configured kind.

    Exactly one handler runs: the first of ``llm``, ``network_call``,
    ``screenshot`` and ``state`` that is set on the action. Nothing happens
    when none of them is set.
    """
    logger.debug(
        f"---------Running extraction action {extraction_action.model_dump_json()}---------"
    )

    if extraction_action.llm:
        await handle_llm_extraction(
            extraction_action.llm, memory, browser, extraction_action.unique_identifier
        )
        return

    if extraction_action.network_call:
        await handle_network_call_extraction(
            extraction_action.network_call,
            memory,
            browser,
            task,
            extraction_action.unique_identifier,
        )
        return

    if extraction_action.screenshot:
        await handle_screenshot_extraction(
            extraction_action.screenshot,
            memory,
            browser,
            extraction_action.unique_identifier,
        )
        return

    if extraction_action.state:
        await handle_state_extraction(
            extraction_action.state,
            memory,
            browser,
            extraction_action.unique_identifier,
        )
64
+
65
+
66
async def handle_state_extraction(
    state_extraction: StateExtraction,
    memory: Memory,
    browser: Browser,
    unique_identifier: str | None = None,
):
    """Record the current page's URL and title as an output-data entry.

    Does nothing when the browser has no current page.
    """
    page = await browser.get_current_page()
    if page is not None:
        page_state = {"page_url": page.url, "page_title": await page.title()}
        memory.variables.output_data.append(
            OutputData(unique_identifier=unique_identifier, json_data=page_state)
        )
82
+
83
+
84
async def handle_screenshot_extraction(
    screenshot_extraction: ScreenshotExtraction,
    memory: Memory,
    browser: Browser,
    unique_identifier: str | None = None,
):
    """Take a screenshot and store it as an output-data entry.

    The ``full_page`` flag and the filename come from
    ``screenshot_extraction``. Does nothing when the browser returns no
    screenshot.
    """
    encoded = await browser.get_screenshot(full_page=screenshot_extraction.full_page)
    if encoded is not None:
        shot = ScreenshotData(filename=screenshot_extraction.filename, base64=encoded)
        memory.variables.output_data.append(
            OutputData(unique_identifier=unique_identifier, screenshot=shot)
        )
105
+
106
+
107
async def handle_llm_extraction(
    llm_extraction: LLMExtraction,
    memory: Memory,
    browser: Browser,
    unique_identifier: str | None = None,
):
    """Extract structured data from the current page with an LLM.

    The latest browser state in memory is replaced with a fresh summary.
    The accessibility tree and/or screenshot are passed to the model
    depending on ``llm_extraction.source``. The structured response is
    appended to the output data, the token usage is accumulated, and any
    fields named in ``output_variable_names`` are stored as generated
    variables (scalars are wrapped in a one-element list).

    Returns:
        The ``OutputData`` entry that was appended.

    Raises:
        ValueError: If the provider is not ``"gemini"``, or if a requested
            output variable is neither a list nor a str/int/float/bool.
    """
    summary = await browser.get_browser_state_summary()
    latest_state = BrowserState(
        url=summary.url,
        screenshot=summary.screenshot,
        title=summary.title,
        axtree=summary.dom_state.llm_representation(),
    )
    memory.browser_states[-1] = latest_state

    # TODO: fix this double calling of screenshot and axtree
    axtree = latest_state.axtree if "axtree" in llm_extraction.source else None
    screenshot = (
        latest_state.screenshot if "screenshot" in llm_extraction.source else None
    )

    system_instruction = f"""
    You are an expert in extracting information from a website. You will be given an axtree of a webpage.
    Your task is to extract the information from the webpage and return it in the format specified by the instructions.
    {llm_extraction.extraction_instructions}
    """

    prompt = f"""
    [INPUT]
    Axtree: {axtree}
    [/INPUT]
    """

    if llm_extraction.llm_provider != "gemini":
        raise ValueError(f"Invalid LLM provider: {llm_extraction.llm_provider}")
    # The module-level model object is shared, so this switches the model
    # for the call below.
    llm_model.model_name = GeminiModels(llm_extraction.llm_model_name)

    response, token_usage = llm_model.get_model_response_with_structured_output(
        prompt=prompt,
        response_schema=llm_extraction.build_model(),
        screenshot=screenshot,
        system_instruction=system_instruction,
    )
    response_dict = response.model_dump()
    output_data = OutputData(
        unique_identifier=unique_identifier, json_data=response_dict
    )

    logger.debug(f"Response: {response_dict}")

    memory.token_usage += token_usage
    memory.variables.output_data.append(output_data)

    requested_names = llm_extraction.output_variable_names
    if requested_names is not None:
        generated = memory.variables.generated_variables
        for name in requested_names:
            extracted = response_dict[name]
            if isinstance(extracted, list):
                generated[name] = extracted
            elif isinstance(extracted, (str, int, float, bool)):
                # Generated variables are always stored as lists.
                generated[name] = [extracted]
            else:
                raise ValueError(
                    f"Output variable {name} must be a string, int, float, bool, or a list of strings, ints, floats, or bools. Extracted values: {extracted}"
                )
    return output_data
183
+
184
+
185
async def handle_network_call_extraction(
    network_call_extraction: NetworkCallExtraction,
    memory: Memory,
    browser: Browser,
    task: Task,
    unique_identifier: str | None = None,
):
    """Process recorded network calls whose URL contains the pattern.

    For each matching call: a request is replayed and saved to disk when
    ``download_from`` is ``"request"``, and the call's ``body`` is appended
    to the output data when the call's type (request or response) matches
    ``extract_from``.
    """
    for call in browser.network_calls:
        if network_call_extraction.url_pattern not in call.url:
            continue

        is_request = isinstance(call, NetworkRequest)

        if network_call_extraction.download_from == "request" and is_request:
            await download_request(
                call, network_call_extraction.download_filename, task, memory
            )

        wanted_side = network_call_extraction.extract_from
        if wanted_side == "request":
            should_extract = is_request
        elif wanted_side == "response":
            should_extract = isinstance(call, NetworkResponse)
        else:
            should_extract = False

        if should_extract:
            memory.variables.output_data.append(
                OutputData(
                    unique_identifier=unique_identifier,
                    json_data=call.model_dump(include={"body"}),
                )
            )
217
+
218
+
219
async def download_request(
    network_call: NetworkRequest, download_filename: str, task: Task, memory: Memory
):
    """Replay a recorded request and save the response body to disk.

    The request is re-sent with its original method, URL, headers and body,
    following redirects. On success the raw response bytes are written to
    ``task.downloads_directory / download_filename`` and the path is added
    to ``memory.downloads``. Any failure is logged and swallowed.
    """
    try:
        async with httpx.AsyncClient(follow_redirects=True) as http_client:
            reply = await http_client.request(
                network_call.method,
                network_call.url,
                headers=network_call.headers,
                content=network_call.body,  # not data=
            )

            reply.raise_for_status()

            # Save raw response to PDF
            destination = task.downloads_directory / download_filename
            async with aiofiles.open(destination, "wb") as out_file:
                await out_file.write(reply.content)

            memory.downloads.append(destination)
    except Exception as e:
        logger.error(f"Failed to download request: {e}, {traceback.format_exc()}")