PyPI - optexity - Versions diffs - 0.1.5__tar.gz → 0.1.5.2__tar.gz - Mend

optexity 0.1.5__tar.gz → 0.1.5.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (87) hide show

{optexity-0.1.5 → optexity-0.1.5.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: optexity
-Version: 0.1.5
+Version: 0.1.5.2
 Summary: Optexity is a platform for building and running browser and computer agents.
 Author-email: Optexity <founders@optexity.com>
 Requires-Python: >=3.11
@@ -83,6 +83,7 @@ Install Optexity directly from PyPI:
 ```bash
 pip install optexity
+optexity install-browsers
 ```
 **OR**
@@ -95,6 +96,7 @@ If you want to clone and edit from source:
 git clone git@github.com:Optexity/optexity.git
 cd optexity
 pip install -e .
+optexity install-browsers
 ```
 ## Set required environment variables:
@@ -107,14 +109,6 @@ DEPLOYMENT=dev                          # or "prod" in production
 You can get your free Google Gemini API key from the [Google AI Studio Console](https://aistudio.google.com).
-## Install required browsers:
-Install playwright and patchright browsers:
-```bash
-optexity install-browsers
-```
 ## Recording Your First Automation
 The fastest way to create an automation is by recording your actions directly in the browser.

{optexity-0.1.5 → optexity-0.1.5.2}/README.md RENAMED Viewed

@@ -58,6 +58,7 @@ Install Optexity directly from PyPI:
 ```bash
 pip install optexity
+optexity install-browsers
 ```
 **OR**
@@ -70,6 +71,7 @@ If you want to clone and edit from source:
 git clone git@github.com:Optexity/optexity.git
 cd optexity
 pip install -e .
+optexity install-browsers
 ```
 ## Set required environment variables:
@@ -82,14 +84,6 @@ DEPLOYMENT=dev                          # or "prod" in production
 You can get your free Google Gemini API key from the [Google AI Studio Console](https://aistudio.google.com).
-## Install required browsers:
-Install playwright and patchright browsers:
-```bash
-optexity install-browsers
-```
 ## Recording Your First Automation
 The fastest way to create an automation is by recording your actions directly in the browser.

{optexity-0.1.5 → optexity-0.1.5.2}/optexity/__init__.py RENAMED Viewed

@@ -1,7 +1,13 @@
 import logging
 import sys
+from importlib.metadata import PackageNotFoundError, version
 from pathlib import Path
+try:
+    __version__ = version("optexity")
+except PackageNotFoundError:
+    __version__ = "0.0.0"
 logging.basicConfig(
     level=logging.WARNING,  # Default level for root logger
     format="%(asctime)s [%(levelname)s] %(name)s.%(funcName)s: %(message)s",

{optexity-0.1.5 → optexity-0.1.5.2}/optexity/examples/extract_price_stockanalysis.py RENAMED Viewed

@@ -23,12 +23,13 @@ automation_json = {
                 "click_element": {
                     "prompt_instructions": "Click on the link with the name of the stock equivalent for {stock_ticker[0]}."
                 }
-            }
+            },
+            "before_sleep_time": 1,
         },
         {
             "extraction_action": {
                 "llm": {
-                    "source": ["screenshot"],
+                    "source": ["screenshot", "axtree"],
                     "extraction_format": {
                         "stock_name": "str",
                         "stock_price": "str",

{optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/child_process.py RENAMED Viewed

@@ -154,7 +154,10 @@ def get_app_with_endpoints(is_aws: bool, child_id: int):
             await task_queue.put(task)
             return JSONResponse(
-                content={"success": True, "message": "Task has been allocated"},
+                content={
+                    "success": True,
+                    "message": "Task has been allocated. Check its status and output at https://dashboard.optexity.com/tasks",
+                },
                 status_code=202,
             )
         except Exception as e:
@@ -192,7 +195,7 @@ def get_app_with_endpoints(is_aws: bool, child_id: int):
                 return JSONResponse(
                     content={
                         "success": True,
-                        "message": "Task has been allocated",
+                        "message": "Task has been allocated. Check its status and output at https://dashboard.optexity.com/tasks",
                         "task_id": task.task_id,
                     },
                     status_code=202,

{optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/handle_click.py RENAMED Viewed

@@ -53,14 +53,17 @@ async def click_element_index(
     try:
         index = await get_index_from_prompt(
-            memory, click_element_action.prompt_instructions, browser
+            memory, click_element_action.prompt_instructions, browser, task
         )
         if index is None:
             return
         async def _actual_click_element():
+            print(
+                f"Clicking element with index: {index} and button: {click_element_action.button}"
+            )
             action_model = browser.backend_agent.ActionModel(
-                **{"click": {"index": index}}
+                **{"click": {"index": index, "button": click_element_action.button}}
             )
             await browser.backend_agent.multi_act([action_model])

{optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/handle_command.py RENAMED Viewed

@@ -13,6 +13,7 @@ from optexity.inference.infra.browser import Browser
 from optexity.schema.actions.interaction_action import (
     CheckAction,
     ClickElementAction,
+    HoverAction,
     InputTextAction,
     SelectOptionAction,
     UncheckAction,
@@ -32,6 +33,7 @@ async def command_based_action_with_retry(
         | CheckAction
         | UploadFileAction
         | UncheckAction
+        | HoverAction
     ),
     browser: Browser,
     memory: Memory,
@@ -67,7 +69,9 @@ async def command_based_action_with_retry(
                     url=browser_state_summary.url,
                     screenshot=browser_state_summary.screenshot,
                     title=browser_state_summary.title,
-                    axtree=browser_state_summary.dom_state.llm_representation(),
+                    axtree=browser_state_summary.dom_state.llm_representation(
+                        remove_empty_nodes=task.automation.remove_empty_nodes_in_axtree
+                    ),
                 )
                 if isinstance(action, ClickElementAction):
@@ -81,7 +85,7 @@ async def command_based_action_with_retry(
                     )
                 elif isinstance(action, InputTextAction):
                     await input_text_locator(
-                        action, locator, max_timeout_seconds_per_try
+                        action, locator, browser, max_timeout_seconds_per_try
                     )
                 elif isinstance(action, SelectOptionAction):
                     await select_option_locator(
@@ -100,6 +104,8 @@ async def command_based_action_with_retry(
                     await uncheck_locator(
                         action, locator, max_timeout_seconds_per_try, browser
                     )
+                elif isinstance(action, HoverAction):
+                    await hover_locator(locator, max_timeout_seconds_per_try)
                 elif isinstance(action, UploadFileAction):
                     await upload_file_locator(action, locator)
                 logger.debug(
@@ -147,7 +153,9 @@ async def click_locator(
             )
         else:
             await locator.click(
-                no_wait_after=True, timeout=max_timeout_seconds_per_try * 1000
+                button=click_element_action.button,
+                no_wait_after=True,
+                timeout=max_timeout_seconds_per_try * 1000,
             )
     if click_element_action.expect_download:
@@ -161,6 +169,7 @@ async def click_locator(
 async def input_text_locator(
     input_text_action: InputTextAction,
     locator: Locator,
+    browser: Browser,
     max_timeout_seconds_per_try: float,
 ):
@@ -170,12 +179,19 @@ async def input_text_locator(
             no_wait_after=True,
             timeout=max_timeout_seconds_per_try * 1000,
         )
-    else:
+    elif input_text_action.fill_or_type == "type":
         await locator.type(
             input_text_action.input_text,
             no_wait_after=True,
             timeout=max_timeout_seconds_per_try * 1000,
         )
+    else:
+        page = await browser.get_current_page()
+        if page is None:
+            return
+        for char in input_text_action.input_text:
+            await page.keyboard.press(char)
+            await asyncio.sleep(0.1)
     if input_text_action.press_enter:
         await locator.press("Enter")
@@ -209,6 +225,13 @@ async def uncheck_locator(
     )
+async def hover_locator(
+    locator: Locator,
+    max_timeout_seconds_per_try: float,
+):
+    await locator.hover(no_wait_after=True, timeout=max_timeout_seconds_per_try * 1000)
 async def upload_file_locator(upload_file_action: UploadFileAction, locator: Locator):
     await locator.set_input_files(upload_file_action.file_path)

optexity-0.1.5.2/optexity/inference/core/interaction/handle_hover.py ADDED Viewed

@@ -0,0 +1,83 @@
+import logging
+from optexity.inference.core.interaction.handle_command import (
+    command_based_action_with_retry,
+)
+from optexity.inference.core.interaction.utils import get_index_from_prompt
+from optexity.inference.infra.browser import Browser
+from optexity.schema.actions.interaction_action import HoverAction
+from optexity.schema.memory import Memory
+from optexity.schema.task import Task
+logger = logging.getLogger(__name__)
+async def handle_hover_element(
+    hover_element_action: HoverAction,
+    task: Task,
+    memory: Memory,
+    browser: Browser,
+    max_timeout_seconds_per_try: float,
+    max_tries: int,
+):
+    if hover_element_action.command and not hover_element_action.skip_command:
+        last_error = await command_based_action_with_retry(
+            hover_element_action,
+            browser,
+            memory,
+            task,
+            max_tries,
+            max_timeout_seconds_per_try,
+        )
+        if last_error is None:
+            return
+    if not hover_element_action.skip_prompt:
+        logger.debug(
+            f"Executing prompt-based action: {hover_element_action.__class__.__name__}"
+        )
+        await hover_element_index(hover_element_action, browser, memory, task)
+async def hover_element_index(
+    hover_element_action: HoverAction,
+    browser: Browser,
+    memory: Memory,
+    task: Task,
+):
+    try:
+        index = await get_index_from_prompt(
+            memory, hover_element_action.prompt_instructions, browser, task
+        )
+        if index is None:
+            return
+        print(f"Hovering element with index: {index}")
+        async def _actual_hover_element():
+            try:
+                action_model = browser.backend_agent.ActionModel(
+                    **{"hover": {"index": index}}
+                )
+                await browser.backend_agent.multi_act([action_model])
+            except Exception as e:
+                logger.error(f"Error in hover_element_index: {e} trying right click")
+                node = await browser.backend_agent.browser_session.get_element_by_index(
+                    index
+                )
+                if node is None:
+                    return
+                backend_page = (
+                    await browser.backend_agent.browser_session.get_current_page()
+                )
+                element = await backend_page.get_element(node.backend_node_id)
+                await element.click(button="right")
+        await _actual_hover_element()
+    except Exception as e:
+        logger.error(f"Error in hover_element_index: {e}")
+        return

{optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/handle_input.py RENAMED Viewed

@@ -48,15 +48,18 @@ async def handle_input_text(
         logger.debug(
             f"Executing prompt-based action: {input_text_action.__class__.__name__}"
         )
-        await input_text_index(input_text_action, browser, memory)
+        await input_text_index(input_text_action, browser, memory, task)
 async def input_text_index(
-    input_text_action: InputTextAction, browser: Browser, memory: Memory
+    input_text_action: InputTextAction, browser: Browser, memory: Memory, task: Task
 ):
     try:
         index = await get_index_from_prompt(
-            memory, input_text_action.prompt_instructions, browser
+            memory,
+            input_text_action.prompt_instructions,
+            browser,
+            task,
         )
         if index is None:
             return

optexity-0.1.5.2/optexity/inference/core/interaction/handle_keypress.py ADDED Viewed

@@ -0,0 +1,42 @@
+from optexity.inference.infra.browser import Browser
+from optexity.schema.actions.interaction_action import KeyPressAction, KeyPressType
+from optexity.schema.memory import Memory
+async def handle_key_press(
+    keypress_action: KeyPressAction,
+    memory: Memory,
+    browser: Browser,
+):
+    page = await browser.get_current_page()
+    if page is None:
+        return
+    if keypress_action.type == KeyPressType.ENTER:
+        await page.keyboard.press("Enter")
+    if keypress_action.type == KeyPressType.TAB:
+        await page.keyboard.press("Tab")
+    if keypress_action.type == KeyPressType.ZERO:
+        await page.keyboard.press("0")
+    if keypress_action.type == KeyPressType.ONE:
+        await page.keyboard.press("1")
+    if keypress_action.type == KeyPressType.TWO:
+        await page.keyboard.press("2")
+    if keypress_action.type == KeyPressType.THREE:
+        await page.keyboard.press("3")
+    if keypress_action.type == KeyPressType.FOUR:
+        await page.keyboard.press("4")
+    if keypress_action.type == KeyPressType.FIVE:
+        await page.keyboard.press("5")
+    if keypress_action.type == KeyPressType.SIX:
+        await page.keyboard.press("6")
+    if keypress_action.type == KeyPressType.SEVEN:
+        await page.keyboard.press("7")
+    if keypress_action.type == KeyPressType.EIGHT:
+        await page.keyboard.press("8")
+    if keypress_action.type == KeyPressType.NINE:
+        await page.keyboard.press("9")
+    if keypress_action.type == KeyPressType.SLASH:
+        await page.keyboard.press("/")
+    if keypress_action.type == KeyPressType.SPACE:
+        await page.keyboard.press("Space")

{optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/handle_select.py RENAMED Viewed

@@ -60,7 +60,7 @@ async def select_option_index(
     try:
         index = await get_index_from_prompt(
-            memory, select_option_action.prompt_instructions, browser
+            memory, select_option_action.prompt_instructions, browser, task
         )
         if index is None:
             return

{optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/handle_select_utils.py RENAMED Viewed

@@ -57,9 +57,19 @@ async def smart_select(
     options: list[SelectOptionValue], patterns: list[str], memory: Memory
 ):
     # Get all options from the <select>
+    ## TODO: remove this once we have a better way to handle select one
     matched_values = []
+    if len(options) == 0:
+        return []
+    if len(options) == 1:
+        return [options[0].value]
+    if len(options) == 2 and "Select One" in [o.value for o in options]:
+        if options[0].value == "Select One":
+            return [options[1].value]
+        else:
+            return [options[0].value]
     for p in patterns:
         # If pattern contains regex characters, treat as regex
         is_regex = p.startswith("^") or p.endswith("$") or ".*" in p

{optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/handle_upload.py RENAMED Viewed

@@ -36,16 +36,16 @@ async def handle_upload_file(
         logger.debug(
             f"Executing prompt-based action: {upload_file_action.__class__.__name__}"
         )
-        await upload_file_index(upload_file_action, browser, memory)
+        await upload_file_index(upload_file_action, browser, memory, task)
 async def upload_file_index(
-    upload_file_action: UploadFileAction, browser: Browser, memory: Memory
+    upload_file_action: UploadFileAction, browser: Browser, memory: Memory, task: Task
 ):
     try:
         index = await get_index_from_prompt(
-            memory, upload_file_action.prompt_instructions, browser
+            memory, upload_file_action.prompt_instructions, browser, task
         )
         if index is None:
             return

{optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/interaction/utils.py RENAMED Viewed

@@ -18,14 +18,16 @@ index_prediction_agent = ActionPredictionLocatorAxtree()
 async def get_index_from_prompt(
-    memory: Memory, prompt_instructions: str, browser: Browser
+    memory: Memory, prompt_instructions: str, browser: Browser, task: Task
 ):
     browser_state_summary = await browser.get_browser_state_summary()
     memory.browser_states[-1] = BrowserState(
         url=browser_state_summary.url,
         screenshot=browser_state_summary.screenshot,
         title=browser_state_summary.title,
-        axtree=browser_state_summary.dom_state.llm_representation(),
+        axtree=browser_state_summary.dom_state.llm_representation(
+            remove_empty_nodes=task.automation.remove_empty_nodes_in_axtree
+        ),
     )
     try:

{optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/run_assertion.py RENAMED Viewed

@@ -6,6 +6,7 @@ from optexity.inference.infra.browser import Browser
 from optexity.inference.models import GeminiModels, get_llm_model
 from optexity.schema.actions.assertion_action import AssertionAction, LLMAssertion
 from optexity.schema.memory import Memory
+from optexity.schema.task import Task
 logger = logging.getLogger(__name__)
@@ -13,14 +14,17 @@ llm_model = get_llm_model(GeminiModels.GEMINI_2_5_FLASH, True)
 async def run_assertion_action(
-    assertion_action: AssertionAction, memory: Memory, browser: Browser
+    assertion_action: AssertionAction,
+    memory: Memory,
+    browser: Browser,
+    task: Task,
 ):
     logger.debug(
         f"---------Running assertion action {assertion_action.model_dump_json()}---------"
     )
     if assertion_action.llm:
-        await handle_llm_assertion(assertion_action.llm, memory, browser)
+        await handle_llm_assertion(assertion_action.llm, memory, browser, task)
     elif assertion_action.network_call:
         raise ValueError("Network call assertions are not supported yet")
         # await handle_network_call_assertion(
@@ -34,7 +38,7 @@ async def run_assertion_action(
 async def handle_llm_assertion(
-    llm_assertion: LLMAssertion, memory: Memory, browser: Browser
+    llm_assertion: LLMAssertion, memory: Memory, browser: Browser, task: Task
 ):
     extra_instruction = """You are a helpful assistant that verifies if the condition is met.
         Use the info supplied below to verify the condition.
@@ -45,7 +49,7 @@ async def handle_llm_assertion(
     llm_assertion_new.extraction_instructions = (
         extra_instruction + "\n" + llm_assertion_new.extraction_instructions
     )
-    output_data = await handle_llm_extraction(llm_assertion_new, memory, browser)
+    output_data = await handle_llm_extraction(llm_assertion_new, memory, browser, task)
     if output_data.json_data["assertion_result"]:
         return True

{optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/run_automation.py RENAMED Viewed

@@ -25,7 +25,6 @@ from optexity.inference.core.run_interaction import (
     run_interaction_action,
 )
 from optexity.inference.core.run_python_script import run_python_script_action
-from optexity.inference.core.run_two_fa import run_two_fa_action
 from optexity.inference.infra.browser import Browser
 from optexity.schema.actions.interaction_action import DownloadUrlAsPdfAction
 from optexity.schema.automation import ActionNode, ForLoopNode, IfElseNode
@@ -209,7 +208,9 @@ async def run_final_logging(
                     url=browser_state_summary.url,
                     screenshot=browser_state_summary.screenshot,
                     title=browser_state_summary.title,
-                    axtree=browser_state_summary.dom_state.llm_representation(),
+                    axtree=browser_state_summary.dom_state.llm_representation(
+                        remove_empty_nodes=task.automation.remove_empty_nodes_in_axtree
+                    ),
                 )
             )
@@ -272,14 +273,14 @@ async def run_action_node(
             await run_extraction_action(
                 action_node.extraction_action, memory, browser, task
             )
-        elif action_node.two_fa_action:
-            await run_two_fa_action(action_node.two_fa_action, memory)
         elif action_node.python_script_action:
             await run_python_script_action(
                 action_node.python_script_action, memory, browser
             )
         elif action_node.assertion_action:
-            await run_assertion_action(action_node.assertion_action, memory, browser)
+            await run_assertion_action(
+                action_node.assertion_action, memory, browser, task
+            )
     except Exception as e:
         logger.error(f"Error running node {memory.automation_state.step_index}: {e}")

{optexity-0.1.5 → optexity-0.1.5.2}/optexity/inference/core/run_extraction.py RENAMED Viewed

@@ -4,12 +4,14 @@ import traceback
 import aiofiles
 import httpx
+from optexity.inference.core.run_two_fa import run_two_fa_action
 from optexity.inference.infra.browser import Browser
 from optexity.inference.models import GeminiModels, get_llm_model
 from optexity.schema.actions.extraction_action import (
     ExtractionAction,
     LLMExtraction,
     NetworkCallExtraction,
+    PythonScriptExtraction,
     ScreenshotExtraction,
     StateExtraction,
 )
@@ -37,7 +39,11 @@ async def run_extraction_action(
     if extraction_action.llm:
         await handle_llm_extraction(
-            extraction_action.llm, memory, browser, extraction_action.unique_identifier
+            extraction_action.llm,
+            memory,
+            browser,
+            task,
+            extraction_action.unique_identifier,
         )
     elif extraction_action.network_call:
         await handle_network_call_extraction(
@@ -47,6 +53,14 @@ async def run_extraction_action(
             task,
             extraction_action.unique_identifier,
         )
+    elif extraction_action.python_script:
+        await handle_python_script_extraction(
+            extraction_action.python_script,
+            memory,
+            browser,
+            task,
+            extraction_action.unique_identifier,
+        )
     elif extraction_action.screenshot:
         await handle_screenshot_extraction(
             extraction_action.screenshot,
@@ -61,6 +75,8 @@ async def run_extraction_action(
             browser,
             extraction_action.unique_identifier,
         )
+    elif extraction_action.two_fa_action:
+        await run_two_fa_action(extraction_action.two_fa_action, memory)
 async def handle_state_extraction(
@@ -108,6 +124,7 @@ async def handle_llm_extraction(
     llm_extraction: LLMExtraction,
     memory: Memory,
     browser: Browser,
+    task: Task,
     unique_identifier: str | None = None,
 ):
     browser_state_summary = await browser.get_browser_state_summary()
@@ -115,7 +132,9 @@ async def handle_llm_extraction(
         url=browser_state_summary.url,
         screenshot=browser_state_summary.screenshot,
         title=browser_state_summary.title,
-        axtree=browser_state_summary.dom_state.llm_representation(),
+        axtree=browser_state_summary.dom_state.llm_representation(
+            remove_empty_nodes=task.automation.remove_empty_nodes_in_axtree
+        ),
     )
     # TODO: fix this double calling of screenshot and axtree
@@ -131,8 +150,8 @@ async def handle_llm_extraction(
     system_instruction = f"""
     You are an expert in extracting information from a website. You will be given an axtree of a webpage.
-    Your task is to extract the information from the webpage and return it in the format specified by the instructions.
-    {llm_extraction.extraction_instructions}
+    Your task is to extract the information from the webpage and return it in the format specified by the instructions. You will be first provided the instructions and then the axtree.
+    Instructions: {llm_extraction.extraction_instructions}
     """
     prompt = f"""
@@ -163,6 +182,8 @@ async def handle_llm_extraction(
     memory.token_usage += token_usage
     memory.variables.output_data.append(output_data)
+    memory.browser_states[-1].final_prompt = f"{system_instruction}\n{prompt}"
     if llm_extraction.output_variable_names is not None:
         for output_variable_name in llm_extraction.output_variable_names:
             v = response_dict[output_variable_name]
@@ -216,6 +237,31 @@ async def handle_network_call_extraction(
             )
+async def handle_python_script_extraction(
+    python_script_extraction: PythonScriptExtraction,
+    memory: Memory,
+    browser: Browser,
+    task: Task,
+    unique_identifier: str | None = None,
+):
+    local_vars = {}
+    exec(python_script_extraction.script, {}, local_vars)
+    code_fn = local_vars["code_fn"]
+    axtree = memory.browser_states[-1].axtree
+    result = await code_fn(axtree)
+    if result is not None:
+        memory.variables.output_data.append(
+            OutputData(
+                unique_identifier=unique_identifier,
+                json_data=result,
+            )
+        )
+    else:
+        logger.warning(
+            f"No result from Python script extraction: {python_script_extraction.script}"
+        )
 async def download_request(
     network_call: NetworkRequest, download_filename: str, task: Task, memory: Memory
 ):

optexity 0.1.5__tar.gz → 0.1.5.2__tar.gz

optexity 0.1.5__tar.gz → 0.1.5.2__tar.gz