PyPI - optexity - Versions diffs - 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl - Mend

optexity 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (76) hide show

optexity/examples/__init__.py +0 -0
optexity/examples/add_example.py +88 -0
optexity/examples/download_pdf_url.py +29 -0
optexity/examples/extract_price_stockanalysis.py +44 -0
optexity/examples/file_upload.py +59 -0
optexity/examples/i94.py +126 -0
optexity/examples/i94_travel_history.py +126 -0
optexity/examples/peachstate_medicaid.py +201 -0
optexity/examples/supabase_login.py +75 -0
optexity/inference/__init__.py +0 -0
optexity/inference/agents/__init__.py +0 -0
optexity/inference/agents/error_handler/__init__.py +0 -0
optexity/inference/agents/error_handler/error_handler.py +39 -0
optexity/inference/agents/error_handler/prompt.py +60 -0
optexity/inference/agents/index_prediction/__init__.py +0 -0
optexity/inference/agents/index_prediction/action_prediction_locator_axtree.py +45 -0
optexity/inference/agents/index_prediction/prompt.py +14 -0
optexity/inference/agents/select_value_prediction/__init__.py +0 -0
optexity/inference/agents/select_value_prediction/prompt.py +20 -0
optexity/inference/agents/select_value_prediction/select_value_prediction.py +39 -0
optexity/inference/agents/two_fa_extraction/__init__.py +0 -0
optexity/inference/agents/two_fa_extraction/prompt.py +23 -0
optexity/inference/agents/two_fa_extraction/two_fa_extraction.py +47 -0
optexity/inference/child_process.py +251 -0
optexity/inference/core/__init__.py +0 -0
optexity/inference/core/interaction/__init__.py +0 -0
optexity/inference/core/interaction/handle_agentic_task.py +79 -0
optexity/inference/core/interaction/handle_check.py +57 -0
optexity/inference/core/interaction/handle_click.py +79 -0
optexity/inference/core/interaction/handle_command.py +261 -0
optexity/inference/core/interaction/handle_input.py +76 -0
optexity/inference/core/interaction/handle_keypress.py +16 -0
optexity/inference/core/interaction/handle_select.py +109 -0
optexity/inference/core/interaction/handle_select_utils.py +132 -0
optexity/inference/core/interaction/handle_upload.py +59 -0
optexity/inference/core/interaction/utils.py +81 -0
optexity/inference/core/logging.py +406 -0
optexity/inference/core/run_assertion.py +55 -0
optexity/inference/core/run_automation.py +463 -0
optexity/inference/core/run_extraction.py +240 -0
optexity/inference/core/run_interaction.py +254 -0
optexity/inference/core/run_python_script.py +20 -0
optexity/inference/core/run_two_fa.py +120 -0
optexity/inference/core/two_factor_auth/__init__.py +0 -0
optexity/inference/infra/__init__.py +0 -0
optexity/inference/infra/browser.py +455 -0
optexity/inference/infra/browser_extension.py +20 -0
optexity/inference/models/__init__.py +22 -0
optexity/inference/models/gemini.py +113 -0
optexity/inference/models/human.py +20 -0
optexity/inference/models/llm_model.py +210 -0
optexity/inference/run_local.py +200 -0
optexity/schema/__init__.py +0 -0
optexity/schema/actions/__init__.py +0 -0
optexity/schema/actions/assertion_action.py +66 -0
optexity/schema/actions/extraction_action.py +143 -0
optexity/schema/actions/interaction_action.py +330 -0
optexity/schema/actions/misc_action.py +18 -0
optexity/schema/actions/prompts.py +27 -0
optexity/schema/actions/two_fa_action.py +24 -0
optexity/schema/automation.py +432 -0
optexity/schema/callback.py +16 -0
optexity/schema/inference.py +87 -0
optexity/schema/memory.py +100 -0
optexity/schema/task.py +212 -0
optexity/schema/token_usage.py +48 -0
optexity/utils/__init__.py +0 -0
optexity/utils/settings.py +54 -0
optexity/utils/utils.py +76 -0
{optexity-0.1.2.dist-info → optexity-0.1.3.dist-info}/METADATA +1 -1
optexity-0.1.3.dist-info/RECORD +80 -0
optexity-0.1.2.dist-info/RECORD +0 -11
{optexity-0.1.2.dist-info → optexity-0.1.3.dist-info}/WHEEL +0 -0
{optexity-0.1.2.dist-info → optexity-0.1.3.dist-info}/entry_points.txt +0 -0
{optexity-0.1.2.dist-info → optexity-0.1.3.dist-info}/licenses/LICENSE +0 -0
{optexity-0.1.2.dist-info → optexity-0.1.3.dist-info}/top_level.txt +0 -0

optexity/inference/core/interaction/handle_agentic_task.py ADDED Viewed

@@ -0,0 +1,79 @@
+import logging
+from browser_use import Agent, BrowserSession, ChatGoogle, Tools
+from optexity.inference.infra.browser import Browser
+from optexity.schema.actions.interaction_action import (
+    AgenticTask,
+    CloseOverlayPopupAction,
+)
+from optexity.schema.memory import Memory
+from optexity.schema.task import Task
+logger = logging.getLogger(__name__)
+async def handle_agentic_task(
+    agentic_task_action: AgenticTask | CloseOverlayPopupAction,
+    task: Task,
+    memory: Memory,
+    browser: Browser,
+):
+    if agentic_task_action.backend == "browser_use":
+        if isinstance(agentic_task_action, CloseOverlayPopupAction):
+            tools = Tools(
+                exclude_actions=[
+                    "search",
+                    "navigate",
+                    "go_back",
+                    "upload_file",
+                    "scroll",
+                    "find_text",
+                    "send_keys",
+                    "evaluate",
+                    "switch",
+                    "close",
+                    "extract",
+                    "dropdown_options",
+                    "select_dropdown",
+                    "write_file",
+                    "read_file",
+                    "replace_file",
+                ]
+            )
+        else:
+            tools = Tools()
+        llm = ChatGoogle(model="gemini-flash-latest")
+        browser_session = BrowserSession(
+            cdp_url=browser.cdp_url, keep_alive=agentic_task_action.keep_alive
+        )
+        step_directory = (
+            task.logs_directory / f"step_{str(memory.automation_state.step_index)}"
+        )
+        step_directory.mkdir(parents=True, exist_ok=True)
+        agent = Agent(
+            task=agentic_task_action.task,
+            llm=llm,
+            browser_session=browser_session,
+            use_vision=agentic_task_action.use_vision,
+            tools=tools,
+            calculate_cost=True,
+            save_conversation_path=step_directory,
+        )
+        logger.debug(f"Starting browser session for agentic task {browser.cdp_url} ")
+        await agent.browser_session.start()
+        logger.debug(f"Finally running agentic task on browser_use {browser.cdp_url} ")
+        await agent.run(max_steps=agentic_task_action.max_steps)
+        logger.debug(f"Agentic task completed on browser_use {browser.cdp_url} ")
+        agent.stop()
+        if agent.browser_session:
+            await agent.browser_session.stop()
+            await agent.browser_session.reset()
+    elif agentic_task_action.backend == "browserbase":
+        raise NotImplementedError("Browserbase is not supported yet")

optexity/inference/core/interaction/handle_check.py ADDED Viewed

@@ -0,0 +1,57 @@
+import logging
+from optexity.inference.core.interaction.handle_command import (
+    command_based_action_with_retry,
+)
+from optexity.inference.infra.browser import Browser
+from optexity.schema.actions.interaction_action import CheckAction, UncheckAction
+from optexity.schema.memory import Memory
+from optexity.schema.task import Task
+logger = logging.getLogger(__name__)
+async def handle_check_element(
+    check_element_action: CheckAction,
+    task: Task,
+    memory: Memory,
+    browser: Browser,
+    max_timeout_seconds_per_try: float,
+    max_tries: int,
+):
+    if check_element_action.command and not check_element_action.skip_command:
+        last_error = await command_based_action_with_retry(
+            check_element_action,
+            browser,
+            memory,
+            task,
+            max_tries,
+            max_timeout_seconds_per_try,
+        )
+        if last_error is None:
+            return
+async def handle_uncheck_element(
+    uncheck_element_action: UncheckAction,
+    task: Task,
+    memory: Memory,
+    browser: Browser,
+    max_timeout_seconds_per_try: float,
+    max_tries: int,
+):
+    if uncheck_element_action.command and not uncheck_element_action.skip_command:
+        last_error = await command_based_action_with_retry(
+            uncheck_element_action,
+            browser,
+            memory,
+            task,
+            max_tries,
+            max_timeout_seconds_per_try,
+        )
+        if last_error is None:
+            return

optexity/inference/core/interaction/handle_click.py ADDED Viewed

@@ -0,0 +1,79 @@
+import logging
+from optexity.inference.core.interaction.handle_command import (
+    command_based_action_with_retry,
+)
+from optexity.inference.core.interaction.utils import (
+    get_index_from_prompt,
+    handle_download,
+)
+from optexity.inference.infra.browser import Browser
+from optexity.schema.actions.interaction_action import ClickElementAction
+from optexity.schema.memory import Memory
+from optexity.schema.task import Task
+logger = logging.getLogger(__name__)
+async def handle_click_element(
+    click_element_action: ClickElementAction,
+    task: Task,
+    memory: Memory,
+    browser: Browser,
+    max_timeout_seconds_per_try: float,
+    max_tries: int,
+):
+    if click_element_action.command and not click_element_action.skip_command:
+        last_error = await command_based_action_with_retry(
+            click_element_action,
+            browser,
+            memory,
+            task,
+            max_tries,
+            max_timeout_seconds_per_try,
+        )
+        if last_error is None:
+            return
+    if not click_element_action.skip_prompt:
+        logger.debug(
+            f"Executing prompt-based action: {click_element_action.__class__.__name__}"
+        )
+        await click_element_index(click_element_action, browser, memory, task)
+async def click_element_index(
+    click_element_action: ClickElementAction,
+    browser: Browser,
+    memory: Memory,
+    task: Task,
+):
+    try:
+        index = await get_index_from_prompt(
+            memory, click_element_action.prompt_instructions, browser
+        )
+        if index is None:
+            return
+        async def _actual_click_element():
+            action_model = browser.backend_agent.ActionModel(
+                **{"click": {"index": index}}
+            )
+            await browser.backend_agent.multi_act([action_model])
+        if click_element_action.expect_download:
+            await handle_download(
+                _actual_click_element,
+                memory,
+                browser,
+                task,
+                click_element_action.download_filename,
+            )
+        else:
+            await _actual_click_element()
+    except Exception as e:
+        logger.error(f"Error in click_element_index: {e}")
+        return

optexity/inference/core/interaction/handle_command.py ADDED Viewed

@@ -0,0 +1,261 @@
+import asyncio
+import logging
+from playwright.async_api import Locator
+from optexity.exceptions import AssertLocatorPresenceException
+from optexity.inference.core.interaction.handle_select_utils import (
+    SelectOptionValue,
+    smart_select,
+)
+from optexity.inference.core.interaction.utils import handle_download
+from optexity.inference.infra.browser import Browser
+from optexity.schema.actions.interaction_action import (
+    CheckAction,
+    ClickElementAction,
+    InputTextAction,
+    SelectOptionAction,
+    UncheckAction,
+    UploadFileAction,
+)
+from optexity.schema.memory import BrowserState, Memory
+from optexity.schema.task import Task
+logger = logging.getLogger(__name__)
+async def command_based_action_with_retry(
+    action: (
+        ClickElementAction
+        | InputTextAction
+        | SelectOptionAction
+        | CheckAction
+        | UploadFileAction
+        | UncheckAction
+    ),
+    browser: Browser,
+    memory: Memory,
+    task: Task,
+    max_tries: int,
+    max_timeout_seconds_per_try: float,
+) -> str | None:
+    """Execute a locator-command action, retrying up to ``max_tries`` times.
+
+    Each try resolves the locator from ``action.command``, checks that it is
+    visible, refreshes the latest browser state in ``memory`` and dispatches
+    to the helper matching the action's type.
+
+    Args:
+        action: The action to execute; its ``command`` yields the locator.
+        browser: Used to resolve the locator and snapshot browser state.
+        memory: Its last ``browser_states`` entry is overwritten before acting.
+        task: Forwarded to the helpers that may handle a download.
+        max_tries: Number of attempts.
+        max_timeout_seconds_per_try: Per-attempt timeout in seconds; also the
+            sleep duration after a failed attempt.
+
+    Returns:
+        ``None`` when the action succeeded, or when there is no command or
+        ``skip_command`` is set. Otherwise a string describing the last error.
+
+    Raises:
+        AssertLocatorPresenceException: When all tries failed and
+            ``action.assert_locator_presence`` is truthy.
+    """
+    if action.command is None or action.skip_command:
+        return
+    last_error = None
+    logger.debug(f"Executing command-based action: {action.__class__.__name__}")
+    for try_index in range(max_tries):
+        last_error = None
+        try:
+            # https://playwright.dev/docs/actionability
+            locator = await browser.get_locator_from_command(action.command)
+            if try_index == 0:
+                # Only the first try waits for visibility. A failed wait is
+                # ignored because visibility is re-checked just below.
+                try:
+                    await locator.wait_for(
+                        state="visible", timeout=max_timeout_seconds_per_try * 1000
+                    )
+                except Exception as e:
+                    pass
+            is_visible = await locator.is_visible()
+            if is_visible:
+                # Replace the most recent recorded browser state with a fresh
+                # snapshot before acting.
+                browser_state_summary = await browser.get_browser_state_summary()
+                memory.browser_states[-1] = BrowserState(
+                    url=browser_state_summary.url,
+                    screenshot=browser_state_summary.screenshot,
+                    title=browser_state_summary.title,
+                    axtree=browser_state_summary.dom_state.llm_representation(),
+                )
+                # Dispatch on the concrete action type.
+                if isinstance(action, ClickElementAction):
+                    await click_locator(
+                        action,
+                        locator,
+                        browser,
+                        memory,
+                        task,
+                        max_timeout_seconds_per_try,
+                    )
+                elif isinstance(action, InputTextAction):
+                    await input_text_locator(
+                        action, locator, max_timeout_seconds_per_try
+                    )
+                elif isinstance(action, SelectOptionAction):
+                    await select_option_locator(
+                        action,
+                        locator,
+                        browser,
+                        memory,
+                        task,
+                        max_timeout_seconds_per_try,
+                    )
+                elif isinstance(action, CheckAction):
+                    await check_locator(
+                        action, locator, max_timeout_seconds_per_try, browser
+                    )
+                elif isinstance(action, UncheckAction):
+                    await uncheck_locator(
+                        action, locator, max_timeout_seconds_per_try, browser
+                    )
+                elif isinstance(action, UploadFileAction):
+                    await upload_file_locator(action, locator)
+                logger.debug(
+                    f"{action.__class__.__name__} successful on try {try_index + 1}"
+                )
+                return
+            else:
+                # Not visible yet: wait out the per-try budget, then retry.
+                await asyncio.sleep(max_timeout_seconds_per_try)
+                last_error = f"error: locator not visible"
+        except Exception as e:
+            # Any failure during the try is recorded, then retried after a sleep.
+            last_error = f"error: {e}"
+            await asyncio.sleep(max_timeout_seconds_per_try)
+    # Every loop iteration either returns or sets last_error, so it is still
+    # None here only when the loop body never ran (max_tries <= 0).
+    if last_error is None:
+        last_error = "error in executing command"
+    logger.debug(
+        f"{action.__class__.__name__} failed after {max_tries} tries: {last_error}"
+    )
+    if last_error and action.assert_locator_presence:
+        logger.debug(
+            f"Error in {action.__class__.__name__} with assert_locator_presence: {action.__class__.__name__}: {last_error}"
+        )
+        raise AssertLocatorPresenceException(
+            message=f"Error in {action.__class__.__name__} with assert_locator_presence: {action.__class__.__name__}",
+            original_error=last_error,
+            command=action.command,
+        )
+    return last_error
+async def click_locator(
+    click_element_action: ClickElementAction,
+    locator: Locator,
+    browser: Browser,
+    memory: Memory,
+    task: Task,
+    max_timeout_seconds_per_try: float,
+):
+    async def _actual_click():
+        if click_element_action.double_click:
+            await locator.dblclick(
+                no_wait_after=True, timeout=max_timeout_seconds_per_try * 1000
+            )
+        else:
+            await locator.click(
+                no_wait_after=True, timeout=max_timeout_seconds_per_try * 1000
+            )
+    if click_element_action.expect_download:
+        await handle_download(
+            _actual_click, memory, browser, task, click_element_action.download_filename
+        )
+    else:
+        await _actual_click()
+async def input_text_locator(
+    input_text_action: InputTextAction,
+    locator: Locator,
+    max_timeout_seconds_per_try: float,
+):
+    if input_text_action.fill_or_type == "fill":
+        await locator.fill(
+            input_text_action.input_text,
+            no_wait_after=True,
+            timeout=max_timeout_seconds_per_try * 1000,
+        )
+    else:
+        await locator.type(
+            input_text_action.input_text,
+            no_wait_after=True,
+            timeout=max_timeout_seconds_per_try * 1000,
+        )
+    if input_text_action.press_enter:
+        await locator.press("Enter")
+async def check_locator(
+    action: CheckAction,
+    locator: Locator,
+    max_timeout_seconds_per_try: float,
+    browser: Browser,
+):
+    await locator.uncheck(
+        no_wait_after=True, timeout=max_timeout_seconds_per_try * 1000
+    )
+    await asyncio.sleep(1)
+    locator = await browser.get_locator_from_command(action.command)
+    await locator.check(no_wait_after=True, timeout=max_timeout_seconds_per_try * 1000)
+async def uncheck_locator(
+    action: UncheckAction,
+    locator: Locator,
+    max_timeout_seconds_per_try: float,
+    browser: Browser,
+):
+    await locator.check(no_wait_after=True, timeout=max_timeout_seconds_per_try * 1000)
+    await asyncio.sleep(1)
+    locator = await browser.get_locator_from_command(action.command)
+    await locator.uncheck(
+        no_wait_after=True, timeout=max_timeout_seconds_per_try * 1000
+    )
+async def upload_file_locator(upload_file_action: UploadFileAction, locator: Locator):
+    await locator.set_input_files(upload_file_action.file_path)
+async def select_option_locator(
+    select_option_action: SelectOptionAction,
+    locator: Locator,
+    browser: Browser,
+    memory: Memory,
+    task: Task,
+    max_timeout_seconds_per_try: float,
+):
+    async def _actual_select_option():
+        options: list[dict[str, str]] = await locator.evaluate(
+            """
+        sel => Array.from(sel.options).map(o => ({
+            value: o.value,
+            label: o.label || o.textContent
+        }))
+    """
+        )
+        select_option_values = [
+            SelectOptionValue(value=o["value"], label=o["label"]) for o in options
+        ]
+        matched_values = await smart_select(
+            select_option_values, options, select_option_action.select_values, memory
+        )
+        logger.debug(
+            f"Matched values for {select_option_action.command}: {matched_values}"
+        )
+        await locator.select_option(
+            matched_values,
+            no_wait_after=True,
+            timeout=max_timeout_seconds_per_try * 1000,
+        )
+    if select_option_action.expect_download:
+        await handle_download(
+            _actual_select_option,
+            memory,
+            browser,
+            task,
+            select_option_action.download_filename,
+        )
+    else:
+        await _actual_select_option()

optexity/inference/core/interaction/handle_input.py ADDED Viewed

@@ -0,0 +1,76 @@
+import logging
+import re
+from optexity.inference.core.interaction.handle_command import (
+    command_based_action_with_retry,
+)
+from optexity.inference.core.interaction.utils import get_index_from_prompt
+from optexity.inference.infra.browser import Browser
+from optexity.schema.actions.interaction_action import InputTextAction
+from optexity.schema.memory import Memory
+from optexity.schema.task import Task
+logger = logging.getLogger(__name__)
+async def handle_input_text(
+    input_text_action: InputTextAction,
+    task: Task,
+    memory: Memory,
+    browser: Browser,
+    max_timeout_seconds_per_try: float,
+    max_tries: int,
+):
+    # {some english chars [0]}
+    INT_INDEX_PATTERN = re.compile(r"^\{([A-Za-z_][A-Za-z0-9_]*)\[(\d+)\]\}$")
+    if INT_INDEX_PATTERN.match(input_text_action.input_text) is not None:
+        logger.debug(
+            "Skipping input text because input variable was not present for this step"
+        )
+        return
+    if input_text_action.command and not input_text_action.skip_command:
+        last_error = await command_based_action_with_retry(
+            input_text_action,
+            browser,
+            memory,
+            task,
+            max_tries,
+            max_timeout_seconds_per_try,
+        )
+        if last_error is None:
+            return
+    if not input_text_action.skip_prompt:
+        logger.debug(
+            f"Executing prompt-based action: {input_text_action.__class__.__name__}"
+        )
+        await input_text_index(input_text_action, browser, memory)
+async def input_text_index(
+    input_text_action: InputTextAction, browser: Browser, memory: Memory
+):
+    try:
+        index = await get_index_from_prompt(
+            memory, input_text_action.prompt_instructions, browser
+        )
+        if index is None:
+            return
+        action_model = browser.backend_agent.ActionModel(
+            **{
+                "input": {
+                    "index": int(index),
+                    "text": input_text_action.input_text,
+                    "clear": True,
+                }
+            }
+        )
+        await browser.backend_agent.multi_act([action_model])
+    except Exception as e:
+        logger.error(f"Error in input_text_index: {e}")
+        return

optexity/inference/core/interaction/handle_keypress.py ADDED Viewed

@@ -0,0 +1,16 @@
+from optexity.inference.infra.browser import Browser
+from optexity.schema.actions.interaction_action import KeyPressAction, KeyPressType
+from optexity.schema.memory import Memory
+async def handle_key_press(
+    keypress_action: KeyPressAction,
+    memory: Memory,
+    browser: Browser,
+):
+    page = await browser.get_current_page()
+    if page is None:
+        return
+    if keypress_action.type == KeyPressType.ENTER:
+        await page.keyboard.press("Enter")

optexity/inference/core/interaction/handle_select.py ADDED Viewed

@@ -0,0 +1,109 @@
+import logging
+from browser_use.dom.serializer.serializer import DOMTreeSerializer
+from optexity.inference.core.interaction.handle_command import (
+    command_based_action_with_retry,
+)
+from optexity.inference.core.interaction.handle_select_utils import (
+    SelectOptionValue,
+    smart_select,
+)
+from optexity.inference.core.interaction.utils import (
+    get_index_from_prompt,
+    handle_download,
+)
+from optexity.inference.infra.browser import Browser
+from optexity.schema.actions.interaction_action import SelectOptionAction
+from optexity.schema.memory import Memory
+from optexity.schema.task import Task
+logger = logging.getLogger(__name__)
+async def handle_select_option(
+    select_option_action: SelectOptionAction,
+    task: Task,
+    memory: Memory,
+    browser: Browser,
+    max_timeout_seconds_per_try: float,
+    max_tries: int,
+):
+    if select_option_action.command and not select_option_action.skip_command:
+        last_error = await command_based_action_with_retry(
+            select_option_action,
+            browser,
+            memory,
+            task,
+            max_tries,
+            max_timeout_seconds_per_try,
+        )
+        if last_error is None:
+            return
+    if not select_option_action.skip_prompt:
+        logger.debug(
+            f"Executing prompt-based action: {select_option_action.__class__.__name__}"
+        )
+        await select_option_index(select_option_action, browser, memory, task)
+async def select_option_index(
+    select_option_action: SelectOptionAction,
+    browser: Browser,
+    memory: Memory,
+    task: Task,
+):
+    ## TODO either perfect text match or agenic select value prediction
+    try:
+        index = await get_index_from_prompt(
+            memory, select_option_action.prompt_instructions, browser
+        )
+        if index is None:
+            return
+        node = await browser.backend_agent.browser_session.get_element_by_index(index)
+        if node is None:
+            return
+        select_option_values = DOMTreeSerializer(node)._extract_select_options(node)
+        if select_option_values is None:
+            return
+        all_options = select_option_values["all_options"]
+        all_options = [
+            SelectOptionValue(value=o["value"], label=o["text"]) for o in all_options
+        ]
+        matched_values = await smart_select(
+            all_options, select_option_action.select_values, memory
+        )
+        async def _actual_select_option():
+            action_model = browser.backend_agent.ActionModel(
+                **{
+                    "select_dropdown": {
+                        "index": int(index),
+                        "text": matched_values[0],
+                    }
+                }
+            )
+            await browser.backend_agent.multi_act([action_model])
+        if select_option_action.expect_download:
+            await handle_download(
+                _actual_select_option,
+                memory,
+                browser,
+                task,
+                select_option_action.download_filename,
+            )
+        else:
+            await _actual_select_option()
+    except Exception as e:
+        logger.error(f"Error in select_option_index: {e}")
+        return

optexity 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

optexity 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl