PyPI - browsergym-workarena - Versions diffs - 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

browsergym-workarena 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (91) hide show

browsergym/workarena/tasks/mark_duplicate_problem.py ADDED Viewed

@@ -0,0 +1,171 @@
+import json
+from playwright.sync_api import Page
+from typing import Tuple
+from .base import AbstractServiceNowTask
+from .comp_building_block import CompositionalBuildingBlockTask
+from ..api.utils import table_api_call
+class SetProblemAsDuplicateTask(AbstractServiceNowTask, CompositionalBuildingBlockTask):
+    """
+    Set a problem as duplicate, assuming we start on the problems list view.
+    Parameters:
+    -----------
+    seed: int
+        Forwarded to the parent constructor.
+    instance: SNowInstance
+        The instance to use.
+    start_rel_url: str
+        The relative URL of the task list.
+    fixed_config: dict
+        Configuration to use for the task. setup_goal reads its "target_problem" and "source_problem" entries, and
+        cheat/validate read the "number" key of each, so it must be provided before setup_goal is called.
+    respect_problem_ordering: bool
+        Whether to respect the ordering of the problems. If True, validation only succeeds when the target problem
+        is marked as a duplicate of the source problem. If False, validation accepts either problem being marked
+        as a duplicate of the other.
+    add_comment: bool
+        Whether or not to add comment to the duplicated task. If set to True, cheat writes "Duplicate" as the problem
+        description and validate requires the description of the duplicated problem to be "duplicate"
+        (case-insensitive).
+    goal_version: str
+        choice of "base", "priority", "high_priority". Adjusts the goal to the task setting for L2.
+        The spelling "high priority" (with a space) is accepted as well.
+    level: int
+        When equal to 3, the pretty-printed description is a single blank space.
+    **kwargs:
+        Any extra keyword arguments are stored as attributes on the instance.
+    """
+    def __init__(
+        self,
+        seed: int = None,
+        instance=None,
+        start_rel_url="/now/nav/ui/classic/params/target/problem_list.do",
+        fixed_config: dict = None,
+        respect_problem_ordering: bool = False,
+        add_comment: bool = False,
+        goal_version: str = "base",
+        level: int = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(seed=seed, instance=instance, start_rel_url=start_rel_url)
+        self.fixed_config = fixed_config
+        self.config = fixed_config
+        self.problem_sys_id = None
+        self.respect_problem_ordering = respect_problem_ordering
+        self.add_comment = add_comment
+        self.goal_version = goal_version
+        self.level = level
+        # Extra keyword arguments become instance attributes
+        self.__dict__.update(kwargs)
+    def setup_goal(self, page: Page) -> tuple[str, dict]:
+        """
+        Read the target and source problems from the fixed config and return the goal with an empty info dict.
+        """
+        self.target_problem = self.fixed_config["target_problem"]
+        self.source_problem = self.fixed_config["source_problem"]
+        goal = self.get_pretty_printed_description()
+        return goal, {}
+    def get_pretty_printed_description(self) -> str:
+        """
+        Get the task info for this task when used in a private task; Used in L2 compositional tasks.
+        called by subclasses
+        Raises:
+        -------
+        ValueError
+            If level is not 3 and goal_version is not one of the supported values.
+        """
+        if self.level == 3:
+            task_info = " "
+        elif self.goal_version == "base":
+            task_info = "Mark problems with duplicated problem statements as such. You can mark any as duplicate of the other."
+        elif self.goal_version == "priority":
+            task_info = "Among the problems with duplicated problem statements, mark the lower priority one as duplicate of the higher priority one"
+        # The class docstring advertises "high_priority"; the space-separated spelling is kept for existing callers
+        elif self.goal_version in ("high priority", "high_priority"):
+            task_info = "Among the problems with duplicated problem statements, mark any as duplicate of the other. Change the description of the problem marked as duplicate to 'duplicate'."
+        else:
+            # Fail with an explicit message instead of an UnboundLocalError on the return below
+            raise ValueError(f"Unsupported goal_version: {self.goal_version!r}")
+        return task_info
+    def cheat(self, page: Page, chat_messages: list[str]) -> None:
+        """
+        Mark the target problem as a duplicate of the source problem through the UI.
+        """
+        super().cheat(page, chat_messages)
+        target_problem_number = self.target_problem["number"]
+        frame = page.wait_for_selector('iframe[name="gsft_main"]').content_frame()
+        # Open the preview of the target problem, located by its number
+        frame.wait_for_selector(f"[aria-label='Preview record: {target_problem_number}']").click()
+        page.wait_for_timeout(1500)
+        # Open the full record from the preview
+        frame.get_by_text("Open Record").click()
+        page.wait_for_timeout(2000)
+        page.wait_for_load_state("networkidle")
+        # Fetch the iframe again after the navigation
+        frame = page.wait_for_selector('iframe[name="gsft_main"]').content_frame()
+        page.wait_for_timeout(1500)
+        # Open the duplicate mode
+        frame.get_by_text("Mark Duplicate").first.click()
+        page.wait_for_timeout(1000)
+        # Close the pop-up to edit the duplicate problem in the same window
+        frame.get_by_text("Close").last.click()
+        frame.locator('[aria-labelledby="label.problem.duplicate_of"]').fill(
+            self.source_problem["number"]
+        )
+        page.keyboard.press("Enter")
+        page.wait_for_timeout(1000)
+        if self.add_comment:
+            frame.locator('[id="problem.description"]').fill("Duplicate")
+        frame.get_by_text("update").first.click()
+    def validate(self, page: Page, chat_messages: list[str]) -> Tuple[float, bool, str, dict]:
+        """
+        Validate the solution
+        Looks up both problems by number in the "problem" table and checks their "duplicate_of" field.
+        Returns a (reward, done, message, info) tuple: (1, True, ...) on success and (0, False, ...) otherwise.
+        """
+        target_problem_record = table_api_call(
+            instance=self.instance,
+            table="problem",
+            params={"sysparm_query": f"number={self.target_problem['number']}"},
+        )["result"]
+        source_problem_record = table_api_call(
+            instance=self.instance,
+            table="problem",
+            params={"sysparm_query": f"number={self.source_problem['number']}"},
+        )["result"]
+        # Both lookups must return at least one record
+        problem_found = source_problem_record and target_problem_record
+        if not problem_found:
+            return 0, False, "", {"message": "Problem not found in DB."}
+        # if the duplicate value is not set, the field will be an empty string; otherwise it will be a dict
+        target_duplicate_value = target_problem_record[0]["duplicate_of"]
+        if target_duplicate_value:
+            target_duplicate_value = target_duplicate_value["value"]
+        target_is_duplicate = target_duplicate_value == source_problem_record[0]["sys_id"]
+        if self.respect_problem_ordering:
+            problem_marked_as_duplicate = target_is_duplicate
+        else:
+            # If the ordering can be anything, we check both problems
+            source_duplicate_value = source_problem_record[0]["duplicate_of"]
+            if source_duplicate_value:
+                source_duplicate_value = source_duplicate_value["value"]
+            source_is_duplicate = source_duplicate_value == target_problem_record[0]["sys_id"]
+            problem_marked_as_duplicate = target_is_duplicate or source_is_duplicate
+        if self.add_comment:
+            # The description must be "duplicate" on the problem that was actually marked as the duplicate
+            comment_added = (
+                target_problem_record[0]["description"].lower() == "duplicate"
+                and target_is_duplicate
+            )
+            # source_is_duplicate only exists when the ordering is not enforced, hence the same guard here
+            if not self.respect_problem_ordering:
+                comment_added = comment_added or (
+                    source_problem_record[0]["description"].lower() == "duplicate"
+                    and source_is_duplicate
+                )
+            if not comment_added:
+                return 0, False, "", {"message": "Comment not added."}
+        if not problem_marked_as_duplicate:
+            return 0, False, "", {"message": "Problem not marked as duplicate."}
+        return (
+            1,
+            True,
+            "Nice work, thank you!",
+            {"message": "Problem task was closed as duplicate."},
+        )
+__TASKS__ = [SetProblemAsDuplicateTask]

browsergym/workarena/tasks/navigation.py CHANGED Viewed

@@ -3,13 +3,14 @@ Tasks related to basic menu navigation.
 """
+import json
 import playwright.sync_api
+import re
 from importlib import resources
-import json
 from playwright.sync_api import Page
-from urllib.parse import urlparse, urlunparse, unquote
-from typing import Tuple
+from urllib import parse
+from typing import List, Tuple
 from ..api.utils import table_api_call
 from .base import AbstractServiceNowTask
@@ -34,11 +35,14 @@ class AllMenuTask(AbstractServiceNowTask):
     """
-    def __init__(self, seed: int, instance: SNowInstance = None, fixed_config: dict = None) -> None:
+    def __init__(
+        self, seed: int = None, instance: SNowInstance = None, fixed_config: dict = None, **kwargs
+    ) -> None:
         super().__init__(seed=seed, instance=instance, start_rel_url="/now/nav/ui/home")
         self.fixed_config = fixed_config
         with open(ALL_MENU_PATH, "r") as f:
             self.all_configs = json.load(f)
+        self.__dict__.update(kwargs)
     def setup_goal(self, page: Page) -> tuple[str, dict]:
         super().setup_goal(page=page)
@@ -47,7 +51,9 @@ class AllMenuTask(AbstractServiceNowTask):
         self.module = (
             self.fixed_config if self.fixed_config else self.random.choice(self.all_configs)
         )
-        self.final_url = self.instance.snow_url + self.module["url"]
+        # When menu tasks do not need to be validated, the URL can be omitted from their config
+        self.final_url = self.instance.snow_url + self.module.get("url", "")
         # Generate goal
         goal = f'Navigate to the "{self.module["module"]}" module of the "{self.module["application"]}" application.'
@@ -55,9 +61,19 @@ class AllMenuTask(AbstractServiceNowTask):
         return goal, info
+    def get_pretty_printed_description(self) -> str:
+        """
+        Get the task info for this task when used in a private task; Used in L3 compositional tasks.
+        called by subclasses
+        """
+        task_info = f'- Navigate to the "{self.module["module"]}" module of the "{self.module["application"]}" application.'
+        return task_info
     def cheat(self, page: Page, chat_messages: list[str]) -> None:
         super().cheat(page=page, chat_messages=chat_messages)
+        # gsft_main remains undefined on the landing page; we have to wait for the network to be idle instead.
+        page.wait_for_load_state("networkidle")
         menu_button = page.locator('div[aria-label="All"]')
         if menu_button.get_attribute("aria-expanded").lower() != "true":
             menu_button.click()
@@ -100,7 +116,7 @@ class AllMenuTask(AbstractServiceNowTask):
         # In some cases, like System Scheduler > Scheduled Jobs > Scheduled Jobs, modules are repeated in the path
         # This causes problems when clicking. Therefore, we pick the last item
         if menu_item.count() > 1:
-            menu_item = menu_item.last
+            menu_item = menu_item.first
         with page.expect_navigation():
             menu_item.click()
         page.wait_for_timeout(2000)
@@ -111,11 +127,18 @@ class AllMenuTask(AbstractServiceNowTask):
         page.wait_for_load_state("domcontentloaded")
         # Get the current URL and the final URL
-        current_url = urlunparse(urlparse(unquote(page.evaluate("() => window.location.href"))))
-        final_url = urlunparse(urlparse(unquote(self.final_url)))
+        current_url = parse.urlunparse(
+            parse.urlparse(parse.unquote(page.evaluate("() => window.location.href")))
+        )
+        final_url = parse.urlunparse(parse.urlparse(parse.unquote(self.final_url)))
         if final_url == current_url:
-            return 1, True, "Nice work, thank you!", {"message": "Correct module reached."}
+            return (
+                1,
+                True,
+                "Nice work, thank you!",
+                {"message": "Correct module reached."},
+            )
         return 0, False, "", {"message": "Not at expected URL."}
@@ -139,11 +162,14 @@ class ImpersonationTask(AbstractServiceNowTask):
     """
-    def __init__(self, seed: int, instance=None, fixed_config: dict = None) -> None:
+    def __init__(
+        self, seed: int = None, instance=None, fixed_config: dict = None, **kwargs
+    ) -> None:
         super().__init__(seed=seed, instance=instance, start_rel_url="/now/nav/ui/home")
         self.fixed_config = fixed_config
         with open(IMPERSONATION_CONFIG_PATH, "r") as f:
             self.all_configs = json.load(f)
+        self.__dict__.update(kwargs)
     def setup_goal(self, page: Page) -> tuple[str, dict]:
         super().setup_goal(page=page)
@@ -160,6 +186,15 @@ class ImpersonationTask(AbstractServiceNowTask):
         return goal, info
+    def get_pretty_printed_description(self) -> str:
+        """
+        Get the task info for this task when used in a private task; Used in L3 compositional tasks.
+        called by subclasses
+        """
+        task_info = f"- Impersonate the user {self.user_full_name} \n"
+        return task_info
     def cheat(self, page: Page, chat_messages: list[str]) -> None:
         super().cheat(page=page, chat_messages=chat_messages)
         impersonate_user(self.user_full_name, page)
@@ -167,7 +202,9 @@ class ImpersonationTask(AbstractServiceNowTask):
     def validate(
         self, page: playwright.sync_api.Page, chat_messages: list[str]
     ) -> Tuple[float, bool, str, dict]:
-        user_info = self.page.evaluate("window.NOW")["user"]
+        page.wait_for_function("window.NOW && window.NOW.user")
+        user_info = page.evaluate("window.NOW")["user"]
         # If the current user is not being impersonated, fail.
         if not user_info["isImpersonating"]:
@@ -185,7 +222,12 @@ class ImpersonationTask(AbstractServiceNowTask):
         # If the name matches, success.
         if user_fullname == self.user_full_name:
-            return 1, True, "Nice work, thank you!", {"message": "Correct user impersonated."}
+            return (
+                1,
+                True,
+                "Nice work, thank you!",
+                {"message": "Correct user impersonated."},
+            )
         # Otherwise, fail.
         return 0, False, "", {"message": "Currently impersonating the wrong user."}

browsergym/workarena/tasks/scripts/extract_all_menu_items.py CHANGED Viewed

@@ -72,7 +72,10 @@ if __name__ == "__main__":
             expand_and_gather_paths(page, nested_parent_selector, new_path)
         if not collapsible_lists:
-            current_path_item = {"path": current_path.copy(), "selector": parent_selector}
+            current_path_item = {
+                "path": current_path.copy(),
+                "selector": parent_selector,
+            }
             base_paths.append(current_path_item)
     def expand_menu():
@@ -190,7 +193,11 @@ if __name__ == "__main__":
                             45:
                         ]  # get only the end of the url
                         if url not in urls:
-                            menu_task = {"application": application, "module": module, "url": url}
+                            menu_task = {
+                                "application": application,
+                                "module": module,
+                                "url": url,
+                            }
                             all_menu_items.append(menu_task)
                             urls[url] = True

browsergym/workarena/tasks/scripts/generate_dashboard_configs.py CHANGED Viewed

@@ -28,7 +28,7 @@ from browsergym.workarena.tasks.dashboard import DashboardRetrievalTask
 N_CPU = 20
 MAX_CONFIGS = 1000
-REPORT = True  # Set to True for reports, False for dashboards
+REPORT = False  # Set to True for reports, False for dashboards
 class DummyDashboard(DashboardRetrievalTask):
@@ -102,10 +102,10 @@ def get_dashboard_urls(instance):
         "18b1f472533130104c90ddeeff7b12a6",  # Incident overview
         "287d07d1ff3130106c1ef9a7cddcbd5d",  # Request overview
         "7ab78953eb32011008f2951ff15228e6",  # Service catalog overview
-        "2d297c880f1130101527008c07767e27",  # Survey overview
+        # "2d297c880f1130101527008c07767e27",  # Survey overview (almost empty post deleting reports that rely on time)
         "6b706f448f231110953ddffc9071a4f3",  # Telemetry - Table growth
-        "15c5d2d377213010a435478c4f5a993c",  # Usage overview
-        "85a57f9677100110ba155631dc5a9905",  # Web api usage overview
+        # "15c5d2d377213010a435478c4f5a993c",  # Usage overview
+        # "85a57f9677100110ba155631dc5a9905",  # Web api usage overview (empty post deleting reports that rely on time)
         "c38ca3a273031010ae8dd21efaf6a747",  # Data classification
         "3d48f669538223008329ddeeff7b1253",  # Problem overview
     ]
@@ -131,6 +131,7 @@ def get_all_configs_by_url(url, is_report):
                 "chart_series": "",
                 "question": "max",
             },
+            seed=0,
         )
         task.setup(page=page)
@@ -196,7 +197,7 @@ def get_all_configs_by_url(url, is_report):
                     )
             except Exception as e:
                 print("Exception in worker", url, chart_title, e)
-                return []
+                continue  # Skip this chart
         if len(questions) == 0:
             return []

browsergym/workarena/tasks/scripts/service_catalog.py CHANGED Viewed

@@ -65,7 +65,8 @@ def generate_configs_for_all_items():
             "w",
         ) as f:
             all_configs_for_a_single_item = sorted(
-                all_configs_for_a_single_item, key=lambda x: x["item"] + str(x["quantity"])
+                all_configs_for_a_single_item,
+                key=lambda x: x["item"] + str(x["quantity"]),
             )
             json.dump(all_configs_for_a_single_item, f, indent=4, sort_keys=True)

browsergym/workarena/tasks/scripts/validate.py CHANGED Viewed

@@ -156,7 +156,11 @@ def validate_on_page(task_class, task_config, page):
 def validate_configs(
-    task_class, config_path, num_tasks: int = None, save_failed_tasks: bool = True, page=None
+    task_class,
+    config_path,
+    num_tasks: int = None,
+    save_failed_tasks: bool = True,
+    page=None,
 ) -> list[dict]:
     """Validate that the configs are working. Saves failing configs to json so they can be tested."""
     with open(config_path, "r") as f:
@@ -167,7 +171,9 @@ def validate_configs(
     failed_tasks = {"cheat": [], "no_reward": [], "exception": [], "not_done": []}
     with tqdm(
-        total=len(all_configs), desc=f"Validating {task_class.__name__} configs", ncols=150
+        total=len(all_configs),
+        desc=f"Validating {task_class.__name__} configs",
+        ncols=150,
     ) as pbar:
         for task_config in all_configs:
             try:

browsergym/workarena/tasks/send_chat_message.py ADDED Viewed

@@ -0,0 +1,90 @@
+from typing import Tuple
+from playwright.sync_api import Page
+from .base import AbstractServiceNowTask
+from .comp_building_block import CompositionalBuildingBlockTask
+from ..instance import SNowInstance
+class SendChatMessageTask(AbstractServiceNowTask, CompositionalBuildingBlockTask):
+    """Task to send a chat message in the chat. Only used as a compositional building block for the cheat function.
+    Args:
+    --------
+    instance (SNowInstance):
+        The instance to use.
+    message (str):
+        The message to send in the chat
+    answer_format (str):
+        The type of answer to generate. Choice of total_return_only, total_return_and_investments, investments_only, cleanup, cleanup_and_return
+    use_description_in_l3 (bool):
+        Stored on the instance for use by subclasses when building their description.
+    **kwargs:
+        Any extra keyword arguments are stored as attributes on the instance.
+    """
+    def __init__(
+        self,
+        instance: SNowInstance,
+        message: str,
+        answer_format: str,
+        use_description_in_l3: bool = False,
+        **kwargs,
+    ):
+        # The seed is fixed to 0 and there is no start URL for this building block
+        super().__init__(seed=0, instance=instance, start_rel_url="")
+        self.message = message
+        self.answer_format = answer_format
+        self.use_description_in_l3 = use_description_in_l3
+        # Extra keyword arguments become instance attributes
+        self.__dict__.update(kwargs)
+    def setup_goal(self, page: Page):
+        """Return the pretty-printed description as the goal, with an empty info dict."""
+        return self.get_pretty_printed_description(), {}
+    # The return type is a 4-tuple, not a 1-tuple of a union; this spelling also avoids the
+    # definition-time `|` between builtin types, which fails before Python 3.10.
+    def validate(self, page: Page, chat_messages: list[str]) -> Tuple[float, bool, str, dict]:
+        """Delegate validation to the parent implementation."""
+        return super().validate(page, chat_messages)
+    def cheat(self, page: Page, chat_messages: list[str]):
+        """Append the configured message to the chat as an assistant message."""
+        super().cheat(page=page, chat_messages=chat_messages)
+        # NOTE(review): a dict is appended although chat_messages is annotated as list[str] - confirm the intended type
+        chat_messages.append({"role": "assistant", "message": str(self.message)})
+    def teardown(self) -> None:
+        """Nothing to clean up."""
+        pass
+    def get_pretty_printed_description(self) -> str:
+        """
+        Get the task info for this task when used in a private task; Used in compositional tasks.
+        Not implemented here; this base class always raises NotImplementedError.
+        """
+        raise NotImplementedError
+class SendChatMessageForBudgetAllocationTask(SendChatMessageTask):
+    """Chat message building block whose description asks to allocate a budget to maximize revenue."""
+    # Sentence appended to the level 2 description for each supported answer_format
+    _ANSWER_FORMAT_INSTRUCTIONS = {
+        "total_return_only": " Provide only the total return of the investments in the chat.",
+        "total_return_and_investments": " Provide the total return of the investments as well as the value of their 'Number' field in the chat.",
+        "investments_only": " Provide only the value of the 'Number' field of the selected investments in the chat.",
+        "cleanup": " Delete the investments that will not be kept so that only the selected investments remain.",
+        "cleanup_and_return": " Delete the investments that will not be kept so that only the selected investments remain as well as returning their total value in the chat.",
+    }
+    def get_pretty_printed_description(self) -> str:
+        """
+        Get the task info for this task when used in a private task; Used in compositional tasks.
+        Reads self.level and, at level 2, self.budget and self.answer_format. An answer_format that is not
+        in the table above adds nothing to the level 2 description.
+        Raises:
+        -------
+        ValueError
+            If level is neither 2 nor 3.
+        """
+        if self.level == 3:
+            return "Allocate the budget to maximize revenue."
+        if self.level == 2:
+            task_info = f"Allocate the budget to maximize revenue. This involves going over expense lines and identifying the ones maximizing revenue while fitting in the allowed budget of {self.budget} $. The returns are written in their short description."
+            return task_info + self._ANSWER_FORMAT_INSTRUCTIONS.get(self.answer_format, "")
+        # Fail with an explicit message instead of an UnboundLocalError on an unbound task_info
+        raise ValueError(f"Unsupported level: {self.level!r}")
+class SendChatMessageGenericTask(SendChatMessageTask):
+    """Chat message building block whose description comes from the instance's description attribute."""
+    def get_pretty_printed_description(self) -> str:
+        """
+        Get the task info for this task when used in a private task; Used in compositional tasks.
+        Returns self.description when use_description_in_l3 is set or at level 2, and an empty string at level 3.
+        Raises:
+        -------
+        ValueError
+            If use_description_in_l3 is not set and level is neither 2 nor 3.
+        """
+        if self.use_description_in_l3:
+            task_info = self.description
+        elif self.level == 3:
+            task_info = ""
+        elif self.level == 2:
+            task_info = self.description
+        else:
+            # Fail with an explicit message instead of an UnboundLocalError on the return below
+            raise ValueError(f"Unsupported level: {self.level!r}")
+        return task_info

browsergym-workarena 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

browsergym-workarena 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl