PyPI - browsergym-workarena - Versions diffs - 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

browsergym-workarena 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (91) hide show

browsergym/workarena/install.py CHANGED Viewed

@@ -17,11 +17,16 @@ from .config import (
     # for knowledge base setup
     KB_FILEPATH,
     KB_NAME,
+    PROTOCOL_KB_FILEPATH,
+    PROTOCOL_KB_NAME,
     # For list setup
     EXPECTED_ASSET_LIST_COLUMNS_PATH,
     EXPECTED_CHANGE_REQUEST_COLUMNS_PATH,
+    EXPECTED_EXPENSE_LINE_COLUMNS_PATH,
     EXPECTED_HARDWARE_COLUMNS_PATH,
     EXPECTED_INCIDENT_COLUMNS_PATH,
+    EXPECTED_PROBLEM_COLUMNS_PATH,
+    EXPECTED_REQUESTED_ITEMS_COLUMNS_PATH,
     EXPECTED_SERVICE_CATALOG_COLUMNS_PATH,
     EXPECTED_USER_COLUMNS_PATH,
     # for form setup
@@ -29,6 +34,7 @@ from .config import (
     EXPECTED_HARDWARE_FORM_FIELDS_PATH,
     EXPECTED_INCIDENT_FORM_FIELDS_PATH,
     EXPECTED_PROBLEM_FORM_FIELDS_PATH,
+    EXPECTED_REQUEST_ITEM_FORM_FIELDS_PATH,
     EXPECTED_USER_FORM_FIELDS_PATH,
     # Patch flag for reports
     REPORT_PATCH_FLAG,
@@ -269,7 +275,11 @@ def delete_knowledge_base(instance: SNowInstance, kb_id: str, kb_name: str):
 def create_knowledge_base(
-    instance: SNowInstance, kb_name: str, kb_data: dict, disable_commenting: bool = True
+    instance: SNowInstance,
+    kb_name: str,
+    kb_data: dict,
+    disable_commenting: bool = True,
+    add_article_name: bool = False,
 ):
     """
     Create knowledge base and upload all articles.
@@ -283,6 +293,9 @@ def create_knowledge_base(
         The knowledge base data to upload
     disable_commenting: bool
         Whether to disable commenting on the knowledge base
+    add_article_name: bool
+        Whether to title each article with its name. If False, the articles will be named "Article <number>".
+        Otherwise, the article title is taken from the 'item' field of each entry in the JSON file.
     """
     logging.info(f"Installing knowledge base {kb_name}...")
@@ -311,7 +324,10 @@ def create_knowledge_base(
     for i, kb_entry in enumerate(kb_data):
         logging.info(f"... Knowledge Base {kb_name} uploading article {i + 1}/{len(kb_data)}")
         article = kb_entry["article"]
+        if add_article_name:
+            short_description = kb_entry["item"]
+        else:
+            short_description = f"Article {i + 1}"
         # Plant a new article in kb_knowledge table
         table_api_call(
             instance,
@@ -319,7 +335,7 @@ def create_knowledge_base(
             method="POST",
             data=json.dumps(
                 {
-                    "short_description": f"Article {i + 1}",
+                    "short_description": short_description,
                     "sys_class_name": "kb_knowledge",
                     "text": article,
                     "article_type": "text",
@@ -337,11 +353,12 @@ def setup_knowledge_bases():
     """
     # Get the ServiceNow instance
     instance = SNowInstance()
-    # Mapping between knowledge base name and filepath + whether or not to disable comments
+    # Mapping between knowledge base name and filepath + whether or not to disable comments + whether or not to add article name
     knowledge_bases = {
-        KB_NAME: (KB_FILEPATH, True),
+        KB_NAME: (KB_FILEPATH, True, False),
+        PROTOCOL_KB_NAME: (PROTOCOL_KB_FILEPATH, True, True),
     }
-    for kb_name, (kb_filepath, disable_commenting) in knowledge_bases.items():
+    for kb_name, (kb_filepath, disable_commenting, add_article_name) in knowledge_bases.items():
         # Load the knowledge base
         with open(kb_filepath, "r") as f:
             kb_data = json.load(f)
@@ -365,6 +382,7 @@ def setup_knowledge_bases():
                 kb_name=kb_name,
                 kb_data=kb_data,
                 disable_commenting=disable_commenting,
+                add_article_name=add_article_name,
             )
             # Confirm that the knowledge base was installed correctly
@@ -570,10 +588,22 @@ def setup_list_columns():
             "url": "/now/nav/ui/classic/params/target/incident_list.do",
             "expected_columns_path": EXPECTED_INCIDENT_COLUMNS_PATH,
         },
+        "problem": {
+            "url": "/now/nav/ui/classic/params/target/problem_list.do",
+            "expected_columns_path": EXPECTED_PROBLEM_COLUMNS_PATH,
+        },
         "sys_user": {
             "url": "/now/nav/ui/classic/params/target/sys_user_list.do",
             "expected_columns_path": EXPECTED_USER_COLUMNS_PATH,
         },
+        "sc_req_item": {
+            "url": "/now/nav/ui/classic/params/target/sc_req_item_list.do",
+            "expected_columns_path": EXPECTED_REQUESTED_ITEMS_COLUMNS_PATH,
+        },
+        "fm_expense_line": {
+            "url": "/now/nav/ui/classic/params/target/fm_expense_line_list.do",
+            "expected_columns_path": EXPECTED_EXPENSE_LINE_COLUMNS_PATH,
+        },
         "sc_cat_item": {
             "url": "/now/nav/ui/classic/params/target/sc_cat_item_list.do",
             "expected_columns_path": EXPECTED_SERVICE_CATALOG_COLUMNS_PATH,
@@ -680,6 +710,10 @@ def setup_form_fields():
             "expected_fields_path": EXPECTED_USER_FORM_FIELDS_PATH,
             "url": "/now/nav/ui/classic/params/target/sys_user.do",
         },
+        "create_request_item": {
+            "expected_fields_path": EXPECTED_REQUEST_ITEM_FORM_FIELDS_PATH,
+            "url": "/now/nav/ui/classic/params/target/sc_req_item.do",
+        },
     }
     logging.info("... Creating a new user account to validate form fields")
@@ -872,6 +906,17 @@ def wipe_system_admin_preferences():
         )
+def is_report_filter_using_time(filter):
+    """
+    Heuristic to check if a report is filtering based on time
+    This aims to detect the use of functions like "gs.endOfToday()". To avoid hardcoding all of them,
+    we simply check for the use of keywords. Our filter is definitely too wide, but that's ok.
+    """
+    return "javascript:gs." in filter or "@ago" in filter
 def patch_report_filters():
     """
     Add filters to reports to make sure they stay frozen in time and don't show new data
@@ -880,8 +925,6 @@ def patch_report_filters():
     """
     logging.info("Patching reports with date filter...")
-    cutoff_date = REPORT_DATE_FILTER
     instance = SNowInstance()
     # Get all reports that are not already patched
@@ -893,22 +936,35 @@ def patch_report_filters():
         },
     )["result"]
-    incompatible_reports = []
     for report in reports:
         # Find all sys_created_on columns of this record. Some have many.
         sys_created_on_cols = [
             c for c in table_column_info(instance, report["table"]).keys() if "sys_created_on" in c
         ]
         try:
             # XXX: We purposely do not support reports with multiple filter conditions for simplicity
             if len(sys_created_on_cols) == 0 or "^NQ" in report["filter"]:
-                raise NotImplementedError()
+                logging.info(f"Discarding report {report['title']} {report['sys_id']}...")
+                raise NotImplementedError()  # Mark for deletion
+            if not is_report_filter_using_time(report["filter"]):
+                # That's a report we want to keep (use date cutoff filter)
+                filter_date = REPORT_DATE_FILTER
+                logging.info(
+                    f"Keeping report {report['title']} {report['sys_id']} (columns: {sys_created_on_cols})..."
+                )
+            else:
+                # XXX: We do not support reports with filters that rely on time (e.g., last 10 days) because
+                #      they are not stable. In this case, we don't delete them but add a filter to make
+                #      them empty. They will be shown as "No data available".
+                logging.info(
+                    f"Disabling report {report['title']} {report['sys_id']} because it uses time filters..."
+                )
+                filter_date = "1900-01-01"
-            # Add the filter
             filter = "".join(
                 [
-                    f"^{col}<javascript:gs.dateGenerate('{cutoff_date}','00:00:00')"
+                    f"^{col}<javascript:gs.dateGenerate('{filter_date}','00:00:00')"
                     for col in sys_created_on_cols
                 ]
             ) + ("^" if len(report["filter"]) > 0 and not report["filter"].startswith("^") else "")
@@ -921,16 +977,21 @@ def patch_report_filters():
                     "description": report["description"] + " " + REPORT_PATCH_FLAG,
                 },
             )
-            logging.info(
-                f"Patched report {report['title']} {report['sys_id']} (columns: {sys_created_on_cols})..."
-            )
+            logging.info(f"... done")
         except (NotImplementedError, HTTPError):
             # HTTPError occurs when some reports simply cannot be patched because they are critical and protected
-            incompatible_reports.append(report["sys_id"])
-            logging.info(
-                f"Did not patch report {report['title']} {report['title']} (columns: {sys_created_on_cols})..."
-            )
+            logging.info(f"...failed to patch report. Attempting delete...")
+            # Delete the report if it cannot be patched
+            # This might fail sometimes, but it's the best we can do.
+            try:
+                table_api_call(
+                    instance=instance, table=f"sys_report/{report['sys_id']}", method="DELETE"
+                )
+                logging.info(f"...... deleted.")
+            except:
+                logging.error(f"...... could not delete.")
 @tenacity.retry(

browsergym/workarena/tasks/base.py CHANGED Viewed

@@ -10,7 +10,7 @@ import playwright.sync_api
 from abc import ABC, abstractmethod
 from copy import deepcopy
-from typing import Dict, List, Optional, Tuple
+from typing import List, Optional, Tuple
 from uuid import uuid4
 from urllib import parse
@@ -18,7 +18,7 @@ from browsergym.core.task import AbstractBrowserTask
 from ..api.user import create_user
 from ..api.utils import table_api_call
 from ..config import SNOW_BROWSER_TIMEOUT, SNOW_JS_UTILS_FILEPATH
-from ..utils import impersonate_user, url_login
+from ..utils import url_login
 from ..instance import SNowInstance
@@ -34,7 +34,8 @@ class AbstractServiceNowTask(AbstractBrowserTask, ABC):
         start_rel_url: str,
         instance: SNowInstance = None,
         final_rel_url: Optional[str] = None,
-        username: Optional[str] = "admin",
+        user_roles: List[str] = ["admin"],
+        has_description: bool = False,
     ) -> None:
         """
         Initialize the task
@@ -45,10 +46,14 @@ class AbstractServiceNowTask(AbstractBrowserTask, ABC):
             Random seed
         instance: SNowInstance
             The ServiceNow instance in which the task will be performed
-        start_url: str
+        start_rel_url: str
             The URL for the starting page of the task
-        final_url: str (optional)
+        final_rel_url: str (optional)
             The URL for the final page of the task (default: uses the value of base_url)
+        user_roles: list[str]
+            The roles to assign to the user (default: ["admin"])
+        has_description: bool
+            Whether the task has a description in L3 compositional tasks
         """
         super().__init__(seed)
@@ -67,6 +72,16 @@ class AbstractServiceNowTask(AbstractBrowserTask, ABC):
             self.final_url = self.start_url
         self.final_url_ = parse.urlparse(self.final_url)
+        # Set the task's unique ID
+        self.unique_id = str(uuid4())
+        # Flag to ensure the task is setup only once
+        self.task_is_setup = False
+        self.delete_user_on_teardown = False
+        self.user_roles = user_roles
+        self.has_description = (
+            has_description  # Whether the task has a description in L3 compositional tasks
+        )
     def cheat(self, page: playwright.sync_api.Page, chat_messages: list[str]) -> None:
         # Don't call super cheat function because it's not implemented at the base level
         logging.debug("Cheat is solving the task")
@@ -102,6 +117,8 @@ class AbstractServiceNowTask(AbstractBrowserTask, ABC):
         """
         logging.debug("Setting up the base task")
+        if self.task_is_setup:
+            raise ValueError("The task is already setup")
         # Keep the page for client-side validation
         self.page = page
@@ -109,6 +126,15 @@ class AbstractServiceNowTask(AbstractBrowserTask, ABC):
         # Set the page timeout
         page.set_default_timeout(SNOW_BROWSER_TIMEOUT)
+        # Create a new user to run the task if this is the starting task
+        if do_start:
+            self._base_initial_instance = self.instance
+            self._base_user_name, self._base_user_password, self._base_user_sysid = create_user(
+                instance=self.instance, user_roles=self.user_roles, random=self.random
+            )
+            self.instance = deepcopy(self.instance)
+            self.instance.snow_credentials = (self._base_user_name, self._base_user_password)
+            self.delete_user_on_teardown = True
         # Set the task's unique ID
         self.unique_id = str(uuid4())
@@ -116,26 +142,26 @@ class AbstractServiceNowTask(AbstractBrowserTask, ABC):
         goal, info = self.setup_goal(page=page)
         # Load a few utility functions for init scripts
-        page.add_init_script(path=SNOW_JS_UTILS_FILEPATH)
+        page.context.add_init_script(path=SNOW_JS_UTILS_FILEPATH)
         # Add the initialization scripts to the page context
         for script in self.get_init_scripts():
             page.context.add_init_script(script)
-        # Create a new user to run the task
-        self._base_initial_instance = self.instance
-        self._base_user_name, self._base_user_password, self._base_user_sysid = create_user(
-            self.instance
-        )
-        self.instance = deepcopy(self.instance)
-        self.instance.snow_credentials = (self._base_user_name, self._base_user_password)
         # Start the task if requested
         if do_start:
             self.start(page)
+        self.task_is_setup = True
         return goal, info
+    def create_user(self, first_name: str = None, last_name: str = None):
+        """
+        Create a user in the ServiceNow instance
+        """
     @abstractmethod
     def setup_goal(self, page: playwright.sync_api.Page) -> tuple[str, dict]:
         """
@@ -157,11 +183,20 @@ class AbstractServiceNowTask(AbstractBrowserTask, ABC):
         page.goto(self.start_url)
     def teardown(self) -> None:
+        """
+        Clean up after the task
+        Notes:
+        ------
+        This method should not make assumptions on the state of the page (e.g., a specific URL).
+        """
         logging.debug("Tearing down the task")
-        # Delete the user
-        table_api_call(
-            instance=self._base_initial_instance,
-            table=f"sys_user/{self._base_user_sysid}",
-            method="DELETE",
-        )
+        if self.delete_user_on_teardown:
+            # Delete the user
+            table_api_call(
+                instance=self._base_initial_instance,
+                table=f"sys_user/{self._base_user_sysid}",
+                method="DELETE",
+            )

browsergym/workarena/tasks/comp_building_block.py ADDED Viewed

@@ -0,0 +1,4 @@
+class CompositionalBuildingBlockTask:
+    """Base class for compositional building block tasks. Used to exclude these tasks from the list of tasks that are tested like atomic tasks"""
+    pass

browsergym/workarena/tasks/compositional/__init__.py ADDED Viewed

@@ -0,0 +1,76 @@
+from .utils.curriculum import AGENT_CURRICULUM, HUMAN_CURRICULUM
+ALL_COMPOSITIONAL_TASKS = []
+for category, items in AGENT_CURRICULUM.items():
+    category_tasks = []
+    for task in items["buckets"]:
+        category_tasks += task
+    ALL_COMPOSITIONAL_TASKS += category_tasks
+def specialize_task_class_to_level(task_cls, level):
+    """
+    Function to hardcode the level for the tasks
+    """
+    new_name = f"{task_cls.__name__}L{level}"
+    patched_cls = f"""
+class {new_name}(task_cls):
+    def __init__(self, **kwargs):
+        super().__init__(level={level}, **kwargs)
+"""
+    # Dictionary to capture local variables defined by exec
+    local_vars = {"task_cls": task_cls}
+    exec(patched_cls, globals(), local_vars)
+    return local_vars[new_name]
+ALL_COMPOSITIONAL_TASKS_L2 = [
+    specialize_task_class_to_level(task, level=2) for task in ALL_COMPOSITIONAL_TASKS
+]
+ALL_COMPOSITIONAL_TASKS_L3 = [
+    specialize_task_class_to_level(task, level=3) for task in ALL_COMPOSITIONAL_TASKS
+]
+AGENT_CURRICULUM_L2 = dict()
+AGENT_CURRICULUM_L3 = dict()
+for category, items in AGENT_CURRICULUM.items():
+    AGENT_CURRICULUM_L2[category] = {
+        "buckets": [
+            [specialize_task_class_to_level(task, level=2) for task in task_set]
+            for task_set in items["buckets"]
+        ],
+        "num_seeds": items["num_seeds"],
+        "weights": items["weights"],
+    }
+    AGENT_CURRICULUM_L3[category] = {
+        "buckets": [
+            [specialize_task_class_to_level(task, level=3) for task in task_set]
+            for task_set in items["buckets"]
+        ],
+        "num_seeds": items["num_seeds"],
+        "weights": items["weights"],
+    }
+HUMAN_CURRICULUM_L2 = dict()
+HUMAN_CURRICULUM_L3 = dict()
+for category, items in HUMAN_CURRICULUM.items():
+    HUMAN_CURRICULUM_L2[category] = {
+        "buckets": [
+            [specialize_task_class_to_level(task, level=2) for task in task_set]
+            for task_set in items["buckets"]
+        ],
+        "num_seeds": items["num_seeds"],
+        "weights": items["weights"],
+    }
+    HUMAN_CURRICULUM_L3[category] = {
+        "buckets": [
+            [specialize_task_class_to_level(task, level=3) for task in task_set]
+            for task_set in items["buckets"]
+        ],
+        "num_seeds": items["num_seeds"],
+        "weights": items["weights"],
+    }

browsergym-workarena 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

browsergym-workarena 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl