PyPI - browsergym-workarena - Versions diffs - 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl - Mend

browsergym-workarena 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

browsergym/workarena/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-__version__ = "0.4.1"
+__version__ = "0.4.2"
 import inspect
 from logging import warning

browsergym/workarena/install.py CHANGED Viewed

@@ -788,6 +788,7 @@ def check_instance_release_support():
             f"You are running {version_info['build name']} {version_info}."
         )
         return False
     return True
@@ -800,6 +801,17 @@ def enable_url_login():
     logging.info("URL login enabled.")
+def disable_password_policies():
+    """
+    Disable password policies in the instance.
+    Notes: this is required to allow the creation of users with weak passwords.
+    """
+    _set_sys_property(property_name="glide.security.password.policy.enabled", value="false")
+    logging.info("Password policies disabled.")
 def disable_guided_tours():
     """
     Hide guided tour popups
@@ -1010,6 +1022,9 @@ def setup():
     # Enable URL login (XXX: Do this first since other functions can use URL login)
     enable_url_login()
+    # Disable password policies
+    disable_password_policies()
     # Set default landing page
     set_home_page()

browsergym/workarena/instance.py CHANGED Viewed

@@ -103,7 +103,7 @@ class SNowInstance:
         # XXX: Need to include the import here to avoid circular imports
         from .utils import ui_login
-        keys = ["build name", "build date", "build tag"]
+        keys = ["build name", "build date", "build tag", "connected to cluster node"]
         # We need to use playwright since the page is loaded dynamically
         # and its source doesn't contain the information we need

browsergym/workarena/tasks/list.py CHANGED Viewed

@@ -101,6 +101,11 @@ EXTRACT_USER_LIST_INFO_CONFIG = [
 class ServiceNowListTask(AbstractServiceNowTask):
+    OPERATOR_EQUALS = "="
+    OPERATOR_NOT_EQUALS = "!="
+    OPERATOR_STARTSWITH = "STARTSWITH"
+    OPERATOR_ISEMPTY = "ISEMPTY"
+    OPERATOR_EMPTYSTRING = "EMPTYSTRING"
     @classmethod
     def all_configs(cls) -> List[dict]:
@@ -777,6 +782,9 @@ class FilterListTask(ServiceNowListTask):
         list_info = self._extract_list_info(page)
         current_query = list_info["query"]
+        if not current_query:
+            return 0, False, "", {"message": "There are no filters yet."}
         # Replace "new query" statements with the standard OR separator
         current_query = current_query.replace("^NQ", "^OR")
@@ -789,24 +797,74 @@ class FilterListTask(ServiceNowListTask):
             current_sep = "^"
         if current_kind != self.filter_kind:
-            return 0, False, "", {"message": "The kind of filter used is incorrect."}
+            return (
+                0,
+                False,
+                "",
+                {"message": f"The kind of filter used is incorrect: {current_query}."},
+            )
         # Extract the query pieces for validation
         current_query = current_query.split(current_sep)
         # Validate query length is ok
         if len(current_query) != self.filter_len:
-            return 0, False, "", {"message": "Incorrect number of filter conditions."}
+            return (
+                0,
+                False,
+                "",
+                {"message": f"Incorrect number of filter conditions: {current_query}."},
+            )
+        # Parse column names, operators, and values
+        current_columns, current_operators, current_values = [], [], []
+        # Note that this is not exhaustive. If/when other operators are added, this will have to be updated.
+        for predicate in current_query:
+            if self.OPERATOR_EMPTYSTRING in predicate:
+                current_columns.append(predicate.replace(self.OPERATOR_EMPTYSTRING, "").strip())
+                current_operators.append("=")
+                current_values.append("")
+            elif self.OPERATOR_ISEMPTY in predicate:
+                current_columns.append(predicate.replace(self.OPERATOR_ISEMPTY, "").strip())
+                current_operators.append("=")
+                current_values.append("")
+            elif any(
+                unsupported_operator in predicate
+                for unsupported_operator in [self.OPERATOR_NOT_EQUALS, self.OPERATOR_STARTSWITH]
+            ):
+                return (
+                    0,
+                    False,
+                    "",
+                    {"message": f"Unexpected operator in filter condition: {current_query}."},
+                )
+            elif self.OPERATOR_EQUALS in predicate:
+                col, val = predicate.split(self.OPERATOR_EQUALS, 1)
+                current_columns.append(col.strip())
+                current_operators.append("=")
+                current_values.append(val.strip())
+            else:
+                return (
+                    0,
+                    False,
+                    "",
+                    {"message": f"Unexpected operator in filter condition: {current_query}."},
+                )
-        # Validate query columns are ok
-        current_columns = [x.split("=")[0] for x in current_query]
         if set(current_columns) != set(self.filter_columns):
-            return 0, False, "", {"message": "Incorrect filter columns."}
+            return (
+                0,
+                False,
+                "",
+                {
+                    "message": f"Incorrect filter columns: {set(current_columns)}. Expected: {set(self.filter_columns)}."
+                },
+            )
         # Validate query values are ok
         # This is the tricky part because we need to expand the values to their display values
         # We also need to handle the case where the value is a reference
-        current_values = [x.split("=")[1] for x in current_query]
         # Handle filtering across multiple rows
         if len(set(current_columns)) < len(current_columns):
@@ -856,9 +914,21 @@ class FilterListTask(ServiceNowListTask):
         # Validate the values
         if set(current_values) != set(self.filter_values):
-            return 0, False, "", {"message": "Incorrect filter values."}
+            return (
+                0,
+                False,
+                "",
+                {
+                    "message": f"Incorrect filter values {set(current_values)}. Expected: {set(self.filter_values)}."
+                },
+            )
-        return 1, True, "Nice work, thank you!", {"message": "Correct filter."}
+        return (
+            1,
+            True,
+            "Nice work, thank you!",
+            {"message": f"Correct filter: {list_info['query']}."},
+        )
 class ExtractListInfoTask(ServiceNowListTask):

browsergym/workarena/tasks/service_catalog.py CHANGED Viewed

@@ -472,16 +472,6 @@ class OrderHardwareTask(AbstractServiceNowTask):
             )
     def validate(self, page: Page, chat_messages: list[str]) -> tuple[int, bool, str, dict]:
-        right_url = check_url_suffix_match(page, expected_url=self.final_url, task=self)
-        if not right_url:
-            return (
-                0,
-                False,
-                "",
-                {
-                    "message": f"The page is not in the right URL to validate task {self.__class__.__name__}."
-                },
-            )
         # Retrieve the request sysid from the URL
         current_url = parse.urlparse(parse.unquote(page.evaluate("() => window.location.href")))

{browsergym_workarena-0.4.1.dist-info → browsergym_workarena-0.4.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.3
+Metadata-Version: 2.4
 Name: browsergym-workarena
-Version: 0.4.1
+Version: 0.4.2
 Summary: WorkArena benchmark for BrowserGym
 Project-URL: homepage, https://github.com/ServiceNow/WorkArena
 Author: Léo Boisvert, Alex Drouin, Maxime Gasse, Alex Lacoste, Manuel Del Verme, Megh Thakkar
@@ -22,20 +22,44 @@ Requires-Dist: tenacity>=8.2.3
 Requires-Dist: tqdm>=4.66.2
 Description-Content-Type: text/markdown
+<a href="./assets/WorkArena_banner.png">
+  <img src="./assets/WorkArena_banner.png" width="1000" />
+</a>
 # WorkArena: A Benchmark for Evaluating Agents on Knowledge Work Tasks
-[[Benchmark Contents]](#benchmark-contents) ♦ [[Getting Started]](#getting-started) ♦ [[Live Demo]](#live-demo) ♦ [[BrowserGym]](https://github.com/ServiceNow/BrowserGym) ♦ [[Citing This Work]](#citing-this-work)
+[[Benchmark Contents]](#benchmark-contents) ♦ [[Getting Started]](#getting-started) ♦ [[Live Demo]](#live-demo) ♦ [[BrowserGym]](https://github.com/ServiceNow/BrowserGym) ♦ [[Citing This Work]](#citing-this-work) ♦ [Join us on Discord!](https://discord.gg/rDkP69X7)
+## Join Our Discord Community
+Want to brainstorm ideas, troubleshoot issues, or just geek out with fellow agent builders? Our official Discord server is the perfect place to connect and collaborate. Come hang out with us to:
+- Exchange tips, tricks, and success stories
+- Get real-time support and feedback
+- Stay updated on the latest features and announcements
+[Join us on Discord!](https://discord.gg/rDkP69X7)
+---
+### Explore the BrowserGym Ecosystem
+Looking for more tools and resources? Check out these open-source projects:
+- **[AgentLab](https://github.com/ServiceNow/AgentLab)**
+- **[BrowserGym](https://github.com/ServiceNow/BrowserGym)**
+Both are part of the broader [BrowserGym ecosystem](https://arxiv.org/abs/2412.05467)
 ### Papers
 *  [ICML 2024] WorkArena: How Capable are Web Agents at Solving Common Knowledge Work Tasks? [[Paper]](https://arxiv.org/abs/2403.07718)
-*  WorkArena++: Towards Compositional Planning and Reasoning-based Common Knowledge Work Tasks [[Paper]](https://arxiv.org/abs/2407.05291)
+*  [NeurIPS 2024] WorkArena++: Towards Compositional Planning and Reasoning-based Common Knowledge Work Tasks [[Paper]](https://arxiv.org/abs/2407.05291)
 `WorkArena` is a suite of browser-based tasks tailored to gauge web agents' effectiveness in supporting routine tasks for knowledge workers.
 By harnessing the ubiquitous [ServiceNow](https://www.servicenow.com/what-is-servicenow.html) platform, this benchmark will be instrumental in assessing the widespread state of such automations in modern knowledge work environments.
-WorkArena is included in [BrowserGym](https://github.com/ServiceNow/BrowserGym), a conversational gym environment for the evaluation of web agents.
+The preferred way to evaluate on WorkArena is with [AgentLab](https://github.com/ServiceNow/AgentLab/) which will conduct parallel experiments through [BrowserGym](https://github.com/ServiceNow/BrowserGym) and report on a [unified leaderboard](https://huggingface.co/spaces/ServiceNow/browsergym-leaderboard).
 https://github.com/ServiceNow/WorkArena/assets/2374980/68640f09-7d6f-4eb1-b556-c294a6afef70
@@ -48,7 +72,9 @@ To setup WorkArena, you will need to get your own ServiceNow instance, install o
 1. Go to https://developer.servicenow.com/ and create an account.
 2. Click on `Request an instance` and select the `Washington` release (initializing the instance will take a few minutes)
 3. Once the instance is ready, you should see your instance URL and credentials. If not, click _Return to the Developer Portal_, then navigate to _Manage instance password_ and click _Reset instance password_.
-4. You should now see your URL and credentials. Based on this information, set the following environment variables:
+4. Change the role of the user to admin in your instance parameters ![image](https://github.com/user-attachments/assets/6f0fbf8e-f40f-411a-84cb-fead93d85f60)
+5. You should now see your URL and credentials. Based on this information, set the following environment variables:
     * `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
     * `SNOW_INSTANCE_UNAME`: The username, should be "admin"
     * `SNOW_INSTANCE_PWD`: The password, make sure you place the value in quotes "" and be mindful of [escaping special shell characters](https://onlinelinuxtools.com/escape-shell-characters). Running `echo $SNOW_INSTANCE_PWD` should print the correct password.
@@ -123,41 +149,6 @@ https://github.com/ServiceNow/WorkArena/assets/1726818/ca26dfaf-2358-4418-855f-8
 https://github.com/ServiceNow/WorkArena/assets/1726818/0023232c-081f-4be4-99bd-f60c766e6c3f
-## Getting Started
-To setup WorkArena, you will need to get your own ServiceNow instance, install our Python package, and upload some data to your instance. Follow the steps below to achieve this.
-### a) Create a ServiceNow Developer Instance
-1. Go to https://developer.servicenow.com/ and create an account.
-2. Click on `Request an instance` and select the `Washington` release (initializing the instance will take a few minutes)
-3. Once the instance is ready, you should see your instance URL and credentials. If not, click _Return to the Developer Portal_, then navigate to _Manage instance password_ and click _Reset instance password_.
-4. You should now see your URL and credentials. Based on this information, set the following environment variables:
-    * `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
-    * `SNOW_INSTANCE_UNAME`: The username, should be "admin"
-    * `SNOW_INSTANCE_PWD`: The password, make sure you place the value in single quotes '' and be mindful of [escaping special shell characters](https://onlinelinuxtools.com/escape-shell-characters). Running `echo $SNOW_INSTANCE_PWD` should print the correct password.
-6. Log into your instance via a browser using the admin credentials. Close any popup that appears on the main screen (e.g., agreeing to analytics).
-**Warning:** Feel free to look around the platform, but please make sure you revert any changes (e.g., changes to list views, pinning some menus, etc.) as these changes will be persistent and affect the benchmarking process.
-### b) Install WorkArena and Initialize your Instance
-Run the following command to install WorkArena in the [BrowswerGym](https://github.com/servicenow/browsergym) environment:
-```
-pip install browsergym-workarena
-```
-Then, install [Playwright](https://github.com/microsoft/playwright):
-```
-playwright install
-```
-Finally, run this command in a terminal to upload the benchmark data to your ServiceNow instance:
-```
-workarena-install
-```
-Your installation is now complete! 🎉
 ## Live Demo
 Run this code to see WorkArena in action.
@@ -169,12 +160,12 @@ Note: the following example executes WorkArena's oracle (cheat) function to solv
 import random
 from browsergym.core.env import BrowserEnv
-from browsergym.workarena import ALL_WORKARENA_TASKS
+from browsergym.workarena import ATOMIC_TASKS
 from time import sleep
-random.shuffle(ALL_WORKARENA_TASKS)
-for task in ALL_WORKARENA_TASKS:
+random.shuffle(ATOMIC_TASKS)
+for task in ATOMIC_TASKS:
     print("Task:", task)
     # Instantiate a new environment
@@ -276,4 +267,4 @@ Please use the following BibTeX to cite our work:
       primaryClass={cs.AI},
       url={https://arxiv.org/abs/2407.05291},
 }
-```
+```

{browsergym_workarena-0.4.1.dist-info → browsergym_workarena-0.4.2.dist-info}/RECORD RENAMED Viewed

@@ -1,7 +1,7 @@
-browsergym/workarena/__init__.py,sha256=ocdVJcRZysM8quznRst33KAV39ubpZuvVgjjwQXmKtw,6289
+browsergym/workarena/__init__.py,sha256=4tXIdcxacjmC3AjbzOQBrX2PrM_lEq_1UbmXKCv1_fk,6289
 browsergym/workarena/config.py,sha256=tblmOUpqSoL3qlQHK_TFEDSFbC3o2kuRP_GFpoTNsX4,8522
-browsergym/workarena/install.py,sha256=UaPE1K70xJB-2Gr1P5rJbcolkwMeWyRt04F7_5gpR4E,39341
-browsergym/workarena/instance.py,sha256=Qw4lzHhgnl8IuiWOelsmzCJce3jXYivYYwtfTPt2H-s,4314
+browsergym/workarena/install.py,sha256=iEps7IkXFObJaQlE9t78LUFvoqsfKsQbcLLvuFIfBK8,39728
+browsergym/workarena/instance.py,sha256=-w21jT-lnXVWtUolJbuTKsPuULvq-Qa-j9FwdfNJrmE,4343
 browsergym/workarena/utils.py,sha256=mD6RqVua-m1-mKM1RGGlUEu1s6un0ZI9a5ZTPN7g1hY,3199
 browsergym/workarena/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 browsergym/workarena/api/category.py,sha256=4oiwPnRas0ZWCdky76zhNpu_9PfB_HmhnFa_DJZyGfA,2084
@@ -79,11 +79,11 @@ browsergym/workarena/tasks/comp_building_block.py,sha256=Lg3KbAWrxzAHe5XbPN6L8bv
 browsergym/workarena/tasks/dashboard.py,sha256=HDGygBVtUM88lWKkUjyd43JvqmGUOPjmGfmRPkTJruE,34199
 browsergym/workarena/tasks/form.py,sha256=_s07yZ-zcZbi5v6VK6km1BPzUfIFfMEVWFm56QhoznM,64141
 browsergym/workarena/tasks/knowledge.py,sha256=kANjlC7DpptMbRlUlZGdDjqZeWIwwyJzozV58qEA6KU,13751
-browsergym/workarena/tasks/list.py,sha256=4Ov7fHD4smr_L_EB9og7j7pWTQ2zKAI8LWRrr-7ryiA,53389
+browsergym/workarena/tasks/list.py,sha256=7eb9F1JooLzFGIciul2_E1bCmNyBo5AzOPozO1p1HaM,55778
 browsergym/workarena/tasks/mark_duplicate_problem.py,sha256=2znPoyuC47hkIEz59jWR-KB2o4GKJ9z5K_C-mpBqBfE,7278
 browsergym/workarena/tasks/navigation.py,sha256=Y80DpL8xBA8u9zSudW0W6Vf4qaRZUgW-jQO7pl6gOFs,8729
 browsergym/workarena/tasks/send_chat_message.py,sha256=8yWSBEMDpv_reU4QH92rjtyPV6ZjhOAgby465Olc3jM,3854
-browsergym/workarena/tasks/service_catalog.py,sha256=y-MxuJ-L3uJDB1RJz4cUpkoCN1F-Gc8q9HqzbOY_Cpg,25099
+browsergym/workarena/tasks/service_catalog.py,sha256=g1X2id4PHAyYPYZ6vkwEjJusgx8SCyEjZqC4SilWoaA,24739
 browsergym/workarena/tasks/compositional/__init__.py,sha256=zgbl23owwUZSnFD84rh-QJitaAsNCH0PNSct_H_NrM4,2341
 browsergym/workarena/tasks/compositional/base.py,sha256=eIZhfpBOvZvrlC2X7PSbY_7JrILuezYe-NRzDTECHik,14578
 browsergym/workarena/tasks/compositional/dash_do_base.py,sha256=ihxgwVxUfxBJXt49KzOSEH1i_8uymm1oMLGPrsD4zfI,58252
@@ -131,8 +131,8 @@ browsergym/workarena/tasks/utils/js_utils.js,sha256=n97fmY2Jkr59rEcQSuSbCnn1L2ZN
 browsergym/workarena/tasks/utils/private_tasks.py,sha256=r7Z9SnBMuZdZ2i-tK6eULj0q8hclANXFSzdLl49KYHI,2128
 browsergym/workarena/tasks/utils/string.py,sha256=ir5_ASD9QSFMZ9kuHo2snSXRuSfv_wROH6nxBLOTP4I,330
 browsergym/workarena/tasks/utils/utils.py,sha256=xQD-njEwgN7qxfn1dLBN8MYfd3kl3TuVfpmI1yxML9k,955
-browsergym_workarena-0.4.1.dist-info/METADATA,sha256=dboAv2_pwEwNrxbHQKrgKHnG2oxLHq_iB5qO5oAeUms,12498
-browsergym_workarena-0.4.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-browsergym_workarena-0.4.1.dist-info/entry_points.txt,sha256=1lCeAbQFCcU6UTFwS5QIA3TKhT2P9ZabaZKT7sIShKc,137
-browsergym_workarena-0.4.1.dist-info/licenses/LICENSE,sha256=sZLFiZHo_1hcxXRhXUDnQYVATUuWwRCdQjBxqxNnNEs,579
-browsergym_workarena-0.4.1.dist-info/RECORD,,
+browsergym_workarena-0.4.2.dist-info/METADATA,sha256=SV-hDJ1zdD4tS1ZKbfZCj86F2PphPAgY0X-JEY7w8CY,11698
+browsergym_workarena-0.4.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+browsergym_workarena-0.4.2.dist-info/entry_points.txt,sha256=1lCeAbQFCcU6UTFwS5QIA3TKhT2P9ZabaZKT7sIShKc,137
+browsergym_workarena-0.4.2.dist-info/licenses/LICENSE,sha256=sZLFiZHo_1hcxXRhXUDnQYVATUuWwRCdQjBxqxNnNEs,579
+browsergym_workarena-0.4.2.dist-info/RECORD,,

{browsergym_workarena-0.4.1.dist-info → browsergym_workarena-0.4.2.dist-info}/WHEEL RENAMED Viewed

@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: hatchling 1.25.0
+Generator: hatchling 1.27.0
 Root-Is-Purelib: true
 Tag: py3-none-any

{browsergym_workarena-0.4.1.dist-info → browsergym_workarena-0.4.2.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{browsergym_workarena-0.4.1.dist-info → browsergym_workarena-0.4.2.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

browsergym-workarena 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl

browsergym-workarena 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl