PyPI - browsergym-workarena - Versions diffs - 0.3.2__tar.gz → 0.4.2__tar.gz - Mend

browsergym-workarena 0.3.2tar.gz → 0.4.2tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (167) hide show

browsergym_workarena-0.4.2/.github/ISSUE_TEMPLATE/bug_report.yml ADDED Viewed

@@ -0,0 +1,125 @@
+name: Bug Report
+description: Report an issue with browsergym/Playwright setup
+title: "[Bug]: "
+labels: ["bug"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for reporting an issue! Please fill out the information below to help us diagnose the problem.
+  - type: input
+    id: browsergym-version
+    attributes:
+      label: Browsergym Version
+      description: What version of browsergym are you using?
+      placeholder: "e.g., 1.2.0"
+    validations:
+      required: true
+  - type: input
+    id: playwright-version
+    attributes:
+      label: Playwright Version
+      description: What version of Playwright are you using?
+      placeholder: "e.g., 1.41.0"
+    validations:
+      required: true
+  - type: dropdown
+    id: os-type
+    attributes:
+      label: Operating System Type
+      description: What type of operating system are you using?
+      options:
+        - Ubuntu
+        - Debian
+        - macOS
+        - Windows
+        - Other Linux (specify version below)
+    validations:
+      required: true
+  - type: dropdown
+    id: os-version
+    attributes:
+      label: Operating System Version
+      description: Select your OS version
+      options:
+        # Ubuntu LTS versions
+        - Ubuntu 24.04 LTS (Noble Numbat)
+        - Ubuntu 22.04 LTS (Jammy Jellyfish)
+        - Ubuntu 20.04 LTS (Focal Fossa)
+        # macOS versions
+        - macOS 14 (Sonoma)
+        - macOS 13 (Ventura)
+        - macOS 12 (Monterey)
+        - macOS 11 (Big Sur)
+        # Windows versions
+        - Windows 11 23H2 (Build 22631)
+        - Windows 11 22H2 (Build 22621)
+        - Windows 10 22H2 (Build 19045)
+        - Windows 10 21H2 (Build 19044)
+        # Other
+        - Other (specify in Additional Context)
+    validations:
+      required: true
+  - type: dropdown
+    id: browsers
+    attributes:
+      label: Affected Browsers
+      description: Which browsers are you seeing this issue with?
+      multiple: true
+      options:
+        - Chromium
+        - Firefox
+        - WebKit
+    validations:
+      required: true
+  - type: textarea
+    id: what-happened
+    attributes:
+      label: What happened?
+      description: Please describe what happened and what you expected to happen
+      placeholder: |
+        1. What did you do?
+        2. What happened?
+        3. What did you expect to happen?
+    validations:
+      required: true
+  - type: textarea
+    id: reproduction
+    attributes:
+      label: Reproduction Steps
+      description: Please provide minimal steps to reproduce the issue
+      placeholder: |
+        1. Install dependencies...
+        2. Run command...
+        3. See error...
+    validations:
+      required: true
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant Logs
+      description: Please copy and paste any relevant logs. This will be automatically formatted into code.
+      render: shell
+  - type: textarea
+    id: additional-context
+    attributes:
+      label: Additional Context
+      description: For "Other" OS versions, please specify here. Also add any other context about the problem.
+  - type: checkboxes
+    id: terms
+    attributes:
+      label: Code of Conduct
+      description: By submitting this issue, you agree to follow our project's Code of Conduct
+      options:
+        - label: I agree to follow this project's Code of Conduct
+          required: true

browsergym_workarena-0.4.2/CODE_OF_CONDUCT.md ADDED Viewed

@@ -0,0 +1,46 @@
+### ServiceNow Open Source Code-of-Conduct
+This code of conduct provides guidelines for participation in ServiceNow-managed open-source communities and projects.
+**Discussion forum guidelines**
+Communities thrive when members support each other and provide useful feedback.
+- Be polite and courteous. Respect and treat others as you would expect to be treated yourself.
+- Respect your audience. Posts should not upset, annoy, threaten, harass, abuse or embarrass other members.
+- User Contributions must not include material that is defamatory, obscene, indecent, abusive, offensive, harassing, violent, hateful, inflammatory or otherwise objectionable.
+- Lively and collegial discussions are always encouraged in a healthy community. It is okay to argue facts but not okay to argue personalities or personal beliefs.
+- Do not use text formats such as all caps or bold that may come across as annoying or rude, or that send an unduly strong message.
+- Do not publish anyone’s private personal information without their explicit consent.
+- Avoid using abbreviations or terminology that others may not understand. An abbreviation may mean something to you but in another context or country, it may have another meaning.
+- Be accountable for your actions by correcting your mistakes and indicating where you have changed a previous post of yours.
+- Mark content as correct and helpful, and provide feedback. If you read a discussion post that you find helpful, we encourage you to leave a positive vote and comment in the replies. If you find a post that is unhelpful, please provide more information in the issue comments.
+**Issue board guidelines**
+Many open-source projects provide an Issues board, with similar functionality to a Discussions forum. The same rules from the discussion forum guidelines apply to the Issues board.
+ServiceNow suggests the following technical support pathways for open-source projects:
+1. Clearly identify and document the issue or question you have.
+2. View the Documentation.
+3. Search the Discussions.
+4. Search the project knowledge base or Wiki for known errors, useful solutions, and troubleshooting tips.
+5. Check the project guidelines in the [`CONTRIBUTING.md`](CONTRIBUTING.md) file if you would like details on how you can submit a change. Community contributions are valued and appreciated!
+6. Log an Issue if it hasn’t already been logged. If the issue has already been logged by another user, vote it up, and add a comment with additional or missing information. Do your best to choose the correct category when logging a new issue. This will make it easier to differentiate bugs from new feature requests or ideas. If after logging an issue you find the solution, please close your issue and provide a comment with the solution. This will help the project owners and other users.
+7. As a last resort only, contact the project's contributors to see if they can help.
+**Repositories**
+- Read and follow the license instructions
+- Remember to include citations if you use someone else’s work in your own project. Use the [`CITATION.cff`](CITATION.cff) to find the correct project citation reference.
+- ‘Star’ project repos to save for future reference.
+- ‘Watch’ project repos to get notifications of changes – this can get noisy for some projects, so only watch the ones you really need to track closely.
+**Enforcement and reporting**
+We encourage community members and users to help each other and to resolve issues amongst themselves as much as possible. If a matter cannot be resolved in good faith within the means available, please reach out to a team member or email servicenow-research@servicenow.com.
+**ServiceNow Disclaimer.**
+We may, but are under no obligation to, monitor or censor comments made by users or content provided by contributors and we are not responsible for the accuracy, completeness, appropriateness or legality of anything posted, depicted or otherwise provided by third‑party users and we disclaim any and all liability relating thereto.

{browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.3
+Metadata-Version: 2.4
 Name: browsergym-workarena
-Version: 0.3.2
+Version: 0.4.2
 Summary: WorkArena benchmark for BrowserGym
 Project-URL: homepage, https://github.com/ServiceNow/WorkArena
 Author: Léo Boisvert, Alex Drouin, Maxime Gasse, Alex Lacoste, Manuel Del Verme, Megh Thakkar
@@ -22,20 +22,44 @@ Requires-Dist: tenacity>=8.2.3
 Requires-Dist: tqdm>=4.66.2
 Description-Content-Type: text/markdown
+<a href="./assets/WorkArena_banner.png">
+  <img src="./assets/WorkArena_banner.png" width="1000" />
+</a>
 # WorkArena: A Benchmark for Evaluating Agents on Knowledge Work Tasks
-[[Benchmark Contents]](#benchmark-contents) ♦ [[Getting Started]](#getting-started) ♦ [[Live Demo]](#live-demo) ♦ [[BrowserGym]](https://github.com/ServiceNow/BrowserGym) ♦ [[Citing This Work]](#citing-this-work)
+[[Benchmark Contents]](#benchmark-contents) ♦ [[Getting Started]](#getting-started) ♦ [[Live Demo]](#live-demo) ♦ [[BrowserGym]](https://github.com/ServiceNow/BrowserGym) ♦ [[Citing This Work]](#citing-this-work) ♦ [Join us on Discord!](https://discord.gg/rDkP69X7)
+## Join Our Discord Community
+Want to brainstorm ideas, troubleshoot issues, or just geek out with fellow agent builders? Our official Discord server is the perfect place to connect and collaborate. Come hang out with us to:
+- Exchange tips, tricks, and success stories
+- Get real-time support and feedback
+- Stay updated on the latest features and announcements
+[Join us on Discord!](https://discord.gg/rDkP69X7)
+---
+### Explore the BrowserGym Ecosystem
+Looking for more tools and resources? Check out these open-source projects:
+- **[AgentLab](https://github.com/ServiceNow/AgentLab)**
+- **[BrowserGym](https://github.com/ServiceNow/BrowserGym)**
+Both are part of the broader [BrowserGym ecosystem](https://arxiv.org/abs/2412.05467)
 ### Papers
 *  [ICML 2024] WorkArena: How Capable are Web Agents at Solving Common Knowledge Work Tasks? [[Paper]](https://arxiv.org/abs/2403.07718)
-*  WorkArena++: Towards Compositional Planning and Reasoning-based Common Knowledge Work Tasks [[Paper]](https://arxiv.org/abs/2407.05291)
+*  [NeurIPS 2024] WorkArena++: Towards Compositional Planning and Reasoning-based Common Knowledge Work Tasks [[Paper]](https://arxiv.org/abs/2407.05291)
 `WorkArena` is a suite of browser-based tasks tailored to gauge web agents' effectiveness in supporting routine tasks for knowledge workers.
 By harnessing the ubiquitous [ServiceNow](https://www.servicenow.com/what-is-servicenow.html) platform, this benchmark will be instrumental in assessing the widespread state of such automations in modern knowledge work environments.
-WorkArena is included in [BrowserGym](https://github.com/ServiceNow/BrowserGym), a conversational gym environment for the evaluation of web agents.
+The preferred way to evaluate on WorkArena is with [AgentLab](https://github.com/ServiceNow/AgentLab/) which will conduct parallel experiments through [BrowserGym](https://github.com/ServiceNow/BrowserGym) and report on a [unified leaderboard](https://huggingface.co/spaces/ServiceNow/browsergym-leaderboard).
 https://github.com/ServiceNow/WorkArena/assets/2374980/68640f09-7d6f-4eb1-b556-c294a6afef70
@@ -48,7 +72,9 @@ To setup WorkArena, you will need to get your own ServiceNow instance, install o
 1. Go to https://developer.servicenow.com/ and create an account.
 2. Click on `Request an instance` and select the `Washington` release (initializing the instance will take a few minutes)
 3. Once the instance is ready, you should see your instance URL and credentials. If not, click _Return to the Developer Portal_, then navigate to _Manage instance password_ and click _Reset instance password_.
-4. You should now see your URL and credentials. Based on this information, set the following environment variables:
+4. Change the role of the user to admin in your instance parameters ![image](https://github.com/user-attachments/assets/6f0fbf8e-f40f-411a-84cb-fead93d85f60)
+5. You should now see your URL and credentials. Based on this information, set the following environment variables:
     * `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
     * `SNOW_INSTANCE_UNAME`: The username, should be "admin"
     * `SNOW_INSTANCE_PWD`: The password, make sure you place the value in quotes "" and be mindful of [escaping special shell characters](https://onlinelinuxtools.com/escape-shell-characters). Running `echo $SNOW_INSTANCE_PWD` should print the correct password.
@@ -123,41 +149,6 @@ https://github.com/ServiceNow/WorkArena/assets/1726818/ca26dfaf-2358-4418-855f-8
 https://github.com/ServiceNow/WorkArena/assets/1726818/0023232c-081f-4be4-99bd-f60c766e6c3f
-## Getting Started
-To setup WorkArena, you will need to get your own ServiceNow instance, install our Python package, and upload some data to your instance. Follow the steps below to achieve this.
-### a) Create a ServiceNow Developer Instance
-1. Go to https://developer.servicenow.com/ and create an account.
-2. Click on `Request an instance` and select the `Washington` release (initializing the instance will take a few minutes)
-3. Once the instance is ready, you should see your instance URL and credentials. If not, click _Return to the Developer Portal_, then navigate to _Manage instance password_ and click _Reset instance password_.
-4. You should now see your URL and credentials. Based on this information, set the following environment variables:
-    * `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
-    * `SNOW_INSTANCE_UNAME`: The username, should be "admin"
-    * `SNOW_INSTANCE_PWD`: The password, make sure you place the value in single quotes '' and be mindful of [escaping special shell characters](https://onlinelinuxtools.com/escape-shell-characters). Running `echo $SNOW_INSTANCE_PWD` should print the correct password.
-6. Log into your instance via a browser using the admin credentials. Close any popup that appears on the main screen (e.g., agreeing to analytics).
-**Warning:** Feel free to look around the platform, but please make sure you revert any changes (e.g., changes to list views, pinning some menus, etc.) as these changes will be persistent and affect the benchmarking process.
-### b) Install WorkArena and Initialize your Instance
-Run the following command to install WorkArena in the [BrowswerGym](https://github.com/servicenow/browsergym) environment:
-```
-pip install browsergym-workarena
-```
-Then, install [Playwright](https://github.com/microsoft/playwright):
-```
-playwright install
-```
-Finally, run this command in a terminal to upload the benchmark data to your ServiceNow instance:
-```
-workarena-install
-```
-Your installation is now complete! 🎉
 ## Live Demo
 Run this code to see WorkArena in action.
@@ -169,12 +160,12 @@ Note: the following example executes WorkArena's oracle (cheat) function to solv
 import random
 from browsergym.core.env import BrowserEnv
-from browsergym.workarena import ALL_WORKARENA_TASKS
+from browsergym.workarena import ATOMIC_TASKS
 from time import sleep
-random.shuffle(ALL_WORKARENA_TASKS)
-for task in ALL_WORKARENA_TASKS:
+random.shuffle(ATOMIC_TASKS)
+for task in ATOMIC_TASKS:
     print("Task:", task)
     # Instantiate a new environment
@@ -276,4 +267,4 @@ Please use the following BibTeX to cite our work:
       primaryClass={cs.AI},
       url={https://arxiv.org/abs/2407.05291},
 }
-```
+```

{browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/README.md RENAMED Viewed

@@ -1,17 +1,41 @@
+<a href="./assets/WorkArena_banner.png">
+  <img src="./assets/WorkArena_banner.png" width="1000" />
+</a>
 # WorkArena: A Benchmark for Evaluating Agents on Knowledge Work Tasks
-[[Benchmark Contents]](#benchmark-contents) ♦ [[Getting Started]](#getting-started) ♦ [[Live Demo]](#live-demo) ♦ [[BrowserGym]](https://github.com/ServiceNow/BrowserGym) ♦ [[Citing This Work]](#citing-this-work)
+[[Benchmark Contents]](#benchmark-contents) ♦ [[Getting Started]](#getting-started) ♦ [[Live Demo]](#live-demo) ♦ [[BrowserGym]](https://github.com/ServiceNow/BrowserGym) ♦ [[Citing This Work]](#citing-this-work) ♦ [Join us on Discord!](https://discord.gg/rDkP69X7)
+## Join Our Discord Community
+Want to brainstorm ideas, troubleshoot issues, or just geek out with fellow agent builders? Our official Discord server is the perfect place to connect and collaborate. Come hang out with us to:
+- Exchange tips, tricks, and success stories
+- Get real-time support and feedback
+- Stay updated on the latest features and announcements
+[Join us on Discord!](https://discord.gg/rDkP69X7)
+---
+### Explore the BrowserGym Ecosystem
+Looking for more tools and resources? Check out these open-source projects:
+- **[AgentLab](https://github.com/ServiceNow/AgentLab)**
+- **[BrowserGym](https://github.com/ServiceNow/BrowserGym)**
+Both are part of the broader [BrowserGym ecosystem](https://arxiv.org/abs/2412.05467)
 ### Papers
 *  [ICML 2024] WorkArena: How Capable are Web Agents at Solving Common Knowledge Work Tasks? [[Paper]](https://arxiv.org/abs/2403.07718)
-*  WorkArena++: Towards Compositional Planning and Reasoning-based Common Knowledge Work Tasks [[Paper]](https://arxiv.org/abs/2407.05291)
+*  [NeurIPS 2024] WorkArena++: Towards Compositional Planning and Reasoning-based Common Knowledge Work Tasks [[Paper]](https://arxiv.org/abs/2407.05291)
 `WorkArena` is a suite of browser-based tasks tailored to gauge web agents' effectiveness in supporting routine tasks for knowledge workers.
 By harnessing the ubiquitous [ServiceNow](https://www.servicenow.com/what-is-servicenow.html) platform, this benchmark will be instrumental in assessing the widespread state of such automations in modern knowledge work environments.
-WorkArena is included in [BrowserGym](https://github.com/ServiceNow/BrowserGym), a conversational gym environment for the evaluation of web agents.
+The preferred way to evaluate on WorkArena is with [AgentLab](https://github.com/ServiceNow/AgentLab/) which will conduct parallel experiments through [BrowserGym](https://github.com/ServiceNow/BrowserGym) and report on a [unified leaderboard](https://huggingface.co/spaces/ServiceNow/browsergym-leaderboard).
 https://github.com/ServiceNow/WorkArena/assets/2374980/68640f09-7d6f-4eb1-b556-c294a6afef70
@@ -24,7 +48,9 @@ To setup WorkArena, you will need to get your own ServiceNow instance, install o
 1. Go to https://developer.servicenow.com/ and create an account.
 2. Click on `Request an instance` and select the `Washington` release (initializing the instance will take a few minutes)
 3. Once the instance is ready, you should see your instance URL and credentials. If not, click _Return to the Developer Portal_, then navigate to _Manage instance password_ and click _Reset instance password_.
-4. You should now see your URL and credentials. Based on this information, set the following environment variables:
+4. Change the role of the user to admin in your instance parameters ![image](https://github.com/user-attachments/assets/6f0fbf8e-f40f-411a-84cb-fead93d85f60)
+5. You should now see your URL and credentials. Based on this information, set the following environment variables:
     * `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
     * `SNOW_INSTANCE_UNAME`: The username, should be "admin"
     * `SNOW_INSTANCE_PWD`: The password, make sure you place the value in quotes "" and be mindful of [escaping special shell characters](https://onlinelinuxtools.com/escape-shell-characters). Running `echo $SNOW_INSTANCE_PWD` should print the correct password.
@@ -99,41 +125,6 @@ https://github.com/ServiceNow/WorkArena/assets/1726818/ca26dfaf-2358-4418-855f-8
 https://github.com/ServiceNow/WorkArena/assets/1726818/0023232c-081f-4be4-99bd-f60c766e6c3f
-## Getting Started
-To setup WorkArena, you will need to get your own ServiceNow instance, install our Python package, and upload some data to your instance. Follow the steps below to achieve this.
-### a) Create a ServiceNow Developer Instance
-1. Go to https://developer.servicenow.com/ and create an account.
-2. Click on `Request an instance` and select the `Washington` release (initializing the instance will take a few minutes)
-3. Once the instance is ready, you should see your instance URL and credentials. If not, click _Return to the Developer Portal_, then navigate to _Manage instance password_ and click _Reset instance password_.
-4. You should now see your URL and credentials. Based on this information, set the following environment variables:
-    * `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
-    * `SNOW_INSTANCE_UNAME`: The username, should be "admin"
-    * `SNOW_INSTANCE_PWD`: The password, make sure you place the value in single quotes '' and be mindful of [escaping special shell characters](https://onlinelinuxtools.com/escape-shell-characters). Running `echo $SNOW_INSTANCE_PWD` should print the correct password.
-6. Log into your instance via a browser using the admin credentials. Close any popup that appears on the main screen (e.g., agreeing to analytics).
-**Warning:** Feel free to look around the platform, but please make sure you revert any changes (e.g., changes to list views, pinning some menus, etc.) as these changes will be persistent and affect the benchmarking process.
-### b) Install WorkArena and Initialize your Instance
-Run the following command to install WorkArena in the [BrowswerGym](https://github.com/servicenow/browsergym) environment:
-```
-pip install browsergym-workarena
-```
-Then, install [Playwright](https://github.com/microsoft/playwright):
-```
-playwright install
-```
-Finally, run this command in a terminal to upload the benchmark data to your ServiceNow instance:
-```
-workarena-install
-```
-Your installation is now complete! 🎉
 ## Live Demo
 Run this code to see WorkArena in action.
@@ -145,12 +136,12 @@ Note: the following example executes WorkArena's oracle (cheat) function to solv
 import random
 from browsergym.core.env import BrowserEnv
-from browsergym.workarena import ALL_WORKARENA_TASKS
+from browsergym.workarena import ATOMIC_TASKS
 from time import sleep
-random.shuffle(ALL_WORKARENA_TASKS)
-for task in ALL_WORKARENA_TASKS:
+random.shuffle(ATOMIC_TASKS)
+for task in ATOMIC_TASKS:
     print("Task:", task)
     # Instantiate a new environment
@@ -252,4 +243,4 @@ Please use the following BibTeX to cite our work:
       primaryClass={cs.AI},
       url={https://arxiv.org/abs/2407.05291},
 }
-```
+```

browsergym_workarena-0.4.2/SECURITY.md ADDED Viewed

@@ -0,0 +1,22 @@
+# Security Policy
+## Reporting a Vulnerability
+If you find a vulnerability in ServiceNow systems, products, or network infrastructure, our [Responsible Disclosure Program](https://www.servicenow.com/company/trust/privacy/responsible-disclosure.html#our+Commitment) is the place to make a report.
+If you find a vulnerability in this open-source project published by the ServiceNow Research team, please email [servicenow-research@servicenow.com](mailto:servicenow-research@servicenow.com) to report your findings.
+We will process your report as soon as possible, depending on the severity of your report. We appreciate everyone’s help in disclosing vulnerabilities in a responsible manner.
+## Guidelines
+Please follow the guidelines below when disclosing vulnerabilities:
+- Report any potential security issue as soon as possible. We will make every effort to quickly resolve the issue.
+- Provide sufficient detail to reproduce the vulnerability, including proof of concept.
+- Please do not disclose an issue to the public or a third party until ServiceNow has resolved it.
+- Make a good faith effort to avoid privacy violations, destruction of data, and interruption or degradation of our service. Only interact with accounts you own or accounts for which you have the explicit permission of the account holder.
+- Redact any language or images that may identify the program or ServiceNow customers from information about a fixed vulnerability.
+- Do not engage in disruptive testing (such as DoS) or any action that could impact the confidentiality, integrity, or availability of information and systems.
+- Do not engage in social engineering or phishing of customers or employees.
+- Please do not request compensation for time and materials or discovered vulnerabilities.

browsergym_workarena-0.4.2/assets/WorkArena_banner.png ADDED Viewed

Binary file

{browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/__init__.py RENAMED Viewed

@@ -1,4 +1,4 @@
-__version__ = "0.3.2"
+__version__ = "0.4.2"
 import inspect
 from logging import warning

{browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/install.py RENAMED Viewed

@@ -788,6 +788,7 @@ def check_instance_release_support():
             f"You are running {version_info['build name']} {version_info}."
         )
         return False
     return True
@@ -800,6 +801,17 @@ def enable_url_login():
     logging.info("URL login enabled.")
+def disable_password_policies():
+    """
+    Disable password policies in the instance.
+    Notes: this is required to allow the creation of users with weak passwords.
+    """
+    _set_sys_property(property_name="glide.security.password.policy.enabled", value="false")
+    logging.info("Password policies disabled.")
 def disable_guided_tours():
     """
     Hide guided tour popups
@@ -1010,6 +1022,9 @@ def setup():
     # Enable URL login (XXX: Do this first since other functions can use URL login)
     enable_url_login()
+    # Disable password policies
+    disable_password_policies()
     # Set default landing page
     set_home_page()

{browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/instance.py RENAMED Viewed

@@ -103,7 +103,7 @@ class SNowInstance:
         # XXX: Need to include the import here to avoid circular imports
         from .utils import ui_login
-        keys = ["build name", "build date", "build tag"]
+        keys = ["build name", "build date", "build tag", "connected to cluster node"]
         # We need to use playwright since the page is loaded dynamically
         # and its source doesn't contain the information we need

{browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/list.py RENAMED Viewed

@@ -101,6 +101,11 @@ EXTRACT_USER_LIST_INFO_CONFIG = [
 class ServiceNowListTask(AbstractServiceNowTask):
+    OPERATOR_EQUALS = "="
+    OPERATOR_NOT_EQUALS = "!="
+    OPERATOR_STARTSWITH = "STARTSWITH"
+    OPERATOR_ISEMPTY = "ISEMPTY"
+    OPERATOR_EMPTYSTRING = "EMPTYSTRING"
     @classmethod
     def all_configs(cls) -> List[dict]:
@@ -777,6 +782,9 @@ class FilterListTask(ServiceNowListTask):
         list_info = self._extract_list_info(page)
         current_query = list_info["query"]
+        if not current_query:
+            return 0, False, "", {"message": "There are no filters yet."}
         # Replace "new query" statements with the standard OR separator
         current_query = current_query.replace("^NQ", "^OR")
@@ -789,24 +797,74 @@ class FilterListTask(ServiceNowListTask):
             current_sep = "^"
         if current_kind != self.filter_kind:
-            return 0, False, "", {"message": "The kind of filter used is incorrect."}
+            return (
+                0,
+                False,
+                "",
+                {"message": f"The kind of filter used is incorrect: {current_query}."},
+            )
         # Extract the query pieces for validation
         current_query = current_query.split(current_sep)
         # Validate query length is ok
         if len(current_query) != self.filter_len:
-            return 0, False, "", {"message": "Incorrect number of filter conditions."}
+            return (
+                0,
+                False,
+                "",
+                {"message": f"Incorrect number of filter conditions: {current_query}."},
+            )
+        # Parse column names, operators, and values
+        current_columns, current_operators, current_values = [], [], []
+        # Note that this is not exhaustive. If/when other operators are added, this will have to be updated.
+        for predicate in current_query:
+            if self.OPERATOR_EMPTYSTRING in predicate:
+                current_columns.append(predicate.replace(self.OPERATOR_EMPTYSTRING, "").strip())
+                current_operators.append("=")
+                current_values.append("")
+            elif self.OPERATOR_ISEMPTY in predicate:
+                current_columns.append(predicate.replace(self.OPERATOR_ISEMPTY, "").strip())
+                current_operators.append("=")
+                current_values.append("")
+            elif any(
+                unsupported_operator in predicate
+                for unsupported_operator in [self.OPERATOR_NOT_EQUALS, self.OPERATOR_STARTSWITH]
+            ):
+                return (
+                    0,
+                    False,
+                    "",
+                    {"message": f"Unexpected operator in filter condition: {current_query}."},
+                )
+            elif self.OPERATOR_EQUALS in predicate:
+                col, val = predicate.split(self.OPERATOR_EQUALS, 1)
+                current_columns.append(col.strip())
+                current_operators.append("=")
+                current_values.append(val.strip())
+            else:
+                return (
+                    0,
+                    False,
+                    "",
+                    {"message": f"Unexpected operator in filter condition: {current_query}."},
+                )
-        # Validate query columns are ok
-        current_columns = [x.split("=")[0] for x in current_query]
         if set(current_columns) != set(self.filter_columns):
-            return 0, False, "", {"message": "Incorrect filter columns."}
+            return (
+                0,
+                False,
+                "",
+                {
+                    "message": f"Incorrect filter columns: {set(current_columns)}. Expected: {set(self.filter_columns)}."
+                },
+            )
         # Validate query values are ok
         # This is the tricky part because we need to expand the values to their display values
         # We also need to handle the case where the value is a reference
-        current_values = [x.split("=")[1] for x in current_query]
         # Handle filtering across multiple rows
         if len(set(current_columns)) < len(current_columns):
@@ -856,9 +914,21 @@ class FilterListTask(ServiceNowListTask):
         # Validate the values
         if set(current_values) != set(self.filter_values):
-            return 0, False, "", {"message": "Incorrect filter values."}
+            return (
+                0,
+                False,
+                "",
+                {
+                    "message": f"Incorrect filter values {set(current_values)}. Expected: {set(self.filter_values)}."
+                },
+            )
-        return 1, True, "Nice work, thank you!", {"message": "Correct filter."}
+        return (
+            1,
+            True,
+            "Nice work, thank you!",
+            {"message": f"Correct filter: {list_info['query']}."},
+        )
 class ExtractListInfoTask(ServiceNowListTask):

{browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/service_catalog.py RENAMED Viewed

@@ -472,16 +472,6 @@ class OrderHardwareTask(AbstractServiceNowTask):
             )
     def validate(self, page: Page, chat_messages: list[str]) -> tuple[int, bool, str, dict]:
-        right_url = check_url_suffix_match(page, expected_url=self.final_url, task=self)
-        if not right_url:
-            return (
-                0,
-                False,
-                "",
-                {
-                    "message": f"The page is not in the right URL to validate task {self.__class__.__name__}."
-                },
-            )
         # Retrieve the request sysid from the URL
         current_url = parse.urlparse(parse.unquote(page.evaluate("() => window.location.href")))

browsergym-workarena 0.3.2__tar.gz → 0.4.2__tar.gz

browsergym-workarena 0.3.2tar.gz → 0.4.2tar.gz