browsergym-workarena 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- __version__ = "0.4.1"
1
+ __version__ = "0.4.2"
2
2
 
3
3
  import inspect
4
4
  from logging import warning
@@ -788,6 +788,7 @@ def check_instance_release_support():
788
788
  f"You are running {version_info['build name']} {version_info}."
789
789
  )
790
790
  return False
791
+
791
792
  return True
792
793
 
793
794
 
@@ -800,6 +801,17 @@ def enable_url_login():
800
801
  logging.info("URL login enabled.")
801
802
 
802
803
 
804
+ def disable_password_policies():
805
+ """
806
+ Disable password policies in the instance.
807
+
808
+ Notes: this is required to allow the creation of users with weak passwords.
809
+
810
+ """
811
+ _set_sys_property(property_name="glide.security.password.policy.enabled", value="false")
812
+ logging.info("Password policies disabled.")
813
+
814
+
803
815
  def disable_guided_tours():
804
816
  """
805
817
  Hide guided tour popups
@@ -1010,6 +1022,9 @@ def setup():
1010
1022
  # Enable URL login (XXX: Do this first since other functions can use URL login)
1011
1023
  enable_url_login()
1012
1024
 
1025
+ # Disable password policies
1026
+ disable_password_policies()
1027
+
1013
1028
  # Set default landing page
1014
1029
  set_home_page()
1015
1030
 
@@ -103,7 +103,7 @@ class SNowInstance:
103
103
  # XXX: Need to include the import here to avoid circular imports
104
104
  from .utils import ui_login
105
105
 
106
- keys = ["build name", "build date", "build tag"]
106
+ keys = ["build name", "build date", "build tag", "connected to cluster node"]
107
107
 
108
108
  # We need to use playwright since the page is loaded dynamically
109
109
  # and its source doesn't contain the information we need
@@ -101,6 +101,11 @@ EXTRACT_USER_LIST_INFO_CONFIG = [
101
101
 
102
102
 
103
103
  class ServiceNowListTask(AbstractServiceNowTask):
104
+ OPERATOR_EQUALS = "="
105
+ OPERATOR_NOT_EQUALS = "!="
106
+ OPERATOR_STARTSWITH = "STARTSWITH"
107
+ OPERATOR_ISEMPTY = "ISEMPTY"
108
+ OPERATOR_EMPTYSTRING = "EMPTYSTRING"
104
109
 
105
110
  @classmethod
106
111
  def all_configs(cls) -> List[dict]:
@@ -777,6 +782,9 @@ class FilterListTask(ServiceNowListTask):
777
782
  list_info = self._extract_list_info(page)
778
783
  current_query = list_info["query"]
779
784
 
785
+ if not current_query:
786
+ return 0, False, "", {"message": "There are no filters yet."}
787
+
780
788
  # Replace "new query" statements with the standard OR separator
781
789
  current_query = current_query.replace("^NQ", "^OR")
782
790
 
@@ -789,24 +797,74 @@ class FilterListTask(ServiceNowListTask):
789
797
  current_sep = "^"
790
798
 
791
799
  if current_kind != self.filter_kind:
792
- return 0, False, "", {"message": "The kind of filter used is incorrect."}
800
+ return (
801
+ 0,
802
+ False,
803
+ "",
804
+ {"message": f"The kind of filter used is incorrect: {current_query}."},
805
+ )
793
806
 
794
807
  # Extract the query pieces for validation
795
808
  current_query = current_query.split(current_sep)
796
809
 
797
810
  # Validate query length is ok
798
811
  if len(current_query) != self.filter_len:
799
- return 0, False, "", {"message": "Incorrect number of filter conditions."}
812
+ return (
813
+ 0,
814
+ False,
815
+ "",
816
+ {"message": f"Incorrect number of filter conditions: {current_query}."},
817
+ )
818
+
819
+ # Parse column names, operators, and values
820
+ current_columns, current_operators, current_values = [], [], []
821
+
822
+ # Note that this is not exhaustive. If/when other operators are added, this will have to be updated.
823
+ for predicate in current_query:
824
+ if self.OPERATOR_EMPTYSTRING in predicate:
825
+ current_columns.append(predicate.replace(self.OPERATOR_EMPTYSTRING, "").strip())
826
+ current_operators.append("=")
827
+ current_values.append("")
828
+ elif self.OPERATOR_ISEMPTY in predicate:
829
+ current_columns.append(predicate.replace(self.OPERATOR_ISEMPTY, "").strip())
830
+ current_operators.append("=")
831
+ current_values.append("")
832
+ elif any(
833
+ unsupported_operator in predicate
834
+ for unsupported_operator in [self.OPERATOR_NOT_EQUALS, self.OPERATOR_STARTSWITH]
835
+ ):
836
+ return (
837
+ 0,
838
+ False,
839
+ "",
840
+ {"message": f"Unexpected operator in filter condition: {current_query}."},
841
+ )
842
+ elif self.OPERATOR_EQUALS in predicate:
843
+ col, val = predicate.split(self.OPERATOR_EQUALS, 1)
844
+ current_columns.append(col.strip())
845
+ current_operators.append("=")
846
+ current_values.append(val.strip())
847
+ else:
848
+ return (
849
+ 0,
850
+ False,
851
+ "",
852
+ {"message": f"Unexpected operator in filter condition: {current_query}."},
853
+ )
800
854
 
801
- # Validate query columns are ok
802
- current_columns = [x.split("=")[0] for x in current_query]
803
855
  if set(current_columns) != set(self.filter_columns):
804
- return 0, False, "", {"message": "Incorrect filter columns."}
856
+ return (
857
+ 0,
858
+ False,
859
+ "",
860
+ {
861
+ "message": f"Incorrect filter columns: {set(current_columns)}. Expected: {set(self.filter_columns)}."
862
+ },
863
+ )
805
864
 
806
865
  # Validate query values are ok
807
866
  # This is the tricky part because we need to expand the values to their display values
808
867
  # We also need to handle the case where the value is a reference
809
- current_values = [x.split("=")[1] for x in current_query]
810
868
 
811
869
  # Handle filtering across multiple rows
812
870
  if len(set(current_columns)) < len(current_columns):
@@ -856,9 +914,21 @@ class FilterListTask(ServiceNowListTask):
856
914
 
857
915
  # Validate the values
858
916
  if set(current_values) != set(self.filter_values):
859
- return 0, False, "", {"message": "Incorrect filter values."}
917
+ return (
918
+ 0,
919
+ False,
920
+ "",
921
+ {
922
+ "message": f"Incorrect filter values {set(current_values)}. Expected: {set(self.filter_values)}."
923
+ },
924
+ )
860
925
 
861
- return 1, True, "Nice work, thank you!", {"message": "Correct filter."}
926
+ return (
927
+ 1,
928
+ True,
929
+ "Nice work, thank you!",
930
+ {"message": f"Correct filter: {list_info['query']}."},
931
+ )
862
932
 
863
933
 
864
934
  class ExtractListInfoTask(ServiceNowListTask):
@@ -472,16 +472,6 @@ class OrderHardwareTask(AbstractServiceNowTask):
472
472
  )
473
473
 
474
474
  def validate(self, page: Page, chat_messages: list[str]) -> tuple[int, bool, str, dict]:
475
- right_url = check_url_suffix_match(page, expected_url=self.final_url, task=self)
476
- if not right_url:
477
- return (
478
- 0,
479
- False,
480
- "",
481
- {
482
- "message": f"The page is not in the right URL to validate task {self.__class__.__name__}."
483
- },
484
- )
485
475
 
486
476
  # Retrieve the request sysid from the URL
487
477
  current_url = parse.urlparse(parse.unquote(page.evaluate("() => window.location.href")))
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: browsergym-workarena
3
- Version: 0.4.1
3
+ Version: 0.4.2
4
4
  Summary: WorkArena benchmark for BrowserGym
5
5
  Project-URL: homepage, https://github.com/ServiceNow/WorkArena
6
6
  Author: Léo Boisvert, Alex Drouin, Maxime Gasse, Alex Lacoste, Manuel Del Verme, Megh Thakkar
@@ -22,20 +22,44 @@ Requires-Dist: tenacity>=8.2.3
22
22
  Requires-Dist: tqdm>=4.66.2
23
23
  Description-Content-Type: text/markdown
24
24
 
25
+ <a href="./assets/WorkArena_banner.png">
26
+ <img src="./assets/WorkArena_banner.png" width="1000" />
27
+ </a>
28
+
25
29
  # WorkArena: A Benchmark for Evaluating Agents on Knowledge Work Tasks
26
- [[Benchmark Contents]](#benchmark-contents) ♦ [[Getting Started]](#getting-started) ♦ [[Live Demo]](#live-demo) ♦ [[BrowserGym]](https://github.com/ServiceNow/BrowserGym) ♦ [[Citing This Work]](#citing-this-work)
30
+ [[Benchmark Contents]](#benchmark-contents) ♦ [[Getting Started]](#getting-started) ♦ [[Live Demo]](#live-demo) ♦ [[BrowserGym]](https://github.com/ServiceNow/BrowserGym) ♦ [[Citing This Work]](#citing-this-work) ♦ [Join us on Discord!](https://discord.gg/rDkP69X7)
31
+
32
+ ## Join Our Discord Community
33
+
34
+ Want to brainstorm ideas, troubleshoot issues, or just geek out with fellow agent builders? Our official Discord server is the perfect place to connect and collaborate. Come hang out with us to:
35
+
36
+ - Exchange tips, tricks, and success stories
37
+ - Get real-time support and feedback
38
+ - Stay updated on the latest features and announcements
39
+
40
+ [Join us on Discord!](https://discord.gg/rDkP69X7)
41
+
42
+ ---
43
+
44
+ ### Explore the BrowserGym Ecosystem
45
+
46
+ Looking for more tools and resources? Check out these open-source projects:
47
+
48
+ - **[AgentLab](https://github.com/ServiceNow/AgentLab)**
49
+ - **[BrowserGym](https://github.com/ServiceNow/BrowserGym)**
50
+
51
+ Both are part of the broader [BrowserGym ecosystem](https://arxiv.org/abs/2412.05467).
27
52
 
28
53
  ### Papers
29
54
  * [ICML 2024] WorkArena: How Capable are Web Agents at Solving Common Knowledge Work Tasks? [[Paper]](https://arxiv.org/abs/2403.07718)
30
55
 
31
- * WorkArena++: Towards Compositional Planning and Reasoning-based Common Knowledge Work Tasks [[Paper]](https://arxiv.org/abs/2407.05291)
56
+ * [NeurIPS 2024] WorkArena++: Towards Compositional Planning and Reasoning-based Common Knowledge Work Tasks [[Paper]](https://arxiv.org/abs/2407.05291)
32
57
 
33
58
 
34
59
  `WorkArena` is a suite of browser-based tasks tailored to gauge web agents' effectiveness in supporting routine tasks for knowledge workers.
35
60
  By harnessing the ubiquitous [ServiceNow](https://www.servicenow.com/what-is-servicenow.html) platform, this benchmark will be instrumental in assessing the widespread state of such automations in modern knowledge work environments.
36
61
 
37
- WorkArena is included in [BrowserGym](https://github.com/ServiceNow/BrowserGym), a conversational gym environment for the evaluation of web agents.
38
-
62
+ The preferred way to evaluate on WorkArena is with [AgentLab](https://github.com/ServiceNow/AgentLab/) which will conduct parallel experiments through [BrowserGym](https://github.com/ServiceNow/BrowserGym) and report on a [unified leaderboard](https://huggingface.co/spaces/ServiceNow/browsergym-leaderboard).
39
63
 
40
64
  https://github.com/ServiceNow/WorkArena/assets/2374980/68640f09-7d6f-4eb1-b556-c294a6afef70
41
65
 
@@ -48,7 +72,9 @@ To setup WorkArena, you will need to get your own ServiceNow instance, install o
48
72
  1. Go to https://developer.servicenow.com/ and create an account.
49
73
  2. Click on `Request an instance` and select the `Washington` release (initializing the instance will take a few minutes)
50
74
  3. Once the instance is ready, you should see your instance URL and credentials. If not, click _Return to the Developer Portal_, then navigate to _Manage instance password_ and click _Reset instance password_.
51
- 4. You should now see your URL and credentials. Based on this information, set the following environment variables:
75
+ 4. Change the role of the user to admin in your instance parameters ![image](https://github.com/user-attachments/assets/6f0fbf8e-f40f-411a-84cb-fead93d85f60)
76
+
77
+ 5. You should now see your URL and credentials. Based on this information, set the following environment variables:
52
78
  * `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
53
79
  * `SNOW_INSTANCE_UNAME`: The username, should be "admin"
54
80
  * `SNOW_INSTANCE_PWD`: The password, make sure you place the value in quotes "" and be mindful of [escaping special shell characters](https://onlinelinuxtools.com/escape-shell-characters). Running `echo $SNOW_INSTANCE_PWD` should print the correct password.
@@ -123,41 +149,6 @@ https://github.com/ServiceNow/WorkArena/assets/1726818/ca26dfaf-2358-4418-855f-8
123
149
 
124
150
  https://github.com/ServiceNow/WorkArena/assets/1726818/0023232c-081f-4be4-99bd-f60c766e6c3f
125
151
 
126
- ## Getting Started
127
-
128
- To setup WorkArena, you will need to get your own ServiceNow instance, install our Python package, and upload some data to your instance. Follow the steps below to achieve this.
129
-
130
- ### a) Create a ServiceNow Developer Instance
131
-
132
- 1. Go to https://developer.servicenow.com/ and create an account.
133
- 2. Click on `Request an instance` and select the `Washington` release (initializing the instance will take a few minutes)
134
- 3. Once the instance is ready, you should see your instance URL and credentials. If not, click _Return to the Developer Portal_, then navigate to _Manage instance password_ and click _Reset instance password_.
135
- 4. You should now see your URL and credentials. Based on this information, set the following environment variables:
136
- * `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
137
- * `SNOW_INSTANCE_UNAME`: The username, should be "admin"
138
- * `SNOW_INSTANCE_PWD`: The password, make sure you place the value in single quotes '' and be mindful of [escaping special shell characters](https://onlinelinuxtools.com/escape-shell-characters). Running `echo $SNOW_INSTANCE_PWD` should print the correct password.
139
- 6. Log into your instance via a browser using the admin credentials. Close any popup that appears on the main screen (e.g., agreeing to analytics).
140
-
141
- **Warning:** Feel free to look around the platform, but please make sure you revert any changes (e.g., changes to list views, pinning some menus, etc.) as these changes will be persistent and affect the benchmarking process.
142
-
143
- ### b) Install WorkArena and Initialize your Instance
144
-
145
- Run the following command to install WorkArena in the [BrowswerGym](https://github.com/servicenow/browsergym) environment:
146
- ```
147
- pip install browsergym-workarena
148
- ```
149
-
150
- Then, install [Playwright](https://github.com/microsoft/playwright):
151
- ```
152
- playwright install
153
- ```
154
-
155
- Finally, run this command in a terminal to upload the benchmark data to your ServiceNow instance:
156
- ```
157
- workarena-install
158
- ```
159
- Your installation is now complete! 🎉
160
-
161
152
  ## Live Demo
162
153
 
163
154
  Run this code to see WorkArena in action.
@@ -169,12 +160,12 @@ Note: the following example executes WorkArena's oracle (cheat) function to solv
169
160
  import random
170
161
 
171
162
  from browsergym.core.env import BrowserEnv
172
- from browsergym.workarena import ALL_WORKARENA_TASKS
163
+ from browsergym.workarena import ATOMIC_TASKS
173
164
  from time import sleep
174
165
 
175
166
 
176
- random.shuffle(ALL_WORKARENA_TASKS)
177
- for task in ALL_WORKARENA_TASKS:
167
+ random.shuffle(ATOMIC_TASKS)
168
+ for task in ATOMIC_TASKS:
178
169
  print("Task:", task)
179
170
 
180
171
  # Instantiate a new environment
@@ -276,4 +267,4 @@ Please use the following BibTeX to cite our work:
276
267
  primaryClass={cs.AI},
277
268
  url={https://arxiv.org/abs/2407.05291},
278
269
  }
279
- ```
270
+ ```
@@ -1,7 +1,7 @@
1
- browsergym/workarena/__init__.py,sha256=ocdVJcRZysM8quznRst33KAV39ubpZuvVgjjwQXmKtw,6289
1
+ browsergym/workarena/__init__.py,sha256=4tXIdcxacjmC3AjbzOQBrX2PrM_lEq_1UbmXKCv1_fk,6289
2
2
  browsergym/workarena/config.py,sha256=tblmOUpqSoL3qlQHK_TFEDSFbC3o2kuRP_GFpoTNsX4,8522
3
- browsergym/workarena/install.py,sha256=UaPE1K70xJB-2Gr1P5rJbcolkwMeWyRt04F7_5gpR4E,39341
4
- browsergym/workarena/instance.py,sha256=Qw4lzHhgnl8IuiWOelsmzCJce3jXYivYYwtfTPt2H-s,4314
3
+ browsergym/workarena/install.py,sha256=iEps7IkXFObJaQlE9t78LUFvoqsfKsQbcLLvuFIfBK8,39728
4
+ browsergym/workarena/instance.py,sha256=-w21jT-lnXVWtUolJbuTKsPuULvq-Qa-j9FwdfNJrmE,4343
5
5
  browsergym/workarena/utils.py,sha256=mD6RqVua-m1-mKM1RGGlUEu1s6un0ZI9a5ZTPN7g1hY,3199
6
6
  browsergym/workarena/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  browsergym/workarena/api/category.py,sha256=4oiwPnRas0ZWCdky76zhNpu_9PfB_HmhnFa_DJZyGfA,2084
@@ -79,11 +79,11 @@ browsergym/workarena/tasks/comp_building_block.py,sha256=Lg3KbAWrxzAHe5XbPN6L8bv
79
79
  browsergym/workarena/tasks/dashboard.py,sha256=HDGygBVtUM88lWKkUjyd43JvqmGUOPjmGfmRPkTJruE,34199
80
80
  browsergym/workarena/tasks/form.py,sha256=_s07yZ-zcZbi5v6VK6km1BPzUfIFfMEVWFm56QhoznM,64141
81
81
  browsergym/workarena/tasks/knowledge.py,sha256=kANjlC7DpptMbRlUlZGdDjqZeWIwwyJzozV58qEA6KU,13751
82
- browsergym/workarena/tasks/list.py,sha256=4Ov7fHD4smr_L_EB9og7j7pWTQ2zKAI8LWRrr-7ryiA,53389
82
+ browsergym/workarena/tasks/list.py,sha256=7eb9F1JooLzFGIciul2_E1bCmNyBo5AzOPozO1p1HaM,55778
83
83
  browsergym/workarena/tasks/mark_duplicate_problem.py,sha256=2znPoyuC47hkIEz59jWR-KB2o4GKJ9z5K_C-mpBqBfE,7278
84
84
  browsergym/workarena/tasks/navigation.py,sha256=Y80DpL8xBA8u9zSudW0W6Vf4qaRZUgW-jQO7pl6gOFs,8729
85
85
  browsergym/workarena/tasks/send_chat_message.py,sha256=8yWSBEMDpv_reU4QH92rjtyPV6ZjhOAgby465Olc3jM,3854
86
- browsergym/workarena/tasks/service_catalog.py,sha256=y-MxuJ-L3uJDB1RJz4cUpkoCN1F-Gc8q9HqzbOY_Cpg,25099
86
+ browsergym/workarena/tasks/service_catalog.py,sha256=g1X2id4PHAyYPYZ6vkwEjJusgx8SCyEjZqC4SilWoaA,24739
87
87
  browsergym/workarena/tasks/compositional/__init__.py,sha256=zgbl23owwUZSnFD84rh-QJitaAsNCH0PNSct_H_NrM4,2341
88
88
  browsergym/workarena/tasks/compositional/base.py,sha256=eIZhfpBOvZvrlC2X7PSbY_7JrILuezYe-NRzDTECHik,14578
89
89
  browsergym/workarena/tasks/compositional/dash_do_base.py,sha256=ihxgwVxUfxBJXt49KzOSEH1i_8uymm1oMLGPrsD4zfI,58252
@@ -131,8 +131,8 @@ browsergym/workarena/tasks/utils/js_utils.js,sha256=n97fmY2Jkr59rEcQSuSbCnn1L2ZN
131
131
  browsergym/workarena/tasks/utils/private_tasks.py,sha256=r7Z9SnBMuZdZ2i-tK6eULj0q8hclANXFSzdLl49KYHI,2128
132
132
  browsergym/workarena/tasks/utils/string.py,sha256=ir5_ASD9QSFMZ9kuHo2snSXRuSfv_wROH6nxBLOTP4I,330
133
133
  browsergym/workarena/tasks/utils/utils.py,sha256=xQD-njEwgN7qxfn1dLBN8MYfd3kl3TuVfpmI1yxML9k,955
134
- browsergym_workarena-0.4.1.dist-info/METADATA,sha256=dboAv2_pwEwNrxbHQKrgKHnG2oxLHq_iB5qO5oAeUms,12498
135
- browsergym_workarena-0.4.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
136
- browsergym_workarena-0.4.1.dist-info/entry_points.txt,sha256=1lCeAbQFCcU6UTFwS5QIA3TKhT2P9ZabaZKT7sIShKc,137
137
- browsergym_workarena-0.4.1.dist-info/licenses/LICENSE,sha256=sZLFiZHo_1hcxXRhXUDnQYVATUuWwRCdQjBxqxNnNEs,579
138
- browsergym_workarena-0.4.1.dist-info/RECORD,,
134
+ browsergym_workarena-0.4.2.dist-info/METADATA,sha256=SV-hDJ1zdD4tS1ZKbfZCj86F2PphPAgY0X-JEY7w8CY,11698
135
+ browsergym_workarena-0.4.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
136
+ browsergym_workarena-0.4.2.dist-info/entry_points.txt,sha256=1lCeAbQFCcU6UTFwS5QIA3TKhT2P9ZabaZKT7sIShKc,137
137
+ browsergym_workarena-0.4.2.dist-info/licenses/LICENSE,sha256=sZLFiZHo_1hcxXRhXUDnQYVATUuWwRCdQjBxqxNnNEs,579
138
+ browsergym_workarena-0.4.2.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.25.0
2
+ Generator: hatchling 1.27.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any