browsergym-workarena 0.1.0rc3__py3-none-any.whl → 0.1.0rc5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- __version__ = "0.1.0rc3"
1
+ __version__ = "0.1.0rc5"
2
2
 
3
3
  from browsergym.core.registration import register_task
4
4
 
@@ -51,7 +51,7 @@ ORDER_LOANER_LAPTOP_TASK_CONFIG_PATH = str(
51
51
 
52
52
  # Knowledge base that is included with the benchmark
53
53
  KB_NAME = "General Knowledge"
54
- KB_FILEPATH = str(resources.files(data_files).joinpath("knowledge_base.json"))
54
+ KB_FILEPATH = str(resources.files(data_files).joinpath("setup_files/knowledge_base.json"))
55
55
 
56
56
  # Form tasks
57
57
  CREATE_CHANGE_REQUEST_CONFIG_PATH = str(
@@ -112,6 +112,13 @@ SORT_USER_LIST_CONFIG_PATH = str(
112
112
  WORKFLOWS = {
113
113
  "kb_publish": {
114
114
  "name": "WorkArena Auto-Publish",
115
- "update_set": str(resources.files(data_files).joinpath("kb_autopublish_workflow.xml")),
115
+ "update_set": str(
116
+ resources.files(data_files).joinpath("setup_files/kb_autopublish_workflow.xml")
117
+ ),
116
118
  }
117
119
  }
120
+
121
+ # Names of the columns expected to be displayed in the user list; used for setup
122
+ EXPECTED_USER_COLUMNS_PATH = str(
123
+ resources.files(data_files).joinpath("setup_files/expected_user_columns.json")
124
+ )
@@ -0,0 +1,59 @@
1
+ [
2
+ "user_name",
3
+ "name",
4
+ "email",
5
+ "active",
6
+ "sys_created_on",
7
+ "sys_updated_on",
8
+ "avatar",
9
+ "building",
10
+ "phone",
11
+ "calendar_integration",
12
+ "city",
13
+ "sys_class_name",
14
+ "company",
15
+ "cost_center",
16
+ "country",
17
+ "sys_created_by",
18
+ "date_format",
19
+ "default_perspective",
20
+ "department",
21
+ "sys_domain",
22
+ "sys_domain_path",
23
+ "employee_number",
24
+ "enable_multifactor_authn",
25
+ "failed_attempts",
26
+ "first_name",
27
+ "gender",
28
+ "home_phone",
29
+ "internal_integration_user",
30
+ "ldap_server",
31
+ "preferred_language",
32
+ "last_login",
33
+ "last_login_time",
34
+ "last_name",
35
+ "location",
36
+ "locked_out",
37
+ "manager",
38
+ "middle_name",
39
+ "mobile_phone",
40
+ "notification",
41
+ "user_password",
42
+ "password_needs_reset",
43
+ "photo",
44
+ "introduction",
45
+ "roles",
46
+ "schedule",
47
+ "source",
48
+ "state",
49
+ "street",
50
+ "sys_tags",
51
+ "time_format",
52
+ "time_zone",
53
+ "title",
54
+ "sys_updated_by",
55
+ "sys_mod_count",
56
+ "vip",
57
+ "web_service_access_only",
58
+ "zip"
59
+ ]
@@ -6,7 +6,7 @@ import re
6
6
  from playwright.sync_api import sync_playwright
7
7
 
8
8
  from .api.utils import table_api_call
9
- from .config import KB_FILEPATH, KB_NAME, WORKFLOWS
9
+ from .config import KB_FILEPATH, KB_NAME, EXPECTED_USER_COLUMNS_PATH, WORKFLOWS
10
10
  from .instance import SNowInstance
11
11
  from .utils import ui_login
12
12
 
@@ -277,6 +277,60 @@ def install_workflows():
277
277
  browser.close()
278
278
 
279
279
 
280
+ def display_all_columns(url: str):
281
+ """Display all columns in a given list view."""
282
+ with sync_playwright() as playwright:
283
+ instance = SNowInstance()
284
+ browser = playwright.chromium.launch(headless=True, slow_mo=1000)
285
+ page = browser.new_page()
286
+ ui_login(instance, page)
287
+ page.goto(instance.snow_url + url)
288
+ frame = page.wait_for_selector("iframe#gsft_main").content_frame()
289
+ frame.get_by_text("Personalize List").click()
290
+ available_columns = frame.get_by_label("Available")
291
+ available_columns.get_by_role("option").first.click()
292
+ available_columns.get_by_role("option").last.click(modifiers=["Shift"])
293
+ frame.get_by_text("Add").click()
294
+ frame.click("#ok_button")
295
+
296
+
297
+ def check_all_columns_displayed(url: str, expected_columns_path: str):
298
+ """Get the visible columns and checks that all expected columns are displayed."""
299
+ with open(expected_columns_path, "r") as f:
300
+ expected_columns = set(json.load(f))
301
+ with sync_playwright() as playwright:
302
+ instance = SNowInstance()
303
+ browser = playwright.chromium.launch(headless=True, slow_mo=1000)
304
+ page = browser.new_page()
305
+ ui_login(instance, page)
306
+ page.goto(instance.snow_url + url)
307
+ iframe = page.frame("gsft_main")
308
+ lst = iframe.locator("table.data_list_table")
309
+ lst.wait_for()
310
+
311
+ # Validate the number of lists on the page
312
+ lst = lst.nth(0)
313
+ js_selector = f"gsft_main.GlideList2.get('{lst.get_attribute('data-list_id')}')"
314
+ visible_columns = set(page.evaluate(f"{js_selector}.fields").split(","))
315
+
316
+ # check if expected columns is contained in the visible columns
317
+ if not expected_columns.issubset(visible_columns):
318
+ logging.info(
319
+ f"Error setting up list at {url} \n Expected {expected_columns} columns, but got {visible_columns}."
320
+ )
321
+ return False
322
+ logging.info(f"All columns properly displayed for {url}.")
323
+ return True
324
+
325
+
326
+ def setup_list_columns(url: str, expected_columns_path: str):
327
+ """Setup the list view to display the expected number of columns."""
328
+ display_all_columns(url)
329
+ assert check_all_columns_displayed(
330
+ url, expected_columns_path
331
+ ), f"Error setting up list columns at {url}"
332
+
333
+
280
334
  def setup():
281
335
  """
282
336
  Check that WorkArena is installed correctly in the instance.
@@ -285,6 +339,11 @@ def setup():
285
339
  # XXX: Install workflows first because they may automate some downstream installations
286
340
  setup_workflows()
287
341
  setup_knowledge_base()
342
+ # Setup the user list columns by displaying all columns and checking that the expected number are displayed
343
+ setup_list_columns(
344
+ "/now/nav/ui/classic/params/target/sys_user_list.do%3Fsysparm_view%3D%26sysparm_userpref.sys_user_list.view%3D%26sysparm_userpref.sys_user.view%3D%26sysparm_query%3Dactive%253Dtrue%255Ecompany%253D81fd65ecac1d55eb42a426568fc87a63",
345
+ EXPECTED_USER_COLUMNS_PATH,
346
+ )
288
347
 
289
348
 
290
349
  def main():
@@ -81,7 +81,7 @@ class ServiceNowFormTask(AbstractServiceNowTask):
81
81
  # ... augment with rendered metadata
82
82
  # XXX: Additional useful info is present in the rendered HTML. We extract it from there.
83
83
  for f in self.table_metadata:
84
- loc = page.frame(name=self.js_prefix).locator(f"#sys_display\.{self.table_name}\.{f}")
84
+ loc = page.frame(name=self.js_prefix).locator(f"#sys_display.{self.table_name}.{f}")
85
85
  if loc.count() > 0:
86
86
  # Check if the field is dependent on another field
87
87
  self.table_metadata[f]["dependent_on_field"] = loc.first.get_attribute(
@@ -33,7 +33,7 @@ class KnowledgeBaseSearchTask(AbstractServiceNowTask):
33
33
  def __init__(self, instance=None, fixed_config: dict = None) -> None:
34
34
  super().__init__(
35
35
  instance=instance,
36
- start_rel_url="/now/nav/ui/classic/params/target/knowledge_home_launcher.do",
36
+ start_rel_url="/now/nav/ui/classic/params/target/%24knowledge.do",
37
37
  )
38
38
 
39
39
  # Load the knowledge base and check its integrity
@@ -128,7 +128,8 @@ def validate_task(task_config, task_class, page=None):
128
128
  chat_messages = []
129
129
  task.cheat(page=page, chat_messages=chat_messages)
130
130
  page.wait_for_timeout(2000)
131
- task_successful = task.validate(page, chat_messages)[1]
131
+ reward, done, _, _ = task.validate(page, chat_messages)
132
+ task_successful = done is True and reward == 1.0
132
133
  task.teardown()
133
134
  tries += 1
134
135
  if task_successful:
@@ -23,11 +23,12 @@ from ..config import (
23
23
  ORDER_DEVELOPMENT_LAPTOP_PC_TASK_CONFIG_PATH,
24
24
  ORDER_LOANER_LAPTOP_TASK_CONFIG_PATH,
25
25
  )
26
- from browsergym.workarena.api.requests import (
26
+ from .utils.form import fill_text
27
+ from ..api.requests import (
27
28
  get_request_by_id,
28
29
  db_delete_from_table,
29
30
  )
30
- from browsergym.workarena.tasks.base import AbstractServiceNowTask
31
+ from .base import AbstractServiceNowTask
31
32
 
32
33
  ADDITIONAL_SOFTWARE = [
33
34
  "Slack",
@@ -310,18 +311,10 @@ class OrderHardwareTask(AbstractServiceNowTask):
310
311
  element_id = element_control.get_attribute("id") # this look superfluous
311
312
  text_element = iframe.query_selector(f'[id="{element_id}"]')
312
313
  text_element.click()
313
- from .utils.form import fill_text
314
-
315
314
  fill_text(page=page, input_field=text_element, value=value, iframe=iframe)
315
+
316
316
  elif control_type == "select-one":
317
- select_options = iframe.query_selector(f'select[id="{element_id}"]', strict=True)
318
- select_options.click()
319
- options = select_options.query_selector_all("option")
320
- for option in options:
321
- if option.inner_text().startswith(value):
322
- page.keyboard.press("Enter")
323
- break
324
- page.keyboard.press("ArrowDown")
317
+ iframe.locator(f"id={element_id}").select_option(value)
325
318
  else:
326
319
  raise ValueError(f"Unknown control type {control_type}")
327
320
 
@@ -475,10 +468,13 @@ class OrderHardwareTask(AbstractServiceNowTask):
475
468
 
476
469
 
477
470
  def option_match_heuristic(value, option):
478
- value = str(value).lower()
479
- option = str(option).lower()
480
- option = option.replace("_", " ")
481
- return value == option
471
+ def _process(x):
472
+ x = str(x).lower()
473
+ x = x.replace("_", "")
474
+ x = x.replace(" ", "")
475
+ return x
476
+
477
+ return _process(value) == _process(option)
482
478
 
483
479
 
484
480
  class OrderDeveloperLaptopTask(OrderHardwareTask):
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: browsergym-workarena
3
- Version: 0.1.0rc3
3
+ Version: 0.1.0rc5
4
4
  Summary: WorkArena benchmark for BrowserGym
5
5
  Project-URL: homepage, https://github.com/ServiceNow/WorkArena
6
6
  Author: Léo Boisvert, Alex Drouin, Maxime Gasse, Alex Lacoste, Manuel Del Verme
@@ -13,14 +13,14 @@ Classifier: Operating System :: OS Independent
13
13
  Classifier: Programming Language :: Python :: 3
14
14
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
15
  Requires-Python: >3.7
16
- Requires-Dist: browsergym-core==0.1.0rc3
16
+ Requires-Dist: browsergym-core==0.1.0rc5
17
17
  Requires-Dist: english-words>=2.0.1
18
18
  Requires-Dist: numpy>=1.14
19
19
  Requires-Dist: requests>=2.31
20
20
  Requires-Dist: tenacity>=8.2.3
21
21
  Description-Content-Type: text/markdown
22
22
 
23
- # WorkArena - How Capable are Web Agents at Solving Common Knowledge Work Tasks?
23
+ # WorkArena: How Capable are Web Agents at Solving Common Knowledge Work Tasks?
24
24
 
25
25
  [[Paper]](https://arxiv.org/abs/2403.07718) ♦ [[Benchmark Contents]](#benchmark-contents) ♦ [[Getting Started]](#getting-started) ♦ [[Live Demo]](#live-demo) ♦ [[BrowserGym]](https://github.com/ServiceNow/BrowserGym) ♦ [[Citing This Work]](#citing-this-work)
26
26
 
@@ -78,12 +78,13 @@ To setup WorkArena, you will need to get your own ServiceNow instance, install o
78
78
  ### a) Create a ServiceNow Developer Instance
79
79
 
80
80
  1. Go to https://developer.servicenow.com/ and create an account.
81
- 2. Click on `Request an instance` and select the `Vancouver` release (initializing the instance will take a few minutes)
82
- 3. Once the instance is ready, you will see a popup showing its URL and credentials. You will also receive a copy by email. Based on this information, set the following environment variables:
83
- * `SNOW_INSTANCE_URL`: URL of your ServiceNow developer instance
84
- * `SNOW_INSTANCE_UNAME`: Just use "admin"
85
- * `SNOW_INSTANCE_PWD`: The password for your instance. Make sure you place the value in quotes "" since it might contain special characters.
86
- 4. Log into your instance via a browser using the admin credentials. Close any popup that appears on the main screen (e.g., agreeing to analytics).
81
+ 2. Click on `Request an instance` and select the `Utah` release (initializing the instance will take a few minutes)
82
+ 3. Once the instance is ready, click _Return to the Developer Portal_, then navigate to _Manage instance password_ and click _Reset instance password_.
83
+ 4. You should now see your URL and credentials. Based on this information, set the following environment variables:
84
+ * `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
85
+ * `SNOW_INSTANCE_UNAME`: The username; this should be "admin"
86
+ * `SNOW_INSTANCE_PWD`: The password. Make sure you place the value in quotes "" since it might contain special characters.
87
+ 5. Log into your instance via a browser using the admin credentials. Close any popup that appears on the main screen (e.g., agreeing to analytics).
87
88
 
88
89
  **Warning:** Feel free to look around the platform, but please make sure you revert any changes (e.g., changes to list views, pinning some menus, etc.) as these changes will be persistent and affect the benchmarking process.
89
90
 
@@ -1,13 +1,14 @@
1
- browsergym/workarena/__init__.py,sha256=UOnLE54QQ90EnokWr8-z5f5peLUNtBSarE2HX60YvU8,599
2
- browsergym/workarena/config.py,sha256=cCdWzQ4h9qXC106PQKptWzZdxdI2RwccDlcwI0LlmoQ,4610
3
- browsergym/workarena/install.py,sha256=-pfRzPRmuN7RZElS2LFmm2Zo8uy6ZD0nJeGZA4TOXKs,9789
1
+ browsergym/workarena/__init__.py,sha256=nQzWa5xwZr2SGZLk747NUqSDN8h_VbKr-s5ytxTH5Dg,599
2
+ browsergym/workarena/config.py,sha256=dFOwG8EiAyWohmk0iq-1pcV8keHJlACaDeDAuWP2ac8,4829
3
+ browsergym/workarena/install.py,sha256=nxYO4CarQ2AnLcVLmPg3NlcR-uAUzlXJULSkDgMumR8,12558
4
4
  browsergym/workarena/instance.py,sha256=W_2C4QM_D7l36bXgXP_BtJwItGCW4uUJuFxmjdari1I,3071
5
5
  browsergym/workarena/utils.py,sha256=UXV-loPmwFCGF8E874QcyACmVS4BBHpPrW6BofKMADY,2289
6
6
  browsergym/workarena/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  browsergym/workarena/api/requests.py,sha256=z2vQTTkm1HR04_d_pce6jopuN--Oh0gUaT-YhrVa9-8,4148
8
8
  browsergym/workarena/api/utils.py,sha256=0S6-yWg8mIk-2zT2fvP_Xakvwz9sKGuNFTSAUoNVDTI,3307
9
- browsergym/workarena/data_files/kb_autopublish_workflow.xml,sha256=zMSoA3lfg4-Fvs7egnR286CPzzDHoTk6ffKnODWrunM,11500
10
- browsergym/workarena/data_files/knowledge_base.json,sha256=HHUH6ctyjea8R5oyhVCDedC__lGa_W9jaCGNfUVst50,474939
9
+ browsergym/workarena/data_files/setup_files/expected_user_columns.json,sha256=L4yWNhOczhgC4gKOikrRgX2p2jFxaJ5XSsMM20zwjV4,1068
10
+ browsergym/workarena/data_files/setup_files/kb_autopublish_workflow.xml,sha256=zMSoA3lfg4-Fvs7egnR286CPzzDHoTk6ffKnODWrunM,11500
11
+ browsergym/workarena/data_files/setup_files/knowledge_base.json,sha256=HHUH6ctyjea8R5oyhVCDedC__lGa_W9jaCGNfUVst50,474939
11
12
  browsergym/workarena/data_files/task_configs/all_menu.json,sha256=sm5nMArqRdV16YCzd_UbSs2PbEE2Aofi42c5vG-0QoY,269871
12
13
  browsergym/workarena/data_files/task_configs/create_change_request_task.json,sha256=ik2qFv8Kj-CH7WDyOqL4ZCUEqQx9C2euipb20LNr9ns,4486789
13
14
  browsergym/workarena/data_files/task_configs/create_hardware_asset_task.json,sha256=LMqLfHvZkJOok--Xm2q4yqXWjupsphvcAc9zVlaZ-1A,3439223
@@ -39,11 +40,11 @@ browsergym/workarena/data_files/task_configs/sort_service_catalog_item_list_task
39
40
  browsergym/workarena/data_files/task_configs/sort_user_list_task.json,sha256=-M0hhooaS5NXnP1cDMBg8T3BAT4asU-ATdFPxRa21Uk,210213
40
41
  browsergym/workarena/tasks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
42
  browsergym/workarena/tasks/base.py,sha256=O0IDwvSIRrHUvFRzIsr0Yl9ph59IacpC52O_xlFmjTU,3419
42
- browsergym/workarena/tasks/form.py,sha256=_sBj0d1a0s_zmlIc6KVfP-evNduYaQjYc-pLUw6GSic,29340
43
- browsergym/workarena/tasks/knowledge.py,sha256=kPV8roBb2JqMnAPoxagH1qKtAMROzziY-8AVup9eQ7E,5782
43
+ browsergym/workarena/tasks/form.py,sha256=z7OrJ1hgJt2bCzfCXuZPcO6UJkee0UEaBU5hAVnUyvE,29338
44
+ browsergym/workarena/tasks/knowledge.py,sha256=lVs058sceJS_DkoBw3lRymmy5rTjLH38Fr7BqaGZikI,5771
44
45
  browsergym/workarena/tasks/list.py,sha256=8An0v13vgWvgiP0xA1wo31hVZuvWO4D6O8Iih6yAfs0,32776
45
46
  browsergym/workarena/tasks/navigation.py,sha256=5n6wIXxF-vZK5OXl-runw6tFdBeH6JdvnBO5dtEJrbw,7384
46
- browsergym/workarena/tasks/service_catalog.py,sha256=F-D92vHzfcXqtzeKPSe4pz5GMMMtZ8XiB3F14o-uW5s,22048
47
+ browsergym/workarena/tasks/service_catalog.py,sha256=36XsIsKX5OowEJQySoI8-pIuSC34rge8bnkvKS_vU5I,21661
47
48
  browsergym/workarena/tasks/scripts/README.md,sha256=-jOtGf9k2zoBhrBkesmfDnr9-s0PhOEi0KWI9xfOw1s,296
48
49
  browsergym/workarena/tasks/scripts/extract_all_menu_items.py,sha256=oqSH2Sx5P_tgvC51SbvbgaY8oCa9w6IDR2BEsK-HsmI,9729
49
50
  browsergym/workarena/tasks/scripts/generate_forms.py,sha256=gABzW4ya1H2CVuGMqU38cJ5nWcGMVtv6gXQVqn6Ycxg,3590
@@ -51,13 +52,13 @@ browsergym/workarena/tasks/scripts/knowledge.py,sha256=A--1BRXbRgwlHGU-JIBRv3ozb
51
52
  browsergym/workarena/tasks/scripts/list.py,sha256=VQpoc9cjqkShtG-g-Yx6ZVjVrZOfB-__BrRgb7QFVMU,3475
52
53
  browsergym/workarena/tasks/scripts/navigation.py,sha256=bvNv2pLTHdstKDowowbP7qeI8IlkDexxCJLZxpRbgbc,823
53
54
  browsergym/workarena/tasks/scripts/service_catalog.py,sha256=HwLNQ3qxYxJJ_uXmA5_Rn1Xntv0V2tQzfgBBflWlfMc,4678
54
- browsergym/workarena/tasks/scripts/validate.py,sha256=o6IAxQmJWzvMX1zmUbs_q4TwWdzXi0Djo1OXkBC2l08,6686
55
+ browsergym/workarena/tasks/scripts/validate.py,sha256=tJ6a1VC0u7F1bbszwnRLj2ef7dWumuYgpOt3Vt-ceTQ,6743
55
56
  browsergym/workarena/tasks/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
57
  browsergym/workarena/tasks/utils/debug.py,sha256=EEy7Rt2ETwkorPm0s6y_u0wutl5rDp_PVuM7BvnsbIA,670
57
58
  browsergym/workarena/tasks/utils/form.py,sha256=EMi_L6WBvIX9loczyVMGqBbYcb34r3zwVphr2KPh69M,2487
58
59
  browsergym/workarena/tasks/utils/js_utils.js,sha256=DTU9oiO1IC-8sAb_CY5KwVWx9jrjdmP8aAEMZZEww_k,1642
59
- browsergym_workarena-0.1.0rc3.dist-info/METADATA,sha256=FIRsZiZUSLXS8XKr_d6OTEApEvdNabRB2gpL8Gh9hj8,6976
60
- browsergym_workarena-0.1.0rc3.dist-info/WHEEL,sha256=TJPnKdtrSue7xZ_AVGkp9YXcvDrobsjBds1du3Nx6dc,87
61
- browsergym_workarena-0.1.0rc3.dist-info/entry_points.txt,sha256=rjnc1GaWB89r1PO0P_Uwriv-iVzWMhRM7EqUtXluDPs,72
62
- browsergym_workarena-0.1.0rc3.dist-info/licenses/LICENSE,sha256=sZLFiZHo_1hcxXRhXUDnQYVATUuWwRCdQjBxqxNnNEs,579
63
- browsergym_workarena-0.1.0rc3.dist-info/RECORD,,
60
+ browsergym_workarena-0.1.0rc5.dist-info/METADATA,sha256=fA7i8dBUvF64XU661W629Be4hW6egnsld7R6extawoI,7047
61
+ browsergym_workarena-0.1.0rc5.dist-info/WHEEL,sha256=LL0B1KxSLwaTWceo7tT-0aDLd-qq9dmbnsnk1DnXlg8,87
62
+ browsergym_workarena-0.1.0rc5.dist-info/entry_points.txt,sha256=rjnc1GaWB89r1PO0P_Uwriv-iVzWMhRM7EqUtXluDPs,72
63
+ browsergym_workarena-0.1.0rc5.dist-info/licenses/LICENSE,sha256=sZLFiZHo_1hcxXRhXUDnQYVATUuWwRCdQjBxqxNnNEs,579
64
+ browsergym_workarena-0.1.0rc5.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.21.1
2
+ Generator: hatchling 1.22.2
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any