browsergym-workarena 0.5.1__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browsergym/workarena/__init__.py +11 -1
- browsergym/workarena/config.py +1 -1
- browsergym/workarena/instance.py +13 -3
- browsergym/workarena/tasks/form.py +24 -0
- browsergym/workarena/tasks/list.py +22 -1
- browsergym/workarena/tasks/service_catalog.py +57 -0
- {browsergym_workarena-0.5.1.dist-info → browsergym_workarena-0.5.2.dist-info}/METADATA +2 -11
- {browsergym_workarena-0.5.1.dist-info → browsergym_workarena-0.5.2.dist-info}/RECORD +11 -11
- {browsergym_workarena-0.5.1.dist-info → browsergym_workarena-0.5.2.dist-info}/WHEEL +0 -0
- {browsergym_workarena-0.5.1.dist-info → browsergym_workarena-0.5.2.dist-info}/entry_points.txt +0 -0
- {browsergym_workarena-0.5.1.dist-info → browsergym_workarena-0.5.2.dist-info}/licenses/LICENSE +0 -0
browsergym/workarena/__init__.py
CHANGED
|
@@ -1,4 +1,14 @@
|
|
|
1
|
-
__version__ = "0.5.1"
|
|
1
|
+
__version__ = "0.5.2"
|
|
2
|
+
|
|
3
|
+
# Check playwright version early to avoid cryptic errors
|
|
4
|
+
import importlib.metadata
|
|
5
|
+
|
|
6
|
+
_playwright_version = importlib.metadata.version("playwright")
|
|
7
|
+
if _playwright_version != "1.44.0":
|
|
8
|
+
raise RuntimeError(
|
|
9
|
+
f"browsergym-workarena requires playwright==1.44.0, but found {_playwright_version}. "
|
|
10
|
+
f"Please install the correct version: pip install playwright==1.44.0"
|
|
11
|
+
)
|
|
2
12
|
|
|
3
13
|
import inspect
|
|
4
14
|
from logging import warning
|
browsergym/workarena/config.py
CHANGED
|
@@ -13,7 +13,7 @@ SNOW_SUPPORTED_RELEASES = ["washingtondc"]
|
|
|
13
13
|
|
|
14
14
|
# Hugging Face dataset containing available instances
|
|
15
15
|
INSTANCE_REPO_ID = "ServiceNow/WorkArena-Instances"
|
|
16
|
-
INSTANCE_REPO_FILENAME = "
|
|
16
|
+
INSTANCE_REPO_FILENAME = "instances_v2.json"
|
|
17
17
|
INSTANCE_REPO_TYPE = "dataset"
|
|
18
18
|
INSTANCE_XOR_SEED = "x3!+-9mi#nhlo%a02$9hna{]"
|
|
19
19
|
|
browsergym/workarena/instance.py
CHANGED
|
@@ -45,10 +45,18 @@ def encrypt_instance_password(password: str) -> str:
|
|
|
45
45
|
return base64.b64encode(cipher_bytes).decode("utf-8")
|
|
46
46
|
|
|
47
47
|
|
|
48
|
-
def fetch_instances():
|
|
48
|
+
def fetch_instances(filename: str = None):
|
|
49
49
|
"""
|
|
50
50
|
Load the latest instances from either a custom pool (SNOW_INSTANCE_POOL env var) or the gated HF dataset.
|
|
51
|
+
|
|
52
|
+
Parameters:
|
|
53
|
+
-----------
|
|
54
|
+
filename: str
|
|
55
|
+
Optional filename to fetch from the HF dataset. Defaults to INSTANCE_REPO_FILENAME.
|
|
51
56
|
"""
|
|
57
|
+
if filename is None:
|
|
58
|
+
filename = INSTANCE_REPO_FILENAME
|
|
59
|
+
|
|
52
60
|
pool_path = os.getenv("SNOW_INSTANCE_POOL")
|
|
53
61
|
if pool_path:
|
|
54
62
|
path = os.path.expanduser(pool_path)
|
|
@@ -62,13 +70,13 @@ def fetch_instances():
|
|
|
62
70
|
disable_progress_bars()
|
|
63
71
|
path = hf_hub_download(
|
|
64
72
|
repo_id=INSTANCE_REPO_ID,
|
|
65
|
-
filename=INSTANCE_REPO_FILENAME,
|
|
73
|
+
filename=filename,
|
|
66
74
|
repo_type=INSTANCE_REPO_TYPE,
|
|
67
75
|
)
|
|
68
76
|
logging.info("Loaded ServiceNow instances from the default instance pool.")
|
|
69
77
|
except Exception as e:
|
|
70
78
|
raise RuntimeError(
|
|
71
|
-
f"Could not access {INSTANCE_REPO_ID}/{INSTANCE_REPO_FILENAME}. "
|
|
79
|
+
f"Could not access {INSTANCE_REPO_ID}/{filename}. "
|
|
72
80
|
"Make sure you have been granted access to the gated repo and that you are "
|
|
73
81
|
"authenticated (run `huggingface-cli login` or set HUGGING_FACE_HUB_TOKEN)."
|
|
74
82
|
) from e
|
|
@@ -77,6 +85,8 @@ def fetch_instances():
|
|
|
77
85
|
entries = json.load(f)
|
|
78
86
|
|
|
79
87
|
for entry in entries:
|
|
88
|
+
if entry.get("error"):
|
|
89
|
+
raise RuntimeError(entry.get("message", "Unknown error from instance pool"))
|
|
80
90
|
entry["url"] = entry["u"]
|
|
81
91
|
entry["password"] = decrypt_instance_password(entry["p"])
|
|
82
92
|
del entry["u"]
|
|
@@ -371,6 +371,30 @@ class ServiceNowFormTask(AbstractServiceNowTask):
|
|
|
371
371
|
|
|
372
372
|
runInGsftMainOnlyAndProtectByURL(monitorChangeOnFields, '{url_suffix}');
|
|
373
373
|
""",
|
|
374
|
+
f"""
|
|
375
|
+
function removePersonalizeFormButton() {{
|
|
376
|
+
waLog('Searching for Personalize Form button...', 'removePersonalizeFormButton');
|
|
377
|
+
let button = document.querySelector('#togglePersonalizeForm');
|
|
378
|
+
if (button) {{
|
|
379
|
+
button.remove();
|
|
380
|
+
waLog('Removed Personalize Form button', 'removePersonalizeFormButton');
|
|
381
|
+
}}
|
|
382
|
+
}}
|
|
383
|
+
|
|
384
|
+
runInGsftMainOnlyAndProtectByURL(removePersonalizeFormButton, '{url_suffix}');
|
|
385
|
+
""",
|
|
386
|
+
f"""
|
|
387
|
+
function removeAdditionalActionsButton() {{
|
|
388
|
+
waLog('Searching for Additional Actions button...', 'removeAdditionalActionsButton');
|
|
389
|
+
let button = document.querySelector('button.additional-actions-context-menu-button');
|
|
390
|
+
if (button) {{
|
|
391
|
+
button.remove();
|
|
392
|
+
waLog('Removed Additional Actions button', 'removeAdditionalActionsButton');
|
|
393
|
+
}}
|
|
394
|
+
}}
|
|
395
|
+
|
|
396
|
+
runInGsftMainOnlyAndProtectByURL(removeAdditionalActionsButton, '{url_suffix}');
|
|
397
|
+
""",
|
|
374
398
|
]
|
|
375
399
|
|
|
376
400
|
def start(self, page: Page) -> None:
|
|
@@ -113,7 +113,28 @@ class ServiceNowListTask(AbstractServiceNowTask):
|
|
|
113
113
|
return json.load(f)
|
|
114
114
|
|
|
115
115
|
def get_init_scripts(self) -> List[str]:
|
|
116
|
-
return super().get_init_scripts() + [
|
|
116
|
+
return super().get_init_scripts() + [
|
|
117
|
+
"registerGsftMainLoaded();",
|
|
118
|
+
self._get_remove_personalize_list_button_script(),
|
|
119
|
+
]
|
|
120
|
+
|
|
121
|
+
def _get_remove_personalize_list_button_script(self):
|
|
122
|
+
"""
|
|
123
|
+
Removes the 'Personalize List' button on list pages.
|
|
124
|
+
"""
|
|
125
|
+
script = """
|
|
126
|
+
function removePersonalizeListButton() {
|
|
127
|
+
waLog('Searching for Personalize List buttons...', 'removePersonalizeListButton');
|
|
128
|
+
let buttons = document.querySelectorAll('i[data-type="list_mechanic2_open"]');
|
|
129
|
+
buttons.forEach((button) => {
|
|
130
|
+
button.remove();
|
|
131
|
+
});
|
|
132
|
+
waLog('Removed ' + buttons.length + ' Personalize List buttons', 'removePersonalizeListButton');
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
runInGsftMainOnlyAndProtectByURL(removePersonalizeListButton, '_list.do');
|
|
136
|
+
"""
|
|
137
|
+
return script
|
|
117
138
|
|
|
118
139
|
def _get_visible_list(self, page: Page):
|
|
119
140
|
self._wait_for_ready(page)
|
|
@@ -225,6 +225,9 @@ class OrderHardwareTask(AbstractServiceNowTask):
|
|
|
225
225
|
"registerGsftMainLoaded()",
|
|
226
226
|
self._get_disable_add_to_cart_script(),
|
|
227
227
|
self._get_remove_top_items_panel_script(),
|
|
228
|
+
self._get_remove_add_content_button_script(),
|
|
229
|
+
self._get_remove_header_decorations_script(),
|
|
230
|
+
self._get_remove_more_options_buttons_script(),
|
|
228
231
|
]
|
|
229
232
|
|
|
230
233
|
def _get_disable_add_to_cart_script(self):
|
|
@@ -276,6 +279,60 @@ class OrderHardwareTask(AbstractServiceNowTask):
|
|
|
276
279
|
"""
|
|
277
280
|
return script
|
|
278
281
|
|
|
282
|
+
def _get_remove_add_content_button_script(self):
|
|
283
|
+
"""
|
|
284
|
+
Removes the 'Add content' button from the service catalog page.
|
|
285
|
+
"""
|
|
286
|
+
script = """
|
|
287
|
+
function removeAddContentButton() {
|
|
288
|
+
waLog('Searching for Add content button...', 'removeAddContentButton');
|
|
289
|
+
let button = document.querySelector('button[aria-label="Add content"]');
|
|
290
|
+
if (button) {
|
|
291
|
+
button.remove();
|
|
292
|
+
waLog('Removed Add content button', 'removeAddContentButton');
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
runInGsftMainOnlyAndProtectByURL(removeAddContentButton, 'catalog_home');
|
|
297
|
+
"""
|
|
298
|
+
return script
|
|
299
|
+
|
|
300
|
+
def _get_remove_header_decorations_script(self):
|
|
301
|
+
"""
|
|
302
|
+
Removes all header decoration panels (edit/settings/close buttons) from the service catalog page.
|
|
303
|
+
"""
|
|
304
|
+
script = """
|
|
305
|
+
function removeHeaderDecorations() {
|
|
306
|
+
waLog('Searching for header decoration panels...', 'removeHeaderDecorations');
|
|
307
|
+
let panels = document.querySelectorAll('div.header_decorations');
|
|
308
|
+
panels.forEach((panel) => {
|
|
309
|
+
panel.remove();
|
|
310
|
+
});
|
|
311
|
+
waLog('Removed ' + panels.length + ' header decoration panels', 'removeHeaderDecorations');
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
runInGsftMainOnlyAndProtectByURL(removeHeaderDecorations, 'catalog_home');
|
|
315
|
+
"""
|
|
316
|
+
return script
|
|
317
|
+
|
|
318
|
+
def _get_remove_more_options_buttons_script(self):
|
|
319
|
+
"""
|
|
320
|
+
Removes all 'More Options' buttons from the service catalog page.
|
|
321
|
+
"""
|
|
322
|
+
script = """
|
|
323
|
+
function removeMoreOptionsButtons() {
|
|
324
|
+
waLog('Searching for More Options buttons...', 'removeMoreOptionsButtons');
|
|
325
|
+
let buttons = document.querySelectorAll('button.btn.btn-icon.icon-ellipsis');
|
|
326
|
+
buttons.forEach((button) => {
|
|
327
|
+
button.remove();
|
|
328
|
+
});
|
|
329
|
+
waLog('Removed ' + buttons.length + ' More Options buttons', 'removeMoreOptionsButtons');
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
runInGsftMainOnlyAndProtectByURL(removeMoreOptionsButtons, 'com.glideapp.servicecatalog');
|
|
333
|
+
"""
|
|
334
|
+
return script
|
|
335
|
+
|
|
279
336
|
def setup_goal(self, page: Page) -> tuple[str, dict]:
|
|
280
337
|
super().setup_goal(page=page)
|
|
281
338
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: browsergym-workarena
|
|
3
|
-
Version: 0.5.1
|
|
3
|
+
Version: 0.5.2
|
|
4
4
|
Summary: WorkArena benchmark for BrowserGym
|
|
5
5
|
Project-URL: homepage, https://github.com/ServiceNow/WorkArena
|
|
6
6
|
Author: Léo Boisvert, Alex Drouin, Maxime Gasse, Alex Lacoste, Manuel Del Verme, Megh Thakkar
|
|
@@ -30,16 +30,6 @@ Description-Content-Type: text/markdown
|
|
|
30
30
|
# WorkArena: A Benchmark for Evaluating Agents on Knowledge Work Tasks
|
|
31
31
|
[[Benchmark Contents]](#benchmark-contents) ♦ [[Getting Started]](#getting-started) ♦ [[Live Demo]](#live-demo) ♦ [[BrowserGym]](https://github.com/ServiceNow/BrowserGym) ♦ [[Citing This Work]](#citing-this-work) ♦ [Join us on Discord!](https://discord.gg/rDkP69X7)
|
|
32
32
|
|
|
33
|
-
## Join Our Discord Community
|
|
34
|
-
|
|
35
|
-
Want to brainstorm ideas, troubleshoot issues, or just geek out with fellow agent builders? Our official Discord server is the perfect place to connect and collaborate. Come hang out with us to:
|
|
36
|
-
|
|
37
|
-
- Exchange tips, tricks, and success stories
|
|
38
|
-
- Get real-time support and feedback
|
|
39
|
-
- Stay updated on the latest features and announcements
|
|
40
|
-
|
|
41
|
-
[Join us on Discord!](https://discord.gg/rDkP69X7)
|
|
42
|
-
|
|
43
33
|
---
|
|
44
34
|
|
|
45
35
|
### Explore the BrowserGym Ecosystem
|
|
@@ -73,6 +63,7 @@ To setup WorkArena, you will need to gain access to ServiceNow instances and ins
|
|
|
73
63
|
1. Navigate to https://huggingface.co/datasets/ServiceNow/WorkArena-Instances.
|
|
74
64
|
2. Fill the form, accept the terms to gain access to the gated repository and wait for approval.
|
|
75
65
|
3. Ensure that the machine where you will run WorkArena is [authenticated with Hugging Face](https://huggingface.co/docs/hub/en/datasets-polars-auth) (e.g., via huggingface-cli login or the HUGGING_FACE_HUB_TOKEN environment variable).
|
|
66
|
+
4. Unset any previous WorkArena environment variables if you are upgrading from a previous install (`SNOW_INSTANCE_URL`, etc.)
|
|
76
67
|
|
|
77
68
|
### b) Install WorkArena
|
|
78
69
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
browsergym/workarena/__init__.py,sha256=
|
|
2
|
-
browsergym/workarena/config.py,sha256=
|
|
1
|
+
browsergym/workarena/__init__.py,sha256=ho1CCagiI_bHKbtdqNIG5SJNDdQHPEUaCf4xzjhtj_I,6676
|
|
2
|
+
browsergym/workarena/config.py,sha256=n_nE6G08Edschfv9tKvJ1CWngpbaO3Uobxmbk9vfESU,8838
|
|
3
3
|
browsergym/workarena/install.py,sha256=sgj8h0VXMqXue7xrFrvlXHm2XryvyWEf6v_SJSUd9yc,43197
|
|
4
|
-
browsergym/workarena/instance.py,sha256=
|
|
4
|
+
browsergym/workarena/instance.py,sha256=nPDMCjdleQLlidpkyRGNm9VBSztAsz7bjeGAL9RO0M0,8396
|
|
5
5
|
browsergym/workarena/utils.py,sha256=mD6RqVua-m1-mKM1RGGlUEu1s6un0ZI9a5ZTPN7g1hY,3199
|
|
6
6
|
browsergym/workarena/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
7
|
browsergym/workarena/api/category.py,sha256=4oiwPnRas0ZWCdky76zhNpu_9PfB_HmhnFa_DJZyGfA,2084
|
|
@@ -78,13 +78,13 @@ browsergym/workarena/tasks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
|
|
|
78
78
|
browsergym/workarena/tasks/base.py,sha256=Ikh_A5I9_9acHFQCcnVMEnlBg3u3QHQD2I_NbGvD6SE,6411
|
|
79
79
|
browsergym/workarena/tasks/comp_building_block.py,sha256=Lg3KbAWrxzAHe5XbPN6L8bvdu7mfJpmBvI7jXeSDwKE,194
|
|
80
80
|
browsergym/workarena/tasks/dashboard.py,sha256=6ohpC40zpK1NLlfYM7RIqeenmuEuoIL9wOBUdG3JTFI,35842
|
|
81
|
-
browsergym/workarena/tasks/form.py,sha256=
|
|
81
|
+
browsergym/workarena/tasks/form.py,sha256=eTITgbZnz0EpxIKlJNXpjZ2v5kUXLU9rtM5PUh2Qhk4,65352
|
|
82
82
|
browsergym/workarena/tasks/knowledge.py,sha256=zf-Rx6C8OhJcULEeWe4IVzN_SeDNgQ0jSGKi16GIJXk,13671
|
|
83
|
-
browsergym/workarena/tasks/list.py,sha256=
|
|
83
|
+
browsergym/workarena/tasks/list.py,sha256=6Z8UypPFtvgpoT-Wm0pRpAfG-YgqXSz5Eya1P0z5KJQ,56628
|
|
84
84
|
browsergym/workarena/tasks/mark_duplicate_problem.py,sha256=2znPoyuC47hkIEz59jWR-KB2o4GKJ9z5K_C-mpBqBfE,7278
|
|
85
85
|
browsergym/workarena/tasks/navigation.py,sha256=Y80DpL8xBA8u9zSudW0W6Vf4qaRZUgW-jQO7pl6gOFs,8729
|
|
86
86
|
browsergym/workarena/tasks/send_chat_message.py,sha256=8yWSBEMDpv_reU4QH92rjtyPV6ZjhOAgby465Olc3jM,3854
|
|
87
|
-
browsergym/workarena/tasks/service_catalog.py,sha256=
|
|
87
|
+
browsergym/workarena/tasks/service_catalog.py,sha256=3OD3EnHT84W-5a85Uv_2Rf63Kzl9nAXrrocsrX9iuyE,27161
|
|
88
88
|
browsergym/workarena/tasks/compositional/__init__.py,sha256=zgbl23owwUZSnFD84rh-QJitaAsNCH0PNSct_H_NrM4,2341
|
|
89
89
|
browsergym/workarena/tasks/compositional/base.py,sha256=eIZhfpBOvZvrlC2X7PSbY_7JrILuezYe-NRzDTECHik,14578
|
|
90
90
|
browsergym/workarena/tasks/compositional/dash_do_base.py,sha256=ihxgwVxUfxBJXt49KzOSEH1i_8uymm1oMLGPrsD4zfI,58252
|
|
@@ -132,8 +132,8 @@ browsergym/workarena/tasks/utils/js_utils.js,sha256=n97fmY2Jkr59rEcQSuSbCnn1L2ZN
|
|
|
132
132
|
browsergym/workarena/tasks/utils/private_tasks.py,sha256=r7Z9SnBMuZdZ2i-tK6eULj0q8hclANXFSzdLl49KYHI,2128
|
|
133
133
|
browsergym/workarena/tasks/utils/string.py,sha256=ir5_ASD9QSFMZ9kuHo2snSXRuSfv_wROH6nxBLOTP4I,330
|
|
134
134
|
browsergym/workarena/tasks/utils/utils.py,sha256=xQD-njEwgN7qxfn1dLBN8MYfd3kl3TuVfpmI1yxML9k,955
|
|
135
|
-
browsergym_workarena-0.5.
|
|
136
|
-
browsergym_workarena-0.5.
|
|
137
|
-
browsergym_workarena-0.5.
|
|
138
|
-
browsergym_workarena-0.5.
|
|
139
|
-
browsergym_workarena-0.5.
|
|
135
|
+
browsergym_workarena-0.5.2.dist-info/METADATA,sha256=DLprG6i689Q5htAHQTocxfXSLgVz2iKgeGZPGzwX1p8,10242
|
|
136
|
+
browsergym_workarena-0.5.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
137
|
+
browsergym_workarena-0.5.2.dist-info/entry_points.txt,sha256=1lCeAbQFCcU6UTFwS5QIA3TKhT2P9ZabaZKT7sIShKc,137
|
|
138
|
+
browsergym_workarena-0.5.2.dist-info/licenses/LICENSE,sha256=sZLFiZHo_1hcxXRhXUDnQYVATUuWwRCdQjBxqxNnNEs,579
|
|
139
|
+
browsergym_workarena-0.5.2.dist-info/RECORD,,
|
|
File without changes
|
{browsergym_workarena-0.5.1.dist-info → browsergym_workarena-0.5.2.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{browsergym_workarena-0.5.1.dist-info → browsergym_workarena-0.5.2.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|