browsergym-workarena 0.1.0rc7__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/.github/workflows/pypi.yml +4 -3
- browsergym_workarena-0.2.1/.github/workflows/unit_tests.yml +104 -0
- browsergym_workarena-0.2.1/.gitignore +3 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/PKG-INFO +15 -5
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/README.md +10 -2
- browsergym_workarena-0.2.1/dev/environment.yaml +13 -0
- browsergym_workarena-0.2.1/dev/requirements.txt +9 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/pyproject.toml +28 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/requirements.txt +3 -1
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/__init__.py +3 -2
- browsergym_workarena-0.2.1/src/browsergym/workarena/api/ui_themes.py +35 -0
- browsergym_workarena-0.2.1/src/browsergym/workarena/api/user.py +153 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/api/utils.py +1 -1
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/config.py +43 -1
- browsergym_workarena-0.2.1/src/browsergym/workarena/data_files/setup_files/ui_themes/workarena_themes.xml +2313 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/all_menu.json +94 -94
- browsergym_workarena-0.2.1/src/browsergym/workarena/data_files/task_configs/dashboard_retrieval_minmax_task.json +1 -0
- browsergym_workarena-0.2.1/src/browsergym/workarena/data_files/task_configs/dashboard_retrieval_value_task.json +1 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/filter_service_catalog_item_list_task.json +7985 -7981
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/impersonation_users.json +2 -2
- browsergym_workarena-0.2.1/src/browsergym/workarena/data_files/task_configs/report_retrieval_minmax_task.json +1 -0
- browsergym_workarena-0.2.1/src/browsergym/workarena/data_files/task_configs/report_retrieval_value_task.json +1 -0
- browsergym_workarena-0.2.1/src/browsergym/workarena/install.py +1014 -0
- browsergym_workarena-0.2.1/src/browsergym/workarena/tasks/base.py +167 -0
- browsergym_workarena-0.2.1/src/browsergym/workarena/tasks/dashboard.py +620 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/form.py +121 -85
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/knowledge.py +30 -14
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/list.py +121 -67
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/navigation.py +18 -16
- browsergym_workarena-0.2.1/src/browsergym/workarena/tasks/scripts/generate_dashboard_configs.py +272 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/scripts/generate_forms.py +2 -2
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/scripts/list.py +2 -2
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/scripts/validate.py +2 -2
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/service_catalog.py +106 -74
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/utils/form.py +5 -3
- browsergym_workarena-0.2.1/src/browsergym/workarena/tasks/utils/js_utils.js +177 -0
- browsergym_workarena-0.2.1/src/browsergym/workarena/tasks/utils/string.py +15 -0
- browsergym_workarena-0.2.1/src/browsergym/workarena/tasks/utils/utils.py +20 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/utils.py +31 -2
- browsergym_workarena-0.2.1/tests/test_api.py +40 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/tests/test_random_config_generation.py +26 -25
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/tests/test_task_from_config.py +2 -2
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/tests/test_task_general.py +2 -2
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/tests/test_task_setup.py +28 -13
- browsergym_workarena-0.2.1/tests/test_utils.py +32 -0
- browsergym_workarena-0.1.0rc7/src/browsergym/workarena/install.py +0 -549
- browsergym_workarena-0.1.0rc7/src/browsergym/workarena/tasks/base.py +0 -108
- browsergym_workarena-0.1.0rc7/src/browsergym/workarena/tasks/utils/js_utils.js +0 -56
- browsergym_workarena-0.1.0rc7/tests/test_utils.py +0 -30
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/LICENSE +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/scripts/generate_knowledge_base.ipynb +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/api/__init__.py +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/api/requests.py +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/forms/expected_change_request_form_fields.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/forms/expected_hardware_form_fields.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/forms/expected_incident_form_fields.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/forms/expected_problem_form_fields.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/forms/expected_user_form_fields.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/knowledge/kb_autopublish_workflow.xml +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/knowledge/knowledge_base.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_asset_list_columns.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_change_request_list_columns.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_hardware_list_columns.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_incident_list_columns.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_service_catalog_list_columns.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_user_list_columns.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/create_change_request_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/create_hardware_asset_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/create_incident_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/create_problem_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/create_user_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/filter_asset_list_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/filter_change_request_list_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/filter_hardware_list_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/filter_incident_list_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/filter_user_list_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/knowledge_base_configs.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/order_apple_mac_book_pro15_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/order_apple_watch_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/order_developer_laptop_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/order_development_laptop_pc_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/order_ipad_mini_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/order_ipad_pro_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/order_loaner_laptop_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/order_sales_laptop_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/order_standard_laptop_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/sort_asset_list_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/sort_change_request_list_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/sort_hardware_list_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/sort_incident_list_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/sort_service_catalog_item_list_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/sort_user_list_task.json +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/instance.py +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/__init__.py +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/scripts/README.md +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/scripts/extract_all_menu_items.py +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/scripts/knowledge.py +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/scripts/navigation.py +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/scripts/service_catalog.py +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/utils/__init__.py +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/utils/debug.py +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/tests/test_snow_instance.py +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/tests/test_validate.py +0 -0
- {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/tests/utils.py +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
name:
|
|
1
|
+
name: Build and Publish
|
|
2
2
|
|
|
3
3
|
on: [push, workflow_dispatch]
|
|
4
4
|
|
|
@@ -48,10 +48,11 @@ jobs:
|
|
|
48
48
|
uses: pypa/gh-action-pypi-publish@release/v1
|
|
49
49
|
|
|
50
50
|
github-release:
|
|
51
|
-
name: Sign with Sigstore and upload them to GitHub Release
|
|
51
|
+
name: Sign packages with Sigstore and upload them to GitHub Release
|
|
52
52
|
needs:
|
|
53
53
|
- publish-to-pypi
|
|
54
54
|
runs-on: ubuntu-latest
|
|
55
|
+
|
|
55
56
|
permissions:
|
|
56
57
|
contents: write # IMPORTANT: mandatory for making GitHub Releases
|
|
57
58
|
id-token: write # IMPORTANT: mandatory for sigstore
|
|
@@ -64,7 +65,7 @@ jobs:
|
|
|
64
65
|
path: dist/
|
|
65
66
|
|
|
66
67
|
- name: Sign the dists with Sigstore
|
|
67
|
-
uses: sigstore/gh-action-sigstore-python@
|
|
68
|
+
uses: sigstore/gh-action-sigstore-python@v2.1.1
|
|
68
69
|
with:
|
|
69
70
|
inputs: >-
|
|
70
71
|
./dist/*.tar.gz
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
name: Unit tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
pull_request:
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
|
|
11
|
+
code-format:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
defaults:
|
|
14
|
+
run:
|
|
15
|
+
shell: bash -l {0}
|
|
16
|
+
steps:
|
|
17
|
+
|
|
18
|
+
- name: Checkout Repository
|
|
19
|
+
uses: actions/checkout@v4
|
|
20
|
+
|
|
21
|
+
- name: Set up Python
|
|
22
|
+
uses: actions/setup-python@v5
|
|
23
|
+
with:
|
|
24
|
+
python-version: '3.10'
|
|
25
|
+
cache: 'pip' # caching pip dependencies
|
|
26
|
+
|
|
27
|
+
- name: Pip install
|
|
28
|
+
run: pip install black[jupyter]==24.2.0 blacken-docs
|
|
29
|
+
|
|
30
|
+
- name: Pip list
|
|
31
|
+
run: pip list
|
|
32
|
+
|
|
33
|
+
- name: Code Formatting
|
|
34
|
+
run: black . --check
|
|
35
|
+
|
|
36
|
+
browsergym-workarena-fast:
|
|
37
|
+
runs-on: ubuntu-latest
|
|
38
|
+
|
|
39
|
+
defaults:
|
|
40
|
+
run:
|
|
41
|
+
shell: bash -l {0}
|
|
42
|
+
|
|
43
|
+
steps:
|
|
44
|
+
|
|
45
|
+
- name: Checkout Repository
|
|
46
|
+
uses: actions/checkout@v4
|
|
47
|
+
|
|
48
|
+
- name: Set up Python
|
|
49
|
+
uses: actions/setup-python@v5
|
|
50
|
+
with:
|
|
51
|
+
python-version: '3.10'
|
|
52
|
+
cache: 'pip' # caching pip dependencies
|
|
53
|
+
|
|
54
|
+
- name: Pip install
|
|
55
|
+
working-directory: ./dev
|
|
56
|
+
run: pip install -r requirements.txt
|
|
57
|
+
|
|
58
|
+
- name: Pip list
|
|
59
|
+
run: pip list
|
|
60
|
+
|
|
61
|
+
- name: Install Playwright
|
|
62
|
+
run: playwright install --with-deps
|
|
63
|
+
|
|
64
|
+
- name: Run non-slow browsergym-workarena Unit Tests
|
|
65
|
+
env:
|
|
66
|
+
SNOW_INSTANCE_URL: ${{ secrets.SNOW_INSTANCE_URL }}
|
|
67
|
+
SNOW_INSTANCE_UNAME: ${{ secrets.SNOW_INSTANCE_UNAME }}
|
|
68
|
+
SNOW_INSTANCE_PWD: ${{ secrets.SNOW_INSTANCE_PWD }}
|
|
69
|
+
run: pytest -n 5 --durations=10 -m 'not slow and not pricy' --slowmo 1000 -v tests
|
|
70
|
+
|
|
71
|
+
browsergym-workarena-slow:
|
|
72
|
+
runs-on: ubuntu-latest
|
|
73
|
+
|
|
74
|
+
defaults:
|
|
75
|
+
run:
|
|
76
|
+
shell: bash -l {0}
|
|
77
|
+
|
|
78
|
+
steps:
|
|
79
|
+
|
|
80
|
+
- name: Checkout Repository
|
|
81
|
+
uses: actions/checkout@v4
|
|
82
|
+
|
|
83
|
+
- name: Set up Python
|
|
84
|
+
uses: actions/setup-python@v5
|
|
85
|
+
with:
|
|
86
|
+
python-version: '3.10'
|
|
87
|
+
cache: 'pip' # caching pip dependencies
|
|
88
|
+
|
|
89
|
+
- name: Pip install
|
|
90
|
+
working-directory: ./dev
|
|
91
|
+
run: pip install -r requirements.txt
|
|
92
|
+
|
|
93
|
+
- name: Pip list
|
|
94
|
+
run: pip list
|
|
95
|
+
|
|
96
|
+
- name: Install Playwright
|
|
97
|
+
run: playwright install --with-deps
|
|
98
|
+
|
|
99
|
+
- name: Run slow browsergym-workarena Unit Tests
|
|
100
|
+
env:
|
|
101
|
+
SNOW_INSTANCE_URL: ${{ secrets.SNOW_INSTANCE_URL }}
|
|
102
|
+
SNOW_INSTANCE_UNAME: ${{ secrets.SNOW_INSTANCE_UNAME }}
|
|
103
|
+
SNOW_INSTANCE_PWD: ${{ secrets.SNOW_INSTANCE_PWD }}
|
|
104
|
+
run: pytest -n 5 --durations=10 -m 'slow and not pricy' --slowmo 1000 -v tests
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: browsergym-workarena
|
|
3
|
-
Version: 0.1.0rc7
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: WorkArena benchmark for BrowserGym
|
|
5
5
|
Project-URL: homepage, https://github.com/ServiceNow/WorkArena
|
|
6
|
-
Author: Léo Boisvert, Alex Drouin, Maxime Gasse, Alex Lacoste, Manuel Del Verme
|
|
6
|
+
Author: Léo Boisvert, Alex Drouin, Maxime Gasse, Alex Lacoste, Manuel Del Verme, Megh Thakkar
|
|
7
7
|
License: Apache-2.0
|
|
8
8
|
License-File: LICENSE
|
|
9
9
|
Classifier: Development Status :: 2 - Pre-Alpha
|
|
@@ -13,11 +13,13 @@ Classifier: Operating System :: OS Independent
|
|
|
13
13
|
Classifier: Programming Language :: Python :: 3
|
|
14
14
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
15
15
|
Requires-Python: >3.7
|
|
16
|
-
Requires-Dist: browsergym-core
|
|
16
|
+
Requires-Dist: browsergym-core>=0.2
|
|
17
17
|
Requires-Dist: english-words>=2.0.1
|
|
18
|
+
Requires-Dist: faker>=24.11.0
|
|
18
19
|
Requires-Dist: numpy>=1.14
|
|
19
20
|
Requires-Dist: requests>=2.31
|
|
20
21
|
Requires-Dist: tenacity>=8.2.3
|
|
22
|
+
Requires-Dist: tqdm>=4.66.2
|
|
21
23
|
Description-Content-Type: text/markdown
|
|
22
24
|
|
|
23
25
|
# WorkArena: How Capable are Web Agents at Solving Common Knowledge Work Tasks?
|
|
@@ -32,10 +34,12 @@ WorkArena is included in [BrowserGym](https://github.com/ServiceNow/BrowserGym),
|
|
|
32
34
|
|
|
33
35
|
https://github.com/ServiceNow/WorkArena/assets/2374980/68640f09-7d6f-4eb1-b556-c294a6afef70
|
|
34
36
|
|
|
37
|
+
## ⚠️ Pre-Release warning ⚠️
|
|
38
|
+
Please note that the WorkArena benchmark is still undergoing minor bug fixes and updates, which may cause discrepancies with results reported in our latest arXiv preprint. We plan to soon release a stable version of WorkArena with enhanced stability, and a final version v1.0.0 with a new suite of tasks.
|
|
35
39
|
|
|
36
40
|
## Benchmark Contents
|
|
37
41
|
|
|
38
|
-
At the moment, WorkArena includes `
|
|
42
|
+
At the moment, WorkArena includes `18,050` task instances drawn from `33` tasks that cover the main components of the ServiceNow user interface. The following videos show an agent built on `GPT-4-vision` interacting with every such component. As emphasized by our results, this benchmark is not solved and thus, the performance of the agent is not always on point.
|
|
39
43
|
|
|
40
44
|
### Knowledge Bases
|
|
41
45
|
|
|
@@ -71,6 +75,12 @@ https://github.com/ServiceNow/WorkArena/assets/1726818/7538b3ef-d39b-4978-b9ea-8
|
|
|
71
75
|
|
|
72
76
|
https://github.com/ServiceNow/WorkArena/assets/1726818/ca26dfaf-2358-4418-855f-80e482435e6e
|
|
73
77
|
|
|
78
|
+
### Dashboards
|
|
79
|
+
|
|
80
|
+
**Goal:** The agent must extract information from a dashboard.
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
|
|
74
84
|
## Getting Started
|
|
75
85
|
|
|
76
86
|
To set up WorkArena, you will need to get your own ServiceNow instance, install our Python package, and upload some data to your instance. Follow the steps below to achieve this.
|
|
@@ -78,7 +88,7 @@ To setup WorkArena, you will need to get your own ServiceNow instance, install o
|
|
|
78
88
|
### a) Create a ServiceNow Developer Instance
|
|
79
89
|
|
|
80
90
|
1. Go to https://developer.servicenow.com/ and create an account.
|
|
81
|
-
2. Click on `Request an instance` and select the `
|
|
91
|
+
2. Click on `Request an instance` and select the `Washington` release (initializing the instance will take a few minutes)
|
|
82
92
|
3. Once the instance is ready, you should see your instance URL and credentials. If not, click _Return to the Developer Portal_, then navigate to _Manage instance password_ and click _Reset instance password_.
|
|
83
93
|
4. You should now see your URL and credentials. Based on this information, set the following environment variables:
|
|
84
94
|
* `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
|
|
@@ -10,10 +10,12 @@ WorkArena is included in [BrowserGym](https://github.com/ServiceNow/BrowserGym),
|
|
|
10
10
|
|
|
11
11
|
https://github.com/ServiceNow/WorkArena/assets/2374980/68640f09-7d6f-4eb1-b556-c294a6afef70
|
|
12
12
|
|
|
13
|
+
## ⚠️ Pre-Release warning ⚠️
|
|
14
|
+
Please note that the WorkArena benchmark is still undergoing minor bug fixes and updates, which may cause discrepancies with results reported in our latest arXiv preprint. We plan to soon release a stable version of WorkArena with enhanced stability, and a final version v1.0.0 with a new suite of tasks.
|
|
13
15
|
|
|
14
16
|
## Benchmark Contents
|
|
15
17
|
|
|
16
|
-
At the moment, WorkArena includes `
|
|
18
|
+
At the moment, WorkArena includes `18,050` task instances drawn from `33` tasks that cover the main components of the ServiceNow user interface. The following videos show an agent built on `GPT-4-vision` interacting with every such component. As emphasized by our results, this benchmark is not solved and thus, the performance of the agent is not always on point.
|
|
17
19
|
|
|
18
20
|
### Knowledge Bases
|
|
19
21
|
|
|
@@ -49,6 +51,12 @@ https://github.com/ServiceNow/WorkArena/assets/1726818/7538b3ef-d39b-4978-b9ea-8
|
|
|
49
51
|
|
|
50
52
|
https://github.com/ServiceNow/WorkArena/assets/1726818/ca26dfaf-2358-4418-855f-80e482435e6e
|
|
51
53
|
|
|
54
|
+
### Dashboards
|
|
55
|
+
|
|
56
|
+
**Goal:** The agent must extract information from a dashboard.
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
|
|
52
60
|
## Getting Started
|
|
53
61
|
|
|
54
62
|
To set up WorkArena, you will need to get your own ServiceNow instance, install our Python package, and upload some data to your instance. Follow the steps below to achieve this.
|
|
@@ -56,7 +64,7 @@ To setup WorkArena, you will need to get your own ServiceNow instance, install o
|
|
|
56
64
|
### a) Create a ServiceNow Developer Instance
|
|
57
65
|
|
|
58
66
|
1. Go to https://developer.servicenow.com/ and create an account.
|
|
59
|
-
2. Click on `Request an instance` and select the `
|
|
67
|
+
2. Click on `Request an instance` and select the `Washington` release (initializing the instance will take a few minutes)
|
|
60
68
|
3. Once the instance is ready, you should see your instance URL and credentials. If not, click _Return to the Developer Portal_, then navigate to _Manage instance password_ and click _Reset instance password_.
|
|
61
69
|
4. You should now see your URL and credentials. Based on this information, set the following environment variables:
|
|
62
70
|
* `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
|
|
@@ -11,6 +11,7 @@ authors = [
|
|
|
11
11
|
{name = "Maxime Gasse"},
|
|
12
12
|
{name = "Alex Lacoste"},
|
|
13
13
|
{name = "Manuel Del Verme"},
|
|
14
|
+
{name = "Megh Thakkar"},
|
|
14
15
|
]
|
|
15
16
|
readme = "README.md"
|
|
16
17
|
requires-python = ">3.7"
|
|
@@ -39,3 +40,30 @@ files = ["requirements.txt"]
|
|
|
39
40
|
|
|
40
41
|
[tool.hatch.build.targets.wheel]
|
|
41
42
|
packages = ["src/browsergym"]
|
|
43
|
+
|
|
44
|
+
[tool.black]
|
|
45
|
+
line-length = 100
|
|
46
|
+
include = '\.pyi?$'
|
|
47
|
+
exclude = '''
|
|
48
|
+
/(
|
|
49
|
+
\.eggs
|
|
50
|
+
| \.git
|
|
51
|
+
| \.hg
|
|
52
|
+
| \.mypy_cache
|
|
53
|
+
| \.nox
|
|
54
|
+
| \.tox
|
|
55
|
+
| \.venv
|
|
56
|
+
| _build
|
|
57
|
+
| buck-out
|
|
58
|
+
| build
|
|
59
|
+
| dist
|
|
60
|
+
)/
|
|
61
|
+
'''
|
|
62
|
+
|
|
63
|
+
[tool.pytest.ini_options]
|
|
64
|
+
filterwarnings = [
|
|
65
|
+
'ignore::UserWarning:gymnasium.*:', # too many "The obs is not within the observation space." warnings.
|
|
66
|
+
]
|
|
67
|
+
markers = [
|
|
68
|
+
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
|
|
69
|
+
]
|
{browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/__init__.py
RENAMED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
__version__ = "0.1.0rc7"
|
|
1
|
+
__version__ = "0.2.1"
|
|
2
2
|
|
|
3
3
|
from browsergym.core.registration import register_task
|
|
4
4
|
|
|
5
|
+
from .tasks.dashboard import __TASKS__ as DASHBOARD_TASKS
|
|
5
6
|
from .tasks.form import __TASKS__ as FORM_TASKS
|
|
6
7
|
from .tasks.knowledge import __TASKS__ as KB_TASKS
|
|
7
8
|
from .tasks.list import __TASKS__ as LIST_TASKS
|
|
@@ -9,6 +10,7 @@ from .tasks.navigation import __TASKS__ as NAVIGATION_TASKS
|
|
|
9
10
|
from .tasks.service_catalog import __TASKS__ as SERVICE_CATALOG_TASKS
|
|
10
11
|
|
|
11
12
|
ALL_WORKARENA_TASKS = [
|
|
13
|
+
*DASHBOARD_TASKS,
|
|
12
14
|
*FORM_TASKS,
|
|
13
15
|
*KB_TASKS,
|
|
14
16
|
*LIST_TASKS,
|
|
@@ -21,5 +23,4 @@ for task in ALL_WORKARENA_TASKS:
|
|
|
21
23
|
register_task(
|
|
22
24
|
task.get_task_id(),
|
|
23
25
|
task,
|
|
24
|
-
kwargs={"viewport": {"width": 1280, "height": 720}, "timeout": 10000},
|
|
25
26
|
)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utility functions for UI themes
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from .utils import table_api_call
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def get_workarena_theme_variants(instance):
|
|
10
|
+
"""
|
|
11
|
+
Get the list of available WorkArena UI themes
|
|
12
|
+
|
|
13
|
+
Parameters:
|
|
14
|
+
-----------
|
|
15
|
+
instance: SNowInstance
|
|
16
|
+
The ServiceNow instance to get the UI themes from
|
|
17
|
+
|
|
18
|
+
Returns:
|
|
19
|
+
--------
|
|
20
|
+
list[dict]
|
|
21
|
+
The list of available WorkArena UI themes and their information
|
|
22
|
+
|
|
23
|
+
"""
|
|
24
|
+
themes = table_api_call(
|
|
25
|
+
instance=instance,
|
|
26
|
+
table="m2m_theme_style",
|
|
27
|
+
params={
|
|
28
|
+
"sysparm_query": "style.type=variant",
|
|
29
|
+
"sysparm_fields": "theme.name,theme.sys_id,style.name,style.sys_id",
|
|
30
|
+
"sysparm_display_value": True,
|
|
31
|
+
},
|
|
32
|
+
method="GET",
|
|
33
|
+
)["result"]
|
|
34
|
+
themes = [t for t in themes if t["theme.name"] == "WorkArena"]
|
|
35
|
+
return themes
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import random
|
|
2
|
+
from faker import Faker
|
|
3
|
+
import time
|
|
4
|
+
|
|
5
|
+
fake = Faker()
|
|
6
|
+
|
|
7
|
+
from ..instance import SNowInstance
|
|
8
|
+
from .ui_themes import get_workarena_theme_variants
|
|
9
|
+
from .utils import table_api_call
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def create_user(
|
|
13
|
+
instance: SNowInstance,
|
|
14
|
+
first_name: str = None,
|
|
15
|
+
last_name: str = None,
|
|
16
|
+
user_name: str = None,
|
|
17
|
+
admin=True,
|
|
18
|
+
) -> list[str]:
|
|
19
|
+
"""
|
|
20
|
+
Create a user with a random username and a fixed default password, optionally with the admin role
|
|
21
|
+
|
|
22
|
+
Parameters:
|
|
23
|
+
-----------
|
|
24
|
+
first_name: str
|
|
25
|
+
The first name of the user, defaults to a random first name
|
|
26
|
+
last_name: str
|
|
27
|
+
The last name of the user, defaults to a random last name
|
|
28
|
+
user_name: str
|
|
29
|
+
The user name of the user, defaults to first_name.last_name.<random 4-digit number>
|
|
30
|
+
admin: bool
|
|
31
|
+
Whether to give the user admin permissions
|
|
32
|
+
|
|
33
|
+
Returns:
|
|
34
|
+
--------
|
|
35
|
+
username, password, sys_id
|
|
36
|
+
|
|
37
|
+
"""
|
|
38
|
+
user_idx = str(random.randint(1000, 9999))
|
|
39
|
+
user_password = "aStrongPassword!"
|
|
40
|
+
first_name = fake.first_name() if not first_name else first_name
|
|
41
|
+
last_name = fake.last_name() if not last_name else last_name
|
|
42
|
+
|
|
43
|
+
# Create user
|
|
44
|
+
user_data = {
|
|
45
|
+
"user_name": f"{first_name}.{last_name}.{user_idx}" if not user_name else user_name,
|
|
46
|
+
"first_name": first_name,
|
|
47
|
+
"last_name": last_name,
|
|
48
|
+
"email": f"{first_name}.{last_name}.{user_idx}@workarena.com".lower(),
|
|
49
|
+
"user_password": user_password,
|
|
50
|
+
"active": True,
|
|
51
|
+
}
|
|
52
|
+
user_params = {"sysparm_input_display_value": True}
|
|
53
|
+
user_response = table_api_call(
|
|
54
|
+
instance=instance, table="sys_user", json=user_data, params=user_params, method="POST"
|
|
55
|
+
)["result"]
|
|
56
|
+
user_name = user_response["user_name"]
|
|
57
|
+
user_sys_id = user_response["sys_id"]
|
|
58
|
+
|
|
59
|
+
# Get admin role sys_id
|
|
60
|
+
if admin:
|
|
61
|
+
role_sys_id = table_api_call(
|
|
62
|
+
instance=instance,
|
|
63
|
+
table="sys_user_role",
|
|
64
|
+
params={"sysparm_query": "name=admin", "sysparm_fields": "sys_id"},
|
|
65
|
+
method="GET",
|
|
66
|
+
)["result"][0]["sys_id"]
|
|
67
|
+
|
|
68
|
+
# Give admin permissions
|
|
69
|
+
association_data = {"user": user_sys_id, "role": role_sys_id}
|
|
70
|
+
table_api_call(
|
|
71
|
+
instance=instance, table="sys_user_has_role", json=association_data, method="POST"
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
# Randomly pick a UI theme and set it for the user
|
|
75
|
+
themes = get_workarena_theme_variants(instance)
|
|
76
|
+
theme = random.choice(themes)
|
|
77
|
+
set_user_preference(
|
|
78
|
+
instance, "glide.ui.polaris.theme.variant", theme["style.sys_id"], user=user_sys_id
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
return user_name, user_password, user_sys_id
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def set_user_preference(instance: SNowInstance, key: str, value: str, user=None) -> dict:
|
|
85
|
+
"""
|
|
86
|
+
Set a user preference in the ServiceNow instance
|
|
87
|
+
|
|
88
|
+
Parameters:
|
|
89
|
+
-----------
|
|
90
|
+
key: str
|
|
91
|
+
The name of the preference
|
|
92
|
+
value: str
|
|
93
|
+
The value of the preference
|
|
94
|
+
user: str
|
|
95
|
+
The sys_id of the user. If None, the preference will be set globally.
|
|
96
|
+
|
|
97
|
+
Returns:
|
|
98
|
+
--------
|
|
99
|
+
dict
|
|
100
|
+
The preference that was set
|
|
101
|
+
|
|
102
|
+
"""
|
|
103
|
+
if user is None:
|
|
104
|
+
# make it global
|
|
105
|
+
user = ""
|
|
106
|
+
system = True
|
|
107
|
+
else:
|
|
108
|
+
system = False
|
|
109
|
+
|
|
110
|
+
# Try to get the preference's sys_id
|
|
111
|
+
preference = table_api_call(
|
|
112
|
+
instance=instance,
|
|
113
|
+
table="sys_user_preference",
|
|
114
|
+
params={"sysparm_query": f"name={key},user={user}", "sysparm_fields": "sys_id"},
|
|
115
|
+
)["result"]
|
|
116
|
+
|
|
117
|
+
if not preference:
|
|
118
|
+
# ... The preference key doesn't exist, create it
|
|
119
|
+
pref_sysid = ""
|
|
120
|
+
method = "POST"
|
|
121
|
+
else:
|
|
122
|
+
# ... The preference key exists, update it
|
|
123
|
+
pref_sysid = "/" + preference[0]["sys_id"]
|
|
124
|
+
method = "PUT"
|
|
125
|
+
|
|
126
|
+
property = table_api_call(
|
|
127
|
+
instance=instance,
|
|
128
|
+
table=f"sys_user_preference{pref_sysid}",
|
|
129
|
+
method=method,
|
|
130
|
+
json={
|
|
131
|
+
"name": key,
|
|
132
|
+
"value": value,
|
|
133
|
+
"user": user,
|
|
134
|
+
"system": system,
|
|
135
|
+
"description": "Updated by WorkArena",
|
|
136
|
+
},
|
|
137
|
+
)["result"]
|
|
138
|
+
|
|
139
|
+
# Verify that the preference was updated
|
|
140
|
+
property["user"] = (
|
|
141
|
+
property["user"].get("value") if isinstance(property["user"], dict) else property["user"]
|
|
142
|
+
)
|
|
143
|
+
assert (
|
|
144
|
+
property["value"] == value
|
|
145
|
+
), f"Error setting system property {key}, incorrect value {property['value']}, while expecting {value}."
|
|
146
|
+
assert (
|
|
147
|
+
property["user"] == user
|
|
148
|
+
), f"Error setting system property {key}, incorrect user {property['user']}, while expecting {user}."
|
|
149
|
+
assert (
|
|
150
|
+
property["system"] == str(system).lower()
|
|
151
|
+
), f"Error setting {key}, incorrect system {property['system']}, while expecting {system}."
|
|
152
|
+
|
|
153
|
+
return property
|
{browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/api/utils.py
RENAMED
|
@@ -86,7 +86,7 @@ def table_column_info(instance: SNowInstance, table: str) -> dict:
|
|
|
86
86
|
|
|
87
87
|
# Clean column value choices
|
|
88
88
|
for info in meta_info.values():
|
|
89
|
-
if "choices"
|
|
89
|
+
if info.get("choices", None):
|
|
90
90
|
info["choices"] = {c["value"]: c["label"] for c in info["choices"]}
|
|
91
91
|
|
|
92
92
|
# Query the sys_dictionary table to find more info (e.g., is this column dependent on another)
|
{browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/config.py
RENAMED
|
@@ -7,10 +7,25 @@ from ..workarena.tasks import utils
|
|
|
7
7
|
SNOW_DATA_LOOKBACK_MINUTES = 5
|
|
8
8
|
SNOW_BROWSER_TIMEOUT = 30000 # Milliseconds
|
|
9
9
|
SNOW_JS_UTILS_FILEPATH = str(resources.files(utils).joinpath("js_utils.js"))
|
|
10
|
-
SNOW_SUPPORTED_RELEASES = ["
|
|
10
|
+
SNOW_SUPPORTED_RELEASES = ["washingtondc"]
|
|
11
11
|
|
|
12
12
|
# Path to the Menu navigation task configuration
|
|
13
13
|
ALL_MENU_PATH = str(resources.files(data_files).joinpath("task_configs/all_menu.json"))
|
|
14
|
+
|
|
15
|
+
# Path to the dashboard/report retrieval task configurations
|
|
16
|
+
DASHBOARD_RETRIEVAL_MINMAX_CONFIG_PATH = str(
|
|
17
|
+
resources.files(data_files).joinpath("task_configs/dashboard_retrieval_minmax_task.json")
|
|
18
|
+
)
|
|
19
|
+
DASHBOARD_RETRIEVAL_VALUE_CONFIG_PATH = str(
|
|
20
|
+
resources.files(data_files).joinpath("task_configs/dashboard_retrieval_value_task.json")
|
|
21
|
+
)
|
|
22
|
+
REPORT_RETRIEVAL_MINMAX_CONFIG_PATH = str(
|
|
23
|
+
resources.files(data_files).joinpath("task_configs/report_retrieval_minmax_task.json")
|
|
24
|
+
)
|
|
25
|
+
REPORT_RETRIEVAL_VALUE_CONFIG_PATH = str(
|
|
26
|
+
resources.files(data_files).joinpath("task_configs/report_retrieval_value_task.json")
|
|
27
|
+
)
|
|
28
|
+
|
|
14
29
|
# Path to knowledge base task configurations
|
|
15
30
|
KB_CONFIG_PATH = str(
|
|
16
31
|
resources.files(data_files).joinpath("task_configs/knowledge_base_configs.json")
|
|
@@ -119,6 +134,28 @@ WORKFLOWS = {
|
|
|
119
134
|
}
|
|
120
135
|
}
|
|
121
136
|
|
|
137
|
+
|
|
138
|
+
# Custom UI Themes
|
|
139
|
+
UI_THEMES_UPDATE_SET = {
|
|
140
|
+
"name": "WorkArena UI Themes",
|
|
141
|
+
"update_set": str(
|
|
142
|
+
resources.files(data_files).joinpath("setup_files/ui_themes/workarena_themes.xml")
|
|
143
|
+
),
|
|
144
|
+
"variants": [
|
|
145
|
+
"Astranova",
|
|
146
|
+
"Charlies",
|
|
147
|
+
"Great pasta",
|
|
148
|
+
"Mighty capital",
|
|
149
|
+
"Speedy tires",
|
|
150
|
+
"Skyward",
|
|
151
|
+
"Turbobots",
|
|
152
|
+
"Ultrashoes",
|
|
153
|
+
"Vitasphere",
|
|
154
|
+
"Workarena",
|
|
155
|
+
],
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
|
|
122
159
|
# Expected columns for list tasks; used in setup
|
|
123
160
|
EXPECTED_ASSET_LIST_COLUMNS_PATH = str(
|
|
124
161
|
resources.files(data_files).joinpath("setup_files/lists/expected_asset_list_columns.json")
|
|
@@ -163,3 +200,8 @@ EXPECTED_PROBLEM_FORM_FIELDS_PATH = str(
|
|
|
163
200
|
EXPECTED_USER_FORM_FIELDS_PATH = str(
|
|
164
201
|
resources.files(data_files).joinpath("setup_files/forms/expected_user_form_fields.json")
|
|
165
202
|
)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
# Report date filter patch flag
|
|
206
|
+
REPORT_PATCH_FLAG = "WORKARENA_DATE_FILTER_PATCH"
|
|
207
|
+
REPORT_DATE_FILTER = "2024-04-01"
|