browsergym-workarena 0.1.0rc7__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/.github/workflows/pypi.yml +4 -3
  2. browsergym_workarena-0.2.1/.github/workflows/unit_tests.yml +104 -0
  3. browsergym_workarena-0.2.1/.gitignore +3 -0
  4. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/PKG-INFO +15 -5
  5. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/README.md +10 -2
  6. browsergym_workarena-0.2.1/dev/environment.yaml +13 -0
  7. browsergym_workarena-0.2.1/dev/requirements.txt +9 -0
  8. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/pyproject.toml +28 -0
  9. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/requirements.txt +3 -1
  10. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/__init__.py +3 -2
  11. browsergym_workarena-0.2.1/src/browsergym/workarena/api/ui_themes.py +35 -0
  12. browsergym_workarena-0.2.1/src/browsergym/workarena/api/user.py +153 -0
  13. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/api/utils.py +1 -1
  14. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/config.py +43 -1
  15. browsergym_workarena-0.2.1/src/browsergym/workarena/data_files/setup_files/ui_themes/workarena_themes.xml +2313 -0
  16. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/all_menu.json +94 -94
  17. browsergym_workarena-0.2.1/src/browsergym/workarena/data_files/task_configs/dashboard_retrieval_minmax_task.json +1 -0
  18. browsergym_workarena-0.2.1/src/browsergym/workarena/data_files/task_configs/dashboard_retrieval_value_task.json +1 -0
  19. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/filter_service_catalog_item_list_task.json +7985 -7981
  20. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/impersonation_users.json +2 -2
  21. browsergym_workarena-0.2.1/src/browsergym/workarena/data_files/task_configs/report_retrieval_minmax_task.json +1 -0
  22. browsergym_workarena-0.2.1/src/browsergym/workarena/data_files/task_configs/report_retrieval_value_task.json +1 -0
  23. browsergym_workarena-0.2.1/src/browsergym/workarena/install.py +1014 -0
  24. browsergym_workarena-0.2.1/src/browsergym/workarena/tasks/base.py +167 -0
  25. browsergym_workarena-0.2.1/src/browsergym/workarena/tasks/dashboard.py +620 -0
  26. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/form.py +121 -85
  27. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/knowledge.py +30 -14
  28. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/list.py +121 -67
  29. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/navigation.py +18 -16
  30. browsergym_workarena-0.2.1/src/browsergym/workarena/tasks/scripts/generate_dashboard_configs.py +272 -0
  31. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/scripts/generate_forms.py +2 -2
  32. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/scripts/list.py +2 -2
  33. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/scripts/validate.py +2 -2
  34. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/service_catalog.py +106 -74
  35. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/utils/form.py +5 -3
  36. browsergym_workarena-0.2.1/src/browsergym/workarena/tasks/utils/js_utils.js +177 -0
  37. browsergym_workarena-0.2.1/src/browsergym/workarena/tasks/utils/string.py +15 -0
  38. browsergym_workarena-0.2.1/src/browsergym/workarena/tasks/utils/utils.py +20 -0
  39. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/utils.py +31 -2
  40. browsergym_workarena-0.2.1/tests/test_api.py +40 -0
  41. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/tests/test_random_config_generation.py +26 -25
  42. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/tests/test_task_from_config.py +2 -2
  43. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/tests/test_task_general.py +2 -2
  44. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/tests/test_task_setup.py +28 -13
  45. browsergym_workarena-0.2.1/tests/test_utils.py +32 -0
  46. browsergym_workarena-0.1.0rc7/src/browsergym/workarena/install.py +0 -549
  47. browsergym_workarena-0.1.0rc7/src/browsergym/workarena/tasks/base.py +0 -108
  48. browsergym_workarena-0.1.0rc7/src/browsergym/workarena/tasks/utils/js_utils.js +0 -56
  49. browsergym_workarena-0.1.0rc7/tests/test_utils.py +0 -30
  50. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/LICENSE +0 -0
  51. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/scripts/generate_knowledge_base.ipynb +0 -0
  52. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/api/__init__.py +0 -0
  53. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/api/requests.py +0 -0
  54. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/forms/expected_change_request_form_fields.json +0 -0
  55. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/forms/expected_hardware_form_fields.json +0 -0
  56. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/forms/expected_incident_form_fields.json +0 -0
  57. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/forms/expected_problem_form_fields.json +0 -0
  58. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/forms/expected_user_form_fields.json +0 -0
  59. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/knowledge/kb_autopublish_workflow.xml +0 -0
  60. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/knowledge/knowledge_base.json +0 -0
  61. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_asset_list_columns.json +0 -0
  62. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_change_request_list_columns.json +0 -0
  63. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_hardware_list_columns.json +0 -0
  64. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_incident_list_columns.json +0 -0
  65. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_service_catalog_list_columns.json +0 -0
  66. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_user_list_columns.json +0 -0
  67. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/create_change_request_task.json +0 -0
  68. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/create_hardware_asset_task.json +0 -0
  69. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/create_incident_task.json +0 -0
  70. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/create_problem_task.json +0 -0
  71. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/create_user_task.json +0 -0
  72. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/filter_asset_list_task.json +0 -0
  73. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/filter_change_request_list_task.json +0 -0
  74. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/filter_hardware_list_task.json +0 -0
  75. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/filter_incident_list_task.json +0 -0
  76. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/filter_user_list_task.json +0 -0
  77. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/knowledge_base_configs.json +0 -0
  78. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/order_apple_mac_book_pro15_task.json +0 -0
  79. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/order_apple_watch_task.json +0 -0
  80. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/order_developer_laptop_task.json +0 -0
  81. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/order_development_laptop_pc_task.json +0 -0
  82. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/order_ipad_mini_task.json +0 -0
  83. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/order_ipad_pro_task.json +0 -0
  84. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/order_loaner_laptop_task.json +0 -0
  85. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/order_sales_laptop_task.json +0 -0
  86. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/order_standard_laptop_task.json +0 -0
  87. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/sort_asset_list_task.json +0 -0
  88. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/sort_change_request_list_task.json +0 -0
  89. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/sort_hardware_list_task.json +0 -0
  90. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/sort_incident_list_task.json +0 -0
  91. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/sort_service_catalog_item_list_task.json +0 -0
  92. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/data_files/task_configs/sort_user_list_task.json +0 -0
  93. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/instance.py +0 -0
  94. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/__init__.py +0 -0
  95. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/scripts/README.md +0 -0
  96. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/scripts/extract_all_menu_items.py +0 -0
  97. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/scripts/knowledge.py +0 -0
  98. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/scripts/navigation.py +0 -0
  99. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/scripts/service_catalog.py +0 -0
  100. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/utils/__init__.py +0 -0
  101. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/src/browsergym/workarena/tasks/utils/debug.py +0 -0
  102. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/tests/test_snow_instance.py +0 -0
  103. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/tests/test_validate.py +0 -0
  104. {browsergym_workarena-0.1.0rc7 → browsergym_workarena-0.2.1}/tests/utils.py +0 -0
@@ -1,4 +1,4 @@
1
- name: Publish Python 🐍 distribution 📦 to PyPI and TestPyPI
1
+ name: Build and Publish
2
2
 
3
3
  on: [push, workflow_dispatch]
4
4
 
@@ -48,10 +48,11 @@ jobs:
48
48
  uses: pypa/gh-action-pypi-publish@release/v1
49
49
 
50
50
  github-release:
51
- name: Sign with Sigstore and upload them to GitHub Release
51
+ name: Sign packages with Sigstore and upload them to GitHub Release
52
52
  needs:
53
53
  - publish-to-pypi
54
54
  runs-on: ubuntu-latest
55
+
55
56
  permissions:
56
57
  contents: write # IMPORTANT: mandatory for making GitHub Releases
57
58
  id-token: write # IMPORTANT: mandatory for sigstore
@@ -64,7 +65,7 @@ jobs:
64
65
  path: dist/
65
66
 
66
67
  - name: Sign the dists with Sigstore
67
- uses: sigstore/gh-action-sigstore-python@v1.2.3
68
+ uses: sigstore/gh-action-sigstore-python@v2.1.1
68
69
  with:
69
70
  inputs: >-
70
71
  ./dist/*.tar.gz
@@ -0,0 +1,104 @@
1
+ name: Unit tests
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+
9
+ jobs:
10
+
11
+ code-format:
12
+ runs-on: ubuntu-latest
13
+ defaults:
14
+ run:
15
+ shell: bash -l {0}
16
+ steps:
17
+
18
+ - name: Checkout Repository
19
+ uses: actions/checkout@v4
20
+
21
+ - name: Set up Python
22
+ uses: actions/setup-python@v5
23
+ with:
24
+ python-version: '3.10'
25
+ cache: 'pip' # caching pip dependencies
26
+
27
+ - name: Pip install
28
+ run: pip install black[jupyter]==24.2.0 blacken-docs
29
+
30
+ - name: Pip list
31
+ run: pip list
32
+
33
+ - name: Code Formatting
34
+ run: black . --check
35
+
36
+ browsergym-workarena-fast:
37
+ runs-on: ubuntu-latest
38
+
39
+ defaults:
40
+ run:
41
+ shell: bash -l {0}
42
+
43
+ steps:
44
+
45
+ - name: Checkout Repository
46
+ uses: actions/checkout@v4
47
+
48
+ - name: Set up Python
49
+ uses: actions/setup-python@v5
50
+ with:
51
+ python-version: '3.10'
52
+ cache: 'pip' # caching pip dependencies
53
+
54
+ - name: Pip install
55
+ working-directory: ./dev
56
+ run: pip install -r requirements.txt
57
+
58
+ - name: Pip list
59
+ run: pip list
60
+
61
+ - name: Install Playwright
62
+ run: playwright install --with-deps
63
+
64
+ - name: Run non-slow browsergym-workarena Unit Tests
65
+ env:
66
+ SNOW_INSTANCE_URL: ${{ secrets.SNOW_INSTANCE_URL }}
67
+ SNOW_INSTANCE_UNAME: ${{ secrets.SNOW_INSTANCE_UNAME }}
68
+ SNOW_INSTANCE_PWD: ${{ secrets.SNOW_INSTANCE_PWD }}
69
+ run: pytest -n 5 --durations=10 -m 'not slow and not pricy' --slowmo 1000 -v tests
70
+
71
+ browsergym-workarena-slow:
72
+ runs-on: ubuntu-latest
73
+
74
+ defaults:
75
+ run:
76
+ shell: bash -l {0}
77
+
78
+ steps:
79
+
80
+ - name: Checkout Repository
81
+ uses: actions/checkout@v4
82
+
83
+ - name: Set up Python
84
+ uses: actions/setup-python@v5
85
+ with:
86
+ python-version: '3.10'
87
+ cache: 'pip' # caching pip dependencies
88
+
89
+ - name: Pip install
90
+ working-directory: ./dev
91
+ run: pip install -r requirements.txt
92
+
93
+ - name: Pip list
94
+ run: pip list
95
+
96
+ - name: Install Playwright
97
+ run: playwright install --with-deps
98
+
99
+ - name: Run slow browsergym-workarena Unit Tests
100
+ env:
101
+ SNOW_INSTANCE_URL: ${{ secrets.SNOW_INSTANCE_URL }}
102
+ SNOW_INSTANCE_UNAME: ${{ secrets.SNOW_INSTANCE_UNAME }}
103
+ SNOW_INSTANCE_PWD: ${{ secrets.SNOW_INSTANCE_PWD }}
104
+ run: pytest -n 5 --durations=10 -m 'slow and not pricy' --slowmo 1000 -v tests
@@ -0,0 +1,3 @@
1
+ .DS_store
2
+ __pycache__/
3
+ *.py[cod]
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: browsergym-workarena
3
- Version: 0.1.0rc7
3
+ Version: 0.2.1
4
4
  Summary: WorkArena benchmark for BrowserGym
5
5
  Project-URL: homepage, https://github.com/ServiceNow/WorkArena
6
- Author: Léo Boisvert, Alex Drouin, Maxime Gasse, Alex Lacoste, Manuel Del Verme
6
+ Author: Léo Boisvert, Alex Drouin, Maxime Gasse, Alex Lacoste, Manuel Del Verme, Megh Thakkar
7
7
  License: Apache-2.0
8
8
  License-File: LICENSE
9
9
  Classifier: Development Status :: 2 - Pre-Alpha
@@ -13,11 +13,13 @@ Classifier: Operating System :: OS Independent
13
13
  Classifier: Programming Language :: Python :: 3
14
14
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
15
  Requires-Python: >3.7
16
- Requires-Dist: browsergym-core==0.1.0rc7
16
+ Requires-Dist: browsergym-core>=0.2
17
17
  Requires-Dist: english-words>=2.0.1
18
+ Requires-Dist: faker>=24.11.0
18
19
  Requires-Dist: numpy>=1.14
19
20
  Requires-Dist: requests>=2.31
20
21
  Requires-Dist: tenacity>=8.2.3
22
+ Requires-Dist: tqdm>=4.66.2
21
23
  Description-Content-Type: text/markdown
22
24
 
23
25
  # WorkArena: How Capable are Web Agents at Solving Common Knowledge Work Tasks?
@@ -32,10 +34,12 @@ WorkArena is included in [BrowserGym](https://github.com/ServiceNow/BrowserGym),
32
34
 
33
35
  https://github.com/ServiceNow/WorkArena/assets/2374980/68640f09-7d6f-4eb1-b556-c294a6afef70
34
36
 
37
+ ## ⚠️ Pre-Release warning ⚠️
38
+ Please note that the WorkArena benchmark is still undergoing minor bug fixes and updates, which may cause discrepancies with results reported in our latest arXiv preprint. We plan to release a stable version of WorkArena with enhanced stability soon, followed by a final version v1.0.0 with a new suite of tasks.
35
39
 
36
40
  ## Benchmark Contents
37
41
 
38
- At the moment, WorkArena includes `23,150` task instances drawn from `29` tasks that cover the main components of the ServiceNow user interface. The following videos show an agent built on `GPT-4-vision` interacting with every such component. As emphasized by our results, this benchmark is not solved and thus, the performance of the agent is not always on point.
42
+ At the moment, WorkArena includes `18,050` task instances drawn from `33` tasks that cover the main components of the ServiceNow user interface. The following videos show an agent built on `GPT-4-vision` interacting with every such component. As emphasized by our results, this benchmark is not solved and thus, the performance of the agent is not always on point.
39
43
 
40
44
  ### Knowledge Bases
41
45
 
@@ -71,6 +75,12 @@ https://github.com/ServiceNow/WorkArena/assets/1726818/7538b3ef-d39b-4978-b9ea-8
71
75
 
72
76
  https://github.com/ServiceNow/WorkArena/assets/1726818/ca26dfaf-2358-4418-855f-80e482435e6e
73
77
 
78
+ ### Dashboards
79
+
80
+ **Goal:** The agent must extract information from a dashboard.
81
+
82
+
83
+
74
84
  ## Getting Started
75
85
 
76
86
  To set up WorkArena, you will need to get your own ServiceNow instance, install our Python package, and upload some data to your instance. Follow the steps below to achieve this.
@@ -78,7 +88,7 @@ To setup WorkArena, you will need to get your own ServiceNow instance, install o
78
88
  ### a) Create a ServiceNow Developer Instance
79
89
 
80
90
  1. Go to https://developer.servicenow.com/ and create an account.
81
- 2. Click on `Request an instance` and select the `Utah` release (initializing the instance will take a few minutes)
91
+ 2. Click on `Request an instance` and select the `Washington` release (initializing the instance will take a few minutes)
82
92
  3. Once the instance is ready, you should see your instance URL and credentials. If not, click _Return to the Developer Portal_, then navigate to _Manage instance password_ and click _Reset instance password_.
83
93
  4. You should now see your URL and credentials. Based on this information, set the following environment variables:
84
94
  * `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
@@ -10,10 +10,12 @@ WorkArena is included in [BrowserGym](https://github.com/ServiceNow/BrowserGym),
10
10
 
11
11
  https://github.com/ServiceNow/WorkArena/assets/2374980/68640f09-7d6f-4eb1-b556-c294a6afef70
12
12
 
13
+ ## ⚠️ Pre-Release warning ⚠️
14
+ Please note that the WorkArena benchmark is still undergoing minor bug fixes and updates, which may cause discrepancies with results reported in our latest arXiv preprint. We plan to release a stable version of WorkArena with enhanced stability soon, followed by a final version v1.0.0 with a new suite of tasks.
13
15
 
14
16
  ## Benchmark Contents
15
17
 
16
- At the moment, WorkArena includes `23,150` task instances drawn from `29` tasks that cover the main components of the ServiceNow user interface. The following videos show an agent built on `GPT-4-vision` interacting with every such component. As emphasized by our results, this benchmark is not solved and thus, the performance of the agent is not always on point.
18
+ At the moment, WorkArena includes `18,050` task instances drawn from `33` tasks that cover the main components of the ServiceNow user interface. The following videos show an agent built on `GPT-4-vision` interacting with every such component. As emphasized by our results, this benchmark is not solved and thus, the performance of the agent is not always on point.
17
19
 
18
20
  ### Knowledge Bases
19
21
 
@@ -49,6 +51,12 @@ https://github.com/ServiceNow/WorkArena/assets/1726818/7538b3ef-d39b-4978-b9ea-8
49
51
 
50
52
  https://github.com/ServiceNow/WorkArena/assets/1726818/ca26dfaf-2358-4418-855f-80e482435e6e
51
53
 
54
+ ### Dashboards
55
+
56
+ **Goal:** The agent must extract information from a dashboard.
57
+
58
+
59
+
52
60
  ## Getting Started
53
61
 
54
62
  To set up WorkArena, you will need to get your own ServiceNow instance, install our Python package, and upload some data to your instance. Follow the steps below to achieve this.
@@ -56,7 +64,7 @@ To setup WorkArena, you will need to get your own ServiceNow instance, install o
56
64
  ### a) Create a ServiceNow Developer Instance
57
65
 
58
66
  1. Go to https://developer.servicenow.com/ and create an account.
59
- 2. Click on `Request an instance` and select the `Utah` release (initializing the instance will take a few minutes)
67
+ 2. Click on `Request an instance` and select the `Washington` release (initializing the instance will take a few minutes)
60
68
  3. Once the instance is ready, you should see your instance URL and credentials. If not, click _Return to the Developer Portal_, then navigate to _Manage instance password_ and click _Reset instance password_.
61
69
  4. You should now see your URL and credentials. Based on this information, set the following environment variables:
62
70
  * `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
@@ -0,0 +1,13 @@
1
+ name: workarena-dev
2
+
3
+ channels:
4
+ - huggingface
5
+ - conda-forge
6
+ - defaults
7
+
8
+ dependencies:
9
+ - python>=3.10
10
+ - pip
11
+
12
+ - pip:
13
+ - -r requirements.txt
@@ -0,0 +1,9 @@
1
+ black[jupyter]==24.2.0
2
+ blacken-docs
3
+ pre-commit
4
+ pytest==7.3.2
5
+ pytest-xdist
6
+ pytest-playwright
7
+ tenacity
8
+ browsergym-core
9
+ -e .. # local package
@@ -11,6 +11,7 @@ authors = [
11
11
  {name = "Maxime Gasse"},
12
12
  {name = "Alex Lacoste"},
13
13
  {name = "Manuel Del Verme"},
14
+ {name = "Megh Thakkar"},
14
15
  ]
15
16
  readme = "README.md"
16
17
  requires-python = ">3.7"
@@ -39,3 +40,30 @@ files = ["requirements.txt"]
39
40
 
40
41
  [tool.hatch.build.targets.wheel]
41
42
  packages = ["src/browsergym"]
43
+
44
+ [tool.black]
45
+ line-length = 100
46
+ include = '\.pyi?$'
47
+ exclude = '''
48
+ /(
49
+ \.eggs
50
+ | \.git
51
+ | \.hg
52
+ | \.mypy_cache
53
+ | \.nox
54
+ | \.tox
55
+ | \.venv
56
+ | _build
57
+ | buck-out
58
+ | build
59
+ | dist
60
+ )/
61
+ '''
62
+
63
+ [tool.pytest.ini_options]
64
+ filterwarnings = [
65
+ 'ignore::UserWarning:gymnasium.*:', # too many "The obs is not within the observation space." warnings.
66
+ ]
67
+ markers = [
68
+ "slow: marks tests as slow (deselect with '-m \"not slow\"')",
69
+ ]
@@ -1,5 +1,7 @@
1
- browsergym-core==0.1.0rc7
1
+ browsergym-core>=0.2
2
2
  english-words>=2.0.1
3
+ faker>=24.11.0
3
4
  numpy>=1.14
4
5
  requests>=2.31
5
6
  tenacity>=8.2.3 # only used in cheat() -> move to tests?
7
+ tqdm>=4.66.2
@@ -1,7 +1,8 @@
1
- __version__ = "0.1.0rc7"
1
+ __version__ = "0.2.1"
2
2
 
3
3
  from browsergym.core.registration import register_task
4
4
 
5
+ from .tasks.dashboard import __TASKS__ as DASHBOARD_TASKS
5
6
  from .tasks.form import __TASKS__ as FORM_TASKS
6
7
  from .tasks.knowledge import __TASKS__ as KB_TASKS
7
8
  from .tasks.list import __TASKS__ as LIST_TASKS
@@ -9,6 +10,7 @@ from .tasks.navigation import __TASKS__ as NAVIGATION_TASKS
9
10
  from .tasks.service_catalog import __TASKS__ as SERVICE_CATALOG_TASKS
10
11
 
11
12
  ALL_WORKARENA_TASKS = [
13
+ *DASHBOARD_TASKS,
12
14
  *FORM_TASKS,
13
15
  *KB_TASKS,
14
16
  *LIST_TASKS,
@@ -21,5 +23,4 @@ for task in ALL_WORKARENA_TASKS:
21
23
  register_task(
22
24
  task.get_task_id(),
23
25
  task,
24
- kwargs={"viewport": {"width": 1280, "height": 720}, "timeout": 10000},
25
26
  )
@@ -0,0 +1,35 @@
1
+ """
2
+ Utility functions for UI themes
3
+
4
+ """
5
+
6
+ from .utils import table_api_call
7
+
8
+
9
+ def get_workarena_theme_variants(instance):
10
+ """
11
+ Get the list of available WorkArena UI themes
12
+
13
+ Parameters:
14
+ -----------
15
+ instance: SNowInstance
16
+ The ServiceNow instance to get the UI themes from
17
+
18
+ Returns:
19
+ --------
20
+ list[dict]
21
+ The list of available WorkArena UI themes and their information
22
+
23
+ """
24
+ themes = table_api_call(
25
+ instance=instance,
26
+ table="m2m_theme_style",
27
+ params={
28
+ "sysparm_query": "style.type=variant",
29
+ "sysparm_fields": "theme.name,theme.sys_id,style.name,style.sys_id",
30
+ "sysparm_display_value": True,
31
+ },
32
+ method="GET",
33
+ )["result"]
34
+ themes = [t for t in themes if t["theme.name"] == "WorkArena"]
35
+ return themes
@@ -0,0 +1,153 @@
1
+ import random
2
+ from faker import Faker
3
+ import time
4
+
5
+ fake = Faker()
6
+
7
+ from ..instance import SNowInstance
8
+ from .ui_themes import get_workarena_theme_variants
9
+ from .utils import table_api_call
10
+
11
+
12
+ def create_user(
13
+ instance: SNowInstance,
14
+ first_name: str = None,
15
+ last_name: str = None,
16
+ user_name: str = None,
17
+ admin=True,
18
+ ) -> list[str]:
19
+ """
20
+ Create a user with a random username and password with an admin role
21
+
22
+ Parameters:
23
+ -----------
24
+ first_name: str
25
+ The first name of the user, defaults to a random first name
26
+ last_name: str
27
+ The last name of the user, defaults to a random last name
28
+ user_name: str
29
+ The user name of the user, defaults to first_name.last_name
30
+ admin: bool
31
+ Whether to give the user admin permissions
32
+
33
+ Returns:
34
+ --------
35
+ username, password, sys_id
36
+
37
+ """
38
+ user_idx = str(random.randint(1000, 9999))
39
+ user_password = "aStrongPassword!"
40
+ first_name = fake.first_name() if not first_name else first_name
41
+ last_name = fake.last_name() if not last_name else last_name
42
+
43
+ # Create user
44
+ user_data = {
45
+ "user_name": f"{first_name}.{last_name}.{user_idx}" if not user_name else user_name,
46
+ "first_name": first_name,
47
+ "last_name": last_name,
48
+ "email": f"{first_name}.{last_name}.{user_idx}@workarena.com".lower(),
49
+ "user_password": user_password,
50
+ "active": True,
51
+ }
52
+ user_params = {"sysparm_input_display_value": True}
53
+ user_response = table_api_call(
54
+ instance=instance, table="sys_user", json=user_data, params=user_params, method="POST"
55
+ )["result"]
56
+ user_name = user_response["user_name"]
57
+ user_sys_id = user_response["sys_id"]
58
+
59
+ # Get admin role sys_id
60
+ if admin:
61
+ role_sys_id = table_api_call(
62
+ instance=instance,
63
+ table="sys_user_role",
64
+ params={"sysparm_query": "name=admin", "sysparm_fields": "sys_id"},
65
+ method="GET",
66
+ )["result"][0]["sys_id"]
67
+
68
+ # Give admin permissions
69
+ association_data = {"user": user_sys_id, "role": role_sys_id}
70
+ table_api_call(
71
+ instance=instance, table="sys_user_has_role", json=association_data, method="POST"
72
+ )
73
+
74
+ # Randomly pick a UI theme and set it for the user
75
+ themes = get_workarena_theme_variants(instance)
76
+ theme = random.choice(themes)
77
+ set_user_preference(
78
+ instance, "glide.ui.polaris.theme.variant", theme["style.sys_id"], user=user_sys_id
79
+ )
80
+
81
+ return user_name, user_password, user_sys_id
82
+
83
+
84
+ def set_user_preference(instance: SNowInstance, key: str, value: str, user=None) -> dict:
85
+ """
86
+ Set a user preference in the ServiceNow instance
87
+
88
+ Parameters:
89
+ -----------
90
+ key: str
91
+ The name of the preference
92
+ value: str
93
+ The value of the preference
94
+ user: str
95
+ The sys_id of the user. If None, the preference will be set globally.
96
+
97
+ Returns:
98
+ --------
99
+ dict
100
+ The preference that was set
101
+
102
+ """
103
+ if user is None:
104
+ # make it global
105
+ user = ""
106
+ system = True
107
+ else:
108
+ system = False
109
+
110
+ # Try to get the preference's sys_id
111
+ preference = table_api_call(
112
+ instance=instance,
113
+ table="sys_user_preference",
114
+ params={"sysparm_query": f"name={key},user={user}", "sysparm_fields": "sys_id"},
115
+ )["result"]
116
+
117
+ if not preference:
118
+ # ... The preference key doesn't exist, create it
119
+ pref_sysid = ""
120
+ method = "POST"
121
+ else:
122
+ # ... The preference key exists, update it
123
+ pref_sysid = "/" + preference[0]["sys_id"]
124
+ method = "PUT"
125
+
126
+ property = table_api_call(
127
+ instance=instance,
128
+ table=f"sys_user_preference{pref_sysid}",
129
+ method=method,
130
+ json={
131
+ "name": key,
132
+ "value": value,
133
+ "user": user,
134
+ "system": system,
135
+ "description": "Updated by WorkArena",
136
+ },
137
+ )["result"]
138
+
139
+ # Verify that the property was updated
140
+ property["user"] = (
141
+ property["user"].get("value") if isinstance(property["user"], dict) else property["user"]
142
+ )
143
+ assert (
144
+ property["value"] == value
145
+ ), f"Error setting system property {key}, incorrect value {property['value']}, while expecting {value}."
146
+ assert (
147
+ property["user"] == user
148
+ ), f"Error setting system property {key}, incorrect user {property['user']}, while expecting {user}."
149
+ assert (
150
+ property["system"] == str(system).lower()
151
+ ), f"Error setting {key}, incorrect system {property['system']}, while expecting {system}."
152
+
153
+ return property
@@ -86,7 +86,7 @@ def table_column_info(instance: SNowInstance, table: str) -> dict:
86
86
 
87
87
  # Clean column value choices
88
88
  for info in meta_info.values():
89
- if "choices" in info:
89
+ if info.get("choices", None):
90
90
  info["choices"] = {c["value"]: c["label"] for c in info["choices"]}
91
91
 
92
92
  # Query the sys_dictionary table to find more info (e.g., whether this column depends on another)
@@ -7,10 +7,25 @@ from ..workarena.tasks import utils
7
7
  SNOW_DATA_LOOKBACK_MINUTES = 5
8
8
  SNOW_BROWSER_TIMEOUT = 30000 # Milliseconds
9
9
  SNOW_JS_UTILS_FILEPATH = str(resources.files(utils).joinpath("js_utils.js"))
10
- SNOW_SUPPORTED_RELEASES = ["utah"]
10
+ SNOW_SUPPORTED_RELEASES = ["washingtondc"]
11
11
 
12
12
  # Path to the Menu navigation task configuration
13
13
  ALL_MENU_PATH = str(resources.files(data_files).joinpath("task_configs/all_menu.json"))
14
+
15
+ # Path to the dashboard/report retrieval task configurations
16
+ DASHBOARD_RETRIEVAL_MINMAX_CONFIG_PATH = str(
17
+ resources.files(data_files).joinpath("task_configs/dashboard_retrieval_minmax_task.json")
18
+ )
19
+ DASHBOARD_RETRIEVAL_VALUE_CONFIG_PATH = str(
20
+ resources.files(data_files).joinpath("task_configs/dashboard_retrieval_value_task.json")
21
+ )
22
+ REPORT_RETRIEVAL_MINMAX_CONFIG_PATH = str(
23
+ resources.files(data_files).joinpath("task_configs/report_retrieval_minmax_task.json")
24
+ )
25
+ REPORT_RETRIEVAL_VALUE_CONFIG_PATH = str(
26
+ resources.files(data_files).joinpath("task_configs/report_retrieval_value_task.json")
27
+ )
28
+
14
29
  # Path to knowledge base task configurations
15
30
  KB_CONFIG_PATH = str(
16
31
  resources.files(data_files).joinpath("task_configs/knowledge_base_configs.json")
@@ -119,6 +134,28 @@ WORKFLOWS = {
119
134
  }
120
135
  }
121
136
 
137
+
138
+ # Custom UI Themes
139
+ UI_THEMES_UPDATE_SET = {
140
+ "name": "WorkArena UI Themes",
141
+ "update_set": str(
142
+ resources.files(data_files).joinpath("setup_files/ui_themes/workarena_themes.xml")
143
+ ),
144
+ "variants": [
145
+ "Astranova",
146
+ "Charlies",
147
+ "Great pasta",
148
+ "Mighty capital",
149
+ "Speedy tires",
150
+ "Skyward",
151
+ "Turbobots",
152
+ "Ultrashoes",
153
+ "Vitasphere",
154
+ "Workarena",
155
+ ],
156
+ }
157
+
158
+
122
159
  # Expected columns for list tasks; used in setup
123
160
  EXPECTED_ASSET_LIST_COLUMNS_PATH = str(
124
161
  resources.files(data_files).joinpath("setup_files/lists/expected_asset_list_columns.json")
@@ -163,3 +200,8 @@ EXPECTED_PROBLEM_FORM_FIELDS_PATH = str(
163
200
  EXPECTED_USER_FORM_FIELDS_PATH = str(
164
201
  resources.files(data_files).joinpath("setup_files/forms/expected_user_form_fields.json")
165
202
  )
203
+
204
+
205
+ # Report date filter patch flag
206
+ REPORT_PATCH_FLAG = "WORKARENA_DATE_FILTER_PATCH"
207
+ REPORT_DATE_FILTER = "2024-04-01"