browsergym-workarena 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/.github/workflows/pypi.yml +3 -2
  2. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/.github/workflows/unit_tests.yml +26 -24
  3. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/PKG-INFO +27 -20
  4. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/README.md +23 -16
  5. browsergym_workarena-0.3.0/dev/environment.yaml +13 -0
  6. browsergym_workarena-0.3.0/dev/requirements.txt +9 -0
  7. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/pyproject.toml +29 -0
  8. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/requirements.txt +2 -2
  9. browsergym_workarena-0.3.0/scripts/extract_finetuning_traces.py +131 -0
  10. browsergym_workarena-0.3.0/scripts/generate_knowledge_base.ipynb +1499 -0
  11. browsergym_workarena-0.3.0/scripts/make_human_eval_curriculum.py +54 -0
  12. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/__init__.py +13 -1
  13. browsergym_workarena-0.3.0/src/browsergym/workarena/api/category.py +74 -0
  14. browsergym_workarena-0.3.0/src/browsergym/workarena/api/change_request.py +87 -0
  15. browsergym_workarena-0.3.0/src/browsergym/workarena/api/computer_asset.py +90 -0
  16. browsergym_workarena-0.3.0/src/browsergym/workarena/api/cost_center.py +19 -0
  17. browsergym_workarena-0.3.0/src/browsergym/workarena/api/expense_line.py +89 -0
  18. browsergym_workarena-0.3.0/src/browsergym/workarena/api/incident.py +45 -0
  19. browsergym_workarena-0.3.0/src/browsergym/workarena/api/knowledge.py +29 -0
  20. browsergym_workarena-0.3.0/src/browsergym/workarena/api/problem.py +90 -0
  21. browsergym_workarena-0.3.0/src/browsergym/workarena/api/report.py +183 -0
  22. browsergym_workarena-0.3.0/src/browsergym/workarena/api/requested_items.py +63 -0
  23. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/api/user.py +11 -8
  24. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/api/utils.py +47 -3
  25. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/config.py +21 -1
  26. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/setup_files/forms/expected_incident_form_fields.json +1 -1
  27. browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/setup_files/forms/expected_request_item_form_fields.json +1 -0
  28. browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/setup_files/knowledge/protocols.json +46 -0
  29. browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/setup_files/knowledge/test.html +1 -0
  30. browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/setup_files/lists/expected_asset_list_columns.json +12 -0
  31. browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/setup_files/lists/expected_change_request_list_columns.json +12 -0
  32. browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/setup_files/lists/expected_expense_line_list_columns.json +12 -0
  33. browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/setup_files/lists/expected_hardware_list_columns.json +12 -0
  34. browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/setup_files/lists/expected_incident_list_columns.json +12 -0
  35. browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/setup_files/lists/expected_problem_list_columns.json +12 -0
  36. browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/setup_files/lists/expected_requested_items_list_columns.json +12 -0
  37. browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/setup_files/lists/expected_service_catalog_list_columns.json +12 -0
  38. browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/setup_files/lists/expected_user_list_columns.json +12 -0
  39. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/all_menu.json +95 -95
  40. browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/task_configs/dashboard_retrieval_minmax_task.json +1 -0
  41. browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/task_configs/dashboard_retrieval_value_task.json +1 -0
  42. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/filter_service_catalog_item_list_task.json +7986 -7982
  43. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/impersonation_users.json +3 -3
  44. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/report_retrieval_minmax_task.json +1 -1
  45. browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/task_configs/report_retrieval_value_task.json +1 -0
  46. browsergym_workarena-0.3.0/src/browsergym/workarena/human_eval/console.js +176 -0
  47. browsergym_workarena-0.3.0/src/browsergym/workarena/human_eval/tool.py +366 -0
  48. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/install.py +81 -20
  49. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/base.py +55 -20
  50. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/comp_building_block.py +4 -0
  51. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/__init__.py +76 -0
  52. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/base.py +364 -0
  53. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/dash_do_base.py +1366 -0
  54. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/dash_do_catalog.py +1127 -0
  55. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/dash_do_catalog_infeasible.py +2047 -0
  56. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/dash_do_create_incident.py +403 -0
  57. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/dash_do_create_incident_infeasible.py +278 -0
  58. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/dash_do_create_problem.py +336 -0
  59. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/dash_do_create_problem_infeasible.py +235 -0
  60. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/dash_do_filter.py +1600 -0
  61. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/dash_do_request_item.py +1315 -0
  62. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/dash_do_request_item_infeasible.py +693 -0
  63. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/delete_record.py +341 -0
  64. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/edit_knowledge_base.py +457 -0
  65. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/expense_management.py +598 -0
  66. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/filter_and_do.py +139 -0
  67. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/find_and_order_item.py +345 -0
  68. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/manage_change_request_schedule.py +1417 -0
  69. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/mark_duplicate_problems.py +499 -0
  70. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/maximize_investment_return.py +1763 -0
  71. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/navigate_and_do.py +1151 -0
  72. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/navigate_and_do_infeasible.py +2100 -0
  73. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/offboard_user.py +207 -0
  74. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/onboard_user.py +226 -0
  75. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/update_task.py +145 -0
  76. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/utils/curriculum.py +215 -0
  77. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/utils/infeasible_configs.py +151 -0
  78. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/utils/knapsack.py +192 -0
  79. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/warranty_check.py +227 -0
  80. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/work_assignment.py +804 -0
  81. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/workload_balancing.py +396 -0
  82. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/dashboard.py +188 -8
  83. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/form.py +1593 -0
  84. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/knowledge.py +359 -0
  85. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/list.py +519 -102
  86. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/mark_duplicate_problem.py +171 -0
  87. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/navigation.py +55 -13
  88. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/scripts/extract_all_menu_items.py +9 -2
  89. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/scripts/generate_dashboard_configs.py +6 -5
  90. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/scripts/service_catalog.py +2 -1
  91. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/scripts/validate.py +8 -2
  92. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/send_chat_message.py +90 -0
  93. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/service_catalog.py +94 -26
  94. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/utils/form.py +1 -4
  95. browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/utils/private_tasks.py +63 -0
  96. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/utils/utils.py +13 -0
  97. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/tests/test_api.py +1 -0
  98. browsergym_workarena-0.3.0/tests/test_compositional_utils.py +92 -0
  99. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/tests/test_random_config_generation.py +24 -23
  100. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/tests/test_task_from_config.py +37 -3
  101. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/tests/test_task_general.py +4 -9
  102. browsergym_workarena-0.2.0/scripts/generate_knowledge_base.ipynb +0 -1374
  103. browsergym_workarena-0.2.0/src/browsergym/workarena/data_files/setup_files/lists/expected_asset_list_columns.json +0 -34
  104. browsergym_workarena-0.2.0/src/browsergym/workarena/data_files/setup_files/lists/expected_change_request_list_columns.json +0 -48
  105. browsergym_workarena-0.2.0/src/browsergym/workarena/data_files/setup_files/lists/expected_hardware_list_columns.json +0 -53
  106. browsergym_workarena-0.2.0/src/browsergym/workarena/data_files/setup_files/lists/expected_incident_list_columns.json +0 -28
  107. browsergym_workarena-0.2.0/src/browsergym/workarena/data_files/setup_files/lists/expected_service_catalog_list_columns.json +0 -29
  108. browsergym_workarena-0.2.0/src/browsergym/workarena/data_files/setup_files/lists/expected_user_list_columns.json +0 -59
  109. browsergym_workarena-0.2.0/src/browsergym/workarena/data_files/task_configs/dashboard_retrieval_minmax_task.json +0 -1
  110. browsergym_workarena-0.2.0/src/browsergym/workarena/data_files/task_configs/dashboard_retrieval_value_task.json +0 -1
  111. browsergym_workarena-0.2.0/src/browsergym/workarena/data_files/task_configs/report_retrieval_value_task.json +0 -1
  112. browsergym_workarena-0.2.0/src/browsergym/workarena/tasks/form.py +0 -801
  113. browsergym_workarena-0.2.0/src/browsergym/workarena/tasks/knowledge.py +0 -168
  114. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/.gitignore +0 -0
  115. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/LICENSE +0 -0
  116. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/api/__init__.py +0 -0
  117. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/api/requests.py +0 -0
  118. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/api/ui_themes.py +0 -0
  119. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/setup_files/forms/expected_change_request_form_fields.json +0 -0
  120. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/setup_files/forms/expected_hardware_form_fields.json +0 -0
  121. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/setup_files/forms/expected_problem_form_fields.json +0 -0
  122. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/setup_files/forms/expected_user_form_fields.json +0 -0
  123. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/setup_files/knowledge/kb_autopublish_workflow.xml +0 -0
  124. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/setup_files/knowledge/knowledge_base.json +0 -0
  125. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/setup_files/ui_themes/workarena_themes.xml +0 -0
  126. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/create_change_request_task.json +0 -0
  127. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/create_hardware_asset_task.json +0 -0
  128. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/create_incident_task.json +0 -0
  129. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/create_problem_task.json +0 -0
  130. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/create_user_task.json +0 -0
  131. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/filter_asset_list_task.json +0 -0
  132. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/filter_change_request_list_task.json +0 -0
  133. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/filter_hardware_list_task.json +0 -0
  134. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/filter_incident_list_task.json +0 -0
  135. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/filter_user_list_task.json +0 -0
  136. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/knowledge_base_configs.json +0 -0
  137. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/order_apple_mac_book_pro15_task.json +0 -0
  138. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/order_apple_watch_task.json +0 -0
  139. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/order_developer_laptop_task.json +0 -0
  140. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/order_development_laptop_pc_task.json +0 -0
  141. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/order_ipad_mini_task.json +0 -0
  142. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/order_ipad_pro_task.json +0 -0
  143. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/order_loaner_laptop_task.json +0 -0
  144. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/order_sales_laptop_task.json +0 -0
  145. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/order_standard_laptop_task.json +0 -0
  146. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/sort_asset_list_task.json +0 -0
  147. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/sort_change_request_list_task.json +0 -0
  148. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/sort_hardware_list_task.json +0 -0
  149. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/sort_incident_list_task.json +0 -0
  150. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/sort_service_catalog_item_list_task.json +0 -0
  151. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/sort_user_list_task.json +0 -0
  152. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/instance.py +0 -0
  153. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/__init__.py +0 -0
  154. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/scripts/README.md +0 -0
  155. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/scripts/generate_forms.py +0 -0
  156. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/scripts/knowledge.py +0 -0
  157. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/scripts/list.py +0 -0
  158. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/scripts/navigation.py +0 -0
  159. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/utils/__init__.py +0 -0
  160. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/utils/debug.py +0 -0
  161. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/utils/js_utils.js +0 -0
  162. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/utils/string.py +0 -0
  163. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/utils.py +0 -0
  164. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/tests/test_snow_instance.py +0 -0
  165. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/tests/test_task_setup.py +0 -0
  166. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/tests/test_utils.py +0 -0
  167. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/tests/test_validate.py +0 -0
  168. {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/tests/utils.py +0 -0
@@ -48,10 +48,11 @@ jobs:
48
48
  uses: pypa/gh-action-pypi-publish@release/v1
49
49
 
50
50
  github-release:
51
- name: Sign with Sigstore and upload them to GitHub Release
51
+ name: Sign packages with Sigstore and upload them to GitHub Release
52
52
  needs:
53
53
  - publish-to-pypi
54
54
  runs-on: ubuntu-latest
55
+
55
56
  permissions:
56
57
  contents: write # IMPORTANT: mandatory for making GitHub Releases
57
58
  id-token: write # IMPORTANT: mandatory for sigstore
@@ -64,7 +65,7 @@ jobs:
64
65
  path: dist/
65
66
 
66
67
  - name: Sign the dists with Sigstore
67
- uses: sigstore/gh-action-sigstore-python@v1.2.3
68
+ uses: sigstore/gh-action-sigstore-python@v2.1.1
68
69
  with:
69
70
  inputs: >-
70
71
  ./dist/*.tar.gz
@@ -34,38 +34,39 @@ jobs:
34
34
  run: black . --check
35
35
 
36
36
  browsergym-workarena-fast:
37
- runs-on: ubuntu-latest
37
+ runs-on: ubuntu-latest
38
38
 
39
- defaults:
40
- run:
41
- shell: bash -l {0}
39
+ defaults:
40
+ run:
41
+ shell: bash -l {0}
42
42
 
43
- steps:
43
+ steps:
44
44
 
45
- - name: Checkout Repository
46
- uses: actions/checkout@v4
45
+ - name: Checkout Repository
46
+ uses: actions/checkout@v4
47
47
 
48
- - name: Set up Python
49
- uses: actions/setup-python@v5
50
- with:
51
- python-version: '3.10'
52
- cache: 'pip' # caching pip dependencies
48
+ - name: Set up Python
49
+ uses: actions/setup-python@v5
50
+ with:
51
+ python-version: '3.10'
52
+ cache: 'pip' # caching pip dependencies
53
53
 
54
- - name: Pip install
55
- run: pip install -r requirements.txt
54
+ - name: Pip install
55
+ working-directory: ./dev
56
+ run: pip install -r requirements.txt
56
57
 
57
- - name: Pip list
58
- run: pip list
58
+ - name: Pip list
59
+ run: pip list
59
60
 
60
- - name: Install Playwright
61
- run: playwright install --with-deps
61
+ - name: Install Playwright
62
+ run: playwright install --with-deps
62
63
 
63
- - name: Run non-slow browsergym-workarena Unit Tests
64
- env:
65
- SNOW_INSTANCE_URL: ${{ secrets.SNOW_INSTANCE_URL }}
66
- SNOW_INSTANCE_UNAME: ${{ secrets.SNOW_INSTANCE_UNAME }}
67
- SNOW_INSTANCE_PWD: ${{ secrets.SNOW_INSTANCE_PWD }}
68
- run: pytest -n 5 --durations=10 -m 'not slow and not pricy' --slowmo 1000 -v tests
64
+ - name: Run non-slow browsergym-workarena Unit Tests
65
+ env:
66
+ SNOW_INSTANCE_URL: ${{ secrets.SNOW_INSTANCE_URL }}
67
+ SNOW_INSTANCE_UNAME: ${{ secrets.SNOW_INSTANCE_UNAME }}
68
+ SNOW_INSTANCE_PWD: ${{ secrets.SNOW_INSTANCE_PWD }}
69
+ run: pytest -n 5 --durations=10 -m 'not slow and not pricy' --slowmo 1000 -v tests
69
70
 
70
71
  browsergym-workarena-slow:
71
72
  runs-on: ubuntu-latest
@@ -86,6 +87,7 @@ jobs:
86
87
  cache: 'pip' # caching pip dependencies
87
88
 
88
89
  - name: Pip install
90
+ working-directory: ./dev
89
91
  run: pip install -r requirements.txt
90
92
 
91
93
  - name: Pip list
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: browsergym-workarena
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: WorkArena benchmark for BrowserGym
5
5
  Project-URL: homepage, https://github.com/ServiceNow/WorkArena
6
- Author: Léo Boisvert, Alex Drouin, Maxime Gasse, Alex Lacoste, Manuel Del Verme
6
+ Author: Léo Boisvert, Alex Drouin, Maxime Gasse, Alex Lacoste, Manuel Del Verme, Megh Thakkar
7
7
  License: Apache-2.0
8
8
  License-File: LICENSE
9
9
  Classifier: Development Status :: 2 - Pre-Alpha
@@ -13,9 +13,9 @@ Classifier: Operating System :: OS Independent
13
13
  Classifier: Programming Language :: Python :: 3
14
14
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
15
  Requires-Python: >3.7
16
- Requires-Dist: browsergym-core==0.2.0
16
+ Requires-Dist: browsergym-core>=0.2
17
17
  Requires-Dist: english-words>=2.0.1
18
- Requires-Dist: faker>=24.11.0
18
+ Requires-Dist: faker>=24.8.0
19
19
  Requires-Dist: numpy>=1.14
20
20
  Requires-Dist: requests>=2.31
21
21
  Requires-Dist: tenacity>=8.2.3
@@ -34,12 +34,9 @@ WorkArena is included in [BrowserGym](https://github.com/ServiceNow/BrowserGym),
34
34
 
35
35
  https://github.com/ServiceNow/WorkArena/assets/2374980/68640f09-7d6f-4eb1-b556-c294a6afef70
36
36
 
37
- ## ⚠️ Pre-Release warning ⚠️
38
- Please note that the WorkArena benchmark is still undergoing minor bug fixes and updates, which may cause discrepancies with results reported in our latest arXiv preprint. We plan to release soon a stable version of WorkArena v0.1.0 with enhanced stability, and a final version v1.0.0 with a new suite of tasks.
39
-
40
37
  ## Benchmark Contents
41
38
 
42
- At the moment, WorkArena includes `18,050` task instances drawn from `29` tasks that cover the main components of the ServiceNow user interface. The following videos show an agent built on `GPT-4-vision` interacting with every such component. As emphasized by our results, this benchmark is not solved and thus, the performance of the agent is not always on point.
39
+ At the moment, WorkArena includes `19,912` unique instances drawn from `33` tasks that cover the main components of the ServiceNow user interface. The following videos show an agent built on `GPT-4-vision` interacting with every such component. As emphasized by our results, this benchmark is not solved and thus, the performance of the agent is not always on point.
43
40
 
44
41
  ### Knowledge Bases
45
42
 
@@ -75,6 +72,15 @@ https://github.com/ServiceNow/WorkArena/assets/1726818/7538b3ef-d39b-4978-b9ea-8
75
72
 
76
73
  https://github.com/ServiceNow/WorkArena/assets/1726818/ca26dfaf-2358-4418-855f-80e482435e6e
77
74
 
75
+ ### Dashboards
76
+
77
+ **Goal:** The agent must answer a question that requires reading charts and (optionally) performing simple reasoning over them.
78
+
79
+ *Note: For demonstration purposes, a human is controlling the cursor since this is a pure retrieval task*
80
+
81
+ https://github.com/ServiceNow/WorkArena/assets/1726818/0023232c-081f-4be4-99bd-f60c766e6c3f
82
+
83
+
78
84
  ## Getting Started
79
85
 
80
86
  To set up WorkArena, you will need to get your own ServiceNow instance, install our Python package, and upload some data to your instance. Follow the steps below to achieve this.
@@ -82,7 +88,7 @@ To setup WorkArena, you will need to get your own ServiceNow instance, install o
82
88
  ### a) Create a ServiceNow Developer Instance
83
89
 
84
90
  1. Go to https://developer.servicenow.com/ and create an account.
85
- 2. Click on `Request an instance` and select the `Utah` release (initializing the instance will take a few minutes)
91
+ 2. Click on `Request an instance` and select the `Washington` release (initializing the instance will take a few minutes)
86
92
  3. Once the instance is ready, you should see your instance URL and credentials. If not, click _Return to the Developer Portal_, then navigate to _Manage instance password_ and click _Reset instance password_.
87
93
  4. You should now see your URL and credentials. Based on this information, set the following environment variables:
88
94
  * `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
@@ -116,6 +122,8 @@ Your installation is now complete! 🎉
116
122
 
117
123
  Run this code to see WorkArena in action.
118
124
 
125
+ Note: the following example executes WorkArena's oracle (cheat) function to solve each task. To evaluate an agent, calls to `env.step()` must be used instead.
126
+
119
127
  ```python
120
128
  import random
121
129
 
@@ -130,28 +138,27 @@ for task in ALL_WORKARENA_TASKS:
130
138
 
131
139
  # Instantiate a new environment
132
140
  env = BrowserEnv(task_entrypoint=task,
133
- headless=False,
134
- slow_mo=1000)
141
+ headless=False)
135
142
  env.reset()
136
143
 
137
144
  # Cheat functions use Playwright to automatically solve the task
138
145
  env.chat.add_message(role="assistant", msg="On it. Please wait...")
139
- env.task.cheat(env.page, env.chat.messages)
146
+ cheat_messages = []
147
+ env.task.cheat(env.page, cheat_messages)
148
+
149
+ # Send cheat messages to chat
150
+ for cheat_msg in cheat_messages:
151
+ env.chat.add_message(role=cheat_msg["role"], msg=cheat_msg["message"])
140
152
 
141
153
  # Post solution to chat
142
- if "KnowledgeBaseSearchTask" in str(task):
143
- answer = env.chat.messages[-1]["message"]
144
- env.chat.add_message(role="assistant", msg=f"The answer is:")
145
- env.chat.add_message(role="assistant", msg=answer)
146
- else:
147
- env.chat.add_message(role="assistant", msg="I'm done!")
154
+ env.chat.add_message(role="assistant", msg="I'm done!")
148
155
 
149
156
  # Validate the solution
150
- reward, stop, info, message = env.task.validate(env.page, env.chat.messages)
157
+ reward, stop, message, info = env.task.validate(env.page, cheat_messages)
151
158
  if reward == 1:
152
159
  env.chat.add_message(role="user", msg="Yes, that works. Thanks!")
153
160
  else:
154
- env.chat.add_message(role="user", msg=f"No, that doesn't work. {message.get('message', '')}")
161
+ env.chat.add_message(role="user", msg=f"No, that doesn't work. {info.get('message', '')}")
155
162
 
156
163
  sleep(3)
157
164
  env.close()
@@ -10,12 +10,9 @@ WorkArena is included in [BrowserGym](https://github.com/ServiceNow/BrowserGym),
10
10
 
11
11
  https://github.com/ServiceNow/WorkArena/assets/2374980/68640f09-7d6f-4eb1-b556-c294a6afef70
12
12
 
13
- ## ⚠️ Pre-Release warning ⚠️
14
- Please note that the WorkArena benchmark is still undergoing minor bug fixes and updates, which may cause discrepancies with results reported in our latest arXiv preprint. We plan to release soon a stable version of WorkArena v0.1.0 with enhanced stability, and a final version v1.0.0 with a new suite of tasks.
15
-
16
13
  ## Benchmark Contents
17
14
 
18
- At the moment, WorkArena includes `18,050` task instances drawn from `29` tasks that cover the main components of the ServiceNow user interface. The following videos show an agent built on `GPT-4-vision` interacting with every such component. As emphasized by our results, this benchmark is not solved and thus, the performance of the agent is not always on point.
15
+ At the moment, WorkArena includes `19,912` unique instances drawn from `33` tasks that cover the main components of the ServiceNow user interface. The following videos show an agent built on `GPT-4-vision` interacting with every such component. As emphasized by our results, this benchmark is not solved and thus, the performance of the agent is not always on point.
19
16
 
20
17
  ### Knowledge Bases
21
18
 
@@ -51,6 +48,15 @@ https://github.com/ServiceNow/WorkArena/assets/1726818/7538b3ef-d39b-4978-b9ea-8
51
48
 
52
49
  https://github.com/ServiceNow/WorkArena/assets/1726818/ca26dfaf-2358-4418-855f-80e482435e6e
53
50
 
51
+ ### Dashboards
52
+
53
+ **Goal:** The agent must answer a question that requires reading charts and (optionally) performing simple reasoning over them.
54
+
55
+ *Note: For demonstration purposes, a human is controlling the cursor since this is a pure retrieval task*
56
+
57
+ https://github.com/ServiceNow/WorkArena/assets/1726818/0023232c-081f-4be4-99bd-f60c766e6c3f
58
+
59
+
54
60
  ## Getting Started
55
61
 
56
62
  To set up WorkArena, you will need to get your own ServiceNow instance, install our Python package, and upload some data to your instance. Follow the steps below to achieve this.
@@ -58,7 +64,7 @@ To setup WorkArena, you will need to get your own ServiceNow instance, install o
58
64
  ### a) Create a ServiceNow Developer Instance
59
65
 
60
66
  1. Go to https://developer.servicenow.com/ and create an account.
61
- 2. Click on `Request an instance` and select the `Utah` release (initializing the instance will take a few minutes)
67
+ 2. Click on `Request an instance` and select the `Washington` release (initializing the instance will take a few minutes)
62
68
  3. Once the instance is ready, you should see your instance URL and credentials. If not, click _Return to the Developer Portal_, then navigate to _Manage instance password_ and click _Reset instance password_.
63
69
  4. You should now see your URL and credentials. Based on this information, set the following environment variables:
64
70
  * `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
@@ -92,6 +98,8 @@ Your installation is now complete! 🎉
92
98
 
93
99
  Run this code to see WorkArena in action.
94
100
 
101
+ Note: the following example executes WorkArena's oracle (cheat) function to solve each task. To evaluate an agent, calls to `env.step()` must be used instead.
102
+
95
103
  ```python
96
104
  import random
97
105
 
@@ -106,28 +114,27 @@ for task in ALL_WORKARENA_TASKS:
106
114
 
107
115
  # Instantiate a new environment
108
116
  env = BrowserEnv(task_entrypoint=task,
109
- headless=False,
110
- slow_mo=1000)
117
+ headless=False)
111
118
  env.reset()
112
119
 
113
120
  # Cheat functions use Playwright to automatically solve the task
114
121
  env.chat.add_message(role="assistant", msg="On it. Please wait...")
115
- env.task.cheat(env.page, env.chat.messages)
122
+ cheat_messages = []
123
+ env.task.cheat(env.page, cheat_messages)
124
+
125
+ # Send cheat messages to chat
126
+ for cheat_msg in cheat_messages:
127
+ env.chat.add_message(role=cheat_msg["role"], msg=cheat_msg["message"])
116
128
 
117
129
  # Post solution to chat
118
- if "KnowledgeBaseSearchTask" in str(task):
119
- answer = env.chat.messages[-1]["message"]
120
- env.chat.add_message(role="assistant", msg=f"The answer is:")
121
- env.chat.add_message(role="assistant", msg=answer)
122
- else:
123
- env.chat.add_message(role="assistant", msg="I'm done!")
130
+ env.chat.add_message(role="assistant", msg="I'm done!")
124
131
 
125
132
  # Validate the solution
126
- reward, stop, info, message = env.task.validate(env.page, env.chat.messages)
133
+ reward, stop, message, info = env.task.validate(env.page, cheat_messages)
127
134
  if reward == 1:
128
135
  env.chat.add_message(role="user", msg="Yes, that works. Thanks!")
129
136
  else:
130
- env.chat.add_message(role="user", msg=f"No, that doesn't work. {message.get('message', '')}")
137
+ env.chat.add_message(role="user", msg=f"No, that doesn't work. {info.get('message', '')}")
131
138
 
132
139
  sleep(3)
133
140
  env.close()
@@ -0,0 +1,13 @@
1
+ name: workarena-dev
2
+
3
+ channels:
4
+ - huggingface
5
+ - conda-forge
6
+ - defaults
7
+
8
+ dependencies:
9
+ - python>=3.10
10
+ - pip
11
+
12
+ - pip:
13
+ - -r requirements.txt
@@ -0,0 +1,9 @@
1
+ black[jupyter]==24.2.0
2
+ blacken-docs
3
+ pre-commit
4
+ pytest==7.3.2
5
+ pytest-xdist
6
+ pytest-playwright
7
+ tenacity
8
+ browsergym-core
9
+ -e .. # local package
@@ -11,6 +11,7 @@ authors = [
11
11
  {name = "Maxime Gasse"},
12
12
  {name = "Alex Lacoste"},
13
13
  {name = "Manuel Del Verme"},
14
+ {name = "Megh Thakkar"},
14
15
  ]
15
16
  readme = "README.md"
16
17
  requires-python = ">3.7"
@@ -30,6 +31,7 @@ homepage = "https://github.com/ServiceNow/WorkArena"
30
31
 
31
32
  [project.scripts]
32
33
  workarena-install = "browsergym.workarena.install:main"
34
+ workarena-human-eval = "browsergym.workarena.human_eval.tool:main"
33
35
 
34
36
  [tool.hatch.version]
35
37
  path = "src/browsergym/workarena/__init__.py"
@@ -39,3 +41,30 @@ files = ["requirements.txt"]
39
41
 
40
42
  [tool.hatch.build.targets.wheel]
41
43
  packages = ["src/browsergym"]
44
+
45
+ [tool.black]
46
+ line-length = 100
47
+ include = '\.pyi?$'
48
+ exclude = '''
49
+ /(
50
+ \.eggs
51
+ | \.git
52
+ | \.hg
53
+ | \.mypy_cache
54
+ | \.nox
55
+ | \.tox
56
+ | \.venv
57
+ | _build
58
+ | buck-out
59
+ | build
60
+ | dist
61
+ )/
62
+ '''
63
+
64
+ [tool.pytest.ini_options]
65
+ filterwarnings = [
66
+ 'ignore::UserWarning:gymnasium.*:', # too many "The obs is not within the observation space." warnings.
67
+ ]
68
+ markers = [
69
+ "slow: marks tests as slow (deselect with '-m \"not slow\"')",
70
+ ]
@@ -1,6 +1,6 @@
1
- browsergym-core==0.2.0
1
+ browsergym-core>=0.2
2
2
  english-words>=2.0.1
3
- faker>=24.11.0
3
+ Faker>=24.8.0
4
4
  numpy>=1.14
5
5
  requests>=2.31
6
6
  tenacity>=8.2.3 # only used in cheat() -> move to tests?
@@ -0,0 +1,131 @@
1
+ """
2
+ A demonstration of how observation/action traces can be extracted
3
+ for WorkArena tasks without modifying the task code.
4
+
5
+ Author: Alexandre Drouin (alexandre.drouin@servicenow.com)
6
+
7
+ Notes:
8
+ - This approach relies on monkey patching the playwright actions to log the actions and observations.
9
+ It has not been tested for parallel execution. It might work with multiprocessing, but it will
10
+ certainly not work with multithreading.
11
+
12
+ """
13
+
14
+ import importlib
15
+ import logging
16
+ import os
17
+ import pickle
18
+ import playwright.sync_api as playwright_sync
19
+
20
+ from browsergym.core.env import BrowserEnv
21
+ from browsergym.workarena import ALL_WORKARENA_TASKS
22
+ from collections import defaultdict
23
+ from tenacity import retry, stop_after_attempt, wait_fixed
24
+ from time import time
25
+
26
+
27
+ N_PER_TASK = 10
28
+
29
+
30
+ def monkey_patch_playwright(observation_callback, trace_storage):
31
+ """
32
+ A function that overrides the default playwright actions to log the actions and observations.
33
+
34
+ Parameters:
35
+ ------------
36
+ observation_callback: callable
37
+ A function that returns the observation of the environment.
38
+ trace_storage: list
39
+ A list to store the trace of the actions and observations.
40
+ These will be appended in-place.
41
+
42
+ """
43
+
44
+ def wrapper(func, interface):
45
+ def wrapped(*args, **kwargs):
46
+ # Get the observation
47
+ obs = observation_callback()
48
+
49
+ # Get the BID of the element on which we are acting.
50
+ if interface.__name__ == "Locator":
51
+ # Get the locator
52
+ locator = args[0]
53
+ # Get the BID
54
+ bid = locator.element_handle().evaluate('(el) => el.getAttribute("bid")')
55
+ elif interface.__name__ == "Keyboard":
56
+ # Get the BID of the element
57
+ bid = "keyboard"
58
+ else:
59
+ # Get the BID of the element
60
+ bid = args[0].evaluate('(el) => el.getAttribute("bid")')
61
+
62
+ logging.info(f"Action: {func.__name__} BID: {bid} -- Args: {args[1:]} {kwargs}")
63
+ trace_storage.append(
64
+ {
65
+ "obs": obs,
66
+ "action": func.__name__,
67
+ "args": args[1:],
68
+ "kwargs": kwargs,
69
+ "bid": bid,
70
+ "time": time(),
71
+ }
72
+ )
73
+
74
+ # Resume action
75
+ return func(*args, **kwargs)
76
+
77
+ return wrapped
78
+
79
+ # Interfaces and actions we want to monkey patch
80
+ importlib.reload(playwright_sync)
81
+ from playwright.sync_api import Page, Frame, Locator, Keyboard, ElementHandle
82
+
83
+ # TODO: Make sure the list of interfaces and actions is exhaustive
84
+ # It covers all that is used in WorkArena cheats as of April 11, 2024
85
+ interfaces = [Page, Frame, Locator, Keyboard, ElementHandle]
86
+ actions = ["click", "select_option", "set_checked", "fill", "press", "type", "down", "up"]
87
+
88
+ for interface in interfaces:
89
+ for action in actions:
90
+ if hasattr(interface, action):
91
+ setattr(interface, action, wrapper(getattr(interface, action), interface))
92
+ print(f"Monkey patched {interface.__name__}.{action}")
93
+
94
+
95
+ @retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
96
+ def extract_trace(task_cls, headless=True):
97
+ """
98
+ Extracts the trace of actions and observations for a given task.
99
+
100
+ Parameters:
101
+ ------------
102
+ task_cls: class
103
+ The class of the task to extract the trace from.
104
+
105
+ """
106
+ # Instantiate a new environment
107
+ env = BrowserEnv(task_entrypoint=task_cls, headless=headless, slow_mo=1000)
108
+
109
+ # Setup customized tracing
110
+ trace = []
111
+ monkey_patch_playwright(observation_callback=env._get_obs, trace_storage=trace)
112
+
113
+ env.reset()
114
+ env.task.cheat(env.page, env.chat.messages)
115
+ env.close()
116
+
117
+ return trace
118
+
119
+
120
+ if __name__ == "__main__":
121
+ os.makedirs("trace_profiling", exist_ok=True)
122
+
123
+ task_traces = defaultdict(list)
124
+ for task in ALL_WORKARENA_TASKS:
125
+ print("Task:", task)
126
+ for i in range(N_PER_TASK):
127
+ print(f"Extracting trace {i+1}/{N_PER_TASK}")
128
+ trace = extract_trace(task, headless=True)
129
+ task_traces[task].append(trace)
130
+
131
+ pickle.dump(task_traces, open("trace_profiling/task_traces.pkl", "wb"))