browsergym-workarena 0.4.4__tar.gz → 0.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. browsergym_workarena-0.5.1/.github/workflows/instance_pool_ci.yml +82 -0
  2. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/.github/workflows/unit_tests.yml +4 -4
  3. browsergym_workarena-0.5.1/CITATION.cff +81 -0
  4. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/PKG-INFO +9 -21
  5. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/README.md +7 -20
  6. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/requirements.txt +1 -0
  7. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/__init__.py +1 -1
  8. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/config.py +6 -0
  9. browsergym_workarena-0.5.1/src/browsergym/workarena/data_files/task_configs/dashboard_retrieval_value_task.json +1 -0
  10. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/install.py +56 -3
  11. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/instance.py +101 -17
  12. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/dashboard.py +20 -12
  13. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/knowledge.py +1 -1
  14. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/scripts/generate_dashboard_configs.py +11 -2
  15. browsergym_workarena-0.4.4/src/browsergym/workarena/tasks/scripts/navigation.py → browsergym_workarena-0.5.1/src/browsergym/workarena/tasks/scripts/generate_navigation_tasks.py +4 -1
  16. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/scripts/knowledge.py +6 -4
  17. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/wa_action_traces.py +9 -2
  18. browsergym_workarena-0.5.1/tests/test_snow_instance.py +92 -0
  19. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/tests/test_task_general.py +23 -4
  20. browsergym_workarena-0.4.4/src/browsergym/workarena/data_files/task_configs/dashboard_retrieval_value_task.json +0 -1
  21. browsergym_workarena-0.4.4/tests/test_snow_instance.py +0 -52
  22. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  23. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/.github/workflows/pypi.yml +0 -0
  24. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/.gitignore +0 -0
  25. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/CODE_OF_CONDUCT.md +0 -0
  26. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/LICENSE +0 -0
  27. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/SECURITY.md +0 -0
  28. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/assets/WorkArena_banner.png +0 -0
  29. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/dcat-metadata.jsonld +0 -0
  30. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/dev/environment.yaml +0 -0
  31. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/dev/requirements.txt +0 -0
  32. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/generate_knowledge_base.ipynb +0 -0
  33. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/make_human_eval_curriculum.py +0 -0
  34. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/pyproject.toml +0 -0
  35. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/scripts/extract_finetuning_traces.py +0 -0
  36. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/scripts/generate_knowledge_base.ipynb +0 -0
  37. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/scripts/make_human_eval_curriculum.py +0 -0
  38. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/__init__.py +0 -0
  39. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/category.py +0 -0
  40. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/change_request.py +0 -0
  41. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/computer_asset.py +0 -0
  42. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/cost_center.py +0 -0
  43. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/expense_line.py +0 -0
  44. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/incident.py +0 -0
  45. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/knowledge.py +0 -0
  46. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/problem.py +0 -0
  47. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/report.py +0 -0
  48. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/requested_items.py +0 -0
  49. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/requests.py +0 -0
  50. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/system_properties.py +0 -0
  51. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/ui_themes.py +0 -0
  52. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/user.py +0 -0
  53. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/utils.py +0 -0
  54. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/forms/expected_change_request_form_fields.json +0 -0
  55. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/forms/expected_hardware_form_fields.json +0 -0
  56. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/forms/expected_incident_form_fields.json +0 -0
  57. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/forms/expected_problem_form_fields.json +0 -0
  58. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/forms/expected_request_item_form_fields.json +0 -0
  59. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/forms/expected_user_form_fields.json +0 -0
  60. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/knowledge/kb_autopublish_workflow.xml +0 -0
  61. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/knowledge/knowledge_base.json +0 -0
  62. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/knowledge/protocols.json +0 -0
  63. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/knowledge/test.html +0 -0
  64. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_asset_list_columns.json +0 -0
  65. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_change_request_list_columns.json +0 -0
  66. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_expense_line_list_columns.json +0 -0
  67. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_hardware_list_columns.json +0 -0
  68. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_incident_list_columns.json +0 -0
  69. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_problem_list_columns.json +0 -0
  70. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_requested_items_list_columns.json +0 -0
  71. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_service_catalog_list_columns.json +0 -0
  72. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_user_list_columns.json +0 -0
  73. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/ui_themes/workarena_themes.xml +0 -0
  74. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/all_menu.json +0 -0
  75. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/create_change_request_task.json +0 -0
  76. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/create_hardware_asset_task.json +0 -0
  77. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/create_incident_task.json +0 -0
  78. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/create_problem_task.json +0 -0
  79. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/create_user_task.json +0 -0
  80. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/dashboard_retrieval_minmax_task.json +0 -0
  81. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/filter_asset_list_task.json +0 -0
  82. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/filter_change_request_list_task.json +0 -0
  83. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/filter_hardware_list_task.json +0 -0
  84. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/filter_incident_list_task.json +0 -0
  85. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/filter_service_catalog_item_list_task.json +0 -0
  86. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/filter_user_list_task.json +0 -0
  87. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/impersonation_users.json +0 -0
  88. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/knowledge_base_configs.json +0 -0
  89. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/order_apple_mac_book_pro15_task.json +0 -0
  90. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/order_apple_watch_task.json +0 -0
  91. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/order_developer_laptop_task.json +0 -0
  92. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/order_development_laptop_pc_task.json +0 -0
  93. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/order_ipad_mini_task.json +0 -0
  94. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/order_ipad_pro_task.json +0 -0
  95. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/order_loaner_laptop_task.json +0 -0
  96. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/order_sales_laptop_task.json +0 -0
  97. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/order_standard_laptop_task.json +0 -0
  98. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/report_retrieval_minmax_task.json +0 -0
  99. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/report_retrieval_value_task.json +0 -0
  100. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/sort_asset_list_task.json +0 -0
  101. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/sort_change_request_list_task.json +0 -0
  102. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/sort_hardware_list_task.json +0 -0
  103. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/sort_incident_list_task.json +0 -0
  104. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/sort_service_catalog_item_list_task.json +0 -0
  105. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/sort_user_list_task.json +0 -0
  106. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/human_eval/console.js +0 -0
  107. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/human_eval/tool.py +0 -0
  108. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/__init__.py +0 -0
  109. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/base.py +0 -0
  110. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/comp_building_block.py +0 -0
  111. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/__init__.py +0 -0
  112. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/base.py +0 -0
  113. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/dash_do_base.py +0 -0
  114. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/dash_do_catalog.py +0 -0
  115. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/dash_do_catalog_infeasible.py +0 -0
  116. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/dash_do_create_incident.py +0 -0
  117. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/dash_do_create_incident_infeasible.py +0 -0
  118. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/dash_do_create_problem.py +0 -0
  119. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/dash_do_create_problem_infeasible.py +0 -0
  120. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/dash_do_filter.py +0 -0
  121. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/dash_do_request_item.py +0 -0
  122. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/dash_do_request_item_infeasible.py +0 -0
  123. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/delete_record.py +0 -0
  124. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/edit_knowledge_base.py +0 -0
  125. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/expense_management.py +0 -0
  126. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/filter_and_do.py +0 -0
  127. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/find_and_order_item.py +0 -0
  128. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/manage_change_request_schedule.py +0 -0
  129. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/mark_duplicate_problems.py +0 -0
  130. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/maximize_investment_return.py +0 -0
  131. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/navigate_and_do.py +0 -0
  132. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/navigate_and_do_infeasible.py +0 -0
  133. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/offboard_user.py +0 -0
  134. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/onboard_user.py +0 -0
  135. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/update_task.py +0 -0
  136. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/utils/curriculum.py +0 -0
  137. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/utils/infeasible_configs.py +0 -0
  138. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/utils/knapsack.py +0 -0
  139. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/warranty_check.py +0 -0
  140. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/work_assignment.py +0 -0
  141. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/workload_balancing.py +0 -0
  142. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/form.py +0 -0
  143. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/list.py +0 -0
  144. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/mark_duplicate_problem.py +0 -0
  145. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/navigation.py +0 -0
  146. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/scripts/README.md +0 -0
  147. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/scripts/extract_all_menu_items.py +0 -0
  148. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/scripts/generate_forms.py +0 -0
  149. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/scripts/list.py +0 -0
  150. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/scripts/service_catalog.py +0 -0
  151. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/scripts/validate.py +0 -0
  152. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/send_chat_message.py +0 -0
  153. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/service_catalog.py +0 -0
  154. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/utils/__init__.py +0 -0
  155. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/utils/debug.py +0 -0
  156. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/utils/form.py +0 -0
  157. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/utils/js_utils.js +0 -0
  158. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/utils/private_tasks.py +0 -0
  159. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/utils/string.py +0 -0
  160. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/utils/utils.py +0 -0
  161. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/utils.py +0 -0
  162. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/workarena_test.py +0 -0
  163. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/tests/test_api.py +0 -0
  164. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/tests/test_compositional.py +0 -0
  165. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/tests/test_compositional_utils.py +0 -0
  166. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/tests/test_filter_list_task.py +0 -0
  167. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/tests/test_random_config_generation.py +0 -0
  168. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/tests/test_task_from_config.py +0 -0
  169. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/tests/test_task_setup.py +0 -0
  170. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/tests/test_utils.py +0 -0
  171. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/tests/test_validate.py +0 -0
  172. {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/tests/utils.py +0 -0
@@ -0,0 +1,82 @@
1
+ name: Monitor the pool of WorkArena instances
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ schedule:
6
+ - cron: "0 3 * * *" # daily at 03:00 UTC
7
+
8
+ jobs:
9
+
10
+ test-l1-tasks:
11
+ name: Test L1 tasks
12
+ runs-on: ubuntu-22.04
13
+
14
+ defaults:
15
+ run:
16
+ shell: bash -l {0}
17
+
18
+ env:
19
+ HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
20
+
21
+ steps:
22
+ - name: Checkout Repository
23
+ uses: actions/checkout@v4
24
+
25
+ - name: Set up Python
26
+ uses: actions/setup-python@v5
27
+ with:
28
+ python-version: '3.12'
29
+ cache: 'pip'
30
+
31
+ - name: Install Python dependencies
32
+ working-directory: ./dev
33
+ run: |
34
+ pip install -r requirements.txt
35
+ pip install huggingface_hub
36
+
37
+ - name: Pip list
38
+ run: pip list
39
+
40
+ - name: Install Playwright
41
+ run: playwright install chromium --with-deps
42
+
43
+ - name: Run L1 tests
44
+ run: pytest -n 20 --durations=10 --slowmo 1000 -v tests/test_task_general.py
45
+
46
+
47
+ test-snow-instance:
48
+ name: Test snow instance
49
+ runs-on: ubuntu-22.04
50
+ needs: test-l1-tasks # remove this line if you want both jobs to run in parallel
51
+
52
+ defaults:
53
+ run:
54
+ shell: bash -l {0}
55
+
56
+ env:
57
+ HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
58
+
59
+ steps:
60
+ - name: Checkout Repository
61
+ uses: actions/checkout@v4
62
+
63
+ - name: Set up Python
64
+ uses: actions/setup-python@v5
65
+ with:
66
+ python-version: '3.12'
67
+ cache: 'pip'
68
+
69
+ - name: Install Python dependencies
70
+ working-directory: ./dev
71
+ run: |
72
+ pip install -r requirements.txt
73
+ pip install huggingface_hub
74
+
75
+ - name: Pip list
76
+ run: pip list
77
+
78
+ - name: Install Playwright
79
+ run: playwright install chromium --with-deps
80
+
81
+ - name: Run snow instance tests
82
+ run: pytest -n 20 --durations=10 --slowmo 1000 -v tests/test_snow_instance.py
@@ -36,7 +36,7 @@ jobs:
36
36
  run: black . --check
37
37
 
38
38
  browsergym-workarena-fast:
39
- runs-on: ubuntu-latest
39
+ runs-on: ubuntu-22.04
40
40
 
41
41
  defaults:
42
42
  run:
@@ -50,7 +50,7 @@ jobs:
50
50
  - name: Set up Python
51
51
  uses: actions/setup-python@v5
52
52
  with:
53
- python-version: '3.10'
53
+ python-version: '3.12'
54
54
  cache: 'pip' # caching pip dependencies
55
55
 
56
56
  - name: Pip install
@@ -59,9 +59,9 @@ jobs:
59
59
 
60
60
  - name: Pip list
61
61
  run: pip list
62
-
62
+
63
63
  - name: Install Playwright
64
- run: playwright install --with-deps
64
+ run: playwright install chromium --with-deps
65
65
 
66
66
  - name: Run non-slow browsergym-workarena Unit Tests
67
67
  env:
@@ -0,0 +1,81 @@
1
+ cff-version: 1.2.0
2
+ message: "If you use WorkArena in your research, please cite both of the following papers."
3
+ title: "WorkArena"
4
+ date-released: 2024-03-12
5
+ url: "https://github.com/ServiceNow/workarena"
6
+ license: "Apache-2.0"
7
+
8
+ authors:
9
+ - family-names: Drouin
10
+ given-names: Alexandre
11
+ - family-names: Gasse
12
+ given-names: Maxime
13
+ - family-names: Caccia
14
+ given-names: Massimo
15
+ - family-names: Laradji
16
+ given-names: Issam H.
17
+ - family-names: Del Verme
18
+ given-names: Manuel
19
+ - family-names: Marty
20
+ given-names: Tom
21
+ - family-names: Vazquez
22
+ given-names: David
23
+ - family-names: Chapados
24
+ given-names: Nicolas
25
+ - family-names: Lacoste
26
+ given-names: Alexandre
27
+
28
+ preferred-citation:
29
+ - type: inproceedings
30
+ title: "WorkArena: How Capable are Web Agents at Solving Common Knowledge Work Tasks?"
31
+ authors:
32
+ - family-names: Drouin
33
+ given-names: Alexandre
34
+ - family-names: Gasse
35
+ given-names: Maxime
36
+ - family-names: Caccia
37
+ given-names: Massimo
38
+ - family-names: Laradji
39
+ given-names: Issam H.
40
+ - family-names: Del Verme
41
+ given-names: Manuel
42
+ - family-names: Marty
43
+ given-names: Tom
44
+ - family-names: Vazquez
45
+ given-names: David
46
+ - family-names: Chapados
47
+ given-names: Nicolas
48
+ - family-names: Lacoste
49
+ given-names: Alexandre
50
+ booktitle: "Proceedings of the 41st International Conference on Machine Learning (ICML)"
51
+ series: "Proceedings of Machine Learning Research"
52
+ volume: 235
53
+ pages: "11642–11662"
54
+ year: 2024
55
+ url: "https://proceedings.mlr.press/v235/drouin24a.html"
56
+
57
+ - type: inproceedings
58
+ title: "WorkArena++: Towards Compositional Planning and Reasoning-based Common Knowledge Work Tasks"
59
+ authors:
60
+ - family-names: Boisvert
61
+ given-names: Léo
62
+ - family-names: Thakkar
63
+ given-names: Megh
64
+ - family-names: Gasse
65
+ given-names: Maxime
66
+ - family-names: Caccia
67
+ given-names: Massimo
68
+ - family-names: Le Sellier De Chezelles
69
+ given-names: Thibault
70
+ - family-names: Cappart
71
+ given-names: Quentin
72
+ - family-names: Chapados
73
+ given-names: Nicolas
74
+ - family-names: Lacoste
75
+ given-names: Alexandre
76
+ - family-names: Drouin
77
+ given-names: Alexandre
78
+ booktitle: "Advances in Neural Information Processing Systems 37 (NeurIPS 2024), Datasets & Benchmarks Track"
79
+ year: 2024
80
+ url: "https://proceedings.neurips.cc/paper_files/paper/2024/hash/0b82662b6c32e887bb252a74d8cb2d5e-Paper-Datasets_and_Benchmarks_Track.pdf"
81
+ doi: "10.52202/079017-0195"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: browsergym-workarena
3
- Version: 0.4.4
3
+ Version: 0.5.1
4
4
  Summary: WorkArena benchmark for BrowserGym
5
5
  Project-URL: homepage, https://github.com/ServiceNow/WorkArena
6
6
  Author: Léo Boisvert, Alex Drouin, Maxime Gasse, Alex Lacoste, Manuel Del Verme, Megh Thakkar
@@ -16,6 +16,7 @@ Requires-Python: >3.7
16
16
  Requires-Dist: browsergym-core>=0.2
17
17
  Requires-Dist: english-words>=2.0.1
18
18
  Requires-Dist: faker>=24.8.0
19
+ Requires-Dist: huggingface-hub>=0.23
19
20
  Requires-Dist: numpy>=1.14
20
21
  Requires-Dist: requests>=2.31
21
22
  Requires-Dist: tenacity>=8.2.3
@@ -65,28 +66,19 @@ https://github.com/ServiceNow/WorkArena/assets/2374980/68640f09-7d6f-4eb1-b556-c
65
66
 
66
67
  ## Getting Started
67
68
 
68
- To setup WorkArena, you will need to get your own ServiceNow instance, install our Python package, and upload some data to your instance. Follow the steps below to achieve this.
69
+ To set up WorkArena, you will need to gain access to ServiceNow instances and install our Python package locally. Follow the steps below to achieve this.
69
70
 
70
- ### a) Create a ServiceNow Developer Instance
71
+ ### a) Gain Access to ServiceNow Instances
71
72
 
72
- 1. Go to https://developer.servicenow.com/ and create an account.
73
- 2. Click on `Request an instance` and select the `Washington` release (initializing the instance will take a few minutes)
74
- 3. Once the instance is ready, you should see your instance URL and credentials. If not, click _Return to the Developer Portal_, then navigate to _Manage instance password_ and click _Reset instance password_.
75
- 4. Change the role of the user to admin in yoyr instance parameters ![image](https://github.com/user-attachments/assets/6f0fbf8e-f40f-411a-84cb-fead93d85f60)
73
+ 1. Navigate to https://huggingface.co/datasets/ServiceNow/WorkArena-Instances.
74
+ 2. Fill out the form, accept the terms to gain access to the gated repository, and wait for approval.
75
+ 3. Ensure that the machine where you will run WorkArena is [authenticated with Hugging Face](https://huggingface.co/docs/hub/en/datasets-polars-auth) (e.g., via `huggingface-cli login` or the `HUGGING_FACE_HUB_TOKEN` environment variable).
76
76
 
77
- 5. You should now see your URL and credentials. Based on this information, set the following environment variables:
78
- * `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
79
- * `SNOW_INSTANCE_UNAME`: The username, should be "admin"
80
- * `SNOW_INSTANCE_PWD`: The password, make sure you place the value in quotes "" and be mindful of [escaping special shell characters](https://onlinelinuxtools.com/escape-shell-characters). Running `echo $SNOW_INSTANCE_PWD` should print the correct password.
81
- 6. Log into your instance via a browser using the admin credentials. Close any popup that appears on the main screen (e.g., agreeing to analytics).
82
-
83
- **Warning:** Feel free to look around the platform, but please make sure you revert any changes (e.g., changes to list views, pinning some menus, etc.) as these changes will be persistent and affect the benchmarking process.
84
-
85
- ### b) Install WorkArena and Initialize your Instance
77
+ ### b) Install WorkArena
86
78
 
87
79
  Run the following command to install WorkArena in the [BrowserGym](https://github.com/servicenow/browsergym) environment:
88
80
  ```
89
- pip install browsergym
81
+ pip install browsergym-workarena
90
82
  ```
91
83
 
92
84
  Then, install [Playwright](https://github.com/microsoft/playwright):
@@ -94,10 +86,6 @@ Then, install [Playwright](https://github.com/microsoft/playwright):
94
86
  playwright install
95
87
  ```
96
88
 
97
- Finally, run this command in a terminal to upload the benchmark data to your ServiceNow instance:
98
- ```
99
- workarena-install
100
- ```
101
89
  Your installation is now complete! 🎉
102
90
 
103
91
 
@@ -41,28 +41,19 @@ https://github.com/ServiceNow/WorkArena/assets/2374980/68640f09-7d6f-4eb1-b556-c
41
41
 
42
42
  ## Getting Started
43
43
 
44
- To setup WorkArena, you will need to get your own ServiceNow instance, install our Python package, and upload some data to your instance. Follow the steps below to achieve this.
44
+ To set up WorkArena, you will need to gain access to ServiceNow instances and install our Python package locally. Follow the steps below to achieve this.
45
45
 
46
- ### a) Create a ServiceNow Developer Instance
46
+ ### a) Gain Access to ServiceNow Instances
47
47
 
48
- 1. Go to https://developer.servicenow.com/ and create an account.
49
- 2. Click on `Request an instance` and select the `Washington` release (initializing the instance will take a few minutes)
50
- 3. Once the instance is ready, you should see your instance URL and credentials. If not, click _Return to the Developer Portal_, then navigate to _Manage instance password_ and click _Reset instance password_.
51
- 4. Change the role of the user to admin in yoyr instance parameters ![image](https://github.com/user-attachments/assets/6f0fbf8e-f40f-411a-84cb-fead93d85f60)
48
+ 1. Navigate to https://huggingface.co/datasets/ServiceNow/WorkArena-Instances.
49
+ 2. Fill out the form, accept the terms to gain access to the gated repository, and wait for approval.
50
+ 3. Ensure that the machine where you will run WorkArena is [authenticated with Hugging Face](https://huggingface.co/docs/hub/en/datasets-polars-auth) (e.g., via `huggingface-cli login` or the `HUGGING_FACE_HUB_TOKEN` environment variable).
52
51
 
53
- 5. You should now see your URL and credentials. Based on this information, set the following environment variables:
54
- * `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
55
- * `SNOW_INSTANCE_UNAME`: The username, should be "admin"
56
- * `SNOW_INSTANCE_PWD`: The password, make sure you place the value in quotes "" and be mindful of [escaping special shell characters](https://onlinelinuxtools.com/escape-shell-characters). Running `echo $SNOW_INSTANCE_PWD` should print the correct password.
57
- 6. Log into your instance via a browser using the admin credentials. Close any popup that appears on the main screen (e.g., agreeing to analytics).
58
-
59
- **Warning:** Feel free to look around the platform, but please make sure you revert any changes (e.g., changes to list views, pinning some menus, etc.) as these changes will be persistent and affect the benchmarking process.
60
-
61
- ### b) Install WorkArena and Initialize your Instance
52
+ ### b) Install WorkArena
62
53
 
63
54
  Run the following command to install WorkArena in the [BrowserGym](https://github.com/servicenow/browsergym) environment:
64
55
  ```
65
- pip install browsergym
56
+ pip install browsergym-workarena
66
57
  ```
67
58
 
68
59
  Then, install [Playwright](https://github.com/microsoft/playwright):
@@ -70,10 +61,6 @@ Then, install [Playwright](https://github.com/microsoft/playwright):
70
61
  playwright install
71
62
  ```
72
63
 
73
- Finally, run this command in a terminal to upload the benchmark data to your ServiceNow instance:
74
- ```
75
- workarena-install
76
- ```
77
64
  Your installation is now complete! 🎉
78
65
 
79
66
 
@@ -5,3 +5,4 @@ numpy>=1.14
5
5
  requests>=2.31
6
6
  tenacity>=8.2.3 # only used in cheat() -> move to tests?
7
7
  tqdm>=4.66.2
8
+ huggingface_hub>=0.23
@@ -1,4 +1,4 @@
1
- __version__ = "0.4.4"
1
+ __version__ = "0.5.1"
2
2
 
3
3
  import inspect
4
4
  from logging import warning
@@ -11,6 +11,12 @@ SNOW_BROWSER_TIMEOUT = 30000 # Milliseconds
11
11
  SNOW_JS_UTILS_FILEPATH = str(resources.files(utils).joinpath("js_utils.js"))
12
12
  SNOW_SUPPORTED_RELEASES = ["washingtondc"]
13
13
 
14
+ # Hugging Face dataset containing available instances
15
+ INSTANCE_REPO_ID = "ServiceNow/WorkArena-Instances"
16
+ INSTANCE_REPO_FILENAME = "instances.json"
17
+ INSTANCE_REPO_TYPE = "dataset"
18
+ INSTANCE_XOR_SEED = "x3!+-9mi#nhlo%a02$9hna{]"
19
+
14
20
  # Path to the Menu navigation task configuration
15
21
  ALL_MENU_PATH = str(resources.files(data_files).joinpath("task_configs/all_menu.json"))
16
22