browsergym-workarena 0.3.2__tar.gz → 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browsergym_workarena-0.4.2/.github/ISSUE_TEMPLATE/bug_report.yml +125 -0
- browsergym_workarena-0.4.2/CODE_OF_CONDUCT.md +46 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/PKG-INFO +37 -46
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/README.md +35 -44
- browsergym_workarena-0.4.2/SECURITY.md +22 -0
- browsergym_workarena-0.4.2/assets/WorkArena_banner.png +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/__init__.py +1 -1
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/install.py +15 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/instance.py +1 -1
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/list.py +78 -8
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/service_catalog.py +0 -10
- browsergym_workarena-0.4.2/tests/test_filter_list_task.py +81 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/.github/workflows/pypi.yml +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/.github/workflows/unit_tests.yml +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/.gitignore +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/LICENSE +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/dcat-metadata.jsonld +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/dev/environment.yaml +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/dev/requirements.txt +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/generate_knowledge_base.ipynb +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/make_human_eval_curriculum.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/pyproject.toml +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/requirements.txt +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/scripts/extract_finetuning_traces.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/scripts/generate_knowledge_base.ipynb +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/scripts/make_human_eval_curriculum.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/api/__init__.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/api/category.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/api/change_request.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/api/computer_asset.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/api/cost_center.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/api/expense_line.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/api/incident.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/api/knowledge.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/api/problem.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/api/report.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/api/requested_items.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/api/requests.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/api/ui_themes.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/api/user.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/api/utils.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/config.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/setup_files/forms/expected_change_request_form_fields.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/setup_files/forms/expected_hardware_form_fields.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/setup_files/forms/expected_incident_form_fields.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/setup_files/forms/expected_problem_form_fields.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/setup_files/forms/expected_request_item_form_fields.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/setup_files/forms/expected_user_form_fields.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/setup_files/knowledge/kb_autopublish_workflow.xml +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/setup_files/knowledge/knowledge_base.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/setup_files/knowledge/protocols.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/setup_files/knowledge/test.html +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/setup_files/lists/expected_asset_list_columns.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/setup_files/lists/expected_change_request_list_columns.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/setup_files/lists/expected_expense_line_list_columns.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/setup_files/lists/expected_hardware_list_columns.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/setup_files/lists/expected_incident_list_columns.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/setup_files/lists/expected_problem_list_columns.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/setup_files/lists/expected_requested_items_list_columns.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/setup_files/lists/expected_service_catalog_list_columns.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/setup_files/lists/expected_user_list_columns.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/setup_files/ui_themes/workarena_themes.xml +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/all_menu.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/create_change_request_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/create_hardware_asset_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/create_incident_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/create_problem_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/create_user_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/dashboard_retrieval_minmax_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/dashboard_retrieval_value_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/filter_asset_list_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/filter_change_request_list_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/filter_hardware_list_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/filter_incident_list_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/filter_service_catalog_item_list_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/filter_user_list_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/impersonation_users.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/knowledge_base_configs.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/order_apple_mac_book_pro15_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/order_apple_watch_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/order_developer_laptop_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/order_development_laptop_pc_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/order_ipad_mini_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/order_ipad_pro_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/order_loaner_laptop_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/order_sales_laptop_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/order_standard_laptop_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/report_retrieval_minmax_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/report_retrieval_value_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/sort_asset_list_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/sort_change_request_list_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/sort_hardware_list_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/sort_incident_list_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/sort_service_catalog_item_list_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/data_files/task_configs/sort_user_list_task.json +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/human_eval/console.js +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/human_eval/tool.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/__init__.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/base.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/comp_building_block.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/__init__.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/base.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/dash_do_base.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/dash_do_catalog.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/dash_do_catalog_infeasible.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/dash_do_create_incident.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/dash_do_create_incident_infeasible.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/dash_do_create_problem.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/dash_do_create_problem_infeasible.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/dash_do_filter.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/dash_do_request_item.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/dash_do_request_item_infeasible.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/delete_record.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/edit_knowledge_base.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/expense_management.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/filter_and_do.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/find_and_order_item.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/manage_change_request_schedule.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/mark_duplicate_problems.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/maximize_investment_return.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/navigate_and_do.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/navigate_and_do_infeasible.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/offboard_user.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/onboard_user.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/update_task.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/utils/curriculum.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/utils/infeasible_configs.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/utils/knapsack.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/warranty_check.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/work_assignment.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/compositional/workload_balancing.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/dashboard.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/form.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/knowledge.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/mark_duplicate_problem.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/navigation.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/scripts/README.md +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/scripts/extract_all_menu_items.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/scripts/generate_dashboard_configs.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/scripts/generate_forms.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/scripts/knowledge.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/scripts/list.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/scripts/navigation.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/scripts/service_catalog.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/scripts/validate.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/send_chat_message.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/utils/__init__.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/utils/debug.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/utils/form.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/utils/js_utils.js +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/utils/private_tasks.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/utils/string.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/utils/utils.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/utils.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/wa_action_traces.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/workarena_test.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/tests/test_api.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/tests/test_compositional.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/tests/test_compositional_utils.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/tests/test_random_config_generation.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/tests/test_snow_instance.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/tests/test_task_from_config.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/tests/test_task_general.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/tests/test_task_setup.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/tests/test_utils.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/tests/test_validate.py +0 -0
- {browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/tests/utils.py +0 -0
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
name: Bug Report
|
|
2
|
+
description: Report an issue with browsergym/Playwright setup
|
|
3
|
+
title: "[Bug]: "
|
|
4
|
+
labels: ["bug"]
|
|
5
|
+
body:
|
|
6
|
+
- type: markdown
|
|
7
|
+
attributes:
|
|
8
|
+
value: |
|
|
9
|
+
Thanks for reporting an issue! Please fill out the information below to help us diagnose the problem.
|
|
10
|
+
|
|
11
|
+
- type: input
|
|
12
|
+
id: browsergym-version
|
|
13
|
+
attributes:
|
|
14
|
+
label: Browsergym Version
|
|
15
|
+
description: What version of browsergym are you using?
|
|
16
|
+
placeholder: "e.g., 1.2.0"
|
|
17
|
+
validations:
|
|
18
|
+
required: true
|
|
19
|
+
|
|
20
|
+
- type: input
|
|
21
|
+
id: playwright-version
|
|
22
|
+
attributes:
|
|
23
|
+
label: Playwright Version
|
|
24
|
+
description: What version of Playwright are you using?
|
|
25
|
+
placeholder: "e.g., 1.41.0"
|
|
26
|
+
validations:
|
|
27
|
+
required: true
|
|
28
|
+
|
|
29
|
+
- type: dropdown
|
|
30
|
+
id: os-type
|
|
31
|
+
attributes:
|
|
32
|
+
label: Operating System Type
|
|
33
|
+
description: What type of operating system are you using?
|
|
34
|
+
options:
|
|
35
|
+
- Ubuntu
|
|
36
|
+
- Debian
|
|
37
|
+
- macOS
|
|
38
|
+
- Windows
|
|
39
|
+
- Other Linux (specify version below)
|
|
40
|
+
validations:
|
|
41
|
+
required: true
|
|
42
|
+
|
|
43
|
+
- type: dropdown
|
|
44
|
+
id: os-version
|
|
45
|
+
attributes:
|
|
46
|
+
label: Operating System Version
|
|
47
|
+
description: Select your OS version
|
|
48
|
+
options:
|
|
49
|
+
# Ubuntu LTS versions
|
|
50
|
+
- Ubuntu 24.04 LTS (Noble Numbat)
|
|
51
|
+
- Ubuntu 22.04 LTS (Jammy Jellyfish)
|
|
52
|
+
- Ubuntu 20.04 LTS (Focal Fossa)
|
|
53
|
+
# macOS versions
|
|
54
|
+
- macOS 14 (Sonoma)
|
|
55
|
+
- macOS 13 (Ventura)
|
|
56
|
+
- macOS 12 (Monterey)
|
|
57
|
+
- macOS 11 (Big Sur)
|
|
58
|
+
# Windows versions
|
|
59
|
+
- Windows 11 23H2 (Build 22631)
|
|
60
|
+
- Windows 11 22H2 (Build 22621)
|
|
61
|
+
- Windows 10 22H2 (Build 19045)
|
|
62
|
+
- Windows 10 21H2 (Build 19044)
|
|
63
|
+
# Other
|
|
64
|
+
- Other (specify in Additional Context)
|
|
65
|
+
validations:
|
|
66
|
+
required: true
|
|
67
|
+
|
|
68
|
+
- type: dropdown
|
|
69
|
+
id: browsers
|
|
70
|
+
attributes:
|
|
71
|
+
label: Affected Browsers
|
|
72
|
+
description: Which browsers are you seeing this issue with?
|
|
73
|
+
multiple: true
|
|
74
|
+
options:
|
|
75
|
+
- Chromium
|
|
76
|
+
- Firefox
|
|
77
|
+
- WebKit
|
|
78
|
+
validations:
|
|
79
|
+
required: true
|
|
80
|
+
|
|
81
|
+
- type: textarea
|
|
82
|
+
id: what-happened
|
|
83
|
+
attributes:
|
|
84
|
+
label: What happened?
|
|
85
|
+
description: Please describe what happened and what you expected to happen
|
|
86
|
+
placeholder: |
|
|
87
|
+
1. What did you do?
|
|
88
|
+
2. What happened?
|
|
89
|
+
3. What did you expect to happen?
|
|
90
|
+
validations:
|
|
91
|
+
required: true
|
|
92
|
+
|
|
93
|
+
- type: textarea
|
|
94
|
+
id: reproduction
|
|
95
|
+
attributes:
|
|
96
|
+
label: Reproduction Steps
|
|
97
|
+
description: Please provide minimal steps to reproduce the issue
|
|
98
|
+
placeholder: |
|
|
99
|
+
1. Install dependencies...
|
|
100
|
+
2. Run command...
|
|
101
|
+
3. See error...
|
|
102
|
+
validations:
|
|
103
|
+
required: true
|
|
104
|
+
|
|
105
|
+
- type: textarea
|
|
106
|
+
id: logs
|
|
107
|
+
attributes:
|
|
108
|
+
label: Relevant Logs
|
|
109
|
+
description: Please copy and paste any relevant logs. This will be automatically formatted into code.
|
|
110
|
+
render: shell
|
|
111
|
+
|
|
112
|
+
- type: textarea
|
|
113
|
+
id: additional-context
|
|
114
|
+
attributes:
|
|
115
|
+
label: Additional Context
|
|
116
|
+
description: For "Other" OS versions, please specify here. Also add any other context about the problem.
|
|
117
|
+
|
|
118
|
+
- type: checkboxes
|
|
119
|
+
id: terms
|
|
120
|
+
attributes:
|
|
121
|
+
label: Code of Conduct
|
|
122
|
+
description: By submitting this issue, you agree to follow our project's Code of Conduct
|
|
123
|
+
options:
|
|
124
|
+
- label: I agree to follow this project's Code of Conduct
|
|
125
|
+
required: true
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
### ServiceNow Open Source Code-of-Conduct
|
|
2
|
+
|
|
3
|
+
This code of conduct provides guidelines for participation in ServiceNow-managed open-source communities and projects.
|
|
4
|
+
|
|
5
|
+
**Discussion forum guidelines**
|
|
6
|
+
|
|
7
|
+
Communities thrive when members support each other and provide useful feedback.
|
|
8
|
+
|
|
9
|
+
- Be polite and courteous. Respect and treat others as you would expect to be treated yourself.
|
|
10
|
+
- Respect your audience. Posts should not upset, annoy, threaten, harass, abuse or embarrass other members.
|
|
11
|
+
- User Contributions must not include material that is defamatory, obscene, indecent, abusive, offensive, harassing, violent, hateful, inflammatory or otherwise objectionable.
|
|
12
|
+
- Lively and collegial discussions are always encouraged in a healthy community. It is okay to argue facts but not okay to argue personalities or personal beliefs.
|
|
13
|
+
- Do not use text formats such as all caps or bold that may be read as annoying or rude, or that send an overly strong message.
|
|
14
|
+
- Do not publish anyone’s private personal information without their explicit consent.
|
|
15
|
+
- Avoid using abbreviations or terminology that others may not understand. An abbreviation may mean something to you but in another context or country, it may have another meaning.
|
|
16
|
+
- Be accountable for your actions by correcting your mistakes and indicating where you have changed a previous post of yours.
|
|
17
|
+
- Mark content as correct and helpful, and provide feedback. If you read a discussion post that you find helpful, we encourage you to leave a positive vote and comment in the replies. If you find a post that is unhelpful, please provide more information in the issue comments.
|
|
18
|
+
|
|
19
|
+
**Issue board guidelines**
|
|
20
|
+
|
|
21
|
+
Many open-source projects provide an Issues board, with similar functionality to a Discussions forum. The same rules from the discussion forum guidelines apply to the Issues board.
|
|
22
|
+
|
|
23
|
+
ServiceNow suggests the following technical support pathways for open-source projects:
|
|
24
|
+
|
|
25
|
+
1. Clearly identify and document the issue or question you have.
|
|
26
|
+
2. View the Documentation.
|
|
27
|
+
3. Search the Discussions.
|
|
28
|
+
4. Search the project knowledge base or Wiki for known errors, useful solutions, and troubleshooting tips.
|
|
29
|
+
5. Check the project guidelines in the [`CONTRIBUTING.md`](CONTRIBUTING.md) file if you would like details on how you can submit a change. Community contributions are valued and appreciated!
|
|
30
|
+
6. Log an Issue if it hasn’t already been logged. If the issue has already been logged by another user, vote it up, and add a comment with additional or missing information. Do your best to choose the correct category when logging a new issue. This will make it easier to differentiate bugs from new feature requests or ideas. If after logging an issue you find the solution, please close your issue and provide a comment with the solution. This will help the project owners and other users.
|
|
31
|
+
7. As a last resort only, contact the project's contributors to see if they can help.
|
|
32
|
+
|
|
33
|
+
**Repositories**
|
|
34
|
+
|
|
35
|
+
- Read and follow the license instructions
|
|
36
|
+
- Remember to include citations if you use someone else’s work in your own project. Use the [`CITATION.cff`](CITATION.cff) to find the correct project citation reference.
|
|
37
|
+
- ‘Star’ project repos to save for future reference.
|
|
38
|
+
- ‘Watch’ project repos to get notifications of changes – this can get noisy for some projects, so only watch the ones you really need to track closely.
|
|
39
|
+
|
|
40
|
+
**Enforcement and reporting**
|
|
41
|
+
|
|
42
|
+
We encourage community members and users to help each other and to resolve issues amongst themselves as much as possible. If a matter cannot be resolved in good faith within the means available, please reach out to a team member or email servicenow-research@servicenow.com.
|
|
43
|
+
|
|
44
|
+
**ServiceNow Disclaimer.**
|
|
45
|
+
|
|
46
|
+
We may, but are under no obligation to, monitor or censor comments made by users or content provided by contributors and we are not responsible for the accuracy, completeness, appropriateness or legality of anything posted, depicted or otherwise provided by third‑party users and we disclaim any and all liability relating thereto.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: browsergym-workarena
|
|
3
|
-
Version: 0.3.2
|
|
3
|
+
Version: 0.4.2
|
|
4
4
|
Summary: WorkArena benchmark for BrowserGym
|
|
5
5
|
Project-URL: homepage, https://github.com/ServiceNow/WorkArena
|
|
6
6
|
Author: Léo Boisvert, Alex Drouin, Maxime Gasse, Alex Lacoste, Manuel Del Verme, Megh Thakkar
|
|
@@ -22,20 +22,44 @@ Requires-Dist: tenacity>=8.2.3
|
|
|
22
22
|
Requires-Dist: tqdm>=4.66.2
|
|
23
23
|
Description-Content-Type: text/markdown
|
|
24
24
|
|
|
25
|
+
<a href="./assets/WorkArena_banner.png">
|
|
26
|
+
<img src="./assets/WorkArena_banner.png" width="1000" />
|
|
27
|
+
</a>
|
|
28
|
+
|
|
25
29
|
# WorkArena: A Benchmark for Evaluating Agents on Knowledge Work Tasks
|
|
26
|
-
[[Benchmark Contents]](#benchmark-contents) ♦ [[Getting Started]](#getting-started) ♦ [[Live Demo]](#live-demo) ♦ [[BrowserGym]](https://github.com/ServiceNow/BrowserGym) ♦ [[Citing This Work]](#citing-this-work)
|
|
30
|
+
[[Benchmark Contents]](#benchmark-contents) ♦ [[Getting Started]](#getting-started) ♦ [[Live Demo]](#live-demo) ♦ [[BrowserGym]](https://github.com/ServiceNow/BrowserGym) ♦ [[Citing This Work]](#citing-this-work) ♦ [Join us on Discord!](https://discord.gg/rDkP69X7)
|
|
31
|
+
|
|
32
|
+
## Join Our Discord Community
|
|
33
|
+
|
|
34
|
+
Want to brainstorm ideas, troubleshoot issues, or just geek out with fellow agent builders? Our official Discord server is the perfect place to connect and collaborate. Come hang out with us to:
|
|
35
|
+
|
|
36
|
+
- Exchange tips, tricks, and success stories
|
|
37
|
+
- Get real-time support and feedback
|
|
38
|
+
- Stay updated on the latest features and announcements
|
|
39
|
+
|
|
40
|
+
[Join us on Discord!](https://discord.gg/rDkP69X7)
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
### Explore the BrowserGym Ecosystem
|
|
45
|
+
|
|
46
|
+
Looking for more tools and resources? Check out these open-source projects:
|
|
47
|
+
|
|
48
|
+
- **[AgentLab](https://github.com/ServiceNow/AgentLab)**
|
|
49
|
+
- **[BrowserGym](https://github.com/ServiceNow/BrowserGym)**
|
|
50
|
+
|
|
51
|
+
Both are part of the broader [BrowserGym ecosystem](https://arxiv.org/abs/2412.05467).
|
|
27
52
|
|
|
28
53
|
### Papers
|
|
29
54
|
* [ICML 2024] WorkArena: How Capable are Web Agents at Solving Common Knowledge Work Tasks? [[Paper]](https://arxiv.org/abs/2403.07718)
|
|
30
55
|
|
|
31
|
-
* WorkArena++: Towards Compositional Planning and Reasoning-based Common Knowledge Work Tasks [[Paper]](https://arxiv.org/abs/2407.05291)
|
|
56
|
+
* [NeurIPS 2024] WorkArena++: Towards Compositional Planning and Reasoning-based Common Knowledge Work Tasks [[Paper]](https://arxiv.org/abs/2407.05291)
|
|
32
57
|
|
|
33
58
|
|
|
34
59
|
`WorkArena` is a suite of browser-based tasks tailored to gauge web agents' effectiveness in supporting routine tasks for knowledge workers.
|
|
35
60
|
By harnessing the ubiquitous [ServiceNow](https://www.servicenow.com/what-is-servicenow.html) platform, this benchmark will be instrumental in assessing the widespread state of such automations in modern knowledge work environments.
|
|
36
61
|
|
|
37
|
-
WorkArena is
|
|
38
|
-
|
|
62
|
+
The preferred way to evaluate on WorkArena is with [AgentLab](https://github.com/ServiceNow/AgentLab/), which will conduct parallel experiments through [BrowserGym](https://github.com/ServiceNow/BrowserGym) and report on a [unified leaderboard](https://huggingface.co/spaces/ServiceNow/browsergym-leaderboard).
|
|
39
63
|
|
|
40
64
|
https://github.com/ServiceNow/WorkArena/assets/2374980/68640f09-7d6f-4eb1-b556-c294a6afef70
|
|
41
65
|
|
|
@@ -48,7 +72,9 @@ To setup WorkArena, you will need to get your own ServiceNow instance, install o
|
|
|
48
72
|
1. Go to https://developer.servicenow.com/ and create an account.
|
|
49
73
|
2. Click on `Request an instance` and select the `Washington` release (initializing the instance will take a few minutes)
|
|
50
74
|
3. Once the instance is ready, you should see your instance URL and credentials. If not, click _Return to the Developer Portal_, then navigate to _Manage instance password_ and click _Reset instance password_.
|
|
51
|
-
4.
|
|
75
|
+
4. Change the role of the user to admin in your instance parameters.
|
|
76
|
+
|
|
77
|
+
5. You should now see your URL and credentials. Based on this information, set the following environment variables:
|
|
52
78
|
* `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
|
|
53
79
|
* `SNOW_INSTANCE_UNAME`: The username, should be "admin"
|
|
54
80
|
* `SNOW_INSTANCE_PWD`: The password, make sure you place the value in quotes "" and be mindful of [escaping special shell characters](https://onlinelinuxtools.com/escape-shell-characters). Running `echo $SNOW_INSTANCE_PWD` should print the correct password.
|
|
@@ -123,41 +149,6 @@ https://github.com/ServiceNow/WorkArena/assets/1726818/ca26dfaf-2358-4418-855f-8
|
|
|
123
149
|
|
|
124
150
|
https://github.com/ServiceNow/WorkArena/assets/1726818/0023232c-081f-4be4-99bd-f60c766e6c3f
|
|
125
151
|
|
|
126
|
-
## Getting Started
|
|
127
|
-
|
|
128
|
-
To setup WorkArena, you will need to get your own ServiceNow instance, install our Python package, and upload some data to your instance. Follow the steps below to achieve this.
|
|
129
|
-
|
|
130
|
-
### a) Create a ServiceNow Developer Instance
|
|
131
|
-
|
|
132
|
-
1. Go to https://developer.servicenow.com/ and create an account.
|
|
133
|
-
2. Click on `Request an instance` and select the `Washington` release (initializing the instance will take a few minutes)
|
|
134
|
-
3. Once the instance is ready, you should see your instance URL and credentials. If not, click _Return to the Developer Portal_, then navigate to _Manage instance password_ and click _Reset instance password_.
|
|
135
|
-
4. You should now see your URL and credentials. Based on this information, set the following environment variables:
|
|
136
|
-
* `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
|
|
137
|
-
* `SNOW_INSTANCE_UNAME`: The username, should be "admin"
|
|
138
|
-
* `SNOW_INSTANCE_PWD`: The password, make sure you place the value in single quotes '' and be mindful of [escaping special shell characters](https://onlinelinuxtools.com/escape-shell-characters). Running `echo $SNOW_INSTANCE_PWD` should print the correct password.
|
|
139
|
-
6. Log into your instance via a browser using the admin credentials. Close any popup that appears on the main screen (e.g., agreeing to analytics).
|
|
140
|
-
|
|
141
|
-
**Warning:** Feel free to look around the platform, but please make sure you revert any changes (e.g., changes to list views, pinning some menus, etc.) as these changes will be persistent and affect the benchmarking process.
|
|
142
|
-
|
|
143
|
-
### b) Install WorkArena and Initialize your Instance
|
|
144
|
-
|
|
145
|
-
Run the following command to install WorkArena in the [BrowserGym](https://github.com/servicenow/browsergym) environment:
|
|
146
|
-
```
|
|
147
|
-
pip install browsergym-workarena
|
|
148
|
-
```
|
|
149
|
-
|
|
150
|
-
Then, install [Playwright](https://github.com/microsoft/playwright):
|
|
151
|
-
```
|
|
152
|
-
playwright install
|
|
153
|
-
```
|
|
154
|
-
|
|
155
|
-
Finally, run this command in a terminal to upload the benchmark data to your ServiceNow instance:
|
|
156
|
-
```
|
|
157
|
-
workarena-install
|
|
158
|
-
```
|
|
159
|
-
Your installation is now complete! 🎉
|
|
160
|
-
|
|
161
152
|
## Live Demo
|
|
162
153
|
|
|
163
154
|
Run this code to see WorkArena in action.
|
|
@@ -169,12 +160,12 @@ Note: the following example executes WorkArena's oracle (cheat) function to solv
|
|
|
169
160
|
import random
|
|
170
161
|
|
|
171
162
|
from browsergym.core.env import BrowserEnv
|
|
172
|
-
from browsergym.workarena import
|
|
163
|
+
from browsergym.workarena import ATOMIC_TASKS
|
|
173
164
|
from time import sleep
|
|
174
165
|
|
|
175
166
|
|
|
176
|
-
random.shuffle(
|
|
177
|
-
for task in
|
|
167
|
+
random.shuffle(ATOMIC_TASKS)
|
|
168
|
+
for task in ATOMIC_TASKS:
|
|
178
169
|
print("Task:", task)
|
|
179
170
|
|
|
180
171
|
# Instantiate a new environment
|
|
@@ -276,4 +267,4 @@ Please use the following BibTeX to cite our work:
|
|
|
276
267
|
primaryClass={cs.AI},
|
|
277
268
|
url={https://arxiv.org/abs/2407.05291},
|
|
278
269
|
}
|
|
279
|
-
```
|
|
270
|
+
```
|
|
@@ -1,17 +1,41 @@
|
|
|
1
|
+
<a href="./assets/WorkArena_banner.png">
|
|
2
|
+
<img src="./assets/WorkArena_banner.png" width="1000" />
|
|
3
|
+
</a>
|
|
4
|
+
|
|
1
5
|
# WorkArena: A Benchmark for Evaluating Agents on Knowledge Work Tasks
|
|
2
|
-
[[Benchmark Contents]](#benchmark-contents) ♦ [[Getting Started]](#getting-started) ♦ [[Live Demo]](#live-demo) ♦ [[BrowserGym]](https://github.com/ServiceNow/BrowserGym) ♦ [[Citing This Work]](#citing-this-work)
|
|
6
|
+
[[Benchmark Contents]](#benchmark-contents) ♦ [[Getting Started]](#getting-started) ♦ [[Live Demo]](#live-demo) ♦ [[BrowserGym]](https://github.com/ServiceNow/BrowserGym) ♦ [[Citing This Work]](#citing-this-work) ♦ [Join us on Discord!](https://discord.gg/rDkP69X7)
|
|
7
|
+
|
|
8
|
+
## Join Our Discord Community
|
|
9
|
+
|
|
10
|
+
Want to brainstorm ideas, troubleshoot issues, or just geek out with fellow agent builders? Our official Discord server is the perfect place to connect and collaborate. Come hang out with us to:
|
|
11
|
+
|
|
12
|
+
- Exchange tips, tricks, and success stories
|
|
13
|
+
- Get real-time support and feedback
|
|
14
|
+
- Stay updated on the latest features and announcements
|
|
15
|
+
|
|
16
|
+
[Join us on Discord!](https://discord.gg/rDkP69X7)
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
### Explore the BrowserGym Ecosystem
|
|
21
|
+
|
|
22
|
+
Looking for more tools and resources? Check out these open-source projects:
|
|
23
|
+
|
|
24
|
+
- **[AgentLab](https://github.com/ServiceNow/AgentLab)**
|
|
25
|
+
- **[BrowserGym](https://github.com/ServiceNow/BrowserGym)**
|
|
26
|
+
|
|
27
|
+
Both are part of the broader [BrowserGym ecosystem](https://arxiv.org/abs/2412.05467)
|
|
3
28
|
|
|
4
29
|
### Papers
|
|
5
30
|
* [ICML 2024] WorkArena: How Capable are Web Agents at Solving Common Knowledge Work Tasks? [[Paper]](https://arxiv.org/abs/2403.07718)
|
|
6
31
|
|
|
7
|
-
* WorkArena++: Towards Compositional Planning and Reasoning-based Common Knowledge Work Tasks [[Paper]](https://arxiv.org/abs/2407.05291)
|
|
32
|
+
* [NeurIPS 2024] WorkArena++: Towards Compositional Planning and Reasoning-based Common Knowledge Work Tasks [[Paper]](https://arxiv.org/abs/2407.05291)
|
|
8
33
|
|
|
9
34
|
|
|
10
35
|
`WorkArena` is a suite of browser-based tasks tailored to gauge web agents' effectiveness in supporting routine tasks for knowledge workers.
|
|
11
36
|
By harnessing the ubiquitous [ServiceNow](https://www.servicenow.com/what-is-servicenow.html) platform, this benchmark will be instrumental in assessing the widespread state of such automations in modern knowledge work environments.
|
|
12
37
|
|
|
13
|
-
WorkArena is
|
|
14
|
-
|
|
38
|
+
The preferred way to evaluate on WorkArena is with [AgentLab](https://github.com/ServiceNow/AgentLab/) which will conduct parallel experiments through [BrowserGym](https://github.com/ServiceNow/BrowserGym) and report on a [unified leaderboard](https://huggingface.co/spaces/ServiceNow/browsergym-leaderboard).
|
|
15
39
|
|
|
16
40
|
https://github.com/ServiceNow/WorkArena/assets/2374980/68640f09-7d6f-4eb1-b556-c294a6afef70
|
|
17
41
|
|
|
@@ -24,7 +48,9 @@ To setup WorkArena, you will need to get your own ServiceNow instance, install o
|
|
|
24
48
|
1. Go to https://developer.servicenow.com/ and create an account.
|
|
25
49
|
2. Click on `Request an instance` and select the `Washington` release (initializing the instance will take a few minutes)
|
|
26
50
|
3. Once the instance is ready, you should see your instance URL and credentials. If not, click _Return to the Developer Portal_, then navigate to _Manage instance password_ and click _Reset instance password_.
|
|
27
|
-
4.
|
|
51
|
+
4. Change the role of the user to admin in your instance parameters.
|
|
52
|
+
|
|
53
|
+
5. You should now see your URL and credentials. Based on this information, set the following environment variables:
|
|
28
54
|
* `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
|
|
29
55
|
* `SNOW_INSTANCE_UNAME`: The username, should be "admin"
|
|
30
56
|
* `SNOW_INSTANCE_PWD`: The password, make sure you place the value in quotes "" and be mindful of [escaping special shell characters](https://onlinelinuxtools.com/escape-shell-characters). Running `echo $SNOW_INSTANCE_PWD` should print the correct password.
|
|
@@ -99,41 +125,6 @@ https://github.com/ServiceNow/WorkArena/assets/1726818/ca26dfaf-2358-4418-855f-8
|
|
|
99
125
|
|
|
100
126
|
https://github.com/ServiceNow/WorkArena/assets/1726818/0023232c-081f-4be4-99bd-f60c766e6c3f
|
|
101
127
|
|
|
102
|
-
## Getting Started
|
|
103
|
-
|
|
104
|
-
To setup WorkArena, you will need to get your own ServiceNow instance, install our Python package, and upload some data to your instance. Follow the steps below to achieve this.
|
|
105
|
-
|
|
106
|
-
### a) Create a ServiceNow Developer Instance
|
|
107
|
-
|
|
108
|
-
1. Go to https://developer.servicenow.com/ and create an account.
|
|
109
|
-
2. Click on `Request an instance` and select the `Washington` release (initializing the instance will take a few minutes)
|
|
110
|
-
3. Once the instance is ready, you should see your instance URL and credentials. If not, click _Return to the Developer Portal_, then navigate to _Manage instance password_ and click _Reset instance password_.
|
|
111
|
-
4. You should now see your URL and credentials. Based on this information, set the following environment variables:
|
|
112
|
-
* `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
|
|
113
|
-
* `SNOW_INSTANCE_UNAME`: The username, should be "admin"
|
|
114
|
-
* `SNOW_INSTANCE_PWD`: The password, make sure you place the value in single quotes '' and be mindful of [escaping special shell characters](https://onlinelinuxtools.com/escape-shell-characters). Running `echo $SNOW_INSTANCE_PWD` should print the correct password.
|
|
115
|
-
6. Log into your instance via a browser using the admin credentials. Close any popup that appears on the main screen (e.g., agreeing to analytics).
|
|
116
|
-
|
|
117
|
-
**Warning:** Feel free to look around the platform, but please make sure you revert any changes (e.g., changes to list views, pinning some menus, etc.) as these changes will be persistent and affect the benchmarking process.
|
|
118
|
-
|
|
119
|
-
### b) Install WorkArena and Initialize your Instance
|
|
120
|
-
|
|
121
|
-
Run the following command to install WorkArena in the [BrowserGym](https://github.com/servicenow/browsergym) environment:
|
|
122
|
-
```
|
|
123
|
-
pip install browsergym-workarena
|
|
124
|
-
```
|
|
125
|
-
|
|
126
|
-
Then, install [Playwright](https://github.com/microsoft/playwright):
|
|
127
|
-
```
|
|
128
|
-
playwright install
|
|
129
|
-
```
|
|
130
|
-
|
|
131
|
-
Finally, run this command in a terminal to upload the benchmark data to your ServiceNow instance:
|
|
132
|
-
```
|
|
133
|
-
workarena-install
|
|
134
|
-
```
|
|
135
|
-
Your installation is now complete! 🎉
|
|
136
|
-
|
|
137
128
|
## Live Demo
|
|
138
129
|
|
|
139
130
|
Run this code to see WorkArena in action.
|
|
@@ -145,12 +136,12 @@ Note: the following example executes WorkArena's oracle (cheat) function to solv
|
|
|
145
136
|
import random
|
|
146
137
|
|
|
147
138
|
from browsergym.core.env import BrowserEnv
|
|
148
|
-
from browsergym.workarena import
|
|
139
|
+
from browsergym.workarena import ATOMIC_TASKS
|
|
149
140
|
from time import sleep
|
|
150
141
|
|
|
151
142
|
|
|
152
|
-
random.shuffle(
|
|
153
|
-
for task in
|
|
143
|
+
random.shuffle(ATOMIC_TASKS)
|
|
144
|
+
for task in ATOMIC_TASKS:
|
|
154
145
|
print("Task:", task)
|
|
155
146
|
|
|
156
147
|
# Instantiate a new environment
|
|
@@ -252,4 +243,4 @@ Please use the following BibTeX to cite our work:
|
|
|
252
243
|
primaryClass={cs.AI},
|
|
253
244
|
url={https://arxiv.org/abs/2407.05291},
|
|
254
245
|
}
|
|
255
|
-
```
|
|
246
|
+
```
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Security Policy
|
|
2
|
+
|
|
3
|
+
## Reporting a Vulnerability
|
|
4
|
+
|
|
5
|
+
If you find a vulnerability in ServiceNow systems, products, or network infrastructure, our [Responsible Disclosure Program](https://www.servicenow.com/company/trust/privacy/responsible-disclosure.html#our+Commitment) is the place to make a report.
|
|
6
|
+
|
|
7
|
+
If you find a vulnerability in this open-source project published by the ServiceNow Research team, please email [servicenow-research@servicenow.com](mailto:servicenow-research@servicenow.com) to report your findings.
|
|
8
|
+
|
|
9
|
+
We will process your report as soon as possible, depending on the severity of your report. We appreciate everyone’s help in disclosing vulnerabilities in a responsible manner.
|
|
10
|
+
|
|
11
|
+
## Guidelines
|
|
12
|
+
|
|
13
|
+
Please follow the guidelines below when disclosing vulnerabilities:
|
|
14
|
+
|
|
15
|
+
- Report any potential security issue as soon as possible. We will make every effort to quickly resolve the issue.
|
|
16
|
+
- Provide sufficient detail to reproduce the vulnerability, including proof of concept.
|
|
17
|
+
- Please do not disclose an issue to the public or a third party until ServiceNow has resolved it.
|
|
18
|
+
- Make a good faith effort to avoid privacy violations, destruction of data, and interruption or degradation of our service. Only interact with accounts you own or accounts for which you have the explicit permission of the account holder.
|
|
19
|
+
- Redact any language or images that may identify the program or ServiceNow customers from information about a fixed vulnerability.
|
|
20
|
+
- Do not engage in disruptive testing (such as DoS) or any action that could impact the confidentiality, integrity, or availability of information and systems.
|
|
21
|
+
- Do not engage in social engineering or phishing of customers or employees.
|
|
22
|
+
- Please do not request compensation for time and materials or discovered vulnerabilities.
|
|
Binary file
|
{browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/install.py
RENAMED
|
@@ -788,6 +788,7 @@ def check_instance_release_support():
|
|
|
788
788
|
f"You are running {version_info['build name']} {version_info}."
|
|
789
789
|
)
|
|
790
790
|
return False
|
|
791
|
+
|
|
791
792
|
return True
|
|
792
793
|
|
|
793
794
|
|
|
@@ -800,6 +801,17 @@ def enable_url_login():
|
|
|
800
801
|
logging.info("URL login enabled.")
|
|
801
802
|
|
|
802
803
|
|
|
804
|
+
def disable_password_policies():
|
|
805
|
+
"""
|
|
806
|
+
Disable password policies in the instance.
|
|
807
|
+
|
|
808
|
+
Notes: this is required to allow the creation of users with weak passwords.
|
|
809
|
+
|
|
810
|
+
"""
|
|
811
|
+
_set_sys_property(property_name="glide.security.password.policy.enabled", value="false")
|
|
812
|
+
logging.info("Password policies disabled.")
|
|
813
|
+
|
|
814
|
+
|
|
803
815
|
def disable_guided_tours():
|
|
804
816
|
"""
|
|
805
817
|
Hide guided tour popups
|
|
@@ -1010,6 +1022,9 @@ def setup():
|
|
|
1010
1022
|
# Enable URL login (XXX: Do this first since other functions can use URL login)
|
|
1011
1023
|
enable_url_login()
|
|
1012
1024
|
|
|
1025
|
+
# Disable password policies
|
|
1026
|
+
disable_password_policies()
|
|
1027
|
+
|
|
1013
1028
|
# Set default landing page
|
|
1014
1029
|
set_home_page()
|
|
1015
1030
|
|
{browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/instance.py
RENAMED
|
@@ -103,7 +103,7 @@ class SNowInstance:
|
|
|
103
103
|
# XXX: Need to include the import here to avoid circular imports
|
|
104
104
|
from .utils import ui_login
|
|
105
105
|
|
|
106
|
-
keys = ["build name", "build date", "build tag"]
|
|
106
|
+
keys = ["build name", "build date", "build tag", "connected to cluster node"]
|
|
107
107
|
|
|
108
108
|
# We need to use playwright since the page is loaded dynamically
|
|
109
109
|
# and its source doesn't contain the information we need
|
{browsergym_workarena-0.3.2 → browsergym_workarena-0.4.2}/src/browsergym/workarena/tasks/list.py
RENAMED
|
@@ -101,6 +101,11 @@ EXTRACT_USER_LIST_INFO_CONFIG = [
|
|
|
101
101
|
|
|
102
102
|
|
|
103
103
|
class ServiceNowListTask(AbstractServiceNowTask):
|
|
104
|
+
OPERATOR_EQUALS = "="
|
|
105
|
+
OPERATOR_NOT_EQUALS = "!="
|
|
106
|
+
OPERATOR_STARTSWITH = "STARTSWITH"
|
|
107
|
+
OPERATOR_ISEMPTY = "ISEMPTY"
|
|
108
|
+
OPERATOR_EMPTYSTRING = "EMPTYSTRING"
|
|
104
109
|
|
|
105
110
|
@classmethod
|
|
106
111
|
def all_configs(cls) -> List[dict]:
|
|
@@ -777,6 +782,9 @@ class FilterListTask(ServiceNowListTask):
|
|
|
777
782
|
list_info = self._extract_list_info(page)
|
|
778
783
|
current_query = list_info["query"]
|
|
779
784
|
|
|
785
|
+
if not current_query:
|
|
786
|
+
return 0, False, "", {"message": "There are no filters yet."}
|
|
787
|
+
|
|
780
788
|
# Replace "new query" statements with the standard OR separator
|
|
781
789
|
current_query = current_query.replace("^NQ", "^OR")
|
|
782
790
|
|
|
@@ -789,24 +797,74 @@ class FilterListTask(ServiceNowListTask):
|
|
|
789
797
|
current_sep = "^"
|
|
790
798
|
|
|
791
799
|
if current_kind != self.filter_kind:
|
|
792
|
-
return
|
|
800
|
+
return (
|
|
801
|
+
0,
|
|
802
|
+
False,
|
|
803
|
+
"",
|
|
804
|
+
{"message": f"The kind of filter used is incorrect: {current_query}."},
|
|
805
|
+
)
|
|
793
806
|
|
|
794
807
|
# Extract the query pieces for validation
|
|
795
808
|
current_query = current_query.split(current_sep)
|
|
796
809
|
|
|
797
810
|
# Validate query length is ok
|
|
798
811
|
if len(current_query) != self.filter_len:
|
|
799
|
-
return
|
|
812
|
+
return (
|
|
813
|
+
0,
|
|
814
|
+
False,
|
|
815
|
+
"",
|
|
816
|
+
{"message": f"Incorrect number of filter conditions: {current_query}."},
|
|
817
|
+
)
|
|
818
|
+
|
|
819
|
+
# Parse column names, operators, and values
|
|
820
|
+
current_columns, current_operators, current_values = [], [], []
|
|
821
|
+
|
|
822
|
+
# Note that this is not exhaustive. If/when other operators are added, this will have to be updated.
|
|
823
|
+
for predicate in current_query:
|
|
824
|
+
if self.OPERATOR_EMPTYSTRING in predicate:
|
|
825
|
+
current_columns.append(predicate.replace(self.OPERATOR_EMPTYSTRING, "").strip())
|
|
826
|
+
current_operators.append("=")
|
|
827
|
+
current_values.append("")
|
|
828
|
+
elif self.OPERATOR_ISEMPTY in predicate:
|
|
829
|
+
current_columns.append(predicate.replace(self.OPERATOR_ISEMPTY, "").strip())
|
|
830
|
+
current_operators.append("=")
|
|
831
|
+
current_values.append("")
|
|
832
|
+
elif any(
|
|
833
|
+
unsupported_operator in predicate
|
|
834
|
+
for unsupported_operator in [self.OPERATOR_NOT_EQUALS, self.OPERATOR_STARTSWITH]
|
|
835
|
+
):
|
|
836
|
+
return (
|
|
837
|
+
0,
|
|
838
|
+
False,
|
|
839
|
+
"",
|
|
840
|
+
{"message": f"Unexpected operator in filter condition: {current_query}."},
|
|
841
|
+
)
|
|
842
|
+
elif self.OPERATOR_EQUALS in predicate:
|
|
843
|
+
col, val = predicate.split(self.OPERATOR_EQUALS, 1)
|
|
844
|
+
current_columns.append(col.strip())
|
|
845
|
+
current_operators.append("=")
|
|
846
|
+
current_values.append(val.strip())
|
|
847
|
+
else:
|
|
848
|
+
return (
|
|
849
|
+
0,
|
|
850
|
+
False,
|
|
851
|
+
"",
|
|
852
|
+
{"message": f"Unexpected operator in filter condition: {current_query}."},
|
|
853
|
+
)
|
|
800
854
|
|
|
801
|
-
# Validate query columns are ok
|
|
802
|
-
current_columns = [x.split("=")[0] for x in current_query]
|
|
803
855
|
if set(current_columns) != set(self.filter_columns):
|
|
804
|
-
return
|
|
856
|
+
return (
|
|
857
|
+
0,
|
|
858
|
+
False,
|
|
859
|
+
"",
|
|
860
|
+
{
|
|
861
|
+
"message": f"Incorrect filter columns: {set(current_columns)}. Expected: {set(self.filter_columns)}."
|
|
862
|
+
},
|
|
863
|
+
)
|
|
805
864
|
|
|
806
865
|
# Validate query values are ok
|
|
807
866
|
# This is the tricky part because we need to expand the values to their display values
|
|
808
867
|
# We also need to handle the case where the value is a reference
|
|
809
|
-
current_values = [x.split("=")[1] for x in current_query]
|
|
810
868
|
|
|
811
869
|
# Handle filtering across multiple rows
|
|
812
870
|
if len(set(current_columns)) < len(current_columns):
|
|
@@ -856,9 +914,21 @@ class FilterListTask(ServiceNowListTask):
|
|
|
856
914
|
|
|
857
915
|
# Validate the values
|
|
858
916
|
if set(current_values) != set(self.filter_values):
|
|
859
|
-
return
|
|
917
|
+
return (
|
|
918
|
+
0,
|
|
919
|
+
False,
|
|
920
|
+
"",
|
|
921
|
+
{
|
|
922
|
+
"message": f"Incorrect filter values {set(current_values)}. Expected: {set(self.filter_values)}."
|
|
923
|
+
},
|
|
924
|
+
)
|
|
860
925
|
|
|
861
|
-
return
|
|
926
|
+
return (
|
|
927
|
+
1,
|
|
928
|
+
True,
|
|
929
|
+
"Nice work, thank you!",
|
|
930
|
+
{"message": f"Correct filter: {list_info['query']}."},
|
|
931
|
+
)
|
|
862
932
|
|
|
863
933
|
|
|
864
934
|
class ExtractListInfoTask(ServiceNowListTask):
|
|
@@ -472,16 +472,6 @@ class OrderHardwareTask(AbstractServiceNowTask):
|
|
|
472
472
|
)
|
|
473
473
|
|
|
474
474
|
def validate(self, page: Page, chat_messages: list[str]) -> tuple[int, bool, str, dict]:
|
|
475
|
-
right_url = check_url_suffix_match(page, expected_url=self.final_url, task=self)
|
|
476
|
-
if not right_url:
|
|
477
|
-
return (
|
|
478
|
-
0,
|
|
479
|
-
False,
|
|
480
|
-
"",
|
|
481
|
-
{
|
|
482
|
-
"message": f"The page is not in the right URL to validate task {self.__class__.__name__}."
|
|
483
|
-
},
|
|
484
|
-
)
|
|
485
475
|
|
|
486
476
|
# Retrieve the request sysid from the URL
|
|
487
477
|
current_url = parse.urlparse(parse.unquote(page.evaluate("() => window.location.href")))
|