browsergym-workarena 0.4.4__tar.gz → 0.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browsergym_workarena-0.5.1/.github/workflows/instance_pool_ci.yml +82 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/.github/workflows/unit_tests.yml +4 -4
- browsergym_workarena-0.5.1/CITATION.cff +81 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/PKG-INFO +9 -21
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/README.md +7 -20
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/requirements.txt +1 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/__init__.py +1 -1
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/config.py +6 -0
- browsergym_workarena-0.5.1/src/browsergym/workarena/data_files/task_configs/dashboard_retrieval_value_task.json +1 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/install.py +56 -3
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/instance.py +101 -17
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/dashboard.py +20 -12
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/knowledge.py +1 -1
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/scripts/generate_dashboard_configs.py +11 -2
- browsergym_workarena-0.4.4/src/browsergym/workarena/tasks/scripts/navigation.py → browsergym_workarena-0.5.1/src/browsergym/workarena/tasks/scripts/generate_navigation_tasks.py +4 -1
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/scripts/knowledge.py +6 -4
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/wa_action_traces.py +9 -2
- browsergym_workarena-0.5.1/tests/test_snow_instance.py +92 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/tests/test_task_general.py +23 -4
- browsergym_workarena-0.4.4/src/browsergym/workarena/data_files/task_configs/dashboard_retrieval_value_task.json +0 -1
- browsergym_workarena-0.4.4/tests/test_snow_instance.py +0 -52
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/.github/workflows/pypi.yml +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/.gitignore +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/CODE_OF_CONDUCT.md +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/LICENSE +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/SECURITY.md +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/assets/WorkArena_banner.png +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/dcat-metadata.jsonld +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/dev/environment.yaml +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/dev/requirements.txt +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/generate_knowledge_base.ipynb +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/make_human_eval_curriculum.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/pyproject.toml +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/scripts/extract_finetuning_traces.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/scripts/generate_knowledge_base.ipynb +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/scripts/make_human_eval_curriculum.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/__init__.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/category.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/change_request.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/computer_asset.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/cost_center.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/expense_line.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/incident.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/knowledge.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/problem.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/report.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/requested_items.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/requests.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/system_properties.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/ui_themes.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/user.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/api/utils.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/forms/expected_change_request_form_fields.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/forms/expected_hardware_form_fields.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/forms/expected_incident_form_fields.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/forms/expected_problem_form_fields.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/forms/expected_request_item_form_fields.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/forms/expected_user_form_fields.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/knowledge/kb_autopublish_workflow.xml +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/knowledge/knowledge_base.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/knowledge/protocols.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/knowledge/test.html +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_asset_list_columns.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_change_request_list_columns.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_expense_line_list_columns.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_hardware_list_columns.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_incident_list_columns.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_problem_list_columns.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_requested_items_list_columns.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_service_catalog_list_columns.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/lists/expected_user_list_columns.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/setup_files/ui_themes/workarena_themes.xml +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/all_menu.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/create_change_request_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/create_hardware_asset_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/create_incident_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/create_problem_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/create_user_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/dashboard_retrieval_minmax_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/filter_asset_list_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/filter_change_request_list_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/filter_hardware_list_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/filter_incident_list_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/filter_service_catalog_item_list_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/filter_user_list_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/impersonation_users.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/knowledge_base_configs.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/order_apple_mac_book_pro15_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/order_apple_watch_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/order_developer_laptop_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/order_development_laptop_pc_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/order_ipad_mini_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/order_ipad_pro_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/order_loaner_laptop_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/order_sales_laptop_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/order_standard_laptop_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/report_retrieval_minmax_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/report_retrieval_value_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/sort_asset_list_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/sort_change_request_list_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/sort_hardware_list_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/sort_incident_list_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/sort_service_catalog_item_list_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/data_files/task_configs/sort_user_list_task.json +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/human_eval/console.js +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/human_eval/tool.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/__init__.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/base.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/comp_building_block.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/__init__.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/base.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/dash_do_base.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/dash_do_catalog.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/dash_do_catalog_infeasible.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/dash_do_create_incident.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/dash_do_create_incident_infeasible.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/dash_do_create_problem.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/dash_do_create_problem_infeasible.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/dash_do_filter.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/dash_do_request_item.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/dash_do_request_item_infeasible.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/delete_record.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/edit_knowledge_base.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/expense_management.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/filter_and_do.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/find_and_order_item.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/manage_change_request_schedule.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/mark_duplicate_problems.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/maximize_investment_return.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/navigate_and_do.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/navigate_and_do_infeasible.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/offboard_user.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/onboard_user.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/update_task.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/utils/curriculum.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/utils/infeasible_configs.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/utils/knapsack.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/warranty_check.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/work_assignment.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/compositional/workload_balancing.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/form.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/list.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/mark_duplicate_problem.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/navigation.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/scripts/README.md +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/scripts/extract_all_menu_items.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/scripts/generate_forms.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/scripts/list.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/scripts/service_catalog.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/scripts/validate.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/send_chat_message.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/service_catalog.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/utils/__init__.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/utils/debug.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/utils/form.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/utils/js_utils.js +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/utils/private_tasks.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/utils/string.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/tasks/utils/utils.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/utils.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/workarena_test.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/tests/test_api.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/tests/test_compositional.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/tests/test_compositional_utils.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/tests/test_filter_list_task.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/tests/test_random_config_generation.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/tests/test_task_from_config.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/tests/test_task_setup.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/tests/test_utils.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/tests/test_validate.py +0 -0
- {browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/tests/utils.py +0 -0
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
name: Monitor the pool of WorkArena instances
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
workflow_dispatch:
|
|
5
|
+
schedule:
|
|
6
|
+
- cron: "0 3 * * *" # daily at 03:00 UTC
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
|
|
10
|
+
test-l1-tasks:
|
|
11
|
+
name: Test L1 tasks
|
|
12
|
+
runs-on: ubuntu-22.04
|
|
13
|
+
|
|
14
|
+
defaults:
|
|
15
|
+
run:
|
|
16
|
+
shell: bash -l {0}
|
|
17
|
+
|
|
18
|
+
env:
|
|
19
|
+
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
|
|
20
|
+
|
|
21
|
+
steps:
|
|
22
|
+
- name: Checkout Repository
|
|
23
|
+
uses: actions/checkout@v4
|
|
24
|
+
|
|
25
|
+
- name: Set up Python
|
|
26
|
+
uses: actions/setup-python@v5
|
|
27
|
+
with:
|
|
28
|
+
python-version: '3.12'
|
|
29
|
+
cache: 'pip'
|
|
30
|
+
|
|
31
|
+
- name: Install Python dependencies
|
|
32
|
+
working-directory: ./dev
|
|
33
|
+
run: |
|
|
34
|
+
pip install -r requirements.txt
|
|
35
|
+
pip install huggingface_hub
|
|
36
|
+
|
|
37
|
+
- name: Pip list
|
|
38
|
+
run: pip list
|
|
39
|
+
|
|
40
|
+
- name: Install Playwright
|
|
41
|
+
run: playwright install chromium --with-deps
|
|
42
|
+
|
|
43
|
+
- name: Run L1 tests
|
|
44
|
+
run: pytest -n 20 --durations=10 --slowmo 1000 -v tests/test_task_general.py
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
test-snow-instance:
|
|
48
|
+
name: Test snow instance
|
|
49
|
+
runs-on: ubuntu-22.04
|
|
50
|
+
needs: test-l1-tasks # remove this line if you want both jobs to run in parallel
|
|
51
|
+
|
|
52
|
+
defaults:
|
|
53
|
+
run:
|
|
54
|
+
shell: bash -l {0}
|
|
55
|
+
|
|
56
|
+
env:
|
|
57
|
+
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
|
|
58
|
+
|
|
59
|
+
steps:
|
|
60
|
+
- name: Checkout Repository
|
|
61
|
+
uses: actions/checkout@v4
|
|
62
|
+
|
|
63
|
+
- name: Set up Python
|
|
64
|
+
uses: actions/setup-python@v5
|
|
65
|
+
with:
|
|
66
|
+
python-version: '3.12'
|
|
67
|
+
cache: 'pip'
|
|
68
|
+
|
|
69
|
+
- name: Install Python dependencies
|
|
70
|
+
working-directory: ./dev
|
|
71
|
+
run: |
|
|
72
|
+
pip install -r requirements.txt
|
|
73
|
+
pip install huggingface_hub
|
|
74
|
+
|
|
75
|
+
- name: Pip list
|
|
76
|
+
run: pip list
|
|
77
|
+
|
|
78
|
+
- name: Install Playwright
|
|
79
|
+
run: playwright install chromium --with-deps
|
|
80
|
+
|
|
81
|
+
- name: Run snow instance tests
|
|
82
|
+
run: pytest -n 20 --durations=10 --slowmo 1000 -v tests/test_snow_instance.py
|
|
@@ -36,7 +36,7 @@ jobs:
|
|
|
36
36
|
run: black . --check
|
|
37
37
|
|
|
38
38
|
browsergym-workarena-fast:
|
|
39
|
-
runs-on: ubuntu-
|
|
39
|
+
runs-on: ubuntu-22.04
|
|
40
40
|
|
|
41
41
|
defaults:
|
|
42
42
|
run:
|
|
@@ -50,7 +50,7 @@ jobs:
|
|
|
50
50
|
- name: Set up Python
|
|
51
51
|
uses: actions/setup-python@v5
|
|
52
52
|
with:
|
|
53
|
-
python-version: '3.
|
|
53
|
+
python-version: '3.12'
|
|
54
54
|
cache: 'pip' # caching pip dependencies
|
|
55
55
|
|
|
56
56
|
- name: Pip install
|
|
@@ -59,9 +59,9 @@ jobs:
|
|
|
59
59
|
|
|
60
60
|
- name: Pip list
|
|
61
61
|
run: pip list
|
|
62
|
-
|
|
62
|
+
|
|
63
63
|
- name: Install Playwright
|
|
64
|
-
run: playwright install --with-deps
|
|
64
|
+
run: playwright install chromium --with-deps
|
|
65
65
|
|
|
66
66
|
- name: Run non-slow browsergym-workarena Unit Tests
|
|
67
67
|
env:
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
cff-version: 1.2.0
|
|
2
|
+
message: "If you use WorkArena in your research, please cite both of the following papers."
|
|
3
|
+
title: "WorkArena"
|
|
4
|
+
date-released: 2024-03-12
|
|
5
|
+
url: "https://github.com/ServiceNow/workarena"
|
|
6
|
+
license: "Apache-2.0"
|
|
7
|
+
|
|
8
|
+
authors:
|
|
9
|
+
- family-names: Drouin
|
|
10
|
+
given-names: Alexandre
|
|
11
|
+
- family-names: Gasse
|
|
12
|
+
given-names: Maxime
|
|
13
|
+
- family-names: Caccia
|
|
14
|
+
given-names: Massimo
|
|
15
|
+
- family-names: Laradji
|
|
16
|
+
given-names: Issam H.
|
|
17
|
+
- family-names: Del Verme
|
|
18
|
+
given-names: Manuel
|
|
19
|
+
- family-names: Marty
|
|
20
|
+
given-names: Tom
|
|
21
|
+
- family-names: Vazquez
|
|
22
|
+
given-names: David
|
|
23
|
+
- family-names: Chapados
|
|
24
|
+
given-names: Nicolas
|
|
25
|
+
- family-names: Lacoste
|
|
26
|
+
given-names: Alexandre
|
|
27
|
+
|
|
28
|
+
preferred-citation:
|
|
29
|
+
- type: inproceedings
|
|
30
|
+
title: "WorkArena: How Capable are Web Agents at Solving Common Knowledge Work Tasks?"
|
|
31
|
+
authors:
|
|
32
|
+
- family-names: Drouin
|
|
33
|
+
given-names: Alexandre
|
|
34
|
+
- family-names: Gasse
|
|
35
|
+
given-names: Maxime
|
|
36
|
+
- family-names: Caccia
|
|
37
|
+
given-names: Massimo
|
|
38
|
+
- family-names: Laradji
|
|
39
|
+
given-names: Issam H.
|
|
40
|
+
- family-names: Del Verme
|
|
41
|
+
given-names: Manuel
|
|
42
|
+
- family-names: Marty
|
|
43
|
+
given-names: Tom
|
|
44
|
+
- family-names: Vazquez
|
|
45
|
+
given-names: David
|
|
46
|
+
- family-names: Chapados
|
|
47
|
+
given-names: Nicolas
|
|
48
|
+
- family-names: Lacoste
|
|
49
|
+
given-names: Alexandre
|
|
50
|
+
booktitle: "Proceedings of the 41st International Conference on Machine Learning (ICML)"
|
|
51
|
+
series: "Proceedings of Machine Learning Research"
|
|
52
|
+
volume: 235
|
|
53
|
+
pages: "11642–11662"
|
|
54
|
+
year: 2024
|
|
55
|
+
url: "https://proceedings.mlr.press/v235/drouin24a.html"
|
|
56
|
+
|
|
57
|
+
- type: inproceedings
|
|
58
|
+
title: "WorkArena++: Towards Compositional Planning and Reasoning-based Common Knowledge Work Tasks"
|
|
59
|
+
authors:
|
|
60
|
+
- family-names: Boisvert
|
|
61
|
+
given-names: Léo
|
|
62
|
+
- family-names: Thakkar
|
|
63
|
+
given-names: Megh
|
|
64
|
+
- family-names: Gasse
|
|
65
|
+
given-names: Maxime
|
|
66
|
+
- family-names: Caccia
|
|
67
|
+
given-names: Massimo
|
|
68
|
+
- family-names: Le Sellier De Chezelles
|
|
69
|
+
given-names: Thibault
|
|
70
|
+
- family-names: Cappart
|
|
71
|
+
given-names: Quentin
|
|
72
|
+
- family-names: Chapados
|
|
73
|
+
given-names: Nicolas
|
|
74
|
+
- family-names: Lacoste
|
|
75
|
+
given-names: Alexandre
|
|
76
|
+
- family-names: Drouin
|
|
77
|
+
given-names: Alexandre
|
|
78
|
+
booktitle: "Advances in Neural Information Processing Systems 37 (NeurIPS 2024), Datasets & Benchmarks Track"
|
|
79
|
+
year: 2024
|
|
80
|
+
url: "https://proceedings.neurips.cc/paper_files/paper/2024/hash/0b82662b6c32e887bb252a74d8cb2d5e-Paper-Datasets_and_Benchmarks_Track.pdf"
|
|
81
|
+
doi: "10.52202/079017-0195"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: browsergym-workarena
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.5.1
|
|
4
4
|
Summary: WorkArena benchmark for BrowserGym
|
|
5
5
|
Project-URL: homepage, https://github.com/ServiceNow/WorkArena
|
|
6
6
|
Author: Léo Boisvert, Alex Drouin, Maxime Gasse, Alex Lacoste, Manuel Del Verme, Megh Thakkar
|
|
@@ -16,6 +16,7 @@ Requires-Python: >3.7
|
|
|
16
16
|
Requires-Dist: browsergym-core>=0.2
|
|
17
17
|
Requires-Dist: english-words>=2.0.1
|
|
18
18
|
Requires-Dist: faker>=24.8.0
|
|
19
|
+
Requires-Dist: huggingface-hub>=0.23
|
|
19
20
|
Requires-Dist: numpy>=1.14
|
|
20
21
|
Requires-Dist: requests>=2.31
|
|
21
22
|
Requires-Dist: tenacity>=8.2.3
|
|
@@ -65,28 +66,19 @@ https://github.com/ServiceNow/WorkArena/assets/2374980/68640f09-7d6f-4eb1-b556-c
|
|
|
65
66
|
|
|
66
67
|
## Getting Started
|
|
67
68
|
|
|
68
|
-
To setup WorkArena, you will need to
|
|
69
|
+
To set up WorkArena, you will need to gain access to ServiceNow instances and install our Python package locally. Follow the steps below to achieve this.
|
|
69
70
|
|
|
70
|
-
### a)
|
|
71
|
+
### a) Gain Access to ServiceNow Instances
|
|
71
72
|
|
|
72
|
-
1.
|
|
73
|
-
2.
|
|
74
|
-
3.
|
|
75
|
-
4. Change the role of the user to admin in your instance parameters
|
|
73
|
+
1. Navigate to https://huggingface.co/datasets/ServiceNow/WorkArena-Instances.
|
|
74
|
+
2. Fill out the form, accept the terms to gain access to the gated repository, and wait for approval.
|
|
75
|
+
3. Ensure that the machine where you will run WorkArena is [authenticated with Hugging Face](https://huggingface.co/docs/hub/en/datasets-polars-auth) (e.g., via huggingface-cli login or the HUGGING_FACE_HUB_TOKEN environment variable).
|
|
76
76
|
|
|
77
|
-
|
|
78
|
-
* `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
|
|
79
|
-
* `SNOW_INSTANCE_UNAME`: The username, should be "admin"
|
|
80
|
-
* `SNOW_INSTANCE_PWD`: The password, make sure you place the value in quotes "" and be mindful of [escaping special shell characters](https://onlinelinuxtools.com/escape-shell-characters). Running `echo $SNOW_INSTANCE_PWD` should print the correct password.
|
|
81
|
-
6. Log into your instance via a browser using the admin credentials. Close any popup that appears on the main screen (e.g., agreeing to analytics).
|
|
82
|
-
|
|
83
|
-
**Warning:** Feel free to look around the platform, but please make sure you revert any changes (e.g., changes to list views, pinning some menus, etc.) as these changes will be persistent and affect the benchmarking process.
|
|
84
|
-
|
|
85
|
-
### b) Install WorkArena and Initialize your Instance
|
|
77
|
+
### b) Install WorkArena
|
|
86
78
|
|
|
87
79
|
Run the following command to install WorkArena in the [BrowserGym](https://github.com/servicenow/browsergym) environment:
|
|
88
80
|
```
|
|
89
|
-
pip install browsergym
|
|
81
|
+
pip install browsergym-workarena
|
|
90
82
|
```
|
|
91
83
|
|
|
92
84
|
Then, install [Playwright](https://github.com/microsoft/playwright):
|
|
@@ -94,10 +86,6 @@ Then, install [Playwright](https://github.com/microsoft/playwright):
|
|
|
94
86
|
playwright install
|
|
95
87
|
```
|
|
96
88
|
|
|
97
|
-
Finally, run this command in a terminal to upload the benchmark data to your ServiceNow instance:
|
|
98
|
-
```
|
|
99
|
-
workarena-install
|
|
100
|
-
```
|
|
101
89
|
Your installation is now complete! 🎉
|
|
102
90
|
|
|
103
91
|
|
|
@@ -41,28 +41,19 @@ https://github.com/ServiceNow/WorkArena/assets/2374980/68640f09-7d6f-4eb1-b556-c
|
|
|
41
41
|
|
|
42
42
|
## Getting Started
|
|
43
43
|
|
|
44
|
-
To setup WorkArena, you will need to
|
|
44
|
+
To set up WorkArena, you will need to gain access to ServiceNow instances and install our Python package locally. Follow the steps below to achieve this.
|
|
45
45
|
|
|
46
|
-
### a)
|
|
46
|
+
### a) Gain Access to ServiceNow Instances
|
|
47
47
|
|
|
48
|
-
1.
|
|
49
|
-
2.
|
|
50
|
-
3.
|
|
51
|
-
4. Change the role of the user to admin in your instance parameters
|
|
48
|
+
1. Navigate to https://huggingface.co/datasets/ServiceNow/WorkArena-Instances.
|
|
49
|
+
2. Fill out the form, accept the terms to gain access to the gated repository, and wait for approval.
|
|
50
|
+
3. Ensure that the machine where you will run WorkArena is [authenticated with Hugging Face](https://huggingface.co/docs/hub/en/datasets-polars-auth) (e.g., via huggingface-cli login or the HUGGING_FACE_HUB_TOKEN environment variable).
|
|
52
51
|
|
|
53
|
-
|
|
54
|
-
* `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
|
|
55
|
-
* `SNOW_INSTANCE_UNAME`: The username, should be "admin"
|
|
56
|
-
* `SNOW_INSTANCE_PWD`: The password, make sure you place the value in quotes "" and be mindful of [escaping special shell characters](https://onlinelinuxtools.com/escape-shell-characters). Running `echo $SNOW_INSTANCE_PWD` should print the correct password.
|
|
57
|
-
6. Log into your instance via a browser using the admin credentials. Close any popup that appears on the main screen (e.g., agreeing to analytics).
|
|
58
|
-
|
|
59
|
-
**Warning:** Feel free to look around the platform, but please make sure you revert any changes (e.g., changes to list views, pinning some menus, etc.) as these changes will be persistent and affect the benchmarking process.
|
|
60
|
-
|
|
61
|
-
### b) Install WorkArena and Initialize your Instance
|
|
52
|
+
### b) Install WorkArena
|
|
62
53
|
|
|
63
54
|
Run the following command to install WorkArena in the [BrowserGym](https://github.com/servicenow/browsergym) environment:
|
|
64
55
|
```
|
|
65
|
-
pip install browsergym
|
|
56
|
+
pip install browsergym-workarena
|
|
66
57
|
```
|
|
67
58
|
|
|
68
59
|
Then, install [Playwright](https://github.com/microsoft/playwright):
|
|
@@ -70,10 +61,6 @@ Then, install [Playwright](https://github.com/microsoft/playwright):
|
|
|
70
61
|
playwright install
|
|
71
62
|
```
|
|
72
63
|
|
|
73
|
-
Finally, run this command in a terminal to upload the benchmark data to your ServiceNow instance:
|
|
74
|
-
```
|
|
75
|
-
workarena-install
|
|
76
|
-
```
|
|
77
64
|
Your installation is now complete! 🎉
|
|
78
65
|
|
|
79
66
|
|
{browsergym_workarena-0.4.4 → browsergym_workarena-0.5.1}/src/browsergym/workarena/config.py
RENAMED
|
@@ -11,6 +11,12 @@ SNOW_BROWSER_TIMEOUT = 30000 # Milliseconds
|
|
|
11
11
|
SNOW_JS_UTILS_FILEPATH = str(resources.files(utils).joinpath("js_utils.js"))
|
|
12
12
|
SNOW_SUPPORTED_RELEASES = ["washingtondc"]
|
|
13
13
|
|
|
14
|
+
# Hugging Face dataset containing available instances
|
|
15
|
+
INSTANCE_REPO_ID = "ServiceNow/WorkArena-Instances"
|
|
16
|
+
INSTANCE_REPO_FILENAME = "instances.json"
|
|
17
|
+
INSTANCE_REPO_TYPE = "dataset"
|
|
18
|
+
INSTANCE_XOR_SEED = "x3!+-9mi#nhlo%a02$9hna{]"
|
|
19
|
+
|
|
14
20
|
# Path to the Menu navigation task configuration
|
|
15
21
|
ALL_MENU_PATH = str(resources.files(data_files).joinpath("task_configs/all_menu.json"))
|
|
16
22
|
|