browsergym-workarena 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/.github/workflows/pypi.yml +3 -2
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/.github/workflows/unit_tests.yml +26 -24
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/PKG-INFO +27 -20
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/README.md +23 -16
- browsergym_workarena-0.3.0/dev/environment.yaml +13 -0
- browsergym_workarena-0.3.0/dev/requirements.txt +9 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/pyproject.toml +29 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/requirements.txt +2 -2
- browsergym_workarena-0.3.0/scripts/extract_finetuning_traces.py +131 -0
- browsergym_workarena-0.3.0/scripts/generate_knowledge_base.ipynb +1499 -0
- browsergym_workarena-0.3.0/scripts/make_human_eval_curriculum.py +54 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/__init__.py +13 -1
- browsergym_workarena-0.3.0/src/browsergym/workarena/api/category.py +74 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/api/change_request.py +87 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/api/computer_asset.py +90 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/api/cost_center.py +19 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/api/expense_line.py +89 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/api/incident.py +45 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/api/knowledge.py +29 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/api/problem.py +90 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/api/report.py +183 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/api/requested_items.py +63 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/api/user.py +11 -8
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/api/utils.py +47 -3
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/config.py +21 -1
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/setup_files/forms/expected_incident_form_fields.json +1 -1
- browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/setup_files/forms/expected_request_item_form_fields.json +1 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/setup_files/knowledge/protocols.json +46 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/setup_files/knowledge/test.html +1 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/setup_files/lists/expected_asset_list_columns.json +12 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/setup_files/lists/expected_change_request_list_columns.json +12 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/setup_files/lists/expected_expense_line_list_columns.json +12 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/setup_files/lists/expected_hardware_list_columns.json +12 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/setup_files/lists/expected_incident_list_columns.json +12 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/setup_files/lists/expected_problem_list_columns.json +12 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/setup_files/lists/expected_requested_items_list_columns.json +12 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/setup_files/lists/expected_service_catalog_list_columns.json +12 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/setup_files/lists/expected_user_list_columns.json +12 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/all_menu.json +95 -95
- browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/task_configs/dashboard_retrieval_minmax_task.json +1 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/task_configs/dashboard_retrieval_value_task.json +1 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/filter_service_catalog_item_list_task.json +7986 -7982
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/impersonation_users.json +3 -3
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/report_retrieval_minmax_task.json +1 -1
- browsergym_workarena-0.3.0/src/browsergym/workarena/data_files/task_configs/report_retrieval_value_task.json +1 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/human_eval/console.js +176 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/human_eval/tool.py +366 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/install.py +81 -20
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/base.py +55 -20
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/comp_building_block.py +4 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/__init__.py +76 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/base.py +364 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/dash_do_base.py +1366 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/dash_do_catalog.py +1127 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/dash_do_catalog_infeasible.py +2047 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/dash_do_create_incident.py +403 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/dash_do_create_incident_infeasible.py +278 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/dash_do_create_problem.py +336 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/dash_do_create_problem_infeasible.py +235 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/dash_do_filter.py +1600 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/dash_do_request_item.py +1315 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/dash_do_request_item_infeasible.py +693 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/delete_record.py +341 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/edit_knowledge_base.py +457 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/expense_management.py +598 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/filter_and_do.py +139 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/find_and_order_item.py +345 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/manage_change_request_schedule.py +1417 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/mark_duplicate_problems.py +499 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/maximize_investment_return.py +1763 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/navigate_and_do.py +1151 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/navigate_and_do_infeasible.py +2100 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/offboard_user.py +207 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/onboard_user.py +226 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/update_task.py +145 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/utils/curriculum.py +215 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/utils/infeasible_configs.py +151 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/utils/knapsack.py +192 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/warranty_check.py +227 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/work_assignment.py +804 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/compositional/workload_balancing.py +396 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/dashboard.py +188 -8
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/form.py +1593 -0
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/knowledge.py +359 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/list.py +519 -102
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/mark_duplicate_problem.py +171 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/navigation.py +55 -13
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/scripts/extract_all_menu_items.py +9 -2
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/scripts/generate_dashboard_configs.py +6 -5
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/scripts/service_catalog.py +2 -1
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/scripts/validate.py +8 -2
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/send_chat_message.py +90 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/service_catalog.py +94 -26
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/utils/form.py +1 -4
- browsergym_workarena-0.3.0/src/browsergym/workarena/tasks/utils/private_tasks.py +63 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/utils/utils.py +13 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/tests/test_api.py +1 -0
- browsergym_workarena-0.3.0/tests/test_compositional_utils.py +92 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/tests/test_random_config_generation.py +24 -23
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/tests/test_task_from_config.py +37 -3
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/tests/test_task_general.py +4 -9
- browsergym_workarena-0.2.0/scripts/generate_knowledge_base.ipynb +0 -1374
- browsergym_workarena-0.2.0/src/browsergym/workarena/data_files/setup_files/lists/expected_asset_list_columns.json +0 -34
- browsergym_workarena-0.2.0/src/browsergym/workarena/data_files/setup_files/lists/expected_change_request_list_columns.json +0 -48
- browsergym_workarena-0.2.0/src/browsergym/workarena/data_files/setup_files/lists/expected_hardware_list_columns.json +0 -53
- browsergym_workarena-0.2.0/src/browsergym/workarena/data_files/setup_files/lists/expected_incident_list_columns.json +0 -28
- browsergym_workarena-0.2.0/src/browsergym/workarena/data_files/setup_files/lists/expected_service_catalog_list_columns.json +0 -29
- browsergym_workarena-0.2.0/src/browsergym/workarena/data_files/setup_files/lists/expected_user_list_columns.json +0 -59
- browsergym_workarena-0.2.0/src/browsergym/workarena/data_files/task_configs/dashboard_retrieval_minmax_task.json +0 -1
- browsergym_workarena-0.2.0/src/browsergym/workarena/data_files/task_configs/dashboard_retrieval_value_task.json +0 -1
- browsergym_workarena-0.2.0/src/browsergym/workarena/data_files/task_configs/report_retrieval_value_task.json +0 -1
- browsergym_workarena-0.2.0/src/browsergym/workarena/tasks/form.py +0 -801
- browsergym_workarena-0.2.0/src/browsergym/workarena/tasks/knowledge.py +0 -168
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/.gitignore +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/LICENSE +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/api/__init__.py +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/api/requests.py +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/api/ui_themes.py +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/setup_files/forms/expected_change_request_form_fields.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/setup_files/forms/expected_hardware_form_fields.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/setup_files/forms/expected_problem_form_fields.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/setup_files/forms/expected_user_form_fields.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/setup_files/knowledge/kb_autopublish_workflow.xml +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/setup_files/knowledge/knowledge_base.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/setup_files/ui_themes/workarena_themes.xml +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/create_change_request_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/create_hardware_asset_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/create_incident_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/create_problem_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/create_user_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/filter_asset_list_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/filter_change_request_list_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/filter_hardware_list_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/filter_incident_list_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/filter_user_list_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/knowledge_base_configs.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/order_apple_mac_book_pro15_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/order_apple_watch_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/order_developer_laptop_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/order_development_laptop_pc_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/order_ipad_mini_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/order_ipad_pro_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/order_loaner_laptop_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/order_sales_laptop_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/order_standard_laptop_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/sort_asset_list_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/sort_change_request_list_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/sort_hardware_list_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/sort_incident_list_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/sort_service_catalog_item_list_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/data_files/task_configs/sort_user_list_task.json +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/instance.py +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/__init__.py +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/scripts/README.md +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/scripts/generate_forms.py +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/scripts/knowledge.py +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/scripts/list.py +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/scripts/navigation.py +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/utils/__init__.py +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/utils/debug.py +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/utils/js_utils.js +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/tasks/utils/string.py +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/src/browsergym/workarena/utils.py +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/tests/test_snow_instance.py +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/tests/test_task_setup.py +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/tests/test_utils.py +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/tests/test_validate.py +0 -0
- {browsergym_workarena-0.2.0 → browsergym_workarena-0.3.0}/tests/utils.py +0 -0
|
@@ -48,10 +48,11 @@ jobs:
|
|
|
48
48
|
uses: pypa/gh-action-pypi-publish@release/v1
|
|
49
49
|
|
|
50
50
|
github-release:
|
|
51
|
-
name: Sign with Sigstore and upload them to GitHub Release
|
|
51
|
+
name: Sign packages with Sigstore and upload them to GitHub Release
|
|
52
52
|
needs:
|
|
53
53
|
- publish-to-pypi
|
|
54
54
|
runs-on: ubuntu-latest
|
|
55
|
+
|
|
55
56
|
permissions:
|
|
56
57
|
contents: write # IMPORTANT: mandatory for making GitHub Releases
|
|
57
58
|
id-token: write # IMPORTANT: mandatory for sigstore
|
|
@@ -64,7 +65,7 @@ jobs:
|
|
|
64
65
|
path: dist/
|
|
65
66
|
|
|
66
67
|
- name: Sign the dists with Sigstore
|
|
67
|
-
uses: sigstore/gh-action-sigstore-python@
|
|
68
|
+
uses: sigstore/gh-action-sigstore-python@v2.1.1
|
|
68
69
|
with:
|
|
69
70
|
inputs: >-
|
|
70
71
|
./dist/*.tar.gz
|
|
@@ -34,38 +34,39 @@ jobs:
|
|
|
34
34
|
run: black . --check
|
|
35
35
|
|
|
36
36
|
browsergym-workarena-fast:
|
|
37
|
-
|
|
37
|
+
runs-on: ubuntu-latest
|
|
38
38
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
39
|
+
defaults:
|
|
40
|
+
run:
|
|
41
|
+
shell: bash -l {0}
|
|
42
42
|
|
|
43
|
-
|
|
43
|
+
steps:
|
|
44
44
|
|
|
45
|
-
|
|
46
|
-
|
|
45
|
+
- name: Checkout Repository
|
|
46
|
+
uses: actions/checkout@v4
|
|
47
47
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
48
|
+
- name: Set up Python
|
|
49
|
+
uses: actions/setup-python@v5
|
|
50
|
+
with:
|
|
51
|
+
python-version: '3.10'
|
|
52
|
+
cache: 'pip' # caching pip dependencies
|
|
53
53
|
|
|
54
|
-
|
|
55
|
-
|
|
54
|
+
- name: Pip install
|
|
55
|
+
working-directory: ./dev
|
|
56
|
+
run: pip install -r requirements.txt
|
|
56
57
|
|
|
57
|
-
|
|
58
|
-
|
|
58
|
+
- name: Pip list
|
|
59
|
+
run: pip list
|
|
59
60
|
|
|
60
|
-
|
|
61
|
-
|
|
61
|
+
- name: Install Playwright
|
|
62
|
+
run: playwright install --with-deps
|
|
62
63
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
64
|
+
- name: Run non-slow browsergym-workarena Unit Tests
|
|
65
|
+
env:
|
|
66
|
+
SNOW_INSTANCE_URL: ${{ secrets.SNOW_INSTANCE_URL }}
|
|
67
|
+
SNOW_INSTANCE_UNAME: ${{ secrets.SNOW_INSTANCE_UNAME }}
|
|
68
|
+
SNOW_INSTANCE_PWD: ${{ secrets.SNOW_INSTANCE_PWD }}
|
|
69
|
+
run: pytest -n 5 --durations=10 -m 'not slow and not pricy' --slowmo 1000 -v tests
|
|
69
70
|
|
|
70
71
|
browsergym-workarena-slow:
|
|
71
72
|
runs-on: ubuntu-latest
|
|
@@ -86,6 +87,7 @@ jobs:
|
|
|
86
87
|
cache: 'pip' # caching pip dependencies
|
|
87
88
|
|
|
88
89
|
- name: Pip install
|
|
90
|
+
working-directory: ./dev
|
|
89
91
|
run: pip install -r requirements.txt
|
|
90
92
|
|
|
91
93
|
- name: Pip list
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: browsergym-workarena
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: WorkArena benchmark for BrowserGym
|
|
5
5
|
Project-URL: homepage, https://github.com/ServiceNow/WorkArena
|
|
6
|
-
Author: Léo Boisvert, Alex Drouin, Maxime Gasse, Alex Lacoste, Manuel Del Verme
|
|
6
|
+
Author: Léo Boisvert, Alex Drouin, Maxime Gasse, Alex Lacoste, Manuel Del Verme, Megh Thakkar
|
|
7
7
|
License: Apache-2.0
|
|
8
8
|
License-File: LICENSE
|
|
9
9
|
Classifier: Development Status :: 2 - Pre-Alpha
|
|
@@ -13,9 +13,9 @@ Classifier: Operating System :: OS Independent
|
|
|
13
13
|
Classifier: Programming Language :: Python :: 3
|
|
14
14
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
15
15
|
Requires-Python: >3.7
|
|
16
|
-
Requires-Dist: browsergym-core
|
|
16
|
+
Requires-Dist: browsergym-core>=0.2
|
|
17
17
|
Requires-Dist: english-words>=2.0.1
|
|
18
|
-
Requires-Dist: faker>=24.
|
|
18
|
+
Requires-Dist: faker>=24.8.0
|
|
19
19
|
Requires-Dist: numpy>=1.14
|
|
20
20
|
Requires-Dist: requests>=2.31
|
|
21
21
|
Requires-Dist: tenacity>=8.2.3
|
|
@@ -34,12 +34,9 @@ WorkArena is included in [BrowserGym](https://github.com/ServiceNow/BrowserGym),
|
|
|
34
34
|
|
|
35
35
|
https://github.com/ServiceNow/WorkArena/assets/2374980/68640f09-7d6f-4eb1-b556-c294a6afef70
|
|
36
36
|
|
|
37
|
-
## ⚠️ Pre-Release warning ⚠️
|
|
38
|
-
Please note that the WorkArena benchmark is still undergoing minor bug fixes and updates, which may cause discrepancies with results reported in our latest arXiv preprint. We plan to release soon a stable version of WorkArena v0.1.0 with enhanced stability, and a final version v1.0.0 with a new suite of tasks.
|
|
39
|
-
|
|
40
37
|
## Benchmark Contents
|
|
41
38
|
|
|
42
|
-
At the moment, WorkArena includes `
|
|
39
|
+
At the moment, WorkArena includes `19,912` unique instances drawn from `33` tasks that cover the main components of the ServiceNow user interface. The following videos show an agent built on `GPT-4-vision` interacting with every such component. As emphasized by our results, this benchmark is not solved and thus, the performance of the agent is not always on point.
|
|
43
40
|
|
|
44
41
|
### Knowledge Bases
|
|
45
42
|
|
|
@@ -75,6 +72,15 @@ https://github.com/ServiceNow/WorkArena/assets/1726818/7538b3ef-d39b-4978-b9ea-8
|
|
|
75
72
|
|
|
76
73
|
https://github.com/ServiceNow/WorkArena/assets/1726818/ca26dfaf-2358-4418-855f-80e482435e6e
|
|
77
74
|
|
|
75
|
+
### Dashboards
|
|
76
|
+
|
|
77
|
+
**Goal:** The agent must answer a question that requires reading charts and (optionally) performing simple reasoning over them.
|
|
78
|
+
|
|
79
|
+
*Note: For demonstration purposes, a human is controlling the cursor since this is a pure retrieval task*
|
|
80
|
+
|
|
81
|
+
https://github.com/ServiceNow/WorkArena/assets/1726818/0023232c-081f-4be4-99bd-f60c766e6c3f
|
|
82
|
+
|
|
83
|
+
|
|
78
84
|
## Getting Started
|
|
79
85
|
|
|
80
86
|
To setup WorkArena, you will need to get your own ServiceNow instance, install our Python package, and upload some data to your instance. Follow the steps below to achieve this.
|
|
@@ -82,7 +88,7 @@ To setup WorkArena, you will need to get your own ServiceNow instance, install o
|
|
|
82
88
|
### a) Create a ServiceNow Developer Instance
|
|
83
89
|
|
|
84
90
|
1. Go to https://developer.servicenow.com/ and create an account.
|
|
85
|
-
2. Click on `Request an instance` and select the `
|
|
91
|
+
2. Click on `Request an instance` and select the `Washington` release (initializing the instance will take a few minutes)
|
|
86
92
|
3. Once the instance is ready, you should see your instance URL and credentials. If not, click _Return to the Developer Portal_, then navigate to _Manage instance password_ and click _Reset instance password_.
|
|
87
93
|
4. You should now see your URL and credentials. Based on this information, set the following environment variables:
|
|
88
94
|
* `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
|
|
@@ -116,6 +122,8 @@ Your installation is now complete! 🎉
|
|
|
116
122
|
|
|
117
123
|
Run this code to see WorkArena in action.
|
|
118
124
|
|
|
125
|
+
Note: the following example executes WorkArena's oracle (cheat) function to solve each task. To evaluate an agent, calls to `env.step()` must be used instead.
|
|
126
|
+
|
|
119
127
|
```python
|
|
120
128
|
import random
|
|
121
129
|
|
|
@@ -130,28 +138,27 @@ for task in ALL_WORKARENA_TASKS:
|
|
|
130
138
|
|
|
131
139
|
# Instantiate a new environment
|
|
132
140
|
env = BrowserEnv(task_entrypoint=task,
|
|
133
|
-
headless=False
|
|
134
|
-
slow_mo=1000)
|
|
141
|
+
headless=False)
|
|
135
142
|
env.reset()
|
|
136
143
|
|
|
137
144
|
# Cheat functions use Playwright to automatically solve the task
|
|
138
145
|
env.chat.add_message(role="assistant", msg="On it. Please wait...")
|
|
139
|
-
|
|
146
|
+
cheat_messages = []
|
|
147
|
+
env.task.cheat(env.page, cheat_messages)
|
|
148
|
+
|
|
149
|
+
# Send cheat messages to chat
|
|
150
|
+
for cheat_msg in cheat_messages:
|
|
151
|
+
env.chat.add_message(role=cheat_msg["role"], msg=cheat_msg["message"])
|
|
140
152
|
|
|
141
153
|
# Post solution to chat
|
|
142
|
-
|
|
143
|
-
answer = env.chat.messages[-1]["message"]
|
|
144
|
-
env.chat.add_message(role="assistant", msg=f"The answer is:")
|
|
145
|
-
env.chat.add_message(role="assistant", msg=answer)
|
|
146
|
-
else:
|
|
147
|
-
env.chat.add_message(role="assistant", msg="I'm done!")
|
|
154
|
+
env.chat.add_message(role="assistant", msg="I'm done!")
|
|
148
155
|
|
|
149
156
|
# Validate the solution
|
|
150
|
-
reward, stop,
|
|
157
|
+
reward, stop, message, info = env.task.validate(env.page, cheat_messages)
|
|
151
158
|
if reward == 1:
|
|
152
159
|
env.chat.add_message(role="user", msg="Yes, that works. Thanks!")
|
|
153
160
|
else:
|
|
154
|
-
env.chat.add_message(role="user", msg=f"No, that doesn't work. {
|
|
161
|
+
env.chat.add_message(role="user", msg=f"No, that doesn't work. {info.get('message', '')}")
|
|
155
162
|
|
|
156
163
|
sleep(3)
|
|
157
164
|
env.close()
|
|
@@ -10,12 +10,9 @@ WorkArena is included in [BrowserGym](https://github.com/ServiceNow/BrowserGym),
|
|
|
10
10
|
|
|
11
11
|
https://github.com/ServiceNow/WorkArena/assets/2374980/68640f09-7d6f-4eb1-b556-c294a6afef70
|
|
12
12
|
|
|
13
|
-
## ⚠️ Pre-Release warning ⚠️
|
|
14
|
-
Please note that the WorkArena benchmark is still undergoing minor bug fixes and updates, which may cause discrepancies with results reported in our latest arXiv preprint. We plan to release soon a stable version of WorkArena v0.1.0 with enhanced stability, and a final version v1.0.0 with a new suite of tasks.
|
|
15
|
-
|
|
16
13
|
## Benchmark Contents
|
|
17
14
|
|
|
18
|
-
At the moment, WorkArena includes `
|
|
15
|
+
At the moment, WorkArena includes `19,912` unique instances drawn from `33` tasks that cover the main components of the ServiceNow user interface. The following videos show an agent built on `GPT-4-vision` interacting with every such component. As emphasized by our results, this benchmark is not solved and thus, the performance of the agent is not always on point.
|
|
19
16
|
|
|
20
17
|
### Knowledge Bases
|
|
21
18
|
|
|
@@ -51,6 +48,15 @@ https://github.com/ServiceNow/WorkArena/assets/1726818/7538b3ef-d39b-4978-b9ea-8
|
|
|
51
48
|
|
|
52
49
|
https://github.com/ServiceNow/WorkArena/assets/1726818/ca26dfaf-2358-4418-855f-80e482435e6e
|
|
53
50
|
|
|
51
|
+
### Dashboards
|
|
52
|
+
|
|
53
|
+
**Goal:** The agent must answer a question that requires reading charts and (optionally) performing simple reasoning over them.
|
|
54
|
+
|
|
55
|
+
*Note: For demonstration purposes, a human is controlling the cursor since this is a pure retrieval task*
|
|
56
|
+
|
|
57
|
+
https://github.com/ServiceNow/WorkArena/assets/1726818/0023232c-081f-4be4-99bd-f60c766e6c3f
|
|
58
|
+
|
|
59
|
+
|
|
54
60
|
## Getting Started
|
|
55
61
|
|
|
56
62
|
To setup WorkArena, you will need to get your own ServiceNow instance, install our Python package, and upload some data to your instance. Follow the steps below to achieve this.
|
|
@@ -58,7 +64,7 @@ To setup WorkArena, you will need to get your own ServiceNow instance, install o
|
|
|
58
64
|
### a) Create a ServiceNow Developer Instance
|
|
59
65
|
|
|
60
66
|
1. Go to https://developer.servicenow.com/ and create an account.
|
|
61
|
-
2. Click on `Request an instance` and select the `
|
|
67
|
+
2. Click on `Request an instance` and select the `Washington` release (initializing the instance will take a few minutes)
|
|
62
68
|
3. Once the instance is ready, you should see your instance URL and credentials. If not, click _Return to the Developer Portal_, then navigate to _Manage instance password_ and click _Reset instance password_.
|
|
63
69
|
4. You should now see your URL and credentials. Based on this information, set the following environment variables:
|
|
64
70
|
* `SNOW_INSTANCE_URL`: The URL of your ServiceNow developer instance
|
|
@@ -92,6 +98,8 @@ Your installation is now complete! 🎉
|
|
|
92
98
|
|
|
93
99
|
Run this code to see WorkArena in action.
|
|
94
100
|
|
|
101
|
+
Note: the following example executes WorkArena's oracle (cheat) function to solve each task. To evaluate an agent, calls to `env.step()` must be used instead.
|
|
102
|
+
|
|
95
103
|
```python
|
|
96
104
|
import random
|
|
97
105
|
|
|
@@ -106,28 +114,27 @@ for task in ALL_WORKARENA_TASKS:
|
|
|
106
114
|
|
|
107
115
|
# Instantiate a new environment
|
|
108
116
|
env = BrowserEnv(task_entrypoint=task,
|
|
109
|
-
headless=False
|
|
110
|
-
slow_mo=1000)
|
|
117
|
+
headless=False)
|
|
111
118
|
env.reset()
|
|
112
119
|
|
|
113
120
|
# Cheat functions use Playwright to automatically solve the task
|
|
114
121
|
env.chat.add_message(role="assistant", msg="On it. Please wait...")
|
|
115
|
-
|
|
122
|
+
cheat_messages = []
|
|
123
|
+
env.task.cheat(env.page, cheat_messages)
|
|
124
|
+
|
|
125
|
+
# Send cheat messages to chat
|
|
126
|
+
for cheat_msg in cheat_messages:
|
|
127
|
+
env.chat.add_message(role=cheat_msg["role"], msg=cheat_msg["message"])
|
|
116
128
|
|
|
117
129
|
# Post solution to chat
|
|
118
|
-
|
|
119
|
-
answer = env.chat.messages[-1]["message"]
|
|
120
|
-
env.chat.add_message(role="assistant", msg=f"The answer is:")
|
|
121
|
-
env.chat.add_message(role="assistant", msg=answer)
|
|
122
|
-
else:
|
|
123
|
-
env.chat.add_message(role="assistant", msg="I'm done!")
|
|
130
|
+
env.chat.add_message(role="assistant", msg="I'm done!")
|
|
124
131
|
|
|
125
132
|
# Validate the solution
|
|
126
|
-
reward, stop,
|
|
133
|
+
reward, stop, message, info = env.task.validate(env.page, cheat_messages)
|
|
127
134
|
if reward == 1:
|
|
128
135
|
env.chat.add_message(role="user", msg="Yes, that works. Thanks!")
|
|
129
136
|
else:
|
|
130
|
-
env.chat.add_message(role="user", msg=f"No, that doesn't work. {
|
|
137
|
+
env.chat.add_message(role="user", msg=f"No, that doesn't work. {info.get('message', '')}")
|
|
131
138
|
|
|
132
139
|
sleep(3)
|
|
133
140
|
env.close()
|
|
@@ -11,6 +11,7 @@ authors = [
|
|
|
11
11
|
{name = "Maxime Gasse"},
|
|
12
12
|
{name = "Alex Lacoste"},
|
|
13
13
|
{name = "Manuel Del Verme"},
|
|
14
|
+
{name = "Megh Thakkar"},
|
|
14
15
|
]
|
|
15
16
|
readme = "README.md"
|
|
16
17
|
requires-python = ">3.7"
|
|
@@ -30,6 +31,7 @@ homepage = "https://github.com/ServiceNow/WorkArena"
|
|
|
30
31
|
|
|
31
32
|
[project.scripts]
|
|
32
33
|
workarena-install = "browsergym.workarena.install:main"
|
|
34
|
+
workarena-human-eval = "browsergym.workarena.human_eval.tool:main"
|
|
33
35
|
|
|
34
36
|
[tool.hatch.version]
|
|
35
37
|
path = "src/browsergym/workarena/__init__.py"
|
|
@@ -39,3 +41,30 @@ files = ["requirements.txt"]
|
|
|
39
41
|
|
|
40
42
|
[tool.hatch.build.targets.wheel]
|
|
41
43
|
packages = ["src/browsergym"]
|
|
44
|
+
|
|
45
|
+
[tool.black]
|
|
46
|
+
line-length = 100
|
|
47
|
+
include = '\.pyi?$'
|
|
48
|
+
exclude = '''
|
|
49
|
+
/(
|
|
50
|
+
\.eggs
|
|
51
|
+
| \.git
|
|
52
|
+
| \.hg
|
|
53
|
+
| \.mypy_cache
|
|
54
|
+
| \.nox
|
|
55
|
+
| \.tox
|
|
56
|
+
| \.venv
|
|
57
|
+
| _build
|
|
58
|
+
| buck-out
|
|
59
|
+
| build
|
|
60
|
+
| dist
|
|
61
|
+
)/
|
|
62
|
+
'''
|
|
63
|
+
|
|
64
|
+
[tool.pytest.ini_options]
|
|
65
|
+
filterwarnings = [
|
|
66
|
+
'ignore::UserWarning:gymnasium.*:', # too many "The obs is not within the observation space." warnings.
|
|
67
|
+
]
|
|
68
|
+
markers = [
|
|
69
|
+
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
|
|
70
|
+
]
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""
|
|
2
|
+
A demonstration of how observation/action traces can be extracted
|
|
3
|
+
for WorkArena tasks without modifying the task code.
|
|
4
|
+
|
|
5
|
+
Author: Alexandre Drouin (alexandre.drouin@servicenow.com)
|
|
6
|
+
|
|
7
|
+
Notes:
|
|
8
|
+
- This approach relies on monkey patching the playwright actions to log the actions and observations.
|
|
9
|
+
It has not been tested for parallel execution. It might work with multiprocessing, but it will for
|
|
10
|
+
sure not work with multithreading.
|
|
11
|
+
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import importlib
|
|
15
|
+
import logging
|
|
16
|
+
import os
|
|
17
|
+
import pickle
|
|
18
|
+
import playwright.sync_api as playwright_sync
|
|
19
|
+
|
|
20
|
+
from browsergym.core.env import BrowserEnv
|
|
21
|
+
from browsergym.workarena import ALL_WORKARENA_TASKS
|
|
22
|
+
from collections import defaultdict
|
|
23
|
+
from tenacity import retry, stop_after_attempt, wait_fixed
|
|
24
|
+
from time import time
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Number of traces extracted for each task by the main script below
N_PER_TASK = 10
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def monkey_patch_playwright(observation_callback, trace_storage):
    """
    Override the default playwright actions so that each call records the action together
    with the observation taken right before the action is executed.

    Each recorded entry is a dict with the keys "obs", "action", "args", "kwargs", "bid"
    and "time". The function can be called repeatedly (e.g., once per environment): the
    pristine playwright method is always the one that gets wrapped, so wrappers from
    previous calls never stack up.

    Parameters:
    ------------
    observation_callback: callable
        A function that returns the observation of the environment.
    trace_storage: list
        A list to store the trace of the actions and observations.
        These will be appended in-place.

    """
    import functools

    def wrapper(func, interface):
        @functools.wraps(func)
        def wrapped(*args, **kwargs):
            # Get the observation (before the action modifies the page)
            obs = observation_callback()

            # Get the BID of the element on which we are acting.
            if interface.__name__ == "Locator":
                # The first positional argument is the locator itself (self)
                locator = args[0]
                bid = locator.element_handle().evaluate('(el) => el.getAttribute("bid")')
            elif interface.__name__ == "Keyboard":
                # Keyboard actions are not tied to a specific element
                bid = "keyboard"
            else:
                # NOTE(review): this evaluates on args[0] (self), which is an element only
                # for ElementHandle. For Page/Frame no element is passed to the JS
                # function -- confirm that these actions are never hit in practice.
                bid = args[0].evaluate('(el) => el.getAttribute("bid")')

            logging.info(f"Action: {func.__name__} BID: {bid} -- Args: {args[1:]} {kwargs}")
            trace_storage.append(
                {
                    "obs": obs,
                    "action": func.__name__,
                    "args": args[1:],
                    "kwargs": kwargs,
                    "bid": bid,
                    "time": time(),
                }
            )

            # Resume action
            return func(*args, **kwargs)

        # Keep a handle on the unpatched method so that a later call to
        # monkey_patch_playwright can re-wrap it instead of wrapping this wrapper.
        wrapped._trace_original = func
        return wrapped

    # Interfaces and actions we want to monkey patch
    # NOTE(review): the reload presumably aims at restoring the unpatched classes; this is
    # not relied upon, since the original methods are recovered explicitly below.
    importlib.reload(playwright_sync)
    from playwright.sync_api import Page, Frame, Locator, Keyboard, ElementHandle

    # TODO: Make sure the list of interfaces and actions is exhaustive
    #       It covers all that is used in WorkArena cheats as of April 11, 2024
    interfaces = [Page, Frame, Locator, Keyboard, ElementHandle]
    actions = ["click", "select_option", "set_checked", "fill", "press", "type", "down", "up"]

    for interface in interfaces:
        for action in actions:
            if hasattr(interface, action):
                current = getattr(interface, action)
                # If the method was patched by a previous call, wrap the original method
                # so that stale callbacks and trace lists are no longer invoked.
                original = getattr(current, "_trace_original", current)
                setattr(interface, action, wrapper(original, interface))
                print(f"Monkey patched {interface.__name__}.{action}")
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
def extract_trace(task_cls, headless=True):
    """
    Extracts the trace of actions and observations for a given task.

    The task is solved with its cheat function while the playwright actions are monkey
    patched to record the trace. The whole extraction is attempted up to 3 times, with a
    2 second wait between attempts.

    Parameters:
    ------------
    task_cls: class
        The class of the task to extract the trace from.
    headless: bool
        Passed to the environment as its `headless` argument.

    Returns:
    --------
    trace: list
        The entries appended by the monkey patched playwright actions.

    """
    # Instantiate a new environment
    env = BrowserEnv(task_entrypoint=task_cls, headless=headless, slow_mo=1000)

    # Setup customized tracing
    trace = []
    monkey_patch_playwright(observation_callback=env._get_obs, trace_storage=trace)

    try:
        env.reset()
        env.task.cheat(env.page, env.chat.messages)
    finally:
        # Always release the environment, otherwise each failed (and retried) attempt
        # would leave its environment open.
        env.close()

    return trace
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
if __name__ == "__main__":
    # Directory in which the pickled traces are written
    os.makedirs("trace_profiling", exist_ok=True)

    # Maps each task to the list of traces extracted for it
    task_traces = defaultdict(list)
    for task in ALL_WORKARENA_TASKS:
        print("Task:", task)
        for i in range(N_PER_TASK):
            print(f"Extracting trace {i+1}/{N_PER_TASK}")
            trace = extract_trace(task, headless=True)
            task_traces[task].append(trace)

    # Use a context manager so that the file is flushed and closed deterministically
    with open("trace_profiling/task_traces.pkl", "wb") as trace_file:
        pickle.dump(task_traces, trace_file)
|