browsergym-workarena 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browsergym/workarena/__init__.py +13 -1
- browsergym/workarena/api/category.py +74 -0
- browsergym/workarena/api/change_request.py +87 -0
- browsergym/workarena/api/computer_asset.py +90 -0
- browsergym/workarena/api/cost_center.py +19 -0
- browsergym/workarena/api/expense_line.py +89 -0
- browsergym/workarena/api/incident.py +45 -0
- browsergym/workarena/api/knowledge.py +29 -0
- browsergym/workarena/api/problem.py +90 -0
- browsergym/workarena/api/report.py +183 -0
- browsergym/workarena/api/requested_items.py +63 -0
- browsergym/workarena/api/user.py +11 -8
- browsergym/workarena/api/utils.py +47 -3
- browsergym/workarena/config.py +21 -1
- browsergym/workarena/data_files/setup_files/forms/expected_incident_form_fields.json +1 -1
- browsergym/workarena/data_files/setup_files/forms/expected_request_item_form_fields.json +1 -0
- browsergym/workarena/data_files/setup_files/knowledge/protocols.json +46 -0
- browsergym/workarena/data_files/setup_files/knowledge/test.html +1 -0
- browsergym/workarena/data_files/setup_files/lists/expected_asset_list_columns.json +2 -24
- browsergym/workarena/data_files/setup_files/lists/expected_change_request_list_columns.json +4 -40
- browsergym/workarena/data_files/setup_files/lists/expected_expense_line_list_columns.json +12 -0
- browsergym/workarena/data_files/setup_files/lists/expected_hardware_list_columns.json +1 -42
- browsergym/workarena/data_files/setup_files/lists/expected_incident_list_columns.json +2 -18
- browsergym/workarena/data_files/setup_files/lists/expected_problem_list_columns.json +12 -0
- browsergym/workarena/data_files/setup_files/lists/expected_requested_items_list_columns.json +12 -0
- browsergym/workarena/data_files/setup_files/lists/expected_service_catalog_list_columns.json +2 -19
- browsergym/workarena/data_files/setup_files/lists/expected_user_list_columns.json +3 -50
- browsergym/workarena/data_files/task_configs/all_menu.json +95 -95
- browsergym/workarena/data_files/task_configs/dashboard_retrieval_minmax_task.json +1 -1
- browsergym/workarena/data_files/task_configs/dashboard_retrieval_value_task.json +1 -1
- browsergym/workarena/data_files/task_configs/filter_service_catalog_item_list_task.json +7986 -7982
- browsergym/workarena/data_files/task_configs/impersonation_users.json +3 -3
- browsergym/workarena/data_files/task_configs/report_retrieval_minmax_task.json +1 -1
- browsergym/workarena/data_files/task_configs/report_retrieval_value_task.json +1 -1
- browsergym/workarena/human_eval/console.js +176 -0
- browsergym/workarena/human_eval/tool.py +366 -0
- browsergym/workarena/install.py +81 -20
- browsergym/workarena/tasks/base.py +55 -20
- browsergym/workarena/tasks/comp_building_block.py +4 -0
- browsergym/workarena/tasks/compositional/__init__.py +76 -0
- browsergym/workarena/tasks/compositional/base.py +364 -0
- browsergym/workarena/tasks/compositional/dash_do_base.py +1366 -0
- browsergym/workarena/tasks/compositional/dash_do_catalog.py +1127 -0
- browsergym/workarena/tasks/compositional/dash_do_catalog_infeasible.py +2047 -0
- browsergym/workarena/tasks/compositional/dash_do_create_incident.py +403 -0
- browsergym/workarena/tasks/compositional/dash_do_create_incident_infeasible.py +278 -0
- browsergym/workarena/tasks/compositional/dash_do_create_problem.py +336 -0
- browsergym/workarena/tasks/compositional/dash_do_create_problem_infeasible.py +235 -0
- browsergym/workarena/tasks/compositional/dash_do_filter.py +1600 -0
- browsergym/workarena/tasks/compositional/dash_do_request_item.py +1315 -0
- browsergym/workarena/tasks/compositional/dash_do_request_item_infeasible.py +693 -0
- browsergym/workarena/tasks/compositional/delete_record.py +341 -0
- browsergym/workarena/tasks/compositional/edit_knowledge_base.py +457 -0
- browsergym/workarena/tasks/compositional/expense_management.py +598 -0
- browsergym/workarena/tasks/compositional/filter_and_do.py +139 -0
- browsergym/workarena/tasks/compositional/find_and_order_item.py +345 -0
- browsergym/workarena/tasks/compositional/manage_change_request_schedule.py +1417 -0
- browsergym/workarena/tasks/compositional/mark_duplicate_problems.py +499 -0
- browsergym/workarena/tasks/compositional/maximize_investment_return.py +1763 -0
- browsergym/workarena/tasks/compositional/navigate_and_do.py +1151 -0
- browsergym/workarena/tasks/compositional/navigate_and_do_infeasible.py +2100 -0
- browsergym/workarena/tasks/compositional/offboard_user.py +207 -0
- browsergym/workarena/tasks/compositional/onboard_user.py +226 -0
- browsergym/workarena/tasks/compositional/update_task.py +145 -0
- browsergym/workarena/tasks/compositional/utils/curriculum.py +215 -0
- browsergym/workarena/tasks/compositional/utils/infeasible_configs.py +151 -0
- browsergym/workarena/tasks/compositional/utils/knapsack.py +192 -0
- browsergym/workarena/tasks/compositional/warranty_check.py +227 -0
- browsergym/workarena/tasks/compositional/work_assignment.py +804 -0
- browsergym/workarena/tasks/compositional/workload_balancing.py +396 -0
- browsergym/workarena/tasks/dashboard.py +188 -8
- browsergym/workarena/tasks/form.py +1024 -232
- browsergym/workarena/tasks/knowledge.py +216 -25
- browsergym/workarena/tasks/list.py +519 -102
- browsergym/workarena/tasks/mark_duplicate_problem.py +171 -0
- browsergym/workarena/tasks/navigation.py +55 -13
- browsergym/workarena/tasks/scripts/extract_all_menu_items.py +9 -2
- browsergym/workarena/tasks/scripts/generate_dashboard_configs.py +6 -5
- browsergym/workarena/tasks/scripts/service_catalog.py +2 -1
- browsergym/workarena/tasks/scripts/validate.py +8 -2
- browsergym/workarena/tasks/send_chat_message.py +90 -0
- browsergym/workarena/tasks/service_catalog.py +94 -26
- browsergym/workarena/tasks/utils/form.py +1 -4
- browsergym/workarena/tasks/utils/private_tasks.py +63 -0
- browsergym/workarena/tasks/utils/utils.py +13 -0
- {browsergym_workarena-0.2.0.dist-info → browsergym_workarena-0.3.0.dist-info}/METADATA +27 -20
- browsergym_workarena-0.3.0.dist-info/RECORD +138 -0
- {browsergym_workarena-0.2.0.dist-info → browsergym_workarena-0.3.0.dist-info}/entry_points.txt +1 -0
- browsergym_workarena-0.2.0.dist-info/RECORD +0 -85
- {browsergym_workarena-0.2.0.dist-info → browsergym_workarena-0.3.0.dist-info}/WHEEL +0 -0
- {browsergym_workarena-0.2.0.dist-info → browsergym_workarena-0.3.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
from playwright.sync_api import Page
|
|
4
|
+
from typing import Tuple
|
|
5
|
+
|
|
6
|
+
from .base import AbstractServiceNowTask
|
|
7
|
+
from .comp_building_block import CompositionalBuildingBlockTask
|
|
8
|
+
|
|
9
|
+
from ..api.utils import table_api_call
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class SetProblemAsDuplicateTask(AbstractServiceNowTask, CompositionalBuildingBlockTask):
    """
    Set a problem as duplicate, assuming we start on the problems list view.

    Parameters:
    -----------
    seed: int
        Seed forwarded to the base task.
    instance: SNowInstance
        The instance to use.
    start_rel_url: str
        The relative URL of the problem list.
    fixed_config: dict
        Configuration to use for the task. It is required by setup_goal and must contain the keys
        "target_problem" and "source_problem"; each of them is a dict holding at least the problem "number".
    respect_problem_ordering: bool
        If True, validation only succeeds when the target problem is marked as duplicate of the source problem.
        If False, validation accepts either problem being marked as duplicate of the other.
    add_comment: bool
        Whether or not to add a comment to the duplicated problem. If set to True, the cheat writes "Duplicate"
        as the problem description and validation requires that description (case-insensitive) on the problem
        that was marked as duplicate.
    goal_version: str
        choice of "base", "priority", "high priority" ("high_priority" is accepted as an alias).
        Adjusts the goal to the task setting for L2
    level: int
        Level of the enclosing compositional task. When it is 3, the pretty-printed description is a single space.
    **kwargs:
        Any extra keyword argument is stored as an attribute of the task.
    """

    def __init__(
        self,
        seed: int = None,
        instance=None,
        start_rel_url="/now/nav/ui/classic/params/target/problem_list.do",
        fixed_config: dict = None,
        respect_problem_ordering: bool = False,
        add_comment: bool = False,
        goal_version: str = "base",
        level: int = None,
        **kwargs,
    ) -> None:
        super().__init__(seed=seed, instance=instance, start_rel_url=start_rel_url)
        self.fixed_config = fixed_config
        self.config = fixed_config

        self.problem_sys_id = None
        self.respect_problem_ordering = respect_problem_ordering
        self.add_comment = add_comment
        self.goal_version = goal_version
        self.level = level
        # Extra keyword arguments become attributes (they may override the ones set above)
        self.__dict__.update(kwargs)

    def setup_goal(self, page: Page) -> tuple[str, dict]:
        """Read the two problems from the fixed config and return the goal with an empty info dict."""
        self.target_problem = self.fixed_config["target_problem"]
        self.source_problem = self.fixed_config["source_problem"]

        goal = self.get_pretty_printed_description()

        return goal, {}

    def get_pretty_printed_description(self) -> str:
        """
        Get the task info for this task when used in a private task; Used in L2 compositional tasks.
        called by subclasses

        Raises a ValueError when the level is not 3 and goal_version is not one of the supported values.
        """
        if self.level == 3:
            return " "

        descriptions = {
            "base": "Mark problems with duplicated problem statements as such. You can mark any as duplicate of the other.",
            "priority": "Among the problems with duplicated problem statements, mark the lower priority one as duplicate of the higher priority one",
            "high priority": "Among the problems with duplicated problem statements, mark any as duplicate of the other. Change the description of the problem marked as duplicate to 'duplicate'.",
        }
        # The class documentation spells the last option "high_priority" while the code historically
        # matched "high priority"; normalizing underscores makes both spellings work.
        goal_version = str(self.goal_version).replace("_", " ")
        if goal_version not in descriptions:
            raise ValueError(
                f"Unsupported goal_version {self.goal_version!r}; expected one of {sorted(descriptions)}."
            )

        return descriptions[goal_version]

    def cheat(self, page: Page, chat_messages: list[str]) -> None:
        """Mark the target problem as duplicate of the source problem through the UI."""
        super().cheat(page, chat_messages)
        target_problem_number = self.target_problem["number"]

        frame = page.wait_for_selector('iframe[name="gsft_main"]').content_frame()
        # Open the preview of the target problem, located in the list through its number
        frame.wait_for_selector(f"[aria-label='Preview record: {target_problem_number}']").click()
        page.wait_for_timeout(1500)
        # Open the full record from the preview
        frame.get_by_text("Open Record").click()
        page.wait_for_timeout(2000)
        page.wait_for_load_state("networkidle")
        # The frame handle is fetched again after the record page has loaded
        frame = page.wait_for_selector('iframe[name="gsft_main"]').content_frame()
        page.wait_for_timeout(1500)
        # Open the duplicate mode
        frame.get_by_text("Mark Duplicate").first.click()
        page.wait_for_timeout(1000)
        # Close the pop-up to edit the duplicate problem in the same window
        frame.get_by_text("Close").last.click()
        frame.locator('[aria-labelledby="label.problem.duplicate_of"]').fill(
            self.source_problem["number"]
        )
        page.keyboard.press("Enter")
        page.wait_for_timeout(1000)
        if self.add_comment:
            frame.locator('[id="problem.description"]').fill("Duplicate")

        frame.get_by_text("update").first.click()

    @staticmethod
    def _duplicate_of_sys_id(problem_record: dict):
        """Return the sys_id referenced by "duplicate_of", or the falsy raw value when it is not set."""
        # if the duplicate value is not set, the field will be an empty string; otherwise it will be a dict
        duplicate_of = problem_record["duplicate_of"]
        return duplicate_of["value"] if duplicate_of else duplicate_of

    def validate(self, page: Page, chat_messages: list[str]) -> Tuple[float, bool, str, dict]:
        """
        Validate the solution

        Both problems are fetched by number from the "problem" table. Returns the usual
        (reward, done, message, info) tuple: reward 1 when a problem is marked as duplicate as requested
        (and, if add_comment is set, carries the "duplicate" description), 0 otherwise.
        """
        target_problem_record = table_api_call(
            instance=self.instance,
            table="problem",
            params={"sysparm_query": f"number={self.target_problem['number']}"},
        )["result"]
        source_problem_record = table_api_call(
            instance=self.instance,
            table="problem",
            params={"sysparm_query": f"number={self.source_problem['number']}"},
        )["result"]

        if not (source_problem_record and target_problem_record):
            return 0, False, "", {"message": "Problem not found in DB."}

        target_record = target_problem_record[0]
        source_record = source_problem_record[0]

        target_is_duplicate = self._duplicate_of_sys_id(target_record) == source_record["sys_id"]
        # If the ordering can be anything, the reverse direction is accepted as well
        source_is_duplicate = (
            not self.respect_problem_ordering
            and self._duplicate_of_sys_id(source_record) == target_record["sys_id"]
        )
        problem_marked_as_duplicate = target_is_duplicate or source_is_duplicate

        if self.add_comment:
            # The description is only checked on the problem that was marked as duplicate
            comment_added = (
                target_is_duplicate and target_record["description"].lower() == "duplicate"
            ) or (source_is_duplicate and source_record["description"].lower() == "duplicate")
            if not comment_added:
                return 0, False, "", {"message": "Comment not added."}

        if not problem_marked_as_duplicate:
            return 0, False, "", {"message": "Problem not marked as duplicate."}

        return (
            1,
            True,
            "Nice work, thank you!",
            {"message": "Problem task was closed as duplicate."},
        )
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
__TASKS__ = [SetProblemAsDuplicateTask]
|
|
@@ -3,13 +3,14 @@ Tasks related to basic menu navigation.
|
|
|
3
3
|
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
|
+
import json
|
|
6
7
|
import playwright.sync_api
|
|
8
|
+
import re
|
|
7
9
|
|
|
8
10
|
from importlib import resources
|
|
9
|
-
import json
|
|
10
11
|
from playwright.sync_api import Page
|
|
11
|
-
from urllib
|
|
12
|
-
from typing import Tuple
|
|
12
|
+
from urllib import parse
|
|
13
|
+
from typing import List, Tuple
|
|
13
14
|
|
|
14
15
|
from ..api.utils import table_api_call
|
|
15
16
|
from .base import AbstractServiceNowTask
|
|
@@ -34,11 +35,14 @@ class AllMenuTask(AbstractServiceNowTask):
|
|
|
34
35
|
|
|
35
36
|
"""
|
|
36
37
|
|
|
37
|
-
def __init__(
|
|
38
|
+
def __init__(
|
|
39
|
+
self, seed: int = None, instance: SNowInstance = None, fixed_config: dict = None, **kwargs
|
|
40
|
+
) -> None:
|
|
38
41
|
super().__init__(seed=seed, instance=instance, start_rel_url="/now/nav/ui/home")
|
|
39
42
|
self.fixed_config = fixed_config
|
|
40
43
|
with open(ALL_MENU_PATH, "r") as f:
|
|
41
44
|
self.all_configs = json.load(f)
|
|
45
|
+
self.__dict__.update(kwargs)
|
|
42
46
|
|
|
43
47
|
def setup_goal(self, page: Page) -> tuple[str, dict]:
|
|
44
48
|
super().setup_goal(page=page)
|
|
@@ -47,7 +51,9 @@ class AllMenuTask(AbstractServiceNowTask):
|
|
|
47
51
|
self.module = (
|
|
48
52
|
self.fixed_config if self.fixed_config else self.random.choice(self.all_configs)
|
|
49
53
|
)
|
|
50
|
-
|
|
54
|
+
|
|
55
|
+
# When menu tasks do not need to be validated, the URL can be omitted from their config
|
|
56
|
+
self.final_url = self.instance.snow_url + self.module.get("url", "")
|
|
51
57
|
|
|
52
58
|
# Generate goal
|
|
53
59
|
goal = f'Navigate to the "{self.module["module"]}" module of the "{self.module["application"]}" application.'
|
|
@@ -55,9 +61,19 @@ class AllMenuTask(AbstractServiceNowTask):
|
|
|
55
61
|
|
|
56
62
|
return goal, info
|
|
57
63
|
|
|
64
|
+
def get_pretty_printed_description(self) -> str:
    """
    Build the bullet-point description of this navigation step; used when the task is
    embedded in a private task of an L3 compositional task. Called by subclasses.
    """
    module_name = self.module["module"]
    application_name = self.module["application"]

    return f'- Navigate to the "{module_name}" module of the "{application_name}" application.'
|
|
72
|
+
|
|
58
73
|
def cheat(self, page: Page, chat_messages: list[str]) -> None:
|
|
59
74
|
super().cheat(page=page, chat_messages=chat_messages)
|
|
60
|
-
|
|
75
|
+
# gsft_main remains undefined on the landing page; we have to wait for the network to be idle instead.
|
|
76
|
+
page.wait_for_load_state("networkidle")
|
|
61
77
|
menu_button = page.locator('div[aria-label="All"]')
|
|
62
78
|
if menu_button.get_attribute("aria-expanded").lower() != "true":
|
|
63
79
|
menu_button.click()
|
|
@@ -100,7 +116,7 @@ class AllMenuTask(AbstractServiceNowTask):
|
|
|
100
116
|
# In some cases, like System Scheduler > Scheduled Jobs > Scheduled Jobs, modules are repeated in the path
|
|
101
117
|
# This causes problems when clicking. Therefore, we pick the last item
|
|
102
118
|
if menu_item.count() > 1:
|
|
103
|
-
menu_item = menu_item.
|
|
119
|
+
menu_item = menu_item.first
|
|
104
120
|
with page.expect_navigation():
|
|
105
121
|
menu_item.click()
|
|
106
122
|
page.wait_for_timeout(2000)
|
|
@@ -111,11 +127,18 @@ class AllMenuTask(AbstractServiceNowTask):
|
|
|
111
127
|
page.wait_for_load_state("domcontentloaded")
|
|
112
128
|
|
|
113
129
|
# Get the current URL and the final URL
|
|
114
|
-
current_url = urlunparse(
|
|
115
|
-
|
|
130
|
+
current_url = parse.urlunparse(
|
|
131
|
+
parse.urlparse(parse.unquote(page.evaluate("() => window.location.href")))
|
|
132
|
+
)
|
|
133
|
+
final_url = parse.urlunparse(parse.urlparse(parse.unquote(self.final_url)))
|
|
116
134
|
|
|
117
135
|
if final_url == current_url:
|
|
118
|
-
return
|
|
136
|
+
return (
|
|
137
|
+
1,
|
|
138
|
+
True,
|
|
139
|
+
"Nice work, thank you!",
|
|
140
|
+
{"message": "Correct module reached."},
|
|
141
|
+
)
|
|
119
142
|
|
|
120
143
|
return 0, False, "", {"message": "Not at expected URL."}
|
|
121
144
|
|
|
@@ -139,11 +162,14 @@ class ImpersonationTask(AbstractServiceNowTask):
|
|
|
139
162
|
|
|
140
163
|
"""
|
|
141
164
|
|
|
142
|
-
def __init__(
|
|
165
|
+
def __init__(
|
|
166
|
+
self, seed: int = None, instance=None, fixed_config: dict = None, **kwargs
|
|
167
|
+
) -> None:
|
|
143
168
|
super().__init__(seed=seed, instance=instance, start_rel_url="/now/nav/ui/home")
|
|
144
169
|
self.fixed_config = fixed_config
|
|
145
170
|
with open(IMPERSONATION_CONFIG_PATH, "r") as f:
|
|
146
171
|
self.all_configs = json.load(f)
|
|
172
|
+
self.__dict__.update(kwargs)
|
|
147
173
|
|
|
148
174
|
def setup_goal(self, page: Page) -> tuple[str, dict]:
|
|
149
175
|
super().setup_goal(page=page)
|
|
@@ -160,6 +186,15 @@ class ImpersonationTask(AbstractServiceNowTask):
|
|
|
160
186
|
|
|
161
187
|
return goal, info
|
|
162
188
|
|
|
189
|
+
def get_pretty_printed_description(self) -> str:
    """
    Build the bullet-point description of this impersonation step; used when the task is
    embedded in a private task of an L3 compositional task. Called by subclasses.
    """
    # The trailing space and newline are part of the emitted description
    return f"- Impersonate the user {self.user_full_name} \n"
|
|
197
|
+
|
|
163
198
|
def cheat(self, page: Page, chat_messages: list[str]) -> None:
|
|
164
199
|
super().cheat(page=page, chat_messages=chat_messages)
|
|
165
200
|
impersonate_user(self.user_full_name, page)
|
|
@@ -167,7 +202,9 @@ class ImpersonationTask(AbstractServiceNowTask):
|
|
|
167
202
|
def validate(
|
|
168
203
|
self, page: playwright.sync_api.Page, chat_messages: list[str]
|
|
169
204
|
) -> Tuple[float, bool, str, dict]:
|
|
170
|
-
|
|
205
|
+
page.wait_for_function("window.NOW && window.NOW.user")
|
|
206
|
+
|
|
207
|
+
user_info = page.evaluate("window.NOW")["user"]
|
|
171
208
|
|
|
172
209
|
# If the current user is not being impersonated, fail.
|
|
173
210
|
if not user_info["isImpersonating"]:
|
|
@@ -185,7 +222,12 @@ class ImpersonationTask(AbstractServiceNowTask):
|
|
|
185
222
|
|
|
186
223
|
# If the name matches, success.
|
|
187
224
|
if user_fullname == self.user_full_name:
|
|
188
|
-
return
|
|
225
|
+
return (
|
|
226
|
+
1,
|
|
227
|
+
True,
|
|
228
|
+
"Nice work, thank you!",
|
|
229
|
+
{"message": "Correct user impersonated."},
|
|
230
|
+
)
|
|
189
231
|
|
|
190
232
|
# Otherwise, fail.
|
|
191
233
|
return 0, False, "", {"message": "Currently impersonating the wrong user."}
|
|
@@ -72,7 +72,10 @@ if __name__ == "__main__":
|
|
|
72
72
|
expand_and_gather_paths(page, nested_parent_selector, new_path)
|
|
73
73
|
|
|
74
74
|
if not collapsible_lists:
|
|
75
|
-
current_path_item = {
|
|
75
|
+
current_path_item = {
|
|
76
|
+
"path": current_path.copy(),
|
|
77
|
+
"selector": parent_selector,
|
|
78
|
+
}
|
|
76
79
|
base_paths.append(current_path_item)
|
|
77
80
|
|
|
78
81
|
def expand_menu():
|
|
@@ -190,7 +193,11 @@ if __name__ == "__main__":
|
|
|
190
193
|
45:
|
|
191
194
|
] # get only the end of the url
|
|
192
195
|
if url not in urls:
|
|
193
|
-
menu_task = {
|
|
196
|
+
menu_task = {
|
|
197
|
+
"application": application,
|
|
198
|
+
"module": module,
|
|
199
|
+
"url": url,
|
|
200
|
+
}
|
|
194
201
|
all_menu_items.append(menu_task)
|
|
195
202
|
urls[url] = True
|
|
196
203
|
|
|
@@ -28,7 +28,7 @@ from browsergym.workarena.tasks.dashboard import DashboardRetrievalTask
|
|
|
28
28
|
|
|
29
29
|
N_CPU = 20
|
|
30
30
|
MAX_CONFIGS = 1000
|
|
31
|
-
REPORT =
|
|
31
|
+
REPORT = False # Set to True for reports, False for dashboards
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
class DummyDashboard(DashboardRetrievalTask):
|
|
@@ -102,10 +102,10 @@ def get_dashboard_urls(instance):
|
|
|
102
102
|
"18b1f472533130104c90ddeeff7b12a6", # Incident overview
|
|
103
103
|
"287d07d1ff3130106c1ef9a7cddcbd5d", # Request overview
|
|
104
104
|
"7ab78953eb32011008f2951ff15228e6", # Service catalog overview
|
|
105
|
-
"2d297c880f1130101527008c07767e27", # Survey overview
|
|
105
|
+
# "2d297c880f1130101527008c07767e27", # Survey overview (almost empty post deleting reports that rely on time)
|
|
106
106
|
"6b706f448f231110953ddffc9071a4f3", # Telemetry - Table growth
|
|
107
|
-
"15c5d2d377213010a435478c4f5a993c", # Usage overview
|
|
108
|
-
"85a57f9677100110ba155631dc5a9905", # Web api usage overview
|
|
107
|
+
# "15c5d2d377213010a435478c4f5a993c", # Usage overview
|
|
108
|
+
# "85a57f9677100110ba155631dc5a9905", # Web api usage overview (empty post deleting reports that rely on time)
|
|
109
109
|
"c38ca3a273031010ae8dd21efaf6a747", # Data classification
|
|
110
110
|
"3d48f669538223008329ddeeff7b1253", # Problem overview
|
|
111
111
|
]
|
|
@@ -131,6 +131,7 @@ def get_all_configs_by_url(url, is_report):
|
|
|
131
131
|
"chart_series": "",
|
|
132
132
|
"question": "max",
|
|
133
133
|
},
|
|
134
|
+
seed=0,
|
|
134
135
|
)
|
|
135
136
|
task.setup(page=page)
|
|
136
137
|
|
|
@@ -196,7 +197,7 @@ def get_all_configs_by_url(url, is_report):
|
|
|
196
197
|
)
|
|
197
198
|
except Exception as e:
|
|
198
199
|
print("Exception in worker", url, chart_title, e)
|
|
199
|
-
|
|
200
|
+
continue # Skip this chart
|
|
200
201
|
|
|
201
202
|
if len(questions) == 0:
|
|
202
203
|
return []
|
|
@@ -65,7 +65,8 @@ def generate_configs_for_all_items():
|
|
|
65
65
|
"w",
|
|
66
66
|
) as f:
|
|
67
67
|
all_configs_for_a_single_item = sorted(
|
|
68
|
-
all_configs_for_a_single_item,
|
|
68
|
+
all_configs_for_a_single_item,
|
|
69
|
+
key=lambda x: x["item"] + str(x["quantity"]),
|
|
69
70
|
)
|
|
70
71
|
json.dump(all_configs_for_a_single_item, f, indent=4, sort_keys=True)
|
|
71
72
|
|
|
@@ -156,7 +156,11 @@ def validate_on_page(task_class, task_config, page):
|
|
|
156
156
|
|
|
157
157
|
|
|
158
158
|
def validate_configs(
|
|
159
|
-
task_class,
|
|
159
|
+
task_class,
|
|
160
|
+
config_path,
|
|
161
|
+
num_tasks: int = None,
|
|
162
|
+
save_failed_tasks: bool = True,
|
|
163
|
+
page=None,
|
|
160
164
|
) -> list[dict]:
|
|
161
165
|
"""Validate that the configs are working. Saves failing configs to json so they can be tested."""
|
|
162
166
|
with open(config_path, "r") as f:
|
|
@@ -167,7 +171,9 @@ def validate_configs(
|
|
|
167
171
|
|
|
168
172
|
failed_tasks = {"cheat": [], "no_reward": [], "exception": [], "not_done": []}
|
|
169
173
|
with tqdm(
|
|
170
|
-
total=len(all_configs),
|
|
174
|
+
total=len(all_configs),
|
|
175
|
+
desc=f"Validating {task_class.__name__} configs",
|
|
176
|
+
ncols=150,
|
|
171
177
|
) as pbar:
|
|
172
178
|
for task_config in all_configs:
|
|
173
179
|
try:
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
from typing import Tuple
|
|
2
|
+
from playwright.sync_api import Page
|
|
3
|
+
|
|
4
|
+
from .base import AbstractServiceNowTask
|
|
5
|
+
from .comp_building_block import CompositionalBuildingBlockTask
|
|
6
|
+
|
|
7
|
+
from ..instance import SNowInstance
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class SendChatMessageTask(AbstractServiceNowTask, CompositionalBuildingBlockTask):
    """Task to send a chat message in the chat. Only used as a compositional building block for the cheat function.

    Args:
    --------
    instance (SNowInstance):
        The instance to use
    message (str):
        The message to send in the chat
    answer_format (str):
        The type of answer to generate. Choice of total_return_only, total_return_and_investments, investments_only, cleanup, cleanup_and_return
    use_description_in_l3 (bool):
        Stored on the task; subclasses may use it to decide which description to return
    **kwargs:
        Any extra keyword argument is stored as an attribute of the task
    """

    def __init__(
        self,
        instance: SNowInstance,
        message: str,
        answer_format: str,
        use_description_in_l3: bool = False,
        **kwargs,
    ):
        # The seed is fixed and there is no start URL for this building block
        super().__init__(seed=0, instance=instance, start_rel_url="")
        self.message = message
        self.answer_format = answer_format
        self.use_description_in_l3 = use_description_in_l3
        # Extra keyword arguments become attributes (they may override the ones set above)
        self.__dict__.update(kwargs)

    def setup_goal(self, page: Page) -> Tuple[str, dict]:
        """Return the pretty-printed description as the goal, with an empty info dict."""
        return self.get_pretty_printed_description(), {}

    def validate(self, page: Page, chat_messages: list[str]) -> Tuple[float, bool, str, dict]:
        """Delegate validation to the parent classes."""
        return super().validate(page, chat_messages)

    def cheat(self, page: Page, chat_messages: list[str]):
        """Append the configured message to the chat as an assistant message."""
        super().cheat(page=page, chat_messages=chat_messages)
        chat_messages.append({"role": "assistant", "message": str(self.message)})

    def teardown(self) -> None:
        """Nothing to clean up."""
        pass

    def get_pretty_printed_description(self) -> str:
        """
        Get the task info for this task when used in a private task; Used in compositional tasks.
        Must be implemented by subclasses.
        """
        raise NotImplementedError
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class SendChatMessageForBudgetAllocationTask(SendChatMessageTask):
    """Chat-message building block whose description asks to allocate a budget to maximize revenue."""

    # Sentence appended to the description for each supported answer format;
    # any other answer format appends nothing.
    _ANSWER_FORMAT_INSTRUCTIONS = {
        "total_return_only": " Provide only the total return of the investments in the chat.",
        "total_return_and_investments": " Provide the total return of the investments as well as the value of their 'Number' field in the chat.",
        "investments_only": " Provide only the value of the 'Number' field of the selected investments in the chat.",
        "cleanup": " Delete the investments that will not be kept so that only the selected investments remain.",
        "cleanup_and_return": " Delete the investments that will not be kept so that only the selected investments remain as well as returning their total value in the chat.",
    }

    def get_pretty_printed_description(self) -> str:
        """
        Get the task info for this task when used in a private task; Used in compositional tasks.

        Reads the `level`, `budget` (level 2 only) and `answer_format` attributes of the task.
        Raises a ValueError when the level is neither 2 nor 3.
        """
        # `level` is not set by the constructor itself; it only exists when passed as a keyword argument
        level = getattr(self, "level", None)
        if level == 3:
            task_info = "Allocate the budget to maximize revenue."
        elif level == 2:
            task_info = f"Allocate the budget to maximize revenue. This involves going over expense lines and identifying the ones maximizing revenue while fitting in the allowed budget of {self.budget} $. The returns are written in their short description."
        else:
            raise ValueError(f"Unsupported level {level!r}; expected 2 or 3.")

        return task_info + self._ANSWER_FORMAT_INSTRUCTIONS.get(self.answer_format, "")
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class SendChatMessageGenericTask(SendChatMessageTask):
    """Chat-message building block whose description is the `description` attribute of the task."""

    def get_pretty_printed_description(self) -> str:
        """
        Get the task info for this task when used in a private task; Used in compositional tasks.

        Returns the `description` attribute when `use_description_in_l3` is set or when the level is 2,
        and an empty string when the level is 3.
        Raises a ValueError when `use_description_in_l3` is falsy and the level is neither 2 nor 3.
        """
        if self.use_description_in_l3:
            return self.description

        # `level` is not set by the constructor itself; it only exists when passed as a keyword argument
        level = getattr(self, "level", None)
        if level == 3:
            return ""
        if level == 2:
            return self.description

        raise ValueError(f"Unsupported level {level!r}; expected 2 or 3.")
|