browsergym-workarena 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. browsergym/workarena/__init__.py +13 -1
  2. browsergym/workarena/api/category.py +74 -0
  3. browsergym/workarena/api/change_request.py +87 -0
  4. browsergym/workarena/api/computer_asset.py +90 -0
  5. browsergym/workarena/api/cost_center.py +19 -0
  6. browsergym/workarena/api/expense_line.py +89 -0
  7. browsergym/workarena/api/incident.py +45 -0
  8. browsergym/workarena/api/knowledge.py +29 -0
  9. browsergym/workarena/api/problem.py +90 -0
  10. browsergym/workarena/api/report.py +183 -0
  11. browsergym/workarena/api/requested_items.py +63 -0
  12. browsergym/workarena/api/user.py +11 -8
  13. browsergym/workarena/api/utils.py +47 -3
  14. browsergym/workarena/config.py +21 -1
  15. browsergym/workarena/data_files/setup_files/forms/expected_incident_form_fields.json +1 -1
  16. browsergym/workarena/data_files/setup_files/forms/expected_request_item_form_fields.json +1 -0
  17. browsergym/workarena/data_files/setup_files/knowledge/protocols.json +46 -0
  18. browsergym/workarena/data_files/setup_files/knowledge/test.html +1 -0
  19. browsergym/workarena/data_files/setup_files/lists/expected_asset_list_columns.json +2 -24
  20. browsergym/workarena/data_files/setup_files/lists/expected_change_request_list_columns.json +4 -40
  21. browsergym/workarena/data_files/setup_files/lists/expected_expense_line_list_columns.json +12 -0
  22. browsergym/workarena/data_files/setup_files/lists/expected_hardware_list_columns.json +1 -42
  23. browsergym/workarena/data_files/setup_files/lists/expected_incident_list_columns.json +2 -18
  24. browsergym/workarena/data_files/setup_files/lists/expected_problem_list_columns.json +12 -0
  25. browsergym/workarena/data_files/setup_files/lists/expected_requested_items_list_columns.json +12 -0
  26. browsergym/workarena/data_files/setup_files/lists/expected_service_catalog_list_columns.json +2 -19
  27. browsergym/workarena/data_files/setup_files/lists/expected_user_list_columns.json +3 -50
  28. browsergym/workarena/data_files/task_configs/all_menu.json +95 -95
  29. browsergym/workarena/data_files/task_configs/dashboard_retrieval_minmax_task.json +1 -1
  30. browsergym/workarena/data_files/task_configs/dashboard_retrieval_value_task.json +1 -1
  31. browsergym/workarena/data_files/task_configs/filter_service_catalog_item_list_task.json +7986 -7982
  32. browsergym/workarena/data_files/task_configs/impersonation_users.json +3 -3
  33. browsergym/workarena/data_files/task_configs/report_retrieval_minmax_task.json +1 -1
  34. browsergym/workarena/data_files/task_configs/report_retrieval_value_task.json +1 -1
  35. browsergym/workarena/human_eval/console.js +176 -0
  36. browsergym/workarena/human_eval/tool.py +366 -0
  37. browsergym/workarena/install.py +81 -20
  38. browsergym/workarena/tasks/base.py +55 -20
  39. browsergym/workarena/tasks/comp_building_block.py +4 -0
  40. browsergym/workarena/tasks/compositional/__init__.py +76 -0
  41. browsergym/workarena/tasks/compositional/base.py +364 -0
  42. browsergym/workarena/tasks/compositional/dash_do_base.py +1366 -0
  43. browsergym/workarena/tasks/compositional/dash_do_catalog.py +1127 -0
  44. browsergym/workarena/tasks/compositional/dash_do_catalog_infeasible.py +2047 -0
  45. browsergym/workarena/tasks/compositional/dash_do_create_incident.py +403 -0
  46. browsergym/workarena/tasks/compositional/dash_do_create_incident_infeasible.py +278 -0
  47. browsergym/workarena/tasks/compositional/dash_do_create_problem.py +336 -0
  48. browsergym/workarena/tasks/compositional/dash_do_create_problem_infeasible.py +235 -0
  49. browsergym/workarena/tasks/compositional/dash_do_filter.py +1600 -0
  50. browsergym/workarena/tasks/compositional/dash_do_request_item.py +1315 -0
  51. browsergym/workarena/tasks/compositional/dash_do_request_item_infeasible.py +693 -0
  52. browsergym/workarena/tasks/compositional/delete_record.py +341 -0
  53. browsergym/workarena/tasks/compositional/edit_knowledge_base.py +457 -0
  54. browsergym/workarena/tasks/compositional/expense_management.py +598 -0
  55. browsergym/workarena/tasks/compositional/filter_and_do.py +139 -0
  56. browsergym/workarena/tasks/compositional/find_and_order_item.py +345 -0
  57. browsergym/workarena/tasks/compositional/manage_change_request_schedule.py +1417 -0
  58. browsergym/workarena/tasks/compositional/mark_duplicate_problems.py +499 -0
  59. browsergym/workarena/tasks/compositional/maximize_investment_return.py +1763 -0
  60. browsergym/workarena/tasks/compositional/navigate_and_do.py +1151 -0
  61. browsergym/workarena/tasks/compositional/navigate_and_do_infeasible.py +2100 -0
  62. browsergym/workarena/tasks/compositional/offboard_user.py +207 -0
  63. browsergym/workarena/tasks/compositional/onboard_user.py +226 -0
  64. browsergym/workarena/tasks/compositional/update_task.py +145 -0
  65. browsergym/workarena/tasks/compositional/utils/curriculum.py +215 -0
  66. browsergym/workarena/tasks/compositional/utils/infeasible_configs.py +151 -0
  67. browsergym/workarena/tasks/compositional/utils/knapsack.py +192 -0
  68. browsergym/workarena/tasks/compositional/warranty_check.py +227 -0
  69. browsergym/workarena/tasks/compositional/work_assignment.py +804 -0
  70. browsergym/workarena/tasks/compositional/workload_balancing.py +396 -0
  71. browsergym/workarena/tasks/dashboard.py +188 -8
  72. browsergym/workarena/tasks/form.py +1024 -232
  73. browsergym/workarena/tasks/knowledge.py +216 -25
  74. browsergym/workarena/tasks/list.py +519 -102
  75. browsergym/workarena/tasks/mark_duplicate_problem.py +171 -0
  76. browsergym/workarena/tasks/navigation.py +55 -13
  77. browsergym/workarena/tasks/scripts/extract_all_menu_items.py +9 -2
  78. browsergym/workarena/tasks/scripts/generate_dashboard_configs.py +6 -5
  79. browsergym/workarena/tasks/scripts/service_catalog.py +2 -1
  80. browsergym/workarena/tasks/scripts/validate.py +8 -2
  81. browsergym/workarena/tasks/send_chat_message.py +90 -0
  82. browsergym/workarena/tasks/service_catalog.py +94 -26
  83. browsergym/workarena/tasks/utils/form.py +1 -4
  84. browsergym/workarena/tasks/utils/private_tasks.py +63 -0
  85. browsergym/workarena/tasks/utils/utils.py +13 -0
  86. {browsergym_workarena-0.2.0.dist-info → browsergym_workarena-0.3.0.dist-info}/METADATA +27 -20
  87. browsergym_workarena-0.3.0.dist-info/RECORD +138 -0
  88. {browsergym_workarena-0.2.0.dist-info → browsergym_workarena-0.3.0.dist-info}/entry_points.txt +1 -0
  89. browsergym_workarena-0.2.0.dist-info/RECORD +0 -85
  90. {browsergym_workarena-0.2.0.dist-info → browsergym_workarena-0.3.0.dist-info}/WHEEL +0 -0
  91. {browsergym_workarena-0.2.0.dist-info → browsergym_workarena-0.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,171 @@
1
+ import json
2
+
3
+ from playwright.sync_api import Page
4
+ from typing import Tuple
5
+
6
+ from .base import AbstractServiceNowTask
7
+ from .comp_building_block import CompositionalBuildingBlockTask
8
+
9
+ from ..api.utils import table_api_call
10
+
11
+
12
class SetProblemAsDuplicateTask(AbstractServiceNowTask, CompositionalBuildingBlockTask):
    """
    Set a problem as duplicate, assuming we start on the problems list view.

    Parameters:
    -----------
    seed: int
        Seed forwarded to the base task.
    instance: SNowInstance
        The instance to use.
    start_rel_url: str
        The relative URL of the task list.
    fixed_config: dict
        Configuration to use for the task. It must provide the "target_problem" and "source_problem" entries;
        the "number" of each is used to locate the records when cheating and validating.
    respect_problem_ordering: bool
        Whether to respect the ordering of the problems. If True, validation only succeeds when the target problem
        is marked as a duplicate of the source problem. If False, either problem may be marked as a duplicate of
        the other.
    add_comment: bool
        Whether or not to add a comment to the duplicated problem. If set to True, the description of the problem
        marked as duplicate must be "Duplicate" (case-insensitive) for the validation to succeed.
    goal_version: str
        choice of "base", "priority", "high_priority" (the legacy spelling "high priority" is accepted as well).
        Adjusts the goal to the task setting for L2
    level: int
        Level of the compositional task using this block. At level 3 the description is left blank.
    """

    def __init__(
        self,
        seed: int = None,
        instance=None,
        start_rel_url="/now/nav/ui/classic/params/target/problem_list.do",
        fixed_config: dict = None,
        respect_problem_ordering: bool = False,
        add_comment: bool = False,
        goal_version: str = "base",
        level: int = None,
        **kwargs,
    ) -> None:
        super().__init__(seed=seed, instance=instance, start_rel_url=start_rel_url)
        self.fixed_config = fixed_config
        self.config = fixed_config

        self.problem_sys_id = None
        self.respect_problem_ordering = respect_problem_ordering
        self.add_comment = add_comment
        self.goal_version = goal_version
        self.level = level
        # Any extra keyword argument is exposed as an attribute of the task
        self.__dict__.update(kwargs)

    def setup_goal(self, page: Page) -> tuple[str, dict]:
        """Read the two problems from the fixed config and return the goal with an empty info dict."""
        self.target_problem = self.fixed_config["target_problem"]
        self.source_problem = self.fixed_config["source_problem"]

        goal = self.get_pretty_printed_description()

        return goal, {}

    def get_pretty_printed_description(self) -> str:
        """
        Get the task info for this task when used in a private task; Used in L2 compositional tasks.
        called by subclasses

        Raises:
        -------
        ValueError
            If the goal version is not one of the supported choices (and the level is not 3).
        """
        # At level 3, the description is intentionally left blank
        if self.level == 3:
            return " "

        high_priority_info = "Among the problems with duplicated problem statements, mark any as duplicate of the other. Change the description of the problem marked as duplicate to 'duplicate'."
        descriptions = {
            "base": "Mark problems with duplicated problem statements as such. You can mark any as duplicate of the other.",
            "priority": "Among the problems with duplicated problem statements, mark the lower priority one as duplicate of the higher priority one",
            # Both the documented spelling and the legacy one map to the same description
            "high_priority": high_priority_info,
            "high priority": high_priority_info,
        }
        if self.goal_version not in descriptions:
            raise ValueError(
                f"Unsupported goal_version {self.goal_version!r}; expected one of {sorted(descriptions)}."
            )

        return descriptions[self.goal_version]

    def cheat(self, page: Page, chat_messages: list[str]) -> None:
        """Mark the target problem as a duplicate of the source problem through the UI."""
        super().cheat(page, chat_messages)
        target_problem_number = self.target_problem["number"]

        frame = page.wait_for_selector('iframe[name="gsft_main"]').content_frame()
        # Locate the target problem in the list through the preview button labelled with its number
        frame.wait_for_selector(f"[aria-label='Preview record: {target_problem_number}']").click()
        page.wait_for_timeout(1500)
        # Open the full record from the preview
        frame.get_by_text("Open Record").click()
        page.wait_for_timeout(2000)
        page.wait_for_load_state("networkidle")
        # Fetch the frame again after the navigation to the record
        frame = page.wait_for_selector('iframe[name="gsft_main"]').content_frame()
        page.wait_for_timeout(1500)
        # Open the duplicate mode
        frame.get_by_text("Mark Duplicate").first.click()
        page.wait_for_timeout(1000)
        # Close the pop-up to edit the duplicate problem in the same window
        frame.get_by_text("Close").last.click()
        frame.locator('[aria-labelledby="label.problem.duplicate_of"]').fill(
            self.source_problem["number"]
        )
        page.keyboard.press("Enter")
        page.wait_for_timeout(1000)
        if self.add_comment:
            frame.locator('[id="problem.description"]').fill("Duplicate")

        frame.get_by_text("update").first.click()

    @staticmethod
    def _get_duplicate_of(problem_record: dict):
        """Return the sys_id stored in the record's duplicate_of field, or the empty value when unset."""
        # if the duplicate value is not set, the field will be an empty string; otherwise it will be a dict
        duplicate_value = problem_record["duplicate_of"]
        if duplicate_value:
            duplicate_value = duplicate_value["value"]
        return duplicate_value

    def validate(self, page: Page, chat_messages: list[str]) -> Tuple[float, bool, str, dict]:
        """
        Validate the solution

        Returns:
        --------
        Tuple[float, bool, str, dict]
            The reward, whether the task is done, the message to send to the user and an info dict.
        """
        target_problem_record = table_api_call(
            instance=self.instance,
            table="problem",
            params={"sysparm_query": f"number={self.target_problem['number']}"},
        )["result"]
        source_problem_record = table_api_call(
            instance=self.instance,
            table="problem",
            params={"sysparm_query": f"number={self.source_problem['number']}"},
        )["result"]
        # Both records are needed, whatever the ordering
        problem_found = source_problem_record and target_problem_record

        if not problem_found:
            return 0, False, "", {"message": "Problem not found in DB."}

        target_record = target_problem_record[0]
        source_record = source_problem_record[0]

        target_is_duplicate = self._get_duplicate_of(target_record) == source_record["sys_id"]
        if self.respect_problem_ordering:
            problem_marked_as_duplicate = target_is_duplicate
        else:
            # If the ordering can be anything, we check both problems
            source_is_duplicate = self._get_duplicate_of(source_record) == target_record["sys_id"]
            problem_marked_as_duplicate = target_is_duplicate or source_is_duplicate

        if self.add_comment:
            # The comment must be on the problem that was actually marked as duplicate
            comment_added = (
                target_record["description"].lower() == "duplicate" and target_is_duplicate
            )
            if not self.respect_problem_ordering:
                comment_added = comment_added or (
                    source_record["description"].lower() == "duplicate" and source_is_duplicate
                )
            if not comment_added:
                return 0, False, "", {"message": "Comment not added."}

        if not problem_marked_as_duplicate:
            return 0, False, "", {"message": "Problem not marked as duplicate."}

        return (
            1,
            True,
            "Nice work, thank you!",
            {"message": "Problem task was closed as duplicate."},
        )
169
+
170
+
171
+ __TASKS__ = [SetProblemAsDuplicateTask]
@@ -3,13 +3,14 @@ Tasks related to basic menu navigation.
3
3
 
4
4
  """
5
5
 
6
+ import json
6
7
  import playwright.sync_api
8
+ import re
7
9
 
8
10
  from importlib import resources
9
- import json
10
11
  from playwright.sync_api import Page
11
- from urllib.parse import urlparse, urlunparse, unquote
12
- from typing import Tuple
12
+ from urllib import parse
13
+ from typing import List, Tuple
13
14
 
14
15
  from ..api.utils import table_api_call
15
16
  from .base import AbstractServiceNowTask
@@ -34,11 +35,14 @@ class AllMenuTask(AbstractServiceNowTask):
34
35
 
35
36
  """
36
37
 
37
- def __init__(self, seed: int, instance: SNowInstance = None, fixed_config: dict = None) -> None:
38
+ def __init__(
39
+ self, seed: int = None, instance: SNowInstance = None, fixed_config: dict = None, **kwargs
40
+ ) -> None:
38
41
  super().__init__(seed=seed, instance=instance, start_rel_url="/now/nav/ui/home")
39
42
  self.fixed_config = fixed_config
40
43
  with open(ALL_MENU_PATH, "r") as f:
41
44
  self.all_configs = json.load(f)
45
+ self.__dict__.update(kwargs)
42
46
 
43
47
  def setup_goal(self, page: Page) -> tuple[str, dict]:
44
48
  super().setup_goal(page=page)
@@ -47,7 +51,9 @@ class AllMenuTask(AbstractServiceNowTask):
47
51
  self.module = (
48
52
  self.fixed_config if self.fixed_config else self.random.choice(self.all_configs)
49
53
  )
50
- self.final_url = self.instance.snow_url + self.module["url"]
54
+
55
+ # When menu tasks do not need to be validated, the URL can be omitted from their config
56
+ self.final_url = self.instance.snow_url + self.module.get("url", "")
51
57
 
52
58
  # Generate goal
53
59
  goal = f'Navigate to the "{self.module["module"]}" module of the "{self.module["application"]}" application.'
@@ -55,9 +61,19 @@ class AllMenuTask(AbstractServiceNowTask):
55
61
 
56
62
  return goal, info
57
63
 
64
+ def get_pretty_printed_description(self) -> str:
65
+ """
66
+ Get the task info for this task when used in a private task; Used in L3 compositional tasks.
67
+ called by subclasses
68
+ """
69
+ task_info = f'- Navigate to the "{self.module["module"]}" module of the "{self.module["application"]}" application.'
70
+
71
+ return task_info
72
+
58
73
  def cheat(self, page: Page, chat_messages: list[str]) -> None:
59
74
  super().cheat(page=page, chat_messages=chat_messages)
60
-
75
+ # gsft_main remains undefined on the landing page; we have to wait for the network to be idle instead.
76
+ page.wait_for_load_state("networkidle")
61
77
  menu_button = page.locator('div[aria-label="All"]')
62
78
  if menu_button.get_attribute("aria-expanded").lower() != "true":
63
79
  menu_button.click()
@@ -100,7 +116,7 @@ class AllMenuTask(AbstractServiceNowTask):
100
116
  # In some cases, like System Scheduler > Scheduled Jobs > Scheduled Jobs, modules are repeated in the path
101
117
  # This causes problems when clicking. Therefore, we pick the first item
102
118
  if menu_item.count() > 1:
103
- menu_item = menu_item.last
119
+ menu_item = menu_item.first
104
120
  with page.expect_navigation():
105
121
  menu_item.click()
106
122
  page.wait_for_timeout(2000)
@@ -111,11 +127,18 @@ class AllMenuTask(AbstractServiceNowTask):
111
127
  page.wait_for_load_state("domcontentloaded")
112
128
 
113
129
  # Get the current URL and the final URL
114
- current_url = urlunparse(urlparse(unquote(page.evaluate("() => window.location.href"))))
115
- final_url = urlunparse(urlparse(unquote(self.final_url)))
130
+ current_url = parse.urlunparse(
131
+ parse.urlparse(parse.unquote(page.evaluate("() => window.location.href")))
132
+ )
133
+ final_url = parse.urlunparse(parse.urlparse(parse.unquote(self.final_url)))
116
134
 
117
135
  if final_url == current_url:
118
- return 1, True, "Nice work, thank you!", {"message": "Correct module reached."}
136
+ return (
137
+ 1,
138
+ True,
139
+ "Nice work, thank you!",
140
+ {"message": "Correct module reached."},
141
+ )
119
142
 
120
143
  return 0, False, "", {"message": "Not at expected URL."}
121
144
 
@@ -139,11 +162,14 @@ class ImpersonationTask(AbstractServiceNowTask):
139
162
 
140
163
  """
141
164
 
142
- def __init__(self, seed: int, instance=None, fixed_config: dict = None) -> None:
165
+ def __init__(
166
+ self, seed: int = None, instance=None, fixed_config: dict = None, **kwargs
167
+ ) -> None:
143
168
  super().__init__(seed=seed, instance=instance, start_rel_url="/now/nav/ui/home")
144
169
  self.fixed_config = fixed_config
145
170
  with open(IMPERSONATION_CONFIG_PATH, "r") as f:
146
171
  self.all_configs = json.load(f)
172
+ self.__dict__.update(kwargs)
147
173
 
148
174
  def setup_goal(self, page: Page) -> tuple[str, dict]:
149
175
  super().setup_goal(page=page)
@@ -160,6 +186,15 @@ class ImpersonationTask(AbstractServiceNowTask):
160
186
 
161
187
  return goal, info
162
188
 
189
+ def get_pretty_printed_description(self) -> str:
190
+ """
191
+ Get the task info for this task when used in a private task; Used in L3 compositional tasks.
192
+ called by subclasses
193
+ """
194
+ task_info = f"- Impersonate the user {self.user_full_name} \n"
195
+
196
+ return task_info
197
+
163
198
  def cheat(self, page: Page, chat_messages: list[str]) -> None:
164
199
  super().cheat(page=page, chat_messages=chat_messages)
165
200
  impersonate_user(self.user_full_name, page)
@@ -167,7 +202,9 @@ class ImpersonationTask(AbstractServiceNowTask):
167
202
  def validate(
168
203
  self, page: playwright.sync_api.Page, chat_messages: list[str]
169
204
  ) -> Tuple[float, bool, str, dict]:
170
- user_info = self.page.evaluate("window.NOW")["user"]
205
+ page.wait_for_function("window.NOW && window.NOW.user")
206
+
207
+ user_info = page.evaluate("window.NOW")["user"]
171
208
 
172
209
  # If the current user is not being impersonated, fail.
173
210
  if not user_info["isImpersonating"]:
@@ -185,7 +222,12 @@ class ImpersonationTask(AbstractServiceNowTask):
185
222
 
186
223
  # If the name matches, success.
187
224
  if user_fullname == self.user_full_name:
188
- return 1, True, "Nice work, thank you!", {"message": "Correct user impersonated."}
225
+ return (
226
+ 1,
227
+ True,
228
+ "Nice work, thank you!",
229
+ {"message": "Correct user impersonated."},
230
+ )
189
231
 
190
232
  # Otherwise, fail.
191
233
  return 0, False, "", {"message": "Currently impersonating the wrong user."}
@@ -72,7 +72,10 @@ if __name__ == "__main__":
72
72
  expand_and_gather_paths(page, nested_parent_selector, new_path)
73
73
 
74
74
  if not collapsible_lists:
75
- current_path_item = {"path": current_path.copy(), "selector": parent_selector}
75
+ current_path_item = {
76
+ "path": current_path.copy(),
77
+ "selector": parent_selector,
78
+ }
76
79
  base_paths.append(current_path_item)
77
80
 
78
81
  def expand_menu():
@@ -190,7 +193,11 @@ if __name__ == "__main__":
190
193
  45:
191
194
  ] # get only the end of the url
192
195
  if url not in urls:
193
- menu_task = {"application": application, "module": module, "url": url}
196
+ menu_task = {
197
+ "application": application,
198
+ "module": module,
199
+ "url": url,
200
+ }
194
201
  all_menu_items.append(menu_task)
195
202
  urls[url] = True
196
203
 
@@ -28,7 +28,7 @@ from browsergym.workarena.tasks.dashboard import DashboardRetrievalTask
28
28
 
29
29
  N_CPU = 20
30
30
  MAX_CONFIGS = 1000
31
- REPORT = True # Set to True for reports, False for dashboards
31
+ REPORT = False # Set to True for reports, False for dashboards
32
32
 
33
33
 
34
34
  class DummyDashboard(DashboardRetrievalTask):
@@ -102,10 +102,10 @@ def get_dashboard_urls(instance):
102
102
  "18b1f472533130104c90ddeeff7b12a6", # Incident overview
103
103
  "287d07d1ff3130106c1ef9a7cddcbd5d", # Request overview
104
104
  "7ab78953eb32011008f2951ff15228e6", # Service catalog overview
105
- "2d297c880f1130101527008c07767e27", # Survey overview
105
+ # "2d297c880f1130101527008c07767e27", # Survey overview (almost empty post deleting reports that rely on time)
106
106
  "6b706f448f231110953ddffc9071a4f3", # Telemetry - Table growth
107
- "15c5d2d377213010a435478c4f5a993c", # Usage overview
108
- "85a57f9677100110ba155631dc5a9905", # Web api usage overview
107
+ # "15c5d2d377213010a435478c4f5a993c", # Usage overview
108
+ # "85a57f9677100110ba155631dc5a9905", # Web api usage overview (empty post deleting reports that rely on time)
109
109
  "c38ca3a273031010ae8dd21efaf6a747", # Data classification
110
110
  "3d48f669538223008329ddeeff7b1253", # Problem overview
111
111
  ]
@@ -131,6 +131,7 @@ def get_all_configs_by_url(url, is_report):
131
131
  "chart_series": "",
132
132
  "question": "max",
133
133
  },
134
+ seed=0,
134
135
  )
135
136
  task.setup(page=page)
136
137
 
@@ -196,7 +197,7 @@ def get_all_configs_by_url(url, is_report):
196
197
  )
197
198
  except Exception as e:
198
199
  print("Exception in worker", url, chart_title, e)
199
- return []
200
+ continue # Skip this chart
200
201
 
201
202
  if len(questions) == 0:
202
203
  return []
@@ -65,7 +65,8 @@ def generate_configs_for_all_items():
65
65
  "w",
66
66
  ) as f:
67
67
  all_configs_for_a_single_item = sorted(
68
- all_configs_for_a_single_item, key=lambda x: x["item"] + str(x["quantity"])
68
+ all_configs_for_a_single_item,
69
+ key=lambda x: x["item"] + str(x["quantity"]),
69
70
  )
70
71
  json.dump(all_configs_for_a_single_item, f, indent=4, sort_keys=True)
71
72
 
@@ -156,7 +156,11 @@ def validate_on_page(task_class, task_config, page):
156
156
 
157
157
 
158
158
  def validate_configs(
159
- task_class, config_path, num_tasks: int = None, save_failed_tasks: bool = True, page=None
159
+ task_class,
160
+ config_path,
161
+ num_tasks: int = None,
162
+ save_failed_tasks: bool = True,
163
+ page=None,
160
164
  ) -> list[dict]:
161
165
  """Validate that the configs are working. Saves failing configs to json so they can be tested."""
162
166
  with open(config_path, "r") as f:
@@ -167,7 +171,9 @@ def validate_configs(
167
171
 
168
172
  failed_tasks = {"cheat": [], "no_reward": [], "exception": [], "not_done": []}
169
173
  with tqdm(
170
- total=len(all_configs), desc=f"Validating {task_class.__name__} configs", ncols=150
174
+ total=len(all_configs),
175
+ desc=f"Validating {task_class.__name__} configs",
176
+ ncols=150,
171
177
  ) as pbar:
172
178
  for task_config in all_configs:
173
179
  try:
@@ -0,0 +1,90 @@
1
+ from typing import Tuple
2
+ from playwright.sync_api import Page
3
+
4
+ from .base import AbstractServiceNowTask
5
+ from .comp_building_block import CompositionalBuildingBlockTask
6
+
7
+ from ..instance import SNowInstance
8
+
9
+
10
class SendChatMessageTask(AbstractServiceNowTask, CompositionalBuildingBlockTask):
    """Task to send a chat message in the chat. Only used as a compositional building block for the cheat function.
    Args:
    --------
    instance (SNowInstance):
        The instance to use
    message (str):
        The message to send in the chat
    answer_format (str):
        The type of answer to generate. Choice of total_return_only, total_return_and_investments, investments_only, cleanup, cleanup_and_return
    use_description_in_l3 (bool):
        Flag stored on the task; read by subclasses when building their description
    **kwargs:
        Extra values exposed as attributes of the task
    """

    def __init__(
        self,
        instance: SNowInstance,
        message: str,
        answer_format: str,
        use_description_in_l3: bool = False,
        **kwargs,
    ) -> None:
        # The seed is fixed and there is no start URL for this building block
        super().__init__(seed=0, instance=instance, start_rel_url="")
        self.message = message
        self.answer_format = answer_format
        self.use_description_in_l3 = use_description_in_l3
        # Any extra keyword argument is exposed as an attribute of the task
        self.__dict__.update(kwargs)

    def setup_goal(self, page: Page):
        """Return the pretty-printed description as the goal, along with an empty info dict."""
        return self.get_pretty_printed_description(), {}

    def validate(self, page: Page, chat_messages: list[str]) -> Tuple[float, bool, str, dict]:
        """Delegate the validation to the parent classes."""
        return super().validate(page, chat_messages)

    def cheat(self, page: Page, chat_messages: list[str]):
        """Append the configured message to the chat as an assistant message."""
        super().cheat(page=page, chat_messages=chat_messages)
        # NOTE: the appended entry is a dict with "role" and "message" keys, not a plain string
        chat_messages.append({"role": "assistant", "message": str(self.message)})

    def teardown(self) -> None:
        """Nothing to clean up for this task."""
        pass

    def get_pretty_printed_description(self) -> str:
        """
        Get the task info for this task when used in a private task; Used in compositional tasks.
        Must be implemented by subclasses.
        """
        raise NotImplementedError
52
+
53
+
54
class SendChatMessageForBudgetAllocationTask(SendChatMessageTask):
    """Chat message building block whose description asks to allocate a budget to maximize revenue."""

    # Instruction appended to the description for each supported answer format
    _ANSWER_FORMAT_INSTRUCTIONS = {
        "total_return_only": " Provide only the total return of the investments in the chat.",
        "total_return_and_investments": " Provide the total return of the investments as well as the value of their 'Number' field in the chat.",
        "investments_only": " Provide only the value of the 'Number' field of the selected investments in the chat.",
        "cleanup": " Delete the investments that will not be kept so that only the selected investments remain.",
        "cleanup_and_return": " Delete the investments that will not be kept so that only the selected investments remain as well as returning their total value in the chat.",
    }

    def get_pretty_printed_description(self) -> str:
        """
        Get the task info for this task when used in a private task; Used in compositional tasks.

        The description depends on the level (2 or 3) and is followed by the instruction matching the
        answer format; an unknown answer format adds nothing.

        Raises:
        -------
        ValueError
            If the level is neither 2 nor 3.
        """
        if self.level == 3:
            task_info = "Allocate the budget to maximize revenue."
        elif self.level == 2:
            task_info = f"Allocate the budget to maximize revenue. This involves going over expense lines and identifying the ones maximizing revenue while fitting in the allowed budget of {self.budget} $. The returns are written in their short description."
        else:
            raise ValueError(f"Unsupported level {self.level!r}; expected 2 or 3.")

        # The answer formats are mutually exclusive, so a single lookup is enough
        task_info += self._ANSWER_FORMAT_INSTRUCTIONS.get(self.answer_format, "")

        return task_info
75
+
76
+
77
class SendChatMessageGenericTask(SendChatMessageTask):
    """Chat message building block whose description is read from the task's ``description`` attribute."""

    def get_pretty_printed_description(self) -> str:
        """
        Get the task info for this task when used in a private task; Used in compositional tasks.

        The description attribute is returned when use_description_in_l3 is set or when the level is 2;
        at level 3 the description is empty.

        Raises:
        -------
        ValueError
            If use_description_in_l3 is not set and the level is neither 2 nor 3.
        """
        # The flag takes precedence over the level
        if self.use_description_in_l3 or self.level == 2:
            return self.description
        if self.level == 3:
            return ""

        raise ValueError(f"Unsupported level {self.level!r}; expected 2 or 3.")