browsergym-workarena 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browsergym/workarena/__init__.py +13 -1
- browsergym/workarena/api/category.py +74 -0
- browsergym/workarena/api/change_request.py +87 -0
- browsergym/workarena/api/computer_asset.py +90 -0
- browsergym/workarena/api/cost_center.py +19 -0
- browsergym/workarena/api/expense_line.py +89 -0
- browsergym/workarena/api/incident.py +45 -0
- browsergym/workarena/api/knowledge.py +29 -0
- browsergym/workarena/api/problem.py +90 -0
- browsergym/workarena/api/report.py +183 -0
- browsergym/workarena/api/requested_items.py +63 -0
- browsergym/workarena/api/user.py +11 -8
- browsergym/workarena/api/utils.py +47 -3
- browsergym/workarena/config.py +21 -1
- browsergym/workarena/data_files/setup_files/forms/expected_incident_form_fields.json +1 -1
- browsergym/workarena/data_files/setup_files/forms/expected_request_item_form_fields.json +1 -0
- browsergym/workarena/data_files/setup_files/knowledge/protocols.json +46 -0
- browsergym/workarena/data_files/setup_files/knowledge/test.html +1 -0
- browsergym/workarena/data_files/setup_files/lists/expected_asset_list_columns.json +2 -24
- browsergym/workarena/data_files/setup_files/lists/expected_change_request_list_columns.json +4 -40
- browsergym/workarena/data_files/setup_files/lists/expected_expense_line_list_columns.json +12 -0
- browsergym/workarena/data_files/setup_files/lists/expected_hardware_list_columns.json +1 -42
- browsergym/workarena/data_files/setup_files/lists/expected_incident_list_columns.json +2 -18
- browsergym/workarena/data_files/setup_files/lists/expected_problem_list_columns.json +12 -0
- browsergym/workarena/data_files/setup_files/lists/expected_requested_items_list_columns.json +12 -0
- browsergym/workarena/data_files/setup_files/lists/expected_service_catalog_list_columns.json +2 -19
- browsergym/workarena/data_files/setup_files/lists/expected_user_list_columns.json +3 -50
- browsergym/workarena/data_files/task_configs/all_menu.json +1 -1
- browsergym/workarena/data_files/task_configs/dashboard_retrieval_minmax_task.json +1 -1
- browsergym/workarena/data_files/task_configs/dashboard_retrieval_value_task.json +1 -1
- browsergym/workarena/data_files/task_configs/filter_service_catalog_item_list_task.json +1 -1
- browsergym/workarena/data_files/task_configs/impersonation_users.json +1 -1
- browsergym/workarena/data_files/task_configs/report_retrieval_minmax_task.json +1 -1
- browsergym/workarena/data_files/task_configs/report_retrieval_value_task.json +1 -1
- browsergym/workarena/human_eval/console.js +176 -0
- browsergym/workarena/human_eval/tool.py +366 -0
- browsergym/workarena/install.py +81 -20
- browsergym/workarena/tasks/base.py +55 -20
- browsergym/workarena/tasks/comp_building_block.py +4 -0
- browsergym/workarena/tasks/compositional/__init__.py +76 -0
- browsergym/workarena/tasks/compositional/base.py +364 -0
- browsergym/workarena/tasks/compositional/dash_do_base.py +1366 -0
- browsergym/workarena/tasks/compositional/dash_do_catalog.py +1127 -0
- browsergym/workarena/tasks/compositional/dash_do_catalog_infeasible.py +2047 -0
- browsergym/workarena/tasks/compositional/dash_do_create_incident.py +403 -0
- browsergym/workarena/tasks/compositional/dash_do_create_incident_infeasible.py +278 -0
- browsergym/workarena/tasks/compositional/dash_do_create_problem.py +336 -0
- browsergym/workarena/tasks/compositional/dash_do_create_problem_infeasible.py +235 -0
- browsergym/workarena/tasks/compositional/dash_do_filter.py +1600 -0
- browsergym/workarena/tasks/compositional/dash_do_request_item.py +1315 -0
- browsergym/workarena/tasks/compositional/dash_do_request_item_infeasible.py +693 -0
- browsergym/workarena/tasks/compositional/delete_record.py +341 -0
- browsergym/workarena/tasks/compositional/edit_knowledge_base.py +457 -0
- browsergym/workarena/tasks/compositional/expense_management.py +598 -0
- browsergym/workarena/tasks/compositional/filter_and_do.py +139 -0
- browsergym/workarena/tasks/compositional/find_and_order_item.py +345 -0
- browsergym/workarena/tasks/compositional/manage_change_request_schedule.py +1417 -0
- browsergym/workarena/tasks/compositional/mark_duplicate_problems.py +499 -0
- browsergym/workarena/tasks/compositional/maximize_investment_return.py +1763 -0
- browsergym/workarena/tasks/compositional/navigate_and_do.py +1151 -0
- browsergym/workarena/tasks/compositional/navigate_and_do_infeasible.py +2100 -0
- browsergym/workarena/tasks/compositional/offboard_user.py +207 -0
- browsergym/workarena/tasks/compositional/onboard_user.py +226 -0
- browsergym/workarena/tasks/compositional/update_task.py +145 -0
- browsergym/workarena/tasks/compositional/utils/curriculum.py +215 -0
- browsergym/workarena/tasks/compositional/utils/infeasible_configs.py +151 -0
- browsergym/workarena/tasks/compositional/utils/knapsack.py +192 -0
- browsergym/workarena/tasks/compositional/warranty_check.py +227 -0
- browsergym/workarena/tasks/compositional/work_assignment.py +804 -0
- browsergym/workarena/tasks/compositional/workload_balancing.py +396 -0
- browsergym/workarena/tasks/dashboard.py +188 -8
- browsergym/workarena/tasks/form.py +1024 -232
- browsergym/workarena/tasks/knowledge.py +216 -25
- browsergym/workarena/tasks/list.py +519 -102
- browsergym/workarena/tasks/mark_duplicate_problem.py +171 -0
- browsergym/workarena/tasks/navigation.py +55 -13
- browsergym/workarena/tasks/scripts/extract_all_menu_items.py +9 -2
- browsergym/workarena/tasks/scripts/generate_dashboard_configs.py +6 -5
- browsergym/workarena/tasks/scripts/service_catalog.py +2 -1
- browsergym/workarena/tasks/scripts/validate.py +8 -2
- browsergym/workarena/tasks/send_chat_message.py +90 -0
- browsergym/workarena/tasks/service_catalog.py +94 -26
- browsergym/workarena/tasks/utils/form.py +1 -4
- browsergym/workarena/tasks/utils/private_tasks.py +63 -0
- browsergym/workarena/tasks/utils/utils.py +13 -0
- {browsergym_workarena-0.2.1.dist-info → browsergym_workarena-0.3.0.dist-info}/METADATA +19 -18
- browsergym_workarena-0.3.0.dist-info/RECORD +138 -0
- {browsergym_workarena-0.2.1.dist-info → browsergym_workarena-0.3.0.dist-info}/entry_points.txt +1 -0
- browsergym_workarena-0.2.1.dist-info/RECORD +0 -85
- {browsergym_workarena-0.2.1.dist-info → browsergym_workarena-0.3.0.dist-info}/WHEEL +0 -0
- {browsergym_workarena-0.2.1.dist-info → browsergym_workarena-0.3.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,1763 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
from faker import Faker
|
|
4
|
+
from typing import List, Tuple
|
|
5
|
+
|
|
6
|
+
fake = Faker()
|
|
7
|
+
|
|
8
|
+
from playwright.sync_api._generated import Page
|
|
9
|
+
|
|
10
|
+
from browsergym.workarena.tasks.send_chat_message import SendChatMessageForBudgetAllocationTask
|
|
11
|
+
|
|
12
|
+
from .base import HumanEvalTask
|
|
13
|
+
from .delete_record import DeleteExpenseLineKnapsack
|
|
14
|
+
from .filter_and_do import FilterAndDoTask
|
|
15
|
+
from .utils.knapsack import KnapsackInstanceGenarator
|
|
16
|
+
|
|
17
|
+
from ..base import AbstractServiceNowTask
|
|
18
|
+
|
|
19
|
+
from ...api.expense_line import create_expense_line
|
|
20
|
+
from ...api.utils import table_api_call, db_delete_from_table
|
|
21
|
+
from ...config import (
|
|
22
|
+
# Expected columns for the different lists
|
|
23
|
+
EXPECTED_EXPENSE_LINE_COLUMNS_PATH,
|
|
24
|
+
)
|
|
25
|
+
from ...instance import SNowInstance
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class FilterExpensesAndAllocateInvestmentsTask(FilterAndDoTask):
    """Task to filter expenses and allocate investments.

    Expense lines are created from a generated knapsack instance: each line's
    amount is the investment cost and its short description embeds the return.

    Args:
    num_expenses: list[int]
        The range to choose the number of expenses from
    budget: int
        The budget to allocate to the expenses
    mode: str
        Mode of generation. Choice of "random", "trivial", "single_item", "single_item_uniform", "n_items"
        - random: Randomly generate the instance and return it; guaranteed to have a unique optimal solution
        - trivial: Generate a trivial instance with all items fitting in the knapsack; return the instance
        - single_item: Generate an instance where the optimal solution has only one item
        - n_items: Generate an instance with all items having uniform weight and value; n items fitting in the knapsack
        - single_item_uniform: Generate an instance with all items having uniform weight and value; optimal solution has only one item and it can be any
    answer_format: str
        The type of answer to generate. Choice of total_return_only, total_return_and_investments, investments_only, cleanup, cleanup_and_return
    num_items_uniform: int
        The number of items to generate in the "n_items" mode
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: list[AbstractServiceNowTask] = None,
        num_expenses: list[int] = [3, 4],
        budget: int = 150000,
        mode: str = "random",
        num_items_uniform: int = None,
        answer_format: str = None,
        level: int = 2,
    ) -> None:
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            navigation_config={
                "module": "Expense Lines",
                "application": "Cost",
            },
            level=level,
            protocol_name="Maximizing total investment return",
        )
        # NOTE(review): the "+ 1" suggests self.random.randint has an exclusive
        # upper bound (numpy-style) -- confirm against the parent class.
        self.num_expenses = self.random.randint(num_expenses[0], num_expenses[1] + 1)
        # In these settings, we need to vary the budget
        if mode in ["single_item_uniform", "n_items"]:
            # Floor division keeps the bound an integer; "budget / 5" produced a
            # float, which integer samplers may reject or silently truncate.
            min_budget = budget // 5
            max_budget = budget * 5
            self.budget = self.random.randint(min_budget, max_budget)
        else:
            self.budget = budget
        self.mode = mode
        self.answer_format = answer_format
        self.num_items_uniform = 1 if mode == "single_item_uniform" else num_items_uniform

        self.expense_hashtag = "#" + self.unique_id[:10]
        self.short_description = "Allocate investments to maximize returns"
        self.expense_line_sys_ids = []
        self.expense_line_numbers = []
        # List of correct investments to check for in the chat messages
        self.correct_investments = []
        # List of incorrect investments to check for in the chat messages
        self.incorrect_investments = []
        self.potential_investments = None  # List of tuples (cost, return) of potential investments
        self.max_return = None  # Maximum return possible with optimal solution
        # List of alternative formats for the maximum return to check for in the chat messages
        self.alternative_max_return_formats = []
        # Indices of the selected investments in the optimal solution
        self.selected_investment_indices = None
        # flag to check if the investments are correctly selected and total return is correct
        self.investments_correctly_selected = False
        self.total_return_correct = False

    def _setup_list(self) -> None:
        """Configure the list filter, generate the knapsack instance and create the expense lines."""
        self.filter_config = {
            "list_url": "/now/nav/ui/classic/params/target/fm_expense_line_list.do",
            "expected_fields_path": EXPECTED_EXPENSE_LINE_COLUMNS_PATH,
            "filter_columns": [
                "short_description",
            ],
            "filter_kind": "AND",
            "filter_operators": ["contains"],
            "filter_values": [
                f"{self.expense_hashtag}",
            ],
        }
        knapsack = KnapsackInstanceGenarator(
            random=self.random,
            num_items=self.num_expenses,
            max_capacity=self.budget,
            mode=self.mode,
            num_items_in_solution=self.num_items_uniform,
        )
        # investments is a list of tuples, where each tuple is (cost, return)
        self.potential_investments, self.max_return, self.selected_investment_indices = (
            knapsack.get_instance()
        )
        # Accepted answer formats for the maximum return
        with_commas = "{:,}".format(self.max_return)
        self.alternative_max_return_formats = [
            str(self.max_return),  # No comma
            with_commas,  # Comma as thousand separator
            with_commas.replace(",", ", "),  # Comma as thousand separator with space after
            with_commas.replace(",", " "),  # Space as thousand separator
        ]

        for i, investment in enumerate(self.potential_investments):
            expense_number = f"EXP-{i}{self.unique_id[:10]}"
            # Include the return inside the short description
            short_description = f"Build {fake.sentence(2)} - Return: {investment[1]}$ "
            expense_sys_id, expense_number = create_expense_line(
                instance=self.instance,
                amount=investment[0],
                number=expense_number,
                date=str(fake.date_this_year(before_today=True, after_today=False)),
                short_description=short_description,
                expense_hashtag=self.expense_hashtag,
                user_sys_id=self._base_user_sysid,
            )
            self.expense_line_sys_ids.append(expense_sys_id)
            self.expense_line_numbers.append(expense_number)

        # In this setting there is only one valid answer
        if self.mode in ["random", "trivial", "single_item"]:
            for i, expense_number in enumerate(self.expense_line_numbers):
                if i in self.selected_investment_indices:
                    self.correct_investments.append(expense_number)
                else:
                    self.incorrect_investments.append(expense_number)
        # In this setting, many answers are possible, it's only a matter of respecting the number of items in the solution
        # We store values here just so the cheat function can work uniformly
        elif self.mode in ["n_items", "single_item_uniform"]:
            for i, expense_number in enumerate(self.expense_line_numbers):
                if i < self.num_items_uniform:
                    self.correct_investments.append(expense_number)
                else:
                    self.incorrect_investments.append(expense_number)

    def validate(self, page: Page, chat_messages: List[dict]) -> Tuple[float, bool, str, dict]:
        """Delegate validation to the parent class and return its result."""
        # The original dropped the parent's result and implicitly returned None.
        return super().validate(page, chat_messages)

    def check_total_return(
        self, page: Page, chat_messages: List[dict]
    ) -> Tuple[float, bool, str, dict]:
        """Simple check that validates that the total return is correct.

        Once a correct answer has been seen, the result is remembered in
        ``self.total_return_correct`` and later calls succeed immediately.
        Only the last chat message is inspected, and only if its role is
        "assistant".
        """
        if self.total_return_correct:
            return (
                1,
                True,
                "That is correct, thank you!",
                {"message": "Correct total return."},
            )

        if not (chat_messages and chat_messages[-1]["role"] == "assistant"):
            return (
                0,
                False,
                "",
                {"message": "The assistant did not provide an answer."},
            )
        answer = chat_messages[-1]["message"]

        # Substring match against every accepted rendering of the maximum return
        if any(accepted in answer for accepted in self.alternative_max_return_formats):
            self.total_return_correct = True
            return (
                1,
                True,
                "That is correct, thank you!",
                {"message": "Correct answer."},
            )

        return (
            0,
            False,
            "",
            {"message": "Incorrect answer."},
        )

    def check_correct_investments_sent_in_chat(
        self, page: Page, chat_messages: List[dict]
    ) -> Tuple[float, bool, str, dict]:
        """Check that the correct investments have been selected and their numbers have been sent in the chat.

        A successful check is remembered in ``self.investments_correctly_selected``
        so that later calls succeed without re-reading the chat.
        """
        success = (
            1,
            True,
            "That is correct, thank you!",
            {"message": "Correct investments selected."},
        )
        if self.investments_correctly_selected:
            return success

        if not (chat_messages and chat_messages[-1]["role"] == "assistant"):
            return (
                0,
                False,
                "",
                {"message": "The assistant did not provide an answer."},
            )
        answer = chat_messages[-1]["message"]

        # In these settings, there is only one valid answer
        if self.mode in ["random", "trivial", "single_item"]:
            # Check that the correct investments have been selected
            for investment in self.correct_investments:
                if investment not in answer:
                    return (
                        0,
                        False,
                        "",
                        {"message": "Investment missing from selected list."},
                    )
            # Check that the incorrect investments have not been selected
            for investment in self.incorrect_investments:
                if investment in answer:
                    return (
                        0,
                        False,
                        "",
                        {"message": "Incorrect investment selected."},
                    )
        # In those settings, many answers are possible, it's only a matter of respecting the number of items in the solution
        elif self.mode in ["n_items", "single_item_uniform"]:
            # Extract the expense line numbers from the answer
            pattern = r"EXP-\w+-\w+"
            matches = re.findall(pattern, answer)
            if len(matches) != self.num_items_uniform:
                return (
                    0,
                    False,
                    "",
                    {"message": "Incorrect number of investments selected."},
                )

        # The original wrote to the misspelled "correct_investments_selected",
        # so the flag tested at the top of this method was never set.
        self.investments_correctly_selected = True
        return success

    def check_only_right_investment_kept(
        self, page: Page, chat_messages: List[dict]
    ) -> Tuple[float, bool, str, dict]:
        """Checks that only the expected investments were kept; i.e. the others were deleted"""
        for i, investment_sys_id in enumerate(self.expense_line_sys_ids):
            record_expected = i in self.selected_investment_indices
            record_exists = table_api_call(
                instance=self.instance,
                table="fm_expense_line",
                params={"sysparm_query": f"sys_id={investment_sys_id}"},
            )["result"]
            # Missing investment that should be kept
            if record_expected and not record_exists:
                # Reported with done=True, unlike the other failure below
                return (
                    0,
                    True,
                    "",
                    {"message": "Expected investment has been deleted."},
                )
            # Unexpected investment that should be deleted
            if not record_expected and record_exists:
                return (
                    0,
                    False,
                    "",
                    {"message": "Unexpected investment is present."},
                )

        return (
            1,
            True,
            "That is correct, thank you!",
            {"message": "Correct investments kept."},
        )

    def teardown(self) -> None:
        """Delete the expense lines created by this task that still exist, then run the parent teardown."""
        for expense_sys_id in self.expense_line_sys_ids:
            record_exists = table_api_call(
                instance=self.instance,
                table="fm_expense_line",
                params={"sysparm_query": f"sys_id={expense_sys_id}"},
            )["result"]
            if record_exists:
                db_delete_from_table(
                    instance=self.instance,
                    table="fm_expense_line",
                    sys_id=expense_sys_id,
                )
        super().teardown()
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
class FilterExpensesAndFindTotalReturnTask(FilterExpensesAndAllocateInvestmentsTask):
    """Investment allocation variant where only the total return has to be given in the chat."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: list[AbstractServiceNowTask] = None,
        num_expenses: list[int] = [3, 4],
        budget: int = 150000,
        mode: str = "random",
        answer_format: str = "total_return_only",
        num_items_uniform: int = 1,
        level: int = 2,
    ):
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            num_expenses=num_expenses,
            budget=budget,
            mode=mode,
            num_items_uniform=num_items_uniform,
            answer_format=answer_format,
            level=level,
        )
        # Built from adjacent literals; the resulting text is unchanged.
        self.task_description = (
            f'Follow protocol "{self.protocol_name}" (located in the "Company Protocols" knowledge base) '
            f"to allocate investments to the expenses with short description containing {self.expense_hashtag} "
            f"to maximize returns while fitting inside the budget of {self.budget}$. "
            "Give total return of selected investments only. "
        )

    def _setup_list(self) -> None:
        """Run the parent setup, then set the single chat-message subtask stating the total return."""
        super()._setup_list()
        chat_task = SendChatMessageForBudgetAllocationTask(
            instance=self.instance,
            message=f"The total value of the investments is {self.max_return}$",
            used_in_level_2=True,
            is_validated=False,
            budget=self.budget,
            answer_format=self.answer_format,
            level=self.level,
        )
        self.tasks = [chat_task]

    def validate(self, page: Page, chat_messages: List[str]) -> Tuple[float, bool, str, dict]:
        """Check the total return first; defer to FilterAndDoTask.validate only when it is correct."""
        reward, done, message, info = self.check_total_return(page, chat_messages)
        if reward != 1 or not done:
            return reward, done, message, info
        return FilterAndDoTask.validate(self, page, chat_messages)
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
class FilterRandomExpensesAndFindTotalReturnSmallTask(
    FilterExpensesAndFindTotalReturnTask, HumanEvalTask
):
    """Total-return preset: mode "random", expense range [3, 5], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            num_expenses=[3, 5],
            budget=150000,
            mode="random",
            level=level,
        )
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
class FilterRandomExpensesAndFindTotalReturnMediumTask(FilterExpensesAndFindTotalReturnTask):
    """Total-return preset: mode "random", expense range [6, 8], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            num_expenses=[6, 8],
            budget=150000,
            mode="random",
            level=level,
        )
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
class FilterRandomExpensesAndFindTotalReturnLargeTask(FilterExpensesAndFindTotalReturnTask):
    """Total-return preset: mode "random", expense range [9, 12], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            num_expenses=[9, 12],
            budget=150000,
            mode="random",
            level=level,
        )
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
class FilterTrivialExpensesAndFindTotalReturnSmallTask(
    FilterExpensesAndFindTotalReturnTask, HumanEvalTask
):
    """Total-return preset: mode "trivial", expense range [3, 5], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            num_expenses=[3, 5],
            budget=150000,
            mode="trivial",
            level=level,
        )
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
class FilterTrivialExpensesAndFindTotalReturnMediumTask(FilterExpensesAndFindTotalReturnTask):
    """Total-return preset: mode "trivial", expense range [6, 8], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            num_expenses=[6, 8],
            budget=150000,
            mode="trivial",
            level=level,
        )
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
class FilterTrivialExpensesAndFindTotalReturnLargeTask(FilterExpensesAndFindTotalReturnTask):
    """Total-return preset: mode "trivial", expense range [9, 12], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            num_expenses=[9, 12],
            budget=150000,
            mode="trivial",
            level=level,
        )
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
class FilterSingleItemExpensesAndFindTotalReturnSmallTask(
    FilterExpensesAndFindTotalReturnTask, HumanEvalTask
):
    """Total-return preset: mode "single_item", expense range [3, 5], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            num_expenses=[3, 5],
            budget=150000,
            mode="single_item",
            level=level,
        )
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
class FilterSingleItemExpensesAndFindTotalReturnMediumTask(FilterExpensesAndFindTotalReturnTask):
    """Total-return preset: mode "single_item", expense range [6, 8], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            num_expenses=[6, 8],
            budget=150000,
            mode="single_item",
            level=level,
        )
|
|
523
|
+
|
|
524
|
+
|
|
525
|
+
class FilterSingleItemExpensesAndFindTotalReturnLargeTask(FilterExpensesAndFindTotalReturnTask):
    """Total-return preset: mode "single_item", expense range [9, 12], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            num_expenses=[9, 12],
            budget=150000,
            mode="single_item",
            level=level,
        )
|
|
542
|
+
|
|
543
|
+
|
|
544
|
+
class FilterSingleItemUniformExpensesAndFindTotalReturnSmallTask(
    FilterExpensesAndFindTotalReturnTask, HumanEvalTask
):
    """Total-return preset: mode "single_item_uniform", expense range [3, 5], base budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            num_expenses=[3, 5],
            budget=150000,
            mode="single_item_uniform",
            level=level,
        )
|
|
563
|
+
|
|
564
|
+
|
|
565
|
+
class FilterSingleItemUniformExpensesAndFindTotalReturnMediumTask(
    FilterExpensesAndFindTotalReturnTask
):
    """Total-return preset: mode "single_item_uniform", expense range [6, 8], base budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            num_expenses=[6, 8],
            budget=150000,
            mode="single_item_uniform",
            level=level,
        )
|
|
584
|
+
|
|
585
|
+
|
|
586
|
+
class FilterSingleItemUniformExpensesAndFindTotalReturnLargeTask(
    FilterExpensesAndFindTotalReturnTask
):
    """Total-return preset: mode "single_item_uniform", expense range [9, 12], base budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            num_expenses=[9, 12],
            budget=150000,
            mode="single_item_uniform",
            level=level,
        )
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
class FilterTwoItemsUniformExpensesAndFindTotalReturnSmallTask(
    FilterExpensesAndFindTotalReturnTask, HumanEvalTask
):
    """Total-return preset: mode "n_items" with 2 items, expense range [3, 5], base budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            num_expenses=[3, 5],
            budget=150000,
            mode="n_items",
            num_items_uniform=2,
            level=level,
        )
|
|
627
|
+
|
|
628
|
+
|
|
629
|
+
class FilterThreeItemsUniformExpensesAndFindTotalReturnMediumTask(
    FilterExpensesAndFindTotalReturnTask
):
    """Total-return preset: mode "n_items" with 3 items, expense range [6, 8], base budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            num_expenses=[6, 8],
            budget=150000,
            mode="n_items",
            num_items_uniform=3,
            level=level,
        )
|
|
649
|
+
|
|
650
|
+
|
|
651
|
+
class FilterThreeItemsUniformExpensesAndFindTotalReturnLargeTask(
    FilterExpensesAndFindTotalReturnTask
):
    """Total-return preset: mode "n_items" with 3 items, expense range [9, 12], base budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            num_expenses=[9, 12],
            budget=150000,
            mode="n_items",
            num_items_uniform=3,
            level=level,
        )
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
class FilterExpensesAndSelectInvestmentsTask(FilterExpensesAndAllocateInvestmentsTask):
    """Investment allocation variant where only the selected investments have to be given in the chat."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: list[AbstractServiceNowTask] = None,
        num_expenses: list[int] = [3, 4],
        budget: int = 150000,
        mode: str = "random",
        num_items_uniform: int = None,
        level: int = 2,
    ):
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            num_expenses=num_expenses,
            budget=budget,
            mode=mode,
            num_items_uniform=num_items_uniform,
            answer_format="investments_only",
            level=level,
        )
        # Built from adjacent literals; the resulting text is unchanged.
        self.task_description = (
            f'Follow protocol "{self.protocol_name}" (located in the "Company Protocols" knowledge base) '
            f"to allocate investments to the expenses with short description containing {self.expense_hashtag} "
            f"to maximize returns while fitting inside the budget of {self.budget}$. "
            "Give selected investments only. "
        )

    def _setup_list(self) -> None:
        """Run the parent setup, then append a chat-message subtask listing the correct investments."""
        super()._setup_list()
        listed_investments = ", ".join(self.correct_investments)
        chat_task = SendChatMessageForBudgetAllocationTask(
            instance=self.instance,
            message=f"The correct investments to select are: {listed_investments}",
            used_in_level_2=True,
            is_validated=False,
            budget=self.budget,
            answer_format=self.answer_format,
            level=self.level,
        )
        self.tasks.append(chat_task)

    def validate(self, page: Page, chat_messages: List[str]) -> Tuple[float, bool, str, dict]:
        """Check the investments named in the chat first; defer to FilterAndDoTask.validate only when correct."""
        reward, done, message, info = self.check_correct_investments_sent_in_chat(
            page, chat_messages
        )
        if reward != 1 or not done:
            return reward, done, message, info
        return FilterAndDoTask.validate(self, page, chat_messages)
|
|
721
|
+
|
|
722
|
+
|
|
723
|
+
class FilterRandomExpensesAndSelectInvestmentsSmallTask(
    FilterExpensesAndSelectInvestmentsTask, HumanEvalTask
):
    """Preset variant: mode="random", num_expenses=[3, 5], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Fixed settings of this variant; only `level` comes from the caller.
        preset = {
            "num_expenses": [3, 5],
            "budget": 150000,
            "mode": "random",
            "level": level,
        }
        super().__init__(seed, instance, fixed_config, **preset)
|
|
742
|
+
|
|
743
|
+
|
|
744
|
+
class FilterRandomExpensesAndSelectInvestmentsMediumTask(FilterExpensesAndSelectInvestmentsTask):
    """Preset variant: mode="random", num_expenses=[6, 8], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Everything is passed by name to the parent constructor.
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            mode="random",
            budget=150000,
            num_expenses=[6, 8],
        )
|
|
761
|
+
|
|
762
|
+
|
|
763
|
+
class FilterRandomExpensesAndSelectInvestmentsLargeTask(FilterExpensesAndSelectInvestmentsTask):
    """Preset variant: mode="random", num_expenses=[9, 12], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Caller-supplied values go through positionally; the preset is fixed here.
        forwarded = (seed, instance, fixed_config)
        super().__init__(
            *forwarded,
            level=level,
            mode="random",
            num_expenses=[9, 12],
            budget=150000,
        )
|
|
780
|
+
|
|
781
|
+
|
|
782
|
+
class FilterTrivialExpensesAndSelectInvestmentsSmallTask(
    FilterExpensesAndSelectInvestmentsTask, HumanEvalTask
):
    """Preset variant: mode="trivial", num_expenses=[3, 5], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Fixed settings of this variant; only `level` comes from the caller.
        preset = {
            "num_expenses": [3, 5],
            "budget": 150000,
            "mode": "trivial",
            "level": level,
        }
        super().__init__(seed, instance, fixed_config, **preset)
|
|
801
|
+
|
|
802
|
+
|
|
803
|
+
class FilterTrivialExpensesAndSelectInvestmentsMediumTask(FilterExpensesAndSelectInvestmentsTask):
    """Preset variant: mode="trivial", num_expenses=[6, 8], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Everything is passed by name to the parent constructor.
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            mode="trivial",
            budget=150000,
            num_expenses=[6, 8],
        )
|
|
820
|
+
|
|
821
|
+
|
|
822
|
+
class FilterTrivialExpensesAndSelectInvestmentsLargeTask(FilterExpensesAndSelectInvestmentsTask):
    """Preset variant: mode="trivial", num_expenses=[9, 12], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Caller-supplied values go through positionally; the preset is fixed here.
        forwarded = (seed, instance, fixed_config)
        super().__init__(
            *forwarded,
            level=level,
            mode="trivial",
            num_expenses=[9, 12],
            budget=150000,
        )
|
|
839
|
+
|
|
840
|
+
|
|
841
|
+
class FilterSingleItemExpensesAndSelectInvestmentsSmallTask(
    FilterExpensesAndSelectInvestmentsTask, HumanEvalTask
):
    """Preset variant: mode="single_item", num_expenses=[3, 5], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Fixed settings of this variant; only `level` comes from the caller.
        preset = {
            "num_expenses": [3, 5],
            "budget": 150000,
            "mode": "single_item",
            "level": level,
        }
        super().__init__(seed, instance, fixed_config, **preset)
|
|
860
|
+
|
|
861
|
+
|
|
862
|
+
class FilterSingleItemExpensesAndSelectInvestmentsMediumTask(
    FilterExpensesAndSelectInvestmentsTask
):
    """Preset variant: mode="single_item", num_expenses=[6, 8], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Everything is passed by name to the parent constructor.
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            mode="single_item",
            budget=150000,
            num_expenses=[6, 8],
        )
|
|
881
|
+
|
|
882
|
+
|
|
883
|
+
class FilterSingleItemExpensesAndSelectInvestmentsLargeTask(FilterExpensesAndSelectInvestmentsTask):
    """Preset variant: mode="single_item", num_expenses=[9, 12], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Caller-supplied values go through positionally; the preset is fixed here.
        forwarded = (seed, instance, fixed_config)
        super().__init__(
            *forwarded,
            level=level,
            mode="single_item",
            num_expenses=[9, 12],
            budget=150000,
        )
|
|
900
|
+
|
|
901
|
+
|
|
902
|
+
class FilterSingleItemUniformExpensesAndSelectInvestmentsSmallTask(
    FilterExpensesAndSelectInvestmentsTask, HumanEvalTask
):
    """Preset variant: mode="single_item_uniform", num_expenses=[3, 5], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Fixed settings of this variant; only `level` comes from the caller.
        preset = {
            "num_expenses": [3, 5],
            "budget": 150000,
            "mode": "single_item_uniform",
            "level": level,
        }
        super().__init__(seed, instance, fixed_config, **preset)
|
|
921
|
+
|
|
922
|
+
|
|
923
|
+
class FilterSingleItemUniformExpensesAndSelectInvestmentsMediumTask(
    FilterExpensesAndSelectInvestmentsTask
):
    """Preset variant: mode="single_item_uniform", num_expenses=[6, 8], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Everything is passed by name to the parent constructor.
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            mode="single_item_uniform",
            budget=150000,
            num_expenses=[6, 8],
        )
|
|
942
|
+
|
|
943
|
+
|
|
944
|
+
class FilterSingleItemUniformExpensesAndSelectInvestmentsLargeTask(
    FilterExpensesAndSelectInvestmentsTask
):
    """Preset variant: mode="single_item_uniform", num_expenses=[9, 12], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Caller-supplied values go through positionally; the preset is fixed here.
        forwarded = (seed, instance, fixed_config)
        super().__init__(
            *forwarded,
            level=level,
            mode="single_item_uniform",
            num_expenses=[9, 12],
            budget=150000,
        )
|
|
963
|
+
|
|
964
|
+
|
|
965
|
+
class FilterTwoItemsUniformExpensesAndSelectInvestmentsSmallTask(
    FilterExpensesAndSelectInvestmentsTask, HumanEvalTask
):
    """Preset variant: mode="n_items", num_items_uniform=2, num_expenses=[3, 5], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Fixed settings of this variant; only `level` comes from the caller.
        preset = {
            "num_expenses": [3, 5],
            "budget": 150000,
            "mode": "n_items",
            "level": level,
            "num_items_uniform": 2,
        }
        super().__init__(seed, instance, fixed_config, **preset)
|
|
985
|
+
|
|
986
|
+
|
|
987
|
+
class FilterThreeItemsUniformExpensesAndSelectInvestmentsMediumTask(
    FilterExpensesAndSelectInvestmentsTask
):
    """Preset variant: mode="n_items", num_items_uniform=3, num_expenses=[6, 8], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Everything is passed by name to the parent constructor.
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            mode="n_items",
            num_items_uniform=3,
            budget=150000,
            num_expenses=[6, 8],
        )
|
|
1007
|
+
|
|
1008
|
+
|
|
1009
|
+
class FilterThreeItemsUniformExpensesAndSelectInvestmentsLargeTask(
    FilterExpensesAndSelectInvestmentsTask
):
    """Preset variant: mode="n_items", num_items_uniform=3, num_expenses=[9, 12], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Caller-supplied values go through positionally; the preset is fixed here.
        forwarded = (seed, instance, fixed_config)
        super().__init__(
            *forwarded,
            level=level,
            mode="n_items",
            num_items_uniform=3,
            num_expenses=[9, 12],
            budget=150000,
        )
|
|
1029
|
+
|
|
1030
|
+
|
|
1031
|
+
class FilterExpensesFindTotalReturnAndSelectInvestmentsTask(FilterExpensesAndFindTotalReturnTask):
    """Allocation task answered with both the selected investments and their total return.

    Forwards its settings to the parent with
    answer_format="total_return_and_investments", overrides the task
    description, and installs a single chat-message subtask.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        num_expenses: list[int] = None,
        budget: int = 150000,
        mode: str = "random",
        num_items_uniform: int = None,
        level: int = 2,
    ):
        """Forward the configuration to the parent and set the task description.

        Args:
            seed: forwarded to the parent constructor.
            instance: forwarded to the parent constructor.
            fixed_config: forwarded to the parent constructor.
            num_expenses: forwarded to the parent; [3, 4] when omitted.
            budget: forwarded to the parent; also quoted in the description.
            mode: forwarded to the parent.
            num_items_uniform: forwarded to the parent.
            level: forwarded to the parent.
        """
        # Build the default per call so instances never share one mutable list.
        if num_expenses is None:
            num_expenses = [3, 4]
        super().__init__(
            seed,
            instance,
            fixed_config,
            num_expenses=num_expenses,
            budget=budget,
            mode=mode,
            num_items_uniform=num_items_uniform,
            answer_format="total_return_and_investments",
            level=level,
        )
        self.task_description = f'Follow protocol "{self.protocol_name}" (located in the "Company Protocols" knowledge base) to allocate investments to the expenses with short description containing {self.expense_hashtag} to maximize returns while fitting inside the budget of {self.budget}$. Give selected investments and total return. '

    def _setup_list(self) -> None:
        """Run the parent setup, then set self.tasks to one chat-message subtask."""
        super()._setup_list()
        message = f"The correct investments to select are: {', '.join(self.correct_investments)} and their total return is {self.max_return}$"
        # Assignment (not append): whatever the parent placed in self.tasks is replaced.
        self.tasks = [
            SendChatMessageForBudgetAllocationTask(
                instance=self.instance,
                message=message,
                used_in_level_2=True,
                is_validated=False,
                budget=self.budget,
                answer_format=self.answer_format,
                level=self.level,
            )
        ]

    def validate(self, page: Page, chat_messages: List[str]) -> Tuple[float, bool, str, dict]:
        """Run the investments check, then the total-return check, then FilterAndDoTask.validate.

        Returns:
            The (reward, done, message, info) tuple of the first check that does
            not yield reward == 1 with done set, otherwise the result of
            FilterAndDoTask.validate.
        """
        reward, done, message, info = self.check_correct_investments_sent_in_chat(
            page, chat_messages
        )
        if not (reward == 1 and done):
            return reward, done, message, info

        reward, done, message, info = self.check_total_return(page, chat_messages)
        if not (reward == 1 and done):
            return reward, done, message, info

        return FilterAndDoTask.validate(self, page, chat_messages)
|
|
1083
|
+
|
|
1084
|
+
|
|
1085
|
+
class FilterRandomExpensesFindTotalReturnAndSelectInvestmentsSmallTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask, HumanEvalTask
):
    """Preset variant: mode="random", num_expenses=[3, 5], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Fixed settings of this variant; only `level` comes from the caller.
        preset = {
            "num_expenses": [3, 5],
            "budget": 150000,
            "mode": "random",
            "level": level,
        }
        super().__init__(seed, instance, fixed_config, **preset)
|
|
1104
|
+
|
|
1105
|
+
|
|
1106
|
+
class FilterRandomExpensesFindTotalReturnAndSelectInvestmentsMediumTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask
):
    """Preset variant: mode="random", num_expenses=[6, 8], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Everything is passed by name to the parent constructor.
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            mode="random",
            budget=150000,
            num_expenses=[6, 8],
        )
|
|
1125
|
+
|
|
1126
|
+
|
|
1127
|
+
class FilterRandomExpensesFindTotalReturnAndSelectInvestmentsLargeTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask
):
    """Preset variant: mode="random", num_expenses=[9, 12], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Caller-supplied values go through positionally; the preset is fixed here.
        forwarded = (seed, instance, fixed_config)
        super().__init__(
            *forwarded,
            level=level,
            mode="random",
            num_expenses=[9, 12],
            budget=150000,
        )
|
|
1146
|
+
|
|
1147
|
+
|
|
1148
|
+
class FilterTrivialExpensesFindTotalReturnAndSelectInvestmentsSmallTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask, HumanEvalTask
):
    """Preset variant: mode="trivial", num_expenses=[3, 5], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Fixed settings of this variant; only `level` comes from the caller.
        preset = {
            "num_expenses": [3, 5],
            "budget": 150000,
            "mode": "trivial",
            "level": level,
        }
        super().__init__(seed, instance, fixed_config, **preset)
|
|
1167
|
+
|
|
1168
|
+
|
|
1169
|
+
class FilterTrivialExpensesFindTotalReturnAndSelectInvestmentsMediumTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask
):
    """Preset variant: mode="trivial", num_expenses=[6, 8], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Everything is passed by name to the parent constructor.
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            mode="trivial",
            budget=150000,
            num_expenses=[6, 8],
        )
|
|
1188
|
+
|
|
1189
|
+
|
|
1190
|
+
class FilterTrivialExpensesFindTotalReturnAndSelectInvestmentsLargeTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask
):
    """Preset variant: mode="trivial", num_expenses=[9, 12], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Caller-supplied values go through positionally; the preset is fixed here.
        forwarded = (seed, instance, fixed_config)
        super().__init__(
            *forwarded,
            level=level,
            mode="trivial",
            num_expenses=[9, 12],
            budget=150000,
        )
|
|
1209
|
+
|
|
1210
|
+
|
|
1211
|
+
class FilterSingleItemExpensesFindTotalReturnAndSelectInvestmentsSmallTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask, HumanEvalTask
):
    """Preset variant: mode="single_item", num_expenses=[3, 5], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Fixed settings of this variant; only `level` comes from the caller.
        preset = {
            "num_expenses": [3, 5],
            "budget": 150000,
            "mode": "single_item",
            "level": level,
        }
        super().__init__(seed, instance, fixed_config, **preset)
|
|
1230
|
+
|
|
1231
|
+
|
|
1232
|
+
class FilterSingleItemExpensesFindTotalReturnAndSelectInvestmentsMediumTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask
):
    """Preset variant: mode="single_item", num_expenses=[6, 8], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Everything is passed by name to the parent constructor.
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            mode="single_item",
            budget=150000,
            num_expenses=[6, 8],
        )
|
|
1251
|
+
|
|
1252
|
+
|
|
1253
|
+
class FilterSingleItemExpensesFindTotalReturnAndSelectInvestmentsLargeTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask
):
    """Preset variant: mode="single_item", num_expenses=[9, 12], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Caller-supplied values go through positionally; the preset is fixed here.
        forwarded = (seed, instance, fixed_config)
        super().__init__(
            *forwarded,
            level=level,
            mode="single_item",
            num_expenses=[9, 12],
            budget=150000,
        )
|
|
1272
|
+
|
|
1273
|
+
|
|
1274
|
+
class FilterSingleItemUniformExpensesFindTotalReturnAndSelectInvestmentsSmallTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask, HumanEvalTask
):
    """Preset variant: mode="single_item_uniform", num_expenses=[3, 5], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Fixed settings of this variant; only `level` comes from the caller.
        preset = {
            "num_expenses": [3, 5],
            "budget": 150000,
            "mode": "single_item_uniform",
            "level": level,
        }
        super().__init__(seed, instance, fixed_config, **preset)
|
|
1293
|
+
|
|
1294
|
+
|
|
1295
|
+
class FilterSingleItemUniformExpensesFindTotalReturnAndSelectInvestmentsMediumTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask
):
    """Preset variant: mode="single_item_uniform", num_expenses=[6, 8], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Everything is passed by name to the parent constructor.
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            mode="single_item_uniform",
            budget=150000,
            num_expenses=[6, 8],
        )
|
|
1314
|
+
|
|
1315
|
+
|
|
1316
|
+
class FilterSingleItemUniformExpensesFindTotalReturnAndSelectInvestmentsLargeTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask
):
    """Preset variant: mode="single_item_uniform", num_expenses=[9, 12], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Caller-supplied values go through positionally; the preset is fixed here.
        forwarded = (seed, instance, fixed_config)
        super().__init__(
            *forwarded,
            level=level,
            mode="single_item_uniform",
            num_expenses=[9, 12],
            budget=150000,
        )
|
|
1335
|
+
|
|
1336
|
+
|
|
1337
|
+
class FilterTwoItemsUniformExpensesFindTotalReturnAndSelectInvestmentsSmallTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask, HumanEvalTask
):
    """Preset variant: mode="n_items", num_items_uniform=2, num_expenses=[3, 5], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        super().__init__(
            seed,
            instance,
            fixed_config,
            # Was [9, 12] (the value used by the Large variants); aligned with
            # the [3, 5] used by every other *SmallTask preset in this file.
            num_expenses=[3, 5],
            budget=150000,
            mode="n_items",
            level=level,
            num_items_uniform=2,
        )
|
|
1357
|
+
|
|
1358
|
+
|
|
1359
|
+
class FilterThreeItemsUniformExpensesFindTotalReturnAndSelectInvestmentsMediumTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask
):
    """Preset variant: mode="n_items", num_items_uniform=3, num_expenses=[6, 8], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        super().__init__(
            seed,
            instance,
            fixed_config,
            # Was [9, 12] (the value used by the Large variants); aligned with
            # the [6, 8] used by every other *MediumTask preset in this file.
            num_expenses=[6, 8],
            budget=150000,
            mode="n_items",
            level=level,
            num_items_uniform=3,
        )
|
|
1379
|
+
|
|
1380
|
+
|
|
1381
|
+
class FilterThreeItemsUniformExpensesFindTotalReturnAndSelectInvestmentsLargeTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask
):
    """Preset variant: mode="n_items", num_items_uniform=3, num_expenses=[9, 12], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Caller-supplied values go through positionally; the preset is fixed here.
        forwarded = (seed, instance, fixed_config)
        super().__init__(
            *forwarded,
            level=level,
            mode="n_items",
            num_items_uniform=3,
            num_expenses=[9, 12],
            budget=150000,
        )
|
|
1401
|
+
|
|
1402
|
+
|
|
1403
|
+
class FilterExpenseLinesAndDeleteWrongInvestments(FilterExpensesAndAllocateInvestmentsTask):
    """Allocation task solved by deleting the expense lines that were not selected.

    Forwards its settings to the parent with answer_format="cleanup", adds one
    DeleteExpenseLineKnapsack subtask per entry of self.incorrect_investments,
    and validates against the expense lines still present in "fm_expense_line".
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        num_expenses: List[int] = None,
        budget: int = 150000,
        mode: str = "random",
        num_items_uniform: int = None,
        level: int = 2,
    ) -> None:
        """Forward the configuration to the parent and set the task description.

        Args:
            seed: forwarded to the parent constructor.
            instance: forwarded to the parent constructor.
            fixed_config: forwarded to the parent constructor.
            num_expenses: forwarded to the parent; [3, 4] when omitted.
            budget: forwarded to the parent; also quoted in the description.
            mode: forwarded to the parent; read again by validate().
            num_items_uniform: forwarded to the parent.
            level: forwarded to the parent.
        """
        # Build the default per call so instances never share one mutable list.
        if num_expenses is None:
            num_expenses = [3, 4]
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            num_expenses=num_expenses,
            budget=budget,
            mode=mode,
            answer_format="cleanup",
            num_items_uniform=num_items_uniform,
            level=level,
        )
        self.task_description = f'Follow protocol "{self.protocol_name}" (located in the "Company Protocols" knowledge base) to allocate investments to the expenses with short description containing {self.expense_hashtag} to maximize returns while fitting inside the budget of {self.budget}$. Delete the investments that were not selected. '

    def _setup_list(self) -> None:
        """Run the parent setup, then append one delete subtask per incorrect investment."""
        super()._setup_list()
        # in modes "n_items", "single_item_uniform", this yields one of many valid solutions
        for i, expense_line_number in enumerate(self.incorrect_investments):
            # Only the first delete subtask keeps its description.
            skip_description = i > 0
            # NOTE(review): the index is the position within incorrect_investments;
            # this pairs with the right record only if expense_line_sys_ids is
            # ordered the same way - confirm against the parent class.
            expense_line_sys_id = self.expense_line_sys_ids[i]
            self.tasks.append(
                DeleteExpenseLineKnapsack(
                    instance=self.instance,
                    record_number=expense_line_number,
                    record_sys_id=expense_line_sys_id,
                    fixed_config={
                        "field_name": "number",
                        "field_value": f"{expense_line_number}",
                    },
                    used_in_level_2=True,
                    is_validated=False,
                    budget=self.budget,
                    answer_format=self.answer_format,
                    level=self.level,
                    skip_description=skip_description,
                )
            )

    def validate(self, page: Page, chat_messages: List[str]) -> Tuple[float, bool, str, dict]:
        """Check the remaining expense lines, then defer to FilterAndDoTask.validate.

        In modes "random", "trivial" and "single_item" every correct investment
        must still exist and every incorrect one must be gone. In modes
        "n_items" and "single_item_uniform" only the number of remaining lines
        is compared with self.num_items_uniform.

        Returns:
            (0, False, "", {"message": ...}) on the first failed check,
            otherwise the result of FilterAndDoTask.validate.
        """
        expenses = table_api_call(
            instance=self.instance,
            table="fm_expense_line",
            params={
                "sysparm_query": f"short_descriptionLIKE{self.expense_hashtag}",
                "sysparm_fields": "number,amount,sys_id",
            },
        )["result"]

        if self.mode in ["random", "trivial", "single_item"]:
            # Collect the numbers once instead of rebuilding the list per investment.
            remaining_numbers = {expense["number"] for expense in expenses}
            # Check that the correct investments have been selected
            for investment in self.correct_investments:
                if investment not in remaining_numbers:
                    return (
                        0,
                        False,
                        "",
                        {"message": "Investment missing from selected list."},
                    )
            # Check that the incorrect investments have not been selected
            for investment in self.incorrect_investments:
                if investment in remaining_numbers:
                    return (
                        0,
                        False,
                        "",
                        {"message": "Incorrect investment selected."},
                    )
        # In those settings, many answers are possible, it's only a matter of respecting the number of items in the solution
        elif self.mode in ["n_items", "single_item_uniform"]:
            # NOTE(review): the "single_item_uniform" presets in this file do not
            # pass num_items_uniform; unless the parent sets it, this compares
            # against None and always fails - confirm.
            if len(expenses) != self.num_items_uniform:
                return (
                    0,
                    False,
                    "",
                    {"message": "Incorrect number of investments selected."},
                )
        reward, done, message, info = FilterAndDoTask.validate(self, page, chat_messages)

        return reward, done, message, info
|
|
1493
|
+
|
|
1494
|
+
|
|
1495
|
+
class FilterRandomExpensesAndDeleteWrongInvestmentsSmallTask(
    FilterExpenseLinesAndDeleteWrongInvestments, HumanEvalTask
):
    """Preset variant: mode="random", num_expenses=[3, 5], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Fixed settings of this variant; only `level` comes from the caller.
        preset = {
            "num_expenses": [3, 5],
            "budget": 150000,
            "mode": "random",
            "level": level,
        }
        super().__init__(seed, instance, fixed_config, **preset)
|
|
1514
|
+
|
|
1515
|
+
|
|
1516
|
+
class FilterRandomExpensesAndDeleteWrongInvestmentsMediumTask(
    FilterExpenseLinesAndDeleteWrongInvestments
):
    """Preset variant: mode="random", num_expenses=[6, 8], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Everything is passed by name to the parent constructor.
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            mode="random",
            budget=150000,
            num_expenses=[6, 8],
        )
|
|
1535
|
+
|
|
1536
|
+
|
|
1537
|
+
class FilterRandomExpensesAndDeleteWrongInvestmentsLargeTask(
    FilterExpenseLinesAndDeleteWrongInvestments
):
    """Preset variant: mode="random", num_expenses=[9, 12], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Caller-supplied values go through positionally; the preset is fixed here.
        forwarded = (seed, instance, fixed_config)
        super().__init__(
            *forwarded,
            level=level,
            mode="random",
            num_expenses=[9, 12],
            budget=150000,
        )
|
|
1556
|
+
|
|
1557
|
+
|
|
1558
|
+
class FilterSingleItemExpensesAndDeleteWrongInvestmentsSmallTask(
    FilterExpenseLinesAndDeleteWrongInvestments, HumanEvalTask
):
    """Preset variant: mode="single_item", num_expenses=[3, 5], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Fixed settings of this variant; only `level` comes from the caller.
        preset = {
            "num_expenses": [3, 5],
            "budget": 150000,
            "mode": "single_item",
            "level": level,
        }
        super().__init__(seed, instance, fixed_config, **preset)
|
|
1577
|
+
|
|
1578
|
+
|
|
1579
|
+
class FilterSingleItemExpensesAndDeleteWrongInvestmentsMediumTask(
    FilterExpenseLinesAndDeleteWrongInvestments
):
    """Preset variant: mode="single_item", num_expenses=[6, 8], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Everything is passed by name to the parent constructor.
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            mode="single_item",
            budget=150000,
            num_expenses=[6, 8],
        )
|
|
1598
|
+
|
|
1599
|
+
|
|
1600
|
+
class FilterSingleItemExpensesAndDeleteWrongInvestmentsLargeTask(
    FilterExpenseLinesAndDeleteWrongInvestments
):
    """Preset variant: mode="single_item", num_expenses=[9, 12], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Caller-supplied values go through positionally; the preset is fixed here.
        forwarded = (seed, instance, fixed_config)
        super().__init__(
            *forwarded,
            level=level,
            mode="single_item",
            num_expenses=[9, 12],
            budget=150000,
        )
|
|
1619
|
+
|
|
1620
|
+
|
|
1621
|
+
class FilterSingleItemUniformExpensesAndDeleteWrongInvestmentsSmallTask(
    FilterExpenseLinesAndDeleteWrongInvestments, HumanEvalTask
):
    """Preset variant: mode="single_item_uniform", num_expenses=[3, 5], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Fixed settings of this variant; only `level` comes from the caller.
        preset = {
            "num_expenses": [3, 5],
            "budget": 150000,
            "mode": "single_item_uniform",
            "level": level,
        }
        super().__init__(seed, instance, fixed_config, **preset)
|
|
1640
|
+
|
|
1641
|
+
|
|
1642
|
+
class FilterSingleItemUniformExpensesAndDeleteWrongInvestmentsMediumTask(
    FilterExpenseLinesAndDeleteWrongInvestments
):
    """Preset variant: mode="single_item_uniform", num_expenses=[6, 8], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Everything is passed by name to the parent constructor.
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            mode="single_item_uniform",
            budget=150000,
            num_expenses=[6, 8],
        )
|
|
1661
|
+
|
|
1662
|
+
|
|
1663
|
+
class FilterSingleItemUniformExpensesAndDeleteWrongInvestmentsLargeTask(
    FilterExpenseLinesAndDeleteWrongInvestments
):
    """Preset variant: mode="single_item_uniform", num_expenses=[9, 12], budget=150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Caller-supplied values go through positionally; the preset is fixed here.
        forwarded = (seed, instance, fixed_config)
        super().__init__(
            *forwarded,
            level=level,
            mode="single_item_uniform",
            num_expenses=[9, 12],
            budget=150000,
        )
|
|
1682
|
+
|
|
1683
|
+
|
|
1684
|
+
class FilterTwoItemsUniformExpensesAndDeleteWrongInvestmentsSmallTask(
    FilterExpenseLinesAndDeleteWrongInvestments, HumanEvalTask
):
    """Small "n_items" preset (num_items_uniform=2) of FilterExpenseLinesAndDeleteWrongInvestments.

    Also derives from HumanEvalTask. This subclass defines nothing but the
    constructor, which pins the configuration forwarded to the parent.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Fixed settings for this variant; only ``level`` is caller-controlled.
        # NOTE(review): num_expenses looks like a [min, max] range -- confirm in the parent.
        preset = dict(
            num_expenses=[3, 5],
            budget=150000,
            mode="n_items",
            level=level,
            num_items_uniform=2,
        )
        super().__init__(seed, instance, fixed_config, **preset)
|
|
1704
|
+
|
|
1705
|
+
|
|
1706
|
+
class FilterThreeItemsUniformExpensesAndDeleteWrongInvestmentsMediumTask(
    FilterExpenseLinesAndDeleteWrongInvestments
):
    """Medium "n_items" preset (num_items_uniform=3) of FilterExpenseLinesAndDeleteWrongInvestments.

    This subclass defines nothing but the constructor, which pins the
    configuration forwarded to the parent.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Fixed settings for this variant; only ``level`` is caller-controlled.
        # NOTE(review): num_expenses looks like a [min, max] range -- confirm in the parent.
        preset = dict(
            num_expenses=[6, 8],
            budget=150000,
            mode="n_items",
            level=level,
            num_items_uniform=3,
        )
        super().__init__(seed, instance, fixed_config, **preset)
|
|
1726
|
+
|
|
1727
|
+
|
|
1728
|
+
class FilterThreeItemsUniformExpensesAndDeleteWrongInvestmentsLargeTask(
    FilterExpenseLinesAndDeleteWrongInvestments
):
    """Large "n_items" preset (num_items_uniform=3) of FilterExpenseLinesAndDeleteWrongInvestments.

    This subclass defines nothing but the constructor, which pins the
    configuration forwarded to the parent.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Fixed settings for this variant; only ``level`` is caller-controlled.
        # NOTE(review): num_expenses looks like a [min, max] range -- confirm in the parent.
        preset = dict(
            num_expenses=[9, 12],
            budget=150000,
            mode="n_items",
            level=level,
            num_items_uniform=3,
        )
        super().__init__(seed, instance, fixed_config, **preset)
|
|
1748
|
+
|
|
1749
|
+
|
|
1750
|
+
# Snapshot the module namespace so the comprehension below iterates a stable
# mapping rather than the live one.
local_vars = dict(locals())

# Registry of every FilterAndDoTask subclass defined in this module, minus
# FilterAndDoTask itself and the intermediate base classes listed below.
__TASKS__ = [
    candidate
    for candidate in local_vars.values()
    if isinstance(candidate, type)
    and issubclass(candidate, FilterAndDoTask)
    and not any(
        candidate is excluded
        for excluded in (
            FilterAndDoTask,
            FilterExpensesAndAllocateInvestmentsTask,
            FilterExpensesAndFindTotalReturnTask,
            FilterExpenseLinesAndDeleteWrongInvestments,
            FilterExpensesFindTotalReturnAndSelectInvestmentsTask,
            FilterExpensesAndSelectInvestmentsTask,
        )
    )
]
|