browsergym-workarena 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. browsergym/workarena/__init__.py +13 -1
  2. browsergym/workarena/api/category.py +74 -0
  3. browsergym/workarena/api/change_request.py +87 -0
  4. browsergym/workarena/api/computer_asset.py +90 -0
  5. browsergym/workarena/api/cost_center.py +19 -0
  6. browsergym/workarena/api/expense_line.py +89 -0
  7. browsergym/workarena/api/incident.py +45 -0
  8. browsergym/workarena/api/knowledge.py +29 -0
  9. browsergym/workarena/api/problem.py +90 -0
  10. browsergym/workarena/api/report.py +183 -0
  11. browsergym/workarena/api/requested_items.py +63 -0
  12. browsergym/workarena/api/user.py +11 -8
  13. browsergym/workarena/api/utils.py +47 -3
  14. browsergym/workarena/config.py +21 -1
  15. browsergym/workarena/data_files/setup_files/forms/expected_incident_form_fields.json +1 -1
  16. browsergym/workarena/data_files/setup_files/forms/expected_request_item_form_fields.json +1 -0
  17. browsergym/workarena/data_files/setup_files/knowledge/protocols.json +46 -0
  18. browsergym/workarena/data_files/setup_files/knowledge/test.html +1 -0
  19. browsergym/workarena/data_files/setup_files/lists/expected_asset_list_columns.json +2 -24
  20. browsergym/workarena/data_files/setup_files/lists/expected_change_request_list_columns.json +4 -40
  21. browsergym/workarena/data_files/setup_files/lists/expected_expense_line_list_columns.json +12 -0
  22. browsergym/workarena/data_files/setup_files/lists/expected_hardware_list_columns.json +1 -42
  23. browsergym/workarena/data_files/setup_files/lists/expected_incident_list_columns.json +2 -18
  24. browsergym/workarena/data_files/setup_files/lists/expected_problem_list_columns.json +12 -0
  25. browsergym/workarena/data_files/setup_files/lists/expected_requested_items_list_columns.json +12 -0
  26. browsergym/workarena/data_files/setup_files/lists/expected_service_catalog_list_columns.json +2 -19
  27. browsergym/workarena/data_files/setup_files/lists/expected_user_list_columns.json +3 -50
  28. browsergym/workarena/data_files/task_configs/all_menu.json +1 -1
  29. browsergym/workarena/data_files/task_configs/dashboard_retrieval_minmax_task.json +1 -1
  30. browsergym/workarena/data_files/task_configs/dashboard_retrieval_value_task.json +1 -1
  31. browsergym/workarena/data_files/task_configs/filter_service_catalog_item_list_task.json +1 -1
  32. browsergym/workarena/data_files/task_configs/impersonation_users.json +1 -1
  33. browsergym/workarena/data_files/task_configs/report_retrieval_minmax_task.json +1 -1
  34. browsergym/workarena/data_files/task_configs/report_retrieval_value_task.json +1 -1
  35. browsergym/workarena/human_eval/console.js +176 -0
  36. browsergym/workarena/human_eval/tool.py +366 -0
  37. browsergym/workarena/install.py +81 -20
  38. browsergym/workarena/tasks/base.py +55 -20
  39. browsergym/workarena/tasks/comp_building_block.py +4 -0
  40. browsergym/workarena/tasks/compositional/__init__.py +76 -0
  41. browsergym/workarena/tasks/compositional/base.py +364 -0
  42. browsergym/workarena/tasks/compositional/dash_do_base.py +1366 -0
  43. browsergym/workarena/tasks/compositional/dash_do_catalog.py +1127 -0
  44. browsergym/workarena/tasks/compositional/dash_do_catalog_infeasible.py +2047 -0
  45. browsergym/workarena/tasks/compositional/dash_do_create_incident.py +403 -0
  46. browsergym/workarena/tasks/compositional/dash_do_create_incident_infeasible.py +278 -0
  47. browsergym/workarena/tasks/compositional/dash_do_create_problem.py +336 -0
  48. browsergym/workarena/tasks/compositional/dash_do_create_problem_infeasible.py +235 -0
  49. browsergym/workarena/tasks/compositional/dash_do_filter.py +1600 -0
  50. browsergym/workarena/tasks/compositional/dash_do_request_item.py +1315 -0
  51. browsergym/workarena/tasks/compositional/dash_do_request_item_infeasible.py +693 -0
  52. browsergym/workarena/tasks/compositional/delete_record.py +341 -0
  53. browsergym/workarena/tasks/compositional/edit_knowledge_base.py +457 -0
  54. browsergym/workarena/tasks/compositional/expense_management.py +598 -0
  55. browsergym/workarena/tasks/compositional/filter_and_do.py +139 -0
  56. browsergym/workarena/tasks/compositional/find_and_order_item.py +345 -0
  57. browsergym/workarena/tasks/compositional/manage_change_request_schedule.py +1417 -0
  58. browsergym/workarena/tasks/compositional/mark_duplicate_problems.py +499 -0
  59. browsergym/workarena/tasks/compositional/maximize_investment_return.py +1763 -0
  60. browsergym/workarena/tasks/compositional/navigate_and_do.py +1151 -0
  61. browsergym/workarena/tasks/compositional/navigate_and_do_infeasible.py +2100 -0
  62. browsergym/workarena/tasks/compositional/offboard_user.py +207 -0
  63. browsergym/workarena/tasks/compositional/onboard_user.py +226 -0
  64. browsergym/workarena/tasks/compositional/update_task.py +145 -0
  65. browsergym/workarena/tasks/compositional/utils/curriculum.py +215 -0
  66. browsergym/workarena/tasks/compositional/utils/infeasible_configs.py +151 -0
  67. browsergym/workarena/tasks/compositional/utils/knapsack.py +192 -0
  68. browsergym/workarena/tasks/compositional/warranty_check.py +227 -0
  69. browsergym/workarena/tasks/compositional/work_assignment.py +804 -0
  70. browsergym/workarena/tasks/compositional/workload_balancing.py +396 -0
  71. browsergym/workarena/tasks/dashboard.py +188 -8
  72. browsergym/workarena/tasks/form.py +1024 -232
  73. browsergym/workarena/tasks/knowledge.py +216 -25
  74. browsergym/workarena/tasks/list.py +519 -102
  75. browsergym/workarena/tasks/mark_duplicate_problem.py +171 -0
  76. browsergym/workarena/tasks/navigation.py +55 -13
  77. browsergym/workarena/tasks/scripts/extract_all_menu_items.py +9 -2
  78. browsergym/workarena/tasks/scripts/generate_dashboard_configs.py +6 -5
  79. browsergym/workarena/tasks/scripts/service_catalog.py +2 -1
  80. browsergym/workarena/tasks/scripts/validate.py +8 -2
  81. browsergym/workarena/tasks/send_chat_message.py +90 -0
  82. browsergym/workarena/tasks/service_catalog.py +94 -26
  83. browsergym/workarena/tasks/utils/form.py +1 -4
  84. browsergym/workarena/tasks/utils/private_tasks.py +63 -0
  85. browsergym/workarena/tasks/utils/utils.py +13 -0
  86. {browsergym_workarena-0.2.1.dist-info → browsergym_workarena-0.3.0.dist-info}/METADATA +19 -18
  87. browsergym_workarena-0.3.0.dist-info/RECORD +138 -0
  88. {browsergym_workarena-0.2.1.dist-info → browsergym_workarena-0.3.0.dist-info}/entry_points.txt +1 -0
  89. browsergym_workarena-0.2.1.dist-info/RECORD +0 -85
  90. {browsergym_workarena-0.2.1.dist-info → browsergym_workarena-0.3.0.dist-info}/WHEEL +0 -0
  91. {browsergym_workarena-0.2.1.dist-info → browsergym_workarena-0.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1763 @@
1
+ import re
2
+
3
+ from faker import Faker
4
+ from typing import List, Tuple
5
+
6
+ fake = Faker()
7
+
8
+ from playwright.sync_api._generated import Page
9
+
10
+ from browsergym.workarena.tasks.send_chat_message import SendChatMessageForBudgetAllocationTask
11
+
12
+ from .base import HumanEvalTask
13
+ from .delete_record import DeleteExpenseLineKnapsack
14
+ from .filter_and_do import FilterAndDoTask
15
+ from .utils.knapsack import KnapsackInstanceGenarator
16
+
17
+ from ..base import AbstractServiceNowTask
18
+
19
+ from ...api.expense_line import create_expense_line
20
+ from ...api.utils import table_api_call, db_delete_from_table
21
+ from ...config import (
22
+ # Expected columns for the different lists
23
+ EXPECTED_EXPENSE_LINE_COLUMNS_PATH,
24
+ )
25
+ from ...instance import SNowInstance
26
+
27
+
28
class FilterExpensesAndAllocateInvestmentsTask(FilterAndDoTask):
    """Task to filter expenses and allocate investments.

    A knapsack instance is generated and one expense line is created per
    potential investment: the cost is stored as the expense amount and the
    return is embedded in the short description. The ``check_*`` helpers
    implement the different ways an answer can be validated.

    Args:
    seed: int
        Seed forwarded to the parent task
    instance: SNowInstance
        Forwarded to the parent task
    fixed_config: list[AbstractServiceNowTask]
        Forwarded to the parent task
    num_expenses: list[int]
        The range to choose the number of expenses from
    budget: int
        The budget to allocate to the expenses
    mode: str
        Mode of generation. Choice of "random", "trivial", "single_item", "single_item_uniform", "n_items"
        - random: Randomly generate the instance and return it; guaranteed to have a unique optimal solution
        - trivial: Generate a trivial instance with all items fitting in the knapsack; return the instance
        - single_item: Generate an instance where the optimal solution has only one item
        - n_items: Generate an instance with all items having uniform weight and value; n items fitting in the knapsack
        - single_item_uniform: Generate an instance with all items having uniform weight and value; optimal solution has only one item and it can be any
    num_items_uniform: int
        The number of items to generate in the "n_items" mode (forced to 1 in "single_item_uniform" mode)
    answer_format: str
        The type of answer to generate. Choice of total_return_only, total_return_and_investments, investments_only, cleanup, cleanup_and_return
    level: int
        Forwarded to the parent task
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: list[AbstractServiceNowTask] = None,
        num_expenses: list[int] = [3, 4],
        budget: int = 150000,
        mode: str = "random",
        num_items_uniform: int = None,
        answer_format: str = None,
        level: int = 2,
    ) -> None:
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            navigation_config={
                "module": "Expense Lines",
                "application": "Cost",
            },
            level=level,
            protocol_name="Maximizing total investment return",
        )
        # NOTE(review): the "+ 1" suggests self.random.randint excludes its upper
        # bound (numpy-style) - confirm against the parent task's RNG.
        self.num_expenses = self.random.randint(num_expenses[0], num_expenses[1] + 1)
        # In these settings, we need to vary the budget
        if mode in ["single_item_uniform", "n_items"]:
            # Integer division keeps the lower bound an int: randint expects
            # integer bounds and "/" would hand it a float.
            min_budget = budget // 5
            max_budget = budget * 5
            self.budget = self.random.randint(min_budget, max_budget)
        else:
            self.budget = budget
        self.mode = mode
        self.answer_format = answer_format
        self.num_items_uniform = 1 if mode == "single_item_uniform" else num_items_uniform

        self.expense_hashtag = "#" + self.unique_id[:10]
        self.short_description = "Allocate investments to maximize returns"
        self.expense_line_sys_ids = []
        self.expense_line_numbers = []
        # List of correct investments to check for in the chat messages
        self.correct_investments = []
        # List of incorrect investments to check for in the chat messages
        self.incorrect_investments = []
        # List of tuples (cost, return) of potential investments
        self.potential_investments = None
        # Maximum return possible with optimal solution
        self.max_return = None
        # List of alternative formats for the maximum return to check for in the chat messages
        self.alternative_max_return_formats = []
        # Indices of the selected investments in the optimal solution
        self.selected_investment_indices = None
        # Flags remembering that the investments were correctly selected / the
        # total return was correct, so later validations succeed immediately
        self.investments_correctly_selected = False
        self.total_return_correct = False

    def _setup_list(self) -> None:
        """Configure the list filter, generate the knapsack instance and create the expense lines."""
        self.filter_config = {
            "list_url": "/now/nav/ui/classic/params/target/fm_expense_line_list.do",
            "expected_fields_path": EXPECTED_EXPENSE_LINE_COLUMNS_PATH,
            "filter_columns": [
                "short_description",
            ],
            "filter_kind": "AND",
            "filter_operators": ["contains"],
            "filter_values": [
                f"{self.expense_hashtag}",
            ],
        }
        knapsack = KnapsackInstanceGenarator(
            random=self.random,
            num_items=self.num_expenses,
            max_capacity=self.budget,
            mode=self.mode,
            num_items_in_solution=self.num_items_uniform,
        )
        # investments is a list of tuples, where each tuple is (cost, return)
        self.potential_investments, self.max_return, self.selected_investment_indices = (
            knapsack.get_instance()
        )
        # Accepted answer formats for the maximum return
        with_commas = "{:,}".format(self.max_return)
        self.alternative_max_return_formats = [
            str(self.max_return),  # No comma
            with_commas,  # Comma as thousand separator
            with_commas.replace(",", ", "),  # Comma as thousand separator with space after
            with_commas.replace(",", " "),  # Space as thousand separator
        ]

        for i, investment in enumerate(self.potential_investments):
            expense_number = f"EXP-{i}{self.unique_id[:10]}"
            # Include the return inside the short description
            short_description = f"Build {fake.sentence(2)} - Return: {investment[1]}$ "
            expense_sys_id, expense_number = create_expense_line(
                instance=self.instance,
                amount=investment[0],
                number=expense_number,
                date=str(fake.date_this_year(before_today=True, after_today=False)),
                short_description=short_description,
                expense_hashtag=self.expense_hashtag,
                user_sys_id=self._base_user_sysid,
            )
            self.expense_line_sys_ids.append(expense_sys_id)
            self.expense_line_numbers.append(expense_number)

        # In this setting there is only one valid answer
        if self.mode in ["random", "trivial", "single_item"]:
            for i, _ in enumerate(self.potential_investments):
                if i in self.selected_investment_indices:
                    self.correct_investments.append(self.expense_line_numbers[i])
                else:
                    self.incorrect_investments.append(self.expense_line_numbers[i])
        # In this setting, many answers are possible, it's only a matter of respecting the number of items in the solution
        # We store values here just so the cheat function can work uniformly
        elif self.mode in ["n_items", "single_item_uniform"]:
            for i, _ in enumerate(self.potential_investments):
                if i < self.num_items_uniform:
                    self.correct_investments.append(self.expense_line_numbers[i])
                else:
                    self.incorrect_investments.append(self.expense_line_numbers[i])

    def validate(self, page: Page, chat_messages: List[str]) -> Tuple[float, bool, str, dict]:
        """Delegate to the parent validation and return its result."""
        # The result must be returned; otherwise callers receive None instead
        # of the declared (reward, done, message, info) tuple.
        return super().validate(page, chat_messages)

    def check_total_return(
        self, page: Page, chat_messages: List[str]
    ) -> Tuple[float, bool, str, dict]:
        """Simple check that validates that the total return is correct.

        Only the last chat message is inspected, and only when its "role" is
        "assistant"; its "message" must contain one of the accepted formats of
        the maximum return. A success is remembered in
        ``self.total_return_correct`` so later calls succeed immediately.
        """
        if self.total_return_correct:
            return (
                1,
                True,
                "That is correct, thank you!",
                {"message": "Correct total return."},
            )

        if not chat_messages or chat_messages[-1]["role"] != "assistant":
            return (
                0,
                False,
                "",
                {"message": "The assistant did not provide an answer."},
            )
        answer = chat_messages[-1]["message"]

        if any(accepted in answer for accepted in self.alternative_max_return_formats):
            self.total_return_correct = True
            return (
                1,
                True,
                "That is correct, thank you!",
                {"message": "Correct answer."},
            )

        return (
            0,
            False,
            "",
            {"message": "Incorrect answer."},
        )

    def check_correct_investments_sent_in_chat(
        self, page: Page, chat_messages: List[str]
    ) -> Tuple[float, bool, str, dict]:
        """Check that the correct investments have been selected and their numbers have been sent in the chat.

        Only the last chat message is inspected, and only when its "role" is
        "assistant". A success is remembered in
        ``self.investments_correctly_selected`` so later calls succeed
        immediately.
        """
        if not self.investments_correctly_selected:
            if not chat_messages or chat_messages[-1]["role"] != "assistant":
                return (
                    0,
                    False,
                    "",
                    {"message": "The assistant did not provide an answer."},
                )
            answer = chat_messages[-1]["message"]

            # In these settings, there is only one valid answer
            if self.mode in ["random", "trivial", "single_item"]:
                # Check that the correct investments have been selected
                for investment in self.correct_investments:
                    if investment not in answer:
                        return (
                            0,
                            False,
                            "",
                            {"message": "Investment missing from selected list."},
                        )
                # Check that the incorrect investments have not been selected
                for investment in self.incorrect_investments:
                    if investment in answer:
                        return (
                            0,
                            False,
                            "",
                            {"message": "Incorrect investment selected."},
                        )
            # In those settings, many answers are possible, it's only a matter of respecting the number of items in the solution
            elif self.mode in ["n_items", "single_item_uniform"]:
                # Extract the expense line numbers from the answer
                # NOTE(review): the pattern requires a second "-" inside the
                # number, presumably coming from unique_id[:10] - confirm.
                pattern = r"EXP-\w+-\w+"
                matches = re.findall(pattern, answer)
                if len(matches) != self.num_items_uniform:
                    return (
                        0,
                        False,
                        "",
                        {"message": "Incorrect number of investments selected."},
                    )
            # Set the flag that is initialised in __init__ and tested above
            # (the original assigned a differently-named attribute).
            self.investments_correctly_selected = True

        return (
            1,
            True,
            "That is correct, thank you!",
            {"message": "Correct investments selected."},
        )

    def check_only_right_investment_kept(
        self, page: Page, chat_messages: List[str]
    ) -> Tuple[float, bool, str, dict]:
        """Checks that only the expected investments were kept; i.e. the others were deleted"""
        for i, investment_sys_id in enumerate(self.expense_line_sys_ids):
            record_expected = i in self.selected_investment_indices
            record_exists = table_api_call(
                instance=self.instance,
                table="fm_expense_line",
                params={"sysparm_query": f"sys_id={investment_sys_id}"},
            )["result"]
            # Missing investment that should be kept
            # NOTE(review): this is the only failure reported with done=True,
            # presumably because a deleted record cannot be restored - confirm.
            if record_expected and not record_exists:
                return (
                    0,
                    True,
                    "",
                    {"message": "Expected investment has been deleted."},
                )
            # Unexpected investment that should be deleted
            if not record_expected and record_exists:
                return (
                    0,
                    False,
                    "",
                    {"message": "Unexpected investment is present."},
                )

        return (
            1,
            True,
            "That is correct, thank you!",
            {"message": "Correct investments kept."},
        )

    def teardown(self) -> None:
        """Delete the expense lines created by this task that still exist, then run the parent teardown."""
        for expense_sys_id in self.expense_line_sys_ids:
            record_exists = table_api_call(
                instance=self.instance,
                table="fm_expense_line",
                params={"sysparm_query": f"sys_id={expense_sys_id}"},
            )["result"]
            if record_exists:
                db_delete_from_table(
                    instance=self.instance,
                    table="fm_expense_line",
                    sys_id=expense_sys_id,
                )
        super().teardown()
317
+
318
+
319
class FilterExpensesAndFindTotalReturnTask(FilterExpensesAndAllocateInvestmentsTask):
    """Variant where the answer to give is the total return of the selected investments."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: list[AbstractServiceNowTask] = None,
        num_expenses: list[int] = [3, 4],
        budget: int = 150000,
        mode: str = "random",
        answer_format: str = "total_return_only",
        num_items_uniform: int = 1,
        level: int = 2,
    ):
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            mode=mode,
            budget=budget,
            num_expenses=num_expenses,
            answer_format=answer_format,
            num_items_uniform=num_items_uniform,
        )
        # The description is built after the parent constructor because it
        # reads attributes set there (protocol name, hashtag, budget).
        self.task_description = f'Follow protocol "{self.protocol_name}" (located in the "Company Protocols" knowledge base) to allocate investments to the expenses with short description containing {self.expense_hashtag} to maximize returns while fitting inside the budget of {self.budget}$. Give total return of selected investments only. '

    def _setup_list(self) -> None:
        """Run the parent setup, then set the task list to a single chat-message subtask."""
        super()._setup_list()
        send_total_return = SendChatMessageForBudgetAllocationTask(
            instance=self.instance,
            message=f"The total value of the investments is {self.max_return}$",
            used_in_level_2=True,
            is_validated=False,
            budget=self.budget,
            answer_format=self.answer_format,
            level=self.level,
        )
        self.tasks = [send_total_return]

    def validate(self, page: Page, chat_messages: List[str]) -> Tuple[float, bool, str, dict]:
        """Check the total return first; only when it is right, run FilterAndDoTask's validation."""
        reward, done, message, info = self.check_total_return(page, chat_messages)
        total_return_ok = reward == 1 and done
        if not total_return_ok:
            return reward, done, message, info
        return FilterAndDoTask.validate(self, page, chat_messages)
365
+
366
+
367
class FilterRandomExpensesAndFindTotalReturnSmallTask(
    FilterExpensesAndFindTotalReturnTask, HumanEvalTask
):
    """Total-return preset: mode "random", num_expenses [3, 5], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Settings that define this variant; everything else is forwarded as given.
        preset = dict(num_expenses=[3, 5], budget=150000, mode="random")
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            **preset,
        )
386
+
387
+
388
class FilterRandomExpensesAndFindTotalReturnMediumTask(FilterExpensesAndFindTotalReturnTask):
    """Total-return preset: mode "random", num_expenses [6, 8], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Settings that define this variant; everything else is forwarded as given.
        preset = dict(num_expenses=[6, 8], budget=150000, mode="random")
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            **preset,
        )
405
+
406
+
407
class FilterRandomExpensesAndFindTotalReturnLargeTask(FilterExpensesAndFindTotalReturnTask):
    """Total-return preset: mode "random", num_expenses [9, 12], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Settings that define this variant; everything else is forwarded as given.
        preset = dict(num_expenses=[9, 12], budget=150000, mode="random")
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            **preset,
        )
424
+
425
+
426
class FilterTrivialExpensesAndFindTotalReturnSmallTask(
    FilterExpensesAndFindTotalReturnTask, HumanEvalTask
):
    """Total-return preset: mode "trivial", num_expenses [3, 5], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Settings that define this variant; everything else is forwarded as given.
        preset = dict(num_expenses=[3, 5], budget=150000, mode="trivial")
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            **preset,
        )
445
+
446
+
447
class FilterTrivialExpensesAndFindTotalReturnMediumTask(FilterExpensesAndFindTotalReturnTask):
    """Total-return preset: mode "trivial", num_expenses [6, 8], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Settings that define this variant; everything else is forwarded as given.
        preset = dict(num_expenses=[6, 8], budget=150000, mode="trivial")
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            **preset,
        )
464
+
465
+
466
class FilterTrivialExpensesAndFindTotalReturnLargeTask(FilterExpensesAndFindTotalReturnTask):
    """Total-return preset: mode "trivial", num_expenses [9, 12], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Settings that define this variant; everything else is forwarded as given.
        preset = dict(num_expenses=[9, 12], budget=150000, mode="trivial")
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            **preset,
        )
483
+
484
+
485
class FilterSingleItemExpensesAndFindTotalReturnSmallTask(
    FilterExpensesAndFindTotalReturnTask, HumanEvalTask
):
    """Total-return preset: mode "single_item", num_expenses [3, 5], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Settings that define this variant; everything else is forwarded as given.
        preset = dict(num_expenses=[3, 5], budget=150000, mode="single_item")
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            **preset,
        )
504
+
505
+
506
class FilterSingleItemExpensesAndFindTotalReturnMediumTask(FilterExpensesAndFindTotalReturnTask):
    """Total-return preset: mode "single_item", num_expenses [6, 8], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Settings that define this variant; everything else is forwarded as given.
        preset = dict(num_expenses=[6, 8], budget=150000, mode="single_item")
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            **preset,
        )
523
+
524
+
525
class FilterSingleItemExpensesAndFindTotalReturnLargeTask(FilterExpensesAndFindTotalReturnTask):
    """Total-return preset: mode "single_item", num_expenses [9, 12], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Settings that define this variant; everything else is forwarded as given.
        preset = dict(num_expenses=[9, 12], budget=150000, mode="single_item")
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            **preset,
        )
542
+
543
+
544
class FilterSingleItemUniformExpensesAndFindTotalReturnSmallTask(
    FilterExpensesAndFindTotalReturnTask, HumanEvalTask
):
    """Total-return preset: mode "single_item_uniform", num_expenses [3, 5], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Settings that define this variant; everything else is forwarded as given.
        preset = dict(num_expenses=[3, 5], budget=150000, mode="single_item_uniform")
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            **preset,
        )
563
+
564
+
565
class FilterSingleItemUniformExpensesAndFindTotalReturnMediumTask(
    FilterExpensesAndFindTotalReturnTask
):
    """Total-return preset: mode "single_item_uniform", num_expenses [6, 8], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Settings that define this variant; everything else is forwarded as given.
        preset = dict(num_expenses=[6, 8], budget=150000, mode="single_item_uniform")
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            **preset,
        )
584
+
585
+
586
class FilterSingleItemUniformExpensesAndFindTotalReturnLargeTask(
    FilterExpensesAndFindTotalReturnTask
):
    """Total-return preset: mode "single_item_uniform", num_expenses [9, 12], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Settings that define this variant; everything else is forwarded as given.
        preset = dict(num_expenses=[9, 12], budget=150000, mode="single_item_uniform")
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            **preset,
        )
605
+
606
+
607
class FilterTwoItemsUniformExpensesAndFindTotalReturnSmallTask(
    FilterExpensesAndFindTotalReturnTask, HumanEvalTask
):
    """Total-return preset: mode "n_items" with 2 items, num_expenses [3, 5], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Settings that define this variant; everything else is forwarded as given.
        preset = dict(
            num_expenses=[3, 5],
            budget=150000,
            mode="n_items",
            num_items_uniform=2,
        )
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            **preset,
        )
627
+
628
+
629
class FilterThreeItemsUniformExpensesAndFindTotalReturnMediumTask(
    FilterExpensesAndFindTotalReturnTask
):
    """Total-return preset: mode "n_items" with 3 items, num_expenses [6, 8], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Settings that define this variant; everything else is forwarded as given.
        preset = dict(
            num_expenses=[6, 8],
            budget=150000,
            mode="n_items",
            num_items_uniform=3,
        )
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            **preset,
        )
649
+
650
+
651
class FilterThreeItemsUniformExpensesAndFindTotalReturnLargeTask(
    FilterExpensesAndFindTotalReturnTask
):
    """Total-return preset: mode "n_items" with 3 items, num_expenses [9, 12], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Settings that define this variant; everything else is forwarded as given.
        preset = dict(
            num_expenses=[9, 12],
            budget=150000,
            mode="n_items",
            num_items_uniform=3,
        )
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            **preset,
        )
671
+
672
+
673
class FilterExpensesAndSelectInvestmentsTask(FilterExpensesAndAllocateInvestmentsTask):
    """Variant where the answer to give is the list of selected investments."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: list[AbstractServiceNowTask] = None,
        num_expenses: list[int] = [3, 4],
        budget: int = 150000,
        mode: str = "random",
        num_items_uniform: int = None,
        level: int = 2,
    ):
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            mode=mode,
            budget=budget,
            num_expenses=num_expenses,
            num_items_uniform=num_items_uniform,
            # This variant always uses the same answer format.
            answer_format="investments_only",
        )
        # The description is built after the parent constructor because it
        # reads attributes set there (protocol name, hashtag, budget).
        self.task_description = f'Follow protocol "{self.protocol_name}" (located in the "Company Protocols" knowledge base) to allocate investments to the expenses with short description containing {self.expense_hashtag} to maximize returns while fitting inside the budget of {self.budget}$. Give selected investments only. '

    def _setup_list(self) -> None:
        """Run the parent setup, then append a chat-message subtask listing the correct investments."""
        super()._setup_list()
        message = f"The correct investments to select are: {', '.join(self.correct_investments)}"
        send_selection = SendChatMessageForBudgetAllocationTask(
            instance=self.instance,
            message=message,
            used_in_level_2=True,
            is_validated=False,
            budget=self.budget,
            answer_format=self.answer_format,
            level=self.level,
        )
        self.tasks.append(send_selection)

    def validate(self, page: Page, chat_messages: List[str]) -> Tuple[float, bool, str, dict]:
        """Check the selected investments first; only when they are right, run FilterAndDoTask's validation."""
        reward, done, message, info = self.check_correct_investments_sent_in_chat(
            page, chat_messages
        )
        selection_ok = reward == 1 and done
        if not selection_ok:
            return reward, done, message, info
        return FilterAndDoTask.validate(self, page, chat_messages)
721
+
722
+
723
class FilterRandomExpensesAndSelectInvestmentsSmallTask(
    FilterExpensesAndSelectInvestmentsTask, HumanEvalTask
):
    """Select-investments preset: mode "random", num_expenses [3, 5], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Settings that define this variant; everything else is forwarded as given.
        preset = dict(num_expenses=[3, 5], budget=150000, mode="random")
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            **preset,
        )
742
+
743
+
744
class FilterRandomExpensesAndSelectInvestmentsMediumTask(FilterExpensesAndSelectInvestmentsTask):
    """Select-investments preset: mode "random", num_expenses [6, 8], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Settings that define this variant; everything else is forwarded as given.
        preset = dict(num_expenses=[6, 8], budget=150000, mode="random")
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            **preset,
        )
761
+
762
+
763
class FilterRandomExpensesAndSelectInvestmentsLargeTask(FilterExpensesAndSelectInvestmentsTask):
    """Select-investments preset: mode "random", num_expenses [9, 12], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Settings that define this variant; everything else is forwarded as given.
        preset = dict(num_expenses=[9, 12], budget=150000, mode="random")
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            **preset,
        )
780
+
781
+
782
class FilterTrivialExpensesAndSelectInvestmentsSmallTask(
    FilterExpensesAndSelectInvestmentsTask, HumanEvalTask
):
    """Select-investments preset: mode "trivial", num_expenses [3, 5], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Settings that define this variant; everything else is forwarded as given.
        preset = dict(num_expenses=[3, 5], budget=150000, mode="trivial")
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            **preset,
        )
801
+
802
+
803
class FilterTrivialExpensesAndSelectInvestmentsMediumTask(FilterExpensesAndSelectInvestmentsTask):
    """Select-investments preset: mode "trivial", num_expenses [6, 8], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Settings that define this variant; everything else is forwarded as given.
        preset = dict(num_expenses=[6, 8], budget=150000, mode="trivial")
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            **preset,
        )
820
+
821
+
822
class FilterTrivialExpensesAndSelectInvestmentsLargeTask(FilterExpensesAndSelectInvestmentsTask):
    """Select-investments preset: mode "trivial", num_expenses [9, 12], budget 150000."""

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        # Settings that define this variant; everything else is forwarded as given.
        preset = dict(num_expenses=[9, 12], budget=150000, mode="trivial")
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            level=level,
            **preset,
        )
839
+
840
+
841
class FilterSingleItemExpensesAndSelectInvestmentsSmallTask(
    FilterExpensesAndSelectInvestmentsTask, HumanEvalTask
):
    """Preset of FilterExpensesAndSelectInvestmentsTask, also mixing in HumanEvalTask.

    Fixes ``num_expenses=[3, 5]``, ``budget=150000`` and ``mode="single_item"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[3, 5], budget=150_000, mode="single_item")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
860
+
861
+
862
class FilterSingleItemExpensesAndSelectInvestmentsMediumTask(
    FilterExpensesAndSelectInvestmentsTask
):
    """Preset of FilterExpensesAndSelectInvestmentsTask.

    Fixes ``num_expenses=[6, 8]``, ``budget=150000`` and ``mode="single_item"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[6, 8], budget=150_000, mode="single_item")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
881
+
882
+
883
class FilterSingleItemExpensesAndSelectInvestmentsLargeTask(FilterExpensesAndSelectInvestmentsTask):
    """Preset of FilterExpensesAndSelectInvestmentsTask.

    Fixes ``num_expenses=[9, 12]``, ``budget=150000`` and ``mode="single_item"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[9, 12], budget=150_000, mode="single_item")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
900
+
901
+
902
class FilterSingleItemUniformExpensesAndSelectInvestmentsSmallTask(
    FilterExpensesAndSelectInvestmentsTask, HumanEvalTask
):
    """Preset of FilterExpensesAndSelectInvestmentsTask, also mixing in HumanEvalTask.

    Fixes ``num_expenses=[3, 5]``, ``budget=150000`` and
    ``mode="single_item_uniform"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[3, 5], budget=150_000, mode="single_item_uniform")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
921
+
922
+
923
class FilterSingleItemUniformExpensesAndSelectInvestmentsMediumTask(
    FilterExpensesAndSelectInvestmentsTask
):
    """Preset of FilterExpensesAndSelectInvestmentsTask.

    Fixes ``num_expenses=[6, 8]``, ``budget=150000`` and
    ``mode="single_item_uniform"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[6, 8], budget=150_000, mode="single_item_uniform")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
942
+
943
+
944
class FilterSingleItemUniformExpensesAndSelectInvestmentsLargeTask(
    FilterExpensesAndSelectInvestmentsTask
):
    """Preset of FilterExpensesAndSelectInvestmentsTask.

    Fixes ``num_expenses=[9, 12]``, ``budget=150000`` and
    ``mode="single_item_uniform"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[9, 12], budget=150_000, mode="single_item_uniform")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
963
+
964
+
965
class FilterTwoItemsUniformExpensesAndSelectInvestmentsSmallTask(
    FilterExpensesAndSelectInvestmentsTask, HumanEvalTask
):
    """Preset of FilterExpensesAndSelectInvestmentsTask, also mixing in HumanEvalTask.

    Fixes ``num_expenses=[3, 5]``, ``budget=150000``, ``mode="n_items"`` and
    ``num_items_uniform=2``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(
            num_expenses=[3, 5],
            budget=150_000,
            mode="n_items",
            num_items_uniform=2,
        )
        super().__init__(seed, instance, fixed_config, level=level, **preset)
985
+
986
+
987
class FilterThreeItemsUniformExpensesAndSelectInvestmentsMediumTask(
    FilterExpensesAndSelectInvestmentsTask
):
    """Preset of FilterExpensesAndSelectInvestmentsTask.

    Fixes ``num_expenses=[6, 8]``, ``budget=150000``, ``mode="n_items"`` and
    ``num_items_uniform=3``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(
            num_expenses=[6, 8],
            budget=150_000,
            mode="n_items",
            num_items_uniform=3,
        )
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1007
+
1008
+
1009
class FilterThreeItemsUniformExpensesAndSelectInvestmentsLargeTask(
    FilterExpensesAndSelectInvestmentsTask
):
    """Preset of FilterExpensesAndSelectInvestmentsTask.

    Fixes ``num_expenses=[9, 12]``, ``budget=150000``, ``mode="n_items"`` and
    ``num_items_uniform=3``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(
            num_expenses=[9, 12],
            budget=150_000,
            mode="n_items",
            num_items_uniform=3,
        )
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1029
+
1030
+
1031
class FilterExpensesFindTotalReturnAndSelectInvestmentsTask(FilterExpensesAndFindTotalReturnTask):
    """Budget-allocation task answered with the selected investments and their total return.

    The parent is configured with ``answer_format="total_return_and_investments"``.
    Validation runs the investments chat check, then the total-return chat
    check, and only then defers to ``FilterAndDoTask.validate``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        num_expenses: list[int] = None,
        budget: int = 150000,
        mode="random",
        num_items_uniform: int = None,
        level: int = 2,
    ):
        """Forward the configuration to the parent task and set the task description.

        Args:
            seed: forwarded positionally to the parent constructor.
            instance: forwarded positionally to the parent constructor.
            fixed_config: forwarded positionally to the parent constructor.
            num_expenses: forwarded to the parent; ``None`` means ``[3, 4]``.
            budget: forwarded to the parent; also shown in the task description.
            mode: forwarded to the parent.
            num_items_uniform: forwarded to the parent.
            level: forwarded to the parent.
        """
        # Build the default per call: a list literal in the signature would be
        # a single object shared by every instance created without the argument.
        if num_expenses is None:
            num_expenses = [3, 4]
        super().__init__(
            seed,
            instance,
            fixed_config,
            num_expenses=num_expenses,
            budget=budget,
            mode=mode,
            num_items_uniform=num_items_uniform,
            answer_format="total_return_and_investments",
            level=level,
        )
        self.task_description = f'Follow protocol "{self.protocol_name}" (located in the "Company Protocols" knowledge base) to allocate investments to the expenses with short description containing {self.expense_hashtag} to maximize returns while fitting inside the budget of {self.budget}$. Give selected investments and total return. '

    def _setup_list(self) -> None:
        """Run the parent setup, then replace ``self.tasks`` with a single chat-message subtask."""
        super()._setup_list()
        message = f"The correct investments to select are: {', '.join(self.correct_investments)} and their total return is {self.max_return}$"
        self.tasks = [
            SendChatMessageForBudgetAllocationTask(
                instance=self.instance,
                message=message,
                used_in_level_2=True,
                is_validated=False,
                budget=self.budget,
                answer_format=self.answer_format,
                level=self.level,
            )
        ]

    def validate(self, page: Page, chat_messages: List[str]) -> Tuple[float, bool, str, dict]:
        """Return the first failing chat check, else the ``FilterAndDoTask`` validation result."""
        chat_checks = (
            self.check_correct_investments_sent_in_chat,
            self.check_total_return,
        )
        for chat_check in chat_checks:
            reward, done, message, info = chat_check(page, chat_messages)
            # A check passes only with full reward AND done; anything else is
            # returned to the caller as-is.
            if reward != 1 or not done:
                return reward, done, message, info

        return FilterAndDoTask.validate(self, page, chat_messages)
1083
+
1084
+
1085
class FilterRandomExpensesFindTotalReturnAndSelectInvestmentsSmallTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask, HumanEvalTask
):
    """Preset of FilterExpensesFindTotalReturnAndSelectInvestmentsTask, also mixing in HumanEvalTask.

    Fixes ``num_expenses=[3, 5]``, ``budget=150000`` and ``mode="random"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[3, 5], budget=150_000, mode="random")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1104
+
1105
+
1106
class FilterRandomExpensesFindTotalReturnAndSelectInvestmentsMediumTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask
):
    """Preset of FilterExpensesFindTotalReturnAndSelectInvestmentsTask.

    Fixes ``num_expenses=[6, 8]``, ``budget=150000`` and ``mode="random"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[6, 8], budget=150_000, mode="random")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1125
+
1126
+
1127
class FilterRandomExpensesFindTotalReturnAndSelectInvestmentsLargeTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask
):
    """Preset of FilterExpensesFindTotalReturnAndSelectInvestmentsTask.

    Fixes ``num_expenses=[9, 12]``, ``budget=150000`` and ``mode="random"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[9, 12], budget=150_000, mode="random")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1146
+
1147
+
1148
class FilterTrivialExpensesFindTotalReturnAndSelectInvestmentsSmallTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask, HumanEvalTask
):
    """Preset of FilterExpensesFindTotalReturnAndSelectInvestmentsTask, also mixing in HumanEvalTask.

    Fixes ``num_expenses=[3, 5]``, ``budget=150000`` and ``mode="trivial"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[3, 5], budget=150_000, mode="trivial")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1167
+
1168
+
1169
class FilterTrivialExpensesFindTotalReturnAndSelectInvestmentsMediumTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask
):
    """Preset of FilterExpensesFindTotalReturnAndSelectInvestmentsTask.

    Fixes ``num_expenses=[6, 8]``, ``budget=150000`` and ``mode="trivial"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[6, 8], budget=150_000, mode="trivial")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1188
+
1189
+
1190
class FilterTrivialExpensesFindTotalReturnAndSelectInvestmentsLargeTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask
):
    """Preset of FilterExpensesFindTotalReturnAndSelectInvestmentsTask.

    Fixes ``num_expenses=[9, 12]``, ``budget=150000`` and ``mode="trivial"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[9, 12], budget=150_000, mode="trivial")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1209
+
1210
+
1211
class FilterSingleItemExpensesFindTotalReturnAndSelectInvestmentsSmallTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask, HumanEvalTask
):
    """Preset of FilterExpensesFindTotalReturnAndSelectInvestmentsTask, also mixing in HumanEvalTask.

    Fixes ``num_expenses=[3, 5]``, ``budget=150000`` and ``mode="single_item"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[3, 5], budget=150_000, mode="single_item")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1230
+
1231
+
1232
class FilterSingleItemExpensesFindTotalReturnAndSelectInvestmentsMediumTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask
):
    """Preset of FilterExpensesFindTotalReturnAndSelectInvestmentsTask.

    Fixes ``num_expenses=[6, 8]``, ``budget=150000`` and ``mode="single_item"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[6, 8], budget=150_000, mode="single_item")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1251
+
1252
+
1253
class FilterSingleItemExpensesFindTotalReturnAndSelectInvestmentsLargeTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask
):
    """Preset of FilterExpensesFindTotalReturnAndSelectInvestmentsTask.

    Fixes ``num_expenses=[9, 12]``, ``budget=150000`` and ``mode="single_item"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[9, 12], budget=150_000, mode="single_item")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1272
+
1273
+
1274
class FilterSingleItemUniformExpensesFindTotalReturnAndSelectInvestmentsSmallTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask, HumanEvalTask
):
    """Preset of FilterExpensesFindTotalReturnAndSelectInvestmentsTask, also mixing in HumanEvalTask.

    Fixes ``num_expenses=[3, 5]``, ``budget=150000`` and
    ``mode="single_item_uniform"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[3, 5], budget=150_000, mode="single_item_uniform")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1293
+
1294
+
1295
class FilterSingleItemUniformExpensesFindTotalReturnAndSelectInvestmentsMediumTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask
):
    """Preset of FilterExpensesFindTotalReturnAndSelectInvestmentsTask.

    Fixes ``num_expenses=[6, 8]``, ``budget=150000`` and
    ``mode="single_item_uniform"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[6, 8], budget=150_000, mode="single_item_uniform")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1314
+
1315
+
1316
class FilterSingleItemUniformExpensesFindTotalReturnAndSelectInvestmentsLargeTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask
):
    """Preset of FilterExpensesFindTotalReturnAndSelectInvestmentsTask.

    Fixes ``num_expenses=[9, 12]``, ``budget=150000`` and
    ``mode="single_item_uniform"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[9, 12], budget=150_000, mode="single_item_uniform")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1335
+
1336
+
1337
class FilterTwoItemsUniformExpensesFindTotalReturnAndSelectInvestmentsSmallTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask, HumanEvalTask
):
    """Small preset of FilterExpensesFindTotalReturnAndSelectInvestmentsTask, also mixing in HumanEvalTask.

    Fixes ``num_expenses=[3, 5]``, ``budget=150000``, ``mode="n_items"`` and
    ``num_items_uniform=2``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        super().__init__(
            seed,
            instance,
            fixed_config,
            # Was [9, 12] (the "Large" range); every other *SmallTask preset in
            # this file uses [3, 5], so this looks like a copy-paste slip.
            num_expenses=[3, 5],
            budget=150000,
            mode="n_items",
            level=level,
            num_items_uniform=2,
        )
1357
+
1358
+
1359
class FilterThreeItemsUniformExpensesFindTotalReturnAndSelectInvestmentsMediumTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask
):
    """Medium preset of FilterExpensesFindTotalReturnAndSelectInvestmentsTask.

    Fixes ``num_expenses=[6, 8]``, ``budget=150000``, ``mode="n_items"`` and
    ``num_items_uniform=3``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ):
        super().__init__(
            seed,
            instance,
            fixed_config,
            # Was [9, 12] (the "Large" range); every other *MediumTask preset in
            # this file uses [6, 8], so this looks like a copy-paste slip.
            num_expenses=[6, 8],
            budget=150000,
            mode="n_items",
            level=level,
            num_items_uniform=3,
        )
1379
+
1380
+
1381
class FilterThreeItemsUniformExpensesFindTotalReturnAndSelectInvestmentsLargeTask(
    FilterExpensesFindTotalReturnAndSelectInvestmentsTask
):
    """Preset of FilterExpensesFindTotalReturnAndSelectInvestmentsTask.

    Fixes ``num_expenses=[9, 12]``, ``budget=150000``, ``mode="n_items"`` and
    ``num_items_uniform=3``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(
            num_expenses=[9, 12],
            budget=150_000,
            mode="n_items",
            num_items_uniform=3,
        )
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1401
+
1402
+
1403
class FilterExpenseLinesAndDeleteWrongInvestments(FilterExpensesAndAllocateInvestmentsTask):
    """Budget-allocation task solved by deleting the expense lines that were not selected.

    The parent is configured with ``answer_format="cleanup"``. Validation
    queries the ``fm_expense_line`` table for the lines still matching the
    task hashtag and compares them with the expected solution.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        num_expenses: List[int] = None,
        budget: int = 150000,
        mode: str = "random",
        num_items_uniform: int = None,
        level: int = 2,
    ) -> None:
        """Forward the configuration to the parent task and set the task description.

        Args:
            seed: forwarded to the parent constructor.
            instance: forwarded to the parent constructor.
            fixed_config: forwarded to the parent constructor.
            num_expenses: forwarded to the parent; ``None`` means ``[3, 4]``.
            budget: forwarded to the parent; also shown in the task description.
            mode: forwarded to the parent; selects the validation strategy.
            num_items_uniform: forwarded to the parent.
            level: forwarded to the parent.
        """
        # Build the default per call: a list literal in the signature would be
        # a single object shared by every instance created without the argument.
        if num_expenses is None:
            num_expenses = [3, 4]
        super().__init__(
            seed=seed,
            instance=instance,
            fixed_config=fixed_config,
            num_expenses=num_expenses,
            budget=budget,
            mode=mode,
            answer_format="cleanup",
            num_items_uniform=num_items_uniform,
            level=level,
        )
        self.task_description = f'Follow protocol "{self.protocol_name}" (located in the "Company Protocols" knowledge base) to allocate investments to the expenses with short description containing {self.expense_hashtag} to maximize returns while fitting inside the budget of {self.budget}$. Delete the investments that were not selected. '

    def _setup_list(self) -> None:
        """Run the parent setup, then append one delete subtask per incorrect investment."""
        super()._setup_list()
        # in modes "n_items", "single_item_uniform", this yields one of many valid solutions
        for i, expense_line_number in enumerate(self.incorrect_investments):
            # Only the first delete subtask keeps its description.
            skip_description = i > 0
            # NOTE(review): ``i`` is the position inside ``incorrect_investments``;
            # if ``expense_line_sys_ids`` is ordered like ALL created expense lines
            # this picks the sys_id of a different record -- confirm in the parent.
            expense_line_sys_id = self.expense_line_sys_ids[i]
            self.tasks.append(
                DeleteExpenseLineKnapsack(
                    instance=self.instance,
                    record_number=expense_line_number,
                    record_sys_id=expense_line_sys_id,
                    fixed_config={
                        "field_name": "number",
                        "field_value": f"{expense_line_number}",
                    },
                    used_in_level_2=True,
                    is_validated=False,
                    budget=self.budget,
                    answer_format=self.answer_format,
                    level=self.level,
                    skip_description=skip_description,
                )
            )

    def validate(self, page: Page, chat_messages: List[str]) -> Tuple[float, bool, str, dict]:
        """Check the remaining expense lines, then defer to ``FilterAndDoTask.validate``.

        Returns ``(0, False, "", {"message": ...})`` as soon as a check fails.
        """
        expenses = table_api_call(
            instance=self.instance,
            table="fm_expense_line",
            params={
                "sysparm_query": f"short_descriptionLIKE{self.expense_hashtag}",
                "sysparm_fields": "number,amount,sys_id",
            },
        )["result"]

        if self.mode in ["random", "trivial", "single_item"]:
            # Built once instead of once per loop iteration.
            remaining_numbers = {expense["number"] for expense in expenses}
            # Check that the correct investments have been selected
            for investment in self.correct_investments:
                if investment not in remaining_numbers:
                    return (
                        0,
                        False,
                        "",
                        {"message": "Investment missing from selected list."},
                    )
            # Check that the incorrect investments have not been selected
            for investment in self.incorrect_investments:
                if investment in remaining_numbers:
                    return (
                        0,
                        False,
                        "",
                        {"message": "Incorrect investment selected."},
                    )
        # In those settings, many answers are possible, it's only a matter of respecting the number of items in the solution
        elif self.mode in ["n_items", "single_item_uniform"]:
            # NOTE(review): the "single_item_uniform" presets in this file do not
            # pass ``num_items_uniform``; unless the parent sets it, this compares
            # against None and always fails -- confirm in the parent class.
            if len(expenses) != self.num_items_uniform:
                return (
                    0,
                    False,
                    "",
                    {"message": "Incorrect number of investments selected."},
                )

        return FilterAndDoTask.validate(self, page, chat_messages)
1493
+
1494
+
1495
class FilterRandomExpensesAndDeleteWrongInvestmentsSmallTask(
    FilterExpenseLinesAndDeleteWrongInvestments, HumanEvalTask
):
    """Preset of FilterExpenseLinesAndDeleteWrongInvestments, also mixing in HumanEvalTask.

    Fixes ``num_expenses=[3, 5]``, ``budget=150000`` and ``mode="random"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[3, 5], budget=150_000, mode="random")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1514
+
1515
+
1516
class FilterRandomExpensesAndDeleteWrongInvestmentsMediumTask(
    FilterExpenseLinesAndDeleteWrongInvestments
):
    """Preset of FilterExpenseLinesAndDeleteWrongInvestments.

    Fixes ``num_expenses=[6, 8]``, ``budget=150000`` and ``mode="random"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[6, 8], budget=150_000, mode="random")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1535
+
1536
+
1537
class FilterRandomExpensesAndDeleteWrongInvestmentsLargeTask(
    FilterExpenseLinesAndDeleteWrongInvestments
):
    """Preset of FilterExpenseLinesAndDeleteWrongInvestments.

    Fixes ``num_expenses=[9, 12]``, ``budget=150000`` and ``mode="random"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[9, 12], budget=150_000, mode="random")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1556
+
1557
+
1558
class FilterSingleItemExpensesAndDeleteWrongInvestmentsSmallTask(
    FilterExpenseLinesAndDeleteWrongInvestments, HumanEvalTask
):
    """Preset of FilterExpenseLinesAndDeleteWrongInvestments, also mixing in HumanEvalTask.

    Fixes ``num_expenses=[3, 5]``, ``budget=150000`` and ``mode="single_item"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[3, 5], budget=150_000, mode="single_item")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1577
+
1578
+
1579
class FilterSingleItemExpensesAndDeleteWrongInvestmentsMediumTask(
    FilterExpenseLinesAndDeleteWrongInvestments
):
    """Preset of FilterExpenseLinesAndDeleteWrongInvestments.

    Fixes ``num_expenses=[6, 8]``, ``budget=150000`` and ``mode="single_item"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[6, 8], budget=150_000, mode="single_item")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1598
+
1599
+
1600
class FilterSingleItemExpensesAndDeleteWrongInvestmentsLargeTask(
    FilterExpenseLinesAndDeleteWrongInvestments
):
    """Preset of FilterExpenseLinesAndDeleteWrongInvestments.

    Fixes ``num_expenses=[9, 12]``, ``budget=150000`` and ``mode="single_item"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[9, 12], budget=150_000, mode="single_item")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1619
+
1620
+
1621
class FilterSingleItemUniformExpensesAndDeleteWrongInvestmentsSmallTask(
    FilterExpenseLinesAndDeleteWrongInvestments, HumanEvalTask
):
    """Preset of FilterExpenseLinesAndDeleteWrongInvestments, also mixing in HumanEvalTask.

    Fixes ``num_expenses=[3, 5]``, ``budget=150000`` and
    ``mode="single_item_uniform"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[3, 5], budget=150_000, mode="single_item_uniform")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1640
+
1641
+
1642
class FilterSingleItemUniformExpensesAndDeleteWrongInvestmentsMediumTask(
    FilterExpenseLinesAndDeleteWrongInvestments
):
    """Preset of FilterExpenseLinesAndDeleteWrongInvestments.

    Fixes ``num_expenses=[6, 8]``, ``budget=150000`` and
    ``mode="single_item_uniform"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[6, 8], budget=150_000, mode="single_item_uniform")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1661
+
1662
+
1663
class FilterSingleItemUniformExpensesAndDeleteWrongInvestmentsLargeTask(
    FilterExpenseLinesAndDeleteWrongInvestments
):
    """Preset of FilterExpenseLinesAndDeleteWrongInvestments.

    Fixes ``num_expenses=[9, 12]``, ``budget=150000`` and
    ``mode="single_item_uniform"``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(num_expenses=[9, 12], budget=150_000, mode="single_item_uniform")
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1682
+
1683
+
1684
class FilterTwoItemsUniformExpensesAndDeleteWrongInvestmentsSmallTask(
    FilterExpenseLinesAndDeleteWrongInvestments, HumanEvalTask
):
    """Preset of FilterExpenseLinesAndDeleteWrongInvestments, also mixing in HumanEvalTask.

    Fixes ``num_expenses=[3, 5]``, ``budget=150000``, ``mode="n_items"`` and
    ``num_items_uniform=2``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(
            num_expenses=[3, 5],
            budget=150_000,
            mode="n_items",
            num_items_uniform=2,
        )
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1704
+
1705
+
1706
class FilterThreeItemsUniformExpensesAndDeleteWrongInvestmentsMediumTask(
    FilterExpenseLinesAndDeleteWrongInvestments
):
    """Preset of FilterExpenseLinesAndDeleteWrongInvestments.

    Fixes ``num_expenses=[6, 8]``, ``budget=150000``, ``mode="n_items"`` and
    ``num_items_uniform=3``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(
            num_expenses=[6, 8],
            budget=150_000,
            mode="n_items",
            num_items_uniform=3,
        )
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1726
+
1727
+
1728
class FilterThreeItemsUniformExpensesAndDeleteWrongInvestmentsLargeTask(
    FilterExpenseLinesAndDeleteWrongInvestments
):
    """Preset of FilterExpenseLinesAndDeleteWrongInvestments.

    Fixes ``num_expenses=[9, 12]``, ``budget=150000``, ``mode="n_items"`` and
    ``num_items_uniform=3``.
    """

    def __init__(
        self,
        seed: int,
        instance: SNowInstance = None,
        fixed_config: List[AbstractServiceNowTask] = None,
        level: int = 2,
    ) -> None:
        # Everything except the preset below is forwarded unchanged to the base class.
        preset = dict(
            num_expenses=[9, 12],
            budget=150_000,
            mode="n_items",
            num_items_uniform=3,
        )
        super().__init__(seed, instance, fixed_config, level=level, **preset)
1748
+
1749
+
1750
# Snapshot the namespace first so the comprehension below iterates over a
# mapping that cannot change while it is being walked.
local_vars = locals().copy()

# Every FilterAndDoTask subclass in the snapshot, minus FilterAndDoTask itself
# and the intermediate bases listed in the tuple below.
__TASKS__ = [
    candidate
    for candidate in local_vars.values()
    if isinstance(candidate, type)
    and issubclass(candidate, FilterAndDoTask)
    and all(
        candidate is not excluded
        for excluded in (
            FilterAndDoTask,
            FilterExpensesAndAllocateInvestmentsTask,
            FilterExpensesAndFindTotalReturnTask,
            FilterExpenseLinesAndDeleteWrongInvestments,
            FilterExpensesFindTotalReturnAndSelectInvestmentsTask,
            FilterExpensesAndSelectInvestmentsTask,
        )
    )
]