browsergym-workarena 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. browsergym/workarena/__init__.py +13 -1
  2. browsergym/workarena/api/category.py +74 -0
  3. browsergym/workarena/api/change_request.py +87 -0
  4. browsergym/workarena/api/computer_asset.py +90 -0
  5. browsergym/workarena/api/cost_center.py +19 -0
  6. browsergym/workarena/api/expense_line.py +89 -0
  7. browsergym/workarena/api/incident.py +45 -0
  8. browsergym/workarena/api/knowledge.py +29 -0
  9. browsergym/workarena/api/problem.py +90 -0
  10. browsergym/workarena/api/report.py +183 -0
  11. browsergym/workarena/api/requested_items.py +63 -0
  12. browsergym/workarena/api/user.py +11 -8
  13. browsergym/workarena/api/utils.py +47 -3
  14. browsergym/workarena/config.py +21 -1
  15. browsergym/workarena/data_files/setup_files/forms/expected_incident_form_fields.json +1 -1
  16. browsergym/workarena/data_files/setup_files/forms/expected_request_item_form_fields.json +1 -0
  17. browsergym/workarena/data_files/setup_files/knowledge/protocols.json +46 -0
  18. browsergym/workarena/data_files/setup_files/knowledge/test.html +1 -0
  19. browsergym/workarena/data_files/setup_files/lists/expected_asset_list_columns.json +2 -24
  20. browsergym/workarena/data_files/setup_files/lists/expected_change_request_list_columns.json +4 -40
  21. browsergym/workarena/data_files/setup_files/lists/expected_expense_line_list_columns.json +12 -0
  22. browsergym/workarena/data_files/setup_files/lists/expected_hardware_list_columns.json +1 -42
  23. browsergym/workarena/data_files/setup_files/lists/expected_incident_list_columns.json +2 -18
  24. browsergym/workarena/data_files/setup_files/lists/expected_problem_list_columns.json +12 -0
  25. browsergym/workarena/data_files/setup_files/lists/expected_requested_items_list_columns.json +12 -0
  26. browsergym/workarena/data_files/setup_files/lists/expected_service_catalog_list_columns.json +2 -19
  27. browsergym/workarena/data_files/setup_files/lists/expected_user_list_columns.json +3 -50
  28. browsergym/workarena/data_files/task_configs/all_menu.json +1 -1
  29. browsergym/workarena/data_files/task_configs/dashboard_retrieval_minmax_task.json +1 -1
  30. browsergym/workarena/data_files/task_configs/dashboard_retrieval_value_task.json +1 -1
  31. browsergym/workarena/data_files/task_configs/filter_service_catalog_item_list_task.json +1 -1
  32. browsergym/workarena/data_files/task_configs/impersonation_users.json +1 -1
  33. browsergym/workarena/data_files/task_configs/report_retrieval_minmax_task.json +1 -1
  34. browsergym/workarena/data_files/task_configs/report_retrieval_value_task.json +1 -1
  35. browsergym/workarena/human_eval/console.js +176 -0
  36. browsergym/workarena/human_eval/tool.py +366 -0
  37. browsergym/workarena/install.py +81 -20
  38. browsergym/workarena/tasks/base.py +55 -20
  39. browsergym/workarena/tasks/comp_building_block.py +4 -0
  40. browsergym/workarena/tasks/compositional/__init__.py +76 -0
  41. browsergym/workarena/tasks/compositional/base.py +364 -0
  42. browsergym/workarena/tasks/compositional/dash_do_base.py +1366 -0
  43. browsergym/workarena/tasks/compositional/dash_do_catalog.py +1127 -0
  44. browsergym/workarena/tasks/compositional/dash_do_catalog_infeasible.py +2047 -0
  45. browsergym/workarena/tasks/compositional/dash_do_create_incident.py +403 -0
  46. browsergym/workarena/tasks/compositional/dash_do_create_incident_infeasible.py +278 -0
  47. browsergym/workarena/tasks/compositional/dash_do_create_problem.py +336 -0
  48. browsergym/workarena/tasks/compositional/dash_do_create_problem_infeasible.py +235 -0
  49. browsergym/workarena/tasks/compositional/dash_do_filter.py +1600 -0
  50. browsergym/workarena/tasks/compositional/dash_do_request_item.py +1315 -0
  51. browsergym/workarena/tasks/compositional/dash_do_request_item_infeasible.py +693 -0
  52. browsergym/workarena/tasks/compositional/delete_record.py +341 -0
  53. browsergym/workarena/tasks/compositional/edit_knowledge_base.py +457 -0
  54. browsergym/workarena/tasks/compositional/expense_management.py +598 -0
  55. browsergym/workarena/tasks/compositional/filter_and_do.py +139 -0
  56. browsergym/workarena/tasks/compositional/find_and_order_item.py +345 -0
  57. browsergym/workarena/tasks/compositional/manage_change_request_schedule.py +1417 -0
  58. browsergym/workarena/tasks/compositional/mark_duplicate_problems.py +499 -0
  59. browsergym/workarena/tasks/compositional/maximize_investment_return.py +1763 -0
  60. browsergym/workarena/tasks/compositional/navigate_and_do.py +1151 -0
  61. browsergym/workarena/tasks/compositional/navigate_and_do_infeasible.py +2100 -0
  62. browsergym/workarena/tasks/compositional/offboard_user.py +207 -0
  63. browsergym/workarena/tasks/compositional/onboard_user.py +226 -0
  64. browsergym/workarena/tasks/compositional/update_task.py +145 -0
  65. browsergym/workarena/tasks/compositional/utils/curriculum.py +215 -0
  66. browsergym/workarena/tasks/compositional/utils/infeasible_configs.py +151 -0
  67. browsergym/workarena/tasks/compositional/utils/knapsack.py +192 -0
  68. browsergym/workarena/tasks/compositional/warranty_check.py +227 -0
  69. browsergym/workarena/tasks/compositional/work_assignment.py +804 -0
  70. browsergym/workarena/tasks/compositional/workload_balancing.py +396 -0
  71. browsergym/workarena/tasks/dashboard.py +188 -8
  72. browsergym/workarena/tasks/form.py +1024 -232
  73. browsergym/workarena/tasks/knowledge.py +216 -25
  74. browsergym/workarena/tasks/list.py +519 -102
  75. browsergym/workarena/tasks/mark_duplicate_problem.py +171 -0
  76. browsergym/workarena/tasks/navigation.py +55 -13
  77. browsergym/workarena/tasks/scripts/extract_all_menu_items.py +9 -2
  78. browsergym/workarena/tasks/scripts/generate_dashboard_configs.py +6 -5
  79. browsergym/workarena/tasks/scripts/service_catalog.py +2 -1
  80. browsergym/workarena/tasks/scripts/validate.py +8 -2
  81. browsergym/workarena/tasks/send_chat_message.py +90 -0
  82. browsergym/workarena/tasks/service_catalog.py +94 -26
  83. browsergym/workarena/tasks/utils/form.py +1 -4
  84. browsergym/workarena/tasks/utils/private_tasks.py +63 -0
  85. browsergym/workarena/tasks/utils/utils.py +13 -0
  86. {browsergym_workarena-0.2.1.dist-info → browsergym_workarena-0.3.0.dist-info}/METADATA +19 -18
  87. browsergym_workarena-0.3.0.dist-info/RECORD +138 -0
  88. {browsergym_workarena-0.2.1.dist-info → browsergym_workarena-0.3.0.dist-info}/entry_points.txt +1 -0
  89. browsergym_workarena-0.2.1.dist-info/RECORD +0 -85
  90. {browsergym_workarena-0.2.1.dist-info → browsergym_workarena-0.3.0.dist-info}/WHEEL +0 -0
  91. {browsergym_workarena-0.2.1.dist-info → browsergym_workarena-0.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -17,11 +17,16 @@ from .config import (
17
17
  # for knowledge base setup
18
18
  KB_FILEPATH,
19
19
  KB_NAME,
20
+ PROTOCOL_KB_FILEPATH,
21
+ PROTOCOL_KB_NAME,
20
22
  # For list setup
21
23
  EXPECTED_ASSET_LIST_COLUMNS_PATH,
22
24
  EXPECTED_CHANGE_REQUEST_COLUMNS_PATH,
25
+ EXPECTED_EXPENSE_LINE_COLUMNS_PATH,
23
26
  EXPECTED_HARDWARE_COLUMNS_PATH,
24
27
  EXPECTED_INCIDENT_COLUMNS_PATH,
28
+ EXPECTED_PROBLEM_COLUMNS_PATH,
29
+ EXPECTED_REQUESTED_ITEMS_COLUMNS_PATH,
25
30
  EXPECTED_SERVICE_CATALOG_COLUMNS_PATH,
26
31
  EXPECTED_USER_COLUMNS_PATH,
27
32
  # for form setup
@@ -29,6 +34,7 @@ from .config import (
29
34
  EXPECTED_HARDWARE_FORM_FIELDS_PATH,
30
35
  EXPECTED_INCIDENT_FORM_FIELDS_PATH,
31
36
  EXPECTED_PROBLEM_FORM_FIELDS_PATH,
37
+ EXPECTED_REQUEST_ITEM_FORM_FIELDS_PATH,
32
38
  EXPECTED_USER_FORM_FIELDS_PATH,
33
39
  # Patch flag for reports
34
40
  REPORT_PATCH_FLAG,
@@ -269,7 +275,11 @@ def delete_knowledge_base(instance: SNowInstance, kb_id: str, kb_name: str):
269
275
 
270
276
 
271
277
  def create_knowledge_base(
272
- instance: SNowInstance, kb_name: str, kb_data: dict, disable_commenting: bool = True
278
+ instance: SNowInstance,
279
+ kb_name: str,
280
+ kb_data: dict,
281
+ disable_commenting: bool = True,
282
+ add_article_name: bool = False,
273
283
  ):
274
284
  """
275
285
  Create knowledge base and upload all articles.
@@ -283,6 +293,9 @@ def create_knowledge_base(
283
293
  The knowledge base data to upload
284
294
  disable_commenting: bool
285
295
  Whether to disable commenting on the knowledge base
296
+ add_article_name: bool
297
+ Whether to add the article name to the article text. If False, the articles will be named "Article <number>"
298
+ Otherwise, we will extract the article title from the 'item' field in the JSON file.
286
299
 
287
300
  """
288
301
  logging.info(f"Installing knowledge base {kb_name}...")
@@ -311,7 +324,10 @@ def create_knowledge_base(
311
324
  for i, kb_entry in enumerate(kb_data):
312
325
  logging.info(f"... Knowledge Base {kb_name} uploading article {i + 1}/{len(kb_data)}")
313
326
  article = kb_entry["article"]
314
-
327
+ if add_article_name:
328
+ short_description = kb_entry["item"]
329
+ else:
330
+ short_description = f"Article {i + 1}"
315
331
  # Plant a new article in kb_knowledge table
316
332
  table_api_call(
317
333
  instance,
@@ -319,7 +335,7 @@ def create_knowledge_base(
319
335
  method="POST",
320
336
  data=json.dumps(
321
337
  {
322
- "short_description": f"Article {i + 1}",
338
+ "short_description": short_description,
323
339
  "sys_class_name": "kb_knowledge",
324
340
  "text": article,
325
341
  "article_type": "text",
@@ -337,11 +353,12 @@ def setup_knowledge_bases():
337
353
  """
338
354
  # Get the ServiceNow instance
339
355
  instance = SNowInstance()
340
- # Mapping between knowledge base name and filepath + whether or not to disable comments
356
+ # Mapping between knowledge base name and filepath + whether or not to disable comments + whether or not to add article name
341
357
  knowledge_bases = {
342
- KB_NAME: (KB_FILEPATH, True),
358
+ KB_NAME: (KB_FILEPATH, True, False),
359
+ PROTOCOL_KB_NAME: (PROTOCOL_KB_FILEPATH, True, True),
343
360
  }
344
- for kb_name, (kb_filepath, disable_commenting) in knowledge_bases.items():
361
+ for kb_name, (kb_filepath, disable_commenting, add_article_name) in knowledge_bases.items():
345
362
  # Load the knowledge base
346
363
  with open(kb_filepath, "r") as f:
347
364
  kb_data = json.load(f)
@@ -365,6 +382,7 @@ def setup_knowledge_bases():
365
382
  kb_name=kb_name,
366
383
  kb_data=kb_data,
367
384
  disable_commenting=disable_commenting,
385
+ add_article_name=add_article_name,
368
386
  )
369
387
 
370
388
  # Confirm that the knowledge base was installed correctly
@@ -570,10 +588,22 @@ def setup_list_columns():
570
588
  "url": "/now/nav/ui/classic/params/target/incident_list.do",
571
589
  "expected_columns_path": EXPECTED_INCIDENT_COLUMNS_PATH,
572
590
  },
591
+ "problem": {
592
+ "url": "/now/nav/ui/classic/params/target/problem_list.do",
593
+ "expected_columns_path": EXPECTED_PROBLEM_COLUMNS_PATH,
594
+ },
573
595
  "sys_user": {
574
596
  "url": "/now/nav/ui/classic/params/target/sys_user_list.do",
575
597
  "expected_columns_path": EXPECTED_USER_COLUMNS_PATH,
576
598
  },
599
+ "sc_req_item": {
600
+ "url": "/now/nav/ui/classic/params/target/sc_req_item_list.do",
601
+ "expected_columns_path": EXPECTED_REQUESTED_ITEMS_COLUMNS_PATH,
602
+ },
603
+ "fm_expense_line": {
604
+ "url": "/now/nav/ui/classic/params/target/fm_expense_line_list.do",
605
+ "expected_columns_path": EXPECTED_EXPENSE_LINE_COLUMNS_PATH,
606
+ },
577
607
  "sc_cat_item": {
578
608
  "url": "/now/nav/ui/classic/params/target/sc_cat_item_list.do",
579
609
  "expected_columns_path": EXPECTED_SERVICE_CATALOG_COLUMNS_PATH,
@@ -680,6 +710,10 @@ def setup_form_fields():
680
710
  "expected_fields_path": EXPECTED_USER_FORM_FIELDS_PATH,
681
711
  "url": "/now/nav/ui/classic/params/target/sys_user.do",
682
712
  },
713
+ "create_request_item": {
714
+ "expected_fields_path": EXPECTED_REQUEST_ITEM_FORM_FIELDS_PATH,
715
+ "url": "/now/nav/ui/classic/params/target/sc_req_item.do",
716
+ },
683
717
  }
684
718
 
685
719
  logging.info("... Creating a new user account to validate form fields")
@@ -872,6 +906,17 @@ def wipe_system_admin_preferences():
872
906
  )
873
907
 
874
908
 
909
def is_report_filter_using_time(filter):
    """
    Tell whether a report filter looks like it depends on the current time.

    This is a keyword heuristic rather than a parser: it flags any filter that
    contains a "javascript:gs." call (e.g., "gs.endOfToday()") or the "@ago"
    relative-date marker. It is intentionally broad and may flag filters that
    are not actually time-dependent.

    Parameters:
    -----------
    filter: str
        The encoded filter of the report

    Returns:
    --------
    bool
        True if one of the time-related markers is found in the filter

    """
    time_markers = ("javascript:gs.", "@ago")
    return any(marker in filter for marker in time_markers)
918
+
919
+
875
920
  def patch_report_filters():
876
921
  """
877
922
  Add filters to reports to make sure they stay frozen in time and don't show new data
@@ -880,8 +925,6 @@ def patch_report_filters():
880
925
  """
881
926
  logging.info("Patching reports with date filter...")
882
927
 
883
- cutoff_date = REPORT_DATE_FILTER
884
-
885
928
  instance = SNowInstance()
886
929
 
887
930
  # Get all reports that are not already patched
@@ -893,22 +936,35 @@ def patch_report_filters():
893
936
  },
894
937
  )["result"]
895
938
 
896
- incompatible_reports = []
897
939
  for report in reports:
898
940
  # Find all sys_created_on columns of this record. Some have many.
899
941
  sys_created_on_cols = [
900
942
  c for c in table_column_info(instance, report["table"]).keys() if "sys_created_on" in c
901
943
  ]
902
-
903
944
  try:
904
945
  # XXX: We purposely do not support reports with multiple filter conditions for simplicity
905
946
  if len(sys_created_on_cols) == 0 or "^NQ" in report["filter"]:
906
- raise NotImplementedError()
947
+ logging.info(f"Discarding report {report['title']} {report['sys_id']}...")
948
+ raise NotImplementedError() # Mark for deletion
949
+
950
+ if not is_report_filter_using_time(report["filter"]):
951
+ # That's a report we want to keep (use date cutoff filter)
952
+ filter_date = REPORT_DATE_FILTER
953
+ logging.info(
954
+ f"Keeping report {report['title']} {report['sys_id']} (columns: {sys_created_on_cols})..."
955
+ )
956
+ else:
957
+ # XXX: We do not support reports with filters that rely on time (e.g., last 10 days) because
958
+ # they are not stable. In this case, we don't delete them but add a filter to make
959
+ # them empty. They will be shown as "No data available".
960
+ logging.info(
961
+ f"Disabling report {report['title']} {report['sys_id']} because it uses time filters..."
962
+ )
963
+ filter_date = "1900-01-01"
907
964
 
908
- # Add the filter
909
965
  filter = "".join(
910
966
  [
911
- f"^{col}<javascript:gs.dateGenerate('{cutoff_date}','00:00:00')"
967
+ f"^{col}<javascript:gs.dateGenerate('{filter_date}','00:00:00')"
912
968
  for col in sys_created_on_cols
913
969
  ]
914
970
  ) + ("^" if len(report["filter"]) > 0 and not report["filter"].startswith("^") else "")
@@ -921,16 +977,21 @@ def patch_report_filters():
921
977
  "description": report["description"] + " " + REPORT_PATCH_FLAG,
922
978
  },
923
979
  )
924
- logging.info(
925
- f"Patched report {report['title']} {report['sys_id']} (columns: {sys_created_on_cols})..."
926
- )
980
+ logging.info(f"... done")
927
981
 
928
982
  except (NotImplementedError, HTTPError):
929
983
  # HTTPError occurs when some reports simply cannot be patched because they are critical and protected
930
- incompatible_reports.append(report["sys_id"])
931
- logging.info(
932
- f"Did not patch report {report['title']} {report['title']} (columns: {sys_created_on_cols})..."
933
- )
984
+ logging.info(f"...failed to patch report. Attempting delete...")
985
+
986
+ # Delete the report if it cannot be patched
987
+ # This might fail sometimes, but it's the best we can do.
988
+ try:
989
+ table_api_call(
990
+ instance=instance, table=f"sys_report/{report['sys_id']}", method="DELETE"
991
+ )
992
+ logging.info(f"...... deleted.")
993
+ except:
994
+ logging.error(f"...... could not delete.")
934
995
 
935
996
 
936
997
  @tenacity.retry(
@@ -10,7 +10,7 @@ import playwright.sync_api
10
10
 
11
11
  from abc import ABC, abstractmethod
12
12
  from copy import deepcopy
13
- from typing import Dict, List, Optional, Tuple
13
+ from typing import List, Optional, Tuple
14
14
  from uuid import uuid4
15
15
  from urllib import parse
16
16
 
@@ -18,7 +18,7 @@ from browsergym.core.task import AbstractBrowserTask
18
18
  from ..api.user import create_user
19
19
  from ..api.utils import table_api_call
20
20
  from ..config import SNOW_BROWSER_TIMEOUT, SNOW_JS_UTILS_FILEPATH
21
- from ..utils import impersonate_user, url_login
21
+ from ..utils import url_login
22
22
  from ..instance import SNowInstance
23
23
 
24
24
 
@@ -34,7 +34,8 @@ class AbstractServiceNowTask(AbstractBrowserTask, ABC):
34
34
  start_rel_url: str,
35
35
  instance: SNowInstance = None,
36
36
  final_rel_url: Optional[str] = None,
37
- username: Optional[str] = "admin",
37
+ user_roles: List[str] = ["admin"],
38
+ has_description: bool = False,
38
39
  ) -> None:
39
40
  """
40
41
  Initialize the task
@@ -45,10 +46,14 @@ class AbstractServiceNowTask(AbstractBrowserTask, ABC):
45
46
  Random seed
46
47
  instance: SNowInstance
47
48
  The ServiceNow instance in which the task will be performed
48
- start_url: str
49
+ start_rel_url: str
49
50
  The URL for the starting page of the task
50
- final_url: str (optional)
51
+ final_rel_url: str (optional)
51
52
  The URL for the final page of the task (default: uses the value of base_url)
53
+ user_roles: list[str]
54
+ The roles to assign to the user (default: ["admin"])
55
+ has_description: bool
56
+ Whether the task has a description in L3 compositional tasks
52
57
 
53
58
  """
54
59
  super().__init__(seed)
@@ -67,6 +72,16 @@ class AbstractServiceNowTask(AbstractBrowserTask, ABC):
67
72
  self.final_url = self.start_url
68
73
  self.final_url_ = parse.urlparse(self.final_url)
69
74
 
75
+ # Set the task's unique ID
76
+ self.unique_id = str(uuid4())
77
+ # Flag to ensure the task is setup only once
78
+ self.task_is_setup = False
79
+ self.delete_user_on_teardown = False
80
+ self.user_roles = user_roles
81
+ self.has_description = (
82
+ has_description # Whether the task has a description in L3 compositional tasks
83
+ )
84
+
70
85
  def cheat(self, page: playwright.sync_api.Page, chat_messages: list[str]) -> None:
71
86
  # Don't call super cheat function because it's not implemented at the base level
72
87
  logging.debug("Cheat is solving the task")
@@ -102,6 +117,8 @@ class AbstractServiceNowTask(AbstractBrowserTask, ABC):
102
117
 
103
118
  """
104
119
  logging.debug("Setting up the base task")
120
+ if self.task_is_setup:
121
+ raise ValueError("The task is already setup")
105
122
 
106
123
  # Keep the page for client-side validation
107
124
  self.page = page
@@ -109,6 +126,15 @@ class AbstractServiceNowTask(AbstractBrowserTask, ABC):
109
126
  # Set the page timeout
110
127
  page.set_default_timeout(SNOW_BROWSER_TIMEOUT)
111
128
 
129
+ # Create a new user to run the task if this is the starting task
130
+ if do_start:
131
+ self._base_initial_instance = self.instance
132
+ self._base_user_name, self._base_user_password, self._base_user_sysid = create_user(
133
+ instance=self.instance, user_roles=self.user_roles, random=self.random
134
+ )
135
+ self.instance = deepcopy(self.instance)
136
+ self.instance.snow_credentials = (self._base_user_name, self._base_user_password)
137
+ self.delete_user_on_teardown = True
112
138
  # Set the task's unique ID
113
139
  self.unique_id = str(uuid4())
114
140
 
@@ -116,26 +142,26 @@ class AbstractServiceNowTask(AbstractBrowserTask, ABC):
116
142
  goal, info = self.setup_goal(page=page)
117
143
 
118
144
  # Load a few utility functions for init scripts
119
- page.add_init_script(path=SNOW_JS_UTILS_FILEPATH)
145
+ page.context.add_init_script(path=SNOW_JS_UTILS_FILEPATH)
120
146
 
121
147
  # Add the initialization scripts to the page context
122
148
  for script in self.get_init_scripts():
123
149
  page.context.add_init_script(script)
124
150
 
125
- # Create a new user to run the task
126
- self._base_initial_instance = self.instance
127
- self._base_user_name, self._base_user_password, self._base_user_sysid = create_user(
128
- self.instance
129
- )
130
- self.instance = deepcopy(self.instance)
131
- self.instance.snow_credentials = (self._base_user_name, self._base_user_password)
132
-
133
151
  # Start the task if requested
134
152
  if do_start:
135
153
  self.start(page)
136
154
 
155
+ self.task_is_setup = True
156
+
137
157
  return goal, info
138
158
 
159
+ def create_user(self, first_name: str = None, last_name: str = None):
160
+ """
161
+ Create a user in the ServiceNow instance
162
+
163
+ """
164
+
139
165
  @abstractmethod
140
166
  def setup_goal(self, page: playwright.sync_api.Page) -> tuple[str, dict]:
141
167
  """
@@ -157,11 +183,20 @@ class AbstractServiceNowTask(AbstractBrowserTask, ABC):
157
183
  page.goto(self.start_url)
158
184
 
159
185
  def teardown(self) -> None:
186
+ """
187
+ Clean up after the task
188
+
189
+ Notes:
190
+ ------
191
+ This method should not make assumptions on the state of the page (e.g., a specific URL).
192
+
193
+ """
160
194
  logging.debug("Tearing down the task")
161
195
 
162
- # Delete the user
163
- table_api_call(
164
- instance=self._base_initial_instance,
165
- table=f"sys_user/{self._base_user_sysid}",
166
- method="DELETE",
167
- )
196
+ if self.delete_user_on_teardown:
197
+ # Delete the user
198
+ table_api_call(
199
+ instance=self._base_initial_instance,
200
+ table=f"sys_user/{self._base_user_sysid}",
201
+ method="DELETE",
202
+ )
@@ -0,0 +1,4 @@
1
class CompositionalBuildingBlockTask:
    """
    Marker base class for compositional building block tasks.

    It carries no behavior of its own. It is used to exclude these tasks from the
    list of tasks that are tested like atomic tasks.

    """
@@ -0,0 +1,76 @@
1
+ from .utils.curriculum import AGENT_CURRICULUM, HUMAN_CURRICULUM
2
+
3
# Flat list of every task class found in any bucket of the agent curriculum.
# Each curriculum entry holds a "buckets" list, and each bucket is itself a
# collection of task classes, so two levels of nesting are flattened here.
# Order is preserved: categories first, then buckets, then tasks in a bucket.
ALL_COMPOSITIONAL_TASKS = [
    task_cls
    for items in AGENT_CURRICULUM.values()
    for bucket in items["buckets"]
    for task_cls in bucket
]
10
+
11
+
12
def specialize_task_class_to_level(task_cls, level):
    """
    Create a subclass of a task class with the level hardcoded.

    The returned class is named "<task_cls name>L<level>" and its constructor
    forwards all keyword arguments to the parent constructor together with
    level=<level>. A new class object is created on every call.

    Parameters:
    -----------
    task_cls: type
        The task class to specialize. Its constructor must accept a `level`
        keyword argument.
    level: int
        The level to pass to the constructor of `task_cls`

    Returns:
    --------
    type
        The level-specific subclass of `task_cls`

    """
    new_name = f"{task_cls.__name__}L{level}"

    # A regular class statement (instead of exec on generated source) keeps the
    # value of `level` as an object captured by the closure and gives readable
    # tracebacks.
    class _LevelSpecializedTask(task_cls):
        def __init__(self, **kwargs):
            super().__init__(level=level, **kwargs)

    # Expose the level-specific name, as callers identify tasks by class name
    _LevelSpecializedTask.__name__ = new_name
    _LevelSpecializedTask.__qualname__ = new_name
    _LevelSpecializedTask.__init__.__qualname__ = f"{new_name}.__init__"

    return _LevelSpecializedTask
26
+
27
+
28
# Level-specific variants of every compositional task
ALL_COMPOSITIONAL_TASKS_L2 = [
    specialize_task_class_to_level(task, level=2) for task in ALL_COMPOSITIONAL_TASKS
]
ALL_COMPOSITIONAL_TASKS_L3 = [
    specialize_task_class_to_level(task, level=3) for task in ALL_COMPOSITIONAL_TASKS
]


def _specialize_curriculum(curriculum, level):
    """
    Build a copy of a curriculum in which every task class is specialized to a level.

    Parameters:
    -----------
    curriculum: dict
        Mapping from category name to a dict with the keys "buckets" (a list of
        lists of task classes), "num_seeds" and "weights"
    level: int
        The level passed to specialize_task_class_to_level for each task class

    Returns:
    --------
    dict
        A new dict with the same categories, in the same order. The "buckets" are
        rebuilt with specialized classes; the "num_seeds" and "weights" values are
        reused as-is (not copied).

    """
    return {
        category: {
            "buckets": [
                [specialize_task_class_to_level(task, level=level) for task in task_set]
                for task_set in items["buckets"]
            ],
            "num_seeds": items["num_seeds"],
            "weights": items["weights"],
        }
        for category, items in curriculum.items()
    }


# Level-specific curricula for agents
AGENT_CURRICULUM_L2 = _specialize_curriculum(AGENT_CURRICULUM, level=2)
AGENT_CURRICULUM_L3 = _specialize_curriculum(AGENT_CURRICULUM, level=3)

# Level-specific curricula for human evaluation
HUMAN_CURRICULUM_L2 = _specialize_curriculum(HUMAN_CURRICULUM, level=2)
HUMAN_CURRICULUM_L3 = _specialize_curriculum(HUMAN_CURRICULUM, level=3)