aiverify-moonshot 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. aiverify_moonshot-0.4.0.dist-info/METADATA +249 -0
  2. aiverify_moonshot-0.4.0.dist-info/RECORD +163 -0
  3. aiverify_moonshot-0.4.0.dist-info/WHEEL +4 -0
  4. aiverify_moonshot-0.4.0.dist-info/licenses/AUTHORS.md +5 -0
  5. aiverify_moonshot-0.4.0.dist-info/licenses/LICENSE.md +201 -0
  6. aiverify_moonshot-0.4.0.dist-info/licenses/NOTICES.md +3340 -0
  7. moonshot/__init__.py +0 -0
  8. moonshot/__main__.py +198 -0
  9. moonshot/api.py +155 -0
  10. moonshot/integrations/__init__.py +0 -0
  11. moonshot/integrations/cli/__init__.py +0 -0
  12. moonshot/integrations/cli/__main__.py +25 -0
  13. moonshot/integrations/cli/active_session_cfg.py +1 -0
  14. moonshot/integrations/cli/benchmark/__init__.py +0 -0
  15. moonshot/integrations/cli/benchmark/benchmark.py +186 -0
  16. moonshot/integrations/cli/benchmark/cookbook.py +545 -0
  17. moonshot/integrations/cli/benchmark/datasets.py +164 -0
  18. moonshot/integrations/cli/benchmark/metrics.py +141 -0
  19. moonshot/integrations/cli/benchmark/recipe.py +598 -0
  20. moonshot/integrations/cli/benchmark/result.py +216 -0
  21. moonshot/integrations/cli/benchmark/run.py +140 -0
  22. moonshot/integrations/cli/benchmark/runner.py +174 -0
  23. moonshot/integrations/cli/cli.py +64 -0
  24. moonshot/integrations/cli/common/__init__.py +0 -0
  25. moonshot/integrations/cli/common/common.py +72 -0
  26. moonshot/integrations/cli/common/connectors.py +325 -0
  27. moonshot/integrations/cli/common/display_helper.py +42 -0
  28. moonshot/integrations/cli/common/prompt_template.py +94 -0
  29. moonshot/integrations/cli/initialisation/__init__.py +0 -0
  30. moonshot/integrations/cli/initialisation/initialisation.py +14 -0
  31. moonshot/integrations/cli/redteam/__init__.py +0 -0
  32. moonshot/integrations/cli/redteam/attack_module.py +70 -0
  33. moonshot/integrations/cli/redteam/context_strategy.py +147 -0
  34. moonshot/integrations/cli/redteam/prompt_template.py +67 -0
  35. moonshot/integrations/cli/redteam/redteam.py +90 -0
  36. moonshot/integrations/cli/redteam/session.py +467 -0
  37. moonshot/integrations/web_api/.env.dev +7 -0
  38. moonshot/integrations/web_api/__init__.py +0 -0
  39. moonshot/integrations/web_api/__main__.py +56 -0
  40. moonshot/integrations/web_api/app.py +125 -0
  41. moonshot/integrations/web_api/container.py +146 -0
  42. moonshot/integrations/web_api/log/.gitkeep +0 -0
  43. moonshot/integrations/web_api/logging_conf.py +114 -0
  44. moonshot/integrations/web_api/routes/__init__.py +0 -0
  45. moonshot/integrations/web_api/routes/attack_modules.py +66 -0
  46. moonshot/integrations/web_api/routes/benchmark.py +116 -0
  47. moonshot/integrations/web_api/routes/benchmark_result.py +175 -0
  48. moonshot/integrations/web_api/routes/context_strategy.py +129 -0
  49. moonshot/integrations/web_api/routes/cookbook.py +225 -0
  50. moonshot/integrations/web_api/routes/dataset.py +120 -0
  51. moonshot/integrations/web_api/routes/endpoint.py +282 -0
  52. moonshot/integrations/web_api/routes/metric.py +78 -0
  53. moonshot/integrations/web_api/routes/prompt_template.py +128 -0
  54. moonshot/integrations/web_api/routes/recipe.py +219 -0
  55. moonshot/integrations/web_api/routes/redteam.py +609 -0
  56. moonshot/integrations/web_api/routes/runner.py +239 -0
  57. moonshot/integrations/web_api/schemas/__init__.py +0 -0
  58. moonshot/integrations/web_api/schemas/benchmark_runner_dto.py +13 -0
  59. moonshot/integrations/web_api/schemas/cookbook_create_dto.py +19 -0
  60. moonshot/integrations/web_api/schemas/cookbook_response_model.py +9 -0
  61. moonshot/integrations/web_api/schemas/dataset_response_dto.py +9 -0
  62. moonshot/integrations/web_api/schemas/endpoint_create_dto.py +21 -0
  63. moonshot/integrations/web_api/schemas/endpoint_response_model.py +11 -0
  64. moonshot/integrations/web_api/schemas/prompt_response_model.py +14 -0
  65. moonshot/integrations/web_api/schemas/prompt_template_response_model.py +10 -0
  66. moonshot/integrations/web_api/schemas/recipe_create_dto.py +32 -0
  67. moonshot/integrations/web_api/schemas/recipe_response_model.py +7 -0
  68. moonshot/integrations/web_api/schemas/session_create_dto.py +16 -0
  69. moonshot/integrations/web_api/schemas/session_prompt_dto.py +7 -0
  70. moonshot/integrations/web_api/schemas/session_response_model.py +38 -0
  71. moonshot/integrations/web_api/services/__init__.py +0 -0
  72. moonshot/integrations/web_api/services/attack_module_service.py +34 -0
  73. moonshot/integrations/web_api/services/auto_red_team_test_manager.py +86 -0
  74. moonshot/integrations/web_api/services/auto_red_team_test_state.py +57 -0
  75. moonshot/integrations/web_api/services/base_service.py +8 -0
  76. moonshot/integrations/web_api/services/benchmark_result_service.py +25 -0
  77. moonshot/integrations/web_api/services/benchmark_test_manager.py +106 -0
  78. moonshot/integrations/web_api/services/benchmark_test_state.py +56 -0
  79. moonshot/integrations/web_api/services/benchmarking_service.py +31 -0
  80. moonshot/integrations/web_api/services/context_strategy_service.py +22 -0
  81. moonshot/integrations/web_api/services/cookbook_service.py +194 -0
  82. moonshot/integrations/web_api/services/dataset_service.py +20 -0
  83. moonshot/integrations/web_api/services/endpoint_service.py +65 -0
  84. moonshot/integrations/web_api/services/metric_service.py +14 -0
  85. moonshot/integrations/web_api/services/prompt_template_service.py +39 -0
  86. moonshot/integrations/web_api/services/recipe_service.py +155 -0
  87. moonshot/integrations/web_api/services/runner_service.py +147 -0
  88. moonshot/integrations/web_api/services/session_service.py +350 -0
  89. moonshot/integrations/web_api/services/utils/exceptions_handler.py +41 -0
  90. moonshot/integrations/web_api/services/utils/results_formatter.py +47 -0
  91. moonshot/integrations/web_api/status_updater/interface/benchmark_progress_callback.py +14 -0
  92. moonshot/integrations/web_api/status_updater/interface/redteam_progress_callback.py +14 -0
  93. moonshot/integrations/web_api/status_updater/moonshot_ui_webhook.py +72 -0
  94. moonshot/integrations/web_api/types/types.py +99 -0
  95. moonshot/src/__init__.py +0 -0
  96. moonshot/src/api/__init__.py +0 -0
  97. moonshot/src/api/api_connector.py +58 -0
  98. moonshot/src/api/api_connector_endpoint.py +162 -0
  99. moonshot/src/api/api_context_strategy.py +57 -0
  100. moonshot/src/api/api_cookbook.py +160 -0
  101. moonshot/src/api/api_dataset.py +46 -0
  102. moonshot/src/api/api_environment_variables.py +17 -0
  103. moonshot/src/api/api_metrics.py +51 -0
  104. moonshot/src/api/api_prompt_template.py +43 -0
  105. moonshot/src/api/api_recipe.py +182 -0
  106. moonshot/src/api/api_red_teaming.py +59 -0
  107. moonshot/src/api/api_result.py +84 -0
  108. moonshot/src/api/api_run.py +74 -0
  109. moonshot/src/api/api_runner.py +132 -0
  110. moonshot/src/api/api_session.py +290 -0
  111. moonshot/src/configs/__init__.py +0 -0
  112. moonshot/src/configs/env_variables.py +187 -0
  113. moonshot/src/connectors/__init__.py +0 -0
  114. moonshot/src/connectors/connector.py +327 -0
  115. moonshot/src/connectors/connector_prompt_arguments.py +17 -0
  116. moonshot/src/connectors_endpoints/__init__.py +0 -0
  117. moonshot/src/connectors_endpoints/connector_endpoint.py +211 -0
  118. moonshot/src/connectors_endpoints/connector_endpoint_arguments.py +54 -0
  119. moonshot/src/cookbooks/__init__.py +0 -0
  120. moonshot/src/cookbooks/cookbook.py +225 -0
  121. moonshot/src/cookbooks/cookbook_arguments.py +34 -0
  122. moonshot/src/datasets/__init__.py +0 -0
  123. moonshot/src/datasets/dataset.py +255 -0
  124. moonshot/src/datasets/dataset_arguments.py +50 -0
  125. moonshot/src/metrics/__init__.py +0 -0
  126. moonshot/src/metrics/metric.py +192 -0
  127. moonshot/src/metrics/metric_interface.py +95 -0
  128. moonshot/src/prompt_templates/__init__.py +0 -0
  129. moonshot/src/prompt_templates/prompt_template.py +103 -0
  130. moonshot/src/recipes/__init__.py +0 -0
  131. moonshot/src/recipes/recipe.py +340 -0
  132. moonshot/src/recipes/recipe_arguments.py +111 -0
  133. moonshot/src/redteaming/__init__.py +0 -0
  134. moonshot/src/redteaming/attack/__init__.py +0 -0
  135. moonshot/src/redteaming/attack/attack_module.py +618 -0
  136. moonshot/src/redteaming/attack/attack_module_arguments.py +44 -0
  137. moonshot/src/redteaming/attack/context_strategy.py +131 -0
  138. moonshot/src/redteaming/context_strategy/__init__.py +0 -0
  139. moonshot/src/redteaming/context_strategy/context_strategy_interface.py +46 -0
  140. moonshot/src/redteaming/session/__init__.py +0 -0
  141. moonshot/src/redteaming/session/chat.py +209 -0
  142. moonshot/src/redteaming/session/red_teaming_progress.py +128 -0
  143. moonshot/src/redteaming/session/red_teaming_type.py +6 -0
  144. moonshot/src/redteaming/session/session.py +775 -0
  145. moonshot/src/results/__init__.py +0 -0
  146. moonshot/src/results/result.py +119 -0
  147. moonshot/src/results/result_arguments.py +44 -0
  148. moonshot/src/runners/__init__.py +0 -0
  149. moonshot/src/runners/runner.py +476 -0
  150. moonshot/src/runners/runner_arguments.py +46 -0
  151. moonshot/src/runners/runner_type.py +6 -0
  152. moonshot/src/runs/__init__.py +0 -0
  153. moonshot/src/runs/run.py +344 -0
  154. moonshot/src/runs/run_arguments.py +162 -0
  155. moonshot/src/runs/run_progress.py +145 -0
  156. moonshot/src/runs/run_status.py +10 -0
  157. moonshot/src/storage/__init__.py +0 -0
  158. moonshot/src/storage/db_interface.py +128 -0
  159. moonshot/src/storage/io_interface.py +31 -0
  160. moonshot/src/storage/storage.py +525 -0
  161. moonshot/src/utils/__init__.py +0 -0
  162. moonshot/src/utils/import_modules.py +96 -0
  163. moonshot/src/utils/timeit.py +25 -0
File without changes
@@ -0,0 +1,119 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ from pydantic import validate_call
6
+
7
+ from moonshot.src.configs.env_variables import EnvVariables
8
+ from moonshot.src.storage.storage import Storage
9
+
10
+
11
class Result:
    """Static helpers for reading, deleting and listing stored result objects."""

    @staticmethod
    @validate_call
    def read(result_id: str) -> dict:
        """
        Return the stored result data for ``result_id``.

        An empty ``result_id`` is rejected with a RuntimeError; otherwise the lookup is
        delegated to ``Result._read_result``. Any failure is reported on stdout and
        then re-raised to the caller.

        Args:
            result_id (str): The unique identifier of the result to be read.

        Returns:
            dict: The result data read from storage.

        Raises:
            Exception: If the ID is empty, no data is found, or the read fails.
        """
        try:
            # Guard clause: reject empty identifiers before touching storage.
            if not result_id:
                raise RuntimeError("Result ID is empty")
            return Result._read_result(result_id)

        except Exception as e:
            print(f"Failed to read result: {str(e)}")
            raise e

    @staticmethod
    def _read_result(result_id: str) -> dict:
        """
        Fetch the result object for ``result_id`` from storage.

        Args:
            result_id (str): The unique identifier of the result to be read.

        Returns:
            dict: The object returned by ``Storage.read_object`` for this ID.

        Raises:
            RuntimeError: If storage returns a falsy value for the given result ID.
        """
        stored = Storage.read_object(EnvVariables.RESULTS.name, result_id, "json")
        if not stored:
            raise RuntimeError(f"Unable to get results for {result_id}.")
        return stored

    @staticmethod
    @validate_call
    def delete(result_id: str) -> bool:
        """
        Delete the stored result identified by ``result_id``.

        Any exception raised by storage is reported on stdout and re-raised.

        Args:
            result_id (str): The unique identifier of the result to be deleted.

        Returns:
            bool: True once ``Storage.delete_object`` has returned without raising.

        Raises:
            Exception: If an error occurs during the deletion process.
        """
        try:
            Storage.delete_object(EnvVariables.RESULTS.name, result_id, "json")

        except Exception as e:
            print(f"Failed to delete result: {str(e)}")
            raise e

        return True

    @staticmethod
    def get_available_items() -> tuple[list[str], list[dict]]:
        """
        List every available result together with its data.

        Entries returned by storage whose name contains "__" are skipped. For every
        other entry the file stem is used as the result ID and its data is read via
        ``Result._read_result``.

        Returns:
            tuple[list[str], list[dict]]: The result IDs, and the result data for each
            ID in the same order.

        Raises:
            Exception: Re-raised (after being printed) if listing or reading fails.
        """
        try:
            found_ids = []
            found_data = []

            for entry in Storage.get_objects(EnvVariables.RESULTS.name, "json"):
                if "__" not in entry:
                    entry_id = Path(entry).stem
                    # Read first so that a failing read leaves neither list extended.
                    found_data.append(Result._read_result(entry_id))
                    found_ids.append(entry_id)

            return found_ids, found_data

        except Exception as e:
            print(f"Failed to get available results: {str(e)}")
            raise e
@@ -0,0 +1,44 @@
1
+ from __future__ import annotations
2
+
3
+ from pydantic import BaseModel
4
+
5
+ from moonshot.src.runs.run_status import RunStatus
6
+
7
+
8
class ResultArguments(BaseModel):
    # Pydantic model describing one stored result; `to_dict` gives the plain-dict form.

    # The ID of the Runner.
    id: str

    # The start time of the Run.
    start_time: float

    # The end time of the Run.
    end_time: float

    # The duration of the Run.
    duration: int

    # Status of the Run.
    status: RunStatus

    # Raw Results of the Run from runners-modules.
    raw_results: dict = {}

    # Results of the Run from results-modules.
    results: dict = {}

    # Other information required for results module
    params: dict = {}

    def to_dict(self) -> dict:
        """
        Serialize this ResultArguments instance into a plain dictionary.

        Every field is exported under its own name. ``status`` is exported as the
        enum member's ``name`` rather than the enum object itself.

        Returns:
            A dictionary representation of the ResultArguments instance.
        """
        # Build the mapping in the same key order as the field declarations.
        serialized = {
            key: getattr(self, key)
            for key in ("id", "start_time", "end_time", "duration")
        }
        serialized["status"] = self.status.name
        for key in ("raw_results", "results", "params"):
            serialized[key] = getattr(self, key)
        return serialized
File without changes
@@ -0,0 +1,476 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ from pathlib import Path
5
+ from typing import Callable
6
+
7
+ from pydantic import validate_call
8
+ from slugify import slugify
9
+
10
+ from moonshot.src.configs.env_variables import EnvVariables
11
+ from moonshot.src.redteaming.session.session import Session
12
+ from moonshot.src.runners.runner_arguments import RunnerArguments
13
+ from moonshot.src.runners.runner_type import RunnerType
14
+ from moonshot.src.runs.run import Run
15
+ from moonshot.src.storage.storage import Storage
16
+
17
+
18
class Runner:
    """
    A named runner bound to a set of connector endpoints and a per-runner database.

    A runner executes at most one tracked operation (a benchmark ``Run`` or a red
    teaming ``Session``) through ``current_operation``, which is guarded by an
    asyncio lock.
    """

    sql_create_runner_cache_table = """
        CREATE TABLE IF NOT EXISTS runner_cache_table (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        connection_id text NOT NULL,
        recipe_id text,
        dataset_id text,
        prompt_template_id text,
        context_strategy_id text,
        attack_module_id text,
        prompt_index INTEGER,
        prompt text NOT NULL,
        target text NOT NULL,
        predicted_results text NOT NULL,
        duration text NOT NULL,
        random_seed INTEGER,
        system_prompt text
        );
    """

    def __init__(self, runner_args: RunnerArguments) -> None:
        """
        Copy the runner configuration from ``runner_args`` onto the instance.

        Args:
            runner_args (RunnerArguments): The configuration of the runner.
        """
        self.id = runner_args.id
        self.name = runner_args.name
        self.description = runner_args.description
        self.endpoints = runner_args.endpoints
        self.database_instance = runner_args.database_instance
        self.database_file = runner_args.database_file
        self.progress_callback_func = runner_args.progress_callback_func

        # Set current run
        self.current_operation = None
        self.current_operation_lock = asyncio.Lock()  # Mutex lock for current operation

    @classmethod
    def load(
        cls, runner_id: str, progress_callback_func: Callable | None = None
    ) -> Runner:
        """
        Load an existing runner.

        The runner file for ``runner_id`` must exist in storage. It is read through
        ``Runner.read``, a database connection is created for the runner, and the
        optional ``progress_callback_func`` is attached before the instance is built.

        Args:
            runner_id (str): The unique identifier of the runner to be loaded.
            progress_callback_func (Callable | None): An optional callback function for
            tracking the progress of the runner.

        Returns:
            Runner: An instance initialized with the data loaded from the runner file.

        Raises:
            RuntimeError: If the runner file for the provided runner_id does not exist.
            Exception: Any other error raised while reading the runner or connecting
            to the database; it is printed and re-raised.
        """
        try:
            # Check if runner file exists. If it does not exists, raise an error.
            if not Storage.is_object_exists(
                EnvVariables.RUNNERS.name, runner_id, "json"
            ):
                raise RuntimeError(
                    "[Runner] Unable to load runner because the runner file does not exist."
                )
            runner_args = Runner.read(runner_id)
            runner_args.database_instance = Storage.create_database_connection(
                EnvVariables.DATABASES.name, runner_id, "db"
            )
            runner_args.progress_callback_func = progress_callback_func
            return cls(runner_args)

        except Exception as e:
            print(f"[Runner] Failed to load runner: {str(e)}")
            raise

    @classmethod
    def create(cls, runner_args: RunnerArguments) -> Runner:
        """
        Create a new runner.

        The runner ID is the slugified, lower-cased runner name. Creation is refused
        if a runner file with that ID already exists or if any of the requested
        connector endpoints has no configuration file. Otherwise a database
        connection is opened, the runner file is written and the runner cache table
        is created.

        Args:
            runner_args (RunnerArguments): The configuration parameters for creating
            the runner.

        Returns:
            Runner: An instance of the Runner class, newly created with the specified
            arguments.

        Raises:
            RuntimeError: If a runner file with the generated runner_id already exists
            or a connector endpoint does not exist.
            Exception: Any other error raised by storage; it is printed and re-raised.
        """
        try:
            runner_id = slugify(runner_args.name, lowercase=True)

            # Check if runner file exists. If it exists, raise an error.
            if Storage.is_object_exists(EnvVariables.RUNNERS.name, runner_id, "json"):
                raise RuntimeError(
                    "[Runner] Unable to create runner because the runner file exists."
                )
            # Check if all endpoint configuration files exist. If not, raise an error.
            for endpoint in runner_args.endpoints:
                if not Storage.is_object_exists(
                    EnvVariables.CONNECTORS_ENDPOINTS.name, endpoint, "json"
                ):
                    raise RuntimeError(
                        f"[Runner] Connector endpoint {endpoint} does not exist."
                    )

            runner_info = {
                "id": runner_id,
                "name": runner_args.name,
                "endpoints": runner_args.endpoints,
                "database_file": Storage.get_filepath(
                    EnvVariables.DATABASES.name, runner_id, "db", True
                ),
                "progress_callback_func": runner_args.progress_callback_func,
                "description": runner_args.description,
            }
            # Rebuild the arguments so the generated id and database file are validated.
            runner_args = RunnerArguments(**runner_info)
            runner_args.database_instance = Storage.create_database_connection(
                EnvVariables.DATABASES.name, runner_id, "db"
            )

            # Create runner file
            Storage.create_object(
                EnvVariables.RUNNERS.name, runner_id, runner_args.to_dict(), "json"
            )

            # Create runner cache table
            Storage.create_database_table(
                runner_args.database_instance, Runner.sql_create_runner_cache_table
            )

            return cls(runner_args)

        except Exception as e:
            print(f"[Runner] Failed to create runner: {str(e)}")
            raise

    @staticmethod
    @validate_call
    def read(runner_id: str) -> RunnerArguments:
        """
        Read the runner file and build a RunnerArguments object from it.

        Args:
            runner_id (str): The unique identifier of the runner.

        Returns:
            RunnerArguments: An object constructed from the data that
            ``Storage.read_object`` returns for the runner.

        Raises:
            Exception: If the runner ID is empty, or an error occurs while reading or
            validating the data; it is printed and re-raised.
        """
        try:
            if not runner_id:
                raise RuntimeError("Runner ID is empty")
            return RunnerArguments(
                **Storage.read_object(EnvVariables.RUNNERS.name, runner_id, "json")
            )

        except Exception as e:
            print(f"[Runner] Failed to read runner: {str(e)}")
            raise

    @staticmethod
    @validate_call
    def delete(runner_id: str) -> bool:
        """
        Delete the runner file and its associated database file.

        The runner file is deleted first, then the database. If the first deletion
        raises, the second one is not attempted.

        Args:
            runner_id (str): The unique identifier of the runner to be deleted.

        Returns:
            bool: True if both deletions returned without raising.

        Raises:
            Exception: If an error occurs during the deletion process; it is printed
            and re-raised.
        """
        try:
            Storage.delete_object(EnvVariables.RUNNERS.name, runner_id, "json")
            Storage.delete_object(EnvVariables.DATABASES.name, runner_id, "db")
            return True

        except Exception as e:
            print(f"[Runner] Failed to delete runner: {str(e)}")
            raise

    @staticmethod
    def get_available_items() -> tuple[list[str], list[RunnerArguments]]:
        """
        Retrieve the list of available runners.

        All runner objects listed by ``Storage.get_objects`` for
        ``EnvVariables.RUNNERS`` are considered, except entries containing "__" in
        their names. Each remaining file is read and turned into a RunnerArguments
        object; the ID stored inside that object is the one reported.

        Returns:
            tuple[list[str], list[RunnerArguments]]: A tuple where the first element is
            a list of runner IDs and the second element is a list of RunnerArguments
            objects, in the same order.

        Raises:
            Exception: If an error is encountered while listing, reading or validating
            a runner; it is printed and re-raised.
        """
        try:
            retn_runners = []
            retn_runners_ids = []
            runners = Storage.get_objects(EnvVariables.RUNNERS.name, "json")
            for runner in runners:
                if "__" in runner:
                    continue

                runner_info = RunnerArguments(
                    **Storage.read_object(
                        EnvVariables.RUNNERS.name, Path(runner).stem, "json"
                    )
                )
                retn_runners.append(runner_info)
                retn_runners_ids.append(runner_info.id)

            return retn_runners_ids, retn_runners

        except Exception as e:
            print(f"[Runner] Failed to get available runners: {str(e)}")
            raise

    def close(self) -> None:
        """
        Close the runner instance.

        If a database instance is associated with the runner, its connection is
        closed through ``Storage.close_database_connection``.

        Raises:
            Exception: If any error occurs while closing the database connection.
        """
        if self.database_instance:
            Storage.close_database_connection(self.database_instance)

    async def cancel(self) -> None:
        """
        Cancel the operation that is currently tracked by this runner, if any.

        Under the operation lock, ``cancel()`` is called on the current operation and
        the ``current_operation`` slot is cleared. Nothing happens when no operation
        is tracked.

        Raises:
            Exception: If the operation's ``cancel()`` raises.
        """
        async with self.current_operation_lock:
            if self.current_operation:
                print(f"[Runner] {self.id} - Cancelling current operation...")
                self.current_operation.cancel()
                self.current_operation = None  # Reset the current operation

    async def _run_tracked_operation(self, operation):
        """
        Await ``operation.run()`` and always stop tracking the operation afterwards.

        The operation is awaited outside of the lock so that ``cancel()`` can acquire
        the lock while the run is in progress. The ``current_operation`` slot is
        cleared in a ``finally`` block so a failing run does not leave a stale
        operation behind, and only when the slot still refers to this operation so a
        newer operation installed in the meantime is not discarded.

        Args:
            operation: The object already stored in ``current_operation``; it must
            provide an awaitable ``run()``.

        Returns:
            Whatever ``operation.run()`` returns.

        Raises:
            Exception: Anything raised by ``operation.run()`` is propagated.
        """
        try:
            return await operation.run()
        finally:
            async with self.current_operation_lock:
                if self.current_operation is operation:
                    self.current_operation = None

    async def run_recipes(
        self,
        recipes: list[str],
        num_of_prompts: int = 0,
        random_seed: int = 0,
        system_prompt: str = "",
        runner_processing_module: str = "benchmarking",
        result_processing_module: str = "benchmarking-result",
    ) -> None:
        """
        Run a benchmark over a set of recipes.

        A benchmark ``Run`` is created under the operation lock with the provided
        configuration and then awaited outside of the lock. Once the run ends,
        whether normally or with an exception, it is no longer tracked as the
        current operation.

        Args:
            recipes (list[str]): The recipes to be included in the benchmark run.

            num_of_prompts (int, optional): The count of prompts to utilize during the
            benchmark. Defaults to 0.

            random_seed (int, optional): The seed for random number generation.
            Defaults to 0.

            system_prompt (str, optional): The system prompt to be used during the
            benchmark. Defaults to an empty string.

            runner_processing_module (str, optional): The module responsible for
            processing the runner. Defaults to "benchmarking".

            result_processing_module (str, optional): The module responsible for
            processing the results. Defaults to "benchmarking-result".

        Raises:
            Exception: If any error occurs during the setup or execution of the
            benchmark run.
        """
        async with self.current_operation_lock:  # Acquire the lock
            # Create new benchmark recipe test run
            print(f"[Runner] {self.id} - Running benchmark recipe run...")
            operation = Run(
                self.id,
                RunnerType.BENCHMARK,
                {
                    "recipes": recipes,
                    "num_of_prompts": num_of_prompts,
                    "random_seed": random_seed,
                    "system_prompt": system_prompt,
                    "runner_processing_module": runner_processing_module,
                    "result_processing_module": result_processing_module,
                },
                self.database_instance,
                self.endpoints,
                Storage.get_filepath(EnvVariables.RESULTS.name, self.id, "json", True),
                self.progress_callback_func,
            )
            self.current_operation = operation

        # Execute the long-running operation outside of the lock, using the local
        # reference so a concurrent cancel() cannot turn this into a call on None.
        await self._run_tracked_operation(operation)
        print(f"[Runner] {self.id} - Benchmark recipe run completed and reset.")

    async def run_cookbooks(
        self,
        cookbooks: list[str],
        num_of_prompts: int = 0,
        random_seed: int = 0,
        system_prompt: str = "",
        runner_processing_module: str = "benchmarking",
        result_processing_module: str = "benchmarking-result",
    ) -> None:
        """
        Run a benchmark over a set of cookbooks.

        A benchmark ``Run`` is created under the operation lock with the provided
        configuration and then awaited outside of the lock. Once the run ends,
        whether normally or with an exception, it is no longer tracked as the
        current operation.

        Args:
            cookbooks (list[str]): A list of cookbook names to be run in the benchmark.

            num_of_prompts (int, optional): The number of prompts to be used in the
            benchmark run. Defaults to 0.

            random_seed (int, optional): The seed for random number generation.
            Defaults to 0.

            system_prompt (str, optional): A system prompt to be used in the benchmark
            run. Defaults to an empty string.

            runner_processing_module (str, optional): The module responsible for
            processing the runner. Defaults to "benchmarking".

            result_processing_module (str, optional): The module responsible for
            processing the results. Defaults to "benchmarking-result".

        Raises:
            Exception: If any error occurs during the setup or execution of the
            benchmark run.
        """
        async with self.current_operation_lock:  # Acquire the lock
            # Create new benchmark cookbook test run
            print(f"[Runner] {self.id} - Running benchmark cookbook run...")
            operation = Run(
                self.id,
                RunnerType.BENCHMARK,
                {
                    "cookbooks": cookbooks,
                    "num_of_prompts": num_of_prompts,
                    "random_seed": random_seed,
                    "system_prompt": system_prompt,
                    "runner_processing_module": runner_processing_module,
                    "result_processing_module": result_processing_module,
                },
                self.database_instance,
                self.endpoints,
                Storage.get_filepath(EnvVariables.RESULTS.name, self.id, "json", True),
                self.progress_callback_func,
            )
            self.current_operation = operation

        # Execute the long-running operation outside of the lock, using the local
        # reference so a concurrent cancel() cannot turn this into a call on None.
        await self._run_tracked_operation(operation)
        print(f"[Runner] {self.id} - Benchmark cookbook run completed and reset.")

    async def run_red_teaming(
        self,
        red_team_args: dict,
        system_prompt: str = "",
        runner_processing_module: str = "redteaming",
    ) -> list | None:
        """
        Run a red teaming session with the provided arguments.

        A ``Session`` is created under the operation lock from ``red_team_args`` plus
        the runner processing module, and then awaited outside of the lock. Once the
        session ends, whether normally or with an exception, it is no longer tracked
        as the current operation.

        Args:
            red_team_args (dict): A dictionary of arguments for the red teaming
            session.

            system_prompt (str, optional): Accepted for interface compatibility.
            NOTE(review): this method does not forward it to the Session; a system
            prompt only reaches the session if it is part of ``red_team_args``.
            Defaults to an empty string.

            runner_processing_module (str, optional): The processing module to be used
            for the session. Defaults to "redteaming".

        Returns:
            list | None: The value returned by the session's ``run()``.

        Raises:
            Exception: If any error occurs during the setup or execution of the red
            teaming session.
        """
        async with self.current_operation_lock:  # Acquire the lock
            print(f"[Runner] {self.id} - Running red teaming session...")
            operation = Session(
                self.id,
                RunnerType.REDTEAM,
                {
                    **red_team_args,
                    "runner_processing_module": runner_processing_module,
                },
                self.database_instance,
                self.endpoints,
                Storage.get_filepath(EnvVariables.RESULTS.name, self.id, "json", True),
                self.progress_callback_func,
            )
            self.current_operation = operation

        # Execute the long-running operation outside of the lock, using the local
        # reference so a concurrent cancel() cannot turn this into a call on None.
        red_teaming_results = await self._run_tracked_operation(operation)
        print(f"[Runner] {self.id} - Red teaming run completed.")

        return red_teaming_results
@@ -0,0 +1,46 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Callable
4
+
5
+ from pydantic import BaseModel, Field
6
+
7
+
8
class RunnerArguments(BaseModel):
    # Pydantic model holding a runner's configuration; `to_dict` exports only the
    # serializable part.

    # The ID of the Runner.
    id: str

    # The name of the Runner.
    name: str = Field(min_length=1)

    # The database file associated with the Runner.
    database_file: str = ""

    # List of endpoints for the Runner.
    endpoints: list[str] = Field(min_length=1)

    # A brief description of the Runner.
    description: str = ""

    # ------------------------------------------------------------------------------
    # These attributes are not exported to dict
    # ------------------------------------------------------------------------------
    # The database instance associated with the Runner.
    database_instance: Any | None = None

    # The progress callback function for the Runner.
    progress_callback_func: Callable | None = None

    def to_dict(self) -> dict:
        """
        Serialize the exportable fields of this RunnerArguments instance.

        'database_instance' and 'progress_callback_func' are left out; every other
        field is exported under its own attribute name.

        Returns:
            A dictionary representation of the RunnerArguments instance, excluding
            'database_instance' and 'progress_callback_func'.
        """
        exported_fields = ("id", "name", "database_file", "endpoints", "description")
        return {field_name: getattr(self, field_name) for field_name in exported_fields}