aiverify-moonshot 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiverify_moonshot-0.4.0.dist-info/METADATA +249 -0
- aiverify_moonshot-0.4.0.dist-info/RECORD +163 -0
- aiverify_moonshot-0.4.0.dist-info/WHEEL +4 -0
- aiverify_moonshot-0.4.0.dist-info/licenses/AUTHORS.md +5 -0
- aiverify_moonshot-0.4.0.dist-info/licenses/LICENSE.md +201 -0
- aiverify_moonshot-0.4.0.dist-info/licenses/NOTICES.md +3340 -0
- moonshot/__init__.py +0 -0
- moonshot/__main__.py +198 -0
- moonshot/api.py +155 -0
- moonshot/integrations/__init__.py +0 -0
- moonshot/integrations/cli/__init__.py +0 -0
- moonshot/integrations/cli/__main__.py +25 -0
- moonshot/integrations/cli/active_session_cfg.py +1 -0
- moonshot/integrations/cli/benchmark/__init__.py +0 -0
- moonshot/integrations/cli/benchmark/benchmark.py +186 -0
- moonshot/integrations/cli/benchmark/cookbook.py +545 -0
- moonshot/integrations/cli/benchmark/datasets.py +164 -0
- moonshot/integrations/cli/benchmark/metrics.py +141 -0
- moonshot/integrations/cli/benchmark/recipe.py +598 -0
- moonshot/integrations/cli/benchmark/result.py +216 -0
- moonshot/integrations/cli/benchmark/run.py +140 -0
- moonshot/integrations/cli/benchmark/runner.py +174 -0
- moonshot/integrations/cli/cli.py +64 -0
- moonshot/integrations/cli/common/__init__.py +0 -0
- moonshot/integrations/cli/common/common.py +72 -0
- moonshot/integrations/cli/common/connectors.py +325 -0
- moonshot/integrations/cli/common/display_helper.py +42 -0
- moonshot/integrations/cli/common/prompt_template.py +94 -0
- moonshot/integrations/cli/initialisation/__init__.py +0 -0
- moonshot/integrations/cli/initialisation/initialisation.py +14 -0
- moonshot/integrations/cli/redteam/__init__.py +0 -0
- moonshot/integrations/cli/redteam/attack_module.py +70 -0
- moonshot/integrations/cli/redteam/context_strategy.py +147 -0
- moonshot/integrations/cli/redteam/prompt_template.py +67 -0
- moonshot/integrations/cli/redteam/redteam.py +90 -0
- moonshot/integrations/cli/redteam/session.py +467 -0
- moonshot/integrations/web_api/.env.dev +7 -0
- moonshot/integrations/web_api/__init__.py +0 -0
- moonshot/integrations/web_api/__main__.py +56 -0
- moonshot/integrations/web_api/app.py +125 -0
- moonshot/integrations/web_api/container.py +146 -0
- moonshot/integrations/web_api/log/.gitkeep +0 -0
- moonshot/integrations/web_api/logging_conf.py +114 -0
- moonshot/integrations/web_api/routes/__init__.py +0 -0
- moonshot/integrations/web_api/routes/attack_modules.py +66 -0
- moonshot/integrations/web_api/routes/benchmark.py +116 -0
- moonshot/integrations/web_api/routes/benchmark_result.py +175 -0
- moonshot/integrations/web_api/routes/context_strategy.py +129 -0
- moonshot/integrations/web_api/routes/cookbook.py +225 -0
- moonshot/integrations/web_api/routes/dataset.py +120 -0
- moonshot/integrations/web_api/routes/endpoint.py +282 -0
- moonshot/integrations/web_api/routes/metric.py +78 -0
- moonshot/integrations/web_api/routes/prompt_template.py +128 -0
- moonshot/integrations/web_api/routes/recipe.py +219 -0
- moonshot/integrations/web_api/routes/redteam.py +609 -0
- moonshot/integrations/web_api/routes/runner.py +239 -0
- moonshot/integrations/web_api/schemas/__init__.py +0 -0
- moonshot/integrations/web_api/schemas/benchmark_runner_dto.py +13 -0
- moonshot/integrations/web_api/schemas/cookbook_create_dto.py +19 -0
- moonshot/integrations/web_api/schemas/cookbook_response_model.py +9 -0
- moonshot/integrations/web_api/schemas/dataset_response_dto.py +9 -0
- moonshot/integrations/web_api/schemas/endpoint_create_dto.py +21 -0
- moonshot/integrations/web_api/schemas/endpoint_response_model.py +11 -0
- moonshot/integrations/web_api/schemas/prompt_response_model.py +14 -0
- moonshot/integrations/web_api/schemas/prompt_template_response_model.py +10 -0
- moonshot/integrations/web_api/schemas/recipe_create_dto.py +32 -0
- moonshot/integrations/web_api/schemas/recipe_response_model.py +7 -0
- moonshot/integrations/web_api/schemas/session_create_dto.py +16 -0
- moonshot/integrations/web_api/schemas/session_prompt_dto.py +7 -0
- moonshot/integrations/web_api/schemas/session_response_model.py +38 -0
- moonshot/integrations/web_api/services/__init__.py +0 -0
- moonshot/integrations/web_api/services/attack_module_service.py +34 -0
- moonshot/integrations/web_api/services/auto_red_team_test_manager.py +86 -0
- moonshot/integrations/web_api/services/auto_red_team_test_state.py +57 -0
- moonshot/integrations/web_api/services/base_service.py +8 -0
- moonshot/integrations/web_api/services/benchmark_result_service.py +25 -0
- moonshot/integrations/web_api/services/benchmark_test_manager.py +106 -0
- moonshot/integrations/web_api/services/benchmark_test_state.py +56 -0
- moonshot/integrations/web_api/services/benchmarking_service.py +31 -0
- moonshot/integrations/web_api/services/context_strategy_service.py +22 -0
- moonshot/integrations/web_api/services/cookbook_service.py +194 -0
- moonshot/integrations/web_api/services/dataset_service.py +20 -0
- moonshot/integrations/web_api/services/endpoint_service.py +65 -0
- moonshot/integrations/web_api/services/metric_service.py +14 -0
- moonshot/integrations/web_api/services/prompt_template_service.py +39 -0
- moonshot/integrations/web_api/services/recipe_service.py +155 -0
- moonshot/integrations/web_api/services/runner_service.py +147 -0
- moonshot/integrations/web_api/services/session_service.py +350 -0
- moonshot/integrations/web_api/services/utils/exceptions_handler.py +41 -0
- moonshot/integrations/web_api/services/utils/results_formatter.py +47 -0
- moonshot/integrations/web_api/status_updater/interface/benchmark_progress_callback.py +14 -0
- moonshot/integrations/web_api/status_updater/interface/redteam_progress_callback.py +14 -0
- moonshot/integrations/web_api/status_updater/moonshot_ui_webhook.py +72 -0
- moonshot/integrations/web_api/types/types.py +99 -0
- moonshot/src/__init__.py +0 -0
- moonshot/src/api/__init__.py +0 -0
- moonshot/src/api/api_connector.py +58 -0
- moonshot/src/api/api_connector_endpoint.py +162 -0
- moonshot/src/api/api_context_strategy.py +57 -0
- moonshot/src/api/api_cookbook.py +160 -0
- moonshot/src/api/api_dataset.py +46 -0
- moonshot/src/api/api_environment_variables.py +17 -0
- moonshot/src/api/api_metrics.py +51 -0
- moonshot/src/api/api_prompt_template.py +43 -0
- moonshot/src/api/api_recipe.py +182 -0
- moonshot/src/api/api_red_teaming.py +59 -0
- moonshot/src/api/api_result.py +84 -0
- moonshot/src/api/api_run.py +74 -0
- moonshot/src/api/api_runner.py +132 -0
- moonshot/src/api/api_session.py +290 -0
- moonshot/src/configs/__init__.py +0 -0
- moonshot/src/configs/env_variables.py +187 -0
- moonshot/src/connectors/__init__.py +0 -0
- moonshot/src/connectors/connector.py +327 -0
- moonshot/src/connectors/connector_prompt_arguments.py +17 -0
- moonshot/src/connectors_endpoints/__init__.py +0 -0
- moonshot/src/connectors_endpoints/connector_endpoint.py +211 -0
- moonshot/src/connectors_endpoints/connector_endpoint_arguments.py +54 -0
- moonshot/src/cookbooks/__init__.py +0 -0
- moonshot/src/cookbooks/cookbook.py +225 -0
- moonshot/src/cookbooks/cookbook_arguments.py +34 -0
- moonshot/src/datasets/__init__.py +0 -0
- moonshot/src/datasets/dataset.py +255 -0
- moonshot/src/datasets/dataset_arguments.py +50 -0
- moonshot/src/metrics/__init__.py +0 -0
- moonshot/src/metrics/metric.py +192 -0
- moonshot/src/metrics/metric_interface.py +95 -0
- moonshot/src/prompt_templates/__init__.py +0 -0
- moonshot/src/prompt_templates/prompt_template.py +103 -0
- moonshot/src/recipes/__init__.py +0 -0
- moonshot/src/recipes/recipe.py +340 -0
- moonshot/src/recipes/recipe_arguments.py +111 -0
- moonshot/src/redteaming/__init__.py +0 -0
- moonshot/src/redteaming/attack/__init__.py +0 -0
- moonshot/src/redteaming/attack/attack_module.py +618 -0
- moonshot/src/redteaming/attack/attack_module_arguments.py +44 -0
- moonshot/src/redteaming/attack/context_strategy.py +131 -0
- moonshot/src/redteaming/context_strategy/__init__.py +0 -0
- moonshot/src/redteaming/context_strategy/context_strategy_interface.py +46 -0
- moonshot/src/redteaming/session/__init__.py +0 -0
- moonshot/src/redteaming/session/chat.py +209 -0
- moonshot/src/redteaming/session/red_teaming_progress.py +128 -0
- moonshot/src/redteaming/session/red_teaming_type.py +6 -0
- moonshot/src/redteaming/session/session.py +775 -0
- moonshot/src/results/__init__.py +0 -0
- moonshot/src/results/result.py +119 -0
- moonshot/src/results/result_arguments.py +44 -0
- moonshot/src/runners/__init__.py +0 -0
- moonshot/src/runners/runner.py +476 -0
- moonshot/src/runners/runner_arguments.py +46 -0
- moonshot/src/runners/runner_type.py +6 -0
- moonshot/src/runs/__init__.py +0 -0
- moonshot/src/runs/run.py +344 -0
- moonshot/src/runs/run_arguments.py +162 -0
- moonshot/src/runs/run_progress.py +145 -0
- moonshot/src/runs/run_status.py +10 -0
- moonshot/src/storage/__init__.py +0 -0
- moonshot/src/storage/db_interface.py +128 -0
- moonshot/src/storage/io_interface.py +31 -0
- moonshot/src/storage/storage.py +525 -0
- moonshot/src/utils/__init__.py +0 -0
- moonshot/src/utils/import_modules.py +96 -0
- moonshot/src/utils/timeit.py +25 -0
|
File without changes
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from pydantic import validate_call
|
|
6
|
+
|
|
7
|
+
from moonshot.src.configs.env_variables import EnvVariables
|
|
8
|
+
from moonshot.src.storage.storage import Storage
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Result:
    """Read, delete and list result objects kept in the results storage location."""

    @staticmethod
    @validate_call
    def read(result_id: str) -> dict:
        """
        Load the stored result data for a result ID.

        Any failure is reported on stdout before the exception is re-raised to the caller.

        Args:
            result_id (str): The unique identifier of the result to load.

        Returns:
            dict: The stored result data.

        Raises:
            RuntimeError: If ``result_id`` is empty, or no data is stored under it.
            Exception: Any other error raised while reading from storage.
        """
        try:
            # Guard first: an empty ID can never identify a stored result.
            if not result_id:
                raise RuntimeError("Result ID is empty")
            return Result._read_result(result_id)

        except Exception as exc:
            print(f"Failed to read result: {exc!s}")
            raise exc

    @staticmethod
    def _read_result(result_id: str) -> dict:
        """
        Fetch the JSON result object for ``result_id`` directly from storage.

        Unlike ``read``, this helper does no argument validation and prints nothing.

        Args:
            result_id (str): The unique identifier of the result to load.

        Returns:
            dict: The stored result data.

        Raises:
            RuntimeError: If storage returns nothing (or an empty object) for ``result_id``.
        """
        stored = Storage.read_object(EnvVariables.RESULTS.name, result_id, "json")
        if not stored:
            raise RuntimeError(f"Unable to get results for {result_id}.")
        return stored

    @staticmethod
    @validate_call
    def delete(result_id: str) -> bool:
        """
        Remove the stored result data for a result ID.

        Any failure is reported on stdout before the exception is re-raised to the caller.

        Args:
            result_id (str): The unique identifier of the result to remove.

        Returns:
            bool: Always True when the storage call completes without raising.

        Raises:
            Exception: If the storage layer fails to delete the object.
        """
        try:
            Storage.delete_object(EnvVariables.RESULTS.name, result_id, "json")
        except Exception as exc:
            print(f"Failed to delete result: {exc!s}")
            raise exc
        return True

    @staticmethod
    def get_available_items() -> tuple[list[str], list[dict]]:
        """
        List every stored result together with its data.

        Storage entries whose name contains "__" are skipped. For each remaining entry the
        result ID is the entry's file stem, and its data is loaded from storage.

        Returns:
            tuple[list[str], list[dict]]: The result IDs, and the result data for each ID in
            the same order.

        Raises:
            Exception: If listing or reading any result fails; the error is printed first.
        """
        try:
            collected_ids = []
            collected_data = []

            for entry in Storage.get_objects(EnvVariables.RESULTS.name, "json"):
                if "__" in entry:
                    continue

                entry_id = Path(entry).stem
                collected_data.append(Result._read_result(entry_id))
                collected_ids.append(entry_id)

            return collected_ids, collected_data

        except Exception as exc:
            print(f"Failed to get available results: {exc!s}")
            raise exc
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
|
|
5
|
+
from moonshot.src.runs.run_status import RunStatus
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ResultArguments(BaseModel):
    """Data model describing the outcome of a run."""

    # The ID of the Runner.
    id: str

    # The start time of the Run.
    start_time: float

    # The end time of the Run.
    end_time: float

    # The duration of the Run.
    duration: int

    # Status of the Run.
    status: RunStatus

    # Raw Results of the Run from runners-modules.
    raw_results: dict = {}

    # Results of the Run from results-modules.
    results: dict = {}

    # Other information required for results module
    params: dict = {}

    def to_dict(self) -> dict:
        """
        Serialize this ResultArguments instance into a plain dictionary.

        Every attribute is exported under its own name. The status is exported through its
        ``name`` attribute instead of the status object itself.

        Returns:
            A dictionary representation of the ResultArguments instance.
        """
        exported_fields = (
            "id",
            "start_time",
            "end_time",
            "duration",
            "status",
            "raw_results",
            "results",
            "params",
        )
        serialized = {field: getattr(self, field) for field in exported_fields}
        # Overwrite in place (key position is kept) so the status is emitted by name.
        serialized["status"] = self.status.name
        return serialized
|
|
File without changes
|
|
@@ -0,0 +1,476 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Callable
|
|
6
|
+
|
|
7
|
+
from pydantic import validate_call
|
|
8
|
+
from slugify import slugify
|
|
9
|
+
|
|
10
|
+
from moonshot.src.configs.env_variables import EnvVariables
|
|
11
|
+
from moonshot.src.redteaming.session.session import Session
|
|
12
|
+
from moonshot.src.runners.runner_arguments import RunnerArguments
|
|
13
|
+
from moonshot.src.runners.runner_type import RunnerType
|
|
14
|
+
from moonshot.src.runs.run import Run
|
|
15
|
+
from moonshot.src.storage.storage import Storage
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Runner:
|
|
19
|
+
sql_create_runner_cache_table = """
|
|
20
|
+
CREATE TABLE IF NOT EXISTS runner_cache_table (
|
|
21
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
22
|
+
connection_id text NOT NULL,
|
|
23
|
+
recipe_id text,
|
|
24
|
+
dataset_id text,
|
|
25
|
+
prompt_template_id text,
|
|
26
|
+
context_strategy_id text,
|
|
27
|
+
attack_module_id text,
|
|
28
|
+
prompt_index INTEGER,
|
|
29
|
+
prompt text NOT NULL,
|
|
30
|
+
target text NOT NULL,
|
|
31
|
+
predicted_results text NOT NULL,
|
|
32
|
+
duration text NOT NULL,
|
|
33
|
+
random_seed INTEGER,
|
|
34
|
+
system_prompt text
|
|
35
|
+
);
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
def __init__(self, runner_args: RunnerArguments) -> None:
    """
    Build a runner from its arguments and prepare the run-tracking state.

    Args:
        runner_args (RunnerArguments): Source of the runner's id, name, description,
            endpoints, database handle/file and progress callback.
    """
    # Run-tracking state: nothing is running yet; the lock guards current_operation.
    self.current_operation = None
    self.current_operation_lock = asyncio.Lock()

    # Identity and configuration copied from the arguments object.
    self.id = runner_args.id
    self.name = runner_args.name
    self.description = runner_args.description
    self.endpoints = runner_args.endpoints

    # Database handle/file and the optional progress callback.
    self.database_instance = runner_args.database_instance
    self.database_file = runner_args.database_file
    self.progress_callback_func = runner_args.progress_callback_func
|
|
50
|
+
|
|
51
|
+
@classmethod
def load(
    cls, runner_id: str, progress_callback_func: Callable | None = None
) -> Runner:
    """
    Load an existing runner from storage.

    The runner file for ``runner_id`` must already exist. Its contents are read into a
    RunnerArguments object, a database connection for the runner is attached, the supplied
    progress callback is set, and a Runner is built from the result. Any failure is printed
    and re-raised.

    Args:
        runner_id (str): The unique identifier of the runner to be loaded.
        progress_callback_func (Callable | None): An optional callback function for tracking
            the progress of the runner.

    Returns:
        Runner: An instance of the Runner class, initialized with the data loaded from the
        runner file.

    Raises:
        RuntimeError: If the runner file corresponding to the provided runner_id does not exist.
    """
    try:
        # A runner can only be loaded when its runner file is present in storage.
        runner_file_found = Storage.is_object_exists(
            EnvVariables.RUNNERS.name, runner_id, "json"
        )
        if not runner_file_found:
            raise RuntimeError(
                "[Runner] Unable to load runner because the runner file does not exist."
            )

        loaded_args = Runner.read(runner_id)
        loaded_args.database_instance = Storage.create_database_connection(
            EnvVariables.DATABASES.name, runner_id, "db"
        )
        loaded_args.progress_callback_func = progress_callback_func
        return cls(loaded_args)

    except Exception as exc:
        print(f"[Runner] Failed to load runner: {exc!s}")
        raise exc
|
|
93
|
+
|
|
94
|
+
@classmethod
def create(cls, runner_args: RunnerArguments) -> Runner:
    """
    Creates a new runner instance.

    The runner id is the slugified (lowercase) runner name. Creation is refused when the
    name does not slugify to a non-empty id, when a runner file with that id already exists,
    or when any of the requested connector endpoints has no configuration file. Otherwise a
    database connection is opened for the runner, the runner file is written, the runner
    cache table is created, and a Runner built from the new arguments is returned.
    Any failure is printed and re-raised.

    Args:
        runner_args (RunnerArguments): The configuration parameters for creating the runner.

    Returns:
        Runner: An instance of the Runner class, newly created with the specified arguments.

    Raises:
        RuntimeError: If the runner name yields an empty runner id, if a runner file with the
            generated runner_id already exists, or if a connector endpoint does not exist.
    """
    try:
        runner_id = slugify(runner_args.name, lowercase=True)

        # slugify can return "" (e.g. a name without any alphanumeric characters).
        # Runner.read() rejects an empty id, so such a runner could be written but never
        # loaded again; fail here, before anything is written to storage.
        if not runner_id:
            raise RuntimeError(
                "[Runner] Unable to create runner because the runner name "
                "does not produce a valid runner id."
            )

        # Check if runner file exists. If it exists, raise an error.
        if Storage.is_object_exists(EnvVariables.RUNNERS.name, runner_id, "json"):
            raise RuntimeError(
                "[Runner] Unable to create runner because the runner file exists."
            )

        # Check if all endpoint configuration files exist. If not, raise an error.
        for endpoint in runner_args.endpoints:
            if not Storage.is_object_exists(
                EnvVariables.CONNECTORS_ENDPOINTS.name, endpoint, "json"
            ):
                raise RuntimeError(
                    f"[Runner] Connector endpoint {endpoint} does not exist."
                )

        # Rebuild the arguments around the generated id and the database file path.
        runner_info = {
            "id": runner_id,
            "name": runner_args.name,
            "endpoints": runner_args.endpoints,
            "database_file": Storage.get_filepath(
                EnvVariables.DATABASES.name, runner_id, "db", True
            ),
            "progress_callback_func": runner_args.progress_callback_func,
            "description": runner_args.description,
        }
        runner_args = RunnerArguments(**runner_info)
        runner_args.database_instance = Storage.create_database_connection(
            EnvVariables.DATABASES.name, runner_id, "db"
        )

        # Create runner file
        Storage.create_object(
            EnvVariables.RUNNERS.name, runner_id, runner_args.to_dict(), "json"
        )

        # Create runner cache table
        Storage.create_database_table(
            runner_args.database_instance, Runner.sql_create_runner_cache_table
        )

        return cls(runner_args)

    except Exception as e:
        print(f"[Runner] Failed to create runner: {str(e)}")
        raise e
|
|
162
|
+
|
|
163
|
+
@staticmethod
@validate_call
def read(runner_id: str) -> RunnerArguments:
    """
    Load a runner's stored data as a RunnerArguments object.

    The runner's JSON object is fetched through ``Storage`` and unpacked into the
    RunnerArguments constructor. Any failure is printed and re-raised.

    Args:
        runner_id (str): The unique identifier of the runner.

    Returns:
        RunnerArguments: An object of RunnerArguments constructed with the runner's data.

    Raises:
        RuntimeError: If ``runner_id`` is empty.
        Exception: If an error occurs during the data retrieval or object construction.
    """
    try:
        if not runner_id:
            raise RuntimeError("Runner ID is empty")

        stored_runner = Storage.read_object(
            EnvVariables.RUNNERS.name, runner_id, "json"
        )
        return RunnerArguments(**stored_runner)

    except Exception as exc:
        print(f"[Runner] Failed to read runner: {exc!s}")
        raise exc
|
|
192
|
+
|
|
193
|
+
@staticmethod
@validate_call
def delete(runner_id: str) -> bool:
    """
    Delete a runner file and the database file stored under the same id.

    The runner's JSON object is removed first, then its "db" object. Any failure is printed
    and re-raised, so a failure on the first removal leaves the database object untouched.

    Args:
        runner_id (str): The unique identifier of the runner to be deleted.

    Returns:
        bool: True when both storage deletions complete without raising.

    Raises:
        Exception: If an error occurs during the deletion process.
    """
    # (storage location, file extension) pairs, removed in this order.
    removal_targets = (
        (EnvVariables.RUNNERS.name, "json"),
        (EnvVariables.DATABASES.name, "db"),
    )
    try:
        for location, extension in removal_targets:
            Storage.delete_object(location, runner_id, extension)
    except Exception as exc:
        print(f"[Runner] Failed to delete runner: {exc!s}")
        raise exc
    return True
|
|
221
|
+
|
|
222
|
+
@staticmethod
def get_available_items() -> tuple[list[str], list[RunnerArguments]]:
    """
    List every stored runner together with its arguments.

    All JSON objects in the ``EnvVariables.RUNNERS`` storage location are enumerated; entries
    whose name contains "__" are skipped. Each remaining entry is read (by file stem) into a
    RunnerArguments object, and the id recorded for it is the one held by that object.

    Returns:
        tuple[list[str], list[RunnerArguments]]: The runner IDs, and the RunnerArguments for
        each runner in the same order.

    Raises:
        Exception: If listing or reading any runner fails; the error is printed first.
    """
    try:
        found_ids = []
        found_runners = []

        for runner_file in Storage.get_objects(EnvVariables.RUNNERS.name, "json"):
            if "__" in runner_file:
                continue

            stored_runner = Storage.read_object(
                EnvVariables.RUNNERS.name, Path(runner_file).stem, "json"
            )
            runner_record = RunnerArguments(**stored_runner)
            found_runners.append(runner_record)
            found_ids.append(runner_record.id)

        return found_ids, found_runners

    except Exception as exc:
        print(f"[Runner] Failed to get available runners: {exc!s}")
        raise exc
|
|
261
|
+
|
|
262
|
+
def close(self) -> None:
    """
    Close the runner's database connection, if it has one.

    When no database instance is attached this is a no-op; otherwise the connection is
    handed to ``Storage.close_database_connection``.

    Raises:
        Exception: Whatever the storage layer raises while closing the connection.
    """
    if not self.database_instance:
        return
    Storage.close_database_connection(self.database_instance)
|
|
275
|
+
|
|
276
|
+
async def cancel(self) -> None:
    """
    Cancel the operation currently tracked by this runner, if any.

    Under the operation lock: when an operation is registered, its ``cancel()`` method is
    called and the runner's ``current_operation`` is cleared. With no operation registered,
    nothing happens.

    Raises:
        Exception: Whatever the operation's ``cancel()`` raises.
    """
    async with self.current_operation_lock:
        if not self.current_operation:
            return

        print(f"[Runner] {self.id} - Cancelling current operation...")
        self.current_operation.cancel()
        # Forget the cancelled operation.
        self.current_operation = None
|
|
291
|
+
|
|
292
|
+
async def run_recipes(
    self,
    recipes: list[str],
    num_of_prompts: int = 0,
    random_seed: int = 0,
    system_prompt: str = "",
    runner_processing_module: str = "benchmarking",
    result_processing_module: str = "benchmarking-result",
) -> None:
    """
    Initiates an asynchronous benchmark run using a set of recipes.

    A benchmark Run is created and registered as the runner's current operation while the
    operation lock is held. The lock is then released and the run is awaited, so that
    ``cancel()`` can reach the operation while it is in progress. The current operation is
    always cleared afterwards, including when the run raises or is cancelled.

    Args:
        recipes (list[str]): The recipes to be included in the benchmark run.

        num_of_prompts (int, optional): The count of prompts to utilize during the benchmark.
        Defaults to 0.

        random_seed (int, optional): The seed for random number generation to ensure reproducibility.
        Defaults to 0.

        system_prompt (str, optional): The system prompt to be used during the benchmark.
        Defaults to an empty string.

        runner_processing_module (str, optional): The module responsible for processing the runner.
        Defaults to "benchmarking".

        result_processing_module (str, optional): The module responsible for processing the results.
        Defaults to "benchmarking-result".

    Raises:
        Exception: If any error occurs during the setup or execution of the benchmark run.
    """
    async with self.current_operation_lock:  # Acquire the lock
        # Create new benchmark recipe test run
        print(f"[Runner] {self.id} - Running benchmark recipe run...")
        operation = Run(
            self.id,
            RunnerType.BENCHMARK,
            {
                "recipes": recipes,
                "num_of_prompts": num_of_prompts,
                "random_seed": random_seed,
                "system_prompt": system_prompt,
                "runner_processing_module": runner_processing_module,
                "result_processing_module": result_processing_module,
            },
            self.database_instance,
            self.endpoints,
            Storage.get_filepath(EnvVariables.RESULTS.name, self.id, "json", True),
            self.progress_callback_func,
        )
        self.current_operation = operation

    # The long-running run executes outside the lock so cancel() is not blocked.
    # A local reference is awaited so a concurrent reset of self.current_operation
    # cannot turn this into a call on None.
    try:
        await operation.run()
    finally:
        # Always unregister the run; without this a failed or cancelled run would stay
        # registered as the current operation.
        async with self.current_operation_lock:
            self.current_operation = None

    # Only reached when the run finished without raising.
    print(f"[Runner] {self.id} - Benchmark recipe run completed and reset.")
|
|
358
|
+
|
|
359
|
+
async def run_cookbooks(
    self,
    cookbooks: list[str],
    num_of_prompts: int = 0,
    random_seed: int = 0,
    system_prompt: str = "",
    runner_processing_module: str = "benchmarking",
    result_processing_module: str = "benchmarking-result",
) -> None:
    """
    Asynchronously runs a set of cookbooks with the provided parameters.

    A new benchmark ``Run`` is created while holding ``current_operation_lock`` and stored in
    ``current_operation``. The run itself is awaited outside of the lock so that the lock is not
    held for the duration of the long-running operation. Once the run finishes, whether it
    succeeds, raises, or is cancelled, ``current_operation`` is reset to None under the lock.

    Args:
        cookbooks (list[str]): A list of cookbook names to be run in the benchmark.

        num_of_prompts (int, optional): The number of prompts to be used in the benchmark run.
        Defaults to 0.

        random_seed (int, optional): The seed for random number generation to ensure reproducibility.
        Defaults to 0.

        system_prompt (str, optional): A system prompt to be used in the benchmark run.
        Defaults to an empty string.

        runner_processing_module (str, optional): The module responsible for processing the runner.
        Defaults to "benchmarking".

        result_processing_module (str, optional): The module responsible for processing the results.
        Defaults to "benchmarking-result".

    Raises:
        Exception: If any error occurs during the setup or execution of the benchmark run.
        The exception is propagated unchanged; when it is raised by the run itself,
        ``current_operation`` is reset to None before it propagates.
    """
    async with self.current_operation_lock:  # Acquire the lock
        # Create new benchmark cookbook test run
        print(f"[Runner] {self.id} - Running benchmark cookbook run...")
        operation = Run(
            self.id,
            RunnerType.BENCHMARK,
            {
                "cookbooks": cookbooks,
                "num_of_prompts": num_of_prompts,
                "random_seed": random_seed,
                "system_prompt": system_prompt,
                "runner_processing_module": runner_processing_module,
                "result_processing_module": result_processing_module,
            },
            self.database_instance,
            self.endpoints,
            Storage.get_filepath(EnvVariables.RESULTS.name, self.id, "json", True),
            self.progress_callback_func,
        )
        self.current_operation = operation
        # Note: The lock is only held during setup and is released before the long-running operation

    # Execute the long-running operation outside of the lock
    try:
        # Run new benchmark cookbook test run
        await operation.run()
    finally:
        # Always reset current_operation within the lock, even when the run raises or is
        # cancelled; otherwise the runner would keep pointing at a finished/failed operation.
        async with self.current_operation_lock:
            self.current_operation = None

    # Only reached when the run finished without raising
    print(f"[Runner] {self.id} - Benchmark cookbook run completed and reset.")
|
|
426
|
+
|
|
427
|
+
async def run_red_teaming(
    self,
    red_team_args: dict,
    system_prompt: str = "",
    runner_processing_module: str = "redteaming",
) -> list | None:
    """
    Asynchronously runs a red teaming session with the provided arguments.

    A new red teaming ``Session`` is created while holding ``current_operation_lock`` and stored in
    ``current_operation``. The session is awaited outside of the lock so that the lock is not held
    for the duration of the long-running operation. Once the session finishes, whether it succeeds,
    raises, or is cancelled, ``current_operation`` is reset to None under the lock.

    Args:
        red_team_args (dict): A dictionary of arguments for the red teaming session. Its entries are
        passed to the session together with "runner_processing_module"; a
        "runner_processing_module" key inside this dictionary is overridden by the parameter below.

        system_prompt (str, optional): A system prompt to be used in the red teaming session.
        Defaults to an empty string.
        NOTE(review): this parameter is currently not forwarded to the Session by this method;
        confirm whether callers are expected to supply it through red_team_args instead.

        runner_processing_module (str, optional): The processing module to be used for the session.
        Defaults to "redteaming".

    Returns:
        list | None: The value returned by the session's run().

    Raises:
        Exception: If any error occurs during the setup or execution of the red teaming session.
        The exception is propagated unchanged; when it is raised by the session itself,
        ``current_operation`` is reset to None before it propagates.
    """
    async with self.current_operation_lock:  # Acquire the lock
        print(f"[Runner] {self.id} - Running red teaming session...")
        operation = Session(
            self.id,
            RunnerType.REDTEAM,
            {
                **red_team_args,
                "runner_processing_module": runner_processing_module,
            },
            self.database_instance,
            self.endpoints,
            Storage.get_filepath(EnvVariables.RESULTS.name, self.id, "json", True),
            self.progress_callback_func,
        )
        self.current_operation = operation
        # Note: The lock is only held during setup and is released before the long-running operation

    # Execute the long-running operation outside of the lock
    try:
        red_teaming_results = await operation.run()
    finally:
        # Always reset current_operation within the lock, even when the session raises or is
        # cancelled; otherwise the runner would keep pointing at a finished/failed session.
        async with self.current_operation_lock:
            self.current_operation = None

    # Only reached when the session finished without raising
    print(f"[Runner] {self.id} - Red teaming run completed.")

    return red_teaming_results
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, Callable
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class RunnerArguments(BaseModel):
    # Argument model describing a Runner: its identity, the endpoints it targets and
    # two runtime-only handles that to_dict() deliberately leaves out.

    # The ID of the Runner.
    id: str

    # The name of the Runner (declared with min_length=1).
    name: str = Field(min_length=1)

    # The database file associated with the Runner.
    database_file: str = ""

    # List of endpoints for the Runner (declared with min_length=1).
    endpoints: list[str] = Field(min_length=1)

    # A brief description of the Runner.
    description: str = ""

    # ------------------------------------------------------------------------------
    # Runtime-only attributes: these are not exported by to_dict()
    # ------------------------------------------------------------------------------
    # The database instance associated with the Runner.
    database_instance: Any | None = None

    # The progress callback function for the Runner.
    progress_callback_func: Callable | None = None

    def to_dict(self) -> dict:
        """
        Builds a plain dictionary view of this RunnerArguments instance.

        Only 'id', 'name', 'database_file', 'endpoints' and 'description' are included, in that order.
        'database_instance' and 'progress_callback_func' are left out. Values are the attribute
        values themselves (no copies are made).

        Returns:
            A dictionary mapping each exported attribute name to its current value.
        """
        # Key order matches the order in which the exported attributes are declared above.
        exported_fields = ("id", "name", "database_file", "endpoints", "description")
        return {field_name: getattr(self, field_name) for field_name in exported_fields}
|