aiverify-moonshot 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiverify_moonshot-0.4.0.dist-info/METADATA +249 -0
- aiverify_moonshot-0.4.0.dist-info/RECORD +163 -0
- aiverify_moonshot-0.4.0.dist-info/WHEEL +4 -0
- aiverify_moonshot-0.4.0.dist-info/licenses/AUTHORS.md +5 -0
- aiverify_moonshot-0.4.0.dist-info/licenses/LICENSE.md +201 -0
- aiverify_moonshot-0.4.0.dist-info/licenses/NOTICES.md +3340 -0
- moonshot/__init__.py +0 -0
- moonshot/__main__.py +198 -0
- moonshot/api.py +155 -0
- moonshot/integrations/__init__.py +0 -0
- moonshot/integrations/cli/__init__.py +0 -0
- moonshot/integrations/cli/__main__.py +25 -0
- moonshot/integrations/cli/active_session_cfg.py +1 -0
- moonshot/integrations/cli/benchmark/__init__.py +0 -0
- moonshot/integrations/cli/benchmark/benchmark.py +186 -0
- moonshot/integrations/cli/benchmark/cookbook.py +545 -0
- moonshot/integrations/cli/benchmark/datasets.py +164 -0
- moonshot/integrations/cli/benchmark/metrics.py +141 -0
- moonshot/integrations/cli/benchmark/recipe.py +598 -0
- moonshot/integrations/cli/benchmark/result.py +216 -0
- moonshot/integrations/cli/benchmark/run.py +140 -0
- moonshot/integrations/cli/benchmark/runner.py +174 -0
- moonshot/integrations/cli/cli.py +64 -0
- moonshot/integrations/cli/common/__init__.py +0 -0
- moonshot/integrations/cli/common/common.py +72 -0
- moonshot/integrations/cli/common/connectors.py +325 -0
- moonshot/integrations/cli/common/display_helper.py +42 -0
- moonshot/integrations/cli/common/prompt_template.py +94 -0
- moonshot/integrations/cli/initialisation/__init__.py +0 -0
- moonshot/integrations/cli/initialisation/initialisation.py +14 -0
- moonshot/integrations/cli/redteam/__init__.py +0 -0
- moonshot/integrations/cli/redteam/attack_module.py +70 -0
- moonshot/integrations/cli/redteam/context_strategy.py +147 -0
- moonshot/integrations/cli/redteam/prompt_template.py +67 -0
- moonshot/integrations/cli/redteam/redteam.py +90 -0
- moonshot/integrations/cli/redteam/session.py +467 -0
- moonshot/integrations/web_api/.env.dev +7 -0
- moonshot/integrations/web_api/__init__.py +0 -0
- moonshot/integrations/web_api/__main__.py +56 -0
- moonshot/integrations/web_api/app.py +125 -0
- moonshot/integrations/web_api/container.py +146 -0
- moonshot/integrations/web_api/log/.gitkeep +0 -0
- moonshot/integrations/web_api/logging_conf.py +114 -0
- moonshot/integrations/web_api/routes/__init__.py +0 -0
- moonshot/integrations/web_api/routes/attack_modules.py +66 -0
- moonshot/integrations/web_api/routes/benchmark.py +116 -0
- moonshot/integrations/web_api/routes/benchmark_result.py +175 -0
- moonshot/integrations/web_api/routes/context_strategy.py +129 -0
- moonshot/integrations/web_api/routes/cookbook.py +225 -0
- moonshot/integrations/web_api/routes/dataset.py +120 -0
- moonshot/integrations/web_api/routes/endpoint.py +282 -0
- moonshot/integrations/web_api/routes/metric.py +78 -0
- moonshot/integrations/web_api/routes/prompt_template.py +128 -0
- moonshot/integrations/web_api/routes/recipe.py +219 -0
- moonshot/integrations/web_api/routes/redteam.py +609 -0
- moonshot/integrations/web_api/routes/runner.py +239 -0
- moonshot/integrations/web_api/schemas/__init__.py +0 -0
- moonshot/integrations/web_api/schemas/benchmark_runner_dto.py +13 -0
- moonshot/integrations/web_api/schemas/cookbook_create_dto.py +19 -0
- moonshot/integrations/web_api/schemas/cookbook_response_model.py +9 -0
- moonshot/integrations/web_api/schemas/dataset_response_dto.py +9 -0
- moonshot/integrations/web_api/schemas/endpoint_create_dto.py +21 -0
- moonshot/integrations/web_api/schemas/endpoint_response_model.py +11 -0
- moonshot/integrations/web_api/schemas/prompt_response_model.py +14 -0
- moonshot/integrations/web_api/schemas/prompt_template_response_model.py +10 -0
- moonshot/integrations/web_api/schemas/recipe_create_dto.py +32 -0
- moonshot/integrations/web_api/schemas/recipe_response_model.py +7 -0
- moonshot/integrations/web_api/schemas/session_create_dto.py +16 -0
- moonshot/integrations/web_api/schemas/session_prompt_dto.py +7 -0
- moonshot/integrations/web_api/schemas/session_response_model.py +38 -0
- moonshot/integrations/web_api/services/__init__.py +0 -0
- moonshot/integrations/web_api/services/attack_module_service.py +34 -0
- moonshot/integrations/web_api/services/auto_red_team_test_manager.py +86 -0
- moonshot/integrations/web_api/services/auto_red_team_test_state.py +57 -0
- moonshot/integrations/web_api/services/base_service.py +8 -0
- moonshot/integrations/web_api/services/benchmark_result_service.py +25 -0
- moonshot/integrations/web_api/services/benchmark_test_manager.py +106 -0
- moonshot/integrations/web_api/services/benchmark_test_state.py +56 -0
- moonshot/integrations/web_api/services/benchmarking_service.py +31 -0
- moonshot/integrations/web_api/services/context_strategy_service.py +22 -0
- moonshot/integrations/web_api/services/cookbook_service.py +194 -0
- moonshot/integrations/web_api/services/dataset_service.py +20 -0
- moonshot/integrations/web_api/services/endpoint_service.py +65 -0
- moonshot/integrations/web_api/services/metric_service.py +14 -0
- moonshot/integrations/web_api/services/prompt_template_service.py +39 -0
- moonshot/integrations/web_api/services/recipe_service.py +155 -0
- moonshot/integrations/web_api/services/runner_service.py +147 -0
- moonshot/integrations/web_api/services/session_service.py +350 -0
- moonshot/integrations/web_api/services/utils/exceptions_handler.py +41 -0
- moonshot/integrations/web_api/services/utils/results_formatter.py +47 -0
- moonshot/integrations/web_api/status_updater/interface/benchmark_progress_callback.py +14 -0
- moonshot/integrations/web_api/status_updater/interface/redteam_progress_callback.py +14 -0
- moonshot/integrations/web_api/status_updater/moonshot_ui_webhook.py +72 -0
- moonshot/integrations/web_api/types/types.py +99 -0
- moonshot/src/__init__.py +0 -0
- moonshot/src/api/__init__.py +0 -0
- moonshot/src/api/api_connector.py +58 -0
- moonshot/src/api/api_connector_endpoint.py +162 -0
- moonshot/src/api/api_context_strategy.py +57 -0
- moonshot/src/api/api_cookbook.py +160 -0
- moonshot/src/api/api_dataset.py +46 -0
- moonshot/src/api/api_environment_variables.py +17 -0
- moonshot/src/api/api_metrics.py +51 -0
- moonshot/src/api/api_prompt_template.py +43 -0
- moonshot/src/api/api_recipe.py +182 -0
- moonshot/src/api/api_red_teaming.py +59 -0
- moonshot/src/api/api_result.py +84 -0
- moonshot/src/api/api_run.py +74 -0
- moonshot/src/api/api_runner.py +132 -0
- moonshot/src/api/api_session.py +290 -0
- moonshot/src/configs/__init__.py +0 -0
- moonshot/src/configs/env_variables.py +187 -0
- moonshot/src/connectors/__init__.py +0 -0
- moonshot/src/connectors/connector.py +327 -0
- moonshot/src/connectors/connector_prompt_arguments.py +17 -0
- moonshot/src/connectors_endpoints/__init__.py +0 -0
- moonshot/src/connectors_endpoints/connector_endpoint.py +211 -0
- moonshot/src/connectors_endpoints/connector_endpoint_arguments.py +54 -0
- moonshot/src/cookbooks/__init__.py +0 -0
- moonshot/src/cookbooks/cookbook.py +225 -0
- moonshot/src/cookbooks/cookbook_arguments.py +34 -0
- moonshot/src/datasets/__init__.py +0 -0
- moonshot/src/datasets/dataset.py +255 -0
- moonshot/src/datasets/dataset_arguments.py +50 -0
- moonshot/src/metrics/__init__.py +0 -0
- moonshot/src/metrics/metric.py +192 -0
- moonshot/src/metrics/metric_interface.py +95 -0
- moonshot/src/prompt_templates/__init__.py +0 -0
- moonshot/src/prompt_templates/prompt_template.py +103 -0
- moonshot/src/recipes/__init__.py +0 -0
- moonshot/src/recipes/recipe.py +340 -0
- moonshot/src/recipes/recipe_arguments.py +111 -0
- moonshot/src/redteaming/__init__.py +0 -0
- moonshot/src/redteaming/attack/__init__.py +0 -0
- moonshot/src/redteaming/attack/attack_module.py +618 -0
- moonshot/src/redteaming/attack/attack_module_arguments.py +44 -0
- moonshot/src/redteaming/attack/context_strategy.py +131 -0
- moonshot/src/redteaming/context_strategy/__init__.py +0 -0
- moonshot/src/redteaming/context_strategy/context_strategy_interface.py +46 -0
- moonshot/src/redteaming/session/__init__.py +0 -0
- moonshot/src/redteaming/session/chat.py +209 -0
- moonshot/src/redteaming/session/red_teaming_progress.py +128 -0
- moonshot/src/redteaming/session/red_teaming_type.py +6 -0
- moonshot/src/redteaming/session/session.py +775 -0
- moonshot/src/results/__init__.py +0 -0
- moonshot/src/results/result.py +119 -0
- moonshot/src/results/result_arguments.py +44 -0
- moonshot/src/runners/__init__.py +0 -0
- moonshot/src/runners/runner.py +476 -0
- moonshot/src/runners/runner_arguments.py +46 -0
- moonshot/src/runners/runner_type.py +6 -0
- moonshot/src/runs/__init__.py +0 -0
- moonshot/src/runs/run.py +344 -0
- moonshot/src/runs/run_arguments.py +162 -0
- moonshot/src/runs/run_progress.py +145 -0
- moonshot/src/runs/run_status.py +10 -0
- moonshot/src/storage/__init__.py +0 -0
- moonshot/src/storage/db_interface.py +128 -0
- moonshot/src/storage/io_interface.py +31 -0
- moonshot/src/storage/storage.py +525 -0
- moonshot/src/utils/__init__.py +0 -0
- moonshot/src/utils/import_modules.py +96 -0
- moonshot/src/utils/timeit.py +25 -0
|
@@ -0,0 +1,545 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from ast import literal_eval
|
|
3
|
+
|
|
4
|
+
import cmd2
|
|
5
|
+
from rich.console import Console
|
|
6
|
+
from rich.table import Table
|
|
7
|
+
from slugify import slugify
|
|
8
|
+
|
|
9
|
+
from moonshot.api import (
|
|
10
|
+
api_create_cookbook,
|
|
11
|
+
api_create_runner,
|
|
12
|
+
api_delete_cookbook,
|
|
13
|
+
api_get_all_cookbook,
|
|
14
|
+
api_get_all_run,
|
|
15
|
+
api_get_all_runner_name,
|
|
16
|
+
api_load_runner,
|
|
17
|
+
api_read_cookbook,
|
|
18
|
+
api_read_recipes,
|
|
19
|
+
api_update_cookbook,
|
|
20
|
+
)
|
|
21
|
+
from moonshot.integrations.cli.benchmark.recipe import (
|
|
22
|
+
display_view_grading_scale_format,
|
|
23
|
+
display_view_statistics_format,
|
|
24
|
+
)
|
|
25
|
+
from moonshot.integrations.cli.common.display_helper import display_view_list_format
|
|
26
|
+
|
|
27
|
+
# Shared rich console for this module: the CLI helpers below use it for
# styled output (console.print) and for the delete confirmation prompt (console.input).
console = Console()
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# ------------------------------------------------------------------------------
|
|
31
|
+
# CLI Functions
|
|
32
|
+
# ------------------------------------------------------------------------------
|
|
33
|
+
def add_cookbook(args) -> None:
    """
    Create a cookbook from CLI arguments.

    The recipes argument arrives as text, so it is first parsed into a Python
    object with ast.literal_eval and then handed, together with the name and
    description, to api_create_cookbook. The id returned by the API is printed.
    Any exception (including a recipes string that cannot be parsed) is caught
    and printed with the "[add_cookbook]" prefix instead of being raised.

    Args:
        args: A namespace object from argparse. It should have the following attributes:
            name (str): The name of the new cookbook.
            description (str): The description of the cookbook.
            recipes (str): A string representation of a list of recipe ids.

    Returns:
        None
    """
    try:
        parsed_recipes = literal_eval(args.recipes)
        created_id = api_create_cookbook(
            args.name,
            args.description,
            parsed_recipes,
        )
        message = f"[add_cookbook]: Cookbook ({created_id}) created."
        print(message)
    except Exception as error:
        print(f"[add_cookbook]: {str(error)}")
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def list_cookbooks() -> None:
    """
    Print every available cookbook.

    Fetches the cookbooks through api_get_all_cookbook and passes the result
    straight to display_cookbooks for rendering. Any exception raised along
    the way is caught and printed with the "[list_cookbooks]" prefix.

    Returns:
        None
    """
    try:
        display_cookbooks(api_get_all_cookbook())
    except Exception as error:
        print(f"[list_cookbooks]: {str(error)}")
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def view_cookbook(args) -> None:
    """
    Print the details of a single cookbook.

    Reads the cookbook identified by args.cookbook through api_read_cookbook
    and renders it with display_view_cookbook. Any exception is caught and
    printed with the "[view_cookbook]" prefix.

    Args:
        args: A namespace object from argparse. It should have the following attribute:
            cookbook (str): The id of the cookbook to view.

    Returns:
        None
    """
    try:
        display_view_cookbook(api_read_cookbook(args.cookbook))
    except Exception as error:
        print(f"[view_cookbook]: {str(error)}")
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def run_cookbook(args) -> None:
    """
    Run a cookbook with the specified parameters.

    The runner name is slugified and looked up among the existing runner names.
    An existing runner is loaded by that id (the endpoints argument is not passed
    to api_load_runner in that case); otherwise a new runner is created with the
    given name and endpoints. The cookbooks are then run on the event loop, the
    runner is closed, and the "results" entry of the most recent run is displayed.

    The runner is closed even when the run itself fails, and a runner with no
    recorded runs is reported as "no run result generated". Any exception is
    caught and printed with the "[run_cookbook]" prefix instead of being raised.

    Args:
        args: A namespace object from argparse. It should have the following attributes:
            name (str): The name of the cookbook runner.
            cookbooks (str): A string representation of a list of cookbooks to run.
            endpoints (str): A string representation of a list of endpoints to run.
            num_of_prompts (int): The number of prompts to run.
            random_seed (int): The random seed number for reproducibility.
            system_prompt (str): The system prompt to use.
            runner_proc_module (str): The runner processing module to use.
            result_proc_module (str): The result processing module to use.

    Returns:
        None
    """
    try:
        name = args.name
        cookbooks = literal_eval(args.cookbooks)
        endpoints = literal_eval(args.endpoints)
        num_of_prompts = args.num_of_prompts
        random_seed = args.random_seed
        system_prompt = args.system_prompt
        runner_proc_module = args.runner_proc_module
        result_proc_module = args.result_proc_module

        # Reuse the runner when one with the same slugified id already exists
        slugify_id = slugify(name, lowercase=True)
        if slugify_id in api_get_all_runner_name():
            cb_runner = api_load_runner(slugify_id)
        else:
            cb_runner = api_create_runner(name, endpoints)

        try:
            loop = asyncio.get_event_loop()
            loop.run_until_complete(
                cb_runner.run_cookbooks(
                    cookbooks,
                    num_of_prompts,
                    random_seed,
                    system_prompt,
                    runner_proc_module,
                    result_proc_module,
                )
            )
        finally:
            # Close the runner whether or not the run succeeded, so a failed
            # run does not leave it open.
            cb_runner.close()

        # Display results of the most recent run
        runner_runs = api_get_all_run(cb_runner.id)
        result_info = runner_runs[-1].get("results") if runner_runs else None
        if not result_info:
            raise RuntimeError("no run result generated")
        show_cookbook_results(
            cookbooks, endpoints, result_info, result_info["metadata"]["duration"]
        )

    except Exception as e:
        print(f"[run_cookbook]: {str(e)}")
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def update_cookbook(args) -> None:
    """
    Apply a set of field updates to an existing cookbook.

    The update_values argument is text describing a list of (key, value)
    tuples. It is parsed with ast.literal_eval, turned into a dict, and
    forwarded as keyword arguments to api_update_cookbook together with the
    cookbook id. Any exception (including update values that cannot be parsed)
    is caught and printed with the "[update_cookbook]" prefix.

    Args:
        args: A namespace object from argparse. It should have the following attributes:
            cookbook (str): The id of the cookbook to update.
            update_values (str): A string representation of a list of tuples. Each tuple
                contains a key and a value to update in the cookbook.

    Returns:
        None
    """
    try:
        target_id = args.cookbook
        parsed_pairs = literal_eval(args.update_values)
        changes = dict(parsed_pairs)
        api_update_cookbook(target_id, **changes)
        print("[update_cookbook]: Cookbook updated.")
    except Exception as error:
        print(f"[update_cookbook]: {str(error)}")
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def delete_cookbook(args) -> None:
    """
    Delete a cookbook after asking the user to confirm.

    The user is prompted on the console; only an answer that lower-cases to
    exactly "y" proceeds. Any other answer prints a cancellation notice and
    nothing is deleted. On confirmation the cookbook is removed through
    api_delete_cookbook and a confirmation message is printed. An exception
    raised by the deletion is caught and printed with the "[delete_cookbook]"
    prefix.

    Args:
        args: A namespace object from argparse. It should have the following attribute:
            cookbook (str): The identifier of the cookbook to delete.

    Returns:
        None
    """
    answer = console.input(
        "[bold red]Are you sure you want to delete the cookbook (y/N)? [/]"
    )
    confirmed = answer.lower() == "y"

    if confirmed:
        try:
            api_delete_cookbook(args.cookbook)
            print("[delete_cookbook]: Cookbook deleted.")
        except Exception as error:
            print(f"[delete_cookbook]: {str(error)}")
    else:
        console.print("[bold yellow]Cookbook deletion cancelled.[/]")
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
# ------------------------------------------------------------------------------
|
|
221
|
+
# Helper functions: Display on cli
|
|
222
|
+
# ------------------------------------------------------------------------------
|
|
223
|
+
def display_cookbooks(cookbooks_list):
    """
    Display the list of cookbooks in a tabular format.

    Each cookbook becomes one table row showing its running number, its id,
    name and description, and its recipes (formatted by display_view_list_format).
    If the list is empty, a message saying that no cookbooks are found is printed
    instead.

    Fields are read by key rather than by position, so extra keys or a different
    key order in a cookbook dictionary do not break the display.

    Args:
        cookbooks_list (list): A list of dictionaries, one per cookbook. Each
            dictionary must contain the keys 'id', 'name', 'description' and 'recipes'.

    Returns:
        None
    """
    if not cookbooks_list:
        console.print("[red]There are no cookbooks found.[/red]")
        return

    table = Table(
        title="List of Cookbooks", show_lines=True, expand=True, header_style="bold"
    )
    table.add_column("No.", width=2)
    table.add_column("Cookbook", justify="left", width=78)
    table.add_column("Contains", justify="left", width=20, overflow="fold")
    for row_number, cookbook in enumerate(cookbooks_list, 1):
        cookbook_info = (
            f"[red]ID: {cookbook['id']}[/red]\n\n"
            f"[blue]{cookbook['name']}[/blue]\n{cookbook['description']}"
        )
        recipes_info = display_view_list_format("Recipes", cookbook["recipes"])
        table.add_section()
        table.add_row(str(row_number), cookbook_info, recipes_info)
    console.print(table)
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def display_view_cookbook(cookbook_info):
    """
    Display the recipes of a cookbook in a formatted table.

    The cookbook's recipe ids are resolved through api_read_recipes and each
    resulting recipe becomes one table row: the left cell shows the recipe's id,
    name, description, tags, categories, grading scale and statistics; the right
    cell shows its datasets, prompt templates, metrics and attack strategies.
    If no recipes are returned, a message saying so is printed instead.

    Args:
        cookbook_info (dict): A dictionary containing the cookbook's information.
            Only its 'recipes' entry is used by this function.

    Returns:
        None
    """
    # Only the recipe ids are needed; the cookbook's other fields are not displayed here.
    recipes_list = api_read_recipes(cookbook_info["recipes"])
    if not recipes_list:
        console.print("[red]There are no recipes found for the cookbook.[/red]")
        return

    table = Table(
        title="View Cookbook", show_lines=True, expand=True, header_style="bold"
    )
    table.add_column("No.", width=2)
    table.add_column("Recipe", justify="left", width=78)
    table.add_column("Contains", justify="left", width=20, overflow="fold")
    for row_number, recipe in enumerate(recipes_list, 1):
        # NOTE(review): positional unpacking requires each recipe dict to hold exactly
        # these eleven values in this order; the key names are not visible in this
        # module, so the unpacking is kept positional — confirm against the recipe API.
        (
            recipe_id,
            name,
            description,
            tags,
            categories,
            datasets,
            prompt_templates,
            metrics,
            attack_strategies,
            grading_scale,
            stats,
        ) = recipe.values()

        tags_info = display_view_list_format("Tags", tags)
        categories_info = display_view_list_format("Categories", categories)
        datasets_info = display_view_list_format("Datasets", datasets)
        prompt_templates_info = display_view_list_format(
            "Prompt Templates", prompt_templates
        )
        metrics_info = display_view_list_format("Metrics", metrics)
        attack_strategies_info = display_view_list_format(
            "Attack Strategies", attack_strategies
        )
        grading_scale_info = display_view_grading_scale_format(
            "Grading Scale", grading_scale
        )
        stats_info = display_view_statistics_format("Statistics", stats)

        recipe_info = (
            f"[red]id: {recipe_id}[/red]\n\n[blue]{name}[/blue]\n{description}\n\n"
            f"{tags_info}\n\n{categories_info}\n\n{grading_scale_info}\n\n{stats_info}"
        )
        contains_info = f"{datasets_info}\n\n{prompt_templates_info}\n\n{metrics_info}\n\n{attack_strategies_info}"

        table.add_section()
        table.add_row(str(row_number), recipe_info, contains_info)
    console.print(table)
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def show_cookbook_results(cookbooks, endpoints, cookbook_results, duration):
    """
    Print the outcome of a cookbook benchmarking run.

    When cookbook_results is non-empty, the results table is rendered through
    generate_cookbook_table; otherwise a "no results" message is printed. In
    both cases a footer follows with the run duration and a note on how the
    overall rating is derived.

    Args:
        cookbooks (list): A list of cookbooks.
        endpoints (list): A list of endpoints.
        cookbook_results (dict): A dictionary with the results of the cookbook benchmarking.
        duration (float): The duration of the run.

    Returns:
        None
    """
    if not cookbook_results:
        console.print("[red]There are no results.[/red]")
    else:
        # Render the per-cookbook / per-recipe grades
        generate_cookbook_table(cookbooks, endpoints, cookbook_results)

    # Footer with run statistics, framed by two separator lines
    separator = "=" * 50
    footer = (
        f"{separator}\n"
        f"[blue]Time taken to run: {duration}s[/blue]\n"
        "*Overall rating will be the lowest grade that the recipes have in each cookbook\n"
        f"{separator}"
    )
    console.print(footer)
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def _find_endpoint_summary(summaries: list, endpoint: str):
    """Return the first summary whose 'model_id' equals endpoint, or None if there is none."""
    return next(
        (summary for summary in summaries if summary["model_id"] == endpoint),
        None,
    )


def _cookbook_grade(summary) -> str:
    """Return the summary's 'overall_grade', or "-" when it is missing or empty."""
    if summary and summary.get("overall_grade"):
        return summary["overall_grade"]
    return "-"


def _recipe_grade(summary) -> str:
    """Return "<grade> [<avg_grade_value>]" for the summary, or "-" when either part is unavailable."""
    if summary and "avg_grade_value" in summary and summary.get("grade"):
        return f"{summary['grade']} [{summary['avg_grade_value']}]"
    return "-"


def generate_cookbook_table(cookbooks: list, endpoints: list, results: dict) -> None:
    """
    Generate and display a table with the cookbook benchmarking results.

    The table has a running number, a cookbook/recipe column and one column per
    endpoint. For every requested cookbook that has an entry in
    results["results"]["cookbooks"], a "Cookbook:" row shows the overall grade per
    endpoint, followed by one "Recipe:" row per recipe showing its grade and
    average grade value per endpoint. A cookbook without an entry gets a single
    row with a dash in every endpoint column. A dash is also shown for any
    endpoint whose grade is missing or empty, at both cookbook and recipe level.

    Args:
        cookbooks (list): A list of cookbook ids to display in the table.
        endpoints (list): A list of endpoints for which results are to be displayed.
        results (dict): A dictionary containing the benchmarking results for cookbooks and recipes.

    Returns:
        None: The function prints the table to the console but does not return any value.
    """
    table = Table(
        title="Cookbook Result", show_lines=True, expand=True, header_style="bold"
    )
    table.add_column("No.", width=2)
    table.add_column("Cookbook (with its recipes)", justify="left", width=78)
    for endpoint in endpoints:
        table.add_column(endpoint, justify="center")

    index = 1
    for cookbook in cookbooks:
        # Get cookbook result
        cookbook_result = next(
            (
                result
                for result in results["results"]["cookbooks"]
                if result["id"] == cookbook
            ),
            None,
        )

        if cookbook_result:
            # Cookbook header row: overall grade per endpoint
            overall_summaries = cookbook_result["overall_evaluation_summary"]
            cookbook_grades = [
                _cookbook_grade(_find_endpoint_summary(overall_summaries, endpoint))
                for endpoint in endpoints
            ]
            table.add_row(
                str(index),
                f"Cookbook: [blue]{cookbook}[/blue]",
                *cookbook_grades,
                end_section=True,
            )

            # One row per recipe, listed under the cookbook row
            for recipe in cookbook_result["recipes"]:
                recipe_summaries = recipe["evaluation_summary"]
                recipe_grades = [
                    _recipe_grade(_find_endpoint_summary(recipe_summaries, endpoint))
                    for endpoint in endpoints
                ]
                table.add_row(
                    "",
                    f" └── Recipe: [blue]{recipe['id']}[/blue]",
                    *recipe_grades,
                    end_section=True,
                )
        else:
            # If no results for the cookbook, add a row indicating this with the "Cookbook: " prefix
            # and a dash for each endpoint column
            table.add_row(
                str(index),
                f"Cookbook: {cookbook}",
                *(["-"] * len(endpoints)),
                end_section=True,
            )

        # The running number advances once per cookbook, never per recipe
        index += 1

    # Display table
    console.print(table)
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
# ------------------------------------------------------------------------------
|
|
463
|
+
# Cmd2 Arguments Parsers
|
|
464
|
+
# ------------------------------------------------------------------------------
|
|
465
|
+
# Add cookbook arguments
|
|
466
|
+
# Parser for the add_cookbook command: three positional string arguments.
# 'recipes' is a string holding a Python list literal (see the epilog example).
add_cookbook_args = cmd2.Cmd2ArgumentParser(
    description="Add a new cookbook. The 'name' argument will be slugified to create a unique identifier.",
    epilog="Example:\n add_cookbook 'My new cookbook' "
    "'I am cookbook description' "
    "\"['analogical-similarity','auto-categorisation']\"",
)
add_cookbook_args.add_argument("name", type=str, help="Name of the new cookbook")
add_cookbook_args.add_argument(
    "description", type=str, help="Description of the new cookbook"
)
add_cookbook_args.add_argument(
    "recipes", type=str, help="List of recipes to be included in the new cookbook"
)

# Update cookbook arguments
# 'update_values' is a string holding a list of (key, value) tuples (see the epilog example).
update_cookbook_args = cmd2.Cmd2ArgumentParser(
    description="Update a cookbook.",
    epilog="Available keys for updating a cookbook: \n"
    " name: The name of the cookbook. \n"
    " description: The description of the cookbook. \n"
    " recipes: A list of recipes included in the cookbook. \n\n"
    "Example command:\n"
    " update_cookbook my-new-cookbook "
    "\"[('name', 'Updated Cookbook Name'), ('description', 'Updated description'), "
    "('recipes', ['analogical-similarity'])]\"",
)
update_cookbook_args.add_argument("cookbook", type=str, help="Id of the cookbook")
update_cookbook_args.add_argument(
    "update_values", type=str, help="Update cookbook key/value"
)

# View cookbook arguments
# Single positional argument: the id of the cookbook to view.
view_cookbook_args = cmd2.Cmd2ArgumentParser(
    description="View a cookbook.",
    epilog="Example:\n view_cookbook my-new-cookbook",
)
view_cookbook_args.add_argument("cookbook", type=str, help="Id of the cookbook")

# Delete cookbook arguments
# Single positional argument: the id of the cookbook to delete.
delete_cookbook_args = cmd2.Cmd2ArgumentParser(
    description="Delete a cookbook.",
    epilog="Example:\n delete_cookbook my-new-cookbook",
)
delete_cookbook_args.add_argument("cookbook", type=str, help="Id of the cookbook")

# Run cookbook arguments
# Three positional arguments (runner name, cookbooks list, endpoints list) plus
# optional flags; the two list arguments are strings holding Python list literals.
run_cookbook_args = cmd2.Cmd2ArgumentParser(
    description="Run a cookbook.",
    epilog="Example:\n run_cookbook "
    '"my new cookbook runner" '
    "\"['chinese-safety-cookbook']\" "
    "\"['openai-gpt35-turbo']\" "
    '-n 1 -r 1 -s "You are an intelligent AI" ',
)
run_cookbook_args.add_argument("name", type=str, help="Name of cookbook runner")
run_cookbook_args.add_argument("cookbooks", type=str, help="List of cookbooks to run")
run_cookbook_args.add_argument("endpoints", type=str, help="List of endpoints to run")
# Optional flags; the defaults below apply when a flag is omitted.
run_cookbook_args.add_argument(
    "-n", "--num_of_prompts", type=int, default=0, help="Number of prompts to run"
)
run_cookbook_args.add_argument(
    "-r", "--random_seed", type=int, default=0, help="Random seed number"
)
run_cookbook_args.add_argument(
    "-s", "--system_prompt", type=str, default="", help="System Prompt to use"
)
run_cookbook_args.add_argument(
    "-l",
    "--runner_proc_module",
    type=str,
    default="benchmarking",
    help="Runner processing module to use",
)
run_cookbook_args.add_argument(
    "-o",
    "--result_proc_module",
    type=str,
    default="benchmarking-result",
    help="Result processing module to use",
)
|