aiverify-moonshot 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. aiverify_moonshot-0.4.0.dist-info/METADATA +249 -0
  2. aiverify_moonshot-0.4.0.dist-info/RECORD +163 -0
  3. aiverify_moonshot-0.4.0.dist-info/WHEEL +4 -0
  4. aiverify_moonshot-0.4.0.dist-info/licenses/AUTHORS.md +5 -0
  5. aiverify_moonshot-0.4.0.dist-info/licenses/LICENSE.md +201 -0
  6. aiverify_moonshot-0.4.0.dist-info/licenses/NOTICES.md +3340 -0
  7. moonshot/__init__.py +0 -0
  8. moonshot/__main__.py +198 -0
  9. moonshot/api.py +155 -0
  10. moonshot/integrations/__init__.py +0 -0
  11. moonshot/integrations/cli/__init__.py +0 -0
  12. moonshot/integrations/cli/__main__.py +25 -0
  13. moonshot/integrations/cli/active_session_cfg.py +1 -0
  14. moonshot/integrations/cli/benchmark/__init__.py +0 -0
  15. moonshot/integrations/cli/benchmark/benchmark.py +186 -0
  16. moonshot/integrations/cli/benchmark/cookbook.py +545 -0
  17. moonshot/integrations/cli/benchmark/datasets.py +164 -0
  18. moonshot/integrations/cli/benchmark/metrics.py +141 -0
  19. moonshot/integrations/cli/benchmark/recipe.py +598 -0
  20. moonshot/integrations/cli/benchmark/result.py +216 -0
  21. moonshot/integrations/cli/benchmark/run.py +140 -0
  22. moonshot/integrations/cli/benchmark/runner.py +174 -0
  23. moonshot/integrations/cli/cli.py +64 -0
  24. moonshot/integrations/cli/common/__init__.py +0 -0
  25. moonshot/integrations/cli/common/common.py +72 -0
  26. moonshot/integrations/cli/common/connectors.py +325 -0
  27. moonshot/integrations/cli/common/display_helper.py +42 -0
  28. moonshot/integrations/cli/common/prompt_template.py +94 -0
  29. moonshot/integrations/cli/initialisation/__init__.py +0 -0
  30. moonshot/integrations/cli/initialisation/initialisation.py +14 -0
  31. moonshot/integrations/cli/redteam/__init__.py +0 -0
  32. moonshot/integrations/cli/redteam/attack_module.py +70 -0
  33. moonshot/integrations/cli/redteam/context_strategy.py +147 -0
  34. moonshot/integrations/cli/redteam/prompt_template.py +67 -0
  35. moonshot/integrations/cli/redteam/redteam.py +90 -0
  36. moonshot/integrations/cli/redteam/session.py +467 -0
  37. moonshot/integrations/web_api/.env.dev +7 -0
  38. moonshot/integrations/web_api/__init__.py +0 -0
  39. moonshot/integrations/web_api/__main__.py +56 -0
  40. moonshot/integrations/web_api/app.py +125 -0
  41. moonshot/integrations/web_api/container.py +146 -0
  42. moonshot/integrations/web_api/log/.gitkeep +0 -0
  43. moonshot/integrations/web_api/logging_conf.py +114 -0
  44. moonshot/integrations/web_api/routes/__init__.py +0 -0
  45. moonshot/integrations/web_api/routes/attack_modules.py +66 -0
  46. moonshot/integrations/web_api/routes/benchmark.py +116 -0
  47. moonshot/integrations/web_api/routes/benchmark_result.py +175 -0
  48. moonshot/integrations/web_api/routes/context_strategy.py +129 -0
  49. moonshot/integrations/web_api/routes/cookbook.py +225 -0
  50. moonshot/integrations/web_api/routes/dataset.py +120 -0
  51. moonshot/integrations/web_api/routes/endpoint.py +282 -0
  52. moonshot/integrations/web_api/routes/metric.py +78 -0
  53. moonshot/integrations/web_api/routes/prompt_template.py +128 -0
  54. moonshot/integrations/web_api/routes/recipe.py +219 -0
  55. moonshot/integrations/web_api/routes/redteam.py +609 -0
  56. moonshot/integrations/web_api/routes/runner.py +239 -0
  57. moonshot/integrations/web_api/schemas/__init__.py +0 -0
  58. moonshot/integrations/web_api/schemas/benchmark_runner_dto.py +13 -0
  59. moonshot/integrations/web_api/schemas/cookbook_create_dto.py +19 -0
  60. moonshot/integrations/web_api/schemas/cookbook_response_model.py +9 -0
  61. moonshot/integrations/web_api/schemas/dataset_response_dto.py +9 -0
  62. moonshot/integrations/web_api/schemas/endpoint_create_dto.py +21 -0
  63. moonshot/integrations/web_api/schemas/endpoint_response_model.py +11 -0
  64. moonshot/integrations/web_api/schemas/prompt_response_model.py +14 -0
  65. moonshot/integrations/web_api/schemas/prompt_template_response_model.py +10 -0
  66. moonshot/integrations/web_api/schemas/recipe_create_dto.py +32 -0
  67. moonshot/integrations/web_api/schemas/recipe_response_model.py +7 -0
  68. moonshot/integrations/web_api/schemas/session_create_dto.py +16 -0
  69. moonshot/integrations/web_api/schemas/session_prompt_dto.py +7 -0
  70. moonshot/integrations/web_api/schemas/session_response_model.py +38 -0
  71. moonshot/integrations/web_api/services/__init__.py +0 -0
  72. moonshot/integrations/web_api/services/attack_module_service.py +34 -0
  73. moonshot/integrations/web_api/services/auto_red_team_test_manager.py +86 -0
  74. moonshot/integrations/web_api/services/auto_red_team_test_state.py +57 -0
  75. moonshot/integrations/web_api/services/base_service.py +8 -0
  76. moonshot/integrations/web_api/services/benchmark_result_service.py +25 -0
  77. moonshot/integrations/web_api/services/benchmark_test_manager.py +106 -0
  78. moonshot/integrations/web_api/services/benchmark_test_state.py +56 -0
  79. moonshot/integrations/web_api/services/benchmarking_service.py +31 -0
  80. moonshot/integrations/web_api/services/context_strategy_service.py +22 -0
  81. moonshot/integrations/web_api/services/cookbook_service.py +194 -0
  82. moonshot/integrations/web_api/services/dataset_service.py +20 -0
  83. moonshot/integrations/web_api/services/endpoint_service.py +65 -0
  84. moonshot/integrations/web_api/services/metric_service.py +14 -0
  85. moonshot/integrations/web_api/services/prompt_template_service.py +39 -0
  86. moonshot/integrations/web_api/services/recipe_service.py +155 -0
  87. moonshot/integrations/web_api/services/runner_service.py +147 -0
  88. moonshot/integrations/web_api/services/session_service.py +350 -0
  89. moonshot/integrations/web_api/services/utils/exceptions_handler.py +41 -0
  90. moonshot/integrations/web_api/services/utils/results_formatter.py +47 -0
  91. moonshot/integrations/web_api/status_updater/interface/benchmark_progress_callback.py +14 -0
  92. moonshot/integrations/web_api/status_updater/interface/redteam_progress_callback.py +14 -0
  93. moonshot/integrations/web_api/status_updater/moonshot_ui_webhook.py +72 -0
  94. moonshot/integrations/web_api/types/types.py +99 -0
  95. moonshot/src/__init__.py +0 -0
  96. moonshot/src/api/__init__.py +0 -0
  97. moonshot/src/api/api_connector.py +58 -0
  98. moonshot/src/api/api_connector_endpoint.py +162 -0
  99. moonshot/src/api/api_context_strategy.py +57 -0
  100. moonshot/src/api/api_cookbook.py +160 -0
  101. moonshot/src/api/api_dataset.py +46 -0
  102. moonshot/src/api/api_environment_variables.py +17 -0
  103. moonshot/src/api/api_metrics.py +51 -0
  104. moonshot/src/api/api_prompt_template.py +43 -0
  105. moonshot/src/api/api_recipe.py +182 -0
  106. moonshot/src/api/api_red_teaming.py +59 -0
  107. moonshot/src/api/api_result.py +84 -0
  108. moonshot/src/api/api_run.py +74 -0
  109. moonshot/src/api/api_runner.py +132 -0
  110. moonshot/src/api/api_session.py +290 -0
  111. moonshot/src/configs/__init__.py +0 -0
  112. moonshot/src/configs/env_variables.py +187 -0
  113. moonshot/src/connectors/__init__.py +0 -0
  114. moonshot/src/connectors/connector.py +327 -0
  115. moonshot/src/connectors/connector_prompt_arguments.py +17 -0
  116. moonshot/src/connectors_endpoints/__init__.py +0 -0
  117. moonshot/src/connectors_endpoints/connector_endpoint.py +211 -0
  118. moonshot/src/connectors_endpoints/connector_endpoint_arguments.py +54 -0
  119. moonshot/src/cookbooks/__init__.py +0 -0
  120. moonshot/src/cookbooks/cookbook.py +225 -0
  121. moonshot/src/cookbooks/cookbook_arguments.py +34 -0
  122. moonshot/src/datasets/__init__.py +0 -0
  123. moonshot/src/datasets/dataset.py +255 -0
  124. moonshot/src/datasets/dataset_arguments.py +50 -0
  125. moonshot/src/metrics/__init__.py +0 -0
  126. moonshot/src/metrics/metric.py +192 -0
  127. moonshot/src/metrics/metric_interface.py +95 -0
  128. moonshot/src/prompt_templates/__init__.py +0 -0
  129. moonshot/src/prompt_templates/prompt_template.py +103 -0
  130. moonshot/src/recipes/__init__.py +0 -0
  131. moonshot/src/recipes/recipe.py +340 -0
  132. moonshot/src/recipes/recipe_arguments.py +111 -0
  133. moonshot/src/redteaming/__init__.py +0 -0
  134. moonshot/src/redteaming/attack/__init__.py +0 -0
  135. moonshot/src/redteaming/attack/attack_module.py +618 -0
  136. moonshot/src/redteaming/attack/attack_module_arguments.py +44 -0
  137. moonshot/src/redteaming/attack/context_strategy.py +131 -0
  138. moonshot/src/redteaming/context_strategy/__init__.py +0 -0
  139. moonshot/src/redteaming/context_strategy/context_strategy_interface.py +46 -0
  140. moonshot/src/redteaming/session/__init__.py +0 -0
  141. moonshot/src/redteaming/session/chat.py +209 -0
  142. moonshot/src/redteaming/session/red_teaming_progress.py +128 -0
  143. moonshot/src/redteaming/session/red_teaming_type.py +6 -0
  144. moonshot/src/redteaming/session/session.py +775 -0
  145. moonshot/src/results/__init__.py +0 -0
  146. moonshot/src/results/result.py +119 -0
  147. moonshot/src/results/result_arguments.py +44 -0
  148. moonshot/src/runners/__init__.py +0 -0
  149. moonshot/src/runners/runner.py +476 -0
  150. moonshot/src/runners/runner_arguments.py +46 -0
  151. moonshot/src/runners/runner_type.py +6 -0
  152. moonshot/src/runs/__init__.py +0 -0
  153. moonshot/src/runs/run.py +344 -0
  154. moonshot/src/runs/run_arguments.py +162 -0
  155. moonshot/src/runs/run_progress.py +145 -0
  156. moonshot/src/runs/run_status.py +10 -0
  157. moonshot/src/storage/__init__.py +0 -0
  158. moonshot/src/storage/db_interface.py +128 -0
  159. moonshot/src/storage/io_interface.py +31 -0
  160. moonshot/src/storage/storage.py +525 -0
  161. moonshot/src/utils/__init__.py +0 -0
  162. moonshot/src/utils/import_modules.py +96 -0
  163. moonshot/src/utils/timeit.py +25 -0
@@ -0,0 +1,598 @@
1
+ import asyncio
2
+ from ast import literal_eval
3
+
4
+ import cmd2
5
+ from rich.console import Console
6
+ from rich.table import Table
7
+ from slugify import slugify
8
+
9
+ from moonshot.api import (
10
+ api_create_recipe,
11
+ api_create_runner,
12
+ api_delete_recipe,
13
+ api_get_all_recipe,
14
+ api_get_all_run,
15
+ api_get_all_runner_name,
16
+ api_load_runner,
17
+ api_read_recipe,
18
+ api_update_recipe,
19
+ )
20
+ from moonshot.integrations.cli.common.display_helper import display_view_list_format
21
+
22
# Module-level rich console shared by the CLI helpers below for prompts and styled output.
console = Console()
23
+
24
+
25
+ # ------------------------------------------------------------------------------
26
+ # CLI Functions
27
+ # ------------------------------------------------------------------------------
28
def add_recipe(args) -> None:
    """
    Create a recipe from command-line arguments.

    The list and dict style arguments arrive as strings and are converted into Python objects with
    literal_eval before being passed to api_create_recipe. Optional arguments that are missing or
    empty fall back to an empty list (an empty dict for the grading scale).

    Args:
        args (argparse.Namespace): The parsed command-line arguments. The attributes read are name,
        description, tags, categories, datasets, prompt_templates, metrics, attack_modules and
        grading_scale.

    Returns:
        None

    Note:
        Any exception raised while evaluating the arguments or creating the recipe is caught and
        printed with the "[add_recipe]" prefix; nothing is propagated to the caller.
    """

    def parse_optional(raw_value, fallback):
        # Optional flags are None (or empty) when they were not supplied on the command line.
        return literal_eval(raw_value) if raw_value else fallback

    try:
        # Evaluate in the same order as the positional call below so the first bad
        # argument is the one that gets reported.
        parsed_tags = parse_optional(args.tags, [])
        parsed_categories = literal_eval(args.categories)
        parsed_datasets = literal_eval(args.datasets)
        parsed_templates = parse_optional(args.prompt_templates, [])
        parsed_metrics = literal_eval(args.metrics)
        parsed_attacks = parse_optional(args.attack_modules, [])
        parsed_scale = parse_optional(args.grading_scale, {})

        created_id = api_create_recipe(
            args.name,
            args.description,
            parsed_tags,
            parsed_categories,
            parsed_datasets,
            parsed_templates,
            parsed_metrics,
            parsed_attacks,
            parsed_scale,
        )
        print(f"[add_recipe]: Recipe ({created_id}) created.")
    except Exception as error:
        print(f"[add_recipe]: {str(error)}")
75
+
76
+
77
def list_recipes() -> None:
    """
    Show every available recipe.

    The recipes returned by api_get_all_recipe are passed straight to display_recipes for
    rendering. Any exception is caught and printed with the "[list_recipes]" prefix.

    Returns:
        None
    """
    try:
        display_recipes(api_get_all_recipe())
    except Exception as error:
        print(f"[list_recipes]: {error!s}")
93
+
94
+
95
def view_recipe(args) -> None:
    """
    Show a single recipe.

    The recipe is read with api_read_recipe and rendered through display_recipes as a
    one-element list. Any exception is caught and printed with the "[view_recipe]" prefix.

    Args:
        args: A namespace object from argparse. It should have the following attribute:
            recipe (str): The id of the recipe to view.

    Returns:
        None
    """
    try:
        selected = api_read_recipe(args.recipe)
        display_recipes([selected])
    except Exception as error:
        print(f"[view_recipe]: {error!s}")
115
+
116
+
117
def run_recipe(args) -> None:
    """
    Execute a recipe with the specified parameters.

    This function runs a recipe runner with the given name, recipes, endpoints, and other parameters.
    If a runner whose slugified name already exists is found it is loaded, otherwise a new one is created.
    The recipes are run against the specified endpoints, and the results of the latest run are displayed.

    The runner is always closed once the run has been attempted, including when the run itself raises.

    Args:
        args: A namespace object from argparse. It should have the following attributes:
            name (str): The name of the recipe runner.
            recipes (str): A string representation of a list of recipes to run.
            endpoints (str): A string representation of a list of endpoints to run.
            num_of_prompts (int): The number of prompts to run.
            random_seed (int): The random seed number for reproducibility.
            system_prompt (str): The system prompt to use.
            runner_proc_module (str): The runner processing module to use.
            result_proc_module (str): The result processing module to use.

    Returns:
        None

    Note:
        Any exception (including a missing run result) is caught and printed with the
        "[run_recipe]" prefix instead of being propagated.
    """
    try:
        name = args.name
        recipes = literal_eval(args.recipes)
        endpoints = literal_eval(args.endpoints)
        num_of_prompts = args.num_of_prompts
        random_seed = args.random_seed
        system_prompt = args.system_prompt
        runner_proc_module = args.runner_proc_module
        result_proc_module = args.result_proc_module

        # Reuse an existing runner with the same slugified name, otherwise create one
        slugify_id = slugify(name, lowercase=True)
        if slugify_id in api_get_all_runner_name():
            rec_runner = api_load_runner(slugify_id)
        else:
            rec_runner = api_create_runner(name, endpoints)

        try:
            loop = asyncio.get_event_loop()
            loop.run_until_complete(
                rec_runner.run_recipes(
                    recipes,
                    num_of_prompts,
                    random_seed,
                    system_prompt,
                    runner_proc_module,
                    result_proc_module,
                )
            )
        finally:
            # Close the runner even when the run raises, so a failed run does not leave it open.
            rec_runner.close()

        # Display results of the most recent run; an empty run list counts as "no result"
        runner_runs = api_get_all_run(rec_runner.id)
        result_info = runner_runs[-1].get("results") if runner_runs else None
        if not result_info:
            raise RuntimeError("no run result generated")
        show_recipe_results(
            recipes, endpoints, result_info, result_info["metadata"]["duration"]
        )
    except Exception as e:
        print(f"[run_recipe]: {str(e)}")
180
+
181
+
182
def update_recipe(args) -> None:
    """
    Apply key/value updates to an existing recipe.

    The update_values string is evaluated with literal_eval, converted to a dict and expanded as
    keyword arguments to api_update_recipe. Any exception is caught and printed with the
    "[update_recipe]" prefix.

    Args:
        args: A namespace object from argparse. It should have the following attributes:
            recipe (str): The id of the recipe to update.
            update_values (str): A string representation of a list of tuples. Each tuple contains a key
            and a value to update in the recipe.

    Returns:
        None
    """
    try:
        target_recipe = args.recipe
        changes = dict(literal_eval(args.update_values))
        api_update_recipe(target_recipe, **changes)
        print("[update_recipe]: Recipe updated.")
    except Exception as error:
        print(f"[update_recipe]: {str(error)}")
205
+
206
+
207
def delete_recipe(args) -> None:
    """
    Delete a recipe after asking the user to confirm.

    The user is prompted on the console first. Only an answer of "y" (case-insensitive) proceeds to
    api_delete_recipe; any other answer cancels. A successful deletion prints a confirmation, and
    an exception during deletion is caught and printed with the "[delete_recipe]" prefix.

    Args:
        args: A namespace object from argparse. It should have the following attribute:
            recipe (str): The identifier of the recipe to delete.

    Returns:
        None
    """
    answer = console.input(
        "[bold red]Are you sure you want to delete the recipe (y/N)? [/]"
    )
    if answer.lower() == "y":
        try:
            api_delete_recipe(args.recipe)
            print("[delete_recipe]: Recipe deleted.")
        except Exception as error:
            print(f"[delete_recipe]: {str(error)}")
    else:
        # Anything other than an explicit "y" is treated as a refusal.
        console.print("[bold yellow]Recipe deletion cancelled.[/]")
235
+
236
+
237
+ # ------------------------------------------------------------------------------
238
+ # Helper functions: Display on cli
239
+ # ------------------------------------------------------------------------------
240
def display_view_grading_scale_format(title: str, grading_scale: dict) -> str:
    """
    Render a grading scale as rich-markup text.

    Each entry of the grading scale becomes one numbered line of the form
    "<n>. <grade> [<low> - <high>]", where the bounds are the first two items of the entry's value.
    A falsy grading scale (empty or None) yields the title followed by 'nil'.

    Args:
        title (str): The title to display above the grading scale.
        grading_scale (dict): A dictionary with grade levels as keys and two-item ranges as values.

    Returns:
        str: The formatted grading scale as a string.
    """
    if not grading_scale:
        return f"[blue]{title}[/blue]: nil"

    grade_lines = []
    for position, (grade, bounds) in enumerate(grading_scale.items(), start=1):
        grade_lines.append(f"{position}. {grade} [{bounds[0]} - {bounds[1]}]")
    return f"[blue]{title}[/blue]:\n" + "\n".join(grade_lines)
263
+
264
+
265
def display_view_statistics_format(title: str, stats: dict) -> str:
    """
    Render recipe statistics as rich-markup text.

    Each statistic becomes one numbered entry. A plain value is shown on the same line as its
    name; a dict value is expanded into one "key: value" line per item underneath the name.
    A falsy statistics mapping (empty or None) yields the title followed by 'nil'.

    Args:
        title (str): The title to display above the statistics.
        stats (dict): A dictionary with various statistics as keys and their counts or sub-statistics as values.

    Returns:
        str: The formatted statistics as a string.
    """
    if not stats:
        return f"[blue]{title}[/blue]: nil"

    def render_entry(position, stat_name, stat_value):
        # Nested dictionaries are listed one sub-statistic per line below the heading.
        if not isinstance(stat_value, dict):
            return f"{position}. {stat_name}: {stat_value}"
        nested_lines = [f" {key}: {item}" for key, item in stat_value.items()]
        return f"{position}. {stat_name}:\n" + "\n".join(nested_lines)

    entries = [
        render_entry(position, stat_name, stat_value)
        for position, (stat_name, stat_value) in enumerate(stats.items(), start=1)
    ]
    return f"[blue]{title}[/blue]:\n" + "\n".join(entries)
296
+
297
+
298
def display_recipes(recipes_list: list) -> None:
    """
    Print the given recipes as a table on the console.

    Each recipe occupies one row: the "Recipe" column holds the id, name, description, tags,
    categories, grading scale and statistics, and the "Contains" column holds the datasets,
    prompt templates, metrics and attack strategies. When the list is empty, a message saying
    that no recipes are found is printed instead.

    Args:
        recipes_list (list): A list of dictionaries, where each dictionary contains the details of a recipe.
            The values of each dictionary are unpacked positionally, so every dictionary must hold
            exactly eleven values in the order used below.
    """
    if not recipes_list:
        console.print("[red]There are no recipes found.[/red]")
        return

    table = Table(
        title="List of Recipes", show_lines=True, expand=True, header_style="bold"
    )
    table.add_column("No.", width=2)
    table.add_column("Recipe", justify="left", width=78)
    table.add_column("Contains", justify="left", width=20, overflow="fold")

    for row_number, recipe in enumerate(recipes_list, 1):
        # Positional unpack: relies on the insertion order of the recipe dictionary.
        (
            recipe_key,
            name,
            description,
            tags,
            categories,
            datasets,
            prompt_templates,
            metrics,
            attack_strategies,
            grading_scale,
            stats,
        ) = recipe.values()

        tags_text = display_view_list_format("Tags", tags)
        categories_text = display_view_list_format("Categories", categories)
        datasets_text = display_view_list_format("Datasets", datasets)
        templates_text = display_view_list_format("Prompt Templates", prompt_templates)
        metrics_text = display_view_list_format("Metrics", metrics)
        attacks_text = display_view_list_format("Attack Strategies", attack_strategies)
        grading_text = display_view_grading_scale_format("Grading Scale", grading_scale)
        stats_text = display_view_statistics_format("Statistics", stats)

        heading = f"[red]id: {recipe_key}[/red]\n\n[blue]{name}[/blue]\n{description}"
        recipe_cell = (
            f"{heading}\n\n{tags_text}\n\n{categories_text}\n\n{grading_text}\n\n{stats_text}"
        )
        contains_cell = (
            f"{datasets_text}\n\n{templates_text}\n\n{metrics_text}\n\n{attacks_text}"
        )

        table.add_section()
        table.add_row(str(row_number), recipe_cell, contains_cell)

    console.print(table)
358
+
359
+
360
def show_recipe_results(recipes, endpoints, recipe_results, duration):
    """
    Print the outcome of a recipe benchmarking run.

    When recipe results are present they are rendered as a table through generate_recipe_table;
    otherwise a message saying that there are no results is printed. In both cases a closing
    banner with the time taken and a note on the overall rating follows.

    Args:
        recipes (list): A list of recipes that were benchmarked.
        endpoints (list): A list of endpoints that were used in the benchmarking.
        recipe_results (dict): A dictionary with the results of the recipe benchmarking.
        duration (float): The time taken to run the benchmarking in seconds.

    Returns:
        None
    """
    if not recipe_results:
        console.print("[red]There are no results.[/red]")
    else:
        generate_recipe_table(recipes, endpoints, recipe_results)

    # Run statistics are always printed, framed by a 50-character rule above and below
    rule = "=" * 50
    console.print(
        f"{rule}\n[blue]Time taken to run: {duration}s[/blue]\n*Overall rating will be the lowest grade that the recipes have in each cookbook\n{rule}"
    )
388
+
389
+
390
def generate_recipe_table(recipes: list, endpoints: list, results: dict) -> None:
    """
    Build and print a table of recipe results.

    The table has one row per recipe and one column per endpoint. Each cell shows
    "<grade> [<avg_grade_value>]" taken from the evaluation summary whose model_id matches the
    endpoint, or "-" when no such summary exists, a key is missing, or the grade is empty.
    A recipe with no entry in the results gets a row of "-" placeholders.

    Args:
        recipes (list): A list of recipe IDs that were benchmarked.
        endpoints (list): A list of endpoint IDs against which the recipes were run.
        results (dict): A dictionary containing the results of the benchmarking. The per-recipe
            entries are read from results["results"]["recipes"].

    Returns:
        None: This function does not return anything. It prints the table to the console.
    """

    def grade_cell(recipe_entry, endpoint_id):
        # First evaluation summary reported for this endpoint, if any
        summary = None
        for candidate in recipe_entry["evaluation_summary"]:
            if candidate["model_id"] == endpoint_id:
                summary = candidate
                break

        if (
            summary
            and "grade" in summary
            and "avg_grade_value" in summary
            and summary["grade"]
        ):
            return f"{summary['grade']} [{summary['avg_grade_value']}]"
        return "-"

    table = Table(
        title="Recipes Result", show_lines=True, expand=True, header_style="bold"
    )
    table.add_column("No.", width=2)
    table.add_column("Recipe", justify="left", width=78)
    for endpoint in endpoints:
        table.add_column(endpoint, justify="center")

    for row_number, recipe_id in enumerate(recipes, start=1):
        # First result entry recorded for this recipe, if any
        matched = None
        for candidate in results["results"]["recipes"]:
            if candidate["id"] == recipe_id:
                matched = candidate
                break

        if matched:
            label = matched["id"]
            cells = [grade_cell(matched, endpoint) for endpoint in endpoints]
        else:
            # No result for this recipe: fill every endpoint column with a placeholder
            label = recipe_id
            cells = ["-"] * len(endpoints)

        table.add_row(
            str(row_number),
            f"Recipe: [blue]{label}[/blue]",
            *cells,
            end_section=True,
        )

    console.print(table)
471
+
472
+
473
+ # ------------------------------------------------------------------------------
474
+ # Cmd2 Arguments Parsers
475
+ # ------------------------------------------------------------------------------
476
# Add recipe arguments
# Positional order on the command line is: name, description, categories, datasets, metrics.
# The list/dict arguments are passed as quoted Python literals and evaluated by add_recipe.
add_recipe_args = cmd2.Cmd2ArgumentParser(
    description="Add a new recipe. The 'name' argument will be slugified to create a unique identifier.",
    epilog="Example:\n add_recipe 'My new recipe' "
    "'I am recipe description' "
    "\"['category1','category2']\" "
    "\"['bbq-lite-age-ambiguous']\" "
    "\"['bertscore','bleuscore']\" "
    "-p \"['analogical-similarity','mmlu']\" "
    "-t \"['tag1','tag2']\" "
    "-a \"['charswap_attack']\" "
    "-g \"{'A':[80,100],'B':[60,79],'C':[40,59],'D':[20,39],'E':[0,19]}\" ",
)
add_recipe_args.add_argument("name", type=str, help="Name of the new recipe")
add_recipe_args.add_argument(
    "description", type=str, help="Description of the new recipe"
)
add_recipe_args.add_argument(
    "-t",
    "--tags",
    type=str,
    help="List of tags to be included in the new recipe",
    nargs="?",
)
# Help text previously described this positional as a list of tags.
add_recipe_args.add_argument(
    "categories",
    type=str,
    help="List of categories to be included in the new recipe",
)
add_recipe_args.add_argument("datasets", type=str, help="The dataset to be used")
add_recipe_args.add_argument(
    "-p",
    "--prompt_templates",
    type=str,
    help="List of prompt templates to be included in the new recipe",
    nargs="?",
)
add_recipe_args.add_argument(
    "metrics", type=str, help="List of metrics to be included in the new recipe"
)
add_recipe_args.add_argument(
    "-a",
    "--attack_modules",
    type=str,
    help="List of attack modules to be included in the new recipe",
    nargs="?",
)
add_recipe_args.add_argument(
    "-g",
    "--grading_scale",
    type=str,
    help="Dict of grading scale for the metric to be included in the new recipe",
    nargs="?",
)
528
+
529
# Update recipe arguments
# update_values is a quoted Python literal (list of key/value tuples) evaluated by update_recipe.
update_recipe_args = cmd2.Cmd2ArgumentParser(
    description="Update a recipe.",
    epilog="Available keys for updating a recipe: \n"
    " name: The name of the recipe. \n"
    " description: The description of the recipe. \n"
    " tags: A list of tags associated with the recipe. \n"
    " categories: A list of categories used in the recipe. \n"
    " datasets: A list of datasets used in the recipe. \n"
    " prompt_templates: A list of prompt templates for the recipe. \n"
    " metrics: A list of metrics to evaluate the recipe. \n"
    " attack_modules: A list of attack modules used in the recipe.\n"
    # The grading scale is a dict (grade -> range), matching the add_recipe -g option.
    " grading_scale: A dict of grading scale used in the recipe. \n\n"
    "Example command:\n"
    " update_recipe my-new-recipe \"[('name', 'My Updated Recipe'), ('tags', ['fairness', 'bbq'])]\" ",
)
update_recipe_args.add_argument("recipe", type=str, help="Id of the recipe")
update_recipe_args.add_argument(
    "update_values", type=str, help="Update recipe key/value"
)
549
+
550
# View recipe arguments: a single positional, the id of the recipe to show.
view_recipe_args = cmd2.Cmd2ArgumentParser(
    epilog="Example:\n view_recipe my-new-recipe",
    description="View a recipe.",
)
view_recipe_args.add_argument(
    "recipe",
    help="Id of the recipe",
    type=str,
)
556
+
557
# Delete recipe arguments: a single positional, the id of the recipe to remove.
delete_recipe_args = cmd2.Cmd2ArgumentParser(
    epilog="Example:\n delete_recipe my-new-recipe",
    description="Delete a recipe.",
)
delete_recipe_args.add_argument(
    "recipe",
    help="Id of the recipe",
    type=str,
)
563
+
564
# Run recipe arguments
# Positionals: runner name, then the recipes and endpoints as quoted Python list literals.
run_recipe_args = cmd2.Cmd2ArgumentParser(
    description="Run a recipe.",
    epilog="Example:\n run_recipe "
    '"my new recipe runner" '
    "\"['bbq','mmlu']\" "
    "\"['openai-gpt35-turbo']\" "
    '-n 1 -r 1 -s "You are an intelligent AI" ',
)
run_recipe_args.add_argument("name", help="Name of recipe runner", type=str)
run_recipe_args.add_argument("recipes", help="List of recipes to run", type=str)
run_recipe_args.add_argument("endpoints", help="List of endpoints to run", type=str)

# Optional tuning flags; each has a default so it may be omitted.
run_recipe_args.add_argument(
    "-n",
    "--num_of_prompts",
    type=int,
    default=0,
    help="Number of prompts to run",
)
run_recipe_args.add_argument(
    "-r",
    "--random_seed",
    type=int,
    default=0,
    help="Random seed number",
)
run_recipe_args.add_argument(
    "-s",
    "--system_prompt",
    type=str,
    default="",
    help="System Prompt to use",
)
run_recipe_args.add_argument(
    "-l",
    "--runner_proc_module",
    type=str,
    default="benchmarking",
    help="Runner processing module to use",
)
run_recipe_args.add_argument(
    "-o",
    "--result_proc_module",
    type=str,
    default="benchmarking-result",
    help="Result processing module to use",
)