aiverify-moonshot 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. aiverify_moonshot-0.4.0.dist-info/METADATA +249 -0
  2. aiverify_moonshot-0.4.0.dist-info/RECORD +163 -0
  3. aiverify_moonshot-0.4.0.dist-info/WHEEL +4 -0
  4. aiverify_moonshot-0.4.0.dist-info/licenses/AUTHORS.md +5 -0
  5. aiverify_moonshot-0.4.0.dist-info/licenses/LICENSE.md +201 -0
  6. aiverify_moonshot-0.4.0.dist-info/licenses/NOTICES.md +3340 -0
  7. moonshot/__init__.py +0 -0
  8. moonshot/__main__.py +198 -0
  9. moonshot/api.py +155 -0
  10. moonshot/integrations/__init__.py +0 -0
  11. moonshot/integrations/cli/__init__.py +0 -0
  12. moonshot/integrations/cli/__main__.py +25 -0
  13. moonshot/integrations/cli/active_session_cfg.py +1 -0
  14. moonshot/integrations/cli/benchmark/__init__.py +0 -0
  15. moonshot/integrations/cli/benchmark/benchmark.py +186 -0
  16. moonshot/integrations/cli/benchmark/cookbook.py +545 -0
  17. moonshot/integrations/cli/benchmark/datasets.py +164 -0
  18. moonshot/integrations/cli/benchmark/metrics.py +141 -0
  19. moonshot/integrations/cli/benchmark/recipe.py +598 -0
  20. moonshot/integrations/cli/benchmark/result.py +216 -0
  21. moonshot/integrations/cli/benchmark/run.py +140 -0
  22. moonshot/integrations/cli/benchmark/runner.py +174 -0
  23. moonshot/integrations/cli/cli.py +64 -0
  24. moonshot/integrations/cli/common/__init__.py +0 -0
  25. moonshot/integrations/cli/common/common.py +72 -0
  26. moonshot/integrations/cli/common/connectors.py +325 -0
  27. moonshot/integrations/cli/common/display_helper.py +42 -0
  28. moonshot/integrations/cli/common/prompt_template.py +94 -0
  29. moonshot/integrations/cli/initialisation/__init__.py +0 -0
  30. moonshot/integrations/cli/initialisation/initialisation.py +14 -0
  31. moonshot/integrations/cli/redteam/__init__.py +0 -0
  32. moonshot/integrations/cli/redteam/attack_module.py +70 -0
  33. moonshot/integrations/cli/redteam/context_strategy.py +147 -0
  34. moonshot/integrations/cli/redteam/prompt_template.py +67 -0
  35. moonshot/integrations/cli/redteam/redteam.py +90 -0
  36. moonshot/integrations/cli/redteam/session.py +467 -0
  37. moonshot/integrations/web_api/.env.dev +7 -0
  38. moonshot/integrations/web_api/__init__.py +0 -0
  39. moonshot/integrations/web_api/__main__.py +56 -0
  40. moonshot/integrations/web_api/app.py +125 -0
  41. moonshot/integrations/web_api/container.py +146 -0
  42. moonshot/integrations/web_api/log/.gitkeep +0 -0
  43. moonshot/integrations/web_api/logging_conf.py +114 -0
  44. moonshot/integrations/web_api/routes/__init__.py +0 -0
  45. moonshot/integrations/web_api/routes/attack_modules.py +66 -0
  46. moonshot/integrations/web_api/routes/benchmark.py +116 -0
  47. moonshot/integrations/web_api/routes/benchmark_result.py +175 -0
  48. moonshot/integrations/web_api/routes/context_strategy.py +129 -0
  49. moonshot/integrations/web_api/routes/cookbook.py +225 -0
  50. moonshot/integrations/web_api/routes/dataset.py +120 -0
  51. moonshot/integrations/web_api/routes/endpoint.py +282 -0
  52. moonshot/integrations/web_api/routes/metric.py +78 -0
  53. moonshot/integrations/web_api/routes/prompt_template.py +128 -0
  54. moonshot/integrations/web_api/routes/recipe.py +219 -0
  55. moonshot/integrations/web_api/routes/redteam.py +609 -0
  56. moonshot/integrations/web_api/routes/runner.py +239 -0
  57. moonshot/integrations/web_api/schemas/__init__.py +0 -0
  58. moonshot/integrations/web_api/schemas/benchmark_runner_dto.py +13 -0
  59. moonshot/integrations/web_api/schemas/cookbook_create_dto.py +19 -0
  60. moonshot/integrations/web_api/schemas/cookbook_response_model.py +9 -0
  61. moonshot/integrations/web_api/schemas/dataset_response_dto.py +9 -0
  62. moonshot/integrations/web_api/schemas/endpoint_create_dto.py +21 -0
  63. moonshot/integrations/web_api/schemas/endpoint_response_model.py +11 -0
  64. moonshot/integrations/web_api/schemas/prompt_response_model.py +14 -0
  65. moonshot/integrations/web_api/schemas/prompt_template_response_model.py +10 -0
  66. moonshot/integrations/web_api/schemas/recipe_create_dto.py +32 -0
  67. moonshot/integrations/web_api/schemas/recipe_response_model.py +7 -0
  68. moonshot/integrations/web_api/schemas/session_create_dto.py +16 -0
  69. moonshot/integrations/web_api/schemas/session_prompt_dto.py +7 -0
  70. moonshot/integrations/web_api/schemas/session_response_model.py +38 -0
  71. moonshot/integrations/web_api/services/__init__.py +0 -0
  72. moonshot/integrations/web_api/services/attack_module_service.py +34 -0
  73. moonshot/integrations/web_api/services/auto_red_team_test_manager.py +86 -0
  74. moonshot/integrations/web_api/services/auto_red_team_test_state.py +57 -0
  75. moonshot/integrations/web_api/services/base_service.py +8 -0
  76. moonshot/integrations/web_api/services/benchmark_result_service.py +25 -0
  77. moonshot/integrations/web_api/services/benchmark_test_manager.py +106 -0
  78. moonshot/integrations/web_api/services/benchmark_test_state.py +56 -0
  79. moonshot/integrations/web_api/services/benchmarking_service.py +31 -0
  80. moonshot/integrations/web_api/services/context_strategy_service.py +22 -0
  81. moonshot/integrations/web_api/services/cookbook_service.py +194 -0
  82. moonshot/integrations/web_api/services/dataset_service.py +20 -0
  83. moonshot/integrations/web_api/services/endpoint_service.py +65 -0
  84. moonshot/integrations/web_api/services/metric_service.py +14 -0
  85. moonshot/integrations/web_api/services/prompt_template_service.py +39 -0
  86. moonshot/integrations/web_api/services/recipe_service.py +155 -0
  87. moonshot/integrations/web_api/services/runner_service.py +147 -0
  88. moonshot/integrations/web_api/services/session_service.py +350 -0
  89. moonshot/integrations/web_api/services/utils/exceptions_handler.py +41 -0
  90. moonshot/integrations/web_api/services/utils/results_formatter.py +47 -0
  91. moonshot/integrations/web_api/status_updater/interface/benchmark_progress_callback.py +14 -0
  92. moonshot/integrations/web_api/status_updater/interface/redteam_progress_callback.py +14 -0
  93. moonshot/integrations/web_api/status_updater/moonshot_ui_webhook.py +72 -0
  94. moonshot/integrations/web_api/types/types.py +99 -0
  95. moonshot/src/__init__.py +0 -0
  96. moonshot/src/api/__init__.py +0 -0
  97. moonshot/src/api/api_connector.py +58 -0
  98. moonshot/src/api/api_connector_endpoint.py +162 -0
  99. moonshot/src/api/api_context_strategy.py +57 -0
  100. moonshot/src/api/api_cookbook.py +160 -0
  101. moonshot/src/api/api_dataset.py +46 -0
  102. moonshot/src/api/api_environment_variables.py +17 -0
  103. moonshot/src/api/api_metrics.py +51 -0
  104. moonshot/src/api/api_prompt_template.py +43 -0
  105. moonshot/src/api/api_recipe.py +182 -0
  106. moonshot/src/api/api_red_teaming.py +59 -0
  107. moonshot/src/api/api_result.py +84 -0
  108. moonshot/src/api/api_run.py +74 -0
  109. moonshot/src/api/api_runner.py +132 -0
  110. moonshot/src/api/api_session.py +290 -0
  111. moonshot/src/configs/__init__.py +0 -0
  112. moonshot/src/configs/env_variables.py +187 -0
  113. moonshot/src/connectors/__init__.py +0 -0
  114. moonshot/src/connectors/connector.py +327 -0
  115. moonshot/src/connectors/connector_prompt_arguments.py +17 -0
  116. moonshot/src/connectors_endpoints/__init__.py +0 -0
  117. moonshot/src/connectors_endpoints/connector_endpoint.py +211 -0
  118. moonshot/src/connectors_endpoints/connector_endpoint_arguments.py +54 -0
  119. moonshot/src/cookbooks/__init__.py +0 -0
  120. moonshot/src/cookbooks/cookbook.py +225 -0
  121. moonshot/src/cookbooks/cookbook_arguments.py +34 -0
  122. moonshot/src/datasets/__init__.py +0 -0
  123. moonshot/src/datasets/dataset.py +255 -0
  124. moonshot/src/datasets/dataset_arguments.py +50 -0
  125. moonshot/src/metrics/__init__.py +0 -0
  126. moonshot/src/metrics/metric.py +192 -0
  127. moonshot/src/metrics/metric_interface.py +95 -0
  128. moonshot/src/prompt_templates/__init__.py +0 -0
  129. moonshot/src/prompt_templates/prompt_template.py +103 -0
  130. moonshot/src/recipes/__init__.py +0 -0
  131. moonshot/src/recipes/recipe.py +340 -0
  132. moonshot/src/recipes/recipe_arguments.py +111 -0
  133. moonshot/src/redteaming/__init__.py +0 -0
  134. moonshot/src/redteaming/attack/__init__.py +0 -0
  135. moonshot/src/redteaming/attack/attack_module.py +618 -0
  136. moonshot/src/redteaming/attack/attack_module_arguments.py +44 -0
  137. moonshot/src/redteaming/attack/context_strategy.py +131 -0
  138. moonshot/src/redteaming/context_strategy/__init__.py +0 -0
  139. moonshot/src/redteaming/context_strategy/context_strategy_interface.py +46 -0
  140. moonshot/src/redteaming/session/__init__.py +0 -0
  141. moonshot/src/redteaming/session/chat.py +209 -0
  142. moonshot/src/redteaming/session/red_teaming_progress.py +128 -0
  143. moonshot/src/redteaming/session/red_teaming_type.py +6 -0
  144. moonshot/src/redteaming/session/session.py +775 -0
  145. moonshot/src/results/__init__.py +0 -0
  146. moonshot/src/results/result.py +119 -0
  147. moonshot/src/results/result_arguments.py +44 -0
  148. moonshot/src/runners/__init__.py +0 -0
  149. moonshot/src/runners/runner.py +476 -0
  150. moonshot/src/runners/runner_arguments.py +46 -0
  151. moonshot/src/runners/runner_type.py +6 -0
  152. moonshot/src/runs/__init__.py +0 -0
  153. moonshot/src/runs/run.py +344 -0
  154. moonshot/src/runs/run_arguments.py +162 -0
  155. moonshot/src/runs/run_progress.py +145 -0
  156. moonshot/src/runs/run_status.py +10 -0
  157. moonshot/src/storage/__init__.py +0 -0
  158. moonshot/src/storage/db_interface.py +128 -0
  159. moonshot/src/storage/io_interface.py +31 -0
  160. moonshot/src/storage/storage.py +525 -0
  161. moonshot/src/utils/__init__.py +0 -0
  162. moonshot/src/utils/import_modules.py +96 -0
  163. moonshot/src/utils/timeit.py +25 -0
@@ -0,0 +1,545 @@
1
+ import asyncio
2
+ from ast import literal_eval
3
+
4
+ import cmd2
5
+ from rich.console import Console
6
+ from rich.table import Table
7
+ from slugify import slugify
8
+
9
+ from moonshot.api import (
10
+ api_create_cookbook,
11
+ api_create_runner,
12
+ api_delete_cookbook,
13
+ api_get_all_cookbook,
14
+ api_get_all_run,
15
+ api_get_all_runner_name,
16
+ api_load_runner,
17
+ api_read_cookbook,
18
+ api_read_recipes,
19
+ api_update_cookbook,
20
+ )
21
+ from moonshot.integrations.cli.benchmark.recipe import (
22
+ display_view_grading_scale_format,
23
+ display_view_statistics_format,
24
+ )
25
+ from moonshot.integrations.cli.common.display_helper import display_view_list_format
26
+
27
# Shared rich Console instance; the functions in this module use it for styled
# prompts (console.input) and styled output such as tables (console.print).
console = Console()
28
+
29
+
30
+ # ------------------------------------------------------------------------------
31
+ # CLI Functions
32
+ # ------------------------------------------------------------------------------
33
def add_cookbook(args) -> None:
    """
    Create a cookbook from parsed command-line arguments.

    The recipes argument arrives as a string; it is parsed into a Python object
    with ast.literal_eval and handed, together with the name and description, to
    api_create_cookbook. The value returned by that call is printed as the id of
    the created cookbook. Any exception raised along the way is caught and its
    message is printed instead of propagating.

    Args:
        args: A namespace object from argparse. It should have the following attributes:
            name (str): The name of the new cookbook.
            description (str): The description of the cookbook.
            recipes (str): A string representation of a list of recipe IDs.

    Returns:
        None
    """
    try:
        parsed_recipes = literal_eval(args.recipes)
        created_id = api_create_cookbook(args.name, args.description, parsed_recipes)
        print(f"[add_cookbook]: Cookbook ({created_id}) created.")
    except Exception as error:
        # CLI boundary: report the failure on stdout rather than raising.
        print(f"[add_cookbook]: {error!s}")
56
+
57
+
58
def list_cookbooks() -> None:
    """
    Print every available cookbook.

    The cookbooks are fetched with api_get_all_cookbook and rendered by
    display_cookbooks. If either step raises, the exception message is printed
    instead of propagating.

    Returns:
        None
    """
    try:
        display_cookbooks(api_get_all_cookbook())
    except Exception as error:
        # CLI boundary: report the failure on stdout rather than raising.
        print(f"[list_cookbooks]: {error!s}")
74
+
75
+
76
def view_cookbook(args) -> None:
    """
    Print the details of a single cookbook.

    The cookbook is read with api_read_cookbook using the id given in
    args.cookbook and rendered by display_view_cookbook. If either step raises,
    the exception message is printed instead of propagating.

    Args:
        args: A namespace object from argparse. It should have the following attribute:
            cookbook (str): The id of the cookbook to view.

    Returns:
        None
    """
    try:
        display_view_cookbook(api_read_cookbook(args.cookbook))
    except Exception as error:
        # CLI boundary: report the failure on stdout rather than raising.
        print(f"[view_cookbook]: {error!s}")
96
+
97
+
98
def run_cookbook(args) -> None:
    """
    Run a cookbook with the specified parameters.

    The runner name is slugified; if a runner with that slug is already listed by
    api_get_all_runner_name it is loaded, otherwise a new runner is created for
    the given endpoints. The runner's run_cookbooks coroutine is driven to
    completion on the event loop, the runner is closed (also when the run
    fails), and the results of the most recent run are displayed.

    Any exception, including the RuntimeError raised when no result is
    available, is caught and its message is printed instead of propagating.

    Args:
        args: A namespace object from argparse. It should have the following attributes:
            name (str): The name of the cookbook runner.
            cookbooks (str): A string representation of a list of cookbooks to run.
            endpoints (str): A string representation of a list of endpoints to run.
            num_of_prompts (int): The number of prompts to run.
            random_seed (int): The random seed number for reproducibility.
            system_prompt (str): The system prompt to use.
            runner_proc_module (str): The runner processing module to use.
            result_proc_module (str): The result processing module to use.

    Returns:
        None
    """
    try:
        name = args.name
        cookbooks = literal_eval(args.cookbooks)
        endpoints = literal_eval(args.endpoints)
        num_of_prompts = args.num_of_prompts
        random_seed = args.random_seed
        system_prompt = args.system_prompt
        runner_proc_module = args.runner_proc_module
        result_proc_module = args.result_proc_module

        # Reuse the runner when one with the same slug already exists
        slugify_id = slugify(name, lowercase=True)
        if slugify_id in api_get_all_runner_name():
            cb_runner = api_load_runner(slugify_id)
        else:
            cb_runner = api_create_runner(name, endpoints)

        try:
            try:
                loop = asyncio.get_event_loop()
            except RuntimeError:
                # Python 3.14+ no longer creates a loop implicitly when none is set
                loop = asyncio.new_event_loop()
                asyncio.set_event_loop(loop)

            loop.run_until_complete(
                cb_runner.run_cookbooks(
                    cookbooks,
                    num_of_prompts,
                    random_seed,
                    system_prompt,
                    runner_proc_module,
                    result_proc_module,
                )
            )
        finally:
            # Always close the runner, even when the run raised, so it is not left open
            cb_runner.close()

        # Display results of the most recent run
        runner_runs = api_get_all_run(cb_runner.id)
        # An empty run list is reported as "no run result generated" rather than
        # surfacing as an IndexError ("list index out of range")
        result_info = runner_runs[-1].get("results") if runner_runs else None
        if not result_info:
            raise RuntimeError("no run result generated")

        show_cookbook_results(
            cookbooks, endpoints, result_info, result_info["metadata"]["duration"]
        )

    except Exception as e:
        # CLI boundary: report the failure on stdout rather than raising.
        print(f"[run_cookbook]: {str(e)}")
162
+
163
+
164
def update_cookbook(args) -> None:
    """
    Apply a set of field updates to an existing cookbook.

    The update_values argument arrives as a string; it is parsed with
    ast.literal_eval and converted to a dict, whose items are passed as keyword
    arguments to api_update_cookbook along with the cookbook id. A confirmation
    is printed on success; any exception is caught and its message is printed
    instead of propagating.

    Args:
        args: A namespace object from argparse. It should have the following attributes:
            cookbook (str): The id of the cookbook to update.
            update_values (str): A string representation of a list of (key, value) tuples
                to update in the cookbook.

    Returns:
        None
    """
    try:
        target_cookbook = args.cookbook
        parsed_pairs = literal_eval(args.update_values)
        changes = dict(parsed_pairs)
        api_update_cookbook(target_cookbook, **changes)
        print("[update_cookbook]: Cookbook updated.")
    except Exception as error:
        # CLI boundary: report the failure on stdout rather than raising.
        print(f"[update_cookbook]: {error!s}")
187
+
188
+
189
def delete_cookbook(args) -> None:
    """
    Delete a cookbook after interactive confirmation.

    The user is asked to confirm on the console. Only an answer equal to "y"
    (case-insensitive) proceeds; any other answer prints a cancellation notice
    and nothing is deleted. On confirmation api_delete_cookbook is called with
    the cookbook identifier and a confirmation is printed. An exception raised
    by the deletion is caught and its message is printed instead of propagating.

    Args:
        args: A namespace object from argparse. It should have the following attribute:
            cookbook (str): The identifier of the cookbook to delete.

    Returns:
        None
    """
    answer = console.input(
        "[bold red]Are you sure you want to delete the cookbook (y/N)? [/]"
    )
    if answer.lower() == "y":
        try:
            api_delete_cookbook(args.cookbook)
            print("[delete_cookbook]: Cookbook deleted.")
        except Exception as error:
            # CLI boundary: report the failure on stdout rather than raising.
            print(f"[delete_cookbook]: {error!s}")
    else:
        console.print("[bold yellow]Cookbook deletion cancelled.[/]")
218
+
219
+
220
+ # ------------------------------------------------------------------------------
221
+ # Helper functions: Display on cli
222
+ # ------------------------------------------------------------------------------
223
def display_cookbooks(cookbooks_list):
    """
    Print a table with one row per cookbook.

    Each cookbook dictionary is unpacked positionally from its values as
    (id, name, description, recipes). The row shows a running number, the
    id/name/description, and the recipe list formatted by
    display_view_list_format. When the list is empty, a "no cookbooks found"
    message is printed instead of a table.

    Args:
        cookbooks_list (list): A list of dictionaries, where each dictionary contains the details of a cookbook.
    """
    if not cookbooks_list:
        console.print("[red]There are no cookbooks found.[/red]")
        return

    listing = Table(
        title="List of Cookbooks", show_lines=True, expand=True, header_style="bold"
    )
    listing.add_column("No.", width=2)
    listing.add_column("Cookbook", justify="left", width=78)
    listing.add_column("Contains", justify="left", width=20, overflow="fold")

    for row_number, entry in enumerate(cookbooks_list, 1):
        # Relies on the dictionary holding exactly these four values, in this order
        cb_id, cb_name, cb_description, cb_recipes = entry.values()
        summary_cell = (
            f"[red]ID: {cb_id}[/red]\n\n[blue]{cb_name}[/blue]\n{cb_description}"
        )
        recipes_cell = display_view_list_format("Recipes", cb_recipes)
        listing.add_section()
        listing.add_row(str(row_number), summary_cell, recipes_cell)

    console.print(listing)
250
+
251
+
252
def display_view_cookbook(cookbook_info):
    """
    Print a table describing every recipe of a cookbook.

    The cookbook dictionary is unpacked positionally from its values as
    (id, name, description, recipes); only the recipes are used, to fetch the
    recipe details through api_read_recipes. Each recipe becomes one row showing
    its id, name, description, tags, categories, grading scale and statistics in
    one cell, and its datasets, prompt templates, metrics and attack strategies
    in another. When no recipes are returned, a message is printed instead.

    Args:
        cookbook_info (dict): A dictionary containing the cookbook's information with keys such as
            'id', 'name', 'description', and 'recipes'.

    Returns:
        None
    """
    # Relies on the dictionary holding exactly these four values, in this order
    _cookbook_id, _cookbook_name, _cookbook_description, recipe_ids = (
        cookbook_info.values()
    )
    recipe_details = api_read_recipes(recipe_ids)
    if not recipe_details:
        console.print("[red]There are no recipes found for the cookbook.[/red]")
        return

    view = Table(
        title="View Cookbook", show_lines=True, expand=True, header_style="bold"
    )
    view.add_column("No.", width=2)
    view.add_column("Recipe", justify="left", width=78)
    view.add_column("Contains", justify="left", width=20, overflow="fold")

    for row_number, recipe_entry in enumerate(recipe_details, 1):
        # Relies on each recipe dictionary holding exactly these eleven values, in this order
        (
            r_id,
            r_name,
            r_description,
            r_tags,
            r_categories,
            r_datasets,
            r_prompt_templates,
            r_metrics,
            r_attack_strategies,
            r_grading_scale,
            r_stats,
        ) = recipe_entry.values()

        tags_text = display_view_list_format("Tags", r_tags)
        categories_text = display_view_list_format("Categories", r_categories)
        datasets_text = display_view_list_format("Datasets", r_datasets)
        templates_text = display_view_list_format(
            "Prompt Templates", r_prompt_templates
        )
        metrics_text = display_view_list_format("Metrics", r_metrics)
        attacks_text = display_view_list_format(
            "Attack Strategies", r_attack_strategies
        )
        grading_text = display_view_grading_scale_format(
            "Grading Scale", r_grading_scale
        )
        stats_text = display_view_statistics_format("Statistics", r_stats)

        recipe_cell = (
            f"[red]id: {r_id}[/red]\n\n[blue]{r_name}[/blue]\n{r_description}\n\n"
            f"{tags_text}\n\n{categories_text}\n\n{grading_text}\n\n{stats_text}"
        )
        contains_cell = (
            f"{datasets_text}\n\n{templates_text}\n\n{metrics_text}\n\n{attacks_text}"
        )

        view.add_section()
        view.add_row(str(row_number), recipe_cell, contains_cell)

    console.print(view)
316
+
317
+
318
def show_cookbook_results(cookbooks, endpoints, cookbook_results, duration):
    """
    Print the outcome of a cookbook benchmarking run.

    When cookbook_results is truthy the results table is rendered through
    generate_cookbook_table; otherwise a "no results" message is printed. In both
    cases a footer follows with the run duration and a note on how the overall
    rating is derived.

    Args:
        cookbooks (list): A list of cookbooks.
        endpoints (list): A list of endpoints.
        cookbook_results (dict): A dictionary with the results of the cookbook benchmarking.
        duration (float): The duration of the run.

    Returns:
        None
    """
    if not cookbook_results:
        console.print("[red]There are no results.[/red]")
    else:
        # Display recipe results
        generate_cookbook_table(cookbooks, endpoints, cookbook_results)

    # Footer with run stats, framed by two 50-character rules
    rule = "=" * 50
    console.print(
        f"{rule}\n[blue]Time taken to run: {duration}s[/blue]\n"
        f"*Overall rating will be the lowest grade that the recipes have in each cookbook\n{rule}"
    )
346
+
347
+
348
def generate_cookbook_table(cookbooks: list, endpoints: list, results: dict) -> None:
    """
    Render the cookbook benchmarking results as a table on the console.

    The table has a running-number column, a cookbook/recipe column, and one
    centred column per endpoint. For every requested cookbook, its entry is
    looked up by id in results["results"]["cookbooks"]:

    - If found, a "Cookbook:" row shows the overall grade per endpoint, followed
      by one "Recipe:" row per recipe showing "<grade> [<avg_grade_value>]" per
      endpoint. A dash is shown wherever no grade is available.
    - If not found, a single "Cookbook:" row with a dash in every endpoint
      column is added.

    Args:
        cookbooks (list): A list of cookbook ids to display in the table.
        endpoints (list): A list of endpoints for which results are to be displayed.
        results (dict): A dictionary containing the benchmarking results for cookbooks and recipes.

    Returns:
        None: The function prints the table to the console but does not return any value.
    """

    def _summary_for(summaries, endpoint_id):
        # First summary whose "model_id" equals the endpoint, or None when absent
        return next(
            (entry for entry in summaries if entry["model_id"] == endpoint_id),
            None,
        )

    def _overall_grade(summary):
        # Overall cookbook grade for one endpoint, or "-" when missing/empty
        if summary and summary["overall_grade"]:
            return summary["overall_grade"]
        return "-"

    def _recipe_grade(summary):
        # Recipe grade with its average value for one endpoint, or "-" when missing/empty
        usable = (
            summary
            and "grade" in summary
            and "avg_grade_value" in summary
            and summary["grade"]
        )
        if not usable:
            return "-"
        return f"{summary['grade']} [{summary['avg_grade_value']}]"

    result_table = Table(
        title="Cookbook Result", show_lines=True, expand=True, header_style="bold"
    )
    result_table.add_column("No.", width=2)
    result_table.add_column("Cookbook (with its recipes)", justify="left", width=78)
    for endpoint_id in endpoints:
        result_table.add_column(endpoint_id, justify="center")

    # The number advances once per cookbook, whether or not it has results
    for row_number, cookbook_id in enumerate(cookbooks, 1):
        matched = next(
            (
                item
                for item in results["results"]["cookbooks"]
                if item["id"] == cookbook_id
            ),
            None,
        )

        if not matched:
            # No results for this cookbook: a dash for each endpoint column
            result_table.add_row(
                str(row_number),
                f"Cookbook: {cookbook_id}",
                *(["-"] * len(endpoints)),
                end_section=True,
            )
            continue

        # Cookbook header row with the overall grade per endpoint
        overall_cells = [
            _overall_grade(
                _summary_for(matched["overall_evaluation_summary"], endpoint_id)
            )
            for endpoint_id in endpoints
        ]
        result_table.add_row(
            str(row_number),
            f"Cookbook: [blue]{cookbook_id}[/blue]",
            *overall_cells,
            end_section=True,
        )

        # One row per recipe, listed under its cookbook
        for recipe_entry in matched["recipes"]:
            recipe_cells = [
                _recipe_grade(
                    _summary_for(recipe_entry["evaluation_summary"], endpoint_id)
                )
                for endpoint_id in endpoints
            ]
            result_table.add_row(
                "",
                f" └── Recipe: [blue]{recipe_entry['id']}[/blue]",
                *recipe_cells,
                end_section=True,
            )

    # Display table
    console.print(result_table)
460
+
461
+
462
+ # ------------------------------------------------------------------------------
463
+ # Cmd2 Arguments Parsers
464
+ # ------------------------------------------------------------------------------
465
# Add cookbook arguments
# Parser for the add_cookbook command: three required positional strings.
# 'recipes' is taken as a plain string holding a Python list literal (see the
# epilog example); add_cookbook converts it with literal_eval.
add_cookbook_args = cmd2.Cmd2ArgumentParser(
    description="Add a new cookbook. The 'name' argument will be slugified to create a unique identifier.",
    epilog="Example:\n add_cookbook 'My new cookbook' "
    "'I am cookbook description' "
    "\"['analogical-similarity','auto-categorisation']\"",
)
add_cookbook_args.add_argument("name", type=str, help="Name of the new cookbook")
add_cookbook_args.add_argument(
    "description", type=str, help="Description of the new cookbook"
)
add_cookbook_args.add_argument(
    "recipes", type=str, help="List of recipes to be included in the new cookbook"
)
479
+
480
# Update cookbook arguments
# Parser for the update_cookbook command: the cookbook id plus a string holding
# a list of (key, value) tuples (see the epilog example); update_cookbook
# converts that string with literal_eval and dict().
update_cookbook_args = cmd2.Cmd2ArgumentParser(
    description="Update a cookbook.",
    epilog="Available keys for updating a cookbook: \n"
    " name: The name of the cookbook. \n"
    " description: The description of the cookbook. \n"
    " recipes: A list of recipes included in the cookbook. \n\n"
    "Example command:\n"
    " update_cookbook my-new-cookbook "
    "\"[('name', 'Updated Cookbook Name'), ('description', 'Updated description'), "
    "('recipes', ['analogical-similarity'])]\"",
)
update_cookbook_args.add_argument("cookbook", type=str, help="Id of the cookbook")
update_cookbook_args.add_argument(
    "update_values", type=str, help="Update cookbook key/value"
)
496
+
497
# View cookbook arguments
# Parser for the view_cookbook command: a single positional cookbook id.
view_cookbook_args = cmd2.Cmd2ArgumentParser(
    description="View a cookbook.",
    epilog="Example:\n view_cookbook my-new-cookbook",
)
view_cookbook_args.add_argument("cookbook", type=str, help="Id of the cookbook")
503
+
504
# Delete cookbook arguments
# Parser for the delete_cookbook command: a single positional cookbook id.
delete_cookbook_args = cmd2.Cmd2ArgumentParser(
    description="Delete a cookbook.",
    epilog="Example:\n delete_cookbook my-new-cookbook",
)
delete_cookbook_args.add_argument("cookbook", type=str, help="Id of the cookbook")
510
+
511
# Run cookbook arguments
# Parser for the run_cookbook command. The three positionals are strings;
# 'cookbooks' and 'endpoints' hold Python list literals (see the epilog example)
# that run_cookbook converts with literal_eval. The remaining options are
# optional and fall back to the defaults declared below.
run_cookbook_args = cmd2.Cmd2ArgumentParser(
    description="Run a cookbook.",
    epilog="Example:\n run_cookbook "
    '"my new cookbook runner" '
    "\"['chinese-safety-cookbook']\" "
    "\"['openai-gpt35-turbo']\" "
    '-n 1 -r 1 -s "You are an intelligent AI" ',
)
run_cookbook_args.add_argument("name", type=str, help="Name of cookbook runner")
run_cookbook_args.add_argument("cookbooks", type=str, help="List of cookbooks to run")
run_cookbook_args.add_argument("endpoints", type=str, help="List of endpoints to run")
# Defaults to 0 when -n is not given
run_cookbook_args.add_argument(
    "-n", "--num_of_prompts", type=int, default=0, help="Number of prompts to run"
)
# Defaults to 0 when -r is not given
run_cookbook_args.add_argument(
    "-r", "--random_seed", type=int, default=0, help="Random seed number"
)
# Defaults to an empty string when -s is not given
run_cookbook_args.add_argument(
    "-s", "--system_prompt", type=str, default="", help="System Prompt to use"
)
# Defaults to the "benchmarking" module when -l is not given
run_cookbook_args.add_argument(
    "-l",
    "--runner_proc_module",
    type=str,
    default="benchmarking",
    help="Runner processing module to use",
)
# Defaults to the "benchmarking-result" module when -o is not given
run_cookbook_args.add_argument(
    "-o",
    "--result_proc_module",
    type=str,
    default="benchmarking-result",
    help="Result processing module to use",
)