aiverify-moonshot 0.4.5__py3-none-any.whl → 0.4.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,13 @@ from rich.console import Console
 from rich.table import Table
 
 from moonshot.api import api_delete_metric, api_get_all_metric, api_get_all_metric_name
+from moonshot.integrations.cli.cli_errors import (
+    ERROR_BENCHMARK_DELETE_METRIC_METRIC_VALIDATION,
+    ERROR_BENCHMARK_LIST_METRICS_FIND_VALIDATION,
+    ERROR_BENCHMARK_LIST_METRICS_PAGINATION_VALIDATION,
+    ERROR_BENCHMARK_LIST_METRICS_PAGINATION_VALIDATION_1,
+    ERROR_BENCHMARK_VIEW_METRIC_METRIC_FILENAME_VALIDATION,
+)
 from moonshot.integrations.cli.utils.process_data import filter_data
 
 console = Console()
@@ -18,23 +25,44 @@ def list_metrics(args) -> list | None:
     List all available metrics.
 
     This function retrieves all available metrics by calling the api_get_all_metric function from the
-    moonshot.api module. It then displays the metrics using the _display_metrics function. If an exception occurs,
-    it prints an error message.
+    moonshot.api module. It then filters the metrics based on the provided keyword and pagination arguments.
+    If there are no metrics, it prints a message indicating that no metrics were found.
 
     Args:
-        args: A namespace object from argparse. It should have an optional attribute:
-        find (str): Optional field to find metric(s) with a keyword.
-        pagination (str): Optional field to paginate metrics.
+        args: A namespace object from argparse. It should have optional attributes:
+            find (str): Optional field to find metric(s) with a keyword.
+            pagination (str): Optional field to paginate metrics.
 
     Returns:
-        list | None: A list of Metric or None if there is no result.
+        list | None: A list of metrics or None if there are no metrics.
     """
 
     try:
         print("Listing metrics may take a while...")
+        if args.find is not None:
+            if not isinstance(args.find, str) or not args.find:
+                raise TypeError(ERROR_BENCHMARK_LIST_METRICS_FIND_VALIDATION)
+
+        if args.pagination is not None:
+            if not isinstance(args.pagination, str) or not args.pagination:
+                raise TypeError(ERROR_BENCHMARK_LIST_METRICS_PAGINATION_VALIDATION)
+            try:
+                pagination = literal_eval(args.pagination)
+                if not (
+                    isinstance(pagination, tuple)
+                    and len(pagination) == 2
+                    and all(isinstance(i, int) for i in pagination)
+                ):
+                    raise ValueError(
+                        ERROR_BENCHMARK_LIST_METRICS_PAGINATION_VALIDATION_1
+                    )
+            except (ValueError, SyntaxError):
+                raise ValueError(ERROR_BENCHMARK_LIST_METRICS_PAGINATION_VALIDATION_1)
+        else:
+            pagination = ()
+
         metrics_list = api_get_all_metric()
         keyword = args.find.lower() if args.find else ""
-        pagination = literal_eval(args.pagination) if args.pagination else ()
 
         if metrics_list:
             filtered_metrics_list = filter_data(metrics_list, keyword, pagination)
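The new pagination check accepts only a string that literal_eval can turn into a tuple of exactly two integers; anything else now fails fast instead of being passed through to filter_data. A minimal sketch of the same check in isolation (check_pagination and its messages are illustrative, not part of the package):

    from ast import literal_eval

    def check_pagination(raw: str | None) -> tuple:
        # Mirrors the new CLI validation: None means "no pagination".
        if raw is None:
            return ()
        if not isinstance(raw, str) or not raw:
            raise TypeError("pagination must be a non-empty string")
        try:
            pagination = literal_eval(raw)
            if not (
                isinstance(pagination, tuple)
                and len(pagination) == 2
                and all(isinstance(i, int) for i in pagination)
            ):
                raise ValueError("pagination must be a tuple of two integers")
        except (ValueError, SyntaxError):
            raise ValueError("pagination must be a tuple of two integers")
        return pagination

    check_pagination("(1, 10)")  # -> (1, 10)
    check_pagination(None)       # -> ()
    check_pagination("[1, 10]")  # raises ValueError: a list, not a tuple

The same pattern recurs in list_recipes below, with the recipe-specific error constants.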
@@ -44,8 +72,10 @@ def list_metrics(args) -> list | None:
 
         console.print("[red]There are no metrics found.[/red]")
         return None
+
     except Exception as e:
         print(f"[list_metrics]: {str(e)}")
+        return None
 
 
 def view_metric(args) -> None:
@@ -65,6 +95,13 @@ def view_metric(args) -> None:
     """
     try:
         print("Viewing metrics may take a while...")
+        if (
+            not isinstance(args.metric_filename, str)
+            or not args.metric_filename
+            or args.metric_filename is None
+        ):
+            raise TypeError(ERROR_BENCHMARK_VIEW_METRIC_METRIC_FILENAME_VALIDATION)
+
         metrics_list = api_get_all_metric()
         metrics_name_list = api_get_all_metric_name()
 
@@ -100,7 +137,11 @@ def delete_metric(args) -> None:
     if confirmation.lower() != "y":
         console.print("[bold yellow]Metric deletion cancelled.[/]")
         return
+
     try:
+        if args.metric is None or not isinstance(args.metric, str) or not args.metric:
+            raise ValueError(ERROR_BENCHMARK_DELETE_METRIC_METRIC_VALIDATION)
+
         api_delete_metric(args.metric)
         print("[delete_metric]: Metric deleted.")
     except Exception as e:
@@ -17,6 +17,41 @@ from moonshot.api import (
     api_read_recipe,
     api_update_recipe,
 )
+from moonshot.integrations.cli.cli_errors import (
+    ERROR_BENCHMARK_ADD_RECIPE_CATEGORIES_LIST_STR_VALIDATION,
+    ERROR_BENCHMARK_ADD_RECIPE_CATEGORIES_VALIDATION,
+    ERROR_BENCHMARK_ADD_RECIPE_DATASETS_LIST_STR_VALIDATION,
+    ERROR_BENCHMARK_ADD_RECIPE_DATASETS_VALIDATION,
+    ERROR_BENCHMARK_ADD_RECIPE_DESC_VALIDATION,
+    ERROR_BENCHMARK_ADD_RECIPE_GRADING_SCALE_DICT_STR_VALIDATION,
+    ERROR_BENCHMARK_ADD_RECIPE_GRADING_SCALE_VALIDATION,
+    ERROR_BENCHMARK_ADD_RECIPE_METRICS_LIST_STR_VALIDATION,
+    ERROR_BENCHMARK_ADD_RECIPE_METRICS_VALIDATION,
+    ERROR_BENCHMARK_ADD_RECIPE_NAME_VALIDATION,
+    ERROR_BENCHMARK_ADD_RECIPE_PROMPT_TEMPLATES_LIST_STR_VALIDATION,
+    ERROR_BENCHMARK_ADD_RECIPE_PROMPT_TEMPLATES_VALIDATION,
+    ERROR_BENCHMARK_ADD_RECIPE_TAGS_LIST_STR_VALIDATION,
+    ERROR_BENCHMARK_ADD_RECIPE_TAGS_VALIDATION,
+    ERROR_BENCHMARK_DELETE_RECIPE_RECIPE_VALIDATION,
+    ERROR_BENCHMARK_LIST_RECIPES_FIND_VALIDATION,
+    ERROR_BENCHMARK_LIST_RECIPES_PAGINATION_VALIDATION,
+    ERROR_BENCHMARK_LIST_RECIPES_PAGINATION_VALIDATION_1,
+    ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION,
+    ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION_1,
+    ERROR_BENCHMARK_RUN_RECIPE_NAME_VALIDATION,
+    ERROR_BENCHMARK_RUN_RECIPE_NO_RESULT,
+    ERROR_BENCHMARK_RUN_RECIPE_NUM_OF_PROMPTS_VALIDATION,
+    ERROR_BENCHMARK_RUN_RECIPE_RANDOM_SEED_VALIDATION,
+    ERROR_BENCHMARK_RUN_RECIPE_RECIPES_VALIDATION,
+    ERROR_BENCHMARK_RUN_RECIPE_RECIPES_VALIDATION_1,
+    ERROR_BENCHMARK_RUN_RECIPE_RESULT_PROC_MOD_VALIDATION,
+    ERROR_BENCHMARK_RUN_RECIPE_RUNNER_PROC_MOD_VALIDATION,
+    ERROR_BENCHMARK_RUN_RECIPE_SYS_PROMPT_VALIDATION,
+    ERROR_BENCHMARK_UPDATE_RECIPE_RECIPE_VALIDATION,
+    ERROR_BENCHMARK_UPDATE_RECIPE_UPDATE_VALUES_VALIDATION,
+    ERROR_BENCHMARK_UPDATE_RECIPE_UPDATE_VALUES_VALIDATION_1,
+    ERROR_BENCHMARK_VIEW_RECIPE_RECIPE_VALIDATION,
+)
 from moonshot.integrations.cli.common.display_helper import display_view_list_format
 from moonshot.integrations.cli.utils.process_data import filter_data
 
@@ -37,23 +72,114 @@ def add_recipe(args) -> None:
 
     Args:
         args (argparse.Namespace): The arguments provided to the command line interface.
-        Expected keys are name, description, tags, categories, dataset, prompt_templates, metrics and grading_scale.
+        Expected keys are name, description, tags, categories, datasets, prompt_templates, metrics, and grading_scale.
 
     Returns:
         None
 
     Raises:
-        Exception: If there is an error during the creation of the recipe or the arguments cannot be evaluated.
+        TypeError: If any of the required arguments are not strings or are None.
+        ValueError: If the evaluated arguments are not of the expected types.
     """
     try:
-        tags = literal_eval(args.tags) if args.tags else []
+        if not isinstance(args.name, str) or not args.name or args.name is None:
+            raise TypeError(ERROR_BENCHMARK_ADD_RECIPE_NAME_VALIDATION)
+
+        if (
+            not isinstance(args.description, str)
+            or not args.description
+            or args.description is None
+        ):
+            raise TypeError(ERROR_BENCHMARK_ADD_RECIPE_DESC_VALIDATION)
+
+        if not isinstance(args.tags, str) or not args.tags or args.tags is None:
+            raise TypeError(ERROR_BENCHMARK_ADD_RECIPE_TAGS_VALIDATION)
+
+        if (
+            not isinstance(args.categories, str)
+            or not args.categories
+            or args.categories is None
+        ):
+            raise TypeError(ERROR_BENCHMARK_ADD_RECIPE_CATEGORIES_VALIDATION)
+
+        if (
+            not isinstance(args.datasets, str)
+            or not args.datasets
+            or args.datasets is None
+        ):
+            raise TypeError(ERROR_BENCHMARK_ADD_RECIPE_DATASETS_VALIDATION)
+
+        if (
+            not isinstance(args.prompt_templates, str)
+            or not args.prompt_templates
+            or args.prompt_templates is None
+        ):
+            raise TypeError(ERROR_BENCHMARK_ADD_RECIPE_PROMPT_TEMPLATES_VALIDATION)
+
+        if (
+            not isinstance(args.metrics, str)
+            or not args.metrics
+            or args.metrics is None
+        ):
+            raise TypeError(ERROR_BENCHMARK_ADD_RECIPE_METRICS_VALIDATION)
+
+        if (
+            not isinstance(args.grading_scale, str)
+            or not args.grading_scale
+            or args.grading_scale is None
+        ):
+            raise TypeError(ERROR_BENCHMARK_ADD_RECIPE_GRADING_SCALE_VALIDATION)
+
+        tags = literal_eval(args.tags)
         categories = literal_eval(args.categories)
         datasets = literal_eval(args.datasets)
-        prompt_templates = (
-            literal_eval(args.prompt_templates) if args.prompt_templates else []
-        )
+        prompt_templates = literal_eval(args.prompt_templates)
         metrics = literal_eval(args.metrics)
-        grading_scale = literal_eval(args.grading_scale) if args.grading_scale else {}
+        grading_scale = literal_eval(args.grading_scale)
+
+        if not (isinstance(tags, list) and all(isinstance(tag, str) for tag in tags)):
+            raise ValueError(ERROR_BENCHMARK_ADD_RECIPE_TAGS_LIST_STR_VALIDATION)
+
+        if not (
+            isinstance(categories, list)
+            and all(isinstance(category, str) for category in categories)
+        ):
+            raise ValueError(ERROR_BENCHMARK_ADD_RECIPE_CATEGORIES_LIST_STR_VALIDATION)
+
+        if not (
+            isinstance(datasets, list)
+            and all(isinstance(dataset, str) for dataset in datasets)
+        ):
+            raise ValueError(ERROR_BENCHMARK_ADD_RECIPE_DATASETS_LIST_STR_VALIDATION)
+
+        if not (
+            isinstance(prompt_templates, list)
+            and all(
+                isinstance(prompt_template, str) for prompt_template in prompt_templates
+            )
+        ):
+            raise ValueError(
+                ERROR_BENCHMARK_ADD_RECIPE_PROMPT_TEMPLATES_LIST_STR_VALIDATION
+            )
+
+        if not (
+            isinstance(metrics, list)
+            and all(isinstance(metric, str) for metric in metrics)
+        ):
+            raise ValueError(ERROR_BENCHMARK_ADD_RECIPE_METRICS_LIST_STR_VALIDATION)
+
+        if not (
+            isinstance(grading_scale, dict)
+            and all(
+                isinstance(gs, list)
+                and len(gs) == 2
+                and all(isinstance(value, int) for value in gs)
+                for gs in grading_scale.values()
+            )
+        ):
+            raise ValueError(
+                ERROR_BENCHMARK_ADD_RECIPE_GRADING_SCALE_DICT_STR_VALIDATION
+            )
 
         new_recipe_id = api_create_recipe(
             args.name,
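Each recipe argument reaches the CLI as a string; after the type checks it is parsed with literal_eval and then shape-checked: tags, categories, datasets, prompt_templates, and metrics must evaluate to lists of strings, and grading_scale to a dict whose values are two-integer lists. A sketch of argument strings that would satisfy these checks (all values here are hypothetical, not taken from the package):

    from ast import literal_eval

    # Hypothetical CLI inputs, as the strings argparse would hand over.
    args_tags = "['robustness', 'toxicity']"
    args_datasets = "['example-dataset']"
    args_grading_scale = "{'A': [80, 100], 'B': [60, 79]}"

    grading_scale = literal_eval(args_grading_scale)
    # The same shape check the new code applies to grading_scale:
    assert isinstance(grading_scale, dict) and all(
        isinstance(gs, list) and len(gs) == 2 and all(isinstance(v, int) for v in gs)
        for gs in grading_scale.values()
    )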
@@ -79,18 +205,42 @@ def list_recipes(args) -> list | None:
     It then displays the retrieved recipes using the _display_recipes function.
 
     Args:
-        args: A namespace object from argparse. It should have an optional attribute:
-        find (str): Optional field to find recipe(s) with a keyword.
-        pagination (str): Optional field to paginate recipes.
+        args: A namespace object from argparse. It should have optional attributes:
+            find (str): Optional field to find recipe(s) with a keyword.
+            pagination (str): Optional field to paginate recipes.
 
     Returns:
-        list | None: A list of Recipe or None if there is no result.
-    """
+        list | None: A list of recipes or None if there is no result.
 
+    Raises:
+        TypeError: If the 'find' or 'pagination' arguments are not strings or are invalid.
+        ValueError: If the 'pagination' argument cannot be evaluated into a tuple of two integers.
+    """
     try:
+        if args.find is not None:
+            if not isinstance(args.find, str) or not args.find:
+                raise TypeError(ERROR_BENCHMARK_LIST_RECIPES_FIND_VALIDATION)
+
+        if args.pagination is not None:
+            if not isinstance(args.pagination, str) or not args.pagination:
+                raise TypeError(ERROR_BENCHMARK_LIST_RECIPES_PAGINATION_VALIDATION)
+            try:
+                pagination = literal_eval(args.pagination)
+                if not (
+                    isinstance(pagination, tuple)
+                    and len(pagination) == 2
+                    and all(isinstance(i, int) for i in pagination)
+                ):
+                    raise ValueError(
+                        ERROR_BENCHMARK_LIST_RECIPES_PAGINATION_VALIDATION_1
+                    )
+            except (ValueError, SyntaxError):
+                raise ValueError(ERROR_BENCHMARK_LIST_RECIPES_PAGINATION_VALIDATION_1)
+        else:
+            pagination = ()
+
         recipes_list = api_get_all_recipe()
         keyword = args.find.lower() if args.find else ""
-        pagination = literal_eval(args.pagination) if args.pagination else ()
 
         if recipes_list:
             filtered_recipes_list = filter_data(recipes_list, keyword, pagination)
@@ -103,6 +253,7 @@ def list_recipes(args) -> list | None:
 
     except Exception as e:
         print(f"[list_recipes]: {str(e)}")
+        return None
 
 
 def view_recipe(args) -> None:
@@ -111,7 +262,7 @@ def view_recipe(args) -> None:
 
     This function retrieves a specific recipe by calling the api_read_recipe function from the
     moonshot.api module using the recipe name provided in the args.
-    It then displays the retrieved recipe using the display_view_recipe function.
+    It then displays the retrieved recipe using the _display_recipes function.
 
     Args:
         args: A namespace object from argparse. It should have the following attribute:
@@ -119,8 +270,14 @@ def view_recipe(args) -> None:
     Returns:
         None
 
+
+    Raises:
+        TypeError: If the 'recipe' argument is not a string or is None.
     """
     try:
+        if not isinstance(args.recipe, str) or not args.recipe or args.recipe is None:
+            raise TypeError(ERROR_BENCHMARK_VIEW_RECIPE_RECIPE_VALIDATION)
+
         recipe_info = api_read_recipe(args.recipe)
         _display_recipes([recipe_info])
     except Exception as e:
@@ -148,46 +305,103 @@ def run_recipe(args) -> None:
 
     Returns:
         None
+
+    Raises:
+        TypeError: If any of the required arguments are not of the expected types or are None.
+        ValueError: If the 'recipes' or 'endpoints' arguments cannot be evaluated into lists of strings.
+        RuntimeError: If no results are found after running the recipes.
     """
     try:
-        name = args.name
+        if not isinstance(args.name, str) or not args.name or args.name is None:
+            raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_NAME_VALIDATION)
+
+        if (
+            not isinstance(args.recipes, str)
+            or not args.recipes
+            or args.recipes is None
+        ):
+            raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_RECIPES_VALIDATION)
+
+        if (
+            not isinstance(args.endpoints, str)
+            or not args.endpoints
+            or args.endpoints is None
+        ):
+            raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION)
+
+        if isinstance(args.num_of_prompts, bool) or not isinstance(
+            args.num_of_prompts, int
+        ):
+            raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_NUM_OF_PROMPTS_VALIDATION)
+
+        if isinstance(args.random_seed, bool) or not isinstance(args.random_seed, int):
+            raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_RANDOM_SEED_VALIDATION)
+
+        if (
+            not isinstance(args.system_prompt, str)
+            or not args.system_prompt
+            or args.system_prompt is None
+        ):
+            raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_SYS_PROMPT_VALIDATION)
+
+        if (
+            not isinstance(args.runner_proc_module, str)
+            or not args.runner_proc_module
+            or args.runner_proc_module is None
+        ):
+            raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_RUNNER_PROC_MOD_VALIDATION)
+
+        if (
+            not isinstance(args.result_proc_module, str)
+            or not args.result_proc_module
+            or args.result_proc_module is None
+        ):
+            raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_RESULT_PROC_MOD_VALIDATION)
+
         recipes = literal_eval(args.recipes)
+        if not (
+            isinstance(recipes, list) and all(isinstance(item, str) for item in recipes)
+        ):
+            raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_RECIPES_VALIDATION_1)
+
         endpoints = literal_eval(args.endpoints)
-        num_of_prompts = args.num_of_prompts
-        random_seed = args.random_seed
-        system_prompt = args.system_prompt
-        runner_proc_module = args.runner_proc_module
-        result_proc_module = args.result_proc_module
+        if not (
+            isinstance(endpoints, list)
+            and all(isinstance(item, str) for item in endpoints)
+        ):
+            raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION_1)
 
         # Run the recipes with the defined endpoints
-        slugify_id = slugify(name, lowercase=True)
+        slugify_id = slugify(args.name, lowercase=True)
         if slugify_id in api_get_all_runner_name():
             rec_runner = api_load_runner(slugify_id)
         else:
-            rec_runner = api_create_runner(name, endpoints)
+            rec_runner = api_create_runner(args.name, endpoints)
 
-        loop = asyncio.get_event_loop()
-        loop.run_until_complete(
-            rec_runner.run_recipes(
+        async def run():
+            await rec_runner.run_recipes(
                 recipes,
-                num_of_prompts,
-                random_seed,
-                system_prompt,
-                runner_proc_module,
-                result_proc_module,
+                args.num_of_prompts,
+                args.random_seed,
+                args.system_prompt,
+                args.runner_proc_module,
+                args.result_proc_module,
             )
-        )
-        rec_runner.close()
+            await rec_runner.close()
+
+        loop = asyncio.get_event_loop()
+        loop.run_until_complete(run())
 
         # Display results
         runner_runs = api_get_all_run(rec_runner.id)
         result_info = runner_runs[-1].get("results")
         if result_info:
-            show_recipe_results(
+            _show_recipe_results(
                 recipes, endpoints, result_info, result_info["metadata"]["duration"]
             )
         else:
-            raise RuntimeError("no run result generated")
+            raise RuntimeError(ERROR_BENCHMARK_RUN_RECIPE_NO_RESULT)
+
     except Exception as e:
         print(f"[run_recipe]: {str(e)}")
 
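Two details of this hunk are easy to miss. First, args.num_of_prompts and args.random_seed are rejected when they are bools because bool is a subclass of int in Python (isinstance(True, int) is True), so a plain isinstance check alone would let True through. Second, run_recipes and close are now awaited inside a single nested coroutine, so the runner is closed on the same event loop that ran the benchmark instead of close() being called synchronously after the loop finishes. A reduced sketch of that pattern (Runner and its method bodies are stand-ins, not the moonshot classes):

    import asyncio

    class Runner:
        # Stand-in for the moonshot runner; both methods are coroutines.
        async def run_recipes(self, recipes):
            print(f"running {recipes}")

        async def close(self):
            print("closed")

    rec_runner = Runner()

    async def run():
        await rec_runner.run_recipes(["example-recipe"])
        await rec_runner.close()  # awaited on the same loop as the run

    loop = asyncio.get_event_loop()
    loop.run_until_complete(run())

In a standalone script asyncio.run(run()) would be the more current idiom; the diff keeps get_event_loop(), matching the surrounding CLI code.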
@@ -207,11 +421,31 @@ def update_recipe(args) -> None:
 
     Returns:
         None
+
+    Raises:
+        ValueError: If the 'recipe' or 'update_values' arguments are not strings or are None.
+        ValueError: If the 'update_values' argument cannot be evaluated into a list of tuples.
     """
     try:
+        if args.recipe is None or not isinstance(args.recipe, str) or not args.recipe:
+            raise ValueError(ERROR_BENCHMARK_UPDATE_RECIPE_RECIPE_VALIDATION)
+
+        if (
+            args.update_values is None
+            or not isinstance(args.update_values, str)
+            or not args.update_values
+        ):
+            raise ValueError(ERROR_BENCHMARK_UPDATE_RECIPE_UPDATE_VALUES_VALIDATION)
+
         recipe = args.recipe
-        update_values = dict(literal_eval(args.update_values))
+        if literal_eval(args.update_values) and all(
+            isinstance(i, tuple) for i in literal_eval(args.update_values)
+        ):
+            update_values = dict(literal_eval(args.update_values))
+        else:
+            raise ValueError(ERROR_BENCHMARK_UPDATE_RECIPE_UPDATE_VALUES_VALIDATION_1)
         api_update_recipe(recipe, **update_values)
+
         print("[update_recipe]: Recipe updated.")
     except Exception as e:
         print(f"[update_recipe]: {str(e)}")
@@ -232,6 +466,9 @@ def delete_recipe(args) -> None:
 
     Returns:
         None
+
+    Raises:
+        ValueError: If the 'recipe' argument is not a string or is None.
     """
     # Confirm with the user before deleting a recipe
     confirmation = console.input(
@@ -240,7 +477,11 @@ def delete_recipe(args) -> None:
     if confirmation.lower() != "y":
         console.print("[bold yellow]Recipe deletion cancelled.[/]")
         return
+
     try:
+        if args.recipe is None or not isinstance(args.recipe, str) or not args.recipe:
+            raise ValueError(ERROR_BENCHMARK_DELETE_RECIPE_RECIPE_VALIDATION)
+
         api_delete_recipe(args.recipe)
         print("[delete_recipe]: Recipe deleted.")
     except Exception as e:
@@ -250,7 +491,7 @@ def delete_recipe(args) -> None:
 # ------------------------------------------------------------------------------
 # Helper functions: Display on cli
 # ------------------------------------------------------------------------------
-def display_view_grading_scale_format(title: str, grading_scale: dict) -> str:
+def _display_view_grading_scale_format(title: str, grading_scale: dict) -> str:
     """
     Format the grading scale for display.
 
@@ -275,7 +516,7 @@ def display_view_grading_scale_format(title: str, grading_scale: dict) -> str:
     return f"[blue]{title}[/blue]: nil"
 
 
-def display_view_statistics_format(title: str, stats: dict) -> str:
+def _display_view_statistics_format(title: str, stats: dict) -> str:
     """
     Format the statistics for display.
 
@@ -348,10 +589,10 @@ def _display_recipes(recipes_list: list) -> None:
             "Prompt Templates", prompt_templates
         )
         metrics_info = display_view_list_format("Metrics", metrics)
-        grading_scale_info = display_view_grading_scale_format(
+        grading_scale_info = _display_view_grading_scale_format(
             "Grading Scale", grading_scale
         )
-        stats_info = display_view_statistics_format("Statistics", stats)
+        stats_info = _display_view_statistics_format("Statistics", stats)
 
         recipe_info = (
             f"[red]id: {id}[/red]\n\n[blue]{name}[/blue]\n{description}\n\n"
@@ -364,7 +605,7 @@ def _display_recipes(recipes_list: list) -> None:
     console.print(table)
 
 
-def show_recipe_results(recipes, endpoints, recipe_results, duration):
+def _show_recipe_results(recipes, endpoints, recipe_results, duration):
     """
     Show the results of the recipe benchmarking.
 
@@ -384,7 +625,7 @@ def show_recipe_results(recipes, endpoints, recipe_results, duration):
     """
    if recipe_results:
         # Display recipe results
-        generate_recipe_table(recipes, endpoints, recipe_results)
+        _generate_recipe_table(recipes, endpoints, recipe_results)
     else:
         console.print("[red]There are no results.[/red]")
 
@@ -394,7 +635,7 @@ def show_recipe_results(recipes, endpoints, recipe_results, duration):
     console.print(run_stats)
 
 
-def generate_recipe_table(recipes: list, endpoints: list, results: dict) -> None:
+def _generate_recipe_table(recipes: list, endpoints: list, results: dict) -> None:
     """
     Generate and display a table of recipe results.