aiverify-moonshot 0.4.5__py3-none-any.whl → 0.4.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {aiverify_moonshot-0.4.5.dist-info → aiverify_moonshot-0.4.7.dist-info}/METADATA +13 -12
- {aiverify_moonshot-0.4.5.dist-info → aiverify_moonshot-0.4.7.dist-info}/RECORD +24 -22
- moonshot/integrations/cli/benchmark/cookbook.py +226 -42
- moonshot/integrations/cli/benchmark/datasets.py +53 -8
- moonshot/integrations/cli/benchmark/metrics.py +48 -7
- moonshot/integrations/cli/benchmark/recipe.py +283 -42
- moonshot/integrations/cli/benchmark/result.py +73 -30
- moonshot/integrations/cli/benchmark/run.py +43 -11
- moonshot/integrations/cli/benchmark/runner.py +29 -20
- moonshot/integrations/cli/cli_errors.py +511 -0
- moonshot/integrations/cli/common/connectors.py +139 -8
- moonshot/integrations/cli/common/dataset.py +66 -13
- moonshot/integrations/cli/common/prompt_template.py +38 -2
- moonshot/integrations/cli/redteam/session.py +126 -43
- moonshot/integrations/web_api/app.py +1 -1
- moonshot/src/api/api_bookmark.py +6 -6
- moonshot/src/bookmark/bookmark.py +119 -60
- moonshot/src/bookmark/bookmark_arguments.py +10 -0
- moonshot/src/messages_constants.py +40 -0
- moonshot/src/runners/runner.py +1 -1
- {aiverify_moonshot-0.4.5.dist-info → aiverify_moonshot-0.4.7.dist-info}/WHEEL +0 -0
- {aiverify_moonshot-0.4.5.dist-info → aiverify_moonshot-0.4.7.dist-info}/licenses/AUTHORS.md +0 -0
- {aiverify_moonshot-0.4.5.dist-info → aiverify_moonshot-0.4.7.dist-info}/licenses/LICENSE.md +0 -0
- {aiverify_moonshot-0.4.5.dist-info → aiverify_moonshot-0.4.7.dist-info}/licenses/NOTICES.md +0 -0
|
@@ -17,6 +17,41 @@ from moonshot.api import (
|
|
|
17
17
|
api_read_recipe,
|
|
18
18
|
api_update_recipe,
|
|
19
19
|
)
|
|
20
|
+
from moonshot.integrations.cli.cli_errors import (
|
|
21
|
+
ERROR_BENCHMARK_ADD_RECIPE_CATEGORIES_LIST_STR_VALIDATION,
|
|
22
|
+
ERROR_BENCHMARK_ADD_RECIPE_CATEGORIES_VALIDATION,
|
|
23
|
+
ERROR_BENCHMARK_ADD_RECIPE_DATASETS_LIST_STR_VALIDATION,
|
|
24
|
+
ERROR_BENCHMARK_ADD_RECIPE_DATASETS_VALIDATION,
|
|
25
|
+
ERROR_BENCHMARK_ADD_RECIPE_DESC_VALIDATION,
|
|
26
|
+
ERROR_BENCHMARK_ADD_RECIPE_GRADING_SCALE_DICT_STR_VALIDATION,
|
|
27
|
+
ERROR_BENCHMARK_ADD_RECIPE_GRADING_SCALE_VALIDATION,
|
|
28
|
+
ERROR_BENCHMARK_ADD_RECIPE_METRICS_LIST_STR_VALIDATION,
|
|
29
|
+
ERROR_BENCHMARK_ADD_RECIPE_METRICS_VALIDATION,
|
|
30
|
+
ERROR_BENCHMARK_ADD_RECIPE_NAME_VALIDATION,
|
|
31
|
+
ERROR_BENCHMARK_ADD_RECIPE_PROMPT_TEMPLATES_LIST_STR_VALIDATION,
|
|
32
|
+
ERROR_BENCHMARK_ADD_RECIPE_PROMPT_TEMPLATES_VALIDATION,
|
|
33
|
+
ERROR_BENCHMARK_ADD_RECIPE_TAGS_LIST_STR_VALIDATION,
|
|
34
|
+
ERROR_BENCHMARK_ADD_RECIPE_TAGS_VALIDATION,
|
|
35
|
+
ERROR_BENCHMARK_DELETE_RECIPE_RECIPE_VALIDATION,
|
|
36
|
+
ERROR_BENCHMARK_LIST_RECIPES_FIND_VALIDATION,
|
|
37
|
+
ERROR_BENCHMARK_LIST_RECIPES_PAGINATION_VALIDATION,
|
|
38
|
+
ERROR_BENCHMARK_LIST_RECIPES_PAGINATION_VALIDATION_1,
|
|
39
|
+
ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION,
|
|
40
|
+
ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION_1,
|
|
41
|
+
ERROR_BENCHMARK_RUN_RECIPE_NAME_VALIDATION,
|
|
42
|
+
ERROR_BENCHMARK_RUN_RECIPE_NO_RESULT,
|
|
43
|
+
ERROR_BENCHMARK_RUN_RECIPE_NUM_OF_PROMPTS_VALIDATION,
|
|
44
|
+
ERROR_BENCHMARK_RUN_RECIPE_RANDOM_SEED_VALIDATION,
|
|
45
|
+
ERROR_BENCHMARK_RUN_RECIPE_RECIPES_VALIDATION,
|
|
46
|
+
ERROR_BENCHMARK_RUN_RECIPE_RECIPES_VALIDATION_1,
|
|
47
|
+
ERROR_BENCHMARK_RUN_RECIPE_RESULT_PROC_MOD_VALIDATION,
|
|
48
|
+
ERROR_BENCHMARK_RUN_RECIPE_RUNNER_PROC_MOD_VALIDATION,
|
|
49
|
+
ERROR_BENCHMARK_RUN_RECIPE_SYS_PROMPT_VALIDATION,
|
|
50
|
+
ERROR_BENCHMARK_UPDATE_RECIPE_RECIPE_VALIDATION,
|
|
51
|
+
ERROR_BENCHMARK_UPDATE_RECIPE_UPDATE_VALUES_VALIDATION,
|
|
52
|
+
ERROR_BENCHMARK_UPDATE_RECIPE_UPDATE_VALUES_VALIDATION_1,
|
|
53
|
+
ERROR_BENCHMARK_VIEW_RECIPE_RECIPE_VALIDATION,
|
|
54
|
+
)
|
|
20
55
|
from moonshot.integrations.cli.common.display_helper import display_view_list_format
|
|
21
56
|
from moonshot.integrations.cli.utils.process_data import filter_data
|
|
22
57
|
|
|
@@ -37,23 +72,114 @@ def add_recipe(args) -> None:
|
|
|
37
72
|
|
|
38
73
|
Args:
|
|
39
74
|
args (argparse.Namespace): The arguments provided to the command line interface.
|
|
40
|
-
Expected keys are name, description, tags, categories,
|
|
75
|
+
Expected keys are name, description, tags, categories, datasets, prompt_templates, metrics, and grading_scale.
|
|
41
76
|
|
|
42
77
|
Returns:
|
|
43
78
|
None
|
|
44
79
|
|
|
45
80
|
Raises:
|
|
46
|
-
|
|
81
|
+
TypeError: If any of the required arguments are not strings or are None.
|
|
82
|
+
ValueError: If the evaluated arguments are not of the expected types.
|
|
47
83
|
"""
|
|
48
84
|
try:
|
|
49
|
-
|
|
85
|
+
if not isinstance(args.name, str) or not args.name or args.name is None:
|
|
86
|
+
raise TypeError(ERROR_BENCHMARK_ADD_RECIPE_NAME_VALIDATION)
|
|
87
|
+
|
|
88
|
+
if (
|
|
89
|
+
not isinstance(args.description, str)
|
|
90
|
+
or not args.description
|
|
91
|
+
or args.description is None
|
|
92
|
+
):
|
|
93
|
+
raise TypeError(ERROR_BENCHMARK_ADD_RECIPE_DESC_VALIDATION)
|
|
94
|
+
|
|
95
|
+
if not isinstance(args.tags, str) or not args.tags or args.tags is None:
|
|
96
|
+
raise TypeError(ERROR_BENCHMARK_ADD_RECIPE_TAGS_VALIDATION)
|
|
97
|
+
|
|
98
|
+
if (
|
|
99
|
+
not isinstance(args.categories, str)
|
|
100
|
+
or not args.categories
|
|
101
|
+
or args.categories is None
|
|
102
|
+
):
|
|
103
|
+
raise TypeError(ERROR_BENCHMARK_ADD_RECIPE_CATEGORIES_VALIDATION)
|
|
104
|
+
|
|
105
|
+
if (
|
|
106
|
+
not isinstance(args.datasets, str)
|
|
107
|
+
or not args.datasets
|
|
108
|
+
or args.datasets is None
|
|
109
|
+
):
|
|
110
|
+
raise TypeError(ERROR_BENCHMARK_ADD_RECIPE_DATASETS_VALIDATION)
|
|
111
|
+
|
|
112
|
+
if (
|
|
113
|
+
not isinstance(args.prompt_templates, str)
|
|
114
|
+
or not args.prompt_templates
|
|
115
|
+
or args.prompt_templates is None
|
|
116
|
+
):
|
|
117
|
+
raise TypeError(ERROR_BENCHMARK_ADD_RECIPE_PROMPT_TEMPLATES_VALIDATION)
|
|
118
|
+
|
|
119
|
+
if (
|
|
120
|
+
not isinstance(args.metrics, str)
|
|
121
|
+
or not args.metrics
|
|
122
|
+
or args.metrics is None
|
|
123
|
+
):
|
|
124
|
+
raise TypeError(ERROR_BENCHMARK_ADD_RECIPE_METRICS_VALIDATION)
|
|
125
|
+
|
|
126
|
+
if (
|
|
127
|
+
not isinstance(args.grading_scale, str)
|
|
128
|
+
or not args.grading_scale
|
|
129
|
+
or args.grading_scale is None
|
|
130
|
+
):
|
|
131
|
+
raise TypeError(ERROR_BENCHMARK_ADD_RECIPE_GRADING_SCALE_VALIDATION)
|
|
132
|
+
|
|
133
|
+
tags = literal_eval(args.tags)
|
|
50
134
|
categories = literal_eval(args.categories)
|
|
51
135
|
datasets = literal_eval(args.datasets)
|
|
52
|
-
prompt_templates = (
|
|
53
|
-
literal_eval(args.prompt_templates) if args.prompt_templates else []
|
|
54
|
-
)
|
|
136
|
+
prompt_templates = literal_eval(args.prompt_templates)
|
|
55
137
|
metrics = literal_eval(args.metrics)
|
|
56
|
-
grading_scale = literal_eval(args.grading_scale)
|
|
138
|
+
grading_scale = literal_eval(args.grading_scale)
|
|
139
|
+
|
|
140
|
+
if not (isinstance(tags, list) and all(isinstance(tag, str) for tag in tags)):
|
|
141
|
+
raise ValueError(ERROR_BENCHMARK_ADD_RECIPE_TAGS_LIST_STR_VALIDATION)
|
|
142
|
+
|
|
143
|
+
if not (
|
|
144
|
+
isinstance(categories, list)
|
|
145
|
+
and all(isinstance(category, str) for category in categories)
|
|
146
|
+
):
|
|
147
|
+
raise ValueError(ERROR_BENCHMARK_ADD_RECIPE_CATEGORIES_LIST_STR_VALIDATION)
|
|
148
|
+
|
|
149
|
+
if not (
|
|
150
|
+
isinstance(datasets, list)
|
|
151
|
+
and all(isinstance(dataset, str) for dataset in datasets)
|
|
152
|
+
):
|
|
153
|
+
raise ValueError(ERROR_BENCHMARK_ADD_RECIPE_DATASETS_LIST_STR_VALIDATION)
|
|
154
|
+
|
|
155
|
+
if not (
|
|
156
|
+
isinstance(prompt_templates, list)
|
|
157
|
+
and all(
|
|
158
|
+
isinstance(prompt_template, str) for prompt_template in prompt_templates
|
|
159
|
+
)
|
|
160
|
+
):
|
|
161
|
+
raise ValueError(
|
|
162
|
+
ERROR_BENCHMARK_ADD_RECIPE_PROMPT_TEMPLATES_LIST_STR_VALIDATION
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
if not (
|
|
166
|
+
isinstance(metrics, list)
|
|
167
|
+
and all(isinstance(metric, str) for metric in metrics)
|
|
168
|
+
):
|
|
169
|
+
raise ValueError(ERROR_BENCHMARK_ADD_RECIPE_METRICS_LIST_STR_VALIDATION)
|
|
170
|
+
|
|
171
|
+
if not (
|
|
172
|
+
isinstance(grading_scale, dict)
|
|
173
|
+
and all(
|
|
174
|
+
isinstance(gs, list)
|
|
175
|
+
and len(gs) == 2
|
|
176
|
+
and all(isinstance(value, int) for value in gs)
|
|
177
|
+
for gs in grading_scale.values()
|
|
178
|
+
)
|
|
179
|
+
):
|
|
180
|
+
raise ValueError(
|
|
181
|
+
ERROR_BENCHMARK_ADD_RECIPE_GRADING_SCALE_DICT_STR_VALIDATION
|
|
182
|
+
)
|
|
57
183
|
|
|
58
184
|
new_recipe_id = api_create_recipe(
|
|
59
185
|
args.name,
|
|
@@ -79,18 +205,42 @@ def list_recipes(args) -> list | None:
|
|
|
79
205
|
It then displays the retrieved recipes using the _display_recipes function.
|
|
80
206
|
|
|
81
207
|
Args:
|
|
82
|
-
args: A namespace object from argparse. It should have
|
|
83
|
-
|
|
84
|
-
|
|
208
|
+
args: A namespace object from argparse. It should have optional attributes:
|
|
209
|
+
find (str): Optional field to find recipe(s) with a keyword.
|
|
210
|
+
pagination (str): Optional field to paginate recipes.
|
|
85
211
|
|
|
86
212
|
Returns:
|
|
87
|
-
list | None: A list of
|
|
88
|
-
"""
|
|
213
|
+
list | None: A list of recipes or None if there is no result.
|
|
89
214
|
|
|
215
|
+
Raises:
|
|
216
|
+
TypeError: If the 'find' or 'pagination' arguments are not strings or are invalid.
|
|
217
|
+
ValueError: If the 'pagination' argument cannot be evaluated into a tuple of two integers.
|
|
218
|
+
"""
|
|
90
219
|
try:
|
|
220
|
+
if args.find is not None:
|
|
221
|
+
if not isinstance(args.find, str) or not args.find:
|
|
222
|
+
raise TypeError(ERROR_BENCHMARK_LIST_RECIPES_FIND_VALIDATION)
|
|
223
|
+
|
|
224
|
+
if args.pagination is not None:
|
|
225
|
+
if not isinstance(args.pagination, str) or not args.pagination:
|
|
226
|
+
raise TypeError(ERROR_BENCHMARK_LIST_RECIPES_PAGINATION_VALIDATION)
|
|
227
|
+
try:
|
|
228
|
+
pagination = literal_eval(args.pagination)
|
|
229
|
+
if not (
|
|
230
|
+
isinstance(pagination, tuple)
|
|
231
|
+
and len(pagination) == 2
|
|
232
|
+
and all(isinstance(i, int) for i in pagination)
|
|
233
|
+
):
|
|
234
|
+
raise ValueError(
|
|
235
|
+
ERROR_BENCHMARK_LIST_RECIPES_PAGINATION_VALIDATION_1
|
|
236
|
+
)
|
|
237
|
+
except (ValueError, SyntaxError):
|
|
238
|
+
raise ValueError(ERROR_BENCHMARK_LIST_RECIPES_PAGINATION_VALIDATION_1)
|
|
239
|
+
else:
|
|
240
|
+
pagination = ()
|
|
241
|
+
|
|
91
242
|
recipes_list = api_get_all_recipe()
|
|
92
243
|
keyword = args.find.lower() if args.find else ""
|
|
93
|
-
pagination = literal_eval(args.pagination) if args.pagination else ()
|
|
94
244
|
|
|
95
245
|
if recipes_list:
|
|
96
246
|
filtered_recipes_list = filter_data(recipes_list, keyword, pagination)
|
|
@@ -103,6 +253,7 @@ def list_recipes(args) -> list | None:
|
|
|
103
253
|
|
|
104
254
|
except Exception as e:
|
|
105
255
|
print(f"[list_recipes]: {str(e)}")
|
|
256
|
+
return None
|
|
106
257
|
|
|
107
258
|
|
|
108
259
|
def view_recipe(args) -> None:
|
|
@@ -111,7 +262,7 @@ def view_recipe(args) -> None:
|
|
|
111
262
|
|
|
112
263
|
This function retrieves a specific recipe by calling the api_read_recipe function from the
|
|
113
264
|
moonshot.api module using the recipe name provided in the args.
|
|
114
|
-
It then displays the retrieved recipe using the
|
|
265
|
+
It then displays the retrieved recipe using the _display_recipes function.
|
|
115
266
|
|
|
116
267
|
Args:
|
|
117
268
|
args: A namespace object from argparse. It should have the following attribute:
|
|
@@ -119,8 +270,14 @@ def view_recipe(args) -> None:
|
|
|
119
270
|
|
|
120
271
|
Returns:
|
|
121
272
|
None
|
|
273
|
+
|
|
274
|
+
Raises:
|
|
275
|
+
TypeError: If the 'recipe' argument is not a string or is None.
|
|
122
276
|
"""
|
|
123
277
|
try:
|
|
278
|
+
if not isinstance(args.recipe, str) or not args.recipe or args.recipe is None:
|
|
279
|
+
raise TypeError(ERROR_BENCHMARK_VIEW_RECIPE_RECIPE_VALIDATION)
|
|
280
|
+
|
|
124
281
|
recipe_info = api_read_recipe(args.recipe)
|
|
125
282
|
_display_recipes([recipe_info])
|
|
126
283
|
except Exception as e:
|
|
@@ -148,46 +305,103 @@ def run_recipe(args) -> None:
|
|
|
148
305
|
|
|
149
306
|
Returns:
|
|
150
307
|
None
|
|
308
|
+
|
|
309
|
+
Raises:
|
|
310
|
+
TypeError: If any of the required arguments are not of the expected types or are None.
|
|
311
|
+
ValueError: If the 'recipes' or 'endpoints' arguments cannot be evaluated into lists of strings.
|
|
312
|
+
RuntimeError: If no results are found after running the recipes.
|
|
151
313
|
"""
|
|
152
314
|
try:
|
|
153
|
-
name
|
|
315
|
+
if not isinstance(args.name, str) or not args.name or args.name is None:
|
|
316
|
+
raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_NAME_VALIDATION)
|
|
317
|
+
|
|
318
|
+
if (
|
|
319
|
+
not isinstance(args.recipes, str)
|
|
320
|
+
or not args.recipes
|
|
321
|
+
or args.recipes is None
|
|
322
|
+
):
|
|
323
|
+
raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_RECIPES_VALIDATION)
|
|
324
|
+
|
|
325
|
+
if (
|
|
326
|
+
not isinstance(args.endpoints, str)
|
|
327
|
+
or not args.endpoints
|
|
328
|
+
or args.endpoints is None
|
|
329
|
+
):
|
|
330
|
+
raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION)
|
|
331
|
+
|
|
332
|
+
if isinstance(args.num_of_prompts, bool) or not isinstance(
|
|
333
|
+
args.num_of_prompts, int
|
|
334
|
+
):
|
|
335
|
+
raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_NUM_OF_PROMPTS_VALIDATION)
|
|
336
|
+
|
|
337
|
+
if isinstance(args.random_seed, bool) or not isinstance(args.random_seed, int):
|
|
338
|
+
raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_RANDOM_SEED_VALIDATION)
|
|
339
|
+
|
|
340
|
+
if (
|
|
341
|
+
not isinstance(args.system_prompt, str)
|
|
342
|
+
or not args.system_prompt
|
|
343
|
+
or args.system_prompt is None
|
|
344
|
+
):
|
|
345
|
+
raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_SYS_PROMPT_VALIDATION)
|
|
346
|
+
|
|
347
|
+
if (
|
|
348
|
+
not isinstance(args.runner_proc_module, str)
|
|
349
|
+
or not args.runner_proc_module
|
|
350
|
+
or args.runner_proc_module is None
|
|
351
|
+
):
|
|
352
|
+
raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_RUNNER_PROC_MOD_VALIDATION)
|
|
353
|
+
|
|
354
|
+
if (
|
|
355
|
+
not isinstance(args.result_proc_module, str)
|
|
356
|
+
or not args.result_proc_module
|
|
357
|
+
or args.result_proc_module is None
|
|
358
|
+
):
|
|
359
|
+
raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_RESULT_PROC_MOD_VALIDATION)
|
|
360
|
+
|
|
154
361
|
recipes = literal_eval(args.recipes)
|
|
362
|
+
if not (
|
|
363
|
+
isinstance(recipes, list) and all(isinstance(item, str) for item in recipes)
|
|
364
|
+
):
|
|
365
|
+
raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_RECIPES_VALIDATION_1)
|
|
366
|
+
|
|
155
367
|
endpoints = literal_eval(args.endpoints)
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
368
|
+
if not (
|
|
369
|
+
isinstance(endpoints, list)
|
|
370
|
+
and all(isinstance(item, str) for item in endpoints)
|
|
371
|
+
):
|
|
372
|
+
raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION_1)
|
|
161
373
|
|
|
162
374
|
# Run the recipes with the defined endpoints
|
|
163
|
-
slugify_id = slugify(name, lowercase=True)
|
|
375
|
+
slugify_id = slugify(args.name, lowercase=True)
|
|
164
376
|
if slugify_id in api_get_all_runner_name():
|
|
165
377
|
rec_runner = api_load_runner(slugify_id)
|
|
166
378
|
else:
|
|
167
|
-
rec_runner = api_create_runner(name, endpoints)
|
|
379
|
+
rec_runner = api_create_runner(args.name, endpoints)
|
|
168
380
|
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
rec_runner.run_recipes(
|
|
381
|
+
async def run():
|
|
382
|
+
await rec_runner.run_recipes(
|
|
172
383
|
recipes,
|
|
173
|
-
num_of_prompts,
|
|
174
|
-
random_seed,
|
|
175
|
-
system_prompt,
|
|
176
|
-
runner_proc_module,
|
|
177
|
-
result_proc_module,
|
|
384
|
+
args.num_of_prompts,
|
|
385
|
+
args.random_seed,
|
|
386
|
+
args.system_prompt,
|
|
387
|
+
args.runner_proc_module,
|
|
388
|
+
args.result_proc_module,
|
|
178
389
|
)
|
|
179
|
-
|
|
180
|
-
|
|
390
|
+
await rec_runner.close()
|
|
391
|
+
|
|
392
|
+
loop = asyncio.get_event_loop()
|
|
393
|
+
loop.run_until_complete(run())
|
|
181
394
|
|
|
182
395
|
# Display results
|
|
183
396
|
runner_runs = api_get_all_run(rec_runner.id)
|
|
184
397
|
result_info = runner_runs[-1].get("results")
|
|
185
398
|
if result_info:
|
|
186
|
-
|
|
399
|
+
_show_recipe_results(
|
|
187
400
|
recipes, endpoints, result_info, result_info["metadata"]["duration"]
|
|
188
401
|
)
|
|
189
402
|
else:
|
|
190
|
-
raise RuntimeError(
|
|
403
|
+
raise RuntimeError(ERROR_BENCHMARK_RUN_RECIPE_NO_RESULT)
|
|
404
|
+
|
|
191
405
|
except Exception as e:
|
|
192
406
|
print(f"[run_recipe]: {str(e)}")
|
|
193
407
|
|
|
@@ -207,11 +421,31 @@ def update_recipe(args) -> None:
|
|
|
207
421
|
|
|
208
422
|
Returns:
|
|
209
423
|
None
|
|
424
|
+
|
|
425
|
+
Raises:
|
|
426
|
+
ValueError: If the 'recipe' or 'update_values' arguments are not strings or are None.
|
|
427
|
+
ValueError: If the 'update_values' argument cannot be evaluated into a list of tuples.
|
|
210
428
|
"""
|
|
211
429
|
try:
|
|
430
|
+
if args.recipe is None or not isinstance(args.recipe, str) or not args.recipe:
|
|
431
|
+
raise ValueError(ERROR_BENCHMARK_UPDATE_RECIPE_RECIPE_VALIDATION)
|
|
432
|
+
|
|
433
|
+
if (
|
|
434
|
+
args.update_values is None
|
|
435
|
+
or not isinstance(args.update_values, str)
|
|
436
|
+
or not args.update_values
|
|
437
|
+
):
|
|
438
|
+
raise ValueError(ERROR_BENCHMARK_UPDATE_RECIPE_UPDATE_VALUES_VALIDATION)
|
|
439
|
+
|
|
212
440
|
recipe = args.recipe
|
|
213
|
-
|
|
441
|
+
if literal_eval(args.update_values) and all(
|
|
442
|
+
isinstance(i, tuple) for i in literal_eval(args.update_values)
|
|
443
|
+
):
|
|
444
|
+
update_values = dict(literal_eval(args.update_values))
|
|
445
|
+
else:
|
|
446
|
+
raise ValueError(ERROR_BENCHMARK_UPDATE_RECIPE_UPDATE_VALUES_VALIDATION_1)
|
|
214
447
|
api_update_recipe(recipe, **update_values)
|
|
448
|
+
|
|
215
449
|
print("[update_recipe]: Recipe updated.")
|
|
216
450
|
except Exception as e:
|
|
217
451
|
print(f"[update_recipe]: {str(e)}")
|
|
@@ -232,6 +466,9 @@ def delete_recipe(args) -> None:
|
|
|
232
466
|
|
|
233
467
|
Returns:
|
|
234
468
|
None
|
|
469
|
+
|
|
470
|
+
Raises:
|
|
471
|
+
ValueError: If the 'recipe' argument is not a string or is None.
|
|
235
472
|
"""
|
|
236
473
|
# Confirm with the user before deleting a recipe
|
|
237
474
|
confirmation = console.input(
|
|
@@ -240,7 +477,11 @@ def delete_recipe(args) -> None:
|
|
|
240
477
|
if confirmation.lower() != "y":
|
|
241
478
|
console.print("[bold yellow]Recipe deletion cancelled.[/]")
|
|
242
479
|
return
|
|
480
|
+
|
|
243
481
|
try:
|
|
482
|
+
if args.recipe is None or not isinstance(args.recipe, str) or not args.recipe:
|
|
483
|
+
raise ValueError(ERROR_BENCHMARK_DELETE_RECIPE_RECIPE_VALIDATION)
|
|
484
|
+
|
|
244
485
|
api_delete_recipe(args.recipe)
|
|
245
486
|
print("[delete_recipe]: Recipe deleted.")
|
|
246
487
|
except Exception as e:
|
|
@@ -250,7 +491,7 @@ def delete_recipe(args) -> None:
|
|
|
250
491
|
# ------------------------------------------------------------------------------
|
|
251
492
|
# Helper functions: Display on cli
|
|
252
493
|
# ------------------------------------------------------------------------------
|
|
253
|
-
def
|
|
494
|
+
def _display_view_grading_scale_format(title: str, grading_scale: dict) -> str:
|
|
254
495
|
"""
|
|
255
496
|
Format the grading scale for display.
|
|
256
497
|
|
|
@@ -275,7 +516,7 @@ def display_view_grading_scale_format(title: str, grading_scale: dict) -> str:
|
|
|
275
516
|
return f"[blue]{title}[/blue]: nil"
|
|
276
517
|
|
|
277
518
|
|
|
278
|
-
def
|
|
519
|
+
def _display_view_statistics_format(title: str, stats: dict) -> str:
|
|
279
520
|
"""
|
|
280
521
|
Format the statistics for display.
|
|
281
522
|
|
|
@@ -348,10 +589,10 @@ def _display_recipes(recipes_list: list) -> None:
|
|
|
348
589
|
"Prompt Templates", prompt_templates
|
|
349
590
|
)
|
|
350
591
|
metrics_info = display_view_list_format("Metrics", metrics)
|
|
351
|
-
grading_scale_info =
|
|
592
|
+
grading_scale_info = _display_view_grading_scale_format(
|
|
352
593
|
"Grading Scale", grading_scale
|
|
353
594
|
)
|
|
354
|
-
stats_info =
|
|
595
|
+
stats_info = _display_view_statistics_format("Statistics", stats)
|
|
355
596
|
|
|
356
597
|
recipe_info = (
|
|
357
598
|
f"[red]id: {id}[/red]\n\n[blue]{name}[/blue]\n{description}\n\n"
|
|
@@ -364,7 +605,7 @@ def _display_recipes(recipes_list: list) -> None:
|
|
|
364
605
|
console.print(table)
|
|
365
606
|
|
|
366
607
|
|
|
367
|
-
def
|
|
608
|
+
def _show_recipe_results(recipes, endpoints, recipe_results, duration):
|
|
368
609
|
"""
|
|
369
610
|
Show the results of the recipe benchmarking.
|
|
370
611
|
|
|
@@ -384,7 +625,7 @@ def show_recipe_results(recipes, endpoints, recipe_results, duration):
|
|
|
384
625
|
"""
|
|
385
626
|
if recipe_results:
|
|
386
627
|
# Display recipe results
|
|
387
|
-
|
|
628
|
+
_generate_recipe_table(recipes, endpoints, recipe_results)
|
|
388
629
|
else:
|
|
389
630
|
console.print("[red]There are no results.[/red]")
|
|
390
631
|
|
|
@@ -394,7 +635,7 @@ def show_recipe_results(recipes, endpoints, recipe_results, duration):
|
|
|
394
635
|
console.print(run_stats)
|
|
395
636
|
|
|
396
637
|
|
|
397
|
-
def
|
|
638
|
+
def _generate_recipe_table(recipes: list, endpoints: list, results: dict) -> None:
|
|
398
639
|
"""
|
|
399
640
|
Generate and display a table of recipe results.
|
|
400
641
|
|
|
@@ -5,8 +5,17 @@ from rich.console import Console
|
|
|
5
5
|
from rich.table import Table
|
|
6
6
|
|
|
7
7
|
from moonshot.api import api_delete_result, api_get_all_result, api_read_result
|
|
8
|
-
from moonshot.integrations.cli.benchmark.cookbook import
|
|
9
|
-
from moonshot.integrations.cli.benchmark.recipe import
|
|
8
|
+
from moonshot.integrations.cli.benchmark.cookbook import _show_cookbook_results
|
|
9
|
+
from moonshot.integrations.cli.benchmark.recipe import _show_recipe_results
|
|
10
|
+
from moonshot.integrations.cli.cli_errors import (
|
|
11
|
+
ERROR_BENCHMARK_DELETE_RESULT_RESULT_VALIDATION,
|
|
12
|
+
ERROR_BENCHMARK_LIST_RESULTS_FIND_VALIDATION,
|
|
13
|
+
ERROR_BENCHMARK_LIST_RESULTS_PAGINATION_VALIDATION,
|
|
14
|
+
ERROR_BENCHMARK_LIST_RESULTS_PAGINATION_VALIDATION_1,
|
|
15
|
+
ERROR_BENCHMARK_VIEW_RESULT_METADATA_INVALID_VALIDATION,
|
|
16
|
+
ERROR_BENCHMARK_VIEW_RESULT_METADATA_VALIDATION,
|
|
17
|
+
ERROR_BENCHMARK_VIEW_RESULT_RESULT_FILENAME_VALIDATION,
|
|
18
|
+
)
|
|
10
19
|
from moonshot.integrations.cli.common.display_helper import (
|
|
11
20
|
display_view_list_format,
|
|
12
21
|
display_view_str_format,
|
|
@@ -23,23 +32,44 @@ def list_results(args) -> list | None:
|
|
|
23
32
|
"""
|
|
24
33
|
List all available results.
|
|
25
34
|
|
|
26
|
-
This function retrieves all available results by calling the
|
|
27
|
-
moonshot.api module. It then
|
|
28
|
-
message indicating that no results were found.
|
|
35
|
+
This function retrieves all available results by calling the api_get_all_result function from the
|
|
36
|
+
moonshot.api module. It then filters the results based on the provided keyword and pagination arguments.
|
|
37
|
+
If there are no results, it prints a message indicating that no results were found.
|
|
29
38
|
|
|
30
39
|
Args:
|
|
31
|
-
args:
|
|
32
|
-
|
|
33
|
-
|
|
40
|
+
args (argparse.Namespace): The arguments provided to the command line interface.
|
|
41
|
+
find (str): Optional field to find result(s) with a keyword.
|
|
42
|
+
pagination (str): Optional field to paginate results.
|
|
34
43
|
|
|
35
44
|
Returns:
|
|
36
|
-
list | None: A list of
|
|
45
|
+
list | None: A list of results or None if there are no results.
|
|
37
46
|
"""
|
|
38
47
|
|
|
39
48
|
try:
|
|
49
|
+
if args.find is not None:
|
|
50
|
+
if not isinstance(args.find, str) or not args.find:
|
|
51
|
+
raise TypeError(ERROR_BENCHMARK_LIST_RESULTS_FIND_VALIDATION)
|
|
52
|
+
|
|
53
|
+
if args.pagination is not None:
|
|
54
|
+
if not isinstance(args.pagination, str) or not args.pagination:
|
|
55
|
+
raise TypeError(ERROR_BENCHMARK_LIST_RESULTS_PAGINATION_VALIDATION)
|
|
56
|
+
try:
|
|
57
|
+
pagination = literal_eval(args.pagination)
|
|
58
|
+
if not (
|
|
59
|
+
isinstance(pagination, tuple)
|
|
60
|
+
and len(pagination) == 2
|
|
61
|
+
and all(isinstance(i, int) for i in pagination)
|
|
62
|
+
):
|
|
63
|
+
raise ValueError(
|
|
64
|
+
ERROR_BENCHMARK_LIST_RESULTS_PAGINATION_VALIDATION_1
|
|
65
|
+
)
|
|
66
|
+
except (ValueError, SyntaxError):
|
|
67
|
+
raise ValueError(ERROR_BENCHMARK_LIST_RESULTS_PAGINATION_VALIDATION_1)
|
|
68
|
+
else:
|
|
69
|
+
pagination = ()
|
|
70
|
+
|
|
40
71
|
results_list = api_get_all_result()
|
|
41
72
|
keyword = args.find.lower() if args.find else ""
|
|
42
|
-
pagination = literal_eval(args.pagination) if args.pagination else ()
|
|
43
73
|
|
|
44
74
|
if results_list:
|
|
45
75
|
filtered_results_list = filter_data(results_list, keyword, pagination)
|
|
@@ -52,6 +82,7 @@ def list_results(args) -> list | None:
|
|
|
52
82
|
|
|
53
83
|
except Exception as e:
|
|
54
84
|
print(f"[list_results]: {str(e)}")
|
|
85
|
+
return None
|
|
55
86
|
|
|
56
87
|
|
|
57
88
|
def view_result(args) -> None:
|
|
@@ -60,24 +91,34 @@ def view_result(args) -> None:
|
|
|
60
91
|
|
|
61
92
|
This function retrieves a specific result by calling the api_read_result function from the
|
|
62
93
|
moonshot.api module using the result filename provided in the args.
|
|
63
|
-
It then checks
|
|
64
|
-
display_view_cookbook_result function. Otherwise, it uses the display_view_recipe_result function.
|
|
94
|
+
It then checks the metadata of the result to determine whether to display it as a cookbook or recipe result.
|
|
65
95
|
|
|
66
96
|
Args:
|
|
67
|
-
args
|
|
97
|
+
args (argparse.Namespace): The arguments provided to the command line interface.
|
|
68
98
|
result_filename (str): The filename of the result to view.
|
|
69
99
|
|
|
70
100
|
Returns:
|
|
71
101
|
None
|
|
72
102
|
"""
|
|
73
103
|
try:
|
|
104
|
+
if (
|
|
105
|
+
not isinstance(args.result_filename, str)
|
|
106
|
+
or not args.result_filename
|
|
107
|
+
or args.result_filename is None
|
|
108
|
+
):
|
|
109
|
+
raise TypeError(ERROR_BENCHMARK_VIEW_RESULT_RESULT_FILENAME_VALIDATION)
|
|
110
|
+
|
|
74
111
|
result_info = api_read_result(args.result_filename)
|
|
75
|
-
if result_info
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
112
|
+
if isinstance(result_info, dict) and "metadata" in result_info:
|
|
113
|
+
if result_info["metadata"].get("cookbooks"):
|
|
114
|
+
_display_view_cookbook_result(result_info)
|
|
115
|
+
elif result_info["metadata"].get("recipes"):
|
|
116
|
+
_display_view_recipe_result(result_info)
|
|
117
|
+
else:
|
|
118
|
+
raise TypeError(ERROR_BENCHMARK_VIEW_RESULT_METADATA_INVALID_VALIDATION)
|
|
79
119
|
else:
|
|
80
|
-
|
|
120
|
+
raise TypeError(ERROR_BENCHMARK_VIEW_RESULT_METADATA_VALIDATION)
|
|
121
|
+
|
|
81
122
|
except Exception as e:
|
|
82
123
|
print(f"[view_result]: {str(e)}")
|
|
83
124
|
|
|
@@ -92,7 +133,7 @@ def delete_result(args) -> None:
|
|
|
92
133
|
prints an error message.
|
|
93
134
|
|
|
94
135
|
Args:
|
|
95
|
-
args
|
|
136
|
+
args (argparse.Namespace): The arguments provided to the command line interface.
|
|
96
137
|
result (str): The identifier of the result to delete.
|
|
97
138
|
|
|
98
139
|
Returns:
|
|
@@ -105,7 +146,11 @@ def delete_result(args) -> None:
|
|
|
105
146
|
if confirmation.lower() != "y":
|
|
106
147
|
console.print("[bold yellow]Result deletion cancelled.[/]")
|
|
107
148
|
return
|
|
149
|
+
|
|
108
150
|
try:
|
|
151
|
+
if args.result is None or not isinstance(args.result, str) or not args.result:
|
|
152
|
+
raise ValueError(ERROR_BENCHMARK_DELETE_RESULT_RESULT_VALIDATION)
|
|
153
|
+
|
|
109
154
|
api_delete_result(args.result)
|
|
110
155
|
print("[delete_result]: Result deleted.")
|
|
111
156
|
except Exception as e:
|
|
@@ -123,7 +168,7 @@ def _display_results(results_list):
|
|
|
123
168
|
message indicating that no results were found.
|
|
124
169
|
|
|
125
170
|
Args:
|
|
126
|
-
results_list (list): A list of results. Each result is a dictionary with keys 'id' and '
|
|
171
|
+
results_list (list): A list of results. Each result is a dictionary with keys 'id' and 'metadata'.
|
|
127
172
|
|
|
128
173
|
Returns:
|
|
129
174
|
None
|
|
@@ -170,13 +215,12 @@ def _display_results(results_list):
|
|
|
170
215
|
console.print(table)
|
|
171
216
|
|
|
172
217
|
|
|
173
|
-
def
|
|
218
|
+
def _display_view_recipe_result(result_info):
|
|
174
219
|
"""
|
|
175
220
|
Display the recipe result.
|
|
176
221
|
|
|
177
|
-
This function takes the result
|
|
178
|
-
|
|
179
|
-
converted result info. Finally, it calls the show_recipe_results function from the
|
|
222
|
+
This function takes the result info as an argument. It retrieves the recipes, endpoints, and duration from the
|
|
223
|
+
result info. Finally, it calls the _show_recipe_results function from the
|
|
180
224
|
moonshot.integrations.cli.benchmark.recipe module to display the recipe results.
|
|
181
225
|
|
|
182
226
|
Args:
|
|
@@ -188,16 +232,15 @@ def display_view_recipe_result(result_info):
|
|
|
188
232
|
recipes = result_info["metadata"]["recipes"]
|
|
189
233
|
endpoints = result_info["metadata"]["endpoints"]
|
|
190
234
|
duration = result_info["metadata"]["duration"]
|
|
191
|
-
|
|
235
|
+
_show_recipe_results(recipes, endpoints, result_info, duration)
|
|
192
236
|
|
|
193
237
|
|
|
194
|
-
def
|
|
238
|
+
def _display_view_cookbook_result(result_info):
|
|
195
239
|
"""
|
|
196
240
|
Display the cookbook result.
|
|
197
241
|
|
|
198
|
-
This function takes the result
|
|
199
|
-
|
|
200
|
-
converted result info. Finally, it calls the show_cookbook_results function from the
|
|
242
|
+
This function takes the result info as an argument. It retrieves the cookbooks, endpoints, and duration from the
|
|
243
|
+
result info. Finally, it calls the _show_cookbook_results function from the
|
|
201
244
|
moonshot.integrations.cli.benchmark.cookbook module to display the cookbook results.
|
|
202
245
|
|
|
203
246
|
Args:
|
|
@@ -209,7 +252,7 @@ def display_view_cookbook_result(result_info):
|
|
|
209
252
|
cookbooks = result_info["metadata"]["cookbooks"]
|
|
210
253
|
endpoints = result_info["metadata"]["endpoints"]
|
|
211
254
|
duration = result_info["metadata"]["duration"]
|
|
212
|
-
|
|
255
|
+
_show_cookbook_results(cookbooks, endpoints, result_info, duration)
|
|
213
256
|
|
|
214
257
|
|
|
215
258
|
# ------------------------------------------------------------------------------
|