aiverify-moonshot 0.5.1__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aiverify-moonshot
3
- Version: 0.5.1
3
+ Version: 0.6.1
4
4
  Summary: AI Verify advances Gen AI testing with Project Moonshot.
5
5
  Project-URL: Repository, https://github.com/aiverify-foundation/moonshot
6
6
  Project-URL: Documentation, https://aiverify-foundation.github.io/moonshot/
@@ -47,7 +47,7 @@ Description-Content-Type: text/markdown
47
47
 
48
48
  ![Moonshot Logo](https://github.com/aiverify-foundation/moonshot/raw/main/misc/aiverify-moonshot-logo.png)
49
49
 
50
- **Version 0.5.1**
50
+ **Version 0.6.1**
51
51
 
52
52
  A simple and modular tool to evaluate any LLM application.
53
53
 
@@ -6,14 +6,14 @@ moonshot/integrations/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJW
6
6
  moonshot/integrations/cli/__main__.py,sha256=0VnYSj2AayvDCZ3uXpldPcjMHt2Yd7BWojWzFOGSSl4,679
7
7
  moonshot/integrations/cli/active_session_cfg.py,sha256=n8hOFxFjvz26qbEFY4q7iPUZYrGLoeCmXJxmOb_xWUE,20
8
8
  moonshot/integrations/cli/cli.py,sha256=9tnzcxcSOjblxCUpyh3pK0ke0bLs3s-63OxXtYoZI2g,2769
9
- moonshot/integrations/cli/cli_errors.py,sha256=jJ15ngnVJjSByamtOE2Cx79k1mOJnAl36NbCjpPppeM,23672
9
+ moonshot/integrations/cli/cli_errors.py,sha256=ltQKnj9bawpVogQFOgLHbxL_9CcFQf3XOR6yQtdqGS4,24030
10
10
  moonshot/integrations/cli/benchmark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  moonshot/integrations/cli/benchmark/benchmark.py,sha256=QUxr6DU11-XeH6Y3j1uPsZsotshgy64G_cWNf0Rn2_U,6303
12
- moonshot/integrations/cli/benchmark/cookbook.py,sha256=rT4gK88LhATFxsAxWWYExtPVT09oWPHJtJ1HQkLT3yU,29444
12
+ moonshot/integrations/cli/benchmark/cookbook.py,sha256=_ShWLEb1R_G3zKPmRVbeykRQ5pS_fv4IwbVeLEqYYEM,30265
13
13
  moonshot/integrations/cli/benchmark/datasets.py,sha256=Uq5XMNWUp775sz9jCZUZHHmkumPFI7cHVRueHgWm70Q,8965
14
14
  moonshot/integrations/cli/benchmark/metrics.py,sha256=SHs-hIa4CIPyOJtxK2U4D6IRHy3ZNsRtZlAMGvF9Qxw,8310
15
- moonshot/integrations/cli/benchmark/recipe.py,sha256=uAYTBu1EwcCBdf9Iut6eboZ6nBt5_hDwBhru50mZer0,32407
16
- moonshot/integrations/cli/benchmark/result.py,sha256=TGlIuDkPB1Cfu1u2uyDsWtgQxTOoNdGDzc0vCnGC1zk,11152
15
+ moonshot/integrations/cli/benchmark/recipe.py,sha256=KW0h1Ynga_2yc-jOd-ULQSbO9R7zETOz3qn3T23bqh8,32920
16
+ moonshot/integrations/cli/benchmark/result.py,sha256=o6_yca1PqFtbjWgnUpK8v_y2z6zuxmB_ue6MXEevpAo,11223
17
17
  moonshot/integrations/cli/benchmark/run.py,sha256=HBztvG_Zkg1ZAWsFv0QDE43FaEmx92vTWc4h1U3VesU,7438
18
18
  moonshot/integrations/cli/benchmark/runner.py,sha256=Y4Vt6Qqn9QzsM6eLUM9m2_XKkW3ctu-2jMTSei_TDPU,7098
19
19
  moonshot/integrations/cli/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -34,7 +34,7 @@ moonshot/integrations/cli/utils/process_data.py,sha256=QVL5vp2_8ZgGicmCAdeYEHkeb
34
34
  moonshot/integrations/web_api/.env.dev,sha256=0z5_Ut8rF-UqFZtgjkH2qoqORhD5_nSs2w_OeX2SteI,182
35
35
  moonshot/integrations/web_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
36
  moonshot/integrations/web_api/__main__.py,sha256=MdnLi_ZF-olAAEJwTPU1iGYFYwo-fNWNT2qfchkH3y4,2050
37
- moonshot/integrations/web_api/app.py,sha256=14_CWTSuLNyX3zH_vaqFUa4fugOUcOxLqx-IEv0-v34,3651
37
+ moonshot/integrations/web_api/app.py,sha256=Jr6mYvfjiPKMUWU58QxvYS-bpvkUotd728t6up3ZS-w,3651
38
38
  moonshot/integrations/web_api/container.py,sha256=DVkJG_qm7ItcG6tgMYOqIj07wpKhPWOOfy6-bEv72y4,5915
39
39
  moonshot/integrations/web_api/logging_conf.py,sha256=t3EGRV6tZhV732KXe8_Tiy0fiwVAWxZX5Tt8VTgrrfg,3388
40
40
  moonshot/integrations/web_api/log/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -53,11 +53,11 @@ moonshot/integrations/web_api/routes/recipe.py,sha256=WOcq4bm2LP87ovO4Op6cDbUPJ2
53
53
  moonshot/integrations/web_api/routes/redteam.py,sha256=t-jNot5_PkV6f5_WBorp1HL437NY5RZzxSE-2NfG0es,24541
54
54
  moonshot/integrations/web_api/routes/runner.py,sha256=NQdAmVIOnNgSESX3am6wAE0YLIxHYXlnQbh00_7-SD4,8438
55
55
  moonshot/integrations/web_api/schemas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
- moonshot/integrations/web_api/schemas/benchmark_runner_dto.py,sha256=nfNMt_9Xg0YAL5f93dZamu7DxSLvAsz8-tdA_DTCXQQ,322
56
+ moonshot/integrations/web_api/schemas/benchmark_runner_dto.py,sha256=IIn6KeMcwxTSlwXuCnOUhd3x24ucq-goV2brU1OvxT4,369
57
57
  moonshot/integrations/web_api/schemas/bookmark_create_dto.py,sha256=C78vG8UG02N7Cmt6RSuS8e4sX_G-MLCiAWT-cF5BE8s,374
58
- moonshot/integrations/web_api/schemas/cookbook_create_dto.py,sha256=00SPVw7lEpfY9yOFdt1XkvvNAzfFRd7d7CA90qguhuQ,670
58
+ moonshot/integrations/web_api/schemas/cookbook_create_dto.py,sha256=wXC0tu1Q8SpSI3Qk0xKPj1vKsOJEYmfPgU4rl6QopUY,826
59
59
  moonshot/integrations/web_api/schemas/cookbook_response_model.py,sha256=COLvaE4Hrz_w-C_HQkB7feztweIr0wkY9h8N6NKNIr8,332
60
- moonshot/integrations/web_api/schemas/dataset_create_dto.py,sha256=Jr_EbIgqR7K__LQXpXSocfYLE6oN7pGRFzWcDVcFaus,963
60
+ moonshot/integrations/web_api/schemas/dataset_create_dto.py,sha256=GRqIIlQZEpzzEXwAFcbDlxOuKg0JZ399axBjg34LMp8,915
61
61
  moonshot/integrations/web_api/schemas/dataset_response_dto.py,sha256=s5x4-UXEWccWhK42E0FPXiHG6VqjuFuph-2t5atEkg4,171
62
62
  moonshot/integrations/web_api/schemas/endpoint_create_dto.py,sha256=WS8AfRybrweoOgZx6K6jiNy1Z6J3IZS1PUNnrRxGKyM,678
63
63
  moonshot/integrations/web_api/schemas/endpoint_response_model.py,sha256=OmmM2uaPSgB2aqPFfkhseKkI5OKCKilXR19gDmwFlLc,321
@@ -74,12 +74,12 @@ moonshot/integrations/web_api/services/auto_red_team_test_manager.py,sha256=a_aB
74
74
  moonshot/integrations/web_api/services/auto_red_team_test_state.py,sha256=GRmvdYLwQdE8gGkYD9Sd4n__yEBajl2pRA_V0J2YObE,1952
75
75
  moonshot/integrations/web_api/services/base_service.py,sha256=_MaQEuBpRNNHXNPylZUGaUVCSA5a2jHi9NoKBpvIprs,172
76
76
  moonshot/integrations/web_api/services/benchmark_result_service.py,sha256=-oPvLL7b-pEAOeY0gwlngpgImklkUiwvPE6IJo83a7M,909
77
- moonshot/integrations/web_api/services/benchmark_test_manager.py,sha256=zsB8zTDUvH-hT1c-rmyh71uO9ZuIxYbUP3msh8Hdkm4,4024
77
+ moonshot/integrations/web_api/services/benchmark_test_manager.py,sha256=aPoB6hOfOYqsDliiIzZ0y6cCI0mPDXLK21j9fHXm10U,4076
78
78
  moonshot/integrations/web_api/services/benchmark_test_state.py,sha256=MyhTxpAhhP66JF0ua1SMc_IIeIjDxQY5swOXv9cmYaY,1887
79
79
  moonshot/integrations/web_api/services/benchmarking_service.py,sha256=lJZeNTqxEPBLrZNX3Z9JIilgwetywSkv0deQkcb8mQs,1257
80
80
  moonshot/integrations/web_api/services/bookmark_service.py,sha256=jI9nXs1hjzO0CLG2LKaXSzDApLThkfCvPUkaNNV9A5A,3546
81
81
  moonshot/integrations/web_api/services/context_strategy_service.py,sha256=6YKnnG8JlE_1nlnr4Hq7rgz-sxI6oQglK0STaWPFQxQ,710
82
- moonshot/integrations/web_api/services/cookbook_service.py,sha256=AU2J_YCLIrRlufPgboZ27f89rVVR1zJHXXHwusf_ipc,8877
82
+ moonshot/integrations/web_api/services/cookbook_service.py,sha256=37iJZn4ybe9tugBWB99g1SAN1YUtkmaq2mLQWj_HBQo,8736
83
83
  moonshot/integrations/web_api/services/dataset_service.py,sha256=ZWb3FqyDkA0C9qhlQ3X_zR0ohAlwlLsJi-mgKLvXpnI,2407
84
84
  moonshot/integrations/web_api/services/endpoint_service.py,sha256=N5SXNAh44UNeBpMhA9baL0VZoTx4sHzpy4y7-Ch8O4E,2395
85
85
  moonshot/integrations/web_api/services/metric_service.py,sha256=xWC5Dk8aiU7tuHsxYedTTrEkbA3Ug1pV2nbaBas6cAg,456
@@ -94,7 +94,7 @@ moonshot/integrations/web_api/status_updater/moonshot_ui_webhook.py,sha256=ToyyC
94
94
  moonshot/integrations/web_api/status_updater/interface/benchmark_progress_callback.py,sha256=MOs_1CKpNh2m3JUAEoJfmZOBivk80DNtSnRuTCJgzJ4,350
95
95
  moonshot/integrations/web_api/status_updater/interface/redteam_progress_callback.py,sha256=JRczi3vCq6oPfOddPrF4OCdyHQYAVxgPWK-qOJxElKg,350
96
96
  moonshot/integrations/web_api/temp/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
97
- moonshot/integrations/web_api/types/types.py,sha256=AN0Xf61lx2c5AFAYoXA8mVL5iufVBpwYlIPdo8gv-ls,2395
97
+ moonshot/integrations/web_api/types/types.py,sha256=zy1Jlqgju_F7Gb4SaJf7O70egH98D0b4DDCZ384HG2E,2408
98
98
  moonshot/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
99
99
  moonshot/src/messages_constants.py,sha256=usbvwitgRdOVY0ARdBbh9uiLNUb6WCdCuSoa64tVMhM,6936
100
100
  moonshot/src/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -102,7 +102,7 @@ moonshot/src/api/api_bookmark.py,sha256=gg7uJU7ixZtmJ99S3xdgQgcznkY8SjoJaTBkSsno
102
102
  moonshot/src/api/api_connector.py,sha256=Q_of-aHPuWkbefMJq4uXctJl89G2Tt6J_HfSuf1hE6g,2234
103
103
  moonshot/src/api/api_connector_endpoint.py,sha256=lwfhlWNBJ6QotqffmURtjRmxfzbBlSIAZupeSpMt9VU,5584
104
104
  moonshot/src/api/api_context_strategy.py,sha256=uRIfNjKJ_Wk9nSrvbPRfrdQLpG0K6kH9rl5tmmHui40,2151
105
- moonshot/src/api/api_cookbook.py,sha256=KzGCcJjpAs92FZBloojLbmi5nqK3hd_zX8m5jYWKMMM,5715
105
+ moonshot/src/api/api_cookbook.py,sha256=V05abHvzElrO7LkSyhOMcAHEfCfIgopd6L0cSSO3Dro,6722
106
106
  moonshot/src/api/api_dataset.py,sha256=i2KwnZ-6fTm_tyn8cRw8iesrGi7_Nh0-1bFuN7m0TVo,4066
107
107
  moonshot/src/api/api_environment_variables.py,sha256=wRx6rm95ItyL_uKUAYfSjcPZNbRxKl1GGS4PpWcTE1s,712
108
108
  moonshot/src/api/api_metrics.py,sha256=x5DiysTYQsMmcAS2y2XpgvrPobZk7GT2rhO-MaIRun4,1603
@@ -125,8 +125,8 @@ moonshot/src/connectors_endpoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
125
125
  moonshot/src/connectors_endpoints/connector_endpoint.py,sha256=3U4030gKhvDt4e7VD3lac2FG0kxGJ0DSLPTAUKYOPEk,10018
126
126
  moonshot/src/connectors_endpoints/connector_endpoint_arguments.py,sha256=0v9mUHki4l22CK8o8UjATAsFDza9Lutbh4QplLiDXs8,2434
127
127
  moonshot/src/cookbooks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
128
- moonshot/src/cookbooks/cookbook.py,sha256=LjmbJtFwIPhupQlx6LjjIQLMfpwcWZKurduS8MBgqHI,9661
129
- moonshot/src/cookbooks/cookbook_arguments.py,sha256=iVp3x1-wi_RL7peTSr5TWbXnjuB_w7Uj29kJrQg48Y0,1210
128
+ moonshot/src/cookbooks/cookbook.py,sha256=DdZwRGx5-xTDIKcXtZRpp7Qb9Mm9dNGwXWLQXoQrBBo,10412
129
+ moonshot/src/cookbooks/cookbook_arguments.py,sha256=SmNG8D5qN2K2dcImDaSBPHsna0Gy60ZR49_eTKEsvVU,1445
130
130
  moonshot/src/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
131
131
  moonshot/src/datasets/dataset.py,sha256=-_uhjR7zi50nkLu1WWlPCCWr14VwFUDfhTeeBHOhb70,14236
132
132
  moonshot/src/datasets/dataset_arguments.py,sha256=rUcxxo2WTcHhLLV-WoixjOfT_Ju7hFCq811_ctjegt8,1751
@@ -154,7 +154,7 @@ moonshot/src/results/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
154
154
  moonshot/src/results/result.py,sha256=o56SdhYH-XVfpeeKhN495dJPkU035MmTjRUx48q53lo,4527
155
155
  moonshot/src/results/result_arguments.py,sha256=mTR7yajY72PFglfAaa1ajJfvYNV4IBGLXS4VaD53-8c,1334
156
156
  moonshot/src/runners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
157
- moonshot/src/runners/runner.py,sha256=nnYiMHZUSu35jzHoeiEqADhc7iHLKnViyUbd_Qg8WZs,21203
157
+ moonshot/src/runners/runner.py,sha256=7xp4GwiN4fO-ib7CahkaYXoVBB40cuVNJd5DxJxrYsk,21362
158
158
  moonshot/src/runners/runner_arguments.py,sha256=Bg4OPSmgr9jZKNAwPH0T3epEHw-6qGrflszFc6oMyEU,1640
159
159
  moonshot/src/runners/runner_type.py,sha256=jOfnAnaCYp-rPTRJXhM8hin_dinlR0sMwmimQXvLcJ0,100
160
160
  moonshot/src/runs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -172,9 +172,9 @@ moonshot/src/utils/import_modules.py,sha256=T9zTN59PFnvY2rjyWhSV9KSIAHxWV1pyBemF
172
172
  moonshot/src/utils/log.py,sha256=YNgD7Eh2OT36XlmVBKCGUTAh9TRp4Akfe4kDdvHASgs,2502
173
173
  moonshot/src/utils/pagination.py,sha256=5seymyRoqyENIhKllAatr1T91kMCGFslcvRnJHyMSvc,814
174
174
  moonshot/src/utils/timeit.py,sha256=TvuF0w8KWhp0oZFY0cUU3UY0xlGKjchb0OkfYfgVTlc,866
175
- aiverify_moonshot-0.5.1.dist-info/METADATA,sha256=EtBhJ0bmVoTktwS_p6OUI9JMgY6-gjmhUWfGK3Em7Vk,12419
176
- aiverify_moonshot-0.5.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
177
- aiverify_moonshot-0.5.1.dist-info/licenses/AUTHORS.md,sha256=mmAbe3i3sT8JZHJMBhxp3i1xRehV0g7WB4T_eyIBuBs,59
178
- aiverify_moonshot-0.5.1.dist-info/licenses/LICENSE.md,sha256=53izDRmJZZCjpYGfyLqlxnGQN-aNWBxasuzuMXC5Ias,11347
179
- aiverify_moonshot-0.5.1.dist-info/licenses/NOTICES.md,sha256=vS1zZYAnGjCJdwQ13xv3b2zc30wOS98ZnCKluT-AhHs,123266
180
- aiverify_moonshot-0.5.1.dist-info/RECORD,,
175
+ aiverify_moonshot-0.6.1.dist-info/METADATA,sha256=Um1dy4p7R1ZqYm9X_wnmzsVi2qclr6trbA11ijKYiRs,12419
176
+ aiverify_moonshot-0.6.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
177
+ aiverify_moonshot-0.6.1.dist-info/licenses/AUTHORS.md,sha256=mmAbe3i3sT8JZHJMBhxp3i1xRehV0g7WB4T_eyIBuBs,59
178
+ aiverify_moonshot-0.6.1.dist-info/licenses/LICENSE.md,sha256=53izDRmJZZCjpYGfyLqlxnGQN-aNWBxasuzuMXC5Ias,11347
179
+ aiverify_moonshot-0.6.1.dist-info/licenses/NOTICES.md,sha256=vS1zZYAnGjCJdwQ13xv3b2zc30wOS98ZnCKluT-AhHs,123266
180
+ aiverify_moonshot-0.6.1.dist-info/RECORD,,
@@ -37,7 +37,8 @@ from moonshot.integrations.cli.cli_errors import (
37
37
  ERROR_BENCHMARK_RUN_COOKBOOK_ENDPOINTS_VALIDATION_1,
38
38
  ERROR_BENCHMARK_RUN_COOKBOOK_NAME_VALIDATION,
39
39
  ERROR_BENCHMARK_RUN_COOKBOOK_NO_RESULT,
40
- ERROR_BENCHMARK_RUN_COOKBOOK_NUM_OF_PROMPTS_VALIDATION,
40
+ ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION,
41
+ ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_VALIDATION,
41
42
  ERROR_BENCHMARK_RUN_COOKBOOK_RANDOM_SEED_VALIDATION,
42
43
  ERROR_BENCHMARK_RUN_COOKBOOK_RESULT_PROC_MOD_VALIDATION,
43
44
  ERROR_BENCHMARK_RUN_COOKBOOK_RUNNER_PROC_MOD_VALIDATION,
@@ -212,11 +213,12 @@ def run_cookbook(args) -> None:
212
213
  The cookbooks are run against the specified endpoints, and the results are processed and displayed.
213
214
 
214
215
  Args:
215
- args: A namespace object from argparse. It should have the following attributes:
216
+ args (argparse.Namespace): The arguments provided to the command line interface.
217
+ Expected keys are:
216
218
  name (str): The name of the cookbook runner.
217
219
  cookbooks (str): A string representation of a list of cookbooks to run.
218
220
  endpoints (str): A string representation of a list of endpoints to run.
219
- num_of_prompts (int): The number of prompts to run.
221
+ prompt_selection_percentage (int): The percentage of prompts to run.
220
222
  random_seed (int): The random seed number for reproducibility.
221
223
  system_prompt (str): The system prompt to use.
222
224
  runner_proc_module (str): The runner processing module to use.
@@ -248,10 +250,19 @@ def run_cookbook(args) -> None:
248
250
  ):
249
251
  raise TypeError(ERROR_BENCHMARK_RUN_COOKBOOK_ENDPOINTS_VALIDATION)
250
252
 
251
- if isinstance(args.num_of_prompts, bool) or not isinstance(
252
- args.num_of_prompts, int
253
+ if isinstance(args.prompt_selection_percentage, bool) or not isinstance(
254
+ args.prompt_selection_percentage, int
255
+ ):
256
+ raise TypeError(
257
+ ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_VALIDATION
258
+ )
259
+ elif (
260
+ args.prompt_selection_percentage < 1
261
+ or args.prompt_selection_percentage > 100
253
262
  ):
254
- raise TypeError(ERROR_BENCHMARK_RUN_COOKBOOK_NUM_OF_PROMPTS_VALIDATION)
263
+ raise ValueError(
264
+ ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION
265
+ )
255
266
 
256
267
  if isinstance(args.random_seed, bool) or not isinstance(args.random_seed, int):
257
268
  raise TypeError(ERROR_BENCHMARK_RUN_COOKBOOK_RANDOM_SEED_VALIDATION)
@@ -297,7 +308,7 @@ def run_cookbook(args) -> None:
297
308
  async def run():
298
309
  await cb_runner.run_cookbooks(
299
310
  cookbooks,
300
- args.num_of_prompts,
311
+ args.prompt_selection_percentage,
301
312
  args.random_seed,
302
313
  args.system_prompt,
303
314
  args.runner_proc_module,
@@ -436,9 +447,20 @@ def _display_cookbooks(cookbooks_list):
436
447
  table.add_column("Cookbook", justify="left", width=78)
437
448
  table.add_column("Contains", justify="left", width=20, overflow="fold")
438
449
  for idx, cookbook in enumerate(cookbooks_list, 1):
439
- id, name, description, recipes, *other_args = cookbook.values()
450
+ (
451
+ id,
452
+ name,
453
+ tags,
454
+ categories,
455
+ description,
456
+ recipes,
457
+ *other_args,
458
+ ) = cookbook.values()
440
459
  idx = cookbook.get("idx", idx)
441
- cookbook_info = f"[red]ID: {id}[/red]\n\n[blue]{name}[/blue]\n{description}"
460
+ cookbook_info = f"[red]ID: {id}[/red]\n\n[blue]{name}[/blue]\n\n{description}"
461
+ cookbook_info += (
462
+ f"\n\n[blue]Tags: {tags}[/blue]\n[blue]Categories: {categories}[/blue]\n"
463
+ )
442
464
  recipes_info = display_view_list_format("Recipes", recipes)
443
465
  table.add_section()
444
466
  table.add_row(str(idx), cookbook_info, recipes_info)
@@ -459,11 +481,11 @@ def _display_view_cookbook(cookbook_info):
459
481
  Returns:
460
482
  None
461
483
  """
462
- id, name, description, recipes = cookbook_info.values()
484
+ id, name, tags, categories, description, recipes = cookbook_info.values()
463
485
  recipes_list = api_read_recipes(recipes)
464
486
  if recipes_list:
465
487
  table = Table(
466
- title=f'Cookbook "{name}"',
488
+ title=f'Cookbook: "{name}"\n Tags: {tags}\n Categories: {categories}\n',
467
489
  show_lines=True,
468
490
  expand=True,
469
491
  header_style="bold",
@@ -471,6 +493,7 @@ def _display_view_cookbook(cookbook_info):
471
493
  table.add_column("No.", width=2)
472
494
  table.add_column("Recipe", justify="left", width=78)
473
495
  table.add_column("Contains", justify="left", width=20, overflow="fold")
496
+
474
497
  for recipe_id, recipe in enumerate(recipes_list, 1):
475
498
  (
476
499
  id,
@@ -718,7 +741,11 @@ run_cookbook_args.add_argument("name", type=str, help="Name of cookbook runner")
718
741
  run_cookbook_args.add_argument("cookbooks", type=str, help="List of cookbooks to run")
719
742
  run_cookbook_args.add_argument("endpoints", type=str, help="List of endpoints to run")
720
743
  run_cookbook_args.add_argument(
721
- "-n", "--num_of_prompts", type=int, default=0, help="Number of prompts to run"
744
+ "-n",
745
+ "--prompt_selection_percentage",
746
+ type=int,
747
+ default=100,
748
+ help="Percentage of prompts to run",
722
749
  )
723
750
  run_cookbook_args.add_argument(
724
751
  "-r", "--random_seed", type=int, default=0, help="Random seed number"
@@ -40,7 +40,8 @@ from moonshot.integrations.cli.cli_errors import (
40
40
  ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION_1,
41
41
  ERROR_BENCHMARK_RUN_RECIPE_NAME_VALIDATION,
42
42
  ERROR_BENCHMARK_RUN_RECIPE_NO_RESULT,
43
- ERROR_BENCHMARK_RUN_RECIPE_NUM_OF_PROMPTS_VALIDATION,
43
+ ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION,
44
+ ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_VALIDATION,
44
45
  ERROR_BENCHMARK_RUN_RECIPE_RANDOM_SEED_VALIDATION,
45
46
  ERROR_BENCHMARK_RUN_RECIPE_RECIPES_VALIDATION,
46
47
  ERROR_BENCHMARK_RUN_RECIPE_RECIPES_VALIDATION_1,
@@ -293,11 +294,12 @@ def run_recipe(args) -> None:
293
294
  The recipes are run against the specified endpoints, and the results are processed and displayed.
294
295
 
295
296
  Args:
296
- args: A namespace object from argparse. It should have the following attributes:
297
+ args (argparse.Namespace): The arguments provided to the command line interface.
298
+ Expected keys are:
297
299
  name (str): The name of the recipe runner.
298
300
  recipes (str): A string representation of a list of recipes to run.
299
301
  endpoints (str): A string representation of a list of endpoints to run.
300
- num_of_prompts (int): The number of prompts to run.
302
+ prompt_selection_percentage (int): The percentage of prompts to run.
301
303
  random_seed (int): The random seed number for reproducibility.
302
304
  system_prompt (str): The system prompt to use.
303
305
  runner_proc_module (str): The runner processing module to use.
@@ -329,10 +331,19 @@ def run_recipe(args) -> None:
329
331
  ):
330
332
  raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION)
331
333
 
332
- if isinstance(args.num_of_prompts, bool) or not isinstance(
333
- args.num_of_prompts, int
334
+ if isinstance(args.prompt_selection_percentage, bool) or not isinstance(
335
+ args.prompt_selection_percentage, int
336
+ ):
337
+ raise TypeError(
338
+ ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_VALIDATION
339
+ )
340
+ elif (
341
+ args.prompt_selection_percentage < 1
342
+ or args.prompt_selection_percentage > 100
334
343
  ):
335
- raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_NUM_OF_PROMPTS_VALIDATION)
344
+ raise ValueError(
345
+ ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION
346
+ )
336
347
 
337
348
  if isinstance(args.random_seed, bool) or not isinstance(args.random_seed, int):
338
349
  raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_RANDOM_SEED_VALIDATION)
@@ -377,7 +388,7 @@ def run_recipe(args) -> None:
377
388
  async def run():
378
389
  await rec_runner.run_recipes(
379
390
  recipes,
380
- args.num_of_prompts,
391
+ args.prompt_selection_percentage,
381
392
  args.random_seed,
382
393
  args.system_prompt,
383
394
  args.runner_proc_module,
@@ -809,7 +820,11 @@ run_recipe_args.add_argument("name", type=str, help="Name of recipe runner")
809
820
  run_recipe_args.add_argument("recipes", type=str, help="List of recipes to run")
810
821
  run_recipe_args.add_argument("endpoints", type=str, help="List of endpoints to run")
811
822
  run_recipe_args.add_argument(
812
- "-n", "--num_of_prompts", type=int, default=0, help="Number of prompts to run"
823
+ "-n",
824
+ "--prompt_selection_percentage",
825
+ type=int,
826
+ default=100,
827
+ help="Percentage of prompts to run",
813
828
  )
814
829
  run_recipe_args.add_argument(
815
830
  "-r", "--random_seed", type=int, default=0, help="Random seed number"
@@ -190,7 +190,7 @@ def _display_results(results_list):
190
190
  recipes = metadata["recipes"]
191
191
  cookbooks = metadata["cookbooks"]
192
192
  endpoints = metadata["endpoints"]
193
- num_of_prompts = metadata["num_of_prompts"]
193
+ prompt_selection_percentage = metadata["prompt_selection_percentage"]
194
194
  random_seed = metadata["random_seed"]
195
195
  system_prompt = metadata["system_prompt"]
196
196
  idx = result.get("idx", idx)
@@ -200,7 +200,9 @@ def _display_results(results_list):
200
200
  recipes_info = display_view_list_format("Recipes", recipes)
201
201
  cookbooks_info = display_view_list_format("Cookbooks", cookbooks)
202
202
  endpoints_info = display_view_list_format("Endpoints", endpoints)
203
- prompts_info = display_view_str_format("Number of Prompts", num_of_prompts)
203
+ prompts_info = display_view_str_format(
204
+ "Prompt Selection Percentage", prompt_selection_percentage
205
+ )
204
206
  seed_info = display_view_str_format("Seed", random_seed)
205
207
  system_prompt_info = display_view_str_format("System Prompt", system_prompt)
206
208
 
@@ -52,8 +52,11 @@ ERROR_BENCHMARK_RUN_COOKBOOK_ENDPOINTS_VALIDATION = (
52
52
  ERROR_BENCHMARK_RUN_COOKBOOK_ENDPOINTS_VALIDATION_1 = (
53
53
  "The 'endpoints' argument must evaluate to a list of strings."
54
54
  )
55
- ERROR_BENCHMARK_RUN_COOKBOOK_NUM_OF_PROMPTS_VALIDATION = (
56
- "The 'num_of_prompts' argument must be an integer."
55
+ ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_VALIDATION = (
56
+ "The 'prompt_selection_percentage' argument must be an integer."
57
+ )
58
+ ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION = (
59
+ "The 'prompt_selection_percentage' argument must be between 1 - 100."
57
60
  )
58
61
  ERROR_BENCHMARK_RUN_COOKBOOK_RANDOM_SEED_VALIDATION = (
59
62
  "The 'random_seed' argument must be an integer."
@@ -278,8 +281,11 @@ ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION = (
278
281
  ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION_1 = (
279
282
  "The 'endpoints' argument must evaluate to a list of strings."
280
283
  )
281
- ERROR_BENCHMARK_RUN_RECIPE_NUM_OF_PROMPTS_VALIDATION = (
282
- "The 'num_of_prompts' argument must be an integer."
284
+ ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_VALIDATION = (
285
+ "The 'prompt_selection_percentage' argument must be an integer."
286
+ )
287
+ ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION = (
288
+ "The 'prompt_selection_percentage' argument must be between 1 - 100."
283
289
  )
284
290
  ERROR_BENCHMARK_RUN_RECIPE_RANDOM_SEED_VALIDATION = (
285
291
  "The 'random_seed' argument must be an integer."
@@ -71,7 +71,7 @@ def create_app(cfg: providers.Configuration) -> CustomFastAPI:
71
71
  }
72
72
 
73
73
  app: CustomFastAPI = CustomFastAPI(
74
- title="Project Moonshot", version="0.5.1", **app_kwargs
74
+ title="Project Moonshot", version="0.6.1", **app_kwargs
75
75
  )
76
76
 
77
77
  if cfg.cors.enabled():
@@ -1,4 +1,4 @@
1
- from pydantic import BaseModel, ConfigDict
1
+ from pydantic import BaseModel, ConfigDict, Field
2
2
 
3
3
 
4
4
  class BenchmarkRunnerDTO(BaseModel):
@@ -7,7 +7,7 @@ class BenchmarkRunnerDTO(BaseModel):
7
7
  description: str
8
8
  endpoints: list[str]
9
9
  inputs: list[str]
10
- num_of_prompts: int
10
+ prompt_selection_percentage: int = Field(..., ge=1, le=100)
11
11
  random_seed: int
12
12
  system_prompt: str
13
13
  runner_processing_module: str
@@ -9,6 +9,8 @@ class CookbookCreateDTO(CookbookPydanticModel):
9
9
  id: Optional[str] = None
10
10
  name: str = Field(..., min_length=1)
11
11
  description: Optional[str] = Field(default="", min_length=1)
12
+ tags: Optional[list[str]] = []
13
+ categories: Optional[list[str]] = []
12
14
  recipes: list[str] = Field(..., min_length=1)
13
15
 
14
16
 
@@ -16,4 +18,6 @@ class CookbookUpdateDTO(CookbookPydanticModel):
16
18
  id: Optional[str] = None
17
19
  name: Optional[str] = Field(default=None, min_length=1)
18
20
  description: Optional[str] = Field(default=None, min_length=1)
21
+ tags: Optional[list[str]] = None
22
+ categories: Optional[list[str]] = None
19
23
  recipes: Optional[list[str]] = Field(default=None, min_length=1)
@@ -1,7 +1,6 @@
1
- from typing import Optional
1
+ from typing import Any, Optional
2
2
 
3
3
  from pydantic import Field
4
- from pyparsing import Iterator
5
4
 
6
5
  from moonshot.src.datasets.dataset_arguments import (
7
6
  DatasetArguments as DatasetPydanticModel,
@@ -10,7 +9,7 @@ from moonshot.src.datasets.dataset_arguments import (
10
9
 
11
10
  class CSV_Dataset_DTO(DatasetPydanticModel):
12
11
  id: Optional[str] = None # Not a required from user
13
- examples: Optional[Iterator[dict]] = None # Not a required from user
12
+ examples: Optional[Any] = None # Not a required from user
14
13
  name: str = Field(..., min_length=1)
15
14
  description: str = Field(default="", min_length=1)
16
15
  license: Optional[str] = ""
@@ -20,7 +19,7 @@ class CSV_Dataset_DTO(DatasetPydanticModel):
20
19
 
21
20
  class HF_Dataset_DTO(DatasetPydanticModel):
22
21
  id: Optional[str] = None # Not a required from user
23
- examples: Optional[Iterator[dict]] = None # Not a required from user
22
+ examples: Optional[Any] = None # Not a required from user
24
23
  name: str = Field(..., min_length=1)
25
24
  description: str = Field(default="", min_length=1)
26
25
  license: Optional[str] = ""
@@ -60,14 +60,14 @@ class BenchmarkTestManager(BaseService):
60
60
  if benchmark_type == BenchmarkCollectionType.COOKBOOK:
61
61
  async_run = moonshot_runner.run_cookbooks(
62
62
  cookbooks=benchmark_input_data.inputs,
63
- num_of_prompts=benchmark_input_data.num_of_prompts,
63
+ prompt_selection_percentage=benchmark_input_data.prompt_selection_percentage,
64
64
  random_seed=benchmark_input_data.random_seed,
65
65
  system_prompt=benchmark_input_data.system_prompt,
66
66
  )
67
67
  else:
68
68
  async_run = moonshot_runner.run_recipes(
69
69
  recipes=benchmark_input_data.inputs,
70
- num_of_prompts=benchmark_input_data.num_of_prompts,
70
+ prompt_selection_percentage=benchmark_input_data.prompt_selection_percentage,
71
71
  random_seed=benchmark_input_data.random_seed,
72
72
  system_prompt=benchmark_input_data.system_prompt,
73
73
  )
@@ -71,7 +71,7 @@ class CookbookService(BaseService):
71
71
  cookbook.total_dataset_in_cookbook,
72
72
  ) = get_total_prompt_and_dataset_in_cookbook(cookbook)
73
73
 
74
- if tags and cookbooks_recipe_has_tags(tags, cookbook):
74
+ if tags and cookbook_has_tags(tags, cookbook):
75
75
  if cookbook not in cookbooks_list:
76
76
  cookbooks_list.append(cookbook)
77
77
  if count:
@@ -80,7 +80,7 @@ class CookbookService(BaseService):
80
80
  cookbook.total_dataset_in_cookbook,
81
81
  ) = get_total_prompt_and_dataset_in_cookbook(cookbook)
82
82
 
83
- if categories and cookbooks_recipe_has_categories(categories, cookbook):
83
+ if categories and cookbook_has_categories(categories, cookbook):
84
84
  if cookbook not in cookbooks_list:
85
85
  cookbooks_list.append(cookbook)
86
86
  if count:
@@ -89,10 +89,16 @@ class CookbookService(BaseService):
89
89
  cookbook.total_dataset_in_cookbook,
90
90
  ) = get_total_prompt_and_dataset_in_cookbook(cookbook)
91
91
 
92
- if categories_excluded and cookbooks_recipe_has_categories(
93
- categories_excluded, cookbook
94
- ):
95
- cookbooks_list.remove(cookbook)
92
+ if categories_excluded:
93
+ excluded_categories_set = set(
94
+ category.lower() for category in categories_excluded.split(",")
95
+ )
96
+ cookbook_categories_set = set(
97
+ category.lower() for category in cookbook.categories
98
+ )
99
+ # Exclude only if all categories in the cookbook are in the excluded list
100
+ if cookbook_categories_set.issubset(excluded_categories_set):
101
+ cookbooks_list.remove(cookbook)
96
102
 
97
103
  for cookbook in cookbooks_list:
98
104
  cookbook.required_config = cookbook_metrics_dependency(cookbook)
@@ -160,50 +166,40 @@ def get_total_prompt_and_dataset_in_cookbook(cookbook: Cookbook) -> tuple[int, i
160
166
 
161
167
 
162
168
  @staticmethod
163
- def cookbooks_recipe_has_tags(tags: str, cookbook: Cookbook) -> bool:
169
+ def cookbook_has_tags(tags: str, cookbook: Cookbook) -> bool:
164
170
  """
165
- Check if any recipe in a cookbook has the specified tags.
171
+ Check if a cookbook has the specified tags.
166
172
 
167
173
  Args:
168
- tags (str): The tags to check for in the cookbook's recipes.
169
- cookbook (Cookbook): The cookbook object containing the recipe IDs.
174
+ tags (str): The tags to check for in the cookbook.
175
+ cookbook (Cookbook): The cookbook object.
170
176
 
171
177
  Returns:
172
- bool: True if any recipe in the cookbook has the specified tags, False otherwise.
178
+ bool: True if the cookbook has the specified tags, False otherwise.
173
179
  """
174
- recipe_ids = cookbook.recipes
175
- recipes = moonshot_api.api_read_recipes(recipe_ids)
176
- for recipe in recipes:
177
- recipe = Recipe(**recipe)
178
- if tags in recipe.tags:
179
- return True
180
- return False
180
+ tags_list = [tag.lower() for tag in tags.split(",")]
181
+ return any(tag in [ctag.lower() for ctag in cookbook.tags] for tag in tags_list)
181
182
 
182
183
 
183
184
  @staticmethod
184
- def cookbooks_recipe_has_categories(categories: str, cookbook: Cookbook) -> bool:
185
+ def cookbook_has_categories(categories: str, cookbook: Cookbook) -> bool:
185
186
  """
186
- Check if any recipe in a cookbook has the specified categories.
187
+ Check if a cookbook has the specified categories.
187
188
 
188
189
  Args:
189
- categories (str): The categories to check for in the cookbook's recipes.
190
- cookbook (Cookbook): The cookbook object containing the recipe IDs.
191
- exclude_categories (str): The categories to exclude
190
+ categories (str): The categories to check for in the cookbook.
191
+ cookbook (Cookbook): The cookbook object.
192
192
 
193
193
  Returns:
194
- bool: True if any recipe in the cookbook has the specified categories, False otherwise.
194
+ bool: True if the cookbook has the specified categories, False otherwise.
195
195
  """
196
- recipe_ids = cookbook.recipes
197
196
  categories_list = [category.lower() for category in categories.split(",")]
198
- recipes = moonshot_api.api_read_recipes(recipe_ids)
199
- for recipe in recipes:
200
- recipe = Recipe(**recipe)
201
- if any(
202
- category in [rcat.lower() for rcat in recipe.categories]
203
- for category in categories_list
204
- ):
205
- return True
206
- return False
197
+ return any(
198
+ category in [ccat.lower() for ccat in cookbook.categories]
199
+ for category in categories_list
200
+ )
201
+
202
+
207
203
 
208
204
 
209
205
  @staticmethod
@@ -85,7 +85,7 @@ class ResultMetadata(TypedDict):
85
85
  recipes: List[str]
86
86
  cookbooks: List[str]
87
87
  endpoints: List[str]
88
- num_of_prompts: int
88
+ prompt_selection_percentage: int
89
89
  status: str
90
90
 
91
91
 
@@ -2,6 +2,7 @@ from pydantic import conlist, validate_call
2
2
 
3
3
  from moonshot.src.cookbooks.cookbook import Cookbook
4
4
  from moonshot.src.cookbooks.cookbook_arguments import CookbookArguments
5
+ from moonshot.src.recipes.recipe import Recipe
5
6
 
6
7
 
7
8
  # ------------------------------------------------------------------------------
@@ -20,6 +21,8 @@ def api_create_cookbook(name: str, description: str, recipes: list[str]) -> str:
20
21
  Args:
21
22
  name (str): The name of the new cookbook.
22
23
  description (str): A brief description of the new cookbook.
24
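+ tags (list[str]): A list of tags associated with the cookbook (not an argument; generated from the selected recipes).
25
+ categories (list[str]): A list of categories the cookbook belongs to (not an argument; generated from the selected recipes).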
23
26
  recipes (list[str]): A list of recipes to be included in the new cookbook.
24
27
 
25
28
  Returns:
@@ -29,10 +32,13 @@ def api_create_cookbook(name: str, description: str, recipes: list[str]) -> str:
29
32
  # We do not need to provide the id.
30
33
  # This is because during creation:
31
34
  # 1. the id is slugified from the name and stored as id.
35
+ # We do not need to provide tags and categories as they will be generated based on the recipes selected.
32
36
  cb_args = CookbookArguments(
33
37
  id="",
34
38
  name=name,
35
39
  description=description,
40
+ tags=[],
41
+ categories=[],
36
42
  recipes=recipes,
37
43
  )
38
44
  return Cookbook.create(cb_args)
@@ -103,6 +109,20 @@ def api_update_cookbook(cb_id: str, **kwargs) -> bool:
103
109
  if hasattr(existing_cookbook, key):
104
110
  setattr(existing_cookbook, key, value)
105
111
 
112
+ # Update the cookbook's categories and tags if any of the recipe(s) are changed
113
+ if "recipes" in kwargs:
114
+ consolidated_tags = set()
115
+ consolidated_categories = set()
116
+ for key, value in kwargs.items():
117
+ if key == "recipes":
118
+ for recipe_id in value:
119
+ recipe = Recipe.read(recipe_id)
120
+ consolidated_tags.update(recipe.tags)
121
+ consolidated_categories.update(recipe.categories)
122
+ # Consolidate and set the tags and categories
123
+ existing_cookbook.tags = list(consolidated_tags)
124
+ existing_cookbook.categories = list(consolidated_categories)
125
+
106
126
  # Perform pydantic check on the updated existing cookbook
107
127
  CookbookArguments.model_validate(existing_cookbook.to_dict())
108
128
 
@@ -7,6 +7,7 @@ from slugify import slugify
7
7
 
8
8
  from moonshot.src.configs.env_variables import EnvVariables
9
9
  from moonshot.src.cookbooks.cookbook_arguments import CookbookArguments
10
+ from moonshot.src.recipes.recipe import Recipe
10
11
  from moonshot.src.storage.storage import Storage
11
12
  from moonshot.src.utils.log import configure_logger
12
13
 
@@ -19,6 +20,8 @@ class Cookbook:
19
20
  self.id = cb_args.id
20
21
  self.name = cb_args.name
21
22
  self.description = cb_args.description
23
+ self.tags = cb_args.tags
24
+ self.categories = cb_args.categories
22
25
  self.recipes = cb_args.recipes
23
26
 
24
27
  @classmethod
@@ -64,6 +67,8 @@ class Cookbook:
64
67
  cb_info = {
65
68
  "name": cb_args.name,
66
69
  "description": cb_args.description,
70
+ "tags": Cookbook.get_tags_in_recipes(cb_args.recipes),
71
+ "categories": Cookbook.get_categories_in_recipes(cb_args.recipes),
67
72
  "recipes": cb_args.recipes,
68
73
  }
69
74
 
@@ -243,3 +248,19 @@ class Cookbook:
243
248
  except Exception as e:
244
249
  logger.error(f"Failed to get available cookbooks: {str(e)}")
245
250
  raise e
251
+
252
+ @staticmethod
253
+ def get_categories_in_recipes(recipes: list[str]) -> list[str]:
254
+ return list(
255
+ {
256
+ category
257
+ for recipe_id in recipes
258
+ for category in Recipe.read(recipe_id).categories
259
+ }
260
+ )
261
+
262
+ @staticmethod
263
+ def get_tags_in_recipes(recipes: list[str]) -> list[str]:
264
+ return list(
265
+ {tag for recipe_id in recipes for tag in Recipe.read(recipe_id).tags}
266
+ )
@@ -8,6 +8,10 @@ class CookbookArguments(BaseModel):
8
8
 
9
9
  description: str # description (str): A brief description of the Cookbook.
10
10
 
11
+ tags: list[str] # tags (list): The list of tags in the Cookbook.
12
+
13
+ categories: list[str] # categories (list): The list of categories in the Cookbook.
14
+
11
15
  recipes: list[str] = Field(
12
16
  min_length=1
13
17
  ) # recipes (list): A list of recipes included in the Cookbook.
@@ -29,6 +33,8 @@ class CookbookArguments(BaseModel):
29
33
  return {
30
34
  "id": self.id,
31
35
  "name": self.name,
36
+ "tags": self.tags,
37
+ "categories": self.categories,
32
38
  "description": self.description,
33
39
  "recipes": self.recipes,
34
40
  }
@@ -315,7 +315,7 @@ class Runner:
315
315
  async def run_recipes(
316
316
  self,
317
317
  recipes: list[str],
318
- num_of_prompts: int = 0,
318
+ prompt_selection_percentage: int = 100,
319
319
  random_seed: int = 0,
320
320
  system_prompt: str = "",
321
321
  runner_processing_module: str = "benchmarking",
@@ -325,26 +325,21 @@ class Runner:
325
325
  Initiates an asynchronous benchmark run using a set of recipes.
326
326
 
327
327
  This method sets up and starts a benchmark run tailored for recipes. It instantiates a benchmark run object,
328
- applies the configuration based on the provided recipes, number of prompts, random seed, system prompt, and
328
+ applies the configuration based on the provided recipes, percentage of prompts, random seed, system prompt, and
329
329
  the specified runner and result processing modules, and then commences the run asynchronously.
330
330
 
331
331
  Args:
332
332
  recipes (list[str]): The recipes to be included in the benchmark run.
333
-
334
- num_of_prompts (int, optional): The count of prompts to utilize during the benchmark.
335
- Defaults to 0.
336
-
333
+ prompt_selection_percentage (int, optional): The percentage of prompts to utilize during the benchmark.
334
+ Defaults to 100.
337
335
  random_seed (int, optional): The seed for random number generation to ensure reproducibility.
338
- Defaults to 0.
339
-
336
+ Defaults to 0.
340
337
  system_prompt (str, optional): The system prompt to be used during the benchmark.
341
- Defaults to an empty string.
342
-
338
+ Defaults to an empty string.
343
339
  runner_processing_module (str, optional): The module responsible for processing the runner.
344
- Defaults to "benchmarking".
345
-
340
+ Defaults to "benchmarking".
346
341
  result_processing_module (str, optional): The module responsible for processing the results.
347
- Defaults to "benchmarking-result".
342
+ Defaults to "benchmarking-result".
348
343
 
349
344
  Raises:
350
345
  Exception: If any error occurs during the setup or execution of the benchmark run.
@@ -357,7 +352,7 @@ class Runner:
357
352
  RunnerType.BENCHMARK,
358
353
  {
359
354
  "recipes": recipes,
360
- "num_of_prompts": num_of_prompts,
355
+ "prompt_selection_percentage": prompt_selection_percentage,
361
356
  "random_seed": random_seed,
362
357
  "system_prompt": system_prompt,
363
358
  "runner_processing_module": runner_processing_module,
@@ -382,7 +377,7 @@ class Runner:
382
377
  async def run_cookbooks(
383
378
  self,
384
379
  cookbooks: list[str],
385
- num_of_prompts: int = 0,
380
+ prompt_selection_percentage: int = 100,
386
381
  random_seed: int = 0,
387
382
  system_prompt: str = "",
388
383
  runner_processing_module: str = "benchmarking",
@@ -393,26 +388,21 @@ class Runner:
393
388
 
394
389
  This method is responsible for initiating a benchmark cookbook run with the specified cookbooks and parameters.
395
390
  It creates a new benchmark cookbook run instance, configures it with the provided cookbook names,
396
- number of prompts, random seed, system prompt, runner processing module, and result processing module,
391
+ percentage of prompts, random seed, system prompt, runner processing module, and result processing module,
397
392
  and then starts the run asynchronously.
398
393
 
399
394
  Args:
400
395
  cookbooks (list[str]): A list of cookbook names to be run in the benchmark.
401
-
402
- num_of_prompts (int, optional): The number of prompts to be used in the benchmark run.
403
- Defaults to 0.
404
-
396
+ prompt_selection_percentage (int, optional): The percentage of prompts to be used in the benchmark run.
397
+ Defaults to 100.
405
398
  random_seed (int, optional): The seed for random number generation to ensure reproducibility.
406
- Defaults to 0.
407
-
399
+ Defaults to 0.
408
400
  system_prompt (str, optional): A system prompt to be used in the benchmark run.
409
- Defaults to an empty string.
410
-
401
+ Defaults to an empty string.
411
402
  runner_processing_module (str, optional): The module responsible for processing the runner.
412
- Defaults to "benchmarking".
413
-
403
+ Defaults to "benchmarking".
414
404
  result_processing_module (str, optional): The module responsible for processing the results.
415
- Defaults to "benchmarking-result".
405
+ Defaults to "benchmarking-result".
416
406
 
417
407
  Raises:
418
408
  Exception: If any error occurs during the setup or execution of the benchmark run.
@@ -425,7 +415,7 @@ class Runner:
425
415
  RunnerType.BENCHMARK,
426
416
  {
427
417
  "cookbooks": cookbooks,
428
- "num_of_prompts": num_of_prompts,
418
+ "prompt_selection_percentage": prompt_selection_percentage,
429
419
  "random_seed": random_seed,
430
420
  "system_prompt": system_prompt,
431
421
  "runner_processing_module": runner_processing_module,