aiverify-moonshot 0.5.1__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {aiverify_moonshot-0.5.1.dist-info → aiverify_moonshot-0.6.0.dist-info}/METADATA +2 -2
- {aiverify_moonshot-0.5.1.dist-info → aiverify_moonshot-0.6.0.dist-info}/RECORD +21 -21
- moonshot/integrations/cli/benchmark/cookbook.py +39 -12
- moonshot/integrations/cli/benchmark/recipe.py +23 -8
- moonshot/integrations/cli/benchmark/result.py +4 -2
- moonshot/integrations/cli/cli_errors.py +10 -4
- moonshot/integrations/web_api/app.py +1 -1
- moonshot/integrations/web_api/schemas/benchmark_runner_dto.py +2 -2
- moonshot/integrations/web_api/schemas/cookbook_create_dto.py +4 -0
- moonshot/integrations/web_api/schemas/dataset_create_dto.py +3 -4
- moonshot/integrations/web_api/services/benchmark_test_manager.py +2 -2
- moonshot/integrations/web_api/services/cookbook_service.py +30 -34
- moonshot/integrations/web_api/types/types.py +1 -1
- moonshot/src/api/api_cookbook.py +20 -0
- moonshot/src/cookbooks/cookbook.py +21 -0
- moonshot/src/cookbooks/cookbook_arguments.py +6 -0
- moonshot/src/runners/runner.py +18 -28
- {aiverify_moonshot-0.5.1.dist-info → aiverify_moonshot-0.6.0.dist-info}/WHEEL +0 -0
- {aiverify_moonshot-0.5.1.dist-info → aiverify_moonshot-0.6.0.dist-info}/licenses/AUTHORS.md +0 -0
- {aiverify_moonshot-0.5.1.dist-info → aiverify_moonshot-0.6.0.dist-info}/licenses/LICENSE.md +0 -0
- {aiverify_moonshot-0.5.1.dist-info → aiverify_moonshot-0.6.0.dist-info}/licenses/NOTICES.md +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: aiverify-moonshot
|
|
3
|
-
Version: 0.5.1
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: AI Verify advances Gen AI testing with Project Moonshot.
|
|
5
5
|
Project-URL: Repository, https://github.com/aiverify-foundation/moonshot
|
|
6
6
|
Project-URL: Documentation, https://aiverify-foundation.github.io/moonshot/
|
|
@@ -47,7 +47,7 @@ Description-Content-Type: text/markdown
|
|
|
47
47
|
|
|
48
48
|

|
|
49
49
|
|
|
50
|
-
**Version 0.5.1**
|
|
50
|
+
**Version 0.6.0**
|
|
51
51
|
|
|
52
52
|
A simple and modular tool to evaluate any LLM application.
|
|
53
53
|
|
|
@@ -6,14 +6,14 @@ moonshot/integrations/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJW
|
|
|
6
6
|
moonshot/integrations/cli/__main__.py,sha256=0VnYSj2AayvDCZ3uXpldPcjMHt2Yd7BWojWzFOGSSl4,679
|
|
7
7
|
moonshot/integrations/cli/active_session_cfg.py,sha256=n8hOFxFjvz26qbEFY4q7iPUZYrGLoeCmXJxmOb_xWUE,20
|
|
8
8
|
moonshot/integrations/cli/cli.py,sha256=9tnzcxcSOjblxCUpyh3pK0ke0bLs3s-63OxXtYoZI2g,2769
|
|
9
|
-
moonshot/integrations/cli/cli_errors.py,sha256=
|
|
9
|
+
moonshot/integrations/cli/cli_errors.py,sha256=ltQKnj9bawpVogQFOgLHbxL_9CcFQf3XOR6yQtdqGS4,24030
|
|
10
10
|
moonshot/integrations/cli/benchmark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
11
|
moonshot/integrations/cli/benchmark/benchmark.py,sha256=QUxr6DU11-XeH6Y3j1uPsZsotshgy64G_cWNf0Rn2_U,6303
|
|
12
|
-
moonshot/integrations/cli/benchmark/cookbook.py,sha256=
|
|
12
|
+
moonshot/integrations/cli/benchmark/cookbook.py,sha256=_ShWLEb1R_G3zKPmRVbeykRQ5pS_fv4IwbVeLEqYYEM,30265
|
|
13
13
|
moonshot/integrations/cli/benchmark/datasets.py,sha256=Uq5XMNWUp775sz9jCZUZHHmkumPFI7cHVRueHgWm70Q,8965
|
|
14
14
|
moonshot/integrations/cli/benchmark/metrics.py,sha256=SHs-hIa4CIPyOJtxK2U4D6IRHy3ZNsRtZlAMGvF9Qxw,8310
|
|
15
|
-
moonshot/integrations/cli/benchmark/recipe.py,sha256=
|
|
16
|
-
moonshot/integrations/cli/benchmark/result.py,sha256=
|
|
15
|
+
moonshot/integrations/cli/benchmark/recipe.py,sha256=KW0h1Ynga_2yc-jOd-ULQSbO9R7zETOz3qn3T23bqh8,32920
|
|
16
|
+
moonshot/integrations/cli/benchmark/result.py,sha256=o6_yca1PqFtbjWgnUpK8v_y2z6zuxmB_ue6MXEevpAo,11223
|
|
17
17
|
moonshot/integrations/cli/benchmark/run.py,sha256=HBztvG_Zkg1ZAWsFv0QDE43FaEmx92vTWc4h1U3VesU,7438
|
|
18
18
|
moonshot/integrations/cli/benchmark/runner.py,sha256=Y4Vt6Qqn9QzsM6eLUM9m2_XKkW3ctu-2jMTSei_TDPU,7098
|
|
19
19
|
moonshot/integrations/cli/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -34,7 +34,7 @@ moonshot/integrations/cli/utils/process_data.py,sha256=QVL5vp2_8ZgGicmCAdeYEHkeb
|
|
|
34
34
|
moonshot/integrations/web_api/.env.dev,sha256=0z5_Ut8rF-UqFZtgjkH2qoqORhD5_nSs2w_OeX2SteI,182
|
|
35
35
|
moonshot/integrations/web_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
36
36
|
moonshot/integrations/web_api/__main__.py,sha256=MdnLi_ZF-olAAEJwTPU1iGYFYwo-fNWNT2qfchkH3y4,2050
|
|
37
|
-
moonshot/integrations/web_api/app.py,sha256=
|
|
37
|
+
moonshot/integrations/web_api/app.py,sha256=GEWD-XAzGVs4zG-11MFNyPKdZXd6hwpaj1ac9Fh-0aQ,3651
|
|
38
38
|
moonshot/integrations/web_api/container.py,sha256=DVkJG_qm7ItcG6tgMYOqIj07wpKhPWOOfy6-bEv72y4,5915
|
|
39
39
|
moonshot/integrations/web_api/logging_conf.py,sha256=t3EGRV6tZhV732KXe8_Tiy0fiwVAWxZX5Tt8VTgrrfg,3388
|
|
40
40
|
moonshot/integrations/web_api/log/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -53,11 +53,11 @@ moonshot/integrations/web_api/routes/recipe.py,sha256=WOcq4bm2LP87ovO4Op6cDbUPJ2
|
|
|
53
53
|
moonshot/integrations/web_api/routes/redteam.py,sha256=t-jNot5_PkV6f5_WBorp1HL437NY5RZzxSE-2NfG0es,24541
|
|
54
54
|
moonshot/integrations/web_api/routes/runner.py,sha256=NQdAmVIOnNgSESX3am6wAE0YLIxHYXlnQbh00_7-SD4,8438
|
|
55
55
|
moonshot/integrations/web_api/schemas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
56
|
-
moonshot/integrations/web_api/schemas/benchmark_runner_dto.py,sha256=
|
|
56
|
+
moonshot/integrations/web_api/schemas/benchmark_runner_dto.py,sha256=IIn6KeMcwxTSlwXuCnOUhd3x24ucq-goV2brU1OvxT4,369
|
|
57
57
|
moonshot/integrations/web_api/schemas/bookmark_create_dto.py,sha256=C78vG8UG02N7Cmt6RSuS8e4sX_G-MLCiAWT-cF5BE8s,374
|
|
58
|
-
moonshot/integrations/web_api/schemas/cookbook_create_dto.py,sha256=
|
|
58
|
+
moonshot/integrations/web_api/schemas/cookbook_create_dto.py,sha256=wXC0tu1Q8SpSI3Qk0xKPj1vKsOJEYmfPgU4rl6QopUY,826
|
|
59
59
|
moonshot/integrations/web_api/schemas/cookbook_response_model.py,sha256=COLvaE4Hrz_w-C_HQkB7feztweIr0wkY9h8N6NKNIr8,332
|
|
60
|
-
moonshot/integrations/web_api/schemas/dataset_create_dto.py,sha256=
|
|
60
|
+
moonshot/integrations/web_api/schemas/dataset_create_dto.py,sha256=GRqIIlQZEpzzEXwAFcbDlxOuKg0JZ399axBjg34LMp8,915
|
|
61
61
|
moonshot/integrations/web_api/schemas/dataset_response_dto.py,sha256=s5x4-UXEWccWhK42E0FPXiHG6VqjuFuph-2t5atEkg4,171
|
|
62
62
|
moonshot/integrations/web_api/schemas/endpoint_create_dto.py,sha256=WS8AfRybrweoOgZx6K6jiNy1Z6J3IZS1PUNnrRxGKyM,678
|
|
63
63
|
moonshot/integrations/web_api/schemas/endpoint_response_model.py,sha256=OmmM2uaPSgB2aqPFfkhseKkI5OKCKilXR19gDmwFlLc,321
|
|
@@ -74,12 +74,12 @@ moonshot/integrations/web_api/services/auto_red_team_test_manager.py,sha256=a_aB
|
|
|
74
74
|
moonshot/integrations/web_api/services/auto_red_team_test_state.py,sha256=GRmvdYLwQdE8gGkYD9Sd4n__yEBajl2pRA_V0J2YObE,1952
|
|
75
75
|
moonshot/integrations/web_api/services/base_service.py,sha256=_MaQEuBpRNNHXNPylZUGaUVCSA5a2jHi9NoKBpvIprs,172
|
|
76
76
|
moonshot/integrations/web_api/services/benchmark_result_service.py,sha256=-oPvLL7b-pEAOeY0gwlngpgImklkUiwvPE6IJo83a7M,909
|
|
77
|
-
moonshot/integrations/web_api/services/benchmark_test_manager.py,sha256=
|
|
77
|
+
moonshot/integrations/web_api/services/benchmark_test_manager.py,sha256=aPoB6hOfOYqsDliiIzZ0y6cCI0mPDXLK21j9fHXm10U,4076
|
|
78
78
|
moonshot/integrations/web_api/services/benchmark_test_state.py,sha256=MyhTxpAhhP66JF0ua1SMc_IIeIjDxQY5swOXv9cmYaY,1887
|
|
79
79
|
moonshot/integrations/web_api/services/benchmarking_service.py,sha256=lJZeNTqxEPBLrZNX3Z9JIilgwetywSkv0deQkcb8mQs,1257
|
|
80
80
|
moonshot/integrations/web_api/services/bookmark_service.py,sha256=jI9nXs1hjzO0CLG2LKaXSzDApLThkfCvPUkaNNV9A5A,3546
|
|
81
81
|
moonshot/integrations/web_api/services/context_strategy_service.py,sha256=6YKnnG8JlE_1nlnr4Hq7rgz-sxI6oQglK0STaWPFQxQ,710
|
|
82
|
-
moonshot/integrations/web_api/services/cookbook_service.py,sha256=
|
|
82
|
+
moonshot/integrations/web_api/services/cookbook_service.py,sha256=37iJZn4ybe9tugBWB99g1SAN1YUtkmaq2mLQWj_HBQo,8736
|
|
83
83
|
moonshot/integrations/web_api/services/dataset_service.py,sha256=ZWb3FqyDkA0C9qhlQ3X_zR0ohAlwlLsJi-mgKLvXpnI,2407
|
|
84
84
|
moonshot/integrations/web_api/services/endpoint_service.py,sha256=N5SXNAh44UNeBpMhA9baL0VZoTx4sHzpy4y7-Ch8O4E,2395
|
|
85
85
|
moonshot/integrations/web_api/services/metric_service.py,sha256=xWC5Dk8aiU7tuHsxYedTTrEkbA3Ug1pV2nbaBas6cAg,456
|
|
@@ -94,7 +94,7 @@ moonshot/integrations/web_api/status_updater/moonshot_ui_webhook.py,sha256=ToyyC
|
|
|
94
94
|
moonshot/integrations/web_api/status_updater/interface/benchmark_progress_callback.py,sha256=MOs_1CKpNh2m3JUAEoJfmZOBivk80DNtSnRuTCJgzJ4,350
|
|
95
95
|
moonshot/integrations/web_api/status_updater/interface/redteam_progress_callback.py,sha256=JRczi3vCq6oPfOddPrF4OCdyHQYAVxgPWK-qOJxElKg,350
|
|
96
96
|
moonshot/integrations/web_api/temp/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
97
|
-
moonshot/integrations/web_api/types/types.py,sha256=
|
|
97
|
+
moonshot/integrations/web_api/types/types.py,sha256=zy1Jlqgju_F7Gb4SaJf7O70egH98D0b4DDCZ384HG2E,2408
|
|
98
98
|
moonshot/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
99
99
|
moonshot/src/messages_constants.py,sha256=usbvwitgRdOVY0ARdBbh9uiLNUb6WCdCuSoa64tVMhM,6936
|
|
100
100
|
moonshot/src/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -102,7 +102,7 @@ moonshot/src/api/api_bookmark.py,sha256=gg7uJU7ixZtmJ99S3xdgQgcznkY8SjoJaTBkSsno
|
|
|
102
102
|
moonshot/src/api/api_connector.py,sha256=Q_of-aHPuWkbefMJq4uXctJl89G2Tt6J_HfSuf1hE6g,2234
|
|
103
103
|
moonshot/src/api/api_connector_endpoint.py,sha256=lwfhlWNBJ6QotqffmURtjRmxfzbBlSIAZupeSpMt9VU,5584
|
|
104
104
|
moonshot/src/api/api_context_strategy.py,sha256=uRIfNjKJ_Wk9nSrvbPRfrdQLpG0K6kH9rl5tmmHui40,2151
|
|
105
|
-
moonshot/src/api/api_cookbook.py,sha256=
|
|
105
|
+
moonshot/src/api/api_cookbook.py,sha256=V05abHvzElrO7LkSyhOMcAHEfCfIgopd6L0cSSO3Dro,6722
|
|
106
106
|
moonshot/src/api/api_dataset.py,sha256=i2KwnZ-6fTm_tyn8cRw8iesrGi7_Nh0-1bFuN7m0TVo,4066
|
|
107
107
|
moonshot/src/api/api_environment_variables.py,sha256=wRx6rm95ItyL_uKUAYfSjcPZNbRxKl1GGS4PpWcTE1s,712
|
|
108
108
|
moonshot/src/api/api_metrics.py,sha256=x5DiysTYQsMmcAS2y2XpgvrPobZk7GT2rhO-MaIRun4,1603
|
|
@@ -125,8 +125,8 @@ moonshot/src/connectors_endpoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
|
|
|
125
125
|
moonshot/src/connectors_endpoints/connector_endpoint.py,sha256=3U4030gKhvDt4e7VD3lac2FG0kxGJ0DSLPTAUKYOPEk,10018
|
|
126
126
|
moonshot/src/connectors_endpoints/connector_endpoint_arguments.py,sha256=0v9mUHki4l22CK8o8UjATAsFDza9Lutbh4QplLiDXs8,2434
|
|
127
127
|
moonshot/src/cookbooks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
128
|
-
moonshot/src/cookbooks/cookbook.py,sha256=
|
|
129
|
-
moonshot/src/cookbooks/cookbook_arguments.py,sha256=
|
|
128
|
+
moonshot/src/cookbooks/cookbook.py,sha256=DdZwRGx5-xTDIKcXtZRpp7Qb9Mm9dNGwXWLQXoQrBBo,10412
|
|
129
|
+
moonshot/src/cookbooks/cookbook_arguments.py,sha256=SmNG8D5qN2K2dcImDaSBPHsna0Gy60ZR49_eTKEsvVU,1445
|
|
130
130
|
moonshot/src/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
131
131
|
moonshot/src/datasets/dataset.py,sha256=-_uhjR7zi50nkLu1WWlPCCWr14VwFUDfhTeeBHOhb70,14236
|
|
132
132
|
moonshot/src/datasets/dataset_arguments.py,sha256=rUcxxo2WTcHhLLV-WoixjOfT_Ju7hFCq811_ctjegt8,1751
|
|
@@ -154,7 +154,7 @@ moonshot/src/results/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
|
|
|
154
154
|
moonshot/src/results/result.py,sha256=o56SdhYH-XVfpeeKhN495dJPkU035MmTjRUx48q53lo,4527
|
|
155
155
|
moonshot/src/results/result_arguments.py,sha256=mTR7yajY72PFglfAaa1ajJfvYNV4IBGLXS4VaD53-8c,1334
|
|
156
156
|
moonshot/src/runners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
157
|
-
moonshot/src/runners/runner.py,sha256=
|
|
157
|
+
moonshot/src/runners/runner.py,sha256=7xp4GwiN4fO-ib7CahkaYXoVBB40cuVNJd5DxJxrYsk,21362
|
|
158
158
|
moonshot/src/runners/runner_arguments.py,sha256=Bg4OPSmgr9jZKNAwPH0T3epEHw-6qGrflszFc6oMyEU,1640
|
|
159
159
|
moonshot/src/runners/runner_type.py,sha256=jOfnAnaCYp-rPTRJXhM8hin_dinlR0sMwmimQXvLcJ0,100
|
|
160
160
|
moonshot/src/runs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -172,9 +172,9 @@ moonshot/src/utils/import_modules.py,sha256=T9zTN59PFnvY2rjyWhSV9KSIAHxWV1pyBemF
|
|
|
172
172
|
moonshot/src/utils/log.py,sha256=YNgD7Eh2OT36XlmVBKCGUTAh9TRp4Akfe4kDdvHASgs,2502
|
|
173
173
|
moonshot/src/utils/pagination.py,sha256=5seymyRoqyENIhKllAatr1T91kMCGFslcvRnJHyMSvc,814
|
|
174
174
|
moonshot/src/utils/timeit.py,sha256=TvuF0w8KWhp0oZFY0cUU3UY0xlGKjchb0OkfYfgVTlc,866
|
|
175
|
-
aiverify_moonshot-0.
|
|
176
|
-
aiverify_moonshot-0.
|
|
177
|
-
aiverify_moonshot-0.
|
|
178
|
-
aiverify_moonshot-0.
|
|
179
|
-
aiverify_moonshot-0.
|
|
180
|
-
aiverify_moonshot-0.
|
|
175
|
+
aiverify_moonshot-0.6.0.dist-info/METADATA,sha256=80QAfhALPe1HdwrDcXBuhAG6OduDw0x9fQgxQfhX3tc,12419
|
|
176
|
+
aiverify_moonshot-0.6.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
177
|
+
aiverify_moonshot-0.6.0.dist-info/licenses/AUTHORS.md,sha256=mmAbe3i3sT8JZHJMBhxp3i1xRehV0g7WB4T_eyIBuBs,59
|
|
178
|
+
aiverify_moonshot-0.6.0.dist-info/licenses/LICENSE.md,sha256=53izDRmJZZCjpYGfyLqlxnGQN-aNWBxasuzuMXC5Ias,11347
|
|
179
|
+
aiverify_moonshot-0.6.0.dist-info/licenses/NOTICES.md,sha256=vS1zZYAnGjCJdwQ13xv3b2zc30wOS98ZnCKluT-AhHs,123266
|
|
180
|
+
aiverify_moonshot-0.6.0.dist-info/RECORD,,
|
|
@@ -37,7 +37,8 @@ from moonshot.integrations.cli.cli_errors import (
|
|
|
37
37
|
ERROR_BENCHMARK_RUN_COOKBOOK_ENDPOINTS_VALIDATION_1,
|
|
38
38
|
ERROR_BENCHMARK_RUN_COOKBOOK_NAME_VALIDATION,
|
|
39
39
|
ERROR_BENCHMARK_RUN_COOKBOOK_NO_RESULT,
|
|
40
|
-
|
|
40
|
+
ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION,
|
|
41
|
+
ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_VALIDATION,
|
|
41
42
|
ERROR_BENCHMARK_RUN_COOKBOOK_RANDOM_SEED_VALIDATION,
|
|
42
43
|
ERROR_BENCHMARK_RUN_COOKBOOK_RESULT_PROC_MOD_VALIDATION,
|
|
43
44
|
ERROR_BENCHMARK_RUN_COOKBOOK_RUNNER_PROC_MOD_VALIDATION,
|
|
@@ -212,11 +213,12 @@ def run_cookbook(args) -> None:
|
|
|
212
213
|
The cookbooks are run against the specified endpoints, and the results are processed and displayed.
|
|
213
214
|
|
|
214
215
|
Args:
|
|
215
|
-
args
|
|
216
|
+
args (argparse.Namespace): The arguments provided to the command line interface.
|
|
217
|
+
Expected keys are:
|
|
216
218
|
name (str): The name of the cookbook runner.
|
|
217
219
|
cookbooks (str): A string representation of a list of cookbooks to run.
|
|
218
220
|
endpoints (str): A string representation of a list of endpoints to run.
|
|
219
|
-
|
|
221
|
+
prompt_selection_percentage (int): The percentage of prompts to run.
|
|
220
222
|
random_seed (int): The random seed number for reproducibility.
|
|
221
223
|
system_prompt (str): The system prompt to use.
|
|
222
224
|
runner_proc_module (str): The runner processing module to use.
|
|
@@ -248,10 +250,19 @@ def run_cookbook(args) -> None:
|
|
|
248
250
|
):
|
|
249
251
|
raise TypeError(ERROR_BENCHMARK_RUN_COOKBOOK_ENDPOINTS_VALIDATION)
|
|
250
252
|
|
|
251
|
-
if isinstance(args.
|
|
252
|
-
args.
|
|
253
|
+
if isinstance(args.prompt_selection_percentage, bool) or not isinstance(
|
|
254
|
+
args.prompt_selection_percentage, int
|
|
255
|
+
):
|
|
256
|
+
raise TypeError(
|
|
257
|
+
ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_VALIDATION
|
|
258
|
+
)
|
|
259
|
+
elif (
|
|
260
|
+
args.prompt_selection_percentage < 1
|
|
261
|
+
or args.prompt_selection_percentage > 100
|
|
253
262
|
):
|
|
254
|
-
raise
|
|
263
|
+
raise ValueError(
|
|
264
|
+
ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION
|
|
265
|
+
)
|
|
255
266
|
|
|
256
267
|
if isinstance(args.random_seed, bool) or not isinstance(args.random_seed, int):
|
|
257
268
|
raise TypeError(ERROR_BENCHMARK_RUN_COOKBOOK_RANDOM_SEED_VALIDATION)
|
|
@@ -297,7 +308,7 @@ def run_cookbook(args) -> None:
|
|
|
297
308
|
async def run():
|
|
298
309
|
await cb_runner.run_cookbooks(
|
|
299
310
|
cookbooks,
|
|
300
|
-
args.
|
|
311
|
+
args.prompt_selection_percentage,
|
|
301
312
|
args.random_seed,
|
|
302
313
|
args.system_prompt,
|
|
303
314
|
args.runner_proc_module,
|
|
@@ -436,9 +447,20 @@ def _display_cookbooks(cookbooks_list):
|
|
|
436
447
|
table.add_column("Cookbook", justify="left", width=78)
|
|
437
448
|
table.add_column("Contains", justify="left", width=20, overflow="fold")
|
|
438
449
|
for idx, cookbook in enumerate(cookbooks_list, 1):
|
|
439
|
-
|
|
450
|
+
(
|
|
451
|
+
id,
|
|
452
|
+
name,
|
|
453
|
+
tags,
|
|
454
|
+
categories,
|
|
455
|
+
description,
|
|
456
|
+
recipes,
|
|
457
|
+
*other_args,
|
|
458
|
+
) = cookbook.values()
|
|
440
459
|
idx = cookbook.get("idx", idx)
|
|
441
|
-
cookbook_info = f"[red]ID: {id}[/red]\n\n[blue]{name}[/blue]\n{description}"
|
|
460
|
+
cookbook_info = f"[red]ID: {id}[/red]\n\n[blue]{name}[/blue]\n\n{description}"
|
|
461
|
+
cookbook_info += (
|
|
462
|
+
f"\n\n[blue]Tags: {tags}[/blue]\n[blue]Categories: {categories}[/blue]\n"
|
|
463
|
+
)
|
|
442
464
|
recipes_info = display_view_list_format("Recipes", recipes)
|
|
443
465
|
table.add_section()
|
|
444
466
|
table.add_row(str(idx), cookbook_info, recipes_info)
|
|
@@ -459,11 +481,11 @@ def _display_view_cookbook(cookbook_info):
|
|
|
459
481
|
Returns:
|
|
460
482
|
None
|
|
461
483
|
"""
|
|
462
|
-
id, name, description, recipes = cookbook_info.values()
|
|
484
|
+
id, name, tags, categories, description, recipes = cookbook_info.values()
|
|
463
485
|
recipes_list = api_read_recipes(recipes)
|
|
464
486
|
if recipes_list:
|
|
465
487
|
table = Table(
|
|
466
|
-
title=f'Cookbook "{name}"',
|
|
488
|
+
title=f'Cookbook: "{name}"\n Tags: {tags}\n Categories: {categories}\n',
|
|
467
489
|
show_lines=True,
|
|
468
490
|
expand=True,
|
|
469
491
|
header_style="bold",
|
|
@@ -471,6 +493,7 @@ def _display_view_cookbook(cookbook_info):
|
|
|
471
493
|
table.add_column("No.", width=2)
|
|
472
494
|
table.add_column("Recipe", justify="left", width=78)
|
|
473
495
|
table.add_column("Contains", justify="left", width=20, overflow="fold")
|
|
496
|
+
|
|
474
497
|
for recipe_id, recipe in enumerate(recipes_list, 1):
|
|
475
498
|
(
|
|
476
499
|
id,
|
|
@@ -718,7 +741,11 @@ run_cookbook_args.add_argument("name", type=str, help="Name of cookbook runner")
|
|
|
718
741
|
run_cookbook_args.add_argument("cookbooks", type=str, help="List of cookbooks to run")
|
|
719
742
|
run_cookbook_args.add_argument("endpoints", type=str, help="List of endpoints to run")
|
|
720
743
|
run_cookbook_args.add_argument(
|
|
721
|
-
"-n",
|
|
744
|
+
"-n",
|
|
745
|
+
"--prompt_selection_percentage",
|
|
746
|
+
type=int,
|
|
747
|
+
default=100,
|
|
748
|
+
help="Percentage of prompts to run",
|
|
722
749
|
)
|
|
723
750
|
run_cookbook_args.add_argument(
|
|
724
751
|
"-r", "--random_seed", type=int, default=0, help="Random seed number"
|
|
@@ -40,7 +40,8 @@ from moonshot.integrations.cli.cli_errors import (
|
|
|
40
40
|
ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION_1,
|
|
41
41
|
ERROR_BENCHMARK_RUN_RECIPE_NAME_VALIDATION,
|
|
42
42
|
ERROR_BENCHMARK_RUN_RECIPE_NO_RESULT,
|
|
43
|
-
|
|
43
|
+
ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION,
|
|
44
|
+
ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_VALIDATION,
|
|
44
45
|
ERROR_BENCHMARK_RUN_RECIPE_RANDOM_SEED_VALIDATION,
|
|
45
46
|
ERROR_BENCHMARK_RUN_RECIPE_RECIPES_VALIDATION,
|
|
46
47
|
ERROR_BENCHMARK_RUN_RECIPE_RECIPES_VALIDATION_1,
|
|
@@ -293,11 +294,12 @@ def run_recipe(args) -> None:
|
|
|
293
294
|
The recipes are run against the specified endpoints, and the results are processed and displayed.
|
|
294
295
|
|
|
295
296
|
Args:
|
|
296
|
-
args
|
|
297
|
+
args (argparse.Namespace): The arguments provided to the command line interface.
|
|
298
|
+
Expected keys are:
|
|
297
299
|
name (str): The name of the recipe runner.
|
|
298
300
|
recipes (str): A string representation of a list of recipes to run.
|
|
299
301
|
endpoints (str): A string representation of a list of endpoints to run.
|
|
300
|
-
|
|
302
|
+
prompt_selection_percentage (int): The percentage of prompts to run.
|
|
301
303
|
random_seed (int): The random seed number for reproducibility.
|
|
302
304
|
system_prompt (str): The system prompt to use.
|
|
303
305
|
runner_proc_module (str): The runner processing module to use.
|
|
@@ -329,10 +331,19 @@ def run_recipe(args) -> None:
|
|
|
329
331
|
):
|
|
330
332
|
raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION)
|
|
331
333
|
|
|
332
|
-
if isinstance(args.
|
|
333
|
-
args.
|
|
334
|
+
if isinstance(args.prompt_selection_percentage, bool) or not isinstance(
|
|
335
|
+
args.prompt_selection_percentage, int
|
|
336
|
+
):
|
|
337
|
+
raise TypeError(
|
|
338
|
+
ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_VALIDATION
|
|
339
|
+
)
|
|
340
|
+
elif (
|
|
341
|
+
args.prompt_selection_percentage < 1
|
|
342
|
+
or args.prompt_selection_percentage > 100
|
|
334
343
|
):
|
|
335
|
-
raise
|
|
344
|
+
raise ValueError(
|
|
345
|
+
ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION
|
|
346
|
+
)
|
|
336
347
|
|
|
337
348
|
if isinstance(args.random_seed, bool) or not isinstance(args.random_seed, int):
|
|
338
349
|
raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_RANDOM_SEED_VALIDATION)
|
|
@@ -377,7 +388,7 @@ def run_recipe(args) -> None:
|
|
|
377
388
|
async def run():
|
|
378
389
|
await rec_runner.run_recipes(
|
|
379
390
|
recipes,
|
|
380
|
-
args.
|
|
391
|
+
args.prompt_selection_percentage,
|
|
381
392
|
args.random_seed,
|
|
382
393
|
args.system_prompt,
|
|
383
394
|
args.runner_proc_module,
|
|
@@ -809,7 +820,11 @@ run_recipe_args.add_argument("name", type=str, help="Name of recipe runner")
|
|
|
809
820
|
run_recipe_args.add_argument("recipes", type=str, help="List of recipes to run")
|
|
810
821
|
run_recipe_args.add_argument("endpoints", type=str, help="List of endpoints to run")
|
|
811
822
|
run_recipe_args.add_argument(
|
|
812
|
-
"-n",
|
|
823
|
+
"-n",
|
|
824
|
+
"--prompt_selection_percentage",
|
|
825
|
+
type=int,
|
|
826
|
+
default=100,
|
|
827
|
+
help="Percentage of prompts to run",
|
|
813
828
|
)
|
|
814
829
|
run_recipe_args.add_argument(
|
|
815
830
|
"-r", "--random_seed", type=int, default=0, help="Random seed number"
|
|
@@ -190,7 +190,7 @@ def _display_results(results_list):
|
|
|
190
190
|
recipes = metadata["recipes"]
|
|
191
191
|
cookbooks = metadata["cookbooks"]
|
|
192
192
|
endpoints = metadata["endpoints"]
|
|
193
|
-
|
|
193
|
+
prompt_selection_percentage = metadata["prompt_selection_percentage"]
|
|
194
194
|
random_seed = metadata["random_seed"]
|
|
195
195
|
system_prompt = metadata["system_prompt"]
|
|
196
196
|
idx = result.get("idx", idx)
|
|
@@ -200,7 +200,9 @@ def _display_results(results_list):
|
|
|
200
200
|
recipes_info = display_view_list_format("Recipes", recipes)
|
|
201
201
|
cookbooks_info = display_view_list_format("Cookbooks", cookbooks)
|
|
202
202
|
endpoints_info = display_view_list_format("Endpoints", endpoints)
|
|
203
|
-
prompts_info = display_view_str_format(
|
|
203
|
+
prompts_info = display_view_str_format(
|
|
204
|
+
"Prompt Selection Percentage", prompt_selection_percentage
|
|
205
|
+
)
|
|
204
206
|
seed_info = display_view_str_format("Seed", random_seed)
|
|
205
207
|
system_prompt_info = display_view_str_format("System Prompt", system_prompt)
|
|
206
208
|
|
|
@@ -52,8 +52,11 @@ ERROR_BENCHMARK_RUN_COOKBOOK_ENDPOINTS_VALIDATION = (
|
|
|
52
52
|
ERROR_BENCHMARK_RUN_COOKBOOK_ENDPOINTS_VALIDATION_1 = (
|
|
53
53
|
"The 'endpoints' argument must evaluate to a list of strings."
|
|
54
54
|
)
|
|
55
|
-
|
|
56
|
-
"The '
|
|
55
|
+
ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_VALIDATION = (
|
|
56
|
+
"The 'prompt_selection_percentage' argument must be an integer."
|
|
57
|
+
)
|
|
58
|
+
ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION = (
|
|
59
|
+
"The 'prompt_selection_percentage' argument must be between 1 - 100."
|
|
57
60
|
)
|
|
58
61
|
ERROR_BENCHMARK_RUN_COOKBOOK_RANDOM_SEED_VALIDATION = (
|
|
59
62
|
"The 'random_seed' argument must be an integer."
|
|
@@ -278,8 +281,11 @@ ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION = (
|
|
|
278
281
|
ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION_1 = (
|
|
279
282
|
"The 'endpoints' argument must evaluate to a list of strings."
|
|
280
283
|
)
|
|
281
|
-
|
|
282
|
-
"The '
|
|
284
|
+
ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_VALIDATION = (
|
|
285
|
+
"The 'prompt_selection_percentage' argument must be an integer."
|
|
286
|
+
)
|
|
287
|
+
ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION = (
|
|
288
|
+
"The 'prompt_selection_percentage' argument must be between 1 - 100."
|
|
283
289
|
)
|
|
284
290
|
ERROR_BENCHMARK_RUN_RECIPE_RANDOM_SEED_VALIDATION = (
|
|
285
291
|
"The 'random_seed' argument must be an integer."
|
|
@@ -71,7 +71,7 @@ def create_app(cfg: providers.Configuration) -> CustomFastAPI:
|
|
|
71
71
|
}
|
|
72
72
|
|
|
73
73
|
app: CustomFastAPI = CustomFastAPI(
|
|
74
|
-
title="Project Moonshot", version="0.5.1", **app_kwargs
|
|
74
|
+
title="Project Moonshot", version="0.6.0", **app_kwargs
|
|
75
75
|
)
|
|
76
76
|
|
|
77
77
|
if cfg.cors.enabled():
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from pydantic import BaseModel, ConfigDict
|
|
1
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
class BenchmarkRunnerDTO(BaseModel):
|
|
@@ -7,7 +7,7 @@ class BenchmarkRunnerDTO(BaseModel):
|
|
|
7
7
|
description: str
|
|
8
8
|
endpoints: list[str]
|
|
9
9
|
inputs: list[str]
|
|
10
|
-
|
|
10
|
+
prompt_selection_percentage: int = Field(..., ge=1, le=100)
|
|
11
11
|
random_seed: int
|
|
12
12
|
system_prompt: str
|
|
13
13
|
runner_processing_module: str
|
|
@@ -9,6 +9,8 @@ class CookbookCreateDTO(CookbookPydanticModel):
|
|
|
9
9
|
id: Optional[str] = None
|
|
10
10
|
name: str = Field(..., min_length=1)
|
|
11
11
|
description: Optional[str] = Field(default="", min_length=1)
|
|
12
|
+
tags: Optional[list[str]] = []
|
|
13
|
+
categories: Optional[list[str]] = []
|
|
12
14
|
recipes: list[str] = Field(..., min_length=1)
|
|
13
15
|
|
|
14
16
|
|
|
@@ -16,4 +18,6 @@ class CookbookUpdateDTO(CookbookPydanticModel):
|
|
|
16
18
|
id: Optional[str] = None
|
|
17
19
|
name: Optional[str] = Field(default=None, min_length=1)
|
|
18
20
|
description: Optional[str] = Field(default=None, min_length=1)
|
|
21
|
+
tags: Optional[list[str]] = None
|
|
22
|
+
categories: Optional[list[str]] = None
|
|
19
23
|
recipes: Optional[list[str]] = Field(default=None, min_length=1)
|
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
from typing import Optional
|
|
1
|
+
from typing import Any, Optional
|
|
2
2
|
|
|
3
3
|
from pydantic import Field
|
|
4
|
-
from pyparsing import Iterator
|
|
5
4
|
|
|
6
5
|
from moonshot.src.datasets.dataset_arguments import (
|
|
7
6
|
DatasetArguments as DatasetPydanticModel,
|
|
@@ -10,7 +9,7 @@ from moonshot.src.datasets.dataset_arguments import (
|
|
|
10
9
|
|
|
11
10
|
class CSV_Dataset_DTO(DatasetPydanticModel):
|
|
12
11
|
id: Optional[str] = None # Not a required from user
|
|
13
|
-
examples: Optional[
|
|
12
|
+
examples: Optional[Any] = None # Not a required from user
|
|
14
13
|
name: str = Field(..., min_length=1)
|
|
15
14
|
description: str = Field(default="", min_length=1)
|
|
16
15
|
license: Optional[str] = ""
|
|
@@ -20,7 +19,7 @@ class CSV_Dataset_DTO(DatasetPydanticModel):
|
|
|
20
19
|
|
|
21
20
|
class HF_Dataset_DTO(DatasetPydanticModel):
|
|
22
21
|
id: Optional[str] = None # Not a required from user
|
|
23
|
-
examples: Optional[
|
|
22
|
+
examples: Optional[Any] = None # Not a required from user
|
|
24
23
|
name: str = Field(..., min_length=1)
|
|
25
24
|
description: str = Field(default="", min_length=1)
|
|
26
25
|
license: Optional[str] = ""
|
|
@@ -60,14 +60,14 @@ class BenchmarkTestManager(BaseService):
|
|
|
60
60
|
if benchmark_type == BenchmarkCollectionType.COOKBOOK:
|
|
61
61
|
async_run = moonshot_runner.run_cookbooks(
|
|
62
62
|
cookbooks=benchmark_input_data.inputs,
|
|
63
|
-
|
|
63
|
+
prompt_selection_percentage=benchmark_input_data.prompt_selection_percentage,
|
|
64
64
|
random_seed=benchmark_input_data.random_seed,
|
|
65
65
|
system_prompt=benchmark_input_data.system_prompt,
|
|
66
66
|
)
|
|
67
67
|
else:
|
|
68
68
|
async_run = moonshot_runner.run_recipes(
|
|
69
69
|
recipes=benchmark_input_data.inputs,
|
|
70
|
-
|
|
70
|
+
prompt_selection_percentage=benchmark_input_data.prompt_selection_percentage,
|
|
71
71
|
random_seed=benchmark_input_data.random_seed,
|
|
72
72
|
system_prompt=benchmark_input_data.system_prompt,
|
|
73
73
|
)
|
|
@@ -71,7 +71,7 @@ class CookbookService(BaseService):
|
|
|
71
71
|
cookbook.total_dataset_in_cookbook,
|
|
72
72
|
) = get_total_prompt_and_dataset_in_cookbook(cookbook)
|
|
73
73
|
|
|
74
|
-
if tags and
|
|
74
|
+
if tags and cookbook_has_tags(tags, cookbook):
|
|
75
75
|
if cookbook not in cookbooks_list:
|
|
76
76
|
cookbooks_list.append(cookbook)
|
|
77
77
|
if count:
|
|
@@ -80,7 +80,7 @@ class CookbookService(BaseService):
|
|
|
80
80
|
cookbook.total_dataset_in_cookbook,
|
|
81
81
|
) = get_total_prompt_and_dataset_in_cookbook(cookbook)
|
|
82
82
|
|
|
83
|
-
if categories and
|
|
83
|
+
if categories and cookbook_has_categories(categories, cookbook):
|
|
84
84
|
if cookbook not in cookbooks_list:
|
|
85
85
|
cookbooks_list.append(cookbook)
|
|
86
86
|
if count:
|
|
@@ -89,10 +89,16 @@ class CookbookService(BaseService):
|
|
|
89
89
|
cookbook.total_dataset_in_cookbook,
|
|
90
90
|
) = get_total_prompt_and_dataset_in_cookbook(cookbook)
|
|
91
91
|
|
|
92
|
-
if categories_excluded
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
92
|
+
if categories_excluded:
|
|
93
|
+
excluded_categories_set = set(
|
|
94
|
+
category.lower() for category in categories_excluded.split(",")
|
|
95
|
+
)
|
|
96
|
+
cookbook_categories_set = set(
|
|
97
|
+
category.lower() for category in cookbook.categories
|
|
98
|
+
)
|
|
99
|
+
# Exclude only if all categories in the cookbook are in the excluded list
|
|
100
|
+
if cookbook_categories_set.issubset(excluded_categories_set):
|
|
101
|
+
cookbooks_list.remove(cookbook)
|
|
96
102
|
|
|
97
103
|
for cookbook in cookbooks_list:
|
|
98
104
|
cookbook.required_config = cookbook_metrics_dependency(cookbook)
|
|
@@ -160,50 +166,40 @@ def get_total_prompt_and_dataset_in_cookbook(cookbook: Cookbook) -> tuple[int, i
|
|
|
160
166
|
|
|
161
167
|
|
|
162
168
|
@staticmethod
|
|
163
|
-
def
|
|
169
|
+
def cookbook_has_tags(tags: str, cookbook: Cookbook) -> bool:
|
|
164
170
|
"""
|
|
165
|
-
Check if
|
|
171
|
+
Check if a cookbook has the specified tags.
|
|
166
172
|
|
|
167
173
|
Args:
|
|
168
|
-
tags (str): The tags to check for in the cookbook
|
|
169
|
-
cookbook (Cookbook): The cookbook object
|
|
174
|
+
tags (str): The tags to check for in the cookbook.
|
|
175
|
+
cookbook (Cookbook): The cookbook object.
|
|
170
176
|
|
|
171
177
|
Returns:
|
|
172
|
-
bool: True if
|
|
178
|
+
bool: True if the cookbook has the specified tags, False otherwise.
|
|
173
179
|
"""
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
for recipe in recipes:
|
|
177
|
-
recipe = Recipe(**recipe)
|
|
178
|
-
if tags in recipe.tags:
|
|
179
|
-
return True
|
|
180
|
-
return False
|
|
180
|
+
tags_list = [tag.lower() for tag in tags.split(",")]
|
|
181
|
+
return any(tag in [ctag.lower() for ctag in cookbook.tags] for tag in tags_list)
|
|
181
182
|
|
|
182
183
|
|
|
183
184
|
@staticmethod
|
|
184
|
-
def
|
|
185
|
+
def cookbook_has_categories(categories: str, cookbook: Cookbook) -> bool:
|
|
185
186
|
"""
|
|
186
|
-
Check if
|
|
187
|
+
Check if a cookbook has the specified categories.
|
|
187
188
|
|
|
188
189
|
Args:
|
|
189
|
-
categories (str): The categories to check for in the cookbook
|
|
190
|
-
cookbook (Cookbook): The cookbook object
|
|
191
|
-
exclude_categories (str): The categories to exclude
|
|
190
|
+
categories (str): The categories to check for in the cookbook.
|
|
191
|
+
cookbook (Cookbook): The cookbook object.
|
|
192
192
|
|
|
193
193
|
Returns:
|
|
194
|
-
bool: True if
|
|
194
|
+
bool: True if the cookbook has the specified categories, False otherwise.
|
|
195
195
|
"""
|
|
196
|
-
recipe_ids = cookbook.recipes
|
|
197
196
|
categories_list = [category.lower() for category in categories.split(",")]
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
):
|
|
205
|
-
return True
|
|
206
|
-
return False
|
|
197
|
+
return any(
|
|
198
|
+
category in [ccat.lower() for ccat in cookbook.categories]
|
|
199
|
+
for category in categories_list
|
|
200
|
+
)
|
|
201
|
+
|
|
202
|
+
|
|
207
203
|
|
|
208
204
|
|
|
209
205
|
@staticmethod
|
moonshot/src/api/api_cookbook.py
CHANGED
|
@@ -2,6 +2,7 @@ from pydantic import conlist, validate_call
|
|
|
2
2
|
|
|
3
3
|
from moonshot.src.cookbooks.cookbook import Cookbook
|
|
4
4
|
from moonshot.src.cookbooks.cookbook_arguments import CookbookArguments
|
|
5
|
+
from moonshot.src.recipes.recipe import Recipe
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
# ------------------------------------------------------------------------------
|
|
@@ -20,6 +21,8 @@ def api_create_cookbook(name: str, description: str, recipes: list[str]) -> str:
|
|
|
20
21
|
Args:
|
|
21
22
|
name (str): The name of the new cookbook.
|
|
22
23
|
description (str): A brief description of the new cookbook.
|
|
24
|
+
tags (list[str]): A list of tags associated with the cookbook.
|
|
25
|
+
categories (list[str]): A list of categories the cookbook belongs to.
|
|
23
26
|
recipes (list[str]): A list of recipes to be included in the new cookbook.
|
|
24
27
|
|
|
25
28
|
Returns:
|
|
@@ -29,10 +32,13 @@ def api_create_cookbook(name: str, description: str, recipes: list[str]) -> str:
|
|
|
29
32
|
# We do not need to provide the id.
|
|
30
33
|
# This is because during creation:
|
|
31
34
|
# 1. the id is slugify from the name and stored as id.
|
|
35
|
+
# We do not need to provide tags and categories as they will be generated based on the recipes selected.
|
|
32
36
|
cb_args = CookbookArguments(
|
|
33
37
|
id="",
|
|
34
38
|
name=name,
|
|
35
39
|
description=description,
|
|
40
|
+
tags=[],
|
|
41
|
+
categories=[],
|
|
36
42
|
recipes=recipes,
|
|
37
43
|
)
|
|
38
44
|
return Cookbook.create(cb_args)
|
|
@@ -103,6 +109,20 @@ def api_update_cookbook(cb_id: str, **kwargs) -> bool:
|
|
|
103
109
|
if hasattr(existing_cookbook, key):
|
|
104
110
|
setattr(existing_cookbook, key, value)
|
|
105
111
|
|
|
112
|
+
# Update the cookbook's categories and tags if any of the recipe(s) are changed
|
|
113
|
+
if "recipes" in kwargs:
|
|
114
|
+
consolidated_tags = set()
|
|
115
|
+
consolidated_categories = set()
|
|
116
|
+
for key, value in kwargs.items():
|
|
117
|
+
if key == "recipes":
|
|
118
|
+
for recipe_id in value:
|
|
119
|
+
recipe = Recipe.read(recipe_id)
|
|
120
|
+
consolidated_tags.update(recipe.tags)
|
|
121
|
+
consolidated_categories.update(recipe.categories)
|
|
122
|
+
# Consolidate and set the tags and categories
|
|
123
|
+
existing_cookbook.tags = list(consolidated_tags)
|
|
124
|
+
existing_cookbook.categories = list(consolidated_categories)
|
|
125
|
+
|
|
106
126
|
# Perform pydantic check on the updated existing cookbook
|
|
107
127
|
CookbookArguments.model_validate(existing_cookbook.to_dict())
|
|
108
128
|
|
|
@@ -7,6 +7,7 @@ from slugify import slugify
|
|
|
7
7
|
|
|
8
8
|
from moonshot.src.configs.env_variables import EnvVariables
|
|
9
9
|
from moonshot.src.cookbooks.cookbook_arguments import CookbookArguments
|
|
10
|
+
from moonshot.src.recipes.recipe import Recipe
|
|
10
11
|
from moonshot.src.storage.storage import Storage
|
|
11
12
|
from moonshot.src.utils.log import configure_logger
|
|
12
13
|
|
|
@@ -19,6 +20,8 @@ class Cookbook:
|
|
|
19
20
|
self.id = cb_args.id
|
|
20
21
|
self.name = cb_args.name
|
|
21
22
|
self.description = cb_args.description
|
|
23
|
+
self.tags = cb_args.tags
|
|
24
|
+
self.categories = cb_args.categories
|
|
22
25
|
self.recipes = cb_args.recipes
|
|
23
26
|
|
|
24
27
|
@classmethod
|
|
@@ -64,6 +67,8 @@ class Cookbook:
|
|
|
64
67
|
cb_info = {
|
|
65
68
|
"name": cb_args.name,
|
|
66
69
|
"description": cb_args.description,
|
|
70
|
+
"tags": Cookbook.get_tags_in_recipes(cb_args.recipes),
|
|
71
|
+
"categories": Cookbook.get_categories_in_recipes(cb_args.recipes),
|
|
67
72
|
"recipes": cb_args.recipes,
|
|
68
73
|
}
|
|
69
74
|
|
|
@@ -243,3 +248,19 @@ class Cookbook:
|
|
|
243
248
|
except Exception as e:
|
|
244
249
|
logger.error(f"Failed to get available cookbooks: {str(e)}")
|
|
245
250
|
raise e
|
|
251
|
+
|
|
252
|
+
@staticmethod
|
|
253
|
+
def get_categories_in_recipes(recipes: list[str]) -> list[str]:
|
|
254
|
+
return list(
|
|
255
|
+
{
|
|
256
|
+
category
|
|
257
|
+
for recipe_id in recipes
|
|
258
|
+
for category in Recipe.read(recipe_id).categories
|
|
259
|
+
}
|
|
260
|
+
)
|
|
261
|
+
|
|
262
|
+
@staticmethod
|
|
263
|
+
def get_tags_in_recipes(recipes: list[str]) -> list[str]:
|
|
264
|
+
return list(
|
|
265
|
+
{tag for recipe_id in recipes for tag in Recipe.read(recipe_id).tags}
|
|
266
|
+
)
|
|
@@ -8,6 +8,10 @@ class CookbookArguments(BaseModel):
|
|
|
8
8
|
|
|
9
9
|
description: str # description (str): A brief description of the Cookbook.
|
|
10
10
|
|
|
11
|
+
tags: list[str] # tags (list): The list of tags in the Cookbook.
|
|
12
|
+
|
|
13
|
+
categories: list[str] # categories (list): The list of categories in the Cookbook.
|
|
14
|
+
|
|
11
15
|
recipes: list[str] = Field(
|
|
12
16
|
min_length=1
|
|
13
17
|
) # recipes (list): A list of recipes included in the Cookbook.
|
|
@@ -29,6 +33,8 @@ class CookbookArguments(BaseModel):
|
|
|
29
33
|
return {
|
|
30
34
|
"id": self.id,
|
|
31
35
|
"name": self.name,
|
|
36
|
+
"tags": self.tags,
|
|
37
|
+
"categories": self.categories,
|
|
32
38
|
"description": self.description,
|
|
33
39
|
"recipes": self.recipes,
|
|
34
40
|
}
|
moonshot/src/runners/runner.py
CHANGED
|
@@ -315,7 +315,7 @@ class Runner:
|
|
|
315
315
|
async def run_recipes(
|
|
316
316
|
self,
|
|
317
317
|
recipes: list[str],
|
|
318
|
-
|
|
318
|
+
prompt_selection_percentage: int = 100,
|
|
319
319
|
random_seed: int = 0,
|
|
320
320
|
system_prompt: str = "",
|
|
321
321
|
runner_processing_module: str = "benchmarking",
|
|
@@ -325,26 +325,21 @@ class Runner:
|
|
|
325
325
|
Initiates an asynchronous benchmark run using a set of recipes.
|
|
326
326
|
|
|
327
327
|
This method sets up and starts a benchmark run tailored for recipes. It instantiates a benchmark run object,
|
|
328
|
-
applies the configuration based on the provided recipes,
|
|
328
|
+
applies the configuration based on the provided recipes, percentage of prompts, random seed, system prompt, and
|
|
329
329
|
the specified runner and result processing modules, and then commences the run asynchronously.
|
|
330
330
|
|
|
331
331
|
Args:
|
|
332
332
|
recipes (list[str]): The recipes to be included in the benchmark run.
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
Defaults to 0.
|
|
336
|
-
|
|
333
|
+
prompt_selection_percentage (int, optional): The percentage of prompts to utilize during the benchmark.
|
|
334
|
+
Defaults to 100.
|
|
337
335
|
random_seed (int, optional): The seed for random number generation to ensure reproducibility.
|
|
338
|
-
|
|
339
|
-
|
|
336
|
+
Defaults to 0.
|
|
340
337
|
system_prompt (str, optional): The system prompt to be used during the benchmark.
|
|
341
|
-
|
|
342
|
-
|
|
338
|
+
Defaults to an empty string.
|
|
343
339
|
runner_processing_module (str, optional): The module responsible for processing the runner.
|
|
344
|
-
|
|
345
|
-
|
|
340
|
+
Defaults to "benchmarking".
|
|
346
341
|
result_processing_module (str, optional): The module responsible for processing the results.
|
|
347
|
-
|
|
342
|
+
Defaults to "benchmarking-result".
|
|
348
343
|
|
|
349
344
|
Raises:
|
|
350
345
|
Exception: If any error occurs during the setup or execution of the benchmark run.
|
|
@@ -357,7 +352,7 @@ class Runner:
|
|
|
357
352
|
RunnerType.BENCHMARK,
|
|
358
353
|
{
|
|
359
354
|
"recipes": recipes,
|
|
360
|
-
"
|
|
355
|
+
"prompt_selection_percentage": prompt_selection_percentage,
|
|
361
356
|
"random_seed": random_seed,
|
|
362
357
|
"system_prompt": system_prompt,
|
|
363
358
|
"runner_processing_module": runner_processing_module,
|
|
@@ -382,7 +377,7 @@ class Runner:
|
|
|
382
377
|
async def run_cookbooks(
|
|
383
378
|
self,
|
|
384
379
|
cookbooks: list[str],
|
|
385
|
-
|
|
380
|
+
prompt_selection_percentage: int = 100,
|
|
386
381
|
random_seed: int = 0,
|
|
387
382
|
system_prompt: str = "",
|
|
388
383
|
runner_processing_module: str = "benchmarking",
|
|
@@ -393,26 +388,21 @@ class Runner:
|
|
|
393
388
|
|
|
394
389
|
This method is responsible for initiating a benchmark cookbook run with the specified cookbooks and parameters.
|
|
395
390
|
It creates a new benchmark cookbook run instance, configures it with the provided cookbook names,
|
|
396
|
-
|
|
391
|
+
percentage of prompts, random seed, system prompt, runner processing module, and result processing module,
|
|
397
392
|
and then starts the run asynchronously.
|
|
398
393
|
|
|
399
394
|
Args:
|
|
400
395
|
cookbooks (list[str]): A list of cookbook names to be run in the benchmark.
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
Defaults to 0.
|
|
404
|
-
|
|
396
|
+
prompt_selection_percentage (int, optional): The percentage of prompts to be used in the benchmark run.
|
|
397
|
+
Defaults to 100.
|
|
405
398
|
random_seed (int, optional): The seed for random number generation to ensure reproducibility.
|
|
406
|
-
|
|
407
|
-
|
|
399
|
+
Defaults to 0.
|
|
408
400
|
system_prompt (str, optional): A system prompt to be used in the benchmark run.
|
|
409
|
-
|
|
410
|
-
|
|
401
|
+
Defaults to an empty string.
|
|
411
402
|
runner_processing_module (str, optional): The module responsible for processing the runner.
|
|
412
|
-
|
|
413
|
-
|
|
403
|
+
Defaults to "benchmarking".
|
|
414
404
|
result_processing_module (str, optional): The module responsible for processing the results.
|
|
415
|
-
|
|
405
|
+
Defaults to "benchmarking-result".
|
|
416
406
|
|
|
417
407
|
Raises:
|
|
418
408
|
Exception: If any error occurs during the setup or execution of the benchmark run.
|
|
@@ -425,7 +415,7 @@ class Runner:
|
|
|
425
415
|
RunnerType.BENCHMARK,
|
|
426
416
|
{
|
|
427
417
|
"cookbooks": cookbooks,
|
|
428
|
-
"
|
|
418
|
+
"prompt_selection_percentage": prompt_selection_percentage,
|
|
429
419
|
"random_seed": random_seed,
|
|
430
420
|
"system_prompt": system_prompt,
|
|
431
421
|
"runner_processing_module": runner_processing_module,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|