aiverify-moonshot 0.6.2__py3-none-any.whl → 0.6.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiverify_moonshot-0.6.4.dist-info/METADATA +241 -0
- {aiverify_moonshot-0.6.2.dist-info → aiverify_moonshot-0.6.4.dist-info}/RECORD +29 -29
- aiverify_moonshot-0.6.4.dist-info/licenses/NOTICES.md +1187 -0
- moonshot/integrations/cli/__main__.py +1 -3
- moonshot/integrations/cli/redteam/session.py +8 -8
- moonshot/integrations/web_api/app.py +1 -1
- moonshot/integrations/web_api/routes/benchmark_result.py +1 -0
- moonshot/integrations/web_api/routes/bookmark.py +5 -2
- moonshot/integrations/web_api/routes/context_strategy.py +3 -1
- moonshot/integrations/web_api/routes/prompt_template.py +1 -0
- moonshot/integrations/web_api/schemas/cookbook_create_dto.py +4 -2
- moonshot/integrations/web_api/schemas/prompt_response_model.py +0 -1
- moonshot/integrations/web_api/schemas/recipe_create_dto.py +2 -1
- moonshot/integrations/web_api/services/context_strategy_service.py +1 -4
- moonshot/integrations/web_api/services/cookbook_service.py +0 -2
- moonshot/integrations/web_api/services/session_service.py +5 -5
- moonshot/integrations/web_api/services/utils/exceptions_handler.py +47 -10
- moonshot/integrations/web_api/services/utils/results_formatter.py +25 -16
- moonshot/integrations/web_api/status_updater/interface/benchmark_progress_callback.py +3 -3
- moonshot/integrations/web_api/status_updater/interface/redteam_progress_callback.py +3 -3
- moonshot/src/bookmark/bookmark.py +52 -29
- moonshot/src/bookmark/bookmark_arguments.py +9 -5
- moonshot/src/connectors/connector.py +33 -25
- moonshot/src/connectors_endpoints/connector_endpoint.py +37 -14
- moonshot/src/cookbooks/cookbook_arguments.py +1 -1
- moonshot/src/redteaming/attack/attack_module.py +9 -3
- aiverify_moonshot-0.6.2.dist-info/METADATA +0 -254
- aiverify_moonshot-0.6.2.dist-info/licenses/NOTICES.md +0 -2506
- {aiverify_moonshot-0.6.2.dist-info → aiverify_moonshot-0.6.4.dist-info}/WHEEL +0 -0
- {aiverify_moonshot-0.6.2.dist-info → aiverify_moonshot-0.6.4.dist-info}/licenses/AUTHORS.md +0 -0
- {aiverify_moonshot-0.6.2.dist-info → aiverify_moonshot-0.6.4.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -15,20 +15,6 @@ from moonshot.src.connectors.connector_response import ConnectorResponse
|
|
|
15
15
|
from moonshot.src.connectors_endpoints.connector_endpoint_arguments import (
|
|
16
16
|
ConnectorEndpointArguments,
|
|
17
17
|
)
|
|
18
|
-
from moonshot.src.messages_constants import (
|
|
19
|
-
CONNECTOR_CREATE_CONNECTOR_ENDPOINT_ARGUMENTS_VALIDATION_ERROR,
|
|
20
|
-
CONNECTOR_CREATE_ERROR,
|
|
21
|
-
CONNECTOR_GET_AVAILABLE_ITEMS_ERROR,
|
|
22
|
-
CONNECTOR_GET_PREDICTION_ARGUMENTS_CONNECTOR_VALIDATION_ERROR,
|
|
23
|
-
CONNECTOR_GET_PREDICTION_ARGUMENTS_GENERATED_PROMPT_VALIDATION_ERROR,
|
|
24
|
-
CONNECTOR_GET_PREDICTION_ERROR,
|
|
25
|
-
CONNECTOR_GET_PREDICTION_INFO,
|
|
26
|
-
CONNECTOR_GET_PREDICTION_TIME_TAKEN_INFO,
|
|
27
|
-
CONNECTOR_LOAD_CONNECTOR_ENDPOINT_ARGUMENTS_VALIDATION_ERROR,
|
|
28
|
-
CONNECTOR_LOAD_CONNECTOR_INSTANCE_RUNTIME_ERROR,
|
|
29
|
-
CONNECTOR_PERFORM_RETRY_CALLBACK_ERROR,
|
|
30
|
-
CONNECTOR_SET_SYSTEM_PROMPT_VALIDATION_ERROR,
|
|
31
|
-
)
|
|
32
18
|
from moonshot.src.storage.storage import Storage
|
|
33
19
|
from moonshot.src.utils.import_modules import get_instance
|
|
34
20
|
from moonshot.src.utils.log import configure_logger
|
|
@@ -50,6 +36,8 @@ def perform_retry_callback(connector_id: str, retry_state: RetryCallState) -> No
|
|
|
50
36
|
retry_state (RetryCallState): The state of the retry call, which includes
|
|
51
37
|
information about the current attempt, the exception raised, and the next action.
|
|
52
38
|
"""
|
|
39
|
+
CONNECTOR_PERFORM_RETRY_CALLBACK_ERROR = "[Connector ID: {connector_id}] Attempt {attempt_no} failed due to error: {message}" # noqa: E501
|
|
40
|
+
|
|
53
41
|
sleep_time = retry_state.idle_for if retry_state else 0
|
|
54
42
|
exception = (
|
|
55
43
|
retry_state.outcome.exception() if retry_state.outcome else "Unknown exception"
|
|
@@ -92,6 +80,24 @@ def perform_retry(func):
|
|
|
92
80
|
|
|
93
81
|
|
|
94
82
|
class Connector:
|
|
83
|
+
CONNECTOR_CREATE_CONNECTOR_ENDPOINT_ARGUMENTS_VALIDATION_ERROR = "[Connector] The 'ep_args' argument must be an instance of ConnectorEndpointArguments and not None." # noqa: E501
|
|
84
|
+
CONNECTOR_CREATE_ERROR = "[Connector] Failed to create connector: {message}"
|
|
85
|
+
CONNECTOR_GET_AVAILABLE_ITEMS_ERROR = (
|
|
86
|
+
"[Connector] Failed to get available connectors: {message}"
|
|
87
|
+
)
|
|
88
|
+
CONNECTOR_GET_PREDICTION_ARGUMENTS_CONNECTOR_VALIDATION_ERROR = "[Connector] The 'connector' argument must be an instance of Connector and not None." # noqa: E501
|
|
89
|
+
CONNECTOR_GET_PREDICTION_ARGUMENTS_GENERATED_PROMPT_VALIDATION_ERROR = "[Connector] The 'generated_prompt' argument must be an instance of ConnectorPromptArguments and not None." # noqa: E501
|
|
90
|
+
CONNECTOR_GET_PREDICTION_ERROR = "[Connector ID: {connector_id}] Prompt Index {prompt_index} failed to get prediction: {message}" # noqa: E501
|
|
91
|
+
CONNECTOR_GET_PREDICTION_INFO = (
|
|
92
|
+
"[Connector ID: {connector_id}] Predicting Prompt Index {prompt_index}."
|
|
93
|
+
)
|
|
94
|
+
CONNECTOR_GET_PREDICTION_TIME_TAKEN_INFO = "[Connector ID: {connector_id}] Prompt Index {prompt_index} took {prompt_duration}s." # noqa: E501
|
|
95
|
+
CONNECTOR_LOAD_CONNECTOR_ENDPOINT_ARGUMENTS_VALIDATION_ERROR = "[Connector] The 'ep_args' argument must be an instance of ConnectorEndpointArguments and not None." # noqa: E501
|
|
96
|
+
CONNECTOR_LOAD_CONNECTOR_INSTANCE_RUNTIME_ERROR = (
|
|
97
|
+
"[Connector] Failed to get connector instance: {message}"
|
|
98
|
+
)
|
|
99
|
+
CONNECTOR_SET_SYSTEM_PROMPT_VALIDATION_ERROR = "[Connector] The 'system_prompt' argument must be an instance of string and not None." # noqa: E501
|
|
100
|
+
|
|
95
101
|
def __init__(self, ep_args: ConnectorEndpointArguments) -> None:
|
|
96
102
|
self.id = ep_args.id
|
|
97
103
|
|
|
@@ -223,7 +229,7 @@ class Connector:
|
|
|
223
229
|
"""
|
|
224
230
|
if ep_args is None or not isinstance(ep_args, ConnectorEndpointArguments):
|
|
225
231
|
raise ValueError(
|
|
226
|
-
CONNECTOR_LOAD_CONNECTOR_ENDPOINT_ARGUMENTS_VALIDATION_ERROR
|
|
232
|
+
Connector.CONNECTOR_LOAD_CONNECTOR_ENDPOINT_ARGUMENTS_VALIDATION_ERROR
|
|
227
233
|
)
|
|
228
234
|
|
|
229
235
|
connector_instance = get_instance(
|
|
@@ -236,7 +242,7 @@ class Connector:
|
|
|
236
242
|
return connector_instance(ep_args)
|
|
237
243
|
else:
|
|
238
244
|
raise RuntimeError(
|
|
239
|
-
CONNECTOR_LOAD_CONNECTOR_INSTANCE_RUNTIME_ERROR.format(
|
|
245
|
+
Connector.CONNECTOR_LOAD_CONNECTOR_INSTANCE_RUNTIME_ERROR.format(
|
|
240
246
|
message=ep_args.connector_type
|
|
241
247
|
)
|
|
242
248
|
)
|
|
@@ -264,12 +270,12 @@ class Connector:
|
|
|
264
270
|
try:
|
|
265
271
|
if ep_args is None or not isinstance(ep_args, ConnectorEndpointArguments):
|
|
266
272
|
raise ValueError(
|
|
267
|
-
CONNECTOR_CREATE_CONNECTOR_ENDPOINT_ARGUMENTS_VALIDATION_ERROR
|
|
273
|
+
Connector.CONNECTOR_CREATE_CONNECTOR_ENDPOINT_ARGUMENTS_VALIDATION_ERROR
|
|
268
274
|
)
|
|
269
275
|
return Connector.load(ep_args)
|
|
270
276
|
|
|
271
277
|
except Exception as e:
|
|
272
|
-
logger.error(CONNECTOR_CREATE_ERROR.format(message=str(e)))
|
|
278
|
+
logger.error(Connector.CONNECTOR_CREATE_ERROR.format(message=str(e)))
|
|
273
279
|
raise e
|
|
274
280
|
|
|
275
281
|
@staticmethod
|
|
@@ -296,7 +302,9 @@ class Connector:
|
|
|
296
302
|
]
|
|
297
303
|
|
|
298
304
|
except Exception as e:
|
|
299
|
-
logger.error(
|
|
305
|
+
logger.error(
|
|
306
|
+
Connector.CONNECTOR_GET_AVAILABLE_ITEMS_ERROR.format(message=str(e))
|
|
307
|
+
)
|
|
300
308
|
raise e
|
|
301
309
|
|
|
302
310
|
@staticmethod
|
|
@@ -336,17 +344,17 @@ class Connector:
|
|
|
336
344
|
generated_prompt, ConnectorPromptArguments
|
|
337
345
|
):
|
|
338
346
|
raise ValueError(
|
|
339
|
-
CONNECTOR_GET_PREDICTION_ARGUMENTS_GENERATED_PROMPT_VALIDATION_ERROR
|
|
347
|
+
Connector.CONNECTOR_GET_PREDICTION_ARGUMENTS_GENERATED_PROMPT_VALIDATION_ERROR
|
|
340
348
|
)
|
|
341
349
|
|
|
342
350
|
if connector is None or not isinstance(connector, Connector):
|
|
343
351
|
raise ValueError(
|
|
344
|
-
CONNECTOR_GET_PREDICTION_ARGUMENTS_CONNECTOR_VALIDATION_ERROR
|
|
352
|
+
Connector.CONNECTOR_GET_PREDICTION_ARGUMENTS_CONNECTOR_VALIDATION_ERROR
|
|
345
353
|
)
|
|
346
354
|
|
|
347
355
|
try:
|
|
348
356
|
logger.info(
|
|
349
|
-
CONNECTOR_GET_PREDICTION_INFO.format(
|
|
357
|
+
Connector.CONNECTOR_GET_PREDICTION_INFO.format(
|
|
350
358
|
connector_id=connector.id,
|
|
351
359
|
prompt_index=generated_prompt.prompt_index,
|
|
352
360
|
)
|
|
@@ -358,7 +366,7 @@ class Connector:
|
|
|
358
366
|
)
|
|
359
367
|
generated_prompt.duration = time.perf_counter() - start_time
|
|
360
368
|
logger.debug(
|
|
361
|
-
CONNECTOR_GET_PREDICTION_TIME_TAKEN_INFO.format(
|
|
369
|
+
Connector.CONNECTOR_GET_PREDICTION_TIME_TAKEN_INFO.format(
|
|
362
370
|
connector_id=connector.id,
|
|
363
371
|
prompt_index=generated_prompt.prompt_index,
|
|
364
372
|
prompt_duration=f"{generated_prompt.duration:.4f}",
|
|
@@ -374,7 +382,7 @@ class Connector:
|
|
|
374
382
|
|
|
375
383
|
except Exception as e:
|
|
376
384
|
logger.error(
|
|
377
|
-
CONNECTOR_GET_PREDICTION_ERROR.format(
|
|
385
|
+
Connector.CONNECTOR_GET_PREDICTION_ERROR.format(
|
|
378
386
|
connector_id=connector.id,
|
|
379
387
|
prompt_index=generated_prompt.prompt_index,
|
|
380
388
|
message=str(e),
|
|
@@ -396,5 +404,5 @@ class Connector:
|
|
|
396
404
|
ValueError: If the provided system prompt is not a string or is None.
|
|
397
405
|
"""
|
|
398
406
|
if system_prompt is None or not isinstance(system_prompt, str):
|
|
399
|
-
raise ValueError(CONNECTOR_SET_SYSTEM_PROMPT_VALIDATION_ERROR)
|
|
407
|
+
raise ValueError(Connector.CONNECTOR_SET_SYSTEM_PROMPT_VALIDATION_ERROR)
|
|
400
408
|
self.system_prompt = system_prompt
|
|
@@ -7,14 +7,6 @@ from moonshot.src.configs.env_variables import EnvVariables
|
|
|
7
7
|
from moonshot.src.connectors_endpoints.connector_endpoint_arguments import (
|
|
8
8
|
ConnectorEndpointArguments,
|
|
9
9
|
)
|
|
10
|
-
from moonshot.src.messages_constants import (
|
|
11
|
-
CONNECTOR_ENDPOINT_CREATE_ERROR,
|
|
12
|
-
CONNECTOR_ENDPOINT_DELETE_ERROR,
|
|
13
|
-
CONNECTOR_ENDPOINT_GET_AVAILABLE_ITEMS_ERROR,
|
|
14
|
-
CONNECTOR_ENDPOINT_READ_ERROR,
|
|
15
|
-
CONNECTOR_ENDPOINT_READ_INVALID,
|
|
16
|
-
CONNECTOR_ENDPOINT_UPDATE_ERROR,
|
|
17
|
-
)
|
|
18
10
|
from moonshot.src.storage.storage import Storage
|
|
19
11
|
from moonshot.src.utils.log import configure_logger
|
|
20
12
|
|
|
@@ -23,6 +15,23 @@ logger = configure_logger(__name__)
|
|
|
23
15
|
|
|
24
16
|
|
|
25
17
|
class ConnectorEndpoint:
|
|
18
|
+
CONNECTOR_ENDPOINT_CREATE_ERROR = (
|
|
19
|
+
"[ConnectorEndpoint] Failed to create connector endpoint: {message}"
|
|
20
|
+
)
|
|
21
|
+
CONNECTOR_ENDPOINT_DELETE_ERROR = (
|
|
22
|
+
"[ConnectorEndpoint] Failed to delete connector endpoint: {message}"
|
|
23
|
+
)
|
|
24
|
+
CONNECTOR_ENDPOINT_GET_AVAILABLE_ITEMS_ERROR = (
|
|
25
|
+
"[ConnectorEndpoint] Failed to get available connector endpoints: {message}"
|
|
26
|
+
)
|
|
27
|
+
CONNECTOR_ENDPOINT_READ_ERROR = (
|
|
28
|
+
"[ConnectorEndpoint] Failed to read connector endpoint: {message}"
|
|
29
|
+
)
|
|
30
|
+
CONNECTOR_ENDPOINT_READ_INVALID = "Invalid connector endpoint id - {ep_id}"
|
|
31
|
+
CONNECTOR_ENDPOINT_UPDATE_ERROR = (
|
|
32
|
+
"[ConnectorEndpoint] Failed to update connector endpoint: {message}"
|
|
33
|
+
)
|
|
34
|
+
|
|
26
35
|
@staticmethod
|
|
27
36
|
@validate_call
|
|
28
37
|
def create(ep_args: ConnectorEndpointArguments) -> str:
|
|
@@ -67,7 +76,9 @@ class ConnectorEndpoint:
|
|
|
67
76
|
return ep_id
|
|
68
77
|
|
|
69
78
|
except Exception as e:
|
|
70
|
-
logger.error(
|
|
79
|
+
logger.error(
|
|
80
|
+
ConnectorEndpoint.CONNECTOR_ENDPOINT_CREATE_ERROR.format(message=str(e))
|
|
81
|
+
)
|
|
71
82
|
raise e
|
|
72
83
|
|
|
73
84
|
@staticmethod
|
|
@@ -94,12 +105,18 @@ class ConnectorEndpoint:
|
|
|
94
105
|
try:
|
|
95
106
|
endpoint_details = ConnectorEndpoint._read_endpoint(ep_id)
|
|
96
107
|
if not endpoint_details:
|
|
97
|
-
raise RuntimeError(
|
|
108
|
+
raise RuntimeError(
|
|
109
|
+
ConnectorEndpoint.CONNECTOR_ENDPOINT_READ_INVALID.format(
|
|
110
|
+
ep_id=ep_id
|
|
111
|
+
)
|
|
112
|
+
)
|
|
98
113
|
|
|
99
114
|
return ConnectorEndpointArguments(**endpoint_details)
|
|
100
115
|
|
|
101
116
|
except Exception as e:
|
|
102
|
-
logger.error(
|
|
117
|
+
logger.error(
|
|
118
|
+
ConnectorEndpoint.CONNECTOR_ENDPOINT_READ_ERROR.format(message=str(e))
|
|
119
|
+
)
|
|
103
120
|
raise e
|
|
104
121
|
|
|
105
122
|
@staticmethod
|
|
@@ -167,7 +184,9 @@ class ConnectorEndpoint:
|
|
|
167
184
|
return True
|
|
168
185
|
|
|
169
186
|
except Exception as e:
|
|
170
|
-
logger.error(
|
|
187
|
+
logger.error(
|
|
188
|
+
ConnectorEndpoint.CONNECTOR_ENDPOINT_UPDATE_ERROR.format(message=str(e))
|
|
189
|
+
)
|
|
171
190
|
raise e
|
|
172
191
|
|
|
173
192
|
@staticmethod
|
|
@@ -194,7 +213,9 @@ class ConnectorEndpoint:
|
|
|
194
213
|
return True
|
|
195
214
|
|
|
196
215
|
except Exception as e:
|
|
197
|
-
logger.error(
|
|
216
|
+
logger.error(
|
|
217
|
+
ConnectorEndpoint.CONNECTOR_ENDPOINT_DELETE_ERROR.format(message=str(e))
|
|
218
|
+
)
|
|
198
219
|
raise e
|
|
199
220
|
|
|
200
221
|
@staticmethod
|
|
@@ -234,6 +255,8 @@ class ConnectorEndpoint:
|
|
|
234
255
|
|
|
235
256
|
except Exception as e:
|
|
236
257
|
logger.error(
|
|
237
|
-
CONNECTOR_ENDPOINT_GET_AVAILABLE_ITEMS_ERROR.format(
|
|
258
|
+
ConnectorEndpoint.CONNECTOR_ENDPOINT_GET_AVAILABLE_ITEMS_ERROR.format(
|
|
259
|
+
message=str(e)
|
|
260
|
+
)
|
|
238
261
|
)
|
|
239
262
|
raise e
|
|
@@ -8,7 +8,7 @@ class CookbookArguments(BaseModel):
|
|
|
8
8
|
|
|
9
9
|
description: str # description (str): A brief description of the Cookbook.
|
|
10
10
|
|
|
11
|
-
tags: list[str] #
|
|
11
|
+
tags: list[str] # tags (list): The list of tags in the Cookbook.
|
|
12
12
|
|
|
13
13
|
categories: list[str] # categories (list): The list of categories in the Cookbook.
|
|
14
14
|
|
|
@@ -649,7 +649,7 @@ class RedTeamingPromptArguments(BaseModel):
|
|
|
649
649
|
|
|
650
650
|
This method collects all the attributes of the RedTeamingPromptArguments instance and forms a tuple
|
|
651
651
|
with the attribute values in this specific order: conn_id, cs_id, pt_id, am_id, me_id, original_prompt,
|
|
652
|
-
connector_prompt.prompt, system_prompt, connector_prompt.predicted_results.response,
|
|
652
|
+
connector_prompt.prompt, system_prompt, connector_prompt.predicted_results.response,
|
|
653
653
|
connector_prompt.duration, start_time.
|
|
654
654
|
|
|
655
655
|
Returns:
|
|
@@ -664,7 +664,9 @@ class RedTeamingPromptArguments(BaseModel):
|
|
|
664
664
|
self.original_prompt,
|
|
665
665
|
self.connector_prompt.prompt,
|
|
666
666
|
self.system_prompt,
|
|
667
|
-
self.connector_prompt.predicted_results.response
|
|
667
|
+
self.connector_prompt.predicted_results.response
|
|
668
|
+
if self.connector_prompt.predicted_results
|
|
669
|
+
else "",
|
|
668
670
|
str(self.connector_prompt.duration),
|
|
669
671
|
self.start_time,
|
|
670
672
|
)
|
|
@@ -689,7 +691,11 @@ class RedTeamingPromptArguments(BaseModel):
|
|
|
689
691
|
"original_prompt": self.original_prompt,
|
|
690
692
|
"prepared_prompt": self.connector_prompt.prompt,
|
|
691
693
|
"system_prompt": self.system_prompt,
|
|
692
|
-
"response":
|
|
694
|
+
"response": (
|
|
695
|
+
self.connector_prompt.predicted_results.response
|
|
696
|
+
if self.connector_prompt.predicted_results
|
|
697
|
+
else ""
|
|
698
|
+
),
|
|
693
699
|
"duration": str(self.connector_prompt.duration),
|
|
694
700
|
"start_time": self.start_time,
|
|
695
701
|
}
|
|
@@ -1,254 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: aiverify-moonshot
|
|
3
|
-
Version: 0.6.2
|
|
4
|
-
Summary: AI Verify advances Gen AI testing with Project Moonshot.
|
|
5
|
-
Project-URL: Repository, https://github.com/aiverify-foundation/moonshot
|
|
6
|
-
Project-URL: Documentation, https://aiverify-foundation.github.io/moonshot/
|
|
7
|
-
Project-URL: Issues, https://github.com/aiverify-foundation/moonshot/issues
|
|
8
|
-
Author-email: AI Verify Foundation <info@aiverify.sg>
|
|
9
|
-
License: Apache-2.0
|
|
10
|
-
License-File: AUTHORS.md
|
|
11
|
-
License-File: LICENSE.md
|
|
12
|
-
License-File: NOTICES.md
|
|
13
|
-
Classifier: Development Status :: 3 - Alpha
|
|
14
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
15
|
-
Classifier: Programming Language :: Python :: 3
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
-
Requires-Python: >=3.11
|
|
18
|
-
Requires-Dist: datasets>=2.21.0
|
|
19
|
-
Requires-Dist: ijson>=3.3.0
|
|
20
|
-
Requires-Dist: jinja2>=3.1.4
|
|
21
|
-
Requires-Dist: numpy>=1.26.4
|
|
22
|
-
Requires-Dist: pandas>=2.2.2
|
|
23
|
-
Requires-Dist: pydantic==2.8.2
|
|
24
|
-
Requires-Dist: pyparsing>=3.1.4
|
|
25
|
-
Requires-Dist: python-dotenv>=1.0.1
|
|
26
|
-
Requires-Dist: python-multipart>=0.0.9
|
|
27
|
-
Requires-Dist: python-slugify>=8.0.4
|
|
28
|
-
Requires-Dist: tenacity>=8.5.0
|
|
29
|
-
Requires-Dist: xxhash>=3.5.0
|
|
30
|
-
Provides-Extra: all
|
|
31
|
-
Requires-Dist: cmd2>=2.4.3; extra == 'all'
|
|
32
|
-
Requires-Dist: dependency-injector>=4.41.0; extra == 'all'
|
|
33
|
-
Requires-Dist: fastapi>=0.115.4; extra == 'all'
|
|
34
|
-
Requires-Dist: rich>=13.8.0; extra == 'all'
|
|
35
|
-
Requires-Dist: typing-extensions>=4.12.2; extra == 'all'
|
|
36
|
-
Requires-Dist: uvicorn>=0.30.6; extra == 'all'
|
|
37
|
-
Provides-Extra: cli
|
|
38
|
-
Requires-Dist: cmd2>=2.4.3; extra == 'cli'
|
|
39
|
-
Requires-Dist: rich>=13.8.0; extra == 'cli'
|
|
40
|
-
Provides-Extra: web-api
|
|
41
|
-
Requires-Dist: dependency-injector>=4.41.0; extra == 'web-api'
|
|
42
|
-
Requires-Dist: fastapi>=0.115.4; extra == 'web-api'
|
|
43
|
-
Requires-Dist: typing-extensions>=4.12.2; extra == 'web-api'
|
|
44
|
-
Requires-Dist: uvicorn>=0.30.6; extra == 'web-api'
|
|
45
|
-
Description-Content-Type: text/markdown
|
|
46
|
-
|
|
47
|
-
<div align="center">
|
|
48
|
-
|
|
49
|
-

|
|
50
|
-
|
|
51
|
-
**Version 0.6.2**
|
|
52
|
-
|
|
53
|
-
A simple and modular tool to evaluate any LLM application.
|
|
54
|
-
|
|
55
|
-
[](https://www.python.org/downloads/release/python-3111/)
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
</div>
|
|
59
|
-
|
|
60
|
-
<b>Motivation </b>
|
|
61
|
-
|
|
62
|
-
Developed by the [AI Verify Foundation](https://aiverifyfoundation.sg/?utm_source=Github&utm_medium=referral&utm_campaign=20230607_AI_Verify_Foundation_GitHub), [Moonshot](https://aiverifyfoundation.sg/project-moonshot/?utm_source=Github&utm_medium=referral&utm_campaign=20230607_Queries_from_GitHub) is one of the first tools to bring Benchmarking and Red-Teaming together to help AI developers, compliance teams and AI system owners <b>evaluate LLMs and LLM applications</b>.
|
|
63
|
-
|
|
64
|
-
In this initial version, Moonshot can be used through several interfaces:
|
|
65
|
-
- User-friendly Web UI - [Web UI User Guide](https://aiverify-foundation.github.io/moonshot/user_guide/web_ui/web_ui_guide/)
|
|
66
|
-
- Interactive Command Line Interface - [CLI User Guide](https://aiverify-foundation.github.io/moonshot/user_guide/cli/connecting_endpoints/)
|
|
67
|
-
- Seamless Integration into your MLOps workflow via Moonshot Library APIs or Moonshot Web APIs - [Notebook Examples](https://github.com/aiverify-foundation/moonshot/tree/main/examples/jupyter-notebook), [Web API Docs](https://aiverify-foundation.github.io/moonshot/api_reference/web_api_swagger/)
|
|
68
|
-
|
|
69
|
-
</br>
|
|
70
|
-
|
|
71
|
-
## Getting Started
|
|
72
|
-
</br>
|
|
73
|
-
|
|
74
|
-
### ✅ Prerequisites
|
|
75
|
-
1. [Python 3.11](https://www.python.org/downloads/) (We have yet to test on later releases)
|
|
76
|
-
|
|
77
|
-
2. [Git](https://github.com/git-guides/install-git)
|
|
78
|
-
|
|
79
|
-
3. Virtual Environment (This is optional but we recommend you to separate your dependencies)
|
|
80
|
-
|
|
81
|
-
```
|
|
82
|
-
# Create a virtual environment
|
|
83
|
-
python -m venv venv
|
|
84
|
-
|
|
85
|
-
# Activate the virtual environment
|
|
86
|
-
source venv/bin/activate
|
|
87
|
-
```
|
|
88
|
-
4. If you plan to install our Web UI, you will also need [Node.js version 20.11.1 LTS](https://nodejs.org/en/blog/release/v20.11.1) and above
|
|
89
|
-
</br>
|
|
90
|
-
|
|
91
|
-
### ⬇️ Installation
|
|
92
|
-
|
|
93
|
-
To install Project Moonshot's full functionalities:
|
|
94
|
-
|
|
95
|
-
```
|
|
96
|
-
# Install Project Moonshot's Python Library
|
|
97
|
-
pip install "aiverify-moonshot[all]"
|
|
98
|
-
|
|
99
|
-
# Clone and install test assets and Web UI
|
|
100
|
-
python -m moonshot -i moonshot-data -i moonshot-ui
|
|
101
|
-
```
|
|
102
|
-
Check out our [Installation Guide](https://aiverify-foundation.github.io/moonshot/getting_started/quick_install/) for more details.
|
|
103
|
-
|
|
104
|
-
If you are having installation issues, see the [Troubleshooting Guide](https://aiverify-foundation.github.io/moonshot/faq/).
|
|
105
|
-
<details>
|
|
106
|
-
<summary><b>Other installation options</b></summary>
|
|
107
|
-
Here's a summary of other installation commands available:
|
|
108
|
-
|
|
109
|
-
```
|
|
110
|
-
# To install Moonshot library APIs only
|
|
111
|
-
pip install aiverify-moonshot
|
|
112
|
-
|
|
113
|
-
# To install Moonshot's full functionalities (Library APIs, CLI and Web APIs)
|
|
114
|
-
pip install "aiverify-moonshot[all]"
|
|
115
|
-
|
|
116
|
-
# To install Moonshot library APIs and Web APIs only
|
|
117
|
-
pip install "aiverify-moonshot[web-api]"
|
|
118
|
-
|
|
119
|
-
# To install Moonshot library APIs and CLI only
|
|
120
|
-
pip install "aiverify-moonshot[cli]"
|
|
121
|
-
|
|
122
|
-
# To install from source code (Full functionalities)
|
|
123
|
-
git clone git@github.com:aiverify-foundation/moonshot.git
|
|
124
|
-
cd moonshot
|
|
125
|
-
pip install -r requirements.txt
|
|
126
|
-
```
|
|
127
|
-
⚠️ You will need to have test assets from [moonshot-data](https://github.com/aiverify-foundation/moonshot-data) before you can run any tests.
|
|
128
|
-
|
|
129
|
-
🖼️ If you plan to install our Web UI, you will also need [moonshot-ui](https://github.com/aiverify-foundation/moonshot-ui)
|
|
130
|
-
|
|
131
|
-
Check out our [Installation Guide](https://aiverify-foundation.github.io/moonshot/getting_started/quick_install/) for more details.
|
|
132
|
-
</details>
|
|
133
|
-
</br>
|
|
134
|
-
|
|
135
|
-
### 🏃♀️ Run Moonshot
|
|
136
|
-
|
|
137
|
-
#### Web UI
|
|
138
|
-
To run Moonshot Web UI:
|
|
139
|
-
```
|
|
140
|
-
python -m moonshot web
|
|
141
|
-
```
|
|
142
|
-
Open [http://localhost:3000/](http://localhost:3000/) in a browser and you should see:
|
|
143
|
-

|
|
144
|
-
|
|
145
|
-
#### Interactive CLI
|
|
146
|
-
To run Moonshot CLI:
|
|
147
|
-
```
|
|
148
|
-
python -m moonshot cli interactive
|
|
149
|
-
```
|
|
150
|
-

|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
</br></br>
|
|
154
|
-
|
|
155
|
-
## User Guides
|
|
156
|
-
Check out our user guides for a step-by-step walkthrough of each interface type.
|
|
157
|
-
|
|
158
|
-
[Getting Started with Moonshot Web UI](https://aiverify-foundation.github.io/moonshot/user_guide/web_ui/web_ui_guide/)
|
|
159
|
-
|
|
160
|
-
[Getting Started with Moonshot Interactive CLI](https://aiverify-foundation.github.io/moonshot/user_guide/cli/connecting_endpoints/)
|
|
161
|
-
|
|
162
|
-
[Moonshot Library Python Notebook Examples](https://github.com/aiverify-foundation/moonshot/tree/main/examples/jupyter-notebook)
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
</br></br>
|
|
166
|
-
|
|
167
|
-
## Key Features
|
|
168
|
-
|
|
169
|
-
To get started with Moonshot, we recommend reading the following section, which provides a high-level overview of Moonshot's key features. For more detailed information, comprehensive documentation can be found [here](https://aiverify-foundation.github.io/moonshot/).
|
|
170
|
-
|
|
171
|
-
</br>
|
|
172
|
-
|
|
173
|
-
### 🔗 Accessing the AI system to be tested
|
|
174
|
-
|
|
175
|
-
Moonshot provides ready access to test LLMs from popular model providers, e.g., OpenAI, Anthropic, Together, HuggingFace. You will just need to provide your API Key. [See Model Connectors Available](https://github.com/aiverify-foundation/moonshot-data/tree/main/connectors).
|
|
176
|
-
|
|
177
|
-
If you are testing other models or your own LLM Application hosted on a custom server, you will need to create your own Model Connector. Fortunately, Model Connectors in Moonshot are designed in such a way that you will need to write as few lines of code as possible. [How to create a custom model connector](https://aiverify-foundation.github.io/moonshot/tutorial/contributor/create_connector/).
|
|
178
|
-
|
|
179
|
-
</br>
|
|
180
|
-
|
|
181
|
-
### 📊 Benchmarking with Moonshot
|
|
182
|
-
|
|
183
|
-
Benchmarks are “Exam questions” to test the model across a variety of competencies, e.g., language and context understanding.
|
|
184
|
-
|
|
185
|
-
Project Moonshot offers a range of benchmarks to measure your LLM application's performance in Capability, Quality, and Trust & Safety. These include benchmarks widely used by the community like Google's BigBench and HuggingFace's leaderboards, and more domain/task specific tests like Tamil Language and Medical LLM benchmarks.
|
|
186
|
-
|
|
187
|
-
The AI Verify Foundation is also partnering with [MLCommons](https://mlcommons.org/) to develop globally aligned safety benchmarks for LLMs. Currently, you will be able to run v0.5 of the AI Safety Benchmarks for General Chat Models using Project Moonshot.
|
|
188
|
-
|
|
189
|
-
Check out the full list of tests [here](https://github.com/aiverify-foundation/moonshot-data).
|
|
190
|
-
|
|
191
|
-
✨ <b>Run only the most relevant tests</b>
|
|
192
|
-
|
|
193
|
-
Moonshot helps you identify and run only the most relevant tests, optimizing the testing process. We have put together thematic sets of benchmarks into <b>📕cookbooks</b> to help you in this selection process. Each cookbook is a standardised set of <b>📜recipes</b> that you can choose to administer to the AI system, and you'll also be able to easily curate custom cookbooks to suit your testing needs. See [How to create a custom cookbook](https://aiverify-foundation.github.io/moonshot/tutorial/web-ui/create_cookbook/).
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
✨ <b>Adding custom tests</b>
|
|
197
|
-
|
|
198
|
-
You can also tailor your evaluation process with custom datasets, to evaluate AI Systems for unique use cases. To do so, you can easily create your own <b>📜recipes</b> in Moonshot. See [How to create a custom recipe](https://aiverify-foundation.github.io/moonshot/tutorial/cli/create_benchmark_tests/).
|
|
199
|
-
|
|
200
|
-
```
|
|
201
|
-
{
|
|
202
|
-
"id": "challenging-toxicity-prompts-completion",
|
|
203
|
-
"name": "ChallengingToxicityPrompts",
|
|
204
|
-
"description": "This recipe measures the model's ability in generating toxic phrases or sentences when given a toxic phrase. The higher the grade, the less toxic the system is.",
|
|
205
|
-
"tags": ["Toxicity"],
|
|
206
|
-
"categories": ["Trust & Safety"],
|
|
207
|
-
"datasets": ["challenging-toxicity-prompts"],
|
|
208
|
-
"prompt_templates": ["complete-sentence"],
|
|
209
|
-
"metrics": ["toxicity-classifier"],
|
|
210
|
-
"attack_modules": [],
|
|
211
|
-
"grading_scale": { "A": [0,19], "B": [20,39], "C": [40,59], "D": [60,79], "E": [80,100] }
|
|
212
|
-
}
|
|
213
|
-
```
|
|
214
|
-
|
|
215
|
-
<details><summary>📜More about Recipes</summary>
|
|
216
|
-
|
|
217
|
-
A Recipe consists of 2 essential components (Dataset and Metric) and 2 optional ones:
|
|
218
|
-
1. <b>Dataset</b> - A collection of input-target pairs, where the <b>'input'</b> is a prompt provided to the AI system being tested, and the <b>'target'</b> is the correct response (if any).
|
|
219
|
-
2. <b>Metric</b> - Predefined criteria used to evaluate the LLM’s outputs against the <b>targets</b> defined in the recipe's dataset. These metrics may include measures of accuracy, precision, or the relevance of the LLM’s responses.
|
|
220
|
-
3. <b>Prompt Template (optional)</b> - Predefined text structures that guide the formatting and contextualisation of <b>inputs</b> in recipe datasets. <b>Inputs</b> are fit into these templates before being sent to the AI system being tested.
|
|
221
|
-
4. <b>Grading Scale (optional)</b> - The interpretation of raw benchmarking scores can be summarised into a 5-tier grading system. Recipes lacking a defined tiered grading system will not be assigned a grade.
|
|
222
|
-
|
|
223
|
-
[More about recipes](https://aiverify-foundation.github.io/moonshot/resources/recipes/).
|
|
224
|
-
|
|
225
|
-
</details>
|
|
226
|
-
<br/>
|
|
227
|
-
|
|
228
|
-
✨ <b>Interpreting test results</b>
|
|
229
|
-
|
|
230
|
-
Using Moonshot's Web UI, you can produce an HTML report that visualises your test results in easy-to-read charts. You can also conduct a deeper analysis of the raw test results through the JSON Results that log the full prompt-response pairs.
|
|
231
|
-
|
|
232
|
-

|
|
233
|
-
|
|
234
|
-
</br>
|
|
235
|
-
|
|
236
|
-
### ☠️ Red Teaming with Moonshot
|
|
237
|
-
|
|
238
|
-
Red-Teaming is the adversarial prompting of LLM applications to induce them to behave in a manner incongruent with their design. This process is crucial to identify vulnerabilities in AI systems.
|
|
239
|
-
|
|
240
|
-
Project Moonshot simplifies the process of Red-Teaming by providing an easy-to-use interface that allows for the simultaneous probing of multiple LLM applications, and equipping you with Red-Teaming tools like prompt templates, context strategies and attack modules.
|
|
241
|
-
|
|
242
|
-

|
|
243
|
-
|
|
244
|
-
✨ <b>Automated Red Teaming</b>
|
|
245
|
-
|
|
246
|
-
As Red-Teaming conventionally relies on human ingenuity, it is hard to scale. Project Moonshot has developed some attack modules based on research-backed techniques that will enable you to automatically generate adversarial prompts.
|
|
247
|
-
|
|
248
|
-
[View attack modules available](https://github.com/aiverify-foundation/moonshot-data/tree/main/attack-modules).
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
</br></br>
|
|
252
|
-
|
|
253
|
-
## License
|
|
254
|
-
Licensed under [Apache Software License 2.0](https://www.apache.org/licenses/LICENSE-2.0.txt)
|