azure-ai-evaluation 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- azure/ai/evaluation/__init__.py +0 -16
- azure/ai/evaluation/_common/rai_service.py +1 -1
- azure/ai/evaluation/_common/utils.py +1 -1
- azure/ai/evaluation/_converters/__init__.py +1 -1
- azure/ai/evaluation/_converters/_ai_services.py +4 -4
- azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +9 -4
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +42 -22
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +1 -1
- azure/ai/evaluation/_evaluate/_eval_run.py +1 -1
- azure/ai/evaluation/_evaluate/_evaluate.py +84 -68
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -89
- azure/ai/evaluation/_evaluate/_utils.py +3 -3
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +1 -1
- azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +1 -1
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +1 -1
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +1 -1
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +1 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +21 -0
- azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
- azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
- azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
- azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
- azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
- azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
- azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +1 -1
- azure/ai/evaluation/_legacy/_batch_engine/_status.py +1 -1
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/red_team/__init__.py +19 -0
- azure/ai/evaluation/{_red_team → red_team}/_attack_objective_generator.py +3 -0
- azure/ai/evaluation/{_red_team → red_team}/_attack_strategy.py +3 -0
- azure/ai/evaluation/{_red_team → red_team}/_red_team.py +96 -67
- azure/ai/evaluation/red_team/_red_team_result.py +382 -0
- azure/ai/evaluation/{_red_team → red_team}/_utils/constants.py +2 -1
- azure/ai/evaluation/{_red_team → red_team}/_utils/formatting_utils.py +23 -22
- azure/ai/evaluation/{_red_team → red_team}/_utils/logging_utils.py +1 -1
- azure/ai/evaluation/{_red_team → red_team}/_utils/strategy_utils.py +8 -4
- azure/ai/evaluation/simulator/_simulator.py +1 -1
- {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/METADATA +13 -2
- {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/RECORD +50 -40
- azure/ai/evaluation/_red_team/_red_team_result.py +0 -246
- azure/ai/evaluation/_red_team/_utils/__init__.py +0 -3
- azure/ai/evaluation/simulator/_tracing.py +0 -89
- /azure/ai/evaluation/{_red_team → red_team}/_callback_chat_target.py +0 -0
- /azure/ai/evaluation/{_red_team → red_team}/_default_converter.py +0 -0
- /azure/ai/evaluation/{_red_team → red_team/_utils}/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/top_level.txt +0 -0
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
azure/ai/evaluation/__init__.py,sha256=
|
|
1
|
+
azure/ai/evaluation/__init__.py,sha256=kAtHb6ouzfFDk8KOVaLdraayQCJmnj6SSM-YZ3sAy4Y,3417
|
|
2
2
|
azure/ai/evaluation/_constants.py,sha256=mrIKAUZvISRcaVpv1fAzIUDbwWS2bFlzCAMp1eB5jsY,3052
|
|
3
3
|
azure/ai/evaluation/_exceptions.py,sha256=v70aKkYfXqCj0FDDXsC41OrDjrfnXzgXUC6G07wwOdE,5344
|
|
4
4
|
azure/ai/evaluation/_http_utils.py,sha256=1bGce6pKAL-vmaUGRPxVX7DVO05XVQ8YPIwIQ3q7mfA,17221
|
|
5
5
|
azure/ai/evaluation/_model_configurations.py,sha256=MNN6cQlz7P9vNfHmfEKsUcly3j1FEOEFsA8WV7GPuKQ,4043
|
|
6
6
|
azure/ai/evaluation/_user_agent.py,sha256=O2y-QPBAcw7w7qQ6M2aRPC3Vy3TKd789u5lcs2yuFaI,290
|
|
7
|
-
azure/ai/evaluation/_version.py,sha256=
|
|
7
|
+
azure/ai/evaluation/_version.py,sha256=YpsqP-vu09qp1HVH74yl4WSsHSEc4bGuEPdb2hj15Z8,229
|
|
8
8
|
azure/ai/evaluation/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
9
|
azure/ai/evaluation/_azure/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
|
|
10
10
|
azure/ai/evaluation/_azure/_clients.py,sha256=N1V-LyQkItPuoKl0aieypFPdGSRSld9lQqH1x-n3L7U,9119
|
|
@@ -14,8 +14,8 @@ azure/ai/evaluation/_common/__init__.py,sha256=LHTkf6dMLLxikrGNgbUuREBVQcs4ORHR6
|
|
|
14
14
|
azure/ai/evaluation/_common/_experimental.py,sha256=GVtSn9r1CeR_yEa578dJVNDJ3P24eqe8WYdH7llbiQY,5694
|
|
15
15
|
azure/ai/evaluation/_common/constants.py,sha256=7gwcSbcJr-KHA-7Llmpuo4kap_l3X5-emAn2LMb_wk4,2281
|
|
16
16
|
azure/ai/evaluation/_common/math.py,sha256=d4bwWe35_RWDIZNcbV1BTBbHNx2QHQ4-I3EofDyyNE0,2863
|
|
17
|
-
azure/ai/evaluation/_common/rai_service.py,sha256=
|
|
18
|
-
azure/ai/evaluation/_common/utils.py,sha256=
|
|
17
|
+
azure/ai/evaluation/_common/rai_service.py,sha256=7izMNrZxLDMjbL5gAWmU9aUnFHv7Yb0OQU-A-1XnVcA,28304
|
|
18
|
+
azure/ai/evaluation/_common/utils.py,sha256=SqZfm_DXhZC6JBVvdtzMEuLott2B6uWNVFz-rVSSSiA,18392
|
|
19
19
|
azure/ai/evaluation/_common/raiclient/__init__.py,sha256=SaoPfbEPhdoXpIKqOPmRVNyYky1mW5c7UobHt08ksAI,1153
|
|
20
20
|
azure/ai/evaluation/_common/raiclient/_client.py,sha256=ZnFRcpZ0UD3MeFWnJ44AbkbUqPHsvGM502ra28pm_No,5812
|
|
21
21
|
azure/ai/evaluation/_common/raiclient/_configuration.py,sha256=exABHzDcOl5295p5LnWxL82x21z6_ruyBbigW3WoQRA,4246
|
|
@@ -38,21 +38,21 @@ azure/ai/evaluation/_common/raiclient/models/_patch.py,sha256=P7PMm3Gbjlk56lI6B_
|
|
|
38
38
|
azure/ai/evaluation/_common/raiclient/operations/__init__.py,sha256=Ah41eszCE4ZDISukKSdOWQQcZGv0TbDduaT35Q8PQz8,925
|
|
39
39
|
azure/ai/evaluation/_common/raiclient/operations/_operations.py,sha256=_-B8zU6GQ4RciGvBcAoVNlfGdNXaqf2XsS3EP-kYu10,50944
|
|
40
40
|
azure/ai/evaluation/_common/raiclient/operations/_patch.py,sha256=P7PMm3Gbjlk56lI6B_Ra43hSW5qGMEmN3cNYGH5uZ3s,674
|
|
41
|
-
azure/ai/evaluation/_converters/__init__.py,sha256=
|
|
42
|
-
azure/ai/evaluation/_converters/_ai_services.py,sha256=
|
|
41
|
+
azure/ai/evaluation/_converters/__init__.py,sha256=UwArlb5cGLN4pRUm8nG7mXsmxhgdIving2OslYhQDAg,179
|
|
42
|
+
azure/ai/evaluation/_converters/_ai_services.py,sha256=U2s-KdCAv4ja-Xo0nGUhjIQEoyg9QmSj2Dqt73uufUo,37091
|
|
43
43
|
azure/ai/evaluation/_converters/_models.py,sha256=LlrB7Vj_uW3-Ng06b3EHahRXf690W4j2XfYihS7dejo,11029
|
|
44
44
|
azure/ai/evaluation/_evaluate/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
|
|
45
|
-
azure/ai/evaluation/_evaluate/_eval_run.py,sha256=
|
|
46
|
-
azure/ai/evaluation/_evaluate/_evaluate.py,sha256=
|
|
47
|
-
azure/ai/evaluation/_evaluate/_utils.py,sha256=
|
|
45
|
+
azure/ai/evaluation/_evaluate/_eval_run.py,sha256=Bu_7uV8-reRllUChXZDjILpXX8ym57VIUaKvUNhono0,21985
|
|
46
|
+
azure/ai/evaluation/_evaluate/_evaluate.py,sha256=QO4H8-G53qif8NnsYFEULmr3eWQ4sudpf6gOzhnaNDo,40647
|
|
47
|
+
azure/ai/evaluation/_evaluate/_utils.py,sha256=Cn6BpKzRkIXeywB1zbYMvEBvJXWIIlvPVy_2z8nOfFE,14289
|
|
48
48
|
azure/ai/evaluation/_evaluate/_batch_run/__init__.py,sha256=cPLi_MJ_pCp8eKBxJbiSoxgTnN3nDLuaP57dMkKuyhg,552
|
|
49
49
|
azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py,sha256=fbDkx_qhYlBSGrxoRLPFiNccgd3pToDnLRmBb0QWzmM,3941
|
|
50
50
|
azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py,sha256=dTZYdQGweGzEN6OHtn1jOmGG767AJ7RJwfHoCCeRddg,2761
|
|
51
|
-
azure/ai/evaluation/_evaluate/_batch_run/code_client.py,sha256=
|
|
52
|
-
azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py,sha256=
|
|
53
|
-
azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py,sha256=
|
|
54
|
-
azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py,sha256=
|
|
55
|
-
azure/ai/evaluation/_evaluate/_telemetry/__init__.py,sha256=
|
|
51
|
+
azure/ai/evaluation/_evaluate/_batch_run/code_client.py,sha256=n4JxDAFNnetfpAj4FyhtZms3kuGmDWXCBOogqeO4F98,8607
|
|
52
|
+
azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py,sha256=NXlXHyQ9TFIcXdozmMqc3CmOsyzh2N9EexprBhQwoTQ,3737
|
|
53
|
+
azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py,sha256=8lt0mZfJrPHUeGCdxjlLzB4CZNDt4OuOmYQX9J1hgCw,4666
|
|
54
|
+
azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py,sha256=OCiCnLKRYzNkdm6mo-WSviXa-HbCltq-G9ym6gLIdO4,1658
|
|
55
|
+
azure/ai/evaluation/_evaluate/_telemetry/__init__.py,sha256=tBZ98BC8sIuANCTZsgONQblR-Vrw2oIcRqjjyHxbZio,3513
|
|
56
56
|
azure/ai/evaluation/_evaluators/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
|
|
57
57
|
azure/ai/evaluation/_evaluators/_bleu/__init__.py,sha256=quKKO0kvOSkky5hcoNBvgBuMeeVRFCE9GSv70mAdGP4,260
|
|
58
58
|
azure/ai/evaluation/_evaluators/_bleu/_bleu.py,sha256=ZX12bWcDop7HX5hXT2im2KLIJTWik9ok67zK2hZo1WY,4429
|
|
@@ -62,9 +62,9 @@ azure/ai/evaluation/_evaluators/_coherence/__init__.py,sha256=GRqcSCQse02Spyki0U
|
|
|
62
62
|
azure/ai/evaluation/_evaluators/_coherence/_coherence.py,sha256=IFk4RYa0h39i9GbtUYDph78ELx9GTJ6gzk4-oBtdL34,5378
|
|
63
63
|
azure/ai/evaluation/_evaluators/_coherence/coherence.prompty,sha256=ANvh9mDFW7KMejrgdWqBLjj4SIqEO5WW9gg5pE0RLJk,6798
|
|
64
64
|
azure/ai/evaluation/_evaluators/_common/__init__.py,sha256=xAymP_CZy4aPzWplMdXgQUQVDIUEMI-0nbgdm_umFYY,498
|
|
65
|
-
azure/ai/evaluation/_evaluators/_common/_base_eval.py,sha256=
|
|
66
|
-
azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py,sha256=
|
|
67
|
-
azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py,sha256=
|
|
65
|
+
azure/ai/evaluation/_evaluators/_common/_base_eval.py,sha256=zUntxs1kR7fSzevotAy9m4KnuSFQXtpajXN4w0GNo4o,25449
|
|
66
|
+
azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py,sha256=yYFpoCDe2wMFQck0ykbX8IJBBidk6NT1wUTkVFlVSy8,2728
|
|
67
|
+
azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py,sha256=iYIciz7Mkea67OiodbZ4Od9Djtu0GcAwingfzvk-d-Q,6177
|
|
68
68
|
azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py,sha256=fjiGQn2IHbhhzhykA3oguipaNVisoAqUDAEKgvvNN10,8539
|
|
69
69
|
azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py,sha256=gjDBjRxJKwaHbshWH0j2idjlzfzNMnT9a9RL0fQiKeM,2129
|
|
70
70
|
azure/ai/evaluation/_evaluators/_content_safety/__init__.py,sha256=PEYMIybfP64f7byhuTaiq4RiqsYbjqejpW1JsJIG1jA,556
|
|
@@ -83,7 +83,7 @@ azure/ai/evaluation/_evaluators/_fluency/fluency.prompty,sha256=n9v0W9eYwgIO-JSs
|
|
|
83
83
|
azure/ai/evaluation/_evaluators/_gleu/__init__.py,sha256=Ae2EvQ7gqiYAoNO3LwGIhdAAjJPJDfT85rQGKrRrmbA,260
|
|
84
84
|
azure/ai/evaluation/_evaluators/_gleu/_gleu.py,sha256=QnogGvw2JmmV4YZCOvoZMMvoB9JMAjnac2r5h806zmI,4421
|
|
85
85
|
azure/ai/evaluation/_evaluators/_groundedness/__init__.py,sha256=UYNJUeRvBwcSVFyZpdsf29un5eyaDzYoo3QvC1gvlLg,274
|
|
86
|
-
azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py,sha256=
|
|
86
|
+
azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py,sha256=Zzh08Tsz_52NdZkVtMzWVISzG_wLCDNwl-cgeT-o6iQ,7634
|
|
87
87
|
azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty,sha256=v7TOm75DyW_1gOU6gSiZoPcRnHcJ65DrzR2cL_ucWDY,5814
|
|
88
88
|
azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty,sha256=8kNShdfxQvkII7GnqjmdqQ5TNelA2B6cjnqWZk8FFe4,5296
|
|
89
89
|
azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py,sha256=Lr8krXt2yfShFTAuwjTFgrUbO75boLLrRSnF1mriN_Q,280
|
|
@@ -99,7 +99,7 @@ azure/ai/evaluation/_evaluators/_relevance/__init__.py,sha256=JlxytW32Nl8pbE-fI3
|
|
|
99
99
|
azure/ai/evaluation/_evaluators/_relevance/_relevance.py,sha256=KlzKGY19kUO3EU5gqcCGhDDhr_VBaApwXCl9A01UJ_s,6068
|
|
100
100
|
azure/ai/evaluation/_evaluators/_relevance/relevance.prompty,sha256=VHKzVlC2Cv1xuholgIGmerPspspAI0t6IgJ2cxOuYDE,4811
|
|
101
101
|
azure/ai/evaluation/_evaluators/_response_completeness/__init__.py,sha256=U3eqkQQAgRif46B6UGdq3yWefgbkZGJ3ZE2sKoZQDlU,292
|
|
102
|
-
azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py,sha256=
|
|
102
|
+
azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py,sha256=TdTe6wdz2RDqQbTVKg3J0ZqEYuR537KE5lvG6crWY0E,7424
|
|
103
103
|
azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty,sha256=MnlvnNbGucHUAHkrxZrlcgn0zF8jK9gtXiQtVZ4AoBo,7191
|
|
104
104
|
azure/ai/evaluation/_evaluators/_retrieval/__init__.py,sha256=kMu47ZyTZ7f-4Yh6H3KHxswmxitmPJ8FPSk90qgR0XI,265
|
|
105
105
|
azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py,sha256=eddgpihSjZwQH58lcfL0vYh-07H_TOVT8dt_MxRZfBk,5791
|
|
@@ -122,17 +122,28 @@ azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py
|
|
|
122
122
|
azure/ai/evaluation/_evaluators/_xpia/__init__.py,sha256=VMEL8WrpJQeh4sQiOLzP7hRFPnjzsvwfvTzaGCVJPCM,88
|
|
123
123
|
azure/ai/evaluation/_evaluators/_xpia/xpia.py,sha256=64ersTyBvmkl2lvuuh5YJqJ2Ow0RUzjlqxZCnhYhS0g,5958
|
|
124
124
|
azure/ai/evaluation/_legacy/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
|
|
125
|
+
azure/ai/evaluation/_legacy/_adapters/__init__.py,sha256=8idkgtaxIaVaoS7rs5kmnGXLNODpgyRt9Bb-XJkFdt0,692
|
|
126
|
+
azure/ai/evaluation/_legacy/_adapters/_configuration.py,sha256=aDukrNbyXz9zPHYKpmXrLthnJoJQziP1WL793nIuC28,1677
|
|
127
|
+
azure/ai/evaluation/_legacy/_adapters/_constants.py,sha256=MmTUbPSCiM4eLtAA5LIsD6sN1B9IuFB-RTuUnxbwZSs,386
|
|
128
|
+
azure/ai/evaluation/_legacy/_adapters/_errors.py,sha256=SSyaGsqGzzrIeOKEGTtIzhKnAIg_oJw5ze5KMPIfw2Y,1175
|
|
129
|
+
azure/ai/evaluation/_legacy/_adapters/_flows.py,sha256=c2C0otzTL73YbIRSfSKWjkpfscAFOEjdBn_oMVCG9Yo,779
|
|
130
|
+
azure/ai/evaluation/_legacy/_adapters/_service.py,sha256=4xlpxSCrH0jWyKVazu-qhe42KgFksKWVPQqDqf7CFGY,423
|
|
131
|
+
azure/ai/evaluation/_legacy/_adapters/client.py,sha256=76XEJJ2GoKJsfHfMSvixU8oSI6ePtJQBG10x9PXVUxM,1770
|
|
132
|
+
azure/ai/evaluation/_legacy/_adapters/entities.py,sha256=4ZYXfawx5w7t0pyEdm7g49eAJwct2QtcZX0ZvB_6uHU,710
|
|
133
|
+
azure/ai/evaluation/_legacy/_adapters/tracing.py,sha256=jm-lkzLA-agRY1meIzs9mFrzJ1A3dBodi6HgjklIjD0,1162
|
|
134
|
+
azure/ai/evaluation/_legacy/_adapters/types.py,sha256=q7n0TtpFxd1WttbUR_Q8ODd8bTcMaJjIrxLXx1onirc,447
|
|
135
|
+
azure/ai/evaluation/_legacy/_adapters/utils.py,sha256=2KdYqfeuHLcfqk1qJRviNoqqsghxBZNmyoGcUTNphl0,1306
|
|
125
136
|
azure/ai/evaluation/_legacy/_batch_engine/__init__.py,sha256=NNX2DhtPVzJCX8kR_QzZ6EkUsdGifvwip2LHEcRwy1Y,594
|
|
126
137
|
azure/ai/evaluation/_legacy/_batch_engine/_config.py,sha256=-B37cIbWOWEB3kTaNjHGLfQNbLa-XAP1gWm0Kvyv82k,1724
|
|
127
138
|
azure/ai/evaluation/_legacy/_batch_engine/_engine.py,sha256=K7N_MKa4EGhFv4-1W7MeGeaRY7LEHA579x04DV7vUlc,15146
|
|
128
139
|
azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py,sha256=_QQLowht6ww4wBJbShQBo00Y8HFdaWh-dWd44sGvJBc,2870
|
|
129
140
|
azure/ai/evaluation/_legacy/_batch_engine/_logging.py,sha256=aEZsfS2wqbqVEA-vBsfg0-sCWyssGf2L1oWFmu9cego,11398
|
|
130
141
|
azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py,sha256=NLoHjpUSqUy4CVNYXIgpQ3e7izDFCUIQSwvIEf7oNHY,884
|
|
131
|
-
azure/ai/evaluation/_legacy/_batch_engine/_result.py,sha256=
|
|
142
|
+
azure/ai/evaluation/_legacy/_batch_engine/_result.py,sha256=r8MSaQ8q42vKAiaML36dFsMY-WELUkdnVOLVdK7H-FI,3187
|
|
132
143
|
azure/ai/evaluation/_legacy/_batch_engine/_run.py,sha256=b9eJOorEHcGJqE7cApvJpY3JpUdCv3ZVNFo4ZKlfWNA,4343
|
|
133
144
|
azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py,sha256=KjiKaSjT16L_wLYrZpIeBANtin3fa4Gqd3L-Xjw-59I,3418
|
|
134
145
|
azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py,sha256=4V4Xh7xRepTX9eBFKHkaeoNRBytZ_o__xbX_NiIp64M,9674
|
|
135
|
-
azure/ai/evaluation/_legacy/_batch_engine/_status.py,sha256=
|
|
146
|
+
azure/ai/evaluation/_legacy/_batch_engine/_status.py,sha256=xSpJaMmBroGOL8U_iTpAr_PgyNKaxfawqak6TpuUrkk,780
|
|
136
147
|
azure/ai/evaluation/_legacy/_batch_engine/_trace.py,sha256=4ZbvptRg-GOfP7VxDIkP-aZfLULBelHD3Ecr1VAOYk4,4536
|
|
137
148
|
azure/ai/evaluation/_legacy/_batch_engine/_utils.py,sha256=bfeLHUMcCASBNoL-tcLTuCrOS_XhvTX855LYDsShzlM,2491
|
|
138
149
|
azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py,sha256=hBm4gh_m8BCWDAryKmqO4ZsypuGE1QSh3K_DxfoEKpc,5917
|
|
@@ -142,18 +153,6 @@ azure/ai/evaluation/_legacy/prompty/_exceptions.py,sha256=3EeLGkJGZ3gOq0f-6xoIdG
|
|
|
142
153
|
azure/ai/evaluation/_legacy/prompty/_prompty.py,sha256=nuGVZd1KBFOb0lvXsZziGeObrPmuhY3R5WLKtG3tSaQ,11794
|
|
143
154
|
azure/ai/evaluation/_legacy/prompty/_utils.py,sha256=9OOEBTF9MzcgBUyracbHJMNinNfoaGs7Bm8COsTMVkE,22463
|
|
144
155
|
azure/ai/evaluation/_legacy/prompty/_yaml_utils.py,sha256=pVL6xgTHC3AKKD5plJ4d6hiBuComaOlUww1uqqDQlB4,3350
|
|
145
|
-
azure/ai/evaluation/_red_team/__init__.py,sha256=UwArlb5cGLN4pRUm8nG7mXsmxhgdIving2OslYhQDAg,179
|
|
146
|
-
azure/ai/evaluation/_red_team/_attack_objective_generator.py,sha256=fkKrobdFSnOEsOOpsY6gpEiyIWhi5hV4TImbWj6fBeE,10683
|
|
147
|
-
azure/ai/evaluation/_red_team/_attack_strategy.py,sha256=8n2M-BM8TDRs8bAfZAgwDl0MzIXqf8eeKNsg5V83m_s,1348
|
|
148
|
-
azure/ai/evaluation/_red_team/_callback_chat_target.py,sha256=H58nU6IVEpeDuTZPsxIoYKXAvU3dAWnLRhA8wyaY11s,3071
|
|
149
|
-
azure/ai/evaluation/_red_team/_default_converter.py,sha256=K8G775VVdQLsAqff60BBNFrNRj8JijB7St9XOHmGQGA,775
|
|
150
|
-
azure/ai/evaluation/_red_team/_red_team.py,sha256=Khe5zRBPk5D6CMpc6I2pQZT2Mtl7a4wn6kuim6gYjwg,103691
|
|
151
|
-
azure/ai/evaluation/_red_team/_red_team_result.py,sha256=qPEazh6rhtcj8dEHozZVNxC5JuHRoBDmC-2RfdCOd2k,11312
|
|
152
|
-
azure/ai/evaluation/_red_team/_utils/__init__.py,sha256=UwArlb5cGLN4pRUm8nG7mXsmxhgdIving2OslYhQDAg,179
|
|
153
|
-
azure/ai/evaluation/_red_team/_utils/constants.py,sha256=ROM2jbmw3_VOiRoC2AwoUxVxFxVV5wmN-G-QGhmiH60,2256
|
|
154
|
-
azure/ai/evaluation/_red_team/_utils/formatting_utils.py,sha256=m7vZDv1gzFl1jnaegpSI6o6XQmTwy4fFh1j25w8dG_E,6352
|
|
155
|
-
azure/ai/evaluation/_red_team/_utils/logging_utils.py,sha256=Kviojc9PCyArVSRZ_9qJ00zwIsWobKdw7OlOzKjkFOw,4823
|
|
156
|
-
azure/ai/evaluation/_red_team/_utils/strategy_utils.py,sha256=AMi8PAB2UISXLXzRDNnuJHwPc50RZ7Y5WrkTaArJOCY,7809
|
|
157
156
|
azure/ai/evaluation/_safety_evaluation/__init__.py,sha256=UwArlb5cGLN4pRUm8nG7mXsmxhgdIving2OslYhQDAg,179
|
|
158
157
|
azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
159
158
|
azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py,sha256=5JHNB9TGFldpKxucQPGpJiH4EFSZWLrfwnWNwQ3lSVo,36967
|
|
@@ -163,14 +162,25 @@ azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py,sha256=DtNSeshHipzc6vFnv
|
|
|
163
162
|
azure/ai/evaluation/_vendor/rouge_score/scoring.py,sha256=0sqdiNE-4R_EmTTqyWL9_DAOgl54250H5004tZDGxEE,1878
|
|
164
163
|
azure/ai/evaluation/_vendor/rouge_score/tokenize.py,sha256=IyHVsWY6IFFZdB23cLiJs8iBZ0DXk1mQlWE1xtdjuuk,1826
|
|
165
164
|
azure/ai/evaluation/_vendor/rouge_score/tokenizers.py,sha256=3_-y1TyvyluHuERhSJ5CdXSwnpcMA7aAKU6PCz9wH_Q,1745
|
|
165
|
+
azure/ai/evaluation/red_team/__init__.py,sha256=wAjy_85gzdX0YyUlj6kHWrpo01zz2mgOJ1AbH5EkH8s,613
|
|
166
|
+
azure/ai/evaluation/red_team/_attack_objective_generator.py,sha256=yMK5z91_w6GIzbqTQtUXfABuGeH3nfVgF8feOMd37K4,10765
|
|
167
|
+
azure/ai/evaluation/red_team/_attack_strategy.py,sha256=Qy3K63z98uDg_06lc5-6HG1MgquYXB9Y9Ke0xifeEl0,1430
|
|
168
|
+
azure/ai/evaluation/red_team/_callback_chat_target.py,sha256=H58nU6IVEpeDuTZPsxIoYKXAvU3dAWnLRhA8wyaY11s,3071
|
|
169
|
+
azure/ai/evaluation/red_team/_default_converter.py,sha256=K8G775VVdQLsAqff60BBNFrNRj8JijB7St9XOHmGQGA,775
|
|
170
|
+
azure/ai/evaluation/red_team/_red_team.py,sha256=v8zzv1s_S3LgVMHDu2kwVlY56q4T28vaLmIfvDLjoIE,105658
|
|
171
|
+
azure/ai/evaluation/red_team/_red_team_result.py,sha256=BwHf0KBGe9hqyWeTYDOOuoIxJ0rqkY9lHLUOZ02wPJI,17340
|
|
172
|
+
azure/ai/evaluation/red_team/_utils/__init__.py,sha256=UwArlb5cGLN4pRUm8nG7mXsmxhgdIving2OslYhQDAg,179
|
|
173
|
+
azure/ai/evaluation/red_team/_utils/constants.py,sha256=ctSsuWiavhaGApRgP6fL3OtNGphnCKuesTQOwR833nk,2289
|
|
174
|
+
azure/ai/evaluation/red_team/_utils/formatting_utils.py,sha256=jztERWOb1P3ClOKEr6-sMmLDQ8subjKUVBKVVxeSFcY,6416
|
|
175
|
+
azure/ai/evaluation/red_team/_utils/logging_utils.py,sha256=jjNq37zlJz0KPo9g8NuuhnDJo56T-06ElJ-7yyeD46M,4838
|
|
176
|
+
azure/ai/evaluation/red_team/_utils/strategy_utils.py,sha256=3ID-6rY3t8QqbfwFNx-4VxGDHq0i5VoRFMsGR7mlKvM,8058
|
|
166
177
|
azure/ai/evaluation/simulator/__init__.py,sha256=JbrPZ8pvTBalyX94SvZ9btHNoovX8rbZV03KmzxxWys,552
|
|
167
178
|
azure/ai/evaluation/simulator/_adversarial_scenario.py,sha256=FQ9ZNruqPY4o06uu2uig2MZSbearwAJwieIB26GbPOU,1833
|
|
168
179
|
azure/ai/evaluation/simulator/_adversarial_simulator.py,sha256=RlHk8Y324pvd7golXOygQ5NayPQ2zjT-ekQooldMGtE,23017
|
|
169
180
|
azure/ai/evaluation/simulator/_constants.py,sha256=nCL7_1BnYh6k0XvxudxsDVMbiG9MMEvYw5wO9FZHHZ8,857
|
|
170
181
|
azure/ai/evaluation/simulator/_direct_attack_simulator.py,sha256=FTtWf655dHJF5FLJi0xGSBgIlGWNiVWyqaLDJSud9XA,10199
|
|
171
182
|
azure/ai/evaluation/simulator/_indirect_attack_simulator.py,sha256=nweIU_AkUIR50qLQpjmljf_OkpsCPth2Ebf4vusygCA,10226
|
|
172
|
-
azure/ai/evaluation/simulator/_simulator.py,sha256=
|
|
173
|
-
azure/ai/evaluation/simulator/_tracing.py,sha256=frZ4-usrzINast9F4-ONRzEGGox71y8bYw0UHNufL1Y,3069
|
|
183
|
+
azure/ai/evaluation/simulator/_simulator.py,sha256=aVoPvTQsY7u4O0YYQm4pug368UelKefAxa38dJVZ4ps,36513
|
|
174
184
|
azure/ai/evaluation/simulator/_utils.py,sha256=16NltlywpbMtoFtULwTKqeURguIS1kSKSo3g8uKV8TA,5181
|
|
175
185
|
azure/ai/evaluation/simulator/_conversation/__init__.py,sha256=LOR5h7vSACrv_cmaS6r7KUqjJcHYg7PAgkDbdvOmd4g,17726
|
|
176
186
|
azure/ai/evaluation/simulator/_conversation/_conversation.py,sha256=h8OHq0sWKiTH821tC5zF44CJ-QgutTgDnEYsFEA7Cw0,7635
|
|
@@ -190,8 +200,8 @@ azure/ai/evaluation/simulator/_model_tools/models.py,sha256=bfVm0PV3vfH_8DkdmTMZ
|
|
|
190
200
|
azure/ai/evaluation/simulator/_prompty/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
191
201
|
azure/ai/evaluation/simulator/_prompty/task_query_response.prompty,sha256=2BzSqDDYilDushvR56vMRDmqFIaIYAewdUlUZg_elMg,2182
|
|
192
202
|
azure/ai/evaluation/simulator/_prompty/task_simulate.prompty,sha256=NE6lH4bfmibgMn4NgJtm9_l3PMoHSFrfjjosDJEKM0g,939
|
|
193
|
-
azure_ai_evaluation-1.
|
|
194
|
-
azure_ai_evaluation-1.
|
|
195
|
-
azure_ai_evaluation-1.
|
|
196
|
-
azure_ai_evaluation-1.
|
|
197
|
-
azure_ai_evaluation-1.
|
|
203
|
+
azure_ai_evaluation-1.5.0.dist-info/METADATA,sha256=MS1dXybS1lf1sBR5fdxYw-y82ifENen9vnVnxDIMkKE,35924
|
|
204
|
+
azure_ai_evaluation-1.5.0.dist-info/NOTICE.txt,sha256=4tzi_Yq4-eBGhBvveobWHCgUIVF-ZeouGN0m7hVq5Mk,3592
|
|
205
|
+
azure_ai_evaluation-1.5.0.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
|
206
|
+
azure_ai_evaluation-1.5.0.dist-info/top_level.txt,sha256=S7DhWV9m80TBzAhOFjxDUiNbKszzoThbnrSz5MpbHSQ,6
|
|
207
|
+
azure_ai_evaluation-1.5.0.dist-info/RECORD,,
|
|
@@ -1,246 +0,0 @@
|
|
|
1
|
-
# ---------------------------------------------------------
|
|
2
|
-
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
-
# ---------------------------------------------------------
|
|
4
|
-
from typing import Dict, List, Optional, TypedDict
|
|
5
|
-
import json
|
|
6
|
-
|
|
7
|
-
class _RiskCategorySummary(TypedDict):
|
|
8
|
-
"""Summary of attack success rates across risk categories."""
|
|
9
|
-
overall_asr: float
|
|
10
|
-
overall_total: int
|
|
11
|
-
overall_successful_attacks: int
|
|
12
|
-
hate_unfairness_asr: float
|
|
13
|
-
hate_unfairness_total: int
|
|
14
|
-
hate_unfairness_successful_attacks: int
|
|
15
|
-
violence_asr: float
|
|
16
|
-
violence_total: int
|
|
17
|
-
violence_successful_attacks: int
|
|
18
|
-
sexual_asr: float
|
|
19
|
-
sexual_total: int
|
|
20
|
-
sexual_successful_attacks: int
|
|
21
|
-
self_harm_asr: float
|
|
22
|
-
self_harm_total: int
|
|
23
|
-
self_harm_successful_attacks: int
|
|
24
|
-
|
|
25
|
-
class _AttackTechniqueSummary(TypedDict):
|
|
26
|
-
"""Summary of attack success rates across complexity levels."""
|
|
27
|
-
overall_asr: float
|
|
28
|
-
overall_total: int
|
|
29
|
-
overall_successful_attacks: int
|
|
30
|
-
baseline_asr: float
|
|
31
|
-
baseline_total: int
|
|
32
|
-
baseline_successful_attacks: int
|
|
33
|
-
easy_complexity_asr: float
|
|
34
|
-
easy_complexity_total: int
|
|
35
|
-
easy_complexity_successful_attacks: int
|
|
36
|
-
moderate_complexity_asr: float
|
|
37
|
-
moderate_complexity_total: int
|
|
38
|
-
moderate_complexity_successful_attacks: int
|
|
39
|
-
difficult_complexity_asr: float
|
|
40
|
-
difficult_complexity_total: int
|
|
41
|
-
difficult_complexity_successful_attacks: int
|
|
42
|
-
|
|
43
|
-
class _JointRiskAttackSummaryItem(TypedDict):
|
|
44
|
-
"""Summary of attack success rates for a specific risk category across complexity levels."""
|
|
45
|
-
risk_category: str
|
|
46
|
-
baseline_asr: float
|
|
47
|
-
easy_complexity_asr: float
|
|
48
|
-
moderate_complexity_asr: float
|
|
49
|
-
difficult_complexity_asr: float
|
|
50
|
-
|
|
51
|
-
class _RedTeamingScorecard(TypedDict):
|
|
52
|
-
"""TypedDict representation of a Red Team Agent scorecard with the updated structure.
|
|
53
|
-
|
|
54
|
-
The scorecard contains four main sections:
|
|
55
|
-
- risk_category_summary: Overall metrics by risk category
|
|
56
|
-
- attack_technique_summary: Overall metrics by attack technique complexity
|
|
57
|
-
- joint_risk_attack_summary: Detailed metrics by risk category and complexity level
|
|
58
|
-
- detailed_joint_risk_attack_asr: Detailed ASR information broken down by complexity level, risk category, and converter
|
|
59
|
-
"""
|
|
60
|
-
risk_category_summary: List[_RiskCategorySummary]
|
|
61
|
-
attack_technique_summary: List[_AttackTechniqueSummary]
|
|
62
|
-
joint_risk_attack_summary: List[_JointRiskAttackSummaryItem]
|
|
63
|
-
detailed_joint_risk_attack_asr: Dict[str, Dict[str, Dict[str, float]]]
|
|
64
|
-
|
|
65
|
-
class _AttackObjectiveSource(TypedDict):
|
|
66
|
-
"""Information about how attack objectives were generated."""
|
|
67
|
-
application_scenario: str
|
|
68
|
-
risk_categories: List[str]
|
|
69
|
-
custom_attack_seed_prompts: str
|
|
70
|
-
policy_document: str
|
|
71
|
-
|
|
72
|
-
class _RedTeamingParameters(TypedDict):
|
|
73
|
-
"""TypedDict representation of parameters used in a red team evaluation with the updated structure.
|
|
74
|
-
|
|
75
|
-
This class defines the structure for capturing metadata about an evaluation run, including:
|
|
76
|
-
- attack_objective_generated_from: Information about the source of attack objectives
|
|
77
|
-
- attack_complexity: List of complexity levels used in the attacks (e.g., "Easy", "Moderate", "Difficult")
|
|
78
|
-
- techniques_used: Dictionary mapping complexity levels to lists of converter techniques used
|
|
79
|
-
"""
|
|
80
|
-
attack_objective_generated_from: _AttackObjectiveSource
|
|
81
|
-
attack_complexity: List[str]
|
|
82
|
-
techniques_used: Dict[str, List[str]]
|
|
83
|
-
|
|
84
|
-
class _RiskAssessment(TypedDict):
|
|
85
|
-
"""Assessment of content safety risks in a conversation."""
|
|
86
|
-
violence: Dict[str, str]
|
|
87
|
-
sexual: Dict[str, str]
|
|
88
|
-
self_harm: Dict[str, str]
|
|
89
|
-
hate_unfair: Dict[str, str]
|
|
90
|
-
|
|
91
|
-
class _Conversation(TypedDict):
|
|
92
|
-
"""TypedDict representation of a simulated conversation in a red team evaluation.
|
|
93
|
-
|
|
94
|
-
This class defines the structure for capturing information about a simulated conversation, including:
|
|
95
|
-
- attack_success: Whether the attack was successful
|
|
96
|
-
- attack_technique: The attack technique used in the conversation
|
|
97
|
-
- attack_complexity: The complexity level of the attack
|
|
98
|
-
- conversation: List of messages exchanged in the conversation
|
|
99
|
-
- risk_assessment: Dictionary containing content safety assessment for the conversation
|
|
100
|
-
"""
|
|
101
|
-
attack_success: Optional[bool]
|
|
102
|
-
attack_technique: str
|
|
103
|
-
attack_complexity: str
|
|
104
|
-
risk_category: str
|
|
105
|
-
conversation: List[Dict[str, str]]
|
|
106
|
-
risk_assessment: Optional[_RiskAssessment]
|
|
107
|
-
|
|
108
|
-
class _RedTeamResult(TypedDict):
|
|
109
|
-
"""TypedDict representation of a Red Team Agent evaluation result with the updated structure.
|
|
110
|
-
|
|
111
|
-
This class defines the structure for capturing the results of a red team evaluation, including:
|
|
112
|
-
- redteaming_scorecard: Scorecard containing summary and detailed ASR information
|
|
113
|
-
- redteaming_parameters: Parameters containing metadata about the evaluation run
|
|
114
|
-
- redteaming_data: List of _Conversation objects representing the conversations in the evaluation
|
|
115
|
-
"""
|
|
116
|
-
redteaming_scorecard: _RedTeamingScorecard
|
|
117
|
-
redteaming_parameters: _RedTeamingParameters
|
|
118
|
-
redteaming_data: List[_Conversation]
|
|
119
|
-
studio_url: Optional[str]
|
|
120
|
-
|
|
121
|
-
class RedTeamOutput:
    """Holds the result of a red team run and exposes export helpers for it.

    :param red_team_result: Aggregated result of the run, if available.
    :type red_team_result: Optional[_RedTeamResult]
    :param redteaming_data: Per-conversation records consumed by the export helpers.
    :type redteaming_data: Optional[List[_Conversation]]
    """

    def __init__(
        self,
        red_team_result: Optional["_RedTeamResult"] = None,
        redteaming_data: Optional[List["_Conversation"]] = None,
    ):
        self.red_team_result = red_team_result
        self.redteaming_data = redteaming_data

    @staticmethod
    def _iter_turns(messages: Optional[List[Dict[str, str]]]):
        """Yield ``(user_message, assistant_message)`` for each valid turn.

        Messages are paired positionally (0 with 1, 2 with 3, ...). A trailing
        unpaired message is ignored, and a pair is skipped unless its roles are
        exactly ``"user"`` followed by ``"assistant"``.

        :param messages: Conversation messages; ``None`` is treated as empty.
        :type messages: Optional[List[Dict[str, str]]]
        :returns: An iterator over (user message, assistant message) pairs.
        """
        messages = messages or []
        # zip() stops at the shorter slice, which drops an unpaired last message.
        for user_msg, assistant_msg in zip(messages[0::2], messages[1::2]):
            if user_msg.get("role") == "user" and assistant_msg.get("role") == "assistant":
                yield user_msg, assistant_msg

    def to_json(self) -> str:
        """Serialize the red team result to a JSON string.

        :returns: The ``_RedTeamResult`` as a JSON string, or an empty string when no result is set.
        :rtype: str
        """
        return json.dumps(self.red_team_result) if self.red_team_result else ""

    def to_scorecard(self) -> Optional["_RedTeamingScorecard"]:
        """Extract the scorecard from the red team result.

        :returns: The ``redteaming_scorecard`` entry, or None when it or the result is missing.
        :rtype: Optional[_RedTeamingScorecard]
        """
        return self.red_team_result.get("redteaming_scorecard", None) if self.red_team_result else None

    def to_eval_qr_json_lines(self) -> List[str]:
        """Convert conversations in messages format to query-response JSON lines.

        Each returned string is one JSON object of the form::

            {
                "query": "user message content",
                "response": "assistant message content",
                "risk_category": "risk category",
                "attack_strategy": "strategy name",
                "attack_complexity": "complexity level",
                "attack_success": "true|false",      (if available from evaluation)
                "category": "risk category",         (if available from evaluation)
                "severity_level": "low|medium|high"  (if available from evaluation)
            }

        When several risk assessment entries carry a severity label, the last
        one visited supplies ``category`` and ``severity_level``.

        :returns: One JSON string per user/assistant turn; an empty list when there is no data.
        :rtype: List[str]
        """
        if not self.redteaming_data:
            # Same type as the populated case, so callers can always iterate the result.
            return []

        result_lines = []

        for conversation in self.redteaming_data:
            attack_technique = conversation.get("attack_technique", "")
            attack_complexity = conversation.get("attack_complexity", "")
            risk_category = conversation.get("risk_category", "")
            attack_success = conversation.get("attack_success")
            # The stored value may be None, so normalise to an empty mapping.
            risk_assessment = conversation.get("risk_assessment") or {}

            for user_msg, assistant_msg in self._iter_turns(conversation.get("conversation", [])):
                qr_pair = {
                    "query": user_msg.get("content", ""),
                    "response": assistant_msg.get("content", ""),
                    "risk_category": risk_category,
                    "attack_strategy": attack_technique,
                    "attack_complexity": attack_complexity,
                }

                # Add evaluation data if available
                if attack_success is not None:
                    qr_pair["attack_success"] = str(attack_success).lower()

                # Add risk assessment data if available
                for category, assessment in risk_assessment.items():
                    if assessment and assessment.get("severity_label", None):
                        qr_pair["category"] = category
                        qr_pair["severity_level"] = assessment.get("severity_label", "").lower()
                        # Add threshold if available in the future

                result_lines.append(json.dumps(qr_pair))

        return result_lines

    def attack_simulation(self) -> str:
        """Render the attack simulation data in a human-readable format.

        For every conversation this emits the attack technique, complexity and
        risk category, then each user/assistant turn, then the attack outcome
        and any labelled severity assessments. Sections are separated by blank
        lines.

        :returns: The formatted text, or an empty string when there is no data.
        :rtype: str
        """
        if not self.redteaming_data:
            return ""

        result_lines = []

        for conversation in self.redteaming_data:
            attack_success = conversation.get("attack_success")
            # The stored value may be None, so normalise to an empty mapping.
            risk_assessment = conversation.get("risk_assessment") or {}

            result_lines.append(f"Attack Technique: {conversation.get('attack_technique', '')}")
            result_lines.append(f"Attack Complexity: {conversation.get('attack_complexity', '')}")
            result_lines.append(f"Risk Category: {conversation.get('risk_category', '')}")
            result_lines.append("")

            for user_msg, assistant_msg in self._iter_turns(conversation.get("conversation", [])):
                result_lines.append(f"User: {user_msg.get('content', '')}")
                result_lines.append(f"Assistant: {assistant_msg.get('content', '')}")
                result_lines.append("")

            if attack_success is not None:
                result_lines.append(f"Attack Success: {'Successful' if attack_success else 'Failed'}")
                result_lines.append("")

            for category, assessment in risk_assessment.items():
                if assessment and assessment.get("severity_label", None):
                    result_lines.append(f"Category: {category}")
                    result_lines.append(f"Severity Level: {assessment.get('severity_label', '')}")
                    result_lines.append("")

        return "\n".join(result_lines)
|
|
@@ -1,89 +0,0 @@
|
|
|
1
|
-
# ---------------------------------------------------------
|
|
2
|
-
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
-
# ---------------------------------------------------------
|
|
4
|
-
# pylint: disable=C0103,C0114,C0116,E0401,E0611
|
|
5
|
-
|
|
6
|
-
import functools
|
|
7
|
-
from typing import Callable, TypeVar
|
|
8
|
-
|
|
9
|
-
from promptflow._sdk._telemetry.activity import ActivityType, monitor_operation
|
|
10
|
-
from typing_extensions import ParamSpec
|
|
11
|
-
|
|
12
|
-
P = ParamSpec("P")
|
|
13
|
-
R = TypeVar("R")
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def monitor_adversarial_scenario(activity_name: str = "adversarial.simulator.call"):
    """
    Create a decorator that wraps calls in a monitored activity.

    :param activity_name: The activity name handed to ``monitor_operation``.
    :type activity_name: str
    :returns: A decorator
    :rtype: Callable[[Callable], Callable]
    """

    def decorator(func: Callable[P, R]) -> Callable[P, R]:
        """
        Wrap ``func`` so that each invocation runs through ``monitor_operation``.

        :param func: The function to be decorated.
        :type func: Callable[P, R]
        :returns: The decorated function
        :rtype: Callable[P, R]
        """

        @functools.wraps(func)
        def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
            # Dimensions are read from keyword arguments only; values passed
            # positionally are reported as None ("None" for the scenario).
            dimensions = {
                "scenario": str(kwargs.get("scenario", None)),
                "max_conversation_turns": kwargs.get("max_conversation_turns", None),
                "max_simulation_results": kwargs.get("max_simulation_results", None),
                "jailbreak": kwargs.get("jailbreak", None),
            }
            # The monitor is built per call because the dimensions depend on this call's kwargs.
            monitor = monitor_operation(
                activity_name=activity_name,
                activity_type=ActivityType.PUBLICAPI,
                custom_dimensions=dimensions,
            )
            monitored_func = monitor(func)
            return monitored_func(*args, **kwargs)

        return wrapper

    return decorator
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
def monitor_task_simulator(func: Callable[P, R]) -> Callable[P, R]:
    """
    Wrap a task simulator call in a ``task.simulator.call`` monitored activity.

    :param func: The function to be decorated.
    :type func: Callable[P, R]
    :returns: The decorated function
    :rtype: Callable[P, R]
    """

    @functools.wraps(func)
    def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
        # Only keyword arguments are inspected.
        text = kwargs.get("text")
        user_persona = kwargs.get("user_persona")

        # Report sizes rather than content; unexpected types count as 0.
        text_length = len(text) if isinstance(text, str) else 0
        persona_length = len(user_persona) if isinstance(user_persona, list) else 0

        dimensions = {
            "text_length": text_length,
            "user_persona_length": persona_length,
            "number_of_queries": kwargs.get("num_queries", 0),
            "max_conversation_turns": kwargs.get("max_conversation_turns", 0),
        }
        # The monitor is built per call because the dimensions depend on this call's kwargs.
        monitor = monitor_operation(
            activity_name="task.simulator.call",
            activity_type=ActivityType.PUBLICAPI,
            custom_dimensions=dimensions,
        )
        monitored_func = monitor(func)
        return monitored_func(*args, **kwargs)

    return wrapper
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|