azure-ai-evaluation 1.2.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. azure/ai/evaluation/__init__.py +1 -15
  2. azure/ai/evaluation/_common/utils.py +8 -8
  3. azure/ai/evaluation/_constants.py +3 -0
  4. azure/ai/evaluation/_evaluate/_evaluate.py +5 -2
  5. azure/ai/evaluation/_exceptions.py +0 -1
  6. azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
  7. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +640 -0
  8. azure/ai/evaluation/_version.py +2 -1
  9. azure/ai/evaluation/simulator/_adversarial_simulator.py +10 -3
  10. azure/ai/evaluation/simulator/_conversation/__init__.py +4 -5
  11. azure/ai/evaluation/simulator/_conversation/_conversation.py +4 -0
  12. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +2 -0
  13. {azure_ai_evaluation-1.2.0.dist-info → azure_ai_evaluation-1.3.0.dist-info}/METADATA +7 -1
  14. {azure_ai_evaluation-1.2.0.dist-info → azure_ai_evaluation-1.3.0.dist-info}/RECORD +17 -23
  15. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
  16. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
  17. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
  18. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
  19. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
  20. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
  21. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
  22. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
  23. {azure_ai_evaluation-1.2.0.dist-info → azure_ai_evaluation-1.3.0.dist-info}/NOTICE.txt +0 -0
  24. {azure_ai_evaluation-1.2.0.dist-info → azure_ai_evaluation-1.3.0.dist-info}/WHEEL +0 -0
  25. {azure_ai_evaluation-1.2.0.dist-info → azure_ai_evaluation-1.3.0.dist-info}/top_level.txt +0 -0
@@ -7,6 +7,7 @@ import asyncio
  import logging
  import random
  from typing import Any, Callable, Dict, List, Optional, Union, cast
+ import uuid

  from tqdm import tqdm

@@ -187,6 +188,8 @@ class AdversarialSimulator:
  )
  self._ensure_service_dependencies()
  templates = await self.adversarial_template_handler._get_content_harm_template_collections(scenario.value)
+ simulation_id = str(uuid.uuid4())
+ logger.warning("Use simulation_id to help debug the issue: %s", str(simulation_id))
  concurrent_async_task = min(concurrent_async_task, 1000)
  semaphore = asyncio.Semaphore(concurrent_async_task)
  sim_results = []
@@ -236,6 +239,7 @@ class AdversarialSimulator:
  language=language,
  semaphore=semaphore,
  scenario=scenario,
+ simulation_id=simulation_id,
  )
  )
  )
@@ -298,9 +302,10 @@ class AdversarialSimulator:
  language: SupportedLanguages,
  semaphore: asyncio.Semaphore,
  scenario: Union[AdversarialScenario, AdversarialScenarioJailbreak],
+ simulation_id: str = "",
  ) -> List[Dict]:
  user_bot = self._setup_bot(
- role=ConversationRole.USER, template=template, parameters=parameters, scenario=scenario
+ role=ConversationRole.USER, template=template, parameters=parameters, scenario=scenario, simulation_id=simulation_id
  )
  system_bot = self._setup_bot(
  target=target, role=ConversationRole.ASSISTANT, template=template, parameters=parameters, scenario=scenario
@@ -329,7 +334,7 @@ class AdversarialSimulator:
  )

  def _get_user_proxy_completion_model(
- self, template_key: str, template_parameters: TemplateParameters
+ self, template_key: str, template_parameters: TemplateParameters, simulation_id: str = ""
  ) -> ProxyChatCompletionsModel:
  return ProxyChatCompletionsModel(
  name="raisvc_proxy_model",
@@ -340,6 +345,7 @@ class AdversarialSimulator:
  api_version="2023-07-01-preview",
  max_tokens=1200,
  temperature=0.0,
+ simulation_id=simulation_id,
  )

  def _setup_bot(
@@ -350,10 +356,11 @@ class AdversarialSimulator:
  parameters: TemplateParameters,
  target: Optional[Callable] = None,
  scenario: Union[AdversarialScenario, AdversarialScenarioJailbreak],
+ simulation_id: str = "",
  ) -> ConversationBot:
  if role is ConversationRole.USER:
  model = self._get_user_proxy_completion_model(
- template_key=template.template_name, template_parameters=parameters
+ template_key=template.template_name, template_parameters=parameters, simulation_id=simulation_id,
  )
  return ConversationBot(
  role=role,
@@ -128,19 +128,15 @@ class ConversationBot:
  self.conversation_starter: Optional[Union[str, jinja2.Template, Dict]] = None
  if role == ConversationRole.USER:
  if "conversation_starter" in self.persona_template_args:
- print(self.persona_template_args)
  conversation_starter_content = self.persona_template_args["conversation_starter"]
  if isinstance(conversation_starter_content, dict):
  self.conversation_starter = conversation_starter_content
- print(f"Conversation starter content: {conversation_starter_content}")
  else:
  try:
  self.conversation_starter = jinja2.Template(
  conversation_starter_content, undefined=jinja2.StrictUndefined
  )
- print("Successfully created a Jinja2 template for the conversation starter.")
  except jinja2.exceptions.TemplateSyntaxError as e: # noqa: F841
- print(f"Template syntax error: {e}. Using raw content.")
  self.conversation_starter = conversation_starter_content
  else:
  self.logger.info(
@@ -153,6 +149,7 @@ class ConversationBot:
  conversation_history: List[ConversationTurn],
  max_history: int,
  turn_number: int = 0,
+ session_state: Optional[Dict[str, Any]] = None,
  ) -> Tuple[dict, dict, float, dict]:
  """
  Prompt the ConversationBot for a response.
@@ -262,6 +259,7 @@ class CallbackConversationBot(ConversationBot):
  conversation_history: List[Any],
  max_history: int,
  turn_number: int = 0,
+ session_state: Optional[Dict[str, Any]] = None,
  ) -> Tuple[dict, dict, float, dict]:
  chat_protocol_message = self._to_chat_protocol(
  self.user_template, conversation_history, self.user_template_parameters
@@ -269,7 +267,7 @@ class CallbackConversationBot(ConversationBot):
  msg_copy = copy.deepcopy(chat_protocol_message)
  result = {}
  start_time = time.time()
- result = await self.callback(msg_copy)
+ result = await self.callback(msg_copy, session_state=session_state)
  end_time = time.time()
  if not result:
  result = {
@@ -348,6 +346,7 @@ class MultiModalConversationBot(ConversationBot):
  conversation_history: List[Any],
  max_history: int,
  turn_number: int = 0,
+ session_state: Optional[Dict[str, Any]] = None,
  ) -> Tuple[dict, dict, float, dict]:
  previous_prompt = conversation_history[-1]
  chat_protocol_message = await self._to_chat_protocol(conversation_history, self.user_template_parameters)
@@ -101,6 +101,7 @@ async def simulate_conversation(
  :rtype: Tuple[Optional[str], List[ConversationTurn]]
  """

+ session_state = {}
  # Read the first prompt.
  (first_response, request, _, full_response) = await bots[0].generate_response(
  session=session,
@@ -149,7 +150,10 @@ async def simulate_conversation(
  conversation_history=conversation_history,
  max_history=history_limit,
  turn_number=current_turn,
+ session_state=session_state,
  )
+ if "session_state" in full_response and full_response["session_state"] is not None:
+ session_state.update(full_response["session_state"])

  # check if conversation id is null, which means conversation starter was used. use id from next turn
  if conversation_id is None and "id" in response:
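The `_conversation` hunks above thread a shared `session_state` dictionary through `simulate_conversation` and the bots' `generate_response` methods, and `CallbackConversationBot` now forwards it to the user-supplied target via `self.callback(msg_copy, session_state=session_state)`; whatever the target returns under `"session_state"` is merged back into that dictionary before the next turn. Below is a minimal sketch of a target callback written against that contract — the signature follows the chat-protocol callback shape the simulators document, and the turn-counter logic is purely illustrative:

```python
from typing import Any, Dict, List, Optional


async def callback(
    messages: Dict[str, List[Dict[str, Any]]],
    stream: bool = False,
    session_state: Optional[Dict[str, Any]] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    # session_state is the dict the simulator now carries between turns (new in 1.3.0).
    state = dict(session_state or {})
    state["turns_seen"] = state.get("turns_seen", 0) + 1

    # A real target would call the application under test here; this just echoes.
    last_user_message = messages["messages"][-1]["content"]
    reply = {
        "role": "assistant",
        "content": f"(turn {state['turns_seen']}) acknowledged: {last_user_message}",
    }

    return {
        "messages": messages["messages"] + [reply],
        "stream": stream,
        # Anything returned here is merged back into the shared session_state
        # by simulate_conversation before the next turn.
        "session_state": state,
        "context": context,
    }
```

Note that `CallbackConversationBot` now always passes `session_state` as a keyword argument, so targets appear to need a `session_state` parameter (or `**kwargs`) in their signature; the documented callback protocol already includes it.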
@@ -89,6 +89,7 @@ class ProxyChatCompletionsModel(OpenAIChatCompletionsModel):
  self.tkey = template_key
  self.tparam = template_parameters
  self.result_url: Optional[str] = None
+ self.simulation_id: Optional[str] = kwargs.pop("simulation_id", "")

  super().__init__(name=name, **kwargs)

@@ -169,6 +170,7 @@ class ProxyChatCompletionsModel(OpenAIChatCompletionsModel):
  "Content-Type": "application/json",
  "X-CV": f"{uuid.uuid4()}",
  "X-ModelType": self.model or "",
+ "x-ms-client-request-id": self.simulation_id,
  }
  # add all additional headers
  headers.update(self.additional_headers) # type: ignore[arg-type]
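Together with the `_adversarial_simulator.py` hunks earlier in this diff, the change above completes the new debugging aid: each adversarial simulation run generates one `uuid4`, logs it at warning level ("Use simulation_id to help debug the issue: ..."), and stamps it on every proxied completion request as the `x-ms-client-request-id` header so a run can be correlated with service-side logs. A minimal sketch of surfacing that ID when driving the simulator — the project and credential values are placeholders, and the scenario name and call signature follow the simulator's documented usage rather than anything shown in this diff:

```python
import asyncio
import logging

from azure.identity import DefaultAzureCredential
from azure.ai.evaluation.simulator import AdversarialScenario, AdversarialSimulator

# The simulation_id is emitted via logger.warning(...), so enabling WARNING-level
# logging is enough to see it on the console.
logging.basicConfig(level=logging.WARNING)

azure_ai_project = {
    "subscription_id": "<subscription_id>",
    "resource_group_name": "<resource_group_name>",
    "project_name": "<project_name>",
}


async def callback(messages, stream=False, session_state=None, context=None):
    # Trivial target: answer every adversarial prompt with a fixed refusal.
    reply = {"role": "assistant", "content": "I can't help with that."}
    return {
        "messages": messages["messages"] + [reply],
        "stream": stream,
        "session_state": session_state,
        "context": context,
    }


async def main() -> None:
    simulator = AdversarialSimulator(
        azure_ai_project=azure_ai_project, credential=DefaultAzureCredential()
    )
    outputs = await simulator(
        scenario=AdversarialScenario.ADVERSARIAL_QA,
        target=callback,
        max_conversation_turns=1,
        max_simulation_results=1,
    )
    # The "Use simulation_id to help debug the issue: <uuid>" warning printed during
    # the run is the value sent as x-ms-client-request-id on each proxied request.
    print(outputs)


asyncio.run(main())
```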
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: azure-ai-evaluation
- Version: 1.2.0
+ Version: 1.3.0
  Summary: Microsoft Azure Evaluation Library for Python
  Home-page: https://github.com/Azure/azure-sdk-for-python
  Author: Microsoft Corporation
@@ -377,6 +377,12 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con

  # Release History

+ ## 1.3.0 (2025-02-28)
+
+ ### Breaking Changes
+ - Multimodal-specific evaluators `ContentSafetyMultimodalEvaluator`, `ViolenceMultimodalEvaluator`, `SexualMultimodalEvaluator`, `SelfHarmMultimodalEvaluator`, `HateUnfairnessMultimodalEvaluator`, and `ProtectedMaterialMultimodalEvaluator` have been removed. Please use `ContentSafetyEvaluator`, `ViolenceEvaluator`, `SexualEvaluator`, `SelfHarmEvaluator`, `HateUnfairnessEvaluator`, and `ProtectedMaterialEvaluator` instead.
+ - The metric name in `ProtectedMaterialEvaluator`'s output has changed from `protected_material.fictional_characters_label` to `protected_material.fictional_characters_defect_rate`, making it consistent with the other evaluators' metric names (which end with `_defect_rate`).
+
  ## 1.2.0 (2025-01-27)

  ### Features Added
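For consumers hit by the breaking change above, migration is essentially a class rename: the multimodal conversation payload (text plus `image_url` content parts) stays as it was, and the general-purpose content safety evaluators accept it directly. A hedged before/after sketch, reusing the conversation shape from the removed evaluators' docstrings; the placeholder project values and `DefaultAzureCredential` are illustrative:

```python
from azure.identity import DefaultAzureCredential
from azure.ai.evaluation import ContentSafetyEvaluator  # replaces ContentSafetyMultimodalEvaluator

azure_ai_project = {
    "subscription_id": "<subscription_id>",
    "resource_group_name": "<resource_group_name>",
    "project_name": "<project_name>",
}

conversation = {
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's in this image?"},
                {"type": "image_url", "image_url": {"url": "<image url or base64 encoded image>"}},
            ],
        },
        {
            "role": "assistant",
            "content": "This picture shows an astronaut standing in the desert.",
        },
    ]
}

# 1.2.0: eval_fn = ContentSafetyMultimodalEvaluator(credential, azure_ai_project)
# 1.3.0: the standard evaluator handles the same multimodal conversation input.
eval_fn = ContentSafetyEvaluator(credential=DefaultAzureCredential(), azure_ai_project=azure_ai_project)
result = eval_fn(conversation=conversation)
print(result)
```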
@@ -1,10 +1,10 @@
- azure/ai/evaluation/__init__.py,sha256=MFxJRoKfSsP_Qlfq0FwynxNf4csNAfTYPQX7jdXc9RU,2757
- azure/ai/evaluation/_constants.py,sha256=a7eCgdG6Kid79ebAMu0rPNH7foRF5Aii0K5YQI6cNPc,2765
- azure/ai/evaluation/_exceptions.py,sha256=MsTbgsPGYPzIxs7MyLKzSeiVKEoCxYkVjONzNfv2tXA,5162
+ azure/ai/evaluation/__init__.py,sha256=3lV2dhUqPzUz4lBo8DoEr5o4PEF_Lh5yWQtuLDUQZCM,2192
+ azure/ai/evaluation/_constants.py,sha256=Avp26R4qPqaze024EJXzAX8A457DExF5rnIKMWcE8X4,2881
+ azure/ai/evaluation/_exceptions.py,sha256=o3AjCyIAIBo_rkJAH_TsAT-hEZ4SMKd9Uo0eLmD9VHY,5085
  azure/ai/evaluation/_http_utils.py,sha256=1bGce6pKAL-vmaUGRPxVX7DVO05XVQ8YPIwIQ3q7mfA,17221
  azure/ai/evaluation/_model_configurations.py,sha256=MNN6cQlz7P9vNfHmfEKsUcly3j1FEOEFsA8WV7GPuKQ,4043
  azure/ai/evaluation/_user_agent.py,sha256=O2y-QPBAcw7w7qQ6M2aRPC3Vy3TKd789u5lcs2yuFaI,290
- azure/ai/evaluation/_version.py,sha256=aIrrVLGzX0UDxMjpkbe8HTOCqRr6Y9R8tC8XGAOocbE,199
+ azure/ai/evaluation/_version.py,sha256=xsol5X6WDVwo8FFtHWt28TPyzT0is0sJ3N5i9_ikPVI,229
  azure/ai/evaluation/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  azure/ai/evaluation/_azure/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
  azure/ai/evaluation/_azure/_clients.py,sha256=N1V-LyQkItPuoKl0aieypFPdGSRSld9lQqH1x-n3L7U,9119
@@ -15,10 +15,10 @@ azure/ai/evaluation/_common/_experimental.py,sha256=GVtSn9r1CeR_yEa578dJVNDJ3P24
  azure/ai/evaluation/_common/constants.py,sha256=OsExttFGLnTAyZa26jnY5_PCDTb7uJNFqtE2qsRZ1mg,1957
  azure/ai/evaluation/_common/math.py,sha256=d4bwWe35_RWDIZNcbV1BTBbHNx2QHQ4-I3EofDyyNE0,2863
  azure/ai/evaluation/_common/rai_service.py,sha256=DcakzdOour9qNdMXU-8UFfvLb12oexAoiJXG8XFTRBs,26462
- azure/ai/evaluation/_common/utils.py,sha256=MQIZs95gH5je1L-S3twa_WQi071zRu0Dv54lzCI7ZgU,17642
+ azure/ai/evaluation/_common/utils.py,sha256=wssBc9i0JrWwsrvtp45P5_z5ceB7kVGf-KKzpfVQQH4,17594
  azure/ai/evaluation/_evaluate/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
  azure/ai/evaluation/_evaluate/_eval_run.py,sha256=QBtNBwUxqxsIVmKPU-_H8MDFkF4s_bW7arQYXAniRpo,21965
- azure/ai/evaluation/_evaluate/_evaluate.py,sha256=hj1HG9WCjbvAk8iB0MwnVoV-ceQYKKecfyVTlbc3y4A,38934
+ azure/ai/evaluation/_evaluate/_evaluate.py,sha256=pyl0HscBAPllpq385p9lMqR2sqip3RZ5BGfcBa4X334,39068
  azure/ai/evaluation/_evaluate/_utils.py,sha256=sKj_4iN-QjrRlEkiZwA9UNiWozS4LgJcUZ6AWdHrTY4,14231
  azure/ai/evaluation/_evaluate/_batch_run/__init__.py,sha256=Z-TQdSxKTn0bjsF0YosIJMbQFQHDUv_b9zCBu1TeogQ,474
  azure/ai/evaluation/_evaluate/_batch_run/code_client.py,sha256=XQLaXfswF6ReHLpQthHLuLLa65Pts8uawGp7kRqmMDs,8260
@@ -59,14 +59,6 @@ azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty,sh
  azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty,sha256=8kNShdfxQvkII7GnqjmdqQ5TNelA2B6cjnqWZk8FFe4,5296
  azure/ai/evaluation/_evaluators/_meteor/__init__.py,sha256=209na3pPsdmcuYpYHUYtqQybCpc3yZkc93HnRdicSlI,266
  azure/ai/evaluation/_evaluators/_meteor/_meteor.py,sha256=OpugAjIgcTcNQ6g6Rks_8GVhcRiH524PbmBKH3bTefs,4369
- azure/ai/evaluation/_evaluators/_multimodal/__init__.py,sha256=tPvsY0nv8T3VtiiAwJM6wT5A9FhKP2XXwUlCH994xl4,906
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py,sha256=x0l6eLQhxVP85jEyGfFCl27C2okMgD0S3aJ_qrgB3Q8,5219
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py,sha256=X2IVw0YvymDD3e4Vx-TfjqgqtYiAKVhUumjBowCpOmA,2441
- azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py,sha256=ral1AAbP5pfsygDe30MtuwajuydiXoXzzCeuLBzIkWc,3779
- azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py,sha256=gMrfyn3KHcV6SoowuEjR7Fon9vVLN7GOPM4rkJRK6xU,4906
- azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py,sha256=QwOCBb618ZXSs-OoVXyNM65N4ZEL7IZt-S1Nqd8xNbY,3703
- azure/ai/evaluation/_evaluators/_multimodal/_sexual.py,sha256=6zz89yzr_SdldqBVv-3wOErz3H5sBO6wYgNh39aHXmY,3668
- azure/ai/evaluation/_evaluators/_multimodal/_violence.py,sha256=t1h3bY6N7SwlSgP_1P-90KGTsq1oWvTYDJpy_uMvzjA,3694
  azure/ai/evaluation/_evaluators/_protected_material/__init__.py,sha256=eRAQIU9diVXfO5bp6aLWxZoYUvOsrDIfy1gnDOeNTiI,109
  azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py,sha256=IABs1YMBZdIi1u57dPi-aQpSiPWIGxEZ4hyt97jvdNA,4604
  azure/ai/evaluation/_evaluators/_qa/__init__.py,sha256=bcXfT--C0hjym2haqd1B2-u9bDciyM0ThOFtU1Q69sk,244
@@ -86,6 +78,8 @@ azure/ai/evaluation/_evaluators/_similarity/_similarity.py,sha256=AeqJ_OJUAsdu9C
  azure/ai/evaluation/_evaluators/_similarity/similarity.prompty,sha256=eoludASychZoGL625bFCaZai-OY7DIAg90ZLax_o4XE,4594
  azure/ai/evaluation/_evaluators/_xpia/__init__.py,sha256=VMEL8WrpJQeh4sQiOLzP7hRFPnjzsvwfvTzaGCVJPCM,88
  azure/ai/evaluation/_evaluators/_xpia/xpia.py,sha256=Nv14lU7jN0yXKbHgHRXMHEy6pn1rXmesBOYI2Ge9ewk,5849
+ azure/ai/evaluation/_safety_evaluation/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
+ azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py,sha256=HtT6aKdZ4EP6Dx-JuExdysO_CBERFMwRWmGTWeHQ_NU,33890
  azure/ai/evaluation/_vendor/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
  azure/ai/evaluation/_vendor/rouge_score/__init__.py,sha256=03OkyfS_UmzRnHv6-z9juTaJ6OXJoEJM989hgifIZbc,607
  azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py,sha256=DtNSeshHipzc6vFnvx7kbs5viXe4LNq-ZrgllFvfR4U,11299
@@ -94,15 +88,15 @@ azure/ai/evaluation/_vendor/rouge_score/tokenize.py,sha256=IyHVsWY6IFFZdB23cLiJs
  azure/ai/evaluation/_vendor/rouge_score/tokenizers.py,sha256=3_-y1TyvyluHuERhSJ5CdXSwnpcMA7aAKU6PCz9wH_Q,1745
  azure/ai/evaluation/simulator/__init__.py,sha256=JbrPZ8pvTBalyX94SvZ9btHNoovX8rbZV03KmzxxWys,552
  azure/ai/evaluation/simulator/_adversarial_scenario.py,sha256=9rpAPz594tYjxzM3XMeDq6CZSc2yvf5YaNaGC7nzYhM,1710
- azure/ai/evaluation/simulator/_adversarial_simulator.py,sha256=FPZ3OdpGuwCHDVoOZW-f_j7pyK71PfDN3JPh205tW0c,21706
+ azure/ai/evaluation/simulator/_adversarial_simulator.py,sha256=2QEt5hHOKnNqMySt3SNsr-LY22DvFyusVnPkvAyur1I,22100
  azure/ai/evaluation/simulator/_constants.py,sha256=nCL7_1BnYh6k0XvxudxsDVMbiG9MMEvYw5wO9FZHHZ8,857
  azure/ai/evaluation/simulator/_direct_attack_simulator.py,sha256=FTtWf655dHJF5FLJi0xGSBgIlGWNiVWyqaLDJSud9XA,10199
  azure/ai/evaluation/simulator/_indirect_attack_simulator.py,sha256=nweIU_AkUIR50qLQpjmljf_OkpsCPth2Ebf4vusygCA,10226
  azure/ai/evaluation/simulator/_simulator.py,sha256=LBzez7qvObpVjTwmlGS_PfhDLo8pRknh5epra2yo9X8,36484
  azure/ai/evaluation/simulator/_tracing.py,sha256=frZ4-usrzINast9F4-ONRzEGGox71y8bYw0UHNufL1Y,3069
  azure/ai/evaluation/simulator/_utils.py,sha256=16NltlywpbMtoFtULwTKqeURguIS1kSKSo3g8uKV8TA,5181
- azure/ai/evaluation/simulator/_conversation/__init__.py,sha256=s8djzJ58_-CiIA8xHB-SbgeZaq1F7ftrc3qJbpUpUdg,17853
- azure/ai/evaluation/simulator/_conversation/_conversation.py,sha256=qdzGMtCPYMxeGpR91NZTEmmz2RtADTvQGj6C-3EUTw4,7402
+ azure/ai/evaluation/simulator/_conversation/__init__.py,sha256=LOR5h7vSACrv_cmaS6r7KUqjJcHYg7PAgkDbdvOmd4g,17726
+ azure/ai/evaluation/simulator/_conversation/_conversation.py,sha256=h8OHq0sWKiTH821tC5zF44CJ-QgutTgDnEYsFEA7Cw0,7635
  azure/ai/evaluation/simulator/_conversation/constants.py,sha256=3v7zkjPwJAPbSpJYIK6VOZZy70bJXMo_QTVqSFGlq9A,984
  azure/ai/evaluation/simulator/_data_sources/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
  azure/ai/evaluation/simulator/_data_sources/grounding.json,sha256=jqdqHrCgS7hN7K2kXSEcPCmzFjV4cv_qcCSR-Hutwx4,1257075
@@ -111,15 +105,15 @@ azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py,sha256=7BBLH7
  azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py,sha256=BOttMTec3muMiA4OzwD_iW08GTrhja7PL9XVjRCN3jM,3029
  azure/ai/evaluation/simulator/_model_tools/__init__.py,sha256=aMv5apb7uVjuhMF9ohhA5kQmo652hrGIJlhdl3y2R1I,835
  azure/ai/evaluation/simulator/_model_tools/_identity_manager.py,sha256=-hptp2vpJIcfjvtd0E2c7ry00LVh23LxuYGevsNFfgs,6385
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py,sha256=Zg_SzqjCGJ3Wt8hktxz6Y1JEJCcV0V5jBC9N06jQP3k,8984
+ azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py,sha256=D2Am07l2gq_v-vztqDBTX2MxZTUumm5ysaVSL2L0rxQ,9118
  azure/ai/evaluation/simulator/_model_tools/_rai_client.py,sha256=40MGzIXGv7oVshWH7AbOPLCigI4HlMrqbF2Rq5jFMGo,8755
  azure/ai/evaluation/simulator/_model_tools/_template_handler.py,sha256=NQWqjE7csSzkhb2XdW82AoCA-DxixpTrfBxAnOt2Wlc,7075
  azure/ai/evaluation/simulator/_model_tools/models.py,sha256=bfVm0PV3vfH_8DkdmTMZqYVN-G51hZ6Y0TOO-NiysJY,21811
  azure/ai/evaluation/simulator/_prompty/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  azure/ai/evaluation/simulator/_prompty/task_query_response.prompty,sha256=2BzSqDDYilDushvR56vMRDmqFIaIYAewdUlUZg_elMg,2182
  azure/ai/evaluation/simulator/_prompty/task_simulate.prompty,sha256=NE6lH4bfmibgMn4NgJtm9_l3PMoHSFrfjjosDJEKM0g,939
- azure_ai_evaluation-1.2.0.dist-info/METADATA,sha256=NM0mPj138_k-6vWuKICoqUBtDq-TaWGXOeaCLpyT2IU,32156
- azure_ai_evaluation-1.2.0.dist-info/NOTICE.txt,sha256=4tzi_Yq4-eBGhBvveobWHCgUIVF-ZeouGN0m7hVq5Mk,3592
- azure_ai_evaluation-1.2.0.dist-info/WHEEL,sha256=pL8R0wFFS65tNSRnaOVrsw9EOkOqxLrlUPenUYnJKNo,91
- azure_ai_evaluation-1.2.0.dist-info/top_level.txt,sha256=S7DhWV9m80TBzAhOFjxDUiNbKszzoThbnrSz5MpbHSQ,6
- azure_ai_evaluation-1.2.0.dist-info/RECORD,,
+ azure_ai_evaluation-1.3.0.dist-info/METADATA,sha256=msxfA0EnNspmcqbsGuYtdEESA3kLPnEfWBpzf7yFHEw,32885
+ azure_ai_evaluation-1.3.0.dist-info/NOTICE.txt,sha256=4tzi_Yq4-eBGhBvveobWHCgUIVF-ZeouGN0m7hVq5Mk,3592
+ azure_ai_evaluation-1.3.0.dist-info/WHEEL,sha256=pL8R0wFFS65tNSRnaOVrsw9EOkOqxLrlUPenUYnJKNo,91
+ azure_ai_evaluation-1.3.0.dist-info/top_level.txt,sha256=S7DhWV9m80TBzAhOFjxDUiNbKszzoThbnrSz5MpbHSQ,6
+ azure_ai_evaluation-1.3.0.dist-info/RECORD,,
@@ -1,20 +0,0 @@
- # ---------------------------------------------------------
- # Copyright (c) Microsoft Corporation. All rights reserved.
- # ---------------------------------------------------------
- from ._content_safety_multimodal import ContentSafetyMultimodalEvaluator
- from ._content_safety_multimodal_base import ContentSafetyMultimodalEvaluatorBase
- from ._hate_unfairness import HateUnfairnessMultimodalEvaluator
- from ._self_harm import SelfHarmMultimodalEvaluator
- from ._sexual import SexualMultimodalEvaluator
- from ._violence import ViolenceMultimodalEvaluator
- from ._protected_material import ProtectedMaterialMultimodalEvaluator
-
- __all__ = [
- "ContentSafetyMultimodalEvaluator",
- "ContentSafetyMultimodalEvaluatorBase",
- "ViolenceMultimodalEvaluator",
- "SexualMultimodalEvaluator",
- "SelfHarmMultimodalEvaluator",
- "HateUnfairnessMultimodalEvaluator",
- "ProtectedMaterialMultimodalEvaluator",
- ]
@@ -1,132 +0,0 @@
- # ---------------------------------------------------------
- # Copyright (c) Microsoft Corporation. All rights reserved.
- # ---------------------------------------------------------
- import logging
- from concurrent.futures import as_completed
- from typing import Callable, Dict, List, Union
-
- from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor
- from azure.ai.evaluation._common._experimental import experimental
- from azure.ai.evaluation._common.utils import validate_conversation
- from ._hate_unfairness import HateUnfairnessMultimodalEvaluator
- from ._self_harm import SelfHarmMultimodalEvaluator
- from ._sexual import SexualMultimodalEvaluator
- from ._violence import ViolenceMultimodalEvaluator
-
-
- logger = logging.getLogger(__name__)
-
-
- @experimental
- class ContentSafetyMultimodalEvaluator:
- """
- Initialize a content safety multimodal evaluator configured to evaluate content safety metrics
- in a multimodal scenario.
-
- :param credential: The credential required for connecting to the Azure AI project.
- :type credential: ~azure.core.credentials.TokenCredential
- :param azure_ai_project: The scope of the Azure AI project, containing the subscription ID,
- resource group, and project name.
- :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
- :param kwargs: Additional arguments to pass to the evaluator.
- :type kwargs: Any
-
- :return: A function that evaluates multimodal chat messages and generates content safety metrics.
-
- **Usage Example**
-
- .. code-block:: python
-
- azure_ai_project = {
- "subscription_id": "<subscription_id>",
- "resource_group_name": "<resource_group_name>",
- "project_name": "<project_name>",
- }
- eval_fn = ContentSafetyMultimodalEvaluator(azure_ai_project)
- result = eval_fn(conversation=
- {
- "messages": [
- {
- "role": "user",
- "content": [
- {
- "type": "text",
- "text": "What's in this image?"
- },
- {
- "type": "image_url",
- "image_url": {
- "url": "<image url or base64 encoded image>"
- }
- }
- ]
- },
- {
- "role": "assistant",
- "content": "This picture shows an astronaut standing in the desert."
- }
- ]
- }
- )
-
- **Output Format**
-
- .. code-block:: json
-
- {
- "violence": "Medium",
- "violence_score": 5.0,
- "violence_reason": "Some reason",
- "sexual": "Medium",
- "sexual_score": 5.0,
- "sexual_reason": "Some reason",
- "self_harm": "Medium",
- "self_harm_score": 5.0,
- "self_harm_reason": "Some reason",
- "hate_unfairness": "Medium",
- "hate_unfairness_score": 5.0,
- "hate_unfairness_reason": "Some reason"
- }
-
- """
-
- id = "content_safety_multimodal"
- """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
-
- def __init__(self, credential, azure_ai_project, **kwargs):
- self._parallel = kwargs.pop("_parallel", False)
- self._evaluators: List[Callable[..., Dict[str, Union[str, float]]]] = [
- ViolenceMultimodalEvaluator(credential=credential, azure_ai_project=azure_ai_project),
- SexualMultimodalEvaluator(credential=credential, azure_ai_project=azure_ai_project),
- SelfHarmMultimodalEvaluator(credential=credential, azure_ai_project=azure_ai_project),
- HateUnfairnessMultimodalEvaluator(credential=credential, azure_ai_project=azure_ai_project),
- ]
-
- def __call__(self, *, conversation, **kwargs):
- """
- Evaluates content-safety metrics for list of messages.
-
- :keyword conversation: The conversation contains list of messages to be evaluated.
- Each message should have "role" and "content" keys. It supports single turn only.
- :paramtype conversation: ~azure.ai.evaluation.Conversation
- :return: The evaluation score based on the Content Safety Metrics.
- :rtype: Dict[str, Union[float, str]]
- """
- # validate inputs
- validate_conversation(conversation)
- results: Dict[str, Union[str, float]] = {}
- if self._parallel:
- with ThreadPoolExecutor() as executor:
- futures = {
- executor.submit(evaluator, conversation=conversation, **kwargs): evaluator
- for evaluator in self._evaluators
- }
-
- for future in as_completed(futures):
- results.update(future.result())
- else:
- for evaluator in self._evaluators:
- result = evaluator(conversation=conversation, **kwargs)
- results.update(result)
-
- return results
@@ -1,55 +0,0 @@
- # ---------------------------------------------------------
- # Copyright (c) Microsoft Corporation. All rights reserved.
- # ---------------------------------------------------------
- from abc import ABC
- from typing import Union
- from azure.ai.evaluation._common.rai_service import evaluate_with_rai_service_multimodal
- from azure.ai.evaluation._common.constants import EvaluationMetrics, _InternalEvaluationMetrics
- from azure.ai.evaluation._common.utils import validate_conversation
- from azure.core.credentials import TokenCredential
-
-
- class ContentSafetyMultimodalEvaluatorBase(ABC):
- """
- Initialize a evaluator for a specified Evaluation Metric. Base class that is not
- meant to be instantiated by users.
-
- :param metric: The metric to be evaluated.
- :type metric: ~azure.ai.evaluation._evaluators._content_safety.flow.constants.EvaluationMetrics
- :param credential: The credential for connecting to Azure AI project. Required
- :type credential: ~azure.core.credentials.TokenCredential
- :param azure_ai_project: The scope of the Azure AI project.
- It contains subscription id, resource group, and project name.
- :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
- """
-
- def __init__(
- self,
- metric: Union[EvaluationMetrics, _InternalEvaluationMetrics],
- credential: TokenCredential,
- azure_ai_project,
- ):
- self._metric = metric
- self._azure_ai_project = azure_ai_project
- self._credential = credential
-
- async def __call__(self, *, conversation, **kwargs):
- """
- Evaluates content according to this evaluator's metric.
- :keyword conversation: The conversation contains list of messages to be evaluated.
- Each message should have "role" and "content" keys.
- :paramtype conversation: ~azure.ai.evaluation.Conversation
- :return: The evaluation score computation based on the Content Safety metric (self.metric).
- :rtype: Dict[str, Union[float, str]]
- """
- # validate inputs
- validate_conversation(conversation)
- messages = conversation["messages"]
- # Run score computation based on supplied metric.
- result = await evaluate_with_rai_service_multimodal(
- messages=messages,
- metric_name=self._metric,
- project_scope=self._azure_ai_project,
- credential=self._credential,
- )
- return result
@@ -1,100 +0,0 @@
- # ---------------------------------------------------------
- # Copyright (c) Microsoft Corporation. All rights reserved.
- # ---------------------------------------------------------
- from promptflow._utils.async_utils import async_run_allowing_running_loop
- from azure.ai.evaluation._common._experimental import experimental
- from azure.ai.evaluation._common.constants import EvaluationMetrics
- from ._content_safety_multimodal_base import ContentSafetyMultimodalEvaluatorBase
-
-
- @experimental
- class HateUnfairnessMultimodalEvaluator:
- """
- Initialize a hate-unfairness multimodal evaluator for hate unfairness score.
-
- :param credential: The credential for connecting to the Azure AI project. This is required.
- :type credential: ~azure.core.credentials.TokenCredential
- :param azure_ai_project: The scope of the Azure AI project,
- which includes subscription ID, resource group name, and project name.
- :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
-
- **Usage Example**
-
- .. code-block:: python
-
- azure_ai_project = {
- "subscription_id": "<subscription_id>",
- "resource_group_name": "<resource_group_name>",
- "project_name": "<project_name>",
- }
- eval_fn = HateUnfairnessMultimodalEvaluator(azure_ai_project)
- result = eval_fn(conversation=
- {
- "messages": [
- {
- "role": "user",
- "content": [
- {
- "type": "text",
- "text": "What's in this image?"
- },
- {
- "type": "image_url",
- "image_url": {
- "url": "<image url or base64 encoded image>"
- }
- }
- ]
- },
- {
- "role": "assistant",
- "content": "This picture shows an astronaut standing in the desert."
- }
- ]
- }
- )
-
- **Output Format**
-
- .. code-block:: json
-
- {
- "hate_unfairness": "High",
- "hate_unfairness_score": 6.5,
- "hate_unfairness_reason": "Some reason"
- }
-
- """
-
- id = "hate_unfairness_multimodal"
- """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
-
- def __init__(self, credential, azure_ai_project):
- self._async_evaluator = _AsyncHateUnfairnessMultimodalEvaluator(credential, azure_ai_project)
-
- def __call__(self, *, conversation, **kwargs):
- """
- Evaluates hate unfairness content.
-
- :keyword conversation: The conversation contains list of messages to be evaluated.
- Each message should have "role" and "content" keys. It supports single turn only.
- :paramtype conversation: ~azure.ai.evaluation.Conversation
- :return: The hate unfairness score.
- :rtype: Dict[str, Union[float, str]]
- """
- return async_run_allowing_running_loop(self._async_evaluator, conversation=conversation, **kwargs)
-
- def _to_async(self):
- return self._async_evaluator
-
-
- class _AsyncHateUnfairnessMultimodalEvaluator(ContentSafetyMultimodalEvaluatorBase):
- def __init__(self, credential, azure_ai_project):
- super().__init__(
- metric=EvaluationMetrics.HATE_FAIRNESS,
- credential=credential,
- azure_ai_project=azure_ai_project,
- )
-
- async def __call__(self, *, conversation, **kwargs):
- return await super().__call__(conversation=conversation, **kwargs)