azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. (The original page links to more details here.)

Files changed (299)
  1. azure/ai/evaluation/__init__.py +100 -5
  2. azure/ai/evaluation/{_evaluators/_chat → _aoai}/__init__.py +3 -2
  3. azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +68 -0
  5. azure/ai/evaluation/_aoai/python_grader.py +86 -0
  6. azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
  7. azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
  8. azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
  9. azure/ai/evaluation/_azure/__init__.py +3 -0
  10. azure/ai/evaluation/_azure/_clients.py +204 -0
  11. azure/ai/evaluation/_azure/_envs.py +207 -0
  12. azure/ai/evaluation/_azure/_models.py +227 -0
  13. azure/ai/evaluation/_azure/_token_manager.py +129 -0
  14. azure/ai/evaluation/_common/__init__.py +9 -1
  15. azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +24 -9
  16. azure/ai/evaluation/_common/constants.py +131 -2
  17. azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
  18. azure/ai/evaluation/_common/math.py +89 -0
  19. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  20. azure/ai/evaluation/_common/onedp/_client.py +166 -0
  21. azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
  22. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  23. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  24. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  25. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  26. azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
  27. azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
  28. azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
  29. azure/ai/evaluation/_common/onedp/_validation.py +66 -0
  30. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  31. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  32. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  33. azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
  34. azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
  35. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  36. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
  37. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
  38. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  39. azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
  40. azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
  41. azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
  42. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  43. azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
  44. azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
  45. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  46. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  54. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  55. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  56. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  57. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  58. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  59. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  60. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  61. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  62. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  63. azure/ai/evaluation/_common/rai_service.py +831 -142
  64. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  65. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  66. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  67. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  68. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  69. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  70. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  71. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  72. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  73. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  74. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  75. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  76. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  77. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  78. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  79. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  80. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  81. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  82. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  83. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
  84. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  85. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  86. azure/ai/evaluation/_common/utils.py +870 -34
  87. azure/ai/evaluation/_constants.py +167 -6
  88. azure/ai/evaluation/_converters/__init__.py +3 -0
  89. azure/ai/evaluation/_converters/_ai_services.py +899 -0
  90. azure/ai/evaluation/_converters/_models.py +467 -0
  91. azure/ai/evaluation/_converters/_sk_services.py +495 -0
  92. azure/ai/evaluation/_eval_mapping.py +83 -0
  93. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +17 -0
  94. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
  95. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  96. azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +47 -25
  97. azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +42 -13
  98. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +124 -0
  99. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +62 -0
  100. azure/ai/evaluation/_evaluate/_eval_run.py +102 -59
  101. azure/ai/evaluation/_evaluate/_evaluate.py +2134 -311
  102. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
  103. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +14 -99
  104. azure/ai/evaluation/_evaluate/_utils.py +289 -40
  105. azure/ai/evaluation/_evaluator_definition.py +76 -0
  106. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +93 -42
  107. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  108. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
  109. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +117 -91
  110. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -39
  111. azure/ai/evaluation/_evaluators/_common/__init__.py +15 -0
  112. azure/ai/evaluation/_evaluators/_common/_base_eval.py +742 -0
  113. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
  114. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +345 -0
  115. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +198 -0
  116. azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
  117. azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
  118. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -86
  119. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +138 -57
  120. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -55
  121. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +133 -54
  122. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +134 -54
  123. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
  124. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
  125. azure/ai/evaluation/_evaluators/_eci/_eci.py +49 -56
  126. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +102 -60
  127. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +115 -92
  128. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -41
  129. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +90 -37
  130. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +318 -82
  131. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +114 -0
  132. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +104 -0
  133. azure/ai/evaluation/{_evaluate/_batch_run_client → _evaluators/_intent_resolution}/__init__.py +3 -4
  134. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
  135. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
  136. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +107 -61
  137. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +104 -77
  138. azure/ai/evaluation/_evaluators/_qa/_qa.py +115 -63
  139. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +182 -98
  140. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +178 -49
  141. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  142. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
  143. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
  144. azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/__init__.py +2 -2
  145. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +148 -0
  146. azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
  147. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +189 -50
  148. azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
  149. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +179 -0
  150. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +102 -91
  151. azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
  152. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  153. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
  154. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
  155. azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
  156. azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
  157. azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
  158. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
  159. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
  160. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  161. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
  162. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
  163. azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
  164. azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
  165. azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
  166. azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
  167. azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
  168. azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
  169. azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
  170. azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
  171. azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
  172. azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
  173. azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
  174. azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
  175. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  176. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
  177. azure/ai/evaluation/_evaluators/_xpia/xpia.py +109 -107
  178. azure/ai/evaluation/_exceptions.py +51 -7
  179. azure/ai/evaluation/_http_utils.py +210 -137
  180. azure/ai/evaluation/_legacy/__init__.py +3 -0
  181. azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
  182. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  183. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  184. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  185. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  186. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  187. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  188. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  189. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  190. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  191. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  192. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  193. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  194. azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
  195. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
  196. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  197. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
  198. azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
  199. azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
  200. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  201. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
  202. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  203. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
  204. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
  205. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  206. azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
  207. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
  208. azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
  209. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
  210. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  211. azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
  212. azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
  213. azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
  214. azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
  215. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  216. azure/ai/evaluation/_model_configurations.py +130 -8
  217. azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
  218. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  219. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
  220. azure/ai/evaluation/_user_agent.py +32 -1
  221. azure/ai/evaluation/_vendor/__init__.py +3 -0
  222. azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
  223. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +324 -0
  224. azure/ai/evaluation/_vendor/rouge_score/scoring.py +59 -0
  225. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +59 -0
  226. azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
  227. azure/ai/evaluation/_version.py +2 -1
  228. azure/ai/evaluation/red_team/__init__.py +22 -0
  229. azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
  230. azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
  231. azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
  232. azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
  233. azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
  234. azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
  235. azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
  236. azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
  237. azure/ai/evaluation/red_team/_default_converter.py +21 -0
  238. azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
  239. azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
  240. azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
  241. azure/ai/evaluation/red_team/_red_team.py +1717 -0
  242. azure/ai/evaluation/red_team/_red_team_result.py +661 -0
  243. azure/ai/evaluation/red_team/_result_processor.py +1708 -0
  244. azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
  245. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
  246. azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
  247. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
  248. azure/ai/evaluation/red_team/_utils/constants.py +72 -0
  249. azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  250. azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  251. azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
  252. azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
  253. azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
  254. azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
  255. azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  256. azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  257. azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
  258. azure/ai/evaluation/simulator/__init__.py +2 -1
  259. azure/ai/evaluation/simulator/_adversarial_scenario.py +26 -1
  260. azure/ai/evaluation/simulator/_adversarial_simulator.py +270 -144
  261. azure/ai/evaluation/simulator/_constants.py +12 -1
  262. azure/ai/evaluation/simulator/_conversation/__init__.py +151 -23
  263. azure/ai/evaluation/simulator/_conversation/_conversation.py +10 -6
  264. azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
  265. azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
  266. azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
  267. azure/ai/evaluation/simulator/_direct_attack_simulator.py +54 -75
  268. azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
  269. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  270. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +26 -5
  271. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +145 -104
  272. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  273. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
  274. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +80 -30
  275. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +117 -45
  276. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +109 -7
  277. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +97 -33
  278. azure/ai/evaluation/simulator/_model_tools/models.py +30 -27
  279. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +6 -10
  280. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -5
  281. azure/ai/evaluation/simulator/_simulator.py +302 -208
  282. azure/ai/evaluation/simulator/_utils.py +31 -13
  283. azure_ai_evaluation-1.13.3.dist-info/METADATA +939 -0
  284. azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
  285. {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
  286. azure_ai_evaluation-1.13.3.dist-info/licenses/NOTICE.txt +70 -0
  287. azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +0 -71
  288. azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -357
  289. azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +0 -157
  290. azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +0 -48
  291. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -65
  292. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -301
  293. azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -54
  294. azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
  295. azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
  296. azure/ai/evaluation/simulator/_tracing.py +0 -89
  297. azure_ai_evaluation-1.0.0b2.dist-info/METADATA +0 -449
  298. azure_ai_evaluation-1.0.0b2.dist-info/RECORD +0 -99
  299. {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
@@ -7,14 +7,16 @@ import copy
7
7
  import logging
8
8
  import time
9
9
  from dataclasses import dataclass
10
- from typing import Any, Callable, Dict, List, Optional, Tuple, Union
11
-
10
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
11
+ import base64
12
+ import re
12
13
  import jinja2
13
14
 
14
15
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
15
16
  from azure.ai.evaluation._http_utils import AsyncHttpPipeline
16
-
17
- from .._model_tools import LLMBase, OpenAIChatCompletionsModel
17
+ from .._model_tools import LLMBase, OpenAIChatCompletionsModel, RAIClient
18
+ from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
19
+ from .._model_tools._template_handler import TemplateParameters
18
20
  from .constants import ConversationRole
19
21
 
20
22
 
@@ -40,7 +42,7 @@ class ConversationTurn:
40
42
  role: "ConversationRole"
41
43
  name: Optional[str] = None
42
44
  message: str = ""
43
- full_response: Optional[Any] = None
45
+ full_response: Optional[Dict[str, Any]] = None
44
46
  request: Optional[Any] = None
45
47
 
46
48
  def to_openai_chat_format(self, reverse: bool = False) -> Dict[str, str]:
@@ -109,7 +111,7 @@ class ConversationBot:
109
111
  role: ConversationRole,
110
112
  model: Union[LLMBase, OpenAIChatCompletionsModel],
111
113
  conversation_template: str,
112
- instantiation_parameters: Dict[str, str],
114
+ instantiation_parameters: TemplateParameters,
113
115
  ) -> None:
114
116
  self.role = role
115
117
  self.conversation_template_orig = conversation_template
@@ -118,13 +120,13 @@ class ConversationBot:
118
120
  )
119
121
  self.persona_template_args = instantiation_parameters
120
122
  if self.role == ConversationRole.USER:
121
- self.name = self.persona_template_args.get("name", role.value)
123
+ self.name: str = cast(str, self.persona_template_args.get("name", role.value))
122
124
  else:
123
- self.name = self.persona_template_args.get("chatbot_name", role.value) or model.name
125
+ self.name = cast(str, self.persona_template_args.get("chatbot_name", role.value)) or model.name
124
126
  self.model = model
125
127
 
126
128
  self.logger = logging.getLogger(repr(self))
127
- self.conversation_starter = None # can either be a dictionary or jinja template
129
+ self.conversation_starter: Optional[Union[str, jinja2.Template, Dict]] = None
128
130
  if role == ConversationRole.USER:
129
131
  if "conversation_starter" in self.persona_template_args:
130
132
  conversation_starter_content = self.persona_template_args["conversation_starter"]
@@ -135,7 +137,7 @@ class ConversationBot:
135
137
  self.conversation_starter = jinja2.Template(
136
138
  conversation_starter_content, undefined=jinja2.StrictUndefined
137
139
  )
138
- except jinja2.exceptions.TemplateSyntaxError: # noqa: F841
140
+ except jinja2.exceptions.TemplateSyntaxError as e: # noqa: F841
139
141
  self.conversation_starter = conversation_starter_content
140
142
  else:
141
143
  self.logger.info(
@@ -144,11 +146,12 @@ class ConversationBot:
144
146
 
145
147
  async def generate_response(
146
148
  self,
147
- session: AsyncHttpPipeline,
149
+ session: Union[AsyncHttpPipeline, AIProjectClient],
148
150
  conversation_history: List[ConversationTurn],
149
151
  max_history: int,
150
152
  turn_number: int = 0,
151
- ) -> Tuple[dict, dict, int, dict]:
153
+ session_state: Optional[Dict[str, Any]] = None,
154
+ ) -> Tuple[dict, dict, float, dict]:
152
155
  """
153
156
  Prompt the ConversationBot for a response.
154
157
 
@@ -161,7 +164,7 @@ class ConversationBot:
161
164
  :param turn_number: Parameters used to query GPT-4 model.
162
165
  :type turn_number: int
163
166
  :return: The response from the ConversationBot.
164
- :rtype: Tuple[dict, dict, int, dict]
167
+ :rtype: Tuple[dict, dict, float, dict]
165
168
  """
166
169
 
167
170
  # check if this is the first turn and the conversation_starter is not None,
@@ -169,11 +172,14 @@ class ConversationBot:
169
172
  if turn_number == 0 and self.conversation_starter is not None:
170
173
  # if conversation_starter is a dictionary, pass it into samples as is
171
174
  if isinstance(self.conversation_starter, dict):
172
- samples = [self.conversation_starter]
175
+ samples: List[Union[str, jinja2.Template, Dict]] = [self.conversation_starter]
173
176
  if isinstance(self.conversation_starter, jinja2.Template):
174
177
  samples = [self.conversation_starter.render(**self.persona_template_args)]
175
178
  else:
176
- samples = [self.conversation_starter] # type: ignore[attr-defined]
179
+ samples = [self.conversation_starter]
180
+ jailbreak_string = self.persona_template_args.get("jailbreak_string", None)
181
+ if jailbreak_string:
182
+ samples = [f"{jailbreak_string} {samples[0]}"]
177
183
  time_taken = 0
178
184
 
179
185
  finish_reason = ["stop"]
@@ -238,7 +244,7 @@ class CallbackConversationBot(ConversationBot):
238
244
  self,
239
245
  callback: Callable,
240
246
  user_template: str,
241
- user_template_parameters: Dict,
247
+ user_template_parameters: TemplateParameters,
242
248
  *args,
243
249
  **kwargs,
244
250
  ) -> None:
@@ -250,18 +256,19 @@ class CallbackConversationBot(ConversationBot):
250
256
 
251
257
  async def generate_response(
252
258
  self,
253
- session: AsyncHttpPipeline,
259
+ session: Union[AsyncHttpPipeline, AIProjectClient],
254
260
  conversation_history: List[Any],
255
261
  max_history: int,
256
262
  turn_number: int = 0,
257
- ) -> Tuple[dict, dict, int, dict]:
263
+ session_state: Optional[Dict[str, Any]] = None,
264
+ ) -> Tuple[dict, dict, float, dict]:
258
265
  chat_protocol_message = self._to_chat_protocol(
259
266
  self.user_template, conversation_history, self.user_template_parameters
260
267
  )
261
268
  msg_copy = copy.deepcopy(chat_protocol_message)
262
269
  result = {}
263
270
  start_time = time.time()
264
- result = await self.callback(msg_copy)
271
+ result = await self.callback(msg_copy, session_state=session_state)
265
272
  end_time = time.time()
266
273
  if not result:
267
274
  result = {
@@ -270,8 +277,6 @@ class CallbackConversationBot(ConversationBot):
270
277
  "id": None,
271
278
  "template_parameters": {},
272
279
  }
273
- self.logger.info("Using user provided callback returning response.")
274
-
275
280
  time_taken = end_time - start_time
276
281
  try:
277
282
  response = {
@@ -289,8 +294,6 @@ class CallbackConversationBot(ConversationBot):
289
294
  blame=ErrorBlame.USER_ERROR,
290
295
  ) from exc
291
296
 
292
- self.logger.info("Parsed callback response")
293
-
294
297
  return response, {}, time_taken, result
295
298
 
296
299
  # Bug 3354264: template is unused in the method - is this intentional?
@@ -307,9 +310,134 @@ class CallbackConversationBot(ConversationBot):
307
310
  }
308
311
 
309
312
 
313
+ class MultiModalConversationBot(ConversationBot):
314
+ """MultiModal Conversation bot that uses a user provided callback to generate responses.
315
+
316
+ :param callback: The callback function to use to generate responses.
317
+ :type callback: Callable
318
+ :param user_template: The template to use for the request.
319
+ :type user_template: str
320
+ :param user_template_parameters: The template parameters to use for the request.
321
+ :type user_template_parameters: Dict
322
+ :param args: Optional arguments to pass to the parent class.
323
+ :type args: Any
324
+ :param kwargs: Optional keyword arguments to pass to the parent class.
325
+ :type kwargs: Any
326
+ """
327
+
328
+ def __init__(
329
+ self,
330
+ callback: Callable,
331
+ user_template: str,
332
+ user_template_parameters: TemplateParameters,
333
+ rai_client: Union[RAIClient, AIProjectClient],
334
+ *args,
335
+ **kwargs,
336
+ ) -> None:
337
+ self.callback = callback
338
+ self.user_template = user_template
339
+ self.user_template_parameters = user_template_parameters
340
+ self.rai_client = rai_client
341
+
342
+ super().__init__(*args, **kwargs)
343
+
344
+ async def generate_response(
345
+ self,
346
+ session: Union[AsyncHttpPipeline, AIProjectClient],
347
+ conversation_history: List[Any],
348
+ max_history: int,
349
+ turn_number: int = 0,
350
+ session_state: Optional[Dict[str, Any]] = None,
351
+ ) -> Tuple[dict, dict, float, dict]:
352
+ previous_prompt = conversation_history[-1]
353
+ chat_protocol_message = await self._to_chat_protocol(conversation_history, self.user_template_parameters)
354
+
355
+ # replace prompt with {image.jpg} tags with image content data.
356
+ conversation_history.pop()
357
+ conversation_history.append(
358
+ ConversationTurn(
359
+ role=previous_prompt.role,
360
+ name=previous_prompt.name,
361
+ message=chat_protocol_message["messages"][0]["content"],
362
+ full_response=previous_prompt.full_response,
363
+ request=chat_protocol_message,
364
+ )
365
+ )
366
+ msg_copy = copy.deepcopy(chat_protocol_message)
367
+ result = {}
368
+ start_time = time.time()
369
+ result = await self.callback(msg_copy)
370
+ end_time = time.time()
371
+ if not result:
372
+ result = {
373
+ "messages": [{"content": "Callback did not return a response.", "role": "assistant"}],
374
+ "finish_reason": ["stop"],
375
+ "id": None,
376
+ "template_parameters": {},
377
+ }
378
+
379
+ time_taken = end_time - start_time
380
+ try:
381
+ response = {
382
+ "samples": [result["messages"][-1]["content"]],
383
+ "finish_reason": ["stop"],
384
+ "id": None,
385
+ }
386
+ except Exception as exc:
387
+ msg = "User provided callback does not conform to chat protocol standard."
388
+ raise EvaluationException(
389
+ message=msg,
390
+ internal_message=msg,
391
+ target=ErrorTarget.CALLBACK_CONVERSATION_BOT,
392
+ category=ErrorCategory.INVALID_VALUE,
393
+ blame=ErrorBlame.USER_ERROR,
394
+ ) from exc
395
+
396
+ return response, chat_protocol_message, time_taken, result
397
+
398
+ async def _to_chat_protocol(self, conversation_history, template_parameters): # pylint: disable=unused-argument
399
+ messages = []
400
+
401
+ for _, m in enumerate(conversation_history):
402
+ if "image:" in m.message:
403
+ content = await self._to_multi_modal_content(m.message)
404
+ messages.append({"content": content, "role": m.role.value})
405
+ else:
406
+ messages.append({"content": m.message, "role": m.role.value})
407
+
408
+ return {
409
+ "template_parameters": template_parameters,
410
+ "messages": messages,
411
+ "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
412
+ }
413
+
414
+ async def _to_multi_modal_content(self, text: str) -> list:
415
+ split_text = re.findall(r"[^{}]+|\{[^{}]*\}", text)
416
+ messages = [
417
+ text.strip("{}").replace("image:", "").strip() if text.startswith("{") else text for text in split_text
418
+ ]
419
+ contents = []
420
+ for msg in messages:
421
+ if msg.startswith("image_understanding/"):
422
+ if isinstance(self.rai_client, RAIClient):
423
+ encoded_image = await self.rai_client.get_image_data(msg)
424
+ else:
425
+ response = self.rai_client.red_teams.get_template_parameters_image(path=msg, stream="true")
426
+ image_data = b"".join(response)
427
+ encoded_image = base64.b64encode(image_data).decode("utf-8")
428
+
429
+ contents.append(
430
+ {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{encoded_image}"}},
431
+ )
432
+ else:
433
+ contents.append({"type": "text", "text": msg})
434
+ return contents
435
+
436
+
310
437
  __all__ = [
311
438
  "ConversationRole",
312
439
  "ConversationBot",
313
440
  "CallbackConversationBot",
441
+ "MultiModalConversationBot",
314
442
  "ConversationTurn",
315
443
  ]
@@ -4,14 +4,14 @@
4
4
 
5
5
  import asyncio
6
6
  import logging
7
- from typing import Callable, Dict, List, Tuple, Union
7
+ from typing import Callable, Dict, List, Optional, Tuple, Union
8
8
 
9
9
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
10
10
  from azure.ai.evaluation.simulator._constants import SupportedLanguages
11
11
  from azure.ai.evaluation.simulator._helpers._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
12
-
13
12
  from ..._http_utils import AsyncHttpPipeline
14
13
  from . import ConversationBot, ConversationTurn
14
+ from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
15
15
 
16
16
 
17
17
  def is_closing_message(response: Union[Dict, str], recursion_depth: int = 0) -> bool:
@@ -73,14 +73,14 @@ def is_closing_message_helper(response: str) -> bool:
73
73
  async def simulate_conversation(
74
74
  *,
75
75
  bots: List[ConversationBot],
76
- session: AsyncHttpPipeline,
76
+ session: Union[AsyncHttpPipeline, AIProjectClient],
77
77
  language: SupportedLanguages,
78
78
  stopping_criteria: Callable[[str], bool] = is_closing_message,
79
79
  turn_limit: int = 10,
80
80
  history_limit: int = 5,
81
81
  api_call_delay_sec: float = 0,
82
82
  logger: logging.Logger = logging.getLogger(__name__),
83
- ) -> Tuple:
83
+ ) -> Tuple[Optional[str], List[ConversationTurn]]:
84
84
  """
85
85
  Simulate a conversation between the given bots.
86
86
 
@@ -99,9 +99,10 @@ async def simulate_conversation(
99
99
  :keyword logger: The logger to use for logging. Defaults to the logger named after the current module.
100
100
  :paramtype logger: logging.Logger
101
101
  :return: Simulation a conversation between the given bots.
102
- :rtype: Tuple
102
+ :rtype: Tuple[Optional[str], List[ConversationTurn]]
103
103
  """
104
104
 
105
+ session_state = {}
105
106
  # Read the first prompt.
106
107
  (first_response, request, _, full_response) = await bots[0].generate_response(
107
108
  session=session,
@@ -110,7 +111,7 @@ async def simulate_conversation(
110
111
  turn_number=0,
111
112
  )
112
113
  if "id" in first_response:
113
- conversation_id = first_response["id"]
114
+ conversation_id: Optional[str] = first_response["id"]
114
115
  else:
115
116
  conversation_id = None
116
117
  first_prompt = first_response["samples"][0]
@@ -150,7 +151,10 @@ async def simulate_conversation(
150
151
  conversation_history=conversation_history,
151
152
  max_history=history_limit,
152
153
  turn_number=current_turn,
154
+ session_state=session_state,
153
155
  )
156
+ if "session_state" in full_response and full_response["session_state"] is not None:
157
+ session_state.update(full_response["session_state"])
154
158
 
155
159
  # check if conversation id is null, which means conversation starter was used. use id from next turn
156
160
  if conversation_id is None and "id" in response:
@@ -12,7 +12,7 @@ OUTPUT_FILE = "openai_api_response.jsonl"
12
12
 
13
13
  # Azure endpoint constants
14
14
  AZUREML_TOKEN_SCOPE = "https://ml.azure.com"
15
- COGNITIVE_SERVICES_TOKEN_SCOPE = "https://cognitiveservices.azure.com/"
15
+ COGNITIVE_SERVICES_TOKEN_SCOPE = "https://ai.azure.com/"
16
16
  AZURE_TOKEN_REFRESH_INTERVAL = 600 # seconds
17
17
  AZURE_ENDPOINT_DOMAIN_VALID_PATTERN_RE = (
18
18
  r"^(?=.{1,255}$)(?!-)[a-zA-Z0-9-]{1,63}(?<!-)"
@@ -0,0 +1,3 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------