azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (299) hide show
  1. azure/ai/evaluation/__init__.py +100 -5
  2. azure/ai/evaluation/{_evaluators/_chat → _aoai}/__init__.py +3 -2
  3. azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +68 -0
  5. azure/ai/evaluation/_aoai/python_grader.py +86 -0
  6. azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
  7. azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
  8. azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
  9. azure/ai/evaluation/_azure/__init__.py +3 -0
  10. azure/ai/evaluation/_azure/_clients.py +204 -0
  11. azure/ai/evaluation/_azure/_envs.py +207 -0
  12. azure/ai/evaluation/_azure/_models.py +227 -0
  13. azure/ai/evaluation/_azure/_token_manager.py +129 -0
  14. azure/ai/evaluation/_common/__init__.py +9 -1
  15. azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +24 -9
  16. azure/ai/evaluation/_common/constants.py +131 -2
  17. azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
  18. azure/ai/evaluation/_common/math.py +89 -0
  19. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  20. azure/ai/evaluation/_common/onedp/_client.py +166 -0
  21. azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
  22. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  23. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  24. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  25. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  26. azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
  27. azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
  28. azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
  29. azure/ai/evaluation/_common/onedp/_validation.py +66 -0
  30. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  31. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  32. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  33. azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
  34. azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
  35. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  36. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
  37. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
  38. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  39. azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
  40. azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
  41. azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
  42. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  43. azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
  44. azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
  45. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  46. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  54. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  55. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  56. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  57. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  58. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  59. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  60. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  61. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  62. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  63. azure/ai/evaluation/_common/rai_service.py +831 -142
  64. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  65. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  66. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  67. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  68. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  69. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  70. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  71. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  72. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  73. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  74. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  75. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  76. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  77. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  78. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  79. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  80. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  81. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  82. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  83. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
  84. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  85. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  86. azure/ai/evaluation/_common/utils.py +870 -34
  87. azure/ai/evaluation/_constants.py +167 -6
  88. azure/ai/evaluation/_converters/__init__.py +3 -0
  89. azure/ai/evaluation/_converters/_ai_services.py +899 -0
  90. azure/ai/evaluation/_converters/_models.py +467 -0
  91. azure/ai/evaluation/_converters/_sk_services.py +495 -0
  92. azure/ai/evaluation/_eval_mapping.py +83 -0
  93. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +17 -0
  94. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
  95. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  96. azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +47 -25
  97. azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +42 -13
  98. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +124 -0
  99. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +62 -0
  100. azure/ai/evaluation/_evaluate/_eval_run.py +102 -59
  101. azure/ai/evaluation/_evaluate/_evaluate.py +2134 -311
  102. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
  103. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +14 -99
  104. azure/ai/evaluation/_evaluate/_utils.py +289 -40
  105. azure/ai/evaluation/_evaluator_definition.py +76 -0
  106. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +93 -42
  107. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  108. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
  109. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +117 -91
  110. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -39
  111. azure/ai/evaluation/_evaluators/_common/__init__.py +15 -0
  112. azure/ai/evaluation/_evaluators/_common/_base_eval.py +742 -0
  113. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
  114. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +345 -0
  115. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +198 -0
  116. azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
  117. azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
  118. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -86
  119. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +138 -57
  120. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -55
  121. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +133 -54
  122. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +134 -54
  123. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
  124. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
  125. azure/ai/evaluation/_evaluators/_eci/_eci.py +49 -56
  126. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +102 -60
  127. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +115 -92
  128. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -41
  129. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +90 -37
  130. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +318 -82
  131. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +114 -0
  132. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +104 -0
  133. azure/ai/evaluation/{_evaluate/_batch_run_client → _evaluators/_intent_resolution}/__init__.py +3 -4
  134. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
  135. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
  136. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +107 -61
  137. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +104 -77
  138. azure/ai/evaluation/_evaluators/_qa/_qa.py +115 -63
  139. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +182 -98
  140. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +178 -49
  141. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  142. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
  143. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
  144. azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/__init__.py +2 -2
  145. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +148 -0
  146. azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
  147. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +189 -50
  148. azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
  149. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +179 -0
  150. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +102 -91
  151. azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
  152. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  153. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
  154. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
  155. azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
  156. azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
  157. azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
  158. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
  159. azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
  160. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  161. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
  162. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
  163. azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
  164. azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
  165. azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
  166. azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
  167. azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
  168. azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
  169. azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
  170. azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
  171. azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
  172. azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
  173. azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
  174. azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
  175. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  176. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
  177. azure/ai/evaluation/_evaluators/_xpia/xpia.py +109 -107
  178. azure/ai/evaluation/_exceptions.py +51 -7
  179. azure/ai/evaluation/_http_utils.py +210 -137
  180. azure/ai/evaluation/_legacy/__init__.py +3 -0
  181. azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
  182. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  183. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  184. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  185. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  186. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  187. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  188. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  189. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  190. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  191. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  192. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  193. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  194. azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
  195. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
  196. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  197. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
  198. azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
  199. azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
  200. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  201. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
  202. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  203. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
  204. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
  205. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  206. azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
  207. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
  208. azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
  209. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
  210. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  211. azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
  212. azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
  213. azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
  214. azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
  215. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  216. azure/ai/evaluation/_model_configurations.py +130 -8
  217. azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
  218. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  219. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
  220. azure/ai/evaluation/_user_agent.py +32 -1
  221. azure/ai/evaluation/_vendor/__init__.py +3 -0
  222. azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
  223. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +324 -0
  224. azure/ai/evaluation/_vendor/rouge_score/scoring.py +59 -0
  225. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +59 -0
  226. azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
  227. azure/ai/evaluation/_version.py +2 -1
  228. azure/ai/evaluation/red_team/__init__.py +22 -0
  229. azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
  230. azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
  231. azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
  232. azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
  233. azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
  234. azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
  235. azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
  236. azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
  237. azure/ai/evaluation/red_team/_default_converter.py +21 -0
  238. azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
  239. azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
  240. azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
  241. azure/ai/evaluation/red_team/_red_team.py +1717 -0
  242. azure/ai/evaluation/red_team/_red_team_result.py +661 -0
  243. azure/ai/evaluation/red_team/_result_processor.py +1708 -0
  244. azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
  245. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
  246. azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
  247. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
  248. azure/ai/evaluation/red_team/_utils/constants.py +72 -0
  249. azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
  250. azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
  251. azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
  252. azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
  253. azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
  254. azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
  255. azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
  256. azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
  257. azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
  258. azure/ai/evaluation/simulator/__init__.py +2 -1
  259. azure/ai/evaluation/simulator/_adversarial_scenario.py +26 -1
  260. azure/ai/evaluation/simulator/_adversarial_simulator.py +270 -144
  261. azure/ai/evaluation/simulator/_constants.py +12 -1
  262. azure/ai/evaluation/simulator/_conversation/__init__.py +151 -23
  263. azure/ai/evaluation/simulator/_conversation/_conversation.py +10 -6
  264. azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
  265. azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
  266. azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
  267. azure/ai/evaluation/simulator/_direct_attack_simulator.py +54 -75
  268. azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
  269. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  270. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +26 -5
  271. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +145 -104
  272. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  273. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
  274. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +80 -30
  275. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +117 -45
  276. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +109 -7
  277. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +97 -33
  278. azure/ai/evaluation/simulator/_model_tools/models.py +30 -27
  279. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +6 -10
  280. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -5
  281. azure/ai/evaluation/simulator/_simulator.py +302 -208
  282. azure/ai/evaluation/simulator/_utils.py +31 -13
  283. azure_ai_evaluation-1.13.3.dist-info/METADATA +939 -0
  284. azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
  285. {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
  286. azure_ai_evaluation-1.13.3.dist-info/licenses/NOTICE.txt +70 -0
  287. azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +0 -71
  288. azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -357
  289. azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +0 -157
  290. azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +0 -48
  291. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -65
  292. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -301
  293. azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -54
  294. azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
  295. azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
  296. azure/ai/evaluation/simulator/_tracing.py +0 -89
  297. azure_ai_evaluation-1.0.0b2.dist-info/METADATA +0 -449
  298. azure_ai_evaluation-1.0.0b2.dist-info/RECORD +0 -99
  299. {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,139 @@
1
+ """
2
+ Logging utilities for Red Team Agent.
3
+
4
+ This module provides consistent logging configuration and helper functions
5
+ for logging throughout the Red Team Agent.
6
+ """
7
+
8
+ import logging
9
+ import os
10
+ from datetime import datetime
11
+
12
+
13
def setup_logger(logger_name="RedTeamLogger", output_dir=None):
    """Configure and return a logger instance for the Red Team Agent.

    Creates two handlers:
    - File handler: Captures all logs at DEBUG level
    - Console handler: Shows WARNING and above for better visibility

    Handlers already attached to the named logger are removed and closed
    before the new ones are added, so calling this function repeatedly for the
    same ``logger_name`` always leaves exactly one file handler and one
    console handler attached.

    :param logger_name: Name to use for the logger
    :type logger_name: str
    :param output_dir: Directory to store log files in. If None, logs are stored in current directory.
    :type output_dir: Optional[str]
    :return: The configured logger instance
    :rtype: logging.Logger
    """
    # Format matches what's expected in test_setup_logger
    log_filename = "redteam.log"

    # If output directory is specified, create path with that directory
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
        log_filepath = os.path.join(output_dir, log_filename)
    else:
        log_filepath = log_filename

    logger = logging.getLogger(logger_name)
    logger.setLevel(logging.DEBUG)

    # Clear any existing handlers (in case logger was already configured).
    # Iterate over a snapshot: removeHandler() mutates logger.handlers, and
    # looping over the live list would skip every second handler. Closing the
    # removed handler releases the log file it may be holding open.
    for handler in list(logger.handlers):
        logger.removeHandler(handler)
        handler.close()

    # File handler - captures all logs at DEBUG level with detailed formatting
    file_handler = logging.FileHandler(log_filepath)
    file_handler.setLevel(logging.DEBUG)
    file_formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(name)s - %(message)s")
    file_handler.setFormatter(file_formatter)
    logger.addHandler(file_handler)

    # Console handler - shows only WARNING and above to reduce output but keep important messages
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.WARNING)
    console_formatter = logging.Formatter("%(levelname)s: %(message)s")
    console_handler.setFormatter(console_formatter)
    logger.addHandler(console_handler)

    # Don't propagate to root logger to avoid duplicate logs
    logger.propagate = False

    return logger
63
+
64
+
65
def log_section_header(logger, section_title):
    """Emit an upper-cased section title framed by two 80-character ``=`` rules.

    All three lines are written at DEBUG level.

    :param logger: The logger instance
    :type logger: logging.Logger
    :param section_title: The title of the section
    :type section_title: str
    """
    rule = "=" * 80
    for line in (rule, section_title.upper(), rule):
        logger.debug(line)
76
+
77
+
78
def log_subsection_header(logger, section_title):
    """Emit a subsection title framed by two 60-character ``-`` rules.

    The title is logged unchanged (no case conversion); all three lines are
    written at DEBUG level.

    :param logger: The logger instance
    :type logger: logging.Logger
    :param section_title: The title of the subsection
    :type section_title: str
    """
    rule = "-" * 60
    for line in (rule, section_title, rule):
        logger.debug(line)
89
+
90
+
91
def log_strategy_start(logger, strategy_name, risk_category):
    """Record, at INFO level, that processing of a strategy has begun.

    :param logger: The logger instance
    :type logger: logging.Logger
    :param strategy_name: The name of the strategy
    :type strategy_name: str
    :param risk_category: The risk category being processed
    :type risk_category: str
    """
    start_message = f"Starting processing of {strategy_name} strategy for {risk_category} risk category"
    logger.info(start_message)
102
+
103
+
104
def log_strategy_completion(logger, strategy_name, risk_category, elapsed_time=None):
    """Log the completion of a strategy processing.

    The elapsed time is appended whenever a value is supplied, including a
    measured duration of ``0`` / ``0.0`` seconds; only ``None`` means
    "not available".

    :param logger: The logger instance
    :type logger: logging.Logger
    :param strategy_name: The name of the strategy
    :type strategy_name: str
    :param risk_category: The risk category being processed
    :type risk_category: str
    :param elapsed_time: The time taken to process in seconds, if available
    :type elapsed_time: Optional[float]
    """
    # Compare against None explicitly: a truthiness test would silently drop
    # a legitimate zero-second duration from the message.
    if elapsed_time is not None:
        logger.info(f"Completed {strategy_name} strategy for {risk_category} risk category in {elapsed_time:.2f}s")
    else:
        logger.info(f"Completed {strategy_name} strategy for {risk_category} risk category")
120
+
121
+
122
def log_error(logger, message, exception=None, context=None):
    """Log an error with additional context if available.

    The final message has the form ``[context] message: exception`` where the
    bracketed context and the exception suffix are included only when the
    corresponding argument is supplied.

    When ``exception`` is given, that exact exception is handed to the logger
    as ``exc_info`` so its traceback is recorded even if this function is
    called outside the ``except`` block that caught it. When it is omitted,
    ``exc_info=True`` is used so that any exception currently being handled
    is still captured.

    :param logger: The logger instance
    :type logger: logging.Logger
    :param message: The error message
    :type message: str
    :param exception: The exception that was raised, if any
    :type exception: Optional[BaseException]
    :param context: Additional context about where the error occurred
    :type context: Optional[str]
    """
    error_msg = message
    if context:
        error_msg = f"[{context}] {error_msg}"
    if exception is not None:
        error_msg = f"{error_msg}: {exception}"
        # logging accepts an exception instance for exc_info and formats
        # that exception's own traceback.
        exc_info = exception
    else:
        exc_info = True
    logger.error(error_msg, exc_info=exc_info)
@@ -0,0 +1,73 @@
1
+ """
2
+ Mapping utilities for metrics and risk categories in Red Team Agent.
3
+ """
4
+
5
+ from typing import Union
6
+ from .._attack_objective_generator import _InternalRiskCategory, RiskCategory
7
+ from azure.ai.evaluation._constants import EvaluationMetrics, _InternalEvaluationMetrics
8
+ from azure.ai.evaluation._common.constants import Tasks, _InternalAnnotationTasks
9
+
10
# Lookup table: risk category -> evaluation metric used to score it.
# Consumed by get_metric_from_risk_category().
RISK_CATEGORY_METRIC_MAP = {
    # Content-harm categories
    RiskCategory.Violence: EvaluationMetrics.VIOLENCE,
    RiskCategory.HateUnfairness: EvaluationMetrics.HATE_FAIRNESS,
    RiskCategory.Sexual: EvaluationMetrics.SEXUAL,
    RiskCategory.SelfHarm: EvaluationMetrics.SELF_HARM,
    # Categories with a dedicated metric
    RiskCategory.ProtectedMaterial: EvaluationMetrics.PROTECTED_MATERIAL,
    RiskCategory.UngroundedAttributes: EvaluationMetrics.UNGROUNDED_ATTRIBUTES,
    _InternalRiskCategory.ECI: _InternalEvaluationMetrics.ECI,
    RiskCategory.CodeVulnerability: EvaluationMetrics.CODE_VULNERABILITY,
    RiskCategory.SensitiveDataLeakage: EvaluationMetrics.SENSITIVE_DATA_LEAKAGE,
    RiskCategory.TaskAdherence: EvaluationMetrics.TASK_ADHERENCE,
    RiskCategory.ProhibitedActions: EvaluationMetrics.PROHIBITED_ACTIONS,
}

# Lookup table: risk category -> annotation task.
# Consumed by get_annotation_task_from_risk_category(). The four content-harm
# categories all share the single CONTENT_HARM task.
RISK_CATEGORY_ANNOTATION_TASK_MAP = {
    # Content-harm categories
    RiskCategory.Violence: Tasks.CONTENT_HARM,
    RiskCategory.HateUnfairness: Tasks.CONTENT_HARM,
    RiskCategory.Sexual: Tasks.CONTENT_HARM,
    RiskCategory.SelfHarm: Tasks.CONTENT_HARM,
    # Categories with a dedicated task
    RiskCategory.ProtectedMaterial: Tasks.PROTECTED_MATERIAL,
    RiskCategory.UngroundedAttributes: Tasks.UNGROUNDED_ATTRIBUTES,
    _InternalRiskCategory.ECI: _InternalAnnotationTasks.ECI,
    RiskCategory.CodeVulnerability: Tasks.CODE_VULNERABILITY,
    RiskCategory.SensitiveDataLeakage: Tasks.SENSITIVE_DATA_LEAKAGE,
    RiskCategory.TaskAdherence: Tasks.TASK_ADHERENCE,
    RiskCategory.ProhibitedActions: Tasks.PROHIBITED_ACTIONS,
}
38
+
39
+
40
def get_metric_from_risk_category(risk_category: Union[RiskCategory]) -> str:
    """Look up the evaluation metric registered for a risk category.

    Categories absent from ``RISK_CATEGORY_METRIC_MAP`` resolve to
    ``EvaluationMetrics.HATE_FAIRNESS``.

    :param risk_category: The risk category to map to an evaluation metric
    :type risk_category: Union[RiskCategory]
    :return: The corresponding evaluation metric
    :rtype: str
    """
    fallback_metric = EvaluationMetrics.HATE_FAIRNESS
    return RISK_CATEGORY_METRIC_MAP.get(risk_category, fallback_metric)
49
+
50
+
51
def get_annotation_task_from_risk_category(risk_category: Union[RiskCategory]) -> str:
    """Look up the annotation task registered for a risk category.

    Categories absent from ``RISK_CATEGORY_ANNOTATION_TASK_MAP`` resolve to
    ``Tasks.CONTENT_HARM``.

    :param risk_category: The risk category to map to an annotation task
    :type risk_category: Union[RiskCategory]
    :return: The corresponding annotation task
    :rtype: str
    """
    fallback_task = Tasks.CONTENT_HARM
    return RISK_CATEGORY_ANNOTATION_TASK_MAP.get(risk_category, fallback_task)
60
+
61
+
62
def get_attack_objective_from_risk_category(risk_category: Union[RiskCategory]) -> str:
    """Return the attack objective string for a risk category.

    ``RiskCategory.UngroundedAttributes`` is special-cased to ``"isa"``; every
    other category yields its own ``.value``.

    :param risk_category: The risk category to map to an attack objective
    :type risk_category: Union[RiskCategory]
    :return: The corresponding attack objective string
    :rtype: str
    """
    is_ungrounded = risk_category == RiskCategory.UngroundedAttributes
    return "isa" if is_ungrounded else risk_category.value
@@ -0,0 +1,46 @@
1
+ """
2
+ Utility functions for handling attack objectives in Red Team Agent.
3
+ """
4
+
5
+ import uuid
6
+ from typing import Dict, Optional
7
+
8
+
9
def extract_risk_subtype(objective: Dict) -> Optional[str]:
    """Extract risk-subtype from an objective's target_harms metadata.

    Searches through the target_harms list in the objective's metadata to find
    the first non-empty risk-subtype value. Missing or malformed metadata
    (absent, ``None``, or not a dictionary) is treated as "no subtype" rather
    than raising.

    :param objective: The objective dictionary containing metadata with target_harms
    :type objective: Dict
    :return: The risk-subtype value if found, None otherwise
    :rtype: Optional[str]
    """
    # "metadata" may be present but explicitly None (or another non-dict
    # value); a chained .get() on the raw value would raise AttributeError.
    metadata = objective.get("metadata")
    if not isinstance(metadata, dict):
        return None

    target_harms = metadata.get("target_harms", [])
    if not isinstance(target_harms, list):
        return None

    for harm in target_harms:
        if not isinstance(harm, dict):
            continue
        subtype_value = harm.get("risk-subtype")
        # Skip empty/None subtypes and keep looking for the first real one.
        if subtype_value:
            return subtype_value
    return None
28
+
29
+
30
def get_objective_id(objective: Dict) -> str:
    """Return a unique identifier for an objective.

    The objective's own ``id`` field wins when it is present and not ``None``
    and is returned as a string. Otherwise a fresh ``generated-<uuid4>``
    identifier is produced, which avoids Python's ``id()`` whose
    memory-address values can be reused after garbage collection.

    :param objective: The objective dictionary
    :type objective: Dict
    :return: A unique identifier for the objective
    :rtype: str
    """
    explicit_id = objective.get("id")
    if explicit_id is None:
        # No usable 'id' field: fall back to a random UUID-based identifier
        return f"generated-{uuid.uuid4()}"
    return str(explicit_id)
@@ -0,0 +1,252 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+ """
5
+ Progress and status management utilities for Red Team Agent.
6
+
7
+ This module provides centralized progress tracking, task status management,
8
+ and user feedback utilities for red team operations.
9
+ """
10
+
11
+ import asyncio
12
+ import time
13
+ from datetime import datetime
14
+ from typing import Dict, Optional, Any
15
+ from tqdm import tqdm
16
+
17
+ from .constants import TASK_STATUS
18
+
19
+
20
+ class ProgressManager:
21
+ """Centralized progress and status tracking for Red Team operations."""
22
+
23
+ def __init__(
24
+ self, total_tasks: int = 0, logger=None, show_progress_bar: bool = True, progress_desc: str = "Processing"
25
+ ):
26
+ """Initialize progress manager.
27
+
28
+ :param total_tasks: Total number of tasks to track
29
+ :param logger: Logger instance for progress messages
30
+ :param show_progress_bar: Whether to show a progress bar
31
+ :param progress_desc: Description for the progress bar
32
+ """
33
+ self.total_tasks = total_tasks
34
+ self.completed_tasks = 0
35
+ self.failed_tasks = 0
36
+ self.timeout_tasks = 0
37
+ self.logger = logger
38
+ self.show_progress_bar = show_progress_bar
39
+ self.progress_desc = progress_desc
40
+
41
+ # Task status tracking
42
+ self.task_statuses: Dict[str, str] = {}
43
+
44
+ # Timing
45
+ self.start_time: Optional[float] = None
46
+ self.end_time: Optional[float] = None
47
+
48
+ # Progress bar
49
+ self.progress_bar: Optional[tqdm] = None
50
+ self.progress_lock = asyncio.Lock()
51
+
52
+ def start(self) -> None:
53
+ """Start progress tracking."""
54
+ self.start_time = time.time()
55
+
56
+ if self.show_progress_bar and self.total_tasks > 0:
57
+ self.progress_bar = tqdm(
58
+ total=self.total_tasks,
59
+ desc=f"{self.progress_desc}: ",
60
+ ncols=100,
61
+ unit="task",
62
+ bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]",
63
+ )
64
+ self.progress_bar.set_postfix({"current": "initializing"})
65
+
66
+ def stop(self) -> None:
67
+ """Stop progress tracking and cleanup."""
68
+ self.end_time = time.time()
69
+
70
+ if self.progress_bar:
71
+ self.progress_bar.close()
72
+ self.progress_bar = None
73
+
74
+ async def update_task_status(self, task_key: str, status: str, details: Optional[str] = None) -> None:
75
+ """Update the status of a specific task.
76
+
77
+ :param task_key: Unique identifier for the task
78
+ :param status: New status for the task
79
+ :param details: Optional details about the status change
80
+ """
81
+ old_status = self.task_statuses.get(task_key)
82
+ self.task_statuses[task_key] = status
83
+
84
+ # Update counters based on status change
85
+ if old_status != status:
86
+ if status == TASK_STATUS["COMPLETED"]:
87
+ self.completed_tasks += 1
88
+ await self._update_progress_bar()
89
+ elif status == TASK_STATUS["FAILED"]:
90
+ self.failed_tasks += 1
91
+ await self._update_progress_bar()
92
+ elif status == TASK_STATUS["TIMEOUT"]:
93
+ self.timeout_tasks += 1
94
+ await self._update_progress_bar()
95
+
96
+ # Log status change
97
+ if self.logger and details:
98
+ self.logger.debug(f"Task {task_key}: {old_status} -> {status} ({details})")
99
+
100
+ async def _update_progress_bar(self) -> None:
101
+ """Update the progress bar display."""
102
+ if not self.progress_bar:
103
+ return
104
+
105
+ async with self.progress_lock:
106
+ self.progress_bar.update(1)
107
+
108
+ completion_pct = (self.completed_tasks / self.total_tasks) * 100 if self.total_tasks > 0 else 0
109
+
110
+ # Calculate time estimates
111
+ if self.start_time:
112
+ elapsed_time = time.time() - self.start_time
113
+ if self.completed_tasks > 0:
114
+ avg_time_per_task = elapsed_time / self.completed_tasks
115
+ remaining_tasks = self.total_tasks - self.completed_tasks - self.failed_tasks - self.timeout_tasks
116
+ est_remaining_time = avg_time_per_task * remaining_tasks if remaining_tasks > 0 else 0
117
+
118
+ postfix = {
119
+ "completed": f"{completion_pct:.1f}%",
120
+ "failed": self.failed_tasks,
121
+ "timeout": self.timeout_tasks,
122
+ }
123
+
124
+ if est_remaining_time > 0:
125
+ postfix["eta"] = f"{est_remaining_time/60:.1f}m"
126
+
127
+ self.progress_bar.set_postfix(postfix)
128
+
129
+ def write_progress_message(self, message: str) -> None:
130
+ """Write a message that respects the progress bar.
131
+
132
+ :param message: Message to display
133
+ """
134
+ if self.progress_bar:
135
+ tqdm.write(message)
136
+ else:
137
+ print(message)
138
+
139
+ def log_task_completion(
140
+ self, task_name: str, duration: float, success: bool = True, details: Optional[str] = None
141
+ ) -> None:
142
+ """Log the completion of a task.
143
+
144
+ :param task_name: Name of the completed task
145
+ :param duration: Duration in seconds
146
+ :param success: Whether the task completed successfully
147
+ :param details: Optional additional details
148
+ """
149
+ status_icon = "✅" if success else "❌"
150
+ message = f"{status_icon} {task_name} completed in {duration:.1f}s"
151
+
152
+ if details:
153
+ message += f" - {details}"
154
+
155
+ self.write_progress_message(message)
156
+
157
+ if self.logger:
158
+ log_level = "info" if success else "warning"
159
+ getattr(self.logger, log_level)(message)
160
+
161
+ def log_task_timeout(self, task_name: str, timeout_duration: float) -> None:
162
+ """Log a task timeout.
163
+
164
+ :param task_name: Name of the timed out task
165
+ :param timeout_duration: Timeout duration in seconds
166
+ """
167
+ message = f"⚠️ TIMEOUT: {task_name} after {timeout_duration}s"
168
+ self.write_progress_message(message)
169
+
170
+ if self.logger:
171
+ self.logger.warning(message)
172
+
173
+ def log_task_error(self, task_name: str, error: Exception) -> None:
174
+ """Log a task error.
175
+
176
+ :param task_name: Name of the failed task
177
+ :param error: The exception that occurred
178
+ """
179
+ message = f"❌ ERROR: {task_name} - {error.__class__.__name__}: {str(error)}"
180
+ self.write_progress_message(message)
181
+
182
+ if self.logger:
183
+ self.logger.error(message)
184
+
185
+ def get_summary(self) -> Dict[str, Any]:
186
+ """Get a summary of progress and statistics.
187
+
188
+ :return: Dictionary containing progress summary
189
+ """
190
+ total_time = None
191
+ if self.start_time:
192
+ end_time = self.end_time or time.time()
193
+ total_time = end_time - self.start_time
194
+
195
+ return {
196
+ "total_tasks": self.total_tasks,
197
+ "completed_tasks": self.completed_tasks,
198
+ "failed_tasks": self.failed_tasks,
199
+ "timeout_tasks": self.timeout_tasks,
200
+ "success_rate": (self.completed_tasks / self.total_tasks) * 100 if self.total_tasks > 0 else 0,
201
+ "total_time_seconds": total_time,
202
+ "average_time_per_task": (
203
+ total_time / self.completed_tasks if total_time and self.completed_tasks > 0 else None
204
+ ),
205
+ "task_statuses": self.task_statuses.copy(),
206
+ }
207
+
208
+ def print_summary(self) -> None:
209
+ """Print a formatted summary of the progress."""
210
+ summary = self.get_summary()
211
+
212
+ self.write_progress_message("\n" + "=" * 60)
213
+ self.write_progress_message("EXECUTION SUMMARY")
214
+ self.write_progress_message("=" * 60)
215
+ self.write_progress_message(f"Total Tasks: {summary['total_tasks']}")
216
+ self.write_progress_message(f"Completed: {summary['completed_tasks']}")
217
+ self.write_progress_message(f"Failed: {summary['failed_tasks']}")
218
+ self.write_progress_message(f"Timeouts: {summary['timeout_tasks']}")
219
+ self.write_progress_message(f"Success Rate: {summary['success_rate']:.1f}%")
220
+
221
+ if summary["total_time_seconds"]:
222
+ self.write_progress_message(f"Total Time: {summary['total_time_seconds']:.1f}s")
223
+
224
+ if summary["average_time_per_task"]:
225
+ self.write_progress_message(f"Avg Time/Task: {summary['average_time_per_task']:.1f}s")
226
+
227
+ self.write_progress_message("=" * 60)
228
+
229
+ def __enter__(self):
230
+ """Context manager entry."""
231
+ self.start()
232
+ return self
233
+
234
+ def __exit__(self, exc_type, exc_val, exc_tb):
235
+ """Context manager exit."""
236
+ self.stop()
237
+
238
+
239
def create_progress_manager(
    total_tasks: int = 0, logger=None, show_progress_bar: bool = True, progress_desc: str = "Processing"
) -> ProgressManager:
    """Build a ProgressManager from the given settings.

    Thin factory: every argument is forwarded unchanged to the
    ``ProgressManager`` constructor.

    :param total_tasks: Total number of tasks to track
    :param logger: Logger instance
    :param show_progress_bar: Whether to show progress bar
    :param progress_desc: Description for progress bar
    :return: Configured ProgressManager
    """
    manager = ProgressManager(
        total_tasks=total_tasks,
        logger=logger,
        show_progress_bar=show_progress_bar,
        progress_desc=progress_desc,
    )
    return manager