azure-ai-evaluation 1.4.0__tar.gz → 1.5.0__tar.gz

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (290)
  1. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/CHANGELOG.md +6 -0
  2. {azure_ai_evaluation-1.4.0/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.5.0}/PKG-INFO +13 -2
  3. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/__init__.py +0 -16
  4. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/rai_service.py +1 -1
  5. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/utils.py +1 -1
  6. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_converters/_ai_services.py +4 -4
  7. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
  8. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +9 -4
  9. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +42 -22
  10. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +1 -1
  11. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_eval_run.py +1 -1
  12. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_evaluate.py +84 -68
  13. azure_ai_evaluation-1.5.0/azure/ai/evaluation/_evaluate/_telemetry/__init__.py +95 -0
  14. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_utils.py +3 -3
  15. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_common/_base_eval.py +1 -1
  16. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +1 -1
  17. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +1 -1
  18. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +1 -1
  19. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +1 -0
  20. azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_adapters/__init__.py +21 -0
  21. azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  22. azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  23. azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  24. azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  25. azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  26. azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  27. azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  28. azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  29. azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  30. azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  31. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_result.py +1 -1
  32. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_status.py +1 -1
  33. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_version.py +1 -1
  34. azure_ai_evaluation-1.5.0/azure/ai/evaluation/red_team/__init__.py +19 -0
  35. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.5.0/azure/ai/evaluation/red_team}/_attack_objective_generator.py +3 -0
  36. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.5.0/azure/ai/evaluation/red_team}/_attack_strategy.py +3 -0
  37. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.5.0/azure/ai/evaluation/red_team}/_red_team.py +96 -67
  38. azure_ai_evaluation-1.5.0/azure/ai/evaluation/red_team/_red_team_result.py +382 -0
  39. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.5.0/azure/ai/evaluation/red_team}/_utils/constants.py +2 -1
  40. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.5.0/azure/ai/evaluation/red_team}/_utils/formatting_utils.py +23 -22
  41. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.5.0/azure/ai/evaluation/red_team}/_utils/logging_utils.py +1 -1
  42. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.5.0/azure/ai/evaluation/red_team}/_utils/strategy_utils.py +8 -4
  43. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_simulator.py +1 -1
  44. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0/azure_ai_evaluation.egg-info}/PKG-INFO +13 -2
  45. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure_ai_evaluation.egg-info/SOURCES.txt +23 -15
  46. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure_ai_evaluation.egg-info/requires.txt +6 -1
  47. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/agent_evaluators/instructions.md +1 -1
  48. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/agent_evaluators/task_adherence.ipynb +1 -1
  49. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/red_team_samples.py +1 -1
  50. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/setup.py +11 -1
  51. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/conftest.py +4 -35
  52. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/converters/ai_agent_converter/test_run_ids_from_conversation.py +3 -3
  53. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/e2etests/test_metrics_upload.py +10 -3
  54. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_batch_run_context.py +6 -3
  55. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_evaluate.py +1 -7
  56. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_redteam/test_attack_objective_generator.py +1 -1
  57. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_redteam/test_attack_strategy.py +1 -1
  58. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_redteam/test_callback_chat_target.py +2 -2
  59. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_redteam/test_constants.py +3 -3
  60. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_redteam/test_formatting_utils.py +4 -12
  61. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_redteam/test_red_team.py +48 -41
  62. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_redteam/test_red_team_result.py +1 -1
  63. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_redteam/test_strategy_utils.py +16 -12
  64. azure_ai_evaluation-1.4.0/azure/ai/evaluation/_evaluate/_telemetry/__init__.py +0 -179
  65. azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team/_red_team_result.py +0 -246
  66. azure_ai_evaluation-1.4.0/azure/ai/evaluation/simulator/_data_sources/__init__.py +0 -3
  67. azure_ai_evaluation-1.4.0/azure/ai/evaluation/simulator/_tracing.py +0 -89
  68. azure_ai_evaluation-1.4.0/tests/unittests/test_evaluate_telemetry.py +0 -168
  69. azure_ai_evaluation-1.4.0/tests/unittests/test_evaluators/apology_dag/apology.py +0 -8
  70. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/MANIFEST.in +0 -0
  71. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/NOTICE.txt +0 -0
  72. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/README.md +0 -0
  73. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/TROUBLESHOOTING.md +0 -0
  74. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/__init__.py +0 -0
  75. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/__init__.py +0 -0
  76. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_azure/__init__.py +0 -0
  77. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_azure/_clients.py +0 -0
  78. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_azure/_models.py +0 -0
  79. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_azure/_token_manager.py +0 -0
  80. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/__init__.py +0 -0
  81. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/_experimental.py +0 -0
  82. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/constants.py +0 -0
  83. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/math.py +0 -0
  84. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/__init__.py +0 -0
  85. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/_client.py +0 -0
  86. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/_configuration.py +0 -0
  87. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/_model_base.py +0 -0
  88. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/_patch.py +0 -0
  89. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/_serialization.py +0 -0
  90. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/_version.py +0 -0
  91. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/aio/__init__.py +0 -0
  92. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/aio/_client.py +0 -0
  93. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/aio/_configuration.py +0 -0
  94. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/aio/_patch.py +0 -0
  95. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +0 -0
  96. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +0 -0
  97. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +0 -0
  98. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/models/__init__.py +0 -0
  99. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/models/_enums.py +0 -0
  100. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/models/_models.py +0 -0
  101. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/models/_patch.py +0 -0
  102. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/operations/__init__.py +0 -0
  103. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/operations/_operations.py +0 -0
  104. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/operations/_patch.py +0 -0
  105. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/py.typed +0 -0
  106. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_constants.py +0 -0
  107. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.5.0/azure/ai/evaluation/_converters}/__init__.py +0 -0
  108. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_converters/_models.py +0 -0
  109. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_converters → azure_ai_evaluation-1.5.0/azure/ai/evaluation/_evaluate}/__init__.py +0 -0
  110. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_batch_run/__init__.py +0 -0
  111. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +0 -0
  112. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +0 -0
  113. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_evaluate → azure_ai_evaluation-1.5.0/azure/ai/evaluation/_evaluators}/__init__.py +0 -0
  114. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_bleu/__init__.py +0 -0
  115. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_bleu/_bleu.py +0 -0
  116. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +0 -0
  117. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +0 -0
  118. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_coherence/__init__.py +0 -0
  119. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_coherence/_coherence.py +0 -0
  120. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -0
  121. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_common/__init__.py +0 -0
  122. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +0 -0
  123. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +0 -0
  124. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -0
  125. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +0 -0
  126. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +0 -0
  127. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +0 -0
  128. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +0 -0
  129. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_content_safety/_violence.py +0 -0
  130. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
  131. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_eci/_eci.py +0 -0
  132. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_f1_score/__init__.py +0 -0
  133. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +0 -0
  134. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_fluency/__init__.py +0 -0
  135. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_fluency/_fluency.py +0 -0
  136. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -0
  137. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_gleu/__init__.py +0 -0
  138. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_gleu/_gleu.py +0 -0
  139. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_groundedness/__init__.py +0 -0
  140. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +0 -0
  141. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +0 -0
  142. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +0 -0
  143. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +0 -0
  144. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +0 -0
  145. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_meteor/__init__.py +0 -0
  146. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_meteor/_meteor.py +0 -0
  147. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_protected_material/__init__.py +0 -0
  148. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +0 -0
  149. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_qa/__init__.py +0 -0
  150. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_qa/_qa.py +0 -0
  151. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_relevance/__init__.py +0 -0
  152. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_relevance/_relevance.py +0 -0
  153. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -0
  154. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +0 -0
  155. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +0 -0
  156. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_retrieval/__init__.py +0 -0
  157. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +0 -0
  158. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +0 -0
  159. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_rouge/__init__.py +0 -0
  160. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_rouge/_rouge.py +0 -0
  161. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +0 -0
  162. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +0 -0
  163. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_similarity/__init__.py +0 -0
  164. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_similarity/_similarity.py +0 -0
  165. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -0
  166. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +0 -0
  167. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +0 -0
  168. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +0 -0
  169. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +0 -0
  170. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +0 -0
  171. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +0 -0
  172. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +0 -0
  173. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +0 -0
  174. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_xpia/__init__.py +0 -0
  175. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_xpia/xpia.py +0 -0
  176. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_exceptions.py +0 -0
  177. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_http_utils.py +0 -0
  178. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_evaluators → azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy}/__init__.py +0 -0
  179. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/__init__.py +0 -0
  180. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_config.py +0 -0
  181. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_engine.py +0 -0
  182. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +0 -0
  183. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_logging.py +0 -0
  184. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +0 -0
  185. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_run.py +0 -0
  186. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +0 -0
  187. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +0 -0
  188. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_trace.py +0 -0
  189. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_utils.py +0 -0
  190. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +0 -0
  191. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/prompty/__init__.py +0 -0
  192. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/prompty/_connection.py +0 -0
  193. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/prompty/_exceptions.py +0 -0
  194. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/prompty/_prompty.py +0 -0
  195. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/prompty/_utils.py +0 -0
  196. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +0 -0
  197. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_model_configurations.py +0 -0
  198. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team/_utils → azure_ai_evaluation-1.5.0/azure/ai/evaluation/_safety_evaluation}/__init__.py +0 -0
  199. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  200. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +0 -0
  201. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_user_agent.py +0 -0
  202. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_legacy → azure_ai_evaluation-1.5.0/azure/ai/evaluation/_vendor}/__init__.py +0 -0
  203. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_vendor/rouge_score/__init__.py +0 -0
  204. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -0
  205. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -0
  206. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -0
  207. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +0 -0
  208. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/py.typed +0 -0
  209. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.5.0/azure/ai/evaluation/red_team}/_callback_chat_target.py +0 -0
  210. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.5.0/azure/ai/evaluation/red_team}/_default_converter.py +0 -0
  211. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_safety_evaluation → azure_ai_evaluation-1.5.0/azure/ai/evaluation/red_team/_utils}/__init__.py +0 -0
  212. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/__init__.py +0 -0
  213. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_adversarial_scenario.py +0 -0
  214. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_adversarial_simulator.py +0 -0
  215. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_constants.py +0 -0
  216. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_conversation/__init__.py +0 -0
  217. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_conversation/_conversation.py +0 -0
  218. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_conversation/constants.py +0 -0
  219. {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_vendor → azure_ai_evaluation-1.5.0/azure/ai/evaluation/simulator/_data_sources}/__init__.py +0 -0
  220. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_data_sources/grounding.json +0 -0
  221. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_direct_attack_simulator.py +0 -0
  222. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_helpers/__init__.py +0 -0
  223. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +0 -0
  224. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +0 -0
  225. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +0 -0
  226. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_model_tools/__init__.py +0 -0
  227. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +0 -0
  228. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +0 -0
  229. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +0 -0
  230. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_model_tools/_rai_client.py +0 -0
  231. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_model_tools/_template_handler.py +0 -0
  232. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_model_tools/models.py +0 -0
  233. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
  234. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +0 -0
  235. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -0
  236. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_utils.py +0 -0
  237. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure_ai_evaluation.egg-info/dependency_links.txt +0 -0
  238. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure_ai_evaluation.egg-info/not-zip-safe +0 -0
  239. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure_ai_evaluation.egg-info/top_level.txt +0 -0
  240. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/migration_guide.md +0 -0
  241. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/pyproject.toml +0 -0
  242. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/README.md +0 -0
  243. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/agent_evaluators/agent_evaluation.ipynb +0 -0
  244. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/agent_evaluators/intent_resolution.ipynb +0 -0
  245. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/agent_evaluators/response_completeness.ipynb +0 -0
  246. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/agent_evaluators/sample_synthetic_conversations.jsonl +0 -0
  247. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/agent_evaluators/tool_call_accuracy.ipynb +0 -0
  248. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/agent_evaluators/user_functions.py +0 -0
  249. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/data/evaluate_test_data.jsonl +0 -0
  250. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/evaluation_samples_common.py +0 -0
  251. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/evaluation_samples_evaluate.py +0 -0
  252. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/evaluation_samples_safety_evaluation.py +0 -0
  253. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/evaluation_samples_simulate.py +0 -0
  254. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/evaluation_samples_threshold.py +0 -0
  255. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/setup.cfg +0 -0
  256. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/__init__.py +0 -0
  257. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/__openai_patcher.py +0 -0
  258. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/converters/ai_agent_converter/serialization_helper.py +0 -0
  259. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/converters/ai_agent_converter/test_ai_agent_converter_internals.py +0 -0
  260. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/e2etests/__init__.py +0 -0
  261. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/e2etests/custom_evaluators/answer_length_with_aggregation.py +0 -0
  262. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/e2etests/target_fn.py +0 -0
  263. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/e2etests/test_adv_simulator.py +0 -0
  264. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/e2etests/test_builtin_evaluators.py +0 -0
  265. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/e2etests/test_evaluate.py +0 -0
  266. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/e2etests/test_lite_management_client.py +0 -0
  267. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/e2etests/test_mass_evaluate.py +0 -0
  268. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/e2etests/test_prompty_async.py +0 -0
  269. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/e2etests/test_sim_and_eval.py +0 -0
  270. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_agent_evaluators.py +0 -0
  271. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_built_in_evaluator.py +0 -0
  272. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_completeness_evaluator.py +0 -0
  273. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_content_safety_defect_rate.py +0 -0
  274. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_content_safety_rai_script.py +0 -0
  275. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_eval_run.py +0 -0
  276. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_evaluate_performance.py +0 -0
  277. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_evaluators/slow_eval.py +0 -0
  278. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_evaluators/test_conversation_thresholds.py +0 -0
  279. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_evaluators/test_inputs_evaluators.py +0 -0
  280. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_evaluators/test_service_evaluator_thresholds.py +0 -0
  281. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_evaluators/test_threshold_behavior.py +0 -0
  282. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_jailbreak_simulator.py +0 -0
  283. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_non_adv_simulator.py +0 -0
  284. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_redteam/__init__.py +0 -0
  285. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_safety_evaluation.py +0 -0
  286. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_save_eval.py +0 -0
  287. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_simulator.py +0 -0
  288. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_synthetic_callback_conv_bot.py +0 -0
  289. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_synthetic_conversation_bot.py +0 -0
  290. {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_utils.py +0 -0
@@ -1,5 +1,11 @@
1
1
  # Release History
2
2
 
3
+ ## 1.5.0 (2025-04-04)
4
+
5
+ ### Features Added
6
+
7
+ - New `RedTeam` agent functionality to assess the safety and resilience of AI systems against adversarial prompt attacks
8
+
3
9
  ## 1.4.0 (2025-03-27)
4
10
 
5
11
  ### Features Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: azure-ai-evaluation
3
- Version: 1.4.0
3
+ Version: 1.5.0
4
4
  Summary: Microsoft Azure Evaluation Library for Python
5
5
  Home-page: https://github.com/Azure/azure-sdk-for-python
6
6
  Author: Microsoft Corporation
@@ -28,8 +28,13 @@ Requires-Dist: azure-identity>=1.16.0
28
28
  Requires-Dist: azure-core>=1.30.2
29
29
  Requires-Dist: nltk>=3.9.1
30
30
  Requires-Dist: azure-storage-blob>=12.10.0
31
+ Requires-Dist: httpx>=0.25.1
32
+ Requires-Dist: pandas<3.0.0,>=2.1.2
33
+ Requires-Dist: openai>=1.40.0
34
+ Requires-Dist: ruamel.yaml<1.0.0,>=0.17.10
35
+ Requires-Dist: msrest>=0.6.21
31
36
  Provides-Extra: redteam
32
- Requires-Dist: pyrit>=0.8.0; extra == "redteam"
37
+ Requires-Dist: pyrit==0.8.1; extra == "redteam"
33
38
 
34
39
  # Azure AI Evaluation client library for Python
35
40
 
@@ -376,6 +381,12 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
376
381
 
377
382
  # Release History
378
383
 
384
+ ## 1.5.0 (2025-04-04)
385
+
386
+ ### Features Added
387
+
388
+ - New `RedTeam` agent functionality to assess the safety and resilience of AI systems against adversarial prompt attacks
389
+
379
390
  ## 1.4.0 (2025-03-27)
380
391
 
381
392
  ### Features Added
@@ -52,22 +52,6 @@ try:
52
52
  except ImportError:
53
53
  print("[INFO] Could not import AIAgentConverter. Please install the dependency with `pip install azure-ai-projects`.")
54
54
 
55
- # RedTeam requires a dependency on pyrit, but python 3.9 is not supported by pyrit.
56
- # So we only import it if it's available and the user has pyrit.
57
- try:
58
- from ._red_team._red_team import RedTeam
59
- from ._red_team._attack_strategy import AttackStrategy
60
- from ._red_team._attack_objective_generator import RiskCategory
61
- from ._red_team._red_team_result import RedTeamOutput
62
- _patch_all.extend([
63
- "RedTeam",
64
- "RedTeamOutput",
65
- "AttackStrategy",
66
- "RiskCategory",
67
- ])
68
- except ImportError:
69
- print("[INFO] Could not import RedTeam. Please install the dependency with `pip install azure-ai-evaluation[redteam]`.")
70
-
71
55
 
72
56
  __all__ = [
73
57
  "evaluate",
@@ -15,7 +15,7 @@ from string import Template
15
15
 
16
16
  import jwt
17
17
 
18
- from promptflow.core._errors import MissingRequiredPackage
18
+ from azure.ai.evaluation._legacy._adapters._errors import MissingRequiredPackage
19
19
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
20
20
  from azure.ai.evaluation._http_utils import AsyncHttpPipeline, get_async_http_client
21
21
  from azure.ai.evaluation._model_configurations import AzureAIProject
@@ -9,7 +9,7 @@ from typing import Any, List, Literal, Mapping, Type, TypeVar, Tuple, Union, cas
9
9
 
10
10
  import nltk
11
11
  from typing_extensions import NotRequired, Required, TypeGuard
12
- from promptflow.core._errors import MissingRequiredPackage
12
+ from azure.ai.evaluation._legacy._adapters._errors import MissingRequiredPackage
13
13
  from azure.ai.evaluation._constants import AZURE_OPENAI_TYPE, OPENAI_TYPE
14
14
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
15
15
  from azure.ai.evaluation._model_configurations import (
@@ -667,7 +667,7 @@ class AIAgentConverter:
667
667
  return evaluations
668
668
 
669
669
  @staticmethod
670
- def run_ids_from_conversation(conversation: dict) -> List[str]:
670
+ def _run_ids_from_conversation(conversation: dict) -> List[str]:
671
671
  """
672
672
  Extracts a list of unique run IDs from a conversation dictionary.
673
673
 
@@ -684,7 +684,7 @@ class AIAgentConverter:
684
684
  return run_ids
685
685
 
686
686
  @staticmethod
687
- def convert_from_conversation(
687
+ def _convert_from_conversation(
688
688
  conversation: dict, run_id: str, exclude_tool_calls_previous_runs: bool = False
689
689
  ) -> dict:
690
690
  """
@@ -765,7 +765,7 @@ class AIAgentConverter:
765
765
  return json.loads(final_result.to_json())
766
766
 
767
767
  @staticmethod
768
- def convert_from_file(filename: str, run_id: str) -> dict:
768
+ def _convert_from_file(filename: str, run_id: str) -> dict:
769
769
  """
770
770
  Converts the agent run from a JSON file to a format suitable for the OpenAI API, the JSON file being a thread.
771
771
 
@@ -801,4 +801,4 @@ class AIAgentConverter:
801
801
  with open(filename, mode="r", encoding="utf-8") as file:
802
802
  data = json.load(file)
803
803
 
804
- return AIAgentConverter.convert_from_conversation(data, run_id)
804
+ return AIAgentConverter._convert_from_conversation(data, run_id)
@@ -6,17 +6,17 @@ import json
6
6
  import logging
7
7
  import os
8
8
  from concurrent.futures import Future
9
- from pathlib import Path
10
- from typing import Any, Callable, Dict, Optional, Union, cast
9
+ from typing import Any, Callable, Dict, Optional, Sequence, Union, cast
11
10
 
12
11
  import pandas as pd
13
- from promptflow.contracts.types import AttrDict
14
- from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor
12
+ from azure.ai.evaluation._legacy._adapters.types import AttrDict
13
+ from azure.ai.evaluation._legacy._adapters.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor
15
14
 
16
15
  from azure.ai.evaluation._evaluate._utils import _apply_column_mapping, _has_aggregator, get_int_env_var, load_jsonl
17
16
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
18
17
 
19
18
  from ..._constants import PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT
19
+ from .batch_clients import BatchClientRun
20
20
 
21
21
  LOGGER = logging.getLogger(__name__)
22
22
 
@@ -84,7 +84,7 @@ class CodeClient: # pylint: disable=client-accepts-api-version-keyword
84
84
  for param in inspect.signature(evaluator).parameters.values()
85
85
  if param.name not in ["args", "kwargs"]
86
86
  }
87
- for value in input_df.to_dict("records"):
87
+ for value in cast(Sequence[Dict[str, Any]], input_df.to_dict("records")):
88
88
  # Filter out only the parameters that are present in the input data
89
89
  # if no parameters then pass data as is
90
90
  filtered_values = {k: v for k, v in value.items() if k in parameters} if len(parameters) > 0 else value
@@ -133,10 +133,10 @@ class CodeClient: # pylint: disable=client-accepts-api-version-keyword
133
133
  def run(
134
134
  self, # pylint: disable=unused-argument
135
135
  flow: Callable,
136
- data: Union[os.PathLike, Path, pd.DataFrame],
137
- evaluator_name: Optional[str] = None,
136
+ data: Union[str, os.PathLike, pd.DataFrame],
138
137
  column_mapping: Optional[Dict[str, str]] = None,
139
- **kwargs,
138
+ evaluator_name: Optional[str] = None,
139
+ **kwargs: Any,
140
140
  ) -> CodeRun:
141
141
  input_df = data
142
142
  if not isinstance(input_df, pd.DataFrame):
@@ -157,7 +157,7 @@ class CodeClient: # pylint: disable=client-accepts-api-version-keyword
157
157
  evaluator=flow,
158
158
  input_df=input_df,
159
159
  column_mapping=column_mapping,
160
- evaluator_name=evaluator_name,
160
+ evaluator_name=evaluator_name or "",
161
161
  )
162
162
 
163
163
  return CodeRun(
@@ -169,11 +169,13 @@ class CodeClient: # pylint: disable=client-accepts-api-version-keyword
169
169
  ),
170
170
  )
171
171
 
172
- def get_details(self, run: CodeRun, all_results: bool = False) -> pd.DataFrame:
172
+ def get_details(self, client_run: BatchClientRun, all_results: bool = False) -> pd.DataFrame:
173
+ run = self._get_result(client_run)
173
174
  result_df = run.get_result_df(exclude_inputs=not all_results)
174
175
  return result_df
175
176
 
176
- def get_metrics(self, run: CodeRun) -> Dict[str, Any]:
177
+ def get_metrics(self, client_run: BatchClientRun) -> Dict[str, Any]:
178
+ run = self._get_result(client_run)
177
179
  try:
178
180
  aggregated_metrics = run.get_aggregated_metrics()
179
181
  print("Aggregated metrics")
@@ -183,6 +185,10 @@ class CodeClient: # pylint: disable=client-accepts-api-version-keyword
183
185
  return {}
184
186
  return aggregated_metrics
185
187
 
186
- def get_run_summary(self, run: CodeRun) -> Any: # pylint: disable=unused-argument
188
+ def get_run_summary(self, client_run: BatchClientRun) -> Any: # pylint: disable=unused-argument
187
189
  # Not implemented
188
190
  return None
191
+
192
+ @staticmethod
193
+ def _get_result(run: BatchClientRun) -> CodeRun:
194
+ return cast(CodeRun, run)
@@ -5,9 +5,9 @@ import os
5
5
  import types
6
6
  from typing import Optional, Type, Union
7
7
 
8
- from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP, PF_FLOW_META_LOAD_IN_SUBPROCESS
9
- from promptflow._utils.user_agent_utils import ClientUserAgentUtil
10
- from promptflow.tracing._integrations._openai_injector import inject_openai_api, recover_openai_api
8
+ from azure.ai.evaluation._legacy._adapters._constants import PF_FLOW_ENTRY_IN_TMP, PF_FLOW_META_LOAD_IN_SUBPROCESS
9
+ from azure.ai.evaluation._legacy._adapters.utils import ClientUserAgentUtil
10
+ from azure.ai.evaluation._legacy._adapters.tracing import inject_openai_api, recover_openai_api
11
11
 
12
12
  from azure.ai.evaluation._constants import (
13
13
  OTEL_EXPORTER_OTLP_TRACES_TIMEOUT,
@@ -19,6 +19,8 @@ from azure.ai.evaluation._constants import (
19
19
 
20
20
  from ..._user_agent import USER_AGENT
21
21
  from .._utils import set_event_loop_policy
22
+ from .batch_clients import BatchClient
23
+ from ._run_submitter_client import RunSubmitterClient
22
24
  from .code_client import CodeClient
23
25
  from .proxy_client import ProxyClient
24
26
 
@@ -33,7 +35,7 @@ class EvalRunContext:
33
35
  ]
34
36
  """
35
37
 
36
- def __init__(self, client: Union[CodeClient, ProxyClient]) -> None:
38
+ def __init__(self, client: BatchClient) -> None:
37
39
  self.client = client
38
40
  self._is_batch_timeout_set_by_system = False
39
41
  self._is_otel_timeout_set_by_system = False
@@ -64,6 +66,9 @@ class EvalRunContext:
64
66
  # For addressing the issue of asyncio event loop closed on Windows
65
67
  set_event_loop_policy()
66
68
 
69
+ if isinstance(self.client, RunSubmitterClient):
70
+ set_event_loop_policy()
71
+
67
72
  def __exit__(
68
73
  self,
69
74
  exc_type: Optional[Type[BaseException]],
@@ -8,15 +8,21 @@ import inspect
8
8
  import logging
9
9
  import math
10
10
  import os
11
+ from datetime import datetime
11
12
  from collections import OrderedDict
12
13
  from concurrent.futures import Future
13
- from typing import Any, Callable, Dict, Optional, Union
14
+ from typing import Any, Callable, Dict, Optional, Union, cast
14
15
 
16
+ from azure.ai.evaluation._legacy._adapters.entities import Run
17
+ from azure.ai.evaluation._legacy._adapters._configuration import Configuration
18
+ from azure.ai.evaluation._legacy._adapters.client import PFClient
19
+ from azure.ai.evaluation._legacy._adapters.tracing import ThreadPoolExecutorWithContext
15
20
  import pandas as pd
16
- from promptflow.client import PFClient
17
- from promptflow.entities import Run
18
- from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor
19
21
 
22
+ from azure.ai.evaluation._evaluate._batch_run.batch_clients import BatchClientRun, HasAsyncCallable
23
+
24
+
25
+ Configuration.get_instance().set_config("trace.destination", "none")
20
26
  LOGGER = logging.getLogger(__name__)
21
27
 
22
28
 
@@ -26,46 +32,56 @@ class ProxyRun:
26
32
 
27
33
 
28
34
  class ProxyClient: # pylint: disable=client-accepts-api-version-keyword
29
- def __init__( # pylint: disable=missing-client-constructor-parameter-credential,missing-client-constructor-parameter-kwargs
30
- self, pf_client: PFClient
35
+ def __init__( # pylint: disable=missing-client-constructor-parameter-credential
36
+ self,
37
+ **kwargs: Any,
31
38
  ) -> None:
32
- self._pf_client = pf_client
33
- self._thread_pool = ThreadPoolExecutor(thread_name_prefix="evaluators_thread")
39
+ self._pf_client = PFClient(**kwargs)
40
+ self._thread_pool = ThreadPoolExecutorWithContext(thread_name_prefix="evaluators_thread")
34
41
 
35
42
  def run(
36
43
  self,
37
- flow: Union[str, os.PathLike, Callable],
38
- data: Union[str, os.PathLike],
44
+ flow: Callable,
45
+ data: Union[str, os.PathLike, pd.DataFrame],
39
46
  column_mapping: Optional[Dict[str, str]] = None,
40
- **kwargs
47
+ evaluator_name: Optional[str] = None,
48
+ **kwargs: Any,
41
49
  ) -> ProxyRun:
42
- flow_to_run = flow
43
- if os.getenv("AI_EVALS_BATCH_USE_ASYNC", "true").lower() == "true" and hasattr(flow, "_to_async"):
50
+ if isinstance(data, pd.DataFrame):
51
+ raise ValueError("Data cannot be a pandas DataFrame")
52
+
53
+ flow_to_run: Callable = flow
54
+ if os.getenv("AI_EVALS_BATCH_USE_ASYNC", "true").lower() == "true" and isinstance(flow, HasAsyncCallable):
44
55
  flow_to_run = flow._to_async() # pylint: disable=protected-access
45
56
 
57
+ name: str = kwargs.pop("name", "")
58
+ if not name:
59
+ name = f"azure_ai_evaluation_evaluators_{evaluator_name}_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}"
60
+
46
61
  batch_use_async = self._should_batch_use_async(flow_to_run)
47
62
  eval_future = self._thread_pool.submit(
48
63
  self._pf_client.run,
49
64
  flow_to_run,
50
65
  data=data,
51
- column_mapping=column_mapping,
66
+ column_mapping=column_mapping, # type: ignore
52
67
  batch_use_async=batch_use_async,
53
- **kwargs
68
+ name=name,
69
+ **kwargs,
54
70
  )
55
71
  return ProxyRun(run=eval_future)
56
72
 
57
- def get_details(self, proxy_run: ProxyRun, all_results: bool = False) -> pd.DataFrame:
58
- run: Run = proxy_run.run.result()
73
+ def get_details(self, client_run: BatchClientRun, all_results: bool = False) -> pd.DataFrame:
74
+ run: Run = self.get_result(client_run)
59
75
  result_df = self._pf_client.get_details(run, all_results=all_results)
60
76
  result_df.replace("(Failed)", math.nan, inplace=True)
61
77
  return result_df
62
78
 
63
- def get_metrics(self, proxy_run: ProxyRun) -> Dict[str, Any]:
64
- run: Run = proxy_run.run.result()
79
+ def get_metrics(self, client_run: BatchClientRun) -> Dict[str, Any]:
80
+ run: Run = self.get_result(client_run)
65
81
  return self._pf_client.get_metrics(run)
66
82
 
67
- def get_run_summary(self, proxy_run: ProxyRun) -> Dict[str, Any]:
68
- run = proxy_run.run.result()
83
+ def get_run_summary(self, client_run: BatchClientRun) -> Dict[str, Any]:
84
+ run: Run = self.get_result(client_run)
69
85
 
70
86
  # pylint: disable=protected-access
71
87
  completed_lines = run._properties.get("system_metrics", {}).get("__pf__.lines.completed", "NA")
@@ -81,13 +97,17 @@ class ProxyClient: # pylint: disable=client-accepts-api-version-keyword
81
97
  return OrderedDict(
82
98
  [
83
99
  ("status", status),
84
- ("duration", str(run._end_time - run._created_on)),
100
+ ("duration", str((run._end_time or run._created_on) - run._created_on)),
85
101
  ("completed_lines", completed_lines),
86
102
  ("failed_lines", failed_lines),
87
103
  ("log_path", str(run._output_path)),
88
104
  ]
89
105
  )
90
106
 
107
+ @staticmethod
108
+ def get_result(run: BatchClientRun) -> Run:
109
+ return cast(ProxyRun, run).run.result()
110
+
91
111
  @staticmethod
92
112
  def _should_batch_use_async(flow):
93
113
  if os.getenv("AI_EVALS_BATCH_USE_ASYNC", "true").lower() == "true":
@@ -5,7 +5,7 @@ import os
5
5
  import types
6
6
  from typing import Optional, Type
7
7
 
8
- from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP
8
+ from azure.ai.evaluation._legacy._adapters._constants import PF_FLOW_ENTRY_IN_TMP
9
9
  from azure.ai.evaluation._constants import PF_DISABLE_TRACING
10
10
 
11
11
 
@@ -13,7 +13,7 @@ import uuid
13
13
  from typing import Any, Dict, List, Optional, Set, Type
14
14
  from urllib.parse import urlparse
15
15
 
16
- from promptflow._sdk.entities import Run
16
+ from azure.ai.evaluation._legacy._adapters.entities import Run
17
17
  from typing_extensions import Self
18
18
 
19
19
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException