azure-ai-evaluation 0.0.0b0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. azure/ai/evaluation/__init__.py +82 -0
  2. azure/ai/evaluation/_common/__init__.py +16 -0
  3. azure/ai/evaluation/_common/_experimental.py +172 -0
  4. azure/ai/evaluation/_common/constants.py +72 -0
  5. azure/ai/evaluation/_common/math.py +89 -0
  6. azure/ai/evaluation/_common/rai_service.py +632 -0
  7. azure/ai/evaluation/_common/utils.py +445 -0
  8. azure/ai/evaluation/_constants.py +72 -0
  9. azure/ai/evaluation/_evaluate/__init__.py +3 -0
  10. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +9 -0
  11. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +188 -0
  12. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +89 -0
  13. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +99 -0
  14. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +46 -0
  15. azure/ai/evaluation/_evaluate/_eval_run.py +571 -0
  16. azure/ai/evaluation/_evaluate/_evaluate.py +850 -0
  17. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +179 -0
  18. azure/ai/evaluation/_evaluate/_utils.py +298 -0
  19. azure/ai/evaluation/_evaluators/__init__.py +3 -0
  20. azure/ai/evaluation/_evaluators/_bleu/__init__.py +9 -0
  21. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +72 -0
  22. azure/ai/evaluation/_evaluators/_coherence/__init__.py +7 -0
  23. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +107 -0
  24. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +99 -0
  25. azure/ai/evaluation/_evaluators/_common/__init__.py +13 -0
  26. azure/ai/evaluation/_evaluators/_common/_base_eval.py +344 -0
  27. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +88 -0
  28. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +133 -0
  29. azure/ai/evaluation/_evaluators/_content_safety/__init__.py +17 -0
  30. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -0
  31. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +129 -0
  32. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -0
  33. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +125 -0
  34. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +126 -0
  35. azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
  36. azure/ai/evaluation/_evaluators/_eci/_eci.py +89 -0
  37. azure/ai/evaluation/_evaluators/_f1_score/__init__.py +9 -0
  38. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +157 -0
  39. azure/ai/evaluation/_evaluators/_fluency/__init__.py +9 -0
  40. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +104 -0
  41. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +86 -0
  42. azure/ai/evaluation/_evaluators/_gleu/__init__.py +9 -0
  43. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +69 -0
  44. azure/ai/evaluation/_evaluators/_groundedness/__init__.py +9 -0
  45. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +144 -0
  46. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +113 -0
  47. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +99 -0
  48. azure/ai/evaluation/_evaluators/_meteor/__init__.py +9 -0
  49. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +90 -0
  50. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +20 -0
  51. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +132 -0
  52. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +55 -0
  53. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +100 -0
  54. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +124 -0
  55. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +100 -0
  56. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +100 -0
  57. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +100 -0
  58. azure/ai/evaluation/_evaluators/_protected_material/__init__.py +5 -0
  59. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +113 -0
  60. azure/ai/evaluation/_evaluators/_qa/__init__.py +9 -0
  61. azure/ai/evaluation/_evaluators/_qa/_qa.py +93 -0
  62. azure/ai/evaluation/_evaluators/_relevance/__init__.py +9 -0
  63. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +114 -0
  64. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +100 -0
  65. azure/ai/evaluation/_evaluators/_retrieval/__init__.py +9 -0
  66. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +112 -0
  67. azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
  68. azure/ai/evaluation/_evaluators/_rouge/__init__.py +10 -0
  69. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +98 -0
  70. azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
  71. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +148 -0
  72. azure/ai/evaluation/_evaluators/_similarity/__init__.py +9 -0
  73. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +140 -0
  74. azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +66 -0
  75. azure/ai/evaluation/_evaluators/_xpia/__init__.py +5 -0
  76. azure/ai/evaluation/_evaluators/_xpia/xpia.py +125 -0
  77. azure/ai/evaluation/_exceptions.py +128 -0
  78. azure/ai/evaluation/_http_utils.py +466 -0
  79. azure/ai/evaluation/_model_configurations.py +123 -0
  80. azure/ai/evaluation/_user_agent.py +6 -0
  81. azure/ai/evaluation/_vendor/__init__.py +3 -0
  82. azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
  83. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +328 -0
  84. azure/ai/evaluation/_vendor/rouge_score/scoring.py +63 -0
  85. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +63 -0
  86. azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
  87. azure/ai/evaluation/_version.py +5 -0
  88. azure/ai/evaluation/py.typed +0 -0
  89. azure/ai/evaluation/simulator/__init__.py +16 -0
  90. azure/ai/evaluation/simulator/_adversarial_scenario.py +46 -0
  91. azure/ai/evaluation/simulator/_adversarial_simulator.py +471 -0
  92. azure/ai/evaluation/simulator/_constants.py +27 -0
  93. azure/ai/evaluation/simulator/_conversation/__init__.py +316 -0
  94. azure/ai/evaluation/simulator/_conversation/_conversation.py +178 -0
  95. azure/ai/evaluation/simulator/_conversation/constants.py +30 -0
  96. azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
  97. azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
  98. azure/ai/evaluation/simulator/_direct_attack_simulator.py +218 -0
  99. azure/ai/evaluation/simulator/_helpers/__init__.py +4 -0
  100. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +17 -0
  101. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +96 -0
  102. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +220 -0
  103. azure/ai/evaluation/simulator/_model_tools/__init__.py +23 -0
  104. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +195 -0
  105. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +244 -0
  106. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +168 -0
  107. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +201 -0
  108. azure/ai/evaluation/simulator/_model_tools/models.py +614 -0
  109. azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
  110. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +65 -0
  111. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +37 -0
  112. azure/ai/evaluation/simulator/_simulator.py +716 -0
  113. azure/ai/evaluation/simulator/_tracing.py +89 -0
  114. azure/ai/evaluation/simulator/_utils.py +132 -0
  115. azure_ai_evaluation-1.0.0.dist-info/METADATA +595 -0
  116. azure_ai_evaluation-1.0.0.dist-info/NOTICE.txt +70 -0
  117. azure_ai_evaluation-1.0.0.dist-info/RECORD +119 -0
  118. {azure_ai_evaluation-0.0.0b0.dist-info → azure_ai_evaluation-1.0.0.dist-info}/WHEEL +1 -1
  119. azure_ai_evaluation-1.0.0.dist-info/top_level.txt +1 -0
  120. azure_ai_evaluation-0.0.0b0.dist-info/METADATA +0 -7
  121. azure_ai_evaluation-0.0.0b0.dist-info/RECORD +0 -4
  122. azure_ai_evaluation-0.0.0b0.dist-info/top_level.txt +0 -1
@@ -0,0 +1,201 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ from typing import Dict, List, Optional, TypedDict, cast
6
+
7
+ from typing_extensions import NotRequired
8
+
9
+ from azure.ai.evaluation._model_configurations import AzureAIProject
10
+
11
+ from ._rai_client import RAIClient
12
+
13
# Template collection names accepted by ``AdversarialTemplateHandler.get_template``.
# Every entry carries the ``adv_`` prefix; entries are kept in alphabetical order.
CONTENT_HARM_TEMPLATES_COLLECTION_KEY = {
    "adv_content_gen_grounded",
    "adv_content_gen_ungrounded",
    "adv_content_protected_material",
    "adv_conversation",
    "adv_politics",
    "adv_qa",
    "adv_rewrite",
    "adv_search",
    "adv_summarization",
}
24
+
25
+
26
class TemplateParameters(TypedDict):
    """Parameters substituted into templates.

    .. note::

        This declaration is sufficient for type checking, but it is only an approximation. The
        intended shape is a dictionary with a known ``metadata`` key (``Dict[str, str]``), a known
        ``ch_template_placeholder`` key (``str``), and an arbitrary number of additional keys that
        map to ``str`` values.

        In TypeScript the type would be written as:

        .. code-block:: typescript

            type AdversarialTemplateParameters = {
                [key: string]: string
                ch_template_placeholder: string
                metadata: {[index: string]: string}  // Doesn't typecheck but gets the point across
            }

        At the time of writing a TypedDict cannot express this: TypedDicts are "closed", meaning
        they must fully specify every key they can contain.

        `PEP 728 – TypedDict with Typed Extra Items <https://peps.python.org/pep-0728/>`_ is a
        proposal to support this, but it would only be available in Python 3.13 at the earliest.
    """

    # Keys that are always required.
    metadata: Dict[str, str]
    conversation_starter: str
    ch_template_placeholder: str

    # Keys that may be omitted.
    group_of_people: NotRequired[str]
    category: NotRequired[str]
    target_population: NotRequired[str]
    topic: NotRequired[str]
59
+
60
+
61
class _CategorizedParameter(TypedDict):
    """Template parameters of one dataset, stored with their category and originating key."""

    # Parameter sets belonging to the dataset.
    parameters: List[TemplateParameters]
    # Category the dataset was assigned to.
    category: str
    # Dataset key under which the parameters were originally returned.
    parameters_key: str
65
+
66
+
67
class ContentHarmTemplatesUtils:
    """Content harm templates utility functions."""

    @staticmethod
    def get_template_category(key: str) -> str:
        """Parse category from template key

        :param key: The template key
        :type key: str
        :return: The category
        :rtype: str
        """
        # Check for datasets whose names do not align with the normal
        # naming convention where the first segment of the name is the category.
        if key == "conversation/public/ip/bing_ip.json":
            return "content_protected_material"
        return key.split("/")[0]

    @staticmethod
    def get_template_key(key: str) -> str:
        """Given a template dataset name (which looks like a .json file name) convert it into
        the corresponding template key (which looks like a .md file name). This allows us to
        properly link datasets to the LLM that must be used to simulate them.

        Only a trailing ``.json`` is removed; a ``.json`` occurring earlier in the key (for
        example inside a directory segment) is left untouched.

        :param key: The dataset key.
        :type key: str
        :return: The template key.
        :rtype: str
        """
        json_suffix = ".json"
        # Strip the extension explicitly: ``key.rsplit(".json")[0]`` would cut the key at the
        # FIRST occurrence of ".json" and silently drop everything after it.
        filepath = key[: -len(json_suffix)] if key.endswith(json_suffix) else key

        # Only the final path segment is a file name; the directory segments are kept as they are.
        *prefix, basename = filepath.split("/")
        prefix.append(ContentHarmTemplatesUtils.json_name_to_md_name(basename))

        return "/".join(prefix)

    @staticmethod
    def json_name_to_md_name(name: str) -> str:
        """Convert JSON filename to Markdown filename

        Every ``_aml`` occurrence is removed from the name before ``.md`` is appended.

        :param name: The JSON filename, without its extension
        :type name: str
        :return: The Markdown filename
        :rtype: str
        """
        return name.replace("_aml", "") + ".md"
116
+
117
+
118
class AdversarialTemplate:
    """Template for adversarial scenarios.

    :param template_name: The name of the template.
    :type template_name: str
    :param text: The template text, or ``None`` when no text is available.
    :type text: Optional[str]
    :param context_key: The context key.
    :type context_key: List
    :param template_parameters: The template parameters. A falsy value (``None`` or an empty list)
        is stored as a new empty list.
    :type template_parameters: Optional[List[TemplateParameters]]
    """

    def __init__(
        self,
        template_name: str,
        text: Optional[str],
        context_key: List,
        template_parameters: Optional[List[TemplateParameters]] = None,
    ) -> None:
        self.template_name = template_name
        self.text = text
        self.context_key = context_key
        # Normalise "no parameters" so the attribute is always a list.
        self.template_parameters = [] if not template_parameters else template_parameters

    def __str__(self) -> str:
        # The string form is always the fixed placeholder, independent of the instance state.
        placeholder = "{{ch_template_placeholder}}"
        return placeholder
143
+
144
+
145
class AdversarialTemplateHandler:
    """
    Adversarial template handler constructor.

    :param azure_ai_project: The Azure AI project.
    :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
    :param rai_client: The RAI client.
    :type rai_client: ~azure.ai.evaluation.simulator._model_tools.RAIClient
    """

    def __init__(self, azure_ai_project: AzureAIProject, rai_client: RAIClient) -> None:
        self.rai_client = rai_client
        self.azure_ai_project = azure_ai_project
        # Populated by the first call to _get_content_harm_template_collections and reused afterwards.
        self.categorized_ch_parameters: Optional[Dict[str, _CategorizedParameter]] = None

    async def _get_content_harm_template_collections(self, collection_key: str) -> List[AdversarialTemplate]:
        """Build the templates whose category matches a collection key.

        The content harm parameters are requested from the RAI client once and cached on the
        instance, indexed by template key.

        :param collection_key: The collection key; the text after the last ``adv_`` is the category.
        :type collection_key: str
        :return: One template per cached entry of the matching category.
        :rtype: List[~azure.ai.evaluation.simulator._model_tools.AdversarialTemplate]
        """
        if self.categorized_ch_parameters is None:
            raw_parameters = await self.rai_client.get_contentharm_parameters()

            # Index every dataset by its template key, remembering the category and original key.
            self.categorized_ch_parameters = {
                ContentHarmTemplatesUtils.get_template_key(dataset_key): {
                    "parameters": cast(List[TemplateParameters], dataset_parameters),
                    "category": ContentHarmTemplatesUtils.get_template_category(dataset_key),
                    "parameters_key": dataset_key,
                }
                for dataset_key, dataset_parameters in raw_parameters.items()
            }

        wanted_category = collection_key.split("adv_")[-1]

        templates: List[AdversarialTemplate] = []
        for template_name, entry in self.categorized_ch_parameters.items():
            if entry["category"] != wanted_category:
                continue

            entry_parameters = entry["parameters"]
            # The cached parameter dictionaries are modified in place.
            for parameter_set in entry_parameters:
                parameter_set["ch_template_placeholder"] = "{{ch_template_placeholder}}"

            templates.append(
                AdversarialTemplate(
                    template_name=template_name,
                    text=None,
                    context_key=[],
                    template_parameters=entry_parameters,
                )
            )
        return templates

    def get_template(self, template_name: str) -> Optional[AdversarialTemplate]:
        """Generate content harm template.

        :param template_name: The name of the template.
        :type template_name: str
        :return: A template without text or parameters when the name is a known collection key,
            otherwise ``None``.
        :rtype: Optional[~azure.ai.evaluation.simulator._model_tools.AdversarialTemplate]
        """
        if template_name not in CONTENT_HARM_TEMPLATES_COLLECTION_KEY:
            return None
        return AdversarialTemplate(template_name=template_name, text=None, context_key=[], template_parameters=None)