azure-ai-evaluation 1.0.0b4__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. azure/ai/evaluation/__init__.py +22 -0
  2. azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +4 -0
  3. azure/ai/evaluation/_common/constants.py +5 -0
  4. azure/ai/evaluation/_common/math.py +73 -2
  5. azure/ai/evaluation/_common/rai_service.py +250 -62
  6. azure/ai/evaluation/_common/utils.py +196 -23
  7. azure/ai/evaluation/_constants.py +7 -6
  8. azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/__init__.py +3 -2
  9. azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +13 -4
  10. azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/proxy_client.py +19 -6
  11. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +46 -0
  12. azure/ai/evaluation/_evaluate/_eval_run.py +55 -14
  13. azure/ai/evaluation/_evaluate/_evaluate.py +312 -228
  14. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +7 -6
  15. azure/ai/evaluation/_evaluate/_utils.py +46 -11
  16. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +17 -18
  17. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +67 -31
  18. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -34
  19. azure/ai/evaluation/_evaluators/_common/_base_eval.py +37 -24
  20. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +21 -9
  21. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +52 -16
  22. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +91 -48
  23. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +100 -26
  24. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +94 -26
  25. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +96 -26
  26. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +97 -26
  27. azure/ai/evaluation/_evaluators/_eci/_eci.py +31 -4
  28. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +20 -13
  29. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +67 -36
  30. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -36
  31. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +14 -16
  32. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +106 -34
  33. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +113 -0
  34. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +99 -0
  35. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +20 -27
  36. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +20 -0
  37. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +132 -0
  38. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +55 -0
  39. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +100 -0
  40. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +124 -0
  41. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +100 -0
  42. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +100 -0
  43. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +100 -0
  44. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +87 -31
  45. azure/ai/evaluation/_evaluators/_qa/_qa.py +23 -31
  46. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +72 -36
  47. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +78 -42
  48. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +83 -125
  49. azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +74 -24
  50. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +26 -27
  51. azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
  52. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +148 -0
  53. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +37 -28
  54. azure/ai/evaluation/_evaluators/_xpia/xpia.py +94 -33
  55. azure/ai/evaluation/_exceptions.py +19 -0
  56. azure/ai/evaluation/_model_configurations.py +83 -15
  57. azure/ai/evaluation/_version.py +1 -1
  58. azure/ai/evaluation/simulator/__init__.py +2 -1
  59. azure/ai/evaluation/simulator/_adversarial_scenario.py +20 -1
  60. azure/ai/evaluation/simulator/_adversarial_simulator.py +29 -35
  61. azure/ai/evaluation/simulator/_constants.py +11 -1
  62. azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
  63. azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
  64. azure/ai/evaluation/simulator/_direct_attack_simulator.py +17 -9
  65. azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
  66. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +22 -1
  67. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +90 -35
  68. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +4 -2
  69. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +8 -4
  70. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +4 -4
  71. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -1
  72. azure/ai/evaluation/simulator/_simulator.py +165 -105
  73. azure/ai/evaluation/simulator/_utils.py +31 -13
  74. azure_ai_evaluation-1.0.1.dist-info/METADATA +600 -0
  75. {azure_ai_evaluation-1.0.0b4.dist-info → azure_ai_evaluation-1.0.1.dist-info}/NOTICE.txt +20 -0
  76. azure_ai_evaluation-1.0.1.dist-info/RECORD +119 -0
  77. {azure_ai_evaluation-1.0.0b4.dist-info → azure_ai_evaluation-1.0.1.dist-info}/WHEEL +1 -1
  78. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -322
  79. azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -49
  80. azure_ai_evaluation-1.0.0b4.dist-info/METADATA +0 -535
  81. azure_ai_evaluation-1.0.0b4.dist-info/RECORD +0 -106
  82. /azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +0 -0
  83. {azure_ai_evaluation-1.0.0b4.dist-info → azure_ai_evaluation-1.0.1.dist-info}/top_level.txt +0 -0
@@ -1,535 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: azure-ai-evaluation
3
- Version: 1.0.0b4
4
- Summary: Microsoft Azure Evaluation Library for Python
5
- Home-page: https://github.com/Azure/azure-sdk-for-python
6
- Author: Microsoft Corporation
7
- Author-email: azuresdkengsysadmins@microsoft.com
8
- License: MIT License
9
- Project-URL: Bug Reports, https://github.com/Azure/azure-sdk-for-python/issues
10
- Project-URL: Source, https://github.com/Azure/azure-sdk-for-python
11
- Keywords: azure,azure sdk
12
- Classifier: Development Status :: 4 - Beta
13
- Classifier: Programming Language :: Python
14
- Classifier: Programming Language :: Python :: 3
15
- Classifier: Programming Language :: Python :: 3 :: Only
16
- Classifier: Programming Language :: Python :: 3.8
17
- Classifier: Programming Language :: Python :: 3.9
18
- Classifier: Programming Language :: Python :: 3.10
19
- Classifier: Programming Language :: Python :: 3.11
20
- Classifier: License :: OSI Approved :: MIT License
21
- Classifier: Operating System :: OS Independent
22
- Requires-Python: >=3.8
23
- Description-Content-Type: text/markdown
24
- License-File: NOTICE.txt
25
- Requires-Dist: promptflow-devkit >=1.15.0
26
- Requires-Dist: promptflow-core >=1.15.0
27
- Requires-Dist: pyjwt >=2.8.0
28
- Requires-Dist: azure-identity >=1.16.0
29
- Requires-Dist: azure-core >=1.30.2
30
- Requires-Dist: nltk >=3.9.1
31
- Provides-Extra: remote
32
- Requires-Dist: promptflow-azure <2.0.0,>=1.15.0 ; extra == 'remote'
33
-
34
- # Azure AI Evaluation client library for Python
35
-
36
- We are excited to introduce the public preview of the Azure AI Evaluation SDK.
37
-
38
- [Source code][source_code]
39
- | [Package (PyPI)][evaluation_pypi]
40
- | [API reference documentation][evaluation_ref_docs]
41
- | [Product documentation][product_documentation]
42
- | [Samples][evaluation_samples]
43
-
44
- This package has been tested with Python 3.8, 3.9, 3.10, 3.11, and 3.12.
45
-
46
- For a more complete set of Azure libraries, see https://aka.ms/azsdk/python/all
47
-
48
- ## Getting started
49
-
50
- ### Prerequisites
51
-
52
- - Python 3.8 or later is required to use this package.
53
-
54
- ### Install the package
55
-
56
- Install the Azure AI Evaluation library for Python with [pip][pip_link]:
57
-
58
- ```bash
59
- pip install azure-ai-evaluation
60
- ```
61
-
62
- ## Key concepts
63
-
64
- Evaluators are custom or prebuilt classes or functions that are designed to measure the quality of the outputs from language models.
65
-
66
- ## Examples
67
-
68
- ### Evaluators
69
-
70
- Users can create evaluator runs on the local machine as shown in the example below:
71
-
72
- ```python
73
- import os
74
- from pprint import pprint
75
-
76
- from azure.ai.evaluation import evaluate, RelevanceEvaluator, ViolenceEvaluator
77
-
78
-
79
- def response_length(response, **kwargs):
80
- return {"value": len(response)}
81
-
82
-
83
- if __name__ == "__main__":
84
- # Built-in evaluators
85
- # Initialize Azure OpenAI Model Configuration
86
- model_config = {
87
- "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
88
- "api_key": os.environ.get("AZURE_OPENAI_KEY"),
89
- "azure_deployment": os.environ.get("AZURE_OPENAI_DEPLOYMENT"),
90
- }
91
-
92
- # Initializing Relevance Evaluator
93
- relevance_eval = RelevanceEvaluator(model_config)
94
-
95
- # Running Relevance Evaluator on single input row
96
- relevance_score = relevance_eval(
97
- response="The Alpine Explorer Tent is the most waterproof.",
98
- context="From the our product list,"
99
- " the alpine explorer tent is the most waterproof."
100
- " The Adventure Dining Table has higher weight.",
101
- query="Which tent is the most waterproof?",
102
- )
103
-
104
- pprint(relevance_score)
105
- # {'gpt_relevance': 5.0}
106
-
107
- # Content Safety Evaluator
108
-
109
- # Initialize Project Scope
110
- azure_ai_project = {
111
- "subscription_id": <subscription_id>,
112
- "resource_group_name": <resource_group_name>,
113
- "project_name": <project_name>
114
- }
115
-
116
- violence_eval = ViolenceEvaluator(azure_ai_project)
117
- violence_score = violence_eval(query="What is the capital of France?", response="Paris.")
118
- pprint(violence_score)
119
- # {'violence': 'Very low',
120
- # 'violence_reason': "The system's response is a straightforward factual response "
121
- # 'to a geography question. There is no violent content or '
122
- # 'language present.',
123
- # 'violence_score': 0}
124
-
125
- # Code based evaluator
126
- response_length("The Alpine Explorer Tent is the most waterproof.")
127
- # {'value': 48}
128
-
129
- # Using multiple evaluators together with the `evaluate` API
130
-
131
- result = evaluate(
132
- data="evaluate_test_data.jsonl",
133
- evaluators={
134
- "response_length": response_length,
135
- "violence": violence_eval,
136
- },
137
- )
138
-
139
- pprint(result)
140
- ```
141
- ### Simulator
142
-
143
-
144
- Simulators allow users to generate synthetic data using their application. The simulator expects the user to provide a callback method that invokes
145
- their AI application.
146
-
147
- #### Simulating with a Prompty
148
-
149
- ```yaml
150
- ---
151
- name: ApplicationPrompty
152
- description: Simulates an application
153
- model:
154
- api: chat
155
- parameters:
156
- temperature: 0.0
157
- top_p: 1.0
158
- presence_penalty: 0
159
- frequency_penalty: 0
160
- response_format:
161
- type: text
162
-
163
- inputs:
164
- conversation_history:
165
- type: dict
166
-
167
- ---
168
- system:
169
- You are a helpful assistant and you're helping with the user's query. Keep the conversation engaging and interesting.
170
-
171
- Output with a string that continues the conversation, responding to the latest message from the user, given the conversation history:
172
- {{ conversation_history }}
173
-
174
- ```
175
- Application code:
176
-
177
- ```python
178
- import json
179
- import asyncio
180
- from typing import Any, Dict, List, Optional
181
- from azure.ai.evaluation.simulator import Simulator
182
- from promptflow.client import load_flow
183
- import os
184
- import wikipedia
- from azure.identity import DefaultAzureCredential
185
-
186
- # Set up the model configuration without api_key, using DefaultAzureCredential
187
- model_config = {
188
- "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
189
- "azure_deployment": os.environ.get("AZURE_DEPLOYMENT"),
190
- # not providing an api_key makes the SDK pick up `DefaultAzureCredential`
191
- # to use a key instead, add "api_key": "<your API key>"
192
- }
193
-
194
- # Use Wikipedia to get some text for the simulation
195
- wiki_search_term = "Leonardo da Vinci"
196
- wiki_title = wikipedia.search(wiki_search_term)[0]
197
- wiki_page = wikipedia.page(wiki_title)
198
- text = wiki_page.summary[:1000]
199
-
200
- def method_to_invoke_application_prompty(query: str, messages_list: List[Dict], context: Optional[Dict]):
201
- try:
202
- current_dir = os.path.dirname(__file__)
203
- prompty_path = os.path.join(current_dir, "application.prompty")
204
- _flow = load_flow(
205
- source=prompty_path,
206
- model=model_config,
207
- credential=DefaultAzureCredential()
208
- )
209
- response = _flow(
210
- query=query,
211
- context=context,
212
- conversation_history=messages_list
213
- )
214
- return response
215
- except Exception as e:
216
- print(f"Something went wrong invoking the prompty: {e}")
217
- return "something went wrong"
218
-
219
- async def callback(
220
- messages: Dict[str, List[Dict]],
221
- stream: bool = False,
222
- session_state: Any = None, # noqa: ANN401
223
- context: Optional[Dict[str, Any]] = None,
224
- ) -> dict:
225
- messages_list = messages["messages"]
226
- # Get the last message from the user
227
- latest_message = messages_list[-1]
228
- query = latest_message["content"]
229
- # Call your endpoint or AI application here
230
- response = method_to_invoke_application_prompty(query, messages_list, context)
231
- # Format the response to follow the OpenAI chat protocol format
232
- formatted_response = {
233
- "content": response,
234
- "role": "assistant",
235
- "context": {
236
- "citations": None,
237
- },
238
- }
239
- messages["messages"].append(formatted_response)
240
- return {"messages": messages["messages"], "stream": stream, "session_state": session_state, "context": context}
241
-
242
- async def main():
243
- simulator = Simulator(model_config=model_config)
244
- outputs = await simulator(
245
- target=callback,
246
- text=text,
247
- num_queries=2,
248
- max_conversation_turns=4,
249
- user_persona=[
250
- f"I am a student and I want to learn more about {wiki_search_term}",
251
- f"I am a teacher and I want to teach my students about {wiki_search_term}"
252
- ],
253
- )
254
- print(json.dumps(outputs, indent=2))
255
-
256
- if __name__ == "__main__":
257
- # Ensure that the following environment variables are set in your environment:
258
- # AZURE_OPENAI_ENDPOINT and AZURE_DEPLOYMENT
259
- # Example:
260
- # os.environ["AZURE_OPENAI_ENDPOINT"] = "https://your-endpoint.openai.azure.com/"
261
- # os.environ["AZURE_DEPLOYMENT"] = "your-deployment-name"
262
- asyncio.run(main())
263
- print("done!")
264
-
265
- ```
266
-
267
- #### Adversarial Simulator
268
-
269
- ```python
270
- from azure.ai.evaluation.simulator import AdversarialSimulator, AdversarialScenario, DirectAttackSimulator
271
- from azure.identity import DefaultAzureCredential
272
- from typing import Any, Dict, List, Optional
273
- import asyncio
274
-
275
-
276
- azure_ai_project = {
277
- "subscription_id": <subscription_id>,
278
- "resource_group_name": <resource_group_name>,
279
- "project_name": <project_name>
280
- }
281
-
282
- async def callback(
283
- messages: List[Dict],
284
- stream: bool = False,
285
- session_state: Any = None,
286
- context: Dict[str, Any] = None
287
- ) -> dict:
288
- messages_list = messages["messages"]
289
- # get last message
290
- latest_message = messages_list[-1]
291
- query = latest_message["content"]
292
- context = None
293
- if 'file_content' in messages["template_parameters"]:
294
- query += messages["template_parameters"]['file_content']
295
- # the next few lines explain how to use AsyncAzureOpenAI's chat.completions
296
- # to respond to the simulator. You should replace it with a call to your model/endpoint/application
297
- # make sure you pass the `query` and format the response as we have shown below
298
- from openai import AsyncAzureOpenAI
299
- oai_client = AsyncAzureOpenAI(
300
- api_key=<api_key>,
301
- azure_endpoint=<endpoint>,
302
- api_version="2023-12-01-preview",
303
- )
304
- try:
305
- response_from_oai_chat_completions = await oai_client.chat.completions.create(messages=[{"content": query, "role": "user"}], model="gpt-4", max_tokens=300)
306
- except Exception as e:
307
- print(f"Error: {e}")
308
- # to continue the conversation, return the messages; otherwise you can fail the adversarial simulation by raising an exception
309
- message = {
310
- "content": "Something went wrong. Check the exception e for more details.",
311
- "role": "assistant",
312
- "context": None,
313
- }
314
- messages["messages"].append(message)
315
- return {
316
- "messages": messages["messages"],
317
- "stream": stream,
318
- "session_state": session_state
319
- }
320
- response_result = response_from_oai_chat_completions.choices[0].message.content
321
- formatted_response = {
322
- "content": response_result,
323
- "role": "assistant",
324
- "context": {},
325
- }
326
- messages["messages"].append(formatted_response)
327
- return {
328
- "messages": messages["messages"],
329
- "stream": stream,
330
- "session_state": session_state,
331
- "context": context
332
- }
333
-
334
- ```
335
-
336
- #### Adversarial QA
337
-
338
- ```python
339
- scenario = AdversarialScenario.ADVERSARIAL_QA
340
- simulator = AdversarialSimulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
341
-
342
- outputs = asyncio.run(
343
- simulator(
344
- scenario=scenario,
345
- max_conversation_turns=1,
346
- max_simulation_results=3,
347
- target=callback
348
- )
349
- )
350
-
351
- print(outputs.to_eval_qa_json_lines())
352
- ```
353
- #### Direct Attack Simulator
354
-
355
- ```python
356
- scenario = AdversarialScenario.ADVERSARIAL_QA
357
- simulator = DirectAttackSimulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
358
-
359
- outputs = asyncio.run(
360
- simulator(
361
- scenario=scenario,
362
- max_conversation_turns=1,
363
- max_simulation_results=2,
364
- target=callback
365
- )
366
- )
367
-
368
- print(outputs)
369
- ```
370
- ## Troubleshooting
371
-
372
- ### General
373
-
374
- Azure AI Evaluation clients raise exceptions defined in [Azure Core][azure_core_readme].
375
-
376
- ### Logging
377
-
378
- This library uses the standard
379
- [logging][python_logging] library for logging.
380
- Basic information about HTTP sessions (URLs, headers, etc.) is logged at INFO
381
- level.
382
-
383
- Detailed DEBUG level logging, including request/response bodies and unredacted
384
- headers, can be enabled on a client with the `logging_enable` argument.
385
-
386
- See full SDK logging documentation with examples [here][sdk_logging_docs].
387
-
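The snippet below is a minimal sketch, not taken from the package docs: it wires the standard `logging` module up to the `azure` logger at DEBUG level, and the commented line illustrates the per-client `logging_enable` opt-in described above (whether a specific evaluator constructor accepts it is an assumption).

```python
import logging
import sys

# Route Azure SDK log output to stdout. INFO covers request URLs and redacted
# headers; DEBUG adds request/response bodies and unredacted headers.
handler = logging.StreamHandler(stream=sys.stdout)
logger = logging.getLogger("azure")
logger.setLevel(logging.DEBUG)
logger.addHandler(handler)

# Hypothetical per-client opt-in following the `logging_enable` convention above:
# violence_eval = ViolenceEvaluator(azure_ai_project, credential, logging_enable=True)
```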
388
- ## Next steps
389
-
390
- - View our [samples][evaluation_samples].
391
- - View our [documentation][product_documentation].
392
-
393
- ## Contributing
394
-
395
- This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit [cla.microsoft.com][cla].
396
-
397
- When you submit a pull request, a CLA-bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA.
398
-
399
- This project has adopted the [Microsoft Open Source Code of Conduct][code_of_conduct]. For more information see the [Code of Conduct FAQ][coc_faq] or contact [opencode@microsoft.com][coc_contact] with any additional questions or comments.
400
-
401
- <!-- LINKS -->
402
-
403
- [source_code]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/evaluation/azure-ai-evaluation
404
- [evaluation_pypi]: https://pypi.org/project/azure-ai-evaluation/
405
- [evaluation_ref_docs]: https://learn.microsoft.com/python/api/azure-ai-evaluation/azure.ai.evaluation?view=azure-python-preview
406
- [evaluation_samples]: https://github.com/Azure-Samples/azureai-samples/tree/main/scenarios
407
- [product_documentation]: https://learn.microsoft.com/azure/ai-studio/how-to/develop/evaluate-sdk
408
- [python_logging]: https://docs.python.org/3/library/logging.html
409
- [sdk_logging_docs]: https://docs.microsoft.com/azure/developer/python/azure-sdk-logging
410
- [azure_core_readme]: https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/core/azure-core/README.md
411
- [pip_link]: https://pypi.org/project/pip/
412
- [azure_core_ref_docs]: https://aka.ms/azsdk-python-core-policies
413
- [azure_core]: https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/core/azure-core/README.md
414
- [azure_identity]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/identity/azure-identity
415
- [cla]: https://cla.microsoft.com
416
- [code_of_conduct]: https://opensource.microsoft.com/codeofconduct/
417
- [coc_faq]: https://opensource.microsoft.com/codeofconduct/faq/
418
- [coc_contact]: mailto:opencode@microsoft.com
419
-
420
-
421
- # Release History
422
-
423
- ## 1.0.0b4 (2024-10-16)
424
-
425
- ### Breaking Changes
426
-
427
- Removed `numpy` dependency. All NaN values returned by the SDK have been changed from `numpy.nan` to `math.nan` (see the note after this list).
428
- - `credential` is now required to be passed in for all content safety evaluators and `ProtectedMaterialsEvaluator`. `DefaultAzureCredential` will no longer be chosen if a credential is not passed.
429
- - Changed package extra name from "pf-azure" to "remote".
430
-
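A brief illustrative aside on the NaN change above (not part of the original changelog): `math.nan` and `numpy.nan` are both IEEE-754 NaN floats, so callers should keep using `math.isnan` rather than equality checks when looking for missing scores.

```python
import math

score = math.nan  # a missing score as now returned by the SDK

# NaN never compares equal to itself, so equality checks fail silently;
# math.isnan handles both math.nan and numpy.nan.
print(score == math.nan)   # False
print(math.isnan(score))   # True
```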
431
- ### Bugs Fixed
432
- Adversarial Conversation simulations would fail with `Forbidden`. Added logic to re-fetch the token in the exponential retry logic to retrieve the RAI Service response.
433
-
434
- ### Other Changes
435
- Enhanced the error message to provide clearer instructions when required packages for the remote tracking feature are missing.
436
-
437
- ## 1.0.0b3 (2024-10-01)
438
-
439
- ### Features Added
440
-
441
- - Added `type` field to `AzureOpenAIModelConfiguration` and `OpenAIModelConfiguration`
442
- The following evaluators now support `conversation` as an alternative input to their usual single-turn inputs (see the sketch after this list):
443
- - `ViolenceEvaluator`
444
- - `SexualEvaluator`
445
- - `SelfHarmEvaluator`
446
- - `HateUnfairnessEvaluator`
447
- - `ProtectedMaterialEvaluator`
448
- - `IndirectAttackEvaluator`
449
- - `CoherenceEvaluator`
450
- - `RelevanceEvaluator`
451
- - `FluencyEvaluator`
452
- - `GroundednessEvaluator`
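For illustration only (the exact schema is not spelled out in this changelog), a conversation input would presumably reuse the `{"messages": [...]}` shape used by the simulator callbacks earlier in this README; the keyword name in the commented call is likewise an assumption.

```python
# Hypothetical conversation payload mirroring the simulator callback format;
# anything beyond "role"/"content" entries is an assumption, not from the changelog.
conversation = {
    "messages": [
        {"role": "user", "content": "Which tent is the most waterproof?"},
        {"role": "assistant", "content": "The Alpine Explorer Tent is the most waterproof."},
    ]
}

# relevance_eval(query=..., response=..., context=...)  # usual single-turn call
# relevance_eval(conversation=conversation)             # conversation alternative (assumed keyword)
```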
453
- Surfaced `RetrievalScoreEvaluator`, formerly an internal part of `ChatEvaluator`, as a standalone conversation-only evaluator.
454
-
455
- ### Breaking Changes
456
-
457
- - Removed `ContentSafetyChatEvaluator` and `ChatEvaluator`
458
- The `evaluator_config` parameter of `evaluate` now maps evaluator names to a dictionary, `EvaluatorConfig`, which is a `TypedDict`. The
459
- `column_mapping` between `data` or `target` and evaluator field names should now be specified inside this new dictionary:
460
-
461
- Before:
462
- ```python
463
- evaluate(
464
- ...,
465
- evaluator_config={
466
- "hate_unfairness": {
467
- "query": "${data.question}",
468
- "response": "${data.answer}",
469
- }
470
- },
471
- ...
472
- )
473
- ```
474
-
475
- After:
476
- ```python
477
- evaluate(
478
- ...,
479
- evaluator_config={
480
- "hate_unfairness": {
481
- "column_mapping": {
482
- "query": "${data.question}",
483
- "response": "${data.answer}",
484
- }
485
- }
486
- },
487
- ...
488
- )
489
- ```
490
-
491
- Simulator now requires a model configuration to call the prompty instead of an Azure AI project scope. This enables using the simulator with Entra ID-based auth.
492
- Before:
493
- ```python
494
- azure_ai_project = {
495
- "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
496
- "resource_group_name": os.environ.get("RESOURCE_GROUP"),
497
- "project_name": os.environ.get("PROJECT_NAME"),
498
- }
499
- sim = Simulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
500
- ```
501
- After:
502
- ```python
503
- model_config = {
504
- "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
505
- "azure_deployment": os.environ.get("AZURE_DEPLOYMENT"),
506
- }
507
- sim = Simulator(model_config=model_config)
508
- ```
509
- If `api_key` is not included in the `model_config`, the prompty runtime in `promptflow-core` will pick up `DefaultAzureCredential`.
510
-
511
- ### Bugs Fixed
512
-
513
- - Fixed issue where Entra ID authentication was not working with `AzureOpenAIModelConfiguration`
514
-
515
- ## 1.0.0b2 (2024-09-24)
516
-
517
- ### Breaking Changes
518
-
519
- - `data` and `evaluators` are now required keywords in `evaluate`.
520
-
521
- ## 1.0.0b1 (2024-09-20)
522
-
523
- ### Breaking Changes
524
-
525
- - The `synthetic` namespace has been renamed to `simulator`, and sub-namespaces under this module have been removed
526
- - The `evaluate` and `evaluators` namespaces have been removed, and everything previously exposed in those modules has been added to the root namespace `azure.ai.evaluation`
527
- The parameter name `project_scope` in content safety evaluators has been renamed to `azure_ai_project` for consistency with the evaluate API and simulators.
528
- Model configuration classes are now of type `TypedDict` and are exposed in the `azure.ai.evaluation` module instead of coming from `promptflow.core`.
529
- - Updated the parameter names for `question` and `answer` in built-in evaluators to more generic terms: `query` and `response`.
530
-
531
- ### Features Added
532
-
533
- - First preview
534
- - This package is a port of `promptflow-evals`. New features will be added only to this package moving forward.
535
- - Added a `TypedDict` for `AzureAIProject` that allows for better intellisense and type checking when passing in project information
@@ -1,106 +0,0 @@
1
- azure/ai/evaluation/__init__.py,sha256=rS_yFLTL3_XpRQ2hNHeLB0To8tIfJd0NyKxxxTyBxm4,1977
2
- azure/ai/evaluation/_constants.py,sha256=kcorrWvQbWyugt6hN2jQ9DsL9MegJEr-ecl2XBXmDw0,1990
3
- azure/ai/evaluation/_exceptions.py,sha256=WYOml83XAAq4lPWi1g0kirW29ZYDkIiU--NVJ5l8SLI,4318
4
- azure/ai/evaluation/_http_utils.py,sha256=oVbRaxUm41tVFGkYpZdHjT9ss_9va1NzXYuV3DUVr8k,17125
5
- azure/ai/evaluation/_model_configurations.py,sha256=YmpopzIdPKxIVLhV6yHlo9mRXRMqF-aJhjQB83LxT14,1882
6
- azure/ai/evaluation/_user_agent.py,sha256=O2y-QPBAcw7w7qQ6M2aRPC3Vy3TKd789u5lcs2yuFaI,290
7
- azure/ai/evaluation/_version.py,sha256=DWZYhJb1k3CCBC_y7I7JrcP2TygZpzCHwZ5OWb6Vo44,201
8
- azure/ai/evaluation/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- azure/ai/evaluation/_common/__init__.py,sha256=LHTkf6dMLLxikrGNgbUuREBVQcs4ORHR6Eryo4bm9M8,586
10
- azure/ai/evaluation/_common/constants.py,sha256=pzXfC8Z2P36bCcUlz5sX4yGYZTt0JHjAi3auuiTd8Ww,1779
11
- azure/ai/evaluation/_common/math.py,sha256=Dp0jgN3PMoJUTYXD37c7FyFMGZy2vyhUVXUHDHKYtb4,473
12
- azure/ai/evaluation/_common/rai_service.py,sha256=6lDd_-qPqqLZPRy_RKV3qJkzcINu_btBanRwok6WGUc,17191
13
- azure/ai/evaluation/_common/utils.py,sha256=I9zMI7gbAjNJ1ITy9HISzJQSdYu1Ba2mYsZP2qnPQ9M,10867
14
- azure/ai/evaluation/_evaluate/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
15
- azure/ai/evaluation/_evaluate/_eval_run.py,sha256=cFFvKolYZPAA0lZn6QS5D_FDK0tizRPba0xvhIeuYkw,21490
16
- azure/ai/evaluation/_evaluate/_evaluate.py,sha256=c_0BeKk5ortPoazcRmBtJwSr_6Ov5MOFZ0qJo1J9CBE,32196
17
- azure/ai/evaluation/_evaluate/_utils.py,sha256=fjnBlWER3XqHKg38vNauGwQj9ZkP_Ln_J1OQ5Kmnj5I,10563
18
- azure/ai/evaluation/_evaluate/_batch_run_client/__init__.py,sha256=BkxhojWca3e2QM3hFwO2xrLiiQ0i-3f8wsMfOx1zchs,361
19
- azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py,sha256=AeZoEQK4IPXceJJBShaFvGMeO2ith1pUl8TiPE-xti4,3214
20
- azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py,sha256=XQLaXfswF6ReHLpQthHLuLLa65Pts8uawGp7kRqmMDs,8260
21
- azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py,sha256=XkSIjtFge586LI2EqdFRQcqwdghlru5N49-IGXz84SU,3234
22
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py,sha256=84QK8EHFnv3vT25BlbvGVog0pmSmh7ntQAlXGE7KNP8,6947
23
- azure/ai/evaluation/_evaluators/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
24
- azure/ai/evaluation/_evaluators/_bleu/__init__.py,sha256=quKKO0kvOSkky5hcoNBvgBuMeeVRFCE9GSv70mAdGP4,260
25
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py,sha256=6EJCG9DnL2Y4pU_vhY4o3UOrumvI-6HI92tzEuCoyXk,2413
26
- azure/ai/evaluation/_evaluators/_coherence/__init__.py,sha256=GRqcSCQse02Spyki0UsRNWMIXiea2lLtPPXNGvkJzQ0,258
27
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py,sha256=g9Cmxg3kRsd_ORLv1xLBmHsHzTpP6UNqRfPNSc85yUI,2526
28
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty,sha256=_GXYhAH04tsl2qntZH5ACx7gFNfUeQ0hZQpOmDoLPNc,2549
29
- azure/ai/evaluation/_evaluators/_common/__init__.py,sha256=_hPqTkAla_O6s4ebVtTaBrVLEW3KSdDz66WwxjK50cI,423
30
- azure/ai/evaluation/_evaluators/_common/_base_eval.py,sha256=luidfzVC2dQE567N3G5XmO3Ir0ZOgAJxhwLL4pkC2Hk,15178
31
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py,sha256=1DsJJIEH_LG21KWPl5HIIs2_yNCb9MrJ-z4ietBML8I,3109
32
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py,sha256=IDkX6RgsaXVJxeB8faWuK3-PJTrbu5xZEzpxv5YrJUI,4410
33
- azure/ai/evaluation/_evaluators/_content_safety/__init__.py,sha256=PEYMIybfP64f7byhuTaiq4RiqsYbjqejpW1JsJIG1jA,556
34
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py,sha256=yNBXq_n-HTmprclLWZis6hH5FUqtY8umXQUWwI8gMSg,3857
35
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py,sha256=e4Xp3jEj7j-MjfjpE2fPWPlxJmH6ELmoPPA2XSyZCJM,12895
36
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py,sha256=pKTT_kaeBcoOH4HW17ZejaPsEJ5KF76TVHD4zSy01Mk,1805
37
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py,sha256=3znxFvwf6MYPaqpOtvzqllE3Gtbke8oiLHOhk1iCw24,1759
38
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py,sha256=2zpKfo3C_HRpugKLdjmH7ExNkDAtokG7MmfSZxk_6F0,1737
39
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py,sha256=xaB5NvaoqUPaVg4m7JFH1dH4K4idpMeFlWfC_P7deCA,1753
40
- azure/ai/evaluation/_evaluators/_eci/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
- azure/ai/evaluation/_evaluators/_eci/_eci.py,sha256=QSyayeMCQj3IH_ZpllgsEx3OFI8Ir33dNCfMFNneBBc,2360
42
- azure/ai/evaluation/_evaluators/_f1_score/__init__.py,sha256=aEVbO7iMoF20obdpLQKcKm69Yyu3mYnblKELLqu8OGI,260
43
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py,sha256=KeYL4Z7cO0Yb_pOAq-3WePUgSqNnci0uA3AH2r41VB4,4786
44
- azure/ai/evaluation/_evaluators/_fluency/__init__.py,sha256=EEJw39xRa0bOAA1rELTTKXQu2s60n_7CZQRD0Gu2QVw,259
45
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py,sha256=Q7HDWaI7lm9tYL6Y4_IUz7-EljJYpDwEu5GQXkXcJqY,2473
46
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty,sha256=xdznyssZDQiLELv4ecC-8uUJ4ssM-iij7A6S1aDsxOQ,2403
47
- azure/ai/evaluation/_evaluators/_gleu/__init__.py,sha256=Ae2EvQ7gqiYAoNO3LwGIhdAAjJPJDfT85rQGKrRrmbA,260
48
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py,sha256=m02wmIGjdoXjp9dwjnFQAKA8hGOUOTvpppDf2CD4QQo,2326
49
- azure/ai/evaluation/_evaluators/_groundedness/__init__.py,sha256=UYNJUeRvBwcSVFyZpdsf29un5eyaDzYoo3QvC1gvlLg,274
50
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py,sha256=Em51FLqq1KqUJRSEJVOMJt4OSX79sv93DOoWp555ReM,2696
51
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty,sha256=ylgxKa_xipb7wN_QwxSnjrD9AhKcJQCv8pPpWPwFfGg,3023
52
- azure/ai/evaluation/_evaluators/_meteor/__init__.py,sha256=209na3pPsdmcuYpYHUYtqQybCpc3yZkc93HnRdicSlI,266
53
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py,sha256=K3EdRuRcuEZYVIlI2jMEp0O9KJYXQB2o6h08q43oKWY,3316
54
- azure/ai/evaluation/_evaluators/_protected_material/__init__.py,sha256=eRAQIU9diVXfO5bp6aLWxZoYUvOsrDIfy1gnDOeNTiI,109
55
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py,sha256=cCyVyr0xmHAYO91VOWZGPksSCc0r1bBx1zhYCrQRwgw,2066
56
- azure/ai/evaluation/_evaluators/_qa/__init__.py,sha256=bcXfT--C0hjym2haqd1B2-u9bDciyM0ThOFtU1Q69sk,244
57
- azure/ai/evaluation/_evaluators/_qa/_qa.py,sha256=F-LKcdG-WHktNSQioF7tce9u8QyUIOlBvjIMtMcqGgU,3611
58
- azure/ai/evaluation/_evaluators/_relevance/__init__.py,sha256=JlxytW32Nl8pbE-fI3GRpfgVuY9EG6zxIAn5VZGSwyc,265
59
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py,sha256=Mn2_XYjhWBpYIErVnhjB_tsM8xJ1K16cXKk1qVocHak,2948
60
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty,sha256=QNWlrWxObUPlXFF1hdCDVpfXuw0QDOxHUtWLj1MwrxA,3559
61
- azure/ai/evaluation/_evaluators/_retrieval/__init__.py,sha256=kMu47ZyTZ7f-4Yh6H3KHxswmxitmPJ8FPSk90qgR0XI,265
62
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py,sha256=XsUl4WzTtFu9O6j4XCXFQOjGEQzrKqqxvDP-ChMsE04,5453
63
- azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty,sha256=HbQu5Gy9Ghw9r8vGCF-4ui441JBD8w45NOU_9ehamd0,1585
64
- azure/ai/evaluation/_evaluators/_rouge/__init__.py,sha256=kusCDaYcXogDugGefRP8MQSn9xv107oDbrMCqZ6K4GA,291
65
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py,sha256=T__Qt2lC8-DqhlgMvPY10g6sC5svY6oqmbZUerwxbZw,3554
66
- azure/ai/evaluation/_evaluators/_similarity/__init__.py,sha256=V2Mspog99_WBltxTkRHG5NpN5s9XoiTSN4I8POWEkLA,268
67
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py,sha256=rZVBI7e0D52gp556RVXW32eoT1NLOSrpJiqr_WyM2bk,4530
68
- azure/ai/evaluation/_evaluators/_similarity/similarity.prompty,sha256=eoludASychZoGL625bFCaZai-OY7DIAg90ZLax_o4XE,4594
69
- azure/ai/evaluation/_evaluators/_xpia/__init__.py,sha256=VMEL8WrpJQeh4sQiOLzP7hRFPnjzsvwfvTzaGCVJPCM,88
70
- azure/ai/evaluation/_evaluators/_xpia/xpia.py,sha256=6ALICg2iCBYQWUOfRwjxFJtiDbI0Vc1FsP5lXgnv5Yo,2504
71
- azure/ai/evaluation/_vendor/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
72
- azure/ai/evaluation/_vendor/rouge_score/__init__.py,sha256=03OkyfS_UmzRnHv6-z9juTaJ6OXJoEJM989hgifIZbc,607
73
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py,sha256=xDdNtzwtivcdki5RyErEI9BaQ7nksgj4bXYrGz7tLLs,11409
74
- azure/ai/evaluation/_vendor/rouge_score/scoring.py,sha256=ruwkMrJFJNvs3GWqVLAXudIwDa4EsX_d30pfUPUTf8E,1988
75
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py,sha256=tdSsUibKxtOMY8fdqGK_3-4sMbeOxZEG6D6L7suDTxQ,1936
76
- azure/ai/evaluation/_vendor/rouge_score/tokenizers.py,sha256=3_-y1TyvyluHuERhSJ5CdXSwnpcMA7aAKU6PCz9wH_Q,1745
77
- azure/ai/evaluation/simulator/__init__.py,sha256=UtlcXo3SteIQEW_hW2WMhtqLNiDiIGLeW_lIkEUNoMc,486
78
- azure/ai/evaluation/simulator/_adversarial_scenario.py,sha256=SxpyMw5wmM5-fiUjl1_oJH0GQEnsa7ASso10MAr2Hjw,1030
79
- azure/ai/evaluation/simulator/_adversarial_simulator.py,sha256=exXUWG-WcXQxHi630VlKaRRNm--S060UHuiJgDPOrQ0,21024
80
- azure/ai/evaluation/simulator/_constants.py,sha256=xM-Or2x7RytfoeBM3N7Vt4JQDJX66UdL3CPz0YN5rvE,485
81
- azure/ai/evaluation/simulator/_direct_attack_simulator.py,sha256=VtnJeddwqornM1VUiKKbD93Be57m7v7LrAwWik5yCy0,9733
82
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py,sha256=f2MA5jIROdTmm2C_mcDO8jasDuMiED0Re3r9ZXQNkbk,7712
83
- azure/ai/evaluation/simulator/_simulator.py,sha256=QS_4BF1hQuO--ZJhnCO-24mv_5-2aXVZ3k4krJR_CFE,32577
84
- azure/ai/evaluation/simulator/_tracing.py,sha256=frZ4-usrzINast9F4-ONRzEGGox71y8bYw0UHNufL1Y,3069
85
- azure/ai/evaluation/simulator/_utils.py,sha256=aXH5GdzQrwluKvYofWtdT0s_nzgVHS2hP6x4rc5zt-E,4287
86
- azure/ai/evaluation/simulator/_conversation/__init__.py,sha256=ulkkJkvRBRROLp_wpAKy1J-HLMJi3Yq6g7Q6VGRuD88,12914
87
- azure/ai/evaluation/simulator/_conversation/_conversation.py,sha256=vzKdpItmUjZrM5OUSkS2UkTnLnKvIzhak5hZ8xvFwnU,7403
88
- azure/ai/evaluation/simulator/_conversation/constants.py,sha256=3v7zkjPwJAPbSpJYIK6VOZZy70bJXMo_QTVqSFGlq9A,984
89
- azure/ai/evaluation/simulator/_helpers/__init__.py,sha256=YTwBf9B_uWGZSbS5vDBde4UpFszxzi3hSlcPtZ4Slcg,259
90
- azure/ai/evaluation/simulator/_helpers/_experimental.py,sha256=hmr9l9hHFNj6iEmBuMawdnnl54YzJrylbB7Dk6cs7cM,5565
91
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py,sha256=7BBLH78b7YDelHDLbAIwf-IO9s9cAEtn-RRXmNReHdc,1017
92
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py,sha256=fumMJYPLPv31KDgVC8A2fNjhLNREMgb1GFJUDv75Vgg,2193
93
- azure/ai/evaluation/simulator/_model_tools/__init__.py,sha256=aMv5apb7uVjuhMF9ohhA5kQmo652hrGIJlhdl3y2R1I,835
94
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py,sha256=aRqLy1mcgLo5_1DJ6BiNSjo1xv8D-TL0Ya3HBjGKZ80,6303
95
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py,sha256=Zg_SzqjCGJ3Wt8hktxz6Y1JEJCcV0V5jBC9N06jQP3k,8984
96
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py,sha256=Bi0tLNlJmz295mdoVaE9_6a_UJVRmCH5uAmxjslS_eQ,7037
97
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py,sha256=FGKLsWL0FZry47ZxFi53FSem8PZmh0iIy3JN4PBg5Tg,7036
98
- azure/ai/evaluation/simulator/_model_tools/models.py,sha256=bfVm0PV3vfH_8DkdmTMZqYVN-G51hZ6Y0TOO-NiysJY,21811
99
- azure/ai/evaluation/simulator/_prompty/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
100
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty,sha256=8oklGV7YGQE79bB5gV0AXdHyWcFtgXRCpthyikVF6kw,2174
101
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty,sha256=q-KshSHNcKxoF4eHelxzIMS3PGKoXQM6_UExOzlvXbk,793
102
- azure_ai_evaluation-1.0.0b4.dist-info/METADATA,sha256=Jffb9zCh3grSD_UjeM40ZiEgq6u6U_IKDxNz_34Q_Lw,18615
103
- azure_ai_evaluation-1.0.0b4.dist-info/NOTICE.txt,sha256=o9xBInKH4j22mM8VfF4mmMniV5Jz1Le1d7D3M7V5W2Y,1924
104
- azure_ai_evaluation-1.0.0b4.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
105
- azure_ai_evaluation-1.0.0b4.dist-info/top_level.txt,sha256=S7DhWV9m80TBzAhOFjxDUiNbKszzoThbnrSz5MpbHSQ,6
106
- azure_ai_evaluation-1.0.0b4.dist-info/RECORD,,