azure-ai-evaluation 1.0.0b4__py3-none-any.whl → 1.0.0b5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (79) hide show
  1. azure/ai/evaluation/__init__.py +22 -0
  2. azure/ai/evaluation/_common/constants.py +5 -0
  3. azure/ai/evaluation/_common/math.py +11 -0
  4. azure/ai/evaluation/_common/rai_service.py +172 -35
  5. azure/ai/evaluation/_common/utils.py +162 -23
  6. azure/ai/evaluation/_constants.py +6 -6
  7. azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/__init__.py +3 -2
  8. azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +4 -4
  9. azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/proxy_client.py +6 -3
  10. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +35 -0
  11. azure/ai/evaluation/_evaluate/_eval_run.py +21 -4
  12. azure/ai/evaluation/_evaluate/_evaluate.py +267 -139
  13. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -5
  14. azure/ai/evaluation/_evaluate/_utils.py +40 -7
  15. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +1 -1
  16. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +14 -9
  17. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -34
  18. azure/ai/evaluation/_evaluators/_common/_base_eval.py +20 -19
  19. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +18 -8
  20. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +48 -9
  21. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +56 -19
  22. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +5 -5
  23. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +30 -1
  24. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +30 -1
  25. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +30 -1
  26. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +30 -1
  27. azure/ai/evaluation/_evaluators/_eci/_eci.py +3 -1
  28. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +20 -20
  29. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -36
  30. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +1 -1
  31. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +49 -15
  32. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +113 -0
  33. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +99 -0
  34. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +3 -7
  35. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +20 -0
  36. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +130 -0
  37. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +57 -0
  38. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +96 -0
  39. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +120 -0
  40. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +96 -0
  41. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +96 -0
  42. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +96 -0
  43. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +44 -11
  44. azure/ai/evaluation/_evaluators/_qa/_qa.py +7 -3
  45. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +21 -19
  46. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +78 -42
  47. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +125 -82
  48. azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +74 -24
  49. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +2 -2
  50. azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
  51. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +150 -0
  52. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +17 -14
  53. azure/ai/evaluation/_evaluators/_xpia/xpia.py +32 -5
  54. azure/ai/evaluation/_exceptions.py +17 -0
  55. azure/ai/evaluation/_model_configurations.py +18 -1
  56. azure/ai/evaluation/_version.py +1 -1
  57. azure/ai/evaluation/simulator/__init__.py +2 -1
  58. azure/ai/evaluation/simulator/_adversarial_scenario.py +5 -0
  59. azure/ai/evaluation/simulator/_adversarial_simulator.py +4 -1
  60. azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
  61. azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
  62. azure/ai/evaluation/simulator/_direct_attack_simulator.py +1 -1
  63. azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
  64. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +22 -1
  65. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +79 -34
  66. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +1 -1
  67. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +4 -4
  68. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -1
  69. azure/ai/evaluation/simulator/_simulator.py +115 -61
  70. azure/ai/evaluation/simulator/_utils.py +6 -6
  71. {azure_ai_evaluation-1.0.0b4.dist-info → azure_ai_evaluation-1.0.0b5.dist-info}/METADATA +166 -9
  72. {azure_ai_evaluation-1.0.0b4.dist-info → azure_ai_evaluation-1.0.0b5.dist-info}/NOTICE.txt +20 -0
  73. azure_ai_evaluation-1.0.0b5.dist-info/RECORD +120 -0
  74. {azure_ai_evaluation-1.0.0b4.dist-info → azure_ai_evaluation-1.0.0b5.dist-info}/WHEEL +1 -1
  75. azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -49
  76. azure_ai_evaluation-1.0.0b4.dist-info/RECORD +0 -106
  77. /azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +0 -0
  78. /azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +0 -0
  79. {azure_ai_evaluation-1.0.0b4.dist-info → azure_ai_evaluation-1.0.0b5.dist-info}/top_level.txt +0 -0
@@ -26,9 +26,9 @@ class JsonLineList(list):
26
26
  json_lines += json.dumps(item) + "\n"
27
27
  return json_lines
28
28
 
29
- def to_eval_qa_json_lines(self):
29
+ def to_eval_qr_json_lines(self):
30
30
  """
31
- Converts the list to a string of JSON lines suitable for evaluation in a Q&A format.
31
+ Converts the list to a string of JSON lines suitable for evaluation in a query & response format.
32
32
  Each item in the list is expected to be a dictionary with
33
33
  'messages' key. The 'messages' value is a list of
34
34
  dictionaries, each with a 'role' key and a 'content' key.
@@ -80,9 +80,9 @@ class JsonLineChatProtocol(dict):
80
80
  """
81
81
  return json.dumps(self)
82
82
 
83
- def to_eval_qa_json_lines(self) -> str:
83
+ def to_eval_qr_json_lines(self) -> str:
84
84
  """
85
- Converts the object to a string of JSON lines suitable for evaluation in a Q&A format.
85
+ Converts the object to a string of JSON lines suitable for evaluation in a query and response format.
86
86
  The object is expected to be a dictionary with 'messages' key.
87
87
 
88
88
  :returns: A json lines document
@@ -105,10 +105,10 @@ class JsonLineChatProtocol(dict):
105
105
  if user_message and assistant_message:
106
106
  if context:
107
107
  json_lines += (
108
- json.dumps({"question": user_message, "answer": assistant_message, "context": context}) + "\n"
108
+ json.dumps({"query": user_message, "response": assistant_message, "context": context}) + "\n"
109
109
  )
110
110
  user_message = assistant_message = None
111
111
  else:
112
- json_lines += json.dumps({"question": user_message, "answer": assistant_message}) + "\n"
112
+ json_lines += json.dumps({"query": user_message, "response": assistant_message}) + "\n"
113
113
  user_message = assistant_message = None
114
114
  return json_lines
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: azure-ai-evaluation
3
- Version: 1.0.0b4
3
+ Version: 1.0.0b5
4
4
  Summary: Microsoft Azure Evaluation Library for Python
5
5
  Home-page: https://github.com/Azure/azure-sdk-for-python
6
6
  Author: Microsoft Corporation
@@ -30,6 +30,7 @@ Requires-Dist: azure-core >=1.30.2
30
30
  Requires-Dist: nltk >=3.9.1
31
31
  Provides-Extra: remote
32
32
  Requires-Dist: promptflow-azure <2.0.0,>=1.15.0 ; extra == 'remote'
33
+ Requires-Dist: azure-ai-inference >=1.0.0b4 ; extra == 'remote'
33
34
 
34
35
  # Azure AI Evaluation client library for Python
35
36
 
@@ -95,9 +96,6 @@ if __name__ == "__main__":
95
96
  # Running Relevance Evaluator on single input row
96
97
  relevance_score = relevance_eval(
97
98
  response="The Alpine Explorer Tent is the most waterproof.",
98
- context="From the our product list,"
99
- " the alpine explorer tent is the most waterproof."
100
- " The Adventure Dining Table has higher weight.",
101
99
  query="Which tent is the most waterproof?",
102
100
  )
103
101
 
@@ -172,6 +170,95 @@ Output with a string that continues the conversation, responding to the latest m
172
170
  {{ conversation_history }}
173
171
 
174
172
  ```
173
+
174
+ Query Response generating prompty for gpt-4o with `json_schema` support
175
+ Use this file as an override.
176
+ ```yaml
177
+ ---
178
+ name: TaskSimulatorQueryResponseGPT4o
179
+ description: Gets queries and responses from a blob of text
180
+ model:
181
+ api: chat
182
+ parameters:
183
+ temperature: 0.0
184
+ top_p: 1.0
185
+ presence_penalty: 0
186
+ frequency_penalty: 0
187
+ response_format:
188
+ type: json_schema
189
+ json_schema:
190
+ name: QRJsonSchema
191
+ schema:
192
+ type: object
193
+ properties:
194
+ items:
195
+ type: array
196
+ items:
197
+ type: object
198
+ properties:
199
+ q:
200
+ type: string
201
+ r:
202
+ type: string
203
+ required:
204
+ - q
205
+ - r
206
+
207
+ inputs:
208
+ text:
209
+ type: string
210
+ num_queries:
211
+ type: integer
212
+
213
+
214
+ ---
215
+ system:
216
+ You're an AI that helps in preparing a Question/Answer quiz from Text for "Who wants to be a millionaire" tv show
217
+ Both Questions and Answers MUST BE extracted from given Text
218
+ Frame Question in a way so that Answer is RELEVANT SHORT BITE-SIZED info from Text
219
+ RELEVANT info could be: NUMBER, DATE, STATISTIC, MONEY, NAME
220
+ A sentence should contribute multiple QnAs if it has more info in it
221
+ Answer must not be more than 5 words
222
+ Answer must be picked from Text as is
223
+ Question should be as descriptive as possible and must include as much context as possible from Text
224
+ Output must always have the provided number of QnAs
225
+ Output must be in JSON format.
226
+ Output must have {{num_queries}} objects in the format specified below. Any other count is unacceptable.
227
+ Text:
228
+ <|text_start|>
229
+ On January 24, 1984, former Apple CEO Steve Jobs introduced the first Macintosh. In late 2003, Apple had 2.06 percent of the desktop share in the United States.
230
+ Some years later, research firms IDC and Gartner reported that Apple's market share in the U.S. had increased to about 6%.
231
+ <|text_end|>
232
+ Output with 5 QnAs:
233
+ {
234
+ "qna": [{
235
+ "q": "When did the former Apple CEO Steve Jobs introduced the first Macintosh?",
236
+ "r": "January 24, 1984"
237
+ },
238
+ {
239
+ "q": "Who was the former Apple CEO that introduced the first Macintosh on January 24, 1984?",
240
+ "r": "Steve Jobs"
241
+ },
242
+ {
243
+ "q": "What percent of the desktop share did Apple have in the United States in late 2003?",
244
+ "r": "2.06 percent"
245
+ },
246
+ {
247
+ "q": "What were the research firms that reported on Apple's market share in the U.S.?",
248
+ "r": "IDC and Gartner"
249
+ },
250
+ {
251
+ "q": "What was the percentage increase of Apple's market share in the U.S., as reported by research firms IDC and Gartner?",
252
+ "r": "6%"
253
+ }]
254
+ }
255
+ Text:
256
+ <|text_start|>
257
+ {{ text }}
258
+ <|text_end|>
259
+ Output with {{ num_queries }} QnAs:
260
+ ```
261
+
175
262
  Application code:
176
263
 
177
264
  ```python
@@ -189,6 +276,7 @@ model_config = {
189
276
  "azure_deployment": os.environ.get("AZURE_DEPLOYMENT"),
190
277
  # not providing key would make the SDK pick up `DefaultAzureCredential`
191
278
  # use "api_key": "<your API key>"
279
+ "api_version": "2024-08-01-preview" # keep this for gpt-4o
192
280
  }
193
281
 
194
282
  # Use Wikipedia to get some text for the simulation
@@ -232,20 +320,21 @@ async def callback(
232
320
  formatted_response = {
233
321
  "content": response,
234
322
  "role": "assistant",
235
- "context": {
236
- "citations": None,
237
- },
323
+ "context": "",
238
324
  }
239
325
  messages["messages"].append(formatted_response)
240
326
  return {"messages": messages["messages"], "stream": stream, "session_state": session_state, "context": context}
241
327
 
242
328
  async def main():
243
329
  simulator = Simulator(model_config=model_config)
330
+ current_dir = os.path.dirname(__file__)
331
+ query_response_override_for_latest_gpt_4o = os.path.join(current_dir, "TaskSimulatorQueryResponseGPT4o.prompty")
244
332
  outputs = await simulator(
245
333
  target=callback,
246
334
  text=text,
335
+ query_response_generating_prompty=query_response_override_for_latest_gpt_4o, # use this only with latest gpt-4o
247
336
  num_queries=2,
248
- max_conversation_turns=4,
337
+ max_conversation_turns=1,
249
338
  user_persona=[
250
339
  f"I am a student and I want to learn more about {wiki_search_term}",
251
340
  f"I am a teacher and I want to teach my students about {wiki_search_term}"
@@ -267,7 +356,7 @@ if __name__ == "__main__":
267
356
  #### Adversarial Simulator
268
357
 
269
358
  ```python
270
- from from azure.ai.evaluation.simulator import AdversarialSimulator, AdversarialScenario
359
+ from azure.ai.evaluation.simulator import AdversarialSimulator, AdversarialScenario
271
360
  from azure.identity import DefaultAzureCredential
272
361
  from typing import Any, Dict, List, Optional
273
362
  import asyncio
@@ -420,6 +509,72 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
420
509
 
421
510
  # Release History
422
511
 
512
+ ## 1.0.0b5 (2024-10-28)
513
+
514
+ ### Features Added
515
+ - Added `GroundednessProEvaluator`, which is a service-based evaluator for determining response groundedness.
516
+ - Groundedness detection in Non Adversarial Simulator via query/context pairs
517
+ ```python
518
+ import importlib.resources as pkg_resources
519
+ package = "azure.ai.evaluation.simulator._data_sources"
520
+ resource_name = "grounding.json"
521
+ custom_simulator = Simulator(model_config=model_config)
522
+ conversation_turns = []
523
+ with pkg_resources.path(package, resource_name) as grounding_file:
524
+ with open(grounding_file, "r") as file:
525
+ data = json.load(file)
526
+ for item in data:
527
+ conversation_turns.append([item])
528
+ outputs = asyncio.run(custom_simulator(
529
+ target=callback,
530
+ conversation_turns=conversation_turns,
531
+ max_conversation_turns=1,
532
+ ))
533
+ ```
534
+ - Adding evaluator for multimodal use cases
535
+
536
+ ### Breaking Changes
537
+ - Renamed environment variable `PF_EVALS_BATCH_USE_ASYNC` to `AI_EVALS_BATCH_USE_ASYNC`.
538
+ - `RetrievalEvaluator` now requires a `context` input in addition to `query` in single-turn evaluation.
539
+ - `RelevanceEvaluator` no longer takes `context` as an input. It now only takes `query` and `response` in single-turn evaluation.
540
+ - `FluencyEvaluator` no longer takes `query` as an input. It now only takes `response` in single-turn evaluation.
541
+ - `AdversarialScenario` enum does not include `ADVERSARIAL_INDIRECT_JAILBREAK`; invoking IndirectJailbreak or XPIA should be done with `IndirectAttackSimulator`.
542
+ - Outputs of `Simulator` and `AdversarialSimulator` previously had `to_eval_qa_json_lines` and now have `to_eval_qr_json_lines`. Where `to_eval_qa_json_lines` had:
543
+ ```json
544
+ {"question": <user_message>, "answer": <assistant_message>}
545
+ ```
546
+ `to_eval_qr_json_lines` now has:
547
+ ```json
548
+ {"query": <user_message>, "response": <assistant_message>}
549
+ ```
550
+
551
+ ### Bugs Fixed
552
+ - Non adversarial simulator works with `gpt-4o` models using the `json_schema` response format
553
+ - Fixed an issue where the `evaluate` API would fail with "[WinError 32] The process cannot access the file because it is being used by another process" when venv folder and target function file are in the same directory.
554
+ - Fix evaluate API failure when `trace.destination` is set to `none`
555
+ - Non adversarial simulator now accepts context from the callback
556
+
557
+ ### Other Changes
558
+ - Improved error messages for the `evaluate` API by enhancing the validation of input parameters. This update provides more detailed and actionable error descriptions.
559
+ - `GroundednessEvaluator` now supports `query` as an optional input in single-turn evaluation. If `query` is provided, a different prompt template will be used for the evaluation.
560
+ - To align with our support of a diverse set of models, the following evaluators will now have a new key in their result output without the `gpt_` prefix. To maintain backwards compatibility, the old key with the `gpt_` prefix will still be present in the output; however, it is recommended to use the new key moving forward as the old key will be deprecated in the future.
561
+ - `CoherenceEvaluator`
562
+ - `RelevanceEvaluator`
563
+ - `FluencyEvaluator`
564
+ - `GroundednessEvaluator`
565
+ - `SimilarityEvaluator`
566
+ - `RetrievalEvaluator`
567
+ - The following evaluators will now have a new key in their result output including LLM reasoning behind the score. The new key will follow the pattern "<metric_name>_reason". The reasoning is the result of a more detailed prompt template being used to generate the LLM response. Note that this requires the maximum number of tokens used to run these evaluators to be increased.
568
+
569
+ | Evaluator | New Token Limit |
570
+ | --- | --- |
571
+ | `CoherenceEvaluator` | 800 |
572
+ | `RelevanceEvaluator` | 800 |
573
+ | `FluencyEvaluator` | 800 |
574
+ | `GroundednessEvaluator` | 800 |
575
+ | `RetrievalEvaluator` | 1600 |
576
+ - Improved the error message for storage access permission issues to provide clearer guidance for users.
577
+
423
578
  ## 1.0.0b4 (2024-10-16)
424
579
 
425
580
  ### Breaking Changes
@@ -430,9 +585,11 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
430
585
 
431
586
  ### Bugs Fixed
432
587
  - Adversarial Conversation simulations would fail with `Forbidden`. Added logic to re-fetch token in the exponential retry logic to retrieve RAI Service response.
588
+ - Fixed an issue where the Evaluate API did not fail due to missing inputs when the target did not return columns required by the evaluators.
433
589
 
434
590
  ### Other Changes
435
591
  - Enhance the error message to provide clearer instruction when required packages for the remote tracking feature are missing.
592
+ - Print the per-evaluator run summary at the end of the Evaluate API call to make troubleshooting row-level failures easier.
436
593
 
437
594
  ## 1.0.0b3 (2024-10-01)
438
595
 
@@ -48,3 +48,23 @@ distributed under the License is distributed on an "AS IS" BASIS,
48
48
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
49
49
  See the License for the specific language governing permissions and
50
50
  limitations under the License.
51
+
52
+
53
+ License notice for [Is GPT-4 a reliable rater? Evaluating consistency in GPT-4's text ratings](https://www.frontiersin.org/journals/education/articles/10.3389/feduc.2023.1272229/full)
54
+ ------------------------------------------------------------------------------------------------------------------
55
+ Copyright © 2023 Hackl, Müller, Granitzer and Sailer. This work is openly licensed via [CC BY 4.0](http://creativecommons.org/licenses/by/4.0/).
56
+
57
+
58
+ License notice for [Is ChatGPT a Good NLG Evaluator? A Preliminary Study](https://aclanthology.org/2023.newsum-1.1) (Wang et al., NewSum 2023)
59
+ ------------------------------------------------------------------------------------------------------------------
60
+ Copyright © 2023. This work is openly licensed via [CC BY 4.0](http://creativecommons.org/licenses/by/4.0/).
61
+
62
+
63
+ License notice for [SummEval: Re-evaluating Summarization Evaluation.](https://doi.org/10.1162/tacl_a_00373) (Fabbri et al.)
64
+ ------------------------------------------------------------------------------------------------------------------
65
+ © 2021 Association for Computational Linguistics. This work is openly licensed via [CC BY 4.0](http://creativecommons.org/licenses/by/4.0/).
66
+
67
+
68
+ License notice for [Evaluation Metrics in the Era of GPT-4: Reliably Evaluating Large Language Models on Sequence to Sequence Tasks](https://aclanthology.org/2023.emnlp-main.543) (Sottana et al., EMNLP 2023)
69
+ ------------------------------------------------------------------------------------------------------------------
70
+ © 2023 Association for Computational Linguistics. This work is openly licensed via [CC BY 4.0](http://creativecommons.org/licenses/by/4.0/).
@@ -0,0 +1,120 @@
1
+ azure/ai/evaluation/__init__.py,sha256=MFxJRoKfSsP_Qlfq0FwynxNf4csNAfTYPQX7jdXc9RU,2757
2
+ azure/ai/evaluation/_constants.py,sha256=KGjzbFKCk0O6xCH57VdKK6CKC0JwS25ouYOQOYCB_6M,1942
3
+ azure/ai/evaluation/_exceptions.py,sha256=91Ovrj9t4nbpJM7GRK3rzwxXk-xLq6WLLzm44GUgt3s,5057
4
+ azure/ai/evaluation/_http_utils.py,sha256=oVbRaxUm41tVFGkYpZdHjT9ss_9va1NzXYuV3DUVr8k,17125
5
+ azure/ai/evaluation/_model_configurations.py,sha256=TklC7ke0jXtLitTQaQAGT5SJgV098XGUHY7On2_IFY4,2249
6
+ azure/ai/evaluation/_user_agent.py,sha256=O2y-QPBAcw7w7qQ6M2aRPC3Vy3TKd789u5lcs2yuFaI,290
7
+ azure/ai/evaluation/_version.py,sha256=mCv_uIychD87cYcoY1AwWAtaTQtk7P0sZUlJF8HsIcY,201
8
+ azure/ai/evaluation/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ azure/ai/evaluation/_common/__init__.py,sha256=LHTkf6dMLLxikrGNgbUuREBVQcs4ORHR6Eryo4bm9M8,586
10
+ azure/ai/evaluation/_common/_experimental.py,sha256=hmr9l9hHFNj6iEmBuMawdnnl54YzJrylbB7Dk6cs7cM,5565
11
+ azure/ai/evaluation/_common/constants.py,sha256=OsExttFGLnTAyZa26jnY5_PCDTb7uJNFqtE2qsRZ1mg,1957
12
+ azure/ai/evaluation/_common/math.py,sha256=Y47ljvImn47xuW32enI2O6V7-7SBkraWeyXdJiYw41Q,927
13
+ azure/ai/evaluation/_common/rai_service.py,sha256=zi2iha6y9HphzZlia9ig3riZ_2SGMHF0dfY4l866JXw,23402
14
+ azure/ai/evaluation/_common/utils.py,sha256=7F5C_mZgR4MIIihCTFa5yUDZka0-g7G4KLsITQPq0gE,16080
15
+ azure/ai/evaluation/_evaluate/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
16
+ azure/ai/evaluation/_evaluate/_eval_run.py,sha256=XppywHqCZeFguH5_WSIReKA6MAAe2j9hdso6jM_67Po,22283
17
+ azure/ai/evaluation/_evaluate/_evaluate.py,sha256=77gJyIg7m9XJTm3qz6Q4yKSv1aZ19WoVpmmXyQlSqPk,38178
18
+ azure/ai/evaluation/_evaluate/_utils.py,sha256=SAlVwU_5P2ls-394kN97QwmrAApzck8T3i-7LbVyZtg,12320
19
+ azure/ai/evaluation/_evaluate/_batch_run/__init__.py,sha256=G8McpeLxAS_gFhNShX52_YWvE-arhJn-bVpAfzjWG3Q,427
20
+ azure/ai/evaluation/_evaluate/_batch_run/code_client.py,sha256=XQLaXfswF6ReHLpQthHLuLLa65Pts8uawGp7kRqmMDs,8260
21
+ azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py,sha256=1nnaUej4cOiPD9lH58Mt-RhHYd7gDe8G5kZg7w6Gkrs,3196
22
+ azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py,sha256=88zkK6ATyMaUAmk8WAHccO2x9XO-6Ibr4Ggbs4wPmg0,3339
23
+ azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py,sha256=IoueIPzyzK4Kt7ZoC3m9_0BpSY1pSB2H2qFi_6EBApg,1249
24
+ azure/ai/evaluation/_evaluate/_telemetry/__init__.py,sha256=cN6Y6Zq7kOv_EGwtKOO97PYYNiTlQmFUuHAROxq_Au8,6957
25
+ azure/ai/evaluation/_evaluators/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
26
+ azure/ai/evaluation/_evaluators/_bleu/__init__.py,sha256=quKKO0kvOSkky5hcoNBvgBuMeeVRFCE9GSv70mAdGP4,260
27
+ azure/ai/evaluation/_evaluators/_bleu/_bleu.py,sha256=G5oZbR_3fPcuBlhQgIow61Tw7W3cL1ugOFcwWCgvT8U,2425
28
+ azure/ai/evaluation/_evaluators/_coherence/__init__.py,sha256=GRqcSCQse02Spyki0UsRNWMIXiea2lLtPPXNGvkJzQ0,258
29
+ azure/ai/evaluation/_evaluators/_coherence/_coherence.py,sha256=TMyTHXu0t0S0j3MRLCcFFDnn78d2-SF92uZzlNG7azI,2956
30
+ azure/ai/evaluation/_evaluators/_coherence/coherence.prompty,sha256=ANvh9mDFW7KMejrgdWqBLjj4SIqEO5WW9gg5pE0RLJk,6798
31
+ azure/ai/evaluation/_evaluators/_common/__init__.py,sha256=_hPqTkAla_O6s4ebVtTaBrVLEW3KSdDz66WwxjK50cI,423
32
+ azure/ai/evaluation/_evaluators/_common/_base_eval.py,sha256=32R2APcWEjvHFhtVU-Vkga9QP9Kr4df_ZZkz5xGD4GE,15419
33
+ azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py,sha256=mwD6DxcAjNryWW98PgB6-L1BRSwRg9ONjJfjaMirpn8,3853
34
+ azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py,sha256=SiIpGPotBKj-GohJVL_dnIWQimImnNuZyCI9m-HZssA,5916
35
+ azure/ai/evaluation/_evaluators/_content_safety/__init__.py,sha256=PEYMIybfP64f7byhuTaiq4RiqsYbjqejpW1JsJIG1jA,556
36
+ azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py,sha256=-O2frtWs2XMCnvBo5HFPnxW-MF9_L9QGcxVo360ZBMY,5801
37
+ azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py,sha256=ojhzAbIUgKpJxgEGE2MKpgD091Q8HfvEpgoajus_dI0,12889
38
+ azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py,sha256=p53WfUr_tyoYqPiHkoikPrwERsxNTE7QUw3i4VBgA58,2949
39
+ azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py,sha256=lFYTtQUE0ub1zr6cqQyUQP9igHIljqFGHQFNx6EemH8,2905
40
+ azure/ai/evaluation/_evaluators/_content_safety/_sexual.py,sha256=DgtY7eQyQu_I85-2zQGP_h3w1oj97RHnoUw30lY9Y0w,2880
41
+ azure/ai/evaluation/_evaluators/_content_safety/_violence.py,sha256=5K5UENljzfFU5m2gXUI0vvzFCEch_xZTzEsG7MYJYQw,2897
42
+ azure/ai/evaluation/_evaluators/_eci/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
43
+ azure/ai/evaluation/_evaluators/_eci/_eci.py,sha256=gr7gfQnzrf3qXSJ7uf0iwwDg63SgaJjlhapKAa7WH5U,2435
44
+ azure/ai/evaluation/_evaluators/_f1_score/__init__.py,sha256=aEVbO7iMoF20obdpLQKcKm69Yyu3mYnblKELLqu8OGI,260
45
+ azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py,sha256=KeYL4Z7cO0Yb_pOAq-3WePUgSqNnci0uA3AH2r41VB4,4786
46
+ azure/ai/evaluation/_evaluators/_fluency/__init__.py,sha256=EEJw39xRa0bOAA1rELTTKXQu2s60n_7CZQRD0Gu2QVw,259
47
+ azure/ai/evaluation/_evaluators/_fluency/_fluency.py,sha256=QIe6EqPmYxNtaD6_KvOEwKQpEGZfHmxTkywDLcPak-k,2739
48
+ azure/ai/evaluation/_evaluators/_fluency/fluency.prompty,sha256=n9v0W9eYwgIO-JSsLTSKEM_ApJuxxuKWQpNblrTEkFY,4861
49
+ azure/ai/evaluation/_evaluators/_gleu/__init__.py,sha256=Ae2EvQ7gqiYAoNO3LwGIhdAAjJPJDfT85rQGKrRrmbA,260
50
+ azure/ai/evaluation/_evaluators/_gleu/_gleu.py,sha256=tDY9F70NfSq60HmNprrJ4OGC8mk-1_mvLQ2SXShxVig,2338
51
+ azure/ai/evaluation/_evaluators/_groundedness/__init__.py,sha256=UYNJUeRvBwcSVFyZpdsf29un5eyaDzYoo3QvC1gvlLg,274
52
+ azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py,sha256=W-56hA2KaBIfgfl41cJaYgdaf3Fs5Jku96xouAShWpI,4629
53
+ azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty,sha256=v7TOm75DyW_1gOU6gSiZoPcRnHcJ65DrzR2cL_ucWDY,5814
54
+ azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty,sha256=8kNShdfxQvkII7GnqjmdqQ5TNelA2B6cjnqWZk8FFe4,5296
55
+ azure/ai/evaluation/_evaluators/_meteor/__init__.py,sha256=209na3pPsdmcuYpYHUYtqQybCpc3yZkc93HnRdicSlI,266
56
+ azure/ai/evaluation/_evaluators/_meteor/_meteor.py,sha256=c1SMbv70Z1fH7QHO2oiYmRidNBHGeUVN_2Xs_nVlHZE,3260
57
+ azure/ai/evaluation/_evaluators/_multimodal/__init__.py,sha256=tPvsY0nv8T3VtiiAwJM6wT5A9FhKP2XXwUlCH994xl4,906
58
+ azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py,sha256=lowKPujN4Q5OUnVpnn9XUua2sq9XLVU5CYA4g-eyKU4,5182
59
+ azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py,sha256=nKqY1RSieSQ1Qsy4QTeBupzUPW3fhNSqlynd7642NTo,2522
60
+ azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py,sha256=Jk5u4YZH62G2uxDd2bPyfKobVvuN9N5LQmLL7lMRLL4,3605
61
+ azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py,sha256=7SSmGbTckd9FPHSqGwMQxFlmMxTnxXSzrB4G6Kgpfww,4672
62
+ azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py,sha256=pd-QjaXlJ3k9DMxOcrqxIWfB6gut0Kd3o7mHxGM6QRU,3535
63
+ azure/ai/evaluation/_evaluators/_multimodal/_sexual.py,sha256=u2Id-HFAcUj7EG-zVMqwOUlqOh6MN_lnYZ2OYuBMUj0,3503
64
+ azure/ai/evaluation/_evaluators/_multimodal/_violence.py,sha256=Z9_MXkRnf8pbv07bXD6d5WLIXwcxkaB_zz64cof83Kw,3527
65
+ azure/ai/evaluation/_evaluators/_protected_material/__init__.py,sha256=eRAQIU9diVXfO5bp6aLWxZoYUvOsrDIfy1gnDOeNTiI,109
66
+ azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py,sha256=h3pLEkf4gvzvimvmsxr5haA0_wq02EI6kn4tIataZMI,3325
67
+ azure/ai/evaluation/_evaluators/_qa/__init__.py,sha256=bcXfT--C0hjym2haqd1B2-u9bDciyM0ThOFtU1Q69sk,244
68
+ azure/ai/evaluation/_evaluators/_qa/_qa.py,sha256=k0a5RJO5UrCNzJIzsGI6nyQ2aBXHALGYB2aMz880wDY,3742
69
+ azure/ai/evaluation/_evaluators/_relevance/__init__.py,sha256=JlxytW32Nl8pbE-fI3GRpfgVuY9EG6zxIAn5VZGSwyc,265
70
+ azure/ai/evaluation/_evaluators/_relevance/_relevance.py,sha256=-lCbVq84rX1JUmlWoUYNdcCWNFXtH_0JhvL4pnxJyHQ,3307
71
+ azure/ai/evaluation/_evaluators/_relevance/relevance.prompty,sha256=VHKzVlC2Cv1xuholgIGmerPspspAI0t6IgJ2cxOuYDE,4811
72
+ azure/ai/evaluation/_evaluators/_retrieval/__init__.py,sha256=kMu47ZyTZ7f-4Yh6H3KHxswmxitmPJ8FPSk90qgR0XI,265
73
+ azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py,sha256=NNSsg5Zd8w_OJ5QKY9DnCPb5d_P3trXE_Kqe8uEWe0o,8088
74
+ azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty,sha256=_YVoO4Gt_WD42bUcj5n6BDW0dMUqNf0yF3Nj5XMOX2c,16490
75
+ azure/ai/evaluation/_evaluators/_rouge/__init__.py,sha256=kusCDaYcXogDugGefRP8MQSn9xv107oDbrMCqZ6K4GA,291
76
+ azure/ai/evaluation/_evaluators/_rouge/_rouge.py,sha256=ZSPRc-6WnpAHxlEwzq-_-5h_7GbtZhrOfEWSEiY4vYk,3566
77
+ azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py,sha256=0DODUGTOgaYyFbO9_zxuwifixDL3SIm3EkwP1sdwn6M,288
78
+ azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py,sha256=e6mFUioiyCIWnS01Ec2yikvtkg1zTel1NfdhAgcmvKc,5909
79
+ azure/ai/evaluation/_evaluators/_similarity/__init__.py,sha256=V2Mspog99_WBltxTkRHG5NpN5s9XoiTSN4I8POWEkLA,268
80
+ azure/ai/evaluation/_evaluators/_similarity/_similarity.py,sha256=p2BIdulB7ALYurBiltlV6wkHRm7Cu5J3UvWdp2JGyy0,4735
81
+ azure/ai/evaluation/_evaluators/_similarity/similarity.prompty,sha256=eoludASychZoGL625bFCaZai-OY7DIAg90ZLax_o4XE,4594
82
+ azure/ai/evaluation/_evaluators/_xpia/__init__.py,sha256=VMEL8WrpJQeh4sQiOLzP7hRFPnjzsvwfvTzaGCVJPCM,88
83
+ azure/ai/evaluation/_evaluators/_xpia/xpia.py,sha256=zpUpt92SBvUFIiEqbkukNvmPgRWermpHfE4L_D_VWqU,3546
84
+ azure/ai/evaluation/_vendor/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
85
+ azure/ai/evaluation/_vendor/rouge_score/__init__.py,sha256=03OkyfS_UmzRnHv6-z9juTaJ6OXJoEJM989hgifIZbc,607
86
+ azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py,sha256=xDdNtzwtivcdki5RyErEI9BaQ7nksgj4bXYrGz7tLLs,11409
87
+ azure/ai/evaluation/_vendor/rouge_score/scoring.py,sha256=ruwkMrJFJNvs3GWqVLAXudIwDa4EsX_d30pfUPUTf8E,1988
88
+ azure/ai/evaluation/_vendor/rouge_score/tokenize.py,sha256=tdSsUibKxtOMY8fdqGK_3-4sMbeOxZEG6D6L7suDTxQ,1936
89
+ azure/ai/evaluation/_vendor/rouge_score/tokenizers.py,sha256=3_-y1TyvyluHuERhSJ5CdXSwnpcMA7aAKU6PCz9wH_Q,1745
90
+ azure/ai/evaluation/simulator/__init__.py,sha256=JbrPZ8pvTBalyX94SvZ9btHNoovX8rbZV03KmzxxWys,552
91
+ azure/ai/evaluation/simulator/_adversarial_scenario.py,sha256=yBZshqnpsqqfZWq2_vAVttgGBNb108kAXR70yURJTyg,1131
92
+ azure/ai/evaluation/simulator/_adversarial_simulator.py,sha256=ad7tOA09m-VRmQyrdIPHkPOppPU5B_DYVlS4eD6AJ8c,21125
93
+ azure/ai/evaluation/simulator/_constants.py,sha256=xM-Or2x7RytfoeBM3N7Vt4JQDJX66UdL3CPz0YN5rvE,485
94
+ azure/ai/evaluation/simulator/_direct_attack_simulator.py,sha256=cjfJ_Fq2FKtOnhDsUM6piTNqd_2efb0Lz-agS5DEK28,9765
95
+ azure/ai/evaluation/simulator/_indirect_attack_simulator.py,sha256=xEAsejGnMRZLkM-_W30nDVGE50VRlUrb0b5UQwFQjDI,9685
96
+ azure/ai/evaluation/simulator/_simulator.py,sha256=KzixUmdW9emTmtzwghVBivr860p7J5If7-q0CfTJP58,35870
97
+ azure/ai/evaluation/simulator/_tracing.py,sha256=frZ4-usrzINast9F4-ONRzEGGox71y8bYw0UHNufL1Y,3069
98
+ azure/ai/evaluation/simulator/_utils.py,sha256=KVwts0jSoVk7jv5NX1vT_sKD7WqNpHT06ALow1I5dTA,4313
99
+ azure/ai/evaluation/simulator/_conversation/__init__.py,sha256=ulkkJkvRBRROLp_wpAKy1J-HLMJi3Yq6g7Q6VGRuD88,12914
100
+ azure/ai/evaluation/simulator/_conversation/_conversation.py,sha256=vzKdpItmUjZrM5OUSkS2UkTnLnKvIzhak5hZ8xvFwnU,7403
101
+ azure/ai/evaluation/simulator/_conversation/constants.py,sha256=3v7zkjPwJAPbSpJYIK6VOZZy70bJXMo_QTVqSFGlq9A,984
102
+ azure/ai/evaluation/simulator/_data_sources/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
103
+ azure/ai/evaluation/simulator/_data_sources/grounding.json,sha256=jqdqHrCgS7hN7K2kXSEcPCmzFjV4cv_qcCSR-Hutwx4,1257075
104
+ azure/ai/evaluation/simulator/_helpers/__init__.py,sha256=FQwgrJvzq_nv3wF9DBr2pyLn2V2hKGmtp0QN9nwpAww,203
105
+ azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py,sha256=7BBLH78b7YDelHDLbAIwf-IO9s9cAEtn-RRXmNReHdc,1017
106
+ azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py,sha256=BOttMTec3muMiA4OzwD_iW08GTrhja7PL9XVjRCN3jM,3029
107
+ azure/ai/evaluation/simulator/_model_tools/__init__.py,sha256=aMv5apb7uVjuhMF9ohhA5kQmo652hrGIJlhdl3y2R1I,835
108
+ azure/ai/evaluation/simulator/_model_tools/_identity_manager.py,sha256=bkVRfc9q3FV72CKtK1utQUSjVvLnGB18qPzRjKbjGxQ,6303
109
+ azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py,sha256=Zg_SzqjCGJ3Wt8hktxz6Y1JEJCcV0V5jBC9N06jQP3k,8984
110
+ azure/ai/evaluation/simulator/_model_tools/_rai_client.py,sha256=Bi0tLNlJmz295mdoVaE9_6a_UJVRmCH5uAmxjslS_eQ,7037
111
+ azure/ai/evaluation/simulator/_model_tools/_template_handler.py,sha256=FGKLsWL0FZry47ZxFi53FSem8PZmh0iIy3JN4PBg5Tg,7036
112
+ azure/ai/evaluation/simulator/_model_tools/models.py,sha256=bfVm0PV3vfH_8DkdmTMZqYVN-G51hZ6Y0TOO-NiysJY,21811
113
+ azure/ai/evaluation/simulator/_prompty/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
114
+ azure/ai/evaluation/simulator/_prompty/task_query_response.prompty,sha256=2BzSqDDYilDushvR56vMRDmqFIaIYAewdUlUZg_elMg,2182
115
+ azure/ai/evaluation/simulator/_prompty/task_simulate.prompty,sha256=NE6lH4bfmibgMn4NgJtm9_l3PMoHSFrfjjosDJEKM0g,939
116
+ azure_ai_evaluation-1.0.0b5.dist-info/METADATA,sha256=WDO8Eb37IZEaXzmpFoSmFvRHYxM6M_vnH5TC7t5m29I,25730
117
+ azure_ai_evaluation-1.0.0b5.dist-info/NOTICE.txt,sha256=4tzi_Yq4-eBGhBvveobWHCgUIVF-ZeouGN0m7hVq5Mk,3592
118
+ azure_ai_evaluation-1.0.0b5.dist-info/WHEEL,sha256=pL8R0wFFS65tNSRnaOVrsw9EOkOqxLrlUPenUYnJKNo,91
119
+ azure_ai_evaluation-1.0.0b5.dist-info/top_level.txt,sha256=S7DhWV9m80TBzAhOFjxDUiNbKszzoThbnrSz5MpbHSQ,6
120
+ azure_ai_evaluation-1.0.0b5.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (72.2.0)
2
+ Generator: setuptools (74.1.3)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,49 +0,0 @@
1
- ---
2
- name: Groundedness
3
- description: Evaluates groundedness score for QA scenario
4
- model:
5
- api: chat
6
- parameters:
7
- temperature: 0.0
8
- max_tokens: 1
9
- top_p: 1.0
10
- presence_penalty: 0
11
- frequency_penalty: 0
12
- response_format:
13
- type: text
14
-
15
- inputs:
16
- response:
17
- type: string
18
- context:
19
- type: string
20
-
21
- ---
22
- system:
23
- You are an AI assistant. You will be given the definition of an evaluation metric for assessing the quality of an answer in a question-answering task. Your job is to compute an accurate evaluation score using the provided evaluation metric. You should return a single integer value between 1 to 5 representing the evaluation metric. You will include no other text or information.
24
- user:
25
- You will be presented with a CONTEXT and an ANSWER about that CONTEXT. You need to decide whether the ANSWER is entailed by the CONTEXT by choosing one of the following rating:
26
- 1. 5: The ANSWER follows logically from the information contained in the CONTEXT.
27
- 2. 1: The ANSWER is logically false from the information contained in the CONTEXT.
28
- 3. an integer score between 1 and 5 and if such integer score does not exist, use 1: It is not possible to determine whether the ANSWER is true or false without further information. Read the passage of information thoroughly and select the correct answer from the three answer labels. Read the CONTEXT thoroughly to ensure you know what the CONTEXT entails. Note the ANSWER is generated by a computer system, it can contain certain symbols, which should not be a negative factor in the evaluation.
29
- Independent Examples:
30
- ## Example Task #1 Input:
31
- {"CONTEXT": "Some are reported as not having been wanted at all.", "QUESTION": "", "ANSWER": "All are reported as being completely and fully wanted."}
32
- ## Example Task #1 Output:
33
- 1
34
- ## Example Task #2 Input:
35
- {"CONTEXT": "Ten new television shows appeared during the month of September. Five of the shows were sitcoms, three were hourlong dramas, and two were news-magazine shows. By January, only seven of these new shows were still on the air. Five of the shows that remained were sitcoms.", "QUESTION": "", "ANSWER": "At least one of the shows that were cancelled was an hourlong drama."}
36
- ## Example Task #2 Output:
37
- 5
38
- ## Example Task #3 Input:
39
- {"CONTEXT": "In Quebec, an allophone is a resident, usually an immigrant, whose mother tongue or home language is neither French nor English.", "QUESTION": "", "ANSWER": "In Quebec, an allophone is a resident, usually an immigrant, whose mother tongue or home language is not French."}
40
- ## Example Task #3 Output:
41
- 5
42
- ## Example Task #4 Input:
43
- {"CONTEXT": "Some are reported as not having been wanted at all.", "QUESTION": "", "ANSWER": "All are reported as being completely and fully wanted."}
44
- ## Example Task #4 Output:
45
- 1
46
- ## Actual Task Input:
47
- {"CONTEXT": {{context}}, "QUESTION": "", "ANSWER": {{response}}}
48
- Reminder: The return values for each task should be correctly formatted as an integer between 1 and 5. Do not repeat the context and question.
49
- Actual Task Output:
@@ -1,106 +0,0 @@
1
- azure/ai/evaluation/__init__.py,sha256=rS_yFLTL3_XpRQ2hNHeLB0To8tIfJd0NyKxxxTyBxm4,1977
2
- azure/ai/evaluation/_constants.py,sha256=kcorrWvQbWyugt6hN2jQ9DsL9MegJEr-ecl2XBXmDw0,1990
3
- azure/ai/evaluation/_exceptions.py,sha256=WYOml83XAAq4lPWi1g0kirW29ZYDkIiU--NVJ5l8SLI,4318
4
- azure/ai/evaluation/_http_utils.py,sha256=oVbRaxUm41tVFGkYpZdHjT9ss_9va1NzXYuV3DUVr8k,17125
5
- azure/ai/evaluation/_model_configurations.py,sha256=YmpopzIdPKxIVLhV6yHlo9mRXRMqF-aJhjQB83LxT14,1882
6
- azure/ai/evaluation/_user_agent.py,sha256=O2y-QPBAcw7w7qQ6M2aRPC3Vy3TKd789u5lcs2yuFaI,290
7
- azure/ai/evaluation/_version.py,sha256=DWZYhJb1k3CCBC_y7I7JrcP2TygZpzCHwZ5OWb6Vo44,201
8
- azure/ai/evaluation/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- azure/ai/evaluation/_common/__init__.py,sha256=LHTkf6dMLLxikrGNgbUuREBVQcs4ORHR6Eryo4bm9M8,586
10
- azure/ai/evaluation/_common/constants.py,sha256=pzXfC8Z2P36bCcUlz5sX4yGYZTt0JHjAi3auuiTd8Ww,1779
11
- azure/ai/evaluation/_common/math.py,sha256=Dp0jgN3PMoJUTYXD37c7FyFMGZy2vyhUVXUHDHKYtb4,473
12
- azure/ai/evaluation/_common/rai_service.py,sha256=6lDd_-qPqqLZPRy_RKV3qJkzcINu_btBanRwok6WGUc,17191
13
- azure/ai/evaluation/_common/utils.py,sha256=I9zMI7gbAjNJ1ITy9HISzJQSdYu1Ba2mYsZP2qnPQ9M,10867
14
- azure/ai/evaluation/_evaluate/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
15
- azure/ai/evaluation/_evaluate/_eval_run.py,sha256=cFFvKolYZPAA0lZn6QS5D_FDK0tizRPba0xvhIeuYkw,21490
16
- azure/ai/evaluation/_evaluate/_evaluate.py,sha256=c_0BeKk5ortPoazcRmBtJwSr_6Ov5MOFZ0qJo1J9CBE,32196
17
- azure/ai/evaluation/_evaluate/_utils.py,sha256=fjnBlWER3XqHKg38vNauGwQj9ZkP_Ln_J1OQ5Kmnj5I,10563
18
- azure/ai/evaluation/_evaluate/_batch_run_client/__init__.py,sha256=BkxhojWca3e2QM3hFwO2xrLiiQ0i-3f8wsMfOx1zchs,361
19
- azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py,sha256=AeZoEQK4IPXceJJBShaFvGMeO2ith1pUl8TiPE-xti4,3214
20
- azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py,sha256=XQLaXfswF6ReHLpQthHLuLLa65Pts8uawGp7kRqmMDs,8260
21
- azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py,sha256=XkSIjtFge586LI2EqdFRQcqwdghlru5N49-IGXz84SU,3234
22
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py,sha256=84QK8EHFnv3vT25BlbvGVog0pmSmh7ntQAlXGE7KNP8,6947
23
- azure/ai/evaluation/_evaluators/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
24
- azure/ai/evaluation/_evaluators/_bleu/__init__.py,sha256=quKKO0kvOSkky5hcoNBvgBuMeeVRFCE9GSv70mAdGP4,260
25
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py,sha256=6EJCG9DnL2Y4pU_vhY4o3UOrumvI-6HI92tzEuCoyXk,2413
26
- azure/ai/evaluation/_evaluators/_coherence/__init__.py,sha256=GRqcSCQse02Spyki0UsRNWMIXiea2lLtPPXNGvkJzQ0,258
27
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py,sha256=g9Cmxg3kRsd_ORLv1xLBmHsHzTpP6UNqRfPNSc85yUI,2526
28
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty,sha256=_GXYhAH04tsl2qntZH5ACx7gFNfUeQ0hZQpOmDoLPNc,2549
29
- azure/ai/evaluation/_evaluators/_common/__init__.py,sha256=_hPqTkAla_O6s4ebVtTaBrVLEW3KSdDz66WwxjK50cI,423
30
- azure/ai/evaluation/_evaluators/_common/_base_eval.py,sha256=luidfzVC2dQE567N3G5XmO3Ir0ZOgAJxhwLL4pkC2Hk,15178
31
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py,sha256=1DsJJIEH_LG21KWPl5HIIs2_yNCb9MrJ-z4ietBML8I,3109
32
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py,sha256=IDkX6RgsaXVJxeB8faWuK3-PJTrbu5xZEzpxv5YrJUI,4410
33
- azure/ai/evaluation/_evaluators/_content_safety/__init__.py,sha256=PEYMIybfP64f7byhuTaiq4RiqsYbjqejpW1JsJIG1jA,556
34
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py,sha256=yNBXq_n-HTmprclLWZis6hH5FUqtY8umXQUWwI8gMSg,3857
35
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py,sha256=e4Xp3jEj7j-MjfjpE2fPWPlxJmH6ELmoPPA2XSyZCJM,12895
36
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py,sha256=pKTT_kaeBcoOH4HW17ZejaPsEJ5KF76TVHD4zSy01Mk,1805
37
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py,sha256=3znxFvwf6MYPaqpOtvzqllE3Gtbke8oiLHOhk1iCw24,1759
38
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py,sha256=2zpKfo3C_HRpugKLdjmH7ExNkDAtokG7MmfSZxk_6F0,1737
39
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py,sha256=xaB5NvaoqUPaVg4m7JFH1dH4K4idpMeFlWfC_P7deCA,1753
40
- azure/ai/evaluation/_evaluators/_eci/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
- azure/ai/evaluation/_evaluators/_eci/_eci.py,sha256=QSyayeMCQj3IH_ZpllgsEx3OFI8Ir33dNCfMFNneBBc,2360
42
- azure/ai/evaluation/_evaluators/_f1_score/__init__.py,sha256=aEVbO7iMoF20obdpLQKcKm69Yyu3mYnblKELLqu8OGI,260
43
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py,sha256=KeYL4Z7cO0Yb_pOAq-3WePUgSqNnci0uA3AH2r41VB4,4786
44
- azure/ai/evaluation/_evaluators/_fluency/__init__.py,sha256=EEJw39xRa0bOAA1rELTTKXQu2s60n_7CZQRD0Gu2QVw,259
45
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py,sha256=Q7HDWaI7lm9tYL6Y4_IUz7-EljJYpDwEu5GQXkXcJqY,2473
46
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty,sha256=xdznyssZDQiLELv4ecC-8uUJ4ssM-iij7A6S1aDsxOQ,2403
47
- azure/ai/evaluation/_evaluators/_gleu/__init__.py,sha256=Ae2EvQ7gqiYAoNO3LwGIhdAAjJPJDfT85rQGKrRrmbA,260
48
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py,sha256=m02wmIGjdoXjp9dwjnFQAKA8hGOUOTvpppDf2CD4QQo,2326
49
- azure/ai/evaluation/_evaluators/_groundedness/__init__.py,sha256=UYNJUeRvBwcSVFyZpdsf29un5eyaDzYoo3QvC1gvlLg,274
50
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py,sha256=Em51FLqq1KqUJRSEJVOMJt4OSX79sv93DOoWp555ReM,2696
51
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty,sha256=ylgxKa_xipb7wN_QwxSnjrD9AhKcJQCv8pPpWPwFfGg,3023
52
- azure/ai/evaluation/_evaluators/_meteor/__init__.py,sha256=209na3pPsdmcuYpYHUYtqQybCpc3yZkc93HnRdicSlI,266
53
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py,sha256=K3EdRuRcuEZYVIlI2jMEp0O9KJYXQB2o6h08q43oKWY,3316
54
- azure/ai/evaluation/_evaluators/_protected_material/__init__.py,sha256=eRAQIU9diVXfO5bp6aLWxZoYUvOsrDIfy1gnDOeNTiI,109
55
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py,sha256=cCyVyr0xmHAYO91VOWZGPksSCc0r1bBx1zhYCrQRwgw,2066
56
- azure/ai/evaluation/_evaluators/_qa/__init__.py,sha256=bcXfT--C0hjym2haqd1B2-u9bDciyM0ThOFtU1Q69sk,244
57
- azure/ai/evaluation/_evaluators/_qa/_qa.py,sha256=F-LKcdG-WHktNSQioF7tce9u8QyUIOlBvjIMtMcqGgU,3611
58
- azure/ai/evaluation/_evaluators/_relevance/__init__.py,sha256=JlxytW32Nl8pbE-fI3GRpfgVuY9EG6zxIAn5VZGSwyc,265
59
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py,sha256=Mn2_XYjhWBpYIErVnhjB_tsM8xJ1K16cXKk1qVocHak,2948
60
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty,sha256=QNWlrWxObUPlXFF1hdCDVpfXuw0QDOxHUtWLj1MwrxA,3559
61
- azure/ai/evaluation/_evaluators/_retrieval/__init__.py,sha256=kMu47ZyTZ7f-4Yh6H3KHxswmxitmPJ8FPSk90qgR0XI,265
62
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py,sha256=XsUl4WzTtFu9O6j4XCXFQOjGEQzrKqqxvDP-ChMsE04,5453
63
- azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty,sha256=HbQu5Gy9Ghw9r8vGCF-4ui441JBD8w45NOU_9ehamd0,1585
64
- azure/ai/evaluation/_evaluators/_rouge/__init__.py,sha256=kusCDaYcXogDugGefRP8MQSn9xv107oDbrMCqZ6K4GA,291
65
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py,sha256=T__Qt2lC8-DqhlgMvPY10g6sC5svY6oqmbZUerwxbZw,3554
66
- azure/ai/evaluation/_evaluators/_similarity/__init__.py,sha256=V2Mspog99_WBltxTkRHG5NpN5s9XoiTSN4I8POWEkLA,268
67
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py,sha256=rZVBI7e0D52gp556RVXW32eoT1NLOSrpJiqr_WyM2bk,4530
68
- azure/ai/evaluation/_evaluators/_similarity/similarity.prompty,sha256=eoludASychZoGL625bFCaZai-OY7DIAg90ZLax_o4XE,4594
69
- azure/ai/evaluation/_evaluators/_xpia/__init__.py,sha256=VMEL8WrpJQeh4sQiOLzP7hRFPnjzsvwfvTzaGCVJPCM,88
70
- azure/ai/evaluation/_evaluators/_xpia/xpia.py,sha256=6ALICg2iCBYQWUOfRwjxFJtiDbI0Vc1FsP5lXgnv5Yo,2504
71
- azure/ai/evaluation/_vendor/__init__.py,sha256=Yx1Iq2GNKQ5lYxTotvPwkPL4u0cm6YVxUe-iVbu1clI,180
72
- azure/ai/evaluation/_vendor/rouge_score/__init__.py,sha256=03OkyfS_UmzRnHv6-z9juTaJ6OXJoEJM989hgifIZbc,607
73
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py,sha256=xDdNtzwtivcdki5RyErEI9BaQ7nksgj4bXYrGz7tLLs,11409
74
- azure/ai/evaluation/_vendor/rouge_score/scoring.py,sha256=ruwkMrJFJNvs3GWqVLAXudIwDa4EsX_d30pfUPUTf8E,1988
75
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py,sha256=tdSsUibKxtOMY8fdqGK_3-4sMbeOxZEG6D6L7suDTxQ,1936
76
- azure/ai/evaluation/_vendor/rouge_score/tokenizers.py,sha256=3_-y1TyvyluHuERhSJ5CdXSwnpcMA7aAKU6PCz9wH_Q,1745
77
- azure/ai/evaluation/simulator/__init__.py,sha256=UtlcXo3SteIQEW_hW2WMhtqLNiDiIGLeW_lIkEUNoMc,486
78
- azure/ai/evaluation/simulator/_adversarial_scenario.py,sha256=SxpyMw5wmM5-fiUjl1_oJH0GQEnsa7ASso10MAr2Hjw,1030
79
- azure/ai/evaluation/simulator/_adversarial_simulator.py,sha256=exXUWG-WcXQxHi630VlKaRRNm--S060UHuiJgDPOrQ0,21024
80
- azure/ai/evaluation/simulator/_constants.py,sha256=xM-Or2x7RytfoeBM3N7Vt4JQDJX66UdL3CPz0YN5rvE,485
81
- azure/ai/evaluation/simulator/_direct_attack_simulator.py,sha256=VtnJeddwqornM1VUiKKbD93Be57m7v7LrAwWik5yCy0,9733
82
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py,sha256=f2MA5jIROdTmm2C_mcDO8jasDuMiED0Re3r9ZXQNkbk,7712
83
- azure/ai/evaluation/simulator/_simulator.py,sha256=QS_4BF1hQuO--ZJhnCO-24mv_5-2aXVZ3k4krJR_CFE,32577
84
- azure/ai/evaluation/simulator/_tracing.py,sha256=frZ4-usrzINast9F4-ONRzEGGox71y8bYw0UHNufL1Y,3069
85
- azure/ai/evaluation/simulator/_utils.py,sha256=aXH5GdzQrwluKvYofWtdT0s_nzgVHS2hP6x4rc5zt-E,4287
86
- azure/ai/evaluation/simulator/_conversation/__init__.py,sha256=ulkkJkvRBRROLp_wpAKy1J-HLMJi3Yq6g7Q6VGRuD88,12914
87
- azure/ai/evaluation/simulator/_conversation/_conversation.py,sha256=vzKdpItmUjZrM5OUSkS2UkTnLnKvIzhak5hZ8xvFwnU,7403
88
- azure/ai/evaluation/simulator/_conversation/constants.py,sha256=3v7zkjPwJAPbSpJYIK6VOZZy70bJXMo_QTVqSFGlq9A,984
89
- azure/ai/evaluation/simulator/_helpers/__init__.py,sha256=YTwBf9B_uWGZSbS5vDBde4UpFszxzi3hSlcPtZ4Slcg,259
90
- azure/ai/evaluation/simulator/_helpers/_experimental.py,sha256=hmr9l9hHFNj6iEmBuMawdnnl54YzJrylbB7Dk6cs7cM,5565
91
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py,sha256=7BBLH78b7YDelHDLbAIwf-IO9s9cAEtn-RRXmNReHdc,1017
92
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py,sha256=fumMJYPLPv31KDgVC8A2fNjhLNREMgb1GFJUDv75Vgg,2193
93
- azure/ai/evaluation/simulator/_model_tools/__init__.py,sha256=aMv5apb7uVjuhMF9ohhA5kQmo652hrGIJlhdl3y2R1I,835
94
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py,sha256=aRqLy1mcgLo5_1DJ6BiNSjo1xv8D-TL0Ya3HBjGKZ80,6303
95
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py,sha256=Zg_SzqjCGJ3Wt8hktxz6Y1JEJCcV0V5jBC9N06jQP3k,8984
96
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py,sha256=Bi0tLNlJmz295mdoVaE9_6a_UJVRmCH5uAmxjslS_eQ,7037
97
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py,sha256=FGKLsWL0FZry47ZxFi53FSem8PZmh0iIy3JN4PBg5Tg,7036
98
- azure/ai/evaluation/simulator/_model_tools/models.py,sha256=bfVm0PV3vfH_8DkdmTMZqYVN-G51hZ6Y0TOO-NiysJY,21811
99
- azure/ai/evaluation/simulator/_prompty/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
100
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty,sha256=8oklGV7YGQE79bB5gV0AXdHyWcFtgXRCpthyikVF6kw,2174
101
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty,sha256=q-KshSHNcKxoF4eHelxzIMS3PGKoXQM6_UExOzlvXbk,793
102
- azure_ai_evaluation-1.0.0b4.dist-info/METADATA,sha256=Jffb9zCh3grSD_UjeM40ZiEgq6u6U_IKDxNz_34Q_Lw,18615
103
- azure_ai_evaluation-1.0.0b4.dist-info/NOTICE.txt,sha256=o9xBInKH4j22mM8VfF4mmMniV5Jz1Le1d7D3M7V5W2Y,1924
104
- azure_ai_evaluation-1.0.0b4.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
105
- azure_ai_evaluation-1.0.0b4.dist-info/top_level.txt,sha256=S7DhWV9m80TBzAhOFjxDUiNbKszzoThbnrSz5MpbHSQ,6
106
- azure_ai_evaluation-1.0.0b4.dist-info/RECORD,,