azure-ai-evaluation: azure_ai_evaluation-1.4.0-py3-none-any.whl → azure_ai_evaluation-1.6.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. (The original page linked to more details; the link is not preserved in this extract.)

Files changed (150)
  1. azure/ai/evaluation/__init__.py +9 -16
  2. azure/ai/evaluation/_aoai/__init__.py +10 -0
  3. azure/ai/evaluation/_aoai/aoai_grader.py +89 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +66 -0
  5. azure/ai/evaluation/_aoai/string_check_grader.py +65 -0
  6. azure/ai/evaluation/_aoai/text_similarity_grader.py +88 -0
  7. azure/ai/evaluation/_azure/_clients.py +4 -4
  8. azure/ai/evaluation/_azure/_envs.py +208 -0
  9. azure/ai/evaluation/_azure/_token_manager.py +12 -7
  10. azure/ai/evaluation/_common/__init__.py +5 -0
  11. azure/ai/evaluation/_common/evaluation_onedp_client.py +118 -0
  12. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  13. azure/ai/evaluation/_common/onedp/_client.py +139 -0
  14. azure/ai/evaluation/_common/onedp/_configuration.py +73 -0
  15. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  16. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  17. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  18. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  19. azure/ai/evaluation/_common/onedp/_validation.py +50 -0
  20. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  21. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  22. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  23. azure/ai/evaluation/_common/onedp/aio/_client.py +143 -0
  24. azure/ai/evaluation/_common/onedp/aio/_configuration.py +75 -0
  25. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  26. azure/ai/evaluation/_common/onedp/aio/_vendor.py +40 -0
  27. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +39 -0
  28. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +4494 -0
  29. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  30. azure/ai/evaluation/_common/onedp/models/__init__.py +142 -0
  31. azure/ai/evaluation/_common/onedp/models/_enums.py +162 -0
  32. azure/ai/evaluation/_common/onedp/models/_models.py +2228 -0
  33. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  34. azure/ai/evaluation/_common/onedp/operations/__init__.py +39 -0
  35. azure/ai/evaluation/_common/onedp/operations/_operations.py +5655 -0
  36. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  37. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  38. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  39. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  40. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  41. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  42. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  43. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  44. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  45. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  46. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  54. azure/ai/evaluation/_common/rai_service.py +159 -29
  55. azure/ai/evaluation/_common/raiclient/_version.py +1 -1
  56. azure/ai/evaluation/_common/utils.py +80 -2
  57. azure/ai/evaluation/_constants.py +16 -0
  58. azure/ai/evaluation/_converters/__init__.py +1 -1
  59. azure/ai/evaluation/_converters/_ai_services.py +4 -4
  60. azure/ai/evaluation/_eval_mapping.py +71 -0
  61. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +30 -16
  62. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
  63. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +17 -4
  64. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +47 -22
  65. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +18 -2
  66. azure/ai/evaluation/_evaluate/_eval_run.py +2 -2
  67. azure/ai/evaluation/_evaluate/_evaluate.py +372 -105
  68. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +534 -0
  69. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -89
  70. azure/ai/evaluation/_evaluate/_utils.py +120 -7
  71. azure/ai/evaluation/_evaluators/_common/_base_eval.py +9 -4
  72. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +1 -1
  73. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +12 -3
  74. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +2 -2
  75. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +11 -0
  76. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +467 -0
  77. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +1 -1
  78. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +2 -2
  79. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +6 -2
  80. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +1 -1
  81. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +8 -2
  82. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +31 -46
  83. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +1 -1
  84. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +5 -2
  85. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +6 -2
  86. azure/ai/evaluation/_exceptions.py +2 -0
  87. azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
  88. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  89. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  90. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  91. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  92. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  93. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  94. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  95. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  96. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  97. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  98. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  99. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +51 -32
  100. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +114 -8
  101. azure/ai/evaluation/_legacy/_batch_engine/_result.py +7 -1
  102. azure/ai/evaluation/_legacy/_batch_engine/_run.py +6 -0
  103. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +69 -29
  104. azure/ai/evaluation/_legacy/_batch_engine/_status.py +1 -1
  105. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +54 -62
  106. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +19 -1
  107. azure/ai/evaluation/{_red_team/_utils → _legacy/_common}/__init__.py +1 -1
  108. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +124 -0
  109. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +15 -0
  110. azure/ai/evaluation/_legacy/prompty/_connection.py +11 -74
  111. azure/ai/evaluation/_legacy/prompty/_exceptions.py +80 -0
  112. azure/ai/evaluation/_legacy/prompty/_prompty.py +119 -9
  113. azure/ai/evaluation/_legacy/prompty/_utils.py +72 -2
  114. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +90 -17
  115. azure/ai/evaluation/_version.py +1 -1
  116. azure/ai/evaluation/red_team/__init__.py +19 -0
  117. azure/ai/evaluation/{_red_team → red_team}/_attack_objective_generator.py +3 -0
  118. azure/ai/evaluation/{_red_team → red_team}/_attack_strategy.py +4 -1
  119. azure/ai/evaluation/{_red_team → red_team}/_red_team.py +885 -481
  120. azure/ai/evaluation/red_team/_red_team_result.py +382 -0
  121. azure/ai/evaluation/{_red_team → red_team}/_utils/constants.py +2 -1
  122. azure/ai/evaluation/{_red_team → red_team}/_utils/formatting_utils.py +23 -22
  123. azure/ai/evaluation/{_red_team → red_team}/_utils/logging_utils.py +1 -1
  124. azure/ai/evaluation/red_team/_utils/metric_mapping.py +23 -0
  125. azure/ai/evaluation/{_red_team → red_team}/_utils/strategy_utils.py +9 -5
  126. azure/ai/evaluation/simulator/_adversarial_simulator.py +63 -39
  127. azure/ai/evaluation/simulator/_constants.py +1 -0
  128. azure/ai/evaluation/simulator/_conversation/__init__.py +13 -6
  129. azure/ai/evaluation/simulator/_conversation/_conversation.py +2 -1
  130. azure/ai/evaluation/simulator/_direct_attack_simulator.py +35 -22
  131. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  132. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +40 -25
  133. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  134. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +24 -18
  135. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +5 -10
  136. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +65 -41
  137. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +9 -5
  138. azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
  139. azure/ai/evaluation/simulator/_simulator.py +1 -1
  140. {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/METADATA +36 -2
  141. {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/RECORD +148 -80
  142. azure/ai/evaluation/_red_team/_red_team_result.py +0 -246
  143. azure/ai/evaluation/simulator/_tracing.py +0 -89
  144. /azure/ai/evaluation/_legacy/{_batch_engine → _common}/_logging.py +0 -0
  145. /azure/ai/evaluation/{_red_team → red_team}/_callback_chat_target.py +0 -0
  146. /azure/ai/evaluation/{_red_team → red_team}/_default_converter.py +0 -0
  147. /azure/ai/evaluation/{_red_team → red_team/_utils}/__init__.py +0 -0
  148. {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/NOTICE.txt +0 -0
  149. {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/WHEEL +0 -0
  150. {azure_ai_evaluation-1.4.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/top_level.txt +0 -0
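File: azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty (+31 -46)
@@ -22,65 +22,51 @@ inputs: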
22
22
  ---
23
23
  system:
24
24
  # Instruction
25
- ## Context
26
- ### You are an expert in evaluating the quality of an answer from an intelligent system based on provided definitions and data. Your goal will involve answering the questions below using the information provided.
27
- - **Definition**: You are given a definition of the response quality that is being evaluated to help guide your Score.
28
- - **Data**: Your input data include a response and its ground truth.
29
- - **Questions**: To complete your evaluation you will be asked to evaluate the Data in different ways.
30
-
25
+ ## Goal
26
+ ### You are an expert in evaluating the quality of a Response from an intelligent system based on provided definition and data. Your goal will involve answering the questions below using the information provided.
27
+ - **Definition**: You are given a definition of the communication trait that is being evaluated to help guide your Score.
28
+ - **Data**: Your input data include Response and Ground Truth.
29
+ - **Tasks**: To complete your evaluation you will be asked to evaluate the Data in different ways.
31
30
 
31
+ user:
32
32
  # Definition
33
+ **Completeness** refers to how accurately and thoroughly a response represents the information provided in the ground truth. It considers both the inclusion of all relevant statements and the correctness of those statements. Each statement in the ground truth should be evaluated individually to determine if it is accurately reflected in the response without missing any key information. The scale ranges from 1 to 5, with higher numbers indicating greater completeness.
33
34
 
34
- **Level 1: Fully incomplete**
35
-
36
- **Definition:**
37
- A response is considered fully incomplete if it does not contain any the necessary and relevant information with respect to the ground truth. In other words, it completely misses all the information - especially claims and statements - established in the ground truth.
35
+ # Ratings
36
+ ## [Completeness: 1] (Fully Incomplete)
37
+ **Definition:** A response that does not contain any of the necessary and relevant information with respect to the ground truth. It completely misses all the information, especially claims and statements, established in the ground truth.
38
38
 
39
39
  **Examples:**
40
- 1. **Response:** "Flu shot cannot cure cancer. Stay healthy requires sleeping exactly 8 hours a day. A few hours of exercise per week will have little benefits for physical and mental health. Physical and mental health benefits are separate topics. Scientists have not studied any of them."
41
- **Ground Truth:** "Flu shot can prevent flu-related illnesses. Staying healthy requires proper hydration and moderate exercise. Even a few hours of exercise per week can have long-term benefits for physical and mental health. This is because physical and mental health benefits have intricate relationships through behavioral changes. Scientists are starting to discover them through rigorous studies."
42
-
43
-
44
- **Level 2: Barely complete**
40
+ **Response:** "Flu shot cannot cure cancer. Stay healthy requires sleeping exactly 8 hours a day. A few hours of exercise per week will have little benefits for physical and mental health. Physical and mental health benefits are separate topics. Scientists have not studied any of them."
41
+ **Ground Truth:** "Flu shot can prevent flu-related illnesses. Staying healthy requires proper hydration and moderate exercise. Even a few hours of exercise per week can have long-term benefits for physical and mental health. This is because physical and mental health benefits have intricate relationships through behavioral changes. Scientists are starting to discover them through rigorous studies."
45
42
 
46
- **Definition:**
47
- A response is considered barely complete if it only contains a small percentage of all the necessary and relevant information with respect to the ground truth. In other words, it misses almost all the information - especially claims and statements - established in the ground truth.
43
+ ## [Completeness: 2] (Barely Complete)
44
+ **Definition:** A response that contains only a small percentage of all the necessary and relevant information with respect to the ground truth. It misses almost all the information, especially claims and statements, established in the ground truth.
48
45
 
49
46
  **Examples:**
50
- 1. **Response:** "Flu shot can prevent flu-related illnesses. Staying healthy requires 2 meals a day. Exercise per week makes not difference to physical and mental health. This is because physical and mental health benefits have low correlation through scientific studies. Scientists are making this observation in studies."
51
- **Ground Truth:** "Flu shot can prevent flu-related illnesses. Stay healthy by proper hydration and moderate exercise. Even a few hours of exercise per week can have long-term benefits for physical and mental health. This is because physical and mental health benefits have intricate relationships through behavioral changes. Scientists are starting to discover them through rigorous studies."
47
+ **Response:** "Flu shot can prevent flu-related illnesses. Staying healthy requires 2 meals a day. Exercise per week makes no difference to physical and mental health. This is because physical and mental health benefits have low correlation through scientific studies. Scientists are making this observation in studies."
48
+ **Ground Truth:** "Flu shot can prevent flu-related illnesses. Stay healthy by proper hydration and moderate exercise. Even a few hours of exercise per week can have long-term benefits for physical and mental health. This is because physical and mental health benefits have intricate relationships through behavioral changes. Scientists are starting to discover them through rigorous studies."
52
49
 
53
-
54
- **Level 3: Moderately complete**
55
-
56
- **Definition:**
57
- A response is considered moderately complete if it contains half of the necessary and relevant information with respect to the ground truth. In other words, it miss half of the information - especially claims and statements - established in the ground truth.
50
+ ## [Completeness: 3] (Moderately Complete)
51
+ **Definition:** A response that contains half of the necessary and relevant information with respect to the ground truth. It misses half of the information, especially claims and statements, established in the ground truth.
58
52
 
59
53
  **Examples:**
60
- 1. **Response:** "Flu shot can prevent flu-related illnesses. Staying healthy requires a few dollar of investments a day. Even a few dollars of investments per week will not make an impact on physical and mental health. This is because physical and mental health benefits have intricate relationships through behavioral changes. Fiction writers are starting to discover them through their works."
61
- **Ground Truth:** "Flu shot can prevent flu-related illnesses. Stay healthy by proper hydration and moderate exercise. Even a few hours of exercise per week can have long-term benefits for physical and mental health. This is because physical and mental health benefits have intricate relationships through behavioral changes. Scientists are starting to discover them through rigorous studies."
62
-
63
-
64
- **Level 4: Mostly complete**
65
-
66
- **Definition:**
67
- A response is considered mostly complete if it contains most of the necessary and relevant information with respect to the ground truth. In other words, it misses some minor information - especially claims and statements - established in the ground truth.
54
+ **Response:** "Flu shot can prevent flu-related illnesses. Staying healthy requires a few dollars of investments a day. Even a few dollars of investments per week will not make an impact on physical and mental health. This is because physical and mental health benefits have intricate relationships through behavioral changes. Fiction writers are starting to discover them through their works."
55
+ **Ground Truth:** "Flu shot can prevent flu-related illnesses. Stay healthy by proper hydration and moderate exercise. Even a few hours of exercise per week can have long-term benefits for physical and mental health. This is because physical and mental health benefits have intricate relationships through behavioral changes. Scientists are starting to discover them through rigorous studies."
68
56
 
57
+ ## [Completeness: 4] (Mostly Complete)
58
+ **Definition:** A response that contains most of the necessary and relevant information with respect to the ground truth. It misses some minor information, especially claims and statements, established in the ground truth.
69
59
 
70
60
  **Examples:**
71
- 1. **Response:** "Flu shot can prevent flu-related illnesses. Staying healthy requires keto diet and rigorous athletic training. Even a few hours of exercise per week can have long-term benefits for physical and mental health. This is because physical and mental health benefits have intricate relationships through behavioral changes. Scientists are starting to discover them through rigorous studies."
72
- **Ground Truth:** "Flu shot can prevent flu-related illnesses. Stay healthy by proper hydration and moderate exercise. Even a few hours of exercise per week can have long-term benefits for physical and mental health. This is because physical and mental health benefits have intricate relationships through behavioral changes. Scientists are starting to discover them through rigorous studies."
73
-
61
+ **Response:** "Flu shot can prevent flu-related illnesses. Staying healthy requires keto diet and rigorous athletic training. Even a few hours of exercise per week can have long-term benefits for physical and mental health. This is because physical and mental health benefits have intricate relationships through behavioral changes. Scientists are starting to discover them through rigorous studies."
62
+ **Ground Truth:** "Flu shot can prevent flu-related illnesses. Stay healthy by proper hydration and moderate exercise. Even a few hours of exercise per week can have long-term benefits for physical and mental health. This is because physical and mental health benefits have intricate relationships through behavioral changes. Scientists are starting to discover them through rigorous studies."
74
63
 
75
- **Level 5: Fully complete**
76
-
77
- **Definition:**
78
- A response is considered complete if it perfectly contains all the necessary and relevant information with respect to the ground truth. In other words, it does not miss any information from statements and claims in the ground truth.
64
+ ## [Completeness: 5] (Fully Complete)
65
+ **Definition:** A response that perfectly contains all the necessary and relevant information with respect to the ground truth. It does not miss any information from statements and claims in the ground truth.
79
66
 
80
67
  **Examples:**
81
- 1. **Response:** "Flu shot can prevent flu-related illnesses. Stay healthy by proper hydration and moderate exercise. Even a few hours of exercise per week can have long-term benefits for physical and mental health. This is because physical and mental health benefits have intricate relationships through behavioral changes. Scientists are starting to discover them through rigorous studies."
82
- **Ground Truth:** "Flu shot can prevent flu-related illnesses. Stay healthy by proper hydration and moderate exercise. Even a few hours of exercise per week can have long-term benefits for physical and mental health. This is because physical and mental health benefits have intricate relationships through behavioral changes. Scientists are starting to discover them through rigorous studies."
83
-
68
+ **Response:** "Flu shot can prevent flu-related illnesses. Stay healthy by proper hydration and moderate exercise. Even a few hours of exercise per week can have long-term benefits for physical and mental health. This is because physical and mental health benefits have intricate relationships through behavioral changes. Scientists are starting to discover them through rigorous studies."
69
+ **Ground Truth:** "Flu shot can prevent flu-related illnesses. Stay healthy by proper hydration and moderate exercise. Even a few hours of exercise per week can have long-term benefits for physical and mental health. This is because physical and mental health benefits have intricate relationships through behavioral changes. Scientists are starting to discover them through rigorous studies."
84
70
 
85
71
 
86
72
  # Data
@@ -89,11 +75,10 @@ Ground Truth: {{ground_truth}}
89
75
 
90
76
 
91
77
  # Tasks
92
- ## Please provide your assessment Score for the previous answer. Your output should include the following information:
93
- - **ThoughtChain**: To improve the reasoning process, Think Step by Step and include a step-by-step explanation of your thought process as you analyze the data based on the definitions. Keep it brief and Start your ThoughtChain with "Let's think step by step:".
78
+ ## Please provide your assessment Score for the previous RESPONSE in relation to the GROUND TRUTH based on the Definitions above. Your output should include the following information:
79
+ - **ThoughtChain**: To improve the reasoning process, think step by step and include a step-by-step explanation of your thought process as you analyze the data based on the definitions. Keep it brief and start your ThoughtChain with "Let's think step by step:".
94
80
  - **Explanation**: a very short explanation of why you think the input data should get that Score.
95
- - **Score**: based on your previous analysis, provide your Score. The answer you give MUST be a integer score ("1", "2", ...) based on the categories of the definitions.
96
-
81
+ - **Score**: based on your previous analysis, provide your Score. The Score you give MUST be an integer score (i.e., "1", "2"...) based on the levels of the definitions.
97
82
 
98
83
  ## Please provide your answers between the tags: <S0>your chain of thoughts</S0>, <S1>your explanation</S1>, <S2>your score</S2>.
99
84
  # Output
File: azure/ai/evaluation/_evaluators/_similarity/_similarity.py (+1 -1)
@@ -28,7 +28,7 @@ class SimilarityEvaluator(PromptyEvaluatorBase):
28
28
  :param model_config: Configuration for the Azure OpenAI model.
29
29
  :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration,
30
30
  ~azure.ai.evaluation.OpenAIModelConfiguration]
31
- :param threshold: The threshold for the similarity evaluator. Default is 5.
31
+ :param threshold: The threshold for the similarity evaluator. Default is 3.
32
32
  :type threshold: int
33
33
 
34
34
  .. admonition:: Example:
File: azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py (+5 -2)
@@ -54,11 +54,14 @@ class TaskAdherenceEvaluator(PromptyEvaluatorBase[Union[str, float]]):
54
54
  """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
55
55
 
56
56
  @override
57
- def __init__(self, model_config, *, threshold=_DEFAULT_TASK_ADHERENCE_SCORE):
57
+ def __init__(self, model_config, *, threshold=_DEFAULT_TASK_ADHERENCE_SCORE,
58
+ **kwargs):
58
59
  current_dir = os.path.dirname(__file__)
59
60
  prompty_path = os.path.join(current_dir, self._PROMPTY_FILE)
60
61
  self.threshold = threshold
61
- super().__init__(model_config=model_config, prompty_file=prompty_path, result_key=self._RESULT_KEY)
62
+ super().__init__(model_config=model_config, prompty_file=prompty_path,
63
+ result_key=self._RESULT_KEY,
64
+ **kwargs)
62
65
 
63
66
  @overload
64
67
  def __call__(
File: azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py (+6 -2)
@@ -64,11 +64,15 @@ class ToolCallAccuracyEvaluator(PromptyEvaluatorBase[Union[str, float]]):
64
64
  """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
65
65
 
66
66
  @override
67
- def __init__(self, model_config, *, threshold=_DEFAULT_TOOL_CALL_ACCURACY_SCORE):
67
+ def __init__(self, model_config, *,
68
+ threshold=_DEFAULT_TOOL_CALL_ACCURACY_SCORE,
69
+ **kwargs):
68
70
  current_dir = os.path.dirname(__file__)
69
71
  prompty_path = os.path.join(current_dir, self._PROMPTY_FILE)
70
72
  self.threshold = threshold
71
- super().__init__(model_config=model_config, prompty_file=prompty_path, result_key=self._RESULT_KEY)
73
+ super().__init__(model_config=model_config, prompty_file=prompty_path,
74
+ result_key=self._RESULT_KEY,
75
+ **kwargs)
72
76
 
73
77
  @overload
74
78
  def __call__(
File: azure/ai/evaluation/_exceptions.py (+2 -0)
@@ -38,6 +38,7 @@ class ErrorCategory(Enum):
38
38
  FAILED_REMOTE_TRACKING = "FAILED REMOTE TRACKING"
39
39
  PROJECT_ACCESS_ERROR = "PROJECT ACCESS ERROR"
40
40
  UNKNOWN = "UNKNOWN"
41
+ UPLOAD_ERROR = "UPLOAD ERROR"
41
42
 
42
43
 
43
44
  class ErrorBlame(Enum):
@@ -85,6 +86,7 @@ class ErrorTarget(Enum):
85
86
  CONVERSATION = "Conversation"
86
87
  TOOL_CALL_ACCURACY_EVALUATOR = "ToolCallAccuracyEvaluator"
87
88
  RED_TEAM = "RedTeam"
89
+ AOAI_GRADER = "AoaiGrader"
88
90
 
89
91
 
90
92
  class EvaluationException(AzureError):
File: azure/ai/evaluation/_legacy/_adapters/__init__.py (+7 -0)
@@ -0,0 +1,7 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ # NOTE: This contains adapters that make the Promptflow dependency optional. In the first phase,
6
+ # Promptflow will still be installed as part of the azure-ai-evaluation dependencies. This
7
+ # will be removed in the future once the code migration is complete.
File: azure/ai/evaluation/_legacy/_adapters/_check.py (+17 -0)
@@ -0,0 +1,17 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ from typing import Final
6
+
7
+
8
+ _has_legacy = False
9
+ try:
10
+ from promptflow._constants import FlowType
11
+
12
+ _has_legacy = True
13
+ except ImportError:
14
+ pass
15
+
16
+ HAS_LEGACY_SDK: Final[bool] = _has_legacy
17
+ MISSING_LEGACY_SDK: Final[bool] = not _has_legacy
File: azure/ai/evaluation/_legacy/_adapters/_configuration.py (+45 -0)
@@ -0,0 +1,45 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ from pathlib import Path
6
+ from typing import Any, Dict, Final, Optional
7
+ from typing_extensions import TypeAlias
8
+
9
+
10
+ try:
11
+ from promptflow._sdk._configuration import Configuration as _Configuration
12
+ except ImportError:
13
+ _global_config: Final[Dict[str, Any]] = {}
14
+
15
+ class _Configuration:
16
+ TRACE_DESTINATION: Final[str] = "trace.destination"
17
+ _instance = None
18
+
19
+ def __init__(self, *, override_config: Optional[Dict[str, Any]] = None) -> None:
20
+ self._config = override_config or {}
21
+
22
+ @classmethod
23
+ def get_instance(cls) -> "_Configuration":
24
+ """Use this to get instance to avoid multiple copies of same global config."""
25
+ if cls._instance is None:
26
+ cls._instance = Configuration(override_config=_global_config)
27
+ return cls._instance
28
+
29
+ def set_config(self, key: str, value: Any) -> None:
30
+ # Simulated config storage
31
+ self._config[key] = value
32
+
33
+ def get_config(self, key: str) -> Any:
34
+ # Simulated config storage
35
+ if key in self._config:
36
+ return self._config[key]
37
+ return _global_config.get(key, None)
38
+
39
+ def get_trace_destination(self, path: Optional[Path] = None) -> Optional[str]:
40
+ if path:
41
+ raise NotImplementedError("Setting trace destination with a path is not supported.")
42
+ return self._config.get("trace.destination", None)
43
+
44
+
45
+ Configuration: TypeAlias = _Configuration
File: azure/ai/evaluation/_legacy/_adapters/_constants.py (+10 -0)
@@ -0,0 +1,10 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ from typing import Final
6
+
7
+
8
+ PF_FLOW_ENTRY_IN_TMP: Final[str] = "PF_FLOW_ENTRY_IN_TMP"
9
+ PF_FLOW_META_LOAD_IN_SUBPROCESS: Final[str] = "PF_FLOW_META_LOAD_IN_SUBPROCESS"
10
+ LINE_NUMBER: Final[str] = "line_number"
File: azure/ai/evaluation/_legacy/_adapters/_errors.py (+29 -0)
@@ -0,0 +1,29 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ from typing import Any
6
+ from typing_extensions import TypeAlias
7
+
8
+
9
+ try:
10
+ from promptflow.core._errors import MissingRequiredPackage as _MissingRequiredPackage
11
+ except ImportError:
12
+ from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
13
+
14
+ class _MissingRequiredPackage(EvaluationException):
15
+ """Raised when a required package is missing.
16
+
17
+ :param message: A message describing the error. This is the error message the user will see.
18
+ :type message: str
19
+ """
20
+
21
+ def __init__(self, message: str, **kwargs: Any):
22
+ kwargs.setdefault("category", ErrorCategory.MISSING_PACKAGE)
23
+ kwargs.setdefault("blame", ErrorBlame.SYSTEM_ERROR)
24
+ kwargs.setdefault("target", ErrorTarget.EVALUATE)
25
+ kwargs.setdefault("internal_message", "Missing required package.")
26
+ super().__init__(message=message, **kwargs)
27
+
28
+
29
+ MissingRequiredPackage: TypeAlias = _MissingRequiredPackage
@@ -0,0 +1,28 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ from typing_extensions import TypeAlias
6
+
7
+
8
try:
    from promptflow.core._flow import AsyncPrompty as _AsyncPrompty
    from promptflow._sdk.entities._flows import FlexFlow as _FlexFlow
    from promptflow._sdk.entities._flows.dag import Flow as _Flow
except ImportError:
    # promptflow could not be imported: use the in-package prompty implementation
    # and bare placeholder classes for the two flow types.
    from azure.ai.evaluation._legacy.prompty import AsyncPrompty as _AsyncPrompty

    class _Flow:
        # The placeholder declares only a ``name`` attribute.
        name: str

    class _FlexFlow:
        pass

    # Present the placeholders under the public promptflow class names.
    _Flow.__name__ = "Flow"
    _FlexFlow.__name__ = "FlexFlow"


# Public names, bound to whichever implementation was resolved above.
AsyncPrompty: TypeAlias = _AsyncPrompty
FlexFlow: TypeAlias = _FlexFlow
Flow: TypeAlias = _Flow
@@ -0,0 +1,16 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ from typing import Callable, Final
6
+
7
+
8
try:
    from promptflow._cli._pf._service import stop_service as _stop_service
except ImportError:

    def _stop_service() -> None:
        """No-op stand-in used when promptflow's ``stop_service`` cannot be imported."""
        return None


# Public entry point: promptflow's implementation when importable, otherwise the no-op above.
stop_service: Final[Callable[[], None]] = _stop_service
@@ -0,0 +1,51 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ from os import PathLike
6
+ from typing import Any, Callable, Dict, Optional, Union
7
+ from typing_extensions import TypeAlias
8
+
9
+ import pandas as pd
10
+
11
+ from ._errors import MissingRequiredPackage
12
+ from ._configuration import Configuration
13
+ from .entities import Run
14
+
15
+
16
try:
    from promptflow.client import PFClient as _PFClient
except ImportError:

    class _PFClient:
        """Stand-in for promptflow's ``PFClient`` used when promptflow cannot be imported.

        ``run`` always raises ``MissingRequiredPackage``; the query helpers return empty results.
        """

        def __init__(self, **kwargs):
            # Only the optional "config" keyword is consumed; it becomes the configuration override.
            override = kwargs.pop("config", None)
            self._config = Configuration(override_config=override)

        def run(
            self,
            flow: Union[str, PathLike, Callable],
            *,
            data: Union[str, PathLike],
            run: Optional[Union[str, Run]] = None,
            column_mapping: Optional[dict] = None,
            variant: Optional[str] = None,
            connections: Optional[dict] = None,
            environment_variables: Optional[dict] = None,
            name: Optional[str] = None,
            display_name: Optional[str] = None,
            tags: Optional[Dict[str, str]] = None,
            resume_from: Optional[Union[str, Run]] = None,
            code: Optional[Union[str, PathLike]] = None,
            init: Optional[dict] = None,
            **kwargs,
        ) -> Run:
            """Accept the run arguments, then fail because promptflow is absent.

            :raises MissingRequiredPackage: Always.
            """
            raise MissingRequiredPackage("Please install 'promptflow' package to use PFClient")

        def get_details(self, run: Union[str, Run], max_results: int = 100, all_results: bool = False) -> pd.DataFrame:
            """Return an empty ``DataFrame``; the arguments are ignored."""
            empty_frame = pd.DataFrame()
            return empty_frame

        def get_metrics(self, run: Union[str, Run]) -> Dict[str, Any]:
            """Return an empty mapping; the argument is ignored."""
            return {}


PFClient: TypeAlias = _PFClient
@@ -0,0 +1,26 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ from typing_extensions import TypeAlias
6
+
7
+
8
+ try:
9
+ from promptflow._sdk.entities import Run as _Run
10
+ except ImportError:
11
+ from typing_extensions import Protocol
12
+ from typing import Any, Dict, Optional
13
+ from datetime import datetime
14
+ from pathlib import Path
15
+
16
+ class _Run(Protocol):
17
+ name: str
18
+ status: str
19
+ _properties: Dict[str, Any]
20
+ _created_on: datetime
21
+ _end_time: Optional[datetime]
22
+ _experiment_name: Optional[str]
23
+ _output_path: Path
24
+
25
+
26
+ Run: TypeAlias = _Run
@@ -0,0 +1,28 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ from typing import Callable, Dict, Final, Optional
6
+ from typing_extensions import TypeAlias
7
+
8
+
9
try:
    from promptflow.tracing import ThreadPoolExecutorWithContext as _ThreadPoolExecutorWithContext
    from promptflow.tracing._integrations._openai_injector import (
        inject_openai_api as _inject,
        recover_openai_api as _recover,
    )
    from promptflow.tracing import _start_trace
except ImportError:
    # promptflow could not be imported: the standard-library executor takes the place of
    # the promptflow one, and the in-package batch engine supplies the remaining helpers.
    # NOTE(review): a plain ThreadPoolExecutor presumably does not carry context into
    # worker threads the way the promptflow executor's name suggests — confirm.
    from concurrent.futures import ThreadPoolExecutor as _ThreadPoolExecutorWithContext
    from azure.ai.evaluation._legacy._batch_engine._openai_injector import inject_openai_api as _inject
    from azure.ai.evaluation._legacy._batch_engine._openai_injector import recover_openai_api as _recover
    from azure.ai.evaluation._legacy._batch_engine._trace import start_trace as _start_trace


# Public names, bound to whichever implementation was resolved above.
ThreadPoolExecutorWithContext: TypeAlias = _ThreadPoolExecutorWithContext
inject_openai_api: Final[Callable[[], None]] = _inject
recover_openai_api: Final[Callable[[], None]] = _recover
start_trace: Final = _start_trace
@@ -0,0 +1,15 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ from typing import Any
6
+
7
+
8
class AttrDict(dict):
    """A dictionary that allows attribute access to its keys.

    ``obj.key`` reads ``obj["key"]`` and ``obj.key = value`` writes ``obj["key"]``.
    Reading a name that is not a key raises ``AttributeError`` (chained from the
    underlying ``KeyError``), so ``hasattr``, ``getattr`` with a default, and
    protocol probes that expect ``AttributeError`` behave correctly.
    """

    def __getattr__(self, key: str) -> Any:
        """Return the value stored under ``key``.

        Python calls this hook only after normal attribute lookup has failed.

        :param key: Attribute name, used as the dictionary key.
        :type key: str
        :return: The value stored under ``key``.
        :rtype: Any
        :raises AttributeError: If ``key`` is not present in the dictionary.
        """
        try:
            return self[key]
        except KeyError as exc:
            # The attribute protocol requires AttributeError; a bare KeyError would escape hasattr().
            raise AttributeError(f"{type(self).__name__!r} object has no attribute {key!r}") from exc

    def __setattr__(self, key: str, value: Any) -> None:
        """Store ``value`` under ``key`` as a dictionary item rather than an instance attribute.

        :param key: Attribute name, used as the dictionary key.
        :type key: str
        :param value: Value to store.
        :type value: Any
        """
        self[key] = value
@@ -0,0 +1,31 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ from typing import Final, Optional
6
+ from typing_extensions import TypeAlias
7
+
8
+
9
try:
    from promptflow._utils.user_agent_utils import ClientUserAgentUtil as _ClientUserAgentUtil
    from promptflow._utils.async_utils import async_run_allowing_running_loop as _async_run_allowing_running_loop
    from promptflow._cli._utils import get_workspace_triad_from_local as _get_workspace_triad_from_local
except ImportError:
    # promptflow could not be imported: take the async helper from the in-package batch
    # engine and provide minimal stand-ins for the other two names.
    from azure.ai.evaluation._legacy._batch_engine._utils_deprecated import (
        async_run_allowing_running_loop as _async_run_allowing_running_loop,
    )
    from azure.ai.evaluation._evaluate._utils import AzureMLWorkspace

    def _get_workspace_triad_from_local() -> AzureMLWorkspace:
        """Return an ``AzureMLWorkspace`` built from three empty strings."""
        blank = ""
        return AzureMLWorkspace(blank, blank, blank)

    class _ClientUserAgentUtil:
        @staticmethod
        def append_user_agent(user_agent: Optional[str]):
            # Currently a no-op: the supplied user agent is discarded.
            # TODO ralphe: implement?
            pass


# Public names, bound to whichever implementation was resolved above.
ClientUserAgentUtil: TypeAlias = _ClientUserAgentUtil
async_run_allowing_running_loop: Final = _async_run_allowing_running_loop
get_workspace_triad_from_local: Final = _get_workspace_triad_from_local