azure-ai-evaluation 1.8.0__py3-none-any.whl → 1.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation has been flagged as potentially problematic; review the changes below for details.

Files changed (142)
  1. azure/ai/evaluation/__init__.py +51 -6
  2. azure/ai/evaluation/_aoai/__init__.py +1 -1
  3. azure/ai/evaluation/_aoai/aoai_grader.py +21 -11
  4. azure/ai/evaluation/_aoai/label_grader.py +3 -2
  5. azure/ai/evaluation/_aoai/python_grader.py +84 -0
  6. azure/ai/evaluation/_aoai/score_model_grader.py +91 -0
  7. azure/ai/evaluation/_aoai/string_check_grader.py +3 -2
  8. azure/ai/evaluation/_aoai/text_similarity_grader.py +3 -2
  9. azure/ai/evaluation/_azure/_envs.py +9 -10
  10. azure/ai/evaluation/_azure/_token_manager.py +7 -1
  11. azure/ai/evaluation/_common/constants.py +11 -2
  12. azure/ai/evaluation/_common/evaluation_onedp_client.py +32 -26
  13. azure/ai/evaluation/_common/onedp/__init__.py +32 -32
  14. azure/ai/evaluation/_common/onedp/_client.py +136 -139
  15. azure/ai/evaluation/_common/onedp/_configuration.py +70 -73
  16. azure/ai/evaluation/_common/onedp/_patch.py +21 -21
  17. azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
  18. azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
  19. azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
  20. azure/ai/evaluation/_common/onedp/_validation.py +50 -50
  21. azure/ai/evaluation/_common/onedp/_version.py +9 -9
  22. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -29
  23. azure/ai/evaluation/_common/onedp/aio/_client.py +138 -143
  24. azure/ai/evaluation/_common/onedp/aio/_configuration.py +70 -75
  25. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -21
  26. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +37 -39
  27. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +4832 -4494
  28. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -21
  29. azure/ai/evaluation/_common/onedp/models/__init__.py +168 -142
  30. azure/ai/evaluation/_common/onedp/models/_enums.py +230 -162
  31. azure/ai/evaluation/_common/onedp/models/_models.py +2685 -2228
  32. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -21
  33. azure/ai/evaluation/_common/onedp/operations/__init__.py +37 -39
  34. azure/ai/evaluation/_common/onedp/operations/_operations.py +6106 -5657
  35. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -21
  36. azure/ai/evaluation/_common/rai_service.py +88 -52
  37. azure/ai/evaluation/_common/raiclient/__init__.py +1 -1
  38. azure/ai/evaluation/_common/raiclient/operations/_operations.py +14 -1
  39. azure/ai/evaluation/_common/utils.py +188 -10
  40. azure/ai/evaluation/_constants.py +2 -1
  41. azure/ai/evaluation/_converters/__init__.py +1 -1
  42. azure/ai/evaluation/_converters/_ai_services.py +9 -8
  43. azure/ai/evaluation/_converters/_models.py +46 -0
  44. azure/ai/evaluation/_converters/_sk_services.py +495 -0
  45. azure/ai/evaluation/_eval_mapping.py +2 -2
  46. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +73 -25
  47. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +2 -2
  48. azure/ai/evaluation/_evaluate/_evaluate.py +210 -94
  49. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +132 -89
  50. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +0 -1
  51. azure/ai/evaluation/_evaluate/_utils.py +25 -17
  52. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +4 -4
  53. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +20 -12
  54. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +6 -6
  55. azure/ai/evaluation/_evaluators/_common/_base_eval.py +45 -11
  56. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +24 -9
  57. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +24 -9
  58. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +28 -18
  59. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +11 -8
  60. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +11 -8
  61. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +12 -9
  62. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +10 -7
  63. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +1 -5
  64. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +37 -64
  65. azure/ai/evaluation/_evaluators/_eci/_eci.py +6 -3
  66. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +5 -5
  67. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +3 -3
  68. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +4 -4
  69. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +12 -8
  70. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +31 -26
  71. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +210 -96
  72. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +3 -4
  73. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +14 -7
  74. azure/ai/evaluation/_evaluators/_qa/_qa.py +5 -5
  75. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +62 -15
  76. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +140 -59
  77. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +21 -26
  78. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +5 -5
  79. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +22 -22
  80. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +7 -6
  81. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +4 -4
  82. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +27 -24
  83. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +354 -66
  84. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +175 -183
  85. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +99 -21
  86. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +20 -12
  87. azure/ai/evaluation/_evaluators/_xpia/xpia.py +10 -7
  88. azure/ai/evaluation/_exceptions.py +10 -0
  89. azure/ai/evaluation/_http_utils.py +3 -3
  90. azure/ai/evaluation/_legacy/_batch_engine/_config.py +6 -3
  91. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +117 -32
  92. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +5 -2
  93. azure/ai/evaluation/_legacy/_batch_engine/_result.py +2 -0
  94. azure/ai/evaluation/_legacy/_batch_engine/_run.py +2 -2
  95. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +33 -41
  96. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +1 -4
  97. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +12 -19
  98. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +2 -0
  99. azure/ai/evaluation/_legacy/prompty/_prompty.py +11 -5
  100. azure/ai/evaluation/_safety_evaluation/__init__.py +1 -1
  101. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +195 -111
  102. azure/ai/evaluation/_user_agent.py +32 -1
  103. azure/ai/evaluation/_version.py +1 -1
  104. azure/ai/evaluation/red_team/__init__.py +3 -1
  105. azure/ai/evaluation/red_team/_agent/__init__.py +1 -1
  106. azure/ai/evaluation/red_team/_agent/_agent_functions.py +68 -71
  107. azure/ai/evaluation/red_team/_agent/_agent_tools.py +103 -145
  108. azure/ai/evaluation/red_team/_agent/_agent_utils.py +26 -6
  109. azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +62 -71
  110. azure/ai/evaluation/red_team/_attack_objective_generator.py +94 -52
  111. azure/ai/evaluation/red_team/_attack_strategy.py +2 -1
  112. azure/ai/evaluation/red_team/_callback_chat_target.py +4 -9
  113. azure/ai/evaluation/red_team/_default_converter.py +1 -1
  114. azure/ai/evaluation/red_team/_red_team.py +1947 -1040
  115. azure/ai/evaluation/red_team/_red_team_result.py +49 -38
  116. azure/ai/evaluation/red_team/_utils/__init__.py +1 -1
  117. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +39 -34
  118. azure/ai/evaluation/red_team/_utils/_rai_service_target.py +163 -138
  119. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +14 -14
  120. azure/ai/evaluation/red_team/_utils/constants.py +1 -13
  121. azure/ai/evaluation/red_team/_utils/formatting_utils.py +41 -44
  122. azure/ai/evaluation/red_team/_utils/logging_utils.py +17 -17
  123. azure/ai/evaluation/red_team/_utils/metric_mapping.py +31 -4
  124. azure/ai/evaluation/red_team/_utils/strategy_utils.py +33 -25
  125. azure/ai/evaluation/simulator/_adversarial_scenario.py +2 -0
  126. azure/ai/evaluation/simulator/_adversarial_simulator.py +31 -17
  127. azure/ai/evaluation/simulator/_conversation/__init__.py +2 -2
  128. azure/ai/evaluation/simulator/_direct_attack_simulator.py +8 -8
  129. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +18 -6
  130. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +54 -24
  131. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +7 -1
  132. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +30 -10
  133. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +19 -31
  134. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +20 -6
  135. azure/ai/evaluation/simulator/_model_tools/models.py +1 -1
  136. azure/ai/evaluation/simulator/_simulator.py +21 -8
  137. {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.10.0.dist-info}/METADATA +46 -3
  138. {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.10.0.dist-info}/RECORD +141 -136
  139. azure/ai/evaluation/_common/onedp/aio/_vendor.py +0 -40
  140. {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.10.0.dist-info}/NOTICE.txt +0 -0
  141. {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.10.0.dist-info}/WHEEL +0 -0
  142. {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.10.0.dist-info}/top_level.txt +0 -0
@@ -8,27 +8,28 @@ from azure.ai.evaluation._common._experimental import experimental
8
8
  from azure.ai.evaluation._common.constants import EvaluationMetrics
9
9
  from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
10
10
 
11
+
11
12
  @experimental
12
13
  class UngroundedAttributesEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
13
14
  """
14
- Evaluates ungrounded inference of human attributes for a given query, response, and context for a single-turn evaluation only,
15
- where query represents the user query and response represents the AI system response given the provided context.
16
-
17
- Ungrounded Attributes checks for whether a response is first, ungrounded, and checks if it contains information about protected class or
15
+ Evaluates ungrounded inference of human attributes for a given query, response, and context for a single-turn evaluation only,
16
+ where query represents the user query and response represents the AI system response given the provided context.
17
+
18
+ Ungrounded Attributes checks for whether a response is first, ungrounded, and checks if it contains information about protected class or
18
19
  emotional state of a person.
19
20
 
20
21
 
21
22
  It identifies the following attributes:
22
-
23
+
23
24
  - emotional_state
24
25
  - protected_class
25
26
  - groundedness
26
27
 
27
28
  :param credential: The credential for connecting to Azure AI project. Required
28
29
  :type credential: ~azure.core.credentials.TokenCredential
29
- :param azure_ai_project: The scope of the Azure AI project.
30
- It contains subscription id, resource group, and project name.
31
- :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
30
+ :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
31
+ or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
32
+ :type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
32
33
  :param kwargs: Additional arguments to pass to the evaluator.
33
34
  :type kwargs: Any
34
35
 
@@ -42,13 +43,13 @@ class UngroundedAttributesEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
42
43
  :caption: Initialize and call a UngroundedAttributesEvaluator with a query, response and context.
43
44
 
44
45
  .. admonition:: Example using Azure AI Project URL:
45
-
46
+
46
47
  .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
47
48
  :start-after: [START ungrounded_attributes_evaluator]
48
49
  :end-before: [END ungrounded_attributes_evaluator]
49
50
  :language: python
50
51
  :dedent: 8
51
- :caption: Initialize and call UngroundedAttributesEvaluator using Azure AI Project URL in the following format
52
+ :caption: Initialize and call UngroundedAttributesEvaluator using Azure AI Project URL in the following format
52
53
  https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
53
54
 
54
55
  .. note::
@@ -57,19 +58,26 @@ class UngroundedAttributesEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
57
58
  for the ungrounded attributes will be "ungrounded_attributes_label".
58
59
  """
59
60
 
60
- id = "ungrounded_attributes"
61
+ id = "azureai://built-in/evaluators/ungrounded_attributes"
61
62
  """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
63
+ _OPTIONAL_PARAMS = ["query"]
62
64
 
63
65
  @override
64
66
  def __init__(
65
67
  self,
66
68
  credential,
67
69
  azure_ai_project,
70
+ **kwargs,
68
71
  ):
72
+ # Set default for evaluate_query if not provided
73
+ if "evaluate_query" not in kwargs:
74
+ kwargs["evaluate_query"] = True
75
+
69
76
  super().__init__(
70
77
  eval_metric=EvaluationMetrics.UNGROUNDED_ATTRIBUTES,
71
78
  azure_ai_project=azure_ai_project,
72
79
  credential=credential,
80
+ **kwargs,
73
81
  )
74
82
 
75
83
  @overload
@@ -109,5 +117,5 @@ class UngroundedAttributesEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
109
117
  :return: The ungrounded attributes label.
110
118
  :rtype: Dict[str, Union[str, bool]]
111
119
  """
112
-
120
+
113
121
  return super().__call__(*args, **kwargs)
@@ -40,9 +40,9 @@ class IndirectAttackEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
40
40
 
41
41
  :param credential: The credential for connecting to Azure AI project. Required
42
42
  :type credential: ~azure.core.credentials.TokenCredential
43
- :param azure_ai_project: The scope of the Azure AI project. It contains subscription id, resource group, and project
44
- name.
45
- :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
43
+ :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
44
+ or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
45
+ :type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
46
46
  :param threshold: The threshold for the IndirectAttack evaluator. Default is 0.
47
47
  :type threshold: int
48
48
 
@@ -54,32 +54,35 @@ class IndirectAttackEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
54
54
  :language: python
55
55
  :dedent: 8
56
56
  :caption: Initialize and call an IndirectAttackEvaluator.
57
-
57
+
58
58
  .. admonition:: Example using Azure AI Project URL:
59
-
59
+
60
60
  .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
61
61
  :start-after: [START indirect_attack_evaluator]
62
62
  :end-before: [END indirect_attack_evaluator]
63
63
  :language: python
64
64
  :dedent: 8
65
- :caption: Initialize and call IndirectAttackEvaluator using Azure AI Project URL in the following format
65
+ :caption: Initialize and call IndirectAttackEvaluator using Azure AI Project URL in the following format
66
66
  https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
67
67
 
68
68
  """
69
69
 
70
- id = "azureml://registries/azureml/models/Indirect-Attack-Evaluator/versions/3"
70
+ id = "azureai://built-in/evaluators/indirect_attack"
71
71
  """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
72
+ _OPTIONAL_PARAMS = ["query"]
72
73
 
73
74
  @override
74
75
  def __init__(
75
76
  self,
76
77
  credential,
77
78
  azure_ai_project,
79
+ **kwargs,
78
80
  ):
79
81
  super().__init__(
80
82
  eval_metric=EvaluationMetrics.XPIA,
81
83
  azure_ai_project=azure_ai_project,
82
84
  credential=credential,
85
+ **kwargs,
83
86
  )
84
87
 
85
88
  @overload
@@ -9,6 +9,15 @@ from typing import Optional
9
9
  from azure.core.exceptions import AzureError
10
10
 
11
11
 
12
+ class ErrorMessage(Enum):
13
+ """Error messages to be used when raising EvaluationException.
14
+
15
+ These messages are used to provide a consistent error message format across the SDK.
16
+ """
17
+
18
+ MALFORMED_CONVERSATION_HISTORY = "Malformed Conversation History: Query parameter representing conversation history should have exactly one more user query than agent responses"
19
+
20
+
12
21
  class ErrorCategory(Enum):
13
22
  """Error category to be specified when using EvaluationException class.
14
23
 
@@ -87,6 +96,7 @@ class ErrorTarget(Enum):
87
96
  TOOL_CALL_ACCURACY_EVALUATOR = "ToolCallAccuracyEvaluator"
88
97
  RED_TEAM = "RedTeam"
89
98
  AOAI_GRADER = "AoaiGrader"
99
+ CONVERSATION_HISTORY_PARSING = "_get_conversation_history"
90
100
 
91
101
 
92
102
  class EvaluationException(AzureError):
@@ -7,7 +7,7 @@ from typing import Any, Dict, MutableMapping, Optional, TypedDict, cast
7
7
 
8
8
  from typing_extensions import Self, Unpack
9
9
 
10
- from azure.ai.evaluation._user_agent import USER_AGENT
10
+ from azure.ai.evaluation._user_agent import UserAgentSingleton
11
11
  from azure.core.configuration import Configuration
12
12
  from azure.core.pipeline import AsyncPipeline, Pipeline
13
13
  from azure.core.pipeline.policies import (
@@ -454,7 +454,7 @@ def get_http_client(**kwargs: Any) -> HttpPipeline:
454
454
  :returns: An HttpPipeline with a set of applied policies:
455
455
  :rtype: HttpPipeline
456
456
  """
457
- kwargs.setdefault("user_agent_policy", UserAgentPolicy(base_user_agent=USER_AGENT))
457
+ kwargs.setdefault("user_agent_policy", UserAgentPolicy(base_user_agent=UserAgentSingleton().value))
458
458
  return HttpPipeline(**kwargs)
459
459
 
460
460
 
@@ -464,5 +464,5 @@ def get_async_http_client(**kwargs: Any) -> AsyncHttpPipeline:
464
464
  :returns: An AsyncHttpPipeline with a set of applied policies:
465
465
  :rtype: AsyncHttpPipeline
466
466
  """
467
- kwargs.setdefault("user_agent_policy", UserAgentPolicy(base_user_agent=USER_AGENT))
467
+ kwargs.setdefault("user_agent_policy", UserAgentPolicy(base_user_agent=UserAgentSingleton().value))
468
468
  return AsyncHttpPipeline(**kwargs)
@@ -19,7 +19,7 @@ class BatchEngineConfig:
19
19
  batch_timeout_seconds: int = PF_BATCH_TIMEOUT_SEC_DEFAULT
20
20
  """The maximum amount of time to wait for all evaluations in the batch to complete."""
21
21
 
22
- run_timeout_seconds: int = 600
22
+ line_timeout_seconds: int = 600
23
23
  """The maximum amount of time to wait for an evaluation to run against a single entry
24
24
  in the data input to complete."""
25
25
 
@@ -32,13 +32,16 @@ class BatchEngineConfig:
32
32
  default_num_results: int = 100
33
33
  """The default number of results to return if you don't ask for all results."""
34
34
 
35
+ raise_on_error: bool = True
36
+ """Whether to raise an error if an evaluation fails."""
37
+
35
38
  def __post_init__(self):
36
39
  if self.logger is None:
37
40
  raise ValueError("logger cannot be None")
38
41
  if self.batch_timeout_seconds <= 0:
39
42
  raise ValueError("batch_timeout_seconds must be greater than 0")
40
- if self.run_timeout_seconds <= 0:
41
- raise ValueError("run_timeout_seconds must be greater than 0")
43
+ if self.line_timeout_seconds <= 0:
44
+ raise ValueError("line_timeout_seconds must be greater than 0")
42
45
  if self.max_concurrency <= 0:
43
46
  raise ValueError("max_concurrency must be greater than 0")
44
47
  if self.default_num_results <= 0:
@@ -20,15 +20,31 @@ from concurrent.futures import Executor
20
20
  from functools import partial
21
21
  from contextlib import contextmanager
22
22
  from datetime import datetime, timezone
23
- from typing import Any, Callable, Dict, Final, Generator, Mapping, MutableMapping, Optional, Sequence, Set, Tuple, cast
23
+ from typing import (
24
+ Any,
25
+ Callable,
26
+ Dict,
27
+ Final,
28
+ Generator,
29
+ List,
30
+ Mapping,
31
+ MutableMapping,
32
+ Optional,
33
+ Sequence,
34
+ Set,
35
+ Tuple,
36
+ cast,
37
+ Literal,
38
+ )
24
39
  from uuid import uuid4
25
40
 
41
+ from ._config import BatchEngineConfig
26
42
  from ._utils import DEFAULTS_KEY, get_int_env_var, get_value_from_path, is_async_callable
27
43
  from ._status import BatchStatus
28
44
  from ._result import BatchResult, BatchRunDetails, BatchRunError, TokenMetrics
29
45
  from ._run_storage import AbstractRunStorage, NoOpRunStorage
30
- from .._common._logging import log_progress, NodeLogManager
31
- from ..._exceptions import ErrorBlame
46
+ from .._common._logging import log_progress, logger, NodeLogManager
47
+ from ..._exceptions import ErrorBlame, EvaluationException
32
48
  from ._exceptions import (
33
49
  BatchEngineCanceledError,
34
50
  BatchEngineError,
@@ -54,30 +70,25 @@ class BatchEngine:
54
70
  self,
55
71
  func: Callable,
56
72
  *,
73
+ config: BatchEngineConfig,
57
74
  storage: Optional[AbstractRunStorage] = None,
58
- batch_timeout_sec: Optional[int] = None,
59
- line_timeout_sec: Optional[int] = None,
60
- max_worker_count: Optional[int] = None,
61
75
  executor: Optional[Executor] = None,
62
76
  ):
63
77
  """Create a new batch engine instance
64
78
 
65
79
  :param Callable func: The function to run the flow
80
+ :param BatchEngineConfig config: The configuration for the batch engine
66
81
  :param Optional[AbstractRunStorage] storage: The storage to store execution results
67
- :param Optional[int] batch_timeout_sec: The timeout of batch run in seconds
68
- :param Optional[int] line_timeout_sec: The timeout of each line in seconds
69
- :param Optional[int] max_worker_count: The concurrency limit of batch run
70
82
  :param Optional[Executor] executor: The executor to run the flow (if needed)
71
83
  """
72
84
 
73
85
  self._func: Callable = func
86
+ self._config: BatchEngineConfig = config
74
87
  self._storage: AbstractRunStorage = storage or NoOpRunStorage()
75
88
 
76
- # TODO ralphe: Consume these from the batch context/config instead of from
77
- # kwargs or (even worse) environment variables
78
- self._batch_timeout_sec = batch_timeout_sec or get_int_env_var("PF_BATCH_TIMEOUT_SEC")
79
- self._line_timeout_sec = line_timeout_sec or get_int_env_var("PF_LINE_TIMEOUT_SEC", 600)
80
- self._max_worker_count = max_worker_count or get_int_env_var("PF_WORKER_COUNT") or MAX_WORKER_COUNT
89
+ self._batch_timeout_sec = self._config.batch_timeout_seconds
90
+ self._line_timeout_sec = self._config.line_timeout_seconds
91
+ self._max_worker_count = self._config.max_concurrency
81
92
 
82
93
  self._executor: Optional[Executor] = executor
83
94
  self._is_canceled: bool = False
@@ -85,15 +96,13 @@ class BatchEngine:
85
96
  async def run(
86
97
  self,
87
98
  data: Sequence[Mapping[str, Any]],
88
- column_mapping: Mapping[str, str],
99
+ column_mapping: Optional[Mapping[str, str]],
89
100
  *,
90
101
  id: Optional[str] = None,
91
102
  max_lines: Optional[int] = None,
92
103
  ) -> BatchResult:
93
104
  if not data:
94
105
  raise BatchEngineValidationError("Please provide a non-empty data mapping.")
95
- if not column_mapping:
96
- raise BatchEngineValidationError("The column mapping is required.")
97
106
 
98
107
  start_time = datetime.now(timezone.utc)
99
108
 
@@ -105,6 +114,8 @@ class BatchEngine:
105
114
  id = id or str(uuid4())
106
115
  result: BatchResult = await self._exec_in_task(id, batch_inputs, start_time)
107
116
  return result
117
+ except EvaluationException:
118
+ raise
108
119
  except Exception as ex:
109
120
  raise BatchEngineError(
110
121
  "Unexpected error while running the batch run.", blame=ErrorBlame.SYSTEM_ERROR
@@ -114,20 +125,58 @@ class BatchEngine:
114
125
  # TODO ralphe: Make sure this works
115
126
  self._is_canceled = True
116
127
 
117
- @staticmethod
118
128
  def _apply_column_mapping(
129
+ self,
119
130
  data: Sequence[Mapping[str, Any]],
120
- column_mapping: Mapping[str, str],
131
+ column_mapping: Optional[Mapping[str, str]],
121
132
  max_lines: Optional[int],
122
133
  ) -> Sequence[Mapping[str, str]]:
134
+
135
+ resolved_column_mapping: Mapping[str, str] = self._resolve_column_mapping(column_mapping)
136
+ resolved_column_mapping.update(self._generate_defaults_for_column_mapping())
137
+ return self._apply_column_mapping_to_lines(data, resolved_column_mapping, max_lines)
138
+
139
+ def _resolve_column_mapping(
140
+ self,
141
+ column_mapping: Optional[Mapping[str, str]],
142
+ ) -> Mapping[str, str]:
143
+ parameters = inspect.signature(self._func).parameters
144
+ default_column_mapping: Dict[str, str] = {
145
+ name: f"${{data.{name}}}"
146
+ for name, value in parameters.items()
147
+ if name not in ["self", "cls", "args", "kwargs"]
148
+ }
149
+ resolved_mapping: Dict[str, str] = default_column_mapping.copy()
150
+
151
+ for name, value in parameters.items():
152
+ if value and value.default is not inspect.Parameter.empty:
153
+ resolved_mapping.pop(name)
154
+
155
+ resolved_mapping.update(column_mapping or {})
156
+ return resolved_mapping
157
+
158
+ def _generate_defaults_for_column_mapping(self) -> Mapping[Literal["$defaults$"], Any]:
159
+
160
+ return {
161
+ DEFAULTS_KEY: {
162
+ name: value.default
163
+ for name, value in inspect.signature(self._func).parameters.items()
164
+ if value.default is not inspect.Parameter.empty
165
+ }
166
+ }
167
+
168
+ @staticmethod
169
+ def _apply_column_mapping_to_lines(
170
+ data: Sequence[Mapping[str, Any]],
171
+ column_mapping: Mapping[str, str],
172
+ max_lines: Optional[int],
173
+ ) -> Sequence[Mapping[str, Any]]:
123
174
  data = data[:max_lines] if max_lines else data
124
175
 
125
176
  inputs: Sequence[Mapping[str, Any]] = []
126
- line: int = 0
127
177
  defaults = cast(Mapping[str, Any], column_mapping.get(DEFAULTS_KEY, {}))
128
178
 
129
- for input in data:
130
- line += 1
179
+ for line_number, input in enumerate(data, start=1):
131
180
  mapped: Dict[str, Any] = {}
132
181
  missing_inputs: Set[str] = set()
133
182
 
@@ -148,18 +197,18 @@ class BatchEngine:
148
197
  continue
149
198
 
150
199
  dict_path = match.group(1)
151
- found, value = get_value_from_path(dict_path, input)
200
+ found, mapped_value = get_value_from_path(dict_path, input)
152
201
  if not found: # try default value
153
- found, value = get_value_from_path(dict_path, defaults)
202
+ found, mapped_value = get_value_from_path(dict_path, defaults)
154
203
 
155
204
  if found:
156
- mapped[key] = value
205
+ mapped[key] = mapped_value
157
206
  else:
158
207
  missing_inputs.add(dict_path)
159
208
 
160
209
  if missing_inputs:
161
210
  missing = ", ".join(missing_inputs)
162
- raise BatchEngineValidationError(f"Missing inputs for line {line}: '{missing}'")
211
+ raise BatchEngineValidationError(f"Missing inputs for line {line_number}: '{missing}'")
163
212
 
164
213
  inputs.append(mapped)
165
214
 
@@ -212,10 +261,12 @@ class BatchEngine:
212
261
  end_time=None,
213
262
  tokens=TokenMetrics(0, 0, 0),
214
263
  error=BatchRunError("The line run is not completed.", None),
264
+ index=i,
215
265
  )
216
266
  )
217
267
  for i in range(len(batch_inputs))
218
268
  ]
269
+ self.handle_line_failures(result_details)
219
270
 
220
271
  for line_result in result_details:
221
272
  # Indicate the worst status of the batch run. This works because
@@ -229,9 +280,15 @@ class BatchEngine:
229
280
  metrics.total_tokens += line_result.tokens.total_tokens
230
281
 
231
282
  if failed_lines and not error:
232
- error = BatchEngineRunFailedError(
233
- str(floor(failed_lines / len(batch_inputs) * 100)) + f"% of the batch run failed."
283
+ error_message = f"{floor(failed_lines / len(batch_inputs) * 100)}% of the batch run failed."
284
+ first_exception: Optional[Exception] = next(
285
+ (result.error.exception for result in result_details if result.error and result.error.exception),
286
+ None,
234
287
  )
288
+ if first_exception is not None:
289
+ error_message += f" {first_exception}"
290
+
291
+ error = BatchEngineRunFailedError(error_message)
235
292
 
236
293
  return BatchResult(
237
294
  status=status,
@@ -283,6 +340,13 @@ class BatchEngine:
283
340
  # TODO ralphe: set logger to use here
284
341
  )
285
342
 
343
+ def __preprocess_inputs(self, inputs: Mapping[str, Any]) -> Mapping[str, Any]:
344
+
345
+ func_params = inspect.signature(self._func).parameters
346
+
347
+ filtered_params = {key: value for key, value in inputs.items() if key in func_params}
348
+ return filtered_params
349
+
286
350
  async def _exec_line_async(
287
351
  self,
288
352
  run_id: str,
@@ -298,6 +362,7 @@ class BatchEngine:
298
362
  end_time=None,
299
363
  tokens=TokenMetrics(0, 0, 0),
300
364
  error=None,
365
+ index=index,
301
366
  )
302
367
 
303
368
  try:
@@ -313,15 +378,17 @@ class BatchEngine:
313
378
  # For now we will just run the function in the current process, but in the future we may
314
379
  # want to consider running the function in a separate process for isolation reasons.
315
380
  output: Any
381
+
382
+ processed_inputs = self.__preprocess_inputs(inputs)
316
383
  if is_async_callable(self._func):
317
- output = await self._func(**inputs)
384
+ output = await self._func(**processed_inputs)
318
385
  else:
319
386
  # to maximize the parallelism, we run the synchronous function in a separate thread
320
387
  # and await its result
321
388
  output = await asyncio.get_event_loop().run_in_executor(
322
- self._executor,
323
- partial(self._func, **inputs))
324
-
389
+ self._executor, partial(self._func, **processed_inputs)
390
+ )
391
+
325
392
  # This should in theory never happen but as an extra precaution, let's check if the output
326
393
  # is awaitable and await it if it is.
327
394
  if inspect.isawaitable(output):
@@ -340,6 +407,24 @@ class BatchEngine:
340
407
 
341
408
  return index, details
342
409
 
410
+ @staticmethod
411
+ def handle_line_failures(run_infos: List[BatchRunDetails], raise_on_line_failure: bool = False):
412
+ """Handle line failures in batch run"""
413
+ failed_run_infos: List[BatchRunDetails] = [r for r in run_infos if r.status == BatchStatus.Failed]
414
+ failed_msg: Optional[str] = None
415
+ if len(failed_run_infos) > 0:
416
+ failed_indexes = ",".join([str(r.index) for r in failed_run_infos])
417
+ first_fail_exception: str = failed_run_infos[0].error.details
418
+ if raise_on_line_failure:
419
+ failed_msg = "Flow run failed due to the error: " + first_fail_exception
420
+ raise Exception(failed_msg)
421
+
422
+ failed_msg = (
423
+ f"{len(failed_run_infos)}/{len(run_infos)} flow run failed, indexes: [{failed_indexes}],"
424
+ f" exception of index {failed_run_infos[0].index}: {first_fail_exception}"
425
+ )
426
+ logger.error(failed_msg)
427
+
343
428
  def _persist_run_info(self, line_results: Sequence[BatchRunDetails]):
344
429
  # TODO ralphe: implement?
345
430
  pass
@@ -90,7 +90,9 @@ def _openai_api_list() -> Generator[Tuple[Any, Callable, bool], None, None]:
90
90
  except ImportError:
91
91
  raise MissingRequiredPackage("Please install the 'openai' package to use the Azure AI Evaluation SDK")
92
92
  except AttributeError:
93
- logging.warning("The module '%s' does not have class '%s' or method '%s'", module_name, class_name, method_name)
93
+ logging.warning(
94
+ "The module '%s' does not have class '%s' or method '%s'", module_name, class_name, method_name
95
+ )
94
96
 
95
97
 
96
98
  def inject_openai_api():
@@ -117,6 +119,7 @@ def recover_openai_api():
117
119
 
118
120
  class CaptureOpenAITokenUsage:
119
121
  """Context manager to capture OpenAI token usage."""
122
+
120
123
  def __init__(self):
121
124
  self._tokens = TokenMetrics(0, 0, 0)
122
125
 
@@ -126,4 +129,4 @@ class CaptureOpenAITokenUsage:
126
129
 
127
130
  def __exit__(self, exc_type: Optional[Exception], exc_value: Optional[Exception], traceback: Optional[Any]) -> None:
128
131
  captured_metrics = _token_metrics.get()
129
- self._tokens.update(captured_metrics)
132
+ self._tokens.update(captured_metrics)
@@ -55,6 +55,8 @@ class BatchRunDetails:
55
55
  """The token metrics of the line run."""
56
56
  error: Optional[BatchRunError]
57
57
  """The error of the line run. This will only be set if the status is Failed."""
58
+ index: int
59
+ """The line run index."""
58
60
 
59
61
  @property
60
62
  def duration(self) -> timedelta:
@@ -58,7 +58,7 @@ class Run:
58
58
  dynamic_callable: Callable,
59
59
  name_prefix: Optional[str],
60
60
  inputs: Sequence[Mapping[str, Any]],
61
- column_mapping: Mapping[str, str],
61
+ column_mapping: Optional[Mapping[str, str]] = None,
62
62
  created_on: Optional[datetime] = None,
63
63
  run: Optional["Run"] = None,
64
64
  ):
@@ -70,7 +70,7 @@ class Run:
70
70
  self.dynamic_callable = dynamic_callable
71
71
  self.name = self._generate_run_name(name_prefix, self._created_on)
72
72
  self.inputs = inputs
73
- self.column_mapping = column_mapping
73
+ self.column_mapping: Optional[Mapping[str, str]] = column_mapping
74
74
  self.result: Optional[BatchResult] = None
75
75
  self.metrics: Mapping[str, Any] = {}
76
76
  self._run = run