azure-ai-evaluation 1.3.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (142) hide show
  1. azure/ai/evaluation/__init__.py +27 -1
  2. azure/ai/evaluation/_azure/_models.py +6 -6
  3. azure/ai/evaluation/_common/constants.py +6 -2
  4. azure/ai/evaluation/_common/rai_service.py +39 -5
  5. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  6. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  7. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  8. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  9. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  10. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  11. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  12. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  13. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  14. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  15. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  16. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  17. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  18. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  19. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  20. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  21. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  22. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  23. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  24. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1225 -0
  25. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  26. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  27. azure/ai/evaluation/_common/utils.py +23 -3
  28. azure/ai/evaluation/_constants.py +7 -0
  29. azure/ai/evaluation/_converters/__init__.py +3 -0
  30. azure/ai/evaluation/_converters/_ai_services.py +804 -0
  31. azure/ai/evaluation/_converters/_models.py +302 -0
  32. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -3
  33. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +104 -0
  34. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  35. azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
  36. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +9 -4
  37. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +42 -22
  38. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +1 -1
  39. azure/ai/evaluation/_evaluate/_eval_run.py +2 -2
  40. azure/ai/evaluation/_evaluate/_evaluate.py +109 -64
  41. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -89
  42. azure/ai/evaluation/_evaluate/_utils.py +3 -3
  43. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +23 -3
  44. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  45. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +120 -0
  46. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +21 -2
  47. azure/ai/evaluation/_evaluators/_common/_base_eval.py +44 -4
  48. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +4 -2
  49. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +44 -5
  50. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +16 -4
  51. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +42 -5
  52. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +15 -0
  53. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +15 -0
  54. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +15 -0
  55. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +15 -0
  56. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +28 -4
  57. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +21 -2
  58. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +26 -3
  59. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +22 -4
  60. azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
  61. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +152 -0
  62. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +161 -0
  63. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +26 -3
  64. azure/ai/evaluation/_evaluators/_qa/_qa.py +51 -7
  65. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +26 -2
  66. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  67. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +158 -0
  68. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +99 -0
  69. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +21 -2
  70. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +113 -4
  71. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +23 -3
  72. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +24 -5
  73. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  74. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +148 -0
  75. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +117 -0
  76. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  77. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +292 -0
  78. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +71 -0
  79. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  80. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +103 -0
  81. azure/ai/evaluation/_evaluators/_xpia/xpia.py +2 -0
  82. azure/ai/evaluation/_exceptions.py +5 -0
  83. azure/ai/evaluation/_legacy/__init__.py +3 -0
  84. azure/ai/evaluation/_legacy/_adapters/__init__.py +21 -0
  85. azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
  86. azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
  87. azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
  88. azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
  89. azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
  90. azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
  91. azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
  92. azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
  93. azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
  94. azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
  95. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  96. azure/ai/evaluation/_legacy/_batch_engine/_config.py +45 -0
  97. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +368 -0
  98. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  99. azure/ai/evaluation/_legacy/_batch_engine/_logging.py +292 -0
  100. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +23 -0
  101. azure/ai/evaluation/_legacy/_batch_engine/_result.py +99 -0
  102. azure/ai/evaluation/_legacy/_batch_engine/_run.py +121 -0
  103. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  104. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +217 -0
  105. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  106. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +105 -0
  107. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +82 -0
  108. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  109. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  110. azure/ai/evaluation/_legacy/prompty/_connection.py +182 -0
  111. azure/ai/evaluation/_legacy/prompty/_exceptions.py +59 -0
  112. azure/ai/evaluation/_legacy/prompty/_prompty.py +313 -0
  113. azure/ai/evaluation/_legacy/prompty/_utils.py +545 -0
  114. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  115. azure/ai/evaluation/_safety_evaluation/__init__.py +1 -1
  116. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  117. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +251 -150
  118. azure/ai/evaluation/_version.py +1 -1
  119. azure/ai/evaluation/red_team/__init__.py +19 -0
  120. azure/ai/evaluation/red_team/_attack_objective_generator.py +195 -0
  121. azure/ai/evaluation/red_team/_attack_strategy.py +45 -0
  122. azure/ai/evaluation/red_team/_callback_chat_target.py +74 -0
  123. azure/ai/evaluation/red_team/_default_converter.py +21 -0
  124. azure/ai/evaluation/red_team/_red_team.py +1887 -0
  125. azure/ai/evaluation/red_team/_red_team_result.py +382 -0
  126. azure/ai/evaluation/red_team/_utils/__init__.py +3 -0
  127. azure/ai/evaluation/red_team/_utils/constants.py +65 -0
  128. azure/ai/evaluation/red_team/_utils/formatting_utils.py +165 -0
  129. azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
  130. azure/ai/evaluation/red_team/_utils/strategy_utils.py +192 -0
  131. azure/ai/evaluation/simulator/_adversarial_scenario.py +3 -1
  132. azure/ai/evaluation/simulator/_adversarial_simulator.py +54 -27
  133. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +145 -0
  134. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +71 -1
  135. azure/ai/evaluation/simulator/_simulator.py +1 -1
  136. {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/METADATA +80 -15
  137. azure_ai_evaluation-1.5.0.dist-info/RECORD +207 -0
  138. {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/WHEEL +1 -1
  139. azure/ai/evaluation/simulator/_tracing.py +0 -89
  140. azure_ai_evaluation-1.3.0.dist-info/RECORD +0 -119
  141. {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/NOTICE.txt +0 -0
  142. {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,368 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ # This contains code merged together from the following files:
6
+ # promptflow-devkit/promptflow/batch/_batch_engine.py
7
+ # promptflow-devkit/promptflow/_proxy/_python_executor_proxy.py
8
+ # promptflow-core/promptflow/executor/_script_executor.py
9
+ # TODO ralphe: The way this code does batch execution needs to be improved. For now
10
+ # porting over the code largely as is to remove the Promptflow dependency
11
+ # as quickly as possible. In phase 2 this code will be heavily refactored.
12
+
13
+ import re
14
+ import asyncio
15
+ from math import floor
16
+ from asyncio import Semaphore
17
+ from contextlib import contextmanager
18
+ from dataclasses import dataclass
19
+ from datetime import datetime, timedelta, timezone
20
+ from typing import Any, Callable, Dict, Final, Generator, Mapping, MutableMapping, Optional, Sequence, Set, Tuple
21
+ from uuid import uuid4
22
+
23
+ from ._utils import get_int_env_var, get_value_from_path
24
+ from ._status import BatchStatus
25
+ from ._result import BatchResult, BatchRunDetails, BatchRunError, TokenMetrics
26
+ from ._run_storage import AbstractRunStorage, NoOpRunStorage
27
+ from ._logging import log_progress, NodeLogManager
28
+ from ..._exceptions import ErrorBlame
29
+ from ._exceptions import (
30
+ BatchEngineCanceledError,
31
+ BatchEngineError,
32
+ BatchEngineRunFailedError,
33
+ BatchEngineTimeoutError,
34
+ BatchEngineValidationError,
35
+ )
36
+ from ._utils_deprecated import (
37
+ async_run_allowing_running_loop,
38
+ convert_eager_flow_output_to_dict,
39
+ )
40
+
41
+
42
# Upper bound on concurrently executing lines, used when neither the caller nor
# the PF_WORKER_COUNT environment variable provides a value.
MAX_WORKER_COUNT: Final[int] = 10

# Matches a column-mapping value that consists solely of a "${path}" reference.
# Group 1 captures the path between the braces.
KEYWORD_PATTERN: Final = re.compile(r"^\${([^{}]+)}$")
44
+
45
+
46
class BatchEngine:
    """Run an executor once per input line, concurrently, and aggregate the outcome.

    Every row of the input data is converted into keyword arguments through a
    column mapping, the executor is awaited with those arguments, and the
    per-line outcomes are combined into a single ``BatchResult``.
    """

    def __init__(
        self,
        executor: Callable,
        *,
        storage: Optional[AbstractRunStorage] = None,
        batch_timeout_sec: Optional[int] = None,
        line_timeout_sec: Optional[int] = None,
        max_worker_count: Optional[int] = None,
        **kwargs: Any,
    ):
        """Create a new batch engine instance

        Explicit arguments that are ``None`` (or otherwise falsy, e.g. ``0``) fall
        back to the value obtained from ``get_int_env_var`` for the matching
        ``PF_*`` environment variable name.

        :param Callable executor: The executor to run the flow. It is awaited as
            ``await executor(**inputs)`` once per input line.
        :param Optional[AbstractRunStorage] storage: The storage to store execution results.
            A ``NoOpRunStorage`` is used when omitted.
        :param Optional[int] batch_timeout_sec: The timeout of batch run in seconds
        :param Optional[int] line_timeout_sec: The timeout of each line in seconds.
            The value is stored and forwarded in ``kwargs`` but is not enforced by
            this class yet.
        :param Optional[int] max_worker_count: The concurrency limit of batch run
        :param kwargs: The keyword arguments related to creating the executor proxy class
        :type kwargs: Any
        """

        self._executor = executor
        # self._working_dir = working_dir

        # self._is_eager_flow = True
        # self._is_prompty_flow = False
        # self._program_language = FlowLanguage.Python
        # self._message_format = MessageFormatType.BASIC
        # self._multimedia_processor = MultimediaProcessor.create(self._message_format)
        # self._connections = {}

        self._storage: AbstractRunStorage = storage or NoOpRunStorage()

        # TODO ralphe: Consume these from the batch context/config instead of from
        #              kwargs or (even worse) environment variables
        # self._batch_use_async = kwargs.get("batch_use_async", True)
        self._batch_timeout_sec = batch_timeout_sec or get_int_env_var("PF_BATCH_TIMEOUT_SEC")
        self._line_timeout_sec = line_timeout_sec or get_int_env_var("PF_LINE_TIMEOUT_SEC", 600)
        self._max_worker_count = max_worker_count or get_int_env_var("PF_WORKER_COUNT") or MAX_WORKER_COUNT
        # update kwargs with worker_count and line_timeout_sec
        kwargs.update({"worker_count": self._max_worker_count, "line_timeout_sec": self._line_timeout_sec})

        self._is_canceled: bool = False
        self._kwargs: Mapping[str, Any] = kwargs
        # self._init_kwargs: Mapping[str, Any] = init_kwargs or {}

    def run(
        self,
        data: Sequence[Mapping[str, Any]],
        column_mapping: Mapping[str, str],
        *,
        id: Optional[str] = None,
        max_lines: Optional[int] = None,
    ) -> BatchResult:
        """Execute the batch synchronously and return the aggregated result.

        :param data: The input rows, one mapping per line.
        :param column_mapping: Maps executor argument names to literal values or
            ``${path}`` references that are resolved against each row.
        :keyword id: The run id. A random UUID string is generated when omitted.
        :keyword max_lines: Only the first ``max_lines`` rows are processed when set
            to a non-zero value.
        :return: The aggregated batch result.
        :rtype: BatchResult
        :raises BatchEngineValidationError: If ``data`` or ``column_mapping`` is empty,
            a referenced input is missing, or the mapping yields no data.
        :raises BatchEngineError: If executing the batch raises any exception.
        """
        if not data:
            raise BatchEngineValidationError("Please provide a non-empty data mapping.")
        if not column_mapping:
            raise BatchEngineValidationError("The column mapping is required.")

        start_time = datetime.now(timezone.utc)

        batch_inputs = self._apply_column_mapping(data, column_mapping, max_lines)
        if not batch_inputs or all(len(row) == 0 for row in batch_inputs):
            raise BatchEngineValidationError("No data to process.")

        try:
            id = id or str(uuid4())

            result: BatchResult = async_run_allowing_running_loop(self._exec_in_task, id, batch_inputs, start_time)

            return result
        except Exception as ex:
            raise BatchEngineError(
                "Unexpected error while running the batch run.", blame=ErrorBlame.SYSTEM_ERROR
            ) from ex

    def cancel(self):
        """Request cancellation of the running batch.

        Only sets a flag; the running batch polls it (at most once per second)
        and cancels its work when it sees the flag.
        """
        # TODO ralphe: Make sure this works
        self._is_canceled = True

    @staticmethod
    def _apply_column_mapping(
        data: Sequence[Mapping[str, Any]],
        column_mapping: Mapping[str, str],
        max_lines: Optional[int],
    ) -> Sequence[Mapping[str, Any]]:
        """Build the executor keyword arguments for every input row.

        :param data: The input rows.
        :param column_mapping: Maps argument names to literals or ``${path}`` references.
        :param max_lines: When truthy, only the first ``max_lines`` rows are mapped.
        :return: One mapping of executor arguments per processed row, in input order.
        :raises BatchEngineValidationError: For the first row (1-based) in which a
            referenced input cannot be found.
        """
        data = data[:max_lines] if max_lines else data

        return [BatchEngine._map_line(line, row, column_mapping) for line, row in enumerate(data, start=1)]

    @staticmethod
    def _map_line(line: int, row: Mapping[str, Any], column_mapping: Mapping[str, str]) -> Dict[str, Any]:
        """Resolve the column mapping against a single input row.

        :param line: The 1-based line number, used only in the error message.
        :param row: The input row to resolve ``${path}`` references against.
        :param column_mapping: Maps argument names to literals or ``${path}`` references.
        :return: The resolved executor arguments for this row.
        :raises BatchEngineValidationError: If any referenced input is missing.
        """
        mapped: Dict[str, Any] = {}
        missing_inputs: Set[str] = set()

        for key, value in column_mapping.items():
            if not isinstance(value, str):
                # All non-string values are literal values.
                mapped[key] = value
                continue

            match = KEYWORD_PATTERN.search(value)
            if match is None:
                # Literal string value
                mapped[key] = value
                continue

            dict_path = match.group(1)
            found, resolved = get_value_from_path(dict_path, row)
            if found:
                mapped[key] = resolved
            else:
                missing_inputs.add(dict_path)

        if missing_inputs:
            # Sort so the message is deterministic regardless of set iteration order.
            missing = ", ".join(sorted(missing_inputs))
            raise BatchEngineValidationError(f"Missing inputs for line {line}: '{missing}'")

        return mapped

    async def _exec_in_task(
        self, run_id: str, batch_inputs: Sequence[Mapping[str, Any]], start_time: datetime
    ) -> BatchResult:
        """Run the batch in a background task while watching for cancel/timeout.

        :param run_id: The id of this batch run.
        :param batch_inputs: The mapped executor arguments, one mapping per line.
        :param start_time: When the batch run started (used for the timeout check).
        :return: The aggregated result; lines that never finished are reported as failed.
        :rtype: BatchResult
        """
        # Since the batch execution is not guaranteed to be completed in the same order
        # as the inputs, we keep track of these in a mapping from index to result
        results: Dict[int, BatchRunDetails] = {}
        status: BatchStatus = BatchStatus.Completed
        error: Optional[Exception] = None

        task = asyncio.create_task(self._exec_batch(run_id, batch_inputs, start_time, results))

        while not task.done():
            # Wake up as soon as the batch finishes, or after 1s at the latest so the
            # cancel flag and the batch timeout are still polled once per second.
            await asyncio.wait([task], timeout=1)
            if self._is_canceled:
                task.cancel()
                # use current completed line results and aggregation results to create a BatchResult
                status = BatchStatus.Canceled
                error = BatchEngineCanceledError("The batch run is canceled by user.")
                break
            elif self._batch_timeout_expired(start_time):
                task.cancel()
                status = BatchStatus.Failed
                error = BatchEngineTimeoutError(
                    f"The batch run failed due to timeout [{self._batch_timeout_sec}s]. "
                    f"Please adjust the timeout to a higher value."
                )
                break

        if not task.done():
            # Cancellation was requested above. Let it propagate so the batch task can
            # tear down its per-line tasks before the results are aggregated.
            await asyncio.wait([task])

        if not task.cancelled():
            # Retrieve the task's exception (if any) so that it is neither silently
            # dropped nor reported by asyncio as "never retrieved".
            failure = task.exception()
            if failure is not None and error is None:
                status = BatchStatus.Failed
                error = BatchEngineError(
                    "Unexpected error while running the batch run.", blame=ErrorBlame.SYSTEM_ERROR
                )
                error.__cause__ = failure

        end_time = datetime.now(timezone.utc)
        metrics = TokenMetrics(0, 0, 0)
        failed_lines: int = 0

        # generate the details in the same order as the inputs and fill in the missing results
        # with a failed status
        result_details = [
            (
                results[i]
                if i in results
                else BatchRunDetails(
                    id=BatchRunDetails.create_id(run_id, i),
                    status=BatchStatus.Failed,
                    result=None,
                    start_time=None,
                    end_time=None,
                    tokens=TokenMetrics(0, 0, 0),
                    error=BatchRunError("The line run is not completed.", None),
                )
            )
            for i in range(len(batch_inputs))
        ]

        for line_result in result_details:
            # Indicate the worst status of the batch run. This works because
            # canceled and failed have a higher value than completed.
            status = max(status, line_result.status)
            if BatchStatus.is_failed(line_result.status):
                failed_lines += 1
            if line_result.tokens:
                metrics.prompt_tokens += line_result.tokens.prompt_tokens
                metrics.completion_tokens += line_result.tokens.completion_tokens
                metrics.total_tokens += line_result.tokens.total_tokens

        if failed_lines and not error:
            error = BatchEngineRunFailedError(
                f"{floor(failed_lines / len(batch_inputs) * 100)}% of the batch run failed."
            )

        return BatchResult(
            status=status,
            total_lines=len(batch_inputs),
            failed_lines=failed_lines,
            start_time=start_time,
            end_time=end_time,
            tokens=metrics,
            details=result_details,
            error=error,
        )

    async def _exec_batch(
        self,
        run_id: str,
        batch_inputs: Sequence[Mapping[str, Any]],
        start_time: datetime,
        results: MutableMapping[int, BatchRunDetails],
    ) -> None:
        """Execute all lines with bounded concurrency, filling ``results`` as they finish.

        :param run_id: The id of this batch run.
        :param batch_inputs: The mapped executor arguments, one mapping per line.
        :param start_time: When the batch run started (passed to the progress log).
        :param results: Output mapping from line index to line result; updated in place.
        """
        semaphore: Semaphore = Semaphore(self._max_worker_count)

        # TODO ralphe: This async code needs to be refactored to use e.g. asyncio.gather, or
        #              asyncio.as_completed.
        async def create_under_semaphore(index: int, inputs: Mapping[str, Any]):
            async with semaphore:
                return await self._exec_line_async(run_id, inputs, index)

        pending = [
            asyncio.create_task(create_under_semaphore(index, inputs)) for index, inputs in enumerate(batch_inputs)
        ]

        total_lines: int = len(batch_inputs)
        completed_lines: int = 0
        try:
            while completed_lines < total_lines:
                # wait for any task to complete
                done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
                completed_line_results = [task.result() for task in done]
                # persist node run infos and flow run info in line result to storage
                self._persist_run_info([result for _, result in completed_line_results])
                results.update(completed_line_results)
                # update the progress log
                completed_lines += len(completed_line_results)
                log_progress(
                    run_start_time=start_time,
                    total_count=total_lines,
                    current_count=completed_lines,
                    # TODO ralphe: set logger to use here
                )
        finally:
            # Cancelling this coroutine does not cancel the tasks it created, and
            # asyncio.wait() does not cancel what it waits on either. Cancel whatever
            # is still outstanding so no line keeps running after a cancel, a batch
            # timeout, or an unexpected error. On normal completion this is a no-op.
            for line_task in pending:
                line_task.cancel()

    async def _exec_line_async(
        self,
        run_id: str,
        inputs: Mapping[str, Any],
        index: int,
    ) -> Tuple[int, BatchRunDetails]:
        """Await the executor for a single line and capture its outcome.

        Exceptions raised by the executor are recorded on the returned details
        rather than propagated.

        :param run_id: The id of this batch run.
        :param inputs: The keyword arguments passed to the executor.
        :param index: The zero-based index of the line within the batch.
        :return: The line index together with the details of the line run.
        :rtype: Tuple[int, BatchRunDetails]
        """
        with self._exec_line_context(run_id, index):
            # NOTE(review): placeholder results in _exec_in_task build their id with
            # BatchRunDetails.create_id(run_id, i) - confirm both produce the same format.
            details: BatchRunDetails = BatchRunDetails(
                id=f"{run_id}_{index}",
                status=BatchStatus.NotStarted,
                result=None,
                start_time=datetime.now(timezone.utc),
                end_time=None,
                tokens=TokenMetrics(0, 0, 0),
                error=None,
            )

            try:
                # TODO ralphe: Handle line timeouts here
                output: Any = await self._executor(**inputs)
                details.status = BatchStatus.Completed
                details.result = convert_eager_flow_output_to_dict(output)

                # TODO figure out how to get the token metrics here
            except Exception as ex:
                details.status = BatchStatus.Failed
                details.error = BatchRunError(
                    f"Error while evaluating single input: {ex.__class__.__name__}: {str(ex)}", ex
                )
            finally:
                details.end_time = datetime.now(timezone.utc)

            return index, details

    def _persist_run_info(self, line_results: Sequence[BatchRunDetails]):
        """Persist completed line results. Currently a no-op placeholder."""
        # TODO ralphe: implement?
        pass

    def _batch_timeout_expired(self, start_time: datetime) -> bool:
        """Return True when a batch timeout is configured and has elapsed since ``start_time``."""
        if self._batch_timeout_sec is None:
            return False
        return (datetime.now(timezone.utc) - start_time).total_seconds() > self._batch_timeout_sec

    @contextmanager
    def _exec_line_context(self, run_id: str, line_number: int) -> Generator[None, Any, None]:
        """Context manager wrapped around the execution of a single line.

        Enters a ``NodeLogManager`` whose node context is set to
        ``(run_id, "Flex", line_number)`` together with the operation context.
        """
        # TODO ralphe: Do proper tracing and logging here
        log_manager = NodeLogManager()
        log_manager.set_node_context(run_id, "Flex", line_number)
        with log_manager, self._update_operation_context(run_id, line_number):
            yield

    @contextmanager
    def _update_operation_context(self, run_id: str, line_number: int) -> Generator[None, Any, None]:
        """Placeholder context manager; currently yields without changing any state.

        The commented-out code below is the not-yet-ported operation-context
        handling, kept as a reference.
        """
        # operation_context = OperationContext.get_instance()
        # original_context = operation_context.copy()
        # original_mode = operation_context.get("run_mode", RunMode.Test.name)
        # values_for_context = {"flow_id": self._flow_id, "root_run_id": run_id}
        # if original_mode == RunMode.Batch.name:
        #     values_for_otel = {
        #         "batch_run_id": run_id,
        #         "line_number": line_number,
        #     }
        # else:
        #     values_for_otel = {"line_run_id": run_id}
        # try:
        #     append_promptflow_package_ua(operation_context)
        #     operation_context.set_execution_target(execution_target=self._execution_target)
        #     operation_context.set_default_tracing_keys(DEFAULT_TRACING_KEYS)
        #     operation_context.run_mode = original_mode
        #     operation_context.update(values_for_context)
        #     for k, v in values_for_otel.items():
        #         operation_context._add_otel_attributes(k, v)
        #     # Inject OpenAI API to make sure traces and headers injection works and
        #     # update OpenAI API configs from environment variables.
        #     inject_openai_api()
        yield

        # finally:
        #     OperationContext.set_instance(original_context)
@@ -0,0 +1,88 @@
1
+ # ---------------------------------------------------------
2
+ # Copyright (c) Microsoft Corporation. All rights reserved.
3
+ # ---------------------------------------------------------
4
+
5
+ from ..._exceptions import ErrorCategory, ErrorBlame, ErrorTarget, EvaluationException
6
+
7
+
8
class BatchEngineError(EvaluationException):
    """Base exception for failures caused by, or occurring in, the batch engine.

    Unless the caller overrides them, the error is categorised as a failed
    execution targeting the evaluation run, with unknown blame.

    :param message: The error message.
    :type message: str
    """

    def __init__(self, message: str, **kwargs):
        # Caller-supplied keyword arguments take precedence over the defaults.
        merged = {
            "category": ErrorCategory.FAILED_EXECUTION,
            "target": ErrorTarget.EVAL_RUN,
            "blame": ErrorBlame.UNKNOWN,
            **kwargs,
        }
        super().__init__(message, **merged)
23
+
24
+
25
class BatchEngineValidationError(BatchEngineError):
    """Raised when input validation fails.

    Defaults to an invalid-value category blamed on the user.

    :param message: The error message.
    :type message: str
    """

    def __init__(self, message: str, **kwargs):
        # Caller-supplied keyword arguments take precedence over the defaults.
        merged = {
            "category": ErrorCategory.INVALID_VALUE,
            "blame": ErrorBlame.USER_ERROR,
            **kwargs,
        }
        super().__init__(message, **merged)
36
+
37
+
38
class BatchEngineTimeoutError(BatchEngineError):
    """Raised when a batch engine operation times out.

    Defaults to a failed-execution category blamed on the system.

    :param message: The error message.
    :type message: str
    """

    def __init__(self, message: str, **kwargs):
        # Caller-supplied keyword arguments take precedence over the defaults.
        merged = {
            "category": ErrorCategory.FAILED_EXECUTION,
            "blame": ErrorBlame.SYSTEM_ERROR,
            **kwargs,
        }
        super().__init__(message, **merged)
49
+
50
+
51
class BatchEngineCanceledError(BatchEngineError):
    """Raised when a batch engine operation is canceled.

    Defaults to a failed-execution category blamed on the user.

    :param message: The error message.
    :type message: str
    """

    def __init__(self, message: str, **kwargs):
        # Caller-supplied keyword arguments take precedence over the defaults.
        merged = {
            "category": ErrorCategory.FAILED_EXECUTION,
            "blame": ErrorBlame.USER_ERROR,
            **kwargs,
        }
        super().__init__(message, **merged)
62
+
63
+
64
class BatchEngineRunFailedError(BatchEngineError):
    """Raised when a batch engine run fails.

    Defaults to a failed-execution category blamed on the system.

    :param message: The error message.
    :type message: str
    """

    def __init__(self, message: str, **kwargs):
        # Caller-supplied keyword arguments take precedence over the defaults.
        merged = {
            "category": ErrorCategory.FAILED_EXECUTION,
            "blame": ErrorBlame.SYSTEM_ERROR,
            **kwargs,
        }
        super().__init__(message, **merged)
75
+
76
+
77
class BatchEnginePartialError(BatchEngineError):
    """Raised when a batch engine run has some successful lines mixed in with
    some failures.

    Defaults to a failed-execution category blamed on the system.

    :param message: The error message.
    :type message: str
    """

    def __init__(self, message: str, **kwargs):
        # Caller-supplied keyword arguments take precedence over the defaults.
        merged = {
            "category": ErrorCategory.FAILED_EXECUTION,
            "blame": ErrorBlame.SYSTEM_ERROR,
            **kwargs,
        }
        super().__init__(message, **merged)