azure-ai-evaluation 1.5.0__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of azure-ai-evaluation has been flagged as potentially problematic.
- azure/ai/evaluation/__init__.py +10 -0
- azure/ai/evaluation/_aoai/__init__.py +10 -0
- azure/ai/evaluation/_aoai/aoai_grader.py +89 -0
- azure/ai/evaluation/_aoai/label_grader.py +66 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +65 -0
- azure/ai/evaluation/_aoai/text_similarity_grader.py +88 -0
- azure/ai/evaluation/_azure/_clients.py +4 -4
- azure/ai/evaluation/_azure/_envs.py +208 -0
- azure/ai/evaluation/_azure/_token_manager.py +12 -7
- azure/ai/evaluation/_common/__init__.py +7 -0
- azure/ai/evaluation/_common/evaluation_onedp_client.py +163 -0
- azure/ai/evaluation/_common/onedp/__init__.py +32 -0
- azure/ai/evaluation/_common/onedp/_client.py +139 -0
- azure/ai/evaluation/_common/onedp/_configuration.py +73 -0
- azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_types.py +21 -0
- azure/ai/evaluation/_common/onedp/_validation.py +50 -0
- azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
- azure/ai/evaluation/_common/onedp/_version.py +9 -0
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/onedp/aio/_client.py +143 -0
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +75 -0
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/aio/_vendor.py +40 -0
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +39 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +4494 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/models/__init__.py +142 -0
- azure/ai/evaluation/_common/onedp/models/_enums.py +162 -0
- azure/ai/evaluation/_common/onedp/models/_models.py +2228 -0
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/operations/__init__.py +39 -0
- azure/ai/evaluation/_common/onedp/operations/_operations.py +5655 -0
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/py.typed +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/rai_service.py +165 -34
- azure/ai/evaluation/_common/raiclient/_version.py +1 -1
- azure/ai/evaluation/_common/utils.py +79 -1
- azure/ai/evaluation/_constants.py +16 -0
- azure/ai/evaluation/_converters/_ai_services.py +162 -118
- azure/ai/evaluation/_converters/_models.py +76 -6
- azure/ai/evaluation/_eval_mapping.py +73 -0
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +30 -16
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +8 -0
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +5 -0
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +17 -1
- azure/ai/evaluation/_evaluate/_eval_run.py +1 -1
- azure/ai/evaluation/_evaluate/_evaluate.py +325 -76
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +553 -0
- azure/ai/evaluation/_evaluate/_utils.py +117 -4
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +11 -1
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +9 -1
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +12 -2
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +12 -3
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +12 -3
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +2 -2
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +12 -2
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +14 -4
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +9 -8
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +10 -0
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +10 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +11 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +469 -0
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +10 -0
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +11 -1
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +10 -0
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +11 -1
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +16 -2
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +10 -0
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +11 -0
- azure/ai/evaluation/_evaluators/_qa/_qa.py +10 -0
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +11 -1
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +20 -2
- azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +31 -46
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +10 -0
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +10 -0
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +10 -0
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +11 -1
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +16 -2
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +86 -12
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +10 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +11 -0
- azure/ai/evaluation/_exceptions.py +2 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +0 -14
- azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +1 -1
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +51 -32
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +114 -8
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +6 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +6 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +69 -29
- azure/ai/evaluation/_legacy/_batch_engine/_trace.py +54 -62
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +19 -1
- azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +124 -0
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +15 -0
- azure/ai/evaluation/_legacy/prompty/_connection.py +11 -74
- azure/ai/evaluation/_legacy/prompty/_exceptions.py +80 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +119 -9
- azure/ai/evaluation/_legacy/prompty/_utils.py +72 -2
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +114 -22
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/red_team/_attack_strategy.py +1 -1
- azure/ai/evaluation/red_team/_red_team.py +976 -546
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +23 -0
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +1 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +63 -39
- azure/ai/evaluation/simulator/_constants.py +1 -0
- azure/ai/evaluation/simulator/_conversation/__init__.py +13 -6
- azure/ai/evaluation/simulator/_conversation/_conversation.py +2 -1
- azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +38 -25
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +43 -28
- azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +26 -18
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +5 -10
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +65 -41
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +15 -10
- azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
- {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/METADATA +49 -3
- {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/RECORD +144 -86
- /azure/ai/evaluation/_legacy/{_batch_engine → _common}/_logging.py +0 -0
- {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.7.0.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/_evaluate/_evaluate.py

@@ -8,20 +8,26 @@ import os
 import re
 from typing import Any, Callable, Dict, List, Optional, Set, Tuple, TypedDict, Union, cast
 
+from openai import OpenAI, AzureOpenAI
 from azure.ai.evaluation._legacy._adapters._constants import LINE_NUMBER
 from azure.ai.evaluation._legacy._adapters.entities import Run
 import pandas as pd
 
 from azure.ai.evaluation._common.math import list_mean_nan_safe, apply_transform_nan_safe
-from azure.ai.evaluation._common.utils import validate_azure_ai_project
+from azure.ai.evaluation._common.utils import validate_azure_ai_project, is_onedp_project
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 
+from azure.ai.evaluation._aoai.aoai_grader import AzureOpenAIGrader
+
 from .._constants import (
     CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT,
+    EVALUATION_PASS_FAIL_MAPPING,
     EvaluationMetrics,
     DefaultOpenEncoding,
     Prefixes,
     _InternalEvaluationMetrics,
+    BINARY_AGGREGATE_SUFFIX,
+    DEFAULT_OAI_EVAL_RUN_NAME
 )
 from .._model_configurations import AzureAIProject, EvaluationResult, EvaluatorConfig
 from .._user_agent import USER_AGENT
@@ -29,7 +35,6 @@ from ._batch_run import (
     EvalRunContext,
     CodeClient,
     ProxyClient,
-    ProxyRun,
     TargetRunContext,
     RunSubmitterClient,
 )
@@ -38,16 +43,22 @@ from ._utils import (
     _log_metrics_and_instance_results,
     _trace_destination_from_project_scope,
     _write_output,
-    DataLoaderFactory,
+    DataLoaderFactory, _log_metrics_and_instance_results_onedp,
 )
-from ._batch_run.batch_clients import BatchClient
+from ._batch_run.batch_clients import BatchClient, BatchClientRun
 
+from ._evaluate_aoai import (
+    _begin_aoai_evaluation,
+    _split_evaluators_and_grader_configs,
+    _get_evaluation_run_results,
+    OAIEvalRunCreationInfo
+)
 LOGGER = logging.getLogger(__name__)
 
 # For metrics (aggregates) whose metric names intentionally differ from their
 # originating column name, usually because the aggregation of the original value
 # means something sufficiently different.
-# Note that content safety metrics are handled
+# Note that content safety metrics are handled separately.
 METRIC_COLUMN_NAME_REPLACEMENTS = {
     "groundedness_pro_label": "groundedness_pro_passing_rate",
 }
@@ -58,6 +69,19 @@ class __EvaluatorInfo(TypedDict):
     metrics: Dict[str, Any]
     run_summary: Dict[str, Any]
 
+class __ValidatedData(TypedDict):
+    '''
+    Simple dictionary that contains ALL pre-processed data and
+    the resultant objects that are needed for downstream evaluation.
+    '''
+    evaluators: Dict[str, Callable]
+    graders: Dict[str, AzureOpenAIGrader]
+    input_data_df: pd.DataFrame
+    column_mapping: Dict[str, Dict[str, str]]
+    target_run: Optional[BatchClientRun]
+    batch_run_client: BatchClient
+    batch_run_data: Union[str, os.PathLike, pd.DataFrame]
+
 
 def _aggregate_other_metrics(df: pd.DataFrame) -> Tuple[List[str], Dict[str, float]]:
     """Identify and average various metrics that need to have the metric name be replaced,
@@ -117,7 +141,6 @@ def _aggregate_content_safety_metrics(
         module = inspect.getmodule(evaluators[evaluator_name])
         if (
             module
-            and module.__name__.startswith("azure.ai.evaluation.")
             and metric_name.endswith("_score")
             and metric_name.replace("_score", "") in content_safety_metrics
         ):
@@ -208,6 +231,48 @@ def _process_rows(row, detail_defect_rates):
     return detail_defect_rates
 
 
+def _aggregation_binary_output(df: pd.DataFrame) -> Dict[str, float]:
+    """
+    Aggregate binary output results (pass/fail) from evaluation dataframe.
+
+    For each evaluator, calculates the proportion of "pass" results.
+
+    :param df: The dataframe of evaluation results.
+    :type df: ~pandas.DataFrame
+    :return: A dictionary mapping evaluator names to the proportion of pass results.
+    :rtype: Dict[str, float]
+    """
+    results = {}
+
+    # Find all columns that end with "_result"
+    result_columns = [col for col in df.columns if col.startswith("outputs.") and col.endswith("_result")]
+
+    for col in result_columns:
+        # Extract the evaluator name from the column name
+        # (outputs.<evaluator>.<metric>_result)
+        parts = col.split(".")
+        evaluator_name = None
+        if len(parts) >= 3:
+            evaluator_name = parts[1]
+        else:
+            LOGGER.warning("Skipping column '%s' due to unexpected format. Expected at least three parts separated by '.'", col)
+            continue
+        if evaluator_name:
+            # Count the occurrences of each unique value (pass/fail)
+            value_counts = df[col].value_counts().to_dict()
+
+            # Calculate the proportion of EVALUATION_PASS_FAIL_MAPPING[True] results
+            total_rows = len(df)
+            pass_count = value_counts.get(EVALUATION_PASS_FAIL_MAPPING[True], 0)
+            proportion = pass_count / total_rows if total_rows > 0 else 0.0
+
+            # Set the result with the evaluator name as the key
+            result_key = f"{evaluator_name}.{BINARY_AGGREGATE_SUFFIX}"
+            results[result_key] = round(proportion, 2)
+
+    return results
+
+
 def _aggregate_metrics(df: pd.DataFrame, evaluators: Dict[str, Callable]) -> Dict[str, float]:
     """Aggregate metrics from the evaluation results.
     On top of naively calculating the mean of most metrics, this function also identifies certain columns
@@ -221,6 +286,8 @@ def _aggregate_metrics(df: pd.DataFrame, evaluators: Dict[str, Callable]) -> Dic
     :return: The aggregated metrics.
     :rtype: Dict[str, float]
     """
+    binary_metrics = _aggregation_binary_output(df)
+
     df.rename(columns={col: col.replace("outputs.", "") for col in df.columns}, inplace=True)
 
     handled_columns = []
@@ -248,6 +315,10 @@ def _aggregate_metrics(df: pd.DataFrame, evaluators: Dict[str, Callable]) -> Dic
     metrics = mean_value.to_dict()
     # Add defect rates back into metrics
     metrics.update(defect_rates)
+
+    # Add binary threshold metrics based on pass/fail results
+    metrics.update(binary_metrics)
+
     return metrics
 
 
@@ -486,12 +557,12 @@ def _validate_and_load_data(target, data, evaluators, output_path, azure_ai_proj
 
 def _apply_target_to_data(
     target: Callable,
-    data: Union[str, os.PathLike],
+    data: Union[str, os.PathLike, pd.DataFrame],
     batch_client: BatchClient,
     initial_data: pd.DataFrame,
     evaluation_name: Optional[str] = None,
     **kwargs,
-) -> Tuple[pd.DataFrame, Set[str],
+) -> Tuple[pd.DataFrame, Set[str], BatchClientRun]:
     """
     Apply the target function to the data set and return updated data and generated columns.
 
@@ -509,24 +580,18 @@
     :rtype: Tuple[pandas.DataFrame, List[str]]
     """
 
-    if not isinstance(batch_client, ProxyClient):
-        raise ValueError("Only ProxyClient supports target runs for now.")
-
     _run_name = kwargs.get("_run_name")
-    with TargetRunContext():
-        run =
-
-
-
-
-
-
-            name=_run_name,
-        ),
+    with TargetRunContext(batch_client):
+        run: BatchClientRun = batch_client.run(
+            flow=target,
+            display_name=evaluation_name,
+            data=data,
+            stream=True,
+            name=_run_name,
+            evaluator_name=getattr(target, "__qualname__", "TARGET"),
         )
-
-
-    run_summary = batch_client.get_run_summary(run)
+    target_output: pd.DataFrame = batch_client.get_details(run, all_results=True)
+    run_summary = batch_client.get_run_summary(run)
 
     if run_summary["completed_lines"] == 0:
         msg = (
@@ -557,7 +622,7 @@
     # Concatenate output to input
     target_output = pd.concat([target_output, initial_data], axis=1)
 
-    return target_output, generated_columns, run
+    return target_output, generated_columns, run
 
 
 def _process_column_mappings(
@@ -573,7 +638,7 @@
 
     processed_config: Dict[str, Dict[str, str]] = {}
 
-    expected_references = re.compile(r"^\$\{(target|data)\.[a-zA-
+    expected_references = re.compile(r"^\$\{(target|data)\.[a-zA-Z0-9_]+\}$")
 
     if column_mapping:
         for evaluator, mapping_config in column_mapping.items():
@@ -625,11 +690,11 @@ def _rename_columns_conditionally(df: pd.DataFrame) -> pd.DataFrame:
 def evaluate(
     *,
     data: Union[str, os.PathLike],
-    evaluators: Dict[str, Callable],
+    evaluators: Dict[str, Union[Callable, AzureOpenAIGrader]],
     evaluation_name: Optional[str] = None,
     target: Optional[Callable] = None,
     evaluator_config: Optional[Dict[str, EvaluatorConfig]] = None,
-    azure_ai_project: Optional[AzureAIProject] = None,
+    azure_ai_project: Optional[Union[str, AzureAIProject]] = None,
     output_path: Optional[Union[str, os.PathLike]] = None,
     fail_on_evaluator_errors: bool = False,
     **kwargs,
@@ -641,8 +706,9 @@
         JSONL and CSV files are supported. `target` and `data` both cannot be None. Required.
     :paramtype data: str
     :keyword evaluators: Evaluators to be used for evaluation. It should be a dictionary with key as alias for evaluator
-        and value as the evaluator function.
-
+        and value as the evaluator function. Also accepts AzureOpenAIGrader instances as values, which are processed separately.
+        Required.
+    :paramtype evaluators: Dict[str, Union[Callable, ~azure.ai.evaluation.AzureOpenAIGrader]]
     :keyword evaluation_name: Display name of the evaluation.
     :paramtype evaluation_name: Optional[str]
     :keyword target: Target to be evaluated. `target` and `data` both cannot be None
@@ -672,14 +738,24 @@
             :end-before: [END evaluate_method]
             :language: python
             :dedent: 8
-            :caption: Run an evaluation on local data with
+            :caption: Run an evaluation on local data with one or more evaluators using azure.ai.evaluation.AzureAIProject
+
+    .. admonition:: Example using Azure AI Project URL:
+
+        .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
+            :start-after: [START evaluate_method]
+            :end-before: [END evaluate_method]
+            :language: python
+            :dedent: 8
+            :caption: Run an evaluation on local data with one or more evaluators using Azure AI Project URL in following format
+                https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
     """
     try:
         return _evaluate(
            evaluation_name=evaluation_name,
            target=target,
            data=data,
-
+            evaluators_and_graders=evaluators,
            evaluator_config=evaluator_config,
            azure_ai_project=azure_ai_project,
            output_path=output_path,
@@ -744,23 +820,157 @@ def _print_fail_flag_warning() -> None:
 
 def _evaluate(  # pylint: disable=too-many-locals,too-many-statements
     *,
-
+    evaluators_and_graders: Dict[str, Union[Callable, AzureOpenAIGrader]],
     evaluation_name: Optional[str] = None,
     target: Optional[Callable] = None,
     data: Union[str, os.PathLike],
     evaluator_config: Optional[Dict[str, EvaluatorConfig]] = None,
-    azure_ai_project: Optional[AzureAIProject] = None,
+    azure_ai_project: Optional[Union[str, AzureAIProject]] = None,
     output_path: Optional[Union[str, os.PathLike]] = None,
     fail_on_evaluator_errors: bool = False,
     **kwargs,
 ) -> EvaluationResult:
     if fail_on_evaluator_errors:
         _print_fail_flag_warning()
-
+
+    # Turn inputted mess of data into a dataframe, apply targets if needed
+    # split graders and evaluators, and verify that column mappings are sensible.
+    validated_data = _preprocess_data(
+        data=data,
+        evaluators_and_graders=evaluators_and_graders,
+        evaluator_config=evaluator_config,
+        target=target,
+        output_path=output_path,
+        azure_ai_project=azure_ai_project,
+        evaluation_name=evaluation_name,
+        **kwargs,
+    )
+
+    # extract relevant info from validated data
+    column_mapping = validated_data["column_mapping"]
+    evaluators = validated_data["evaluators"]
+    graders = validated_data["graders"]
+    input_data_df = validated_data["input_data_df"]
+    results_df = pd.DataFrame()
+    metrics: Dict[str, float] = {}
+    eval_run_info_list: List[OAIEvalRunCreationInfo] = []
+
+    # Start OAI eval runs if any graders are present.
+    need_oai_run = len(graders) > 0
+    need_local_run = len(evaluators) > 0
+    need_get_oai_results = False
+    got_local_results = False
+    if need_oai_run:
+        try:
+            aoi_name = evaluation_name if evaluation_name else DEFAULT_OAI_EVAL_RUN_NAME
+            eval_run_info_list = _begin_aoai_evaluation(
+                graders,
+                column_mapping,
+                input_data_df,
+                aoi_name
+            )
+            need_get_oai_results = len(eval_run_info_list) > 0
+        except EvaluationException as e:
+            if need_local_run:
+                # If there are normal evaluators, don't stop execution and try to run
+                # those.
+                LOGGER.warning("Remote Azure Open AI grader evaluations failed during run creation." +
+                    " Continuing with local evaluators.")
+                LOGGER.warning(e)
+            else:
+                raise e
+
+    # Evaluate 'normal' evaluators. This includes built-in evaluators and any user-supplied callables.
+    if need_local_run:
+        try:
+            eval_result_df, eval_metrics, per_evaluator_results = _run_callable_evaluators(
+                validated_data=validated_data,
+                fail_on_evaluator_errors=fail_on_evaluator_errors
+            )
+            results_df = eval_result_df
+            metrics = eval_metrics
+            got_local_results = True
+            # TODO figure out how to update this printing to include OAI results?
+            _print_summary(per_evaluator_results)
+        except EvaluationException as e:
+            if need_get_oai_results:
+                # If there are OAI graders, we only print a warning on local failures.
+                LOGGER.warning("Local evaluations failed. Will still attempt to retrieve online grader results.")
+                LOGGER.warning(e)
+            else:
+                raise e
+
+    # Retrieve OAI eval run results if needed.
+    if need_get_oai_results:
+        try:
+            aoai_results, aoai_metrics = _get_evaluation_run_results(eval_run_info_list)  # type: ignore
+            # Post build TODO: add equivalent of _print_summary(per_evaluator_results) here
+
+            # Combine results if both evaluators and graders are present
+            if len(evaluators) > 0:
+                results_df = pd.concat([results_df, aoai_results], axis=1)
+                metrics.update(aoai_metrics)
+            else:
+                # Otherwise combine aoai results with input data df to include input columns in outputs.
+                results_df = pd.concat([input_data_df, aoai_results], axis=1)
+                metrics = aoai_metrics
+        except EvaluationException as e:
+            if got_local_results:
+                # If there are local eval results, we only print a warning on OAI failure.
+                LOGGER.warning("Remote Azure Open AI grader evaluations failed. Still returning local results.")
+                LOGGER.warning(e)
+            else:
+                raise e
+
+    # Done with all evaluations, message outputs into final forms, and log results if needed.
+    name_map = _map_names_to_builtins(evaluators, graders)
+    if is_onedp_project(azure_ai_project):
+        studio_url = _log_metrics_and_instance_results_onedp(
+            metrics, results_df, azure_ai_project, evaluation_name, name_map, **kwargs
+        )
+    else:
+        # Since tracing is disabled, pass None for target_run so a dummy evaluation run will be created each time.
+        trace_destination = _trace_destination_from_project_scope(azure_ai_project) if azure_ai_project else None
+        studio_url = None
+        if trace_destination:
+            studio_url = _log_metrics_and_instance_results(
+                metrics, results_df, trace_destination, None, evaluation_name, name_map, **kwargs
+            )
+
+    result_df_dict = results_df.to_dict("records")
+    result: EvaluationResult = {"rows": result_df_dict, "metrics": metrics, "studio_url": studio_url}  # type: ignore
+
+    if output_path:
+        _write_output(output_path, result)
+
+    return result
+
 
+def _preprocess_data(
+    data: Union[str, os.PathLike],
+    evaluators_and_graders: Dict[str, Union[Callable, AzureOpenAIGrader]],
+    evaluator_config: Optional[Dict[str, EvaluatorConfig]] = None,
+    target: Optional[Callable] = None,
+    output_path: Optional[Union[str, os.PathLike]] = None,
+    azure_ai_project: Optional[Union[str, AzureAIProject]] = None,
+    evaluation_name: Optional[str] = None,
+    **kwargs,
+) -> __ValidatedData:
     # Process evaluator config to replace ${target.} with ${data.}
     if evaluator_config is None:
         evaluator_config = {}
+
+    input_data_df = _validate_and_load_data(
+        target,
+        data,
+        evaluators_and_graders,
+        output_path,
+        azure_ai_project,
+        evaluation_name
+    )
+    if target is not None:
+        _validate_columns_for_target(input_data_df, target)
+
     # extract column mapping dicts into dictionary mapping evaluator name to column mapping
     column_mapping = _process_column_mappings(
         {
@@ -769,27 +979,35 @@ def _evaluate(  # pylint: disable=too-many-locals,too-many-statements
         }
     )
 
-    if target is not None:
-        _validate_columns_for_target(input_data_df, target)
-
     # Create default configuration for evaluators that directly maps
     # input data names to keyword inputs of the same name in the evaluators.
     column_mapping = column_mapping or {}
     column_mapping.setdefault("default", {})
 
-
+    # Split normal evaluators and OAI graders
+    evaluators, graders = _split_evaluators_and_grader_configs(evaluators_and_graders)
+
+    target_run: Optional[BatchClientRun] = None
     target_generated_columns: Set[str] = set()
     batch_run_client: BatchClient
     batch_run_data: Union[str, os.PathLike, pd.DataFrame] = data
 
-
-
-
+    if kwargs.pop("_use_run_submitter_client", False):
+        batch_run_client = RunSubmitterClient()
+        batch_run_data = input_data_df
+    elif kwargs.pop("_use_pf_client", True):
         batch_run_client = ProxyClient(user_agent=USER_AGENT)
+        # Ensure the absolute path is passed to pf.run, as relative path doesn't work with
+        # multiple evaluators. If the path is already absolute, abspath will return the original path.
         batch_run_data = os.path.abspath(data)
+    else:
+        batch_run_client = CodeClient()
+        batch_run_data = input_data_df
 
+    # If target is set, apply 1-1 column mapping from target outputs to evaluator inputs
+    if data is not None and target is not None:
         input_data_df, target_generated_columns, target_run = _apply_target_to_data(
-            target,
+            target, batch_run_data, batch_run_client, input_data_df, evaluation_name, **kwargs
         )
 
         for evaluator_name, mapping in column_mapping.items():
@@ -803,17 +1021,6 @@ def _evaluate(  # pylint: disable=too-many-locals,too-many-statements
             # customer did not mapped target output.
             if col not in mapping and run_output not in mapped_to_values:
                 column_mapping[evaluator_name][col] = run_output  # pylint: disable=unnecessary-dict-index-lookup
-    elif kwargs.pop("_use_run_submitter_client", False):
-        batch_run_client = RunSubmitterClient()
-        batch_run_data = input_data_df
-    elif kwargs.pop("_use_pf_client", True):
-        batch_run_client = ProxyClient(user_agent=USER_AGENT)
-        # Ensure the absolute path is passed to pf.run, as relative path doesn't work with
-        # multiple evaluators. If the path is already absolute, abspath will return the original path.
-        batch_run_data = os.path.abspath(data)
-    else:
-        batch_run_client = CodeClient()
-        batch_run_data = input_data_df
 
     # After we have generated all columns, we can check if we have everything we need for evaluators.
     _validate_columns_for_evaluators(input_data_df, evaluators, target, target_generated_columns, column_mapping)
@@ -829,6 +1036,29 @@ def _evaluate(  # pylint: disable=too-many-locals,too-many-statements
         if not col.startswith(Prefixes.TSG_OUTPUTS) and col not in column_mapping["default"].keys():
             column_mapping["default"][col] = f"${{data.{col}}}"
 
+    return __ValidatedData(
+        evaluators=evaluators,
+        graders=graders,
+        input_data_df=input_data_df,
+        column_mapping=column_mapping,
+        target_run=target_run,
+        batch_run_client=batch_run_client,
+        batch_run_data=batch_run_data,
+    )
+
+
+def _run_callable_evaluators(
+    validated_data: __ValidatedData,
+    fail_on_evaluator_errors: bool = False,
+    **kwargs,
+) -> Tuple[pd.DataFrame, Dict[str, Any], Dict[str, __EvaluatorInfo]]:
+
+    # Extract needed values
+    batch_run_client = validated_data["batch_run_client"]
+    target_run = validated_data["target_run"]
+    batch_run_data = validated_data["batch_run_data"]
+    column_mapping = validated_data["column_mapping"]
+    evaluators = validated_data["evaluators"]
     with EvalRunContext(batch_run_client):
         runs = {
             evaluator_name: batch_run_client.run(
@@ -889,31 +1119,50 @@
     # Rename columns, generated by target function to outputs instead of inputs.
     # If target generates columns, already present in the input data, these columns
     # will be marked as outputs already so we do not need to rename them.
-    input_data_df = _rename_columns_conditionally(input_data_df)
-
-    result_df = pd.concat([input_data_df, evaluators_result_df], axis=1, verify_integrity=True)
-    metrics = _aggregate_metrics(evaluators_result_df, evaluators)
-    metrics.update(evaluators_metric)
-
-    # Since tracing is disabled, pass None for target_run so a dummy evaluation run will be created each time.
-    target_run: Optional[Run] = None
-    trace_destination = _trace_destination_from_project_scope(azure_ai_project) if azure_ai_project else None
-    studio_url = None
-    if trace_destination:
-        studio_url = _log_metrics_and_instance_results(
-            metrics, result_df, trace_destination, target_run, evaluation_name, **kwargs
-        )
 
-
-
+    input_data_df = _rename_columns_conditionally(validated_data["input_data_df"])
+    eval_result_df = pd.concat([input_data_df, evaluators_result_df], axis=1, verify_integrity=True)
+    eval_metrics = _aggregate_metrics(evaluators_result_df, evaluators)
+    eval_metrics.update(evaluators_metric)
 
-
-
-    if output_path:
-        _write_output(output_path, result)
+    return eval_result_df, eval_metrics, per_evaluator_results
 
-
+def _map_names_to_builtins(
+    evaluators: Dict[str, Callable],
+    graders: Dict[str, AzureOpenAIGrader],
+) -> Dict[str, str]:
+    """
+    Construct a mapping from user-supplied evaluator names to which known, built-in
+    evaluator or grader they refer to. Custom or otherwise unknown evaluators are
+    mapped to the "unknown" value.
 
+    :param evaluators: The dictionary of evaluators.
+    :type evaluators: Dict[str, Callable]
+    :param graders: The dictionary of graders.
+    :type graders: Dict[str, AzureOpenAIGrader]
+    :param evaluator_config: The configuration for evaluators.
+    :type evaluator_config: Optional[Dict[str, EvaluatorConfig]]
+
+    """
+    from .._eval_mapping import EVAL_CLASS_MAP
+    name_map = {}
+
+    for name, evaluator in evaluators.items():
+        # Check if the evaluator is a known built-in evaluator
+        found_eval = False
+        for eval_class, eval_id in EVAL_CLASS_MAP.items():
+            if isinstance(evaluator, eval_class):
+                name_map[name] = eval_id
+                found_eval = True
+                break
+        if not found_eval:
+            # If not found, map to "unknown"
+            name_map[name] = "unknown"
+
+    for name, grader in graders.items():
+        name_map[name] = grader.id
+
+    return name_map
 
 def _turn_error_logs_into_exception(log_path: str) -> None:
     """Produce an EvaluationException using the contents of the inputted
@@ -929,4 +1178,4 @@ def _turn_error_logs_into_exception(log_path: str) -> None:
         target=ErrorTarget.EVALUATE,
         category=ErrorCategory.FAILED_EXECUTION,
         blame=ErrorBlame.UNKNOWN,
-    )
+    )