azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- azure/ai/evaluation/__init__.py +85 -14
- azure/ai/evaluation/_aoai/__init__.py +10 -0
- azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
- azure/ai/evaluation/_aoai/label_grader.py +68 -0
- azure/ai/evaluation/_aoai/python_grader.py +86 -0
- azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
- azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
- azure/ai/evaluation/_azure/__init__.py +3 -0
- azure/ai/evaluation/_azure/_clients.py +204 -0
- azure/ai/evaluation/_azure/_envs.py +207 -0
- azure/ai/evaluation/_azure/_models.py +227 -0
- azure/ai/evaluation/_azure/_token_manager.py +129 -0
- azure/ai/evaluation/_common/__init__.py +9 -1
- azure/ai/evaluation/_common/constants.py +124 -2
- azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
- azure/ai/evaluation/_common/onedp/__init__.py +32 -0
- azure/ai/evaluation/_common/onedp/_client.py +166 -0
- azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_types.py +21 -0
- azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_validation.py +66 -0
- azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
- azure/ai/evaluation/_common/onedp/_version.py +9 -0
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
- azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
- azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/py.typed +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/rai_service.py +578 -69
- azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
- azure/ai/evaluation/_common/raiclient/_client.py +128 -0
- azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
- azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
- azure/ai/evaluation/_common/raiclient/_version.py +9 -0
- azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
- azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
- azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
- azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
- azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
- azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/py.typed +1 -0
- azure/ai/evaluation/_common/utils.py +505 -27
- azure/ai/evaluation/_constants.py +147 -0
- azure/ai/evaluation/_converters/__init__.py +3 -0
- azure/ai/evaluation/_converters/_ai_services.py +899 -0
- azure/ai/evaluation/_converters/_models.py +467 -0
- azure/ai/evaluation/_converters/_sk_services.py +495 -0
- azure/ai/evaluation/_eval_mapping.py +87 -0
- azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -2
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
- azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
- azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +19 -6
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +47 -22
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +18 -2
- azure/ai/evaluation/_evaluate/_eval_run.py +32 -46
- azure/ai/evaluation/_evaluate/_evaluate.py +1809 -142
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -90
- azure/ai/evaluation/_evaluate/_utils.py +237 -42
- azure/ai/evaluation/_evaluator_definition.py +76 -0
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +80 -28
- azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +40 -4
- azure/ai/evaluation/_evaluators/_common/__init__.py +2 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +430 -29
- azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +269 -12
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +74 -9
- azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +73 -53
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +35 -5
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +26 -5
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +35 -5
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +34 -4
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
- azure/ai/evaluation/_evaluators/_eci/_eci.py +6 -3
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +97 -70
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +39 -3
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +80 -25
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +230 -20
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +30 -29
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +19 -14
- azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +89 -36
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +22 -4
- azure/ai/evaluation/_evaluators/_qa/_qa.py +94 -35
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +100 -4
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +154 -56
- azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
- azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +39 -3
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +166 -26
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +38 -7
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +81 -85
- azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
- azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
- azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
- azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
- azure/ai/evaluation/_evaluators/_tool_call_success/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_call_success/_tool_call_success.py +306 -0
- azure/ai/evaluation/_evaluators/_tool_call_success/tool_call_success.prompty +321 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
- azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
- azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +20 -4
- azure/ai/evaluation/_exceptions.py +24 -1
- azure/ai/evaluation/_http_utils.py +7 -5
- azure/ai/evaluation/_legacy/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
- azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
- azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
- azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
- azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
- azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
- azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
- azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
- azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
- azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
- azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
- azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
- azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
- azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
- azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
- azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
- azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
- azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
- azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
- azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
- azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
- azure/ai/evaluation/_model_configurations.py +26 -0
- azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
- azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
- azure/ai/evaluation/_user_agent.py +32 -1
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
- azure/ai/evaluation/_version.py +2 -1
- azure/ai/evaluation/red_team/__init__.py +22 -0
- azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
- azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
- azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
- azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
- azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
- azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
- azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
- azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
- azure/ai/evaluation/red_team/_default_converter.py +21 -0
- azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
- azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
- azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
- azure/ai/evaluation/red_team/_red_team.py +1717 -0
- azure/ai/evaluation/red_team/_red_team_result.py +661 -0
- azure/ai/evaluation/red_team/_result_processor.py +1708 -0
- azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
- azure/ai/evaluation/red_team/_utils/constants.py +72 -0
- azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
- azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
- azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
- azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
- azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
- azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
- azure/ai/evaluation/simulator/_adversarial_scenario.py +6 -0
- azure/ai/evaluation/simulator/_adversarial_simulator.py +187 -80
- azure/ai/evaluation/simulator/_constants.py +1 -0
- azure/ai/evaluation/simulator/_conversation/__init__.py +138 -11
- azure/ai/evaluation/simulator/_conversation/_conversation.py +6 -2
- azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +37 -24
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +56 -28
- azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +12 -10
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +100 -45
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +101 -3
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +31 -11
- azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
- azure/ai/evaluation/simulator/_simulator.py +43 -19
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/METADATA +378 -27
- azure_ai_evaluation-1.13.5.dist-info/RECORD +305 -0
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/WHEEL +1 -1
- azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
- azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
- azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
- azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
- azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
- azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
- azure/ai/evaluation/simulator/_tracing.py +0 -89
- azure_ai_evaluation-1.0.1.dist-info/RECORD +0 -119
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info/licenses}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from datetime import datetime, timedelta
|
|
7
|
+
from typing import Any, Mapping, Optional, Sequence
|
|
8
|
+
|
|
9
|
+
from ._status import BatchStatus
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class TokenMetrics:
    """Token usage counters for a run."""

    prompt_tokens: int
    """Number of tokens used in the prompt for the run."""
    completion_tokens: int
    """Number of tokens used in the completion for the run."""
    total_tokens: int
    """Total number of tokens used in the run."""

    def update(self, other: "TokenMetrics") -> None:
        """Add the counters of another set of token metrics onto this one, in place.

        :param TokenMetrics other: The token metrics whose counters are added to this instance.
        """
        for counter in ("prompt_tokens", "completion_tokens", "total_tokens"):
            setattr(self, counter, getattr(self, counter) + getattr(other, counter))
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class BatchRunError:
    """Describes an error raised by a batch run."""

    details: str
    """Text describing the error."""
    exception: Optional[BaseException]
    """The exception behind the error, or None when there is no exception object."""
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class BatchRunDetails:
    """The details of a single line run within a batch run."""

    id: str
    """The ID of the line run."""
    status: BatchStatus
    """The status of the line run."""
    result: Optional[Mapping[str, Any]]
    """The result of the line run."""
    start_time: Optional[datetime]
    """The start time of the line run. If this was never started, this should be None."""
    end_time: Optional[datetime]
    """The end time of the line run. If this never completed, this should be None."""
    tokens: TokenMetrics
    """The token metrics of the line run."""
    error: Optional[BatchRunError]
    """The error of the line run. This will only be set if the status is Failed."""
    index: int
    """The line run index."""

    @property
    def duration(self) -> timedelta:
        """The duration of the line run.

        :return: ``end_time - start_time``, or a zero ``timedelta`` when either
            timestamp is None.
        :rtype: timedelta
        """
        if self.start_time is None or self.end_time is None:
            return timedelta(0)
        return self.end_time - self.start_time

    @staticmethod
    def create_id(run_id: str, index: int) -> str:
        """Helper method to create the ID for a line run.

        :param str run_id: The ID of the run the line belongs to.
        :param int index: The index of the line within the run.
        :return: The run ID and the index joined with an underscore.
        :rtype: str
        """
        return f"{run_id}_{index}"
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@dataclass
class BatchResult:
    """Aggregated outcome of a batch run."""

    status: BatchStatus
    """Overall status of the batch run."""
    total_lines: int
    """Total number of lines in the batch run."""
    failed_lines: int
    """Number of lines in the batch run that failed."""
    start_time: datetime
    """When the batch run started."""
    end_time: datetime
    """When the batch run ended."""
    tokens: TokenMetrics
    """Overall token metrics of the batch run."""
    details: Sequence[BatchRunDetails]
    """Per-line details of the batch run."""
    error: Optional[Exception] = None
    """The error of the batch run. Only set if the status does not indicate success."""

    @property
    def duration(self) -> timedelta:
        """Elapsed time of the batch run, or a zero ``timedelta`` if a timestamp is None."""
        started, ended = self.start_time, self.end_time
        if started is None or ended is None:
            return timedelta(0)
        return ended - started

    @property
    def results(self) -> Sequence[Optional[Mapping[str, Any]]]:
        """The ``result`` of every entry in ``details``; empty when there are no details."""
        line_details = self.details
        return [entry.result for entry in line_details] if line_details else []
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
from uuid import uuid4
|
|
6
|
+
from datetime import datetime, timedelta, timezone
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from typing import Any, Callable, Mapping, Optional, Sequence
|
|
9
|
+
|
|
10
|
+
from ._utils import normalize_identifier_name
|
|
11
|
+
from ._result import BatchResult
|
|
12
|
+
from ._status import BatchStatus
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class RunStatus(Enum):
    """Status values reported for a run."""

    # TODO ralphe: Trim this to just the statuses we need
    # Statuses that are currently disabled (kept here for reference):
    #   QUEUED = "Queued", PROVISIONING = "Provisioning", STARTING = "Starting",
    #   CANCEL_REQUESTED = "CancelRequested", FINALIZING = "Finalizing",
    #   UNAPPROVED = "Unapproved", NOTRESPONDING = "NotResponding",
    #   PAUSING = "Pausing", PAUSED = "Paused"
    NOT_STARTED = "NotStarted"
    PREPARING = "Preparing"
    RUNNING = "Running"
    CANCELED = "Canceled"
    COMPLETED = "Completed"
    FAILED = "Failed"

    @staticmethod
    def from_batch_result_status(status: BatchStatus) -> "RunStatus":
        """Translate a batch status into the corresponding run status.

        :param BatchStatus status: The batch status to translate.
        :return: The matching run status; any status without an explicit
            counterpart is reported as ``RunStatus.FAILED``.
        :rtype: RunStatus
        """
        # Built inside the method: a class-level table would become an enum member.
        translations = (
            (BatchStatus.NotStarted, RunStatus.NOT_STARTED),
            (BatchStatus.Running, RunStatus.RUNNING),
            (BatchStatus.Completed, RunStatus.COMPLETED),
            (BatchStatus.Canceled, RunStatus.CANCELED),
            (BatchStatus.Failed, RunStatus.FAILED),
        )
        return next(
            (run_status for batch_status, run_status in translations if status == batch_status),
            RunStatus.FAILED,
        )
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class Run:
    """Stand-in for a Promptflow Run
    (promptflow-devkit/promptflow/_sdk/entities/_run.py).

    THIS WILL BE REMOVED IN A FUTURE CODE UPDATE
    """

    def __init__(
        self,
        *,
        dynamic_callable: Callable,
        name_prefix: Optional[str],
        inputs: Sequence[Mapping[str, Any]],
        column_mapping: Optional[Mapping[str, str]] = None,
        created_on: Optional[datetime] = None,
        run: Optional["Run"] = None,
    ):
        """Create a run that has not been started yet.

        :keyword Callable dynamic_callable: Stored on the run as ``dynamic_callable``.
        :keyword Optional[str] name_prefix: Prefix of the generated run name. A random
            UUID is used when this is empty or None.
        :keyword Sequence[Mapping[str, Any]] inputs: Stored on the run as ``inputs``.
        :keyword Optional[Mapping[str, str]] column_mapping: Stored on the run as ``column_mapping``.
        :keyword Optional[datetime] created_on: Creation time; defaults to the current UTC time.
        :keyword Optional[Run] run: Another run, exposed through ``previous_run``.
        """
        # Timing and status bookkeeping.
        self._created_on = created_on or datetime.now(timezone.utc)
        self._status: RunStatus = RunStatus.NOT_STARTED
        self._start_time: Optional[datetime] = None
        self._end_time: Optional[datetime] = None

        # What the run was asked to execute.
        self.dynamic_callable = dynamic_callable
        self.name = self._generate_run_name(name_prefix, self._created_on)
        self.inputs = inputs
        self.column_mapping: Optional[Mapping[str, str]] = column_mapping
        self._run = run

        # Filled in later; empty until then.
        self.result: Optional[BatchResult] = None
        self.metrics: Mapping[str, Any] = {}

        # Promptflow attributes that are intentionally not reproduced here:
        # self._use_remote_flow = False
        # self._from_flex_flow = True
        # self._from_prompty = False
        # self.flow = path to pointless flow file
        # self._experiment_name = name of folder containing pointless flow file
        # self._lineage_id = basically equivalent to a hex digest of the SHA256 hash of:
        #                    f"{uuid.getnode()}/{posix_full_path_to_pointless_folder}"
        # self._output_path = Path("<user_folder>/.promptflow/runs/<self.name>")
        # self._flow_name = name of pointless folder

    @property
    def status(self) -> RunStatus:
        """The current status of the run."""
        return self._status

    @property
    def created_on(self) -> datetime:
        """The creation time of the run."""
        return self._created_on

    @property
    def duration(self) -> Optional[timedelta]:
        """End time minus start time, or None while either of them is unset."""
        started, ended = self._start_time, self._end_time
        if started is not None and ended is not None:
            return ended - started
        return None

    @property
    def outputs(self) -> Sequence[Mapping[str, Any]]:
        """The per-line results of the run, with missing results replaced by empty mappings.

        An empty list is returned while the run has no result.
        """
        batch_result = self.result
        if batch_result is None:
            return []
        return [line_output or {} for line_output in batch_result.results]

    @property
    def previous_run(self) -> Optional["Run"]:
        """The run passed as ``run`` at construction time, if any."""
        return self._run

    @staticmethod
    def _generate_run_name(name_prefix: Optional[str], creation_time: datetime) -> str:
        """Build the run name from a prefix and the creation timestamp.

        :param Optional[str] name_prefix: The prefix to use; a random UUID when empty or None.
        :param datetime creation_time: The timestamp appended to the prefix.
        :return: The normalized ``<prefix>_<timestamp>`` name.
        :rtype: str
        """
        # Promptflow derived the name from the temporary folder that held the generated
        # flow YAML file (a single entry pointing at the dynamic_callable), e.g.
        #   azure_ai_evaluation_evaluators_common_base_eval_asyncevaluatorbase_l82059h3
        # Here the supplied prefix is used instead, falling back to a UUID, which is
        # just as opaque as the original folder name.
        prefix = name_prefix or str(uuid4())
        stamp = creation_time.strftime("%Y%m%d_%H%M%S_%f")
        return normalize_identifier_name(f"{prefix}_{stamp}")
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
# Original source:
|
|
6
|
+
# promptflow-devkit/promptflow/_sdk/operations/_local_storage_operations.py
|
|
7
|
+
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
from contextlib import AbstractContextManager
|
|
10
|
+
from datetime import datetime, timezone
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any, Final, Mapping, Optional, Tuple, Union
|
|
13
|
+
|
|
14
|
+
from ._result import BatchResult, TokenMetrics, BatchStatus
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Name of the folder under the user's home directory that NoOpLogger.file_path points to.
EVAL_USER_SUBFOLDER: Final[str] = ".evaluation"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class AbstractRunLogger(AbstractContextManager):
    """Interface of a run logger that can be used as a context manager."""

    @abstractmethod
    def get_logs(self) -> str:
        """Return the logs of the run.

        :return: The logs of the run.
        :rtype: str
        """
        ...

    @property
    @abstractmethod
    def file_path(self) -> Path:
        """Return the file path of the logger.

        :return: The file path of the logger.
        :rtype: Path
        """
        ...
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class AbstractRunStorage(ABC):
    """Interface for storing and loading the artifacts of a batch run."""

    @property
    @abstractmethod
    def logger(self) -> "AbstractRunLogger":
        """Get the logger of the run.

        :return: The logger of the run.
        :rtype: AbstractRunLogger
        """
        ...

    @abstractmethod
    def persist_result(self, result: Optional[BatchResult]) -> None:
        """Persist results of a batch engine execution (including any errors).

        :param Optional[BatchResult] result: The result to persist.
        """
        ...

    @abstractmethod
    def load_exception(self) -> Mapping[str, Any]:
        """Load the exception from the storage. If there was no exception, an empty
        mapping will be returned.

        :return: The exception as a mapping.
        :rtype: Mapping[str, Any]
        """
        ...

    @abstractmethod
    def load_inputs_and_outputs(self) -> Tuple[Mapping[str, Any], BatchResult]:
        """Load the inputs and outputs from the storage.

        :return: The inputs and outputs.
        :rtype: Tuple[Mapping[str, Any], BatchResult]
        """
        ...

    @abstractmethod
    def load_metrics(self) -> Mapping[str, Union[int, float, str]]:
        """Load the metrics from the storage.

        :return: The metrics.
        :rtype: Mapping[str, Union[int, float, str]]
        """
        ...
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class NoOpRunStorage(AbstractRunStorage):
    """Run storage that persists nothing and always loads empty data."""

    def __init__(self):
        """Create the storage together with its no-op logger."""
        self._logger = NoOpLogger()

    @property
    def logger(self) -> AbstractRunLogger:
        """The no-op logger created with this storage."""
        return self._logger

    def persist_result(self, result: Optional[BatchResult]) -> None:
        """Ignore the result; nothing is persisted."""
        return None

    def load_exception(self) -> Mapping[str, Any]:
        """Return an empty mapping, i.e. no exception."""
        return {}

    def load_inputs_and_outputs(self) -> Tuple[Mapping[str, Any], BatchResult]:
        """Return empty inputs and an empty, not-started batch result.

        The start and end time of the result are both the current UTC time.
        """
        timestamp = datetime.now(timezone.utc)
        empty_result = BatchResult(
            status=BatchStatus.NotStarted,
            total_lines=0,
            failed_lines=0,
            start_time=timestamp,
            end_time=timestamp,
            tokens=TokenMetrics(prompt_tokens=0, completion_tokens=0, total_tokens=0),
            details=[],
        )
        return {}, empty_result

    def load_metrics(self) -> Mapping[str, Union[int, float, str]]:
        """Return an empty mapping, i.e. no metrics."""
        return {}
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class NoOpLogger(AbstractRunLogger):
    """Run logger that records nothing.

    It can be used as a context manager; entering and exiting do nothing.
    """

    @property
    def file_path(self) -> Path:
        """Return ``EVAL_USER_SUBFOLDER`` resolved under the user's home directory."""
        home_dir = Path.home()
        return home_dir / EVAL_USER_SUBFOLDER

    def __enter__(self) -> None:
        # Nothing to set up.
        return None

    def __exit__(self, *args) -> None:
        # Nothing to tear down; returning None lets any exception propagate.
        return None

    def get_logs(self) -> str:
        """Return an empty string, since no logs are ever captured."""
        return ""
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
import dataclasses
|
|
6
|
+
import inspect
|
|
7
|
+
import sys
|
|
8
|
+
import traceback
|
|
9
|
+
|
|
10
|
+
from concurrent.futures import Executor
|
|
11
|
+
from datetime import datetime, timezone
|
|
12
|
+
from typing import Any, Callable, Dict, Mapping, Optional, Sequence, TextIO, Union
|
|
13
|
+
|
|
14
|
+
from ._run import Run, RunStatus
|
|
15
|
+
from ._trace import start_trace
|
|
16
|
+
from ._run_storage import AbstractRunStorage, NoOpRunStorage
|
|
17
|
+
from .._common._logging import incremental_print, print_red_error
|
|
18
|
+
from ._config import BatchEngineConfig
|
|
19
|
+
from ._exceptions import BatchEngineValidationError
|
|
20
|
+
from ._engine import DEFAULTS_KEY, BatchEngine, BatchEngineError, BatchResult, BatchStatus
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class RunSubmitter:
    """Submits a run to the batch engine and reports its outcome.

    Derived from promptflow-devkit/promptflow/_sdk/_orchestrator/run_submitter.py

    THIS WILL BE REMOVED IN A FUTURE CODE UPDATE
    """

    def __init__(self, config: BatchEngineConfig, executor: Optional[Executor] = None):
        """Create a submitter.

        :param BatchEngineConfig config: Engine configuration; its ``logger`` and
            ``raise_on_error`` attributes are read by this class.
        :param Optional[Executor] executor: Executor forwarded to the batch engine.
        """
        # self._client = PFClient instance
        # self._config = PFClient config
        # self.run_operations = RunOperations instance

        # TODO ralphe: Use proper logger here. Old code did LoggerFactory.get_logger(__name__)
        self._config = config
        self._executor = executor

    async def submit(
        self,
        dynamic_callable: Callable,
        inputs: Sequence[Mapping[str, Any]],
        column_mapping: Optional[Mapping[str, str]],
        *,
        name_prefix: Optional[str] = None,
        created_on: Optional[datetime] = None,
        storage_creator: Optional[Callable[[Run], AbstractRunStorage]] = None,
        **kwargs,
    ) -> Run:
        """Create a run, execute it through the batch engine and stream its output.

        :param Callable dynamic_callable: The callable the run executes.
        :param Sequence[Mapping[str, Any]] inputs: The input rows of the run.
        :param Optional[Mapping[str, str]] column_mapping: Optional column mapping.
        :keyword Optional[str] name_prefix: Prefix handed to the ``Run`` constructor.
        :keyword Optional[datetime] created_on: Creation time handed to the ``Run`` constructor.
        :keyword storage_creator: Factory building the run storage; when omitted a
            ``NoOpRunStorage`` is used.
        :keyword kwargs: ``run`` (a previous run) is popped and given to the new ``Run``;
            ``attributes`` is read for tracing; the rest is forwarded to ``_submit_bulk_run``.
        :return: The submitted run.
        :rtype: Run
        """

        # The old code always spun up two threads here using a ThreadPoolExecutor:
        # 1. One thread essentially did nothing of value (since tracing was disabled, and we
        #    don't care about checking for the latest PromptFlow version number now)
        # 2. The other thread did the _run_bulk call. This was followed by a
        #    wait(return_when=ALL_COMPLETED)
        # This quite frankly is unnecessary complexity since the evaluation code already
        # calls this in the context of ThreadPoolThread. So we can just do the equivalent
        # of the _run_bulk code here directly.
        # In a future code refactor, all of this will be cleaned up in favour of proper
        # async/await code.

        run: Run = Run(
            dynamic_callable=dynamic_callable,
            name_prefix=name_prefix,
            inputs=inputs,
            column_mapping=column_mapping,
            created_on=created_on,
            run=kwargs.pop("run", None),
        )

        attributes: Dict[str, Any] = kwargs.get("attributes", {})
        collection_for_run: str = run.name
        start_trace(attributes=attributes, run=run, _collection=collection_for_run)

        self._validate_inputs(run=run)

        local_storage = storage_creator(run) if storage_creator else NoOpRunStorage()
        with local_storage.logger:
            run._status = RunStatus.PREPARING

            # unnecessary Flow loading code was removed here. Instead do direct calls to _submit_bulk_run
            await self._submit_bulk_run(run=run, local_storage=local_storage, **kwargs)

        self.stream_run(run=run, storage=local_storage, raise_on_error=self._config.raise_on_error)
        return run

    async def _submit_bulk_run(self, run: Run, local_storage: AbstractRunStorage, **kwargs) -> None:
        """Execute ``run`` through a ``BatchEngine`` and record the outcome on the run.

        The batch result is always handed to ``local_storage.persist_result`` and
        attached to ``run.result``, whether execution succeeded or failed.

        :param Run run: The run to execute; its status, times, metrics and result are updated.
        :param AbstractRunStorage local_storage: Storage used by the engine and for persistence.
        :raises BatchEngineValidationError: If the referenced previous run is not completed,
            its output count differs from the number of inputs, or the column mapping is invalid.
        :raises Exception: Any non-validation error raised while executing is re-raised.
        """
        logger = self._config.logger

        logger.info(f"Submitting run {run.name}, log path: {local_storage.logger.file_path}")

        # Old code loaded the Flex flow, parsed input and outputs types. That logic has been
        # removed since it is unnecessary. It also parsed and set environment variables. This
        # has also been removed since it can be problematic in a multi-threaded environment.

        if run.previous_run:
            previous: Run = run.previous_run
            if previous.status != RunStatus.COMPLETED:
                raise BatchEngineValidationError(
                    f"Referenced run {previous.name} is not completed, got status {previous.status.value}."
                )
            if previous.outputs is not None:
                if len(previous.outputs) != len(run.inputs):
                    raise BatchEngineValidationError(
                        f"Referenced run {previous.name} has {len(previous.outputs)} outputs, "
                        f"but {len(run.inputs)} inputs are provided."
                    )

                # load in the previous run's outputs and inputs into the list of dictionaries to allow for
                # the previous run's outputs to be used as inputs for the current run
                run.inputs = [
                    {"run.outputs": previous.outputs[i], "run.inputs": previous.inputs[i], **current}
                    for i, current in enumerate(run.inputs)
                ]

        self._validate_column_mapping(run.column_mapping)

        run._status = RunStatus.RUNNING
        run._start_time = datetime.now(timezone.utc)
        batch_result: Optional[BatchResult] = None

        try:
            batch_engine = BatchEngine(
                run.dynamic_callable,
                config=self._config,
                storage=local_storage,
                executor=self._executor,
            )

            batch_result = await batch_engine.run(data=run.inputs, column_mapping=run.column_mapping, id=run.name)
            run._status = RunStatus.from_batch_result_status(batch_result.status)

            if run._status != RunStatus.COMPLETED:
                error_logs = [f"Run {run.name} failed with status {batch_result.status}."]
                if batch_result.error:
                    error_logs.append(f"Error: {str(batch_result.error)}")
                logger.warning("\n".join(error_logs))
        except Exception as e:
            run._status = RunStatus.FAILED
            # when run failed in executor, store the exception in result and dump to file
            logger.warning(f"Run {run.name} failed when executing in executor with exception {e}.")
            if batch_result is None:
                # The engine never produced a result; synthesize a failed one to carry the error.
                batch_result = BatchResult(
                    status=BatchStatus.Failed,
                    total_lines=0,
                    failed_lines=0,
                    start_time=datetime.now(timezone.utc),
                    end_time=datetime.now(timezone.utc),
                    tokens=None,
                    details=[],
                )
                batch_result.error = e
            elif not batch_result.error:
                batch_result.error = e
            # for user error, swallow stack trace and return failed run since user don't need the stack trace
            if not isinstance(e, BatchEngineValidationError):
                # for other errors, raise it to user to help debug root cause.
                raise
            # won't raise the exception since it's already included in run object.
        finally:
            # persist inputs, outputs and metrics
            local_storage.persist_result(batch_result)
            # exceptions
            # local_storage.dump_exception(exception=exception, batch_result=batch_result)  # TODO ralphe: persist_result should handle this

            # system metrics: token usage, duration and per-line counts are currently not
            # recorded, so the run always receives an empty metrics dictionary.
            system_metrics: Dict[str, Any] = {}

            run._end_time = datetime.now(timezone.utc)
            run.metrics = system_metrics
            run.result = batch_result

    @staticmethod
    def _validate_inputs(run: Run):
        """Ensure the run has either input data or a previous run to draw from.

        :param Run run: The run to validate.
        :raises BatchEngineValidationError: If the run has neither inputs nor a previous run.
        """
        if not run.inputs and not run.previous_run:
            raise BatchEngineValidationError("Either data, or a previous run must be specified for the evaluation run.")

    @staticmethod
    def _validate_column_mapping(column_mapping: Optional[Mapping[str, str]]):
        """Validate an optional column mapping.

        A falsy mapping (``None`` or empty) is accepted as "no mapping". Otherwise it
        must be a ``Mapping`` with at least one string value starting with ``$``.

        :param Optional[Mapping[str, str]] column_mapping: The mapping to validate.
        :raises BatchEngineValidationError: If the mapping is not a ``Mapping`` or
            contains only static values.
        """
        if not column_mapping:
            return

        if not isinstance(column_mapping, Mapping):
            raise BatchEngineValidationError(f"Column mapping must be a dict, got {type(column_mapping)}.")

        has_mapping = any(isinstance(v, str) and v.startswith("$") for v in column_mapping.values())
        if not has_mapping:
            raise BatchEngineValidationError(
                "Column mapping must contain at least one mapping binding, "
                f"current column mapping contains all static values: {column_mapping}"
            )

    @staticmethod
    def stream_run(run: Run, storage: AbstractRunStorage, raise_on_error: bool) -> None:
        """
        Stream the output of the batch execution.

        Prints the stored logs and a run summary to stdout, then reports a failed
        or canceled run.

        :param Run run: The run to stream.
        :param AbstractRunStorage storage: The storage to use for the output.
        :param bool raise_on_error: When True, a failed or canceled run raises;
            otherwise the error message is printed in red.
        :raises BatchEngineError: If the run failed or was canceled and ``raise_on_error`` is True.
        """

        if run is None or storage is None:
            return

        file_handler = sys.stdout
        try:
            printed = 0
            available_logs = storage.logger.get_logs()
            incremental_print(available_logs, printed, file_handler)
            RunSubmitter._print_run_summary(run, file_handler)
        except KeyboardInterrupt:
            # Only notify the user; this notice must not be mistaken for the run's
            # failure reason below, so it is not kept as the error message.
            print("The output streaming for the run was interrupted, but the run is still executing.")

        if run.status == RunStatus.FAILED or run.status == RunStatus.CANCELED:
            error_message: Optional[str] = None
            if run.status == RunStatus.FAILED:
                # Get the first error message from the results, or use a default one
                if run.result and run.result.error:
                    error_message = "".join(
                        traceback.format_exception(
                            type(run.result.error), run.result.error, run.result.error.__traceback__
                        )
                    )
                elif run.result and run.result.details:
                    err = next((r.error for r in run.result.details if r.error), None)
                    if err and err.exception:
                        error_message = "".join(
                            traceback.format_exception(type(err.exception), err.exception, err.exception.__traceback__)
                        )
                    elif err and err.details:
                        error_message = err.details

                if not error_message:
                    error_message = "Run fails with unknown error."
            else:
                error_message = "Run is canceled."
            if raise_on_error:
                raise BatchEngineError(error_message)
            else:
                print_red_error(error_message)

    @staticmethod
    def _print_run_summary(run: Run, text_out: Union[TextIO, Any]) -> None:
        """Write a short summary (name, status, start time, duration) of ``run`` to ``text_out``.

        :param Run run: The run to summarize.
        :param text_out: Any object with a ``write`` method accepting a string.
        """
        duration = str(run.duration)
        text_out.write(
            "======= Run Summary =======\n\n"
            f'Run name: "{run.name}"\n'
            f'Run status: "{run.status.value}"\n'
            f'Start time: "{run.created_on}"\n'
            f'Duration: "{duration}"\n\n'
        )
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
from enum import IntEnum, auto, unique
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@unique
class BatchStatus(IntEnum):
    """Lifecycle states of a batch run, ordered so that terminal states compare highest."""

    NotStarted = 0
    Running = auto()

    # NOTE: DO NOT REORDER THESE ENUMS. The order is important for the is_terminated method
    #       and other logic in the code to work properly
    Completed = auto()
    Canceled = auto()
    Failed = auto()

    @staticmethod
    def is_terminated(status: "BatchStatus") -> bool:
        """Return True when ``status`` is Completed or any later (terminal) state."""
        return BatchStatus.Completed <= status

    @staticmethod
    def is_failed(status: "BatchStatus") -> bool:
        """Return True when ``status`` is Failed or Canceled."""
        unsuccessful = (BatchStatus.Failed, BatchStatus.Canceled)
        return status in unsuccessful
|