azure-ai-evaluation 1.3.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of azure-ai-evaluation might be problematic.
- azure/ai/evaluation/__init__.py +27 -1
- azure/ai/evaluation/_azure/_models.py +6 -6
- azure/ai/evaluation/_common/constants.py +6 -2
- azure/ai/evaluation/_common/rai_service.py +39 -5
- azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
- azure/ai/evaluation/_common/raiclient/_client.py +128 -0
- azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
- azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
- azure/ai/evaluation/_common/raiclient/_version.py +9 -0
- azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
- azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
- azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
- azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
- azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +1225 -0
- azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/py.typed +1 -0
- azure/ai/evaluation/_common/utils.py +23 -3
- azure/ai/evaluation/_constants.py +7 -0
- azure/ai/evaluation/_converters/__init__.py +3 -0
- azure/ai/evaluation/_converters/_ai_services.py +804 -0
- azure/ai/evaluation/_converters/_models.py +302 -0
- azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -3
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +104 -0
- azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
- azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +9 -4
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +42 -22
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +1 -1
- azure/ai/evaluation/_evaluate/_eval_run.py +2 -2
- azure/ai/evaluation/_evaluate/_evaluate.py +109 -64
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -89
- azure/ai/evaluation/_evaluate/_utils.py +3 -3
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +23 -3
- azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +120 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +21 -2
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +44 -4
- azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +4 -2
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +44 -5
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +16 -4
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +42 -5
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +15 -0
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +15 -0
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +15 -0
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +15 -0
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +28 -4
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +21 -2
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +26 -3
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +22 -4
- azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +152 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +161 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +26 -3
- azure/ai/evaluation/_evaluators/_qa/_qa.py +51 -7
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +26 -2
- azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +158 -0
- azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +99 -0
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +21 -2
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +113 -4
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +23 -3
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +24 -5
- azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +148 -0
- azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +117 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +292 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +71 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +103 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +2 -0
- azure/ai/evaluation/_exceptions.py +5 -0
- azure/ai/evaluation/_legacy/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +21 -0
- azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
- azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
- azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
- azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
- azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
- azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
- azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
- azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
- azure/ai/evaluation/_legacy/_batch_engine/_config.py +45 -0
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +368 -0
- azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
- azure/ai/evaluation/_legacy/_batch_engine/_logging.py +292 -0
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +23 -0
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +99 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +121 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +217 -0
- azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
- azure/ai/evaluation/_legacy/_batch_engine/_trace.py +105 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +82 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
- azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
- azure/ai/evaluation/_legacy/prompty/_connection.py +182 -0
- azure/ai/evaluation/_legacy/prompty/_exceptions.py +59 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +313 -0
- azure/ai/evaluation/_legacy/prompty/_utils.py +545 -0
- azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
- azure/ai/evaluation/_safety_evaluation/__init__.py +1 -1
- azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +251 -150
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/red_team/__init__.py +19 -0
- azure/ai/evaluation/red_team/_attack_objective_generator.py +195 -0
- azure/ai/evaluation/red_team/_attack_strategy.py +45 -0
- azure/ai/evaluation/red_team/_callback_chat_target.py +74 -0
- azure/ai/evaluation/red_team/_default_converter.py +21 -0
- azure/ai/evaluation/red_team/_red_team.py +1887 -0
- azure/ai/evaluation/red_team/_red_team_result.py +382 -0
- azure/ai/evaluation/red_team/_utils/__init__.py +3 -0
- azure/ai/evaluation/red_team/_utils/constants.py +65 -0
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +165 -0
- azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +192 -0
- azure/ai/evaluation/simulator/_adversarial_scenario.py +3 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +54 -27
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +145 -0
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +71 -1
- azure/ai/evaluation/simulator/_simulator.py +1 -1
- {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/METADATA +80 -15
- azure_ai_evaluation-1.5.0.dist-info/RECORD +207 -0
- {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/WHEEL +1 -1
- azure/ai/evaluation/simulator/_tracing.py +0 -89
- azure_ai_evaluation-1.3.0.dist-info/RECORD +0 -119
- {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,217 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+import dataclasses
+import sys
+from datetime import datetime, timezone
+from typing import Any, Callable, Dict, Mapping, Optional, Sequence, TextIO, Union
+
+from ._run import Run, RunStatus
+from ._trace import start_trace, is_collection_writeable
+from ._run_storage import AbstractRunStorage, NoOpRunStorage
+from ._logging import incremental_print, print_red_error
+from ._config import BatchEngineConfig
+from ._exceptions import BatchEngineValidationError
+from ._engine import BatchEngine, BatchEngineError, BatchResult
+
+
+class RunSubmitter:
+    """Submits run to executor
+    promptflow-devkit/promptflow/_sdk/_orchestrator/run_submitter.py
+
+    THIS WILL BE REMOVED IN A FUTURE CODE UPDATE"""
+
+    def __init__(self, config: BatchEngineConfig):
+        # self._client = PFClient instance
+        # self._config = PFClient config
+        # self.run_operations = RunOperations instance
+
+        # TODO ralphe: Use proper logger here. Old code did LoggerFactory.get_logger(__name__)
+        self._config = config
+
+    def submit(
+        self,
+        dynamic_callable: Callable,
+        inputs: Sequence[Mapping[str, Any]],
+        column_mapping: Mapping[str, str],
+        *,
+        name_prefix: Optional[str] = None,
+        created_on: Optional[datetime] = None,
+        storage_creator: Optional[Callable[[Run], AbstractRunStorage]] = None,
+        **kwargs,
+    ) -> Run:
+        # The old code always spun up two threads here using a ThreadPoolExecutor:
+        # 1. One thread essentially did nothing of value (since tracing was disabled, and we
+        #    don't care about checking for the latest PromptFlow version number now)
+        # 2. The other thread did the _run_bulk call. This was followed by a
+        #    wait(return_when=ALL_COMPLETED)
+        # This quite frankly is unnecessary complexity since the evaluation code already
+        # calls this in the context of ThreadPoolThread. So we can just do the equivalent
+        # of the _run_bulk code here directly.
+        # In a future code refactor, all of this will be cleaned up in favour of proper
+        # async/await code.
+        run: Run = kwargs.pop("run", None) or Run(
+            dynamic_callable=dynamic_callable,
+            name_prefix=name_prefix,
+            inputs=inputs,
+            column_mapping=column_mapping,
+            created_on=created_on,
+        )
+
+        logger = self._config.logger
+        attributes: Dict[str, Any] = kwargs.get("attributes", {})
+        collection_for_run: Optional[str] = None
+
+        logger.debug("start trace for flow run...")
+        logger.debug("flow path for run.start_trace: %s", run.name)
+
+        if is_collection_writeable():
+            logger.debug("trace collection is writeable, will use flow name as collection...")
+            collection_for_run = run.name
+            logger.debug("collection for run: %s", collection_for_run)
+        else:
+            logger.debug("trace collection is protected, will honor existing collection.")
+        start_trace(attributes=attributes, run=run, _collection=collection_for_run)
+
+        self._validate_inputs(run=run)
+
+        local_storage = storage_creator(run) if storage_creator else NoOpRunStorage()
+        with local_storage.logger:
+            run._status = RunStatus.PREPARING
+
+            # unnecessary Flow loading code was removed here. Instead do direct calls to _submit_bulk_run
+            self._submit_bulk_run(run=run, local_storage=local_storage, **kwargs)
+
+        self.stream_run(run=run, storage=local_storage, raise_on_error=True)
+        return run
+
+    def _submit_bulk_run(self, run: Run, local_storage: AbstractRunStorage, **kwargs) -> None:
+        logger = self._config.logger
+
+        logger.info(f"Submitting run {run.name}, log path: {local_storage.logger.file_path}")
+
+        # Old code loaded the Flex flow, parsed input and outputs types. That logic has been
+        # removed since it is unnecessary. It also parsed and set environment variables. This
+        # has also been removed since it can be problematic in a multi-threaded environment.
+
+        self._validate_column_mapping(run.column_mapping)
+
+        run._status = RunStatus.RUNNING
+        run._start_time = datetime.now(timezone.utc)
+        batch_result: Optional[BatchResult] = None
+
+        try:
+            batch_engine = BatchEngine(
+                run.dynamic_callable,
+                storage=local_storage,
+                batch_timeout_sec=self._config.batch_timeout_seconds,
+                line_timeout_sec=self._config.run_timeout_seconds,
+                max_worker_count=self._config.max_concurrency,
+                **kwargs,
+            )
+
+            batch_result = batch_engine.run(data=run.inputs, column_mapping=run.column_mapping, id=run.name)
+            run._status = RunStatus.from_batch_result_status(batch_result.status)
+
+            error_logs: Sequence[str] = []
+            if run._status != RunStatus.COMPLETED:
+                error_logs.append(f"Run {run.name} failed with status {batch_result.status}.")
+                if batch_result.error:
+                    error_logs.append(f"Error: {str(batch_result.error)}")
+
+            if error_logs:
+                logger.warning("\n".join(error_logs))
+        except Exception as e:
+            run._status = RunStatus.FAILED
+            # when run failed in executor, store the exception in result and dump to file
+            logger.warning(f"Run {run.name} failed when executing in executor with exception {e}.")
+            # for user error, swallow stack trace and return failed run since user don't need the stack trace
+            if not isinstance(e, BatchEngineValidationError):
+                # for other errors, raise it to user to help debug root cause.
+                raise e
+            # won't raise the exception since it's already included in run object.
+        finally:
+            # persist inputs, outputs and metrics
+            local_storage.persist_result(batch_result)
+            # exceptions
+            # local_storage.dump_exception(exception=exception, batch_result=batch_result) # TODO ralphe: persist_result should handle this
+            # system metrics
+            system_metrics = {}
+            if batch_result:
+                system_metrics.update(dataclasses.asdict(batch_result.tokens))  # token related
+                system_metrics.update(
+                    {
+                        "duration": batch_result.duration.total_seconds(),
+                        # "__pf__.lines.completed": batch_result.total_lines - batch_result.failed_lines,
+                        # "__pf__.lines.failed": batch_result.failed_lines,
+                    }
+                )
+
+            run._end_time = datetime.now(timezone.utc)
+            run.metrics = system_metrics
+            run.result = batch_result
+
+    @staticmethod
+    def _validate_inputs(run: Run):
+        if not run.inputs:
+            raise BatchEngineValidationError("Data must be specified for evaluation run.")
+
+    @staticmethod
+    def _validate_column_mapping(column_mapping: Mapping[str, str]):
+        if not isinstance(column_mapping, Mapping):
+            raise BatchEngineValidationError(f"Column mapping must be a dict, got {type(column_mapping)}.")
+
+        has_mapping = any([isinstance(v, str) and v.startswith("$") for v in column_mapping.values()])
+        if not has_mapping:
+            raise BatchEngineValidationError(
+                "Column mapping must contain at least one mapping binding, "
+                f"current column mapping contains all static values: {column_mapping}"
+            )
+
+    @staticmethod
+    def stream_run(run: Run, storage: AbstractRunStorage, raise_on_error: bool) -> None:
+        """
+        Stream the output of the batch execution.
+
+        :param Run run: The run to stream.
+        :param AbstractRunStorage storage: The storage to use for the output.
+        """
+
+        # TODO ralphe: This doesn't seem to do anything useful beyond just print
+        #              a run summary at the end. This is because by the time it gets
+        #              invoked even in the original code, the run has already completed.
+
+        if run is None or storage is None:
+            return
+
+        file_handler = sys.stdout
+        try:
+            printed = 0
+            available_logs = storage.logger.get_logs()
+            incremental_print(available_logs, printed, file_handler)
+            RunSubmitter._print_run_summary(run, file_handler)
+        except KeyboardInterrupt:
+            error_message = "The output streaming for the run was interrupted, but the run is still executing."
+            print(error_message)
+
+        if run.status == RunStatus.FAILED or run.status == RunStatus.CANCELED:
+            if run.status == RunStatus.FAILED:
+                error_message = storage.load_exception().get("message", "Run fails with unknown error.")
+            else:
+                error_message = "Run is canceled."
+            if raise_on_error:
+                raise BatchEngineError(error_message)
+            else:
+                print_red_error(error_message)
+
+    @staticmethod
+    def _print_run_summary(run: Run, text_out: Union[TextIO, Any]) -> None:
+        duration = str(run.duration)
+        text_out.write(
+            "======= Run Summary =======\n\n"
+            f'Run name: "{run.name}"\n'
+            f'Run status: "{run.status.value}"\n'
+            f'Start time: "{run.created_on}"\n'
+            f'Duration: "{duration}"\n\n'
+        )
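This 217-line hunk corresponds to the new azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py listed above (+217 -0). One rule worth calling out is _validate_column_mapping: a mapping passes only if at least one value is a "$"-prefixed binding; all-static mappings are rejected. A minimal sketch of that rule (the mapping keys and values here are hypothetical, not taken from the package):

    from azure.ai.evaluation._legacy._batch_engine._run_submitter import RunSubmitter
    from azure.ai.evaluation._legacy._batch_engine._exceptions import BatchEngineValidationError

    # Accepted: "query" is bound to a "$"-prefixed reference into the run data.
    RunSubmitter._validate_column_mapping({"query": "${data.query}", "mode": "strict"})

    # Rejected: every value is a static string, so no binding exists.
    try:
        RunSubmitter._validate_column_mapping({"mode": "strict"})
    except BatchEngineValidationError as exc:
        print(exc)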
@@ -0,0 +1,25 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+from enum import IntEnum, auto, unique
+
+
+@unique
+class BatchStatus(IntEnum):
+    NotStarted = 0
+    Running = auto()
+
+    # NOTE: DO NOT REORDER THESE ENUMS. The order is important for the is_terminated method
+    #       and other logic in the code to work properly
+    Completed = auto()
+    Canceled = auto()
+    Failed = auto()
+
+    @staticmethod
+    def is_terminated(status: "BatchStatus") -> bool:
+        return status >= BatchStatus.Completed
+
+    @staticmethod
+    def is_failed(status: "BatchStatus") -> bool:
+        return status == BatchStatus.Failed or status == BatchStatus.Canceled
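This 25-line hunk matches the new azure/ai/evaluation/_legacy/_batch_engine/_status.py listed above (+25 -0). The DO NOT REORDER note is load-bearing: is_terminated compares raw IntEnum values, so it only works while Completed, Canceled, and Failed come after Running. A small sketch of the intended behaviour, assuming that module path:

    from azure.ai.evaluation._legacy._batch_engine._status import BatchStatus

    assert not BatchStatus.is_terminated(BatchStatus.Running)   # Running < Completed
    assert BatchStatus.is_terminated(BatchStatus.Completed)     # terminal
    assert BatchStatus.is_terminated(BatchStatus.Canceled)      # terminal
    assert BatchStatus.is_failed(BatchStatus.Canceled)          # canceled counts as failed
    assert not BatchStatus.is_failed(BatchStatus.Completed)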
@@ -0,0 +1,105 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+# Pretty much all this code will be removed
+
+import logging
+import os
+from typing import Any, Dict, Optional
+
+from ._openai_injector import inject_openai_api
+
+
+def start_trace(
+    *,
+    resource_attributes: Optional[Dict] = None,
+    collection: Optional[str] = None,
+    **kwargs: Any,
+) -> None:
+    """Promptflow instrumentation.
+
+    :param resource_attributes: Specify the resource attributes for current process.
+    :type resource_attributes: typing.Optional[dict]
+    :param collection: Specify the collection for current tracing.
+    :type collection: typing.Optional[str]
+    """
+
+    logging.debug("injecting OpenAI API...")
+    inject_openai_api()
+    logging.debug("OpenAI API injected.")
+
+    res_attrs: Dict[str, str] = {"service.name": "promptflow"}
+    if resource_attributes:
+        logging.debug("specified resource attributes: %s", resource_attributes)
+        res_attrs.update(resource_attributes)
+
+    # determine collection
+    collection_user_specified = collection is not None
+    if not collection_user_specified:
+        collection = kwargs.get("_collection", _get_collection_from_cwd())
+        # logging.debug("collection is not user specified")
+        # if is_collection_writeable():
+        #     # internal parameter for devkit call
+        #     _collection = kwargs.get("_collection", None)
+        #     if _collection is not None:
+        #         logging.debug("received internal parameter _collection: %s, will use this", _collection)
+        #         collection = _collection
+        #     else:
+        #         logging.debug("trying to get from current working directory...")
+        #         collection = _get_collection_from_cwd()
+        # # TODO ralphe: OpenTelemetry dependency. This is a future task to resolve.
+        # # else:
+        # #     logging.debug("collection is protected, will directly use that...")
+        # #     tracer_provider: TracerProvider = trace.get_tracer_provider()
+        # #     collection = tracer_provider.resource.attributes["collection"]
+    logging.info("collection: %s", collection)
+    res_attrs["collection"] = collection or "default"
+    logging.info("resource attributes: %s", res_attrs)
+
+    # if user specifies collection, we will add a flag on tracer provider to avoid override
+    _set_tracer_provider(res_attrs, protected_collection=collection_user_specified)
+
+    # Rest of code is removed since we are removing promptflow-devkit dependency
+
+
+def is_collection_writeable() -> bool:
+    # TODO ralphe: This has OpenTelemetry dependency. That is a future task to resolve.
+    # return not getattr(trace.get_tracer_provider(), TRACER_PROVIDER_PROTECTED_COLLECTION_ATTR, False)
+    return True
+
+
+def _get_collection_from_cwd() -> str:
+    """Try to use cwd folder name as collection name; will fall back to default value if run into exception."""
+    cur_folder_name = ""
+    try:
+        cwd = os.getcwd()
+        cur_folder_name = os.path.basename(cwd)
+    except Exception:  # pylint: disable=broad-except
+        # possible exception: PermissionError, FileNotFoundError, OSError, etc.
+        pass
+    collection = cur_folder_name or "default"
+    return collection
+
+
+def _set_tracer_provider(res_attrs: Dict[str, str], protected_collection: bool) -> None:
+    # TODO ralphe: OpenTelemetry dependency. This is a future task to resolve.
+    pass
+    # res = Resource(attributes=res_attrs)
+    # tracer_provider = TracerProvider(resource=res)
+
+    # cur_tracer_provider = trace.get_tracer_provider()
+    # if isinstance(cur_tracer_provider, TracerProvider):
+    #     logging.info("tracer provider is already set, will merge the resource attributes...")
+    #     cur_res = cur_tracer_provider.resource
+    #     logging.debug("current resource: %s", cur_res.attributes)
+    #     new_res = cur_res.merge(res)
+    #     cur_tracer_provider._resource = new_res
+    #     logging.info("tracer provider is updated with resource attributes: %s", new_res.attributes)
+    # else:
+    #     trace.set_tracer_provider(tracer_provider)
+    #     logging.info("tracer provider is set with resource attributes: %s", res.attributes)
+
+    # if protected_collection:
+    #     logging.info("user specifies collection, will add a flag on tracer provider to avoid override...")
+    #     setattr(trace.get_tracer_provider(), TRACER_PROVIDER_PROTECTED_COLLECTION_ATTR, True)
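This 105-line hunk matches the new azure/ai/evaluation/_legacy/_batch_engine/_trace.py listed above (+105 -0). With the OpenTelemetry calls stubbed out, the only observable behaviour left is collection resolution: an explicitly passed collection is used as-is and marked protected, otherwise the internal _collection kwarg or the current working directory's folder name is used, falling back to "default". A rough sketch under that assumption:

    from azure.ai.evaluation._legacy._batch_engine import _trace

    _trace.start_trace(collection="my-evals")   # explicit name wins and is flagged protected
    _trace.start_trace()                        # falls back to the cwd folder name (or "default")
    print(_trace._get_collection_from_cwd())    # e.g. the basename of os.getcwd()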
@@ -0,0 +1,82 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+import os
+import re
+from typing import Any, Mapping, Sequence, Tuple
+
+
+def normalize_identifier_name(name: str) -> str:
+    """Normalize the identifier name to a valid Python variable name.
+
+    Args:
+        name (str): The identifier name to normalize.
+
+    Returns:
+        str: The normalized identifier name.
+    """
+    normalized = re.sub(r"\W", "_", name.strip())
+    if normalized[0].isdigit():
+        normalized = f"_{normalized}"
+    return normalized
+
+
+def get_int_env_var(env_var_name: str, default_value: int = 0) -> int:
+    """Get the integer value of the environment variable.
+
+    Args:
+        env_var_name (str): The name of the environment variable.
+        default_value (int): The default value if the environment variable is not set.
+
+    Returns:
+        int: The integer value of the environment variable.
+    """
+    try:
+        value = os.getenv(env_var_name, default_value)
+        return int(value)
+    except ValueError:
+        return default_value
+
+
+def get_value_from_path(path: str, data: Mapping[str, Any]) -> Tuple[bool, Any]:
+    """Tries to get a value from a mapping based on the specified path. The path is a
+    string with dot-separated keys (e.g. data.nested_1.nested_2).
+
+    This will interpret the path prioritizing a depth first search with the shortest
+    key possible at each level. If for example you had the following data:
+    {
+        "foo": {
+            "bar": {
+                "happy": 12
+            }
+        },
+        "foo.bar": {
+            "none": 14,
+            "random": { "some": 15 }
+        },
+        "foo.bar.none": 16
+    }
+    And you asked for foo.bar.none, the returned value would be 14
+    """
+
+    def _get_value(data: Mapping[str, Any], parts: Sequence[str]) -> Tuple[bool, Any]:
+        if len(parts) == 0:
+            return True, data
+
+        for i in range(1, len(parts) + 1):
+            key = ".".join(parts[:i])
+            if isinstance(data, Mapping) and key in data:
+                found, match = _get_value(data[key], parts[i:])
+                if found:
+                    return found, match
+
+        return False, None
+
+    if path is None or data is None:
+        return False, None
+
+    parts = path.strip().split(".")
+    if len(parts) == 0:
+        return False, None
+    return _get_value(data, parts)
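This 82-line hunk matches the new azure/ai/evaluation/_legacy/_batch_engine/_utils.py listed above (+82 -0). The get_value_from_path docstring is worth tracing through: lookup tries the shortest key first at each level and backtracks, so for the sample data the "foo.bar" entry wins and the literal "foo.bar.none" key is never consulted. A quick check, assuming that module path:

    from azure.ai.evaluation._legacy._batch_engine._utils import get_value_from_path

    data = {
        "foo": {"bar": {"happy": 12}},
        "foo.bar": {"none": 14, "random": {"some": 15}},
        "foo.bar.none": 16,
    }

    print(get_value_from_path("foo.bar.none", data))   # (True, 14)  -> via "foo.bar" then "none"
    print(get_value_from_path("foo.bar.happy", data))  # (True, 12)  -> via "foo", "bar", "happy"
    print(get_value_from_path("foo.missing", data))    # (False, None)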
@@ -0,0 +1,131 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+import asyncio
+import contextvars
+import dataclasses
+from asyncio import Task
+from concurrent.futures import ThreadPoolExecutor
+from typing import Any, AsyncIterator, Callable, Iterator, Mapping, Optional, Sequence, Tuple, cast
+
+
+class ThreadPoolExecutorWithContext(ThreadPoolExecutor):
+    # Original source:
+    # promptflow-tracing/promptflow/tracing/_context_utils.py
+
+    def __init__(
+        self,
+        max_workers: Optional[int] = None,
+        thread_name_prefix: str = "",
+        initializer: Optional[Callable] = None,
+        initargs: Tuple[Any, ...] = (),
+    ) -> None:
+        """The ThreadPoolExecutorWithContext is an extended thread pool implementation
+        which will copy the context from the current thread to the child threads.
+        Thus the traced functions in child threads could keep parent-child relationship in the tracing system.
+        The arguments are the same as ThreadPoolExecutor.
+
+        Args:
+            max_workers: The maximum number of threads that can be used to
+                execute the given calls.
+            thread_name_prefix: An optional name prefix to give our threads.
+            initializer: A callable used to initialize worker threads.
+            initargs: A tuple of arguments to pass to the initializer.
+        """
+        current_context = contextvars.copy_context()
+        initializer_args = (current_context, initializer, initargs)
+        super().__init__(max_workers, thread_name_prefix, self.set_context_then_call, initializer_args)
+
+    @staticmethod
+    def set_context_then_call(
+        context: contextvars.Context,
+        initializer: Optional[Callable],
+        initargs: Tuple[Any, ...],
+    ) -> None:
+        for var, value in context.items():
+            var.set(value)
+        if initializer:
+            initializer(*initargs)
+
+
+def _has_running_loop() -> bool:
+    """Check if the current thread has a running event loop."""
+    # When using asyncio.get_running_loop(), a RuntimeError is raised if there is no running event loop.
+    # So, we use a try-catch block to determine whether there is currently an event loop in place.
+    #
+    # Note that this is the only way to check whether there is a running loop now, see:
+    # https://docs.python.org/3/library/asyncio-eventloop.html?highlight=get_running_loop#asyncio.get_running_loop
+    try:
+        asyncio.get_running_loop()
+        return True
+    except RuntimeError:
+        return False
+
+
+def async_run_allowing_running_loop(async_func, *args, **kwargs):
+    """Run an async function in a new thread, allowing the current thread to have a running event loop.
+
+    When run in an async environment (e.g., in a notebook), because each thread allows only one event
+    loop, using asyncio.run directly leads to a RuntimeError ("asyncio.run() cannot be called from a
+    running event loop").
+
+    To address this issue, we add a check for the event loop here. If the current thread already has an
+    event loop, we run _exec_batch in a new thread; otherwise, we run it in the current thread.
+    """
+
+    if _has_running_loop():
+        # TODO ralphe: The logic here makes absolutely no sense to me. If you already have an
+        #              async event loop running, why would you want to start up a new thread,
+        #              create a new event loop, and run the async function in a new thread?
+        #              You can just use the following to schedule the async function call on
+        #              the existing event loop:
+        #              asyncio.get_running_loop().create_task(async_func(*args, **kwargs)).result()
+        #              The correct thing to do here is not make these decisions here at all.
+        #              Instead, all the BatchEngine code should be async first, with the event
+        #              loop being started by the callers of that code. For now, I am keeping
+        #              this odd logic as is, and in phase 2 of the migration, this will be
+        #              refactored to be more idiomatic asyncio code.
+        with ThreadPoolExecutorWithContext() as executor:
+            return executor.submit(lambda: asyncio.run(async_func(*args, **kwargs))).result()
+    else:
+        return asyncio.run(async_func(*args, **kwargs))
+
+
+async def stringify_output_async(output: Any) -> str:
+    if isinstance(output, AsyncIterator):
+        return await stringify_output_async([v async for v in output])
+    if isinstance(output, Iterator):
+        return await stringify_output_async([v for v in output])
+    if isinstance(output, Mapping):
+        return ", ".join(
+            [f"{await stringify_output_async(k)}:{await stringify_output_async(v)}" for k, v in output.items()]
+        )
+    if isinstance(output, Sequence):
+        return "".join([await stringify_output_async(v) for v in output])
+    if isinstance(output, Task):
+        return await stringify_output_async(await output)
+
+    return str(output)
+
+
+def convert_eager_flow_output_to_dict(value: Any) -> Mapping[str, Any]:
+    """
+    Convert the output of eager flow to a dict. Since the output of eager flow
+    may not be a dict, we need to convert it to a dict in batch mode.
+
+    Examples:
+    1. If the output is a dict, return it directly:
+       value = {"output": 1} -> {"output": 1}
+    2. If the output is a dataclass, convert it to a dict:
+       value = SampleDataClass(output=1) -> {"output": 1}
+    3. If the output is not a dict or dataclass, convert it to a dict by adding a key "output":
+       value = 1 -> {"output": 1}
+    """
+
+    if isinstance(value, Mapping):
+        return value
+    elif dataclasses.is_dataclass(value):
+        return dataclasses.asdict(cast(Any, value))
+    else:
+        return {"output": value}
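This 131-line hunk matches the new azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py listed above (+131 -0). The two helpers most callers touch are convert_eager_flow_output_to_dict and async_run_allowing_running_loop. A short sketch, using a hypothetical dataclass and coroutine that are not part of the package:

    import dataclasses
    from azure.ai.evaluation._legacy._batch_engine._utils_deprecated import (
        async_run_allowing_running_loop,
        convert_eager_flow_output_to_dict,
    )

    @dataclasses.dataclass
    class SampleDataClass:  # hypothetical, mirrors the docstring example above
        output: int

    print(convert_eager_flow_output_to_dict({"output": 1}))        # {"output": 1}
    print(convert_eager_flow_output_to_dict(SampleDataClass(1)))   # {"output": 1}
    print(convert_eager_flow_output_to_dict(1))                    # {"output": 1}

    async def _double(x: int) -> int:  # hypothetical coroutine
        return x * 2

    # Runs on a fresh loop, or on a helper thread if the caller already has a running loop.
    print(async_run_allowing_running_loop(_double, 21))            # 42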
@@ -0,0 +1,36 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+from azure.ai.evaluation._legacy.prompty._prompty import AsyncPrompty
+from azure.ai.evaluation._legacy.prompty._connection import Connection, OpenAIConnection, AzureOpenAIConnection
+from azure.ai.evaluation._legacy.prompty._exceptions import (
+    PromptyException,
+    MissingRequiredInputError,
+    InvalidInputError,
+    JinjaTemplateError,
+    NotSupportedError,
+)
+
+# =========================================================================================================
+# NOTE: All of the code here is largely a copy of code from Promptflow. Generally speaking, the following
+#       changes were made:
+#       - Added type annotations
+#       - Legacy or deprecated functionality has been removed (e.g. no more support for completions API)
+#       - Reworked the way images are handled to 1) Reduce the amount of code brought over, 2) remove
+#         the need to do two passes over the template to insert images, 3) remove the completely unnecessary
+#         loading of image data from the internet when it is not actually needed
+#       - Minor obvious tweaks to improve code readability, and removal of unused code paths
+# =========================================================================================================
+
+__all__ = [
+    "AsyncPrompty",
+    "Connection",
+    "AzureOpenAIConnection",
+    "OpenAIConnection",
+    "PromptyException",
+    "MissingRequiredInputError",
+    "InvalidInputError",
+    "JinjaTemplateError",
+    "NotSupportedError",
+]