azure-ai-evaluation 1.2.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic.

Files changed (134)
  1. azure/ai/evaluation/__init__.py +42 -14
  2. azure/ai/evaluation/_azure/_models.py +6 -6
  3. azure/ai/evaluation/_common/constants.py +6 -2
  4. azure/ai/evaluation/_common/rai_service.py +38 -4
  5. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  6. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  7. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  8. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  9. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  10. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  11. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  12. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  13. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  14. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  15. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  16. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  17. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  18. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  19. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  20. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  21. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  22. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  23. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  24. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1225 -0
  25. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  26. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  27. azure/ai/evaluation/_common/utils.py +30 -10
  28. azure/ai/evaluation/_constants.py +10 -0
  29. azure/ai/evaluation/_converters/__init__.py +3 -0
  30. azure/ai/evaluation/_converters/_ai_services.py +804 -0
  31. azure/ai/evaluation/_converters/_models.py +302 -0
  32. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -3
  33. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +104 -0
  34. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  35. azure/ai/evaluation/_evaluate/_eval_run.py +1 -1
  36. azure/ai/evaluation/_evaluate/_evaluate.py +36 -4
  37. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +23 -3
  38. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  39. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +120 -0
  40. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +21 -2
  41. azure/ai/evaluation/_evaluators/_common/_base_eval.py +43 -3
  42. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +3 -1
  43. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +43 -4
  44. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +16 -4
  45. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +42 -5
  46. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +15 -0
  47. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +15 -0
  48. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +15 -0
  49. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +15 -0
  50. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +28 -4
  51. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +21 -2
  52. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +26 -3
  53. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +21 -3
  54. azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
  55. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +152 -0
  56. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +161 -0
  57. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +26 -3
  58. azure/ai/evaluation/_evaluators/_qa/_qa.py +51 -7
  59. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +26 -2
  60. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  61. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +157 -0
  62. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +99 -0
  63. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +21 -2
  64. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +113 -4
  65. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +23 -3
  66. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +24 -5
  67. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  68. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +148 -0
  69. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +117 -0
  70. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  71. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +292 -0
  72. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +71 -0
  73. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  74. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +103 -0
  75. azure/ai/evaluation/_evaluators/_xpia/xpia.py +2 -0
  76. azure/ai/evaluation/_exceptions.py +5 -1
  77. azure/ai/evaluation/_legacy/__init__.py +3 -0
  78. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  79. azure/ai/evaluation/_legacy/_batch_engine/_config.py +45 -0
  80. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +368 -0
  81. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  82. azure/ai/evaluation/_legacy/_batch_engine/_logging.py +292 -0
  83. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +23 -0
  84. azure/ai/evaluation/_legacy/_batch_engine/_result.py +99 -0
  85. azure/ai/evaluation/_legacy/_batch_engine/_run.py +121 -0
  86. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  87. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +217 -0
  88. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  89. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +105 -0
  90. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +82 -0
  91. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  92. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  93. azure/ai/evaluation/_legacy/prompty/_connection.py +182 -0
  94. azure/ai/evaluation/_legacy/prompty/_exceptions.py +59 -0
  95. azure/ai/evaluation/_legacy/prompty/_prompty.py +313 -0
  96. azure/ai/evaluation/_legacy/prompty/_utils.py +545 -0
  97. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  98. azure/ai/evaluation/_red_team/__init__.py +3 -0
  99. azure/ai/evaluation/_red_team/_attack_objective_generator.py +192 -0
  100. azure/ai/evaluation/_red_team/_attack_strategy.py +42 -0
  101. azure/ai/evaluation/_red_team/_callback_chat_target.py +74 -0
  102. azure/ai/evaluation/_red_team/_default_converter.py +21 -0
  103. azure/ai/evaluation/_red_team/_red_team.py +1858 -0
  104. azure/ai/evaluation/_red_team/_red_team_result.py +246 -0
  105. azure/ai/evaluation/_red_team/_utils/__init__.py +3 -0
  106. azure/ai/evaluation/_red_team/_utils/constants.py +64 -0
  107. azure/ai/evaluation/_red_team/_utils/formatting_utils.py +164 -0
  108. azure/ai/evaluation/_red_team/_utils/logging_utils.py +139 -0
  109. azure/ai/evaluation/_red_team/_utils/strategy_utils.py +188 -0
  110. azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
  111. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  112. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +741 -0
  113. azure/ai/evaluation/_version.py +2 -1
  114. azure/ai/evaluation/simulator/_adversarial_scenario.py +3 -1
  115. azure/ai/evaluation/simulator/_adversarial_simulator.py +61 -27
  116. azure/ai/evaluation/simulator/_conversation/__init__.py +4 -5
  117. azure/ai/evaluation/simulator/_conversation/_conversation.py +4 -0
  118. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +145 -0
  119. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +2 -0
  120. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +71 -1
  121. {azure_ai_evaluation-1.2.0.dist-info → azure_ai_evaluation-1.4.0.dist-info}/METADATA +75 -15
  122. azure_ai_evaluation-1.4.0.dist-info/RECORD +197 -0
  123. {azure_ai_evaluation-1.2.0.dist-info → azure_ai_evaluation-1.4.0.dist-info}/WHEEL +1 -1
  124. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
  125. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
  126. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
  127. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
  128. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
  129. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
  130. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
  131. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
  132. azure_ai_evaluation-1.2.0.dist-info/RECORD +0 -125
  133. {azure_ai_evaluation-1.2.0.dist-info → azure_ai_evaluation-1.4.0.dist-info}/NOTICE.txt +0 -0
  134. {azure_ai_evaluation-1.2.0.dist-info → azure_ai_evaluation-1.4.0.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py
@@ -0,0 +1,128 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+
+ # Original source:
+ # promptflow-devkit/promptflow/_sdk/operations/_local_storage_operations.py
+
+ from abc import ABC, abstractmethod
+ from contextlib import AbstractContextManager
+ from datetime import datetime, timezone
+ from pathlib import Path
+ from typing import Any, Final, Mapping, Optional, Tuple, Union
+
+ from ._result import BatchResult, TokenMetrics, BatchStatus
+
+
+ EVAL_USER_SUBFOLDER: Final[str] = ".evaluation"
+
+
+ class AbstractRunLogger(AbstractContextManager):
+     @property
+     @abstractmethod
+     def file_path(self) -> Path:
+         """Get the file path of the logger.
+
+         :return: The file path of the logger.
+         :rtype: Path
+         """
+         ...
+
+     @abstractmethod
+     def get_logs(self) -> str:
+         """Get the logs of the run.
+
+         :return: The logs of the run.
+         :rtype: str
+         """
+         ...
+
+
+ class AbstractRunStorage(ABC):
+     @property
+     @abstractmethod
+     def logger(self) -> "AbstractRunLogger":
+         """Get the logger of the run.
+
+         :return: The logger of the run.
+         :rtype: ~promptflow.contracts.run_logger.RunLogger
+         """
+         ...
+
+     @abstractmethod
+     def persist_result(self, result: Optional[BatchResult]) -> None:
+         """Persist results of a batch engine execution (including any errors).
+
+         :param Optional[BatchResult] result: The result to persist.
+         """
+         ...
+
+     @abstractmethod
+     def load_exception(self) -> Mapping[str, Any]:
+         """Load the exception from the storage. If there was no exception, an empty
+         mapping will be returned.
+
+         :return: The exception details.
+         :rtype: Mapping[str, Any]
+         """
+         ...
+
+     @abstractmethod
+     def load_inputs_and_outputs(self) -> Tuple[Mapping[str, Any], BatchResult]:
+         """Load the inputs and outputs from the storage.
+
+         :return: The inputs and outputs.
+         :rtype: Tuple[Mapping[str, Any], BatchResult]
+         """
+         ...
+
+     @abstractmethod
+     def load_metrics(self) -> Mapping[str, Union[int, float, str]]:
+         """Load the metrics from the storage.
+
+         :return: The metrics.
+         :rtype: Mapping[str, Union[int, float, str]]
+         """
+         ...
+
+
+ class NoOpRunStorage(AbstractRunStorage):
+     """A no-op implementation of the run storage."""
+
+     def __init__(self):
+         self._logger = NoOpLogger()
+         pass
+
+     @property
+     def logger(self) -> AbstractRunLogger:
+         return self._logger
+
+     def persist_result(self, result: Optional[BatchResult]) -> None:
+         pass
+
+     def load_exception(self) -> Mapping[str, Any]:
+         return {}
+
+     def load_inputs_and_outputs(self) -> Tuple[Mapping[str, Any], BatchResult]:
+         now = datetime.now(timezone.utc)
+         return {}, BatchResult(BatchStatus.NotStarted, 0, 0, now, now, TokenMetrics(0, 0, 0), [])
+
+     def load_metrics(self) -> Mapping[str, Union[int, float, str]]:
+         return {}
+
+
+ class NoOpLogger(AbstractRunLogger):
+     """A no-op implementation of the run logger."""
+
+     @property
+     def file_path(self) -> Path:
+         return Path.home() / EVAL_USER_SUBFOLDER
+
+     def __enter__(self) -> None:
+         pass
+
+     def __exit__(self, *args) -> None:
+         pass
+
+     def get_logs(self) -> str:
+         return ""
azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py
@@ -0,0 +1,217 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+
+ import dataclasses
+ import sys
+ from datetime import datetime, timezone
+ from typing import Any, Callable, Dict, Mapping, Optional, Sequence, TextIO, Union
+
+ from ._run import Run, RunStatus
+ from ._trace import start_trace, is_collection_writeable
+ from ._run_storage import AbstractRunStorage, NoOpRunStorage
+ from ._logging import incremental_print, print_red_error
+ from ._config import BatchEngineConfig
+ from ._exceptions import BatchEngineValidationError
+ from ._engine import BatchEngine, BatchEngineError, BatchResult
+
+
+ class RunSubmitter:
+     """Submits runs to the executor.
+     promptflow-devkit/promptflow/_sdk/_orchestrator/run_submitter.py
+
+     THIS WILL BE REMOVED IN A FUTURE CODE UPDATE"""
+
+     def __init__(self, config: BatchEngineConfig):
+         # self._client = PFClient instance
+         # self._config = PFClient config
+         # self.run_operations = RunOperations instance
+
+         # TODO ralphe: Use proper logger here. Old code did LoggerFactory.get_logger(__name__)
+         self._config = config
+
+     def submit(
+         self,
+         dynamic_callable: Callable,
+         inputs: Sequence[Mapping[str, Any]],
+         column_mapping: Mapping[str, str],
+         *,
+         name_prefix: Optional[str] = None,
+         created_on: Optional[datetime] = None,
+         storage_creator: Optional[Callable[[Run], AbstractRunStorage]] = None,
+         **kwargs,
+     ) -> Run:
+         # The old code always spun up two threads here using a ThreadPoolExecutor:
+         # 1. One thread essentially did nothing of value (since tracing was disabled, and we
+         #    don't care about checking for the latest PromptFlow version number now)
+         # 2. The other thread did the _run_bulk call. This was followed by a
+         #    wait(return_when=ALL_COMPLETED)
+         # This quite frankly is unnecessary complexity since the evaluation code already
+         # calls this in the context of a thread pool thread. So we can just do the equivalent
+         # of the _run_bulk code here directly.
+         # In a future code refactor, all of this will be cleaned up in favour of proper
+         # async/await code.
+         run: Run = kwargs.pop("run", None) or Run(
+             dynamic_callable=dynamic_callable,
+             name_prefix=name_prefix,
+             inputs=inputs,
+             column_mapping=column_mapping,
+             created_on=created_on,
+         )
+
+         logger = self._config.logger
+         attributes: Dict[str, Any] = kwargs.get("attributes", {})
+         collection_for_run: Optional[str] = None
+
+         logger.debug("start trace for flow run...")
+         logger.debug("flow path for run.start_trace: %s", run.name)
+
+         if is_collection_writeable():
+             logger.debug("trace collection is writeable, will use flow name as collection...")
+             collection_for_run = run.name
+             logger.debug("collection for run: %s", collection_for_run)
+         else:
+             logger.debug("trace collection is protected, will honor existing collection.")
+         start_trace(attributes=attributes, run=run, _collection=collection_for_run)
+
+         self._validate_inputs(run=run)
+
+         local_storage = storage_creator(run) if storage_creator else NoOpRunStorage()
+         with local_storage.logger:
+             run._status = RunStatus.PREPARING
+
+             # unnecessary Flow loading code was removed here. Instead do direct calls to _submit_bulk_run
+             self._submit_bulk_run(run=run, local_storage=local_storage, **kwargs)
+
+         self.stream_run(run=run, storage=local_storage, raise_on_error=True)
+         return run
+
+     def _submit_bulk_run(self, run: Run, local_storage: AbstractRunStorage, **kwargs) -> None:
+         logger = self._config.logger
+
+         logger.info(f"Submitting run {run.name}, log path: {local_storage.logger.file_path}")
+
+         # Old code loaded the Flex flow, parsed input and outputs types. That logic has been
+         # removed since it is unnecessary. It also parsed and set environment variables. This
+         # has also been removed since it can be problematic in a multi-threaded environment.
+
+         self._validate_column_mapping(run.column_mapping)
+
+         run._status = RunStatus.RUNNING
+         run._start_time = datetime.now(timezone.utc)
+         batch_result: Optional[BatchResult] = None
+
+         try:
+             batch_engine = BatchEngine(
+                 run.dynamic_callable,
+                 storage=local_storage,
+                 batch_timeout_sec=self._config.batch_timeout_seconds,
+                 line_timeout_sec=self._config.run_timeout_seconds,
+                 max_worker_count=self._config.max_concurrency,
+                 **kwargs,
+             )
+
+             batch_result = batch_engine.run(data=run.inputs, column_mapping=run.column_mapping, id=run.name)
+             run._status = RunStatus.from_batch_result_status(batch_result.status)
+
+             error_logs: Sequence[str] = []
+             if run._status != RunStatus.COMPLETED:
+                 error_logs.append(f"Run {run.name} failed with status {batch_result.status}.")
+                 if batch_result.error:
+                     error_logs.append(f"Error: {str(batch_result.error)}")
+
+             if error_logs:
+                 logger.warning("\n".join(error_logs))
+         except Exception as e:
+             run._status = RunStatus.FAILED
+             # when the run fails in the executor, store the exception in the result and dump it to file
+             logger.warning(f"Run {run.name} failed when executing in executor with exception {e}.")
+             # for user errors, swallow the stack trace and return the failed run since the user doesn't need the stack trace
+             if not isinstance(e, BatchEngineValidationError):
+                 # for other errors, raise it to the user to help debug the root cause.
+                 raise e
+             # won't raise the exception since it's already included in the run object.
+         finally:
+             # persist inputs, outputs and metrics
+             local_storage.persist_result(batch_result)
+             # exceptions
+             # local_storage.dump_exception(exception=exception, batch_result=batch_result) # TODO ralphe: persist_result should handle this
+             # system metrics
+             system_metrics = {}
+             if batch_result:
+                 system_metrics.update(dataclasses.asdict(batch_result.tokens))  # token related
+                 system_metrics.update(
+                     {
+                         "duration": batch_result.duration.total_seconds(),
+                         # "__pf__.lines.completed": batch_result.total_lines - batch_result.failed_lines,
+                         # "__pf__.lines.failed": batch_result.failed_lines,
+                     }
+                 )
+
+             run._end_time = datetime.now(timezone.utc)
+             run.metrics = system_metrics
+             run.result = batch_result
+
+     @staticmethod
+     def _validate_inputs(run: Run):
+         if not run.inputs:
+             raise BatchEngineValidationError("Data must be specified for evaluation run.")
+
+     @staticmethod
+     def _validate_column_mapping(column_mapping: Mapping[str, str]):
+         if not isinstance(column_mapping, Mapping):
+             raise BatchEngineValidationError(f"Column mapping must be a dict, got {type(column_mapping)}.")
+
+         has_mapping = any([isinstance(v, str) and v.startswith("$") for v in column_mapping.values()])
+         if not has_mapping:
+             raise BatchEngineValidationError(
+                 "Column mapping must contain at least one mapping binding, "
+                 f"current column mapping contains all static values: {column_mapping}"
+             )
+
+     @staticmethod
+     def stream_run(run: Run, storage: AbstractRunStorage, raise_on_error: bool) -> None:
+         """
+         Stream the output of the batch execution.
+
+         :param Run run: The run to stream.
+         :param AbstractRunStorage storage: The storage to use for the output.
+         """
+
+         # TODO ralphe: This doesn't seem to do anything useful beyond just printing
+         #              a run summary at the end. This is because by the time it gets
+         #              invoked, even in the original code, the run has already completed.
+
+         if run is None or storage is None:
+             return
+
+         file_handler = sys.stdout
+         try:
+             printed = 0
+             available_logs = storage.logger.get_logs()
+             incremental_print(available_logs, printed, file_handler)
+             RunSubmitter._print_run_summary(run, file_handler)
+         except KeyboardInterrupt:
+             error_message = "The output streaming for the run was interrupted, but the run is still executing."
+             print(error_message)
+
+         if run.status == RunStatus.FAILED or run.status == RunStatus.CANCELED:
+             if run.status == RunStatus.FAILED:
+                 error_message = storage.load_exception().get("message", "Run fails with unknown error.")
+             else:
+                 error_message = "Run is canceled."
+             if raise_on_error:
+                 raise BatchEngineError(error_message)
+             else:
+                 print_red_error(error_message)
+
+     @staticmethod
+     def _print_run_summary(run: Run, text_out: Union[TextIO, Any]) -> None:
+         duration = str(run.duration)
+         text_out.write(
+             "======= Run Summary =======\n\n"
+             f'Run name: "{run.name}"\n'
+             f'Run status: "{run.status.value}"\n'
+             f'Start time: "{run.created_on}"\n'
+             f'Duration: "{duration}"\n\n'
+         )
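
The column-mapping rule enforced by _validate_column_mapping above is easy to trip over: at least one value must be a "$"-prefixed binding such as "${data.question}". A quick illustration, assuming the private import paths shown in this diff (which may change without notice):

from azure.ai.evaluation._legacy._batch_engine._exceptions import BatchEngineValidationError
from azure.ai.evaluation._legacy._batch_engine._run_submitter import RunSubmitter

# At least one "$"-prefixed binding, so this passes silently.
RunSubmitter._validate_column_mapping({"question": "${data.question}", "mode": "strict"})

# All static values: rejected.
try:
    RunSubmitter._validate_column_mapping({"mode": "strict"})
except BatchEngineValidationError as err:
    print(err)  # "Column mapping must contain at least one mapping binding, ..."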
azure/ai/evaluation/_legacy/_batch_engine/_status.py
@@ -0,0 +1,25 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+
+ from enum import IntEnum, auto, unique
+
+
+ @unique
+ class BatchStatus(IntEnum):
+     NotStarted = 0
+     Running = auto()
+
+     # NOTE: DO NOT REORDER THESE ENUMS. The order is important for the is_terminated method
+     #       and other logic in the code to work properly
+     Completed = auto()
+     Canceled = auto()
+     Failed = auto()
+
+     @staticmethod
+     def is_terminated(status: "BatchStatus") -> bool:
+         return status >= BatchStatus.Completed
+
+     @staticmethod
+     def is_failed(status: "BatchStatus") -> bool:
+         return status == BatchStatus.Failed or status == BatchStatus.Canceled
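
Because BatchStatus is an IntEnum, is_terminated reduces to a plain integer comparison against Completed, which is exactly why the NOTE above forbids reordering the members. A quick sanity check, assuming the private import path (which may change):

from azure.ai.evaluation._legacy._batch_engine._status import BatchStatus

assert not BatchStatus.is_terminated(BatchStatus.Running)
assert BatchStatus.is_terminated(BatchStatus.Completed)
assert BatchStatus.is_terminated(BatchStatus.Failed)
assert BatchStatus.is_failed(BatchStatus.Canceled)
assert not BatchStatus.is_failed(BatchStatus.Completed)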
azure/ai/evaluation/_legacy/_batch_engine/_trace.py
@@ -0,0 +1,105 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+
+ # Pretty much all of this code will be removed
+
+ import logging
+ import os
+ from typing import Any, Dict, Optional
+
+ from ._openai_injector import inject_openai_api
+
+
+ def start_trace(
+     *,
+     resource_attributes: Optional[Dict] = None,
+     collection: Optional[str] = None,
+     **kwargs: Any,
+ ) -> None:
+     """Promptflow instrumentation.
+
+     :param resource_attributes: Specify the resource attributes for the current process.
+     :type resource_attributes: typing.Optional[dict]
+     :param collection: Specify the collection for the current tracing session.
+     :type collection: typing.Optional[str]
+     """
+
+     logging.debug("injecting OpenAI API...")
+     inject_openai_api()
+     logging.debug("OpenAI API injected.")
+
+     res_attrs: Dict[str, str] = {"service.name": "promptflow"}
+     if resource_attributes:
+         logging.debug("specified resource attributes: %s", resource_attributes)
+         res_attrs.update(resource_attributes)
+
+     # determine collection
+     collection_user_specified = collection is not None
+     if not collection_user_specified:
+         collection = kwargs.get("_collection", _get_collection_from_cwd())
+         # logging.debug("collection is not user specified")
+         # if is_collection_writeable():
+         #     # internal parameter for devkit call
+         #     _collection = kwargs.get("_collection", None)
+         #     if _collection is not None:
+         #         logging.debug("received internal parameter _collection: %s, will use this", _collection)
+         #         collection = _collection
+         #     else:
+         #         logging.debug("trying to get from current working directory...")
+         #         collection = _get_collection_from_cwd()
+         # # TODO ralphe: OpenTelemetry dependency. This is a future task to resolve.
+         # # else:
+         # #     logging.debug("collection is protected, will directly use that...")
+         # #     tracer_provider: TracerProvider = trace.get_tracer_provider()
+         # #     collection = tracer_provider.resource.attributes["collection"]
+     logging.info("collection: %s", collection)
+     res_attrs["collection"] = collection or "default"
+     logging.info("resource attributes: %s", res_attrs)
+
+     # if the user specifies a collection, we will add a flag on the tracer provider to avoid override
+     _set_tracer_provider(res_attrs, protected_collection=collection_user_specified)
+
+     # Rest of code is removed since we are removing the promptflow-devkit dependency
+
+
+ def is_collection_writeable() -> bool:
+     # TODO ralphe: This has an OpenTelemetry dependency. That is a future task to resolve.
+     # return not getattr(trace.get_tracer_provider(), TRACER_PROVIDER_PROTECTED_COLLECTION_ATTR, False)
+     return True
+
+
+ def _get_collection_from_cwd() -> str:
+     """Try to use the cwd folder name as the collection name; fall back to the default value on exception."""
+     cur_folder_name = ""
+     try:
+         cwd = os.getcwd()
+         cur_folder_name = os.path.basename(cwd)
+     except Exception:  # pylint: disable=broad-except
+         # possible exceptions: PermissionError, FileNotFoundError, OSError, etc.
+         pass
+     collection = cur_folder_name or "default"
+     return collection
+
+
+ def _set_tracer_provider(res_attrs: Dict[str, str], protected_collection: bool) -> None:
+     # TODO ralphe: OpenTelemetry dependency. This is a future task to resolve.
+     pass
+     # res = Resource(attributes=res_attrs)
+     # tracer_provider = TracerProvider(resource=res)
+
+     # cur_tracer_provider = trace.get_tracer_provider()
+     # if isinstance(cur_tracer_provider, TracerProvider):
+     #     logging.info("tracer provider is already set, will merge the resource attributes...")
+     #     cur_res = cur_tracer_provider.resource
+     #     logging.debug("current resource: %s", cur_res.attributes)
+     #     new_res = cur_res.merge(res)
+     #     cur_tracer_provider._resource = new_res
+     #     logging.info("tracer provider is updated with resource attributes: %s", new_res.attributes)
+     # else:
+     #     trace.set_tracer_provider(tracer_provider)
+     #     logging.info("tracer provider is set with resource attributes: %s", res.attributes)
+
+     # if protected_collection:
+     #     logging.info("user specifies collection, will add a flag on tracer provider to avoid override...")
+     #     setattr(trace.get_tracer_provider(), TRACER_PROVIDER_PROTECTED_COLLECTION_ATTR, True)
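
When no collection is supplied, start_trace falls back to the cwd folder name via _get_collection_from_cwd. A standalone copy of that fallback logic, for illustration only (the function below is a hypothetical rewrite, not the package's API):

import os

def collection_from_cwd() -> str:
    """Use the cwd folder name as the collection, falling back to "default"."""
    name = ""
    try:
        name = os.path.basename(os.getcwd())
    except Exception:  # PermissionError, FileNotFoundError, OSError, etc.
        pass
    return name or "default"

print(collection_from_cwd())  # e.g. "my_project" when run from /home/user/my_project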
azure/ai/evaluation/_legacy/_batch_engine/_utils.py
@@ -0,0 +1,82 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+
+ import os
+ import re
+ from typing import Any, Mapping, Sequence, Tuple
+
+
+ def normalize_identifier_name(name: str) -> str:
+     """Normalize the identifier name to a valid Python variable name.
+
+     Args:
+         name (str): The identifier name to normalize.
+
+     Returns:
+         str: The normalized identifier name.
+     """
+     normalized = re.sub(r"\W", "_", name.strip())
+     if normalized and normalized[0].isdigit():
+         normalized = f"_{normalized}"
+     return normalized
+
+
+ def get_int_env_var(env_var_name: str, default_value: int = 0) -> int:
+     """Get the integer value of an environment variable.
+
+     Args:
+         env_var_name (str): The name of the environment variable.
+         default_value (int): The default value if the environment variable is not set.
+
+     Returns:
+         int: The integer value of the environment variable.
+     """
+     try:
+         value = os.getenv(env_var_name, default_value)
+         return int(value)
+     except ValueError:
+         return default_value
+
+
+ def get_value_from_path(path: str, data: Mapping[str, Any]) -> Tuple[bool, Any]:
+     """Tries to get a value from a mapping based on the specified path. The path is a
+     string with dot-separated keys (e.g. data.nested_1.nested_2).
+
+     This will interpret the path prioritizing a depth-first search with the shortest
+     key possible at each level. For example, if you had the following data:
+     {
+         "foo": {
+             "bar": {
+                 "happy": 12
+             }
+         },
+         "foo.bar": {
+             "none": 14,
+             "random": { "some": 15 }
+         },
+         "foo.bar.none": 16
+     }
+     and you asked for foo.bar.none, the returned value would be 14.
+     """
+
+     def _get_value(data: Mapping[str, Any], parts: Sequence[str]) -> Tuple[bool, Any]:
+         if len(parts) == 0:
+             return True, data
+
+         for i in range(1, len(parts) + 1):
+             key = ".".join(parts[:i])
+             if isinstance(data, Mapping) and key in data:
+                 found, match = _get_value(data[key], parts[i:])
+                 if found:
+                     return found, match
+
+         return False, None
+
+     if path is None or data is None:
+         return False, None
+
+     parts = path.strip().split(".")
+     if len(parts) == 0:
+         return False, None
+     return _get_value(data, parts)
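
The shortest-key-first resolution described in the get_value_from_path docstring is worth seeing concretely. A short demo, assuming the private import path above (which may change):

from azure.ai.evaluation._legacy._batch_engine._utils import (
    get_value_from_path,
    normalize_identifier_name,
)

data = {
    "foo": {"bar": {"happy": 12}},
    "foo.bar": {"none": 14, "random": {"some": 15}},
    "foo.bar.none": 16,
}

print(get_value_from_path("foo.bar.happy", data))  # (True, 12)
print(get_value_from_path("foo.bar.none", data))   # (True, 14): "foo" -> "bar" -> "none" fails, so "foo.bar" -> "none" wins
print(get_value_from_path("foo.baz", data))        # (False, None)

print(normalize_identifier_name("1st metric-name"))  # "_1st_metric_name"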
azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py
@@ -0,0 +1,131 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+
+ import asyncio
+ import contextvars
+ import dataclasses
+ from asyncio import Task
+ from concurrent.futures import ThreadPoolExecutor
+ from typing import Any, AsyncIterator, Callable, Iterator, Mapping, Optional, Sequence, Tuple, cast
+
+
+ class ThreadPoolExecutorWithContext(ThreadPoolExecutor):
+     # Original source:
+     # promptflow-tracing/promptflow/tracing/_context_utils.py
+
+     def __init__(
+         self,
+         max_workers: Optional[int] = None,
+         thread_name_prefix: str = "",
+         initializer: Optional[Callable] = None,
+         initargs: Tuple[Any, ...] = (),
+     ) -> None:
+         """The ThreadPoolExecutorWithContext is an extended thread pool implementation
+         which will copy the context from the current thread to the child threads.
+         Thus the traced functions in child threads can keep the parent-child relationship in the tracing system.
+         The arguments are the same as ThreadPoolExecutor.
+
+         Args:
+             max_workers: The maximum number of threads that can be used to
+                 execute the given calls.
+             thread_name_prefix: An optional name prefix to give our threads.
+             initializer: A callable used to initialize worker threads.
+             initargs: A tuple of arguments to pass to the initializer.
+         """
+         current_context = contextvars.copy_context()
+         initializer_args = (current_context, initializer, initargs)
+         super().__init__(max_workers, thread_name_prefix, self.set_context_then_call, initializer_args)
+
+     @staticmethod
+     def set_context_then_call(
+         context: contextvars.Context,
+         initializer: Optional[Callable],
+         initargs: Tuple[Any, ...],
+     ) -> None:
+         for var, value in context.items():
+             var.set(value)
+         if initializer:
+             initializer(*initargs)
+
+
+ def _has_running_loop() -> bool:
+     """Check if the current thread has a running event loop."""
+     # When using asyncio.get_running_loop(), a RuntimeError is raised if there is no running event loop.
+     # So, we use a try/except block to determine whether there is currently an event loop in place.
+     #
+     # Note that this is the only way to check whether there is a running loop now, see:
+     # https://docs.python.org/3/library/asyncio-eventloop.html?highlight=get_running_loop#asyncio.get_running_loop
+     try:
+         asyncio.get_running_loop()
+         return True
+     except RuntimeError:
+         return False
+
+
+ def async_run_allowing_running_loop(async_func, *args, **kwargs):
+     """Run an async function in a new thread, allowing the current thread to have a running event loop.
+
+     When run in an async environment (e.g., in a notebook), because each thread allows only one event
+     loop, using asyncio.run directly leads to a RuntimeError ("asyncio.run() cannot be called from a
+     running event loop").
+
+     To address this issue, we add a check for the event loop here. If the current thread already has an
+     event loop, we run the async function in a new thread; otherwise, we run it in the current thread.
+     """
+
+     if _has_running_loop():
+         # TODO ralphe: The logic here makes absolutely no sense to me. If you already have an
+         #              async event loop running, why would you want to start up a new thread,
+         #              create a new event loop, and run the async function in a new thread?
+         #              You can just use the following to schedule the async function call on
+         #              the existing event loop:
+         #              asyncio.get_running_loop().create_task(async_func(*args, **kwargs)).result()
+         #              The correct thing to do here is not make these decisions here at all.
+         #              Instead, all the BatchEngine code should be async first, with the event
+         #              loop being started by the callers of that code. For now, I am keeping
+         #              this odd logic as is, and in phase 2 of the migration, this will be
+         #              refactored to be more idiomatic asyncio code.
+         with ThreadPoolExecutorWithContext() as executor:
+             return executor.submit(lambda: asyncio.run(async_func(*args, **kwargs))).result()
+     else:
+         return asyncio.run(async_func(*args, **kwargs))
+
+
+ async def stringify_output_async(output: Any) -> str:
+     if isinstance(output, AsyncIterator):
+         return await stringify_output_async([v async for v in output])
+     if isinstance(output, Iterator):
+         return await stringify_output_async([v for v in output])
+     if isinstance(output, Mapping):
+         return ", ".join(
+             [f"{await stringify_output_async(k)}:{await stringify_output_async(v)}" for k, v in output.items()]
+         )
+     if isinstance(output, Sequence) and not isinstance(output, str):  # str is a Sequence of itself; recursing would never terminate
+         return "".join([await stringify_output_async(v) for v in output])
+     if isinstance(output, Task):
+         return await stringify_output_async(await output)
+
+     return str(output)
+
+
+ def convert_eager_flow_output_to_dict(value: Any) -> Mapping[str, Any]:
+     """
+     Convert the output of an eager flow to a dict. Since the output of an eager flow
+     may not be a dict, we need to convert it to a dict in batch mode.
+
+     Examples:
+     1. If the output is a dict, return it directly:
+        value = {"output": 1} -> {"output": 1}
+     2. If the output is a dataclass, convert it to a dict:
+        value = SampleDataClass(output=1) -> {"output": 1}
+     3. If the output is not a dict or dataclass, convert it to a dict by adding a key "output":
+        value = 1 -> {"output": 1}
+     """
+
+     if isinstance(value, Mapping):
+         return value
+     elif dataclasses.is_dataclass(value):
+         return dataclasses.asdict(cast(Any, value))
+     else:
+         return {"output": value}