azure-ai-evaluation 1.2.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of azure-ai-evaluation might be problematic.

Files changed (134)
  1. azure/ai/evaluation/__init__.py +42 -14
  2. azure/ai/evaluation/_azure/_models.py +6 -6
  3. azure/ai/evaluation/_common/constants.py +6 -2
  4. azure/ai/evaluation/_common/rai_service.py +38 -4
  5. azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
  6. azure/ai/evaluation/_common/raiclient/_client.py +128 -0
  7. azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
  8. azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
  9. azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
  10. azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
  11. azure/ai/evaluation/_common/raiclient/_version.py +9 -0
  12. azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
  13. azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
  14. azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
  15. azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
  16. azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
  17. azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
  18. azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
  19. azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
  20. azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
  21. azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
  22. azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
  23. azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
  24. azure/ai/evaluation/_common/raiclient/operations/_operations.py +1225 -0
  25. azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
  26. azure/ai/evaluation/_common/raiclient/py.typed +1 -0
  27. azure/ai/evaluation/_common/utils.py +30 -10
  28. azure/ai/evaluation/_constants.py +10 -0
  29. azure/ai/evaluation/_converters/__init__.py +3 -0
  30. azure/ai/evaluation/_converters/_ai_services.py +804 -0
  31. azure/ai/evaluation/_converters/_models.py +302 -0
  32. azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -3
  33. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +104 -0
  34. azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
  35. azure/ai/evaluation/_evaluate/_eval_run.py +1 -1
  36. azure/ai/evaluation/_evaluate/_evaluate.py +36 -4
  37. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +23 -3
  38. azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
  39. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +120 -0
  40. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +21 -2
  41. azure/ai/evaluation/_evaluators/_common/_base_eval.py +43 -3
  42. azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +3 -1
  43. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +43 -4
  44. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +16 -4
  45. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +42 -5
  46. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +15 -0
  47. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +15 -0
  48. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +15 -0
  49. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +15 -0
  50. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +28 -4
  51. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +21 -2
  52. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +26 -3
  53. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +21 -3
  54. azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
  55. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +152 -0
  56. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +161 -0
  57. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +26 -3
  58. azure/ai/evaluation/_evaluators/_qa/_qa.py +51 -7
  59. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +26 -2
  60. azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
  61. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +157 -0
  62. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +99 -0
  63. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +21 -2
  64. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +113 -4
  65. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +23 -3
  66. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +24 -5
  67. azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
  68. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +148 -0
  69. azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +117 -0
  70. azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
  71. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +292 -0
  72. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +71 -0
  73. azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
  74. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +103 -0
  75. azure/ai/evaluation/_evaluators/_xpia/xpia.py +2 -0
  76. azure/ai/evaluation/_exceptions.py +5 -1
  77. azure/ai/evaluation/_legacy/__init__.py +3 -0
  78. azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
  79. azure/ai/evaluation/_legacy/_batch_engine/_config.py +45 -0
  80. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +368 -0
  81. azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
  82. azure/ai/evaluation/_legacy/_batch_engine/_logging.py +292 -0
  83. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +23 -0
  84. azure/ai/evaluation/_legacy/_batch_engine/_result.py +99 -0
  85. azure/ai/evaluation/_legacy/_batch_engine/_run.py +121 -0
  86. azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
  87. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +217 -0
  88. azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
  89. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +105 -0
  90. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +82 -0
  91. azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
  92. azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
  93. azure/ai/evaluation/_legacy/prompty/_connection.py +182 -0
  94. azure/ai/evaluation/_legacy/prompty/_exceptions.py +59 -0
  95. azure/ai/evaluation/_legacy/prompty/_prompty.py +313 -0
  96. azure/ai/evaluation/_legacy/prompty/_utils.py +545 -0
  97. azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
  98. azure/ai/evaluation/_red_team/__init__.py +3 -0
  99. azure/ai/evaluation/_red_team/_attack_objective_generator.py +192 -0
  100. azure/ai/evaluation/_red_team/_attack_strategy.py +42 -0
  101. azure/ai/evaluation/_red_team/_callback_chat_target.py +74 -0
  102. azure/ai/evaluation/_red_team/_default_converter.py +21 -0
  103. azure/ai/evaluation/_red_team/_red_team.py +1858 -0
  104. azure/ai/evaluation/_red_team/_red_team_result.py +246 -0
  105. azure/ai/evaluation/_red_team/_utils/__init__.py +3 -0
  106. azure/ai/evaluation/_red_team/_utils/constants.py +64 -0
  107. azure/ai/evaluation/_red_team/_utils/formatting_utils.py +164 -0
  108. azure/ai/evaluation/_red_team/_utils/logging_utils.py +139 -0
  109. azure/ai/evaluation/_red_team/_utils/strategy_utils.py +188 -0
  110. azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
  111. azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
  112. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +741 -0
  113. azure/ai/evaluation/_version.py +2 -1
  114. azure/ai/evaluation/simulator/_adversarial_scenario.py +3 -1
  115. azure/ai/evaluation/simulator/_adversarial_simulator.py +61 -27
  116. azure/ai/evaluation/simulator/_conversation/__init__.py +4 -5
  117. azure/ai/evaluation/simulator/_conversation/_conversation.py +4 -0
  118. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +145 -0
  119. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +2 -0
  120. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +71 -1
  121. {azure_ai_evaluation-1.2.0.dist-info → azure_ai_evaluation-1.4.0.dist-info}/METADATA +75 -15
  122. azure_ai_evaluation-1.4.0.dist-info/RECORD +197 -0
  123. {azure_ai_evaluation-1.2.0.dist-info → azure_ai_evaluation-1.4.0.dist-info}/WHEEL +1 -1
  124. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
  125. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
  126. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
  127. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
  128. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
  129. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
  130. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
  131. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
  132. azure_ai_evaluation-1.2.0.dist-info/RECORD +0 -125
  133. {azure_ai_evaluation-1.2.0.dist-info → azure_ai_evaluation-1.4.0.dist-info}/NOTICE.txt +0 -0
  134. {azure_ai_evaluation-1.2.0.dist-info → azure_ai_evaluation-1.4.0.dist-info}/top_level.txt +0 -0
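The headline changes in this range are the new agent-focused evaluators (intent resolution, response completeness, task adherence, tool call accuracy), the code vulnerability and ungrounded attributes safety evaluators, the new _red_team scanning module, and the _legacy batch engine and prompty modules that begin replacing the Promptflow dependency. As a rough, unverified sketch of how the new prompty-based evaluators are likely used, assuming they follow the same pattern as the existing quality evaluators (model configuration at construction, keyword inputs at call time); the exported name and parameters below are inferred from the module paths above rather than confirmed by this diff:

    from azure.ai.evaluation import IntentResolutionEvaluator  # new in this release (preview)

    # Placeholder Azure OpenAI model configuration, using the same shape the
    # existing prompty-based evaluators (e.g. CoherenceEvaluator) accept.
    model_config = {
        "azure_endpoint": "https://<your-resource>.openai.azure.com",
        "azure_deployment": "<deployment-name>",
        "api_key": "<api-key>",
    }

    intent_resolution = IntentResolutionEvaluator(model_config)
    result = intent_resolution(
        query="What are the opening hours of the Eiffel Tower?",
        response="The Eiffel Tower is open from 9:00 AM to 11:45 PM.",
    )
    print(result)  # expected to contain a score plus an explanation, per the prompty definition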
azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py
@@ -0,0 +1,88 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+
+ from ..._exceptions import ErrorCategory, ErrorBlame, ErrorTarget, EvaluationException
+
+
+ class BatchEngineError(EvaluationException):
+     """Exception class for batch engine errors.
+
+     This exception is used to indicate that the error was caused by or in the batch engine.
+
+     :param message: The error message.
+     :type message: str
+     """
+
+     def __init__(self, message: str, **kwargs):
+         kwargs.setdefault("category", ErrorCategory.FAILED_EXECUTION)
+         kwargs.setdefault("target", ErrorTarget.EVAL_RUN)
+         kwargs.setdefault("blame", ErrorBlame.UNKNOWN)
+
+         super().__init__(message, **kwargs)
+
+
+ class BatchEngineValidationError(BatchEngineError):
+     """Exception raised when validation fails.
+
+     :param message: The error message.
+     :type message: str
+     """
+
+     def __init__(self, message: str, **kwargs):
+         kwargs.setdefault("category", ErrorCategory.INVALID_VALUE)
+         kwargs.setdefault("blame", ErrorBlame.USER_ERROR)
+         super().__init__(message, **kwargs)
+
+
+ class BatchEngineTimeoutError(BatchEngineError):
+     """Exception raised when a batch engine operation times out.
+
+     :param message: The error message.
+     :type message: str
+     """
+
+     def __init__(self, message: str, **kwargs):
+         kwargs.setdefault("category", ErrorCategory.FAILED_EXECUTION)
+         kwargs.setdefault("blame", ErrorBlame.SYSTEM_ERROR)
+         super().__init__(message, **kwargs)
+
+
+ class BatchEngineCanceledError(BatchEngineError):
+     """Exception raised when a batch engine operation is canceled.
+
+     :param message: The error message.
+     :type message: str
+     """
+
+     def __init__(self, message: str, **kwargs):
+         kwargs.setdefault("category", ErrorCategory.FAILED_EXECUTION)
+         kwargs.setdefault("blame", ErrorBlame.USER_ERROR)
+         super().__init__(message, **kwargs)
+
+
+ class BatchEngineRunFailedError(BatchEngineError):
+     """Exception raised when a batch engine run fails.
+
+     :param message: The error message.
+     :type message: str
+     """
+
+     def __init__(self, message: str, **kwargs):
+         kwargs.setdefault("category", ErrorCategory.FAILED_EXECUTION)
+         kwargs.setdefault("blame", ErrorBlame.SYSTEM_ERROR)
+         super().__init__(message, **kwargs)
+
+
+ class BatchEnginePartialError(BatchEngineError):
+     """Exception raised when a batch engine run has some successful lines mixed in
+     with some failures.
+
+     :param message: The error message.
+     :type message: str
+     """
+
+     def __init__(self, message: str, **kwargs):
+         kwargs.setdefault("category", ErrorCategory.FAILED_EXECUTION)
+         kwargs.setdefault("blame", ErrorBlame.SYSTEM_ERROR)
+         super().__init__(message, **kwargs)
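As a small illustration (not part of the package) of how these exception defaults behave: each subclass pre-fills category, target, and blame through kwargs.setdefault, so a caller can still override any of them when raising. The module path is taken from the file list above:

    from azure.ai.evaluation._exceptions import ErrorCategory
    from azure.ai.evaluation._legacy._batch_engine._exceptions import (
        BatchEngineError,
        BatchEngineValidationError,
    )

    # Raised with defaults: category=INVALID_VALUE, blame=USER_ERROR, target=EVAL_RUN.
    try:
        raise BatchEngineValidationError("column mapping must not be empty")
    except BatchEngineError as exc:
        print(f"validation failed: {exc}")

    # An explicit keyword wins over the setdefault() call in __init__,
    # so the default FAILED_EXECUTION category can be replaced.
    err = BatchEngineError("engine stopped unexpectedly", category=ErrorCategory.INVALID_VALUE)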
azure/ai/evaluation/_legacy/_batch_engine/_logging.py
@@ -0,0 +1,292 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+
+ # Original source:
+ # - promptflow-core/promptflow/_core/log_manager.py
+ # - promptflow-core/promptflow/_utils/logger_utils.py
+
+ import os
+ import logging
+ import re
+ import sys
+ from re import Pattern
+ from contextvars import ContextVar
+ from datetime import datetime, timezone
+ from dataclasses import dataclass
+ from io import StringIO, TextIOBase
+ from typing import Any, Dict, Final, Mapping, Optional, Set, TextIO, Tuple, Union
+
+
+ valid_logging_level: Final[Set[str]] = {"CRITICAL", "FATAL", "ERROR", "WARN", "WARNING", "INFO", "DEBUG", "NOTSET"}
+
+
+ def get_pf_logging_level(default=logging.INFO):
+     logging_level = os.environ.get("PF_LOGGING_LEVEL", None)
+     if logging_level not in valid_logging_level:
+         # Fall back to info if user input is invalid.
+         logging_level = default
+     return logging_level
+
+
+ def _get_format_for_logger(
+     default_log_format: Optional[str] = None, default_date_format: Optional[str] = None
+ ) -> Tuple[str, str]:
+     """
+     Get the logging format and date format for logger.
+
+     This function attempts to find the handler of the root logger with a configured formatter.
+     If such a handler is found, it returns the format and date format used by this handler.
+     This can be configured through logging.basicConfig. If no configured formatter is found,
+     it defaults to LOG_FORMAT and DATETIME_FORMAT.
+     """
+     log_format = (
+         os.environ.get("PF_LOG_FORMAT")
+         or default_log_format
+         or "%(asctime)s %(thread)7d %(name)-18s %(levelname)-8s %(message)s"
+     )
+     datetime_format = os.environ.get("PF_LOG_DATETIME_FORMAT") or default_date_format or "%Y-%m-%d %H:%M:%S %z"
+     return log_format, datetime_format
+
+
+ def get_logger(name: str) -> logging.Logger:
+     """Get logger used during execution."""
+     logger = logging.Logger(name)
+     logger.setLevel(get_pf_logging_level())
+     # logger.addHandler(FileHandlerConcurrentWrapper())
+     stdout_handler = logging.StreamHandler(sys.stdout)
+     fmt, datefmt = _get_format_for_logger()
+     # TODO ralphe: Do we need a credentials scrubber here like the old code had? We are not
+     # logging anything that sensitive here.
+     stdout_handler.setFormatter(logging.Formatter(fmt=fmt, datefmt=datefmt))
+     logger.addHandler(stdout_handler)
+     return logger
+
+
+ def scrub_credentials(s: str):
+     """Scrub credentials in string s.
+
+     For example, for input string: "print accountkey=accountKey", the output will be:
+     "print accountkey=**data_scrubbed**"
+     """
+     # for h in logger.handlers:
+     #     if isinstance(h, FileHandlerConcurrentWrapper):
+     #         if h.handler and h.handler._formatter:
+     #             credential_scrubber = h.handler._formatter.credential_scrubber
+     #             if credential_scrubber:
+     #                 return credential_scrubber.scrub(s)
+     return CredentialScrubber.scrub(s)
+
+
+ class CredentialScrubber:
+     """Scrub sensitive information in string."""
+
+     PLACE_HOLDER = "**data_scrubbed**"
+     LENGTH_THRESHOLD = 2
+     DEFAULT_REGEX_SET: Final[Set[Pattern[str]]] = {
+         re.compile(r"(?<=sig=)[^\s;&]+", flags=re.IGNORECASE),  # Replace signature.
+         re.compile(r"(?<=key=)[^\s;&]+", flags=re.IGNORECASE),  # Replace key.
+     }
+
+     @staticmethod
+     def scrub(input: str) -> str:
+         """Replace sensitive information in input string with PLACE_HOLDER.
+
+         For example, for input string: "print accountkey=accountKey", the output will be:
+         "print accountkey=**data_scrubbed**"
+         """
+         output = input
+         for regex in CredentialScrubber.DEFAULT_REGEX_SET:
+             output = regex.sub(CredentialScrubber.PLACE_HOLDER, output)
+         return output
+
+
+ # Logs by flow_logger will only be shown in flow mode.
+ # These logs should contain all detailed logs from executor and runtime.
+ flow_logger = get_logger("execution.flow")
+
+ # Logs by bulk_logger will only be shown in bulktest and eval modes.
+ # These logs should contain overall progress logs and error logs.
+ bulk_logger = get_logger("execution.bulk")
+
+ # Logs by logger will be shown in all the modes above,
+ # such as error logs.
+ logger = get_logger("execution")
+
+
+ def log_progress(
+     run_start_time: datetime,
+     total_count: int,
+     current_count: int,
+     logger: logging.Logger = bulk_logger,
+     formatter="Finished {count} / {total_count} lines.",
+ ) -> None:
+     if current_count > 0:
+         delta = datetime.now(timezone.utc).timestamp() - run_start_time.timestamp()
+         average_execution_time = round(delta / current_count, 2)
+         estimated_execution_time = round(average_execution_time * (total_count - current_count), 2)
+         logger.info(formatter.format(count=current_count, total_count=total_count))
+         logger.info(
+             f"Average execution time for completed lines: {average_execution_time} seconds. "
+             f"Estimated time for incomplete lines: {estimated_execution_time} seconds."
+         )
+
+
+ def incremental_print(log: str, printed: int, fileout: Union[TextIO, Any]) -> int:
+     count = 0
+     for line in log.splitlines():
+         if count >= printed:
+             fileout.write(line + "\n")
+             printed += 1
+         count += 1
+     return printed
+
+
+ def print_red_error(message):
+     try:
+         from colorama import Fore, init
+
+         init(autoreset=True)
+         print(Fore.RED + message)
+     except ImportError:
+         print(message)
+
+
+ @dataclass
+ class NodeInfo:
+     run_id: str
+     node_name: str
+     line_number: int
+
+
+ class NodeLogManager:
+     """Replace sys.stdout and sys.stderr with NodeLogWriter.
+
+     This class intercepts and saves logs to stdout/stderr when executing a node. For example:
+     with NodeLogManager() as log_manager:
+         print('test stdout')
+         print('test stderr', file=sys.stderr)
+
+     log_manager.get_logs() will return: {'stdout': 'test stdout\n', 'stderr': 'test stderr\n'}
+     """
+
+     def __init__(self, record_datetime: bool = True):
+         self.stdout_logger = NodeLogWriter(sys.stdout, record_datetime)
+         self.stderr_logger = NodeLogWriter(sys.stderr, record_datetime, is_stderr=True)
+
+     def __enter__(self) -> "NodeLogManager":
+         """Replace sys.stdout and sys.stderr with NodeLogWriter."""
+         self._prev_stdout = sys.stdout
+         self._prev_stderr = sys.stderr
+         sys.stdout = self.stdout_logger
+         sys.stderr = self.stderr_logger
+         return self
+
+     def __exit__(self, *args) -> None:
+         """Restore sys.stdout and sys.stderr."""
+         sys.stdout = self._prev_stdout
+         sys.stderr = self._prev_stderr
+
+     def set_node_context(self, run_id: str, node_name: str, line_number: int) -> None:
+         """Set node context."""
+         self.stdout_logger.set_node_info(run_id, node_name, line_number)
+         self.stderr_logger.set_node_info(run_id, node_name, line_number)
+
+     def clear_node_context(self, run_id: str) -> None:
+         """Clear node context."""
+         self.stdout_logger.clear_node_info(run_id)
+         self.stderr_logger.clear_node_info(run_id)
+
+     def get_logs(self, run_id: str) -> Mapping[str, str]:
+         return {
+             "stdout": self.stdout_logger.get_log(run_id),
+             "stderr": self.stderr_logger.get_log(run_id),
+         }
+
+
+ class NodeLogWriter(TextIOBase):
+     """Record node run logs."""
+
+     DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S%z"
+
+     def __init__(self, prev_stdout: Union[TextIOBase, Any], record_datetime: bool = True, is_stderr: bool = False):
+         self.run_id_to_stdout: Dict[str, StringIO] = {}
+         self._context: ContextVar[Optional[NodeInfo]] = ContextVar("run_log_info", default=None)
+         self._prev_out: Union[TextIOBase, Any] = prev_stdout
+         self._record_datetime: bool = record_datetime
+         self._is_stderr: bool = is_stderr
+
+     def set_node_info(self, run_id: str, node_name: str, line_number: int) -> None:
+         """Set node info to a context variable.
+
+         After node info is set, the write method will write to the string IO associated with this node.
+         """
+         run_log_info = NodeInfo(run_id, node_name, line_number)
+         self._context.set(run_log_info)
+         self.run_id_to_stdout.update({run_id: StringIO()})
+
+     def clear_node_info(self, run_id: str):
+         """Clear context variable associated with run id."""
+         log_info: Optional[NodeInfo] = self._context.get()
+         if log_info and log_info.run_id == run_id:
+             self._context.set(None)
+
+         if run_id in self.run_id_to_stdout:
+             self.run_id_to_stdout.pop(run_id)
+
+     def get_log(self, run_id: str) -> str:
+         """Get log associated with run id."""
+         string_io: Optional[StringIO] = self.run_id_to_stdout.get(run_id)
+         if string_io is None:
+             return ""
+
+         return string_io.getvalue()
+
+     def write(self, s: str) -> int:
+         """Override TextIO's write method and write the input string into a string IO.
+
+         Credentials are scrubbed from the string before it is written.
+         The string is also recorded to the flow/bulk logger.
+         If node info is not set, write to the previous stdout.
+         """
+         log_info: Optional[NodeInfo] = self._context.get()
+         s = scrub_credentials(s)  # Remove credential from string.
+         if log_info is None:
+             return self._prev_out.write(s)
+         else:
+             self._write_to_flow_log(log_info, s)
+             stdout: Optional[StringIO] = self.run_id_to_stdout.get(log_info.run_id)
+             # When the line execution timeout is reached, all running nodes will be cancelled and node info will
+             # be cleared. This will remove StringIO from self.run_id_to_stdout. For sync tools running in a worker
+             # thread, they can't be stopped and self._context won't change in the worker
+             # thread because it's a thread-local variable. Therefore, we need to check if StringIO is None here.
+             if stdout is None:
+                 return 0
+             if self._record_datetime and s != "\n":  # For line breaker, do not add datetime prefix.
+                 s = f"[{datetime.now(timezone.utc).strftime(self.DATETIME_FORMAT)}] {s}"
+             return stdout.write(s)
+
+     def flush(self):
+         """Override TextIO's flush method."""
+         node_info: Optional[NodeInfo] = self._context.get()
+         if node_info is None:
+             self._prev_out.flush()
+         else:
+             string_io = self.run_id_to_stdout.get(node_info.run_id)
+             if string_io is not None:
+                 string_io.flush()
+
+     def _write_to_flow_log(self, log_info: NodeInfo, s: str):
+         """Save stdout log to flow_logger and stderr log to logger."""
+         # If user uses "print('log message.')" to log, then
+         # "write" method will be called twice and the second time input is only '\n'.
+         # For this case, should not log '\n' in flow_logger.
+         if s != "\n":
+             if self._is_stderr:
+                 flow_log = f"[{str(log_info)}] stderr> " + s.rstrip("\n")
+                 # Log stderr in all scenarios so we can diagnose problems.
+                 logger.warning(flow_log)
+             else:
+                 flow_log = f"[{str(log_info)}] stdout> " + s.rstrip("\n")
+                 # Log stdout only in flow mode.
+                 flow_logger.info(flow_log)
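A brief sketch of how this ported log machinery is driven; "run-1" and the node name are placeholder values. Output written while a node context is set is scrubbed by CredentialScrubber and captured per run id:

    import sys
    from azure.ai.evaluation._legacy._batch_engine._logging import (
        NodeLogManager,
        scrub_credentials,
    )

    # The default regexes mask anything following "sig=" or "key=".
    print(scrub_credentials("GET https://host/container?sig=abc123"))
    # -> GET https://host/container?sig=**data_scrubbed**

    with NodeLogManager(record_datetime=False) as log_manager:
        log_manager.set_node_context("run-1", node_name="line_eval", line_number=0)
        print("test stdout")
        print("test stderr", file=sys.stderr)
        logs = log_manager.get_logs("run-1")

    print(logs)  # {'stdout': 'test stdout\n', 'stderr': 'test stderr\n'}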
azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py
@@ -0,0 +1,23 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+
+ # Original source code: promptflow-tracing/promptflow/tracing/_integrations/_openai_injector.py
+
+
+ def inject_openai_api():
+     """This function:
+     1. Modifies the create methods of the OpenAI API classes to inject logic before calling the original methods.
+        It stores the original methods as _original attributes of the create methods.
+     2. Updates the openai api configs from environment variables.
+     """
+     # TODO ralphe: Port function?
+     pass
+
+
+ def recover_openai_api():
+     """This function restores the original create methods of the OpenAI API classes
+     by assigning them back from the _original attributes of the modified methods.
+     """
+     # TODO ralphe: Port function?
+     pass
azure/ai/evaluation/_legacy/_batch_engine/_result.py
@@ -0,0 +1,99 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+
+ from dataclasses import dataclass
+ from datetime import datetime, timedelta
+ from typing import Any, Mapping, Optional, Sequence
+
+ from ._status import BatchStatus
+
+
+ @dataclass
+ class TokenMetrics:
+     """The token metrics of a run."""
+
+     prompt_tokens: int
+     """The number of tokens used in the prompt for the run."""
+     completion_tokens: int
+     """The number of tokens used in the completion for the run."""
+     total_tokens: int
+     """The total number of tokens used in the run."""
+
+
+ @dataclass
+ class BatchRunError:
+     """The error of a batch run."""
+
+     details: str
+     """The details of the error."""
+     exception: Optional[BaseException]
+     """The exception of the error."""
+
+
+ @dataclass
+ class BatchRunDetails:
+     """The details of a single line in a batch run."""
+
+     id: str
+     """The ID of the line run."""
+     status: BatchStatus
+     """The status of the line run."""
+     result: Optional[Mapping[str, Any]]
+     """The result of the line run."""
+     start_time: Optional[datetime]
+     """The start time of the line run. If this was never started, this should be None."""
+     end_time: Optional[datetime]
+     """The end time of the line run. If this never completed, this should be None."""
+     tokens: TokenMetrics
+     """The token metrics of the line run."""
+     error: Optional[BatchRunError]
+     """The error of the line run. This will only be set if the status is Failed."""
+
+     @property
+     def duration(self) -> timedelta:
+         """The duration of the line run."""
+         if self.start_time is not None and self.end_time is not None:
+             return self.end_time - self.start_time
+         return timedelta(0)
+
+     @staticmethod
+     def create_id(run_id: str, index: int) -> str:
+         """Helper method to create the ID for a line run."""
+         return f"{run_id}_{index}"
+
+
+ @dataclass
+ class BatchResult:
+     """The result of a batch run."""
+
+     status: BatchStatus
+     """The overall status of the batch run."""
+     total_lines: int
+     """The total number of lines in the batch run."""
+     failed_lines: int
+     """The number of failed lines in the batch run."""
+     start_time: datetime
+     """The start time of the batch run."""
+     end_time: datetime
+     """The end time of the batch run."""
+     tokens: TokenMetrics
+     """The overall token metrics of the batch run."""
+     details: Sequence[BatchRunDetails]
+     """The details of each line in the batch run."""
+     error: Optional[Exception] = None
+     """The error of the batch run. This will only be set if the status does not indicate success."""
+
+     @property
+     def duration(self) -> timedelta:
+         """The duration of the batch run."""
+         if self.start_time is not None and self.end_time is not None:
+             return self.end_time - self.start_time
+         return timedelta(0)
+
+     @property
+     def results(self) -> Sequence[Optional[Mapping[str, Any]]]:
+         """The results of the batch run."""
+         if not self.details:
+             return []
+         return [d.result for d in self.details]
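To show how these dataclasses fit together, a hedged example that builds a single-line result and reads the aggregate properties; it assumes BatchStatus exposes the members referenced by _run.py below (NotStarted, Running, Completed, Canceled, Failed):

    from datetime import datetime, timedelta, timezone

    from azure.ai.evaluation._legacy._batch_engine._result import (
        BatchResult,
        BatchRunDetails,
        TokenMetrics,
    )
    from azure.ai.evaluation._legacy._batch_engine._status import BatchStatus

    start = datetime.now(timezone.utc)
    end = start + timedelta(seconds=2)
    usage = TokenMetrics(prompt_tokens=120, completion_tokens=30, total_tokens=150)

    line = BatchRunDetails(
        id=BatchRunDetails.create_id("run-1", 0),  # -> "run-1_0"
        status=BatchStatus.Completed,
        result={"coherence": 4},
        start_time=start,
        end_time=end,
        tokens=usage,
        error=None,  # only populated for failed lines
    )

    batch = BatchResult(
        status=BatchStatus.Completed,
        total_lines=1,
        failed_lines=0,
        start_time=start,
        end_time=end,
        tokens=usage,
        details=[line],
    )

    print(batch.duration)  # 0:00:02
    print(batch.results)   # [{'coherence': 4}]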
azure/ai/evaluation/_legacy/_batch_engine/_run.py
@@ -0,0 +1,121 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+
+ from uuid import uuid4
+ from datetime import datetime, timedelta, timezone
+ from enum import Enum
+ from typing import Any, Callable, Mapping, Optional, Sequence
+
+ from ._utils import normalize_identifier_name
+ from ._result import BatchResult
+ from ._status import BatchStatus
+
+
+ class RunStatus(Enum):
+     # TODO ralphe: Trim this to just the statuses we need
+     # QUEUED = "Queued"
+     NOT_STARTED = "NotStarted"
+     PREPARING = "Preparing"
+     # PROVISIONING = "Provisioning"
+     # STARTING = "Starting"
+     RUNNING = "Running"
+     # CANCEL_REQUESTED = "CancelRequested"
+     CANCELED = "Canceled"
+     # FINALIZING = "Finalizing"
+     COMPLETED = "Completed"
+     FAILED = "Failed"
+     # UNAPPROVED = "Unapproved"
+     # NOTRESPONDING = "NotResponding"
+     # PAUSING = "Pausing"
+     # PAUSED = "Paused"
+
+     @staticmethod
+     def from_batch_result_status(status: BatchStatus) -> "RunStatus":
+         if status == BatchStatus.NotStarted:
+             return RunStatus.NOT_STARTED
+         if status == BatchStatus.Running:
+             return RunStatus.RUNNING
+         if status == BatchStatus.Completed:
+             return RunStatus.COMPLETED
+         if status == BatchStatus.Canceled:
+             return RunStatus.CANCELED
+         if status == BatchStatus.Failed:
+             return RunStatus.FAILED
+
+         return RunStatus.FAILED
+
+
+ class Run:
+     """The equivalent of a Promptflow Run
+     promptflow-devkit/promptflow/_sdk/entities/_run.py
+
+     THIS WILL BE REMOVED IN A FUTURE CODE UPDATE"""
+
+     def __init__(
+         self,
+         *,
+         dynamic_callable: Callable,
+         name_prefix: Optional[str],
+         inputs: Sequence[Mapping[str, Any]],
+         column_mapping: Mapping[str, str],
+         created_on: Optional[datetime] = None,
+     ):
+         self._status: RunStatus = RunStatus.NOT_STARTED
+         self._created_on = created_on or datetime.now(timezone.utc)
+         self._start_time: Optional[datetime] = None
+         self._end_time: Optional[datetime] = None
+
+         self.dynamic_callable = dynamic_callable
+         self.name = self._generate_run_name(name_prefix, self._created_on)
+         self.inputs = inputs
+         self.column_mapping = column_mapping
+         self.result: Optional[BatchResult] = None
+         self.metrics: Mapping[str, Any] = {}
+
+         # self._use_remote_flow = False
+         # self._from_flex_flow = True
+         # self._from_prompty = False
+         # self.flow = path to pointless flow file
+         # self._experiment_name = name of folder containing pointless flow file
+         # self._lineage_id = basically equivalent to a hex digest of the SHA256 hash of:
+         #     f"{uuid.getnode()}/{posix_full_path_to_pointless_folder}"
+         # self._output_path = Path("<user_folder>/.promptflow/runs/<self.name>")
+         # self._flow_name = name of pointless folder
+
+     @property
+     def status(self) -> RunStatus:
+         return self._status
+
+     @property
+     def created_on(self) -> datetime:
+         return self._created_on
+
+     @property
+     def duration(self) -> Optional[timedelta]:
+         if self._start_time is None or self._end_time is None:
+             return None
+
+         return self._end_time - self._start_time
+
+     @property
+     def outputs(self) -> Sequence[Mapping[str, Any]]:
+         if self.result is None:
+             return []
+
+         return [value or {} for value in self.result.results]
+
+     @staticmethod
+     def _generate_run_name(name_prefix: Optional[str], creation_time: datetime) -> str:
+         # The Promptflow code looked at the folder name of the temporary folder used to
+         # store the temporary flow YAML file, which was a single entry that told it to look
+         # at the passed-in dynamic_callable. Example folder name:
+         #     azure_ai_evaluation_evaluators_common_base_eval_asyncevaluatorbase_l82059h3
+         # Instead we use the passed-in name_prefix, or a UUID (which is just as opaque
+         # as what the original code did).
+         if not name_prefix:
+             name_prefix = str(uuid4())
+
+         timestamp = creation_time.strftime("%Y%m%d_%H%M%S_%f")
+         name = f"{name_prefix}_{timestamp}"
+         return normalize_identifier_name(name)
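Finally, a sketch of how the new engine constructs a Run; the callable, inputs, and column mapping below are placeholders, and the ${data.*} mapping syntax is the Promptflow convention this code is ported from rather than something this diff documents:

    from azure.ai.evaluation._legacy._batch_engine._run import Run, RunStatus

    def evaluate_line(query: str) -> dict:
        # Stand-in for the evaluator callable the engine would normally wrap.
        return {"length": len(query)}

    run = Run(
        dynamic_callable=evaluate_line,
        name_prefix="coherence_eval",
        inputs=[{"query": "hello"}, {"query": "world"}],
        column_mapping={"query": "${data.query}"},
    )

    print(run.status)   # RunStatus.NOT_STARTED
    print(run.name)     # e.g. coherence_eval_20250101_120000_000000 (normalized)
    print(run.outputs)  # [] until a BatchResult is assigned to run.result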