PyPI - azure-ai-evaluation - Versions diffs - 1.4.0__py3-none-any.whl → 1.6.0__py3-none-any.whl - Mend

azure-ai-evaluation 1.4.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (150) hide show

azure/ai/evaluation/_legacy/_batch_engine/_engine.py CHANGED Viewed

@@ -10,21 +10,24 @@
 #              porting over the code largely as is to remove the Promptflow dependency
 #              as quickly as possible. In phase 2 this code will be heavily refactored.
+import inspect
 import re
 import asyncio
 from math import floor
 from asyncio import Semaphore
+from concurrent.futures import Executor
+from functools import partial
 from contextlib import contextmanager
-from dataclasses import dataclass
-from datetime import datetime, timedelta, timezone
-from typing import Any, Callable, Dict, Final, Generator, Mapping, MutableMapping, Optional, Sequence, Set, Tuple
+from datetime import datetime, timezone
+from typing import Any, Callable, Dict, Final, Generator, Mapping, MutableMapping, Optional, Sequence, Set, Tuple, cast
 from uuid import uuid4
-from ._utils import get_int_env_var, get_value_from_path
+from ._utils import DEFAULTS_KEY, get_int_env_var, get_value_from_path, is_async_callable
 from ._status import BatchStatus
 from ._result import BatchResult, BatchRunDetails, BatchRunError, TokenMetrics
 from ._run_storage import AbstractRunStorage, NoOpRunStorage
-from ._logging import log_progress, NodeLogManager
+from .._common._logging import log_progress, NodeLogManager
 from ..._exceptions import ErrorBlame
 from ._exceptions import (
     BatchEngineCanceledError,
@@ -37,6 +40,7 @@ from ._utils_deprecated import (
     async_run_allowing_running_loop,
     convert_eager_flow_output_to_dict,
 )
+from ._openai_injector import CaptureOpenAITokenUsage
 MAX_WORKER_COUNT: Final[int] = 10
@@ -48,51 +52,37 @@ class BatchEngine:
     def __init__(
         self,
-        executor: Callable,
+        func: Callable,
         *,
         storage: Optional[AbstractRunStorage] = None,
         batch_timeout_sec: Optional[int] = None,
         line_timeout_sec: Optional[int] = None,
         max_worker_count: Optional[int] = None,
-        **kwargs: Any,
+        executor: Optional[Executor] = None,
     ):
         """Create a new batch engine instance
-        :param Callable executor: The executor to run the flow
+        :param Callable func: The function to run the flow
         :param Optional[AbstractRunStorage] storage: The storage to store execution results
         :param Optional[int] batch_timeout_sec: The timeout of batch run in seconds
         :param Optional[int] line_timeout_sec: The timeout of each line in seconds
         :param Optional[int] max_worker_count: The concurrency limit of batch run
-        :param kwargs: The keyword arguments related to creating the executor proxy class
-        :type kwargs: Any
+        :param Optional[Executor] executor: The executor to run the flow (if needed)
         """
-        self._executor = executor
-        # self._working_dir = working_dir
-        # self._is_eager_flow = True
-        # self._is_prompty_flow = False
-        # self._program_language = FlowLanguage.Python
-        # self._message_format = MessageFormatType.BASIC
-        # self._multimedia_processor = MultimediaProcessor.create(self._message_format)
-        # self._connections = {}
+        self._func: Callable = func
         self._storage: AbstractRunStorage = storage or NoOpRunStorage()
         # TODO ralphe: Consume these from the batch context/config instead of from
         #              kwargs or (even worse) environment variables
-        # self._batch_use_async = kwargs.get("batch_use_async", True)
         self._batch_timeout_sec = batch_timeout_sec or get_int_env_var("PF_BATCH_TIMEOUT_SEC")
         self._line_timeout_sec = line_timeout_sec or get_int_env_var("PF_LINE_TIMEOUT_SEC", 600)
         self._max_worker_count = max_worker_count or get_int_env_var("PF_WORKER_COUNT") or MAX_WORKER_COUNT
-        # update kwargs with worker_count and line_timeout_sec
-        kwargs.update({"worker_count": self._max_worker_count, "line_timeout_sec": self._line_timeout_sec})
+        self._executor: Optional[Executor] = executor
         self._is_canceled: bool = False
-        self._kwargs: Mapping[str, Any] = kwargs
-        # self._init_kwargs: Mapping[str, Any] = init_kwargs or {}
-    def run(
+    async def run(
         self,
         data: Sequence[Mapping[str, Any]],
         column_mapping: Mapping[str, str],
@@ -113,9 +103,7 @@ class BatchEngine:
         try:
             id = id or str(uuid4())
-            result: BatchResult = async_run_allowing_running_loop(self._exec_in_task, id, batch_inputs, start_time)
+            result: BatchResult = await self._exec_in_task(id, batch_inputs, start_time)
             return result
         except Exception as ex:
             raise BatchEngineError(
@@ -136,6 +124,7 @@ class BatchEngine:
         inputs: Sequence[Mapping[str, Any]] = []
         line: int = 0
+        defaults = cast(Mapping[str, Any], column_mapping.get(DEFAULTS_KEY, {}))
         for input in data:
             line += 1
@@ -143,6 +132,10 @@ class BatchEngine:
             missing_inputs: Set[str] = set()
             for key, value in column_mapping.items():
+                if key == DEFAULTS_KEY:
+                    # Skip the defaults key
+                    continue
                 if not isinstance(value, str):
                     # All non-string values are literal values.
                     mapped[key] = value
@@ -156,6 +149,9 @@ class BatchEngine:
                 dict_path = match.group(1)
                 found, value = get_value_from_path(dict_path, input)
+                if not found:  # try default value
+                    found, value = get_value_from_path(dict_path, defaults)
                 if found:
                     mapped[key] = value
                 else:
@@ -306,11 +302,34 @@ class BatchEngine:
             try:
                 # TODO ralphe: Handle line timeouts here
-                output: Any = await self._executor(**inputs)
+                with CaptureOpenAITokenUsage() as captured_tokens:
+                    # NOTE: In the legacy code, any synchronous functions were executed in a different process
+                    #       for isolation reasons. However this isolation was violated in the way the code was
+                    #       used by the evaluation SDK (e.g. you need to have the module already loaded to pass the
+                    #       callable into the batch engine, so starting a new process to examine it was redundant).
+                    #       It also came with performance and memory usage costs (each line was processed in a
+                    #       separate process up to a maximum of 4), and these processes were created and torn down
+                    #       too frequently.
+                    #       For now we will just run the function in the current process, but in the future we may
+                    #       want to consider running the function in a separate process for isolation reasons.
+                    output: Any
+                    if is_async_callable(self._func):
+                        output = await self._func(**inputs)
+                    else:
+                        # to maximize the parallelism, we run the synchronous function in a separate thread
+                        # and await its result
+                        output = await asyncio.get_event_loop().run_in_executor(
+                            self._executor,
+                            partial(self._func, **inputs))
+                    # This should in theory never happen but as an extra precaution, let's check if the output
+                    # is awaitable and await it if it is.
+                    if inspect.isawaitable(output):
+                        output = await output
                 details.status = BatchStatus.Completed
                 details.result = convert_eager_flow_output_to_dict(output)
-                # TODO figure out how to get the token metrics here
+                details.tokens.update(captured_tokens)
             except Exception as ex:
                 details.status = BatchStatus.Failed
                 details.error = BatchRunError(

azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py CHANGED Viewed

@@ -4,20 +4,126 @@
 # Original source code: promptflow-tracing/promptflow/tracing/_integrations/_openai_injector.py
+import functools
+import importlib
+import logging
+from contextvars import ContextVar
+from typing import Any, Callable, Final, Generator, Optional, Protocol, Sequence, Tuple
+from azure.ai.evaluation._legacy._adapters._errors import MissingRequiredPackage
+from azure.ai.evaluation._legacy._batch_engine._result import TokenMetrics
+_token_metrics: ContextVar[TokenMetrics] = ContextVar("token_metrics", default=TokenMetrics(0, 0, 0))
+KEY_ATTR_ORIGINAL: Final[str] = "_original"
+class _TokenMetrics(Protocol):
+    """Protocol class to represent the token metrics."""
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+class _WithUsage(Protocol):
+    """Protocol class to represent an OpenAI object that may have a token usage property/attribute."""
+    usage: Optional[_TokenMetrics]
+def _wrap_openai_api_method(method: Callable, is_async: bool) -> Callable:
+    """Wraps the OpenAI API method to inject logic to run on the result of the call."""
+    def update_usage(result: _WithUsage) -> None:
+        if hasattr(result, "usage") and result.usage is not None:
+            usage = _token_metrics.get()
+            usage.prompt_tokens += result.usage.prompt_tokens
+            usage.completion_tokens += result.usage.completion_tokens
+            usage.total_tokens += result.usage.total_tokens
+    if is_async:
+        @functools.wraps(method)
+        async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
+            result: _WithUsage = await method(*args, **kwargs)
+            update_usage(result)
+            return result
+        return async_wrapper
+    else:
+        @functools.wraps(method)
+        def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
+            result: _WithUsage = method(*args, **kwargs)
+            update_usage(result)
+            return result
+        return sync_wrapper
+def _openai_api_list() -> Generator[Tuple[Any, Callable, bool], None, None]:
+    """Load the list of OpenAI API classes and their corresponding method names."""
+    apis: Sequence[Tuple[str, str, str, bool]] = [
+        ("openai.resources.chat", "Completions", "create", False),
+        ("openai.resources.chat", "AsyncCompletions", "create", True),
+        ("openai.resources", "Completions", "create", False),
+        ("openai.resources", "AsyncCompletions", "create", True),
+        ("openai.resources", "Embeddings", "create", False),
+        ("openai.resources", "AsyncEmbeddings", "create", True),
+        ("openai.resources", "Responses", "create", False),
+        ("openai.resources", "AsyncResponses", "create", True),
+    ]
+    for module_name, class_name, method_name, is_async in apis:
+        try:
+            module = importlib.import_module(module_name)
+            cls = getattr(module, class_name, None)
+            if cls is None:
+                continue
+            method = getattr(cls, method_name, None)
+            if method is None:
+                continue
+            yield cls, method, is_async
+        except ImportError:
+            raise MissingRequiredPackage("Please install the 'openai' package to use the Azure AI Evaluation SDK")
+        except AttributeError:
+            logging.warning("The module '%s' does not have class '%s' or method '%s'", module_name, class_name, method_name)
 def inject_openai_api():
-    """This function:
-    1. Modifies the create methods of the OpenAI API classes to inject logic before calling the original methods.
-    It stores the original methods as _original attributes of the create methods.
-    2. Updates the openai api configs from environment variables.
+    """This function modifies the create methods of the OpenAI API classes to inject logic
+    to enable us to collect token usage data.
     """
-    # TODO ralphe: Port function?
-    pass
+    for cls, method, is_async in _openai_api_list():
+        # Check if the create method of the openai_api class has already been modified
+        if not hasattr(method, KEY_ATTR_ORIGINAL):
+            wrapper_method: Callable = _wrap_openai_api_method(method, is_async)
+            setattr(wrapper_method, KEY_ATTR_ORIGINAL, method)
+            setattr(cls, method.__name__, wrapper_method)
 def recover_openai_api():
     """This function restores the original create methods of the OpenAI API classes
     by assigning them back from the _original attributes of the modified methods.
     """
-    # TODO ralphe: Port function?
-    pass
+    for cls, method, _ in _openai_api_list():
+        if hasattr(method, KEY_ATTR_ORIGINAL):
+            original_method = getattr(method, KEY_ATTR_ORIGINAL)
+            setattr(cls, method.__name__, original_method)
+class CaptureOpenAITokenUsage:
+    """Context manager to capture OpenAI token usage."""
+    def __init__(self):
+        self._tokens = TokenMetrics(0, 0, 0)
+    def __enter__(self) -> TokenMetrics:
+        _token_metrics.set(TokenMetrics(0, 0, 0))
+        return self._tokens
+    def __exit__(self, exc_type: Optional[Exception], exc_value: Optional[Exception], traceback: Optional[Any]) -> None:
+        captured_metrics = _token_metrics.get()
+        self._tokens.update(captured_metrics)

azure/ai/evaluation/_legacy/_batch_engine/_result.py CHANGED Viewed

@@ -20,6 +20,12 @@ class TokenMetrics:
     total_tokens: int
     """The total number of tokens used in the run."""
+    def update(self, other: "TokenMetrics") -> None:
+        """Update the token metrics with another set of token metrics."""
+        self.prompt_tokens += other.prompt_tokens
+        self.completion_tokens += other.completion_tokens
+        self.total_tokens += other.total_tokens
 @dataclass
 class BatchRunError:
@@ -96,4 +102,4 @@ class BatchResult:
         """The results of the batch run."""
         if not self.details:
             return []
-        return [d.result for d in self.details]
+        return [d.result for d in self.details]

azure/ai/evaluation/_legacy/_batch_engine/_run.py CHANGED Viewed

@@ -60,6 +60,7 @@ class Run:
         inputs: Sequence[Mapping[str, Any]],
         column_mapping: Mapping[str, str],
         created_on: Optional[datetime] = None,
+        run: Optional["Run"] = None,
     ):
         self._status: RunStatus = RunStatus.NOT_STARTED
         self._created_on = created_on or datetime.now(timezone.utc)
@@ -72,6 +73,7 @@ class Run:
         self.column_mapping = column_mapping
         self.result: Optional[BatchResult] = None
         self.metrics: Mapping[str, Any] = {}
+        self._run = run
         # self._use_remote_flow = False
         # self._from_flex_flow = True
@@ -105,6 +107,10 @@ class Run:
         return [value or {} for value in self.result.results]
+    @property
+    def previous_run(self) -> Optional["Run"]:
+        return self._run
     @staticmethod
     def _generate_run_name(name_prefix: Optional[str], creation_time: datetime) -> str:
         # The Promptflow code looked at the folder name  of the temporary folder used to

azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py CHANGED Viewed

@@ -3,17 +3,20 @@
 # ---------------------------------------------------------
 import dataclasses
+import inspect
 import sys
+from concurrent.futures import Executor
 from datetime import datetime, timezone
 from typing import Any, Callable, Dict, Mapping, Optional, Sequence, TextIO, Union
 from ._run import Run, RunStatus
-from ._trace import start_trace, is_collection_writeable
+from ._trace import start_trace
 from ._run_storage import AbstractRunStorage, NoOpRunStorage
-from ._logging import incremental_print, print_red_error
+from .._common._logging import incremental_print, print_red_error
 from ._config import BatchEngineConfig
 from ._exceptions import BatchEngineValidationError
-from ._engine import BatchEngine, BatchEngineError, BatchResult
+from ._engine import DEFAULTS_KEY, BatchEngine, BatchEngineError, BatchResult
 class RunSubmitter:
@@ -22,25 +25,32 @@ class RunSubmitter:
     THIS WILL BE REMOVED IN A FUTURE CODE UPDATE"""
-    def __init__(self, config: BatchEngineConfig):
+    def __init__(self, config: BatchEngineConfig, executor: Optional[Executor] = None):
         # self._client = PFClient instance
         # self._config = PFClient config
         # self.run_operations = RunOperations instance
         # TODO ralphe: Use proper logger here. Old code did LoggerFactory.get_logger(__name__)
         self._config = config
+        self._executor = executor
-    def submit(
+    async def submit(
         self,
         dynamic_callable: Callable,
         inputs: Sequence[Mapping[str, Any]],
-        column_mapping: Mapping[str, str],
+        column_mapping: Optional[Mapping[str, str]],
         *,
         name_prefix: Optional[str] = None,
         created_on: Optional[datetime] = None,
         storage_creator: Optional[Callable[[Run], AbstractRunStorage]] = None,
         **kwargs,
     ) -> Run:
+        # if the column mappings are not provided, generate them based on the arguments to the
+        # flow function.
+        if column_mapping is None:
+            column_mapping = self._generate_column_mapping(dynamic_callable)
         # The old code always spun up two threads here using a ThreadPoolExecutor:
         # 1. One thread essentially did nothing of value (since tracing was disabled, and we
         #    don't care about checking for the latest PromptFlow version number now)
@@ -51,27 +61,18 @@ class RunSubmitter:
         # of the _run_bulk code here directly.
         # In a future code refactor, all of this will be cleaned up in favour of proper
         # async/await code.
-        run: Run = kwargs.pop("run", None) or Run(
+        run: Run = Run(
             dynamic_callable=dynamic_callable,
             name_prefix=name_prefix,
             inputs=inputs,
             column_mapping=column_mapping,
             created_on=created_on,
+            run=kwargs.pop("run", None),
         )
-        logger = self._config.logger
         attributes: Dict[str, Any] = kwargs.get("attributes", {})
-        collection_for_run: Optional[str] = None
-        logger.debug("start trace for flow run...")
-        logger.debug("flow path for run.start_trace: %s", run.name)
-        if is_collection_writeable():
-            logger.debug("trace collection is writeable, will use flow name as collection...")
-            collection_for_run = run.name
-            logger.debug("collection for run: %s", collection_for_run)
-        else:
-            logger.debug("trace collection is protected, will honor existing collection.")
+        collection_for_run: str = run.name
         start_trace(attributes=attributes, run=run, _collection=collection_for_run)
         self._validate_inputs(run=run)
@@ -81,12 +82,12 @@ class RunSubmitter:
             run._status = RunStatus.PREPARING
             # unnecessary Flow loading code was removed here. Instead do direct calls to _submit_bulk_run
-            self._submit_bulk_run(run=run, local_storage=local_storage, **kwargs)
+            await self._submit_bulk_run(run=run, local_storage=local_storage, **kwargs)
         self.stream_run(run=run, storage=local_storage, raise_on_error=True)
         return run
-    def _submit_bulk_run(self, run: Run, local_storage: AbstractRunStorage, **kwargs) -> None:
+    async def _submit_bulk_run(self, run: Run, local_storage: AbstractRunStorage, **kwargs) -> None:
         logger = self._config.logger
         logger.info(f"Submitting run {run.name}, log path: {local_storage.logger.file_path}")
@@ -95,6 +96,29 @@ class RunSubmitter:
         # removed since it is unnecessary. It also parsed and set environment variables. This
         # has also been removed since it can be problematic in a multi-threaded environment.
+        if run.previous_run:
+            previous: Optional[Run] = run.previous_run
+            if previous.status != RunStatus.COMPLETED:
+                raise BatchEngineValidationError(
+                    f"Referenced run {previous.name} is not completed, got status {previous.status.value}."
+                )
+            if previous.outputs is not None:
+                if len(previous.outputs) != len(run.inputs):
+                    raise BatchEngineValidationError(
+                        f"Referenced run {previous.name} has {len(previous.outputs)} outputs, "
+                        f"but {len(run.inputs)} inputs are provided."
+                    )
+                # load in the previous run's outputs and inputs into the list of dictionaries to allow for
+                # the previous run's outputs to be used as inputs for the current run
+                run.inputs = [
+                    {
+                        "run.outputs": previous.outputs[i],
+                        "run.inputs": previous.inputs[i],
+                        **run.inputs[i]
+                    }
+                    for i in range(len(run.inputs))]
         self._validate_column_mapping(run.column_mapping)
         run._status = RunStatus.RUNNING
@@ -108,10 +132,10 @@ class RunSubmitter:
                 batch_timeout_sec=self._config.batch_timeout_seconds,
                 line_timeout_sec=self._config.run_timeout_seconds,
                 max_worker_count=self._config.max_concurrency,
-                **kwargs,
+                executor=self._executor,
             )
-            batch_result = batch_engine.run(data=run.inputs, column_mapping=run.column_mapping, id=run.name)
+            batch_result = await batch_engine.run(data=run.inputs, column_mapping=run.column_mapping, id=run.name)
             run._status = RunStatus.from_batch_result_status(batch_result.status)
             error_logs: Sequence[str] = []
@@ -152,10 +176,30 @@ class RunSubmitter:
             run.metrics = system_metrics
             run.result = batch_result
+    @staticmethod
+    def _generate_column_mapping(function: Callable) -> Mapping[str, Any]:
+        args = inspect.signature(function).parameters
+        default_values: Dict[str, Any] = {}
+        mapping: Dict[str, Any] = {}
+        for key, value in args.items():
+            if key in ["self", "cls"] or value.kind in [value.VAR_POSITIONAL, value.VAR_KEYWORD]:
+                continue
+            mapping[key] = f"${{data.{key}}}"
+            if value.default != inspect.Parameter.empty:
+                default_values[key] = value.default
+        return {
+            **mapping,
+            DEFAULTS_KEY: default_values,
+        }
     @staticmethod
     def _validate_inputs(run: Run):
-        if not run.inputs:
-            raise BatchEngineValidationError("Data must be specified for evaluation run.")
+        if not run.inputs and not run.previous_run:
+            raise BatchEngineValidationError(
+                "Either data, or a previous run must be specified for the evaluation run."
+            )
     @staticmethod
     def _validate_column_mapping(column_mapping: Mapping[str, str]):
@@ -178,10 +222,6 @@ class RunSubmitter:
         :param AbstractRunStorage storage: The storage to use for the output.
         """
-        # TODO ralphe: This doesn't seem to be do anything useful beyond just print
-        #              a run summary at the end. This is because by the time it gets
-        #              invoked even in the original code, the run has already completed.
         if run is None or storage is None:
             return

azure/ai/evaluation/_legacy/_batch_engine/_status.py CHANGED Viewed

@@ -22,4 +22,4 @@ class BatchStatus(IntEnum):
     @staticmethod
     def is_failed(status: "BatchStatus") -> bool:
-        return status == BatchStatus.Failed or status == BatchStatus.Canceled
+        return status == BatchStatus.Failed or status == BatchStatus.Canceled

azure-ai-evaluation 1.4.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

Potentially problematic release.

azure-ai-evaluation 1.4.0__py3-none-any.whl → 1.6.0__py3-none-any.whl