PyPI - azure-ai-evaluation - Versions diffs - 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl - Mend

azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (299) hide show

azure/ai/evaluation/simulator/_conversation/__init__.py CHANGED Viewed

@@ -7,14 +7,16 @@ import copy
 import logging
 import time
 from dataclasses import dataclass
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
+import base64
+import re
 import jinja2
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import AsyncHttpPipeline
-from .._model_tools import LLMBase, OpenAIChatCompletionsModel
+from .._model_tools import LLMBase, OpenAIChatCompletionsModel, RAIClient
+from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
+from .._model_tools._template_handler import TemplateParameters
 from .constants import ConversationRole
@@ -40,7 +42,7 @@ class ConversationTurn:
     role: "ConversationRole"
     name: Optional[str] = None
     message: str = ""
-    full_response: Optional[Any] = None
+    full_response: Optional[Dict[str, Any]] = None
     request: Optional[Any] = None
     def to_openai_chat_format(self, reverse: bool = False) -> Dict[str, str]:
@@ -109,7 +111,7 @@ class ConversationBot:
         role: ConversationRole,
         model: Union[LLMBase, OpenAIChatCompletionsModel],
         conversation_template: str,
-        instantiation_parameters: Dict[str, str],
+        instantiation_parameters: TemplateParameters,
     ) -> None:
         self.role = role
         self.conversation_template_orig = conversation_template
@@ -118,13 +120,13 @@ class ConversationBot:
         )
         self.persona_template_args = instantiation_parameters
         if self.role == ConversationRole.USER:
-            self.name = self.persona_template_args.get("name", role.value)
+            self.name: str = cast(str, self.persona_template_args.get("name", role.value))
         else:
-            self.name = self.persona_template_args.get("chatbot_name", role.value) or model.name
+            self.name = cast(str, self.persona_template_args.get("chatbot_name", role.value)) or model.name
         self.model = model
         self.logger = logging.getLogger(repr(self))
-        self.conversation_starter = None  # can either be a dictionary or jinja template
+        self.conversation_starter: Optional[Union[str, jinja2.Template, Dict]] = None
         if role == ConversationRole.USER:
             if "conversation_starter" in self.persona_template_args:
                 conversation_starter_content = self.persona_template_args["conversation_starter"]
@@ -135,7 +137,7 @@ class ConversationBot:
                         self.conversation_starter = jinja2.Template(
                             conversation_starter_content, undefined=jinja2.StrictUndefined
                         )
-                    except jinja2.exceptions.TemplateSyntaxError:  # noqa: F841
+                    except jinja2.exceptions.TemplateSyntaxError as e:  # noqa: F841
                         self.conversation_starter = conversation_starter_content
             else:
                 self.logger.info(
@@ -144,11 +146,12 @@ class ConversationBot:
     async def generate_response(
         self,
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         conversation_history: List[ConversationTurn],
         max_history: int,
         turn_number: int = 0,
-    ) -> Tuple[dict, dict, int, dict]:
+        session_state: Optional[Dict[str, Any]] = None,
+    ) -> Tuple[dict, dict, float, dict]:
         """
         Prompt the ConversationBot for a response.
@@ -161,7 +164,7 @@ class ConversationBot:
         :param turn_number: Parameters used to query GPT-4 model.
         :type turn_number: int
         :return: The response from the ConversationBot.
-        :rtype: Tuple[dict, dict, int, dict]
+        :rtype: Tuple[dict, dict, float, dict]
         """
         # check if this is the first turn and the conversation_starter is not None,
@@ -169,11 +172,14 @@ class ConversationBot:
         if turn_number == 0 and self.conversation_starter is not None:
             # if conversation_starter is a dictionary, pass it into samples as is
             if isinstance(self.conversation_starter, dict):
-                samples = [self.conversation_starter]
+                samples: List[Union[str, jinja2.Template, Dict]] = [self.conversation_starter]
             if isinstance(self.conversation_starter, jinja2.Template):
                 samples = [self.conversation_starter.render(**self.persona_template_args)]
             else:
-                samples = [self.conversation_starter]  # type: ignore[attr-defined]
+                samples = [self.conversation_starter]
+            jailbreak_string = self.persona_template_args.get("jailbreak_string", None)
+            if jailbreak_string:
+                samples = [f"{jailbreak_string} {samples[0]}"]
             time_taken = 0
             finish_reason = ["stop"]
@@ -238,7 +244,7 @@ class CallbackConversationBot(ConversationBot):
         self,
         callback: Callable,
         user_template: str,
-        user_template_parameters: Dict,
+        user_template_parameters: TemplateParameters,
         *args,
         **kwargs,
     ) -> None:
@@ -250,18 +256,19 @@ class CallbackConversationBot(ConversationBot):
     async def generate_response(
         self,
-        session: AsyncHttpPipeline,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
         conversation_history: List[Any],
         max_history: int,
         turn_number: int = 0,
-    ) -> Tuple[dict, dict, int, dict]:
+        session_state: Optional[Dict[str, Any]] = None,
+    ) -> Tuple[dict, dict, float, dict]:
         chat_protocol_message = self._to_chat_protocol(
             self.user_template, conversation_history, self.user_template_parameters
         )
         msg_copy = copy.deepcopy(chat_protocol_message)
         result = {}
         start_time = time.time()
-        result = await self.callback(msg_copy)
+        result = await self.callback(msg_copy, session_state=session_state)
         end_time = time.time()
         if not result:
             result = {
@@ -270,8 +277,6 @@ class CallbackConversationBot(ConversationBot):
                 "id": None,
                 "template_parameters": {},
             }
-        self.logger.info("Using user provided callback returning response.")
         time_taken = end_time - start_time
         try:
             response = {
@@ -289,8 +294,6 @@ class CallbackConversationBot(ConversationBot):
                 blame=ErrorBlame.USER_ERROR,
             ) from exc
-        self.logger.info("Parsed callback response")
         return response, {}, time_taken, result
     # Bug 3354264: template is unused in the method - is this intentional?
@@ -307,9 +310,134 @@ class CallbackConversationBot(ConversationBot):
         }
+class MultiModalConversationBot(ConversationBot):
+    """MultiModal Conversation bot that uses a user provided callback to generate responses.
+    :param callback: The callback function to use to generate responses.
+    :type callback: Callable
+    :param user_template: The template to use for the request.
+    :type user_template: str
+    :param user_template_parameters: The template parameters to use for the request.
+    :type user_template_parameters: Dict
+    :param args: Optional arguments to pass to the parent class.
+    :type args: Any
+    :param kwargs: Optional keyword arguments to pass to the parent class.
+    :type kwargs: Any
+    """
+    def __init__(
+        self,
+        callback: Callable,
+        user_template: str,
+        user_template_parameters: TemplateParameters,
+        rai_client: Union[RAIClient, AIProjectClient],
+        *args,
+        **kwargs,
+    ) -> None:
+        self.callback = callback
+        self.user_template = user_template
+        self.user_template_parameters = user_template_parameters
+        self.rai_client = rai_client
+        super().__init__(*args, **kwargs)
+    async def generate_response(
+        self,
+        session: Union[AsyncHttpPipeline, AIProjectClient],
+        conversation_history: List[Any],
+        max_history: int,
+        turn_number: int = 0,
+        session_state: Optional[Dict[str, Any]] = None,
+    ) -> Tuple[dict, dict, float, dict]:
+        previous_prompt = conversation_history[-1]
+        chat_protocol_message = await self._to_chat_protocol(conversation_history, self.user_template_parameters)
+        # replace prompt with {image.jpg} tags with image content data.
+        conversation_history.pop()
+        conversation_history.append(
+            ConversationTurn(
+                role=previous_prompt.role,
+                name=previous_prompt.name,
+                message=chat_protocol_message["messages"][0]["content"],
+                full_response=previous_prompt.full_response,
+                request=chat_protocol_message,
+            )
+        )
+        msg_copy = copy.deepcopy(chat_protocol_message)
+        result = {}
+        start_time = time.time()
+        result = await self.callback(msg_copy)
+        end_time = time.time()
+        if not result:
+            result = {
+                "messages": [{"content": "Callback did not return a response.", "role": "assistant"}],
+                "finish_reason": ["stop"],
+                "id": None,
+                "template_parameters": {},
+            }
+        time_taken = end_time - start_time
+        try:
+            response = {
+                "samples": [result["messages"][-1]["content"]],
+                "finish_reason": ["stop"],
+                "id": None,
+            }
+        except Exception as exc:
+            msg = "User provided callback does not conform to chat protocol standard."
+            raise EvaluationException(
+                message=msg,
+                internal_message=msg,
+                target=ErrorTarget.CALLBACK_CONVERSATION_BOT,
+                category=ErrorCategory.INVALID_VALUE,
+                blame=ErrorBlame.USER_ERROR,
+            ) from exc
+        return response, chat_protocol_message, time_taken, result
+    async def _to_chat_protocol(self, conversation_history, template_parameters):  # pylint: disable=unused-argument
+        messages = []
+        for _, m in enumerate(conversation_history):
+            if "image:" in m.message:
+                content = await self._to_multi_modal_content(m.message)
+                messages.append({"content": content, "role": m.role.value})
+            else:
+                messages.append({"content": m.message, "role": m.role.value})
+        return {
+            "template_parameters": template_parameters,
+            "messages": messages,
+            "$schema": "http://azureml/sdk-2-0/ChatConversation.json",
+        }
+    async def _to_multi_modal_content(self, text: str) -> list:
+        split_text = re.findall(r"[^{}]+|\{[^{}]*\}", text)
+        messages = [
+            text.strip("{}").replace("image:", "").strip() if text.startswith("{") else text for text in split_text
+        ]
+        contents = []
+        for msg in messages:
+            if msg.startswith("image_understanding/"):
+                if isinstance(self.rai_client, RAIClient):
+                    encoded_image = await self.rai_client.get_image_data(msg)
+                else:
+                    response = self.rai_client.red_teams.get_template_parameters_image(path=msg, stream="true")
+                    image_data = b"".join(response)
+                    encoded_image = base64.b64encode(image_data).decode("utf-8")
+                contents.append(
+                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{encoded_image}"}},
+                )
+            else:
+                contents.append({"type": "text", "text": msg})
+        return contents
 __all__ = [
     "ConversationRole",
     "ConversationBot",
     "CallbackConversationBot",
+    "MultiModalConversationBot",
     "ConversationTurn",
 ]

azure/ai/evaluation/simulator/_conversation/_conversation.py CHANGED Viewed

@@ -4,14 +4,14 @@
 import asyncio
 import logging
-from typing import Callable, Dict, List, Tuple, Union
+from typing import Callable, Dict, List, Optional, Tuple, Union
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation.simulator._constants import SupportedLanguages
 from azure.ai.evaluation.simulator._helpers._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
 from ..._http_utils import AsyncHttpPipeline
 from . import ConversationBot, ConversationTurn
+from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
 def is_closing_message(response: Union[Dict, str], recursion_depth: int = 0) -> bool:
@@ -73,14 +73,14 @@ def is_closing_message_helper(response: str) -> bool:
 async def simulate_conversation(
     *,
     bots: List[ConversationBot],
-    session: AsyncHttpPipeline,
+    session: Union[AsyncHttpPipeline, AIProjectClient],
     language: SupportedLanguages,
     stopping_criteria: Callable[[str], bool] = is_closing_message,
     turn_limit: int = 10,
     history_limit: int = 5,
     api_call_delay_sec: float = 0,
     logger: logging.Logger = logging.getLogger(__name__),
-) -> Tuple:
+) -> Tuple[Optional[str], List[ConversationTurn]]:
     """
     Simulate a conversation between the given bots.
@@ -99,9 +99,10 @@ async def simulate_conversation(
     :keyword logger: The logger to use for logging. Defaults to the logger named after the current module.
     :paramtype logger: logging.Logger
     :return: Simulation a conversation between the given bots.
-    :rtype: Tuple
+    :rtype: Tuple[Optional[str], List[ConversationTurn]]
     """
+    session_state = {}
     # Read the first prompt.
     (first_response, request, _, full_response) = await bots[0].generate_response(
         session=session,
@@ -110,7 +111,7 @@ async def simulate_conversation(
         turn_number=0,
     )
     if "id" in first_response:
-        conversation_id = first_response["id"]
+        conversation_id: Optional[str] = first_response["id"]
     else:
         conversation_id = None
     first_prompt = first_response["samples"][0]
@@ -150,7 +151,10 @@ async def simulate_conversation(
                 conversation_history=conversation_history,
                 max_history=history_limit,
                 turn_number=current_turn,
+                session_state=session_state,
             )
+            if "session_state" in full_response and full_response["session_state"] is not None:
+                session_state.update(full_response["session_state"])
             # check if conversation id is null, which means conversation starter was used. use id from next turn
             if conversation_id is None and "id" in response:

azure/ai/evaluation/simulator/_conversation/constants.py CHANGED Viewed

@@ -12,7 +12,7 @@ OUTPUT_FILE = "openai_api_response.jsonl"
 # Azure endpoint constants
 AZUREML_TOKEN_SCOPE = "https://ml.azure.com"
-COGNITIVE_SERVICES_TOKEN_SCOPE = "https://cognitiveservices.azure.com/"
+COGNITIVE_SERVICES_TOKEN_SCOPE = "https://ai.azure.com/"
 AZURE_TOKEN_REFRESH_INTERVAL = 600  # seconds
 AZURE_ENDPOINT_DOMAIN_VALID_PATTERN_RE = (
     r"^(?=.{1,255}$)(?!-)[a-zA-Z0-9-]{1,63}(?<!-)"

azure/ai/evaluation/simulator/_data_sources/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------

azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl

Potentially problematic release.

azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl