deepeval 3.8.0__py3-none-any.whl → 3.8.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/annotation/annotation.py +2 -2
- deepeval/confident/api.py +31 -3
- deepeval/config/settings.py +3 -0
- deepeval/dataset/dataset.py +6 -4
- deepeval/integrations/langchain/callback.py +307 -15
- deepeval/integrations/langchain/utils.py +75 -24
- deepeval/integrations/pydantic_ai/instrumentator.py +43 -11
- deepeval/integrations/pydantic_ai/otel.py +9 -0
- deepeval/metrics/contextual_recall/contextual_recall.py +25 -6
- deepeval/metrics/contextual_recall/schema.py +6 -0
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +10 -1
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +10 -1
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +10 -1
- deepeval/metrics/utils.py +12 -1
- deepeval/models/llms/amazon_bedrock_model.py +51 -6
- deepeval/models/llms/azure_model.py +33 -7
- deepeval/models/llms/gemini_model.py +6 -1
- deepeval/prompt/prompt.py +7 -5
- deepeval/simulator/conversation_simulator.py +4 -2
- deepeval/telemetry.py +12 -91
- deepeval/test_case/llm_test_case.py +1 -0
- deepeval/tracing/tracing.py +6 -5
- {deepeval-3.8.0.dist-info → deepeval-3.8.2.dist-info}/METADATA +1 -1
- {deepeval-3.8.0.dist-info → deepeval-3.8.2.dist-info}/RECORD +28 -28
- {deepeval-3.8.0.dist-info → deepeval-3.8.2.dist-info}/LICENSE.md +0 -0
- {deepeval-3.8.0.dist-info → deepeval-3.8.2.dist-info}/WHEEL +0 -0
- {deepeval-3.8.0.dist-info → deepeval-3.8.2.dist-info}/entry_points.txt +0 -0
@@ -1,5 +1,77 @@
-
+import uuid
+from typing import Any, List, Dict, Optional, Union, Literal, Callable
+from time import perf_counter
 from langchain_core.outputs import ChatGeneration
+from rich.progress import Progress
+
+from deepeval.metrics import BaseMetric
+from deepeval.tracing.context import current_span_context, current_trace_context
+from deepeval.tracing.tracing import trace_manager
+from deepeval.tracing.types import (
+    AgentSpan,
+    BaseSpan,
+    LlmSpan,
+    RetrieverSpan,
+    SpanType,
+    ToolSpan,
+    TraceSpanStatus,
+)
+
+
+def convert_chat_messages_to_input(
+    messages: list[list[Any]], **kwargs
+) -> List[Dict[str, str]]:
+    """
+    Convert LangChain chat messages to our internal format.
+
+    Args:
+        messages: list[list[BaseMessage]] - outer list is batches, inner is messages.
+        **kwargs: May contain invocation_params with tools definitions.
+
+    Returns:
+        List of dicts with 'role' and 'content' keys, matching the schema used
+        by parse_prompts_to_messages for consistency.
+    """
+    # Valid roles matching parse_prompts_to_messages
+    ROLE_MAPPING = {
+        "human": "human",
+        "user": "human",
+        "ai": "ai",
+        "assistant": "ai",
+        "system": "system",
+        "tool": "tool",
+        "function": "function",
+    }
+
+    result: List[Dict[str, str]] = []
+    for batch in messages:
+        for msg in batch:
+            # BaseMessage has .type (role) and .content
+            raw_role = getattr(msg, "type", "unknown")
+            content = getattr(msg, "content", "")
+
+            # Normalize role using same conventions as prompt parsing
+            role = ROLE_MAPPING.get(raw_role.lower(), raw_role)
+
+            # Convert content to string (handles empty content, lists, etc.)
+            if isinstance(content, list):
+                # Some messages have content as a list of content blocks
+                content_str = " ".join(
+                    str(c.get("text", c) if isinstance(c, dict) else c)
+                    for c in content
+                )
+            else:
+                content_str = str(content) if content else ""
+
+            result.append({"role": role, "content": content_str})
+
+    # Append tool definitions if present which matches parse_prompts_to_messages behavior
+    tools = kwargs.get("invocation_params", {}).get("tools", None)
+    if tools and isinstance(tools, list):
+        for tool in tools:
+            result.append({"role": "Tool Input", "content": str(tool)})
+
+    return result
 
 
 def parse_prompts_to_messages(
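The new helper above (added, judging by the file list, in deepeval/integrations/langchain/utils.py) normalizes LangChain message batches before they are attached to LLM spans. A minimal usage sketch, assuming that module path and the standard langchain_core message classes; the tools payload is illustrative:

from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from deepeval.integrations.langchain.utils import convert_chat_messages_to_input  # assumed path

batch = [[
    SystemMessage(content="You are a helpful assistant."),
    HumanMessage(content="What is DeepEval?"),
    AIMessage(content="An open-source LLM evaluation framework."),
]]
messages = convert_chat_messages_to_input(
    batch, invocation_params={"tools": [{"name": "search"}]}
)
# -> [{'role': 'system', ...}, {'role': 'human', ...}, {'role': 'ai', ...},
#     {'role': 'Tool Input', 'content': "{'name': 'search'}"}]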
@@ -112,27 +184,6 @@ def safe_extract_model_name(
     return None
 
 
-from typing import Any, List, Dict, Optional, Union, Literal, Callable
-from langchain_core.outputs import ChatGeneration
-from time import perf_counter
-import uuid
-from rich.progress import Progress
-from deepeval.tracing.tracing import Observer
-
-from deepeval.metrics import BaseMetric
-from deepeval.tracing.context import current_span_context, current_trace_context
-from deepeval.tracing.tracing import trace_manager
-from deepeval.tracing.types import (
-    AgentSpan,
-    BaseSpan,
-    LlmSpan,
-    RetrieverSpan,
-    SpanType,
-    ToolSpan,
-    TraceSpanStatus,
-)
-
-
 def enter_current_context(
     span_type: Optional[
         Union[Literal["agent", "llm", "retriever", "tool"], str]
@@ -239,8 +290,8 @@ def enter_current_context(
 
     if (
         parent_span
-        and
-        and
+        and parent_span.progress is not None
+        and parent_span.pbar_callback_id is not None
     ):
         progress = parent_span.progress
         pbar_callback_id = parent_span.pbar_callback_id
@@ -40,6 +40,7 @@ try:
     from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
         OTLPSpanExporter,
     )
+    from opentelemetry.trace import set_tracer_provider
     from pydantic_ai.models.instrumented import (
         InstrumentationSettings as _BaseInstrumentationSettings,
     )
@@ -131,7 +132,12 @@ class ConfidentInstrumentationSettings(InstrumentationSettings):
     ):
         is_dependency_installed()
 
-
+        if trace_manager.environment is not None:
+            _environment = trace_manager.environment
+        elif settings.CONFIDENT_TRACE_ENVIRONMENT is not None:
+            _environment = settings.CONFIDENT_TRACE_ENVIRONMENT
+        else:
+            _environment = "development"
         if _environment and _environment in [
             "production",
             "staging",
@@ -176,6 +182,12 @@ class ConfidentInstrumentationSettings(InstrumentationSettings):
                 )
             )
         )
+        try:
+            set_tracer_provider(trace_provider)
+        except Exception as e:
+            # Handle case where provider is already set (optional warning)
+            logger.warning(f"Could not set global tracer provider: {e}")
+
         super().__init__(tracer_provider=trace_provider)
 
 
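The two hunks above change how ConfidentInstrumentationSettings (deepeval/integrations/pydantic_ai/instrumentator.py, per the file list) resolves its trace environment and registers its tracer provider globally. A hedged usage sketch, assuming the import path and pydantic-ai's `instrument` keyword; the model id is illustrative:

from pydantic_ai import Agent
from deepeval.integrations.pydantic_ai import ConfidentInstrumentationSettings  # assumed path

agent = Agent(
    "openai:gpt-4o-mini",  # hypothetical model id
    # environment now resolves: trace_manager.environment -> CONFIDENT_TRACE_ENVIRONMENT -> "development"
    instrument=ConfidentInstrumentationSettings(),
)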
@@ -234,16 +246,14 @@ class SpanInterceptor(SpanProcessor):
             )
 
         # set agent name and metric collection
-
-        span.
-        span.
-
-
-
-
-
-            self.settings.agent_metric_collection,
-        )
+        agent_name = (
+            span.attributes.get("gen_ai.agent.name")
+            or span.attributes.get("pydantic_ai.agent.name")
+            or span.attributes.get("agent_name")
+        )
+
+        if agent_name:
+            self._add_agent_span(span, agent_name)
 
         # set llm metric collection
         if span.attributes.get("gen_ai.operation.name") in [
@@ -270,6 +280,19 @@ class SpanInterceptor(SpanProcessor):
         )
 
     def on_end(self, span):
+
+        already_processed = (
+            span.attributes.get("confident.span.type") == "agent"
+        )
+        if not already_processed:
+            agent_name = (
+                span.attributes.get("gen_ai.agent.name")
+                or span.attributes.get("pydantic_ai.agent.name")
+                or span.attributes.get("agent_name")
+            )
+            if agent_name:
+                self._add_agent_span(span, agent_name)
+
         if self.settings.is_test_mode:
             if span.attributes.get("confident.span.type") == "agent":
 
@@ -323,3 +346,12 @@ class SpanInterceptor(SpanProcessor):
                 trace.end_time = perf_counter()
                 trace_manager.traces_to_evaluate.append(trace)
                 test_exporter.clear_span_json_list()
+
+    def _add_agent_span(self, span, name):
+        span.set_attribute("confident.span.type", "agent")
+        span.set_attribute("confident.span.name", name)
+        if self.settings.agent_metric_collection:
+            span.set_attribute(
+                "confident.span.metric_collection",
+                self.settings.agent_metric_collection,
+            )
@@ -2,6 +2,7 @@ import warnings
 from typing import Optional
 from deepeval.telemetry import capture_tracing_integration
 from deepeval.config.settings import get_settings
+import logging
 
 try:
     from opentelemetry import trace
@@ -24,6 +25,9 @@ def is_opentelemetry_available():
     return True
 
 
+logger = logging.getLogger(__name__)
+settings = get_settings()
+
 settings = get_settings()
 # OTLP_ENDPOINT = "https://otel.confident-ai.com/v1/traces"
 
@@ -51,6 +55,11 @@ def instrument_pydantic_ai(api_key: Optional[str] = None):
             )
         )
     )
+    try:
+        trace.set_tracer_provider(tracer_provider)
+    except Exception as e:
+        # Handle case where provider is already set (optional warning)
+        logger.warning(f"Could not set global tracer provider: {e}")
 
     # create an instrumented exporter
     from pydantic_ai.models.instrumented import InstrumentationSettings
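Both integration paths now try to install their TracerProvider as the process-global provider, so spans created elsewhere with opentelemetry.trace.get_tracer(...) are exported too. In current opentelemetry-python releases set_tracer_provider typically only logs a warning when a provider is already installed, so the except branch is a defensive net. A self-contained sketch of the pattern (names are illustrative, not taken from the diff):

import logging
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider

logger = logging.getLogger(__name__)
provider = TracerProvider()

try:
    trace.set_tracer_provider(provider)  # warns instead of raising if a provider already exists
except Exception as e:
    logger.warning(f"Could not set global tracer provider: {e}")

tracer = trace.get_tracer("my-app")  # hypothetical instrumentation name
with tracer.start_as_current_span("demo-span"):
    pass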
@@ -23,6 +23,7 @@ from deepeval.metrics.contextual_recall.schema import (
     ContextualRecallVerdict,
     Verdicts,
     ContextualRecallScoreReason,
+    VerdictWithExpectedOutput,
 )
 from deepeval.metrics.api import metric_data_manager
 
@@ -93,7 +94,7 @@ class ContextualRecallMetric(BaseMetric):
         expected_output = test_case.expected_output
         retrieval_context = test_case.retrieval_context
 
-        self.verdicts: List[
+        self.verdicts: List[VerdictWithExpectedOutput] = (
             self._generate_verdicts(
                 expected_output, retrieval_context, multimodal
             )
@@ -144,7 +145,7 @@ class ContextualRecallMetric(BaseMetric):
         expected_output = test_case.expected_output
         retrieval_context = test_case.retrieval_context
 
-        self.verdicts: List[
+        self.verdicts: List[VerdictWithExpectedOutput] = (
             await self._a_generate_verdicts(
                 expected_output, retrieval_context, multimodal
             )
@@ -241,13 +242,13 @@ class ContextualRecallMetric(BaseMetric):
         expected_output: str,
         retrieval_context: List[str],
         multimodal: bool,
-    ) -> List[
+    ) -> List[VerdictWithExpectedOutput]:
         prompt = self.evaluation_template.generate_verdicts(
             expected_output=expected_output,
             retrieval_context=retrieval_context,
             multimodal=multimodal,
         )
-
+        verdicts = await a_generate_with_schema_and_extract(
             metric=self,
             prompt=prompt,
             schema_cls=Verdicts,
@@ -256,19 +257,28 @@ class ContextualRecallMetric(BaseMetric):
                 ContextualRecallVerdict(**item) for item in data["verdicts"]
             ],
         )
+        final_verdicts = []
+        for verdict in verdicts:
+            new_verdict = VerdictWithExpectedOutput(
+                verdict=verdict.verdict,
+                reason=verdict.reason,
+                expected_output=expected_output,
+            )
+            final_verdicts.append(new_verdict)
+        return final_verdicts
 
     def _generate_verdicts(
         self,
         expected_output: str,
         retrieval_context: List[str],
         multimodal: bool,
-    ) -> List[
+    ) -> List[VerdictWithExpectedOutput]:
         prompt = self.evaluation_template.generate_verdicts(
             expected_output=expected_output,
             retrieval_context=retrieval_context,
             multimodal=multimodal,
         )
-
+        verdicts = generate_with_schema_and_extract(
             metric=self,
             prompt=prompt,
             schema_cls=Verdicts,
@@ -277,6 +287,15 @@ class ContextualRecallMetric(BaseMetric):
                 ContextualRecallVerdict(**item) for item in data["verdicts"]
             ],
         )
+        final_verdicts = []
+        for verdict in verdicts:
+            new_verdict = VerdictWithExpectedOutput(
+                verdict=verdict.verdict,
+                reason=verdict.reason,
+                expected_output=expected_output,
+            )
+            final_verdicts.append(new_verdict)
+        return final_verdicts
 
     def is_successful(self) -> bool:
         if self.error is not None:
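The contextual recall hunks above wrap each ContextualRecallVerdict in a new VerdictWithExpectedOutput model; its definition lives in deepeval/metrics/contextual_recall/schema.py (+6 lines, not included in the extracted hunks). A plausible shape, inferred only from how it is constructed here, would be a small pydantic model:

# Hypothetical sketch of the new schema class; the real +6-line definition is not shown above.
from typing import Optional
from pydantic import BaseModel


class VerdictWithExpectedOutput(BaseModel):
    verdict: str
    reason: Optional[str] = None
    expected_output: Optional[str] = None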
@@ -85,7 +85,12 @@ class ImageCoherenceMetric(BaseMetric):
         self.contexts_below = []
         self.scores = []
         self.reasons = []
-
+        image_indices = self.get_image_indices(actual_output)
+        if not image_indices:
+            raise ValueError(
+                f"The test case must have atleast one image in the `actual_output` to calculate {self.__name__} score"
+            )
+        for image_index in image_indices:
             context_above, context_below = self.get_image_context(
                 image_index, actual_output
             )
@@ -188,6 +193,10 @@ class ImageCoherenceMetric(BaseMetric):
 
         tasks = []
         image_indices = self.get_image_indices(actual_output)
+        if not image_indices:
+            raise ValueError(
+                f"The test case must have atleast one image in the `actual_output` to calculate {self.__name__} score"
+            )
         for image_index in image_indices:
             context_above, context_below = self.get_image_context(
                 image_index, actual_output
@@ -86,7 +86,12 @@ class ImageHelpfulnessMetric(BaseMetric):
         self.contexts_below = []
         self.scores = []
         self.reasons = []
-
+        image_indices = self.get_image_indices(actual_output)
+        if not image_indices:
+            raise ValueError(
+                f"The test case must have atleast one image in the `actual_output` to calculate {self.__name__} score"
+            )
+        for image_index in image_indices:
             context_above, context_below = self.get_image_context(
                 image_index, actual_output
             )
@@ -189,6 +194,10 @@ class ImageHelpfulnessMetric(BaseMetric):
 
         tasks = []
         image_indices = self.get_image_indices(actual_output)
+        if not image_indices:
+            raise ValueError(
+                f"The test case must have atleast one image in the `actual_output` to calculate {self.__name__} score"
+            )
         for image_index in image_indices:
             context_above, context_below = self.get_image_context(
                 image_index, actual_output
@@ -86,7 +86,12 @@ class ImageReferenceMetric(BaseMetric):
         self.contexts_below = []
         self.scores = []
         self.reasons = []
-
+        image_indices = self.get_image_indices(actual_output)
+        if not image_indices:
+            raise ValueError(
+                f"The test case must have atleast one image in the `actual_output` to calculate {self.__name__} score"
+            )
+        for image_index in image_indices:
             context_above, context_below = self.get_image_context(
                 image_index, actual_output
             )
@@ -189,6 +194,10 @@ class ImageReferenceMetric(BaseMetric):
 
         tasks = []
         image_indices = self.get_image_indices(actual_output)
+        if not image_indices:
+            raise ValueError(
+                f"The test case must have atleast one image in the `actual_output` to calculate {self.__name__} score"
+            )
         for image_index in image_indices:
             context_above, context_below = self.get_image_context(
                 image_index, actual_output
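All three image metrics now fail fast with a ValueError when actual_output contains no images, instead of silently iterating over nothing. An illustrative test case that satisfies the new check, assuming the multimodal LLMTestCase form used elsewhere in deepeval 3.8 (metric import path, file path, and values are hypothetical):

from deepeval.test_case import LLMTestCase, MLLMImage
from deepeval.metrics import ImageCoherenceMetric  # assumed export path

test_case = LLMTestCase(
    input="Generate a chart of monthly revenue and describe it.",
    actual_output=[
        "Here is the requested chart:",
        MLLMImage(url="./charts/revenue.png", local=True),  # at least one image is now required
    ],
)
metric = ImageCoherenceMetric()
# metric.measure(test_case) would raise the new ValueError if the image were removed.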
deepeval/metrics/utils.py
CHANGED
@@ -312,7 +312,7 @@ def check_llm_test_case_params(
             if isinstance(ele, MLLMImage):
                 count += 1
         if count != actual_output_image_count:
-            error_str = f"
+            error_str = f"Can only evaluate test cases with '{actual_output_image_count}' output images using the '{metric.__name__}' metric. `{count}` found."
             raise ValueError(error_str)
 
     if isinstance(test_case, LLMTestCase) is False:
@@ -320,6 +320,17 @@ def check_llm_test_case_params(
         metric.error = error_str
         raise ValueError(error_str)
 
+    # Centralized: if a metric requires actual_output, reject empty/whitespace
+    # (including empty multimodal outputs) as "missing params".
+    if LLMTestCaseParams.ACTUAL_OUTPUT in test_case_params:
+        actual_output = getattr(
+            test_case, LLMTestCaseParams.ACTUAL_OUTPUT.value
+        )
+        if isinstance(actual_output, str) and actual_output == "":
+            error_str = f"'actual_output' cannot be empty for the '{metric.__name__}' metric"
+            metric.error = error_str
+            raise MissingTestCaseParamsError(error_str)
+
     missing_params = []
     for param in test_case_params:
         if getattr(test_case, param.value) is None:
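In practice, this centralized guard means any metric that lists ACTUAL_OUTPUT among its required params now raises before calling the judge model when the output is an empty string. A hedged illustration (the metric choice is an assumption, not part of the diff):

from deepeval.test_case import LLMTestCase
from deepeval.metrics import AnswerRelevancyMetric  # any metric that requires actual_output

test_case = LLMTestCase(input="What does DeepEval do?", actual_output="")
metric = AnswerRelevancyMetric()
try:
    metric.measure(test_case)
except Exception as e:  # the missing-params error raised by the hunk above
    print(e)  # "'actual_output' cannot be empty for the ... metric"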
@@ -14,6 +14,7 @@ from deepeval.models.retry_policy import (
     sdk_retries_for,
 )
 from deepeval.test_case import MLLMImage
+from deepeval.errors import DeepEvalError
 from deepeval.utils import check_if_multimodal, convert_to_multi_modal_array
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.llms.constants import BEDROCK_MODELS_DATA
@@ -155,27 +156,28 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
 
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> Tuple[Union[str, BaseModel], float]:
+    ) -> Tuple[Union[str, BaseModel], Optional[float]]:
         return safe_asyncio_run(self.a_generate(prompt, schema))
 
     @retry_bedrock
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> Tuple[Union[str, BaseModel], float]:
+    ) -> Tuple[Union[str, BaseModel], Optional[float]]:
         if check_if_multimodal(prompt):
             prompt = convert_to_multi_modal_array(input=prompt)
             payload = self.generate_payload(prompt)
         else:
             payload = self.get_converse_request_body(prompt)
 
-        payload = self.get_converse_request_body(prompt)
         client = await self._ensure_client()
         response = await client.converse(
             modelId=self.get_model_name(),
             messages=payload["messages"],
             inferenceConfig=payload["inferenceConfig"],
         )
-
+
+        message = self._extract_text_from_converse_response(response)
+
         cost = self.calculate_cost(
             response["usage"]["inputTokens"],
             response["usage"]["outputTokens"],
@@ -206,7 +208,7 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
             try:
                 image_raw_bytes = base64.b64decode(element.dataBase64)
             except Exception:
-                raise
+                raise DeepEvalError(
                     f"Invalid base64 data in MLLMImage: {element._id}"
                 )
 
@@ -294,6 +296,46 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
     # Helpers
     ###############################################
 
+    @staticmethod
+    def _extract_text_from_converse_response(response: dict) -> str:
+        try:
+            content = response["output"]["message"]["content"]
+        except Exception as e:
+            raise DeepEvalError(
+                "Missing output.message.content in Bedrock response"
+            ) from e
+
+        # Collect any text blocks (ignore reasoning/tool blocks)
+        text_parts = []
+        for block in content:
+            if isinstance(block, dict) and "text" in block:
+                v = block.get("text")
+                if isinstance(v, str) and v.strip():
+                    text_parts.append(v)
+
+        if text_parts:
+            # join in case there are multiple text blocks
+            return "\n".join(text_parts)
+
+        # No text blocks present; raise an actionable error
+        keys = []
+        for b in content:
+            if isinstance(b, dict):
+                keys.append(list(b.keys()))
+            else:
+                keys.append(type(b).__name__)
+
+        stop_reason = (
+            response.get("stopReason")
+            or response.get("output", {}).get("stopReason")
+            or response.get("output", {}).get("message", {}).get("stopReason")
+        )
+
+        raise DeepEvalError(
+            f"Bedrock response contained no text content blocks. "
+            f"content keys={keys}, stopReason={stop_reason}"
+        )
+
     def get_converse_request_body(self, prompt: str) -> dict:
 
         return {
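For reference, the shape of a Bedrock Converse API response that the new helper consumes, with made-up values; a response whose content holds only tool-use or reasoning blocks now raises DeepEvalError instead of an opaque KeyError or IndexError:

# Illustrative Converse response; the helper above would return "Hello!".
response = {
    "output": {
        "message": {
            "role": "assistant",
            "content": [{"text": "Hello!"}],
        }
    },
    "stopReason": "end_turn",
    "usage": {"inputTokens": 12, "outputTokens": 3},
}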
@@ -303,11 +345,14 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
             },
         }
 
-    def calculate_cost(
+    def calculate_cost(
+        self, input_tokens: int, output_tokens: int
+    ) -> Optional[float]:
         if self.model_data.input_price and self.model_data.output_price:
             input_cost = input_tokens * self.model_data.input_price
             output_cost = output_tokens * self.model_data.output_price
             return input_cost + output_cost
+        return None
 
     def load_model(self):
         pass
@@ -1,6 +1,6 @@
 from openai.types.chat.chat_completion import ChatCompletion
 from openai import AzureOpenAI, AsyncAzureOpenAI
-from typing import Optional, Tuple, Union, Dict, List
+from typing import Optional, Tuple, Union, Dict, List, Callable, Awaitable
 from pydantic import BaseModel, SecretStr
 
 from deepeval.errors import DeepEvalError
@@ -42,6 +42,10 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         model: Optional[str] = None,
         api_key: Optional[str] = None,
         base_url: Optional[str] = None,
+        azure_ad_token_provider: Optional[
+            Callable[[], "str | Awaitable[str]"]
+        ] = None,
+        azure_ad_token: Optional[str] = None,
         temperature: Optional[float] = None,
         cost_per_input_token: Optional[float] = None,
         cost_per_output_token: Optional[float] = None,
@@ -67,12 +71,19 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         model = model or settings.AZURE_MODEL_NAME
         deployment_name = deployment_name or settings.AZURE_DEPLOYMENT_NAME
 
+        self.azure_ad_token_provider = azure_ad_token_provider
+
         if api_key is not None:
             # keep it secret, keep it safe from serializings, logging and alike
             self.api_key: Optional[SecretStr] = SecretStr(api_key)
         else:
             self.api_key = settings.AZURE_OPENAI_API_KEY
 
+        if azure_ad_token is not None:
+            self.azure_ad_token = azure_ad_token
+        else:
+            self.azure_ad_token = settings.AZURE_OPENAI_AD_TOKEN
+
         api_version = api_version or settings.OPENAI_API_VERSION
         if base_url is not None:
             base_url = str(base_url).rstrip("/")
@@ -431,18 +442,33 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         return kwargs
 
     def _build_client(self, cls):
-
-
-
-
-
-
+        # Only require the API key / Azure ad token if no token provider is supplied
+        azure_ad_token = None
+        api_key = None
+
+        if self.azure_ad_token_provider is None:
+            if self.azure_ad_token is not None:
+                azure_ad_token = require_secret_api_key(
+                    self.azure_ad_token,
+                    provider_label="AzureOpenAI",
+                    env_var_name="AZURE_OPENAI_AD_TOKEN",
+                    param_hint="`azure_ad_token` to AzureOpenAIModel(...)",
+                )
+            else:
+                api_key = require_secret_api_key(
+                    self.api_key,
+                    provider_label="AzureOpenAI",
+                    env_var_name="AZURE_OPENAI_API_KEY",
+                    param_hint="`api_key` to AzureOpenAIModel(...)",
+                )
 
         kw = dict(
             api_key=api_key,
             api_version=self.api_version,
             azure_endpoint=self.base_url,
             azure_deployment=self.deployment_name,
+            azure_ad_token_provider=self.azure_ad_token_provider,
+            azure_ad_token=azure_ad_token,
             **self._client_kwargs(),
         )
         try:
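The net effect of the Azure hunks: AzureOpenAIModel can now authenticate with Entra ID instead of an API key, either via a static azure_ad_token or a callable token provider. A hedged usage sketch using azure-identity's standard bearer-token provider (deployment, endpoint, and version values are hypothetical):

from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from deepeval.models import AzureOpenAIModel

token_provider = get_bearer_token_provider(
    DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
)

model = AzureOpenAIModel(
    model="gpt-4o",
    deployment_name="my-gpt4o-deployment",             # hypothetical deployment
    base_url="https://my-resource.openai.azure.com",   # hypothetical endpoint
    api_version="2024-06-01",
    azure_ad_token_provider=token_provider,            # no api_key required on this path
)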
@@ -65,6 +65,7 @@ class GeminiModel(DeepEvalBaseLLM):
         project: Optional[str] = None,
         location: Optional[str] = None,
         service_account_key: Optional[Union[str, Dict[str, str]]] = None,
+        use_vertexai: Optional[bool] = None,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
@@ -93,7 +94,11 @@
             location if location is not None else settings.GOOGLE_CLOUD_LOCATION
         )
         self.location = str(location).strip() if location is not None else None
-        self.use_vertexai =
+        self.use_vertexai = (
+            use_vertexai
+            if use_vertexai is not None
+            else settings.GOOGLE_GENAI_USE_VERTEXAI
+        )
 
         self.service_account_key: Optional[SecretStr] = None
         if service_account_key is None:
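A hedged usage sketch of the new keyword: callers can now opt into Vertex AI per model instance instead of only via the GOOGLE_GENAI_USE_VERTEXAI setting. The values below are hypothetical, and model_name is assumed to be the existing constructor argument:

from deepeval.models import GeminiModel

model = GeminiModel(
    model_name="gemini-1.5-pro",   # assumed existing argument
    project="my-gcp-project",      # hypothetical project
    location="us-central1",
    use_vertexai=True,             # new in 3.8.2: overrides GOOGLE_GENAI_USE_VERTEXAI
)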