deepeval 3.8.0__tar.gz → 3.8.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deepeval-3.8.0 → deepeval-3.8.1}/PKG-INFO +1 -1
- deepeval-3.8.1/deepeval/_version.py +1 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/annotation/annotation.py +2 -2
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/config/settings.py +3 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/dataset/dataset.py +6 -4
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/langchain/callback.py +1 -1
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_recall/contextual_recall.py +25 -6
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_recall/schema.py +6 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +10 -1
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +10 -1
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +10 -1
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/utils.py +1 -1
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/amazon_bedrock_model.py +51 -6
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/azure_model.py +33 -7
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/gemini_model.py +6 -1
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/prompt/prompt.py +7 -5
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_case/llm_test_case.py +1 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/pyproject.toml +1 -1
- deepeval-3.8.0/deepeval/_version.py +0 -1
- {deepeval-3.8.0 → deepeval-3.8.1}/LICENSE.md +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/README.md +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/annotation/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/annotation/api.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/anthropic/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/anthropic/extractors.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/anthropic/patch.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/anthropic/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/arc/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/arc/arc.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/arc/mode.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/arc/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/base_benchmark.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/bbq/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/bbq/bbq.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/bbq/task.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/bbq/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/big_bench_hard.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/boolean_expressions.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/causal_judgement.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/date_understanding.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/disambiguation_qa.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/dyck_languages.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/formal_fallacies.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/geometric_shapes.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/hyperbaton.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_five_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_seven_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_three_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/movie_recommendation.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/multistep_arithmetic_two.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/navigate.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/object_counting.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/penguins_in_a_table.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/reasoning_about_colored_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/ruin_names.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/salient_translation_error_detection.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/snarks.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/sports_understanding.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/temporal_sequences.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/web_of_lies.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/word_sorting.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/boolean_expressions.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/causal_judgement.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/date_understanding.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/disambiguation_qa.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/dyck_languages.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/formal_fallacies.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/geometric_shapes.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/hyperbaton.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_five_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_seven_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_three_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/movie_recommendation.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/multistep_arithmetic_two.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/navigate.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/object_counting.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/penguins_in_a_table.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/reasoning_about_colored_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/ruin_names.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/salient_translation_error_detection.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/snarks.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/sports_understanding.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/temporal_sequences.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/web_of_lies.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/word_sorting.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/task.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/bool_q/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/bool_q/bool_q.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/bool_q/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/drop/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/drop/drop.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/drop/task.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/drop/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/equity_med_qa/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/equity_med_qa/equity_med_qa.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/equity_med_qa/task.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/equity_med_qa/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/gsm8k/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/gsm8k/gsm8k.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/gsm8k/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/hellaswag/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/hellaswag/hellaswag.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/hellaswag/task.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/hellaswag/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/human_eval/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/human_eval/human_eval.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/human_eval/task.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/human_eval/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/ifeval/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/ifeval/ifeval.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/ifeval/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/lambada/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/lambada/lambada.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/lambada/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/logi_qa/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/logi_qa/logi_qa.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/logi_qa/task.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/logi_qa/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/math_qa/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/math_qa/math_qa.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/math_qa/task.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/math_qa/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/mmlu/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/mmlu/mmlu.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/mmlu/task.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/mmlu/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/modes/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/results.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/squad/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/squad/squad.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/squad/task.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/squad/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/tasks/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/truthful_qa/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/truthful_qa/mode.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/truthful_qa/task.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/truthful_qa/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/truthful_qa/truthful_qa.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/winogrande/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/winogrande/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/winogrande/winogrande.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/cli/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/cli/dotenv_handler.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/cli/main.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/cli/server.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/cli/test.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/cli/types.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/cli/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/confident/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/confident/api.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/confident/types.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/config/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/config/dotenv_handler.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/config/logging.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/config/settings_manager.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/config/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/constants.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/contextvars.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/dataset/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/dataset/api.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/dataset/golden.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/dataset/test_run_tracer.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/dataset/types.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/dataset/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/errors.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/evaluate/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/evaluate/api.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/evaluate/compare.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/evaluate/configs.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/evaluate/evaluate.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/evaluate/execute.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/evaluate/types.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/evaluate/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/crewai/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/crewai/handler.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/crewai/subs.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/crewai/tool.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/crewai/wrapper.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/hugging_face/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/hugging_face/callback.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/hugging_face/rich_manager.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/hugging_face/tests/test_callbacks.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/hugging_face/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/langchain/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/langchain/patch.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/langchain/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/llama_index/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/llama_index/handler.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/llama_index/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/pydantic_ai/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/pydantic_ai/agent.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/pydantic_ai/instrumentator.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/pydantic_ai/otel.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/pydantic_ai/test_instrumentator.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/key_handler.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/answer_relevancy/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/answer_relevancy/answer_relevancy.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/answer_relevancy/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/answer_relevancy/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/api.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/arena_g_eval/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/arena_g_eval/arena_g_eval.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/arena_g_eval/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/arena_g_eval/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/arena_g_eval/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/argument_correctness/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/argument_correctness/argument_correctness.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/argument_correctness/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/argument_correctness/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/base_metric.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/bias/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/bias/bias.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/bias/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/bias/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_precision/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_precision/contextual_precision.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_precision/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_precision/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_recall/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_recall/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_relevancy/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_relevancy/contextual_relevancy.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_relevancy/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_relevancy/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversation_completeness/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversation_completeness/conversation_completeness.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversation_completeness/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversation_completeness/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversational_dag/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversational_dag/conversational_dag.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversational_dag/nodes.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversational_dag/templates.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversational_g_eval/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversational_g_eval/conversational_g_eval.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversational_g_eval/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversational_g_eval/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/dag/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/dag/dag.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/dag/graph.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/dag/nodes.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/dag/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/dag/templates.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/dag/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/exact_match/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/exact_match/exact_match.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/faithfulness/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/faithfulness/faithfulness.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/faithfulness/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/faithfulness/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/g_eval/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/g_eval/g_eval.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/g_eval/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/g_eval/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/g_eval/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/goal_accuracy/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/goal_accuracy/goal_accuracy.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/goal_accuracy/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/goal_accuracy/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/hallucination/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/hallucination/hallucination.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/hallucination/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/hallucination/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/indicator.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/json_correctness/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/json_correctness/json_correctness.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/json_correctness/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/json_correctness/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/knowledge_retention/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/knowledge_retention/knowledge_retention.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/knowledge_retention/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/knowledge_retention/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp/mcp_task_completion.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp_use_metric/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp_use_metric/mcp_use_metric.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp_use_metric/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp_use_metric/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/misuse/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/misuse/misuse.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/misuse/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/misuse/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_coherence/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_coherence/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_coherence/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_editing/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_editing/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_editing/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_helpfulness/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_helpfulness/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_helpfulness/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_reference/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_reference/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_reference/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/text_to_image/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/text_to_image/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/text_to_image/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/non_advice/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/non_advice/non_advice.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/non_advice/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/non_advice/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/pattern_match/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/pattern_match/pattern_match.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/pii_leakage/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/pii_leakage/pii_leakage.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/pii_leakage/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/pii_leakage/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/plan_adherence/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/plan_adherence/plan_adherence.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/plan_adherence/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/plan_adherence/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/plan_quality/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/plan_quality/plan_quality.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/plan_quality/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/plan_quality/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/prompt_alignment/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/prompt_alignment/prompt_alignment.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/prompt_alignment/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/prompt_alignment/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/ragas.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/role_adherence/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/role_adherence/role_adherence.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/role_adherence/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/role_adherence/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/role_violation/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/role_violation/role_violation.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/role_violation/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/role_violation/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/step_efficiency/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/step_efficiency/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/step_efficiency/step_efficiency.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/step_efficiency/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/summarization/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/summarization/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/summarization/summarization.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/summarization/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/task_completion/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/task_completion/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/task_completion/task_completion.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/task_completion/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/tool_correctness/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/tool_correctness/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/tool_correctness/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/tool_correctness/tool_correctness.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/tool_use/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/tool_use/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/tool_use/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/tool_use/tool_use.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/topic_adherence/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/topic_adherence/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/topic_adherence/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/topic_adherence/topic_adherence.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/toxicity/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/toxicity/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/toxicity/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/toxicity/toxicity.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_precision/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_precision/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_precision/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_recall/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_recall/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_recall/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_relevancy/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_relevancy/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_relevancy/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_faithfulness/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_faithfulness/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_faithfulness/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_faithfulness/turn_faithfulness.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_relevancy/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_relevancy/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_relevancy/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_relevancy/turn_relevancy.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/model_integrations/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/model_integrations/types.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/model_integrations/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/_summac_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/answer_relevancy_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/base_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/detoxify_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/embedding_models/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/embedding_models/azure_embedding_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/embedding_models/local_embedding_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/embedding_models/ollama_embedding_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/embedding_models/openai_embedding_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/hallucination_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/anthropic_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/constants.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/deepseek_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/grok_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/kimi_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/litellm_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/local_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/ollama_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/openai_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/openrouter_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/portkey_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/retry_policy.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/summac_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/unbias_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai/extractors.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai/patch.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai_agents/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai_agents/agent.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai_agents/callback_handler.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai_agents/extractors.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai_agents/patch.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai_agents/runner.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/base.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/configs.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/copro/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/copro/copro.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/gepa/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/gepa/gepa.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/miprov2/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/miprov2/bootstrapper.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/miprov2/miprov2.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/miprov2/proposer.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/simba/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/simba/simba.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/simba/types.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/configs.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/policies.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/prompt_optimizer.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/rewriter/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/rewriter/rewriter.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/rewriter/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/scorer/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/scorer/base.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/scorer/scorer.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/scorer/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/types.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/plugins/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/plugins/plugin.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/progress_context.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/prompt/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/prompt/api.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/prompt/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/py.typed +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/red_teaming/README.md +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/scorer/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/scorer/scorer.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/simulator/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/simulator/conversation_simulator.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/simulator/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/simulator/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/singleton.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/base_synthesizer.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/chunking/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/chunking/context_generator.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/chunking/doc_chunker.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/config.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/synthesizer.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/templates/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/templates/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/templates/template_extraction.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/templates/template_prompt.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/types.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/telemetry.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_case/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_case/api.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_case/arena_test_case.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_case/conversational_test_case.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_case/mcp.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_case/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_run/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_run/api.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_run/cache.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_run/hooks.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_run/hyperparameters.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_run/test_run.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/api.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/context.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/offline_evals/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/offline_evals/api.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/offline_evals/span.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/offline_evals/thread.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/offline_evals/trace.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/otel/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/otel/exporter.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/otel/test_exporter.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/otel/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/patchers.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/perf_epoch_bridge.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/trace_context.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/trace_test_manager.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/tracing.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/types.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/utils.py +0 -0
deepeval-3.8.1/deepeval/_version.py

```diff
@@ -0,0 +1 @@
+__version__: str = "3.8.1"
```
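A quick way to confirm an installed build picked up the new module (a minimal check, assuming deepeval 3.8.1 is installed):

```python
# the module added in 3.8.1 exposes the version as a plain string attribute
from deepeval._version import __version__

assert __version__ == "3.8.1"
```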
{deepeval-3.8.0 → deepeval-3.8.1}/deepeval/annotation/annotation.py

```diff
@@ -14,7 +14,7 @@ def send_annotation(
     explanation: Optional[str] = None,
     user_id: Optional[str] = None,
     type: Optional[AnnotationType] = AnnotationType.THUMBS_RATING,
-) ->
+) -> None:
     api_annotation = APIAnnotation(
         rating=rating,
         traceUuid=trace_uuid,

@@ -50,7 +50,7 @@ async def a_send_annotation(
     explanation: Optional[str] = None,
     type: Optional[AnnotationType] = AnnotationType.THUMBS_RATING,
     user_id: Optional[str] = None,
-) ->
+) -> None:
     api_annotation = APIAnnotation(
         rating=rating,
         traceUuid=trace_uuid,
```
{deepeval-3.8.0 → deepeval-3.8.1}/deepeval/config/settings.py

```diff
@@ -447,6 +447,9 @@ class Settings(BaseSettings):
     AZURE_OPENAI_API_KEY: Optional[SecretStr] = Field(
         None, description="Azure OpenAI API key."
     )
+    AZURE_OPENAI_AD_TOKEN: Optional[SecretStr] = Field(
+        None, description="Azure OpenAI Ad Token."
+    )
     AZURE_OPENAI_ENDPOINT: Optional[AnyUrl] = Field(
         None, description="Azure OpenAI endpoint URL."
     )
```
{deepeval-3.8.0 → deepeval-3.8.1}/deepeval/dataset/dataset.py

```diff
@@ -84,9 +84,11 @@ class EvaluationDataset:
     def __init__(
         self,
         goldens: Union[List[Golden], List[ConversationalGolden]] = [],
+        confident_api_key: Optional[str] = None,
     ):
         self._alias = None
         self._id = None
+        self.confident_api_key = confident_api_key
         if len(goldens) > 0:
             self._multi_turn = (
                 True if isinstance(goldens[0], ConversationalGolden) else False

@@ -722,7 +724,7 @@ class EvaluationDataset:
                 "Unable to push empty dataset to Confident AI, there must be at least one golden in dataset."
             )

-        api = Api()
+        api = Api(api_key=self.confident_api_key)
         api_dataset = APIDataset(
             goldens=self.goldens if not self._multi_turn else None,
             conversationalGoldens=(self.goldens if self._multi_turn else None),

@@ -755,7 +757,7 @@ class EvaluationDataset:
         auto_convert_goldens_to_test_cases: bool = False,
         public: bool = False,
     ):
-        api = Api()
+        api = Api(api_key=self.confident_api_key)
         with capture_pull_dataset():
             with Progress(
                 SpinnerColumn(style="rgb(106,0,255)"),

@@ -839,7 +841,7 @@ class EvaluationDataset:
             raise ValueError(
                 f"Can't queue empty list of goldens to dataset with alias: {alias} on Confident AI."
             )
-        api = Api()
+        api = Api(api_key=self.confident_api_key)

         multi_turn = isinstance(goldens[0], ConversationalGolden)

@@ -871,7 +873,7 @@ class EvaluationDataset:
         self,
         alias: str,
     ):
-        api = Api()
+        api = Api(api_key=self.confident_api_key)
         api.send_request(
             method=HttpMethods.DELETE,
             endpoint=Endpoints.DATASET_ALIAS_ENDPOINT,
```
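Taken together, these hunks thread a per-instance Confident AI key through every `Api()` construction in the class (push, pull, queue, delete). A minimal sketch of the new constructor argument; the key and alias values are placeholders, and `Golden(input=...)` plus `push(alias=...)` are assumed from the surrounding API rather than shown in this diff:

```python
from deepeval.dataset import EvaluationDataset, Golden

dataset = EvaluationDataset(
    goldens=[Golden(input="What does deepeval do?")],
    confident_api_key="<confident-api-key>",  # placeholder
)
dataset.push(alias="my-dataset")  # internally: Api(api_key=self.confident_api_key)
```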
{deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/langchain/callback.py

```diff
@@ -539,4 +539,4 @@ class CallbackHandler(BaseCallbackHandler):
         with self._ctx(run_id=run_id, parent_run_id=parent_run_id):
             retriever_span.status = TraceSpanStatus.ERRORED
             retriever_span.error = str(error)
-            exit_current_context(uuid_str=uuid_str)
+            exit_current_context(uuid_str=uuid_str)
```
{deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_recall/contextual_recall.py

```diff
@@ -23,6 +23,7 @@ from deepeval.metrics.contextual_recall.schema import (
     ContextualRecallVerdict,
     Verdicts,
     ContextualRecallScoreReason,
+    VerdictWithExpectedOutput,
 )
 from deepeval.metrics.api import metric_data_manager

@@ -93,7 +94,7 @@ class ContextualRecallMetric(BaseMetric):
         expected_output = test_case.expected_output
         retrieval_context = test_case.retrieval_context

-        self.verdicts: List[
+        self.verdicts: List[VerdictWithExpectedOutput] = (
             self._generate_verdicts(
                 expected_output, retrieval_context, multimodal
             )

@@ -144,7 +145,7 @@ class ContextualRecallMetric(BaseMetric):
         expected_output = test_case.expected_output
         retrieval_context = test_case.retrieval_context

-        self.verdicts: List[
+        self.verdicts: List[VerdictWithExpectedOutput] = (
             await self._a_generate_verdicts(
                 expected_output, retrieval_context, multimodal
             )

@@ -241,13 +242,13 @@ class ContextualRecallMetric(BaseMetric):
         expected_output: str,
         retrieval_context: List[str],
         multimodal: bool,
-    ) -> List[
+    ) -> List[VerdictWithExpectedOutput]:
         prompt = self.evaluation_template.generate_verdicts(
             expected_output=expected_output,
             retrieval_context=retrieval_context,
             multimodal=multimodal,
         )
-
+        verdicts = await a_generate_with_schema_and_extract(
             metric=self,
             prompt=prompt,
             schema_cls=Verdicts,

@@ -256,19 +257,28 @@ class ContextualRecallMetric(BaseMetric):
                 ContextualRecallVerdict(**item) for item in data["verdicts"]
             ],
         )
+        final_verdicts = []
+        for verdict in verdicts:
+            new_verdict = VerdictWithExpectedOutput(
+                verdict=verdict.verdict,
+                reason=verdict.reason,
+                expected_output=expected_output,
+            )
+            final_verdicts.append(new_verdict)
+        return final_verdicts

     def _generate_verdicts(
         self,
         expected_output: str,
         retrieval_context: List[str],
         multimodal: bool,
-    ) -> List[
+    ) -> List[VerdictWithExpectedOutput]:
         prompt = self.evaluation_template.generate_verdicts(
             expected_output=expected_output,
             retrieval_context=retrieval_context,
             multimodal=multimodal,
         )
-
+        verdicts = generate_with_schema_and_extract(
             metric=self,
             prompt=prompt,
             schema_cls=Verdicts,

@@ -277,6 +287,15 @@ class ContextualRecallMetric(BaseMetric):
                 ContextualRecallVerdict(**item) for item in data["verdicts"]
             ],
         )
+        final_verdicts = []
+        for verdict in verdicts:
+            new_verdict = VerdictWithExpectedOutput(
+                verdict=verdict.verdict,
+                reason=verdict.reason,
+                expected_output=expected_output,
+            )
+            final_verdicts.append(new_verdict)
+        return final_verdicts

     def is_successful(self) -> bool:
         if self.error is not None:
```
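The metric now wraps each `ContextualRecallVerdict` into a `VerdictWithExpectedOutput` that also carries the expected output. The companion `schema.py` change (+6 lines) is not shown in this excerpt; a hypothetical reconstruction of that model, using only the three field names the metric populates above:

```python
from typing import Optional

from pydantic import BaseModel


# hypothetical shape; only the field names come from the hunks above
class VerdictWithExpectedOutput(BaseModel):
    verdict: str
    reason: Optional[str] = None
    expected_output: str
```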
{deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py

```diff
@@ -85,7 +85,12 @@ class ImageCoherenceMetric(BaseMetric):
         self.contexts_below = []
         self.scores = []
         self.reasons = []
-
+        image_indices = self.get_image_indices(actual_output)
+        if not image_indices:
+            raise ValueError(
+                f"The test case must have atleast one image in the `actual_output` to calculate {self.__name__} score"
+            )
+        for image_index in image_indices:
             context_above, context_below = self.get_image_context(
                 image_index, actual_output
             )

@@ -188,6 +193,10 @@ class ImageCoherenceMetric(BaseMetric):

         tasks = []
         image_indices = self.get_image_indices(actual_output)
+        if not image_indices:
+            raise ValueError(
+                f"The test case must have atleast one image in the `actual_output` to calculate {self.__name__} score"
+            )
         for image_index in image_indices:
             context_above, context_below = self.get_image_context(
                 image_index, actual_output
```
{deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py

```diff
@@ -86,7 +86,12 @@ class ImageHelpfulnessMetric(BaseMetric):
         self.contexts_below = []
         self.scores = []
         self.reasons = []
-
+        image_indices = self.get_image_indices(actual_output)
+        if not image_indices:
+            raise ValueError(
+                f"The test case must have atleast one image in the `actual_output` to calculate {self.__name__} score"
+            )
+        for image_index in image_indices:
             context_above, context_below = self.get_image_context(
                 image_index, actual_output
             )

@@ -189,6 +194,10 @@ class ImageHelpfulnessMetric(BaseMetric):

         tasks = []
         image_indices = self.get_image_indices(actual_output)
+        if not image_indices:
+            raise ValueError(
+                f"The test case must have atleast one image in the `actual_output` to calculate {self.__name__} score"
+            )
         for image_index in image_indices:
             context_above, context_below = self.get_image_context(
                 image_index, actual_output
```
{deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_reference/image_reference.py

```diff
@@ -86,7 +86,12 @@ class ImageReferenceMetric(BaseMetric):
         self.contexts_below = []
         self.scores = []
         self.reasons = []
-
+        image_indices = self.get_image_indices(actual_output)
+        if not image_indices:
+            raise ValueError(
+                f"The test case must have atleast one image in the `actual_output` to calculate {self.__name__} score"
+            )
+        for image_index in image_indices:
             context_above, context_below = self.get_image_context(
                 image_index, actual_output
             )

@@ -189,6 +194,10 @@ class ImageReferenceMetric(BaseMetric):

         tasks = []
         image_indices = self.get_image_indices(actual_output)
+        if not image_indices:
+            raise ValueError(
+                f"The test case must have atleast one image in the `actual_output` to calculate {self.__name__} score"
+            )
         for image_index in image_indices:
             context_above, context_below = self.get_image_context(
                 image_index, actual_output
```
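The same fail-fast guard lands in both the sync and async paths of all three image metrics (coherence, helpfulness, reference). A sketch of what it produces for a test case whose `actual_output` contains no `MLLMImage` elements; the import path and the test case are assumptions, not shown in this diff:

```python
from deepeval.metrics import ImageCoherenceMetric  # import path is an assumption

metric = ImageCoherenceMetric()
try:
    metric.measure(text_only_test_case)  # hypothetical test case without images
except ValueError as err:
    print(err)  # "The test case must have atleast one image in the `actual_output` ..."
```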
{deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/utils.py

```diff
@@ -312,7 +312,7 @@ def check_llm_test_case_params(
         if isinstance(ele, MLLMImage):
             count += 1
     if count != actual_output_image_count:
-        error_str = f"
+        error_str = f"Can only evaluate test cases with '{actual_output_image_count}' output images using the '{metric.__name__}' metric. `{count}` found."
         raise ValueError(error_str)

     if isinstance(test_case, LLMTestCase) is False:
```
@@ -14,6 +14,7 @@ from deepeval.models.retry_policy import (
|
|
|
14
14
|
sdk_retries_for,
|
|
15
15
|
)
|
|
16
16
|
from deepeval.test_case import MLLMImage
|
|
17
|
+
from deepeval.errors import DeepEvalError
|
|
17
18
|
from deepeval.utils import check_if_multimodal, convert_to_multi_modal_array
|
|
18
19
|
from deepeval.models import DeepEvalBaseLLM
|
|
19
20
|
from deepeval.models.llms.constants import BEDROCK_MODELS_DATA
|
|
@@ -155,27 +156,28 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
 
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> Tuple[Union[str, BaseModel], float]:
+    ) -> Tuple[Union[str, BaseModel], Optional[float]]:
         return safe_asyncio_run(self.a_generate(prompt, schema))
 
     @retry_bedrock
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> Tuple[Union[str, BaseModel], float]:
+    ) -> Tuple[Union[str, BaseModel], Optional[float]]:
         if check_if_multimodal(prompt):
             prompt = convert_to_multi_modal_array(input=prompt)
             payload = self.generate_payload(prompt)
         else:
             payload = self.get_converse_request_body(prompt)
 
-        payload = self.get_converse_request_body(prompt)
         client = await self._ensure_client()
         response = await client.converse(
             modelId=self.get_model_name(),
             messages=payload["messages"],
             inferenceConfig=payload["inferenceConfig"],
         )
-
+
+        message = self._extract_text_from_converse_response(response)
+
         cost = self.calculate_cost(
             response["usage"]["inputTokens"],
             response["usage"]["outputTokens"],
@@ -206,7 +208,7 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
             try:
                 image_raw_bytes = base64.b64decode(element.dataBase64)
             except Exception:
-                raise
+                raise DeepEvalError(
                     f"Invalid base64 data in MLLMImage: {element._id}"
                 )
 
@@ -294,6 +296,46 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
     # Helpers
     ###############################################
 
+    @staticmethod
+    def _extract_text_from_converse_response(response: dict) -> str:
+        try:
+            content = response["output"]["message"]["content"]
+        except Exception as e:
+            raise DeepEvalError(
+                "Missing output.message.content in Bedrock response"
+            ) from e
+
+        # Collect any text blocks (ignore reasoning/tool blocks)
+        text_parts = []
+        for block in content:
+            if isinstance(block, dict) and "text" in block:
+                v = block.get("text")
+                if isinstance(v, str) and v.strip():
+                    text_parts.append(v)
+
+        if text_parts:
+            # join in case there are multiple text blocks
+            return "\n".join(text_parts)
+
+        # No text blocks present; raise an actionable error
+        keys = []
+        for b in content:
+            if isinstance(b, dict):
+                keys.append(list(b.keys()))
+            else:
+                keys.append(type(b).__name__)
+
+        stop_reason = (
+            response.get("stopReason")
+            or response.get("output", {}).get("stopReason")
+            or response.get("output", {}).get("message", {}).get("stopReason")
+        )
+
+        raise DeepEvalError(
+            f"Bedrock response contained no text content blocks. "
+            f"content keys={keys}, stopReason={stop_reason}"
+        )
+
     def get_converse_request_body(self, prompt: str) -> dict:
 
         return {
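The Converse API returns `output.message.content` as a list of blocks that can mix text with reasoning or tool-use entries, which is why the helper filters rather than blindly indexing `content[0]["text"]`. A standalone sketch of the same filtering logic run against a hand-written response payload (the payload values are illustrative, and ValueError stands in for DeepEvalError):

    def extract_text(response: dict) -> str:
        # Same filtering as _extract_text_from_converse_response above.
        content = response["output"]["message"]["content"]
        text_parts = [
            block["text"]
            for block in content
            if isinstance(block, dict)
            and isinstance(block.get("text"), str)
            and block["text"].strip()
        ]
        if not text_parts:
            raise ValueError("Bedrock response contained no text content blocks")
        return "\n".join(text_parts)

    # A typical Converse response: a reasoning block followed by a text block.
    response = {
        "output": {
            "message": {
                "content": [
                    {"reasoningContent": {"reasoningText": {"text": "thinking..."}}},
                    {"text": "The answer is 42."},
                ]
            }
        },
        "stopReason": "end_turn",
        "usage": {"inputTokens": 12, "outputTokens": 8},
    }

    assert extract_text(response) == "The answer is 42."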
@@ -303,11 +345,14 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
             },
         }
 
-    def calculate_cost(
+    def calculate_cost(
+        self, input_tokens: int, output_tokens: int
+    ) -> Optional[float]:
         if self.model_data.input_price and self.model_data.output_price:
             input_cost = input_tokens * self.model_data.input_price
             output_cost = output_tokens * self.model_data.output_price
             return input_cost + output_cost
+        return None
 
     def load_model(self):
         pass
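The explicit `return None` (matching the new `Optional[float]` on `generate`/`a_generate`) lets callers distinguish "pricing metadata unavailable" from a genuine zero cost. A quick sketch of the same arithmetic with illustrative per-token USD prices:

    from typing import Optional

    def cost_of(
        input_tokens: int,
        output_tokens: int,
        input_price: Optional[float],
        output_price: Optional[float],
    ) -> Optional[float]:
        # Mirrors calculate_cost: None when pricing metadata is missing.
        if input_price and output_price:
            return input_tokens * input_price + output_tokens * output_price
        return None

    print(cost_of(1_000, 200, 3e-06, 1.5e-05))  # 0.006
    print(cost_of(1_000, 200, None, None))      # None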
@@ -1,6 +1,6 @@
 from openai.types.chat.chat_completion import ChatCompletion
 from openai import AzureOpenAI, AsyncAzureOpenAI
-from typing import Optional, Tuple, Union, Dict, List
+from typing import Optional, Tuple, Union, Dict, List, Callable, Awaitable
 from pydantic import BaseModel, SecretStr
 
 from deepeval.errors import DeepEvalError
@@ -42,6 +42,10 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         model: Optional[str] = None,
         api_key: Optional[str] = None,
         base_url: Optional[str] = None,
+        azure_ad_token_provider: Optional[
+            Callable[[], "str | Awaitable[str]"]
+        ] = None,
+        azure_ad_token: Optional[str] = None,
         temperature: Optional[float] = None,
         cost_per_input_token: Optional[float] = None,
         cost_per_output_token: Optional[float] = None,
@@ -67,12 +71,19 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         model = model or settings.AZURE_MODEL_NAME
         deployment_name = deployment_name or settings.AZURE_DEPLOYMENT_NAME
 
+        self.azure_ad_token_provider = azure_ad_token_provider
+
         if api_key is not None:
             # keep it secret, keep it safe from serializings, logging and alike
             self.api_key: Optional[SecretStr] = SecretStr(api_key)
         else:
             self.api_key = settings.AZURE_OPENAI_API_KEY
 
+        if azure_ad_token is not None:
+            self.azure_ad_token = azure_ad_token
+        else:
+            self.azure_ad_token = settings.AZURE_OPENAI_AD_TOKEN
+
         api_version = api_version or settings.OPENAI_API_VERSION
         if base_url is not None:
             base_url = str(base_url).rstrip("/")
@@ -431,18 +442,33 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         return kwargs
 
     def _build_client(self, cls):
-
-
-
-
-
-
+        # Only require the API key / Azure ad token if no token provider is supplied
+        azure_ad_token = None
+        api_key = None
+
+        if self.azure_ad_token_provider is None:
+            if self.azure_ad_token is not None:
+                azure_ad_token = require_secret_api_key(
+                    self.azure_ad_token,
+                    provider_label="AzureOpenAI",
+                    env_var_name="AZURE_OPENAI_AD_TOKEN",
+                    param_hint="`azure_ad_token` to AzureOpenAIModel(...)",
+                )
+            else:
+                api_key = require_secret_api_key(
+                    self.api_key,
+                    provider_label="AzureOpenAI",
+                    env_var_name="AZURE_OPENAI_API_KEY",
+                    param_hint="`api_key` to AzureOpenAIModel(...)",
+                )
 
         kw = dict(
             api_key=api_key,
             api_version=self.api_version,
             azure_endpoint=self.base_url,
             azure_deployment=self.deployment_name,
+            azure_ad_token_provider=self.azure_ad_token_provider,
+            azure_ad_token=azure_ad_token,
             **self._client_kwargs(),
         )
         try:
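With `_build_client` skipping the credential check whenever a token provider is supplied, AzureOpenAIModel can authenticate via Microsoft Entra ID with no API key at all. A minimal sketch; the `azure.identity` calls are the standard way to build such a provider, while the `deepeval.models` import path and the endpoint/deployment values are assumptions:

    from azure.identity import DefaultAzureCredential, get_bearer_token_provider
    from deepeval.models import AzureOpenAIModel  # assumed import path

    # Fetches short-lived bearer tokens on demand; no API key involved.
    token_provider = get_bearer_token_provider(
        DefaultAzureCredential(),
        "https://cognitiveservices.azure.com/.default",
    )

    model = AzureOpenAIModel(
        deployment_name="gpt-4o-eval",                    # placeholder deployment
        base_url="https://my-resource.openai.azure.com",  # placeholder endpoint
        azure_ad_token_provider=token_provider,
    )

A static `azure_ad_token` (or the `AZURE_OPENAI_AD_TOKEN` setting) takes the same precedence over the API key when no provider is given.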
@@ -65,6 +65,7 @@ class GeminiModel(DeepEvalBaseLLM):
         project: Optional[str] = None,
         location: Optional[str] = None,
         service_account_key: Optional[Union[str, Dict[str, str]]] = None,
+        use_vertexai: Optional[bool] = None,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
@@ -93,7 +94,11 @@ class GeminiModel(DeepEvalBaseLLM):
             location if location is not None else settings.GOOGLE_CLOUD_LOCATION
         )
         self.location = str(location).strip() if location is not None else None
-        self.use_vertexai =
+        self.use_vertexai = (
+            use_vertexai
+            if use_vertexai is not None
+            else settings.GOOGLE_GENAI_USE_VERTEXAI
+        )
 
         self.service_account_key: Optional[SecretStr] = None
         if service_account_key is None:
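The new `use_vertexai` flag overrides the `GOOGLE_GENAI_USE_VERTEXAI` setting per instance, so routing through Vertex AI no longer has to be configured globally. A sketch using only the constructor arguments visible in this diff; the `deepeval.models` import path and the project/location values are assumptions:

    from deepeval.models import GeminiModel  # assumed import path

    # Force Vertex AI routing for this instance, regardless of the
    # GOOGLE_GENAI_USE_VERTEXAI setting.
    model = GeminiModel(
        use_vertexai=True,
        project="my-gcp-project",  # placeholder project id
        location="us-central1",    # placeholder region
    )

    # Leaving use_vertexai=None (the default) falls back to the settings value.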
@@ -114,6 +114,7 @@ class Prompt:
         output_type: Optional[OutputType] = None,
         output_schema: Optional[Type[BaseModel]] = None,
         interpolation_type: Optional[PromptInterpolationType] = None,
+        confident_api_key: Optional[str] = None,
     ):
         if text_template and messages_template:
             raise TypeError(
@@ -129,6 +130,7 @@ class Prompt:
         self.interpolation_type: PromptInterpolationType = (
             interpolation_type or PromptInterpolationType.FSTRING
         )
+        self.confident_api_key = confident_api_key
 
         self._version = None
         self._prompt_version_id: Optional[str] = None
@@ -244,7 +246,7 @@ class Prompt:
             raise ValueError(
                 "Prompt alias is not set. Please set an alias to continue."
             )
-        api = Api()
+        api = Api(api_key=self.confident_api_key)
         data, _ = api.send_request(
             method=HttpMethods.GET,
             endpoint=Endpoints.PROMPTS_VERSIONS_ENDPOINT,
@@ -496,7 +498,7 @@ class Prompt:
         except Exception:
             pass
 
-        api = Api()
+        api = Api(api_key=self.confident_api_key)
         with Progress(
             SpinnerColumn(style="rgb(106,0,255)"),
             BarColumn(bar_width=60),
@@ -635,7 +637,7 @@ class Prompt:
             # Pydantic version below 2.0
             body = body.dict(by_alias=True, exclude_none=True)
 
-        api = Api()
+        api = Api(api_key=self.confident_api_key)
         _, link = api.send_request(
             method=HttpMethods.POST,
             endpoint=Endpoints.PROMPTS_ENDPOINT,
@@ -692,7 +694,7 @@ class Prompt:
             )
         except AttributeError:
             body = body.dict(by_alias=True, exclude_none=True)
-        api = Api()
+        api = Api(api_key=self.confident_api_key)
         data, _ = api.send_request(
             method=HttpMethods.PUT,
             endpoint=Endpoints.PROMPTS_VERSION_ID_ENDPOINT,
@@ -765,7 +767,7 @@ class Prompt:
         while True:
             await asyncio.sleep(self._refresh_map[CACHE_KEY][cache_value])
 
-            api = Api()
+            api = Api(api_key=self.confident_api_key)
             try:
                 if label:
                     data, _ = api.send_request(
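Because every internal `Api()` construction now receives `self.confident_api_key`, a single process can manage prompts across Confident AI workspaces without switching the global login. A sketch; the `deepeval.prompt` import path, the `alias` constructor argument, and the `pull()` method are assumptions based on the surrounding code (key values are placeholders):

    from deepeval.prompt import Prompt  # assumed import path

    # This instance authenticates with its own key on every API call,
    # including the background refresh loop.
    prompt = Prompt(
        alias="summarizer",                   # assumed constructor argument
        confident_api_key="confident-key-a",  # placeholder key
    )
    prompt.pull()  # Api(api_key=self.confident_api_key) under the hood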
@@ -386,6 +386,7 @@ class LLMTestCase(BaseModel):
                 [
                     re.search(pattern, self.input or "") is not None,
                     re.search(pattern, self.actual_output or "") is not None,
+                    re.search(pattern, self.expected_output or "") is not None,
                 ]
             )
             if isinstance(self.input, str)
@@ -1 +0,0 @@
-__version__: str = "3.8.0"
{deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/__init__.py RENAMED (file without changes)
{deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/dyck_languages.txt RENAMED (file without changes)
{deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/hyperbaton.txt RENAMED (file without changes)