PyPI - deepeval - Versions diffs - 3.4.8__tar.gz → 3.5.0__tar.gz - Mend

deepeval 3.4.8__tar.gz → 3.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (470) hide show

{deepeval-3.4.8 → deepeval-3.5.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: deepeval
-Version: 3.4.8
+Version: 3.5.0
 Summary: The LLM Evaluation Framework
 Home-page: https://github.com/confident-ai/deepeval
 License: Apache-2.0
@@ -25,6 +25,8 @@ Requires-Dist: opentelemetry-exporter-otlp-proto-grpc (>=1.24.0,<2.0.0)
 Requires-Dist: opentelemetry-sdk (>=1.24.0,<2.0.0)
 Requires-Dist: portalocker
 Requires-Dist: posthog (>=6.3.0,<7.0.0)
+Requires-Dist: pydantic (>=2.11.7,<3.0.0)
+Requires-Dist: pydantic-settings (>=2.10.1,<3.0.0)
 Requires-Dist: pyfiglet
 Requires-Dist: pytest
 Requires-Dist: pytest-asyncio
@@ -187,16 +189,6 @@ Let's pretend your LLM application is a RAG based customer support chatbot; here
 ```
 pip install -U deepeval
 ```
-### Environment variables (.env / .env.local)
-DeepEval auto-loads `.env.local` then `.env` from the current working directory **at import time**.
-**Precedence:** process env -> `.env.local` -> `.env`.
-Opt out with `DEEPEVAL_DISABLE_DOTENV=1`.
-```bash
-cp .env.example .env.local
-# then edit .env.local (ignored by git)
-```
 ## Create an account (highly recommended)
@@ -389,9 +381,20 @@ evaluate(dataset, [answer_relevancy_metric])
 dataset.evaluate([answer_relevancy_metric])
 ```
-# LLM Evaluation With Confident AI
+## A Note on Env Variables (.env / .env.local)
+DeepEval auto-loads `.env.local` then `.env` from the current working directory **at import time**.
+**Precedence:** process env -> `.env.local` -> `.env`.
+Opt out with `DEEPEVAL_DISABLE_DOTENV=1`.
+```bash
+cp .env.example .env.local
+# then edit .env.local (ignored by git)
+```
+# DeepEval With Confident AI
-The correct LLM evaluation lifecycle is only achievable with [the DeepEval platform](https://confident-ai.com?utm_source=Github). It allows you to:
+DeepEval's cloud platform, [Confident AI](https://confident-ai.com?utm_source=Github), allows you to:
 1. Curate/annotate evaluation datasets on the cloud
 2. Benchmark LLM app using dataset, and compare with previous iterations to experiment which models/prompts works best

{deepeval-3.4.8 → deepeval-3.5.0}/README.md RENAMED Viewed

@@ -140,16 +140,6 @@ Let's pretend your LLM application is a RAG based customer support chatbot; here
 ```
 pip install -U deepeval
 ```
-### Environment variables (.env / .env.local)
-DeepEval auto-loads `.env.local` then `.env` from the current working directory **at import time**.
-**Precedence:** process env -> `.env.local` -> `.env`.
-Opt out with `DEEPEVAL_DISABLE_DOTENV=1`.
-```bash
-cp .env.example .env.local
-# then edit .env.local (ignored by git)
-```
 ## Create an account (highly recommended)
@@ -342,9 +332,20 @@ evaluate(dataset, [answer_relevancy_metric])
 dataset.evaluate([answer_relevancy_metric])
 ```
-# LLM Evaluation With Confident AI
+## A Note on Env Variables (.env / .env.local)
+DeepEval auto-loads `.env.local` then `.env` from the current working directory **at import time**.
+**Precedence:** process env -> `.env.local` -> `.env`.
+Opt out with `DEEPEVAL_DISABLE_DOTENV=1`.
+```bash
+cp .env.example .env.local
+# then edit .env.local (ignored by git)
+```
+# DeepEval With Confident AI
-The correct LLM evaluation lifecycle is only achievable with [the DeepEval platform](https://confident-ai.com?utm_source=Github). It allows you to:
+DeepEval's cloud platform, [Confident AI](https://confident-ai.com?utm_source=Github), allows you to:
 1. Curate/annotate evaluation datasets on the cloud
 2. Benchmark LLM app using dataset, and compare with previous iterations to experiment which models/prompts works best

{deepeval-3.4.8 → deepeval-3.5.0}/deepeval/__init__.py RENAMED Viewed

@@ -3,9 +3,9 @@ import warnings
 import re
 # load environment variables before other imports
-from .env import autoload_dotenv as _autoload_dotenv
+from deepeval.config.settings import autoload_dotenv, get_settings
-_autoload_dotenv()
+autoload_dotenv()
 from ._version import __version__
 from deepeval.evaluate import evaluate, assert_test
@@ -14,9 +14,12 @@ from deepeval.test_run import on_test_run_end, log_hyperparameters
 from deepeval.utils import login
 from deepeval.telemetry import *
-if os.getenv("DEEPEVAL_GRPC_LOGGING") != "1":
-    os.environ["GRPC_VERBOSITY"] = "ERROR"
-    os.environ["GRPC_TRACE"] = ""
+settings = get_settings()
+if not settings.DEEPEVAL_GRPC_LOGGING:
+    os.environ.setdefault("GRPC_VERBOSITY", "ERROR")
+    os.environ.setdefault("GRPC_TRACE", "")
 __all__ = [
     "login",

deepeval-3.5.0/deepeval/_version.py ADDED Viewed

@@ -0,0 +1 @@
+__version__: str = "3.5.0"

{deepeval-3.4.8 → deepeval-3.5.0}/deepeval/benchmarks/drop/drop.py RENAMED Viewed

@@ -1,6 +1,5 @@
-from typing import List, Optional, Dict
+from typing import List, Optional, Dict, Union
 from tqdm import tqdm
-from typing import Union
 from deepeval.dataset import Golden
 from deepeval.benchmarks.base_benchmark import (
@@ -50,7 +49,7 @@ class DROP(DeepEvalBaseBenchmark):
         self,
         model: DeepEvalBaseLLM,
         *args,
-        batch_size: int | None = None,
+        batch_size: Union[int, None] = None,
         **kwargs,
     ) -> DeepEvalBaseBenchmarkResult:
         import pandas as pd

{deepeval-3.4.8 → deepeval-3.5.0}/deepeval/benchmarks/hellaswag/hellaswag.py RENAMED Viewed

@@ -1,4 +1,4 @@
-from typing import List, Dict, Optional
+from typing import List, Dict, Optional, Union
 from tqdm import tqdm
 from deepeval.dataset import Golden
@@ -51,7 +51,7 @@ class HellaSwag(DeepEvalBaseBenchmark):
         self,
         model: DeepEvalBaseLLM,
         *args,
-        batch_size: int | None = None,
+        batch_size: Union[int, None] = None,
         **kwargs,
     ) -> DeepEvalBaseBenchmarkResult:
         import pandas as pd

{deepeval-3.4.8 → deepeval-3.5.0}/deepeval/benchmarks/logi_qa/logi_qa.py RENAMED Viewed

@@ -1,4 +1,4 @@
-from typing import List, Optional, Dict
+from typing import List, Optional, Dict, Union
 from tqdm import tqdm
 import requests
 import json
@@ -52,7 +52,7 @@ class LogiQA(DeepEvalBaseBenchmark):
         self,
         model: DeepEvalBaseLLM,
         *args,
-        batch_size: int | None = None,
+        batch_size: Union[int, None] = None,
         **kwargs,
     ) -> DeepEvalBaseBenchmarkResult:
         import pandas as pd

{deepeval-3.4.8 → deepeval-3.5.0}/deepeval/benchmarks/math_qa/math_qa.py RENAMED Viewed

@@ -1,4 +1,4 @@
-from typing import List, Optional, Dict
+from typing import List, Optional, Dict, Union
 from tqdm import tqdm
 from deepeval.dataset import Golden
@@ -50,7 +50,7 @@ class MathQA(DeepEvalBaseBenchmark):
         self,
         model: DeepEvalBaseLLM,
         *args,
-        batch_size: int | None = None,
+        batch_size: Union[int, None] = None,
         **kwargs,
     ) -> DeepEvalBaseBenchmarkResult:
         import pandas as pd

{deepeval-3.4.8 → deepeval-3.5.0}/deepeval/benchmarks/mmlu/mmlu.py RENAMED Viewed

@@ -1,4 +1,4 @@
-from typing import List, Optional, Dict
+from typing import List, Optional, Dict, Union
 from tqdm import tqdm
 from deepeval.dataset import Golden
@@ -49,7 +49,7 @@ class MMLU(DeepEvalBaseBenchmark):
         self,
         model: DeepEvalBaseLLM,
         *args,
-        batch_size: int | None = None,
+        batch_size: Union[int, None] = None,
         **kwargs,
     ) -> DeepEvalBaseBenchmarkResult:
         import pandas as pd

{deepeval-3.4.8 → deepeval-3.5.0}/deepeval/benchmarks/truthful_qa/truthful_qa.py RENAMED Viewed

@@ -1,4 +1,4 @@
-from typing import List, Dict, Optional
+from typing import List, Dict, Optional, Union
 from tqdm import tqdm
 from deepeval.dataset import Golden
@@ -59,7 +59,7 @@ class TruthfulQA(DeepEvalBaseBenchmark):
         self,
         model: DeepEvalBaseLLM,
         *args,
-        batch_size: int | None = None,
+        batch_size: Union[int, None] = None,
         **kwargs,
     ) -> DeepEvalBaseBenchmarkResult:
         import pandas as pd

deepeval 3.4.8__tar.gz → 3.5.0__tar.gz

deepeval 3.4.8__tar.gz → 3.5.0__tar.gz