deepeval 3.7.0__py3-none-any.whl → 3.7.2__py3-none-any.whl
This diff shows the published contents of two package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- deepeval/__init__.py +0 -4
- deepeval/_version.py +1 -1
- deepeval/cli/main.py +7 -0
- deepeval/confident/api.py +6 -1
- deepeval/config/settings.py +5 -0
- deepeval/evaluate/compare.py +219 -4
- deepeval/evaluate/types.py +6 -0
- deepeval/evaluate/utils.py +30 -0
- deepeval/key_handler.py +1 -0
- deepeval/metrics/arena_g_eval/arena_g_eval.py +5 -1
- deepeval/metrics/arena_g_eval/utils.py +5 -5
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +9 -18
- deepeval/metrics/g_eval/g_eval.py +5 -1
- deepeval/metrics/g_eval/utils.py +1 -1
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +5 -1
- deepeval/metrics/utils.py +1 -1
- deepeval/models/llms/gemini_model.py +27 -5
- deepeval/openai_agents/callback_handler.py +12 -3
- deepeval/prompt/prompt.py +25 -14
- deepeval/simulator/template.py +1 -1
- deepeval/synthesizer/config.py +9 -0
- deepeval/synthesizer/schema.py +23 -0
- deepeval/synthesizer/synthesizer.py +1137 -2
- deepeval/synthesizer/templates/__init__.py +11 -2
- deepeval/synthesizer/templates/template.py +554 -1
- deepeval/synthesizer/templates/template_extraction.py +32 -0
- deepeval/synthesizer/templates/template_prompt.py +262 -0
- deepeval/test_case/__init__.py +2 -1
- deepeval/test_case/arena_test_case.py +15 -4
- deepeval/test_case/mllm_test_case.py +45 -22
- deepeval/test_run/cache.py +31 -10
- deepeval/test_run/hyperparameters.py +5 -1
- deepeval/test_run/test_run.py +28 -9
- deepeval/tracing/tracing.py +1 -1
- deepeval/utils.py +4 -0
- {deepeval-3.7.0.dist-info → deepeval-3.7.2.dist-info}/METADATA +3 -2
- {deepeval-3.7.0.dist-info → deepeval-3.7.2.dist-info}/RECORD +40 -40
- {deepeval-3.7.0.dist-info → deepeval-3.7.2.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.0.dist-info → deepeval-3.7.2.dist-info}/WHEEL +0 -0
- {deepeval-3.7.0.dist-info → deepeval-3.7.2.dist-info}/entry_points.txt +0 -0
deepeval/__init__.py
CHANGED
deepeval/_version.py
CHANGED
@@ -1 +1 @@
-__version__: str = "3.7.0"
+__version__: str = "3.7.2"
deepeval/cli/main.py
CHANGED
@@ -1484,6 +1484,11 @@ def set_gemini_model_env(
     google_cloud_location: Optional[str] = typer.Option(
         None, "--location", help="Google Cloud location"
     ),
+    google_service_account_key: Optional[str] = typer.Option(
+        None,
+        "--service-account-key",
+        help="Google Service Account Key for Gemini",
+    ),
     save: Optional[str] = typer.Option(
         None,
         "--save",
@@ -1513,6 +1518,8 @@ def set_gemini_model_env(
         settings.GOOGLE_CLOUD_PROJECT = google_cloud_project
     if google_cloud_location:
         settings.GOOGLE_CLOUD_LOCATION = google_cloud_location
+    if google_service_account_key:
+        settings.GOOGLE_SERVICE_ACCOUNT_KEY = google_service_account_key
     if model_name:
         settings.GEMINI_MODEL_NAME = model_name
 
deepeval/confident/api.py
CHANGED
@@ -27,6 +27,10 @@ retryable_exceptions = requests.exceptions.SSLError
 
 
 def get_base_api_url():
+    s = get_settings()
+    if s.CONFIDENT_BASE_URL:
+        base_url = s.CONFIDENT_BASE_URL.rstrip("/")
+        return base_url
     region = KEY_FILE_HANDLER.fetch_data(KeyValues.CONFIDENT_REGION)
     if region == "EU":
         return API_BASE_URL_EU
@@ -87,6 +91,7 @@ class Endpoints(Enum):
     DATASET_ALIAS_QUEUE_ENDPOINT = "/v1/datasets/:alias/queue"
 
     TEST_RUN_ENDPOINT = "/v1/test-run"
+    EXPERIMENT_ENDPOINT = "/v1/experiment"
     METRIC_DATA_ENDPOINT = "/v1/metric-data"
     TRACES_ENDPOINT = "/v1/traces"
     ANNOTATIONS_ENDPOINT = "/v1/annotations"
@@ -115,7 +120,7 @@ class Api:
         self.api_key = api_key
         self._headers = {
             "Content-Type": "application/json",
-            "
+            "CONFIDENT-API-KEY": api_key,
             "X-DeepEval-Version": deepeval.__version__,
         }
         self.base_api_url = get_base_api_url()
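
Note: the new `get_base_api_url()` honors an explicit `CONFIDENT_BASE_URL` setting first (with any trailing slash stripped) and only then falls back to the stored region. Below is a minimal standalone sketch of that resolution order; `EU_URL`, `DEFAULT_URL`, and the plain function arguments are illustrative stand-ins rather than deepeval's actual settings objects or constants.

```python
from typing import Optional

EU_URL = "https://eu.api.example.com"      # stand-in for API_BASE_URL_EU
DEFAULT_URL = "https://api.example.com"    # stand-in for the default base URL


def resolve_base_url(base_url_override: Optional[str], region: Optional[str]) -> str:
    # 1. An explicit base URL override always wins; a trailing "/" is stripped
    #    the same way the new get_base_api_url() uses rstrip("/").
    if base_url_override:
        return base_url_override.rstrip("/")
    # 2. Otherwise fall back to the region-based endpoint.
    if region == "EU":
        return EU_URL
    return DEFAULT_URL


assert resolve_base_url("http://localhost:8000/", None) == "http://localhost:8000"
assert resolve_base_url(None, "EU") == EU_URL
```

In practice this lets the client be pointed at a self-hosted or staging Confident AI server without touching the region key.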
deepeval/config/settings.py
CHANGED
@@ -229,6 +229,11 @@ class Settings(BaseSettings):
     API_KEY: Optional[SecretStr] = None
     CONFIDENT_API_KEY: Optional[SecretStr] = None
 
+    # ======
+    # Base URL for Confident AI API server
+    # ======
+    CONFIDENT_BASE_URL: Optional[str] = None
+
     # General
     TEMPERATURE: Optional[confloat(ge=0, le=2)] = None
 
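
Since `Settings` is a pydantic `BaseSettings` subclass, the new field should be settable from the environment like its neighboring fields. A hedged sketch; the URL is a placeholder, and the exact moment deepeval reads it depends on how your process imports and initializes deepeval:

```python
import os

# Point deepeval's Confident AI client at a self-hosted or staging server.
# Set this before deepeval loads its settings.
os.environ["CONFIDENT_BASE_URL"] = "http://localhost:8000"
```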
deepeval/evaluate/compare.py
CHANGED
@@ -1,5 +1,6 @@
 from typing import Optional, List, Dict, Callable
 import asyncio
+import time
 from rich.progress import (
     Progress,
     TextColumn,
@@ -8,24 +9,74 @@ from rich.progress import (
     TaskProgressColumn,
 )
 from collections import Counter
+import json
 
 from deepeval.errors import MissingTestCaseParamsError
 from deepeval.evaluate.configs import AsyncConfig, DisplayConfig, ErrorConfig
-from deepeval.test_case import ArenaTestCase
+from deepeval.test_case import ArenaTestCase, Contestant
+from deepeval.test_case.api import create_api_test_case
 from deepeval.metrics import ArenaGEval
-from deepeval.utils import
-
+from deepeval.utils import (
+    add_pbar,
+    update_pbar,
+    custom_console,
+    get_or_create_event_loop,
+    open_browser,
+)
+from deepeval.test_run.test_run import (
+    TestRun,
+    MetricData,
+    TestRunEncoder,
+    MetricScores,
+    console,
+)
+from deepeval.test_run.hyperparameters import (
+    process_hyperparameters,
+)
+from deepeval.confident.api import Api, Endpoints, HttpMethods, is_confident
 from deepeval.telemetry import capture_evaluation_run
+from deepeval.test_run.api import LLMApiTestCase
+from deepeval.evaluate.utils import create_arena_metric_data
+from deepeval.evaluate.types import PostExperimentRequest
 
 
 def compare(
     test_cases: List[ArenaTestCase],
     metric: ArenaGEval,
+    name: str = "compare()",
     # Configs
     async_config: Optional[AsyncConfig] = AsyncConfig(),
     display_config: Optional[DisplayConfig] = DisplayConfig(),
     error_config: Optional[ErrorConfig] = ErrorConfig(),
 ) -> Dict[str, int]:
+
+    # Prepare test run map
+    unique_contestant_names = set(
+        [
+            contestant.name
+            for test_case in test_cases
+            for contestant in test_case.contestants
+        ]
+    )
+    test_run_map: Dict[str, TestRun] = {}
+    for contestant_name in unique_contestant_names:
+        test_run = TestRun(
+            identifier=contestant_name,
+            test_passed=0,
+            test_failed=0,
+        )
+        test_run.metrics_scores = [
+            MetricScores(
+                metric=metric.name,
+                scores=[],
+                passes=0,
+                fails=0,
+                errors=0,
+            )
+        ]
+        test_run_map[contestant_name] = test_run
+
+    start_time = time.time()
     with capture_evaluation_run("compare()"):
         if async_config.run_async:
            loop = get_or_create_event_loop()
@@ -39,6 +90,7 @@ def compare(
                     throttle_value=async_config.throttle_value,
                     max_concurrent=async_config.max_concurrent,
                     skip_on_missing_params=error_config.skip_on_missing_params,
+                    test_run_map=test_run_map,
                 )
             )
         else:
@@ -49,7 +101,10 @@ def compare(
                 verbose_mode=display_config.verbose_mode,
                 show_indicator=display_config.show_indicator,
                 skip_on_missing_params=error_config.skip_on_missing_params,
+                test_run_map=test_run_map,
             )
+    end_time = time.time()
+    run_duration = end_time - start_time
 
     # Aggregate winners
     winner_counts = Counter()
@@ -57,7 +112,13 @@ def compare(
         if winner:
             winner_counts[winner] += 1
 
-
+    process_test_runs(test_run_map=test_run_map, test_cases=test_cases)
+    wrap_up_experiment(
+        name=name,
+        test_runs=list(test_run_map.values()),
+        winner_counts=winner_counts,
+        run_duration=run_duration,
+    )
     return dict(winner_counts)
 
 
@@ -70,6 +131,7 @@ async def a_execute_arena_test_cases(
    throttle_value: int,
    skip_on_missing_params: bool,
    max_concurrent: int,
+    test_run_map: Dict[str, TestRun],
 ) -> List[str]:
     semaphore = asyncio.Semaphore(max_concurrent)
 
@@ -104,6 +166,8 @@ async def a_execute_arena_test_cases(
                 else metric.verbose_mode
             ),
         )
+
+        start_time = time.perf_counter()
         winner = await _a_handle_metric_measurement(
             metric=metric_copy,
             test_case=test_case,
@@ -112,10 +176,21 @@ async def a_execute_arena_test_cases(
             _progress=progress,
             _pbar_id=pbar_test_case_id,
         )
+        end_time = time.perf_counter()
+        run_duration = end_time - start_time
+
        if winner:
            winners.append(winner)
 
        update_pbar(progress, pbar_id)
+        update_test_run_map(
+            test_case=test_case,
+            index=index,
+            test_run_map=test_run_map,
+            metric_copy=metric_copy,
+            winner=winner,
+            run_duration=run_duration,
+        )
 
     # Create tasks for all test cases
     if show_indicator:
@@ -156,6 +231,7 @@ def execute_arena_test_cases(
     skip_on_missing_params: bool,
     show_indicator: bool,
     verbose_mode: Optional[bool] = None,
+    test_run_map: Optional[Dict[str, TestRun]] = None,
 ) -> List[str]:
     """
     Non-async version of comparing arena test cases.
@@ -183,6 +259,8 @@ def execute_arena_test_cases(
                 else metric.verbose_mode
             ),
         )
+
+        start_time = time.perf_counter()
         winner = _handle_metric_measurement(
             metric=metric_copy,
             test_case=test_case,
@@ -191,10 +269,21 @@ def execute_arena_test_cases(
             _progress=progress,
             _pbar_id=pbar_test_case_id,
         )
+        end_time = time.perf_counter()
+        run_duration = end_time - start_time
+
        if winner:
            winners.append(winner)
 
        update_pbar(progress, pbar_id)
+        update_test_run_map(
+            test_case=test_case,
+            index=i,
+            test_run_map=test_run_map,
+            metric_copy=metric_copy,
+            winner=winner,
+            run_duration=run_duration,
+        )
 
     if show_indicator:
         progress = Progress(
@@ -313,3 +402,129 @@ async def _a_handle_metric_measurement(
             return None
         else:
             raise
+
+
+def update_test_run_map(
+    test_case: ArenaTestCase,
+    index: int,
+    test_run_map: Dict[str, TestRun],
+    metric_copy: ArenaGEval,
+    winner: str,
+    run_duration: float,
+):
+    for contestant in test_case.contestants:
+        test_run = test_run_map.get(contestant.name)
+
+        # update test cases in test run
+        api_test_case: LLMApiTestCase = create_api_test_case(
+            test_case=contestant.test_case, index=index
+        )
+        metric_data: MetricData = create_arena_metric_data(
+            metric_copy, contestant.name
+        )
+        api_test_case.update_metric_data(metric_data)
+        api_test_case.update_run_duration(run_duration)
+        test_run.add_test_case(api_test_case)
+
+        # update other test run attributes
+        if test_run.run_duration is None:
+            test_run.run_duration = 0.0
+        test_run.run_duration += run_duration
+
+        # Ensure test_passed and test_failed are initialized
+        if test_run.test_passed is None:
+            test_run.test_passed = 0
+        if test_run.test_failed is None:
+            test_run.test_failed = 0
+
+        if winner == contestant:
+            test_run.test_passed += 1
+        else:
+            test_run.test_failed += 1
+
+        # update metric scores
+        test_run.metrics_scores[0].metric = metric_copy.name
+        test_run.metrics_scores[0].scores.append(
+            1 if winner == contestant else 0
+        )
+        test_run.metrics_scores[0].passes += 1 if winner == contestant else 0
+        test_run.metrics_scores[0].fails += 1 if winner != contestant else 0
+        test_run.metrics_scores[0].errors += 0
+
+
+def process_test_runs(
+    test_run_map: Dict[str, TestRun],
+    test_cases: List[ArenaTestCase],
+):
+    hyperparameters_map = {
+        contestant_name: {} for contestant_name in test_run_map.keys()
+    }
+
+    for test_case in test_cases:
+        for contestant in test_case.contestants:
+            if contestant.hyperparameters:
+                hyperparameters_map[contestant.name].update(
+                    contestant.hyperparameters
+                )
+
+    for contestant_name, hyperparameters in hyperparameters_map.items():
+        test_run = test_run_map.get(contestant_name)
+        test_run.hyperparameters = process_hyperparameters(hyperparameters)
+
+
+def wrap_up_experiment(
+    name: str,
+    test_runs: List[TestRun],
+    winner_counts: Counter,
+    run_duration: float,
+):
+    winner_breakdown = []
+    for contestant, wins in winner_counts.most_common():
+        winner_breakdown.append(
+            f" » [bold green]{contestant}[/bold green]: {wins} wins"
+        )
+    winner_text = (
+        "\n".join(winner_breakdown) if winner_breakdown else "No winners"
+    )
+    console.print(
+        f"\n🎉 Arena completed! (time taken: {round(run_duration, 2)}s | token cost: {test_runs[0].evaluation_cost if test_runs else 0} USD)\n"
+        f"🏆 Results ({sum(winner_counts.values())} total test cases):\n"
+        f"{winner_text}\n\n"
+    )
+
+    if not is_confident():
+        console.print(
+            f"{'=' * 80}\n"
+            f"\n» Want to share experiments with your team? ❤️ 🏟️\n"
+            f" » Run [bold]'deepeval login'[/bold] to analyze and save arena results on [rgb(106,0,255)]Confident AI[/rgb(106,0,255)].\n\n"
+        )
+        return
+
+    try:
+        api = Api()
+        experiment_request = PostExperimentRequest(
+            testRuns=test_runs, name=name
+        )
+
+        try:
+            body = experiment_request.model_dump(
+                by_alias=True, exclude_none=True
+            )
+        except AttributeError:
+            body = experiment_request.dict(by_alias=True, exclude_none=True)
+        json_str = json.dumps(body, cls=TestRunEncoder)
+        body = json.loads(json_str)
+
+        _, link = api.send_request(
+            method=HttpMethods.POST,
+            endpoint=Endpoints.EXPERIMENT_ENDPOINT,
+            body=body,
+        )
+        console.print(
+            "[rgb(5,245,141)]✓[/rgb(5,245,141)] Done 🎉! View results on "
+            f"[link={link}]{link}[/link]"
+        )
+        open_browser(link)
+
+    except Exception:
+        raise
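
For context, a hedged sketch of how the reworked `compare()` might be called with the new `name` argument. The `Contestant` fields (`name`, `test_case`, `hyperparameters`) and the import paths come from this diff; the `ArenaGEval` and `LLMTestCase` constructor arguments are assumptions based on deepeval's public API and may differ slightly.

```python
from deepeval.evaluate.compare import compare
from deepeval.metrics import ArenaGEval
from deepeval.test_case import (
    ArenaTestCase,
    Contestant,
    LLMTestCase,
    LLMTestCaseParams,
)

a_case = ArenaTestCase(
    contestants=[
        Contestant(
            name="model-a",
            test_case=LLMTestCase(
                input="Why is the sky blue?",
                actual_output="Rayleigh scattering of sunlight.",
            ),
            hyperparameters={"model": "model-a"},
        ),
        Contestant(
            name="model-b",
            test_case=LLMTestCase(
                input="Why is the sky blue?",
                actual_output="Because blue light scatters more strongly in the atmosphere.",
            ),
            hyperparameters={"model": "model-b"},
        ),
    ],
)

metric = ArenaGEval(
    name="Helpfulness",
    criteria="Pick the contestant whose answer is more helpful and accurate.",
    evaluation_params=[
        LLMTestCaseParams.INPUT,
        LLMTestCaseParams.ACTUAL_OUTPUT,
    ],
)

# New in 3.7.2: `name` labels the experiment posted to Confident AI.
winner_counts = compare(test_cases=[a_case], metric=metric, name="sky-color arena")
print(winner_counts)  # e.g. {"model-b": 1}
```

When a Confident AI API key is configured, `wrap_up_experiment()` serializes each contestant's `TestRun` into a `PostExperimentRequest` and POSTs it to the new `/v1/experiment` endpoint; otherwise it only prints the local summary.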
deepeval/evaluate/types.py
CHANGED
@@ -4,6 +4,7 @@ from pydantic import BaseModel
 
 from deepeval.test_run.api import MetricData, TurnApi
 from deepeval.test_case import MLLMImage
+from deepeval.test_run import TestRun
 
 
 @dataclass
@@ -29,3 +30,8 @@ class EvaluationResult(BaseModel):
     test_results: List[TestResult]
     confident_link: Optional[str]
     test_run_id: Optional[str]
+
+
+class PostExperimentRequest(BaseModel):
+    testRuns: List[TestRun]
+    name: Optional[str]
deepeval/evaluate/utils.py
CHANGED
@@ -8,6 +8,7 @@ from deepeval.utils import format_turn
 from deepeval.test_run.test_run import TestRunResultDisplay
 from deepeval.dataset import Golden
 from deepeval.metrics import (
+    ArenaGEval,
     BaseMetric,
     BaseConversationalMetric,
     BaseMultimodalMetric,
@@ -84,6 +85,35 @@ def create_metric_data(metric: BaseMetric) -> MetricData:
     )
 
 
+def create_arena_metric_data(metric: ArenaGEval, contestant: str) -> MetricData:
+    if metric.error is not None:
+        return MetricData(
+            name=metric.__name__,
+            threshold=1,
+            score=None,
+            reason=None,
+            success=False,
+            strictMode=True,
+            evaluationModel=metric.evaluation_model,
+            error=metric.error,
+            evaluationCost=metric.evaluation_cost,
+            verboseLogs=metric.verbose_logs,
+        )
+    else:
+        return MetricData(
+            name=metric.__name__,
+            score=1 if contestant == metric.winner else 0,
+            threshold=1,
+            reason=metric.reason,
+            success=metric.is_successful(),
+            strictMode=True,
+            evaluationModel=metric.evaluation_model,
+            error=None,
+            evaluationCost=metric.evaluation_cost,
+            verboseLogs=metric.verbose_logs,
+        )
+
+
 def create_test_result(
     api_test_case: Union[LLMApiTestCase, ConversationalApiTestCase],
 ) -> TestResult:
deepeval/key_handler.py
CHANGED
@@ -70,6 +70,7 @@ class ModelKeyValues(Enum):
     GOOGLE_GENAI_USE_VERTEXAI = "GOOGLE_GENAI_USE_VERTEXAI"
     GOOGLE_CLOUD_PROJECT = "GOOGLE_CLOUD_PROJECT"
     GOOGLE_CLOUD_LOCATION = "GOOGLE_CLOUD_LOCATION"
+    GOOGLE_SERVICE_ACCOUNT_KEY = "GOOGLE_SERVICE_ACCOUNT_KEY"
     # LiteLLM
     USE_LITELLM = "USE_LITELLM"
     LITELLM_MODEL_NAME = "LITELLM_MODEL_NAME"
deepeval/metrics/arena_g_eval/arena_g_eval.py
CHANGED
@@ -46,7 +46,11 @@ class ArenaGEval(BaseArenaMetric):
         self.criteria = criteria
         self.model, self.using_native_model = initialize_model(model)
         self.evaluation_model = self.model.get_model_name()
-        self.evaluation_steps =
+        self.evaluation_steps = (
+            evaluation_steps
+            if evaluation_steps and len(evaluation_steps) > 0
+            else None
+        )
         self.async_mode = async_mode
         self.verbose_mode = verbose_mode
         self._include_g_eval_suffix = _include_g_eval_suffix
deepeval/metrics/arena_g_eval/utils.py
CHANGED
@@ -89,10 +89,10 @@
 def format_arena_test_case(
     evaluation_params: List[LLMTestCaseParams], test_case: ArenaTestCase
 ) -> Tuple[FormattedArenaTestCase, Dict[str, str]]:
-    case = next(iter(test_case.contestants
+    case = next(iter([case.test_case for case in test_case.contestants]))
 
     # Create dummy name mapping
-    real_names = list(test_case.contestants
+    real_names = list([case.name for case in test_case.contestants])
     available_fake_names = FAKE_NAMES.copy()
     random.shuffle(available_fake_names)
 
@@ -119,10 +119,10 @@ def format_arena_test_case(
            else None
        ),
        contestants={
-            contestant: construct_formatted_llm_test_case(
-                evaluation_params, test_case
+            contestant.name: construct_formatted_llm_test_case(
+                evaluation_params, contestant.test_case
            )
-            for contestant
+            for contestant in test_case.contestants
        },
        dummy_to_real_names=dummy_to_real_names,
    )
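
The `format_arena_test_case` changes above keep the existing name-blinding behavior while reading contestants from the new `Contestant` objects: real contestant names are swapped for shuffled fake names before the judge model sees them, and the `dummy_to_real_names` map translates the verdict back. A toy illustration of that idea (the name lists are stand-ins for deepeval's `FAKE_NAMES` and real contestant names):

```python
import random

FAKE_NAMES = ["Alice", "Bob", "Charlie"]   # stand-in list
real_names = ["model-a", "model-b"]        # contestant.name values

available_fake_names = FAKE_NAMES.copy()
random.shuffle(available_fake_names)

# The judge only ever sees fake names; the mapping restores the real winner.
dummy_to_real_names = dict(zip(available_fake_names, real_names))
print(dummy_to_real_names)  # e.g. {'Charlie': 'model-a', 'Alice': 'model-b'}
```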
deepeval/metrics/conversational_g_eval/conversational_g_eval.py
CHANGED
@@ -9,6 +9,8 @@ from deepeval.metrics.g_eval.utils import (
     construct_conversational_g_eval_turn_params_string,
     construct_non_turns_test_case_string,
     format_rubrics,
+    validate_and_sort_rubrics,
+    validate_criteria_and_evaluation_steps,
 )
 from deepeval.test_case import (
     TurnParams,
@@ -63,27 +65,16 @@ class ConversationalGEval(BaseConversationalMetric):
 
         self.evaluation_params = evaluation_params
 
-
-        if criteria is None and evaluation_steps is None:
-            raise ValueError(
-                "Either 'criteria' or 'evaluation_steps' must be provided."
-            )
-
-        # Check if criteria is provided, it cannot be an empty string
-        if criteria is not None and not criteria.strip():
-            raise ValueError("Criteria provided cannot be an empty string.")
-
-        # Check if evaluation_steps is provided, it cannot be an empty list
-        if evaluation_steps is not None and len(evaluation_steps) == 0:
-            raise ValueError(
-                "'evaluation_steps' must not be an empty list. Either omit evaluation steps or include a non-empty list of steps."
-            )
-
+        validate_criteria_and_evaluation_steps(criteria, evaluation_steps)
         self.criteria = criteria
-        self.rubric = rubric
+        self.rubric = validate_and_sort_rubrics(rubric)
         self.model, self.using_native_model = initialize_model(model)
         self.evaluation_model = self.model.get_model_name()
-        self.evaluation_steps =
+        self.evaluation_steps = (
+            evaluation_steps
+            if evaluation_steps and len(evaluation_steps) > 0
+            else None
+        )
         self.threshold = 1 if strict_mode else threshold
         self.strict_mode = strict_mode
         self.async_mode = async_mode
deepeval/metrics/g_eval/g_eval.py
CHANGED
@@ -61,7 +61,11 @@ class GEval(BaseMetric):
         self.score_range_span = self.score_range[1] - self.score_range[0]
         self.model, self.using_native_model = initialize_model(model)
         self.evaluation_model = self.model.get_model_name()
-        self.evaluation_steps =
+        self.evaluation_steps = (
+            evaluation_steps
+            if evaluation_steps and len(evaluation_steps) > 0
+            else None
+        )
         self.threshold = 1 if strict_mode else threshold
         self.top_logprobs = top_logprobs
         self.strict_mode = strict_mode
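
The same guard is applied in `ArenaGEval`, `ConversationalGEval`, `GEval`, and `MultimodalGEval` (below): an empty `evaluation_steps` list is now normalized to `None` rather than stored as `[]`. A small hypothetical helper showing the behavior of that expression:

```python
from typing import List, Optional


def normalize_evaluation_steps(
    evaluation_steps: Optional[List[str]],
) -> Optional[List[str]]:
    # Mirrors the expression added across the G-Eval metrics in 3.7.2:
    # falsy (None or empty) step lists collapse to None.
    return (
        evaluation_steps
        if evaluation_steps and len(evaluation_steps) > 0
        else None
    )


assert normalize_evaluation_steps(None) is None
assert normalize_evaluation_steps([]) is None
assert normalize_evaluation_steps(["Check factual accuracy"]) == [
    "Check factual accuracy"
]
```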
deepeval/metrics/g_eval/utils.py
CHANGED
@@ -77,7 +77,7 @@ def validate_criteria_and_evaluation_steps(
 def validate_and_sort_rubrics(
     rubrics: Optional[List[Rubric]] = None,
 ) -> Optional[List[Rubric]]:
-    if rubrics is None:
+    if rubrics is None or len(rubrics) == 0:
         return None
 
     # Sort rubrics by start of range
deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py
CHANGED
@@ -64,7 +64,11 @@ class MultimodalGEval(BaseMultimodalMetric):
         self.rubric = validate_and_sort_rubrics(rubric)
         self.model, self.using_native_model = initialize_multimodal_model(model)
         self.evaluation_model = self.model.get_model_name()
-        self.evaluation_steps =
+        self.evaluation_steps = (
+            evaluation_steps
+            if evaluation_steps and len(evaluation_steps) > 0
+            else None
+        )
         self.threshold = 1 if strict_mode else threshold
         self.top_logprobs = top_logprobs
         self.strict_mode = strict_mode
deepeval/metrics/utils.py
CHANGED
@@ -270,7 +270,7 @@ def check_arena_test_case_params(
             f"Expected ArenaTestCase, got {type(arena_test_case).__name__}"
         )
 
-    cases =
+    cases = [contestant.test_case for contestant in arena_test_case.contestants]
     ref_input = cases[0].input
     for case in cases[1:]:
         if case.input != ref_input:
deepeval/models/llms/gemini_model.py
CHANGED
@@ -1,7 +1,6 @@
 from pydantic import BaseModel
-from google.genai import types
+from google.genai import types, Client
 from typing import Optional, Dict
-from google import genai
 
 from deepeval.models.retry_policy import (
     create_retry_decorator,
@@ -9,7 +8,8 @@ from deepeval.models.retry_policy import (
 from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.models.base_model import DeepEvalBaseLLM
 from deepeval.constants import ProviderSlug as PS
-
+from google.oauth2 import service_account
+import json
 
 default_gemini_model = "gemini-1.5-pro"
 
@@ -52,6 +52,7 @@ class GeminiModel(DeepEvalBaseLLM):
         api_key: Optional[str] = None,
         project: Optional[str] = None,
         location: Optional[str] = None,
+        service_account_key: Optional[Dict[str, str]] = None,
         temperature: float = 0,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
@@ -75,6 +76,17 @@ class GeminiModel(DeepEvalBaseLLM):
         self.use_vertexai = KEY_FILE_HANDLER.fetch_data(
             ModelKeyValues.GOOGLE_GENAI_USE_VERTEXAI
         )
+        if service_account_key:
+            self.service_account_key = service_account_key
+        else:
+            service_account_key_data = KEY_FILE_HANDLER.fetch_data(
+                ModelKeyValues.GOOGLE_SERVICE_ACCOUNT_KEY
+            )
+            if service_account_key_data is None:
+                self.service_account_key = None
+            elif isinstance(service_account_key_data, str):
+                self.service_account_key = json.loads(service_account_key_data)
+
         if temperature < 0:
             raise ValueError("Temperature must be >= 0.")
         self.temperature = temperature
@@ -117,10 +129,20 @@ class GeminiModel(DeepEvalBaseLLM):
             )
 
             # Create client for Vertex AI
-            self.client =
+            self.client = Client(
                 vertexai=True,
                 project=self.project,
                 location=self.location,
+                credentials=(
+                    service_account.Credentials.from_service_account_info(
+                        self.service_account_key,
+                        scopes=[
+                            "https://www.googleapis.com/auth/cloud-platform"
+                        ],
+                    )
+                    if self.service_account_key
+                    else None
+                ),
                 **self.kwargs,
             )
         else:
@@ -130,7 +152,7 @@ class GeminiModel(DeepEvalBaseLLM):
                 "or set it in your DeepEval configuration."
            )
            # Create client for Gemini API
-            self.client =
+            self.client = Client(api_key=self.api_key, **self.kwargs)
 
            # Configure default model generation settings
            self.model_safety_settings = [
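
A hedged sketch of using the new `service_account_key` parameter end to end. The import path, the `model_name` keyword, and the project/file values are assumptions or placeholders; the key can alternatively be stored through the new `GOOGLE_SERVICE_ACCOUNT_KEY` setting (for example via the CLI's `--service-account-key` option shown earlier in this diff).

```python
import json

from deepeval.models import GeminiModel  # import path assumed

# Load a Google service-account JSON key and hand it to GeminiModel.
# Per this diff, the dict becomes service_account.Credentials and is passed
# to google-genai's Vertex AI Client (so Vertex AI mode, i.e.
# GOOGLE_GENAI_USE_VERTEXAI, is assumed to be enabled).
with open("service-account.json") as f:
    service_account_key = json.load(f)

model = GeminiModel(
    model_name="gemini-1.5-pro",   # default model named earlier in this file
    project="my-gcp-project",      # placeholder
    location="us-central1",        # placeholder
    service_account_key=service_account_key,
)
```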
|