deepeval 3.7.8__py3-none-any.whl → 3.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/benchmarks/drop/drop.py +5 -2
- deepeval/benchmarks/mmlu/mmlu.py +6 -4
- deepeval/cli/main.py +168 -0
- deepeval/cli/utils.py +2 -2
- deepeval/confident/api.py +2 -0
- deepeval/config/settings.py +10 -0
- deepeval/constants.py +1 -0
- deepeval/integrations/langchain/callback.py +330 -158
- deepeval/integrations/langchain/utils.py +31 -8
- deepeval/key_handler.py +8 -1
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +35 -0
- deepeval/metrics/g_eval/g_eval.py +35 -1
- deepeval/metrics/g_eval/utils.py +65 -0
- deepeval/models/__init__.py +2 -0
- deepeval/models/llms/__init__.py +2 -0
- deepeval/models/llms/constants.py +23 -0
- deepeval/models/llms/gemini_model.py +27 -29
- deepeval/models/llms/openai_model.py +5 -4
- deepeval/models/llms/openrouter_model.py +398 -0
- deepeval/models/retry_policy.py +3 -0
- deepeval/prompt/api.py +1 -0
- deepeval/synthesizer/synthesizer.py +190 -82
- deepeval/tracing/tracing.py +6 -1
- deepeval/tracing/types.py +1 -1
- deepeval/utils.py +21 -6
- {deepeval-3.7.8.dist-info → deepeval-3.8.0.dist-info}/METADATA +7 -7
- {deepeval-3.7.8.dist-info → deepeval-3.8.0.dist-info}/RECORD +31 -30
- {deepeval-3.7.8.dist-info → deepeval-3.8.0.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.8.dist-info → deepeval-3.8.0.dist-info}/WHEEL +0 -0
- {deepeval-3.7.8.dist-info → deepeval-3.8.0.dist-info}/entry_points.txt +0 -0
deepeval/integrations/langchain/utils.py
CHANGED

@@ -145,6 +145,7 @@ def enter_current_context(
     progress: Optional[Progress] = None,
     pbar_callback_id: Optional[int] = None,
     uuid_str: Optional[str] = None,
+    fallback_trace_uuid: Optional[str] = None,
 ) -> BaseSpan:
     start_time = perf_counter()
     observe_kwargs = observe_kwargs or {}
@@ -159,12 +160,27 @@ def enter_current_context(
     parent_uuid: Optional[str] = None

     if parent_span:
-
-        trace_uuid
-
+        # Validate that the parent span's trace is still active
+        if parent_span.trace_uuid in trace_manager.active_traces:
+            parent_uuid = parent_span.uuid
+            trace_uuid = parent_span.trace_uuid
+        else:
+            # Parent span references a dead trace - treat as if no parent
+            parent_span = None
+
+    if not parent_span:
         current_trace = current_trace_context.get()
-
+        # IMPORTANT: Verify trace is still active, not just in context
+        # (a previous failed async operation might leave a dead trace in context)
+        if current_trace and current_trace.uuid in trace_manager.active_traces:
             trace_uuid = current_trace.uuid
+        elif (
+            fallback_trace_uuid
+            and fallback_trace_uuid in trace_manager.active_traces
+        ):
+            # In async contexts, ContextVar may not propagate. Use the fallback trace_uuid
+            # provided by the CallbackHandler to avoid creating duplicate traces.
+            trace_uuid = fallback_trace_uuid
         else:
             trace = trace_manager.start_new_trace(
                 metric_collection=metric_collection
@@ -258,11 +274,13 @@ def exit_current_context(

     current_span = current_span_context.get()

+    # In async contexts (LangChain/LangGraph), context variables don't propagate
+    # reliably across task boundaries. Fall back to direct span lookup.
     if not current_span or current_span.uuid != uuid_str:
-
-
-
-
+        current_span = trace_manager.get_span_by_uuid(uuid_str)
+        if not current_span:
+            # Span already removed or never existed
+            return

     current_span.end_time = end_time
     if exc_type is not None:
@@ -295,7 +313,12 @@ def exit_current_context(
         else:
             current_span_context.set(None)
     else:
+        # Try context first, then fall back to direct trace lookup for async contexts
         current_trace = current_trace_context.get()
+        if not current_trace and current_span.trace_uuid:
+            current_trace = trace_manager.get_trace_by_uuid(
+                current_span.trace_uuid
+            )
         if current_span.status == TraceSpanStatus.ERRORED and current_trace:
             current_trace.status = TraceSpanStatus.ERRORED
         if current_trace and current_trace.uuid == current_span.trace_uuid:
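Why the fallback matters: a ContextVar set in one asyncio task is not visible to callbacks scheduled from a different (or copied) context, so a span created inside an async LangChain callback can find a stale trace, or none, in `current_trace_context`. Below is a minimal, self-contained sketch of the lookup order these hunks implement — context first, then an active-trace check, then a caller-supplied fallback — using stand-in names (`active_traces`, `resolve_trace_uuid`) rather than deepeval's real trace manager:

    from contextvars import ContextVar
    from typing import Dict, Optional

    current_trace_uuid: ContextVar[Optional[str]] = ContextVar("trace_uuid", default=None)
    active_traces: Dict[str, object] = {}  # uuid -> trace; entries removed when a trace ends

    def resolve_trace_uuid(fallback_trace_uuid: Optional[str] = None) -> Optional[str]:
        # 1. Prefer the ContextVar, but only if it still points at a live trace.
        ctx_uuid = current_trace_uuid.get()
        if ctx_uuid is not None and ctx_uuid in active_traces:
            return ctx_uuid
        # 2. Async callbacks may run with a copied or empty context, so accept an
        #    explicit fallback from the caller instead of starting a duplicate trace.
        if fallback_trace_uuid is not None and fallback_trace_uuid in active_traces:
            return fallback_trace_uuid
        # 3. Nothing usable: the caller should start a brand-new trace.
        return None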
deepeval/key_handler.py
CHANGED

@@ -162,6 +162,13 @@ class ModelKeyValues(Enum):
     VLLM_API_KEY = "VLLM_API_KEY"
     VLLM_MODEL_NAME = "VLLM_MODEL_NAME"

+    # OpenRouter
+    USE_OPENROUTER_MODEL = "USE_OPENROUTER_MODEL"
+    OPENROUTER_MODEL_NAME = "OPENROUTER_MODEL_NAME"
+    OPENROUTER_COST_PER_INPUT_TOKEN = "OPENROUTER_COST_PER_INPUT_TOKEN"
+    OPENROUTER_COST_PER_OUTPUT_TOKEN = "OPENROUTER_COST_PER_OUTPUT_TOKEN"
+    OPENROUTER_API_KEY = "OPENROUTER_API_KEY"
+

 class EmbeddingKeyValues(Enum):
     # Azure OpenAI
@@ -174,7 +181,7 @@ class EmbeddingKeyValues(Enum):
     USE_LOCAL_EMBEDDINGS = "USE_LOCAL_EMBEDDINGS"
     LOCAL_EMBEDDING_MODEL_NAME = "LOCAL_EMBEDDING_MODEL_NAME"
     LOCAL_EMBEDDING_BASE_URL = "LOCAL_EMBEDDING_BASE_URL"
-    LOCAL_EMBEDDING_API_KEY = "LOCAL_EMBEDDING_API_KEY"
+    LOCAL_EMBEDDING_API_KEY = ("LOCAL_EMBEDDING_API_KEY",)


 class KeyFileHandler:
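The enum values above double as the key names deepeval looks up, so the most direct way to exercise the new OpenRouter provider is to set them as environment variables before running an evaluation. A hedged sketch: only the variable names come from the enum members above; the concrete value formats (boolean flag, per-token costs as decimal strings) are assumptions, not documented behavior:

    import os

    # Names mirror the ModelKeyValues members added in this release.
    os.environ["OPENROUTER_API_KEY"] = "sk-or-..."
    os.environ["OPENROUTER_MODEL_NAME"] = "anthropic/claude-3-opus"  # provider/model format
    os.environ["OPENROUTER_COST_PER_INPUT_TOKEN"] = "0.000015"       # assumed format
    os.environ["OPENROUTER_COST_PER_OUTPUT_TOKEN"] = "0.000075"      # assumed format
    os.environ["USE_OPENROUTER_MODEL"] = "YES"                       # assumed format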
deepeval/metrics/conversational_g_eval/conversational_g_eval.py
CHANGED

@@ -2,6 +2,7 @@

 from openai.types.chat.chat_completion import ChatCompletion
 from typing import Optional, List, Tuple, Union, Dict, Type
+from rich.console import Console
 import math
 from deepeval.metrics import BaseConversationalMetric
 from deepeval.metrics.g_eval.utils import (
@@ -11,6 +12,8 @@ from deepeval.metrics.g_eval.utils import (
     format_rubrics,
     validate_and_sort_rubrics,
     validate_criteria_and_evaluation_steps,
+    CONVERSATIONAL_G_EVAL_API_PARAMS,
+    construct_geval_upload_payload,
 )
 from deepeval.test_case import (
     TurnParams,
@@ -33,6 +36,7 @@ from deepeval.models import DeepEvalBaseLLM
 from deepeval.metrics.indicator import metric_progress_indicator
 import deepeval.metrics.conversational_g_eval.schema as cgschema
 from deepeval.metrics.api import metric_data_manager
+from deepeval.confident.api import Api, Endpoints, HttpMethods


 class ConversationalGEval(BaseConversationalMetric):
@@ -412,6 +416,37 @@ class ConversationalGEval(BaseConversationalMetric):
         self.success = False
         return self.success

+    def upload(self):
+        api = Api()
+
+        payload = construct_geval_upload_payload(
+            name=self.name,
+            evaluation_params=self.evaluation_params,
+            g_eval_api_params=CONVERSATIONAL_G_EVAL_API_PARAMS,
+            criteria=self.criteria,
+            evaluation_steps=self.evaluation_steps,
+            multi_turn=True,
+            rubric=self.rubric,
+        )
+
+        data, _ = api.send_request(
+            method=HttpMethods.POST,
+            endpoint=Endpoints.METRICS_ENDPOINT,
+            body=payload,
+        )
+
+        metric_id = data.get("id")
+        self.metric_id = metric_id
+        console = Console()
+
+        if metric_id:
+            console.print(
+                "[rgb(5,245,141)]✓[/rgb(5,245,141)] Metric uploaded successfully "
+                f"(id: [bold]{metric_id}[/bold])"
+            )
+
+        return data
+
     @property
     def __name__(self):
         if self._include_g_eval_suffix:
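The new `upload()` method pushes the metric definition (not evaluation results) to the Confident AI METRICS_ENDPOINT and stores the returned id on the instance. A hedged usage sketch — the constructor arguments below follow the usual ConversationalGEval API, which this diff does not itself show, and the call presumably requires a configured Confident AI API key:

    from deepeval.metrics import ConversationalGEval
    from deepeval.test_case import TurnParams

    metric = ConversationalGEval(
        name="Helpfulness",
        criteria="Judge whether the assistant's turns resolve the user's request.",
        evaluation_params=[TurnParams.ROLE, TurnParams.CONTENT],
    )

    # Returns the API response dict and sets metric.metric_id, per the hunk above.
    data = metric.upload()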
deepeval/metrics/g_eval/g_eval.py
CHANGED

@@ -1,7 +1,7 @@
 """LLM evaluated metric based on the GEval framework: https://arxiv.org/pdf/2303.16634.pdf"""

 import asyncio
-
+from rich.console import Console
 from typing import Optional, List, Tuple, Union, Type
 from deepeval.metrics import BaseMetric
 from deepeval.test_case import (
@@ -32,9 +32,12 @@ from deepeval.metrics.g_eval.utils import (
     validate_criteria_and_evaluation_steps,
     number_evaluation_steps,
     get_score_range,
+    construct_geval_upload_payload,
+    G_EVAL_API_PARAMS,
 )
 from deepeval.metrics.api import metric_data_manager
 from deepeval.config.settings import get_settings
+from deepeval.confident.api import Api, Endpoints, HttpMethods


 class GEval(BaseMetric):
@@ -408,6 +411,37 @@ class GEval(BaseMetric):
         self.success = False
         return self.success

+    def upload(self):
+        api = Api()
+
+        payload = construct_geval_upload_payload(
+            name=self.name,
+            evaluation_params=self.evaluation_params,
+            g_eval_api_params=G_EVAL_API_PARAMS,
+            criteria=self.criteria,
+            evaluation_steps=self.evaluation_steps,
+            multi_turn=False,
+            rubric=self.rubric,
+        )
+
+        data, _ = api.send_request(
+            method=HttpMethods.POST,
+            endpoint=Endpoints.METRICS_ENDPOINT,
+            body=payload,
+        )
+
+        metric_id = data.get("id")
+        self.metric_id = metric_id
+        console = Console()
+
+        if metric_id:
+            console.print(
+                "[rgb(5,245,141)]✓[/rgb(5,245,141)] Metric uploaded successfully "
+                f"(id: [bold]{metric_id}[/bold])"
+            )
+
+        return data
+
     @property
     def __name__(self):
         if self._include_g_eval_suffix:
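The single-turn variant is identical except for `multi_turn=False` and `G_EVAL_API_PARAMS`. A hedged sketch of registering a GEval definition remotely (the constructor arguments follow the long-standing GEval API, which this diff does not change):

    from deepeval.metrics import GEval
    from deepeval.test_case import LLMTestCaseParams

    correctness = GEval(
        name="Correctness",
        criteria="Determine whether the actual output is factually consistent with the expected output.",
        evaluation_params=[
            LLMTestCaseParams.ACTUAL_OUTPUT,
            LLMTestCaseParams.EXPECTED_OUTPUT,
        ],
    )

    data = correctness.upload()    # POSTs the payload to Endpoints.METRICS_ENDPOINT
    print(correctness.metric_id)   # populated from the response, per the hunk above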
deepeval/metrics/g_eval/utils.py
CHANGED

@@ -52,6 +52,71 @@ CONVERSATIONAL_G_EVAL_PARAMS = {
     TurnParams.SCENARIO: "Scenario",
 }

+G_EVAL_API_PARAMS = {
+    LLMTestCaseParams.INPUT: "input",
+    LLMTestCaseParams.ACTUAL_OUTPUT: "actualOutput",
+    LLMTestCaseParams.EXPECTED_OUTPUT: "expectedOutput",
+    LLMTestCaseParams.CONTEXT: "context",
+    LLMTestCaseParams.RETRIEVAL_CONTEXT: "retrievalContext",
+    LLMTestCaseParams.EXPECTED_TOOLS: "expectedTools",
+    LLMTestCaseParams.TOOLS_CALLED: "toolsCalled",
+}
+
+CONVERSATIONAL_G_EVAL_API_PARAMS = {
+    TurnParams.ROLE: "role",
+    TurnParams.CONTENT: "content",
+    TurnParams.SCENARIO: "scenario",
+    TurnParams.EXPECTED_OUTCOME: "expectedOutcome",
+    TurnParams.RETRIEVAL_CONTEXT: "retrievalContext",
+    TurnParams.TOOLS_CALLED: "toolsCalled",
+}
+
+
+def construct_geval_upload_payload(
+    name: str,
+    evaluation_params: List[LLMTestCaseParams],
+    g_eval_api_params: Dict,
+    criteria: Optional[str] = None,
+    evaluation_steps: Optional[List[str]] = None,
+    multi_turn: bool = False,
+    rubric: Optional[List[Rubric]] = None,
+) -> Dict:
+    if not evaluation_params:
+        raise ValueError("GEval requires at least one evaluation parameter.")
+
+    unsupported_params = [
+        param for param in evaluation_params if param not in g_eval_api_params
+    ]
+    if unsupported_params:
+        raise ValueError(
+            "Unsupported evaluation params for GEval upload: "
+            + ", ".join(param.name for param in unsupported_params)
+        )
+
+    payload = {
+        "name": name,
+        "evaluationParams": [
+            g_eval_api_params[param] for param in evaluation_params
+        ],
+        "multiTurn": multi_turn,
+    }
+
+    if criteria is not None:
+        payload["criteria"] = criteria
+    else:
+        payload["evaluationSteps"] = evaluation_steps
+
+    if rubric is not None:
+        payload["rubric"] = [
+            {
+                "scoreRange": list(r.score_range),
+                "expectedOutcome": r.expected_outcome,
+            }
+            for r in rubric
+        ]
+
+    return payload
+

 def validate_criteria_and_evaluation_steps(
     criteria: Optional[str] = None,
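Tracing `construct_geval_upload_payload` through a concrete call makes the payload shape clear. Note the either/or behavior: when `criteria` is given, `evaluationSteps` is omitted entirely (and vice versa), and rubric score ranges are serialized as lists:

    from deepeval.metrics.g_eval.utils import (
        construct_geval_upload_payload,
        G_EVAL_API_PARAMS,
    )
    from deepeval.test_case import LLMTestCaseParams

    payload = construct_geval_upload_payload(
        name="Correctness",
        evaluation_params=[
            LLMTestCaseParams.INPUT,
            LLMTestCaseParams.ACTUAL_OUTPUT,
        ],
        g_eval_api_params=G_EVAL_API_PARAMS,
        criteria="Is the actual output correct given the input?",
        multi_turn=False,
    )
    # payload ==
    # {
    #     "name": "Correctness",
    #     "evaluationParams": ["input", "actualOutput"],
    #     "multiTurn": False,
    #     "criteria": "Is the actual output correct given the input?",
    # }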
deepeval/models/__init__.py
CHANGED

@@ -16,6 +16,7 @@ from deepeval.models.llms import (
     GrokModel,
     DeepSeekModel,
     PortkeyModel,
+    OpenRouterModel,
 )
 from deepeval.models.embedding_models import (
     OpenAIEmbeddingModel,
@@ -44,4 +45,5 @@ __all__ = [
     "LocalEmbeddingModel",
     "OllamaEmbeddingModel",
     "PortkeyModel",
+    "OpenRouterModel",
 ]
deepeval/models/llms/__init__.py
CHANGED

@@ -10,6 +10,7 @@ from .kimi_model import KimiModel
 from .grok_model import GrokModel
 from .deepseek_model import DeepSeekModel
 from .portkey_model import PortkeyModel
+from .openrouter_model import OpenRouterModel

 __all__ = [
     "AzureOpenAIModel",
@@ -24,4 +25,5 @@ __all__ = [
     "GrokModel",
     "DeepSeekModel",
     "PortkeyModel",
+    "OpenRouterModel",
 ]
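With the export in place, the new wrapper is importable from `deepeval.models` like the other providers. The constructor arguments below are assumptions: the implementation lives in the new deepeval/models/llms/openrouter_model.py (a 398-line file not reproduced in this diff view); only the import path and the provider/model naming convention are established by the hunks shown here and in constants.py:

    from deepeval.models import OpenRouterModel

    # Hypothetical arguments - the real signature is in openrouter_model.py.
    model = OpenRouterModel(
        model="anthropic/claude-3-opus",  # OpenRouter's provider/model format
    )

Like the other provider wrappers, the instance can then presumably be passed to a metric's `model=` argument.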
deepeval/models/llms/constants.py
CHANGED

@@ -3,6 +3,11 @@ from typing import Any, Callable, Union
 from deepeval.models.base_model import DeepEvalModelData


+DEFAULT_GPT_MODEL = "gpt-4.1"
+# OpenRouter uses provider/model format (e.g., "openai/gpt-4", "anthropic/claude-3-opus")
+# DeepEval does not validate OpenRouter model strings.
+DEFAULT_OPENROUTER_MODEL = f"openai/{DEFAULT_GPT_MODEL}"
+
 ModelDataFactory = Callable[[], DeepEvalModelData]
 ModelDataValue = Union[DeepEvalModelData, ModelDataFactory]

@@ -366,6 +371,24 @@ OPENAI_MODELS_DATA = ModelDataRegistry(
         input_price=1.25 / 1e6,
         output_price=10.00 / 1e6,
     ),
+    "gpt-5.1": make_model_data(
+        supports_log_probs=False,
+        supports_multimodal=True,
+        supports_structured_outputs=True,
+        supports_json=False,
+        supports_temperature=False,
+        input_price=1.25 / 1e6,
+        output_price=10.00 / 1e6,
+    ),
+    "gpt-5.2": make_model_data(
+        supports_log_probs=False,
+        supports_multimodal=True,
+        supports_structured_outputs=True,
+        supports_json=False,
+        supports_temperature=False,
+        input_price=1.75 / 1e6,
+        output_price=14.00 / 1e6,
+    ),
     }
 )
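These per-token prices feed GPTModel.calculate_cost (see the openai_model.py hunks further down), which multiplies token counts by input_price and output_price, presumably summing the two. As a worked example under that assumption: a gpt-5.1 call with 10,000 input and 2,000 output tokens would cost 10,000 * 1.25/1e6 + 2,000 * 10.00/1e6 = $0.0325, and the same call on gpt-5.2 would cost 10,000 * 1.75/1e6 + 2,000 * 14.00/1e6 = $0.0455.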
deepeval/models/llms/gemini_model.py
CHANGED

@@ -22,7 +22,7 @@ from deepeval.models.llms.constants import GEMINI_MODELS_DATA
 if TYPE_CHECKING:
     from google.genai import Client

-default_gemini_model = "gemini-
+default_gemini_model = "gemini-2.5-pro"

 # consistent retry rules
 retry_gemini = create_retry_decorator(PS.GOOGLE)
@@ -371,25 +371,6 @@ class GeminiModel(DeepEvalBaseLLM):
         client_kwargs = self._client_kwargs(**self.kwargs)

         if self.should_use_vertexai():
-            service_account_key_json = require_secret_api_key(
-                self.service_account_key,
-                provider_label="Google Gemini",
-                env_var_name="GOOGLE_SERVICE_ACCOUNT_KEY",
-                param_hint="`service_account_key` to GeminiModel(...)",
-            )
-
-            try:
-                service_account_key = json.loads(service_account_key_json)
-            except Exception as e:
-                raise DeepEvalError(
-                    "GOOGLE_SERVICE_ACCOUNT_KEY must be valid JSON for a Google service account."
-                ) from e
-
-            if not isinstance(service_account_key, dict):
-                raise DeepEvalError(
-                    "GOOGLE_SERVICE_ACCOUNT_KEY must decode to a JSON object."
-                )
-
             if not self.project or not self.location:
                 raise DeepEvalError(
                     "When using Vertex AI API, both project and location are required. "
@@ -397,17 +378,34 @@ class GeminiModel(DeepEvalBaseLLM):
                     "GOOGLE_CLOUD_LOCATION in your DeepEval configuration."
                 )

-
-
-
+            # if no service account key is provided, allow the SDK
+            # to resolve Application Default Credentials automatically.
+            credentials = None
+            if self.service_account_key is not None:
+                service_account_key_json = require_secret_api_key(
+                    self.service_account_key,
+                    provider_label="Google Gemini",
+                    env_var_name="GOOGLE_SERVICE_ACCOUNT_KEY",
+                    param_hint="`service_account_key` to GeminiModel(...)",
+                )
+
+                try:
+                    service_account_key = json.loads(service_account_key_json)
+                except Exception as e:
+                    raise DeepEvalError(
+                        "GOOGLE_SERVICE_ACCOUNT_KEY must be valid JSON for a Google service account."
+                    ) from e
+
+                if not isinstance(service_account_key, dict):
+                    raise DeepEvalError(
+                        "GOOGLE_SERVICE_ACCOUNT_KEY must decode to a JSON object."
+                    )
+
+                oauth2 = self._require_oauth2()
+                credentials = oauth2.service_account.Credentials.from_service_account_info(
                     service_account_key,
-                scopes=[
-                    "https://www.googleapis.com/auth/cloud-platform",
-                ],
+                    scopes=["https://www.googleapis.com/auth/cloud-platform"],
                 )
-            if service_account_key
-            else None
-            )

         client = self._module.Client(
             vertexai=True,
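The practical effect: GOOGLE_SERVICE_ACCOUNT_KEY is now optional for Vertex AI. When it is absent, `credentials` stays None and the google-genai Client resolves Application Default Credentials (for example via `gcloud auth application-default login` or workload identity). A hedged sketch of the ADC path — the GeminiModel constructor arguments are inferred from the attributes referenced in these hunks (`project`, `location`), not from a signature shown in this diff:

    from deepeval.models import GeminiModel

    # No service_account_key: the SDK falls back to Application Default Credentials.
    # project/location remain required for Vertex AI, per the error message above.
    model = GeminiModel(
        model_name="gemini-2.5-pro",   # assumed parameter name
        project="my-gcp-project",
        location="us-central1",
    )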
deepeval/models/llms/openai_model.py
CHANGED

@@ -24,14 +24,13 @@ from deepeval.models.retry_policy import (
     sdk_retries_for,
 )
 from deepeval.models.llms.constants import (
+    DEFAULT_GPT_MODEL,
     OPENAI_MODELS_DATA,
 )


 retry_openai = create_retry_decorator(PS.OPENAI)

-default_gpt_model = "gpt-4.1"
-

 def _request_timeout_seconds() -> float:
     timeout = float(get_settings().DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS or 0)
@@ -70,7 +69,7 @@ class GPTModel(DeepEvalBaseLLM):

         model = model or settings.OPENAI_MODEL_NAME
         if model is None:
-            model =
+            model = DEFAULT_GPT_MODEL

         cost_per_input_token = (
             cost_per_input_token
@@ -377,7 +376,9 @@ class GPTModel(DeepEvalBaseLLM):
     # Utilities #
     #############

-    def calculate_cost(
+    def calculate_cost(
+        self, input_tokens: int, output_tokens: int
+    ) -> Optional[float]:
         if self.model_data.input_price and self.model_data.output_price:
             input_cost = input_tokens * self.model_data.input_price
             output_cost = output_tokens * self.model_data.output_price