deepeval 3.7.9__py3-none-any.whl → 3.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. deepeval/_version.py +1 -1
  2. deepeval/annotation/annotation.py +2 -2
  3. deepeval/cli/main.py +168 -0
  4. deepeval/confident/api.py +2 -0
  5. deepeval/config/settings.py +13 -0
  6. deepeval/constants.py +1 -0
  7. deepeval/dataset/dataset.py +6 -4
  8. deepeval/integrations/langchain/callback.py +330 -158
  9. deepeval/integrations/langchain/utils.py +31 -8
  10. deepeval/key_handler.py +8 -1
  11. deepeval/metrics/contextual_recall/contextual_recall.py +25 -6
  12. deepeval/metrics/contextual_recall/schema.py +6 -0
  13. deepeval/metrics/conversational_g_eval/conversational_g_eval.py +35 -0
  14. deepeval/metrics/g_eval/g_eval.py +35 -1
  15. deepeval/metrics/g_eval/utils.py +65 -0
  16. deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +10 -1
  17. deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +10 -1
  18. deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +10 -1
  19. deepeval/metrics/utils.py +1 -1
  20. deepeval/models/__init__.py +2 -0
  21. deepeval/models/llms/__init__.py +2 -0
  22. deepeval/models/llms/amazon_bedrock_model.py +51 -6
  23. deepeval/models/llms/azure_model.py +33 -7
  24. deepeval/models/llms/constants.py +23 -0
  25. deepeval/models/llms/gemini_model.py +6 -1
  26. deepeval/models/llms/openai_model.py +5 -4
  27. deepeval/models/llms/openrouter_model.py +398 -0
  28. deepeval/models/retry_policy.py +3 -0
  29. deepeval/prompt/api.py +1 -0
  30. deepeval/prompt/prompt.py +7 -5
  31. deepeval/test_case/llm_test_case.py +1 -0
  32. deepeval/tracing/tracing.py +6 -1
  33. deepeval/tracing/types.py +1 -1
  34. {deepeval-3.7.9.dist-info → deepeval-3.8.1.dist-info}/METADATA +3 -3
  35. {deepeval-3.7.9.dist-info → deepeval-3.8.1.dist-info}/RECORD +38 -37
  36. {deepeval-3.7.9.dist-info → deepeval-3.8.1.dist-info}/LICENSE.md +0 -0
  37. {deepeval-3.7.9.dist-info → deepeval-3.8.1.dist-info}/WHEEL +0 -0
  38. {deepeval-3.7.9.dist-info → deepeval-3.8.1.dist-info}/entry_points.txt +0 -0
deepeval/models/llms/azure_model.py CHANGED
@@ -1,6 +1,6 @@
 from openai.types.chat.chat_completion import ChatCompletion
 from openai import AzureOpenAI, AsyncAzureOpenAI
-from typing import Optional, Tuple, Union, Dict, List
+from typing import Optional, Tuple, Union, Dict, List, Callable, Awaitable
 from pydantic import BaseModel, SecretStr

 from deepeval.errors import DeepEvalError
@@ -42,6 +42,10 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         model: Optional[str] = None,
         api_key: Optional[str] = None,
         base_url: Optional[str] = None,
+        azure_ad_token_provider: Optional[
+            Callable[[], "str | Awaitable[str]"]
+        ] = None,
+        azure_ad_token: Optional[str] = None,
         temperature: Optional[float] = None,
         cost_per_input_token: Optional[float] = None,
         cost_per_output_token: Optional[float] = None,
@@ -67,12 +71,19 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         model = model or settings.AZURE_MODEL_NAME
         deployment_name = deployment_name or settings.AZURE_DEPLOYMENT_NAME

+        self.azure_ad_token_provider = azure_ad_token_provider
+
         if api_key is not None:
             # keep it secret, keep it safe from serialization, logging and the like
             self.api_key: Optional[SecretStr] = SecretStr(api_key)
         else:
             self.api_key = settings.AZURE_OPENAI_API_KEY

+        if azure_ad_token is not None:
+            self.azure_ad_token = azure_ad_token
+        else:
+            self.azure_ad_token = settings.AZURE_OPENAI_AD_TOKEN
+
         api_version = api_version or settings.OPENAI_API_VERSION
         if base_url is not None:
             base_url = str(base_url).rstrip("/")
@@ -431,18 +442,33 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         return kwargs

     def _build_client(self, cls):
-        api_key = require_secret_api_key(
-            self.api_key,
-            provider_label="AzureOpenAI",
-            env_var_name="AZURE_OPENAI_API_KEY",
-            param_hint="`api_key` to AzureOpenAIModel(...)",
-        )
+        # Only require the API key / Azure AD token if no token provider is supplied
+        azure_ad_token = None
+        api_key = None
+
+        if self.azure_ad_token_provider is None:
+            if self.azure_ad_token is not None:
+                azure_ad_token = require_secret_api_key(
+                    self.azure_ad_token,
+                    provider_label="AzureOpenAI",
+                    env_var_name="AZURE_OPENAI_AD_TOKEN",
+                    param_hint="`azure_ad_token` to AzureOpenAIModel(...)",
+                )
+            else:
+                api_key = require_secret_api_key(
+                    self.api_key,
+                    provider_label="AzureOpenAI",
+                    env_var_name="AZURE_OPENAI_API_KEY",
+                    param_hint="`api_key` to AzureOpenAIModel(...)",
+                )

         kw = dict(
             api_key=api_key,
             api_version=self.api_version,
             azure_endpoint=self.base_url,
             azure_deployment=self.deployment_name,
+            azure_ad_token_provider=self.azure_ad_token_provider,
+            azure_ad_token=azure_ad_token,
             **self._client_kwargs(),
         )
         try:
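
The new parameters enable Microsoft Entra ID (Azure AD) authentication in place of a static API key. A minimal sketch of how a caller might wire up a token provider, assuming the azure-identity package (not a deepeval dependency) and a hypothetical deployment name; endpoint, model, and API version still fall back to the AZURE_* settings:

    # Sketch only: azure-identity usage and the deployment name are assumptions.
    from azure.identity import DefaultAzureCredential, get_bearer_token_provider
    from deepeval.models import AzureOpenAIModel

    token_provider = get_bearer_token_provider(
        DefaultAzureCredential(),
        "https://cognitiveservices.azure.com/.default",
    )

    # With a token provider supplied, neither api_key nor azure_ad_token is required.
    model = AzureOpenAIModel(
        deployment_name="my-gpt41-deployment",  # hypothetical deployment
        azure_ad_token_provider=token_provider,
    )
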
deepeval/models/llms/constants.py CHANGED
@@ -3,6 +3,11 @@ from typing import Any, Callable, Union
 from deepeval.models.base_model import DeepEvalModelData


+DEFAULT_GPT_MODEL = "gpt-4.1"
+# OpenRouter uses provider/model format (e.g., "openai/gpt-4", "anthropic/claude-3-opus")
+# DeepEval does not validate OpenRouter model strings.
+DEFAULT_OPENROUTER_MODEL = f"openai/{DEFAULT_GPT_MODEL}"
+
 ModelDataFactory = Callable[[], DeepEvalModelData]
 ModelDataValue = Union[DeepEvalModelData, ModelDataFactory]

@@ -366,6 +371,24 @@ OPENAI_MODELS_DATA = ModelDataRegistry(
            input_price=1.25 / 1e6,
            output_price=10.00 / 1e6,
        ),
+        "gpt-5.1": make_model_data(
+            supports_log_probs=False,
+            supports_multimodal=True,
+            supports_structured_outputs=True,
+            supports_json=False,
+            supports_temperature=False,
+            input_price=1.25 / 1e6,
+            output_price=10.00 / 1e6,
+        ),
+        "gpt-5.2": make_model_data(
+            supports_log_probs=False,
+            supports_multimodal=True,
+            supports_structured_outputs=True,
+            supports_json=False,
+            supports_temperature=False,
+            input_price=1.75 / 1e6,
+            output_price=14.00 / 1e6,
+        ),
     }
 )

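The registered prices are per token (dollars divided by 1e6), so a quick sanity check of what the new gpt-5.2 entry implies for a typical call, with illustrative token counts:

    # Prices taken from the gpt-5.2 entry above; token counts are illustrative.
    input_price = 1.75 / 1e6    # $1.75 per 1M input tokens
    output_price = 14.00 / 1e6  # $14.00 per 1M output tokens

    cost = 10_000 * input_price + 2_000 * output_price
    print(f"${cost:.4f}")  # 10k input + 2k output tokens -> $0.0455
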
deepeval/models/llms/gemini_model.py CHANGED
@@ -65,6 +65,7 @@ class GeminiModel(DeepEvalBaseLLM):
         project: Optional[str] = None,
         location: Optional[str] = None,
         service_account_key: Optional[Union[str, Dict[str, str]]] = None,
+        use_vertexai: Optional[bool] = None,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
@@ -93,7 +94,11 @@ class GeminiModel(DeepEvalBaseLLM):
             location if location is not None else settings.GOOGLE_CLOUD_LOCATION
         )
         self.location = str(location).strip() if location is not None else None
-        self.use_vertexai = settings.GOOGLE_GENAI_USE_VERTEXAI
+        self.use_vertexai = (
+            use_vertexai
+            if use_vertexai is not None
+            else settings.GOOGLE_GENAI_USE_VERTEXAI
+        )

        self.service_account_key: Optional[SecretStr] = None
        if service_account_key is None:
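
A minimal sketch of the new per-instance override (the model_name parameter and all values are assumptions not shown in this hunk):

    from deepeval.models import GeminiModel  # export path assumed

    model = GeminiModel(
        model_name="gemini-2.0-flash",  # hypothetical model
        project="my-gcp-project",       # hypothetical GCP project
        location="us-central1",
        # Overrides settings.GOOGLE_GENAI_USE_VERTEXAI for this instance only;
        # omit it to keep the previous settings-driven behavior.
        use_vertexai=True,
    )
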
deepeval/models/llms/openai_model.py CHANGED
@@ -24,14 +24,13 @@ from deepeval.models.retry_policy import (
     sdk_retries_for,
 )
 from deepeval.models.llms.constants import (
+    DEFAULT_GPT_MODEL,
     OPENAI_MODELS_DATA,
 )


 retry_openai = create_retry_decorator(PS.OPENAI)

-default_gpt_model = "gpt-4.1"
-

 def _request_timeout_seconds() -> float:
     timeout = float(get_settings().DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS or 0)
@@ -70,7 +69,7 @@ class GPTModel(DeepEvalBaseLLM):

         model = model or settings.OPENAI_MODEL_NAME
         if model is None:
-            model = default_gpt_model
+            model = DEFAULT_GPT_MODEL

         cost_per_input_token = (
             cost_per_input_token
@@ -377,7 +376,9 @@ class GPTModel(DeepEvalBaseLLM):
     # Utilities #
     #############

-    def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
+    def calculate_cost(
+        self, input_tokens: int, output_tokens: int
+    ) -> Optional[float]:
         if self.model_data.input_price and self.model_data.output_price:
             input_cost = input_tokens * self.model_data.input_price
             output_cost = output_tokens * self.model_data.output_price
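
Since calculate_cost can now return None when a model has no registered pricing, callers should guard the result; a sketch with illustrative names:

    # `model` and `running_total` are illustrative caller-side names.
    cost = model.calculate_cost(input_tokens=1_200, output_tokens=300)
    if cost is not None:
        running_total += cost  # only accumulate when pricing is known
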
deepeval/models/llms/openrouter_model.py ADDED
@@ -0,0 +1,398 @@
+import warnings
+import inspect
+
+from typing import Optional, Tuple, Union, Dict, Type
+from pydantic import BaseModel, SecretStr
+from openai.types.chat.chat_completion import ChatCompletion
+from openai import (
+    OpenAI,
+    AsyncOpenAI,
+)
+
+from deepeval.config.settings import get_settings
+from deepeval.constants import ProviderSlug as PS
+from deepeval.errors import DeepEvalError
+from deepeval.models import DeepEvalBaseLLM
+from deepeval.models.llms.constants import DEFAULT_OPENROUTER_MODEL
+from deepeval.models.llms.utils import trim_and_load_json
+from deepeval.models.utils import require_secret_api_key
+from deepeval.models.retry_policy import (
+    create_retry_decorator,
+    sdk_retries_for,
+)
+
+
+retry_openrouter = create_retry_decorator(PS.OPENROUTER)
+
+
+def _request_timeout_seconds() -> float:
+    timeout = float(get_settings().DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS or 0)
+    return timeout if timeout > 0 else 30.0
+
+
+def _convert_schema_to_openrouter_format(
+    schema: Union[Type[BaseModel], BaseModel],
+) -> Dict:
+    """
+    Convert a Pydantic BaseModel to OpenRouter's JSON Schema format.
+
+    OpenRouter expects:
+    {
+        "type": "json_schema",
+        "json_schema": {
+            "name": "schema_name",
+            "strict": true,
+            "schema": { ... JSON Schema ... }
+        }
+    }
+    """
+    json_schema = schema.model_json_schema()
+    schema_name = (
+        schema.__name__
+        if inspect.isclass(schema)
+        else schema.__class__.__name__
+    )
+
+    # OpenRouter requires additionalProperties: false when strict: true
+    # Ensure it's set at the root level of the schema
+    if "additionalProperties" not in json_schema:
+        json_schema["additionalProperties"] = False
+
+    return {
+        "type": "json_schema",
+        "json_schema": {
+            "name": schema_name,
+            "strict": True,
+            "schema": json_schema,
+        },
+    }
+
+
+class OpenRouterModel(DeepEvalBaseLLM):
+    def __init__(
+        self,
+        model: Optional[str] = None,
+        api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
+        temperature: Optional[float] = None,
+        cost_per_input_token: Optional[float] = None,
+        cost_per_output_token: Optional[float] = None,
+        generation_kwargs: Optional[Dict] = None,
+        **kwargs,
+    ):
+        settings = get_settings()
+        model = model or settings.OPENROUTER_MODEL_NAME
+        if model is None:
+            model = DEFAULT_OPENROUTER_MODEL
+
+        if api_key is not None:
+            # keep it secret, keep it safe from serialization, logging and the like
+            self.api_key: Optional[SecretStr] = SecretStr(api_key)
+        else:
+            self.api_key = settings.OPENROUTER_API_KEY
+
+        if base_url is not None:
+            base_url = str(base_url).rstrip("/")
+        elif settings.OPENROUTER_BASE_URL is not None:
+            base_url = str(settings.OPENROUTER_BASE_URL).rstrip("/")
+        else:
+            base_url = "https://openrouter.ai/api/v1"
+
+        cost_per_input_token = (
+            cost_per_input_token
+            if cost_per_input_token is not None
+            else settings.OPENROUTER_COST_PER_INPUT_TOKEN
+        )
+        cost_per_output_token = (
+            cost_per_output_token
+            if cost_per_output_token is not None
+            else settings.OPENROUTER_COST_PER_OUTPUT_TOKEN
+        )
+
+        if temperature is not None:
+            temperature = float(temperature)
+        elif settings.TEMPERATURE is not None:
+            temperature = settings.TEMPERATURE
+        else:
+            temperature = 0.0
+
+        # validation
+        if temperature < 0:
+            raise DeepEvalError("Temperature must be >= 0.")
+
+        self.base_url = base_url
+        self.cost_per_input_token = cost_per_input_token
+        self.cost_per_output_token = cost_per_output_token
+        self.temperature = temperature
+
+        self.kwargs = dict(kwargs)
+        self.kwargs.pop("temperature", None)
+
+        self.generation_kwargs = dict(generation_kwargs or {})
+        self.generation_kwargs.pop("temperature", None)
+
+        super().__init__(model)
+
+    ###############################################
+    # Generate functions
+    ###############################################
+
+    async def _generate_with_client(
+        self,
+        client: AsyncOpenAI,
+        prompt: str,
+        schema: Optional[BaseModel] = None,
+    ) -> Tuple[Union[str, Dict], float]:
+        """
+        Core generation logic shared between generate() and a_generate().
+
+        Args:
+            client: AsyncOpenAI client
+            prompt: The prompt to send
+            schema: Optional Pydantic schema for structured outputs
+
+        Returns:
+            Tuple of (output, cost)
+        """
+        if schema:
+            # Try OpenRouter's native JSON Schema format
+            try:
+                openrouter_response_format = (
+                    _convert_schema_to_openrouter_format(schema)
+                )
+                completion = await client.chat.completions.create(
+                    model=self.name,
+                    messages=[{"role": "user", "content": prompt}],
+                    response_format=openrouter_response_format,
+                    temperature=self.temperature,
+                    **self.generation_kwargs,
+                )
+
+                # Parse the JSON response and validate against schema
+                json_output = trim_and_load_json(
+                    completion.choices[0].message.content
+                )
+                cost = self.calculate_cost(
+                    completion.usage.prompt_tokens,
+                    completion.usage.completion_tokens,
+                    response=completion,
+                )
+                return schema.model_validate(json_output), cost
+            except Exception as e:
+                # Warn if structured outputs fail
+                warnings.warn(
+                    f"Structured outputs not supported for model '{self.name}'. "
+                    f"Falling back to regular generation with JSON parsing. "
+                    f"Error: {str(e)}",
+                    UserWarning,
+                    stacklevel=3,
+                )
+                # Fall back to regular generation and parse JSON manually (like Bedrock)
+                # This works with any model that can generate JSON in text
+                pass
+
+        # Regular generation (or fallback if structured outputs failed)
+        completion = await client.chat.completions.create(
+            model=self.name,
+            messages=[{"role": "user", "content": prompt}],
+            temperature=self.temperature,
+            **self.generation_kwargs,
+        )
+
+        output = completion.choices[0].message.content
+        cost = self.calculate_cost(
+            completion.usage.prompt_tokens,
+            completion.usage.completion_tokens,
+            response=completion,
+        )
+        if schema:
+            # Parse JSON from text and validate against schema (like Bedrock)
+            json_output = trim_and_load_json(output)
+            return schema.model_validate(json_output), cost
+        else:
+            return output, cost
+
+    @retry_openrouter
+    def generate(
+        self, prompt: str, schema: Optional[BaseModel] = None
+    ) -> Tuple[Union[str, Dict], float]:
+        from deepeval.models.llms.utils import safe_asyncio_run
+
+        client = self.load_model(async_mode=True)
+        return safe_asyncio_run(
+            self._generate_with_client(client, prompt, schema)
+        )
+
+    @retry_openrouter
+    async def a_generate(
+        self, prompt: str, schema: Optional[BaseModel] = None
+    ) -> Tuple[Union[str, BaseModel], float]:
+        client = self.load_model(async_mode=True)
+        return await self._generate_with_client(client, prompt, schema)
+
+    ###############################################
+    # Other generate functions
+    ###############################################
+
+    @retry_openrouter
+    def generate_raw_response(
+        self,
+        prompt: str,
+        top_logprobs: int = 5,
+    ) -> Tuple[ChatCompletion, float]:
+        # Generate completion
+        client = self.load_model(async_mode=False)
+        completion = client.chat.completions.create(
+            model=self.name,
+            messages=[{"role": "user", "content": prompt}],
+            temperature=self.temperature,
+            logprobs=True,
+            top_logprobs=top_logprobs,
+            **self.generation_kwargs,
+        )
+        # Cost calculation
+        input_tokens = completion.usage.prompt_tokens
+        output_tokens = completion.usage.completion_tokens
+        cost = self.calculate_cost(
+            input_tokens, output_tokens, response=completion
+        )
+
+        return completion, cost
+
+    @retry_openrouter
+    async def a_generate_raw_response(
+        self,
+        prompt: str,
+        top_logprobs: int = 5,
+    ) -> Tuple[ChatCompletion, float]:
+        # Generate completion
+        client = self.load_model(async_mode=True)
+        completion = await client.chat.completions.create(
+            model=self.name,
+            messages=[{"role": "user", "content": prompt}],
+            temperature=self.temperature,
+            logprobs=True,
+            top_logprobs=top_logprobs,
+            **self.generation_kwargs,
+        )
+        # Cost calculation
+        input_tokens = completion.usage.prompt_tokens
+        output_tokens = completion.usage.completion_tokens
+        cost = self.calculate_cost(
+            input_tokens, output_tokens, response=completion
+        )
+
+        return completion, cost
+
+    @retry_openrouter
+    def generate_samples(
+        self, prompt: str, n: int, temperature: float
+    ) -> Tuple[list[str], float]:
+        client = self.load_model(async_mode=False)
+        response = client.chat.completions.create(
+            model=self.name,
+            messages=[{"role": "user", "content": prompt}],
+            n=n,
+            temperature=temperature,
+            **self.generation_kwargs,
+        )
+        completions = [choice.message.content for choice in response.choices]
+        cost = self.calculate_cost(
+            response.usage.prompt_tokens,
+            response.usage.completion_tokens,
+            response=response,
+        )
+        return completions, cost
+
+    ###############################################
+    # Utilities
+    ###############################################
+
+    def calculate_cost(
+        self, input_tokens: int, output_tokens: int, response=None
+    ) -> Optional[float]:
+        """
+        Calculate cost with priority:
+        1. User-provided pricing (highest priority)
+        2. Try to extract from API response (if OpenRouter includes pricing)
+        3. Return None if cost cannot be determined
+        """
+        # Priority 1: User-provided pricing
+        if (
+            self.cost_per_input_token is not None
+            and self.cost_per_output_token is not None
+        ):
+            return (
+                input_tokens * self.cost_per_input_token
+                + output_tokens * self.cost_per_output_token
+            )
+
+        # Priority 2: Try to extract from API response (if OpenRouter includes pricing)
+        # Note: OpenRouter may include pricing in response metadata
+        if response is not None:
+            # Check if response has cost information
+            usage_cost = getattr(getattr(response, "usage", None), "cost", None)
+            if usage_cost is not None:
+                try:
+                    return float(usage_cost)
+                except (ValueError, TypeError):
+                    pass
+            # Some responses might have cost at the top level
+            response_cost = getattr(response, "cost", None)
+            if response_cost is not None:
+                try:
+                    return float(response_cost)
+                except (ValueError, TypeError):
+                    pass
+
+        # Priority 3: Return None since cost is unknown
+        return None
+
+    ###############################################
+    # Model
+    ###############################################
+
+    def get_model_name(self):
+        return f"{self.name} (OpenRouter)"
+
+    def load_model(self, async_mode: bool = False):
+        if not async_mode:
+            return self._build_client(OpenAI)
+        return self._build_client(AsyncOpenAI)
+
+    def _client_kwargs(self) -> Dict:
+        """
+        If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
+        If the user opts into SDK retries for 'openrouter' via DEEPEVAL_SDK_RETRY_PROVIDERS,
+        leave their retry settings as is.
+        """
+        kwargs = dict(self.kwargs or {})
+        if not sdk_retries_for(PS.OPENROUTER):
+            kwargs["max_retries"] = 0
+
+        if not kwargs.get("timeout"):
+            kwargs["timeout"] = _request_timeout_seconds()
+
+        return kwargs
+
+    def _build_client(self, cls):
+        api_key = require_secret_api_key(
+            self.api_key,
+            provider_label="OpenRouter",
+            env_var_name="OPENROUTER_API_KEY",
+            param_hint="`api_key` to OpenRouterModel(...)",
+        )
+
+        kw = dict(
+            api_key=api_key,
+            base_url=self.base_url,
+            **self._client_kwargs(),
+        )
+        try:
+            return cls(**kw)
+        except TypeError as e:
+            # older OpenAI SDKs may not accept max_retries; in that case remove it and retry once
+            if "max_retries" in str(e):
+                kw.pop("max_retries", None)
+                return cls(**kw)
+            raise
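
A minimal usage sketch of the new class, based on the signatures above (the model string, prices, and schema are illustrative; per the code, generate() returns a (result, cost) tuple, and cost may be None when pricing is unknown):

    from pydantic import BaseModel
    from deepeval.models.llms.openrouter_model import OpenRouterModel

    class Verdict(BaseModel):
        verdict: str
        reason: str

    model = OpenRouterModel(
        model="anthropic/claude-3-opus",    # provider/model format
        cost_per_input_token=15.00 / 1e6,   # optional manual pricing; otherwise
        cost_per_output_token=75.00 / 1e6,  # taken from the response, else None
    )
    # api_key falls back to the OPENROUTER_API_KEY setting when omitted.

    result, cost = model.generate(
        "Is the sky blue? Answer with a verdict and a reason.",
        schema=Verdict,  # structured output, with JSON-parsing fallback
    )
    print(result.verdict, cost)
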
deepeval/models/retry_policy.py CHANGED
@@ -772,6 +772,7 @@ AZURE_OPENAI_ERROR_POLICY = OPENAI_ERROR_POLICY
 DEEPSEEK_ERROR_POLICY = OPENAI_ERROR_POLICY
 KIMI_ERROR_POLICY = OPENAI_ERROR_POLICY
 LOCAL_ERROR_POLICY = OPENAI_ERROR_POLICY
+OPENROUTER_ERROR_POLICY = OPENAI_ERROR_POLICY

 ######################
 # AWS Bedrock Policy #
@@ -998,6 +999,7 @@ _POLICY_BY_SLUG: dict[str, Optional[ErrorPolicy]] = {
     PS.LITELLM.value: LITELLM_ERROR_POLICY,
     PS.LOCAL.value: LOCAL_ERROR_POLICY,
     PS.OLLAMA.value: OLLAMA_ERROR_POLICY,
+    PS.OPENROUTER.value: OPENROUTER_ERROR_POLICY,
 }


@@ -1019,6 +1021,7 @@ _STATIC_PRED_BY_SLUG: dict[str, Optional[Callable[[Exception], bool]]] = {
     PS.LITELLM.value: _opt_pred(LITELLM_ERROR_POLICY),
     PS.LOCAL.value: _opt_pred(LOCAL_ERROR_POLICY),
     PS.OLLAMA.value: _opt_pred(OLLAMA_ERROR_POLICY),
+    PS.OPENROUTER.value: _opt_pred(OPENROUTER_ERROR_POLICY),
 }

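With the slug registered, OpenRouter calls follow the same Tenacity-managed retry policy as OpenAI. A sketch of opting into SDK-managed retries instead, via the environment variable named in _client_kwargs() above (the exact value format is an assumption):

    import os

    # Assumed format: a list of provider slugs; only the variable name
    # is confirmed by the docstring in openrouter_model.py.
    os.environ["DEEPEVAL_SDK_RETRY_PROVIDERS"] = "openrouter"
    # sdk_retries_for(PS.OPENROUTER) should then return True, so
    # _client_kwargs() leaves the OpenAI client's max_retries untouched.
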
deepeval/prompt/api.py CHANGED
@@ -30,6 +30,7 @@ class ModelProvider(Enum):
     X_AI = "X_AI"
     DEEPSEEK = "DEEPSEEK"
     BEDROCK = "BEDROCK"
+    OPENROUTER = "OPENROUTER"


 class ModelSettings(BaseModel):
deepeval/prompt/prompt.py CHANGED
@@ -114,6 +114,7 @@ class Prompt:
         output_type: Optional[OutputType] = None,
         output_schema: Optional[Type[BaseModel]] = None,
         interpolation_type: Optional[PromptInterpolationType] = None,
+        confident_api_key: Optional[str] = None,
     ):
         if text_template and messages_template:
             raise TypeError(
@@ -129,6 +130,7 @@ class Prompt:
         self.interpolation_type: PromptInterpolationType = (
             interpolation_type or PromptInterpolationType.FSTRING
         )
+        self.confident_api_key = confident_api_key

         self._version = None
         self._prompt_version_id: Optional[str] = None
@@ -244,7 +246,7 @@ class Prompt:
             raise ValueError(
                 "Prompt alias is not set. Please set an alias to continue."
             )
-        api = Api()
+        api = Api(api_key=self.confident_api_key)
         data, _ = api.send_request(
             method=HttpMethods.GET,
             endpoint=Endpoints.PROMPTS_VERSIONS_ENDPOINT,
@@ -496,7 +498,7 @@ class Prompt:
         except Exception:
             pass

-        api = Api()
+        api = Api(api_key=self.confident_api_key)
         with Progress(
             SpinnerColumn(style="rgb(106,0,255)"),
             BarColumn(bar_width=60),
@@ -635,7 +637,7 @@ class Prompt:
             # Pydantic version below 2.0
             body = body.dict(by_alias=True, exclude_none=True)

-        api = Api()
+        api = Api(api_key=self.confident_api_key)
         _, link = api.send_request(
             method=HttpMethods.POST,
             endpoint=Endpoints.PROMPTS_ENDPOINT,
@@ -692,7 +694,7 @@ class Prompt:
             )
         except AttributeError:
             body = body.dict(by_alias=True, exclude_none=True)
-        api = Api()
+        api = Api(api_key=self.confident_api_key)
         data, _ = api.send_request(
             method=HttpMethods.PUT,
             endpoint=Endpoints.PROMPTS_VERSION_ID_ENDPOINT,
@@ -765,7 +767,7 @@ class Prompt:
         while True:
             await asyncio.sleep(self._refresh_map[CACHE_KEY][cache_value])

-            api = Api()
+            api = Api(api_key=self.confident_api_key)
             try:
                 if label:
                     data, _ = api.send_request(
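
A sketch of the new per-instance key, which every Api() call above now uses instead of only the globally configured key (the alias, key value, and pull() call are illustrative assumptions):

    from deepeval.prompt import Prompt  # import path assumed

    prompt = Prompt(
        alias="summarizer-v2",                 # hypothetical alias
        confident_api_key="confident_us_...",  # placeholder key
    )
    prompt.pull()  # requests now authenticate with this instance's key
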
deepeval/test_case/llm_test_case.py CHANGED
@@ -386,6 +386,7 @@ class LLMTestCase(BaseModel):
             [
                 re.search(pattern, self.input or "") is not None,
                 re.search(pattern, self.actual_output or "") is not None,
+                re.search(pattern, self.expected_output or "") is not None,
             ]
         )
         if isinstance(self.input, str)
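
The change means a pattern now counts as a match if it occurs in any of the three fields, not just input and actual_output. A sketch of the updated check (field values are illustrative):

    import re
    from deepeval.test_case import LLMTestCase

    tc = LLMTestCase(
        input="What is 2+2?",
        actual_output="4",
        expected_output="The answer is 4",
    )
    pattern = r"answer"
    # New in 3.8.1: expected_output participates in the match.
    matched = any(
        re.search(pattern, field or "") is not None
        for field in (tc.input, tc.actual_output, tc.expected_output)
    )
    assert matched  # matches only via expected_output here
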
deepeval/tracing/tracing.py CHANGED
@@ -847,7 +847,12 @@ class Observer:
             self.trace_uuid = parent_span.trace_uuid
         else:
             current_trace = current_trace_context.get()
-            if current_trace:
+            # IMPORTANT: Verify the trace is still active, not just in context
+            # (a previous failed async operation might leave a dead trace in context)
+            if (
+                current_trace
+                and current_trace.uuid in trace_manager.active_traces
+            ):
                 self.trace_uuid = current_trace.uuid
             else:
                 trace = trace_manager.start_new_trace(