deepeval 3.7.8__py3-none-any.whl → 3.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,398 @@
+ import warnings
+ import inspect
+
+ from typing import Optional, Tuple, Union, Dict, Type
+ from pydantic import BaseModel, SecretStr
+ from openai.types.chat.chat_completion import ChatCompletion
+ from openai import (
+     OpenAI,
+     AsyncOpenAI,
+ )
+
+ from deepeval.config.settings import get_settings
+ from deepeval.constants import ProviderSlug as PS
+ from deepeval.errors import DeepEvalError
+ from deepeval.models import DeepEvalBaseLLM
+ from deepeval.models.llms.constants import DEFAULT_OPENROUTER_MODEL
+ from deepeval.models.llms.utils import trim_and_load_json
+ from deepeval.models.utils import require_secret_api_key
+ from deepeval.models.retry_policy import (
+     create_retry_decorator,
+     sdk_retries_for,
+ )
+
+
+ retry_openrouter = create_retry_decorator(PS.OPENROUTER)
+
+
+ def _request_timeout_seconds() -> float:
+     timeout = float(get_settings().DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS or 0)
+     return timeout if timeout > 0 else 30.0
+
+
+ def _convert_schema_to_openrouter_format(
+     schema: Union[Type[BaseModel], BaseModel],
+ ) -> Dict:
+     """
+     Convert Pydantic BaseModel to OpenRouter's JSON Schema format.
+
+     OpenRouter expects:
+     {
+         "type": "json_schema",
+         "json_schema": {
+             "name": "schema_name",
+             "strict": true,
+             "schema": { ... JSON Schema ... }
+         }
+     }
+     """
+     json_schema = schema.model_json_schema()
+     schema_name = (
+         schema.__name__
+         if inspect.isclass(schema)
+         else schema.__class__.__name__
+     )
+
+     # OpenRouter requires additionalProperties: false when strict: true
+     # Ensure it's set at the root level of the schema
+     if "additionalProperties" not in json_schema:
+         json_schema["additionalProperties"] = False
+
+     return {
+         "type": "json_schema",
+         "json_schema": {
+             "name": schema_name,
+             "strict": True,
+             "schema": json_schema,
+         },
+     }
+
+
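For illustration, a standalone sketch of the payload this helper builds; `Verdict` and `to_openrouter_format` are hypothetical names used only for this example, not part of the package:

```python
from typing import Dict, Type
from pydantic import BaseModel

class Verdict(BaseModel):  # hypothetical example model
    verdict: str
    reason: str

def to_openrouter_format(schema: Type[BaseModel]) -> Dict:
    # Same transformation as the helper above: wrap the Pydantic JSON
    # Schema in OpenRouter's strict json_schema response_format envelope.
    json_schema = schema.model_json_schema()
    json_schema.setdefault("additionalProperties", False)
    return {
        "type": "json_schema",
        "json_schema": {
            "name": schema.__name__,
            "strict": True,
            "schema": json_schema,
        },
    }

# to_openrouter_format(Verdict)["json_schema"]["name"] == "Verdict",
# and the nested "schema" lists ["verdict", "reason"] as required.
```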
+ class OpenRouterModel(DeepEvalBaseLLM):
+     def __init__(
+         self,
+         model: Optional[str] = None,
+         api_key: Optional[str] = None,
+         base_url: Optional[str] = None,
+         temperature: Optional[float] = None,
+         cost_per_input_token: Optional[float] = None,
+         cost_per_output_token: Optional[float] = None,
+         generation_kwargs: Optional[Dict] = None,
+         **kwargs,
+     ):
+         settings = get_settings()
+         model = model or settings.OPENROUTER_MODEL_NAME
+         if model is None:
+             model = DEFAULT_OPENROUTER_MODEL
+
+         if api_key is not None:
+             # keep it secret, keep it safe from serialization, logging, and the like
+             self.api_key: Optional[SecretStr] = SecretStr(api_key)
+         else:
+             self.api_key = settings.OPENROUTER_API_KEY
+
+         if base_url is not None:
+             base_url = str(base_url).rstrip("/")
+         elif settings.OPENROUTER_BASE_URL is not None:
+             base_url = str(settings.OPENROUTER_BASE_URL).rstrip("/")
+         else:
+             base_url = "https://openrouter.ai/api/v1"
+
+         cost_per_input_token = (
+             cost_per_input_token
+             if cost_per_input_token is not None
+             else settings.OPENROUTER_COST_PER_INPUT_TOKEN
+         )
+         cost_per_output_token = (
+             cost_per_output_token
+             if cost_per_output_token is not None
+             else settings.OPENROUTER_COST_PER_OUTPUT_TOKEN
+         )
+
+         if temperature is not None:
+             temperature = float(temperature)
+         elif settings.TEMPERATURE is not None:
+             temperature = settings.TEMPERATURE
+         else:
+             temperature = 0.0
+
+         # validation
+         if temperature < 0:
+             raise DeepEvalError("Temperature must be >= 0.")
+
+         self.base_url = base_url
+         self.cost_per_input_token = cost_per_input_token
+         self.cost_per_output_token = cost_per_output_token
+         self.temperature = temperature
+
+         self.kwargs = dict(kwargs)
+         self.kwargs.pop("temperature", None)
+
+         self.generation_kwargs = dict(generation_kwargs or {})
+         self.generation_kwargs.pop("temperature", None)
+
+         super().__init__(model)
+
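Each constructor option resolves with the same precedence: explicit argument, then the matching `OPENROUTER_*` setting, then a hard-coded default. A hedged usage sketch, assuming `OpenRouterModel` is exported from `deepeval.models` alongside the other provider wrappers; the model slug, key, and prices below are placeholders:

```python
from deepeval.models import OpenRouterModel  # assumed export path

model = OpenRouterModel(
    model="anthropic/claude-3.5-sonnet",  # any OpenRouter model slug
    api_key="sk-or-...",                  # falls back to OPENROUTER_API_KEY
    temperature=0.0,                      # falls back to TEMPERATURE, then 0.0
    cost_per_input_token=3e-06,           # optional: enables cost tracking
    cost_per_output_token=1.5e-05,
)
```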
+     ###############################################
+     # Generate functions
+     ###############################################
+
+     async def _generate_with_client(
+         self,
+         client: AsyncOpenAI,
+         prompt: str,
+         schema: Optional[BaseModel] = None,
+     ) -> Tuple[Union[str, BaseModel], float]:
+         """
+         Core generation logic shared between generate() and a_generate().
+
+         Args:
+             client: AsyncOpenAI client
+             prompt: The prompt to send
+             schema: Optional Pydantic schema for structured outputs
+
+         Returns:
+             Tuple of (output, cost)
+         """
+         if schema:
+             # Try OpenRouter's native JSON Schema format
+             try:
+                 openrouter_response_format = (
+                     _convert_schema_to_openrouter_format(schema)
+                 )
+                 completion = await client.chat.completions.create(
+                     model=self.name,
+                     messages=[{"role": "user", "content": prompt}],
+                     response_format=openrouter_response_format,
+                     temperature=self.temperature,
+                     **self.generation_kwargs,
+                 )
+
+                 # Parse the JSON response and validate against schema
+                 json_output = trim_and_load_json(
+                     completion.choices[0].message.content
+                 )
+                 cost = self.calculate_cost(
+                     completion.usage.prompt_tokens,
+                     completion.usage.completion_tokens,
+                     response=completion,
+                 )
+                 return schema.model_validate(json_output), cost
+             except Exception as e:
+                 # Warn if structured outputs fail
+                 warnings.warn(
+                     f"Structured outputs not supported for model '{self.name}'. "
+                     f"Falling back to regular generation with JSON parsing. "
+                     f"Error: {str(e)}",
+                     UserWarning,
+                     stacklevel=3,
+                 )
+                 # Fall back to regular generation and parse JSON manually (like Bedrock)
+                 # This works with any model that can generate JSON in text
+                 pass
+
+         # Regular generation (or fallback if structured outputs failed)
+         completion = await client.chat.completions.create(
+             model=self.name,
+             messages=[{"role": "user", "content": prompt}],
+             temperature=self.temperature,
+             **self.generation_kwargs,
+         )
+
+         output = completion.choices[0].message.content
+         cost = self.calculate_cost(
+             completion.usage.prompt_tokens,
+             completion.usage.completion_tokens,
+             response=completion,
+         )
+         if schema:
+             # Parse JSON from text and validate against schema (like Bedrock)
+             json_output = trim_and_load_json(output)
+             return schema.model_validate(json_output), cost
+         else:
+             return output, cost
+
+     @retry_openrouter
+     def generate(
+         self, prompt: str, schema: Optional[BaseModel] = None
+     ) -> Tuple[Union[str, BaseModel], float]:
+         from deepeval.models.llms.utils import safe_asyncio_run
+
+         client = self.load_model(async_mode=True)
+         return safe_asyncio_run(
+             self._generate_with_client(client, prompt, schema)
+         )
+
+     @retry_openrouter
+     async def a_generate(
+         self, prompt: str, schema: Optional[BaseModel] = None
+     ) -> Tuple[Union[str, BaseModel], float]:
+         client = self.load_model(async_mode=True)
+         return await self._generate_with_client(client, prompt, schema)
+
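A hedged sketch of schema-driven generation, reusing the `model` instance from the constructor example; `Verdict` is again a hypothetical schema. When the provider honors the `json_schema` response format the call returns a validated instance; otherwise it warns and falls back to plain generation plus JSON parsing:

```python
from pydantic import BaseModel

class Verdict(BaseModel):  # hypothetical schema for illustration
    verdict: str
    reason: str

result, cost = model.generate(
    "Is the sky blue? Answer as JSON with 'verdict' and 'reason'.",
    schema=Verdict,
)
assert isinstance(result, Verdict)  # validated via schema.model_validate(...)
```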
+     ###############################################
+     # Other generate functions
+     ###############################################
+
+     @retry_openrouter
+     def generate_raw_response(
+         self,
+         prompt: str,
+         top_logprobs: int = 5,
+     ) -> Tuple[ChatCompletion, float]:
+         # Generate completion
+         client = self.load_model(async_mode=False)
+         completion = client.chat.completions.create(
+             model=self.name,
+             messages=[{"role": "user", "content": prompt}],
+             temperature=self.temperature,
+             logprobs=True,
+             top_logprobs=top_logprobs,
+             **self.generation_kwargs,
+         )
+         # Cost calculation
+         input_tokens = completion.usage.prompt_tokens
+         output_tokens = completion.usage.completion_tokens
+         cost = self.calculate_cost(
+             input_tokens, output_tokens, response=completion
+         )
+
+         return completion, cost
+
+     @retry_openrouter
+     async def a_generate_raw_response(
+         self,
+         prompt: str,
+         top_logprobs: int = 5,
+     ) -> Tuple[ChatCompletion, float]:
+         # Generate completion
+         client = self.load_model(async_mode=True)
+         completion = await client.chat.completions.create(
+             model=self.name,
+             messages=[{"role": "user", "content": prompt}],
+             temperature=self.temperature,
+             logprobs=True,
+             top_logprobs=top_logprobs,
+             **self.generation_kwargs,
+         )
+         # Cost calculation
+         input_tokens = completion.usage.prompt_tokens
+         output_tokens = completion.usage.completion_tokens
+         cost = self.calculate_cost(
+             input_tokens, output_tokens, response=completion
+         )
+
+         return completion, cost
+
+     @retry_openrouter
+     def generate_samples(
+         self, prompt: str, n: int, temperature: float
+     ) -> Tuple[list[str], float]:
+         client = self.load_model(async_mode=False)
+         response = client.chat.completions.create(
+             model=self.name,
+             messages=[{"role": "user", "content": prompt}],
+             n=n,
+             temperature=temperature,
+             **self.generation_kwargs,
+         )
+         completions = [choice.message.content for choice in response.choices]
+         cost = self.calculate_cost(
+             response.usage.prompt_tokens,
+             response.usage.completion_tokens,
+             response=response,
+         )
+         return completions, cost
+
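Both raw-response helpers return the OpenAI SDK's `ChatCompletion` object unchanged, so the usual `choices`/`logprobs` shape applies. A hedged sketch reusing the `model` instance from above; whether logprobs are actually populated depends on the underlying OpenRouter provider:

```python
# Raw completion with per-token logprobs (a standard ChatCompletion object).
completion, cost = model.generate_raw_response("Answer yes or no.", top_logprobs=5)
logprobs = completion.choices[0].logprobs  # may be None if the provider omits them

# n independent samples at a caller-chosen temperature.
samples, cost = model.generate_samples("Name a color.", n=3, temperature=1.0)
```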
+     ###############################################
+     # Utilities
+     ###############################################
+
+     def calculate_cost(
+         self, input_tokens: int, output_tokens: int, response=None
+     ) -> Optional[float]:
+         """
+         Calculate cost with priority:
+         1. User-provided pricing (highest priority)
+         2. Try to extract from API response (if OpenRouter includes pricing)
+         3. Return None if cost cannot be determined
+         """
+         # Priority 1: User-provided pricing
+         if (
+             self.cost_per_input_token is not None
+             and self.cost_per_output_token is not None
+         ):
+             return (
+                 input_tokens * self.cost_per_input_token
+                 + output_tokens * self.cost_per_output_token
+             )
+
+         # Priority 2: Try to extract from API response (if OpenRouter includes pricing)
+         # Note: OpenRouter may include pricing in response metadata
+         if response is not None:
+             # Check if response has cost information
+             usage_cost = getattr(getattr(response, "usage", None), "cost", None)
+             if usage_cost is not None:
+                 try:
+                     return float(usage_cost)
+                 except (ValueError, TypeError):
+                     pass
+             # Some responses might have cost at the top level
+             response_cost = getattr(response, "cost", None)
+             if response_cost is not None:
+                 try:
+                     return float(response_cost)
+                 except (ValueError, TypeError):
+                     pass
+
+         # Priority 3: Return None since cost is unknown
+         return None
+
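As a worked example of priority 1, with the placeholder prices from the constructor sketch (3e-06 per input token, 1.5e-05 per output token), a call that used 1,200 prompt tokens and 300 completion tokens would be priced:

```python
cost = 1_200 * 3e-06 + 300 * 1.5e-05  # 0.0036 + 0.0045 = 0.0081 (USD)
```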
+     ###############################################
+     # Model
+     ###############################################
+
+     def get_model_name(self):
+         return f"{self.name} (OpenRouter)"
+
+     def load_model(self, async_mode: bool = False):
+         if not async_mode:
+             return self._build_client(OpenAI)
+         return self._build_client(AsyncOpenAI)
+
+     def _client_kwargs(self) -> Dict:
+         """
+         If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
+         If the user opts into SDK retries for 'openrouter' via DEEPEVAL_SDK_RETRY_PROVIDERS,
+         leave their retry settings as-is.
+         """
+         kwargs = dict(self.kwargs or {})
+         if not sdk_retries_for(PS.OPENROUTER):
+             kwargs["max_retries"] = 0
+
+         if not kwargs.get("timeout"):
+             kwargs["timeout"] = _request_timeout_seconds()
+
+         return kwargs
+
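To keep the SDK's built-in retries instead of Tenacity's, a user opts in through the `DEEPEVAL_SDK_RETRY_PROVIDERS` setting named in the docstring. A hedged sketch, assuming settings are read after the variable is set; extra keyword arguments flow through `**kwargs` to the OpenAI client constructor:

```python
import os

# Opt into OpenAI SDK retries for OpenRouter (per the docstring above),
# so the max_retries passed below is not forced to 0.
os.environ["DEEPEVAL_SDK_RETRY_PROVIDERS"] = "openrouter"

model = OpenRouterModel(max_retries=3, timeout=60.0)  # forwarded to the client
```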
+     def _build_client(self, cls):
+         api_key = require_secret_api_key(
+             self.api_key,
+             provider_label="OpenRouter",
+             env_var_name="OPENROUTER_API_KEY",
+             param_hint="`api_key` to OpenRouterModel(...)",
+         )
+
+         kw = dict(
+             api_key=api_key,
+             base_url=self.base_url,
+             **self._client_kwargs(),
+         )
+         try:
+             return cls(**kw)
+         except TypeError as e:
+             # Older OpenAI SDKs may not accept max_retries; drop it and retry once.
+             if "max_retries" in str(e):
+                 kw.pop("max_retries", None)
+                 return cls(**kw)
+             raise
deepeval/models/retry_policy.py CHANGED
@@ -772,6 +772,7 @@ AZURE_OPENAI_ERROR_POLICY = OPENAI_ERROR_POLICY
  DEEPSEEK_ERROR_POLICY = OPENAI_ERROR_POLICY
  KIMI_ERROR_POLICY = OPENAI_ERROR_POLICY
  LOCAL_ERROR_POLICY = OPENAI_ERROR_POLICY
+ OPENROUTER_ERROR_POLICY = OPENAI_ERROR_POLICY

  ######################
  # AWS Bedrock Policy #
@@ -998,6 +999,7 @@ _POLICY_BY_SLUG: dict[str, Optional[ErrorPolicy]] = {
      PS.LITELLM.value: LITELLM_ERROR_POLICY,
      PS.LOCAL.value: LOCAL_ERROR_POLICY,
      PS.OLLAMA.value: OLLAMA_ERROR_POLICY,
+     PS.OPENROUTER.value: OPENROUTER_ERROR_POLICY,
  }

@@ -1019,6 +1021,7 @@ _STATIC_PRED_BY_SLUG: dict[str, Optional[Callable[[Exception], bool]]] = {
      PS.LITELLM.value: _opt_pred(LITELLM_ERROR_POLICY),
      PS.LOCAL.value: _opt_pred(LOCAL_ERROR_POLICY),
      PS.OLLAMA.value: _opt_pred(OLLAMA_ERROR_POLICY),
+     PS.OPENROUTER.value: _opt_pred(OPENROUTER_ERROR_POLICY),
  }

deepeval/prompt/api.py CHANGED
@@ -30,6 +30,7 @@ class ModelProvider(Enum):
      X_AI = "X_AI"
      DEEPSEEK = "DEEPSEEK"
      BEDROCK = "BEDROCK"
+     OPENROUTER = "OPENROUTER"


  class ModelSettings(BaseModel):