deepeval 3.6.6__py3-none-any.whl → 3.6.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/benchmarks/equity_med_qa/equity_med_qa.py +1 -0
- deepeval/cli/main.py +42 -0
- deepeval/confident/api.py +1 -0
- deepeval/config/settings.py +22 -4
- deepeval/constants.py +8 -1
- deepeval/dataset/dataset.py +2 -11
- deepeval/dataset/utils.py +1 -1
- deepeval/errors.py +20 -2
- deepeval/evaluate/evaluate.py +5 -1
- deepeval/evaluate/execute.py +811 -248
- deepeval/evaluate/types.py +1 -0
- deepeval/evaluate/utils.py +33 -119
- deepeval/integrations/crewai/__init__.py +7 -1
- deepeval/integrations/crewai/handler.py +1 -1
- deepeval/integrations/crewai/subs.py +51 -0
- deepeval/integrations/crewai/tool.py +71 -0
- deepeval/integrations/crewai/wrapper.py +45 -5
- deepeval/integrations/llama_index/__init__.py +0 -4
- deepeval/integrations/llama_index/handler.py +20 -21
- deepeval/integrations/pydantic_ai/instrumentator.py +125 -76
- deepeval/metrics/__init__.py +13 -0
- deepeval/metrics/answer_relevancy/answer_relevancy.py +12 -3
- deepeval/metrics/api.py +281 -0
- deepeval/metrics/argument_correctness/argument_correctness.py +12 -2
- deepeval/metrics/base_metric.py +1 -0
- deepeval/metrics/bias/bias.py +12 -3
- deepeval/metrics/contextual_precision/contextual_precision.py +39 -24
- deepeval/metrics/contextual_recall/contextual_recall.py +12 -3
- deepeval/metrics/contextual_relevancy/contextual_relevancy.py +12 -1
- deepeval/metrics/conversation_completeness/conversation_completeness.py +12 -0
- deepeval/metrics/conversational_dag/conversational_dag.py +12 -0
- deepeval/metrics/conversational_dag/nodes.py +12 -4
- deepeval/metrics/conversational_g_eval/__init__.py +3 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +84 -66
- deepeval/metrics/dag/dag.py +12 -0
- deepeval/metrics/dag/nodes.py +12 -4
- deepeval/metrics/dag/schema.py +1 -1
- deepeval/metrics/dag/templates.py +2 -2
- deepeval/metrics/faithfulness/faithfulness.py +12 -1
- deepeval/metrics/g_eval/g_eval.py +11 -0
- deepeval/metrics/goal_accuracy/__init__.py +1 -0
- deepeval/metrics/goal_accuracy/goal_accuracy.py +349 -0
- deepeval/metrics/goal_accuracy/schema.py +17 -0
- deepeval/metrics/goal_accuracy/template.py +235 -0
- deepeval/metrics/hallucination/hallucination.py +20 -9
- deepeval/metrics/indicator.py +8 -2
- deepeval/metrics/json_correctness/json_correctness.py +12 -1
- deepeval/metrics/knowledge_retention/knowledge_retention.py +12 -0
- deepeval/metrics/mcp/mcp_task_completion.py +20 -2
- deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +29 -6
- deepeval/metrics/mcp_use_metric/mcp_use_metric.py +14 -2
- deepeval/metrics/misuse/misuse.py +12 -1
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +3 -0
- deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +3 -0
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +3 -0
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +3 -0
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +6 -1
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +38 -25
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +3 -0
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +3 -0
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +3 -0
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +3 -0
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +10 -5
- deepeval/metrics/non_advice/non_advice.py +12 -0
- deepeval/metrics/pii_leakage/pii_leakage.py +12 -1
- deepeval/metrics/plan_adherence/__init__.py +1 -0
- deepeval/metrics/plan_adherence/plan_adherence.py +292 -0
- deepeval/metrics/plan_adherence/schema.py +11 -0
- deepeval/metrics/plan_adherence/template.py +170 -0
- deepeval/metrics/plan_quality/__init__.py +1 -0
- deepeval/metrics/plan_quality/plan_quality.py +292 -0
- deepeval/metrics/plan_quality/schema.py +11 -0
- deepeval/metrics/plan_quality/template.py +101 -0
- deepeval/metrics/prompt_alignment/prompt_alignment.py +12 -1
- deepeval/metrics/role_adherence/role_adherence.py +12 -0
- deepeval/metrics/role_violation/role_violation.py +12 -0
- deepeval/metrics/step_efficiency/__init__.py +1 -0
- deepeval/metrics/step_efficiency/schema.py +11 -0
- deepeval/metrics/step_efficiency/step_efficiency.py +234 -0
- deepeval/metrics/step_efficiency/template.py +256 -0
- deepeval/metrics/summarization/summarization.py +12 -1
- deepeval/metrics/task_completion/task_completion.py +4 -0
- deepeval/metrics/tool_correctness/schema.py +6 -0
- deepeval/metrics/tool_correctness/template.py +88 -0
- deepeval/metrics/tool_correctness/tool_correctness.py +233 -21
- deepeval/metrics/tool_use/__init__.py +1 -0
- deepeval/metrics/tool_use/schema.py +19 -0
- deepeval/metrics/tool_use/template.py +220 -0
- deepeval/metrics/tool_use/tool_use.py +458 -0
- deepeval/metrics/topic_adherence/__init__.py +1 -0
- deepeval/metrics/topic_adherence/schema.py +16 -0
- deepeval/metrics/topic_adherence/template.py +162 -0
- deepeval/metrics/topic_adherence/topic_adherence.py +355 -0
- deepeval/metrics/toxicity/toxicity.py +12 -0
- deepeval/metrics/turn_relevancy/turn_relevancy.py +12 -0
- deepeval/models/embedding_models/azure_embedding_model.py +37 -36
- deepeval/models/embedding_models/local_embedding_model.py +30 -32
- deepeval/models/embedding_models/ollama_embedding_model.py +18 -20
- deepeval/models/embedding_models/openai_embedding_model.py +22 -31
- deepeval/models/llms/grok_model.py +1 -1
- deepeval/models/llms/openai_model.py +2 -0
- deepeval/openai/__init__.py +14 -32
- deepeval/openai/extractors.py +85 -50
- deepeval/openai/patch.py +258 -167
- deepeval/openai/types.py +20 -0
- deepeval/openai/utils.py +205 -56
- deepeval/prompt/__init__.py +19 -1
- deepeval/prompt/api.py +160 -0
- deepeval/prompt/prompt.py +245 -62
- deepeval/prompt/utils.py +186 -15
- deepeval/synthesizer/chunking/context_generator.py +209 -152
- deepeval/synthesizer/chunking/doc_chunker.py +46 -12
- deepeval/synthesizer/synthesizer.py +19 -15
- deepeval/test_case/api.py +131 -0
- deepeval/test_case/llm_test_case.py +6 -2
- deepeval/test_run/__init__.py +1 -0
- deepeval/test_run/hyperparameters.py +47 -8
- deepeval/test_run/test_run.py +292 -206
- deepeval/tracing/__init__.py +2 -1
- deepeval/tracing/api.py +3 -1
- deepeval/tracing/otel/exporter.py +3 -4
- deepeval/tracing/otel/utils.py +24 -5
- deepeval/tracing/trace_context.py +89 -5
- deepeval/tracing/tracing.py +74 -3
- deepeval/tracing/types.py +20 -2
- deepeval/tracing/utils.py +8 -0
- deepeval/utils.py +21 -0
- {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/METADATA +1 -1
- {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/RECORD +133 -103
- deepeval/integrations/llama_index/agent/patched.py +0 -68
- {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/LICENSE.md +0 -0
- {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/WHEEL +0 -0
- {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/entry_points.txt +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from openai import OpenAI, AsyncOpenAI
|
|
2
|
-
from typing import Dict, List
|
|
2
|
+
from typing import Dict, List, Optional
|
|
3
3
|
|
|
4
4
|
from deepeval.key_handler import EmbeddingKeyValues, KEY_FILE_HANDLER
|
|
5
5
|
from deepeval.models import DeepEvalBaseEmbeddingModel
|
|
@@ -15,25 +15,32 @@ retry_local = create_retry_decorator(PS.LOCAL)
|
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
18
|
-
def __init__(
|
|
19
|
-
self
|
|
18
|
+
def __init__(
|
|
19
|
+
self,
|
|
20
|
+
api_key: Optional[str] = None,
|
|
21
|
+
base_url: Optional[str] = None,
|
|
22
|
+
model: Optional[str] = None,
|
|
23
|
+
generation_kwargs: Optional[Dict] = None,
|
|
24
|
+
**client_kwargs,
|
|
25
|
+
):
|
|
26
|
+
self.api_key = api_key or KEY_FILE_HANDLER.fetch_data(
|
|
27
|
+
EmbeddingKeyValues.LOCAL_EMBEDDING_API_KEY
|
|
28
|
+
)
|
|
29
|
+
self.base_url = base_url or KEY_FILE_HANDLER.fetch_data(
|
|
20
30
|
EmbeddingKeyValues.LOCAL_EMBEDDING_BASE_URL
|
|
21
31
|
)
|
|
22
|
-
model_name = KEY_FILE_HANDLER.fetch_data(
|
|
32
|
+
self.model_name = model or KEY_FILE_HANDLER.fetch_data(
|
|
23
33
|
EmbeddingKeyValues.LOCAL_EMBEDDING_MODEL_NAME
|
|
24
34
|
)
|
|
25
|
-
self.
|
|
26
|
-
|
|
27
|
-
)
|
|
28
|
-
self.kwargs = kwargs
|
|
29
|
-
super().__init__(model_name)
|
|
35
|
+
self.client_kwargs = client_kwargs or {}
|
|
36
|
+
self.generation_kwargs = generation_kwargs or {}
|
|
37
|
+
super().__init__(self.model_name)
|
|
30
38
|
|
|
31
39
|
@retry_local
|
|
32
40
|
def embed_text(self, text: str) -> List[float]:
|
|
33
41
|
embedding_model = self.load_model()
|
|
34
42
|
response = embedding_model.embeddings.create(
|
|
35
|
-
model=self.model_name,
|
|
36
|
-
input=[text],
|
|
43
|
+
model=self.model_name, input=[text], **self.generation_kwargs
|
|
37
44
|
)
|
|
38
45
|
return response.data[0].embedding
|
|
39
46
|
|
|
@@ -41,8 +48,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
41
48
|
def embed_texts(self, texts: List[str]) -> List[List[float]]:
|
|
42
49
|
embedding_model = self.load_model()
|
|
43
50
|
response = embedding_model.embeddings.create(
|
|
44
|
-
model=self.model_name,
|
|
45
|
-
input=texts,
|
|
51
|
+
model=self.model_name, input=texts, **self.generation_kwargs
|
|
46
52
|
)
|
|
47
53
|
return [data.embedding for data in response.data]
|
|
48
54
|
|
|
@@ -50,8 +56,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
50
56
|
async def a_embed_text(self, text: str) -> List[float]:
|
|
51
57
|
embedding_model = self.load_model(async_mode=True)
|
|
52
58
|
response = await embedding_model.embeddings.create(
|
|
53
|
-
model=self.model_name,
|
|
54
|
-
input=[text],
|
|
59
|
+
model=self.model_name, input=[text], **self.generation_kwargs
|
|
55
60
|
)
|
|
56
61
|
return response.data[0].embedding
|
|
57
62
|
|
|
@@ -59,8 +64,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
59
64
|
async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
|
|
60
65
|
embedding_model = self.load_model(async_mode=True)
|
|
61
66
|
response = await embedding_model.embeddings.create(
|
|
62
|
-
model=self.model_name,
|
|
63
|
-
input=texts,
|
|
67
|
+
model=self.model_name, input=texts, **self.generation_kwargs
|
|
64
68
|
)
|
|
65
69
|
return [data.embedding for data in response.data]
|
|
66
70
|
|
|
@@ -76,27 +80,21 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
76
80
|
return self._build_client(OpenAI)
|
|
77
81
|
return self._build_client(AsyncOpenAI)
|
|
78
82
|
|
|
79
|
-
def
|
|
80
|
-
|
|
81
|
-
If Tenacity manages retries, turn off OpenAI SDK retries to avoid double retrying.
|
|
82
|
-
If users opt into SDK retries via DEEPEVAL_SDK_RETRY_PROVIDERS=local, leave them enabled.
|
|
83
|
-
"""
|
|
84
|
-
kwargs = dict(self.kwargs or {})
|
|
83
|
+
def _build_client(self, cls):
|
|
84
|
+
client_kwargs = self.client_kwargs.copy()
|
|
85
85
|
if not sdk_retries_for(PS.LOCAL):
|
|
86
|
-
|
|
87
|
-
return kwargs
|
|
86
|
+
client_kwargs["max_retries"] = 0
|
|
88
87
|
|
|
89
|
-
|
|
90
|
-
kw = dict(
|
|
88
|
+
client_init_kwargs = dict(
|
|
91
89
|
api_key=self.api_key,
|
|
92
90
|
base_url=self.base_url,
|
|
93
|
-
**
|
|
91
|
+
**client_kwargs,
|
|
94
92
|
)
|
|
95
93
|
try:
|
|
96
|
-
return cls(**
|
|
94
|
+
return cls(**client_init_kwargs)
|
|
97
95
|
except TypeError as e:
|
|
98
|
-
#
|
|
96
|
+
# older OpenAI SDKs may not accept max_retries, in that case remove and retry once
|
|
99
97
|
if "max_retries" in str(e):
|
|
100
|
-
|
|
101
|
-
return cls(**
|
|
98
|
+
client_init_kwargs.pop("max_retries", None)
|
|
99
|
+
return cls(**client_init_kwargs)
|
|
102
100
|
raise
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from ollama import Client, AsyncClient
|
|
2
|
-
from typing import List
|
|
2
|
+
from typing import List, Optional, Dict
|
|
3
3
|
|
|
4
4
|
from deepeval.key_handler import EmbeddingKeyValues, KEY_FILE_HANDLER
|
|
5
5
|
from deepeval.models import DeepEvalBaseEmbeddingModel
|
|
@@ -13,27 +13,28 @@ retry_ollama = create_retry_decorator(PS.OLLAMA)
|
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
16
|
-
def __init__(
|
|
17
|
-
self
|
|
16
|
+
def __init__(
|
|
17
|
+
self,
|
|
18
|
+
model: Optional[str] = None,
|
|
19
|
+
host: Optional[str] = None,
|
|
20
|
+
generation_kwargs: Optional[Dict] = None,
|
|
21
|
+
**client_kwargs,
|
|
22
|
+
):
|
|
23
|
+
self.host = host or KEY_FILE_HANDLER.fetch_data(
|
|
18
24
|
EmbeddingKeyValues.LOCAL_EMBEDDING_BASE_URL
|
|
19
25
|
)
|
|
20
|
-
model_name = KEY_FILE_HANDLER.fetch_data(
|
|
26
|
+
self.model_name = model or KEY_FILE_HANDLER.fetch_data(
|
|
21
27
|
EmbeddingKeyValues.LOCAL_EMBEDDING_MODEL_NAME
|
|
22
28
|
)
|
|
23
|
-
|
|
24
|
-
self.
|
|
25
|
-
|
|
26
|
-
)
|
|
27
|
-
self.args = args
|
|
28
|
-
self.kwargs = kwargs
|
|
29
|
-
super().__init__(model_name)
|
|
29
|
+
self.client_kwargs = client_kwargs or {}
|
|
30
|
+
self.generation_kwargs = generation_kwargs or {}
|
|
31
|
+
super().__init__(self.model_name)
|
|
30
32
|
|
|
31
33
|
@retry_ollama
|
|
32
34
|
def embed_text(self, text: str) -> List[float]:
|
|
33
35
|
embedding_model = self.load_model()
|
|
34
36
|
response = embedding_model.embed(
|
|
35
|
-
model=self.model_name,
|
|
36
|
-
input=text,
|
|
37
|
+
model=self.model_name, input=text, **self.generation_kwargs
|
|
37
38
|
)
|
|
38
39
|
return response["embeddings"][0]
|
|
39
40
|
|
|
@@ -41,8 +42,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
41
42
|
def embed_texts(self, texts: List[str]) -> List[List[float]]:
|
|
42
43
|
embedding_model = self.load_model()
|
|
43
44
|
response = embedding_model.embed(
|
|
44
|
-
model=self.model_name,
|
|
45
|
-
input=texts,
|
|
45
|
+
model=self.model_name, input=texts, **self.generation_kwargs
|
|
46
46
|
)
|
|
47
47
|
return response["embeddings"]
|
|
48
48
|
|
|
@@ -50,8 +50,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
50
50
|
async def a_embed_text(self, text: str) -> List[float]:
|
|
51
51
|
embedding_model = self.load_model(async_mode=True)
|
|
52
52
|
response = await embedding_model.embed(
|
|
53
|
-
model=self.model_name,
|
|
54
|
-
input=text,
|
|
53
|
+
model=self.model_name, input=text, **self.generation_kwargs
|
|
55
54
|
)
|
|
56
55
|
return response["embeddings"][0]
|
|
57
56
|
|
|
@@ -59,8 +58,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
59
58
|
async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
|
|
60
59
|
embedding_model = self.load_model(async_mode=True)
|
|
61
60
|
response = await embedding_model.embed(
|
|
62
|
-
model=self.model_name,
|
|
63
|
-
input=texts,
|
|
61
|
+
model=self.model_name, input=texts, **self.generation_kwargs
|
|
64
62
|
)
|
|
65
63
|
return response["embeddings"]
|
|
66
64
|
|
|
@@ -74,7 +72,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
74
72
|
return self._build_client(AsyncClient)
|
|
75
73
|
|
|
76
74
|
def _build_client(self, cls):
|
|
77
|
-
return cls(host=self.
|
|
75
|
+
return cls(host=self.host, **self.client_kwargs)
|
|
78
76
|
|
|
79
77
|
def get_model_name(self):
|
|
80
78
|
return f"{self.model_name} (Ollama)"
|
|
@@ -19,27 +19,28 @@ default_openai_embedding_model = "text-embedding-3-small"
|
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
22
|
+
|
|
22
23
|
def __init__(
|
|
23
24
|
self,
|
|
24
25
|
model: Optional[str] = None,
|
|
25
|
-
|
|
26
|
-
|
|
26
|
+
openai_api_key: Optional[str] = None,
|
|
27
|
+
generation_kwargs: Optional[Dict] = None,
|
|
28
|
+
**client_kwargs,
|
|
27
29
|
):
|
|
28
|
-
|
|
29
|
-
if
|
|
30
|
+
self.openai_api_key = openai_api_key
|
|
31
|
+
self.model_name = model if model else default_openai_embedding_model
|
|
32
|
+
if self.model_name not in valid_openai_embedding_models:
|
|
30
33
|
raise ValueError(
|
|
31
34
|
f"Invalid model. Available OpenAI Embedding models: {', '.join(valid_openai_embedding_models)}"
|
|
32
35
|
)
|
|
33
|
-
self.
|
|
34
|
-
self.
|
|
35
|
-
self.kwargs = kwargs
|
|
36
|
+
self.client_kwargs = client_kwargs or {}
|
|
37
|
+
self.generation_kwargs = generation_kwargs or {}
|
|
36
38
|
|
|
37
39
|
@retry_openai
|
|
38
40
|
def embed_text(self, text: str) -> List[float]:
|
|
39
41
|
client = self.load_model(async_mode=False)
|
|
40
42
|
response = client.embeddings.create(
|
|
41
|
-
input=text,
|
|
42
|
-
model=self.model_name,
|
|
43
|
+
input=text, model=self.model_name, **self.generation_kwargs
|
|
43
44
|
)
|
|
44
45
|
return response.data[0].embedding
|
|
45
46
|
|
|
@@ -47,8 +48,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
47
48
|
def embed_texts(self, texts: List[str]) -> List[List[float]]:
|
|
48
49
|
client = self.load_model(async_mode=False)
|
|
49
50
|
response = client.embeddings.create(
|
|
50
|
-
input=texts,
|
|
51
|
-
model=self.model_name,
|
|
51
|
+
input=texts, model=self.model_name, **self.generation_kwargs
|
|
52
52
|
)
|
|
53
53
|
return [item.embedding for item in response.data]
|
|
54
54
|
|
|
@@ -56,8 +56,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
56
56
|
async def a_embed_text(self, text: str) -> List[float]:
|
|
57
57
|
client = self.load_model(async_mode=True)
|
|
58
58
|
response = await client.embeddings.create(
|
|
59
|
-
input=text,
|
|
60
|
-
model=self.model_name,
|
|
59
|
+
input=text, model=self.model_name, **self.generation_kwargs
|
|
61
60
|
)
|
|
62
61
|
return response.data[0].embedding
|
|
63
62
|
|
|
@@ -65,8 +64,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
65
64
|
async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
|
|
66
65
|
client = self.load_model(async_mode=True)
|
|
67
66
|
response = await client.embeddings.create(
|
|
68
|
-
input=texts,
|
|
69
|
-
model=self.model_name,
|
|
67
|
+
input=texts, model=self.model_name, **self.generation_kwargs
|
|
70
68
|
)
|
|
71
69
|
return [item.embedding for item in response.data]
|
|
72
70
|
|
|
@@ -82,27 +80,20 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
82
80
|
return self._build_client(OpenAI)
|
|
83
81
|
return self._build_client(AsyncOpenAI)
|
|
84
82
|
|
|
85
|
-
def
|
|
86
|
-
|
|
87
|
-
If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
|
|
88
|
-
If the user opts into SDK retries for 'openai' via DEEPEVAL_SDK_RETRY_PROVIDERS,
|
|
89
|
-
leave their retry settings as is.
|
|
90
|
-
"""
|
|
91
|
-
kwargs = dict(self.kwargs or {})
|
|
83
|
+
def _build_client(self, cls):
|
|
84
|
+
client_kwargs = self.client_kwargs.copy()
|
|
92
85
|
if not sdk_retries_for(PS.OPENAI):
|
|
93
|
-
|
|
94
|
-
return kwargs
|
|
86
|
+
client_kwargs["max_retries"] = 0
|
|
95
87
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
**self._client_kwargs(),
|
|
88
|
+
client_init_kwargs = dict(
|
|
89
|
+
api_key=self.openai_api_key,
|
|
90
|
+
**client_kwargs,
|
|
100
91
|
)
|
|
101
92
|
try:
|
|
102
|
-
return cls(**
|
|
93
|
+
return cls(**client_init_kwargs)
|
|
103
94
|
except TypeError as e:
|
|
104
95
|
# older OpenAI SDKs may not accept max_retries, in that case remove and retry once
|
|
105
96
|
if "max_retries" in str(e):
|
|
106
|
-
|
|
107
|
-
return cls(**
|
|
97
|
+
client_init_kwargs.pop("max_retries", None)
|
|
98
|
+
return cls(**client_init_kwargs)
|
|
108
99
|
raise
|
|
@@ -56,8 +56,8 @@ model_pricing = {
|
|
|
56
56
|
class GrokModel(DeepEvalBaseLLM):
|
|
57
57
|
def __init__(
|
|
58
58
|
self,
|
|
59
|
-
api_key: Optional[str] = None,
|
|
60
59
|
model: Optional[str] = None,
|
|
60
|
+
api_key: Optional[str] = None,
|
|
61
61
|
temperature: float = 0,
|
|
62
62
|
generation_kwargs: Optional[Dict] = None,
|
|
63
63
|
**kwargs,
|
deepeval/openai/__init__.py
CHANGED
|
@@ -1,37 +1,19 @@
|
|
|
1
|
-
|
|
2
|
-
import
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
1
|
+
try:
|
|
2
|
+
import openai # noqa: F401
|
|
3
|
+
except ImportError:
|
|
4
|
+
raise ModuleNotFoundError(
|
|
5
|
+
"Please install OpenAI to use this feature: 'pip install openai'"
|
|
6
|
+
)
|
|
6
7
|
|
|
7
8
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
loader = SourceFileLoader("deepeval_openai", openai_spec.origin)
|
|
14
|
-
new_spec = importlib.util.spec_from_loader(
|
|
15
|
-
"deepeval_openai",
|
|
16
|
-
loader,
|
|
17
|
-
origin=openai_spec.origin,
|
|
18
|
-
is_package=True,
|
|
19
|
-
)
|
|
20
|
-
deepeval_openai = importlib.util.module_from_spec(new_spec)
|
|
21
|
-
deepeval_openai.__path__ = package_dirs
|
|
22
|
-
sys.modules["deepeval_openai"] = deepeval_openai
|
|
23
|
-
loader.exec_module(deepeval_openai)
|
|
24
|
-
patch_openai(deepeval_openai)
|
|
25
|
-
return deepeval_openai
|
|
9
|
+
try:
|
|
10
|
+
from openai import OpenAI, AsyncOpenAI # noqa: F401
|
|
11
|
+
except ImportError:
|
|
12
|
+
OpenAI = None # type: ignore
|
|
13
|
+
AsyncOpenAI = None # type: ignore
|
|
26
14
|
|
|
27
15
|
|
|
28
|
-
|
|
29
|
-
openai
|
|
30
|
-
OpenAI = patched_openai.OpenAI
|
|
31
|
-
AsyncOpenAI = patched_openai.AsyncOpenAI
|
|
16
|
+
if OpenAI or AsyncOpenAI:
|
|
17
|
+
from deepeval.openai.patch import patch_openai_classes
|
|
32
18
|
|
|
33
|
-
|
|
34
|
-
"openai",
|
|
35
|
-
"OpenAI",
|
|
36
|
-
"AsyncOpenAI",
|
|
37
|
-
]
|
|
19
|
+
patch_openai_classes()
|
deepeval/openai/extractors.py
CHANGED
|
@@ -1,39 +1,36 @@
|
|
|
1
|
+
import json
|
|
1
2
|
from openai.types.chat import ChatCompletion, ParsedChatCompletion
|
|
2
|
-
from typing import
|
|
3
|
+
from typing import Any, Union, Dict
|
|
3
4
|
from openai.types.responses import Response
|
|
4
|
-
from pydantic import BaseModel
|
|
5
|
-
import json
|
|
6
5
|
|
|
7
6
|
from deepeval.test_case.llm_test_case import ToolCall
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
prompt_tokens: Optional[int] = None
|
|
21
|
-
completion_tokens: Optional[int] = None
|
|
22
|
-
tools_called: Optional[List[ToolCall]] = None
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def extract_input_parameters(
|
|
26
|
-
is_completion: bool, kwargs: Dict
|
|
7
|
+
from deepeval.openai.utils import (
|
|
8
|
+
render_response_input,
|
|
9
|
+
stringify_multimodal_content,
|
|
10
|
+
render_messages,
|
|
11
|
+
)
|
|
12
|
+
from deepeval.openai.types import InputParameters, OutputParameters
|
|
13
|
+
from deepeval.tracing.types import Message
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# guarding against errors to be compatible with legacy APIs
|
|
17
|
+
def safe_extract_input_parameters(
|
|
18
|
+
is_completion: bool, kwargs: Dict[str, Any]
|
|
27
19
|
) -> InputParameters:
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
20
|
+
try:
|
|
21
|
+
if is_completion:
|
|
22
|
+
return extract_input_parameters_from_completion(kwargs)
|
|
23
|
+
else:
|
|
24
|
+
return extract_input_parameters_from_response(kwargs)
|
|
25
|
+
except:
|
|
26
|
+
return InputParameters(model="NA")
|
|
32
27
|
|
|
33
28
|
|
|
34
|
-
def extract_input_parameters_from_completion(
|
|
29
|
+
def extract_input_parameters_from_completion(
|
|
30
|
+
kwargs: Dict[str, Any],
|
|
31
|
+
) -> InputParameters:
|
|
35
32
|
model = kwargs.get("model")
|
|
36
|
-
messages = kwargs.get("messages")
|
|
33
|
+
messages = kwargs.get("messages") or []
|
|
37
34
|
tools = kwargs.get("tools")
|
|
38
35
|
tool_descriptions_map = (
|
|
39
36
|
{
|
|
@@ -45,7 +42,7 @@ def extract_input_parameters_from_completion(kwargs: Dict) -> InputParameters:
|
|
|
45
42
|
)
|
|
46
43
|
|
|
47
44
|
# extract first user input from messages
|
|
48
|
-
|
|
45
|
+
input_arg = ""
|
|
49
46
|
user_messages = []
|
|
50
47
|
for message in messages:
|
|
51
48
|
role = message["role"]
|
|
@@ -53,20 +50,25 @@ def extract_input_parameters_from_completion(kwargs: Dict) -> InputParameters:
|
|
|
53
50
|
if role == "user":
|
|
54
51
|
user_messages.append(content)
|
|
55
52
|
if len(user_messages) > 0:
|
|
56
|
-
|
|
53
|
+
input_arg = user_messages[0]
|
|
54
|
+
|
|
55
|
+
# render messages
|
|
56
|
+
messages = render_messages(messages)
|
|
57
57
|
|
|
58
58
|
return InputParameters(
|
|
59
59
|
model=model,
|
|
60
|
-
input=
|
|
60
|
+
input=stringify_multimodal_content(input_arg),
|
|
61
61
|
messages=messages,
|
|
62
62
|
tools=tools,
|
|
63
63
|
tool_descriptions=tool_descriptions_map,
|
|
64
64
|
)
|
|
65
65
|
|
|
66
66
|
|
|
67
|
-
def extract_input_parameters_from_response(
|
|
67
|
+
def extract_input_parameters_from_response(
|
|
68
|
+
kwargs: Dict[str, Any],
|
|
69
|
+
) -> InputParameters:
|
|
68
70
|
model = kwargs.get("model")
|
|
69
|
-
|
|
71
|
+
input_payload = kwargs.get("input")
|
|
70
72
|
instructions = kwargs.get("instructions")
|
|
71
73
|
tools = kwargs.get("tools")
|
|
72
74
|
tool_descriptions = (
|
|
@@ -74,35 +76,59 @@ def extract_input_parameters_from_response(kwargs: Dict) -> InputParameters:
|
|
|
74
76
|
if tools is not None
|
|
75
77
|
else None
|
|
76
78
|
)
|
|
79
|
+
messages = []
|
|
80
|
+
if isinstance(input_payload, list):
|
|
81
|
+
messages = render_response_input(input_payload)
|
|
82
|
+
elif isinstance(input_payload, str):
|
|
83
|
+
messages = [
|
|
84
|
+
{
|
|
85
|
+
"role": "user",
|
|
86
|
+
"content": input_payload,
|
|
87
|
+
}
|
|
88
|
+
]
|
|
89
|
+
if instructions:
|
|
90
|
+
messages.insert(
|
|
91
|
+
0,
|
|
92
|
+
{
|
|
93
|
+
"role": "system",
|
|
94
|
+
"content": instructions,
|
|
95
|
+
},
|
|
96
|
+
)
|
|
77
97
|
return InputParameters(
|
|
78
98
|
model=model,
|
|
79
|
-
input=
|
|
99
|
+
input=stringify_multimodal_content(input_payload),
|
|
100
|
+
messages=messages,
|
|
80
101
|
instructions=instructions,
|
|
81
102
|
tools=tools,
|
|
82
103
|
tool_descriptions=tool_descriptions,
|
|
83
104
|
)
|
|
84
105
|
|
|
85
106
|
|
|
86
|
-
def
|
|
107
|
+
def safe_extract_output_parameters(
|
|
87
108
|
is_completion: bool,
|
|
88
109
|
response: Union[ChatCompletion, ParsedChatCompletion, Response],
|
|
89
110
|
input_parameters: InputParameters,
|
|
90
111
|
) -> OutputParameters:
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
112
|
+
|
|
113
|
+
# guarding against errors to be compatible with legacy APIs
|
|
114
|
+
try:
|
|
115
|
+
if is_completion:
|
|
116
|
+
return extract_output_parameters_from_completion(
|
|
117
|
+
response, input_parameters
|
|
118
|
+
)
|
|
119
|
+
else:
|
|
120
|
+
return extract_output_parameters_from_response(
|
|
121
|
+
response, input_parameters
|
|
122
|
+
)
|
|
123
|
+
except:
|
|
124
|
+
return OutputParameters()
|
|
99
125
|
|
|
100
126
|
|
|
101
127
|
def extract_output_parameters_from_completion(
|
|
102
128
|
completion: Union[ChatCompletion, ParsedChatCompletion],
|
|
103
129
|
input_parameters: InputParameters,
|
|
104
130
|
) -> OutputParameters:
|
|
105
|
-
output = str(completion.choices[0].message.content)
|
|
131
|
+
output = str(completion.choices[0].message.content or "")
|
|
106
132
|
prompt_tokens = completion.usage.prompt_tokens
|
|
107
133
|
completion_tokens = completion.usage.completion_tokens
|
|
108
134
|
|
|
@@ -112,16 +138,21 @@ def extract_output_parameters_from_completion(
|
|
|
112
138
|
if openai_tool_calls is not None:
|
|
113
139
|
tools_called = []
|
|
114
140
|
for tool_call in openai_tool_calls:
|
|
141
|
+
tool_descriptions = input_parameters.tool_descriptions or {}
|
|
115
142
|
tools_called.append(
|
|
116
143
|
ToolCall(
|
|
117
144
|
name=tool_call.function.name,
|
|
118
145
|
input_parameters=json.loads(tool_call.function.arguments),
|
|
119
|
-
description=
|
|
120
|
-
tool_call.function.name
|
|
121
|
-
),
|
|
146
|
+
description=tool_descriptions.get(tool_call.function.name),
|
|
122
147
|
)
|
|
123
148
|
)
|
|
124
149
|
|
|
150
|
+
if not output and tools_called:
|
|
151
|
+
tool_calls = []
|
|
152
|
+
for tool_call in tools_called:
|
|
153
|
+
tool_calls.append(tool_call)
|
|
154
|
+
output = tool_calls
|
|
155
|
+
|
|
125
156
|
return OutputParameters(
|
|
126
157
|
output=output,
|
|
127
158
|
prompt_tokens=prompt_tokens,
|
|
@@ -145,15 +176,19 @@ def extract_output_parameters_from_response(
|
|
|
145
176
|
for tool_call in openai_raw_output:
|
|
146
177
|
if tool_call.type != "function_call":
|
|
147
178
|
continue
|
|
179
|
+
tool_descriptions = input_parameters.tool_descriptions or {}
|
|
148
180
|
tools_called.append(
|
|
149
181
|
ToolCall(
|
|
150
182
|
name=tool_call.name,
|
|
151
183
|
input_parameters=json.loads(tool_call.arguments),
|
|
152
|
-
description=
|
|
153
|
-
tool_call.name
|
|
154
|
-
),
|
|
184
|
+
description=tool_descriptions.get(tool_call.name),
|
|
155
185
|
)
|
|
156
186
|
)
|
|
187
|
+
if not output and tools_called:
|
|
188
|
+
tool_calls = []
|
|
189
|
+
for tool_call in tools_called:
|
|
190
|
+
tool_calls.append(tool_call)
|
|
191
|
+
output = tool_calls
|
|
157
192
|
|
|
158
193
|
return OutputParameters(
|
|
159
194
|
output=output,
|