deepeval 3.6.7__py3-none-any.whl → 3.6.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/errors.py +20 -2
- deepeval/evaluate/execute.py +725 -217
- deepeval/evaluate/types.py +1 -0
- deepeval/evaluate/utils.py +13 -3
- deepeval/integrations/crewai/__init__.py +2 -1
- deepeval/integrations/crewai/tool.py +71 -0
- deepeval/integrations/llama_index/__init__.py +0 -4
- deepeval/integrations/llama_index/handler.py +20 -21
- deepeval/integrations/pydantic_ai/instrumentator.py +125 -76
- deepeval/metrics/__init__.py +13 -0
- deepeval/metrics/base_metric.py +1 -0
- deepeval/metrics/contextual_precision/contextual_precision.py +27 -21
- deepeval/metrics/conversational_g_eval/__init__.py +3 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +11 -7
- deepeval/metrics/dag/schema.py +1 -1
- deepeval/metrics/dag/templates.py +2 -2
- deepeval/metrics/goal_accuracy/__init__.py +1 -0
- deepeval/metrics/goal_accuracy/goal_accuracy.py +349 -0
- deepeval/metrics/goal_accuracy/schema.py +17 -0
- deepeval/metrics/goal_accuracy/template.py +235 -0
- deepeval/metrics/hallucination/hallucination.py +8 -8
- deepeval/metrics/mcp/mcp_task_completion.py +7 -2
- deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +16 -6
- deepeval/metrics/mcp_use_metric/mcp_use_metric.py +2 -1
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +32 -24
- deepeval/metrics/plan_adherence/__init__.py +1 -0
- deepeval/metrics/plan_adherence/plan_adherence.py +292 -0
- deepeval/metrics/plan_adherence/schema.py +11 -0
- deepeval/metrics/plan_adherence/template.py +170 -0
- deepeval/metrics/plan_quality/__init__.py +1 -0
- deepeval/metrics/plan_quality/plan_quality.py +292 -0
- deepeval/metrics/plan_quality/schema.py +11 -0
- deepeval/metrics/plan_quality/template.py +101 -0
- deepeval/metrics/step_efficiency/__init__.py +1 -0
- deepeval/metrics/step_efficiency/schema.py +11 -0
- deepeval/metrics/step_efficiency/step_efficiency.py +234 -0
- deepeval/metrics/step_efficiency/template.py +256 -0
- deepeval/metrics/task_completion/task_completion.py +1 -0
- deepeval/metrics/tool_correctness/schema.py +6 -0
- deepeval/metrics/tool_correctness/template.py +88 -0
- deepeval/metrics/tool_correctness/tool_correctness.py +226 -22
- deepeval/metrics/tool_use/__init__.py +1 -0
- deepeval/metrics/tool_use/schema.py +19 -0
- deepeval/metrics/tool_use/template.py +220 -0
- deepeval/metrics/tool_use/tool_use.py +458 -0
- deepeval/metrics/topic_adherence/__init__.py +1 -0
- deepeval/metrics/topic_adherence/schema.py +16 -0
- deepeval/metrics/topic_adherence/template.py +162 -0
- deepeval/metrics/topic_adherence/topic_adherence.py +355 -0
- deepeval/models/embedding_models/azure_embedding_model.py +37 -36
- deepeval/models/embedding_models/local_embedding_model.py +30 -32
- deepeval/models/embedding_models/ollama_embedding_model.py +18 -20
- deepeval/models/embedding_models/openai_embedding_model.py +22 -31
- deepeval/openai/extractors.py +61 -16
- deepeval/openai/patch.py +8 -12
- deepeval/openai/types.py +1 -1
- deepeval/openai/utils.py +108 -1
- deepeval/prompt/prompt.py +1 -0
- deepeval/prompt/utils.py +43 -14
- deepeval/synthesizer/synthesizer.py +11 -10
- deepeval/test_case/llm_test_case.py +6 -2
- deepeval/test_run/test_run.py +190 -207
- deepeval/tracing/__init__.py +2 -1
- deepeval/tracing/otel/exporter.py +3 -4
- deepeval/tracing/otel/utils.py +23 -4
- deepeval/tracing/trace_context.py +53 -38
- deepeval/tracing/tracing.py +23 -0
- deepeval/tracing/types.py +16 -14
- deepeval/utils.py +21 -0
- {deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/METADATA +1 -1
- {deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/RECORD +75 -53
- deepeval/integrations/llama_index/agent/patched.py +0 -68
- deepeval/tracing/message_types/__init__.py +0 -10
- deepeval/tracing/message_types/base.py +0 -6
- deepeval/tracing/message_types/messages.py +0 -14
- deepeval/tracing/message_types/tools.py +0 -18
- {deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/LICENSE.md +0 -0
- {deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/WHEEL +0 -0
- {deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/entry_points.txt +0 -0
deepeval/models/embedding_models/local_embedding_model.py
CHANGED

@@ -1,5 +1,5 @@
 from openai import OpenAI, AsyncOpenAI
-from typing import Dict, List
+from typing import Dict, List, Optional

 from deepeval.key_handler import EmbeddingKeyValues, KEY_FILE_HANDLER
 from deepeval.models import DeepEvalBaseEmbeddingModel
@@ -15,25 +15,32 @@ retry_local = create_retry_decorator(PS.LOCAL)


 class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
-    def __init__(
-        self
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
+        model: Optional[str] = None,
+        generation_kwargs: Optional[Dict] = None,
+        **client_kwargs,
+    ):
+        self.api_key = api_key or KEY_FILE_HANDLER.fetch_data(
+            EmbeddingKeyValues.LOCAL_EMBEDDING_API_KEY
+        )
+        self.base_url = base_url or KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_BASE_URL
         )
-        model_name = KEY_FILE_HANDLER.fetch_data(
+        self.model_name = model or KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_MODEL_NAME
         )
-        self.
-
-        )
-        self.kwargs = kwargs
-        super().__init__(model_name)
+        self.client_kwargs = client_kwargs or {}
+        self.generation_kwargs = generation_kwargs or {}
+        super().__init__(self.model_name)

     @retry_local
     def embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model()
         response = embedding_model.embeddings.create(
-            model=self.model_name,
-            input=[text],
+            model=self.model_name, input=[text], **self.generation_kwargs
         )
         return response.data[0].embedding

@@ -41,8 +48,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model()
         response = embedding_model.embeddings.create(
-            model=self.model_name,
-            input=texts,
+            model=self.model_name, input=texts, **self.generation_kwargs
         )
         return [data.embedding for data in response.data]

@@ -50,8 +56,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embeddings.create(
-            model=self.model_name,
-            input=[text],
+            model=self.model_name, input=[text], **self.generation_kwargs
         )
         return response.data[0].embedding

@@ -59,8 +64,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embeddings.create(
-            model=self.model_name,
-            input=texts,
+            model=self.model_name, input=texts, **self.generation_kwargs
         )
         return [data.embedding for data in response.data]

@@ -76,27 +80,21 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
             return self._build_client(OpenAI)
         return self._build_client(AsyncOpenAI)

-    def
-
-        If Tenacity manages retries, turn off OpenAI SDK retries to avoid double retrying.
-        If users opt into SDK retries via DEEPEVAL_SDK_RETRY_PROVIDERS=local, leave them enabled.
-        """
-        kwargs = dict(self.kwargs or {})
+    def _build_client(self, cls):
+        client_kwargs = self.client_kwargs.copy()
         if not sdk_retries_for(PS.LOCAL):
-
-        return kwargs
+            client_kwargs["max_retries"] = 0

-
-        kw = dict(
+        client_init_kwargs = dict(
             api_key=self.api_key,
             base_url=self.base_url,
-            **
+            **client_kwargs,
         )
         try:
-            return cls(**
+            return cls(**client_init_kwargs)
         except TypeError as e:
-            #
+            # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
             if "max_retries" in str(e):
-
-                return cls(**
+                client_init_kwargs.pop("max_retries", None)
+                return cls(**client_init_kwargs)
             raise
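The refactor above replaces the old keyfile-only constructor with explicit, overridable arguments. A minimal usage sketch under stated assumptions (the endpoint, key, and model values are placeholders for a locally hosted OpenAI-compatible server; `timeout` stands in for any extra client kwarg):

from deepeval.models.embedding_models.local_embedding_model import (
    LocalEmbeddingModel,
)

# Each named argument overrides the corresponding KEY_FILE_HANDLER entry;
# omit any of them to fall back to the stored configuration.
embedder = LocalEmbeddingModel(
    api_key="sk-anything",                 # placeholder key for a local server
    base_url="http://localhost:8080/v1",   # placeholder OpenAI-compatible endpoint
    model="nomic-embed-text",              # placeholder model name
    generation_kwargs={},                  # forwarded to embeddings.create(...)
    timeout=30,                            # remaining kwargs go to the OpenAI client
)

vector = embedder.embed_text("hello world")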
deepeval/models/embedding_models/ollama_embedding_model.py
CHANGED

@@ -1,5 +1,5 @@
 from ollama import Client, AsyncClient
-from typing import List
+from typing import List, Optional, Dict

 from deepeval.key_handler import EmbeddingKeyValues, KEY_FILE_HANDLER
 from deepeval.models import DeepEvalBaseEmbeddingModel
@@ -13,27 +13,28 @@ retry_ollama = create_retry_decorator(PS.OLLAMA)


 class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
-    def __init__(
-        self
+    def __init__(
+        self,
+        model: Optional[str] = None,
+        host: Optional[str] = None,
+        generation_kwargs: Optional[Dict] = None,
+        **client_kwargs,
+    ):
+        self.host = host or KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_BASE_URL
         )
-        model_name = KEY_FILE_HANDLER.fetch_data(
+        self.model_name = model or KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_MODEL_NAME
         )
-
-        self.
-
-        )
-        self.args = args
-        self.kwargs = kwargs
-        super().__init__(model_name)
+        self.client_kwargs = client_kwargs or {}
+        self.generation_kwargs = generation_kwargs or {}
+        super().__init__(self.model_name)

     @retry_ollama
     def embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model()
         response = embedding_model.embed(
-            model=self.model_name,
-            input=text,
+            model=self.model_name, input=text, **self.generation_kwargs
         )
         return response["embeddings"][0]

@@ -41,8 +42,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model()
         response = embedding_model.embed(
-            model=self.model_name,
-            input=texts,
+            model=self.model_name, input=texts, **self.generation_kwargs
         )
         return response["embeddings"]

@@ -50,8 +50,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embed(
-            model=self.model_name,
-            input=text,
+            model=self.model_name, input=text, **self.generation_kwargs
         )
         return response["embeddings"][0]

@@ -59,8 +58,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embed(
-            model=self.model_name,
-            input=texts,
+            model=self.model_name, input=texts, **self.generation_kwargs
         )
         return response["embeddings"]

@@ -74,7 +72,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
         return self._build_client(AsyncClient)

     def _build_client(self, cls):
-        return cls(host=self.
+        return cls(host=self.host, **self.client_kwargs)

     def get_model_name(self):
         return f"{self.model_name} (Ollama)"
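Same pattern for the Ollama model: `model` and `host` can now be passed directly, `generation_kwargs` is forwarded to `embed(...)`, and remaining kwargs reach the `Client`/`AsyncClient` constructor. A sketch with placeholder values (`truncate` is an assumption about a kwarg the Ollama embed API accepts):

from deepeval.models.embedding_models.ollama_embedding_model import (
    OllamaEmbeddingModel,
)

embedder = OllamaEmbeddingModel(
    model="nomic-embed-text",              # placeholder Ollama model tag
    host="http://localhost:11434",         # placeholder Ollama host
    generation_kwargs={"truncate": True},  # forwarded to embed(...)
)

vectors = embedder.embed_texts(["first document", "second document"])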
deepeval/models/embedding_models/openai_embedding_model.py
CHANGED

@@ -19,27 +19,28 @@ default_openai_embedding_model = "text-embedding-3-small"


 class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
+
     def __init__(
         self,
         model: Optional[str] = None,
-
-
+        openai_api_key: Optional[str] = None,
+        generation_kwargs: Optional[Dict] = None,
+        **client_kwargs,
     ):
-
-        if
+        self.openai_api_key = openai_api_key
+        self.model_name = model if model else default_openai_embedding_model
+        if self.model_name not in valid_openai_embedding_models:
             raise ValueError(
                 f"Invalid model. Available OpenAI Embedding models: {', '.join(valid_openai_embedding_models)}"
             )
-        self.
-        self.
-        self.kwargs = kwargs
+        self.client_kwargs = client_kwargs or {}
+        self.generation_kwargs = generation_kwargs or {}

     @retry_openai
     def embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
-            input=text,
-            model=self.model_name,
+            input=text, model=self.model_name, **self.generation_kwargs
         )
         return response.data[0].embedding

@@ -47,8 +48,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
-            input=texts,
-            model=self.model_name,
+            input=texts, model=self.model_name, **self.generation_kwargs
         )
         return [item.embedding for item in response.data]

@@ -56,8 +56,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
-            input=text,
-            model=self.model_name,
+            input=text, model=self.model_name, **self.generation_kwargs
         )
         return response.data[0].embedding

@@ -65,8 +64,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
-            input=texts,
-            model=self.model_name,
+            input=texts, model=self.model_name, **self.generation_kwargs
         )
         return [item.embedding for item in response.data]

@@ -82,27 +80,20 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
             return self._build_client(OpenAI)
         return self._build_client(AsyncOpenAI)

-    def
-
-        If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
-        If the user opts into SDK retries for 'openai' via DEEPEVAL_SDK_RETRY_PROVIDERS,
-        leave their retry settings as is.
-        """
-        kwargs = dict(self.kwargs or {})
+    def _build_client(self, cls):
+        client_kwargs = self.client_kwargs.copy()
         if not sdk_retries_for(PS.OPENAI):
-
-        return kwargs
+            client_kwargs["max_retries"] = 0

-
-
-
-            **self._client_kwargs(),
+        client_init_kwargs = dict(
+            api_key=self.openai_api_key,
+            **client_kwargs,
         )
         try:
-            return cls(**
+            return cls(**client_init_kwargs)
         except TypeError as e:
             # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
             if "max_retries" in str(e):
-
-                return cls(**
+                client_init_kwargs.pop("max_retries", None)
+                return cls(**client_init_kwargs)
             raise
deepeval/openai/extractors.py
CHANGED
@@ -4,17 +4,26 @@ from typing import Any, Union, Dict
 from openai.types.responses import Response

 from deepeval.test_case.llm_test_case import ToolCall
-from deepeval.openai.utils import
+from deepeval.openai.utils import (
+    render_response_input,
+    stringify_multimodal_content,
+    render_messages,
+)
 from deepeval.openai.types import InputParameters, OutputParameters
+from deepeval.tracing.types import Message


-
+# guarding against errors to be compatible with legacy APIs
+def safe_extract_input_parameters(
     is_completion: bool, kwargs: Dict[str, Any]
 ) -> InputParameters:
-
-
-
-
+    try:
+        if is_completion:
+            return extract_input_parameters_from_completion(kwargs)
+        else:
+            return extract_input_parameters_from_response(kwargs)
+    except:
+        return InputParameters(model="NA")


 def extract_input_parameters_from_completion(
@@ -43,6 +52,9 @@ def extract_input_parameters_from_completion(
     if len(user_messages) > 0:
         input_arg = user_messages[0]

+    # render messages
+    messages = render_messages(messages)
+
     return InputParameters(
         model=model,
         input=stringify_multimodal_content(input_arg),
@@ -64,7 +76,24 @@ def extract_input_parameters_from_response(
         if tools is not None
         else None
     )
-    messages =
+    messages = []
+    if isinstance(input_payload, list):
+        messages = render_response_input(input_payload)
+    elif isinstance(input_payload, str):
+        messages = [
+            {
+                "role": "user",
+                "content": input_payload,
+            }
+        ]
+    if instructions:
+        messages.insert(
+            0,
+            {
+                "role": "system",
+                "content": instructions,
+            },
+        )
     return InputParameters(
         model=model,
         input=stringify_multimodal_content(input_payload),
@@ -75,19 +104,24 @@ def extract_input_parameters_from_response(
     )


-def
+def safe_extract_output_parameters(
     is_completion: bool,
     response: Union[ChatCompletion, ParsedChatCompletion, Response],
     input_parameters: InputParameters,
 ) -> OutputParameters:
-
-
-
-
-
-
-
-
+
+    # guarding against errors to be compatible with legacy APIs
+    try:
+        if is_completion:
+            return extract_output_parameters_from_completion(
+                response, input_parameters
+            )
+        else:
+            return extract_output_parameters_from_response(
+                response, input_parameters
+            )
+    except:
+        return OutputParameters()


 def extract_output_parameters_from_completion(
@@ -113,6 +147,12 @@ def extract_output_parameters_from_completion(
         )
     )

+    if not output and tools_called:
+        tool_calls = []
+        for tool_call in tools_called:
+            tool_calls.append(tool_call)
+        output = tool_calls
+
     return OutputParameters(
         output=output,
         prompt_tokens=prompt_tokens,
@@ -144,6 +184,11 @@ def extract_output_parameters_from_response(
                 description=tool_descriptions.get(tool_call.name),
             )
         )
+    if not output and tools_called:
+        tool_calls = []
+        for tool_call in tools_called:
+            tool_calls.append(tool_call)
+        output = tool_calls

     return OutputParameters(
         output=output,
deepeval/openai/patch.py
CHANGED
@@ -3,8 +3,8 @@ from functools import wraps


 from deepeval.openai.extractors import (
-
-
+    safe_extract_output_parameters,
+    safe_extract_input_parameters,
     InputParameters,
     OutputParameters,
 )
@@ -16,7 +16,6 @@ from deepeval.tracing.context import (
 )
 from deepeval.tracing import observe
 from deepeval.tracing.trace_context import current_llm_context
-from deepeval.openai.utils import create_child_tool_spans

 # Store original methods for safety and potential unpatching
 _ORIGINAL_METHODS = {}
@@ -123,7 +122,7 @@ def _patch_async_openai_client_method(
 ):
     @wraps(orig_method)
     async def patched_async_openai_method(*args, **kwargs):
-        input_parameters: InputParameters =
+        input_parameters: InputParameters = safe_extract_input_parameters(
             is_completion_method, kwargs
         )

@@ -137,7 +136,7 @@ def _patch_async_openai_client_method(
         )
         async def llm_generation(*args, **kwargs):
             response = await orig_method(*args, **kwargs)
-            output_parameters =
+            output_parameters = safe_extract_output_parameters(
                 is_completion_method, response, input_parameters
             )
             _update_all_attributes(
@@ -162,7 +161,7 @@ def _patch_sync_openai_client_method(
 ):
     @wraps(orig_method)
     def patched_sync_openai_method(*args, **kwargs):
-        input_parameters: InputParameters =
+        input_parameters: InputParameters = safe_extract_input_parameters(
             is_completion_method, kwargs
         )

@@ -176,7 +175,7 @@ def _patch_sync_openai_client_method(
         )
         def llm_generation(*args, **kwargs):
             response = orig_method(*args, **kwargs)
-            output_parameters =
+            output_parameters = safe_extract_output_parameters(
                 is_completion_method, response, input_parameters
             )
             _update_all_attributes(
@@ -205,8 +204,8 @@ def _update_all_attributes(
 ):
     """Update span and trace attributes with input/output parameters."""
     update_current_span(
-        input=input_parameters.
-        output=output_parameters.output or
+        input=input_parameters.messages,
+        output=output_parameters.output or output_parameters.tools_called,
         tools_called=output_parameters.tools_called,
         # attributes to be added
         expected_output=expected_output,
@@ -223,9 +222,6 @@ def _update_all_attributes(
         prompt=llm_context.prompt,
     )

-    if output_parameters.tools_called:
-        create_child_tool_spans(output_parameters)
-
     __update_input_and_output_of_current_trace(
         input_parameters, output_parameters
     )
deepeval/openai/types.py
CHANGED
@@ -14,7 +14,7 @@ class InputParameters(BaseModel):


 class OutputParameters(BaseModel):
-    output: Optional[
+    output: Optional[Any] = None
     prompt_tokens: Optional[int] = None
     completion_tokens: Optional[int] = None
     tools_called: Optional[List[ToolCall]] = None
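Widening `output` to `Optional[Any]` is what lets the extractor hunks above assign the tool-call list to `output` when a completion returns no text. A sketch:

from deepeval.openai.types import OutputParameters

# plain text output
text_result = OutputParameters(output="Hello!", prompt_tokens=12, completion_tokens=3)

# tool-call output: with Optional[Any], a list is valid too
tool_result = OutputParameters(output=[{"name": "get_weather", "arguments": {"city": "Paris"}}])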
deepeval/openai/utils.py
CHANGED
@@ -1,6 +1,10 @@
 import json
 import uuid
-from typing import Any, List, Optional
+from typing import Any, Dict, List, Optional, Iterable
+
+from openai.types.chat.chat_completion_message_param import (
+    ChatCompletionMessageParam,
+)

 from deepeval.tracing.types import ToolSpan, TraceSpanStatus
 from deepeval.tracing.context import current_span_context
@@ -126,3 +130,106 @@ def stringify_multimodal_content(content: Any) -> str:

     # unknown dicts and types returned as shortened JSON
     return _compact_dump(content)
+
+
+def render_messages(
+    messages: Iterable[ChatCompletionMessageParam],
+) -> List[Dict[str, Any]]:
+
+    messages_list = []
+
+    for message in messages:
+        role = message.get("role")
+        content = message.get("content")
+        if role == "assistant" and message.get("tool_calls"):
+            tool_calls = message.get("tool_calls")
+            if isinstance(tool_calls, list):
+                for tool_call in tool_calls:
+                    # Extract type - either "function" or "custom"
+                    tool_type = tool_call.get("type", "function")
+
+                    # Extract name and arguments based on type
+                    if tool_type == "function":
+                        function_data = tool_call.get("function", {})
+                        name = function_data.get("name", "")
+                        arguments = function_data.get("arguments", "")
+                    elif tool_type == "custom":
+                        custom_data = tool_call.get("custom", {})
+                        name = custom_data.get("name", "")
+                        arguments = custom_data.get("input", "")
+                    else:
+                        name = ""
+                        arguments = ""
+
+                    messages_list.append(
+                        {
+                            "id": tool_call.get("id", ""),
+                            "call_id": tool_call.get(
+                                "id", ""
+                            ),  # OpenAI uses 'id', not 'call_id'
+                            "name": name,
+                            "type": tool_type,
+                            "arguments": json.loads(arguments),
+                        }
+                    )
+
+        elif role == "tool":
+            messages_list.append(
+                {
+                    "call_id": message.get("tool_call_id", ""),
+                    "type": role,  # "tool"
+                    "output": message.get("content", {}),
+                }
+            )
+        else:
+            messages_list.append(
+                {
+                    "role": role,
+                    "content": content,
+                }
+            )
+
+    return messages_list
+
+
+def render_response_input(input: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+
+    messages_list = []
+
+    for item in input:
+        type = item.get("type")
+        role = item.get("role")
+
+        if type == "message":
+            messages_list.append(
+                {
+                    "role": role,
+                    "content": item.get("content"),
+                }
+            )
+        else:
+            messages_list.append(item)
+
+    return messages_list
+
+
+def _render_content(content: Dict[str, Any], indent: int = 0) -> str:
+    """
+    Renders a dictionary as a formatted string with indentation for nested structures.
+    """
+    if not content:
+        return ""
+
+    lines = []
+    prefix = " " * indent
+
+    for key, value in content.items():
+        if isinstance(value, dict):
+            lines.append(f"{prefix}{key}:")
+            lines.append(_render_content(value, indent + 1))
+        elif isinstance(value, list):
+            lines.append(f"{prefix}{key}: {_compact_dump(value)}")
+        else:
+            lines.append(f"{prefix}{key}: {value}")
+
+    return "\n".join(lines)
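`render_messages` normalizes a Chat Completions message list into flat dicts, splitting assistant tool calls into their own entries. A worked example (message contents are made up):

from deepeval.openai.utils import render_messages

messages = [
    {"role": "user", "content": "What's the weather in Paris?"},
    {
        "role": "assistant",
        "tool_calls": [
            {
                "id": "call_123",
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "arguments": '{"city": "Paris"}',  # JSON string, as OpenAI returns it
                },
            }
        ],
    },
    {"role": "tool", "tool_call_id": "call_123", "content": "18°C and sunny"},
]

rendered = render_messages(messages)
# [
#     {"role": "user", "content": "What's the weather in Paris?"},
#     {"id": "call_123", "call_id": "call_123", "name": "get_weather",
#      "type": "function", "arguments": {"city": "Paris"}},
#     {"call_id": "call_123", "type": "tool", "output": "18°C and sunny"},
# ]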
deepeval/prompt/prompt.py
CHANGED
@@ -202,6 +202,7 @@ class Prompt:
                 "Unable to interpolate empty prompt template. Please pull a prompt from Confident AI or set template manually to continue."
             )

+            print("@@@@@")
             return interpolate_text(interpolation_type, text_template, **kwargs)

         elif prompt_type == PromptType.LIST: