deepeval 3.6.0__py3-none-any.whl → 3.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/evaluate/execute.py +11 -2
- deepeval/metrics/hallucination/hallucination.py +1 -1
- deepeval/metrics/tool_correctness/tool_correctness.py +5 -10
- deepeval/openai_agents/callback_handler.py +10 -3
- deepeval/test_case/llm_test_case.py +3 -0
- deepeval/tracing/otel/exporter.py +6 -0
- deepeval/tracing/otel/utils.py +79 -18
- deepeval/tracing/tracing.py +7 -1
- deepeval/tracing/utils.py +57 -6
- {deepeval-3.6.0.dist-info → deepeval-3.6.2.dist-info}/METADATA +2 -2
- {deepeval-3.6.0.dist-info → deepeval-3.6.2.dist-info}/RECORD +15 -15
- {deepeval-3.6.0.dist-info → deepeval-3.6.2.dist-info}/LICENSE.md +0 -0
- {deepeval-3.6.0.dist-info → deepeval-3.6.2.dist-info}/WHEEL +0 -0
- {deepeval-3.6.0.dist-info → deepeval-3.6.2.dist-info}/entry_points.txt +0 -0
deepeval/_version.py
CHANGED
@@ -1 +1 @@
-__version__: str = "3.6.0"
+__version__: str = "3.6.2"

deepeval/evaluate/execute.py
CHANGED
@@ -836,7 +836,13 @@ def execute_agentic_test_cases(
             ):
                 if asyncio.iscoroutinefunction(observed_callback):
                     loop = get_or_create_event_loop()
-                    loop.run_until_complete(observed_callback(golden.input))
+                    coro = observed_callback(golden.input)
+                    loop.run_until_complete(
+                        asyncio.wait_for(
+                            coro,
+                            timeout=settings.DEEPEVAL_PER_TASK_TIMEOUT_SECONDS,
+                        )
+                    )
                 else:
                     observed_callback(golden.input)
                 current_trace: Trace = current_trace_context.get()
@@ -1190,7 +1196,10 @@ async def _a_execute_agentic_test_case(
                 _pbar_callback_id=pbar_tags_id,
             ):
                 if asyncio.iscoroutinefunction(observed_callback):
-                    await observed_callback(golden.input)
+                    await asyncio.wait_for(
+                        observed_callback(golden.input),
+                        timeout=settings.DEEPEVAL_PER_TASK_TIMEOUT_SECONDS,
+                    )
                 else:
                     observed_callback(golden.input)
                 current_trace: Trace = current_trace_context.get()

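Note: both callsites now bound each golden's callback with asyncio.wait_for, so a single hung agent invocation can no longer stall the whole evaluation run. A minimal sketch of the pattern, using a hypothetical hard-coded timeout in place of settings.DEEPEVAL_PER_TASK_TIMEOUT_SECONDS:

import asyncio

PER_TASK_TIMEOUT_SECONDS = 2.0  # stand-in for settings.DEEPEVAL_PER_TASK_TIMEOUT_SECONDS


async def observed_callback(user_input: str) -> str:
    # Simulate an agent invocation that takes too long.
    await asyncio.sleep(5)
    return f"answer to {user_input!r}"


def run_with_timeout() -> None:
    loop = asyncio.new_event_loop()
    try:
        coro = observed_callback("What is DeepEval?")
        # wait_for cancels the wrapped task and raises TimeoutError once
        # the timeout elapses, so one stuck golden cannot hang the run.
        loop.run_until_complete(
            asyncio.wait_for(coro, timeout=PER_TASK_TIMEOUT_SECONDS)
        )
    except asyncio.TimeoutError:
        print("callback timed out")
    finally:
        loop.close()


if __name__ == "__main__":
    run_with_timeout()  # prints "callback timed out" after ~2 seconds
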
deepeval/metrics/hallucination/hallucination.py
CHANGED
@@ -30,7 +30,7 @@ class HallucinationMetric(BaseMetric):
         threshold: float = 0.5,
         model: Optional[Union[str, DeepEvalBaseLLM]] = None,
         include_reason: bool = True,
-        async_mode: bool =
+        async_mode: bool = True,
         strict_mode: bool = False,
         verbose_mode: bool = False,
         evaluation_template: Type[

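Note: the only change here sets the default so hallucination evaluations run asynchronously unless callers opt out. A hedged usage sketch built from the constructor parameters visible in the hunk above:

from deepeval.metrics import HallucinationMetric

# Parameters mirror the signature shown in the diff; async_mode=True is
# now the default, so passing it explicitly is only for emphasis.
metric = HallucinationMetric(
    threshold=0.5,
    include_reason=True,
    async_mode=True,
    strict_mode=False,
    verbose_mode=False,
)
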
deepeval/metrics/tool_correctness/tool_correctness.py
CHANGED
@@ -152,19 +152,14 @@ class ToolCorrectnessMetric(BaseMetric):
 
     # Calculate score
    def _calculate_score(self):
-        if self.should_exact_match:
+        # Fix: handle empty expected_tools to avoid ZeroDivisionError
+        if len(self.expected_tools) == 0:
+            score = 1.0 if len(self.tools_called) == 0 else 0.0
+        elif self.should_exact_match:
             score = self._calculate_exact_match_score()
         elif self.should_consider_ordering:
             _, weighted_length = self._compute_weighted_lcs()
-            if (
-                len(self.tools_called) == len(self.expected_tools)
-                and len(self.expected_tools) == 0
-            ):
-                score = 1.0
-            elif len(self.expected_tools) == 0:
-                score = 0.0
-            else:
-                score = weighted_length / len(self.expected_tools)
+            score = weighted_length / len(self.expected_tools)
         else:
             score = self._calculate_non_exact_match_score()
         return 0 if self.strict_mode and score < self.threshold else score

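Note: the empty expected_tools case is now decided up front (perfect score only when nothing was called), instead of reaching branches that divide by len(self.expected_tools). A simplified stand-in for the non-exact-match path, using plain tool-name lists rather than deepeval's ToolCall objects:

from typing import List


def tool_correctness_score(tools_called: List[str], expected_tools: List[str]) -> float:
    # Empty expectation is decided up front: perfect only if nothing was
    # called either, instead of dividing by len(expected_tools) == 0.
    if len(expected_tools) == 0:
        return 1.0 if len(tools_called) == 0 else 0.0
    matched = sum(1 for tool in expected_tools if tool in tools_called)
    return matched / len(expected_tools)


print(tool_correctness_score([], []))                                 # 1.0
print(tool_correctness_score(["web_search"], []))                     # 0.0
print(tool_correctness_score(["web_search"], ["web_search", "sql"]))  # 0.5
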
deepeval/openai_agents/callback_handler.py
CHANGED
@@ -83,7 +83,9 @@ class DeepEvalTracingProcessor(TracingProcessor):
         if not span.started_at:
             return
         current_span = current_span_context.get()
-        if current_span and isinstance(current_span, LlmSpan):
+        if current_span and isinstance(
+            current_span, LlmSpan
+        ):  # llm span started by
             return
 
         span_type = self.get_span_kind(span.span_data)
@@ -101,10 +103,15 @@ class DeepEvalTracingProcessor(TracingProcessor):
                 current_trace_context.get(), span.span_data
             )
 
+        span_type = self.get_span_kind(span.span_data)
         current_span = current_span_context.get()
-        if current_span and isinstance(current_span, LlmSpan):
+        if (
+            current_span
+            and isinstance(current_span, LlmSpan)
+            and span_type == "llm"
+        ):  # addtional check if the span kind data is llm too
             update_span_properties(current_span, span.span_data)
-
+
         observer = self.span_observers.pop(span.span_id, None)
         if observer:
             observer.__exit__(None, None, None)

deepeval/test_case/llm_test_case.py
CHANGED
@@ -1,4 +1,5 @@
 from pydantic import (
+    ConfigDict,
     Field,
     BaseModel,
     model_validator,
@@ -151,6 +152,8 @@ class ToolCall(BaseModel):
 
 
 class LLMTestCase(BaseModel):
+    model_config = ConfigDict(extra="ignore")
+
     input: str
     actual_output: Optional[str] = Field(
         default=None,

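Note: with extra="ignore" in the model config, unexpected keyword arguments are silently dropped rather than raising a pydantic ValidationError. A toy model (not the real LLMTestCase definition) showing the behaviour:

from typing import Optional

from pydantic import BaseModel, ConfigDict


class ToyTestCase(BaseModel):
    model_config = ConfigDict(extra="ignore")

    input: str
    actual_output: Optional[str] = None


# "legacy_field" is not declared on the model; with extra="ignore" it is
# discarded instead of triggering a ValidationError.
case = ToyTestCase(input="hi", actual_output="hello", legacy_field="dropped")
print(case.model_dump())  # {'input': 'hi', 'actual_output': 'hello'}
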
deepeval/tracing/otel/exporter.py
CHANGED
@@ -42,6 +42,7 @@ from deepeval.tracing.types import TraceAttributes
 from deepeval.test_case import ToolCall
 from dataclasses import dataclass
 import deepeval
+from deepeval.tracing.utils import make_json_serializable_for_metadata
 
 
 @dataclass
@@ -360,6 +361,8 @@ class ConfidentSpanExporter(SpanExporter):
                 raw_trace_expected_tools
             )
             trace_metadata = self._parse_json_string(raw_trace_metadata)
+            if trace_metadata:
+                trace_metadata = make_json_serializable_for_metadata(trace_metadata)
             trace_metric_collection = parse_string(raw_trace_metric_collection)
 
             base_span_wrapper.trace_input = trace_input
@@ -426,6 +429,9 @@ class ConfidentSpanExporter(SpanExporter):
         span_tools_called = self._parse_list_of_tools(raw_span_tools_called)
         span_expected_tools = self._parse_list_of_tools(raw_span_expected_tools)
         span_metadata = self._parse_json_string(raw_span_metadata)
+        if span_metadata:
+            span_metadata = make_json_serializable_for_metadata(span_metadata)
+
         span_metric_collection = parse_string(raw_span_metric_collection)
 
         # Set Span Attributes

deepeval/tracing/otel/utils.py
CHANGED
@@ -1,8 +1,15 @@
+import json
+
 from typing import List, Optional, Tuple, Any
+from opentelemetry.sdk.trace.export import ReadableSpan
+
+from deepeval.evaluate.utils import create_api_test_case
+from deepeval.test_run.api import LLMApiTestCase
+from deepeval.test_run.test_run import global_test_run_manager
 from deepeval.tracing.types import Trace, LLMTestCase, ToolCall
 from deepeval.tracing import trace_manager, BaseSpan
-from
-
+from deepeval.tracing.utils import make_json_serializable
+
 
 GEN_AI_OPERATION_NAMES = ["chat", "generate_content", "task_completion"]
 
@@ -103,11 +110,14 @@ def check_llm_input_from_gen_ai_attributes(
     output = None
     try:
         input = json.loads(span.attributes.get("gen_ai.input.messages"))
-    except Exception
+        input = _flatten_input(input)
+
+    except Exception:
         pass
     try:
         output = json.loads(span.attributes.get("gen_ai.output.messages"))
-    except Exception
+        output = _flatten_input(output)
+    except Exception:
         pass
 
     if input is None and output is None:
@@ -121,18 +131,73 @@ def check_llm_input_from_gen_ai_attributes(
             and last_event.get("event.name") == "gen_ai.choice"
         ):
             output = last_event
-    except Exception
+    except Exception:
         pass
 
     return input, output
 
 
+def _flatten_input(input: list) -> list:
+    if input and isinstance(input, list):
+        try:
+            result: List[dict] = []
+            for m in input:
+                if isinstance(m, dict):
+                    role = m.get("role")
+                    if not role:
+                        role = "assistant"
+                    parts = m.get("parts")
+                    if parts:
+                        for part in parts:
+                            if isinstance(part, dict):
+                                ptype = part.get("type")
+                                if ptype == "text":
+                                    result.append(
+                                        {
+                                            "role": role,
+                                            "content": part.get("content"),
+                                        }
+                                    )
+                                else:
+                                    result.append(
+                                        {
+                                            "role": role,
+                                            "content": make_json_serializable(
+                                                part
+                                            ),
+                                        }
+                                    )
+                            else:
+                                result.append(
+                                    {
+                                        "role": role,
+                                        "content": make_json_serializable(part),
+                                    }
+                                )
+                    else:
+                        result.append(
+                            {"role": role, "content": m.get("content")}
+                        )  # no parts
+                else:
+                    result.append(
+                        {
+                            "role": "assistant",
+                            "content": make_json_serializable(m),
+                        }
+                    )
+            return result
+        except Exception:
+            return input
+
+    return input
+
+
 def check_tool_name_from_gen_ai_attributes(span: ReadableSpan) -> Optional[str]:
     try:
         gen_ai_tool_name = span.attributes.get("gen_ai.tool.name")
         if gen_ai_tool_name:
             return gen_ai_tool_name
-    except Exception
+    except Exception:
         pass
 
     return None
@@ -145,7 +210,7 @@ def check_tool_input_parameters_from_gen_ai_attributes(
         tool_arguments = span.attributes.get("tool_arguments")
         if tool_arguments:
             return json.loads(tool_arguments)
-    except Exception
+    except Exception:
         pass
 
     return None
@@ -164,7 +229,7 @@ def check_span_type_from_gen_ai_attributes(span: ReadableSpan):
 
         elif gen_ai_tool_name:
             return "tool"
-    except Exception
+    except Exception:
         pass
 
     return "base"
@@ -175,7 +240,7 @@ def check_model_from_gen_ai_attributes(span: ReadableSpan):
         gen_ai_request_model_name = span.attributes.get("gen_ai.request.model")
         if gen_ai_request_model_name:
             return gen_ai_request_model_name
-    except Exception
+    except Exception:
         pass
 
     return None
@@ -226,7 +291,7 @@ def prepare_trace_llm_test_case(span: ReadableSpan) -> Optional[LLMTestCase]:
                 tools_called.append(
                     ToolCall.model_validate_json(tool_call_json_str)
                 )
-    except Exception
+    except Exception:
         pass
 
     _expected_tools = span.attributes.get(
@@ -239,7 +304,7 @@ def prepare_trace_llm_test_case(span: ReadableSpan) -> Optional[LLMTestCase]:
                 expected_tools.append(
                     ToolCall.model_validate_json(tool_call_json_str)
                 )
-    except Exception
+    except Exception:
         pass
 
     test_case.tools_called = tools_called
@@ -268,12 +333,6 @@ def parse_list_of_strings(context: List[str]) -> List[str]:
     return parsed_context
 
 
-from deepeval.evaluate.utils import create_api_test_case
-from deepeval.test_run.api import LLMApiTestCase
-from deepeval.test_run.test_run import global_test_run_manager
-from typing import Optional
-
-
 def post_test_run(traces: List[Trace], test_run_id: Optional[str]):
     # Accept single trace or list of traces
     if isinstance(traces, Trace):
@@ -374,13 +433,15 @@ def check_pydantic_ai_agent_input_output(
     except Exception:
         pass
 
+    input_val = _flatten_input(input_val)
+    output_val = _flatten_input(output_val)
     return input_val, output_val
 
 
 def check_tool_output(span: ReadableSpan):
     try:
         return span.attributes.get("tool_response")
-    except Exception
+    except Exception:
         pass
     return None
 

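Note: the new _flatten_input helper normalizes OTel gen_ai messages, which nest their content under a "parts" list, into flat {"role", "content"} dicts before they reach deepeval's test cases. A toy re-implementation covering only the text-part case, to show the shape of the transformation:

from typing import Any, Dict, List


def flatten_gen_ai_messages(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    flat: List[Dict[str, Any]] = []
    for message in messages:
        role = message.get("role") or "assistant"
        for part in message.get("parts", []):
            if part.get("type") == "text":
                flat.append({"role": role, "content": part.get("content")})
    return flat


raw = [{"role": "user", "parts": [{"type": "text", "content": "What is DeepEval?"}]}]
print(flatten_gen_ai_messages(raw))
# [{'role': 'user', 'content': 'What is DeepEval?'}]
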
deepeval/tracing/tracing.py
CHANGED
@@ -208,7 +208,13 @@ class TraceManager:
         else:
             # print(f"Ending trace: {trace.root_spans}")
             self.environment = Environment.TESTING
-            trace.root_spans = [trace.root_spans[0].children[0]]
+            if (
+                trace.root_spans
+                and len(trace.root_spans) > 0
+                and trace.root_spans[0].children
+                and len(trace.root_spans[0].children) > 0
+            ):
+                trace.root_spans = [trace.root_spans[0].children[0]]
         for root_span in trace.root_spans:
             root_span.parent_uuid = None
 
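Note: in testing mode the trace is collapsed to the first child of the first root span; the new guard only performs that unwrap when both levels actually exist, so traces with no root spans or childless roots no longer raise an IndexError. A small stand-in with plain dicts in place of deepeval's span objects:

root_spans = [{"name": "runner", "children": [{"name": "agent", "children": []}]}]

# Only collapse to the first child when both the root list and its
# children actually exist; an empty trace is left untouched.
if root_spans and root_spans[0].get("children"):
    root_spans = [root_spans[0]["children"][0]]

print(root_spans)  # [{'name': 'agent', 'children': []}]
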
deepeval/tracing/utils.py
CHANGED
@@ -1,15 +1,12 @@
 import os
-import time
 import inspect
 import json
 import sys
-import difflib
 from datetime import datetime, timezone
 from enum import Enum
 from time import perf_counter
-import time
 from collections import deque
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional
 
 from deepeval.constants import CONFIDENT_TRACING_ENABLED
 
@@ -100,6 +97,60 @@ def make_json_serializable(obj):
     return _serialize(obj)
 
 
+def make_json_serializable_for_metadata(obj):
+    """
+    Recursively converts an object to a JSON‐serializable form,
+    replacing circular references with "<circular>".
+    """
+    seen = set()  # Store `id` of objects we've visited
+
+    def _serialize(o):
+        oid = id(o)
+
+        # strip Nulls
+        if isinstance(o, str):
+            return _strip_nul(o)
+
+        # Primitive types are already serializable
+        if isinstance(o, (str, int, float, bool)) or o is None:
+            return str(o)
+
+        # Detect circular reference
+        if oid in seen:
+            return "<circular>"
+
+        # Mark current object as seen
+        seen.add(oid)
+
+        # Handle containers
+        if isinstance(o, (list, tuple, set, deque)):  # TODO: check if more
+            serialized = []
+            for item in o:
+                serialized.append(_serialize(item))
+
+            return serialized
+
+        if isinstance(o, dict):
+            result = {}
+            for key, value in o.items():
+                # Convert key to string (JSON only allows string keys)
+                result[str(key)] = _serialize(value)
+            return result
+
+        # Handle objects with __dict__
+        if hasattr(o, "__dict__"):
+            result = {}
+            for key, value in vars(o).items():
+                if not key.startswith("_"):
+                    result[key] = _serialize(value)
+            return result
+
+        # Fallback: convert to string
+        return _strip_nul(str(o))
+
+    return _serialize(obj)
+
+
 def to_zod_compatible_iso(
     dt: datetime, microsecond_precision: bool = False
 ) -> str:
@@ -135,8 +186,8 @@ def perf_counter_to_datetime(perf_counter_value: float) -> datetime:
 def replace_self_with_class_name(obj):
     try:
         return f"<{obj.__class__.__name__}>"
-    except:
-        return
+    except Exception:
+        return "<self>"
 
 
 def get_deepeval_trace_mode() -> Optional[str]:

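Note: the new helper mirrors make_json_serializable but stringifies primitives and swaps circular references for the literal "<circular>", so arbitrary user metadata can always be exported as JSON. A quick standalone check of that behaviour (a simplified stand-in, not the library function):

import json


def serialize_for_metadata(obj, seen=None):
    seen = set() if seen is None else seen
    if isinstance(obj, (str, int, float, bool)) or obj is None:
        return str(obj)
    if id(obj) in seen:
        return "<circular>"  # break the cycle instead of recursing forever
    seen.add(id(obj))
    if isinstance(obj, dict):
        return {str(k): serialize_for_metadata(v, seen) for k, v in obj.items()}
    if isinstance(obj, (list, tuple, set)):
        return [serialize_for_metadata(item, seen) for item in obj]
    return str(obj)


metadata = {"run": "demo"}
metadata["self"] = metadata  # circular reference
print(json.dumps(serialize_for_metadata(metadata)))
# {"run": "demo", "self": "<circular>"}
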
{deepeval-3.6.0.dist-info → deepeval-3.6.2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: deepeval
-Version: 3.6.0
+Version: 3.6.2
 Summary: The LLM Evaluation Framework
 Home-page: https://github.com/confident-ai/deepeval
 License: Apache-2.0
@@ -359,7 +359,7 @@ for golden in dataset.goldens:
 
 @pytest.mark.parametrize(
     "test_case",
-    dataset,
+    dataset.test_cases,
 )
 def test_customer_chatbot(test_case: LLMTestCase):
     answer_relevancy_metric = AnswerRelevancyMetric(threshold=0.5)

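Note: the README snippet in the package metadata now parametrizes over dataset.test_cases (the list of LLMTestCase objects) rather than the EvaluationDataset itself. A fuller sketch of how that corrected snippet is typically wired up; the inline test case is illustrative, and running it requires an LLM judge to be configured:

import pytest

from deepeval import assert_test
from deepeval.dataset import EvaluationDataset
from deepeval.metrics import AnswerRelevancyMetric
from deepeval.test_case import LLMTestCase

dataset = EvaluationDataset(
    test_cases=[
        LLMTestCase(
            input="What are your store hours?",
            actual_output="We are open 9am-5pm, Monday to Friday.",
        )
    ]
)


@pytest.mark.parametrize("test_case", dataset.test_cases)
def test_customer_chatbot(test_case: LLMTestCase):
    answer_relevancy_metric = AnswerRelevancyMetric(threshold=0.5)
    assert_test(test_case, [answer_relevancy_metric])
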
{deepeval-3.6.0.dist-info → deepeval-3.6.2.dist-info}/RECORD
CHANGED
@@ -1,5 +1,5 @@
 deepeval/__init__.py,sha256=6fsb813LD_jNhqR-xZnSdE5E-KsBbC3tc4oIg5ZMgTw,2115
-deepeval/_version.py,sha256=
+deepeval/_version.py,sha256=3BMVt8jAt3lUkzkZWaFVDhhP9a-3lhvDGzjhGKNfjCo,27
 deepeval/annotation/__init__.py,sha256=ZFhUVNNuH_YgQSZJ-m5E9iUb9TkAkEV33a6ouMDZ8EI,111
 deepeval/annotation/annotation.py,sha256=3j3-syeJepAcEj3u3e4T_BeRDzNr7yXGDIoNQGMKpwQ,2298
 deepeval/annotation/api.py,sha256=EYN33ACVzVxsFleRYm60KB4Exvff3rPJKt1VBuuX970,2147
@@ -159,7 +159,7 @@ deepeval/evaluate/api.py,sha256=rkblH0ZFAAdyuF0Ymh7JE1pIJPR9yFuPrn9SQaCEQp4,435
 deepeval/evaluate/compare.py,sha256=tdSJY4E7YJ_zO3dzvpwngZHLiUI2YQcTWJOLI83htsQ,9855
 deepeval/evaluate/configs.py,sha256=QfWjaWNxLsgEe8-5j4PIs5WcSyEckiWt0qdpXSpl57M,928
 deepeval/evaluate/evaluate.py,sha256=NPAJ2iJqJI_RurXKUIC0tft_ozYMIKwZf5iPfmnNhQc,10412
-deepeval/evaluate/execute.py,sha256=
+deepeval/evaluate/execute.py,sha256=XS0XtDGKC1ZOo09lthillfi5aDI5TWFbJ-Y7yICNvGo,89056
 deepeval/evaluate/types.py,sha256=IGZ3Xsj0UecPI3JNeTpJaK1gDvlepokfCmHwtItIW9M,831
 deepeval/evaluate/utils.py,sha256=kkliSGzuICeUsXDtlMMPfN95dUKlqarNhfciSffd4gI,23143
 deepeval/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -245,7 +245,7 @@ deepeval/metrics/g_eval/schema.py,sha256=V629txuDrr_2IEKEsgJVYYZb_pkdfcltQV9Zjvx
 deepeval/metrics/g_eval/template.py,sha256=mHj4-mr_HQwbCjpHg7lM_6UesoSatL3g8UGGQAOdT0U,4509
 deepeval/metrics/g_eval/utils.py,sha256=uUT86jRXVYvLDzcnZvvfWssDyGoBHb66nWcJSg4i1u4,8784
 deepeval/metrics/hallucination/__init__.py,sha256=rCVlHi2UGzDKmZKi0esFLafmshVBx2WZ0jiIb-KqcYQ,44
-deepeval/metrics/hallucination/hallucination.py,sha256=
+deepeval/metrics/hallucination/hallucination.py,sha256=8JN5pj5YWRtl7rgbbFQF6EVBCGm1NV9vaX3_5tScNs4,9548
 deepeval/metrics/hallucination/schema.py,sha256=V8xbrBLMwJfre-lPuDc7rMEdhHf_1hfgoW1jE_ULvAY,286
 deepeval/metrics/hallucination/template.py,sha256=hiss1soxSBFqzOt0KmHZdZUzoQsmXnslDyb8HsjALPs,2620
 deepeval/metrics/indicator.py,sha256=oewo_n5Qet9Zfzo2QQs-EQ8w92siuyDCAmoTZW45ndc,10244
@@ -348,7 +348,7 @@ deepeval/metrics/task_completion/schema.py,sha256=JfnZkbCh7skWvrESy65GEo6Rvo0FDJ
 deepeval/metrics/task_completion/task_completion.py,sha256=RKFkXCVOhO70I8A16zv5BCaV3QVKldNxawJ0T93U_Zc,8978
 deepeval/metrics/task_completion/template.py,sha256=4xjTBcGrPQxInbf8iwJOZyok9SQex1aCkbxKmfkXoA4,10437
 deepeval/metrics/tool_correctness/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deepeval/metrics/tool_correctness/tool_correctness.py,sha256=
+deepeval/metrics/tool_correctness/tool_correctness.py,sha256=4dS8o5pD2o9W2uDb-lFgulHpLI5kFhAlguWlffIreUU,11993
 deepeval/metrics/toxicity/__init__.py,sha256=1lgt8BKxfBDd7bfSLu_5kMzmsr9b2_ahPK9oq5zLkMk,39
 deepeval/metrics/toxicity/schema.py,sha256=7uUdzXqTvIIz5nfahlllo_fzVRXg7UeMeXn7Hl32pKY,459
 deepeval/metrics/toxicity/template.py,sha256=zl4y4Tg9gXkxKJ8aXVwj0cJ94pvfYuP7MTeV3dvB5yQ,5045
@@ -396,7 +396,7 @@ deepeval/openai/patch.py,sha256=tPDqXaBScBJveM9P5xLT_mVwkubw0bOey-efvdjZIfg,7466
 deepeval/openai/utils.py,sha256=-84VZGUsnzRkYAFWc_DGaGuQTDCUItk0VtUTdjtSxg4,2748
 deepeval/openai_agents/__init__.py,sha256=F4c6MtsdV7LWj0YamQcMGs4_u5sOYZJXWOQP8kV5xUg,314
 deepeval/openai_agents/agent.py,sha256=_SQdd0JzZK-ZvpP7yPEi22Y7fVk16PC00ROahdDQdCQ,951
-deepeval/openai_agents/callback_handler.py,sha256=
+deepeval/openai_agents/callback_handler.py,sha256=4Tt2OAGfYd35C5LBMekxz0SDivYryKGm3lxls1WT7cY,4842
 deepeval/openai_agents/extractors.py,sha256=jcV-IeWLIh64astJRy_dRBAbUOIab1vp0Wzda7AgVyk,13963
 deepeval/openai_agents/patch.py,sha256=MNvbGe5NLq0rC7L-7lnqcxKhclQvLuBKZnZyAifSHLY,10241
 deepeval/openai_agents/runner.py,sha256=WtHuzhYHgC571uJYGjbTz3R23VaKnlKybGJSRCxM9pY,12310
@@ -434,7 +434,7 @@ deepeval/telemetry.py,sha256=JPZw1VBJ5dGiS8k-dzWs5OhMbNlr65QgVretTy33WCg,21704
 deepeval/test_case/__init__.py,sha256=hLkHxGH0-FFhx4MlJwIbzNHL4pgyLGquh8l0qD-z_cQ,731
 deepeval/test_case/arena_test_case.py,sha256=PcfDxadlc4yW4AEDdvN32AeUpx2Sms1jvnbX31Xu65o,957
 deepeval/test_case/conversational_test_case.py,sha256=lF0V1yCGCInQetggm2wbXx-MkuMRs2ScwqIXCSwb1Fs,7534
-deepeval/test_case/llm_test_case.py,sha256=
+deepeval/test_case/llm_test_case.py,sha256=L-dCvJ4pMPPavZTyN9ZKN30h351DWI_TunmXfHPIjig,12180
 deepeval/test_case/mcp.py,sha256=Z625NLvz0E_UJpbyfyuAi_4nsqKH6DByBf0rfKd70xU,1879
 deepeval/test_case/mllm_test_case.py,sha256=8a0YoE72geX_fLI6yk_cObSxCPddwW-DOb-5OPE1-W8,5414
 deepeval/test_case/utils.py,sha256=5lT7QmhItsQHt44-qQfspuktilcrEyvl2cS0cgUJxds,809
@@ -453,16 +453,16 @@ deepeval/tracing/offline_evals/span.py,sha256=pXqTVXs-WnjRVpCYYEbNe0zSM6Wz9GsKHs
 deepeval/tracing/offline_evals/thread.py,sha256=bcSGFcZJKnszArOLIlWvnCyt0zSmsd7Xsw5rl4RTVFg,1981
 deepeval/tracing/offline_evals/trace.py,sha256=vTflaTKysKRiYvKA-Nx6PUJ3J6NrRLXiIdWieVcm90E,1868
 deepeval/tracing/otel/__init__.py,sha256=HQsaF5yLPwyW5qg8AOV81_nG_7pFHnatOTHi9Wx3HEk,88
-deepeval/tracing/otel/exporter.py,sha256=
-deepeval/tracing/otel/utils.py,sha256=
+deepeval/tracing/otel/exporter.py,sha256=wPO1ITKpjueLOSNLO6nD2QL9LAd8Xcu6en8hRkB61Wo,28891
+deepeval/tracing/otel/utils.py,sha256=4FqCwOi-iYhuQ3GhAkbbmXbfhvSLGj9DAdfPCrUIccs,14738
 deepeval/tracing/patchers.py,sha256=DAPNkhrDtoeyJIVeQDUMhTz-xGcXu00eqjQZmov8FiU,3096
 deepeval/tracing/perf_epoch_bridge.py,sha256=iyAPddB6Op7NpMtPHJ29lDm53Btz9yLaN6xSCfTRQm4,1825
-deepeval/tracing/tracing.py,sha256=
+deepeval/tracing/tracing.py,sha256=xZEyuxdGY259nQaDkGp_qO7Avriv8hrf4L15ZfeMNV8,42728
 deepeval/tracing/types.py,sha256=l_utWKerNlE5H3mOKpeUJLsvpP3cMyjH7HRANNgTmSQ,5306
-deepeval/tracing/utils.py,sha256=
+deepeval/tracing/utils.py,sha256=SLnks8apGlrV6uVnvFVl2mWYABEkvXbPXnQvq3KaU_o,7943
 deepeval/utils.py,sha256=-_o3W892u7naX4Y7a8if4mP0Rtkgtapg6Krr1ZBpj0o,17197
-deepeval-3.6.
-deepeval-3.6.
-deepeval-3.6.
-deepeval-3.6.
-deepeval-3.6.
+deepeval-3.6.2.dist-info/LICENSE.md,sha256=0ATkuLv6QgsJTBODUHC5Rak_PArA6gv2t7inJzNTP38,11352
+deepeval-3.6.2.dist-info/METADATA,sha256=TZ7FbJUYYZ1w2P-qmLZdIHB0zv4TnZ4VeLBgN9Bq6Yo,18754
+deepeval-3.6.2.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
+deepeval-3.6.2.dist-info/entry_points.txt,sha256=fVr8UphXTfJe9I2rObmUtfU3gkSrYeM0pLy-NbJYg10,94
+deepeval-3.6.2.dist-info/RECORD,,

{deepeval-3.6.0.dist-info → deepeval-3.6.2.dist-info}/LICENSE.md
File without changes

{deepeval-3.6.0.dist-info → deepeval-3.6.2.dist-info}/WHEEL
File without changes

{deepeval-3.6.0.dist-info → deepeval-3.6.2.dist-info}/entry_points.txt
File without changes